├── rust-toolchain ├── tests ├── benchmarks │ ├── __init__.py │ └── test_downsamplers.py ├── requirements-linting.txt ├── requirements.txt ├── test_config.py ├── test_rust_mods.py ├── test_algos_python_compliance.py └── test_tsdownsample.py ├── tsdownsample ├── py.typed ├── _python │ ├── __init__.py │ └── downsamplers.py ├── _rust │ └── __init__.py ├── __init__.py ├── downsamplers.py └── downsampling_interface.py ├── notebooks └── requirements.txt ├── .github ├── FUNDING.yml └── workflows │ ├── codspeed.yml │ ├── codeql.yml │ ├── ci-downsample_rs.yml │ └── ci-tsdownsample.yml ├── downsample_rs ├── README.md ├── dev_utils │ ├── src │ │ ├── lib.rs │ │ ├── config.rs │ │ └── utils.rs │ └── Cargo.toml ├── src │ ├── types.rs │ ├── lib.rs │ ├── helpers.rs │ ├── lttb.rs │ ├── minmaxlttb.rs │ ├── searchsorted.rs │ ├── minmax.rs │ └── m4.rs ├── Cargo.toml ├── LICENSE └── benches │ ├── bench_lttb.rs │ ├── bench_m4.rs │ ├── bench_minmax.rs │ ├── bench_minmaxlttb.rs │ └── results ├── Cargo.toml ├── LICENSE ├── Makefile ├── pyproject.toml ├── .gitignore ├── CONTRIBUTING.md ├── README.md └── src └── lib.rs /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly 2 | -------------------------------------------------------------------------------- /tests/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tsdownsample/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tsdownsample/_python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/requirements-linting.txt: -------------------------------------------------------------------------------- 1 | black 2 | ruff 3 | mypy 4 | -------------------------------------------------------------------------------- /notebooks/requirements.txt: -------------------------------------------------------------------------------- 1 | tsdownsample 2 | numpy 3 | pandas -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | pytest-benchmark 4 | -------------------------------------------------------------------------------- /tsdownsample/_rust/__init__.py: -------------------------------------------------------------------------------- 1 | # In this folder the compiled rust code should be placed. 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [jvdd, jonasvdd] 4 | -------------------------------------------------------------------------------- /downsample_rs/README.md: -------------------------------------------------------------------------------- 1 | # downsample_rs 2 | 3 | Implementation of (time series) downsampling algorithms in rust. 
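
The crate exposes one module per algorithm (`minmax`, `m4`, `lttb`, `minmaxlttb`), each with a sequential and, for most algorithms, a parallel variant. A minimal usage sketch is shown below; the functions are assumed here to return the indices of the selected samples, mirroring what the Python bindings expose:

```rust
use downsample_rs::lttb;

fn main() {
    // Illustrative input: an integer x-range and a smooth f32 signal as y.
    let x: Vec<i32> = (0..10_000).collect();
    let y: Vec<f32> = x.iter().map(|&i| (i as f32 * 0.01).sin()).collect();
    // Downsample to ~200 representative samples with LTTB.
    let _selected = lttb::lttb_with_x(x.as_slice(), y.as_slice(), 200);
}
```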
4 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod config;
2 | pub use config::*;
3 | pub mod utils;
4 | pub use utils::*;
5 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/src/config.rs:
--------------------------------------------------------------------------------
1 | // pub const ARRAY_LENGTH_SHORT: usize = 512;
2 | pub const ARRAY_LENGTH_SHORT: usize = 1_024;
3 | // pub const ARRAY_LENGTH_LONG: usize = 512 * 10;
4 | pub const ARRAY_LENGTH_LONG: usize = 102_400;
5 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "dev_utils"
3 | version = "0.1.1"
4 | authors = ["Jeroen Van Der Donckt"]
5 | edition = "2021"
6 | description = "Shared utilities for development (tests & benchmarks)"
7 | 
8 | [dependencies]
9 | rand = { version = "0.7.2", default-features = false }
10 | rand_distr = { version = "0.2.2", default-features = false }
11 | 
--------------------------------------------------------------------------------
/downsample_rs/src/types.rs:
--------------------------------------------------------------------------------
1 | use std::ops::{Add, Div, Mul, Sub};
2 | 
3 | pub trait Num:
4 |     Copy
5 |     + PartialOrd
6 |     + Add<Output = Self>
7 |     + Sub<Output = Self>
8 |     + Mul<Output = Self>
9 |     + Div<Output = Self>
10 | {
11 | }
12 | 
13 | // Implement the trait for all types that satisfy the trait bounds
14 | impl<T> Num for T where
15 |     T: Copy + PartialOrd + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T>
16 | {
17 | }
18 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "tsdownsample" # Same name as the Python package
3 | version = "0.1.0"
4 | edition = "2021"
5 | authors = ["Jeroen Van Der Donckt"]
6 | description = "Python bindings for time series downsampling algorithms"
7 | repository = "https://github.com/predict-idlab/tsdownsample"
8 | license = "MIT"
9 | 
10 | [dependencies]
11 | downsample_rs = { path = "downsample_rs", features = ["half"]}
12 | pyo3 = { version = "0.26", features = ["extension-module"] }
13 | numpy = { version = "0.26", features = ["half"] }
14 | half = { version = "2.3.1", default-features = false }
15 | paste = { version = "1.0.14", default-features = false }
16 | 
17 | [lib]
18 | name = "tsdownsample"
19 | crate-type = ["cdylib"]
20 | 
--------------------------------------------------------------------------------
/tsdownsample/__init__.py:
--------------------------------------------------------------------------------
1 | """tsdownsample: high performance downsampling of time series data for visualization."""
2 | 
3 | from .downsamplers import (
4 |     EveryNthDownsampler,
5 |     LTTBDownsampler,
6 |     M4Downsampler,
7 |     MinMaxDownsampler,
8 |     MinMaxLTTBDownsampler,
9 |     NaNM4Downsampler,
10 |     NaNMinMaxDownsampler,
11 |     NaNMinMaxLTTBDownsampler,
12 | )
13 | 
14 | __version__ = "0.1.4.1"
15 | __author__ = "Jeroen Van Der Donckt"
16 | 
17 | __all__ = [
18 |     "EveryNthDownsampler",
19 |     "MinMaxDownsampler",
20 |     "M4Downsampler",
21 |     "LTTBDownsampler",
22 |     "MinMaxLTTBDownsampler",
23 |     "NaNMinMaxDownsampler",
24 |     "NaNM4Downsampler",
25 |     "NaNMinMaxLTTBDownsampler",
26 | ]
27 | 
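
# Usage sketch (illustrative): the downsamplers return an array of indices into the
# passed data, which can then be used to slice the original series. The sizes, dtype
# and n_out below are arbitrary example values.
#
#   import numpy as np
#   from tsdownsample import MinMaxLTTBDownsampler
#
#   y = np.random.randn(1_000_000).astype(np.float32)
#   s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1_000)  # -> indices into y
#   y_ds = y[s_ds]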
-------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | # Store some global configuration for tests 2 | 3 | import numpy as np 4 | 5 | _core_supported_dtypes = [ 6 | np.float32, 7 | np.float64, 8 | np.int16, 9 | np.int32, 10 | np.int64, 11 | np.uint16, 12 | np.uint32, 13 | np.uint64, 14 | np.datetime64, 15 | np.timedelta64, 16 | ] 17 | 18 | supported_dtypes_x = _core_supported_dtypes 19 | supported_dtypes_y = _core_supported_dtypes + [np.float16, np.int8, np.uint8, np.bool_] 20 | 21 | _core_rust_primitive_types = ["f32", "f64", "i16", "i32", "i64", "u16", "u32", "u64"] 22 | 23 | rust_primitive_types_x = _core_rust_primitive_types 24 | rust_primitive_types_y = _core_rust_primitive_types + ["f16", "i8", "u8"] 25 | rust_primitive_types_y_nan = ["f16", "f32", "f64"] 26 | -------------------------------------------------------------------------------- /downsample_rs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "downsample_rs" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Jeroen Van Der Donckt"] 6 | description = "Downsample time series data" 7 | license = "MIT" 8 | 9 | [dependencies] 10 | # TODO: perhaps use polars? 11 | argminmax = { version = "0.6.1", features = ["half"] } 12 | half = { version = "2.3.1", default-features = false , features=["num-traits"], optional = true} 13 | num-traits = { version = "0.2.17", default-features = false } 14 | once_cell = "1" 15 | rayon = { version = "1.8.0", default-features = false } 16 | 17 | [dev-dependencies] 18 | rstest = { version = "0.18.2", default-features = false } 19 | rstest_reuse = { version = "0.6", default-features = false } 20 | criterion = "0.5.1" 21 | dev_utils = { path = "dev_utils" } 22 | 23 | [[bench]] 24 | name = "bench_m4" 25 | harness = false 26 | 27 | [[bench]] 28 | name = "bench_minmax" 29 | harness = false 30 | 31 | [[bench]] 32 | name = "bench_lttb" 33 | harness = false 34 | 35 | [[bench]] 36 | name = "bench_minmaxlttb" 37 | harness = false 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jeroen Van Der Donckt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/downsample_rs/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Jeroen Van Der Donckt
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/src/utils.rs:
--------------------------------------------------------------------------------
1 | use std::ops::{Add, Sub};
2 | 
3 | use rand::{thread_rng, Rng};
4 | use rand_distr::Uniform;
5 | 
6 | // random array that samples between min and max of T
7 | pub fn get_random_array<T>(n: usize, min_value: T, max_value: T) -> Vec<T>
8 | where
9 |     T: Copy + rand::distributions::uniform::SampleUniform,
10 | {
11 |     let rng = thread_rng();
12 |     let uni = Uniform::new_inclusive(min_value, max_value);
13 |     let arr: Vec<T> = rng.sample_iter(uni).take(n).collect();
14 |     arr
15 | }
16 | 
17 | // worst case array that alternates between increasing max and decreasing min values
18 | pub fn get_worst_case_array<T>(n: usize, step: T) -> Vec<T>
19 | where
20 |     T: Copy + Default + Sub<Output = T> + Add<Output = T>,
21 | {
22 |     let mut arr: Vec<T> = Vec::with_capacity(n);
23 |     let mut min_value: T = Default::default();
24 |     let mut max_value: T = Default::default();
25 |     for i in 0..n {
26 |         if i % 2 == 0 {
27 |             arr.push(min_value);
28 |             min_value = min_value - step;
29 |         } else {
30 |             arr.push(max_value);
31 |             max_value = max_value + step;
32 |         }
33 |     }
34 |     arr
35 | }
36 | 
--------------------------------------------------------------------------------
/downsample_rs/src/lib.rs:
--------------------------------------------------------------------------------
1 | // It is necessary to import this at the root of the crate
2 | // See: https://github.com/la10736/rstest/tree/master/rstest_reuse#use-rstest_resuse-at-the-top-of-your-crate
3 | #[cfg(test)]
4 | use rstest_reuse;
5 | 
6 | pub mod minmax;
7 | pub use minmax::*;
8 | pub mod lttb;
9 | pub use lttb::*;
10 | pub mod minmaxlttb;
11 | pub use minmaxlttb::*;
12 | pub mod m4;
13 | pub use m4::*;
14 | pub(crate) mod helpers;
15 | pub(crate) mod searchsorted;
16 | pub(crate) mod types;
17 | 
18 | use once_cell::sync::Lazy;
19 | use rayon::{ThreadPool, ThreadPoolBuilder};
20 | 
21 | // Inspired by: https://github.com/pola-rs/polars/blob/9a69062aa0beb2a1bc5d57294cac49961fc91058/crates/polars-core/src/lib.rs#L49
22 | pub static POOL: Lazy<ThreadPool> = Lazy::new(|| {
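    // Thread count for the shared rayon pool: use the TSDOWNSAMPLE_MAX_THREADS
    // environment variable when it is set, otherwise fall back to the number of
    // available cores (defaulting to 1 if that cannot be determined).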
23 |     ThreadPoolBuilder::new()
24 |         .num_threads(
25 |             std::env::var("TSDOWNSAMPLE_MAX_THREADS")
26 |                 .map(|s| s.parse::<usize>().expect("integer"))
27 |                 .unwrap_or_else(|_| {
28 |                     std::thread::available_parallelism()
29 |                         .unwrap_or(std::num::NonZeroUsize::new(1).unwrap())
30 |                         .get()
31 |                 }),
32 |         )
33 |         .build()
34 |         .expect("could not spawn threads")
35 | });
36 | 
--------------------------------------------------------------------------------
/.github/workflows/codspeed.yml:
--------------------------------------------------------------------------------
1 | name: CodSpeed Benchmarks
2 | 
3 | on:
4 |   pull_request:
5 |   push:
6 |     branches:
7 |       - main
8 |   # `workflow_dispatch` allows CodSpeed to trigger backtest
9 |   # performance analysis in order to generate initial data.
10 |   workflow_dispatch:
11 | 
12 | jobs:
13 |   Benchmarks:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - uses: actions/setup-python@v4
18 |         with:
19 |           python-version: '3.10'
20 | 
21 |       - name: Install Rust toolchain
22 |         uses: actions-rs/toolchain@v1
23 |         with:
24 |           profile: minimal
25 |           toolchain: nightly
26 |           components: clippy, rustfmt
27 |       - name: Setup Rust
28 |         run: |
29 |           rustup update nightly --no-self-update
30 |           rustup default nightly
31 |       - name: Cache rust
32 |         uses: Swatinem/rust-cache@v2
33 | 
34 |       - name: install develop version
35 |         run: make install
36 | 
37 |       - run: pip install -r tests/requirements.txt
38 |       - run: pip install pytest-codspeed
39 | 
40 |       - run: pip freeze
41 | 
42 |       # this is required so that pytest uses the installed package
43 |       # - run: rm tests/__init__.py
44 | 
45 |       - name: Run CodSpeed benchmarks
46 |         uses: CodSpeedHQ/action@v3
47 |         with:
48 |           run: pytest tests/benchmarks/ --codspeed
49 | 
--------------------------------------------------------------------------------
/downsample_rs/src/helpers.rs:
--------------------------------------------------------------------------------
1 | use num_traits::AsPrimitive;
2 | 
3 | use crate::types::Num;
4 | 
5 | // ------------ AVERAGE
6 | 
7 | // TODO: future work -> this can be optimized by using SIMD instructions (similar to the argminmax crate)
8 | // TODO: this implementation can overflow (but numpy does the same)
9 | 
10 | // This trait implements the average function for all types that this crate
11 | // supports. It is used in the lttb algorithm.
12 | // We intend to use the same implementation for all types as is used in the
13 | // numpy (Python) library (which uses add reduce):
14 | // - f64 & f32: use the data type to calculate the average
15 | // - f16: cast to f32 and calculate the average
16 | // - signed & unsigned integers: cast to f64 and calculate the average
17 | // Note: the only difference with the numpy implementation is that this
18 | // implementation always returns an f64, while numpy returns f32 for f32 and f16
19 | // (however the calculation is done in f32 - only the result is cast to f64).
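// (For example, averaging the i32 slice [1, 2, 4] accumulates 1.0 + 2.0 + 4.0 in f64
// and divides by the length 3, yielding 2.333..., not the truncated integer 2.)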
20 | // See more details: https://github.com/numpy/numpy/blob/8cec82012694571156e8d7696307c848a7603b4e/numpy/core/_methods.py#L164
21 | 
22 | pub trait Average {
23 |     fn average(&self) -> f64;
24 | }
25 | 
26 | impl<T> Average for [T]
27 | where
28 |     T: Num + AsPrimitive<f64>,
29 | {
30 |     fn average(&self) -> f64 {
31 |         self.iter().fold(0f64, |acc, &x| acc + x.as_()) as f64 / self.len() as f64
32 |     }
33 | }
34 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .DEFAULT_GOAL := all
2 | black = black tsdownsample tests
3 | 
4 | install:
5 | 	pip install .
6 | 
7 | .PHONY: install-dev-requirements
8 | install-dev-requirements:
9 | 	pip install -r tests/requirements.txt
10 | 	pip install -r tests/requirements-linting.txt
11 | 
12 | .PHONY: format
13 | format:
14 | 	ruff format tsdownsample tests
15 | 	$(black)
16 | 	cargo fmt
17 | 
18 | .PHONY: lint-python
19 | lint-python:
20 | 	ruff check tsdownsample tests
21 | 	$(black) --check --diff
22 | 
23 | .PHONY: lint-rust
24 | lint-rust:
25 | 	cargo fmt --version
26 | 	cargo fmt --all -- --check
27 | 	cargo clippy --version
28 | 	cargo clippy -- -D warnings -A incomplete_features -W clippy::dbg_macro -W clippy::print_stdout -A clippy::empty_line_after_doc_comments
29 | 
30 | .PHONY: lint
31 | lint: lint-python lint-rust
32 | 
33 | .PHONY: mypy
34 | mypy:
35 | 	mypy tsdownsample
36 | 
37 | 
38 | .PHONY: test
39 | test:
40 | 	pytest --benchmark-skip --cov=tsdownsample --cov-report=term-missing --cov-report=html --cov-report=xml
41 | 
42 | .PHONY: bench
43 | bench:
44 | 	pytest --benchmark-only --benchmark-max-time=5
45 | 
46 | 
47 | .PHONY: all
48 | all: lint mypy test
49 | 
50 | .PHONY: clean
51 | clean:
52 | 	rm -rf `find . -name __pycache__`
53 | 	rm -f `find . -type f -name '*.py[co]' `
54 | 	rm -f `find . -type f -name '*~' `
55 | 	rm -f `find . -type f -name '.*~' `
56 | 	rm -f `find . -type f -name '*.cpython-*' `
57 | 	rm -rf dist
58 | 	rm -rf build
59 | 	rm -rf target
60 | 	rm -rf .cache
61 | 	rm -rf .pytest_cache
62 | 	rm -rf .mypy_cache
63 | 	rm -rf htmlcov
64 | 	rm -rf *.egg-info
65 | 	rm -rf .ruff*
66 | 	rm -f .coverage
67 | 	rm -f .coverage.*
68 | 	rm -rf build
69 | 	rm -f tsdownsample/*.so
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 | 
3 | on:
4 |   push:
5 |     branches: [ "main" ]
6 |   pull_request:
7 |     # The branches below must be a subset of the branches above
8 |     branches: [ "main" ]
9 |   schedule:
10 |     - cron: '00 00 * * 1'
11 | 
12 | jobs:
13 |   analyze:
14 |     name: Analyze
15 |     # Runner size impacts CodeQL analysis time. To learn more, please see:
16 |     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
17 |     #   - https://gh.io/supported-runners-and-hardware-resources
18 |     #   - https://gh.io/using-larger-runners
19 |     # Consider using larger runners for possible analysis time improvements.
20 | runs-on: 'ubuntu-latest' 21 | timeout-minutes: 360 22 | permissions: 23 | actions: read 24 | contents: read 25 | security-events: write 26 | 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | language: [ 'python' ] 31 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] 32 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 33 | 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v3 37 | 38 | - name: Initialize CodeQL 39 | uses: github/codeql-action/init@v2 40 | with: 41 | languages: ${{ matrix.language }} 42 | # If you wish to specify custom queries, you can do so here or in a config file. 43 | # By default, queries listed here will override any specified in a config file. 44 | # Prefix the list here with "+" to use these queries and those in the config file. 45 | 46 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 47 | # queries: security-extended,security-and-quality 48 | 49 | - name: Perform CodeQL Analysis 50 | uses: github/codeql-action/analyze@v2 51 | with: 52 | category: "/language:${{matrix.language}}" 53 | -------------------------------------------------------------------------------- /downsample_rs/benches/bench_lttb.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::lttb as lttb_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | fn lttb_f32_random_array_long(c: &mut Criterion) { 7 | let n = config::ARRAY_LENGTH_LONG; 8 | let x = (0..n).map(|i| i as i32).collect::>(); 9 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 10 | c.bench_function("lttb_scalx_f32", |b| { 11 | b.iter(|| { 12 | lttb_mod::lttb_with_x( 13 | black_box(x.as_slice()), 14 | black_box(y.as_slice()), 15 | black_box(2_000), 16 | ) 17 | }) 18 | }); 19 | } 20 | fn lttb_f32_random_array_50m(c: &mut Criterion) { 21 | let n = 50_000_000; 22 | let x = (0..n).map(|i| i as i32).collect::>(); 23 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 24 | c.bench_function("lttb_scalx_50M_f32", |b| { 25 | b.iter(|| { 26 | lttb_mod::lttb_with_x( 27 | black_box(x.as_slice()), 28 | black_box(y.as_slice()), 29 | black_box(2_000), 30 | ) 31 | }) 32 | }); 33 | } 34 | 35 | fn lttb_without_x_f32_random_array_long(c: &mut Criterion) { 36 | let n = config::ARRAY_LENGTH_LONG; 37 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 38 | c.bench_function("lttb_scal_f32", |b| { 39 | b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000))) 40 | }); 41 | } 42 | fn lttb_without_x_f32_random_array_50m(c: &mut Criterion) { 43 | let n = 50_000_000; 44 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 45 | c.bench_function("lttb_scal_50M_f32", |b| { 46 | b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000))) 47 | }); 48 | } 49 | 50 | criterion_group!( 51 | benches, 52 | // lttb_f32_random_array_long, 53 | lttb_f32_random_array_50m, 54 | // lttb_without_x_f32_random_array_long, 55 | lttb_without_x_f32_random_array_50m, 56 | ); 57 | criterion_main!(benches); 58 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | 
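# Build backend: maturin compiles the Rust crate in src/ and places the extension
# module under tsdownsample/_rust (see [tool.maturin] below); `make install` wraps this.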
[build-system] 2 | requires = ["maturin>=1.1,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "tsdownsample" 7 | description = "Time series downsampling in rust" 8 | version = "0.1.4.1" 9 | requires-python = ">=3.8" 10 | dependencies = ["numpy"] 11 | authors = [{name = "Jeroen Van Der Donckt"}] 12 | readme = "README.md" 13 | license = {text = "MIT"} 14 | keywords = ["time series", "downsampling", "rust", "data science", "visualization"] 15 | classifiers = [ 16 | 'Intended Audience :: Developers', 17 | 'License :: OSI Approved :: MIT License', 18 | 'Programming Language :: Python :: 3', 19 | 'Programming Language :: Python :: 3.8', 20 | 'Programming Language :: Python :: 3.9', 21 | 'Programming Language :: Python :: 3.10', 22 | 'Programming Language :: Python :: 3.11', 23 | 'Programming Language :: Python :: 3.12', 24 | 'Programming Language :: Python :: 3.13', 25 | 'Programming Language :: Python :: 3.14', 26 | 'Operating System :: POSIX', 27 | 'Operating System :: MacOS :: MacOS X', 28 | 'Operating System :: Microsoft :: Windows' 29 | ] 30 | 31 | [project.urls] 32 | Homepage = "https://github.com/predict-idlab/tsdownsample" 33 | Repository = "https://github.com/predict-idlab/tsdownsample" 34 | 35 | # Build Python bindings for rust 36 | [tool.maturin] 37 | bindings = "pyo3" 38 | module-name = "tsdownsample._rust._tsdownsample_rs" # The path to place the compiled Rust module 39 | # See: https://www.maturin.rs/project_layout.html#import-rust-as-a-submodule-of-your-project 40 | 41 | # Linting 42 | [tool.ruff] 43 | line-length = 88 44 | 45 | [tool.ruff.lint] 46 | select = ["E", "F", "I"] 47 | extend-select = ["Q"] 48 | ignore = ["E402", "F403"] 49 | 50 | # Formatting 51 | [tool.black] 52 | color = true 53 | line-length = 88 54 | skip-string-normalization = true 55 | skip-magic-trailing-comma = true 56 | 57 | # Static typing 58 | [tool.mypy] 59 | follow_imports = "normal" 60 | strict_optional = true 61 | warn_redundant_casts = true 62 | warn_unused_ignores = true 63 | check_untyped_defs = true 64 | no_implicit_reexport = true 65 | disallow_untyped_defs = false 66 | disallow_any_generics = false 67 | ignore_missing_imports = true 68 | -------------------------------------------------------------------------------- /tests/test_rust_mods.py: -------------------------------------------------------------------------------- 1 | import tsdownsample._rust._tsdownsample_rs as tsds_rs 2 | from test_config import ( 3 | rust_primitive_types_x, 4 | rust_primitive_types_y, 5 | rust_primitive_types_y_nan, 6 | ) 7 | 8 | 9 | def _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl: bool): 10 | # Without x 11 | for sub_mod in sub_mods: 12 | assert hasattr(mod, sub_mod) 13 | m = getattr(mod, sub_mod) 14 | for ty in rust_primitive_types_y: 15 | assert hasattr(m, f"downsample_{ty}") 16 | # With x 17 | if not has_x_impl: 18 | return 19 | for sub_mod in sub_mods: 20 | assert hasattr(mod, sub_mod) 21 | m = getattr(mod, sub_mod) 22 | for tx in rust_primitive_types_x: 23 | for ty in rust_primitive_types_y: 24 | assert hasattr(m, f"downsample_{tx}_{ty}") 25 | 26 | 27 | def _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl: bool): 28 | # without x 29 | for sub_mod in sub_mods: 30 | assert hasattr(mod, sub_mod) 31 | m = getattr(mod, sub_mod) 32 | for ty in rust_primitive_types_y_nan: 33 | assert hasattr(m, f"downsample_nan_{ty}") 34 | 35 | # with x 36 | if not has_x_impl: 37 | return 38 | for sub_mod in sub_mods: 39 | assert hasattr(mod, sub_mod) 40 | m = getattr(mod, sub_mod) 41 | for tx in 
rust_primitive_types_x: 42 | for ty in rust_primitive_types_y_nan: 43 | assert hasattr(m, f"downsample_{tx}_{ty}") 44 | 45 | 46 | def test_minmax_rust_mod_correctly_build(): 47 | mod = tsds_rs.minmax 48 | sub_mods = ["sequential", "parallel"] 49 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 50 | _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) 51 | 52 | 53 | def test_m4_rust_mod_correctly_build(): 54 | mod = tsds_rs.m4 55 | sub_mods = ["sequential", "parallel"] 56 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 57 | _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) 58 | 59 | 60 | def test_lttb_rust_mod_correctly_build(): 61 | mod = tsds_rs.lttb 62 | sub_mods = ["sequential"] 63 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 64 | 65 | 66 | def test_minmaxlttb_rust_mod_correctly_build(): 67 | mod = tsds_rs.minmaxlttb 68 | sub_mods = ["sequential", "parallel"] 69 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 70 | _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) 71 | -------------------------------------------------------------------------------- /tests/test_algos_python_compliance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from tsdownsample import ( 5 | LTTBDownsampler, 6 | M4Downsampler, 7 | MinMaxDownsampler, 8 | NaNM4Downsampler, 9 | NaNMinMaxDownsampler, 10 | ) 11 | from tsdownsample._python.downsamplers import ( 12 | LTTB_py, 13 | M4_py, 14 | MinMax_py, 15 | NaNM4_py, 16 | NaNMinMax_py, 17 | ) 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "rust_python_pair", 22 | [ 23 | (MinMaxDownsampler(), MinMax_py()), 24 | (M4Downsampler(), M4_py()), 25 | (LTTBDownsampler(), LTTB_py()), 26 | # Include NaN downsamplers 27 | (NaNMinMaxDownsampler(), NaNMinMax_py()), 28 | (NaNM4Downsampler(), NaNM4_py()), 29 | ], 30 | ) 31 | @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) 32 | @pytest.mark.parametrize("n_out", [100, 200, 252]) 33 | def test_resampler_accordance(rust_python_pair, n, n_out): 34 | rust_downsampler, python_downsampler = rust_python_pair 35 | x = np.arange(n) 36 | y = np.random.randn(n) 37 | # Without x passed to the rust downsampler 38 | assert np.allclose( 39 | rust_downsampler.downsample(y, n_out=n_out), 40 | python_downsampler.downsample(x, y, n_out=n_out), 41 | ) 42 | # With x passed to the rust downsampler 43 | assert np.allclose( 44 | rust_downsampler.downsample(x, y, n_out=n_out), 45 | python_downsampler.downsample(x, y, n_out=n_out), 46 | ) 47 | 48 | 49 | @pytest.mark.parametrize( 50 | "rust_python_pair", 51 | [(NaNMinMaxDownsampler(), NaNMinMax_py()), (NaNM4Downsampler(), NaNM4_py())], 52 | ) 53 | @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) 54 | @pytest.mark.parametrize("n_random_nans", [100, 200, 500, 2000, 5000]) 55 | @pytest.mark.parametrize("n_out", [100, 200, 252]) 56 | def test_nan_resampler_accordance(rust_python_pair, n, n_random_nans, n_out): 57 | rust_downsampler, python_downsampler = rust_python_pair 58 | x = np.arange(n) 59 | y = np.random.randn(n) 60 | y[np.random.choice(y.size, n_random_nans, replace=False)] = np.nan 61 | # Without x passed to the rust downsampler 62 | rust_result = rust_downsampler.downsample(y, n_out=n_out) 63 | python_result = python_downsampler.downsample(x, y, n_out=n_out) 64 | assert np.allclose(rust_result, python_result) 65 | # With x passed to the rust downsampler 66 | assert np.allclose( 67 | 
rust_downsampler.downsample(x, y, n_out=n_out), 68 | python_downsampler.downsample(x, y, n_out=n_out), 69 | ) 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ruff* 2 | .vscode/* 3 | venv/ 4 | TODO.md 5 | main.rs 6 | 7 | ### ----- rust gitignore 8 | 9 | # Generated by Cargo 10 | # will have compiled files and executables 11 | debug/ 12 | target/ 13 | 14 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 15 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 16 | Cargo.lock 17 | 18 | # These are backup files generated by rustfmt 19 | **/*.rs.bk 20 | 21 | # MSVC Windows builds of rustc generate these, which store debugging information 22 | *.pdb 23 | 24 | 25 | ### ----- Python gitignore 26 | 27 | # Byte-compiled / optimized / DLL files 28 | __pycache__/ 29 | *.py[cod] 30 | *$py.class 31 | 32 | # C extensions 33 | *.so 34 | 35 | # Distribution / packaging 36 | .Python 37 | build/ 38 | develop-eggs/ 39 | dist/ 40 | downloads/ 41 | eggs/ 42 | .eggs/ 43 | lib/ 44 | lib64/ 45 | parts/ 46 | sdist/ 47 | var/ 48 | wheels/ 49 | pip-wheel-metadata/ 50 | share/python-wheels/ 51 | *.egg-info/ 52 | .installed.cfg 53 | *.egg 54 | MANIFEST 55 | 56 | # PyInstaller 57 | # Usually these files are written by a python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .nox/ 70 | .coverage 71 | .coverage.* 72 | .cache 73 | nosetests.xml 74 | coverage.xml 75 | *.cover 76 | *.py,cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | db.sqlite3-journal 89 | 90 | # Flask stuff: 91 | instance/ 92 | .webassets-cache 93 | 94 | # Scrapy stuff: 95 | .scrapy 96 | 97 | # Sphinx documentation 98 | docs/_build/ 99 | 100 | # PyBuilder 101 | target/ 102 | 103 | # Jupyter Notebook 104 | .ipynb_checkpoints 105 | 106 | # IPython 107 | profile_default/ 108 | ipython_config.py 109 | 110 | # pyenv 111 | .python-version 112 | 113 | # pipenv 114 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 115 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 116 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 117 | # install all needed dependencies. 118 | #Pipfile.lock 119 | 120 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | -------------------------------------------------------------------------------- /downsample_rs/benches/bench_m4.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::m4 as m4_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | fn m4_f32_random_array_long_single_core(c: &mut Criterion) { 7 | let n = config::ARRAY_LENGTH_LONG; 8 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 9 | c.bench_function("m4_f32", |b| { 10 | b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) 11 | }); 12 | } 13 | 14 | fn m4_f32_random_array_long_multi_core(c: &mut Criterion) { 15 | let n = config::ARRAY_LENGTH_LONG; 16 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 17 | c.bench_function("m4_p_f32", |b| { 18 | b.iter(|| m4_mod::m4_without_x_parallel(black_box(data.as_slice()), black_box(2_000))) 19 | }); 20 | } 21 | 22 | fn m4_f32_random_array_50M_single_core(c: &mut Criterion) { 23 | let n = 50_000_000; 24 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 25 | let x = (0..n).map(|i| i as i32).collect::>(); 26 | c.bench_function("m4_50M_f32", |b| { 27 | b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) 28 | }); 29 | c.bench_function("m4_x_50M_f32", |b| { 30 | b.iter(|| { 31 | m4_mod::m4_with_x( 32 | black_box(x.as_slice()), 33 | black_box(data.as_slice()), 34 | black_box(2_000), 35 | ) 36 | }) 37 | }); 38 | } 39 | 40 | fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { 41 | let n = 50_000_000; 42 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 43 | let x = (0..n).map(|i| i as i32).collect::>(); 44 | c.bench_function("m4_p_50M_f32", |b| { 45 | b.iter(|| m4_mod::m4_without_x_parallel(black_box(data.as_slice()), black_box(2_000))) 46 | }); 47 | c.bench_function("m4_x_p_50M_f32", |b| { 48 | b.iter(|| { 49 | m4_mod::m4_with_x_parallel( 50 | black_box(x.as_slice()), 51 | black_box(data.as_slice()), 52 | black_box(2_000), 53 | ) 54 | }) 55 | }); 56 | } 57 | 58 | // fn m4_f32_worst_case_array_long(c: &mut Criterion) { 59 | // let n = config::ARRAY_LENGTH_LONG; 60 | // let data = utils::get_worst_case_array::(n, 1.0); 61 | // c.bench_function("overlap_worst_long_f32", |b| { 62 | // b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000))) 63 | // }); 64 | // c.bench_function("simple_worst_long_f32", |b| { 65 | // b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000))) 66 | // }); 67 | // c.bench_function("simd_worst_long_f32", |b| { 68 | // b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000))) 69 | // }); 70 | // } 71 | 72 | criterion_group!( 73 | benches, 74 | // m4_f32_random_array_long_single_core, 75 | // m4_f32_random_array_long_multi_core, 76 | m4_f32_random_array_50M_single_core, 77 | 
m4_f32_random_array_50M_multi_core,
78 |     // m4_f32_worst_case_array_long,
79 | );
80 | criterion_main!(benches);
81 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to tsdownsample
2 | 
3 | Welcome! We're happy to have you here. Thank you in advance for your contribution to tsdownsample.
4 | 
5 | ## The basics
6 | 
7 | tsdownsample welcomes contributions in the form of Pull Requests. For small changes (e.g., bug fixes), feel free to submit a PR. For larger changes (e.g., new functionality, major refactoring), consider submitting an [Issue](https://github.com/predict-idlab/tsdownsample/issues) outlining your proposed change.
8 | 
9 | ### Prerequisites
10 | 
11 | The core of tsdownsample is written in Rust, so you'll need to install the [Rust toolchain](https://www.rust-lang.org/tools/install) for development.
12 | 
13 | This project uses the nightly version of Rust. You can install it with:
14 | 
15 | ```bash
16 | rustup install nightly
17 | ```
18 | 
19 | and then set it as the default toolchain with:
20 | 
21 | ```bash
22 | rustup default nightly
23 | ```
24 | 
25 | ### Installing (locally)
26 | 
27 | To install the package locally, run the following command in the root directory of the project:
28 | 
29 | ```bash
30 | make install
31 | ```
32 | 
33 | ### tsdownsample
34 | 
35 | The structure of the tsdownsample project is as follows:
36 | 
37 | ```bash
38 | tsdownsample
39 | ├── Cargo.toml
40 | ├── README.md
41 | ├── src
42 | │   ├── lib.rs        # Python bindings for Rust library
43 | ├── tsdownsample      # The Python package
44 | ├── downsample_rs     # Rust library containing the actual implementation
45 | ├── tests             # Tests for the Python package
46 | ```
47 | 
48 | The Rust library is located in the `downsample_rs` directory. The Python package is located in the `tsdownsample` directory. The `src/lib.rs` file contains the Python bindings for the Rust library.
49 | 
50 | Under the hood, most downsampling algorithms rely heavily on [argminmax](https://github.com/jvdd/argminmax) - a SIMD-accelerated library for finding the indices of the minimum and maximum values in an array. If you want to improve the performance of the library, you could also take a look at the `argminmax` library.
51 | 
52 | ### Testing
53 | 
54 | Changes to the downsample_rs library can be tested with:
55 | 
56 | ```bash
57 | cd downsample_rs
58 | cargo test
59 | ```
60 | 
61 | Changes to the Python package can be tested using the [`Makefile`](Makefile) in the root directory of the project:
62 | 
63 | *Make sure you have the test dependencies installed:*
64 | 
65 | ```bash
66 | pip install -r tests/requirements.txt          # Install test dependencies
67 | pip install -r tests/requirements-linting.txt  # Install linting dependencies
68 | ```
69 | 
70 | To run the tests:
71 | ```bash
72 | make test
73 | ```
74 | 
75 | To run the linting checks:
76 | ```bash
77 | make lint
78 | ```
79 | 
80 | ### Formatting
81 | 
82 | We use [black](https://github.com/psf/black) and [ruff](https://github.com/astral-sh/ruff) to format the Python code.
83 | 84 | To format the code, run the following command (more details in the [Makefile](Makefile)): 85 | ```sh 86 | make format 87 | ``` 88 | 89 | *(make sure you have the test linting dependencies installed)* 90 | 91 | To format the Rust code, run the following command: 92 | ```sh 93 | cargo fmt 94 | ``` 95 | 96 | --- 97 | 98 | ## Improving the performance 99 | 100 | When a PR is submitted that improves the performance of the library, we would highly appreciate if the PR also includes a (verifiable) benchmark that shows the improvement. -------------------------------------------------------------------------------- /downsample_rs/benches/bench_minmax.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::minmax as minmax_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | fn minmax_f32_random_array_long_single_core(c: &mut Criterion) { 7 | let n = config::ARRAY_LENGTH_LONG; 8 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 9 | c.bench_function("minmax_f32", |b| { 10 | b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) 11 | }); 12 | } 13 | 14 | fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) { 15 | let n = config::ARRAY_LENGTH_LONG; 16 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 17 | c.bench_function("minmax_p_f32", |b| { 18 | b.iter(|| { 19 | minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(2_000)) 20 | }) 21 | }); 22 | } 23 | 24 | fn minmax_f32_random_array_50M_single_core(c: &mut Criterion) { 25 | let n = 50_000_000; 26 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 27 | let x = (0..n).map(|i| i as i32).collect::>(); 28 | c.bench_function("minmax_50M_f32", |b| { 29 | b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) 30 | }); 31 | c.bench_function("minmax_x_50M_f32", |b| { 32 | b.iter(|| { 33 | minmax_mod::min_max_with_x( 34 | black_box(x.as_slice()), 35 | black_box(data.as_slice()), 36 | black_box(2_000), 37 | ) 38 | }) 39 | }); 40 | 41 | // c.bench_function("minmax_50M_f32", |b| { 42 | // b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000))) 43 | // }); 44 | // c.bench_function("minmax_x_50M_f32", |b| { 45 | // b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) 46 | // }); 47 | } 48 | 49 | fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { 50 | let n = 50_000_000; 51 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 52 | let x = (0..n).map(|i| i as i32).collect::>(); 53 | c.bench_function("minmax_p_50M_f32", |b| { 54 | b.iter(|| { 55 | minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(2_000)) 56 | }) 57 | }); 58 | c.bench_function("minmax_x_p_50M_f32", |b| { 59 | b.iter(|| { 60 | minmax_mod::min_max_with_x_parallel( 61 | black_box(x.as_slice()), 62 | black_box(data.as_slice()), 63 | black_box(2_000), 64 | ) 65 | }) 66 | }); 67 | 68 | // c.bench_function("minmax_p_50M_f32", |b| { 69 | // b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000))) 70 | // }); 71 | // c.bench_function("minmax_x_p_50M_f32", |b| { 72 | // b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) 73 | // }); 74 | } 75 | 76 | // fn minmax_f32_worst_case_array_long(c: &mut 
Criterion) { 77 | // let n = config::ARRAY_LENGTH_LONG; 78 | // let data = utils::get_worst_case_array::(n, 1.0); 79 | // c.bench_function("overlap_worst_long_f32", |b| { 80 | // b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000))) 81 | // }); 82 | // c.bench_function("simple_worst_long_f32", |b| { 83 | // b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000))) 84 | // }); 85 | // c.bench_function("simd_worst_long_f32", |b| { 86 | // b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000))) 87 | // }); 88 | // } 89 | 90 | criterion_group!( 91 | benches, 92 | // minmax_f32_random_array_long_single_core, 93 | // minmax_f32_random_array_long_multi_core, 94 | minmax_f32_random_array_50M_single_core, 95 | minmax_f32_random_array_50M_long_multi_core, 96 | // minmax_f32_worst_case_array_long, 97 | ); 98 | criterion_main!(benches); 99 | -------------------------------------------------------------------------------- /downsample_rs/benches/bench_minmaxlttb.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::minmaxlttb as minmaxlttb_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | const MINMAX_RATIO: usize = 30; 7 | 8 | fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) { 9 | let n = config::ARRAY_LENGTH_LONG; 10 | let x = (0..n).map(|i| i as i32).collect::>(); 11 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 12 | c.bench_function("mlttb_x_f32", |b| { 13 | b.iter(|| { 14 | minmaxlttb_mod::minmaxlttb_with_x( 15 | black_box(x.as_slice()), 16 | black_box(y.as_slice()), 17 | black_box(2_000), 18 | black_box(MINMAX_RATIO), 19 | ) 20 | }) 21 | }); 22 | } 23 | 24 | fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) { 25 | let n = config::ARRAY_LENGTH_LONG; 26 | let x = (0..n).map(|i| i as i32).collect::>(); 27 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 28 | c.bench_function("mlttb_x_p_f32", |b| { 29 | b.iter(|| { 30 | minmaxlttb_mod::minmaxlttb_with_x_parallel( 31 | black_box(x.as_slice()), 32 | black_box(y.as_slice()), 33 | black_box(2_000), 34 | black_box(MINMAX_RATIO), 35 | ) 36 | }) 37 | }); 38 | } 39 | 40 | fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) { 41 | let n = 50_000_000; 42 | let x = (0..n).map(|i| i as i32).collect::>(); 43 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 44 | c.bench_function("mlttb_x_50M_f32", |b| { 45 | b.iter(|| { 46 | minmaxlttb_mod::minmaxlttb_with_x( 47 | black_box(x.as_slice()), 48 | black_box(y.as_slice()), 49 | black_box(2_000), 50 | black_box(MINMAX_RATIO), 51 | ) 52 | }) 53 | }); 54 | } 55 | 56 | fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) { 57 | let n = 50_000_000; 58 | let x = (0..n).map(|i| i as i32).collect::>(); 59 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 60 | c.bench_function("mlttb_x_p_50M_f32", |b| { 61 | b.iter(|| { 62 | minmaxlttb_mod::minmaxlttb_with_x_parallel( 63 | black_box(x.as_slice()), 64 | black_box(y.as_slice()), 65 | black_box(2_000), 66 | black_box(MINMAX_RATIO), 67 | ) 68 | }) 69 | }); 70 | } 71 | 72 | fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) { 73 | let n = 50_000_000; 74 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 75 | c.bench_function("mlttb_50M_f32", |b| { 76 | b.iter(|| { 77 | minmaxlttb_mod::minmaxlttb_without_x( 78 | black_box(y.as_slice()), 
79 | black_box(2_000), 80 | black_box(MINMAX_RATIO), 81 | ) 82 | }) 83 | }); 84 | } 85 | 86 | fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) { 87 | let n = 50_000_000; 88 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 89 | c.bench_function("mlttb_p_50M_f32", |b| { 90 | b.iter(|| { 91 | minmaxlttb_mod::minmaxlttb_without_x_parallel( 92 | black_box(y.as_slice()), 93 | black_box(2_000), 94 | black_box(MINMAX_RATIO), 95 | ) 96 | }) 97 | }); 98 | } 99 | 100 | criterion_group!( 101 | benches, 102 | // minmaxlttb_f32_random_array_long_single_core, 103 | // minmaxlttb_f32_random_array_long_multi_core, 104 | minmaxlttb_f32_random_array_50M_single_core, 105 | minmaxlttb_f32_random_array_50M_multi_core, 106 | minmaxlttb_without_x_f32_random_array_50M_single_core, 107 | minmaxlttb_without_x_f32_random_array_50M_multi_core, 108 | // minmaxlttb_f32_random_array_100m 109 | ); 110 | criterion_main!(benches); 111 | -------------------------------------------------------------------------------- /.github/workflows/ci-downsample_rs.yml: -------------------------------------------------------------------------------- 1 | name: CI downsample_rs 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | paths: 9 | - "downsample_rs/**" 10 | - "!downsample_rs/LICENSE" 11 | - "!downsample_rs/README.md" 12 | 13 | defaults: 14 | run: 15 | shell: bash 16 | working-directory: downsample_rs 17 | 18 | jobs: 19 | Check: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | 25 | - name: Install Rust toolchain 26 | uses: actions-rs/toolchain@v1 27 | with: 28 | profile: minimal 29 | toolchain: nightly 30 | components: clippy, rustfmt 31 | - name: Setup Rust 32 | run: | 33 | rustup update nightly --no-self-update 34 | rustup default nightly 35 | 36 | - name: Rust toolchain info 37 | run: | 38 | cargo --version --verbose 39 | rustc --version 40 | cargo clippy --version 41 | cargo fmt --version 42 | 43 | - name: check no optional features 44 | run: cargo check --verbose 45 | - name: check with all features 46 | run: cargo check --verbose --all-features 47 | - name: formatting check 48 | run: cargo fmt --all -- --check 49 | # - name: check with clippy 50 | # run: cargo clippy --all --all-targets --all-features -- -D warnings 51 | 52 | Test: 53 | runs-on: ${{ matrix.os }} 54 | strategy: 55 | fail-fast: false 56 | matrix: 57 | os: ["windows-latest", "macOS-latest", "ubuntu-latest"] 58 | rust: ["nightly"] # ['stable', 'beta'] 59 | 60 | steps: 61 | - name: Checkout 62 | uses: actions/checkout@v2 63 | 64 | - name: Install Rust toolchain 65 | uses: actions-rs/toolchain@v1 66 | with: 67 | profile: minimal 68 | toolchain: ${{ matrix.rust }} 69 | - name: Setup Rust 70 | run: | 71 | rustup update nightly --no-self-update 72 | rustup default nightly 73 | 74 | - name: Cache Dependencies 75 | uses: Swatinem/rust-cache@v1 76 | 77 | - name: Run tests (debug) 78 | run: cargo test --verbose --all-features 79 | - name: Run tests (release) 80 | run: cargo test --verbose --all-features --release 81 | 82 | Bench: 83 | runs-on: ${{ matrix.os }} 84 | strategy: 85 | fail-fast: false 86 | matrix: 87 | os: ["ubuntu-latest"] # ['windows-latest', 'macOS-latest'] 88 | rust: ["nightly"] # ['stable', 'beta'] 89 | 90 | steps: 91 | - name: Checkout 92 | uses: actions/checkout@v2 93 | 94 | - name: Install Rust toolchain 95 | uses: actions-rs/toolchain@v1 96 | with: 97 | profile: minimal 98 | toolchain: ${{ matrix.rust }} 99 | - name: Setup Rust 100 | run: | 101 | 
rustup update nightly --no-self-update
102 |           rustup default nightly
103 | 
104 |       - name: Cache Dependencies
105 |         uses: Swatinem/rust-cache@v1
106 | 
107 |       - name: Run benchmarks
108 |         run: cargo bench --quiet --message-format=short --all-features | grep "time:"
109 | 
110 |   Build:
111 |     runs-on: ubuntu-latest
112 |     strategy:
113 |       fail-fast: false
114 |       matrix:
115 |         target:
116 |           # We shouldn't really have any OS-specific code, so think of this as a list of architectures
117 |           - x86_64-unknown-linux-gnu
118 |           - i686-unknown-linux-gnu
119 |           - i586-unknown-linux-gnu
120 |           - aarch64-unknown-linux-gnu
121 |           - armv7-unknown-linux-gnueabihf
122 |           # MIPS is currently not supported anymore on nightly chains.
123 |           # more information:
124 |           #   - https://github.com/rust-lang/compiler-team/issues/648
125 |           #   - https://github.com/rust-lang/rust/pull/113274
126 |           # - mips-unknown-linux-gnu
127 |           # - mips64-unknown-linux-gnuabi64
128 |           - powerpc-unknown-linux-gnu
129 |           - powerpc64-unknown-linux-gnu
130 |           - riscv64gc-unknown-linux-gnu
131 |           - s390x-unknown-linux-gnu
132 |           - sparc64-unknown-linux-gnu
133 |           - wasm32-unknown-unknown
134 | 
135 |     steps:
136 |       - uses: actions/checkout@v2
137 |       - name: Setup Rust
138 |         run: |
139 |           rustup update nightly --no-self-update
140 |           rustup default nightly
141 |           rustup target add ${{ matrix.target }}
142 |           # rustup component add clippy
143 |       # - name: Run Clippy
144 |       #   run: cargo clippy --all-targets --target ${{ matrix.target }}
145 |       - name: Build (release)
146 |         run: cargo build --target ${{ matrix.target }} --release --all-features
147 | 
148 | #       - name: Run cargo-tarpaulin
149 | #         uses: actions-rs/tarpaulin@v0.1
150 | #         with:
151 | #           args: '--features half -- --test-threads 1'
152 | 
153 | #       - name: Upload to codecov.io
154 | #         uses: codecov/codecov-action@v3
155 | #
156 | #
157 | # largely inspired by: https://github.com/rust-lang/portable-simd/blob/master/.github/workflows/ci.yml
158 | 
--------------------------------------------------------------------------------
/tsdownsample/downsamplers.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Union
3 | 
4 | import numpy as np
5 | 
6 | # ------------------ Rust Downsamplers ------------------
7 | from tsdownsample._rust import _tsdownsample_rs  # type: ignore[attr-defined]
8 | 
9 | from .downsampling_interface import (
10 |     AbstractDownsampler,
11 |     AbstractRustDownsampler,
12 |     AbstractRustNaNDownsampler,
13 | )
14 | 
15 | 
16 | class MinMaxDownsampler(AbstractRustDownsampler):
17 |     """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, these
18 |     are ignored (i.e. the NaNs are not taken into account when selecting data points).
19 | 
20 |     For each bin, the indices of the minimum and maximum values are selected.
21 |     """
22 | 
23 |     @property
24 |     def rust_mod(self):
25 |         return _tsdownsample_rs.minmax
26 | 
27 |     @staticmethod
28 |     def _check_valid_n_out(n_out: int):
29 |         AbstractRustDownsampler._check_valid_n_out(n_out)
30 |         if n_out % 2 != 0:
31 |             raise ValueError("n_out must be even")
32 | 
33 | 
34 | class NaNMinMaxDownsampler(AbstractRustNaNDownsampler):
35 |     """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, the
36 |     indices of these NaNs are returned.
37 | 
38 |     For each bin, the indices of the minimum and maximum values are selected.
39 | """ 40 | 41 | @property 42 | def rust_mod(self): 43 | return _tsdownsample_rs.minmax 44 | 45 | @staticmethod 46 | def _check_valid_n_out(n_out: int): 47 | AbstractRustDownsampler._check_valid_n_out(n_out) 48 | if n_out % 2 != 0: 49 | raise ValueError("n_out must be even") 50 | 51 | 52 | class M4Downsampler(AbstractRustDownsampler): 53 | """Downsampler that uses the M4 algorithm. If the y data contains NaNs, these are 54 | ignored (i.e. the NaNs are not taken into account when selecting data points). 55 | 56 | For each bin, the indices of the first, last, minimum and maximum values are 57 | selected. 58 | """ 59 | 60 | @property 61 | def rust_mod(self): 62 | return _tsdownsample_rs.m4 63 | 64 | @staticmethod 65 | def _check_valid_n_out(n_out: int): 66 | AbstractRustDownsampler._check_valid_n_out(n_out) 67 | if n_out % 4 != 0: 68 | raise ValueError("n_out must be a multiple of 4") 69 | 70 | 71 | class NaNM4Downsampler(AbstractRustNaNDownsampler): 72 | """Downsampler that uses the M4 algorithm. If the y data contains NaNs, the indices 73 | of these NaNs are returned. 74 | 75 | For each bin, the indices of the first, last, minimum and maximum values are 76 | selected. 77 | """ 78 | 79 | @property 80 | def rust_mod(self): 81 | return _tsdownsample_rs.m4 82 | 83 | @staticmethod 84 | def _check_valid_n_out(n_out: int): 85 | AbstractRustDownsampler._check_valid_n_out(n_out) 86 | if n_out % 4 != 0: 87 | raise ValueError("n_out must be a multiple of 4") 88 | 89 | 90 | class LTTBDownsampler(AbstractRustDownsampler): 91 | """Downsampler that uses the LTTB algorithm.""" 92 | 93 | @property 94 | def rust_mod(self): 95 | return _tsdownsample_rs.lttb 96 | 97 | 98 | class MinMaxLTTBDownsampler(AbstractRustDownsampler): 99 | """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, 100 | these are ignored (i.e. the NaNs are not taken into account when selecting data 101 | points). 102 | 103 | MinMaxLTTB paper: https://arxiv.org/abs/2305.00332 104 | """ 105 | 106 | @property 107 | def rust_mod(self): 108 | return _tsdownsample_rs.minmaxlttb 109 | 110 | def downsample( 111 | self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_ 112 | ): 113 | assert minmax_ratio > 0, "minmax_ratio must be greater than 0" 114 | return super().downsample( 115 | *args, n_out=n_out, parallel=parallel, ratio=minmax_ratio 116 | ) 117 | 118 | 119 | class NaNMinMaxLTTBDownsampler(AbstractRustNaNDownsampler): 120 | """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, the 121 | indices of these NaNs are returned. 
122 | 123 | MinMaxLTTB paper: https://arxiv.org/abs/2305.00332 124 | """ 125 | 126 | @property 127 | def rust_mod(self): 128 | return _tsdownsample_rs.minmaxlttb 129 | 130 | def downsample( 131 | self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_ 132 | ): 133 | assert minmax_ratio > 0, "minmax_ratio must be greater than 0" 134 | return super().downsample( 135 | *args, n_out=n_out, parallel=parallel, ratio=minmax_ratio 136 | ) 137 | 138 | 139 | # ------------------ EveryNth Downsampler ------------------ 140 | 141 | 142 | class EveryNthDownsampler(AbstractDownsampler): 143 | """Downsampler that selects every nth data point""" 144 | 145 | def __init__(self, **kwargs): 146 | super().__init__(check_contiguous=False, **kwargs) 147 | 148 | def _downsample( 149 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **_ 150 | ) -> np.ndarray: 151 | if x is not None: 152 | name = self.__class__.__name__ 153 | warnings.warn( 154 | f"x is passed to downsample method of {name}, but is not taken " 155 | "into account by the current implementation of the EveryNth algorithm." 156 | ) 157 | step = max(1, len(y) / n_out) 158 | return np.arange(start=0, stop=len(y) - 0.1, step=step).astype(np.uint) 159 | -------------------------------------------------------------------------------- /downsample_rs/benches/results: -------------------------------------------------------------------------------- 1 | overlap_random_long_f32 time: [45.116 µs 45.175 µs 45.255 µs] 2 | simple_random_long_f32 time: [24.639 µs 24.711 µs 24.793 µs] 3 | simd_random_long_f32 time: [10.549 µs 10.580 µs 10.615 µs] 4 | 5 | --- 6 | 7 | overlap_random_long_f32 time: [8.9179 ms 8.9405 ms 8.9688 ms] 8 | simple_random_long_f32 time: [7.9809 ms 8.0065 ms 8.0416 ms] 9 | simd_random_long_f32 time: [2.4118 ms 2.4177 ms 2.4242 ms] 10 | 11 | 12 | overlap_random_long_f32 time: [45.010 µs 45.040 µs 45.071 µs] 13 | simple_random_long_f32 time: [22.929 µs 22.979 µs 23.036 µs] 14 | simd_random_long_f32 time: [9.8801 µs 9.8925 µs 9.9034 µs] 15 | overlap_random_10m_f32 time: [9.0349 ms 9.0440 ms 9.0537 ms] 16 | simple_random_10m_f32 time: [7.7164 ms 7.7322 ms 7.7481 ms] 17 | simd_random_10m_f32 time: [2.4348 ms 2.4424 ms 2.4505 ms] 18 | 19 | -> parallel 20 | 21 | overlap_random_long_f32 time: [17.514 µs 17.711 µs 17.960 µs] 22 | simple_random_long_f32 time: [6.9441 µs 6.9717 µs 7.0060 µs] 23 | simd_random_long_f32 time: [33.948 µs 34.284 µs 34.674 µs] 24 | overlap_random_10m_f32 time: [8.9596 ms 8.9664 ms 8.9736 ms] 25 | simple_random_10m_f32 time: [8.4142 ms 8.4373 ms 8.4582 ms] 26 | simd_random_10m_f32 time: [1.5374 ms 1.5640 ms 1.5931 ms] 27 | 28 | 29 | overlap_rand_long_f32 time: [17.789 µs 17.953 µs 18.180 µs] 30 | simple_rand_long_f32 time: [6.8148 µs 6.8297 µs 6.8479 µs] 31 | simd_rand_long_f32 time: [9.5135 µs 9.5796 µs 9.6567 µs] 32 | simple_p_rand_long_f32 time: [34.526 µs 35.000 µs 35.524 µs] 33 | simd_p_rand_long_f32 time: [35.538 µs 36.052 µs 36.584 µs] 34 | overlap_rand_50m_f32 time: [44.472 ms 44.533 ms 44.594 ms] 35 | simple_rand_50m_f32 time: [45.433 ms 45.514 ms 45.589 ms] 36 | simd_rand_50m_f32 time: [15.185 ms 15.221 ms 15.263 ms] 37 | simple_p_rand_50m_f32 time: [6.8875 ms 6.9377 ms 6.9899 ms] 38 | simd_p_rand_50m_f32 time: [7.2611 ms 7.2999 ms 7.3410 ms] 39 | 40 | 41 | 42 | overlap_rand_50m_f32 time: [260.86 ms 262.64 ms 264.74 ms] 43 | simple_rand_50m_f32 time: [97.680 ms 98.330 ms 99.087 ms] 44 | simple_rand__50m_f32 time: [97.766 ms 98.251 ms 98.774 ms] 45 | simd_rand_50m_f32 time: [39.015 ms 
39.545 ms 40.096 ms] 46 | 47 | overlap_rand_50m_f32 time: [87.381 ms 87.739 ms 88.166 ms] 48 | simple_rand_50m_f32 time: [35.099 ms 35.327 ms 35.573 ms] 49 | simple_rand__50m_f32 time: [35.639 ms 35.858 ms 36.104 ms] 50 | simd_rand_50m_f32 time: [16.300 ms 16.498 ms 16.710 ms] 51 | 52 | -> op de redoxv2 server 53 | overlap_rand_50m_f32 time: [118.54 ms 118.67 ms 118.80 ms] 54 | simple_rand_50m_f32 time: [55.846 ms 56.661 ms 57.730 ms] 55 | simple_p_rand_50m_f32 time: [5.8644 ms 5.8852 ms 5.9127 ms] 56 | 57 | 58 | ---- 59 | mmlttb_rand_10m_f32 time: [25.205 ms 25.450 ms 25.754 ms] 60 | mmlttb_p_rand_10m_f32 time: [25.090 ms 25.283 ms 25.498 ms] 61 | mmlttb_rand_50m_f32 time: [41.006 ms 41.430 ms 41.892 ms] 62 | mmlttb_p_rand_50m_f32 time: [39.729 ms 40.115 ms 40.730 ms] 63 | 64 | 65 | simple_rand_50m_f32 time: [36.226 ms 37.007 ms 37.984 ms] 66 | simd_rand_50m_f32 time: [17.323 ms 17.350 ms 17.378 ms] 67 | simple_p_rand_50m_f32 time: [10.506 ms 10.529 ms 10.553 ms] 68 | simd_p_rand_50m_f32 time: [10.102 ms 10.126 ms 10.154 ms] 69 | 70 | mmltb_rand_50mf32 time: [48.331 ms 48.417 ms 48.514 ms] 71 | mmltb_p_rand_50mf32 time: [8.9456 ms 8.9841 ms 9.0265 ms] 72 | mmltb_nox_rand_50mf32 time: [47.381 ms 47.446 ms 47.527 ms] 73 | mmltb_p_nox_rand_50mf32 time: [8.6851 ms 8.7108 ms 8.7373 ms] 74 | 75 | 76 | mmltb_rand_1Bf32 time: [844.96 ms 846.33 ms 847.83 ms] 77 | mmltb_p_rand_1Bf32 time: [151.16 ms 151.48 ms 151.85 ms] 78 | mmltb_nox_rand_1Bf32 time: [842.46 ms 843.47 ms 844.56 ms] 79 | mmltb_p_nox_rand_1Bf32 time: [150.72 ms 151.04 ms 151.40 ms] 80 | --> do not wrap the simple_argminmax into an option function 81 | mmltb_rand_1Bf32 time: [846.90 ms 848.46 ms 850.11 ms] 82 | mmltb_p_rand_1Bf32 time: [147.21 ms 147.99 ms 148.90 ms] 83 | mmltb_nox_rand_1Bf32 time: [843.30 ms 844.33 ms 845.55 ms] 84 | mmltb_p_nox_rand_1Bf32 time: [147.32 ms 148.14 ms 149.07 ms] 85 | ---> optimize the simple_argminmax function 86 | mmltb_rand_1Bf32 time: [786.27 ms 786.83 ms 787.45 ms] 87 | mmltb_p_rand_1Bf32 time: [138.83 ms 139.19 ms 139.58 ms] 88 | mmltb_nox_rand_1Bf32 time: [785.77 ms 786.92 ms 788.46 ms] 89 | mmltb_p_nox_rand_1Bf32 time: [138.14 ms 138.42 ms 138.73 ms] 90 | 91 | --------------------- 92 | 93 | lttb_scal_50M_f32 time: [114.35 ms 114.60 ms 114.88 ms] 94 | lttbnox_scal_50M_f32 time: [127.56 ms 127.70 ms 127.84 ms] 95 | m4_scal_50M_f32 time: [31.478 ms 31.539 ms 31.596 ms] 96 | m4_simd_50M_f32 time: [12.065 ms 12.168 ms 12.270 ms] 97 | m4_scalx_50M_f32 time: [30.513 ms 30.536 ms 30.560 ms] 98 | m4_simdx_50M_f32 time: [12.318 ms 12.430 ms 12.540 ms] 99 | m4_scal_p_50M_f32 time: [9.2012 ms 9.2176 ms 9.2352 ms] 100 | m4_simd_p_50M_f32 time: [9.0070 ms 9.0274 ms 9.0500 ms] 101 | m4_scalx_p_50M_f32 time: [9.7611 ms 9.7895 ms 9.8213 ms] 102 | m4_simdx_p_50M_f32 time: [9.4658 ms 9.4908 ms 9.5187 ms] 103 | minmax_scal_50M_f32 time: [31.585 ms 31.693 ms 31.814 ms] 104 | minmax_simd_50M_f32 time: [11.940 ms 12.046 ms 12.152 ms] 105 | minmax_scalx_50M_f32 time: [30.701 ms 30.769 ms 30.852 ms] 106 | minmax_simdx_50M_f32 time: [12.392 ms 12.499 ms 12.607 ms] 107 | minmax_scal_p_50M_f32 time: [9.2281 ms 9.2515 ms 9.2781 ms] 108 | minmax_simd_p_50M_f32 time: [9.0181 ms 9.0404 ms 9.0645 ms] 109 | minmax_scalx_p_50M_f32 time: [10.075 ms 10.100 ms 10.133 ms] 110 | minmax_simdx_p_50M_f32 time: [9.7846 ms 9.8051 ms 9.8272 ms] 111 | mlttb_scalx_50M_f32 time: [40.820 ms 40.855 ms 40.894 ms] 112 | mlttb_simdx_50M_f32 time: [22.739 ms 22.788 ms 22.843 ms] 113 | mlttb_scalx_p_50M_f32 time: [19.783 ms 19.816 ms 19.851 ms] // 
2x slower bc MinMax with 60k n_out is 2x slower when using x 114 | mlttb_simdx_p_50M_f32 time: [19.713 ms 19.752 ms 19.796 ms] // 2x slower bc MinMax with 60k n_out is 2x slower when using x 115 | mlttbnox_scal_50M_f32 time: [36.289 ms 36.327 ms 36.364 ms] 116 | mlttbnox_simd_50M_f32 time: [16.706 ms 16.744 ms 16.784 ms] 117 | mlttbnox_scal_p_50M_f32 time: [10.252 ms 10.272 ms 10.293 ms] 118 | mlttbnox_simd_p_50M_f32 time: [10.037 ms 10.069 ms 10.105 ms] 119 | -------------------------------------------------------------------------------- /.github/workflows/ci-tsdownsample.yml: -------------------------------------------------------------------------------- 1 | name: CI tsdownsample 2 | 3 | on: 4 | pull_request: {} 5 | push: 6 | branches: 7 | - main 8 | tags: 9 | - '**' 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | 17 | Lint_and_Check: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-python@v4 22 | with: 23 | python-version: '3.10' 24 | - name: Install Rust toolchain 25 | uses: actions-rs/toolchain@v1 26 | with: 27 | profile: minimal 28 | toolchain: nightly 29 | components: clippy, rustfmt 30 | - name: Setup Rust 31 | run: | 32 | rustup update nightly --no-self-update 33 | rustup default nightly 34 | - name: Cache rust 35 | uses: Swatinem/rust-cache@v2 36 | 37 | - run: pip install -r tests/requirements-linting.txt 38 | - run: pip freeze 39 | - run: make lint # Lint Python & Rust 40 | - run: make mypy # Type check Python 41 | 42 | Test: 43 | runs-on: ${{ matrix.os }} 44 | strategy: 45 | fail-fast: false 46 | matrix: 47 | os: ['windows-latest', 'macOS-latest', 'ubuntu-latest'] 48 | rust: ['nightly'] # ['stable', 'beta'] 49 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] 50 | 51 | env: 52 | PYTHON: ${{ matrix.python-version }} 53 | 54 | steps: 55 | - uses: actions/checkout@v4 56 | - uses: actions/setup-python@v5 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | - run: pip install -r tests/requirements.txt 60 | 61 | - name: Install Rust toolchain 62 | uses: actions-rs/toolchain@v1 63 | with: 64 | profile: minimal 65 | toolchain: nightly 66 | components: clippy, rustfmt 67 | - name: Setup Rust 68 | run: | 69 | rustup update nightly --no-self-update 70 | rustup default nightly 71 | - name: Cache rust 72 | uses: Swatinem/rust-cache@v2 73 | 74 | - name: install develop version 75 | run: make install 76 | 77 | - run: pip install -r tests/requirements.txt 78 | 79 | - run: pip freeze 80 | 81 | - run: make test # Test Python 82 | 83 | - name: Upload coverage to Codecov 84 | uses: codecov/codecov-action@v5 85 | 86 | Build: 87 | # Perhaps smth more in line with this https://github.com/messense/crfs-rs/blob/main/.github/workflows/Python.yml 88 | name: build on ${{ matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }}) 89 | # only run on push to main and on release 90 | if: "success() && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build'))" 91 | strategy: 92 | fail-fast: false 93 | matrix: 94 | os: [ubuntu, macos, windows] 95 | target: [x86_64, aarch64] 96 | manylinux: [auto] 97 | include: 98 | - os: windows 99 | ls: dir 100 | - os: windows 101 | ls: dir 102 | target: i686 103 | python-architecture: x86 104 | - os: macos 105 | target: aarch64 106 | - os: ubuntu 107 | target: i686 108 | # GCC 4.8.5 in manylinux2014 container doesn't support c11 atomic 109 | # we use manylinux_2_24 container 
for aarch64 and armv7 targets instead, 110 | - os: ubuntu 111 | target: aarch64 112 | container: messense/manylinux_2_24-cross:aarch64 113 | - os: ubuntu 114 | target: armv7 115 | container: messense/manylinux_2_24-cross:armv7 116 | - os: ubuntu 117 | target: ppc64le 118 | container: messense/manylinux_2_24-cross:ppc64le 119 | - os: ubuntu 120 | target: s390x 121 | container: messense/manylinux_2_24-cross:s390x 122 | # musllinux 123 | - os: ubuntu 124 | target: x86_64 125 | manylinux: musllinux_1_1 126 | - os: ubuntu 127 | target: aarch64 128 | manylinux: musllinux_1_1 129 | exclude: 130 | # this fails 131 | - os: windows 132 | target: aarch64 133 | 134 | runs-on: ${{ matrix.os }}-latest 135 | steps: 136 | - uses: actions/checkout@v3 137 | 138 | - name: set up python 139 | uses: actions/setup-python@v5 140 | with: 141 | python-version: 3.13 142 | architecture: ${{ matrix.python-architecture || 'x64' }} 143 | 144 | - name: build sdist 145 | if: ${{ matrix.os == 'ubuntu' && matrix.target == 'x86_64' && matrix.manylinux == 'auto' }} 146 | uses: PyO3/maturin-action@v1 147 | with: 148 | command: sdist 149 | args: --out dist 150 | 151 | - name: build wheels 152 | uses: PyO3/maturin-action@v1 153 | with: 154 | rust-toolchain: nightly 155 | target: ${{ matrix.target }} 156 | manylinux: ${{ matrix.manylinux || 'auto' }} 157 | container: ${{ matrix.container }} 158 | args: --release --out dist --interpreter ${{ matrix.interpreter || '3.8 3.9 3.10 3.11 3.12 3.13 3.14' }} 159 | 160 | - run: ${{ matrix.ls || 'ls -lh' }} dist/ 161 | 162 | - uses: actions/upload-artifact@v4 163 | with: 164 | name: pypi_files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.interpreter || 'all' }}-${{ matrix.manylinux || 'auto' }} 165 | path: dist 166 | 167 | Release: 168 | needs: [Lint_and_Check, Test, Build] 169 | if: "success() && startsWith(github.ref, 'refs/tags/')" 170 | runs-on: ubuntu-latest 171 | 172 | steps: 173 | - uses: actions/checkout@v3 174 | 175 | - name: set up python 176 | uses: actions/setup-python@v4 177 | # with: 178 | # python-version: '3.10' 179 | 180 | - run: pip install -U twine packaging 181 | 182 | - name: get dist artifacts 183 | uses: actions/download-artifact@v4 184 | with: 185 | pattern: pypi_files-* 186 | merge-multiple: true 187 | path: dist 188 | 189 | - run: twine check dist/* 190 | 191 | - name: upload to pypi 192 | run: twine upload dist/* 193 | env: 194 | TWINE_USERNAME: __token__ 195 | TWINE_PASSWORD: ${{ secrets.pypi_token }} 196 | 197 | # https://github.com/samuelcolvin/rtoml/blob/main/.github/workflows/ci.yml 198 | # https://github.com/messense/rjmespath-py/blob/main/.github/workflows/CI.yml 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tsdownsample 2 | 3 | [![PyPI Latest Release](https://img.shields.io/pypi/v/tsdownsample.svg)](https://pypi.org/project/tsdownsample/) 4 | [![support-version](https://img.shields.io/pypi/pyversions/tsdownsample)](https://img.shields.io/pypi/pyversions/tsdownsample) 5 | [![Downloads](https://static.pepy.tech/badge/tsdownsample)](https://pepy.tech/project/tsdownsample) 6 | [![CodeQL](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml) 7 | 
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml) 8 | [![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml) 9 | [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?logo=discord&logoColor=white)](https://discord.gg/k2d59GrxPX) 10 | 11 | 12 | 13 | Extremely fast **time series downsampling 📈** for visualization, written in Rust. 14 | 15 | ## Features ✨ 16 | 17 | - **Fast**: written in rust with PyO3 bindings 18 | - leverages optimized [argminmax](https://github.com/jvdd/argminmax) - which is SIMD accelerated with runtime feature detection 19 | - scales linearly with the number of data points 20 | 21 | - multithreaded with Rayon (in Rust) 22 |
23 | Why we do not use Python multiprocessing 24 | Citing the PyO3 docs on parallelism:
25 |
26 | CPython has the infamous Global Interpreter Lock, which prevents several threads from executing Python bytecode in parallel. This makes threading in Python a bad fit for CPU-bound tasks and often forces developers to accept the overhead of multiprocessing. 27 |
28 | In Rust - which is a compiled language - there is no GIL, so CPU-bound tasks can be parallelized (with Rayon) with little to no overhead. 29 |
30 | - **Efficient**: memory efficient 31 | - works on views of the data (no copies) 32 | - no intermediate data structures are created 33 | - **Flexible**: works on any type of data 34 | - supported datatypes are 35 | - for `x`: `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64` 36 | - for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64`, `bool` 37 |
38 | !! 🚀 f16 argminmax is 200-300x faster than numpy
39 | In contrast with all the other data types above, f16 is *not* hardware supported by most modern CPUs (i.e., they provide no native f16 instructions)!!
40 | 🐌 Programming languages therefore typically support this datatype by either (i) upcasting to f32 or (ii) falling back to a software implementation.
41 | 💡 Since argminmax only needs comparisons - and no arithmetic operations - constructing a symmetrical ordinal mapping from f16 to i16 is sufficient. This mapping makes it possible to use the hardware-supported scalar and SIMD i16 instructions - without introducing any memory overhead 🎉
42 | More details are described in argminmax PR #1; a minimal numpy sketch of such an ordinal mapping is shown below. 43 |
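To make that ordinal-mapping idea a bit more concrete, here is a minimal numpy sketch of *one* such order-preserving f16 → i16 mapping. It illustrates the technique only and is not necessarily the exact transformation argminmax uses: reinterpret the f16 bits as i16 and flip the 15 magnitude bits of negative values so that negative numbers compare correctly as plain integers.

```python
import numpy as np

# Illustration only: a symmetrical, order-preserving f16 -> i16 mapping.
a = np.array([-3.0, -0.5, -0.0, 0.0, 1.5, 2.25], dtype=np.float16)

bits = a.view(np.int16)  # reinterpret the raw f16 bits as i16
# Negative floats sort in reverse order when read as integers;
# flipping their 15 magnitude bits restores a consistent ordering.
ordinal = np.where(bits < 0, bits ^ np.int16(0x7FFF), bits)

# argmin/argmax on the i16 "ordinals" match argmin/argmax on the floats
assert np.argmin(ordinal) == np.argmin(a)
assert np.argmax(ordinal) == np.argmax(a)
```

Because only comparisons are involved, the same indices come out whether the comparisons run on the original f16 values or on their i16 ordinals - which is exactly what allows the i16 scalar and SIMD kernels to be reused.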
44 | - **Easy to use**: simple & flexible API 45 | 46 | ## Install 47 | 48 | ```bash 49 | pip install tsdownsample 50 | ``` 51 | 52 | ## Usage 53 | 54 | ```python 55 | from tsdownsample import MinMaxLTTBDownsampler 56 | import numpy as np 57 | 58 | # Create a time series 59 | y = np.random.randn(10_000_000) 60 | x = np.arange(len(y)) 61 | 62 | # Downsample to 1000 points (assuming constant sampling rate) 63 | s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1000) 64 | 65 | # Select downsampled data 66 | downsampled_y = y[s_ds] 67 | 68 | # Downsample to 1000 points using the (possible irregularly spaced) x-data 69 | s_ds = MinMaxLTTBDownsampler().downsample(x, y, n_out=1000) 70 | 71 | # Select downsampled data 72 | downsampled_x = x[s_ds] 73 | downsampled_y = y[s_ds] 74 | ``` 75 | 76 | ## Downsampling algorithms & API 77 | 78 | ### Downsampling API 📑 79 | 80 | Each downsampling algorithm is implemented as a class that implements a `downsample` method. 81 | The signature of the `downsample` method: 82 | 83 | ``` 84 | downsample([x], y, n_out, **kwargs) -> ndarray[uint64] 85 | ``` 86 | 87 | **Arguments**: 88 | 89 | - `x` is optional 90 | - `x` and `y` are both positional arguments 91 | - `n_out` is a mandatory keyword argument that defines the number of output values* 92 | - `**kwargs` are optional keyword arguments *(see [table below](#downsampling-algorithms-📈))*: 93 | - `parallel`: whether to use multi-threading (default: `False`) 94 | ❗ The max number of threads can be configured with the `TSDOWNSAMPLE_MAX_THREADS` ENV var (e.g. `os.environ["TSDOWNSAMPLE_MAX_THREADS"] = "4"`) 95 | - ... 96 | 97 | **Returns**: a `ndarray[uint64]` of indices that can be used to index the original data. 98 | 99 | \*When there are gaps in the time series, fewer than `n_out` indices may be returned. 100 | 101 | ### Downsampling algorithms 📈 102 | 103 | The following downsampling algorithms (classes) are implemented: 104 | 105 | | Downsampler | Description | `**kwargs` | 106 | | ---:| --- |--- | 107 | | `MinMaxDownsampler` | selects the **min and max** value in each bin | `parallel` | 108 | | `M4Downsampler` | selects the [**min, max, first and last**](https://dl.acm.org/doi/pdf/10.14778/2732951.2732953) value in each bin | `parallel` | 109 | | `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm | `parallel` | 110 | | `MinMaxLTTBDownsampler` | (*new two-step algorithm 🎉*) first selects `n_out` * `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `parallel`, `minmax_ratio`* | 111 | 112 | *Default value for `minmax_ratio` is 4, which is empirically proven to be a good default. More details here: https://arxiv.org/abs/2305.00332 113 | 114 | ### Handling NaNs 115 | 116 | This library supports two `NaN`-policies: 117 | 118 | 1. Omit `NaN`s (`NaN`s are ignored during downsampling). 119 | 2. Return index of first `NaN` once there is at least one present in the bin of the considered data. 120 | 121 | | Omit `NaN`s | Return `NaN`s | 122 | | ----------------------: | :------------------------- | 123 | | `MinMaxDownsampler` | `NaNMinMaxDownsampler` | 124 | | `M4Downsampler` | `NaNM4Downsampler` | 125 | | `MinMaxLTTBDownsampler` | `NaNMinMaxLTTBDownsampler` | 126 | | `LTTBDownsampler` | | 127 | 128 | > Note that NaNs are not supported for `x`-data. 129 | 130 | ## Limitations & assumptions 🚨 131 | 132 | Assumes; 133 | 134 | 1. 
`x`-data is (non-strictly) monotonic increasing (i.e., sorted) 135 | 2. no `NaN`s in `x`-data 136 | 137 | --- 138 | 139 |
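As a quick, self-contained illustration of the `parallel` flag, the `TSDOWNSAMPLE_MAX_THREADS` variable and the two NaN policies described above - a sketch only, which assumes that setting the environment variable before the first `downsample` call is early enough for the Rust extension to pick it up:

```python
import os

# Assumed: the thread cap must be set before the first call into the Rust extension.
os.environ["TSDOWNSAMPLE_MAX_THREADS"] = "4"

import numpy as np
from tsdownsample import MinMaxLTTBDownsampler, NaNMinMaxDownsampler

y = np.random.randn(1_000_000)
y[100::10_000] = np.nan  # sprinkle in some NaNs

# Omit-NaN policy, multi-threaded, with the default minmax_ratio of 4
s_omit = MinMaxLTTBDownsampler().downsample(
    y, n_out=1_000, minmax_ratio=4, parallel=True
)

# Return-NaN policy: a bin that contains a NaN yields the index of a NaN sample
s_nan = NaNMinMaxDownsampler().downsample(y, n_out=1_000, parallel=True)

downsampled_y = y[s_omit]  # NaNs were ignored during selection
```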

140 | 👤 Jeroen Van Der Donckt 141 |

142 | -------------------------------------------------------------------------------- /downsample_rs/src/lttb.rs: -------------------------------------------------------------------------------- 1 | use super::helpers::Average; 2 | use super::types::Num; 3 | use num_traits::AsPrimitive; 4 | use std::cmp; 5 | 6 | #[inline(always)] 7 | fn f64_to_i64unsigned(v: f64) -> i64 { 8 | // Transmute to i64 and mask out the sign bit 9 | let v: i64 = unsafe { std::mem::transmute::(v) }; 10 | v & 0x7FFF_FFFF_FFFF_FFFF 11 | } 12 | 13 | // ----------------------------------- NON-PARALLEL ------------------------------------ 14 | 15 | // ----------- WITH X 16 | 17 | pub fn lttb_with_x, Ty: Num + AsPrimitive>( 18 | x: &[Tx], 19 | y: &[Ty], 20 | n_out: usize, 21 | ) -> Vec { 22 | assert_eq!(x.len(), y.len()); 23 | if n_out >= x.len() { 24 | return (0..x.len()).collect::>(); 25 | } 26 | assert!(n_out >= 3); // avoid division by 0 27 | 28 | // Bucket size. Leave room for start and end data points. 29 | let every: f64 = (x.len() - 2) as f64 / (n_out - 2) as f64; 30 | // Initially a is the first point in the triangle. 31 | let mut a: usize = 0; 32 | 33 | let mut sampled_indices: Vec = vec![usize::default(); n_out]; 34 | 35 | // Always add the first point 36 | sampled_indices[0] = 0; 37 | 38 | for i in 0..n_out - 2 { 39 | // Calculate point average for next bucket (containing c). 40 | let avg_range_start = (every * (i + 1) as f64) as usize + 1; 41 | let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len()); 42 | 43 | let y_slice = &y[avg_range_start..avg_range_end]; 44 | let avg_y: f64 = y_slice.average(); 45 | // TODO: avg_y could be approximated argminmax instead of mean? 46 | // TODO: below is faster than above, but not as accurate 47 | // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0; 48 | let avg_x: f64 = unsafe { 49 | (x.get_unchecked(avg_range_end - 1).as_() + x.get_unchecked(avg_range_start).as_()) 50 | / 2.0 51 | }; 52 | 53 | // Get the range for this bucket 54 | let range_offs = (every * i as f64) as usize + 1; 55 | let range_to = avg_range_start; // = start of the next bucket 56 | 57 | // Point a 58 | let point_ax = unsafe { x.get_unchecked(a).as_() }; 59 | let point_ay = unsafe { y.get_unchecked(a).as_() }; 60 | 61 | let d1 = point_ax - avg_x; 62 | let d2 = avg_y - point_ay; 63 | let offset: f64 = d1 * point_ay + d2 * point_ax; 64 | 65 | let x_slice = &x[range_offs..range_to]; 66 | let y_slice = &y[range_offs..range_to]; 67 | (_, a) = y_slice.iter().zip(x_slice.iter()).enumerate().fold( 68 | (-1i64, a), 69 | |(max_area, a), (i, (y_, x_))| { 70 | // Calculate triangle area over three buckets 71 | // -> area = d1 * (y_ - point_ay) - (point_ax - x_) * d2; 72 | // let area = d1 * y[i].as_() + d2 * x[i].as_() - offset; 73 | // let area = d1 * y_slice[i].as_() + d2 * x_slice[i].as_() - offset; 74 | let area = d1 * y_.as_() + d2 * x_.as_() - offset; 75 | let area = f64_to_i64unsigned(area); // this is faster than abs 76 | if area > max_area { 77 | (area, i) 78 | } else { 79 | (max_area, a) 80 | } 81 | }, 82 | ); 83 | a += range_offs; 84 | 85 | sampled_indices[i + 1] = a; 86 | } 87 | 88 | // Always add the last point 89 | sampled_indices[n_out - 1] = y.len() - 1; 90 | 91 | sampled_indices 92 | } 93 | 94 | // ----------- WITHOUT X 95 | 96 | pub fn lttb_without_x>(y: &[Ty], n_out: usize) -> Vec { 97 | if n_out >= y.len() { 98 | return (0..y.len()).collect::>(); 99 | } 100 | assert!(n_out >= 3); // avoid division by 0 101 | 102 | // Bucket size. 
Leave room for start and end data points. 103 | let every: f64 = (y.len() - 2) as f64 / (n_out - 2) as f64; 104 | // Initially a is the first point in the triangle. 105 | let mut a: usize = 0; 106 | 107 | let mut sampled_indices: Vec = vec![usize::default(); n_out]; 108 | 109 | // Always add the first point 110 | sampled_indices[0] = 0; 111 | 112 | for i in 0..n_out - 2 { 113 | // Calculate point average for next bucket (containing c). 114 | let avg_range_start = (every * (i + 1) as f64) as usize + 1; 115 | let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len()); 116 | 117 | let y_slice = &y[avg_range_start..avg_range_end]; 118 | let avg_y: f64 = y_slice.average(); 119 | let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0; 120 | 121 | // Get the range for this bucket 122 | let range_offs = (every * i as f64) as usize + 1; 123 | let range_to = avg_range_start; // = start of the next bucket 124 | 125 | // Point a 126 | let point_ay = unsafe { y.get_unchecked(a).as_() }; 127 | let point_ax = a as f64; 128 | 129 | let d1 = point_ax - avg_x; 130 | let d2 = avg_y - point_ay; 131 | let point_ax = point_ax - range_offs as f64; 132 | 133 | // let mut max_area = -1i64; 134 | let mut ax_x = point_ax; // point_ax - x[i] 135 | let offset: f64 = d1 * point_ay; 136 | 137 | // TODO: for some reason is this faster than the loop below -> check if this is true for other devices 138 | let y_slice = &y[range_offs..range_to]; 139 | (_, a) = y_slice 140 | .iter() 141 | .enumerate() 142 | .fold((-1i64, a), |(max_area, a), (i, y)| { 143 | // Calculate triangle area over three buckets 144 | // -> area: f64 = d1 * y[i].as_() - ax_x * d2; 145 | let area: f64 = d1 * y.as_() - ax_x * d2 - offset; 146 | let area: i64 = f64_to_i64unsigned(area); 147 | ax_x -= 1.0; 148 | if area > max_area { 149 | (area, i + range_offs) 150 | } else { 151 | (max_area, a) 152 | } 153 | }); 154 | 155 | // let y_slice = unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) }; 156 | // (_, a) = y_slice 157 | // .iter() 158 | // .enumerate() 159 | // .fold((-1i64, a), |(max_area, a), (i, y_)| { 160 | // // Calculate triangle area over three buckets 161 | // // -> area: f64 = d1 * y[i].as_() - ax_x * d2; 162 | // let area: f64 = d1 * y_.as_() - ax_x * d2 - offset; 163 | // let area: i64 = f64_to_i64unsigned(area); 164 | // ax_x -= 1.0; 165 | // if area > max_area { 166 | // (area, i) 167 | // } else { 168 | // (max_area, a) 169 | // } 170 | // }); 171 | // a += range_offs; 172 | 173 | sampled_indices[i + 1] = a; 174 | } 175 | 176 | // Always add the last point 177 | sampled_indices[n_out - 1] = y.len() - 1; 178 | 179 | sampled_indices 180 | } 181 | 182 | // --------------------------------------- TESTS --------------------------------------- 183 | 184 | #[cfg(test)] 185 | mod tests { 186 | use dev_utils::utils; 187 | 188 | use super::{lttb_with_x, lttb_without_x}; 189 | 190 | #[test] 191 | fn test_lttb_with_x() { 192 | let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 193 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 194 | let sampled_indices = lttb_with_x(&x, &y, 4); 195 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 196 | } 197 | 198 | #[test] 199 | fn test_lttb_without_x() { 200 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 201 | let sampled_indices = lttb_without_x(&y, 4); 202 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 203 | } 204 | 205 | #[test] 206 | fn test_random_same_output() { 207 | for _ in 0..100 { 208 | const N: usize = 5_000; 209 | let 
x: [i32; N] = core::array::from_fn(|i| i as i32); 210 | let y = utils::get_random_array(N, f32::MIN, f32::MAX); 211 | let sampled_indices1 = lttb_with_x(&x, y.as_slice(), 200); 212 | let sampled_indices2 = lttb_without_x(y.as_slice(), 200); 213 | assert_eq!(sampled_indices1, sampled_indices2); 214 | } 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /tsdownsample/_python/downsamplers.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from ..downsampling_interface import AbstractDownsampler 6 | 7 | 8 | def _get_bin_idxs(x: np.ndarray, nb_bins: int) -> np.ndarray: 9 | """Get the equidistant indices of the bins to use for the aggregation. 10 | 11 | Parameters 12 | ---------- 13 | x : np.ndarray 14 | The x values of the input data. 15 | nb_bins : int 16 | The number of bins. 17 | 18 | Returns 19 | ------- 20 | np.ndarray 21 | The indices of the bins to use for the aggregation. 22 | """ 23 | # Thanks to the `linspace` the data is evenly distributed over the index-range 24 | # The searchsorted function returns the index positions 25 | bins = np.searchsorted(x, np.linspace(x[0], x[-1], nb_bins + 1), side="right") 26 | bins[0] = 0 27 | bins[-1] = len(x) 28 | return np.array(bins) 29 | 30 | 31 | class LTTB_py(AbstractDownsampler): 32 | @staticmethod 33 | def _argmax_area(prev_x, prev_y, avg_next_x, avg_next_y, x_bucket, y_bucket) -> int: 34 | """Vectorized triangular area argmax computation. 35 | 36 | Parameters 37 | ---------- 38 | prev_x : float 39 | The previous selected point is x value. 40 | prev_y : float 41 | The previous selected point its y value. 42 | avg_next_x : float 43 | The x mean of the next bucket 44 | avg_next_y : float 45 | The y mean of the next bucket 46 | x_bucket : np.ndarray 47 | All x values in the bucket 48 | y_bucket : np.ndarray 49 | All y values in the bucket 50 | 51 | Returns 52 | ------- 53 | int 54 | The index of the point with the largest triangular area. 55 | """ 56 | return np.abs( 57 | x_bucket * (prev_y - avg_next_y) 58 | + y_bucket * (avg_next_x - prev_x) 59 | + (prev_x * avg_next_y - avg_next_x * prev_y) 60 | ).argmax() 61 | 62 | def _downsample( 63 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 64 | ) -> np.ndarray: 65 | """TODO complete docs""" 66 | if x is None: 67 | # Is fine for this implementation as this is only used for testing 68 | x = np.arange(y.shape[0]) 69 | 70 | # Bucket size. 
Leave room for start and end data points 71 | block_size = (y.shape[0] - 2) / (n_out - 2) 72 | # Note this 'astype' cast must take place after array creation (and not with the 73 | # aranage() its dtype argument) or it will cast the `block_size` step to an int 74 | # before the arange array creation 75 | offset = np.arange(start=1, stop=y.shape[0], step=block_size).astype(np.int64) 76 | 77 | # Construct the output array 78 | sampled_x = np.empty(n_out, dtype="int64") 79 | sampled_x[0] = 0 80 | sampled_x[-1] = x.shape[0] - 1 81 | 82 | # Convert x & y to int if it is boolean 83 | if x.dtype == np.bool_: 84 | x = x.astype(np.int8) 85 | if y.dtype == np.bool_: 86 | y = y.astype(np.int8) 87 | 88 | a = 0 89 | for i in range(n_out - 3): 90 | a = ( 91 | LTTB_py._argmax_area( 92 | prev_x=x[a], 93 | prev_y=y[a], 94 | avg_next_x=np.mean(x[offset[i + 1] : offset[i + 2]]), 95 | avg_next_y=y[offset[i + 1] : offset[i + 2]].mean(), 96 | x_bucket=x[offset[i] : offset[i + 1]], 97 | y_bucket=y[offset[i] : offset[i + 1]], 98 | ) 99 | + offset[i] 100 | ) 101 | sampled_x[i + 1] = a 102 | 103 | # ------------ EDGE CASE ------------ 104 | # next-average of last bucket = last point 105 | sampled_x[-2] = ( 106 | LTTB_py._argmax_area( 107 | prev_x=x[a], 108 | prev_y=y[a], 109 | avg_next_x=x[-1], # last point 110 | avg_next_y=y[-1], 111 | x_bucket=x[offset[-2] : offset[-1]], 112 | y_bucket=y[offset[-2] : offset[-1]], 113 | ) 114 | + offset[-2] 115 | ) 116 | return sampled_x 117 | 118 | 119 | class MinMax_py(AbstractDownsampler): 120 | """Aggregation method which performs binned min-max aggregation over fully 121 | overlapping windows. 122 | """ 123 | 124 | @staticmethod 125 | def _check_valid_n_out(n_out: int): 126 | assert n_out % 2 == 0, "n_out must be a multiple of 2" 127 | 128 | def _downsample( 129 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 130 | ) -> np.ndarray: 131 | if x is None: 132 | # Is fine for this implementation as this is only used for testing 133 | x = np.arange(y.shape[0]) 134 | 135 | xdt = x.dtype 136 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 137 | x = x.view(np.int64) 138 | 139 | bins = _get_bin_idxs(x, n_out // 2) 140 | 141 | rel_idxs = [] 142 | for lower, upper in zip(bins, bins[1:]): 143 | y_slice = y[lower:upper] 144 | if not len(y_slice): 145 | continue 146 | # calculate the argmin(slice) & argmax(slice) 147 | rel_idxs.append(lower + np.nanargmin(y_slice)) 148 | rel_idxs.append(lower + np.nanargmax(y_slice)) 149 | return np.unique(rel_idxs) 150 | 151 | 152 | class NaNMinMax_py(AbstractDownsampler): 153 | @staticmethod 154 | def _check_valid_n_out(n_out: int): 155 | assert n_out % 2 == 0, "n_out must be a multiple of 2" 156 | 157 | def _downsample( 158 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 159 | ) -> np.ndarray: 160 | if x is None: 161 | # Is fine for this implementation as this is only used for testing 162 | x = np.arange(y.shape[0]) 163 | 164 | xdt = x.dtype 165 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 166 | x = x.view(np.int64) 167 | 168 | bins = _get_bin_idxs(x, n_out // 2) 169 | 170 | rel_idxs = [] 171 | for lower, upper in zip(bins, bins[1:]): 172 | y_slice = y[lower:upper] 173 | if not len(y_slice): 174 | continue 175 | # calculate the argmin(slice) & argmax(slice) 176 | rel_idxs.append(lower + np.argmin(y_slice)) 177 | rel_idxs.append(lower + np.argmax(y_slice)) 178 | return np.array(sorted(rel_idxs)) 179 | 180 | 181 | class M4_py(AbstractDownsampler): 
182 | """Aggregation method which selects the 4 M-s, i.e y-argmin, y-argmax, x-argmin, and 183 | x-argmax per bin. 184 | 185 | .. note:: 186 | When `n_out` is 4 * the canvas its pixel widht it should create a pixel-perfect 187 | visualization w.r.t. the raw data. 188 | 189 | """ 190 | 191 | @staticmethod 192 | def _check_valid_n_out(n_out: int): 193 | assert n_out % 4 == 0, "n_out must be a multiple of 4" 194 | 195 | def _downsample( 196 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 197 | ) -> np.ndarray: 198 | """TODO complete docs""" 199 | if x is None: 200 | # Is fine for this implementation as this is only used for testing 201 | x = np.arange(y.shape[0]) 202 | 203 | xdt = x.dtype 204 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 205 | x = x.view(np.int64) 206 | 207 | bins = _get_bin_idxs(x, n_out // 4) 208 | 209 | rel_idxs = [] 210 | for lower, upper in zip(bins, bins[1:]): 211 | y_slice = y[lower:upper] 212 | if not len(y_slice): 213 | continue 214 | 215 | # calculate the min(idx), argmin(slice), argmax(slice), max(idx) 216 | rel_idxs.append(lower) 217 | rel_idxs.append(lower + np.nanargmin(y_slice)) 218 | rel_idxs.append(lower + np.nanargmax(y_slice)) 219 | rel_idxs.append(upper - 1) 220 | 221 | # NOTE: we do not use the np.unique so that all indices are retained 222 | return np.array(sorted(rel_idxs)) 223 | 224 | 225 | class NaNM4_py(AbstractDownsampler): 226 | @staticmethod 227 | def _check_valid_n_out(n_out: int): 228 | assert n_out % 4 == 0, "n_out must be a multiple of 4" 229 | 230 | def _downsample( 231 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 232 | ) -> np.ndarray: 233 | """TODO complete docs""" 234 | if x is None: 235 | # Is fine for this implementation as this is only used for testing 236 | x = np.arange(y.shape[0]) 237 | 238 | xdt = x.dtype 239 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 240 | x = x.view(np.int64) 241 | 242 | bins = _get_bin_idxs(x, n_out // 4) 243 | 244 | rel_idxs = [] 245 | for lower, upper in zip(bins, bins[1:]): 246 | y_slice = y[lower:upper] 247 | if not len(y_slice): 248 | continue 249 | 250 | # calculate the min(idx), argmin(slice), argmax(slice), max(idx) 251 | rel_idxs.append(lower) 252 | rel_idxs.append(lower + y_slice.argmin()) 253 | rel_idxs.append(lower + y_slice.argmax()) 254 | rel_idxs.append(upper - 1) 255 | 256 | # NOTE: we do not use the np.unique so that all indices are retained 257 | return np.array(sorted(rel_idxs)) 258 | -------------------------------------------------------------------------------- /downsample_rs/src/minmaxlttb.rs: -------------------------------------------------------------------------------- 1 | use argminmax::{ArgMinMax, NaNArgMinMax}; 2 | 3 | use super::lttb::{lttb_with_x, lttb_without_x}; 4 | use super::types::Num; 5 | 6 | use super::minmax; 7 | use num_traits::{AsPrimitive, FromPrimitive}; 8 | 9 | // ----------------------------------- NON-PARALLEL ------------------------------------ 10 | 11 | // ----------- WITH X 12 | 13 | macro_rules! 
minmaxlttb_with_x { 14 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 15 | pub fn $func_name( 16 | x: &[Tx], 17 | y: &[Ty], 18 | n_out: usize, 19 | minmax_ratio: usize, 20 | ) -> Vec 21 | where 22 | for<'a> &'a [Ty]: $trait, 23 | Tx: Num + AsPrimitive + FromPrimitive, 24 | Ty: Num + AsPrimitive, 25 | { 26 | minmaxlttb_generic(x, y, n_out, minmax_ratio, $f_minmax) 27 | } 28 | }; 29 | } 30 | 31 | minmaxlttb_with_x!(minmaxlttb_with_x, ArgMinMax, minmax::min_max_with_x); 32 | minmaxlttb_with_x!( 33 | minmaxlttb_with_x_nan, 34 | NaNArgMinMax, 35 | minmax::min_max_with_x_nan 36 | ); 37 | 38 | // ----------- WITHOUT X 39 | 40 | macro_rules! minmaxlttb_without_x { 41 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 42 | pub fn $func_name>( 43 | y: &[Ty], 44 | n_out: usize, 45 | minmax_ratio: usize, 46 | ) -> Vec 47 | where 48 | for<'a> &'a [Ty]: $trait, 49 | { 50 | minmaxlttb_generic_without_x(y, n_out, minmax_ratio, $f_minmax) 51 | } 52 | }; 53 | } 54 | 55 | minmaxlttb_without_x!(minmaxlttb_without_x, ArgMinMax, minmax::min_max_without_x); 56 | minmaxlttb_without_x!( 57 | minmaxlttb_without_x_nan, 58 | NaNArgMinMax, 59 | minmax::min_max_without_x_nan 60 | ); 61 | 62 | // ------------------------------------- PARALLEL -------------------------------------- 63 | 64 | // ----------- WITH X 65 | 66 | macro_rules! minmaxlttb_with_x_parallel { 67 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 68 | pub fn $func_name( 69 | x: &[Tx], 70 | y: &[Ty], 71 | n_out: usize, 72 | minmax_ratio: usize, 73 | ) -> Vec 74 | where 75 | for<'a> &'a [Ty]: $trait, 76 | Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, 77 | Ty: Num + AsPrimitive + Send + Sync, 78 | { 79 | minmaxlttb_generic(x, y, n_out, minmax_ratio, $f_minmax) 80 | } 81 | }; 82 | } 83 | 84 | minmaxlttb_with_x_parallel!( 85 | minmaxlttb_with_x_parallel, 86 | ArgMinMax, 87 | minmax::min_max_with_x_parallel 88 | ); 89 | minmaxlttb_with_x_parallel!( 90 | minmaxlttb_with_x_parallel_nan, 91 | NaNArgMinMax, 92 | minmax::min_max_with_x_parallel_nan 93 | ); 94 | 95 | // ----------- WITHOUT X 96 | 97 | macro_rules! 
minmaxlttb_without_x_parallel { 98 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 99 | pub fn $func_name + Send + Sync>( 100 | y: &[Ty], 101 | n_out: usize, 102 | minmax_ratio: usize, 103 | ) -> Vec 104 | where 105 | for<'a> &'a [Ty]: $trait, 106 | { 107 | minmaxlttb_generic_without_x(y, n_out, minmax_ratio, $f_minmax) 108 | } 109 | }; 110 | } 111 | 112 | minmaxlttb_without_x_parallel!( 113 | minmaxlttb_without_x_parallel, 114 | ArgMinMax, 115 | minmax::min_max_without_x_parallel 116 | ); 117 | minmaxlttb_without_x_parallel!( 118 | minmaxlttb_without_x_parallel_nan, 119 | NaNArgMinMax, 120 | minmax::min_max_without_x_parallel_nan 121 | ); 122 | 123 | // ----------------------------------- GENERICS ------------------------------------ 124 | 125 | #[inline(always)] 126 | pub(crate) fn minmaxlttb_generic, Ty: Num + AsPrimitive>( 127 | x: &[Tx], 128 | y: &[Ty], 129 | n_out: usize, 130 | minmax_ratio: usize, 131 | f_minmax: fn(&[Tx], &[Ty], usize) -> Vec, 132 | ) -> Vec { 133 | assert_eq!(x.len(), y.len()); 134 | assert!(minmax_ratio > 1); 135 | // Apply first min max aggregation (if above ratio) 136 | if x.len() / n_out > minmax_ratio { 137 | // Get index of min max points 138 | let mut index = f_minmax( 139 | &x[1..(x.len() - 1)], 140 | &y[1..(x.len() - 1)], 141 | n_out * minmax_ratio, 142 | ); 143 | // inplace + 1 144 | index.iter_mut().for_each(|elem| *elem += 1); 145 | // Prepend first and last point 146 | index.insert(0, 0); 147 | index.push(x.len() - 1); 148 | // Get x and y values at index 149 | let x = unsafe { 150 | index 151 | .iter() 152 | .map(|i| *x.get_unchecked(*i)) 153 | .collect::>() 154 | }; 155 | let y = unsafe { 156 | index 157 | .iter() 158 | .map(|i| *y.get_unchecked(*i)) 159 | .collect::>() 160 | }; 161 | // Apply lttb on the reduced data 162 | let index_points_selected = lttb_with_x(x.as_slice(), y.as_slice(), n_out); 163 | // Return the original index 164 | return index_points_selected 165 | .iter() 166 | .map(|i| index[*i]) 167 | .collect::>(); 168 | } 169 | // Apply lttb on all data when requirement is not met 170 | lttb_with_x(x, y, n_out) 171 | } 172 | 173 | #[inline(always)] 174 | pub(crate) fn minmaxlttb_generic_without_x>( 175 | y: &[Ty], 176 | n_out: usize, 177 | minmax_ratio: usize, 178 | f_minmax: fn(&[Ty], usize) -> Vec, 179 | ) -> Vec { 180 | assert!(minmax_ratio > 1); 181 | // Apply first min max aggregation (if above ratio) 182 | if y.len() / n_out > minmax_ratio { 183 | // Get index of min max points 184 | let mut index = f_minmax(&y[1..(y.len() - 1)], n_out * minmax_ratio); 185 | // inplace + 1 186 | index.iter_mut().for_each(|elem| *elem += 1); 187 | // Prepend first and last point 188 | index.insert(0, 0); 189 | index.push(y.len() - 1); 190 | // Get y values at index 191 | let y = unsafe { 192 | index 193 | .iter() 194 | .map(|i| *y.get_unchecked(*i)) 195 | .collect::>() 196 | }; 197 | // Apply lttb on the reduced data (using the preselect data its index) 198 | let index_points_selected = lttb_with_x(index.as_slice(), y.as_slice(), n_out); 199 | // Return the original index 200 | return index_points_selected 201 | .iter() 202 | .map(|i| index[*i]) 203 | .collect::>(); 204 | } 205 | // Apply lttb on all data when requirement is not met 206 | lttb_without_x(y, n_out).to_vec() 207 | } 208 | 209 | #[cfg(test)] 210 | mod tests { 211 | use rstest::rstest; 212 | use rstest_reuse::{self, *}; 213 | 214 | use super::{minmaxlttb_with_x, minmaxlttb_without_x}; 215 | use super::{minmaxlttb_with_x_parallel, minmaxlttb_without_x_parallel}; 216 | 217 | use 
dev_utils::utils; 218 | 219 | fn get_array_f32(n: usize) -> Vec { 220 | utils::get_random_array(n, f32::MIN, f32::MAX) 221 | } 222 | 223 | // Template for n_out 224 | #[template] 225 | #[rstest] 226 | #[case(98)] 227 | #[case(100)] 228 | #[case(102)] 229 | fn n_outs(#[case] n_out: usize) {} 230 | 231 | #[test] 232 | fn test_minmaxlttb_with_x() { 233 | let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 234 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 235 | let sampled_indices = minmaxlttb_with_x(&x, &y, 4, 2); 236 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 237 | } 238 | 239 | #[test] 240 | fn test_minmaxlttb_without_x() { 241 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 242 | let sampled_indices = minmaxlttb_without_x(&y, 4, 2); 243 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 244 | } 245 | 246 | #[test] 247 | fn test_minmaxlttb_with_x_parallel() { 248 | let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 249 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 250 | let sampled_indices = minmaxlttb_with_x_parallel(&x, &y, 4, 2); 251 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 252 | } 253 | 254 | #[test] 255 | fn test_minmaxlttb_without_x_parallel() { 256 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 257 | let sampled_indices = minmaxlttb_without_x_parallel(&y, 4, 2); 258 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 259 | } 260 | 261 | #[test] 262 | fn test_same_output() { 263 | let N: usize = 2001; 264 | let n_out: usize = 100; 265 | let y = (0..N).map(|v| v as f32).collect::>(); 266 | let x = (0..N as i32).collect::>(); 267 | let sampled_indices1 = minmaxlttb_with_x(&x, &y, n_out, 4); 268 | let sampled_indices2 = minmaxlttb_without_x(&y, n_out, 4); 269 | assert_eq!(sampled_indices1, sampled_indices2); 270 | 271 | let N: usize = 1001; 272 | let n_out: usize = 26; 273 | let y = (0..N).map(|v| v as f32).collect::>(); 274 | let x = (0..N as i32).collect::>(); 275 | let sampled_indices1 = minmaxlttb_with_x(&x, &y, n_out, 4); 276 | let sampled_indices2 = minmaxlttb_without_x(&y, n_out, 4); 277 | assert_eq!(sampled_indices1, sampled_indices2); 278 | } 279 | 280 | #[apply(n_outs)] 281 | fn test_many_random_runs_same_output(n_out: usize) { 282 | const N: usize = 20_000; 283 | const MINMAX_RATIO: usize = 5; 284 | for _ in 0..100 { 285 | // TODO: test with x 286 | let arr = get_array_f32(N); 287 | let idxs1 = minmaxlttb_without_x(arr.as_slice(), n_out, MINMAX_RATIO); 288 | let idxs2 = minmaxlttb_without_x_parallel(arr.as_slice(), n_out, MINMAX_RATIO); 289 | assert_eq!(idxs1, idxs2); 290 | } 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /tests/benchmarks/test_downsamplers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from tsdownsample import ( 5 | EveryNthDownsampler, 6 | LTTBDownsampler, 7 | M4Downsampler, 8 | MinMaxDownsampler, 9 | MinMaxLTTBDownsampler, 10 | NaNM4Downsampler, 11 | NaNMinMaxDownsampler, 12 | NaNMinMaxLTTBDownsampler, 13 | ) 14 | 15 | NB_SAMPLES = ["100,000", "1,000,000"] 16 | N_OUT = ["100", "1,000", "5,000"] 17 | Y_DTYPES = [np.float32, np.float64] + [np.int32, np.int64] 18 | 19 | 20 | # --------------------------------------------------------------------------- # 21 | # MinMaxDownsampler 22 | # --------------------------------------------------------------------------- # 23 | 24 | 25 | @pytest.mark.benchmark(group="minmax") 26 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 27 | 
@pytest.mark.parametrize("n_out", N_OUT) 28 | @pytest.mark.parametrize("dtype", Y_DTYPES) 29 | @pytest.mark.parametrize("parallel", [False, True]) 30 | def test_minmax_no_x(benchmark, n_samples, n_out, dtype, parallel): 31 | """Test the MinMaxDownsampler.""" 32 | downsampler = MinMaxDownsampler() 33 | n_samples = int(n_samples.replace(",", "")) 34 | n_out = int(n_out.replace(",", "")) 35 | 36 | y = np.random.randn(n_samples).astype(dtype) 37 | 38 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 39 | 40 | 41 | @pytest.mark.benchmark(group="minmax") 42 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 43 | @pytest.mark.parametrize("n_out", N_OUT) 44 | @pytest.mark.parametrize("dtype", Y_DTYPES) 45 | @pytest.mark.parametrize("parallel", [False, True]) 46 | def test_minmax_with_x(benchmark, n_samples, n_out, dtype, parallel): 47 | """Test the MinMaxDownsampler.""" 48 | downsampler = MinMaxDownsampler() 49 | n_samples = int(n_samples.replace(",", "")) 50 | n_out = int(n_out.replace(",", "")) 51 | 52 | x = np.arange(n_samples) 53 | y = np.random.randn(n_samples).astype(dtype) 54 | 55 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 56 | 57 | 58 | @pytest.mark.benchmark(group="nanminmax") 59 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 60 | @pytest.mark.parametrize("n_out", N_OUT) 61 | @pytest.mark.parametrize("dtype", Y_DTYPES) 62 | @pytest.mark.parametrize("parallel", [False, True]) 63 | def test_nanminmax_no_x(benchmark, n_samples, n_out, dtype, parallel): 64 | """Test the MinMaxDownsampler.""" 65 | downsampler = NaNMinMaxDownsampler() 66 | n_samples = int(n_samples.replace(",", "")) 67 | n_out = int(n_out.replace(",", "")) 68 | 69 | y = np.random.randn(n_samples).astype(dtype) 70 | 71 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 72 | 73 | 74 | @pytest.mark.benchmark(group="nanminmax") 75 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 76 | @pytest.mark.parametrize("n_out", N_OUT) 77 | @pytest.mark.parametrize("dtype", Y_DTYPES) 78 | @pytest.mark.parametrize("parallel", [False, True]) 79 | def test_nanminmax_with_x(benchmark, n_samples, n_out, dtype, parallel): 80 | """Test the MinMaxDownsampler.""" 81 | downsampler = NaNMinMaxDownsampler() 82 | n_samples = int(n_samples.replace(",", "")) 83 | n_out = int(n_out.replace(",", "")) 84 | 85 | x = np.arange(n_samples) 86 | y = np.random.randn(n_samples).astype(dtype) 87 | 88 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 89 | 90 | 91 | # --------------------------------------------------------------------------- # 92 | # M4Downsampler 93 | # --------------------------------------------------------------------------- # 94 | 95 | 96 | @pytest.mark.benchmark(group="m4") 97 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 98 | @pytest.mark.parametrize("n_out", N_OUT) 99 | @pytest.mark.parametrize("dtype", Y_DTYPES) 100 | @pytest.mark.parametrize("parallel", [False, True]) 101 | def test_m4_no_x(benchmark, n_samples, n_out, dtype, parallel): 102 | """Test the M4Downsampler.""" 103 | downsampler = M4Downsampler() 104 | n_samples = int(n_samples.replace(",", "")) 105 | n_out = int(n_out.replace(",", "")) 106 | 107 | y = np.random.randn(n_samples).astype(dtype) 108 | 109 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 110 | 111 | 112 | @pytest.mark.benchmark(group="m4") 113 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 114 | @pytest.mark.parametrize("n_out", N_OUT) 115 | @pytest.mark.parametrize("dtype", Y_DTYPES) 
116 | @pytest.mark.parametrize("parallel", [False, True]) 117 | def test_m4_with_x(benchmark, n_samples, n_out, dtype, parallel): 118 | """Test the M4Downsampler.""" 119 | downsampler = M4Downsampler() 120 | n_samples = int(n_samples.replace(",", "")) 121 | n_out = int(n_out.replace(",", "")) 122 | 123 | x = np.arange(n_samples) 124 | y = np.random.randn(n_samples).astype(dtype) 125 | 126 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 127 | 128 | 129 | @pytest.mark.benchmark(group="nanm4") 130 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 131 | @pytest.mark.parametrize("n_out", N_OUT) 132 | @pytest.mark.parametrize("dtype", Y_DTYPES) 133 | @pytest.mark.parametrize("parallel", [False, True]) 134 | def test_nanm4_no_x(benchmark, n_samples, n_out, dtype, parallel): 135 | """Test the M4Downsampler.""" 136 | downsampler = NaNM4Downsampler() 137 | n_samples = int(n_samples.replace(",", "")) 138 | n_out = int(n_out.replace(",", "")) 139 | 140 | y = np.random.randn(n_samples).astype(dtype) 141 | 142 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 143 | 144 | 145 | @pytest.mark.benchmark(group="nanm4") 146 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 147 | @pytest.mark.parametrize("n_out", N_OUT) 148 | @pytest.mark.parametrize("dtype", Y_DTYPES) 149 | @pytest.mark.parametrize("parallel", [False, True]) 150 | def test_nanm4_with_x(benchmark, n_samples, n_out, dtype, parallel): 151 | """Test the M4Downsampler.""" 152 | downsampler = NaNM4Downsampler() 153 | n_samples = int(n_samples.replace(",", "")) 154 | n_out = int(n_out.replace(",", "")) 155 | 156 | x = np.arange(n_samples) 157 | y = np.random.randn(n_samples).astype(dtype) 158 | 159 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 160 | 161 | 162 | # --------------------------------------------------------------------------- # 163 | # LTTBDownsampler 164 | # --------------------------------------------------------------------------- # 165 | 166 | 167 | @pytest.mark.benchmark(group="lttb") 168 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 169 | @pytest.mark.parametrize("n_out", N_OUT) 170 | @pytest.mark.parametrize("dtype", Y_DTYPES) 171 | @pytest.mark.parametrize("parallel", [False, True]) 172 | def test_lttb_no_x(benchmark, n_samples, n_out, dtype, parallel): 173 | """Test the LTTBDownsampler.""" 174 | downsampler = LTTBDownsampler() 175 | n_samples = int(n_samples.replace(",", "")) 176 | n_out = int(n_out.replace(",", "")) 177 | 178 | y = np.random.randn(n_samples).astype(dtype) 179 | 180 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 181 | 182 | 183 | @pytest.mark.benchmark(group="lttb") 184 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 185 | @pytest.mark.parametrize("n_out", N_OUT) 186 | @pytest.mark.parametrize("dtype", Y_DTYPES) 187 | @pytest.mark.parametrize("parallel", [False, True]) 188 | def test_lttb_with_x(benchmark, n_samples, n_out, dtype, parallel): 189 | """Test the LTTBDownsampler.""" 190 | downsampler = LTTBDownsampler() 191 | n_samples = int(n_samples.replace(",", "")) 192 | n_out = int(n_out.replace(",", "")) 193 | 194 | x = np.arange(n_samples) 195 | y = np.random.randn(n_samples).astype(dtype) 196 | 197 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 198 | 199 | 200 | # --------------------------------------------------------------------------- # 201 | # MinMaxLTTBDownsampler 202 | # --------------------------------------------------------------------------- # 203 | 204 | 205 
| @pytest.mark.benchmark(group="minmaxlttb") 206 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 207 | @pytest.mark.parametrize("n_out", N_OUT) 208 | @pytest.mark.parametrize("dtype", Y_DTYPES) 209 | @pytest.mark.parametrize("parallel", [False, True]) 210 | def test_minmaxlttb_no_x(benchmark, n_samples, n_out, dtype, parallel): 211 | """Test the MinMaxLTTBDownsampler.""" 212 | downsampler = MinMaxLTTBDownsampler() 213 | n_samples = int(n_samples.replace(",", "")) 214 | n_out = int(n_out.replace(",", "")) 215 | 216 | y = np.random.randn(n_samples).astype(dtype) 217 | 218 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 219 | 220 | 221 | @pytest.mark.benchmark(group="minmaxlttb") 222 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 223 | @pytest.mark.parametrize("n_out", N_OUT) 224 | @pytest.mark.parametrize("dtype", Y_DTYPES) 225 | @pytest.mark.parametrize("parallel", [False, True]) 226 | def test_minmaxlttb_with_x(benchmark, n_samples, n_out, dtype, parallel): 227 | """Test the MinMaxLTTBDownsampler.""" 228 | downsampler = MinMaxLTTBDownsampler() 229 | n_samples = int(n_samples.replace(",", "")) 230 | n_out = int(n_out.replace(",", "")) 231 | 232 | x = np.arange(n_samples) 233 | y = np.random.randn(n_samples).astype(dtype) 234 | 235 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 236 | 237 | 238 | @pytest.mark.benchmark(group="nanminmaxlttb") 239 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 240 | @pytest.mark.parametrize("n_out", N_OUT) 241 | @pytest.mark.parametrize("dtype", Y_DTYPES) 242 | @pytest.mark.parametrize("parallel", [False, True]) 243 | def test_nanminmaxlttb_no_x(benchmark, n_samples, n_out, dtype, parallel): 244 | """Test the MinMaxLTTBDownsampler.""" 245 | downsampler = NaNMinMaxLTTBDownsampler() 246 | n_samples = int(n_samples.replace(",", "")) 247 | n_out = int(n_out.replace(",", "")) 248 | 249 | y = np.random.randn(n_samples).astype(dtype) 250 | 251 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 252 | 253 | 254 | @pytest.mark.benchmark(group="nanminmaxlttb") 255 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 256 | @pytest.mark.parametrize("n_out", N_OUT) 257 | @pytest.mark.parametrize("dtype", Y_DTYPES) 258 | @pytest.mark.parametrize("parallel", [False, True]) 259 | def test_nanminmaxlttb_with_x(benchmark, n_samples, n_out, dtype, parallel): 260 | """Test the MinMaxLTTBDownsampler.""" 261 | downsampler = NaNMinMaxLTTBDownsampler() 262 | n_samples = int(n_samples.replace(",", "")) 263 | n_out = int(n_out.replace(",", "")) 264 | 265 | x = np.arange(n_samples) 266 | y = np.random.randn(n_samples).astype(dtype) 267 | 268 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 269 | 270 | 271 | # --------------------------------------------------------------------------- # 272 | # EveryNthDownsampler 273 | # --------------------------------------------------------------------------- # 274 | 275 | 276 | @pytest.mark.benchmark(group="everynth") 277 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 278 | @pytest.mark.parametrize("n_out", N_OUT) 279 | def test_everynth(benchmark, n_samples, n_out): 280 | """Test the EveryNthDownsampler.""" 281 | downsampler = EveryNthDownsampler() 282 | n_samples = int(n_samples.replace(",", "")) 283 | n_out = int(n_out.replace(",", "")) 284 | 285 | y = np.random.randn(n_samples) 286 | 287 | benchmark(downsampler.downsample, y, n_out=n_out) 288 | -------------------------------------------------------------------------------- 
/downsample_rs/src/searchsorted.rs: -------------------------------------------------------------------------------- 1 | use rayon::iter::IndexedParallelIterator; 2 | use rayon::prelude::*; 3 | 4 | use super::types::Num; 5 | use super::POOL; 6 | use num_traits::{AsPrimitive, FromPrimitive}; 7 | 8 | const EPSILON: f64 = 1e-12; // Small value to avoid precision errors 9 | 10 | // ---------------------- Binary search ---------------------- 11 | 12 | /// Binary search for the index position of the given value in the given array. 13 | /// The array must be sorted in ascending order and contain no duplicates. 14 | /// 15 | /// Complies with the Python bisect function 16 | /// https://docs.python.org/3/library/bisect.html#bisect.bisect 17 | /// 18 | // #[inline(always)] 19 | fn binary_search(arr: &[T], value: T, left: usize, right: usize) -> usize { 20 | let mut size: usize = right - left; 21 | let mut left: usize = left; 22 | let mut right: usize = right; 23 | // Return the index where the value is >= arr[index] and arr[index-1] < value 24 | while left < right { 25 | let mid = left + size / 2; 26 | if arr[mid] < value { 27 | left = mid + 1; 28 | } else { 29 | right = mid; 30 | } 31 | size = right - left; 32 | } 33 | if arr[left] <= value { 34 | left + 1 35 | } else { 36 | left 37 | } 38 | } 39 | 40 | /// Binary search for the index position of the given value in the given array. 41 | /// The array must be sorted in ascending order and contain no duplicates. 42 | /// 43 | /// The mid index is pre-guessed to speed up the search. 44 | /// 45 | /// Complies with the Python bisect function 46 | /// https://docs.python.org/3/library/bisect.html#bisect.bisect 47 | /// 48 | // #[inline(always)] 49 | fn binary_search_with_mid( 50 | arr: &[T], 51 | value: T, 52 | left: usize, 53 | right: usize, 54 | mid: usize, 55 | ) -> usize { 56 | assert!(mid >= left || mid <= right); 57 | let mut left: usize = left; 58 | let mut right: usize = right; 59 | let mut mid: usize = mid; 60 | // Return the index where the value is <= arr[index] and arr[index+1] < value 61 | while left < right { 62 | if arr[mid] < value { 63 | left = mid + 1; 64 | } else { 65 | right = mid; 66 | } 67 | let size = right - left; 68 | mid = left + size / 2; 69 | } 70 | if arr[left] <= value { 71 | left + 1 72 | } else { 73 | left 74 | } 75 | } 76 | 77 | // ------------------- Equidistant binning -------------------- 78 | 79 | #[inline(always)] 80 | fn sequential_add_mul(start_val: f64, add_val: f64, mul: usize, epsilon: f64) -> f64 { 81 | // start_val + add_val * mul will sometimes overflow when add_val * mul is 82 | // larger than the largest positive f64 number. 83 | // This code should not fail when: (f64::MAX - start_val) < (add_val * mul). 84 | // -> Note that f64::MAX - start_val can be up to 2 * f64::MAX. 85 | let mul_2: f64 = mul as f64 / 2.0; 86 | // start_val + add_val * mul_2 as f64 + add_val * (mul - mul_2) as f64 87 | start_val + add_val * mul_2 + add_val * mul_2 + epsilon 88 | } 89 | 90 | // --- Sequential version 91 | 92 | pub(crate) fn get_equidistant_bin_idx_iterator( 93 | arr: &[T], 94 | nb_bins: usize, 95 | ) -> impl Iterator> + '_ 96 | where 97 | T: Num + FromPrimitive + AsPrimitive, 98 | { 99 | assert!(nb_bins >= 2); 100 | // 1. Compute the step between each bin 101 | // Divide by nb_bins to avoid overflow! 
102 | let val_step: f64 = 103 | (arr[arr.len() - 1].as_() / nb_bins as f64) - (arr[0].as_() / nb_bins as f64); 104 | // Estimate the step between each index (used to pre-guess the mid index) 105 | let idx_step: usize = arr.len() / nb_bins; 106 | 107 | // 2. The moving index & value 108 | let arr0: f64 = arr[0].as_(); // The first value of the array 109 | let mut idx: usize = 0; // Index of the search value 110 | 111 | // 3. Iterate over the bins 112 | (0..nb_bins).map(move |i| { 113 | let start_idx: usize = idx; // Start index of the bin (previous end index) 114 | 115 | // Update the search value 116 | let search_value: T = 117 | T::from_f64(sequential_add_mul(arr0, val_step, i + 1, EPSILON)).unwrap(); 118 | if arr[start_idx] >= search_value { 119 | // If the first value of the bin is already >= the search value, 120 | // then the bin is empty. 121 | return None; 122 | } 123 | // Update the pre-guess index 124 | let mid: usize = std::cmp::min(idx + idx_step, arr.len() - 2); 125 | // TODO: Implementation WITHOUT pre-guessing mid is slower!! 126 | idx = binary_search_with_mid(arr, search_value, idx, arr.len() - 1, mid); // End index of the bin 127 | Some((start_idx, idx)) 128 | }) 129 | } 130 | 131 | // --- Parallel version 132 | 133 | pub(crate) fn get_equidistant_bin_idx_iterator_parallel( 134 | arr: &[T], 135 | nb_bins: usize, 136 | ) -> impl IndexedParallelIterator> + '_> + '_ 137 | where 138 | T: Num + FromPrimitive + AsPrimitive + Sync + Send, 139 | { 140 | assert!(nb_bins >= 2); 141 | // 1. Compute the step between each bin 142 | // Divide by nb_bins to avoid overflow! 143 | let val_step: f64 = 144 | (arr[arr.len() - 1].as_() / nb_bins as f64) - (arr[0].as_() / nb_bins as f64); 145 | let arr0: f64 = arr[0].as_(); // The first value of the array 146 | 147 | // 2. Compute the number of threads & bins per thread 148 | let n_threads = std::cmp::min(POOL.current_num_threads(), nb_bins); 149 | let nb_bins_per_thread = nb_bins / n_threads; 150 | let nb_bins_last_thread = nb_bins - nb_bins_per_thread * (n_threads - 1); 151 | 152 | // 3. Iterate over the number of threads 153 | // -> for each thread perform the binary search sorted with moving left and 154 | // yield the indices (using the same idea as for the sequential version) 155 | (0..n_threads).into_par_iter().map(move |i| { 156 | // The moving index & value (for the thread) 157 | let arr0_thr: f64 = sequential_add_mul(arr0, val_step, i * nb_bins_per_thread, EPSILON); // Search value 158 | let start_value: T = T::from_f64(arr0_thr).unwrap(); 159 | // Search the start of the fist bin (of the thread) 160 | let mut idx: usize = 0; // Index of the search value 161 | if i > 0 { 162 | idx = binary_search(arr, start_value, 0, arr.len() - 1); 163 | } 164 | 165 | // The number of bins for the thread 166 | let nb_bins_thread = if i == n_threads - 1 { 167 | nb_bins_last_thread 168 | } else { 169 | nb_bins_per_thread 170 | }; 171 | // Perform sequential binary search for the end of the bins (of the thread) 172 | (0..nb_bins_thread).map(move |i| { 173 | let start_idx: usize = idx; // Start index of the bin (previous end index) 174 | 175 | // Update the search value 176 | let search_value: T = T::from_f64(arr0_thr + val_step * (i + 1) as f64).unwrap(); 177 | if arr[start_idx] >= search_value { 178 | // If the first value of the bin is already >= the search value, 179 | // then the bin is empty. 
180 | return None; 181 | } 182 | idx = binary_search(arr, search_value, idx, arr.len() - 1); // End index of the bin 183 | Some((start_idx, idx)) 184 | }) 185 | }) 186 | } 187 | 188 | // --------------------------------------- TESTS --------------------------------------- 189 | 190 | #[cfg(test)] 191 | mod tests { 192 | use rstest::rstest; 193 | use rstest_reuse::{self, *}; 194 | 195 | use super::*; 196 | 197 | use dev_utils::utils::get_random_array; 198 | 199 | // Template for nb_bins 200 | #[template] 201 | #[rstest] 202 | #[case(99)] 203 | #[case(100)] 204 | #[case(101)] 205 | fn nb_bins(#[case] nb_bins: usize) {} 206 | 207 | #[test] 208 | fn test_sequential_add_mul() { 209 | assert_eq!(sequential_add_mul(0.0, 1.0, 0, 0.0), 0.0); 210 | assert_eq!(sequential_add_mul(-1.0, 1.0, 1, 0.0), 0.0); 211 | assert_eq!(sequential_add_mul(-1.0, 1.0, 1, EPSILON), EPSILON); 212 | // Really large values 213 | assert_eq!(sequential_add_mul(0.0, 1.0, 1_000_000, 0.0), 1_000_000.0); 214 | assert!(sequential_add_mul(f64::MIN, f64::MAX / 2.0, 3, 0.0) < f64::MAX,); 215 | // TODO: the next tests fails due to very minor precision error 216 | // -> however, this precision error is needed to avoid the issue with m4_with_x 217 | // assert_eq!( 218 | // sequential_add_mul(f64::MIN, f64::MAX / 2.0, 3, 0.0), 219 | // f64::MIN + f64::MAX / 2.0 + f64::MAX 220 | // ); 221 | } 222 | 223 | #[test] 224 | fn test_search_sorted_identicial_to_np_linspace_searchsorted() { 225 | // Create a 0..9999 array 226 | let arr: [u32; 10_000] = core::array::from_fn(|i| i.as_()); 227 | assert!(arr.len() == 10_000); 228 | let iterator = get_equidistant_bin_idx_iterator(&arr, 4); 229 | // Check the iterator 230 | let mut idx: usize = 0; 231 | for bin in iterator { 232 | let (start_idx, end_idx) = bin.unwrap(); 233 | assert!(start_idx == idx); 234 | assert!(end_idx == idx + 2_500); 235 | idx += 2_500; 236 | } 237 | } 238 | 239 | #[test] 240 | fn test_binary_search() { 241 | let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 242 | assert_eq!(binary_search(&arr, 0, 0, arr.len() - 1), 0); 243 | assert_eq!(binary_search(&arr, 1, 0, arr.len() - 1), 1); 244 | assert_eq!(binary_search(&arr, 2, 0, arr.len() - 1), 2); 245 | assert_eq!(binary_search(&arr, 3, 0, arr.len() - 1), 3); 246 | assert_eq!(binary_search(&arr, 4, 0, arr.len() - 1), 4); 247 | assert_eq!(binary_search(&arr, 5, 0, arr.len() - 1), 5); 248 | assert_eq!(binary_search(&arr, 6, 0, arr.len() - 1), 6); 249 | assert_eq!(binary_search(&arr, 7, 0, arr.len() - 1), 7); 250 | assert_eq!(binary_search(&arr, 8, 0, arr.len() - 1), 8); 251 | assert_eq!(binary_search(&arr, 9, 0, arr.len() - 1), 9); 252 | assert_eq!(binary_search(&arr, 10, 0, arr.len() - 1), 10); 253 | assert_eq!(binary_search(&arr, 11, 0, arr.len() - 1), 10); 254 | } 255 | 256 | #[test] 257 | fn test_binary_search_with_mid() { 258 | let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 259 | assert_eq!(binary_search_with_mid(&arr, 0, 0, arr.len() - 1, 0), 0); 260 | assert_eq!(binary_search_with_mid(&arr, 1, 0, arr.len() - 1, 0), 1); 261 | assert_eq!(binary_search_with_mid(&arr, 2, 0, arr.len() - 1, 1), 2); 262 | assert_eq!(binary_search_with_mid(&arr, 3, 0, arr.len() - 1, 2), 3); 263 | assert_eq!(binary_search_with_mid(&arr, 4, 0, arr.len() - 1, 3), 4); 264 | assert_eq!(binary_search_with_mid(&arr, 5, 0, arr.len() - 1, 4), 5); 265 | assert_eq!(binary_search_with_mid(&arr, 6, 0, arr.len() - 1, 5), 6); 266 | assert_eq!(binary_search_with_mid(&arr, 7, 0, arr.len() - 1, 6), 7); 267 | assert_eq!(binary_search_with_mid(&arr, 8, 0, arr.len() - 1, 7), 8); 
268 | assert_eq!(binary_search_with_mid(&arr, 9, 0, arr.len() - 1, 8), 9); 269 | assert_eq!(binary_search_with_mid(&arr, 10, 0, arr.len() - 1, 9), 10); 270 | // this line causes the code to crash -> because value higher than arr[mid] 271 | // assert_eq!(binary_search_with_mid(&arr, 11, 0, arr.len() - 1, 9), 10); 272 | } 273 | 274 | #[test] 275 | fn test_get_equidistant_bin_idxs() { 276 | let expected_indices = vec![0, 4, 7]; 277 | 278 | let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 279 | let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr, 3); 280 | let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::>(); 281 | assert_eq!(bin_idxs, expected_indices); 282 | 283 | let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(&arr, 3); 284 | let bin_idxs = bin_idxs_iter 285 | .map(|x| x.map(|x| x.unwrap().0).collect::>()) 286 | .flatten() 287 | .collect::>(); 288 | assert_eq!(bin_idxs, expected_indices); 289 | } 290 | 291 | #[apply(nb_bins)] 292 | fn test_many_random_same_result(nb_bins: usize) { 293 | let n = 5_000; 294 | 295 | for _ in 0..100 { 296 | let mut arr = get_random_array::(n, i32::MIN, i32::MAX); 297 | // Sort the array 298 | arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); 299 | 300 | // Calculate the bin indexes 301 | let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr[..], nb_bins); 302 | let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::>(); 303 | 304 | // Calculate the bin indexes in parallel 305 | let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(&arr[..], nb_bins); 306 | let bin_idxs_parallel = bin_idxs_iter 307 | .map(|x| x.map(|x| x.unwrap().0).collect::>()) 308 | .flatten() 309 | .collect::>(); 310 | 311 | // Check that the results are the same 312 | assert_eq!(bin_idxs, bin_idxs_parallel); 313 | } 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /tests/test_tsdownsample.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | 3 | import numpy as np 4 | import pytest 5 | from test_config import supported_dtypes_x, supported_dtypes_y 6 | 7 | from tsdownsample import ( # MeanDownsampler,; MedianDownsampler, 8 | EveryNthDownsampler, 9 | LTTBDownsampler, 10 | M4Downsampler, 11 | MinMaxDownsampler, 12 | MinMaxLTTBDownsampler, 13 | NaNM4Downsampler, 14 | NaNMinMaxDownsampler, 15 | NaNMinMaxLTTBDownsampler, 16 | ) 17 | from tsdownsample.downsampling_interface import ( 18 | AbstractDownsampler, 19 | AbstractRustNaNDownsampler, 20 | ) 21 | 22 | # TODO: Improve tests 23 | # - compare implementations with existing plotly_resampler implementations 24 | 25 | 26 | RUST_DOWNSAMPLERS = [ 27 | MinMaxDownsampler(), 28 | M4Downsampler(), 29 | LTTBDownsampler(), 30 | MinMaxLTTBDownsampler(), 31 | ] 32 | 33 | RUST_NAN_DOWNSAMPLERS = [ 34 | NaNMinMaxDownsampler(), 35 | NaNM4Downsampler(), 36 | NaNMinMaxLTTBDownsampler(), 37 | ] 38 | 39 | OTHER_DOWNSAMPLERS = [EveryNthDownsampler()] 40 | 41 | 42 | def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]: 43 | for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS: 44 | yield downsampler 45 | 46 | 47 | def generate_rust_nan_downsamplers() -> Iterable[AbstractDownsampler]: 48 | for downsampler in RUST_NAN_DOWNSAMPLERS: 49 | yield downsampler 50 | 51 | 52 | def generate_all_downsamplers() -> Iterable[AbstractDownsampler]: 53 | for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS + OTHER_DOWNSAMPLERS: 54 | yield downsampler 55 | 56 | 57 | def generate_datapoints(): 58 | 
N_DATAPOINTS = 10_000 59 | return np.arange(N_DATAPOINTS) 60 | 61 | 62 | def generate_nan_datapoints(): 63 | N_DATAPOINTS = 10_000 64 | datapoints = np.arange(N_DATAPOINTS, dtype=np.float64) 65 | datapoints[0] = np.nan 66 | datapoints[9960] = np.nan 67 | return datapoints 68 | 69 | 70 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 71 | def test_serialization_copy(downsampler: AbstractDownsampler): 72 | """Test serialization.""" 73 | from copy import copy, deepcopy 74 | 75 | dc = copy(downsampler) 76 | ddc = deepcopy(downsampler) 77 | 78 | arr = generate_datapoints() 79 | 80 | orig_downsampled = downsampler.downsample(arr, n_out=100) 81 | dc_downsampled = dc.downsample(arr, n_out=100) 82 | ddc_downsampled = ddc.downsample(arr, n_out=100) 83 | assert np.all(orig_downsampled == dc_downsampled) 84 | assert np.all(orig_downsampled == ddc_downsampled) 85 | 86 | 87 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 88 | def test_serialization_pickle(downsampler: AbstractDownsampler): 89 | """Test serialization.""" 90 | import pickle 91 | 92 | dc = pickle.loads(pickle.dumps(downsampler)) 93 | 94 | arr = generate_datapoints() 95 | orig_downsampled = downsampler.downsample(arr, n_out=100) 96 | dc_downsampled = dc.downsample(arr, n_out=100) 97 | assert np.all(orig_downsampled == dc_downsampled) 98 | 99 | 100 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 101 | def test_rust_downsampler(downsampler: AbstractDownsampler): 102 | """Test the Rust downsamplers.""" 103 | arr = generate_datapoints() 104 | s_downsampled = downsampler.downsample(arr, n_out=100) 105 | assert s_downsampled[0] == 0 106 | assert s_downsampled[-1] == len(arr) - 1 107 | 108 | 109 | @pytest.mark.parametrize("downsampler", generate_rust_nan_downsamplers()) 110 | def test_rust_nan_downsampler(downsampler: AbstractRustNaNDownsampler): 111 | """Test the Rust NaN downsamplers.""" 112 | datapoints = generate_nan_datapoints() 113 | s_downsampled = downsampler.downsample(datapoints, n_out=100) 114 | print(s_downsampled) 115 | assert s_downsampled[0] == 0 116 | assert s_downsampled[-2] == 9960 117 | assert s_downsampled[50] != np.nan 118 | 119 | 120 | def test_everynth_downsampler(): 121 | """Test EveryNth downsampler.""" 122 | arr = np.arange(10_000) 123 | downsampler = EveryNthDownsampler() 124 | s_downsampled = downsampler.downsample(arr, n_out=100) 125 | assert s_downsampled[0] == 0 126 | assert s_downsampled[-1] == 9_900 127 | 128 | 129 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 130 | def test_parallel_downsampling(downsampler: AbstractDownsampler): 131 | """Test parallel downsampling.""" 132 | arr = np.random.randn(10_000).astype(np.float32) 133 | s_downsampled = downsampler.downsample(arr, n_out=100) 134 | s_downsampled_p = downsampler.downsample(arr, n_out=100, parallel=True) 135 | assert np.all(s_downsampled == s_downsampled_p) 136 | 137 | 138 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 139 | def test_parallel_downsampling_with_x(downsampler: AbstractDownsampler): 140 | """Test parallel downsampling with x.""" 141 | arr = np.random.randn(10_001).astype(np.float32) # 10_001 to test edge case 142 | idx = np.arange(len(arr)) 143 | s_downsampled = downsampler.downsample(idx, arr, n_out=100) 144 | s_downsampled_p = downsampler.downsample(idx, arr, n_out=100, parallel=True) 145 | assert np.all(s_downsampled == s_downsampled_p) 146 | 147 | 148 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 
149 | def test_downsampling_with_x(downsampler: AbstractDownsampler): 150 | """Test downsampling with x.""" 151 | arr = np.random.randn(2_001).astype(np.float32) # 2_001 to test edge case 152 | idx = np.arange(len(arr)) 153 | s_downsampled = downsampler.downsample(arr, n_out=100) 154 | s_downsampled_x = downsampler.downsample(idx, arr, n_out=100) 155 | assert np.all(s_downsampled == s_downsampled_x) 156 | 157 | 158 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 159 | def test_downsampling_with_gaps_in_x(downsampler: AbstractDownsampler): 160 | """Test downsampling with gaps in x. 161 | 162 | With gap we do NOT mean a NaN in the array, but a large gap in the x values. 163 | """ 164 | # TODO: might improve this test, now we just validate that the code does 165 | # not crash 166 | arr = np.random.randn(10_000).astype(np.float32) 167 | idx = np.arange(len(arr)) 168 | idx[: len(idx) // 2] += len(idx) // 2 # add large gap in x 169 | s_downsampled = downsampler.downsample(idx, arr, n_out=100) 170 | assert len(s_downsampled) <= 100 171 | assert len(s_downsampled) >= 66 172 | 173 | 174 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 175 | def test_downsampling_different_dtypes(downsampler: AbstractDownsampler): 176 | """Test downsampling with different data types.""" 177 | arr_orig = np.random.randint(0, 100, size=10_000) 178 | res = [] 179 | for dtype_y in supported_dtypes_y: 180 | arr = arr_orig.astype(dtype_y) 181 | s_downsampled = downsampler.downsample(arr, n_out=100) 182 | if dtype_y is not np.bool_: 183 | res += [s_downsampled] 184 | for i in range(1, len(res)): 185 | assert np.all(res[0] == res[i]) 186 | 187 | 188 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 189 | def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler): 190 | """Test downsampling with x with different data types.""" 191 | arr_orig = np.random.randint(0, 100, size=10_000) 192 | idx_orig = np.arange(len(arr_orig)) 193 | for dtype_x in supported_dtypes_x: 194 | res = [] 195 | idx = idx_orig.astype(dtype_x) 196 | for dtype_y in supported_dtypes_y: 197 | arr = arr_orig.astype(dtype_y) 198 | s_downsampled = downsampler.downsample(idx, arr, n_out=100) 199 | if dtype_y is not np.bool_: 200 | res += [s_downsampled] 201 | for i in range(1, len(res)): 202 | assert np.all(res[0] == res[i]) 203 | 204 | 205 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 206 | def test_downsampling_no_out_of_bounds_different_dtypes( 207 | downsampler: AbstractDownsampler, 208 | ): 209 | """Test no out of bounds issues when downsampling with different data types.""" 210 | arr_orig = np.random.randint(0, 100, size=100) 211 | res = [] 212 | for dtype in supported_dtypes_y: 213 | arr = arr_orig.astype(dtype) 214 | s_downsampled = downsampler.downsample(arr, n_out=76) 215 | s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True) 216 | assert np.all(s_downsampled == s_downsampled_p) 217 | if dtype is not np.bool_: 218 | res += [s_downsampled] 219 | for i in range(1, len(res)): 220 | assert np.all(res[0] == res[i]) 221 | 222 | 223 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 224 | def test_downsampling_no_out_of_bounds_different_dtypes_with_x( 225 | downsampler: AbstractDownsampler, 226 | ): 227 | """Test no out of bounds issues when downsampling with different data types.""" 228 | arr_orig = np.random.randint(0, 100, size=100) 229 | idx_orig = np.arange(len(arr_orig)) 230 | for dtype_x in 
supported_dtypes_x: 231 | res = [] 232 | idx = idx_orig.astype(dtype_x) 233 | for dtype_y in supported_dtypes_y: 234 | arr = arr_orig.astype(dtype_y) 235 | s_downsampled = downsampler.downsample(idx, arr, n_out=76) 236 | s_downsampled_p = downsampler.downsample(idx, arr, n_out=76, parallel=True) 237 | assert np.all(s_downsampled == s_downsampled_p) 238 | if dtype_y is not np.bool_: 239 | res += [s_downsampled] 240 | for i in range(1, len(res)): 241 | assert np.all(res[0] == res[i]) 242 | 243 | 244 | def test_lttb_no_overflow(): 245 | """Test no overflow when calculating average.""" 246 | ### THIS SHOULD NOT OVERFLOW & HAVE THE SAME RESULT 247 | arr_orig = np.array([2 * 10**5] * 10_000, dtype=np.float64) 248 | s_downsampled = LTTBDownsampler().downsample(arr_orig, n_out=100) 249 | arr = arr_orig.astype(np.float32) 250 | s_downsampled_f32 = LTTBDownsampler().downsample(arr, n_out=100) 251 | assert np.all(s_downsampled == s_downsampled_f32) 252 | ### THIS SHOULD OVERFLOW & THUS HAVE A DIFFERENT RESULT... 253 | # max float32 is 3.4028235 × 10^38 (so 2*10**38 is too big when adding 2 values) 254 | arr_orig = np.array([2 * 10**38] * 10_000, dtype=np.float64) 255 | s_downsampled = LTTBDownsampler().downsample(arr_orig, n_out=100) 256 | arr = arr_orig.astype(np.float32) 257 | s_downsampled_f32 = LTTBDownsampler().downsample(arr, n_out=100) 258 | assert not np.all(s_downsampled == s_downsampled_f32) # TODO :( 259 | # I will leave this test here, but as many (much larger) libraries do not 260 | # really account for this, I guess it is perhaps less of an issue than I 261 | # thought. In the end f32 MAX is 3.4028235 × 10^38 & f64 MAX is 262 | # 1.7976931348623157 × 10^308 => which is in the end quite a lot.. (and all 263 | # integer averages are handled using f64) - f32 is only used for f16 & f32 264 | # (just as in numpy).
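# --- Added illustration (hypothetical helper, not part of the original test suite) ---
# The divergence asserted above comes down to plain float32 saturation: adding two
# values of 2e38 already exceeds the float32 maximum (~3.4028235e38) and overflows to
# inf, while the same arithmetic in float64 stays comfortably finite.
def _illustrate_f32_average_overflow():
    a = np.float32(2e38)
    assert np.isinf(a + a)  # 4e38 > float32 MAX -> overflows to inf
    b = np.float64(2e38)
    assert np.isfinite((b + b) / 2)  # float64 MAX ~1.8e308 easily holds 4e38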
265 | 266 | 267 | def test_invalid_nout(): 268 | """Test invalid n_out.""" 269 | arr = np.random.randint(0, 100, size=10_000) 270 | with pytest.raises(ValueError): 271 | LTTBDownsampler().downsample(arr, n_out=-1) 272 | with pytest.raises(ValueError): 273 | # Should be even 274 | MinMaxDownsampler().downsample(arr, n_out=33) 275 | with pytest.raises(ValueError): 276 | # Should be multiple of 4 277 | M4Downsampler().downsample(arr, n_out=34) 278 | 279 | 280 | def test_error_unsupported_dtype(): 281 | """Test unsupported dtype.""" 282 | arr = np.random.randint(0, 100, size=10_000) 283 | arr = arr.astype("object") 284 | with pytest.raises(ValueError): 285 | MinMaxDownsampler().downsample(arr, n_out=100) 286 | 287 | 288 | def test_error_invalid_args(): 289 | """Test invalid arguments.""" 290 | arr = np.random.randint(0, 100, size=10_000) 291 | # No args 292 | with pytest.raises(ValueError) as e_msg: 293 | MinMaxDownsampler().downsample(n_out=100, parallel=True) 294 | assert "takes 1 or 2 positional arguments" in str(e_msg.value) 295 | # Too many args 296 | with pytest.raises(ValueError) as e_msg: 297 | MinMaxDownsampler().downsample(arr, arr, arr, n_out=100, parallel=True) 298 | assert "takes 1 or 2 positional arguments" in str(e_msg.value) 299 | # Invalid y 300 | with pytest.raises(ValueError) as e_msg: 301 | MinMaxDownsampler().downsample(arr.reshape(5, 2_000), n_out=100, parallel=True) 302 | assert "y must be 1D" in str(e_msg.value) 303 | # Invalid x 304 | with pytest.raises(ValueError) as e_msg: 305 | MinMaxDownsampler().downsample( 306 | arr.reshape(5, 2_000), arr, n_out=100, parallel=True 307 | ) 308 | assert "x must be 1D" in str(e_msg.value) 309 | # Invalid x and y (different length) 310 | with pytest.raises(ValueError) as e_msg: 311 | MinMaxDownsampler().downsample(arr, arr[:-1], n_out=100, parallel=True) 312 | assert "x and y must have the same length" in str(e_msg.value) 313 | 314 | 315 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 316 | def test_non_contiguous_array(downsampler: AbstractDownsampler): 317 | """Test non contiguous array.""" 318 | arr = np.random.randint(0, 100, size=10_000).astype(np.float32) 319 | arr = arr[::2] 320 | assert not arr.flags["C_CONTIGUOUS"] 321 | with pytest.raises(ValueError) as e_msg: 322 | downsampler.downsample(arr, n_out=100) 323 | assert "must be contiguous" in str(e_msg.value) 324 | 325 | 326 | def test_everynth_non_contiguous_array(): 327 | """Test non contiguous array.""" 328 | arr = np.random.randint(0, 100, size=10_000) 329 | arr = arr[::2] 330 | assert not arr.flags["C_CONTIGUOUS"] 331 | downsampler = EveryNthDownsampler() 332 | s_downsampled = downsampler.downsample(arr, n_out=100) 333 | assert s_downsampled[0] == 0 334 | assert s_downsampled[-1] == 4950 335 | 336 | 337 | def test_nan_minmax_downsampler(): 338 | """Test NaN downsamplers.""" 339 | arr = np.random.randn(50_000) 340 | arr[::5] = np.nan 341 | s_downsampled = NaNMinMaxDownsampler().downsample(arr, n_out=100) 342 | arr_downsampled = arr[s_downsampled] 343 | assert np.all(np.isnan(arr_downsampled)) 344 | 345 | 346 | def test_nan_m4_downsampler(): 347 | """Test NaN downsamplers.""" 348 | arr = np.random.randn(50_000) 349 | arr[::5] = np.nan 350 | s_downsampled = NaNM4Downsampler().downsample(arr, n_out=100) 351 | arr_downsampled = arr[s_downsampled] 352 | assert np.all(np.isnan(arr_downsampled[1::4])) # min is NaN 353 | assert np.all(np.isnan(arr_downsampled[2::4])) # max is NaN 354 | 355 | 356 | def test_nan_minmaxlttb_downsampler(): 357 | """Test NaN 
downsamplers.""" 358 | arr = np.random.randn(50_000) 359 | arr[::5] = np.nan 360 | s_downsampled = NaNMinMaxLTTBDownsampler().downsample(arr, n_out=100) 361 | arr_downsampled = arr[s_downsampled] 362 | assert np.all(np.isnan(arr_downsampled[1:-1])) # first and last are not NaN 363 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use half::f16; 2 | 3 | use numpy::{IntoPyArray, PyArray1, PyReadonlyArray1}; 4 | use paste::paste; 5 | use pyo3::prelude::*; 6 | use pyo3::wrap_pymodule; 7 | 8 | /// ------------------------- MACROS ------------------------- 9 | 10 | // Create macros to avoid duplicate code for the various resample functions over the 11 | // different data types. 12 | 13 | // ----- Helper macros ----- 14 | 15 | // Without x-range 16 | 17 | macro_rules! _create_pyfunc_without_x { 18 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type:ty, $mod:ident) => { 19 | // Create the Python function 20 | #[pyfunction] 21 | fn $name<'py>( 22 | py: Python<'py>, 23 | y: PyReadonlyArray1<$type>, 24 | n_out: usize, 25 | ) -> Bound<'py, PyArray1> { 26 | let y = y.as_slice().unwrap(); 27 | let sampled_indices = $resample_mod::$resample_fn(y, n_out); 28 | sampled_indices.into_pyarray(py) 29 | } 30 | // Add the function to the module 31 | $mod.add_wrapped(wrap_pyfunction!($name))?; 32 | }; 33 | } 34 | 35 | macro_rules! _create_pyfunc_without_x_with_ratio { 36 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type:ty, $mod:ident) => { 37 | // Create the Python function 38 | #[pyfunction] 39 | fn $name<'py>( 40 | py: Python<'py>, 41 | y: PyReadonlyArray1<$type>, 42 | n_out: usize, 43 | ratio: usize, 44 | ) -> Bound<'py, PyArray1> { 45 | let y = y.as_slice().unwrap(); 46 | let sampled_indices = $resample_mod::$resample_fn(y, n_out, ratio); 47 | sampled_indices.into_pyarray(py) 48 | } 49 | // Add the function to the module 50 | $mod.add_wrapped(wrap_pyfunction!($name))?; 51 | }; 52 | } 53 | 54 | macro_rules! _create_pyfuncs_without_x_generic { 55 | ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)*) => { 56 | $( 57 | paste! { 58 | $create_macro!([], $resample_mod, $resample_fn, $t, $mod); 59 | } 60 | )* 61 | }; 62 | 63 | (@nan $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)*) => { 64 | $( 65 | paste! { 66 | $create_macro!([], $resample_mod, $resample_fn, $t, $mod); 67 | } 68 | )* 69 | }; 70 | } 71 | 72 | // With x-range 73 | 74 | macro_rules! _create_pyfunc_with_x { 75 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type_x:ty, $type_y:ty, $mod:ident) => { 76 | // Create the Python function 77 | #[pyfunction] 78 | fn $name<'py>( 79 | py: Python<'py>, 80 | x: PyReadonlyArray1<$type_x>, 81 | y: PyReadonlyArray1<$type_y>, 82 | n_out: usize, 83 | ) -> Bound<'py, PyArray1> { 84 | let x = x.as_slice().unwrap(); 85 | let y = y.as_slice().unwrap(); 86 | let sampled_indices = $resample_mod::$resample_fn(x, y, n_out); 87 | sampled_indices.into_pyarray(py) 88 | } 89 | // Add the function to the module 90 | $mod.add_wrapped(wrap_pyfunction!($name))?; 91 | }; 92 | } 93 | 94 | macro_rules! 
_create_pyfunc_with_x_with_ratio { 95 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type_x:ty, $type_y:ty, $mod:ident) => { 96 | // Create the Python function 97 | #[pyfunction] 98 | fn $name<'py>( 99 | py: Python<'py>, 100 | x: PyReadonlyArray1<$type_x>, 101 | y: PyReadonlyArray1<$type_y>, 102 | n_out: usize, 103 | ratio: usize, 104 | ) -> Bound<'py, PyArray1> { 105 | let x = x.as_slice().unwrap(); 106 | let y = y.as_slice().unwrap(); 107 | let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, ratio); 108 | sampled_indices.into_pyarray(py) 109 | } 110 | // Add the function to the module 111 | $mod.add_wrapped(wrap_pyfunction!($name))?; 112 | }; 113 | } 114 | 115 | macro_rules! _create_pyfuncs_with_x_generic { 116 | 117 | ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { 118 | // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). 119 | _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty),+); 120 | }; 121 | 122 | // Base case: there is only one type (for y) left 123 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty:ty) => { 124 | $( 125 | paste! { 126 | $create_macro!([], $resample_mod, $resample_fn, $tx, $ty, $mod); 127 | } 128 | )* 129 | }; 130 | // The head/tail recursion: pick the first element -> apply the base case, and recurse over the rest. 131 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty_head:ty, $($ty_rest:ty),+) => { 132 | _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $ty_head); 133 | _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty_rest),+); 134 | }; 135 | 136 | // Huge thx to https://stackoverflow.com/a/54552848 137 | // and https://users.rust-lang.org/t/tail-recursive-macros/905/3 138 | } 139 | 140 | // TODO: there must be a better way to combine normal and nan macros 141 | macro_rules! _create_nan_pyfuncs_with_x_generic { 142 | 143 | ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { 144 | // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). 145 | _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty),+); 146 | }; 147 | 148 | // Base case: there is only one type (for y) left 149 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty:ty) => { 150 | $( 151 | paste! { 152 | $create_macro!([], $resample_mod, $resample_fn, $tx, $ty, $mod); 153 | } 154 | )* 155 | }; 156 | // The head/tail recursion: pick the first element -> apply the base case, and recurse over the rest. 157 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty_head:ty, $($ty_rest:ty),+) => { 158 | _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $ty_head); 159 | _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty_rest),+); 160 | }; 161 | 162 | // Huge thx to https://stackoverflow.com/a/54552848 163 | // and https://users.rust-lang.org/t/tail-recursive-macros/905/3 164 | } 165 | // ------ Main macros ------ 166 | 167 | macro_rules! 
_create_pyfuncs_without_x_helper { 168 | ($pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 169 | _create_pyfuncs_without_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f16 f32 f64 i8 i16 i32 i64 u8 u16 u32 u64); 170 | }; 171 | 172 | (@nan $pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 173 | _create_pyfuncs_without_x_generic!(@nan $pyfunc_fn, $resample_mod, $resample_fn, $mod, f16 f32 f64); 174 | }; 175 | } 176 | 177 | macro_rules! create_pyfuncs_without_x { 178 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 179 | _create_pyfuncs_without_x_helper!( 180 | _create_pyfunc_without_x, 181 | $resample_mod, 182 | $resample_fn, 183 | $mod 184 | ); 185 | }; 186 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 187 | _create_pyfuncs_without_x_helper!(@nan 188 | _create_pyfunc_without_x, 189 | $resample_mod, 190 | $resample_fn, 191 | $mod 192 | ); 193 | }; 194 | } 195 | 196 | macro_rules! create_pyfuncs_without_x_with_ratio { 197 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 198 | _create_pyfuncs_without_x_helper!( 199 | _create_pyfunc_without_x_with_ratio, 200 | $resample_mod, 201 | $resample_fn, 202 | $mod 203 | ); 204 | }; 205 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 206 | _create_pyfuncs_without_x_helper!(@nan 207 | _create_pyfunc_without_x_with_ratio, 208 | $resample_mod, 209 | $resample_fn, 210 | $mod 211 | ); 212 | }; 213 | } 214 | 215 | macro_rules! _create_pyfuncs_with_x_helper { 216 | ($pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 217 | _create_pyfuncs_with_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f32 f64 i16 i32 i64 u16 u32 u64, f16 f32 f64 i8 i16 i32 i64 u8 u16 u32 u64); 218 | }; 219 | (@nan $pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 220 | _create_nan_pyfuncs_with_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f32 f64 i16 i32 i64 u16 u32 u64, f16 f32 f64); 221 | }; 222 | } 223 | 224 | macro_rules! create_pyfuncs_with_x { 225 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 226 | _create_pyfuncs_with_x_helper!(_create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); 227 | }; 228 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 229 | _create_pyfuncs_with_x_helper!(@nan _create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); 230 | }; 231 | } 232 | 233 | macro_rules! 
create_pyfuncs_with_x_with_ratio { 234 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 235 | _create_pyfuncs_with_x_helper!( 236 | _create_pyfunc_with_x_with_ratio, 237 | $resample_mod, 238 | $resample_fn, 239 | $mod 240 | ); 241 | }; 242 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 243 | _create_pyfuncs_with_x_helper!(@nan 244 | _create_pyfunc_with_x_with_ratio, 245 | $resample_mod, 246 | $resample_fn, 247 | $mod 248 | ); 249 | }; 250 | } 251 | 252 | // -------------------------------------- MINMAX --------------------------------------- 253 | 254 | use downsample_rs::minmax as minmax_mod; 255 | 256 | // Create a sub module for the minmax algorithm 257 | #[pymodule] 258 | fn minmax(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 259 | // ----------------- SEQUENTIAL 260 | 261 | let sequential_mod = PyModule::new(_py, "sequential")?; 262 | 263 | // ----- WITHOUT X 264 | { 265 | create_pyfuncs_without_x!(minmax_mod, min_max_without_x, sequential_mod); 266 | create_pyfuncs_without_x!(@nan minmax_mod, min_max_without_x_nan, sequential_mod); 267 | } 268 | 269 | // ----- WITH X 270 | { 271 | create_pyfuncs_with_x!(minmax_mod, min_max_with_x, sequential_mod); 272 | create_pyfuncs_with_x!(@nan minmax_mod, min_max_with_x_nan, sequential_mod); 273 | } 274 | 275 | // ----------------- PARALLEL 276 | 277 | let parallel_mod = PyModule::new(_py, "parallel")?; 278 | 279 | // ----- WITHOUT X 280 | { 281 | create_pyfuncs_without_x!(minmax_mod, min_max_without_x_parallel, parallel_mod); 282 | create_pyfuncs_without_x!(@nan minmax_mod, min_max_without_x_parallel, parallel_mod); 283 | } 284 | 285 | // ----- WITH X 286 | { 287 | create_pyfuncs_with_x!(minmax_mod, min_max_with_x_parallel, parallel_mod); 288 | create_pyfuncs_with_x!(@nan minmax_mod, min_max_with_x_parallel, parallel_mod); 289 | } 290 | 291 | // Add the sub modules to the module 292 | m.add_submodule(&sequential_mod)?; 293 | m.add_submodule(¶llel_mod)?; 294 | 295 | Ok(()) 296 | } 297 | 298 | // --------------------------------------- M4 ------------------------------------------ 299 | 300 | use downsample_rs::m4 as m4_mod; 301 | 302 | // Create a sub module for the M4 algorithm 303 | #[pymodule] 304 | fn m4(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 305 | // ----------------- SEQUENTIAL 306 | 307 | let sequential_mod = PyModule::new(_py, "sequential")?; 308 | 309 | // ----- WITHOUT X 310 | { 311 | create_pyfuncs_without_x!(m4_mod, m4_without_x, sequential_mod); 312 | create_pyfuncs_without_x!(@nan m4_mod, m4_without_x_nan, sequential_mod); 313 | } 314 | 315 | // ----- WITH X 316 | { 317 | create_pyfuncs_with_x!(m4_mod, m4_with_x, sequential_mod); 318 | create_pyfuncs_with_x!(@nan m4_mod, m4_with_x_nan, sequential_mod); 319 | } 320 | 321 | // ----------------- PARALLEL 322 | 323 | let parallel_mod = PyModule::new(_py, "parallel")?; 324 | 325 | // ----- WITHOUT X 326 | { 327 | create_pyfuncs_without_x!(m4_mod, m4_without_x_parallel, parallel_mod); 328 | create_pyfuncs_without_x!(@nan m4_mod, m4_without_x_parallel, parallel_mod); 329 | } 330 | 331 | // ----- WITH X 332 | { 333 | create_pyfuncs_with_x!(m4_mod, m4_with_x_parallel, parallel_mod); 334 | create_pyfuncs_with_x!(@nan m4_mod, m4_with_x_parallel, parallel_mod); 335 | } 336 | 337 | // Add the sub modules to the module 338 | m.add_submodule(&sequential_mod)?; 339 | m.add_submodule(¶llel_mod)?; 340 | 341 | Ok(()) 342 | } 343 | 344 | // -------------------------------------- LTTB ----------------------------------------- 345 | 346 | use 
downsample_rs::lttb as lttb_mod; 347 | 348 | // Create a sub module for the LTTB algorithm 349 | #[pymodule] 350 | fn lttb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 351 | // ----------------- SEQUENTIAL 352 | 353 | let sequential_mod = PyModule::new(_py, "sequential")?; 354 | 355 | // Create the Python functions for the module 356 | // ----- WITHOUT X 357 | { 358 | create_pyfuncs_without_x!(lttb_mod, lttb_without_x, sequential_mod); 359 | } 360 | 361 | // ----- WITH X 362 | { 363 | create_pyfuncs_with_x!(lttb_mod, lttb_with_x, sequential_mod); 364 | } 365 | 366 | // Add the sub modules to the module 367 | m.add_submodule(&sequential_mod)?; 368 | 369 | Ok(()) 370 | } 371 | 372 | // -------------------------------------- MINMAXLTTB ----------------------------------------- 373 | 374 | use downsample_rs::minmaxlttb as minmaxlttb_mod; 375 | 376 | // Create a sub module for the MINMAXLTTB algorithm 377 | #[pymodule] 378 | fn minmaxlttb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 379 | // ----------------- SEQUENTIAL 380 | 381 | let sequential_mod = PyModule::new(_py, "sequential")?; 382 | 383 | // ----- WITHOUT X 384 | { 385 | create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_without_x, sequential_mod); 386 | create_pyfuncs_without_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_without_x_nan, sequential_mod); 387 | } 388 | 389 | // ----- WITH X 390 | { 391 | create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x, sequential_mod); 392 | create_pyfuncs_with_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_with_x_nan, sequential_mod); 393 | } 394 | 395 | // ----------------- PARALLEL 396 | 397 | let parallel_mod = PyModule::new(_py, "parallel")?; 398 | 399 | // ----- WITHOUT X 400 | { 401 | create_pyfuncs_without_x_with_ratio!( 402 | minmaxlttb_mod, 403 | minmaxlttb_without_x_parallel, 404 | parallel_mod 405 | ); 406 | create_pyfuncs_without_x_with_ratio!(@nan 407 | minmaxlttb_mod, 408 | minmaxlttb_without_x_parallel, 409 | parallel_mod 410 | ); 411 | } 412 | 413 | // ----- WITH X 414 | { 415 | create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x_parallel, parallel_mod); 416 | create_pyfuncs_with_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_with_x_parallel, parallel_mod); 417 | } 418 | 419 | // Add the submodules to the module 420 | m.add_submodule(&sequential_mod)?; 421 | m.add_submodule(¶llel_mod)?; 422 | 423 | Ok(()) 424 | } 425 | 426 | // ------------------------------- DOWNSAMPLING MODULE ------------------------------ // 427 | 428 | #[pymodule] // The super module 429 | #[pyo3(name = "_tsdownsample_rs")] // How the module is imported in Python: https://github.com/PyO3/maturin/issues/256#issuecomment-1038576218 430 | fn tsdownsample(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 431 | m.add_wrapped(wrap_pymodule!(minmax))?; 432 | m.add_wrapped(wrap_pymodule!(m4))?; 433 | m.add_wrapped(wrap_pymodule!(lttb))?; 434 | m.add_wrapped(wrap_pymodule!(minmaxlttb))?; 435 | 436 | Ok(()) 437 | } 438 | -------------------------------------------------------------------------------- /downsample_rs/src/minmax.rs: -------------------------------------------------------------------------------- 1 | use rayon::iter::IndexedParallelIterator; 2 | use rayon::prelude::*; 3 | 4 | use argminmax::{ArgMinMax, NaNArgMinMax}; 5 | use num_traits::{AsPrimitive, FromPrimitive}; 6 | 7 | use super::searchsorted::{ 8 | get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, 9 | }; 10 | use super::types::Num; 11 | use 
super::POOL; 12 | 13 | // ----------------------------------- NON-PARALLEL ------------------------------------ 14 | 15 | // ----------- WITH X 16 | 17 | macro_rules! min_max_with_x { 18 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 19 | pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec 20 | where 21 | for<'a> &'a [Ty]: $trait, 22 | Tx: Num + FromPrimitive + AsPrimitive, 23 | Ty: Copy + PartialOrd, 24 | { 25 | assert_eq!(n_out % 2, 0); 26 | let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); 27 | min_max_generic_with_x(arr, bin_idx_iterator, n_out, $f_argminmax) 28 | } 29 | }; 30 | } 31 | 32 | min_max_with_x!(min_max_with_x, ArgMinMax, |arr| arr.argminmax()); 33 | min_max_with_x!(min_max_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); 34 | 35 | // ----------- WITHOUT X 36 | 37 | macro_rules! min_max_without_x { 38 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 39 | pub fn $func_name(arr: &[T], n_out: usize) -> Vec 40 | where 41 | for<'a> &'a [T]: $trait, 42 | { 43 | assert_eq!(n_out % 2, 0); 44 | min_max_generic(arr, n_out, $f_argminmax) 45 | } 46 | }; 47 | } 48 | 49 | min_max_without_x!(min_max_without_x, ArgMinMax, |arr| arr.argminmax()); 50 | min_max_without_x!(min_max_without_x_nan, NaNArgMinMax, |arr| arr 51 | .nanargminmax()); 52 | 53 | // ------------------------------------- PARALLEL -------------------------------------- 54 | 55 | // ----------- WITH X 56 | 57 | macro_rules! min_max_with_x_parallel { 58 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 59 | pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec 60 | where 61 | for<'a> &'a [Ty]: $trait, 62 | Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, 63 | Ty: Copy + PartialOrd + Send + Sync, 64 | { 65 | assert_eq!(n_out % 2, 0); 66 | let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2); 67 | min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, $f_argminmax) 68 | } 69 | }; 70 | } 71 | 72 | min_max_with_x_parallel!(min_max_with_x_parallel, ArgMinMax, |arr| arr.argminmax()); 73 | min_max_with_x_parallel!(min_max_with_x_parallel_nan, NaNArgMinMax, |arr| arr 74 | .nanargminmax()); 75 | 76 | // ----------- WITHOUT X 77 | 78 | macro_rules! 
min_max_without_x_parallel { 79 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 80 | pub fn $func_name(arr: &[T], n_out: usize) -> Vec 81 | where 82 | for<'a> &'a [T]: $trait, 83 | { 84 | assert_eq!(n_out % 2, 0); 85 | min_max_generic_parallel(arr, n_out, $f_argminmax) 86 | } 87 | }; 88 | } 89 | 90 | min_max_without_x_parallel!(min_max_without_x_parallel, ArgMinMax, |arr| arr.argminmax()); 91 | min_max_without_x_parallel!(min_max_without_x_parallel_nan, NaNArgMinMax, |arr| arr 92 | .nanargminmax()); 93 | 94 | // ----------------------------------- GENERICS ------------------------------------ 95 | 96 | // --------------------- WITHOUT X 97 | 98 | #[inline(always)] 99 | pub(crate) fn min_max_generic( 100 | arr: &[T], 101 | n_out: usize, 102 | f_argminmax: fn(&[T]) -> (usize, usize), 103 | ) -> Vec { 104 | // Assumes n_out is a multiple of 2 105 | if n_out >= arr.len() { 106 | return (0..arr.len()).collect::>(); 107 | } 108 | 109 | // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) 110 | let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; 111 | 112 | let mut sampled_indices = vec![usize::default(); n_out]; 113 | 114 | let mut start_idx: usize = 0; 115 | for i in 0..n_out / 2 { 116 | // Decided to use multiplication instead of adding to the accumulator (end) 117 | // as multiplication seems to be less prone to rounding errors. 118 | let end: f64 = block_size * (i + 1) as f64; 119 | let end_idx: usize = end as usize + 1; 120 | 121 | let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); 122 | 123 | // Add the indexes in sorted order 124 | if min_index < max_index { 125 | sampled_indices[2 * i] = min_index + start_idx; 126 | sampled_indices[2 * i + 1] = max_index + start_idx; 127 | } else { 128 | sampled_indices[2 * i] = max_index + start_idx; 129 | sampled_indices[2 * i + 1] = min_index + start_idx; 130 | } 131 | 132 | start_idx = end_idx; 133 | } 134 | 135 | sampled_indices 136 | } 137 | 138 | #[inline(always)] 139 | pub(crate) fn min_max_generic_parallel( 140 | arr: &[T], 141 | n_out: usize, 142 | f_argminmax: fn(&[T]) -> (usize, usize), 143 | ) -> Vec { 144 | // Assumes n_out is a multiple of 2 145 | if n_out >= arr.len() { 146 | return (0..arr.len()).collect::>(); 147 | } 148 | 149 | // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) 150 | let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; 151 | 152 | // Store the enumerated indexes in the output array 153 | // These indexes are used to calculate the start and end indexes of each bin in 154 | // the multi-threaded execution 155 | let mut sampled_indices: Vec = (0..n_out).collect::>(); 156 | 157 | POOL.install(|| { 158 | sampled_indices 159 | .par_chunks_exact_mut(2) 160 | .for_each(|sampled_index_chunk| { 161 | let i: f64 = unsafe { *sampled_index_chunk.get_unchecked(0) >> 1 } as f64; 162 | let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize; 163 | let end_idx: usize = (block_size * (i + 1.0)) as usize + 1; 164 | 165 | let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); 166 | 167 | // Add the indexes in sorted order 168 | if min_index < max_index { 169 | sampled_index_chunk[0] = min_index + start_idx; 170 | sampled_index_chunk[1] = max_index + start_idx; 171 | } else { 172 | sampled_index_chunk[0] = max_index + start_idx; 173 | sampled_index_chunk[1] = min_index + start_idx; 174 | } 175 | }) 176 | }); 177 | 178 | sampled_indices 179 | } 180 | 181 | // --------------------- WITH X 182 | 183 | 
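// (Added note, not in the original source) The `_with_x` generics below consume the
// bins as an iterator of `Option<(start, end)>` index ranges derived from the x-values
// (see searchsorted.rs): bins are equidistant in x rather than in index, `None` marks
// an empty bin (e.g. a gap in x), and bins holding fewer than three samples are pushed
// as-is instead of being reduced to an (argmin, argmax) pair.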
#[inline(always)] 184 | pub(crate) fn min_max_generic_with_x( 185 | arr: &[T], 186 | bin_idx_iterator: impl Iterator>, 187 | n_out: usize, 188 | f_argminmax: fn(&[T]) -> (usize, usize), 189 | ) -> Vec { 190 | // Assumes n_out is a multiple of 2 191 | if n_out >= arr.len() { 192 | return (0..arr.len()).collect::>(); 193 | } 194 | 195 | let mut sampled_indices: Vec = Vec::with_capacity(n_out); 196 | 197 | bin_idx_iterator.for_each(|bin| { 198 | if let Some((start, end)) = bin { 199 | if end <= start + 2 { 200 | // If the bin has <= 2 elements, just add them all 201 | for i in start..end { 202 | sampled_indices.push(i); 203 | } 204 | } else { 205 | // If the bin has at least two elements, add the argmin and argmax 206 | let step = &arr[start..end]; 207 | let (min_index, max_index) = f_argminmax(step); 208 | 209 | // Add the indexes in sorted order 210 | if min_index < max_index { 211 | sampled_indices.push(min_index + start); 212 | sampled_indices.push(max_index + start); 213 | } else { 214 | sampled_indices.push(max_index + start); 215 | sampled_indices.push(min_index + start); 216 | } 217 | } 218 | } 219 | }); 220 | 221 | sampled_indices 222 | } 223 | 224 | #[inline(always)] 225 | pub(crate) fn min_max_generic_with_x_parallel( 226 | arr: &[T], 227 | bin_idx_iterator: impl IndexedParallelIterator>>, 228 | n_out: usize, 229 | f_argminmax: fn(&[T]) -> (usize, usize), 230 | ) -> Vec { 231 | // Assumes n_out is a multiple of 2 232 | if n_out >= arr.len() { 233 | return (0..arr.len()).collect::>(); 234 | } 235 | 236 | POOL.install(|| { 237 | bin_idx_iterator 238 | .flat_map(|bin_idx_iterator| { 239 | bin_idx_iterator 240 | .map(|bin| { 241 | match bin { 242 | Some((start, end)) => { 243 | if end <= start + 2 { 244 | // If the bin has <= 2 elements, just return them all 245 | return (start..end).collect::>(); 246 | } 247 | 248 | // If the bin has at least two elements, return the argmin and argmax 249 | let step = &arr[start..end]; 250 | let (min_index, max_index) = f_argminmax(step); 251 | 252 | // Return the indexes in sorted order 253 | if min_index < max_index { 254 | vec![min_index + start, max_index + start] 255 | } else { 256 | vec![max_index + start, min_index + start] 257 | } 258 | } // If the bin is empty, return empty Vec 259 | None => { 260 | vec![] 261 | } 262 | } 263 | }) 264 | .collect::>>() 265 | }) 266 | .flatten() 267 | .collect::>() 268 | }) 269 | } 270 | 271 | #[cfg(test)] 272 | mod tests { 273 | use num_traits::AsPrimitive; 274 | use rstest::rstest; 275 | use rstest_reuse::{self, *}; 276 | 277 | use super::{min_max_with_x, min_max_without_x}; 278 | use super::{min_max_with_x_parallel, min_max_without_x_parallel}; 279 | 280 | use dev_utils::utils; 281 | 282 | fn get_array_f32(n: usize) -> Vec { 283 | utils::get_random_array(n, f32::MIN, f32::MAX) 284 | } 285 | 286 | // Template for n_out 287 | #[template] 288 | #[rstest] 289 | #[case(198)] 290 | #[case(200)] 291 | #[case(202)] 292 | fn n_outs(#[case] n_out: usize) {} 293 | 294 | #[test] 295 | fn test_min_max_scalar_without_x_correct() { 296 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 297 | 298 | let sampled_indices = min_max_without_x(&arr, 10); 299 | let sampled_values = sampled_indices 300 | .iter() 301 | .map(|x| arr[*x]) 302 | .collect::>(); 303 | 304 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 305 | let expected_values = expected_indices 306 | .iter() 307 | .map(|x| *x as f32) 308 | .collect::>(); 309 | 310 | assert_eq!(sampled_indices, expected_indices); 311 | 
assert_eq!(sampled_values, expected_values); 312 | } 313 | 314 | #[test] 315 | fn test_min_max_scalar_without_x_parallel_correct() { 316 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 317 | 318 | let sampled_indices = min_max_without_x_parallel(&arr, 10); 319 | let sampled_values = sampled_indices 320 | .iter() 321 | .map(|x| arr[*x]) 322 | .collect::>(); 323 | 324 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 325 | let expected_values = expected_indices 326 | .iter() 327 | .map(|x| *x as f32) 328 | .collect::>(); 329 | 330 | assert_eq!(sampled_indices, expected_indices); 331 | assert_eq!(sampled_values, expected_values); 332 | } 333 | 334 | #[test] 335 | fn test_min_max_scalar_with_x_correct() { 336 | let x: [i32; 100] = core::array::from_fn(|i| i.as_()); 337 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 338 | 339 | let sampled_indices = min_max_with_x(&x, &arr, 10); 340 | let sampled_values = sampled_indices 341 | .iter() 342 | .map(|x| arr[*x]) 343 | .collect::>(); 344 | 345 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 346 | let expected_values = expected_indices 347 | .iter() 348 | .map(|x| *x as f32) 349 | .collect::>(); 350 | 351 | assert_eq!(sampled_indices, expected_indices); 352 | assert_eq!(sampled_values, expected_values); 353 | } 354 | 355 | #[test] 356 | fn test_min_max_scalar_with_x_parallel_correct() { 357 | let x: [i32; 100] = core::array::from_fn(|i| i.as_()); 358 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 359 | 360 | let sampled_indices = min_max_with_x_parallel(&x, &arr, 10); 361 | let sampled_values = sampled_indices 362 | .iter() 363 | .map(|x| arr[*x]) 364 | .collect::>(); 365 | 366 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 367 | let expected_values = expected_indices 368 | .iter() 369 | .map(|x| *x as f32) 370 | .collect::>(); 371 | 372 | assert_eq!(sampled_indices, expected_indices); 373 | assert_eq!(sampled_values, expected_values); 374 | } 375 | 376 | #[test] 377 | fn test_min_max_scalar_with_x_gap() { 378 | // We will create a gap in the middle of the array 379 | // Increment the second half of the array by 50 380 | let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); 381 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 382 | 383 | let sampled_indices = min_max_with_x(&x, &arr, 10); 384 | assert_eq!(sampled_indices.len(), 8); // One full gap 385 | let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; 386 | assert_eq!(sampled_indices, expected_indices); 387 | 388 | // Increment the second half of the array by 50 again 389 | let x = x.map(|i| if i > 101 { i + 50 } else { i }); 390 | 391 | let sampled_indices = min_max_with_x(&x, &arr, 10); 392 | assert_eq!(sampled_indices.len(), 9); // Gap with 1 value 393 | let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; 394 | assert_eq!(sampled_indices, expected_indices); 395 | } 396 | 397 | #[test] 398 | fn test_min_max_scalar_with_x_parallel_gap() { 399 | // Create a gap in the middle of the array 400 | // Increment the second half of the array by 50 401 | let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); 402 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 403 | 404 | let sampled_indices = min_max_with_x_parallel(&x, &arr, 10); 405 | assert_eq!(sampled_indices.len(), 8); // One full gap 406 | let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; 407 | assert_eq!(sampled_indices, 
expected_indices); 408 | 409 | // Increment the second half of the array by 50 again 410 | let x = x.map(|i| if i > 101 { i + 50 } else { i }); 411 | 412 | let sampled_indices = min_max_with_x_parallel(&x, &arr, 10); 413 | assert_eq!(sampled_indices.len(), 9); // Gap with 1 value 414 | let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; 415 | assert_eq!(sampled_indices, expected_indices); 416 | } 417 | 418 | #[test] 419 | fn test_same_output() { 420 | const N: usize = 1001 - 2; 421 | const n_out: usize = 26 * 4; 422 | let y = (0..N).map(|v| v as f32).collect::>(); 423 | let x = (1..(N + 1) as i32).collect::>(); 424 | let sampled_indices1 = min_max_with_x(&x, &y, n_out); 425 | let sampled_indices2 = min_max_without_x(&y, n_out); 426 | assert_eq!(sampled_indices1, sampled_indices2); 427 | } 428 | 429 | #[apply(n_outs)] 430 | fn test_many_random_runs_same_output(n_out: usize) { 431 | const N: usize = 20_003; 432 | let x: [i32; N] = core::array::from_fn(|i| i.as_()); 433 | for _ in 0..100 { 434 | let mut arr = get_array_f32(N); 435 | arr[N - 1] = f32::INFINITY; // Make sure the last value is always the max 436 | let idxs1 = min_max_without_x(arr.as_slice(), n_out); 437 | let idxs2 = min_max_without_x_parallel(arr.as_slice(), n_out); 438 | let idxs3 = min_max_with_x(&x, arr.as_slice(), n_out); 439 | let idxs4 = min_max_with_x_parallel(&x, arr.as_slice(), n_out); 440 | assert_eq!(idxs1, idxs2); 441 | assert_eq!(idxs1, idxs3); 442 | assert_eq!(idxs1, idxs4); 443 | } 444 | } 445 | } 446 | -------------------------------------------------------------------------------- /tsdownsample/downsampling_interface.py: -------------------------------------------------------------------------------- 1 | """AbstractDownsampler interface-class, subclassed by concrete downsamplers.""" 2 | 3 | __author__ = "Jeroen Van Der Donckt" 4 | 5 | import re 6 | import warnings 7 | from abc import ABC, abstractmethod 8 | from copy import deepcopy 9 | from types import ModuleType 10 | from typing import Callable, List, Optional, Tuple, Union 11 | 12 | import numpy as np 13 | 14 | 15 | class AbstractDownsampler(ABC): 16 | """AbstractDownsampler interface-class, subclassed by concrete downsamplers.""" 17 | 18 | def __init__( 19 | self, 20 | check_contiguous: bool = True, 21 | x_dtype_regex_list: Optional[List[str]] = None, 22 | y_dtype_regex_list: Optional[List[str]] = None, 23 | ): 24 | self.check_contiguous = check_contiguous 25 | self.x_dtype_regex_list = x_dtype_regex_list 26 | self.y_dtype_regex_list = y_dtype_regex_list 27 | 28 | def _check_contiguous(self, arr: np.ndarray, y: bool = True): 29 | # necessary for rust downsamplers as they don't support non-contiguous arrays 30 | # (we call .as_slice().unwrap() on the array) in the lib.rs file 31 | # which will panic if the array is not contiguous 32 | if not self.check_contiguous: 33 | return 34 | 35 | if arr.flags["C_CONTIGUOUS"]: 36 | return 37 | 38 | raise ValueError(f"{'y' if y else 'x'} array must be contiguous.") 39 | 40 | def _supports_dtype(self, arr: np.ndarray, y: bool = True): 41 | dtype_regex_list = self.y_dtype_regex_list if y else self.x_dtype_regex_list 42 | # base case 43 | if dtype_regex_list is None: 44 | return 45 | 46 | for dtype_regex_str in dtype_regex_list: 47 | m = re.compile(dtype_regex_str).match(str(arr.dtype)) 48 | if m is not None: # a match is found 49 | return 50 | raise ValueError( 51 | f"{arr.dtype} doesn't match with any regex in {dtype_regex_list} " 52 | f"for the {'y' if y else 'x'}-data" 53 | ) 54 | 55 | @staticmethod 56 | 
def _check_valid_downsample_args( 57 | *args, 58 | ) -> Tuple[Union[np.ndarray, None], np.ndarray]: 59 | if len(args) == 2: 60 | x, y = args 61 | elif len(args) == 1: 62 | x, y = None, args[0] 63 | else: 64 | raise ValueError( 65 | "downsample() takes 1 or 2 positional arguments but " 66 | f"{len(args)} were given" 67 | ) 68 | 69 | if x is not None and not isinstance(x, np.ndarray): 70 | x = np.array(x) 71 | if not isinstance(y, np.ndarray): 72 | y = np.array(y) 73 | 74 | # y must be 1D array 75 | if y.ndim != 1: 76 | raise ValueError("y must be 1D array") 77 | # x must be 1D array with same length as y or None 78 | if x is not None: 79 | if x.ndim != 1: 80 | raise ValueError("x must be 1D array") 81 | if len(x) != len(y): 82 | raise ValueError("x and y must have the same length") 83 | 84 | return x, y 85 | 86 | @staticmethod 87 | def _check_valid_n_out(n_out: int): 88 | if n_out <= 0: 89 | raise ValueError("n_out must be greater than 0") 90 | 91 | @abstractmethod 92 | def _downsample( 93 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 94 | ) -> np.ndarray: 95 | """Downsample the data in x and y. 96 | 97 | Returns 98 | ------- 99 | np.ndarray 100 | The selected indices. 101 | """ 102 | raise NotImplementedError 103 | 104 | def downsample(self, *args, n_out: int, **kwargs): # x and y are optional 105 | """Downsample y (and x). 106 | 107 | Call signatures:: 108 | downsample([x], y, n_out, **kwargs) 109 | 110 | 111 | Parameters 112 | ---------- 113 | x, y : array-like 114 | The horizontal / vertical coordinates of the data points. 115 | *x* values are optional. 116 | These parameters should be 1D arrays. 117 | These arguments cannot be passed as keywords. 118 | n_out : int 119 | The number of points to keep. 120 | **kwargs 121 | Additional keyword arguments are passed to the downsampler. 122 | 123 | Returns 124 | ------- 125 | np.ndarray 126 | The selected indices. 
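Examples
--------
Illustrative sketch (added here for clarity, not part of the original docstring)::

    from tsdownsample import MinMaxLTTBDownsampler
    y = np.random.randn(1_000_000)
    idxs = MinMaxLTTBDownsampler().downsample(y, n_out=1_000)
    y_down = y[idxs]  # the returned array contains *indices* into y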
127 | """ 128 | self._check_valid_n_out(n_out) 129 | x, y = self._check_valid_downsample_args(*args) 130 | self._supports_dtype(y, y=True) 131 | self._check_contiguous(y, y=True) 132 | if x is not None: 133 | self._supports_dtype(x, y=False) 134 | self._check_contiguous(x, y=False) 135 | return self._downsample(x, y, n_out, **kwargs) 136 | 137 | 138 | # ------------------- Rust Downsample Interface ------------------- 139 | DOWNSAMPLE_F = "downsample" 140 | 141 | 142 | # the following dtypes are supported by the rust downsamplers (x and y) 143 | _rust_dtypes = [ 144 | "float32", 145 | "float64", 146 | "uint16", 147 | "uint32", 148 | "uint64", 149 | "int16", 150 | "int32", 151 | "int64", 152 | "datetime64", 153 | "timedelta64", 154 | ] 155 | # <= 8-bit x-dtypes are not supported as the range of the values is too small to require 156 | # downsampling 157 | _y_rust_dtypes = _rust_dtypes + ["float16", "int8", "uint8", "bool"] 158 | 159 | 160 | class AbstractRustDownsampler(AbstractDownsampler, ABC): 161 | """RustDownsampler interface-class, subclassed by concrete downsamplers.""" 162 | 163 | def __init__(self): 164 | super().__init__(True, _rust_dtypes, _y_rust_dtypes) # same for x and y 165 | 166 | @property 167 | def _downsample_func_prefix(self) -> str: 168 | """The prefix of the downsample functions in the rust module.""" 169 | return DOWNSAMPLE_F 170 | 171 | @property 172 | def rust_mod(self) -> ModuleType: 173 | """The compiled Rust module for the current downsampler.""" 174 | raise NotImplementedError 175 | 176 | @property 177 | def mod_single_core(self) -> ModuleType: 178 | """Get the single-core Rust module. 179 | 180 | Returns 181 | ------- 182 | ModuleType 183 | If SIMD compiled module is available, that one is returned. Otherwise, the 184 | scalar compiled module is returned. 185 | """ 186 | return self.rust_mod.sequential 187 | 188 | @property 189 | def mod_multi_core(self) -> Union[ModuleType, None]: 190 | """Get the multi-core Rust module. 191 | 192 | Returns 193 | ------- 194 | ModuleType or None 195 | If SIMD parallel compiled module is available, that one is returned. 196 | Otherwise, the scalar parallel compiled module is returned. 197 | If no parallel compiled module is available, None is returned. 
198 | """ 199 | if hasattr(self.rust_mod, "parallel"): 200 | # use SIMD implementation if available 201 | return self.rust_mod.parallel 202 | return None # no parallel compiled module available 203 | 204 | @staticmethod 205 | def _view_x(x: np.ndarray) -> np.ndarray: 206 | """View the x-data as different dtype (if necessary).""" 207 | if np.issubdtype(x.dtype, np.datetime64): 208 | # datetime64 is viewed as int64 209 | return x.view(dtype=np.int64) 210 | elif np.issubdtype(x.dtype, np.timedelta64): 211 | # timedelta64 is viewed as int64 212 | return x.view(dtype=np.int64) 213 | return x 214 | 215 | @staticmethod 216 | def _view_y(y: np.ndarray) -> np.ndarray: 217 | """View the y-data as different dtype (if necessary).""" 218 | if y.dtype == "bool": 219 | # bool is viewed as int8 220 | return y.view(dtype=np.int8) 221 | elif np.issubdtype(y.dtype, np.datetime64): 222 | # datetime64 is viewed as int64 223 | return y.view(dtype=np.int64) 224 | elif np.issubdtype(y.dtype, np.timedelta64): 225 | # timedelta64 is viewed as int64 226 | return y.view(dtype=np.int64) 227 | return y 228 | 229 | def _switch_mod_with_y( 230 | self, y_dtype: np.dtype, mod: ModuleType, downsample_func: Optional[str] = None 231 | ) -> Callable: 232 | """Select the appropriate function from the rust module for the y-data. 233 | 234 | Assumes equal binning (when no data for x is passed -> only this function is 235 | executed). 236 | Equidistant binning is utilized when a `downsample_func` is passed from the 237 | `_switch_mod_with_x_and_y` method (since the x-data is considered in the 238 | downsampling). 239 | 240 | Parameters 241 | ---------- 242 | y_dtype : np.dtype 243 | The dtype of the y-data 244 | mod : ModuleType 245 | The module to select the appropriate function from 246 | downsample_func : str, optional 247 | The name of the function to use, by default DOWNSAMPLE_FUNC. 248 | This argument is passed from the `_switch_mod_with_x_and_y` method when 249 | the x-data is considered in the downsampling. 
250 |         """
251 |         if downsample_func is None:
252 |             downsample_func = self._downsample_func_prefix
253 |         # FLOATS
254 |         if np.issubdtype(y_dtype, np.floating):
255 |             if y_dtype == np.float16:
256 |                 return getattr(mod, downsample_func + "_f16")
257 |             elif y_dtype == np.float32:
258 |                 return getattr(mod, downsample_func + "_f32")
259 |             elif y_dtype == np.float64:
260 |                 return getattr(mod, downsample_func + "_f64")
261 |         # UINTS
262 |         elif np.issubdtype(y_dtype, np.unsignedinteger):
263 |             if y_dtype == np.uint8:
264 |                 return getattr(mod, downsample_func + "_u8")
265 |             elif y_dtype == np.uint16:
266 |                 return getattr(mod, downsample_func + "_u16")
267 |             elif y_dtype == np.uint32:
268 |                 return getattr(mod, downsample_func + "_u32")
269 |             elif y_dtype == np.uint64:
270 |                 return getattr(mod, downsample_func + "_u64")
271 |         # INTS (need to be last because uint is a subdtype of int)
272 |         elif np.issubdtype(y_dtype, np.integer):
273 |             if y_dtype == np.int8:
274 |                 return getattr(mod, downsample_func + "_i8")
275 |             elif y_dtype == np.int16:
276 |                 return getattr(mod, downsample_func + "_i16")
277 |             elif y_dtype == np.int32:
278 |                 return getattr(mod, downsample_func + "_i32")
279 |             elif y_dtype == np.int64:
280 |                 return getattr(mod, downsample_func + "_i64")
281 |         # DATETIME -> i64 (datetime64 is viewed as int64)
282 |         # TIMEDELTA -> i64 (timedelta64 is viewed as int64)
283 |         # BOOLS -> int8 (bool is viewed as int8)
284 |         raise ValueError(f"Unsupported data type (for y): {y_dtype}")
285 | 
286 |     def _switch_mod_with_x_and_y(
287 |         self,  # necessary to access the class's _switch_mod_with_y method
288 |         x_dtype: np.dtype,
289 |         y_dtype: np.dtype,
290 |         mod: ModuleType,
291 |         downsample_func: Optional[str] = None,
292 |     ) -> Callable:
293 |         """Select the appropriate function from the rust module for the x- and y-data.
294 | 
295 |         Assumes equidistant binning (the x-data determines the bin boundaries).
296 | 
297 |         Parameters
298 |         ----------
299 |         x_dtype : np.dtype
300 |             The dtype of the x-data
301 |         y_dtype : np.dtype
302 |             The dtype of the y-data
303 |         mod : ModuleType
304 |             The module to select the appropriate function from
305 |         downsample_func : str, optional
306 |             The name of the function to use, by default DOWNSAMPLE_F.
307 | """ 308 | if downsample_func is None: 309 | downsample_func = self._downsample_func_prefix 310 | # FLOATS 311 | if np.issubdtype(x_dtype, np.floating): 312 | if x_dtype == np.float16: 313 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f16") 314 | elif x_dtype == np.float32: 315 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f32") 316 | elif x_dtype == np.float64: 317 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f64") 318 | # UINTS 319 | elif np.issubdtype(x_dtype, np.unsignedinteger): 320 | if x_dtype == np.uint16: 321 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u16") 322 | elif x_dtype == np.uint32: 323 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u32") 324 | elif x_dtype == np.uint64: 325 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u64") 326 | # INTS (need to be last because uint is subdtype of int) 327 | elif np.issubdtype(x_dtype, np.integer): 328 | if x_dtype == np.int16: 329 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i16") 330 | elif x_dtype == np.int32: 331 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i32") 332 | elif x_dtype == np.int64: 333 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i64") 334 | # DATETIME -> i64 (datetime64 is viewed as int64) 335 | # TIMEDELTA -> i64 (timedelta64 is viewed as int64) 336 | raise ValueError(f"Unsupported data type (for x): {x_dtype}") 337 | 338 | def _downsample( 339 | self, 340 | x: Union[np.ndarray, None], 341 | y: np.ndarray, 342 | n_out: int, 343 | parallel: bool = False, 344 | **kwargs, 345 | ) -> np.ndarray: 346 | """Downsample the data in x and y.""" 347 | mod = self.mod_single_core 348 | if parallel: 349 | if self.mod_multi_core is None: 350 | name = self.__class__.__name__ 351 | warnings.warn( 352 | f"No parallel implementation available for {name}. " 353 | "Falling back to single-core implementation." 354 | ) 355 | else: 356 | mod = self.mod_multi_core 357 | ## Viewing the y-data as different dtype (if necessary) 358 | y = self._view_y(y) 359 | ## Viewing the x-data as different dtype (if necessary) 360 | if x is None: 361 | downsample_f = self._switch_mod_with_y(y.dtype, mod) 362 | return downsample_f(y, n_out, **kwargs) 363 | x = self._view_x(x) 364 | ## Getting the appropriate downsample function 365 | downsample_f = self._switch_mod_with_x_and_y(x.dtype, y.dtype, mod) 366 | return downsample_f(x, y, n_out, **kwargs) 367 | 368 | def downsample(self, *args, n_out: int, parallel: bool = False, **kwargs): 369 | """Downsample the data in x and y. 370 | 371 | The x and y arguments are positional-only arguments. If only one argument is 372 | passed, it is considered to be the y-data. If two arguments are passed, the 373 | first argument is considered to be the x-data and the second argument is 374 | considered to be the y-data. 
375 | """ 376 | return super().downsample(*args, n_out=n_out, parallel=parallel, **kwargs) 377 | 378 | def __deepcopy__(self, memo): 379 | """Deepcopy the object.""" 380 | cls = self.__class__ 381 | result = cls.__new__(cls) 382 | memo[id(self)] = result 383 | for k, v in self.__dict__.items(): 384 | if k.endswith("_mod") or k.startswith("mod_"): 385 | # Don't (deep)copy the compiled modules 386 | setattr(result, k, v) 387 | else: 388 | setattr(result, k, deepcopy(v, memo)) 389 | return result 390 | 391 | 392 | NAN_DOWNSAMPLE_F = "downsample_nan" 393 | 394 | 395 | class AbstractRustNaNDownsampler(AbstractRustDownsampler, ABC): 396 | """RustNaNDownsampler interface-class, subclassed by concrete downsamplers.""" 397 | 398 | @property 399 | def _downsample_func_prefix(self) -> str: 400 | """The prefix of the downsample functions in the rust module.""" 401 | return NAN_DOWNSAMPLE_F 402 | 403 | def _switch_mod_with_y( 404 | self, y_dtype: np.dtype, mod: ModuleType, downsample_func: Optional[str] = None 405 | ) -> Callable: 406 | """Select the appropriate function from the rust module for the y-data. 407 | 408 | Assumes equal binning (when no data for x is passed -> only this function is 409 | executed). 410 | Equidistant binning is utilized when a `downsample_func` is passed from the 411 | `_switch_mod_with_x_and_y` method (since the x-data is considered in the 412 | downsampling). 413 | 414 | Parameters 415 | ---------- 416 | y_dtype : np.dtype 417 | The dtype of the y-data 418 | mod : ModuleType 419 | The module to select the appropriate function from 420 | downsample_func : str, optional 421 | The name of the function to use, by default NAN_DOWNSAMPLE_F. 422 | This argument is passed from the `_switch_mod_with_x_and_y` method when 423 | the x-data is considered in the downsampling. 424 | """ 425 | if downsample_func is None: 426 | downsample_func = self._downsample_func_prefix 427 | if not np.issubdtype(y_dtype, np.floating): 428 | # When y is not a float, we need to remove the _nan suffix to use the 429 | # regular downsample function as the _nan suffix is only used for floats. 430 | # (Note that NaNs only exist for floats) 431 | downsample_func = downsample_func.replace("_nan", "") 432 | return super()._switch_mod_with_y(y_dtype, mod, downsample_func) 433 | -------------------------------------------------------------------------------- /downsample_rs/src/m4.rs: -------------------------------------------------------------------------------- 1 | use argminmax::{ArgMinMax, NaNArgMinMax}; 2 | use num_traits::{AsPrimitive, FromPrimitive}; 3 | use rayon::iter::IndexedParallelIterator; 4 | use rayon::prelude::*; 5 | 6 | use super::searchsorted::{ 7 | get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, 8 | }; 9 | use super::types::Num; 10 | use super::POOL; 11 | 12 | // ----------------------------------- NON-PARALLEL ------------------------------------ 13 | 14 | // ----------- WITH X 15 | 16 | macro_rules! 
m4_with_x {
17 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
18 |         pub fn $func_name<Tx, Ty>(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec<usize>
19 |         where
20 |             for<'a> &'a [Ty]: $trait,
21 |             Tx: Num + FromPrimitive + AsPrimitive<f64>,
22 |             Ty: Copy + PartialOrd,
23 |         {
24 |             assert_eq!(n_out % 4, 0);
25 |             let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4);
26 |             m4_generic_with_x(arr, bin_idx_iterator, n_out, $f_argminmax)
27 |         }
28 |     };
29 | }
30 | 
31 | m4_with_x!(m4_with_x, ArgMinMax, |arr| arr.argminmax());
32 | m4_with_x!(m4_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax());
33 | 
34 | // ----------- WITHOUT X
35 | 
36 | macro_rules! m4_without_x {
37 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
38 |         pub fn $func_name<T: Copy + PartialOrd>(arr: &[T], n_out: usize) -> Vec<usize>
39 |         where
40 |             for<'a> &'a [T]: $trait,
41 |         {
42 |             assert_eq!(n_out % 4, 0);
43 |             m4_generic(arr, n_out, $f_argminmax)
44 |         }
45 |     };
46 | }
47 | 
48 | m4_without_x!(m4_without_x, ArgMinMax, |arr| arr.argminmax());
49 | m4_without_x!(m4_without_x_nan, NaNArgMinMax, |arr| arr.nanargminmax());
50 | 
51 | // ------------------------------------- PARALLEL --------------------------------------
52 | 
53 | // ----------- WITH X
54 | 
55 | macro_rules! m4_with_x_parallel {
56 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
57 |         pub fn $func_name<Tx, Ty>(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec<usize>
58 |         where
59 |             for<'a> &'a [Ty]: $trait,
60 |             Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
61 |             Ty: Copy + PartialOrd + Send + Sync,
62 |         {
63 |             assert_eq!(n_out % 4, 0);
64 |             let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4);
65 |             m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, $f_argminmax)
66 |         }
67 |     };
68 | }
69 | 
70 | m4_with_x_parallel!(m4_with_x_parallel, ArgMinMax, |arr| arr.argminmax());
71 | m4_with_x_parallel!(m4_with_x_parallel_nan, NaNArgMinMax, |arr| arr
72 |     .nanargminmax());
73 | 
74 | // ----------- WITHOUT X
75 | 
76 | macro_rules! m4_without_x_parallel {
77 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
78 |         pub fn $func_name<T: Copy + PartialOrd + Send + Sync>(arr: &[T], n_out: usize) -> Vec<usize>
79 |         where
80 |             for<'a> &'a [T]: $trait,
81 |         {
82 |             assert_eq!(n_out % 4, 0);
83 |             m4_generic_parallel(arr, n_out, $f_argminmax)
84 |         }
85 |     };
86 | }
87 | 
88 | m4_without_x_parallel!(m4_without_x_parallel, ArgMinMax, |arr| arr.argminmax());
89 | m4_without_x_parallel!(m4_without_x_parallel_nan, NaNArgMinMax, |arr| arr
90 |     .nanargminmax());
91 | 
92 | // TODO: check for duplicate data in the output array
93 | //  -> In the current implementation we always add 4 datapoints per bin (if of
94 | //     course the bin has >= 4 datapoints). However, the argmin and argmax might
95 | //     be the start and end of the bin, which would result in duplicate data in
96 | //     the output array. (this is for example the case for monotonic data).
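// Editor's note (illustrative, not part of the original source file): for monotonically
// increasing data the duplicates described above are easy to see. With n_out = 12 on an
// array of length 100 (3 bins of 4 samples each), every bin's argmin coincides with its
// first index and its argmax with its last index, so the sampled indices become
// [0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99] -- exactly the duplicated indices
// asserted in the tests at the bottom of this file.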
97 | 
98 | // ----------------------------------- GENERICS ------------------------------------
99 | 
100 | // --------------------- WITHOUT X
101 | 
102 | #[inline(always)]
103 | pub(crate) fn m4_generic<T: Copy>(
104 |     arr: &[T],
105 |     n_out: usize,
106 |     f_argminmax: fn(&[T]) -> (usize, usize),
107 | ) -> Vec<usize> {
108 |     // Assumes n_out is a multiple of 4
109 |     if n_out >= arr.len() {
110 |         return (0..arr.len()).collect();
111 |     }
112 | 
113 |     // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
114 |     let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
115 | 
116 |     let mut sampled_indices: Vec<usize> = vec![usize::default(); n_out];
117 | 
118 |     let mut start_idx: usize = 0;
119 |     for i in 0..n_out / 4 {
120 |         // Decided to use multiplication instead of adding to the accumulator (end)
121 |         // as multiplication seems to be less prone to rounding errors.
122 |         let end: f64 = block_size * (i + 1) as f64;
123 |         let end_idx: usize = end as usize + 1;
124 | 
125 |         let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
126 | 
127 |         // Add the indexes in sorted order
128 |         sampled_indices[4 * i] = start_idx;
129 |         if min_index < max_index {
130 |             sampled_indices[4 * i + 1] = min_index + start_idx;
131 |             sampled_indices[4 * i + 2] = max_index + start_idx;
132 |         } else {
133 |             sampled_indices[4 * i + 1] = max_index + start_idx;
134 |             sampled_indices[4 * i + 2] = min_index + start_idx;
135 |         }
136 |         sampled_indices[4 * i + 3] = end_idx - 1;
137 | 
138 |         start_idx = end_idx;
139 |     }
140 | 
141 |     sampled_indices
142 | }
143 | 
144 | #[inline(always)]
145 | pub(crate) fn m4_generic_parallel<T: Copy + Send + Sync>(
146 |     arr: &[T],
147 |     n_out: usize,
148 |     f_argminmax: fn(&[T]) -> (usize, usize),
149 | ) -> Vec<usize> {
150 |     // Assumes n_out is a multiple of 4
151 |     if n_out >= arr.len() {
152 |         return (0..arr.len()).collect::<Vec<usize>>();
153 |     }
154 | 
155 |     // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
156 |     let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
157 | 
158 |     // Store the enumerated indexes in the output array
159 |     // These indexes are used to calculate the start and end indexes of each bin in
160 |     // the multi-threaded execution
161 |     let mut sampled_indices: Vec<usize> = (0..n_out).collect::<Vec<usize>>();
162 | 
163 |     POOL.install(|| {
164 |         sampled_indices
165 |             .par_chunks_exact_mut(4)
166 |             .for_each(|sampled_index_chunk| {
167 |                 let i: f64 = unsafe { *sampled_index_chunk.get_unchecked(0) >> 2 } as f64;
168 |                 let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize;
169 |                 let end_idx: usize = (block_size * (i + 1.0)) as usize + 1;
170 | 
171 |                 let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
172 | 
173 |                 sampled_index_chunk[0] = start_idx;
174 |                 // Add the indexes in sorted order
175 |                 if min_index < max_index {
176 |                     sampled_index_chunk[1] = min_index + start_idx;
177 |                     sampled_index_chunk[2] = max_index + start_idx;
178 |                 } else {
179 |                     sampled_index_chunk[1] = max_index + start_idx;
180 |                     sampled_index_chunk[2] = min_index + start_idx;
181 |                 }
182 |                 sampled_index_chunk[3] = end_idx - 1;
183 |             })
184 |     });
185 | 
186 |     sampled_indices
187 | }
188 | 
189 | // --------------------- WITH X
190 | 
191 | #[inline(always)]
192 | pub(crate) fn m4_generic_with_x<T: Copy>(
193 |     arr: &[T],
194 |     bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
195 |     n_out: usize,
196 |     f_argminmax: fn(&[T]) -> (usize, usize),
197 | ) -> Vec<usize> {
198 |     // Assumes n_out is a multiple of 4
199 |     if n_out >= arr.len() {
200 |         return (0..arr.len()).collect::<Vec<usize>>();
201 |     }
202 | 
203 |     let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);
204 | 
205 |     bin_idx_iterator.for_each(|bin| {
206 |         if let Some((start, end)) = bin {
207 |             if end <= start + 4 {
208 |                 // If the bin has <= 4 elements, just add them all
209 |                 for i in start..end {
210 |                     sampled_indices.push(i);
211 |                 }
212 |             } else {
213 |                 // If the bin has > 4 elements, add the first and last + argmin and argmax
214 |                 let step = &arr[start..end];
215 |                 let (min_index, max_index) = f_argminmax(step);
216 | 
217 |                 sampled_indices.push(start);
218 | 
219 |                 // Add the indexes in sorted order
220 |                 if min_index < max_index {
221 |                     sampled_indices.push(min_index + start);
222 |                     sampled_indices.push(max_index + start);
223 |                 } else {
224 |                     sampled_indices.push(max_index + start);
225 |                     sampled_indices.push(min_index + start);
226 |                 }
227 | 
228 |                 sampled_indices.push(end - 1);
229 |             }
230 |         }
231 |     });
232 | 
233 |     sampled_indices
234 | }
235 | 
236 | #[inline(always)]
237 | pub(crate) fn m4_generic_with_x_parallel<T: Copy + Send + Sync>(
238 |     arr: &[T],
239 |     bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
240 |     n_out: usize,
241 |     f_argminmax: fn(&[T]) -> (usize, usize),
242 | ) -> Vec<usize> {
243 |     // Assumes n_out is a multiple of 4
244 |     if n_out >= arr.len() {
245 |         return (0..arr.len()).collect::<Vec<usize>>();
246 |     }
247 | 
248 |     POOL.install(|| {
249 |         bin_idx_iterator
250 |             .flat_map(|bin_idx_iterator| {
251 |                 bin_idx_iterator
252 |                     .map(|bin| {
253 |                         match bin {
254 |                             Some((start, end)) => {
255 |                                 if end <= start + 4 {
256 |                                     // If the bin has <= 4 elements, just return them all
257 |                                     return (start..end).collect::<Vec<usize>>();
258 |                                 }
259 | 
260 |                                 // If the bin has > 4 elements, return the first and last + argmin and argmax
261 |                                 let step = &arr[start..end];
262 |                                 let (min_index, max_index) = f_argminmax(step);
263 | 
264 |                                 // Return the indexes in sorted order
265 |                                 let mut sampled_index = vec![start, 0, 0, end - 1];
266 |                                 if min_index < max_index {
267 |                                     sampled_index[1] = min_index + start;
268 |                                     sampled_index[2] = max_index + start;
269 |                                 } else {
270 |                                     sampled_index[1] = max_index + start;
271 |                                     sampled_index[2] = min_index + start;
272 |                                 }
273 |                                 sampled_index
274 |                             } // If the bin is empty, return empty Vec
275 |                             None => {
276 |                                 vec![]
277 |                             }
278 |                         }
279 |                     })
280 |                     .collect::<Vec<Vec<usize>>>()
281 |             })
282 |             .flatten()
283 |             .collect::<Vec<usize>>()
284 |     })
285 | }
286 | 
287 | #[cfg(test)]
288 | mod tests {
289 |     use num_traits::AsPrimitive;
290 |     use rstest::rstest;
291 |     use rstest_reuse::{self, *};
292 | 
293 |     use super::{m4_with_x, m4_without_x};
294 |     use super::{m4_with_x_parallel, m4_without_x_parallel};
295 | 
296 |     use dev_utils::utils;
297 | 
298 |     fn get_array_f32(n: usize) -> Vec<f32> {
299 |         utils::get_random_array(n, f32::MIN, f32::MAX)
300 |     }
301 | 
302 |     // Template for n_out
303 |     #[template]
304 |     #[rstest]
305 |     #[case(196)]
306 |     #[case(200)]
307 |     #[case(204)]
308 |     fn n_outs(#[case] n_out: usize) {}
309 | 
310 |     #[test]
311 |     fn test_m4_scalar_without_x_correct() {
312 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
313 | 
314 |         let sampled_indices = m4_without_x(&arr, 12);
315 |         let sampled_values = sampled_indices
316 |             .iter()
317 |             .map(|x| arr[*x])
318 |             .collect::<Vec<f32>>();
319 | 
320 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
321 |         let expected_values = expected_indices
322 |             .iter()
323 |             .map(|x| *x as f32)
324 |             .collect::<Vec<f32>>();
325 | 
326 |         assert_eq!(sampled_indices, expected_indices);
327 |         assert_eq!(sampled_values, expected_values);
328 |     }
329 | 
330 |     #[test]
331 |     fn test_m4_scalar_without_x_parallel_correct() {
332 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
333 | 
334 |         let sampled_indices = m4_without_x_parallel(&arr, 12);
335 |         let sampled_values = sampled_indices
336 |             .iter()
337 |             .map(|x| arr[*x])
338 |             .collect::<Vec<f32>>();
339 | 
340 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
341 |         let expected_values = expected_indices
342 |             .iter()
343 |             .map(|x| *x as f32)
344 |             .collect::<Vec<f32>>();
345 | 
346 |         assert_eq!(sampled_indices, expected_indices);
347 |         assert_eq!(sampled_values, expected_values);
348 |     }
349 | 
350 |     #[test]
351 |     fn test_m4_scalar_with_x_correct() {
352 |         let x: [i32; 100] = core::array::from_fn(|i| i.as_());
353 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
354 | 
355 |         let sampled_indices = m4_with_x(&x, &arr, 12);
356 |         let sampled_values = sampled_indices
357 |             .iter()
358 |             .map(|x| arr[*x])
359 |             .collect::<Vec<f32>>();
360 | 
361 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
362 |         let expected_values = expected_indices
363 |             .iter()
364 |             .map(|x| *x as f32)
365 |             .collect::<Vec<f32>>();
366 | 
367 |         assert_eq!(sampled_indices, expected_indices);
368 |         assert_eq!(sampled_values, expected_values);
369 |     }
370 | 
371 |     #[test]
372 |     fn test_m4_scalar_with_x_parallel_correct() {
373 |         let x: [i32; 100] = core::array::from_fn(|i| i.as_());
374 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
375 | 
376 |         let sampled_indices = m4_with_x_parallel(&x, &arr, 12);
377 |         let sampled_values = sampled_indices
378 |             .iter()
379 |             .map(|x| arr[*x])
380 |             .collect::<Vec<f32>>();
381 | 
382 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
383 |         let expected_values = expected_indices
384 |             .iter()
385 |             .map(|x| *x as f32)
386 |             .collect::<Vec<f32>>();
387 | 
388 |         assert_eq!(sampled_indices, expected_indices);
389 |         assert_eq!(sampled_values, expected_values);
390 |     }
391 | 
392 |     #[test]
393 |     fn test_m4_scalar_with_x_gap() {
394 |         // We will create a gap in the middle of the array
395 |         // Increment the second half of the array by 50
396 |         let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
397 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
398 | 
399 |         let sampled_indices = m4_with_x(&x, &arr, 20);
400 |         assert_eq!(sampled_indices.len(), 16); // One full gap
401 |         let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
402 |         assert_eq!(sampled_indices, expected_indices);
403 | 
404 |         // Increment the second half of the array by 50 again
405 |         let x = x.map(|x| if x > 101 { x + 50 } else { x });
406 | 
407 |         let sampled_indices = m4_with_x(&x, &arr, 20);
408 |         assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
409 |         let expected_indices = vec![
410 |             0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
411 |         ];
412 |         assert_eq!(sampled_indices, expected_indices);
413 |     }
414 | 
415 |     #[test]
416 |     fn test_m4_scalar_with_x_gap_parallel() {
417 |         // We will create a gap in the middle of the array
418 |         // Increment the second half of the array by 50
419 |         let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
420 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
421 | 
422 |         let sampled_indices = m4_with_x_parallel(&x, &arr, 20);
423 |         assert_eq!(sampled_indices.len(), 16); // One full gap
424 |         let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
425 |         assert_eq!(sampled_indices, expected_indices);
426 | 
427 |         // Increment the second half of the array by 50 again
428 |         let x = x.map(|x| if x > 101 { x + 50 } else { x });
429 | 
430 |         let sampled_indices = m4_with_x_parallel(&x, &arr, 20);
431 |         assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
432 |         let expected_indices = vec![
433 |             0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
434 |         ];
435 |         assert_eq!(sampled_indices, expected_indices);
436 |     }
437 | 
438 |     #[apply(n_outs)]
439 |     fn test_many_random_runs_correct(n_out: usize) {
440 |         const N: usize = 20_003;
441 |         let x: [i32; N] = core::array::from_fn(|i| i.as_());
442 |         for _ in 0..100 {
443 |             let arr = get_array_f32(N);
444 |             let idxs1 = m4_without_x(arr.as_slice(), n_out);
445 |             let idxs2 = m4_with_x(&x, arr.as_slice(), n_out);
446 |             assert_eq!(idxs1, idxs2);
447 |             let idxs3 = m4_without_x_parallel(arr.as_slice(), n_out);
448 |             let idxs4 = m4_with_x_parallel(&x, arr.as_slice(), n_out);
449 |             assert_eq!(idxs1, idxs3);
450 |             // TODO: check whether this still fails after fixing the sequential_add_mul
451 |             assert_eq!(idxs1, idxs4); // TODO: this fails when nb. of threads = 16
452 |         }
453 |     }
454 | }
455 | 
--------------------------------------------------------------------------------
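Editor's addendum (usage sketch, not one of the repository files): the
`AbstractRustDownsampler.downsample` docstring above describes a positional x/y calling
convention with a required `n_out` keyword and an optional `parallel` flag. The snippet
below illustrates that call pattern with one of the package's concrete downsamplers
(`MinMaxLTTBDownsampler`); the array sizes and variable names are arbitrary examples.

import numpy as np

from tsdownsample import MinMaxLTTBDownsampler

y = np.random.randn(1_000_000).astype(np.float32)
x = np.arange(y.size, dtype=np.int64)

# y-only call: the data is divided into equally sized bins over the index
idx = MinMaxLTTBDownsampler().downsample(y, n_out=1_000)

# x + y call: equidistant bins are derived from the (monotonic) x-values;
# parallel=True uses the multi-core compiled module when it is available
idx = MinMaxLTTBDownsampler().downsample(x, y, n_out=1_000, parallel=True)

# The returned array holds the selected indices, which can be used to index the
# original data, e.g. for plotting
y_downsampled = y[idx]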