├── .github
    └── workflows
    │   ├── ci.yml
    │   ├── publish.yml
    │   └── purge-cache.yml.disabled
├── .gitignore
├── LICENSE
├── NOTICE
├── README.md
├── example.png
├── python
    ├── .gitignore
    ├── MANIFEST.in
    ├── README.md
    ├── examples
    │   ├── benchmark.py
    │   └── illustration.py
    ├── pyproject.toml
    ├── rolling_quantiles
    │   └── __init__.py
    ├── setup.cfg
    ├── setup.py
    └── tests
    │   ├── input.py
    │   ├── pytest.ini
    │   ├── requirements.txt
    │   ├── test_guards.py
    │   ├── test_highpass.py
    │   ├── test_interpolation.py
    │   └── test_lowpass.py
└── src
    ├── filter.c
    ├── filter.h
    ├── heap.c
    ├── heap.h
    ├── python.c
    ├── quantile.c
    ├── quantile.h
    └── test.c


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Python Tests
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs: # jobs run in parallel. individual steps within a job do not.
 6 |   build-and-test: # arbitrary name
 7 |     runs-on: ${{ matrix.os }}
 8 |     strategy:
 9 |       matrix:
10 |         os: [ubuntu-latest, macos-latest, windows-latest] # the matrix strategy takes essentially a cartesian product of these options
11 |         python-version: [3.8]
12 |     steps:
13 |     - uses: actions/checkout@v2
14 |     - name: Set up Python ${{ matrix.python-version }}.
15 |       uses: actions/setup-python@v2
16 |       with:
17 |         python-version: ${{ matrix.python-version }}
18 |     - name: Build and install.
19 |       shell: bash # not cmd/powershell that windows uses
20 |       run: |
21 |         python -m pip install --upgrade pip build
22 |         cd python
23 |         MACOSX_DEPLOYMENT_TARGET=10.9 python -m build --wheel
24 |         python -m pip install dist/*.whl
25 |         cd ..
26 |     - name: Run tests.
27 |       shell: bash
28 |       run: | # pytest with no args. if it fails with a nonzero status code, that *should* pop up as a failure in Github's statuses
29 |         cd python/tests
30 |         python -m pip install -r requirements.txt
31 |         pytest
32 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published] # when a release or pre-release published. don't mind the nuances that differentiate this from 'created' or 'released'
 6 | 
 7 | jobs:
 8 |   build-and-store:
 9 |     runs-on: ${{ matrix.os }}
10 |     strategy:
11 |       matrix:
12 |         os: [ubuntu-22.04, macos-latest, windows-latest] # use an old enough Linux distro to peacefully generate `manylinux` packages
13 |         python-version: ["3.8", "3.9", "3.10"]
14 |     steps:
15 |     - uses: actions/checkout@v2
16 |     - name: Set up Python ${{ matrix.python-version }}.
17 |       uses: actions/setup-python@v2
18 |       with:
19 |         python-version: ${{ matrix.python-version }}
20 |     - name: Build and install. # mostly redundant with `ci.yml`
21 |       shell: bash
22 |       run: |
23 |         python -m pip install --upgrade pip build
24 |         cd python
25 |         MACOSX_DEPLOYMENT_TARGET=10.9 python -m build --wheel
26 |         python -m pip install dist/*.whl
27 |         cd ..
28 |     - name: Run tests.
29 |       shell: bash
30 |       run: |
31 |         cd python/tests
32 |         python -m pip install -r requirements.txt
33 |         pytest
34 |         cd ../..
35 |     - name: Audit wheel for manylinux.
36 |       if: matrix.os == 'ubuntu-22.04'
37 |       shell: bash
38 |       run: |
39 |         cd python
40 |         python -m pip install --upgrade auditwheel
41 |         python -m auditwheel repair --plat manylinux1_x86_64 dist/*.whl
42 |         rm -r dist
43 |         mv wheelhouse dist
44 |     - name: Store the binary wheel.
45 |       uses: actions/upload-artifact@v2
46 |       with:
47 |         name: python-package-distributions
48 |         path: python/dist/
49 | 
50 |   publish: # can only upload from ubuntu, so collect all the packages here
51 |     runs-on: ubuntu-latest
52 |     needs: build-and-store
53 |     steps:
54 |     - name: Download all the wheels.
55 |       uses: actions/download-artifact@v2
56 |       with:
57 |         name: python-package-distributions
58 |         path: dist/
59 |     - name: Publish.
60 |       uses: pypa/gh-action-pypi-publish@master # should upload contents of the `dist/` folder
61 |       with:
62 |         user: __token__
63 |         password: ${{ secrets.PYPI_TOKEN }}
64 |         packages_dir: dist/
65 | 


--------------------------------------------------------------------------------
/.github/workflows/purge-cache.yml.disabled:
--------------------------------------------------------------------------------
 1 | # purge cache of our README's "pip downloads" badge from Github's static image host, compelling it to refresh regularly.
 2 | name: Purge Cache
 3 | on:
 4 |   schedule:
 5 |   - cron: "0 0 * * *" # cron syntax is funky. herein, we execute at the end of every day (UTC)
 6 | jobs:
 7 |   clean-pip-downloads-badge:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |     - name: Issue the curl request.
11 |       run: |
12 |         curl -X PURGE https://camo.githubusercontent.com/ded078724cea6c7f2e1fdf788d2c4a7ec9c2a88b558493e2c0f34d397914b18e/68747470733a2f2f7374617469632e706570792e746563682f706572736f6e616c697a65642d62616467652f726f6c6c696e672d7175616e74696c65733f706572696f643d746f74616c26756e6974733d696e7465726e6174696f6e616c5f73797374656d266c6566745f636f6c6f723d626c75652672696768745f636f6c6f723d6f72616e6765266c6566745f746578743d706970253230646f776e6c6f616473
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | a.out*
3 | *.o
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Rolling Quantiles
2 | Copyright 2021 Myrl Marmarelis
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Rolling Quantiles for NumPy
  2 | 
  3 | [![Python tests](https://github.com/marmarelis/rolling-quantiles/actions/workflows/ci.yml/badge.svg?branch=master&event=push)](https://github.com/marmarelis/rolling-quantiles/actions/workflows/ci.yml)
  4 | 
  5 | ## Hyper-efficient and composable filters.
  6 | 
  7 | * Simple, clean, intuitive interface.
  8 | * Streaming or batch processing.
  9 | * Python 3 bindings for a lean library written in pure C.
 10 | 
 11 | ### A Quick Tour
 12 | 
 13 | Let me give you but a superficial overview of this module's elegance.
 14 | 
 15 | ```python
 16 | import numpy as np
 17 | import rolling_quantiles as rq
 18 | 
 19 | pipe = rq.Pipeline( # rq.Pipeline is the only stateful object
 20 |   # declare a cascade of filters by a sequence of immutable description objects
 21 |   rq.LowPass(window=201, portion=100, subsample_rate=2),
 22 |     # the above takes a median (101th element out of 201) of the most recent 200
 23 |     # points and then spits out every other one
 24 |   rq.HighPass(window=10, portion=3))
 25 |     # that subsampled rolling median is then fed into this filter that takes a
 26 |     # 30% quantile on a window of size 10, and subtracts it from its raw input
 27 | 
 28 | # the pipeline exposes a set of read-only attributes that describe it
 29 | pipe.lag # = 60.0, the effective number of time units that the real-time output
 30 |          #   is delayed from the input
 31 | pipe.stride # = 2, how many inputs it takes to produce an output
 32 |             #  (>1 due to subsampling)
 33 | 
 34 | 
 35 | input = np.random.randn(1000)
 36 | output = pipe.feed(input) # the core, singular exposed method
 37 | 
 38 | # every other output will be a NaN to demarcate unready values
 39 | subsampled_output = output[1::pipe.stride]
 40 | ```
 41 | ![Example Signal](example.png)
 42 | 
 43 | That may be a lot to take in, so let me break it down for you:
 44 | * `rq.Pipeline(description...)` constructs a filter pipeline from one or more filter descriptions and initializes internal state.
 45 | * `.feed(*)` takes in a Python number or `np.array` and its output is shaped likewise.
 46 | * The two filter types are `rq.LowPass` and `rq.HighPass` that compute rolling quantiles and return them as is, and subtract them from the raw signal respectively. Compose them however you like!
 47 | * `NaN`s in the output purposefully indicate missing values, usually due to subsampling. If you pass a `NaN` into a `LowPass` filter, it will slowly deplete its reserve and continue to return valid quantiles until the window empties completely.
 48 | * `rq.LowPass` and `rq.HighPass` alternatively take in a `quantile=q` argument, `0<=q<=1`. The filters would perform a linear interpolation in this case. In order to control the statistical characteristics of this quantile estimate, parameters `alpha` and `beta` are exposed as well with default values `(1, 1)`. Refer to SciPy's [documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.mquantiles.html) for details on this aspect.
 49 | ```python
 50 | interpolated_pipe = rq.Pipeline(
 51 |     # attempt to estimate the exact 40%-quantile by the
 52 |     # default linear interpolation with parameters (1, 1)
 53 |     rq.LowPass(window=30, quantile=0.4),
 54 |     # here, the estimate is "approximately unbiased" in
 55 |     # the case of Gaussian white noise
 56 |     rq.HighPass(window=10, quantile=0.3,
 57 |       alpha=3/8, beta=3/8))
 58 | ```
 59 | See this [Wikipedia section](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample) for an elucidating overview.
 60 | 
 61 | I also expose a convenience function `rq.medfilt(signal, window_size)` at the top-level of the package to directly supplant `scipy.signal.medfilt`.
 62 | 
 63 | That's it! I detailed the entire library. Don't let the size of its interface fool you!
 64 | 
 65 | ## Installation
 66 | [![Downloads](https://pepy.tech/badge/rolling-quantiles)](https://pepy.tech/project/rolling-quantiles)
 67 | 
 68 | If you are running Linux, MacOS, or Windows with Python 3.8+ and NumPy ~1.20, execute the following:
 69 | 
 70 | `pip install rolling-quantiles`
 71 | 
 72 | These are the conditions under which binaries are built and sent to the Python Package Index, which holds `pip`'s packages. Should the NumPy version be unsuitable, for instance, I suggest building the package from source. This is rather straightforward because the handful of source files in C have absolutely minimal dependencies.
 73 | 
 74 | ### Building from Source
 75 | 
 76 | The meat of this package is a handful of C files with no external dependencies, besides NumPy 1.16+ and Python 3.7+ for the bindings located in `src/python.c`. As such, you may build from source by running the following from the project's root directory:
 77 | 1. `cd python`
 78 | 2. Check `pyproject.toml` to make sure the listed NumPy version matches your desired target. The compiled package will be forward- but not backward-compatible.
 79 | 3. `python -m build` (make sure this invokes Python 3)
 80 | 4. `pip install dist/<name_of_generated_wheel_file>.whl`
 81 | 
 82 | #### Note of Caution on MacOS Big Sur
 83 | Make sure to specify `MACOSX_DEPLOYMENT_TARGET=10.X` as a prefix to the build command, e.g. `python -m build`. The placeholder `X` can be any MacOS version earlier than Big Sur (I use `9`.) By default, the build system would attempt to build for MacOS 11 that is incompatible with current Python interpreters that have been compiled against a prior version.
 84 | 
 85 | 
 86 | ### Benchmarking a median filter on 100 million doubles.
 87 | 
 88 | I make use of binary heaps that impart desirable guarantees on their amortized runtime. Realistically, their performance may depend on the statistics of the incoming signal. I pummeled the filters with Gaussian Brownian motion to gauge their practical usability under a typical drifting stochastic process.
 89 | 
 90 | | `window` | `rolling_quantiles` [1] | `scipy` [2] | `pandas` [3] |
 91 | | :------- | ------------------:     | ----------: | -----------: |
 92 | | 4        | 14 seconds              | 22 seconds  | 25 seconds   |
 93 | | 10       | 21 seconds              | 47 seconds  | 31 seconds   |
 94 | | 20       | 28 seconds              | 95 seconds  | 35 seconds   |
 95 | | 30       | 30 seconds              | 140 seconds | 37 seconds   |
 96 | | 40       | 34 seconds              | 190 seconds | 40 seconds   |
 97 | | 50       | 36 seconds              | 242 seconds | 40 seconds   |
 98 | | 1,000    | 61 seconds              | N/A         | 62 seconds   |
 99 | 
100 | Likewise, with simulated Gaussian white noise (no drift in the signal):
101 | 
102 | | `window` | `rolling_quantiles` [1] | `scipy` [2] | `pandas` [3] |
103 | | :------- | ------------------:     | ----------: | -----------: |
104 | | 4        | 14 seconds              | 22 seconds  | 25 seconds   |
105 | | 10       | 20 seconds              | 51 seconds  | 31 seconds   |
106 | | 20       | 25 seconds              | 105 seconds | 36 seconds   |
107 | | 30       | 27 seconds              | 156 seconds | 39 seconds   |
108 | | 40       | 30 seconds              | 218 seconds | 41 seconds   |
109 | | 50       | 30 seconds              | 279 seconds | 42 seconds   |
110 | | 1,000    | 45 seconds              | N/A         | 70 seconds   |
111 | 
112 | Intel(R) Core(TM) i7-8700T CPU @ 2.40GHz, single-threaded performance on Linux. My algorithm looked even better (relative to pandas) on a 2020 MacBook Pro. Check out this [StackOverflow answer](https://stackoverflow.com/questions/60100276/fastest-way-for-2d-rolling-window-quantile/66482238#66482238) for a particular use case.
113 | 
114 | [1] `rq.Pipeline(...)`
115 | 
116 | [2] `scipy.signal.medfilt(...)`
117 | 
118 | [3] `pd.Series.rolling(*).quantile(...)`
119 | 
120 | 
121 | 
122 | #### Brought to you by [Myrl](https://myrl.marmarel.is)
123 | 


--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marmarelis/rolling-quantiles/aeaeedf7ea39553a5a9199cd91c0113ff44d47d7/example.png


--------------------------------------------------------------------------------
/python/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | rolling_quantiles.egg-info
4 | src/
5 | __pycache__/
6 | LICENSE
7 | pypi-token.txt
8 | 


--------------------------------------------------------------------------------
/python/MANIFEST.in:
--------------------------------------------------------------------------------
1 | global-exclude .DS_Store
2 | global-exclude pypi-token.txt
3 | include src/*.h
4 | include src/*.c
5 | 


--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
 1 | # Rolling Quantiles for NumPy
 2 | ## Hyper-efficient and composable filters.
 3 | 
 4 | * Simple, clean, intuitive interface.
 5 | * Supports streaming data or bulk processing.
 6 | * Python 3 bindings for a compact library written in pure C.
 7 | 
 8 | ### A Quick Tour
 9 | 
10 | ```python
11 | import numpy as np
12 | import rolling_quantiles as rq
13 | 
14 | pipe = rq.Pipeline( # rq.Pipeline is the only stateful object
15 |   # declare a cascade of filters by a sequence of immutable description objects
16 |   rq.LowPass(window=200, portion=100, subsample_rate=2),
17 |     # the above takes a median (100 out of 200) of the most recent 200 points
18 |     # and then spits out every other one
19 |   rq.HighPass(window=10, portion=3,  subsample_rate=1))
20 |     # that subsampled rolling median is then fed into this filter that takes a
21 |     # 30% quantile on a window of size 10, and subtracts it from its raw input
22 | 
23 | # the pipeline exposes a set of read-only attributes that describe it
24 | pipe.lag # = 60.0, the effective number of time units that the real-time output
25 |          #   is delayed from the input
26 | pipe.stride # = 2, how many inputs it takes to produce an output
27 |             #  (>1 due to subsampling)
28 | 
29 | 
30 | input = np.random.randn(1000)
31 | output = pipe.feed(input) # the core, singular exposed method
32 | 
33 | # every other output will be a NaN to demarcate unready values
34 | subsampled_output = output[1::pipe.stride]
35 | ```
36 | 
37 | See the [Github repository](https://github.com/marmarelis/rolling-quantiles) for more details.
38 | 


--------------------------------------------------------------------------------
/python/examples/benchmark.py:
--------------------------------------------------------------------------------
 1 | # estimate the average number of values processed per second in offline mode (not streaming,
 2 | # although it's all the same for my technique) to compare against scipy. signals that are less
 3 | # stationary should induce more tree operations; hence, compare the following for different
 4 | # window sizes: Gaussian white noise, Brownian motion, and Levy flights.
 5 | 
 6 | # an interesting consequence is that my amortized runtime complexity is well-characterized,
 7 | # but in practice it depends on the signal behavior (so nondeterministic for stochastic processes)
 8 | 
 9 | import numpy as np
10 | from scipy.signal import medfilt
11 | from scipy.stats import levy
12 | import pandas as pd
13 | import rolling_quantiles as rq
14 | import time
15 | from matplotlib import pyplot as plt
16 | plt.ion()
17 | 
18 | def measure_runtime(f):
19 |   start = time.perf_counter() # could also try time.monotonic()
20 |   res = f()
21 |   return time.perf_counter() - start, res
22 | 
23 | signal = np.cumsum(np.random.normal(size=100_000_000))
24 | series = pd.Series(signal) # construct a priori for fairness
25 | window_sizes = np.array([4, 10, 20, 30, 40, 50]) + 1 # odd
26 | 
27 | rq_times, sc_times, pd_times = [], [], []
28 | 
29 | for window_size in window_sizes:
30 |   pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2, subsample_rate=1))
31 |   rq_time, rq_res = measure_runtime(lambda: pipe.feed(signal))
32 |   sc_time, sc_res = measure_runtime(lambda: medfilt(signal, window_size))
33 |   pd_time, pd_res = measure_runtime(lambda: series.rolling(window_size).quantile(0.5, interpolation="nearest"))
34 |   # rq_res and sc_res will differ slightly at the edges because medfilt pads both sides with zeros as if it were a convolution.
35 |   # I pad at the beginning only, since I employ an online algorithm.
36 |   offset = window_size // 2
37 |   discrepancy = rq_res[1000:2000] - sc_res[(1000-offset):(2000-offset)]
38 |   #print("maximum discrepancy between the two is", np.amax(np.abs(discrepancy)))
39 |   assert np.amax(np.abs(discrepancy)) < 1e-10
40 |   print("runtimes are", rq_time, "versus", sc_time, "versus", pd_time)
41 |   rq_times.append(rq_time)
42 |   sc_times.append(sc_time)
43 |   pd_times.append(pd_time)
44 | 
45 | plt.plot(window_sizes, rq_times)
46 | plt.plot(window_sizes, sc_times)
47 | plt.plot(window_sizes, pd_times)
48 | 


--------------------------------------------------------------------------------
/python/examples/illustration.py:
--------------------------------------------------------------------------------
 1 | # illustration of what my hypothetical API should look like
 2 | 
 3 | import numpy as np
 4 | import rolling_quantiles as rq
 5 | 
 6 | filter = rq.Pipeline( # stateful filter
 7 |   rq.LowPass(window=100, portion=50, subsample_rate=2),
 8 |   rq.HighPass(window=10, portion=3,  subsample_rate=1))
 9 | 
10 | # expose specialized pipelines like `rq.MedianFilter`
11 | 
12 | input = np.random.randn(1000)
13 | output = filter.feed(input) # a single `ufunc` entry point that takes in arrays or scalars and spits out an appropriate amount of output
14 | 
15 | 
16 | ## CONCEPT.
17 | 
18 | rq.LineUp(rq.Pipeline) # possibly parallelized execution of parallel pipelines
19 | 
20 | big_input = np.random.randn(100, 1000)
21 | # broadcast. route one row to each pipeline.
22 | big_output = pipes.feed(big_input) # respects Fortran or C ordering to preserve cache locality
23 | 


--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=54", "wheel", "numpy~=1.20"] #"packaging>=20.5"
3 | build-backend = "setuptools.build_meta"
4 | 


--------------------------------------------------------------------------------
/python/rolling_quantiles/__init__.py:
--------------------------------------------------------------------------------
 1 | # A Python module is basically a file. A Python package is a directory that acts as a parent module with many submodules.
 2 | 
 3 | __version__ = "1.1.0"
 4 | 
 5 | from .triton import *
 6 | 
 7 | # expose a rolling-median convenience method as a direct replacement to scipy.signal.medfilt
 8 | def medfilt(signal, window_size):
 9 |   import numpy as np # don't pollute the top-level namespace
10 |   pipeline = Pipeline(
11 |     LowPass(window=window_size, quantile=0.5, subsample_rate=1))
12 |   return pipeline.feed(np.array(signal))
13 | 


--------------------------------------------------------------------------------
/python/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = rolling_quantiles
 3 | version = attr: rolling_quantiles.__version__
 4 | description = Composable and blazing fast rolling-quantile filters for streaming data and bulk batches.
 5 | long_description = file: README.md
 6 | long_description_content_type = text/markdown
 7 | keywords = numpy, filter, numeric, signal, streaming, scipy, quantiles, rolling, efficient, realtime
 8 | license_files = ../LICENSE
 9 | url = https://github.com/marmarelis/rolling-quantiles
10 | author = Myrl Marmarelis
11 | author_email = myrl@marmarel.is
12 | 
13 | [options]
14 | zip_safe = true
15 | packages = find:
16 | include_package_data = true
17 | setup_requires = numpy ~= 1.20
18 | install_requires = numpy ~= 1.20
19 | python_requires = >=3.7
20 | 


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools.extension import Extension
 2 | from setuptools import setup # using this instead of numpy.distutils.core, as there seem to be incompatibilities with the "new way" of defining setups
 3 | from numpy.distutils.misc_util import get_info
 4 | import numpy as np
 5 | import os
 6 | import shutil
 7 | from glob import glob
 8 | 
 9 | # it feels like there's a billion different ways to do things: not very Pythonic! There is even overlap between pyproject.toml and setup.cfg!
10 | # this all feels completely like a work in progress.
11 | 
12 | # NOTE (I learned this the hard way): DO NOT TRY TO IMPORT THIS PACKAGE FROM A PYTHON CONSOLE IN THIS DIRECTORY.
13 | # IT WILL GRAVITATE TO THE LOCAL COPY, AND FAIL TO LOCATE TRITON.
14 | 
15 | source_files = sum((glob(os.path.join("..", "src", f"*.{ext}")) for ext in ["h", "c"]), start=[])
16 | os.makedirs("src", exist_ok=True)
17 | for file in source_files:
18 |   shutil.copy(file, "src")
19 | 
20 | ext_files = ["filter.c", "heap.c", "quantile.c", "python.c"] # cryptic errors all ove rthe place...
21 | 
22 | setup(
23 |   ext_package = "rolling_quantiles", # important to specify that triton's fully qualified name should be rolling_quantiles.triton
24 |   ext_modules = [
25 |     Extension("triton", # does a triton/__init__.py need to exist as a placeholder marker for my extension module?
26 |       [os.path.join("src", file) for file in ext_files],
27 |       include_dirs = [np.get_include()],
28 |       extra_compile_args=["-O3"])
29 |   ]
30 | )
31 | 


--------------------------------------------------------------------------------
/python/tests/input.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def example_input(length):
4 |   return np.cumsum(np.random.normal(size=length))
5 | 


--------------------------------------------------------------------------------
/python/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 |   error
4 |   ignore::UserWarning
5 |   ignore::DeprecationWarning
6 | 


--------------------------------------------------------------------------------
/python/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas >= 1.2
2 | numpy >= 1.20
3 | scipy >= 1.5
4 | pytest
5 | 


--------------------------------------------------------------------------------
/python/tests/test_guards.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import rolling_quantiles as rq
 4 | 
 5 | def test_window_size():
 6 |   with pytest.raises(ValueError):
 7 |     rq.Pipeline(rq.LowPass())
 8 | 
 9 | def test_interpolator_bounds():
10 |   with pytest.raises(ValueError):
11 |     rq.Pipeline(rq.LowPass(
12 |       window=10, portion=2, subsample_rate=1, quantile=0.5, alpha=2.0))
13 |   with pytest.raises(ValueError):
14 |     rq.Pipeline(rq.LowPass(
15 |       window=10, portion=2, subsample_rate=1, quantile=2.5))
16 | 


--------------------------------------------------------------------------------
/python/tests/test_highpass.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import rolling_quantiles as rq
 4 | from input import example_input
 5 | 
 6 | def test_median_scalar_inputs(window_size=3, length=100):
 7 |   pipe = rq.Pipeline(rq.HighPass(window=window_size, portion=window_size//2))
 8 |   v = example_input(length)
 9 |   assert pipe.lag == window_size/2
10 |   for i, x in enumerate(v):
11 |     y = pipe.feed(x)
12 |     if i >= window_size:
13 |       median = np.median(v[(i-window_size+1):(i+1)])
14 |       assert y == (v[i-window_size//2] - median)
15 | 
16 | def test_median_array_input(window_size=71, length=1000):
17 |   pipe = rq.Pipeline(rq.HighPass(window=window_size, portion=window_size//2))
18 |   x = example_input(length)
19 |   y = pipe.feed(x)
20 |   z = pd.Series(x).rolling(window_size).median()
21 |   lag = window_size//2 # note: as evidenced, high-pass filters do not interpolate on half-windows yet.
22 |   assert pipe.lag == window_size/2
23 |   assert np.equal(
24 |       y[window_size:],
25 |       x[lag+1:-lag] - z.values[window_size:]
26 |     ).all() # exact equality.
27 | 


--------------------------------------------------------------------------------
/python/tests/test_interpolation.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from scipy.stats.mstats import mquantiles
 4 | import rolling_quantiles as rq
 5 | from input import example_input
 6 | 
 7 | def test_innocuous_interpolation(window_size=1001, length=10000):
 8 |   pipe = rq.Pipeline(rq.LowPass(window=window_size, quantile=0.5))
 9 |   x = example_input(length)
10 |   y = pipe.feed(x)
11 |   z = pd.Series(x).rolling(window_size).median()
12 |   assert np.equal(y[window_size:], z.values[window_size:]).all()
13 | 
14 | def test_typical_interpolation(window_size=40, quantile=0.2):
15 |   x = example_input(window_size) # one window only, due to scipy
16 |   pipe = rq.Pipeline(rq.LowPass(window=window_size, quantile=quantile))
17 |   y = pipe.feed(x)
18 |   z = mquantiles(x, quantile, alphap=1, betap=1)
19 |   assert z == y[-1]
20 | 
21 | # a flavor of fuzzing
22 | def test_fancy_interpolation(window_size=10, n_trials=200): # small windows may be more prone to boundary/edge-condition bugs
23 |   for trial in range(n_trials):
24 |     x = example_input(window_size)
25 |     quantile = np.random.uniform()
26 |     alpha, beta = np.random.uniform(size=2)
27 |     pipe = rq.Pipeline(rq.LowPass(window=window_size, quantile=quantile, alpha=alpha, beta=beta))
28 |     y = pipe.feed(x)
29 |     z = mquantiles(x, quantile, alphap=alpha, betap=beta)
30 |     assert z == y[-1]
31 | 


--------------------------------------------------------------------------------
/python/tests/test_lowpass.py:
--------------------------------------------------------------------------------
 1 | # for pytest. I do not hook this up to pyproject.toml as it is intended or perhaps best practice.
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import rolling_quantiles as rq
 6 | from input import example_input
 7 | 
 8 | def test_median_scalar_inputs(window_size=3, length=100): # no interpolation yet
 9 |   pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2))
10 |   v = example_input(length)
11 |   for i, x in enumerate(v):
12 |     y = pipe.feed(x)
13 |     if i >= window_size:
14 |       assert y == np.median(v[(i-window_size+1):(i+1)])
15 | 
16 | def test_median_array_input(window_size=71, length=1000):
17 |   pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2))
18 |   x = example_input(length)
19 |   y = pipe.feed(x)
20 |   z = pd.Series(x).rolling(window_size).median()
21 |   assert np.equal(y[window_size:], z.values[window_size:]).all() # exact equality, since no arithmetic is done on the numbers
22 | 
23 | def test_basic_nans(window_size=5, length=20):
24 |   # make sure the pipeline effectively flushes its contents with NaNs
25 |   pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2))
26 |   x = example_input(length)
27 |   y = pipe.feed(x)
28 |   for i in range(window_size):
29 |     pipe.feed(np.nan)
30 |   z = pipe.feed(x)
31 |   assert np.equal(y, z).all()
32 | 


--------------------------------------------------------------------------------
/src/filter.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright 2021 Myrl Marmarelis
  3 | 
  4 |   Licensed under the Apache License, Version 2.0 (the "License");
  5 |   you may not use this file except in compliance with the License.
  6 |   You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 |   Unless required by applicable law or agreed to in writing, software
 11 |   distributed under the License is distributed on an "AS IS" BASIS,
 12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |   See the License for the specific language governing permissions and
 14 |   limitations under the License.
 15 |  */
 16 | 
 17 | #include "filter.h"
 18 | 
 19 | #include <stdlib.h>
 20 | #include <tgmath.h>
 21 | #include <stdbool.h>
 22 | 
 23 | // only supports adding and obtaining the middle element
 24 | struct high_pass_buffer {
 25 |   // this `head` is unsigned (rather than a pointer) because we will do math on it
 26 |   // points to the element right after the latest entry
 27 |   unsigned head;
 28 |   unsigned size;
 29 |   bool full;
 30 |   double entries[];
 31 | };
 32 | 
 33 | static struct high_pass_buffer* create_high_pass_buffer(unsigned size) {
 34 |   struct high_pass_buffer* buffer = malloc(sizeof(struct high_pass_buffer) + sizeof(double)*size);
 35 |   buffer->head = 0;
 36 |   buffer->size = size;
 37 |   buffer->full = false;
 38 |   // buffer->entries remains uninitialized on purpose
 39 |   return buffer;
 40 | }
 41 | 
 42 | static void add_to_high_pass_buffer(struct high_pass_buffer* buffer, double value) {
 43 |   if (buffer->head == buffer->size) {
 44 |     buffer->full = true; // always set---would be more expensive to read and conditionally write
 45 |     buffer->head = 0;
 46 |   }
 47 |   buffer->entries[buffer->head++] = value;
 48 | }
 49 | 
 50 | static double find_high_pass_buffer_middle(struct high_pass_buffer* buffer) {
 51 |   if (!buffer->full) {
 52 |     // match the below, which subtracts in the other direction. we're implictly rounding up, in a way, by not subtracting one
 53 |     int half = buffer->head / 2; // should optimize to bit shifts. don't use the remainder, (buffer->head % 2)
 54 |     return buffer->entries[half];
 55 |   }
 56 |   int half = (buffer->size / 2) + (buffer->size % 2);
 57 |   // by not subtracting one from head (and rounding `half` up,) I index the element to the right of the middle with even sizes
 58 |   int index = (int)buffer->head - half;
 59 |   if (index < 0)
 60 |     index = (int)buffer->size + index;
 61 |   return buffer->entries[index];
 62 | }
 63 | 
 64 | static void destroy_high_pass_buffer(struct high_pass_buffer* buffer) {
 65 |   free(buffer);
 66 | }
 67 | 
 68 | struct cascade_filter create_cascade_filter(struct cascade_description description) {
 69 |   unsigned portion = description.portion;
 70 |   double target = description.interpolation.target_quantile;
 71 |   if (!isnan(target)) {
 72 |     double target = compute_interpolation_target(
 73 |       description.window, description.interpolation);
 74 |     portion = (unsigned)fmax(floor(target), 1.0) - 1;
 75 |   }
 76 |   struct cascade_filter filter = {
 77 |     .monitor = create_rolling_quantile_monitor(
 78 |       description.window, portion, description.interpolation),
 79 |     .clock = 0,
 80 |     .subsample_rate = description.subsample_rate,
 81 |     .high_pass_buffer = NULL,
 82 |   };
 83 |   if (description.mode == HIGH_PASS) {
 84 |     filter.high_pass_buffer = create_high_pass_buffer(description.window);
 85 |   }
 86 |   return filter;
 87 | }
 88 | 
 89 | struct filter_pipeline* create_filter_pipeline(unsigned n_filters, struct cascade_description* descriptions) {
 90 |   for (struct cascade_description* description = descriptions;
 91 |       description != (descriptions + n_filters); description += 1) {
 92 |     if (!validate_interpolation(description->interpolation))
 93 |       return NULL; // before allocating anything
 94 |   }
 95 |   struct filter_pipeline* pipeline = malloc(
 96 |     sizeof(struct filter_pipeline) + n_filters*sizeof(struct cascade_filter));
 97 |   pipeline->n_filters = n_filters;
 98 |   for (unsigned i = 0; i < n_filters; i += 1) {
 99 |     pipeline->filters[i] = create_cascade_filter(descriptions[i]);
100 |   }
101 |   return pipeline;
102 | }
103 | 
104 | double feed_filter_pipeline(struct filter_pipeline* pipeline, double entry) {
105 |   double trickling_value = entry;
106 |   for (unsigned i = 0; i < pipeline->n_filters; i += 1) { // trickle down the pipeline
107 |     struct cascade_filter* filter = pipeline->filters + i;
108 |     double quantile = update_rolling_quantile(&filter->monitor, trickling_value);
109 |     if (filter->high_pass_buffer != NULL) { // explicit conditional for enhanced clarity
110 |       add_to_high_pass_buffer(filter->high_pass_buffer, trickling_value);
111 |       double middle = find_high_pass_buffer_middle(filter->high_pass_buffer);
112 |       trickling_value = middle - quantile;
113 |     } else {
114 |       trickling_value = quantile;
115 |     }
116 |     if ((++filter->clock) < filter->subsample_rate)
117 |       return NAN;
118 |     filter->clock = 0;
119 |   }
120 |   return trickling_value; // made it all the way through the torturous path!
121 | }
122 | 
123 | bool verify_pipeline(struct filter_pipeline* pipeline) {
124 |   for (unsigned i = 0; i < pipeline->n_filters; i += 1) {
125 |     if (!verify_monitor(&pipeline->filters[i].monitor))
126 |       return false;
127 |   }
128 |   return true;
129 | }
130 | 
131 | void destroy_filter_pipeline(struct filter_pipeline* pipeline) {
132 |   for (unsigned i = 0; i < pipeline->n_filters; i += 1) {
133 |     destroy_rolling_quantile_monitor(&pipeline->filters[i].monitor);
134 |     struct high_pass_buffer* buffer = pipeline->filters[i].high_pass_buffer;
135 |     if (buffer != NULL) destroy_high_pass_buffer(buffer);
136 |   }
137 |   free(pipeline);
138 | }
139 | 


--------------------------------------------------------------------------------
/src/filter.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Copyright 2021 Myrl Marmarelis
 3 | 
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | 
17 | #ifndef FILTER_H
18 | #define FILTER_H
19 | 
20 | #include "quantile.h"
21 | 
22 | /*
23 |   For a high-pass, wherein I would subtract a smoothed signal from the raw, I
24 |   would need to keep track of the temporal order so that I can refer back to
25 |   the "middle" value.
26 |     (IN TESTING) Perhaps it can be done rather straightforwardly by means
27 |       of the ring_buffer, but I have not ventured into that question yet. I simply
28 |       mention this because it appears to be a logical addition to the pipeline
29 |       functionality/"DSL".
30 |  */
31 | 
32 | /*
33 |   The high-pass filter does not support missing values demarcated by NaN, as
34 |   that mode relies upon the raw signal's availability. One could affix a
35 |   low-pass filter onto a high-pass intake to "smooth out" the NaNs before
36 |   they have a chance of entering the high-pass filter down the line.
37 |  */
38 | 
39 | enum cascade_mode {
40 |   HIGH_PASS, LOW_PASS
41 | };
42 | 
43 | struct cascade_description {
44 |   unsigned window;
45 |   unsigned portion;
46 |   struct interpolation interpolation; // if NAN, refer to `portion`
47 |   unsigned subsample_rate;
48 |   enum cascade_mode mode;
49 | };
50 | 
51 | struct high_pass_buffer;
52 | 
53 | struct cascade_filter {
54 |   struct rolling_quantile monitor;
55 |   unsigned clock;
56 |   unsigned subsample_rate;
57 |   struct high_pass_buffer* high_pass_buffer; // set to NULL when a low pass is desired
58 | };
59 | 
60 | struct filter_pipeline {
61 |   unsigned n_filters;
62 |   struct cascade_filter filters[];
63 | };
64 | 
65 | struct cascade_filter create_cascade_filter(struct cascade_description description);
66 | struct filter_pipeline* create_filter_pipeline(unsigned n_filters, struct cascade_description* descriptions);
67 | double feed_filter_pipeline(struct filter_pipeline* pipeline, double entry);
68 | bool verify_pipeline(struct filter_pipeline* pipeline);
69 | void destroy_filter_pipeline(struct filter_pipeline* pipeline);
70 | 
71 | 
72 | #endif
73 | 


--------------------------------------------------------------------------------
/src/heap.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright 2021 Myrl Marmarelis
  3 | 
  4 |   Licensed under the Apache License, Version 2.0 (the "License");
  5 |   you may not use this file except in compliance with the License.
  6 |   You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 |   Unless required by applicable law or agreed to in writing, software
 11 |   distributed under the License is distributed on an "AS IS" BASIS,
 12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |   See the License for the specific language governing permissions and
 14 |   limitations under the License.
 15 |  */
 16 | 
 17 | #include "heap.h"
 18 | 
 19 | #include <stdlib.h>
 20 | #include <tgmath.h>
 21 | #include <stdbool.h>
 22 | #include <stdarg.h>
 23 | #include <string.h>
 24 | #include <stdio.h>
 25 | 
 26 | struct ring_buffer* create_queue(unsigned size) {
 27 |   unsigned buffer_size = size * sizeof(ring_buffer_elem);
 28 |   struct ring_buffer* buffer = malloc(sizeof(struct ring_buffer) + buffer_size);
 29 |   buffer->size = size;
 30 |   buffer->n_entries = 0;
 31 |   buffer->head = &buffer->entries[0]; // slight semantic (NOT teleological) distinction between this and `buffer->entries`
 32 |   memset(buffer->entries, 0, buffer_size);
 33 |   return buffer;
 34 | }
 35 | 
 36 | struct heap* create_heap(enum heap_mode mode, unsigned size, struct ring_buffer* queue) {
 37 |   unsigned n_entries = size; // not necessarily trivial
 38 |   struct heap* data = calloc(1, sizeof(struct heap) + n_entries*sizeof(struct heap_element)); // calloc in order to ensure our elements are zeroed out
 39 |   data->mode = mode;
 40 |   data->size = size;
 41 |   data->queue = queue;
 42 |   return data;
 43 | }
 44 | 
 45 | void destroy_queue(struct ring_buffer* queue) {
 46 |   free(queue);
 47 | }
 48 | 
 49 | void destroy_heap(struct heap* heap) {
 50 |   free(heap);
 51 | }
 52 | 
 53 | bool is_ring_buffer_full(struct ring_buffer* buffer) {
 54 |   return buffer->n_entries == buffer->size;
 55 | }
 56 | 
 57 | bool is_ring_buffer_empty(struct ring_buffer* buffer) {
 58 |   return buffer->n_entries == 0;
 59 | }
 60 | 
 61 | void advance_ring_buffer(struct ring_buffer* buffer) {
 62 |   buffer->head++;
 63 |   if (buffer->head == (buffer->entries + buffer->size)) {
 64 |     buffer->head = buffer->entries;
 65 |   }
 66 | }
 67 | 
 68 | static // buffer does not have to be full as long as it isn't empty, but it might return NULL if it isn't full
 69 | ring_buffer_elem extract_oldest_entry_from_ring_buffer(struct ring_buffer* buffer) { // removes the entry too, and increments the head
 70 |   struct heap_element* entry = *buffer->head;
 71 |   *buffer->head = NULL;
 72 |   return entry;
 73 | }
 74 | 
 75 | static
 76 | ring_buffer_elem* get_next_position_in_ring_buffer(struct ring_buffer* buffer) {
 77 |   return buffer->head;
 78 | }
 79 | 
 80 | static
 81 | void xor_swap(void* a, void* b, size_t size) { // overkill hahah. do it byte by byte so that we are data-type agnostic
 82 |   char* a_bytes = a;
 83 |   char* b_bytes = b;
 84 |   for (size_t i = 0; i < size; i += 1) {
 85 |     a_bytes[i] ^= b_bytes[i];
 86 |     b_bytes[i] ^= a_bytes[i];
 87 |     a_bytes[i] ^= b_bytes[i];
 88 |   }
 89 | }
 90 | 
 91 | static
 92 | void plain_swap(void* a, void* b, size_t size) {
 93 |   char* a_bytes = a;
 94 |   char* b_bytes = b;
 95 |   for (size_t i = 0; i < size; i += 1) {
 96 |     char c = a_bytes[i];
 97 |     a_bytes[i] = b_bytes[i];
 98 |     b_bytes[i] = c;
 99 |   }
100 | }
101 | 
102 | static
103 | void swap(void* a, void* b, size_t size) {
104 |   plain_swap(a, b, size);
105 | }
106 | 
107 | static
108 | void swap_elements_in_heap(struct heap_element* a, struct heap_element* b) {
109 |   swap(&a->member, &b->member, sizeof(double));
110 |   /*if (a->loc_in_buffer && b->loc_in_buffer) { swap with actual addresses, rather than queue entries
111 |     swap(a->loc_in_buffer, b->loc_in_buffer, sizeof(struct heap_element*));
112 |   } else*/
113 |   if (a->loc_in_buffer) {
114 |     *a->loc_in_buffer = b;
115 |   }
116 |   if (b->loc_in_buffer) {
117 |     *b->loc_in_buffer = a;
118 |   }
119 |   swap(&a->loc_in_buffer, &b->loc_in_buffer, sizeof(struct heap_element**));
120 | }
121 | 
122 | static
123 | void trickle_down(struct heap* heap, unsigned i) { // conscious of the tags in the queue that may be invalidated
124 |   struct heap_element* node         = heap->elements + i;
125 |   struct heap_element* first_child  = heap->elements + (2*i + 1);
126 |   struct heap_element* second_child = heap->elements + (2*i + 2);
127 |   struct heap_element* limit = heap->elements + heap->n_entries;
128 |   if (heap->mode == MAX_HEAP) {
129 |     if (first_child >= limit) {
130 |       if (second_child >= limit)
131 |         return;
132 |       if (node->member < second_child->member)
133 |         swap_elements_in_heap(second_child, node);
134 |       return;
135 |     }
136 |     if (second_child >= limit) {
137 |       if (node->member < first_child->member)
138 |         swap_elements_in_heap(first_child, node);
139 |       return;
140 |     }
141 |     if ((node->member > first_child->member) && (node->member > second_child->member))
142 |       return;
143 |     if (first_child->member > second_child->member) {
144 |       swap_elements_in_heap(first_child, node);
145 |       trickle_down(heap, 2*i + 1);
146 |     } else {
147 |       swap_elements_in_heap(second_child, node);
148 |       trickle_down(heap, 2*i + 2); // tail-call optimized
149 |     }
150 |   } else if (heap->mode == MIN_HEAP) {
151 |     if (first_child >= limit) { // redundant, since the first child is always right before the second child.
152 |       if (second_child >= limit)
153 |         return;
154 |       if (node->member > second_child->member)
155 |         swap_elements_in_heap(second_child, node);
156 |       return;
157 |     }
158 |     if (second_child >= limit) {
159 |       if (node->member > first_child->member)
160 |         swap_elements_in_heap(first_child, node);
161 |       return;
162 |     }
163 |     if ((node->member < first_child->member) && (node->member < second_child->member))
164 |       return;
165 |     if (first_child->member < second_child->member) {
166 |       swap_elements_in_heap(first_child, node);
167 |       trickle_down(heap, 2*i + 1);
168 |     } else {
169 |       swap_elements_in_heap(second_child, node);
170 |       trickle_down(heap, 2*i + 2);
171 |     }
172 |   }
173 | }
174 | 
175 | static
176 | unsigned trickle_up(struct heap* heap, unsigned i) {
177 |   if (i == 0) return 0;
178 |   unsigned pos = i;
179 |   unsigned parent_index = (i-1) / 2;
180 |   struct heap_element* node   = &heap->elements[i];
181 |   struct heap_element* parent = &heap->elements[parent_index]; // division should be efficient
182 |   if (heap->mode == MAX_HEAP) {
183 |     if (node->member > parent->member) {
184 |       swap_elements_in_heap(parent, node);
185 |       return trickle_up(heap, parent_index);
186 |     }
187 |   } else if (heap->mode == MIN_HEAP) {
188 |     if (node->member < parent->member) {
189 |       swap_elements_in_heap(parent, node);
190 |       return trickle_up(heap, parent_index);
191 |     }
192 |   }
193 |   return pos;
194 | }
195 | 
196 | bool belongs_to_this_heap(struct heap* heap, struct heap_element* elem) { // when we come from a queue connected to many heaps, we need to locate the heap that contains each element
197 |   return (elem >= heap->elements) && (elem < (heap->elements + heap->n_entries));
198 | }
199 | 
200 | void remove_front_element_from_heap(struct heap* heap, struct heap_element* dest) { // the circular queue still maintains its order, and simply skips over the entries that have already been extracted when it's their time to expire
201 |   if (heap->n_entries == 0) {
202 |     *dest = (struct heap_element) { .member = NAN, .loc_in_buffer = NULL };
203 |     return;
204 |   }
205 |   struct heap_element* last_node = heap->elements + heap->n_entries - 1;
206 |   struct heap_element* root_node = heap->elements;
207 |   //struct heap_element extremum = *root_node;
208 |   swap_elements_in_heap(root_node, last_node);
209 |   heap->n_entries -= 1;
210 |   trickle_down(heap, 0);
211 |   //*extremum.loc_in_buffer = NULL; // clear our entry in the queue so that it doesn't mess up the guy that takes our address. keep track of loc_in_buffer so that it can be updated later.
212 |   swap_elements_in_heap(last_node, dest); // `last_node` cannot be affected by the trickler
213 |   if (last_node->loc_in_buffer != NULL) {
214 |     // since `swap_elements_in_heap` is quite aggressive with restoring previously-null queue entries, clear this out for the case that it is never brought back
215 |     // honestly, the (teleological! post hoc?) reasoning behind this line is a little confusing
216 |     *last_node->loc_in_buffer = NULL;
217 |   }
218 | }
219 | 
220 | double view_front_of_heap(struct heap* heap) {
221 |   if (heap->n_entries == 0)
222 |     return NAN;
223 |   return heap->elements[0].member;
224 | }
225 | 
226 | struct heap_element* add_value_to_heap(struct heap* heap, double value) {
227 |   // note: cannot swap into this local variable, even though its own `loc_in_buffer` is empty
228 |   struct heap_element new_entry = {
229 |     .member = value,
230 |     .loc_in_buffer = NULL };
231 |   return add_element_to_heap(heap, new_entry);
232 | }
233 | 
234 | // there is a shortcut path for inserting and then immediately extracting. Consider implementing that as a special case.
235 | struct heap_element* add_element_to_heap(struct heap* heap, struct heap_element new_elem) { // returns new heap element, not -> if it popped its oldest member in order to make space, then it returns heap_element with loc_in_buffer repurposed to act like an optional value's flag
236 |   if (heap->n_entries == heap->size)
237 |     return NULL; // (struct heap_element) { .member = NAN, .loc_in_buffer = NULL };
238 |   unsigned index_to_place = heap->n_entries;
239 |   heap->elements[index_to_place] = new_elem;
240 |   if (new_elem.loc_in_buffer != NULL) { // if this element was taken from a different heap, rectify its stale pointer. do it before trickling up, so that the correct pointer is propagated
241 |     *new_elem.loc_in_buffer = heap->elements + index_to_place;
242 |   }
243 |   heap->n_entries += 1;
244 |   unsigned index_placed = trickle_up(heap, index_to_place);
245 |   return heap->elements + index_placed;
246 | }
247 | 
248 | void register_in_queue(struct ring_buffer* queue, struct heap_element* elem) {
249 |   queue->n_entries += 1;
250 |   elem->loc_in_buffer = get_next_position_in_ring_buffer(queue);
251 |   *elem->loc_in_buffer = elem;
252 | }
253 | 
254 | /*
255 |   Return value.
256 |   -> if -1, the queue was already empty
257 |   -> if 0, the expired entry did not belong to a heap
258 |   -> if positive, then the index of the expired entry's heap (1-based)
259 |  */
260 | int expire_stale_entry_in_queue(struct ring_buffer* queue, unsigned n_heaps, ...) {
261 |   //if (!is_ring_buffer_full(queue)) drastic change of behavior since this...
262 |   //  return true;
263 |   if (is_ring_buffer_empty(queue))
264 |     return -1;
265 |   struct heap_element* oldest_elem = extract_oldest_entry_from_ring_buffer(queue);
266 |   if (oldest_elem == NULL)
267 |     return -1;
268 |   if (queue->n_entries > 0) { // this better not happen, but have a safeguard just in case...
269 |     queue->n_entries -= 1;
270 |   }
271 |   va_list heaps;
272 |   va_start(heaps, n_heaps);
273 |   unsigned i;
274 |   for (i = 0; i < n_heaps; i += 1) {
275 |     struct heap* heap = va_arg(heaps, struct heap*);
276 |     if (!belongs_to_this_heap(heap, oldest_elem))
277 |       continue;
278 |     //*oldest_elem->loc_in_buffer = NULL; // signal that it's already been removed. since we already advanced the buffer, we may not have to do this in practice.
279 |     struct heap_element* last_elem = heap->elements + heap->n_entries - 1;
280 |     heap->n_entries -= 1;
281 |     if (last_elem != oldest_elem) {
282 |       double oldest_value = oldest_elem->member;
283 |       double last_value = last_elem->member;
284 |       *oldest_elem = *last_elem; // last_entry will stay in the queue after another entry is added, since oldest_entry will be thrown instead. no need to void last_entry since we'll immediately add a new one on top of it
285 |       *last_elem->loc_in_buffer = oldest_elem; // in the end, we are swapping without care for the ultimate contents of the old last_elem
286 |       unsigned index_of_oldest = oldest_elem - heap->elements;
287 |       if ((heap->mode == MIN_HEAP && oldest_value < last_value) ||
288 |           (heap->mode == MAX_HEAP && oldest_value > last_value)) {
289 |         trickle_down(heap, index_of_oldest); // we moved the last guy on top of the oldest, so we may have to trickle it down again
290 |       } else {
291 |         trickle_up(heap, index_of_oldest); // DID THIS: can I somehow avoid having to do both? as it is, the last element that got transplanted may have to go up or down depending on which parent it lands. I know! If this really becomes an issue, I may compare this element to the previous occupant to know which direction it should take.
292 |       }
293 |     }
294 |     break;
295 |   }
296 |   va_end(heaps);
297 |   if (i < n_heaps) { // did we locate an owner heap?
298 |     return (int)(i + 1);
299 |   } else {
300 |     return 0;
301 |   }
302 | }
303 | 
304 | bool verify_heap(struct heap* heap) {
305 |   for (unsigned i = 0; i < heap->n_entries; i += 1) {
306 |     unsigned left_child = 2*i + 1;
307 |     unsigned right_child = 2*i + 2;
308 |     if (heap->mode == MAX_HEAP &&
309 |        ((left_child < heap->n_entries && heap->elements[i].member < heap->elements[left_child].member) ||
310 |        (right_child < heap->n_entries && heap->elements[i].member < heap->elements[right_child].member))) {
311 |       return false;
312 |     }
313 |     if (heap->mode == MIN_HEAP &&
314 |        ((left_child < heap->n_entries && heap->elements[i].member > heap->elements[left_child].member) ||
315 |        (right_child < heap->n_entries && heap->elements[i].member > heap->elements[right_child].member))) {
316 |       return false;
317 |     }
318 |   }
319 |   return true;
320 | }
321 | 


--------------------------------------------------------------------------------
/src/heap.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Copyright 2021 Myrl Marmarelis
 3 | 
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | 
17 | #ifndef HEAP_H
18 | #define HEAP_H
19 | 
20 | #include <stddef.h> // no need to hassle over the myriad of different data types provided here, which seem to matter most in the stylized abstract world of the C standard
21 | #include <stdbool.h>
22 | 
23 | enum heap_mode {
24 |   MAX_HEAP, MIN_HEAP
25 | };
26 | 
27 | typedef struct heap_element* ring_buffer_elem;
28 | 
29 | struct heap_element {
30 |   double member;
31 |   ring_buffer_elem* loc_in_buffer; // element is marked as nonexistent when this is set to null
32 | };
33 | 
34 | struct ring_buffer {
35 |   unsigned size; // could've called this the capacity
36 |   unsigned n_entries;
37 |   ring_buffer_elem* head;
38 |   ring_buffer_elem entries[]; // the alternative would be preprocessor magic with fixed sizes, but I don't think that gives us much benefit for the cost it bears.
39 | };
40 | 
41 | struct heap { // I like this simple naming scheme best.
42 |   enum heap_mode mode;
43 |   unsigned size;
44 |   unsigned n_entries; // multiple heaps may share a queue, so we need to maintain our own set of counting statistics
45 |   struct ring_buffer* queue; // sadly, this must be a pointer in order to remain standard C because ring_buffer is also variably sized.
46 |   struct heap_element elements[]; // keep all data in one contiguous block---one less layer of indirection (funny grammer, since we would otherwise say "fewer layers")
47 | };
48 | 
49 | 
50 | // Let's see how rusty my C(++) is. This shall take advantage of the most elegant parts of C17 (i.e. C11.) Feels nice to get back into the groove!
51 | // Const-correctness is a pain in the ass. Instead, I shall trust myself to properly use my interfaces.
52 | 
53 | bool belongs_to_this_heap(struct heap* heap, struct heap_element* elem);
54 | struct heap_element* add_value_to_heap(struct heap* heap, double value);
55 | struct heap_element* add_element_to_heap(struct heap* heap, struct heap_element new_elem); // this and the below should not remove from the conveyor-belt queue, since adding it back would cause it to lose its original position.
56 | void remove_front_element_from_heap(struct heap* heap, struct heap_element* destination); // swaps into the destination slot. no longer returns by value to signal transfer of ownership. all these methods exposed gives granular control to the operator
57 | double view_front_of_heap(struct heap* heap);
58 | bool is_ring_buffer_full(struct ring_buffer* queue);
59 | bool is_ring_buffer_empty(struct ring_buffer* queue);
60 | void advance_ring_buffer(struct ring_buffer* queue);
61 | void register_in_queue(struct ring_buffer* queue, struct heap_element* elem); // modifies element to point to a fresh spot on the queue. will expire on its own after some time.
62 | int expire_stale_entry_in_queue(struct ring_buffer* queue, unsigned n_heaps, ...); // pass pointers to all of the heaps attached to this queue
63 | struct ring_buffer* create_queue(unsigned size);
64 | struct heap* create_heap(enum heap_mode mode, unsigned size, struct ring_buffer* queue);
65 | bool verify_heap(struct heap* heap);
66 | void destroy_queue(struct ring_buffer* queue);
67 | void destroy_heap(struct heap* heap);
68 | 
69 | #endif
70 | 


--------------------------------------------------------------------------------
/src/python.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright 2021 Myrl Marmarelis
  3 | 
  4 |   Licensed under the Apache License, Version 2.0 (the "License");
  5 |   you may not use this file except in compliance with the License.
  6 |   You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 |   Unless required by applicable law or agreed to in writing, software
 11 |   distributed under the License is distributed on an "AS IS" BASIS,
 12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |   See the License for the specific language governing permissions and
 14 |   limitations under the License.
 15 |  */
 16 | 
 17 | #define PY_SSIZE_T_CLEAN
 18 | #include "Python.h"
 19 | #include "structmember.h"
 20 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 21 | #include "numpy/ndarrayobject.h"
 22 | #include "numpy/ufuncobject.h"
 23 | 
 24 | #include "filter.h"
 25 | 
 26 | #include <stdbool.h>
 27 | 
 28 | // Bypass the need for highly scalable storage of overwhelming data streams!
 29 | // Highly verbose, "bare metal" Python bindings.
 30 | 
 31 | struct description {
 32 |   PyObject_HEAD
 33 |   // can I just compose this with the actual underlying type `struct cascade_description`?
 34 |   unsigned window;
 35 |   unsigned portion;
 36 |   unsigned subsample_rate;
 37 |   double quantile;
 38 |   double alpha;
 39 |   double beta;
 40 | };
 41 | 
 42 | static PyMemberDef description_members[] = { // base class of HighPass and LowPass
 43 |   {
 44 |     "window", T_UINT, offsetof(struct description, window), 0,
 45 |     "window size"
 46 |   }, {
 47 |     "portion", T_UINT, offsetof(struct description, portion), 0,
 48 |     "rank for the quantile element out of the window size"
 49 |   }, {
 50 |     "subsample_rate", T_UINT, offsetof(struct description, subsample_rate), 0,
 51 |     "every how many data points to subsample"
 52 |   }, {
 53 |     "quantile", T_DOUBLE, offsetof(struct description, quantile), 0,
 54 |     "target quantile to achieve by linear interpolation; setting this ignores `portion`"
 55 |   }, {
 56 |     "alpha", T_DOUBLE, offsetof(struct description, alpha), 0,
 57 |     "interpolation parameter, 0 <= alpha <= 1"
 58 |   }, {
 59 |     "beta", T_DOUBLE, offsetof(struct description, beta), 0,
 60 |     "interpolation parameter, 0 <= beta <= 1"
 61 |   }, {NULL}
 62 | };
 63 | 
 64 | static int description_init(struct description* self, PyObject* args, PyObject* kwds) {
 65 |   static char* keyword_list[] = {
 66 |     "window", "portion", "subsample_rate", "quantile", "alpha", "beta", NULL};
 67 |   unsigned window = 0;
 68 |   unsigned portion = 0;
 69 |   unsigned subsample_rate = 1;
 70 |   double quantile = NAN;
 71 |   double alpha = 1.0;
 72 |   double beta = 1.0;
 73 |   // specify optional '|' and then keyword-only '$' arguments
 74 |   if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$IIIddd", keyword_list,
 75 |       &window, &portion, &subsample_rate, &quantile, &alpha, &beta)) {
 76 |     PyErr_SetString(PyExc_TypeError,
 77 |       "invalid arguments passed to Description (either LowPass or HighPass) constructor");
 78 |     return -1;
 79 |   }
 80 |   if (window == 0) {
 81 |     PyErr_SetString(PyExc_ValueError, "please set a positive window size");
 82 |     return -1;
 83 |   }
 84 |   self->window = window;
 85 |   self->portion = portion;
 86 |   self->subsample_rate = subsample_rate;
 87 |   self->quantile = quantile;
 88 |   self->alpha = alpha;
 89 |   self->beta = beta; // my current setup is a little redundant; for instance, I could pass &self->beta directly
 90 |   return 0;
 91 | }
 92 | 
 93 | static PyTypeObject description_type = {
 94 |   PyVarObject_HEAD_INIT(NULL, 0) // funky macro
 95 |   .tp_name = "triton.Description",
 96 |   .tp_doc = "Base filter description. Do not use this directly; it enables subclasses that act like algebraic data types.",
 97 |   .tp_basicsize = sizeof(struct description),
 98 |   .tp_itemsize = 0, // for variably sized objects
 99 |   .tp_flags = Py_TPFLAGS_DEFAULT,
100 |   .tp_new = PyType_GenericNew,
101 |   .tp_members = description_members,
102 |   .tp_init = (initproc)description_init,
103 | };
104 | 
105 | bool init_description(PyObject* self) {
106 |   if (PyType_Ready(&description_type) < 0)
107 |     return false;
108 |   Py_INCREF(&description_type);
109 |   if (PyModule_AddObject(self, "Description", (PyObject*) &description_type) < 0) {
110 |     Py_DECREF(&description_type);
111 |     return false;
112 |   }
113 |   return true;
114 | }
115 | 
116 | struct high_pass {
117 |   // first member defines and enables subclassing. now this type is polymorphic and may be cast as a `struct description`
118 |   struct description description;
119 |   // I probably do not even need a new struct type, but I keep it here in case I wish to extend it later.
120 | };
121 | 
122 | static PyTypeObject high_pass_type = {
123 |   PyVarObject_HEAD_INIT(NULL, 0) // funky macro
124 |   .tp_name = "triton.HighPass",
125 |   .tp_doc = "High-pass filter description.",
126 |   .tp_basicsize = sizeof(struct high_pass),
127 |   .tp_itemsize = 0, // for variably sized objects
128 |   .tp_flags = Py_TPFLAGS_DEFAULT,
129 |   .tp_new = PyType_GenericNew,
130 |   .tp_members = description_members, // just reuse the description struct
131 | };
132 | 
133 | bool init_high_pass(PyObject* self) {
134 |   high_pass_type.tp_base = &description_type; // must be set at runtime, not statically
135 |   if (PyType_Ready(&high_pass_type) < 0)
136 |     return false;
137 |   Py_INCREF(&high_pass_type);
138 |   if (PyModule_AddObject(self, "HighPass", (PyObject*) &high_pass_type) < 0) {
139 |     Py_DECREF(&high_pass_type);
140 |     return false;
141 |   }
142 |   return true;
143 | }
144 | 
145 | struct low_pass {
146 |   struct description description;
147 | };
148 | 
149 | static PyTypeObject low_pass_type = {
150 |   PyVarObject_HEAD_INIT(NULL, 0) // funky macro
151 |   .tp_name = "triton.LowPass",
152 |   .tp_doc = "Low-pass filter description.",
153 |   .tp_basicsize = sizeof(struct description),
154 |   .tp_itemsize = 0, // for variably sized objects
155 |   .tp_flags = Py_TPFLAGS_DEFAULT,
156 |   .tp_new = PyType_GenericNew,
157 |   .tp_members = description_members, // just reuse the description struct
158 | };
159 | 
160 | bool init_low_pass(PyObject* self) {
161 |   low_pass_type.tp_base = &description_type; // must be set at runtime, not statically
162 |   if (PyType_Ready(&low_pass_type) < 0)
163 |     return false;
164 |   Py_INCREF(&low_pass_type);
165 |   if (PyModule_AddObject(self, "LowPass", (PyObject*) &low_pass_type) < 0) {
166 |     Py_DECREF(&low_pass_type);
167 |     return false;
168 |   }
169 |   return true;
170 | }
171 | 
172 | /*
173 |   I have decided against providing a `ufunc` method to the Pipeline object for feeding,
174 |   not only because that would be a pain in the wrong place, but also because the semantics
175 |   are mismatched. I do not want to vectorize over arbitrary dimensions. I shall take in either
176 |   a single value, a generator of values, or a unidimensional array of values. No more, no less.
177 |  */
178 | 
179 | struct pipeline {
180 |   PyObject_HEAD
181 |   struct filter_pipeline* filters;
182 |   unsigned stride;
183 |   double lag; // in agnostic time units, increments of one half (since we bisect the window)
184 | };
185 | 
186 | static PyMemberDef pipeline_members[] = { // base class of HighPass and LowPass
187 |   {
188 |     "stride", T_UINT, offsetof(struct pipeline, stride), READONLY,
189 |     "the total stride between subsamples: unit if no subsampling occurs"
190 |   }, {
191 |     "lag", T_DOUBLE, offsetof(struct pipeline, lag), READONLY,
192 |     "the effective lag time between the pipeline's output and its input, for a balanced filter"
193 |     // the moment it's received. balanced -> zero-phase or something like that?
194 |   }, {NULL}
195 | };
196 | 
197 | static PyObject* pipeline_new(PyTypeObject* type, PyObject* args, PyObject* kwds) {
198 |   struct pipeline* self = (struct pipeline*)type->tp_alloc(type, 0);
199 |   if (self == NULL)
200 |     return NULL;
201 |   self->filters = NULL;
202 |   return (PyObject*)self;
203 | }
204 | 
205 | /*
206 |   Construct with keyword arguments.
207 |   Do I need to call INCREF or DECREF on the arguments here? I'm following the philosophy that they should flow right through me.
208 |  */
209 | static int pipeline_init(struct pipeline* self, PyObject* args, PyObject* kwds) {
210 |   if (!PyTuple_Check(args))
211 |     return -1;
212 |   Py_ssize_t n_filters = PyTuple_Size(args);
213 |   struct cascade_description* descriptions = malloc(n_filters * sizeof(struct cascade_description));
214 |   unsigned stride = 1;
215 |   double lag = 0.0;
216 |   // double cascading_rate = 1.0; do the whole real-units shebang with a higher-level description structure
217 |   for (Py_ssize_t i = 0; i < n_filters; i += 1) {
218 |     PyObject* item = PyTuple_GetItem(args, i);
219 |     if (item == NULL) {
220 |       PyErr_SetString(PyExc_TypeError, "encountered a null description");
221 |       return -1;
222 |     }
223 |     struct description* desc_item = (struct description*)item;
224 |     if (PyObject_TypeCheck(item, &description_type)) { // can I just access it straight?
225 |       descriptions[i].window = desc_item->window;
226 |       descriptions[i].portion = desc_item->portion;
227 |       descriptions[i].subsample_rate = desc_item->subsample_rate;
228 |       descriptions[i].interpolation = (struct interpolation) {
229 |         .target_quantile = desc_item->quantile,
230 |         .alpha = desc_item->alpha,
231 |         .beta = desc_item->beta };
232 |       lag += 0.5 * (double)(desc_item->window * stride); // buildup/cascade/waterfall of lags
233 |       stride *= desc_item->subsample_rate;
234 |     }
235 |     //switch (item->ob_type) {
236 |     //  case &high_pass_type: {
237 |     if (PyObject_TypeCheck(item, &high_pass_type)) { // allows for subtypes as well, as opposed to item->ob_type equality checks
238 |       descriptions[i].mode = HIGH_PASS;
239 |     } else if (PyObject_TypeCheck(item, &low_pass_type)) {
240 |       descriptions[i].mode = LOW_PASS;
241 |     } else {
242 |       PyErr_SetString(PyExc_TypeError, "one of the descriptions is neither a HighPass nor a LowPass");
243 |       return -1;
244 |     }
245 |   }
246 |   self->filters = create_filter_pipeline((unsigned)n_filters, descriptions);
247 |   if (self->filters == NULL) {
248 |     PyErr_SetString(PyExc_ValueError, "invalid descriptions passed to pipeline constructor");
249 |     return -1;
250 |   }
251 |   self->stride = stride;
252 |   self->lag = lag;
253 |   return 0;
254 | }
255 | 
256 | // there is also .tp_finalize that is better suited to deconstructors that perform complex interactions with Python objects
257 | static void pipeline_dealloc(struct pipeline* self) {
258 |   destroy_filter_pipeline(self->filters);
259 |   Py_TYPE(self)->tp_free(self); // why is the TYPE macro needed? in case of multiple inheritance (composition)?
260 | }
261 | 
262 | static PyObject* pipeline_repr(struct pipeline* self) {
263 |   static const char* format = "FilterPipeline(<%d cascades>)"; // each cascade consits of a filter and a subsample
264 |   return PyUnicode_FromFormat(format, self->filters->n_filters);
265 | }
266 | 
267 | // use the fastcall convention, because why the heck not (Python 3.7+). take in a constant array of PyObject pointers.
268 | /*
269 |   Currently I accept a scalar or an NumPy array. In the future, I would like to consume a boolean `inplace` parameter
270 |   for the latter instance to allow me to modify the array in place without creating a new one.
271 | 
272 |   I should consider checking the Python version with macros, and falling back to a traditional-style (not fastcall)
273 |   method definition for versions prior to 3.7.
274 |  */
275 | static PyObject* pipeline_feed(struct pipeline* self, PyObject* const* args, Py_ssize_t n_args) {
276 |   if (n_args != 1) {
277 |     PyErr_SetString(PyExc_NotImplementedError, "pipeline.feed(*) only accepts a singular argument"); // ValueError?
278 |     return NULL;
279 |   }
280 |   if (PyFloat_Check(args[0]) || PyLong_Check(args[0])) {
281 |     double input = PyFloat_AsDouble(args[0]); // implicitly converts integers and other related types
282 |     double output = feed_filter_pipeline(self->filters, input);
283 |     return PyFloat_FromDouble(output);
284 |   }
285 |   if (PyArray_Check(args[0])) {
286 |     PyArrayObject* array = (PyArrayObject*)args[0];
287 |     if (PyArray_NDIM(array) > 1) {
288 |       PyErr_SetString(PyExc_ValueError, "array can't have multiple dimensions");
289 |       return NULL;
290 |     }
291 |     //PyArrayObject* output_array = PyArray_NewLikeArray(array, NPY_KEEPORDER, NULL, 1);
292 |     if (PyArray_Size((PyObject*)array) == 0) {
293 |       return (PyObject*)array; // nothing to do
294 |     }
295 |     PyArrayObject* array_operands[2];
296 |     array_operands[0] = array;
297 |     array_operands[1] = NULL; // second operand will be designated as the output, and allocated automatically by the iterator
298 |     npy_uint32 op_flags[2];
299 |     op_flags[0] = NPY_ITER_READONLY;
300 |     op_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE;
301 |     PyArray_Descr* op_desc[2];
302 |     op_desc[0] = PyArray_DescrFromType(NPY_DOUBLE);
303 |     op_desc[1] = PyArray_DescrFromType(NPY_DOUBLE); // cast to double and output double
304 |     NpyIter* iterator = NpyIter_MultiNew(2, array_operands,
305 |       // no seperate external "inner loop", as we treat it all like a flat array.
306 |       // is there any impact on efficiency for our use-case, to keep advancing the iterator for each element?
307 |       NPY_ITER_REFS_OK|NPY_ITER_BUFFERED, // buffered to allow casting on the fly
308 |       // is KEEPORDER the right thing here (or significant), when I treat the array as a 1D ordered sequence?
309 |       NPY_KEEPORDER, NPY_SAME_KIND_CASTING, op_flags, op_desc);
310 |       // for NpyIter_New (not the above), the final `NULL` is for an error-message output argument
311 |     Py_DECREF(op_desc[0]);
312 |     Py_DECREF(op_desc[1]);
313 |     if (iterator == NULL) {
314 |       PyErr_SetString(PyExc_ValueError, "could not initialize an iterator on the array");
315 |       return NULL;
316 |     }
317 |     NpyIter_IterNextFunc* iter_next = NpyIter_GetIterNext(iterator, NULL);
318 |     if (iter_next == NULL) {
319 |       NpyIter_Deallocate(iterator);
320 |       PyErr_SetString(PyExc_ValueError, "could not initialize the iterator `next function` on the array");
321 |       return NULL;
322 |     }
323 |     double** data = (double**)NpyIter_GetDataPtrArray(iterator);
324 |     do {
325 |       double input = *data[0];
326 |       double* output = data[1];
327 |       // interspersed with NaNs to maintain harmony and consistency with the general API
328 |       *output = feed_filter_pipeline(self->filters, input);
329 |     } while (iter_next(iterator));
330 |     PyArrayObject* output_array = NpyIter_GetOperandArray(iterator)[1];
331 |     Py_INCREF(output_array);
332 |     // only call this after incrementing its output's reference count
333 |     if (NpyIter_Deallocate(iterator) != NPY_SUCCEED) {
334 |       Py_DECREF(output_array);
335 |       return NULL;
336 |     }
337 |     return (PyObject*)output_array;
338 |   }
339 |   // numeric lists are not supported yet. at this point, just do generators and comprehensions.
340 |   // no extra performance benefits would be afforded.
341 |   PyErr_SetString(PyExc_TypeError, "please pass a number or unidimensional np.array to pipeline.feed(*)");
342 |   return NULL;
343 | }
344 | 
345 | static struct PyMethodDef pipeline_methods[] = {
346 |   {"feed", (PyCFunction)pipeline_feed, METH_FASTCALL, // not truly a PyCFunction, due to METH_FASTCALL ...?
347 |     "Feed a value, or a series thereof (array, list, generator,) into the filter pipeline."},
348 |   {NULL, NULL, 0, NULL} // sentinel
349 | };
350 | 
351 | static PyTypeObject pipeline_type = {
352 |   PyVarObject_HEAD_INIT(NULL, 0)
353 |   .tp_name = "triton.Pipeline",
354 |   .tp_doc = "A filter pipeline.",
355 |   .tp_basicsize = sizeof(struct pipeline),
356 |   .tp_itemsize = 0, // for variably sized objects
357 |   .tp_flags = Py_TPFLAGS_DEFAULT,
358 |   .tp_methods = pipeline_methods,
359 |   .tp_members = pipeline_members,
360 |   .tp_init = (initproc)pipeline_init,
361 |   .tp_new = pipeline_new,
362 |   .tp_del = (destructor)pipeline_dealloc,
363 |   .tp_repr = (reprfunc)pipeline_repr,
364 | };
365 | 
366 | bool init_pipeline(PyObject* self) {
367 |   if (PyType_Ready(&pipeline_type) < 0)
368 |     return false;
369 |   Py_INCREF(&pipeline_type);
370 |   if (PyModule_AddObject(self, "Pipeline", (PyObject*) &pipeline_type) < 0) {
371 |     Py_DECREF(&pipeline_type);
372 |     return false;
373 |   }
374 |   return true;
375 | }
376 | 
377 | 
378 | // unused in `module` structure below. things can be added dynamically upon initialization
379 | static struct PyMethodDef methods[] = {
380 |   {NULL, NULL, 0, NULL} // sentinel
381 | };
382 | 
383 | static struct PyModuleDef module = {
384 |   PyModuleDef_HEAD_INIT,
385 |   .m_name = "triton", // is this triton ir rolling_quantiles.triton ?
386 |   .m_doc = "The blazing-fast filter implementation.", // docs
387 |   .m_size = 0, // memory required for global state. we don't use any.
388 |   .m_methods = methods,
389 | };
390 | 
391 | 
392 | PyMODINIT_FUNC PyInit_triton(void) {
393 |   PyObject* self =  PyModule_Create(&module);
394 |   import_array();
395 |   static bool (*type_initializers[])(PyObject*) = { // array of function pointers
396 |     init_description, init_high_pass, init_low_pass, init_pipeline, NULL
397 |   };
398 |   bool (**init)(PyObject*) = &type_initializers[0];
399 |   while (*init != NULL) {
400 |     if (!(*init)(self)) {
401 |       Py_DECREF(self);
402 |       return NULL;
403 |     }
404 |     ++init;
405 |   }
406 |   return self;
407 | }
408 | 


--------------------------------------------------------------------------------
/src/quantile.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright 2021 Myrl Marmarelis
  3 | 
  4 |   Licensed under the Apache License, Version 2.0 (the "License");
  5 |   you may not use this file except in compliance with the License.
  6 |   You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 |   Unless required by applicable law or agreed to in writing, software
 11 |   distributed under the License is distributed on an "AS IS" BASIS,
 12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |   See the License for the specific language governing permissions and
 14 |   limitations under the License.
 15 |  */
 16 | 
 17 | #include "quantile.h"
 18 | #include "heap.h"
 19 | 
 20 | #include <stdlib.h>
 21 | #include <tgmath.h>
 22 | #include <stdbool.h>
 23 | #include <stdarg.h>
 24 | #include <stdio.h>
 25 | 
 26 | const struct interpolation NO_INTERPOLATION = { .target_quantile = NAN };
 27 | 
 28 | struct rolling_quantile create_rolling_quantile_monitor(unsigned window, unsigned portion, struct interpolation interp) {
 29 |   //if (window % 2 == 0) this only makes sense for the median special case.
 30 |   //  return NULL;
 31 |   struct ring_buffer* queue = create_queue(window);
 32 |   struct rolling_quantile monitor = {
 33 |     .queue = queue,
 34 |     .left_heap = create_heap(MAX_HEAP, portion + 1, queue),
 35 |     .right_heap = create_heap(MIN_HEAP, window - portion, queue), // - 1 and then + 1
 36 |     .current_value = (struct heap_element) {.member = NAN, .loc_in_buffer = NULL}, // to keep track of queue position
 37 |     .window = window,
 38 |     .portion = portion,
 39 |     .count = 0,
 40 |     .interpolation = interp,
 41 |   };
 42 |   return monitor;
 43 | }
 44 | 
 45 | void destroy_rolling_quantile_monitor(struct rolling_quantile* monitor) {
 46 |   destroy_heap(monitor->left_heap);
 47 |   destroy_heap(monitor->right_heap);
 48 |   destroy_queue(monitor->queue);
 49 | }
 50 | 
 51 | static bool is_between_zero_and_one(double val) { // null and unit
 52 |   return (val >= 0.0) && (val <= 1.0);
 53 | }
 54 | 
 55 | bool validate_interpolation(struct interpolation interp) {
 56 |   return isnan(interp.target_quantile) || (
 57 |     is_between_zero_and_one(interp.target_quantile) &&
 58 |     is_between_zero_and_one(interp.alpha) &&
 59 |     is_between_zero_and_one(interp.beta));
 60 | }
 61 | 
 62 | double compute_interpolation_target(unsigned window, struct interpolation interp) {
 63 |   double real_portion = (double)window * interp.target_quantile;
 64 |   double correction = interp.alpha +
 65 |     interp.target_quantile*(1.0 - interp.alpha - interp.beta);
 66 |   return real_portion + correction;
 67 | }
 68 | 
 69 | static double interpolate_current_rolling_quantile(struct rolling_quantile* monitor) {
 70 |   struct interpolation interp = monitor->interpolation; // is copy worth the locality?
 71 |   double target = compute_interpolation_target(monitor->window, interp);
 72 |   double gamma = target - floor(target); // must be between 0 and 1, but avoid checking for the sake of performance
 73 |   int index = (int)floor(target) - 1; // subtract one because `portion` refers to the number of items in the left heap (but `target_portion` does *not*)
 74 |   int portion = (int)monitor->portion;
 75 |   double current = monitor->current_value.member;
 76 |   if (index == portion) {
 77 |     if (monitor->right_heap->n_entries == 0)
 78 |       return current;
 79 |     double next = view_front_of_heap(monitor->right_heap);
 80 |     return (1.0-gamma)*current + gamma*next;
 81 |   } else if (index == (portion-1)) {
 82 |     if (monitor->left_heap->n_entries == 0)
 83 |       return current;
 84 |     double previous = view_front_of_heap(monitor->left_heap);
 85 |     return (1.0-gamma)*previous + gamma*current;
 86 |   }
 87 |   return NAN; // monitor.portion is uncalibrated/corrupted
 88 | }
 89 | 
 90 | /*
 91 |   Game plan.
 92 |     We shall first expel the stale entry, then add the new entry to its rightful receptacle based on its ordering wrt the current value.
 93 |     If a NaN is added, we will simply count it as a cycle without a new observation: old will be expelled with no replenishing.
 94 |     *Do not* contaminate the heaps with NaNs. That may cause their rebalancing to spiral out of control.
 95 |     Flushing. If the whole window empties, effectively reset the filter and revert `current_value` to its initial state.
 96 | */
 97 | double update_rolling_quantile(struct rolling_quantile* monitor, double next_entry) {
 98 |   //unsigned left_entries = monitor->left_heap->n_entries;
 99 |   unsigned right_entries = monitor->right_heap->n_entries;
100 |   //unsigned total_entries = left_entries + right_entries + 1;
101 |   // we control the advancement ourselves, since it must happen exactly once per call to this method
102 |   // this makes life much easier than engineering an overly clever ring-buffer interface
103 |   advance_ring_buffer(monitor->queue);
104 |   if (isnan(monitor->current_value.member)) { // total_entries will be 1 regardless of whether current_value has anything in it. we want to be careful, since NaNs will also signal missing values coming in
105 |     if (isnan(next_entry))
106 |       return NAN;
107 |     monitor->current_value.member = next_entry;
108 |     register_in_queue(monitor->queue, &monitor->current_value);
109 |     monitor->count += 1;
110 |     return next_entry;
111 |   }
112 |   int expired_in_heap = expire_stale_entry_in_queue(monitor->queue, 2, monitor->left_heap, monitor->right_heap);
113 |   if (expired_in_heap == 0) { // expired, but did not belong to a heap
114 |     if (monitor->queue->n_entries == 0) { // there do not exist other entries
115 |       // basically reset and go again
116 |       monitor->current_value.member = NAN;
117 |       return update_rolling_quantile(monitor, next_entry); // a delicate corner case, looping us back to the top. tread carefully
118 |     }
119 |     struct heap* some_heap = (right_entries > 0)? monitor->right_heap : monitor->left_heap; // pick arbitrarily
120 |     remove_front_element_from_heap(some_heap, &monitor->current_value);
121 |   } // else if (expired_in_heap == -1) { ... } // there was nothing to expire
122 |   if (!isnan(next_entry)) {
123 |     struct heap* heap_for_next = (next_entry > monitor->current_value.member)? monitor->right_heap : monitor->left_heap;
124 |     struct heap_element* next_elem = add_value_to_heap(heap_for_next, next_entry);
125 |     if (next_elem == NULL) // BY DESIGN SHOULD NEVER HAPPEN
126 |       printf("TRIED TO ADD TO A FULL HEAP\n");
127 |     register_in_queue(monitor->queue, next_elem);
128 |   }
129 |   monitor->count += 1;
130 |   rebalance_rolling_quantile(monitor); // should run a provably deterministic number of times (once?)
131 |   if (!isnan(monitor->interpolation.target_quantile))
132 |     return interpolate_current_rolling_quantile(monitor);
133 |   return monitor->current_value.member;
134 | }
135 | 
136 | int rebalance_rolling_quantile(struct rolling_quantile* monitor) {
137 |   unsigned left_entries = monitor->left_heap->n_entries;
138 |   unsigned right_entries = monitor->right_heap->n_entries;
139 |   unsigned total_entries = left_entries + right_entries + 1;
140 |   unsigned left_target = (monitor->portion * total_entries) / monitor->window; // builds up gradually when the pipeline is not yet saturated
141 |   if (left_entries == left_target)
142 |     return 0; // if-clauses with lone return statements don't need brackets in my book
143 |   struct heap* overdue_heap = (left_entries < left_target)? monitor->right_heap : monitor->left_heap;
144 |   struct heap_element holdover = monitor->current_value;
145 |   remove_front_element_from_heap(overdue_heap, &monitor->current_value); // take from the correct heap to restore balance. expelled element is transferred into our current slot
146 |   struct heap* other_heap = (overdue_heap == monitor->right_heap)? monitor->left_heap : monitor->right_heap; // is it worth avoiding two separate branches of slightly redundant code?
147 |   if (!isnan(holdover.member)) {
148 |     // this part does not rely on the actual address of `holdover`/`current_value`, thankfully
149 |     add_element_to_heap(other_heap, holdover); // the method knows that `*holdover.loc_in_buffer` is stale after copying
150 |   }
151 |   return rebalance_rolling_quantile(monitor) + 1; // is non-tail-call recursion *always* dangerous? each round performs one set of "remove and add"
152 | }
153 | 
154 | /*
155 |   Consists of various sanity checks and tests on integrity.
156 | */
157 | bool verify_monitor(struct rolling_quantile* monitor) {
158 |   double left = view_front_of_heap(monitor->left_heap);
159 |   if (!isnan(left) && (left > monitor->current_value.member))
160 |    return false;
161 |   double right = view_front_of_heap(monitor->right_heap);
162 |   if (!isnan(right) && (right < monitor->current_value.member))
163 |     return false;
164 |   return verify_heap(monitor->left_heap) && verify_heap(monitor->right_heap);
165 | }
166 | 


--------------------------------------------------------------------------------
/src/quantile.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Copyright 2021 Myrl Marmarelis
 3 | 
 4 |   Licensed under the Apache License, Version 2.0 (the "License");
 5 |   you may not use this file except in compliance with the License.
 6 |   You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | 
17 | #ifndef QUANTILE_H
18 | #define QUANTILE_H
19 | 
20 | #include "heap.h"
21 | 
22 | #include <stdbool.h>
23 | 
24 | /*
25 |   Composable (in pipelines/chains) rolling quantiles of arbitrary time scales.
26 | */
27 | 
28 | /*
29 |   Optional interpolation with (alpha, beta) parameters as post-processing to refine
30 |   the estimate. See the following:
31 |     https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
32 |     https://github.com/scipy/scipy/blob/v1.6.1/scipy/stats/mstats_basic.py#L2607-L2732
33 | 
34 |   We assume that `target_quantile` and `monitor.portion` have been set such
35 |   that they are in agreement with one another.
36 |  */
37 | struct interpolation {
38 |   double target_quantile; // NaN if no interpolation is to be performed
39 |   double alpha;
40 |   double beta;
41 | };
42 | 
43 | extern const struct interpolation NO_INTERPOLATION;
44 | 
45 | // Can't hide this structure's implementation in quantile.c because we want to be able to handle it by value. Comprise other structures of it without having many layers of indirection.
46 | struct rolling_quantile {
47 |   struct heap_element current_value;
48 |   unsigned window;
49 |   unsigned portion;
50 |   struct ring_buffer* queue;
51 |   struct heap* left_heap;
52 |   struct heap* right_heap;
53 |   unsigned count;
54 |   struct interpolation interpolation; // store this optional setting without indirection.
55 | };
56 | 
57 | struct rolling_quantile create_rolling_quantile_monitor(unsigned window, unsigned portion, struct interpolation interp); // window should be an odd number. portion is how much probability mass goes to the left side, so (portion+0.5)/window gives the quantile.
58 | bool validate_interpolation(struct interpolation interp);
59 | double compute_interpolation_target(unsigned window, struct interpolation interp);
60 | double update_rolling_quantile(struct rolling_quantile* monitor, double entry);
61 | int rebalance_rolling_quantile(struct rolling_quantile* monitor); // returns the number of sifts and shifts it had to perform
62 | bool verify_monitor(struct rolling_quantile* monitor);
63 | void destroy_rolling_quantile_monitor(struct rolling_quantile* monitor);
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/src/test.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright 2021 Myrl Marmarelis
  3 | 
  4 |   Licensed under the Apache License, Version 2.0 (the "License");
  5 |   you may not use this file except in compliance with the License.
  6 |   You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 |   Unless required by applicable law or agreed to in writing, software
 11 |   distributed under the License is distributed on an "AS IS" BASIS,
 12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |   See the License for the specific language governing permissions and
 14 |   limitations under the License.
 15 |  */
 16 | 
 17 | #include "heap.h"
 18 | #include "quantile.h"
 19 | #include "filter.h"
 20 | 
 21 | #include <stdlib.h>
 22 | #include <stdio.h>
 23 | #include <math.h>
 24 | #include <time.h>
 25 | 
 26 | void test_single_heap(void) {
 27 |   struct ring_buffer* queue = create_queue(9);
 28 |   struct heap* heap = create_heap(MAX_HEAP, 10, queue);
 29 |   struct heap_element* elem;
 30 |   for (double i = 1.0; i < 15.0; i += 1.0) {
 31 |     elem = add_value_to_heap(heap, i);
 32 |     expire_stale_entry_in_queue(queue, 1, heap);
 33 |     register_in_queue(queue, elem);
 34 |   }
 35 |   struct heap_element output;
 36 |   for (unsigned i = 0; i < 10; i += 1) {
 37 |     remove_front_element_from_heap(heap, &output);
 38 |     printf("%f\n", output.member);
 39 |   }
 40 | }
 41 | 
 42 | void test_multiple_heaps(void) {
 43 |   struct ring_buffer* queue = create_queue(9);
 44 |   struct heap* heap1 = create_heap(MAX_HEAP, 10, queue);
 45 |   struct heap* heap2 = create_heap(MAX_HEAP, 10, queue);
 46 |   struct heap* heap = heap1;
 47 |   for (double i = 1.0; i < 50.0; i += 1.0) {
 48 |     heap = heap==heap1? heap2 : heap1;
 49 |     struct heap_element* elem = add_value_to_heap(heap, i);
 50 |     expire_stale_entry_in_queue(queue, 2, heap1, heap2);
 51 |     register_in_queue(queue, elem);
 52 |   }
 53 |   struct heap_element output;
 54 |   for (unsigned i = 0; i < 10; i += 1) {
 55 |     remove_front_element_from_heap(heap, &output);
 56 |     printf("%f\n", output.member);
 57 |   }
 58 | }
 59 | 
 60 | double generate_random_value(void) {
 61 |   return (double)rand() / (double)RAND_MAX;
 62 | }
 63 | 
 64 | void test_quantile(void) {
 65 |   printf("Testing...\n");
 66 |   struct rolling_quantile monitor = create_rolling_quantile_monitor(5, 2, NO_INTERPOLATION);
 67 |   double test_entries[] = {4.0, 2.0, 3.0, 2.5, 4.5, 3.5, 2.7, 3.9, 3.8, 3.1};
 68 |   unsigned test_size = sizeof(test_entries) / sizeof(double);
 69 |   for (unsigned i = 0; i < test_size; i += 1) {
 70 |     double quantile = update_rolling_quantile(&monitor, test_entries[i]);
 71 |     printf("%f\n", quantile);
 72 |   }
 73 | }
 74 | 
 75 | void stress_test_quantile_for_correctness(unsigned size, unsigned n_iterations) {
 76 |   printf("Stress-testing...\n");
 77 |   if (size % 2 == 0) size += 1;
 78 |   unsigned middle = (size-1)/2;
 79 |   struct rolling_quantile monitor = create_rolling_quantile_monitor(size, middle, NO_INTERPOLATION);
 80 |   double* window = malloc(size*sizeof(double));
 81 |   double* buffer = malloc(size*sizeof(double));
 82 |   bool* unsorted = malloc(size*sizeof(bool));
 83 |   unsigned window_pos = 0;
 84 |   for (unsigned i = 0; i < size; i += 1) {
 85 |     double value = generate_random_value();
 86 |     update_rolling_quantile(&monitor, value);
 87 |     window[i] = value;
 88 |   }
 89 |   for (unsigned t = 0; t < n_iterations; t += 1) {
 90 |     double value = generate_random_value();
 91 |     struct timespec timespec;
 92 |     clock_gettime(CLOCK_REALTIME, &timespec);
 93 |     double begin_time = (double)timespec.tv_sec + ((double)timespec.tv_nsec / 1e9);
 94 |     double pred_median = update_rolling_quantile(&monitor, value);
 95 |     clock_gettime(CLOCK_REALTIME, &timespec);
 96 |     double end_time = (double)timespec.tv_sec + ((double)timespec.tv_nsec / 1e9);
 97 |     printf("%.3e seconds; ", end_time - begin_time);
 98 |     window[window_pos++] = value;
 99 |     if (window_pos == size)
100 |       window_pos = 0;
101 |     // perform selection sort now, building up our one buffer
102 |     for (unsigned i = 0; i < size; i += 1)
103 |       unsorted[i] = true;
104 |     for (unsigned i = 0; i < size; i += 1) {
105 |       double min = INFINITY;
106 |       unsigned min_ind; // UNINITIALIZED
107 |       for (unsigned j = 0; j < size; j += 1) {
108 |         if ((window[j] <= min) && unsorted[j]) {
109 |           min = window[j];
110 |           min_ind = j;
111 |         }
112 |       }
113 |       buffer[i] = min;
114 |       unsorted[min_ind] = false;
115 |     }
116 |     // now buffer is sorted
117 |     double median = buffer[middle];
118 |     //for (unsigned i = 0; i < size; i += 1) printf("    %f    ", window[i]);
119 |     //for (unsigned i = 0; i < monitor->left_heap->n_entries; i += 1) printf("\n%f", monitor->left_heap->elements[i].member);
120 |     //printf("\n %f\n", monitor->current_value.member);
121 |     //for (unsigned i = 0; i < monitor->right_heap->n_entries; i += 1) printf("%f\n", monitor->right_heap->elements[i].member);
122 |     printf("%f %f %f %d %d\n", value, pred_median, median, pred_median==median, verify_monitor(&monitor));
123 |   }
124 | }
125 | 
126 | void test_pipeline(void) {
127 |   struct cascade_description descriptions[] = {
128 |     {.window = 5, .portion = 2, .subsample_rate = 2,
129 |       .mode = LOW_PASS, .interpolation = NO_INTERPOLATION},
130 |     {.window = 3,  .portion = 2, .subsample_rate = 1,
131 |       .mode = HIGH_PASS, .interpolation = NO_INTERPOLATION},
132 |   };
133 |   struct filter_pipeline* pipeline = create_filter_pipeline(2, descriptions);
134 |   double test_entries[] = {4.0, 2.0, 3.0, 2.5, 1.5, 1.2, 1.7, 0.9, 0.8, 1.1, 0.1, 0.3};
135 |   unsigned test_size = sizeof(test_entries) / sizeof(double);
136 |   for (unsigned i = 0; i < test_size; i += 1) {
137 |     double output = feed_filter_pipeline(pipeline, test_entries[i]);
138 |     printf("%f\n", output);
139 |   }
140 |   destroy_filter_pipeline(pipeline);
141 | }
142 | 
143 | void test_interpolating_pipeline(void) {
144 |   struct cascade_description descriptions[] = {
145 |     {.window = 3, .portion = 0, .subsample_rate = 1,
146 |       .mode = LOW_PASS, .interpolation = {
147 |         .target_quantile = 0.4, .alpha = 1.0, .beta = 1.0,
148 |       }},
149 |   };
150 |   struct filter_pipeline* pipeline = create_filter_pipeline(1, descriptions);
151 |   double test_entries[] = {4.0, 2.0, 3.0, 2.5, 1.5, 1.2, 1.7, 0.9, 0.8, 1.1, 0.1, 0.3};
152 |   unsigned test_size = sizeof(test_entries) / sizeof(double);
153 |   for (unsigned i = 0; i < test_size; i += 1) {
154 |     if (!verify_pipeline(pipeline)) {
155 |       printf("INVALID PIPELINE\n");
156 |     }
157 |     double output = feed_filter_pipeline(pipeline, test_entries[i]);
158 |     printf("%f\n", output);
159 |   }
160 |   destroy_filter_pipeline(pipeline);
161 | }
162 | 
163 | int main(void) {
164 |   test_quantile();
165 |   stress_test_quantile_for_correctness(3001, 10000);
166 |   //test_interpolating_pipeline();
167 | }
168 | 


--------------------------------------------------------------------------------