├── .github └── workflows │ ├── ci.yml │ ├── publish.yml │ └── purge-cache.yml.disabled ├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── example.png ├── python ├── .gitignore ├── MANIFEST.in ├── README.md ├── examples │ ├── benchmark.py │ └── illustration.py ├── pyproject.toml ├── rolling_quantiles │ └── __init__.py ├── setup.cfg ├── setup.py └── tests │ ├── input.py │ ├── pytest.ini │ ├── requirements.txt │ ├── test_guards.py │ ├── test_highpass.py │ ├── test_interpolation.py │ └── test_lowpass.py └── src ├── filter.c ├── filter.h ├── heap.c ├── heap.h ├── python.c ├── quantile.c ├── quantile.h └── test.c /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Python Tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: # jobs run in parallel. individual steps within a job do not. 6 | build-and-test: # arbitrary name 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | os: [ubuntu-latest, macos-latest, windows-latest] # the matrix strategy takes essentially a cartesian product of these options 11 | python-version: [3.8] 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python ${{ matrix.python-version }}. 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | - name: Build and install. 19 | shell: bash # not cmd/powershell that windows uses 20 | run: | 21 | python -m pip install --upgrade pip build 22 | cd python 23 | MACOSX_DEPLOYMENT_TARGET=10.9 python -m build --wheel 24 | python -m pip install dist/*.whl 25 | cd .. 26 | - name: Run tests. 27 | shell: bash 28 | run: | # pytest with no args. 
if it fails with a nonzero status code, that *should* pop up as a failure in Github's statuses 29 | cd python/tests 30 | python -m pip install -r requirements.txt 31 | pytest 32 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] # when a release or pre-release published. don't mind the nuances that differentiate this from 'created' or 'released' 6 | 7 | jobs: 8 | build-and-store: 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | os: [ubuntu-22.04, macos-latest, windows-latest] # use an old enough Linux distro to peacefully generate `manylinux` packages 13 | python-version: ["3.8", "3.9", "3.10"] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python ${{ matrix.python-version }}. 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Build and install. # mostly redundant with `ci.yml` 21 | shell: bash 22 | run: | 23 | python -m pip install --upgrade pip build 24 | cd python 25 | MACOSX_DEPLOYMENT_TARGET=10.9 python -m build --wheel 26 | python -m pip install dist/*.whl 27 | cd .. 28 | - name: Run tests. 29 | shell: bash 30 | run: | 31 | cd python/tests 32 | python -m pip install -r requirements.txt 33 | pytest 34 | cd ../.. 35 | - name: Audit wheel for manylinux. 36 | if: matrix.os == 'ubuntu-22.04' 37 | shell: bash 38 | run: | 39 | cd python 40 | python -m pip install --upgrade auditwheel 41 | python -m auditwheel repair --plat manylinux1_x86_64 dist/*.whl 42 | rm -r dist 43 | mv wheelhouse dist 44 | - name: Store the binary wheel. 
45 | uses: actions/upload-artifact@v2 46 | with: 47 | name: python-package-distributions 48 | path: python/dist/ 49 | 50 | publish: # can only upload from ubuntu, so collect all the packages here 51 | runs-on: ubuntu-latest 52 | needs: build-and-store 53 | steps: 54 | - name: Download all the wheels. 55 | uses: actions/download-artifact@v2 56 | with: 57 | name: python-package-distributions 58 | path: dist/ 59 | - name: Publish. 60 | uses: pypa/gh-action-pypi-publish@master # should upload contents of the `dist/` folder 61 | with: 62 | user: __token__ 63 | password: ${{ secrets.PYPI_TOKEN }} 64 | packages_dir: dist/ 65 | -------------------------------------------------------------------------------- /.github/workflows/purge-cache.yml.disabled: -------------------------------------------------------------------------------- 1 | # purge cache of our README's "pip downloads" badge from Github's static image host, compelling it to refresh regularly. 2 | name: Purge Cache 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" # cron syntax is funky. herein, we execute at the end of every day (UTC) 6 | jobs: 7 | clean-pip-downloads-badge: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Issue the curl request. 
11 | run: | 12 | curl -X PURGE https://camo.githubusercontent.com/ded078724cea6c7f2e1fdf788d2c4a7ec9c2a88b558493e2c0f34d397914b18e/68747470733a2f2f7374617469632e706570792e746563682f706572736f6e616c697a65642d62616467652f726f6c6c696e672d7175616e74696c65733f706572696f643d746f74616c26756e6974733d696e7465726e6174696f6e616c5f73797374656d266c6566745f636f6c6f723d626c75652672696768745f636f6c6f723d6f72616e6765266c6566745f746578743d706970253230646f776e6c6f616473 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | a.out* 3 | *.o 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Rolling Quantiles 2 | Copyright 2021 Myrl Marmarelis 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rolling Quantiles for NumPy 2 | 3 | [![Python tests](https://github.com/marmarelis/rolling-quantiles/actions/workflows/ci.yml/badge.svg?branch=master&event=push)](https://github.com/marmarelis/rolling-quantiles/actions/workflows/ci.yml) 4 | 5 | ## Hyper-efficient and composable filters. 6 | 7 | * Simple, clean, intuitive interface. 8 | * Streaming or batch processing. 9 | * Python 3 bindings for a lean library written in pure C. 10 | 11 | ### A Quick Tour 12 | 13 | Let me give you but a superficial overview of this module's elegance. 
14 | 15 | ```python 16 | import numpy as np 17 | import rolling_quantiles as rq 18 | 19 | pipe = rq.Pipeline( # rq.Pipeline is the only stateful object 20 | # declare a cascade of filters by a sequence of immutable description objects 21 | rq.LowPass(window=201, portion=100, subsample_rate=2), 22 | # the above takes a median (101st element out of 201) of the most recent 200 23 | # points and then spits out every other one 24 | rq.HighPass(window=10, portion=3)) 25 | # that subsampled rolling median is then fed into this filter that takes a 26 | # 30% quantile on a window of size 10, and subtracts it from its raw input 27 | 28 | # the pipeline exposes a set of read-only attributes that describe it 29 | pipe.lag # = 60.0, the effective number of time units that the real-time output 30 | # is delayed from the input 31 | pipe.stride # = 2, how many inputs it takes to produce an output 32 | # (>1 due to subsampling) 33 | 34 | 35 | input = np.random.randn(1000) 36 | output = pipe.feed(input) # the core, singular exposed method 37 | 38 | # every other output will be a NaN to demarcate unready values 39 | subsampled_output = output[1::pipe.stride] 40 | ``` 41 | ![Example Signal](example.png) 42 | 43 | That may be a lot to take in, so let me break it down for you: 44 | * `rq.Pipeline(description...)` constructs a filter pipeline from one or more filter descriptions and initializes internal state. 45 | * `.feed(*)` takes in a Python number or `np.array` and its output is shaped likewise. 46 | * The two filter types are `rq.LowPass` and `rq.HighPass` that compute rolling quantiles and return them as is, and subtract them from the raw signal respectively. Compose them however you like! 47 | * `NaN`s in the output purposefully indicate missing values, usually due to subsampling. If you pass a `NaN` into a `LowPass` filter, it will slowly deplete its reserve and continue to return valid quantiles until the window empties completely. 
48 | * `rq.LowPass` and `rq.HighPass` alternatively take in a `quantile=q` argument, `0<=q<=1`. The filters would perform a linear interpolation in this case. In order to control the statistical characteristics of this quantile estimate, parameters `alpha` and `beta` are exposed as well with default values `(1, 1)`. Refer to SciPy's [documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mstats.mquantiles.html) for details on this aspect. 49 | ```python 50 | interpolated_pipe = rq.Pipeline( 51 | # attempt to estimate the exact 40%-quantile by the 52 | # default linear interpolation with parameters (1, 1) 53 | rq.LowPass(window=30, quantile=0.4), 54 | # here, the estimate is "approximately unbiased" in 55 | # the case of Gaussian white noise 56 | rq.HighPass(window=10, quantile=0.3, 57 | alpha=3/8, beta=3/8)) 58 | ``` 59 | See this [Wikipedia section](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample) for an elucidating overview. 60 | 61 | I also expose a convenience function `rq.medfilt(signal, window_size)` at the top-level of the package to directly supplant `scipy.signal.medfilt`. 62 | 63 | That's it! I detailed the entire library. Don't let the size of its interface fool you! 64 | 65 | ## Installation 66 | [![Downloads](https://pepy.tech/badge/rolling-quantiles)](https://pepy.tech/project/rolling-quantiles) 67 | 68 | If you are running Linux, MacOS, or Windows with Python 3.8+ and NumPy ~1.20, execute the following: 69 | 70 | `pip install rolling-quantiles` 71 | 72 | These are the conditions under which binaries are built and sent to the Python Package Index, which holds `pip`'s packages. Should the NumPy version be unsuitable, for instance, I suggest building the package from source. This is rather straightforward because the handful of source files in C have absolutely minimal dependencies. 
73 | 74 | ### Building from Source 75 | 76 | The meat of this package is a handful of C files with no external dependencies, besides NumPy 1.16+ and Python 3.7+ for the bindings located in `src/python.c`. As such, you may build from source by running the following from the project's root directory: 77 | 1. `cd python` 78 | 2. Check `pyproject.toml` to make sure the listed NumPy version matches your desired target. The compiled package will be forward- but not backward-compatible. 79 | 3. `python -m build` (make sure this invokes Python 3) 80 | 4. `pip install dist/.whl` 81 | 82 | #### Note of Caution on MacOS Big Sur 83 | Make sure to specify `MACOSX_DEPLOYMENT_TARGET=10.X` as a prefix to the build command, e.g. `python -m build`. The placeholder `X` can be any MacOS version earlier than Big Sur (I use `9`.) By default, the build system would attempt to build for MacOS 11 that is incompatible with current Python interpreters that have been compiled against a prior version. 84 | 85 | 86 | ### Benchmarking a median filter on 100 million doubles. 87 | 88 | I make use of binary heaps that impart desirable guarantees on their amortized runtime. Realistically, their performance may depend on the statistics of the incoming signal. I pummeled the filters with Gaussian Brownian motion to gauge their practical usability under a typical drifting stochastic process. 
89 | 90 | | `window` | `rolling_quantiles` [1] | `scipy` [2] | `pandas` [3] | 91 | | :------- | ------------------: | ----------: | -----------: | 92 | | 4 | 14 seconds | 22 seconds | 25 seconds | 93 | | 10 | 21 seconds | 47 seconds | 31 seconds | 94 | | 20 | 28 seconds | 95 seconds | 35 seconds | 95 | | 30 | 30 seconds | 140 seconds | 37 seconds | 96 | | 40 | 34 seconds | 190 seconds | 40 seconds | 97 | | 50 | 36 seconds | 242 seconds | 40 seconds | 98 | | 1,000 | 61 seconds | N/A | 62 seconds | 99 | 100 | Likewise, with simulated Gaussian white noise (no drift in the signal): 101 | 102 | | `window` | `rolling_quantiles` [1] | `scipy` [2] | `pandas` [3] | 103 | | :------- | ------------------: | ----------: | -----------: | 104 | | 4 | 14 seconds | 22 seconds | 25 seconds | 105 | | 10 | 20 seconds | 51 seconds | 31 seconds | 106 | | 20 | 25 seconds | 105 seconds | 36 seconds | 107 | | 30 | 27 seconds | 156 seconds | 39 seconds | 108 | | 40 | 30 seconds | 218 seconds | 41 seconds | 109 | | 50 | 30 seconds | 279 seconds | 42 seconds | 110 | | 1,000 | 45 seconds | N/A | 70 seconds | 111 | 112 | Intel(R) Core(TM) i7-8700T CPU @ 2.40GHz, single-threaded performance on Linux. My algorithm looked even better (relative to pandas) on a 2020 MacBook Pro. Check out this [StackOverflow answer](https://stackoverflow.com/questions/60100276/fastest-way-for-2d-rolling-window-quantile/66482238#66482238) for a particular use case. 
113 | 114 | [1] `rq.Pipeline(...)` 115 | 116 | [2] `scipy.signal.medfilt(...)` 117 | 118 | [3] `pd.Series.rolling(*).quantile(...)` 119 | 120 | 121 | 122 | #### Brought to you by [Myrl](https://myrl.marmarel.is) 123 | -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marmarelis/rolling-quantiles/aeaeedf7ea39553a5a9199cd91c0113ff44d47d7/example.png -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | rolling_quantiles.egg-info 4 | src/ 5 | __pycache__/ 6 | LICENSE 7 | pypi-token.txt 8 | -------------------------------------------------------------------------------- /python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-exclude .DS_Store 2 | global-exclude pypi-token.txt 3 | include src/*.h 4 | include src/*.c 5 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Rolling Quantiles for NumPy 2 | ## Hyper-efficient and composable filters. 3 | 4 | * Simple, clean, intuitive interface. 5 | * Supports streaming data or bulk processing. 6 | * Python 3 bindings for a compact library written in pure C. 
7 | 8 | ### A Quick Tour 9 | 10 | ```python 11 | import numpy as np 12 | import rolling_quantiles as rq 13 | 14 | pipe = rq.Pipeline( # rq.Pipeline is the only stateful object 15 | # declare a cascade of filters by a sequence of immutable description objects 16 | rq.LowPass(window=200, portion=100, subsample_rate=2), 17 | # the above takes a median (100 out of 200) of the most recent 200 points 18 | # and then spits out every other one 19 | rq.HighPass(window=10, portion=3, subsample_rate=1)) 20 | # that subsampled rolling median is then fed into this filter that takes a 21 | # 30% quantile on a window of size 10, and subtracts it from its raw input 22 | 23 | # the pipeline exposes a set of read-only attributes that describe it 24 | pipe.lag # = 60.0, the effective number of time units that the real-time output 25 | # is delayed from the input 26 | pipe.stride # = 2, how many inputs it takes to produce an output 27 | # (>1 due to subsampling) 28 | 29 | 30 | input = np.random.randn(1000) 31 | output = pipe.feed(input) # the core, singular exposed method 32 | 33 | # every other output will be a NaN to demarcate unready values 34 | subsampled_output = output[1::pipe.stride] 35 | ``` 36 | 37 | See the [Github repository](https://github.com/marmarelis/rolling-quantiles) for more details. 38 | -------------------------------------------------------------------------------- /python/examples/benchmark.py: -------------------------------------------------------------------------------- 1 | # estimate the average number of values processed per second in offline mode (not streaming, 2 | # although it's all the same for my technique) to compare against scipy. signals that are less 3 | # stationary should induce more tree operations; hence, compare the following for different 4 | # window sizes: Gaussian white noise, Brownian motion, and Levy flights. 
5 | 6 | # an interesting consequence is that my amortized runtime complexity is well-characterized, 7 | # but in practice it depends on the signal behavior (so nondeterministic for stochastic processes) 8 | 9 | import numpy as np 10 | from scipy.signal import medfilt 11 | from scipy.stats import levy 12 | import pandas as pd 13 | import rolling_quantiles as rq 14 | import time 15 | from matplotlib import pyplot as plt 16 | plt.ion() 17 | 18 | def measure_runtime(f): 19 | start = time.perf_counter() # could also try time.monotonic() 20 | res = f() 21 | return time.perf_counter() - start, res 22 | 23 | signal = np.cumsum(np.random.normal(size=100_000_000)) 24 | series = pd.Series(signal) # construct a priori for fairness 25 | window_sizes = np.array([4, 10, 20, 30, 40, 50]) + 1 # odd 26 | 27 | rq_times, sc_times, pd_times = [], [], [] 28 | 29 | for window_size in window_sizes: 30 | pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2, subsample_rate=1)) 31 | rq_time, rq_res = measure_runtime(lambda: pipe.feed(signal)) 32 | sc_time, sc_res = measure_runtime(lambda: medfilt(signal, window_size)) 33 | pd_time, pd_res = measure_runtime(lambda: series.rolling(window_size).quantile(0.5, interpolation="nearest")) 34 | # rq_res and sc_res will differ slightly at the edges because medfilt pads both sides with zeros as if it were a convolution. 35 | # I pad at the beginning only, since I employ an online algorithm. 
36 | offset = window_size // 2 37 | discrepancy = rq_res[1000:2000] - sc_res[(1000-offset):(2000-offset)] 38 | #print("maximum discrepancy between the two is", np.amax(np.abs(discrepancy))) 39 | assert np.amax(np.abs(discrepancy)) < 1e-10 40 | print("runtimes are", rq_time, "versus", sc_time, "versus", pd_time) 41 | rq_times.append(rq_time) 42 | sc_times.append(sc_time) 43 | pd_times.append(pd_time) 44 | 45 | plt.plot(window_sizes, rq_times) 46 | plt.plot(window_sizes, sc_times) 47 | plt.plot(window_sizes, pd_times) 48 | -------------------------------------------------------------------------------- /python/examples/illustration.py: -------------------------------------------------------------------------------- 1 | # illustration of what my hypothetical API should look like 2 | 3 | import numpy as np 4 | import rolling_quantiles as rq 5 | 6 | filter = rq.Pipeline( # stateful filter 7 | rq.LowPass(window=100, portion=50, subsample_rate=2), 8 | rq.HighPass(window=10, portion=3, subsample_rate=1)) 9 | 10 | # expose specialized pipelines like `rq.MedianFilter` 11 | 12 | input = np.random.randn(1000) 13 | output = filter.feed(input) # a single `ufunc` entry point that takes in arrays or scalars and spits out an appropriate amount of output 14 | 15 | 16 | ## CONCEPT. 17 | 18 | rq.LineUp(rq.Pipeline) # possibly parallelized execution of parallel pipelines 19 | 20 | big_input = np.random.randn(100, 1000) 21 | # broadcast. route one row to each pipeline. 
22 | big_output = pipes.feed(big_input) # respects Fortran or C ordering to preserve cache locality 23 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=54", "wheel", "numpy~=1.20"] #"packaging>=20.5" 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /python/rolling_quantiles/__init__.py: -------------------------------------------------------------------------------- 1 | # A Python module is basically a file. A Python package is a directory that acts as a parent module with many submodules. 2 | 3 | __version__ = "1.1.0" 4 | 5 | from .triton import * 6 | 7 | # expose a rolling-median convenience method as a direct replacement to scipy.signal.medfilt 8 | def medfilt(signal, window_size): 9 | import numpy as np # don't pollute the top-level namespace 10 | pipeline = Pipeline( 11 | LowPass(window=window_size, quantile=0.5, subsample_rate=1)) 12 | return pipeline.feed(np.array(signal)) 13 | -------------------------------------------------------------------------------- /python/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = rolling_quantiles 3 | version = attr: rolling_quantiles.__version__ 4 | description = Composable and blazing fast rolling-quantile filters for streaming data and bulk batches. 
5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | keywords = numpy, filter, numeric, signal, streaming, scipy, quantiles, rolling, efficient, realtime 8 | license_files = ../LICENSE 9 | url = https://github.com/marmarelis/rolling-quantiles 10 | author = Myrl Marmarelis 11 | author_email = myrl@marmarel.is 12 | 13 | [options] 14 | zip_safe = true 15 | packages = find: 16 | include_package_data = true 17 | setup_requires = numpy ~= 1.20 18 | install_requires = numpy ~= 1.20 19 | python_requires = >=3.7 20 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools.extension import Extension 2 | from setuptools import setup # using this instead of numpy.distutils.core, as there seem to be incompatibilities with the "new way" of defining setups 3 | from numpy.distutils.misc_util import get_info 4 | import numpy as np 5 | import os 6 | import shutil 7 | from glob import glob 8 | 9 | # it feels like there's a billion different ways to do things: not very Pythonic! There is even overlap between pyproject.toml and setup.cfg! 10 | # this all feels completely like a work in progress. 11 | 12 | # NOTE (I learned this the hard way): DO NOT TRY TO IMPORT THIS PACKAGE FROM A PYTHON CONSOLE IN THIS DIRECTORY. 13 | # IT WILL GRAVITATE TO THE LOCAL COPY, AND FAIL TO LOCATE TRITON. 14 | 15 | source_files = sum((glob(os.path.join("..", "src", f"*.{ext}")) for ext in ["h", "c"]), start=[]) 16 | os.makedirs("src", exist_ok=True) 17 | for file in source_files: 18 | shutil.copy(file, "src") 19 | 20 | ext_files = ["filter.c", "heap.c", "quantile.c", "python.c"] # cryptic errors all over the place... 
21 | 22 | setup( 23 | ext_package = "rolling_quantiles", # important to specify that triton's fully qualified name should be rolling_quantiles.triton 24 | ext_modules = [ 25 | Extension("triton", # does a triton/__init__.py need to exist as a placeholder marker for my extension module? 26 | [os.path.join("src", file) for file in ext_files], 27 | include_dirs = [np.get_include()], 28 | extra_compile_args=["-O3"]) 29 | ] 30 | ) 31 | -------------------------------------------------------------------------------- /python/tests/input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def example_input(length): 4 | return np.cumsum(np.random.normal(size=length)) 5 | -------------------------------------------------------------------------------- /python/tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | error 4 | ignore::UserWarning 5 | ignore::DeprecationWarning 6 | -------------------------------------------------------------------------------- /python/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas >= 1.2 2 | numpy >= 1.20 3 | scipy >= 1.5 4 | pytest 5 | -------------------------------------------------------------------------------- /python/tests/test_guards.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import rolling_quantiles as rq 4 | 5 | def test_window_size(): 6 | with pytest.raises(ValueError): 7 | rq.Pipeline(rq.LowPass()) 8 | 9 | def test_interpolator_bounds(): 10 | with pytest.raises(ValueError): 11 | rq.Pipeline(rq.LowPass( 12 | window=10, portion=2, subsample_rate=1, quantile=0.5, alpha=2.0)) 13 | with pytest.raises(ValueError): 14 | rq.Pipeline(rq.LowPass( 15 | window=10, portion=2, subsample_rate=1, quantile=2.5)) 16 | 
-------------------------------------------------------------------------------- /python/tests/test_highpass.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import rolling_quantiles as rq 4 | from input import example_input 5 | 6 | def test_median_scalar_inputs(window_size=3, length=100): 7 | pipe = rq.Pipeline(rq.HighPass(window=window_size, portion=window_size//2)) 8 | v = example_input(length) 9 | assert pipe.lag == window_size/2 10 | for i, x in enumerate(v): 11 | y = pipe.feed(x) 12 | if i >= window_size: 13 | median = np.median(v[(i-window_size+1):(i+1)]) 14 | assert y == (v[i-window_size//2] - median) 15 | 16 | def test_median_array_input(window_size=71, length=1000): 17 | pipe = rq.Pipeline(rq.HighPass(window=window_size, portion=window_size//2)) 18 | x = example_input(length) 19 | y = pipe.feed(x) 20 | z = pd.Series(x).rolling(window_size).median() 21 | lag = window_size//2 # note: as evidenced, high-pass filters do not interpolate on half-windows yet. 22 | assert pipe.lag == window_size/2 23 | assert np.equal( 24 | y[window_size:], 25 | x[lag+1:-lag] - z.values[window_size:] 26 | ).all() # exact equality. 
27 | -------------------------------------------------------------------------------- /python/tests/test_interpolation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from scipy.stats.mstats import mquantiles 4 | import rolling_quantiles as rq 5 | from input import example_input 6 | 7 | def test_innocuous_interpolation(window_size=1001, length=10000): 8 | pipe = rq.Pipeline(rq.LowPass(window=window_size, quantile=0.5)) 9 | x = example_input(length) 10 | y = pipe.feed(x) 11 | z = pd.Series(x).rolling(window_size).median() 12 | assert np.equal(y[window_size:], z.values[window_size:]).all() 13 | 14 | def test_typical_interpolation(window_size=40, quantile=0.2): 15 | x = example_input(window_size) # one window only, due to scipy 16 | pipe = rq.Pipeline(rq.LowPass(window=window_size, quantile=quantile)) 17 | y = pipe.feed(x) 18 | z = mquantiles(x, quantile, alphap=1, betap=1) 19 | assert z == y[-1] 20 | 21 | # a flavor of fuzzing 22 | def test_fancy_interpolation(window_size=10, n_trials=200): # small windows may be more prone to boundary/edge-condition bugs 23 | for trial in range(n_trials): 24 | x = example_input(window_size) 25 | quantile = np.random.uniform() 26 | alpha, beta = np.random.uniform(size=2) 27 | pipe = rq.Pipeline(rq.LowPass(window=window_size, quantile=quantile, alpha=alpha, beta=beta)) 28 | y = pipe.feed(x) 29 | z = mquantiles(x, quantile, alphap=alpha, betap=beta) 30 | assert z == y[-1] 31 | -------------------------------------------------------------------------------- /python/tests/test_lowpass.py: -------------------------------------------------------------------------------- 1 | # for pytest. I do not hook this up to pyproject.toml as it is intended or perhaps best practice. 
2 | 3 | import numpy as np 4 | import pandas as pd 5 | import rolling_quantiles as rq 6 | from input import example_input 7 | 8 | def test_median_scalar_inputs(window_size=3, length=100): # no interpolation yet 9 | pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2)) 10 | v = example_input(length) 11 | for i, x in enumerate(v): 12 | y = pipe.feed(x) 13 | if i >= window_size: 14 | assert y == np.median(v[(i-window_size+1):(i+1)]) 15 | 16 | def test_median_array_input(window_size=71, length=1000): 17 | pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2)) 18 | x = example_input(length) 19 | y = pipe.feed(x) 20 | z = pd.Series(x).rolling(window_size).median() 21 | assert np.equal(y[window_size:], z.values[window_size:]).all() # exact equality, since no arithmetic is done on the numbers 22 | 23 | def test_basic_nans(window_size=5, length=20): 24 | # make sure the pipeline effectively flushes its contents with NaNs 25 | pipe = rq.Pipeline(rq.LowPass(window=window_size, portion=window_size//2)) 26 | x = example_input(length) 27 | y = pipe.feed(x) 28 | for i in range(window_size): 29 | pipe.feed(np.nan) 30 | z = pipe.feed(x) 31 | assert np.equal(y, z).all() 32 | -------------------------------------------------------------------------------- /src/filter.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #include "filter.h" 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | // only supports adding and obtaining the middle element 24 | struct high_pass_buffer { 25 | // this `head` is unsigned (rather than a pointer) because we will do math on it 26 | // points to the element right after the latest entry 27 | unsigned head; 28 | unsigned size; 29 | bool full; 30 | double entries[]; 31 | }; 32 | 33 | static struct high_pass_buffer* create_high_pass_buffer(unsigned size) { 34 | struct high_pass_buffer* buffer = malloc(sizeof(struct high_pass_buffer) + sizeof(double)*size); 35 | buffer->head = 0; 36 | buffer->size = size; 37 | buffer->full = false; 38 | // buffer->entries remains uninitialized on purpose 39 | return buffer; 40 | } 41 | 42 | static void add_to_high_pass_buffer(struct high_pass_buffer* buffer, double value) { 43 | if (buffer->head == buffer->size) { 44 | buffer->full = true; // always set---would be more expensive to read and conditionally write 45 | buffer->head = 0; 46 | } 47 | buffer->entries[buffer->head++] = value; 48 | } 49 | 50 | static double find_high_pass_buffer_middle(struct high_pass_buffer* buffer) { 51 | if (!buffer->full) { 52 | // match the below, which subtracts in the other direction. we're implicitly rounding up, in a way, by not subtracting one 53 | int half = buffer->head / 2; // should optimize to bit shifts. 
don't use the remainder, (buffer->head % 2) 54 | return buffer->entries[half]; 55 | } 56 | int half = (buffer->size / 2) + (buffer->size % 2); 57 | // by not subtracting one from head (and rounding `half` up,) I index the element to the right of the middle with even sizes 58 | int index = (int)buffer->head - half; 59 | if (index < 0) 60 | index = (int)buffer->size + index; 61 | return buffer->entries[index]; 62 | } 63 | 64 | static void destroy_high_pass_buffer(struct high_pass_buffer* buffer) { 65 | free(buffer); 66 | } 67 | 68 | struct cascade_filter create_cascade_filter(struct cascade_description description) { 69 | unsigned portion = description.portion; 70 | double target = description.interpolation.target_quantile; 71 | if (!isnan(target)) { 72 | double target = compute_interpolation_target( 73 | description.window, description.interpolation); 74 | portion = (unsigned)fmax(floor(target), 1.0) - 1; 75 | } 76 | struct cascade_filter filter = { 77 | .monitor = create_rolling_quantile_monitor( 78 | description.window, portion, description.interpolation), 79 | .clock = 0, 80 | .subsample_rate = description.subsample_rate, 81 | .high_pass_buffer = NULL, 82 | }; 83 | if (description.mode == HIGH_PASS) { 84 | filter.high_pass_buffer = create_high_pass_buffer(description.window); 85 | } 86 | return filter; 87 | } 88 | 89 | struct filter_pipeline* create_filter_pipeline(unsigned n_filters, struct cascade_description* descriptions) { 90 | for (struct cascade_description* description = descriptions; 91 | description != (descriptions + n_filters); description += 1) { 92 | if (!validate_interpolation(description->interpolation)) 93 | return NULL; // before allocating anything 94 | } 95 | struct filter_pipeline* pipeline = malloc( 96 | sizeof(struct filter_pipeline) + n_filters*sizeof(struct cascade_filter)); 97 | pipeline->n_filters = n_filters; 98 | for (unsigned i = 0; i < n_filters; i += 1) { 99 | pipeline->filters[i] = create_cascade_filter(descriptions[i]); 100 | } 
101 | return pipeline; 102 | } 103 | 104 | double feed_filter_pipeline(struct filter_pipeline* pipeline, double entry) { 105 | double trickling_value = entry; 106 | for (unsigned i = 0; i < pipeline->n_filters; i += 1) { // trickle down the pipeline 107 | struct cascade_filter* filter = pipeline->filters + i; 108 | double quantile = update_rolling_quantile(&filter->monitor, trickling_value); 109 | if (filter->high_pass_buffer != NULL) { // explicit conditional for enhanced clarity 110 | add_to_high_pass_buffer(filter->high_pass_buffer, trickling_value); 111 | double middle = find_high_pass_buffer_middle(filter->high_pass_buffer); 112 | trickling_value = middle - quantile; 113 | } else { 114 | trickling_value = quantile; 115 | } 116 | if ((++filter->clock) < filter->subsample_rate) 117 | return NAN; 118 | filter->clock = 0; 119 | } 120 | return trickling_value; // made it all the way through the torturous path! 121 | } 122 | 123 | bool verify_pipeline(struct filter_pipeline* pipeline) { 124 | for (unsigned i = 0; i < pipeline->n_filters; i += 1) { 125 | if (!verify_monitor(&pipeline->filters[i].monitor)) 126 | return false; 127 | } 128 | return true; 129 | } 130 | 131 | void destroy_filter_pipeline(struct filter_pipeline* pipeline) { 132 | for (unsigned i = 0; i < pipeline->n_filters; i += 1) { 133 | destroy_rolling_quantile_monitor(&pipeline->filters[i].monitor); 134 | struct high_pass_buffer* buffer = pipeline->filters[i].high_pass_buffer; 135 | if (buffer != NULL) destroy_high_pass_buffer(buffer); 136 | } 137 | free(pipeline); 138 | } 139 | -------------------------------------------------------------------------------- /src/filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #ifndef FILTER_H 18 | #define FILTER_H 19 | 20 | #include "quantile.h" 21 | 22 | /* 23 | For a high-pass, wherein I would subtract a smoothed signal from the raw, I 24 | would need to keep track of the temporal order so that I can refer back to 25 | the "middle" value. 26 | (IN TESTING) Perhaps it can be done rather straightforwardly by means 27 | of the ring_buffer, but I have not ventured into that question yet. I simply 28 | mention this because it appears to be a logical addition to the pipeline 29 | functionality/"DSL". 30 | */ 31 | 32 | /* 33 | The high-pass filter does not support missing values demarcated by NaN, as 34 | that mode relies upon the raw signal's availability. One could affix a 35 | low-pass filter onto a high-pass intake to "smooth out" the NaNs before 36 | they have a chance of entering the high-pass filter down the line. 
37 | */ 38 | 39 | enum cascade_mode { 40 | HIGH_PASS, LOW_PASS 41 | }; 42 | 43 | struct cascade_description { 44 | unsigned window; 45 | unsigned portion; 46 | struct interpolation interpolation; // if NAN, refer to `portion` 47 | unsigned subsample_rate; 48 | enum cascade_mode mode; 49 | }; 50 | 51 | struct high_pass_buffer; 52 | 53 | struct cascade_filter { 54 | struct rolling_quantile monitor; 55 | unsigned clock; 56 | unsigned subsample_rate; 57 | struct high_pass_buffer* high_pass_buffer; // set to NULL when a low pass is desired 58 | }; 59 | 60 | struct filter_pipeline { 61 | unsigned n_filters; 62 | struct cascade_filter filters[]; 63 | }; 64 | 65 | struct cascade_filter create_cascade_filter(struct cascade_description description); 66 | struct filter_pipeline* create_filter_pipeline(unsigned n_filters, struct cascade_description* descriptions); 67 | double feed_filter_pipeline(struct filter_pipeline* pipeline, double entry); 68 | bool verify_pipeline(struct filter_pipeline* pipeline); 69 | void destroy_filter_pipeline(struct filter_pipeline* pipeline); 70 | 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /src/heap.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #include "heap.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | struct ring_buffer* create_queue(unsigned size) { 27 | unsigned buffer_size = size * sizeof(ring_buffer_elem); 28 | struct ring_buffer* buffer = malloc(sizeof(struct ring_buffer) + buffer_size); 29 | buffer->size = size; 30 | buffer->n_entries = 0; 31 | buffer->head = &buffer->entries[0]; // slight semantic (NOT teleological) distinction between this and `buffer->entries` 32 | memset(buffer->entries, 0, buffer_size); 33 | return buffer; 34 | } 35 | 36 | struct heap* create_heap(enum heap_mode mode, unsigned size, struct ring_buffer* queue) { 37 | unsigned n_entries = size; // not necessarily trivial 38 | struct heap* data = calloc(1, sizeof(struct heap) + n_entries*sizeof(struct heap_element)); // calloc in order to ensure our elements are zeroed out 39 | data->mode = mode; 40 | data->size = size; 41 | data->queue = queue; 42 | return data; 43 | } 44 | 45 | void destroy_queue(struct ring_buffer* queue) { 46 | free(queue); 47 | } 48 | 49 | void destroy_heap(struct heap* heap) { 50 | free(heap); 51 | } 52 | 53 | bool is_ring_buffer_full(struct ring_buffer* buffer) { 54 | return buffer->n_entries == buffer->size; 55 | } 56 | 57 | bool is_ring_buffer_empty(struct ring_buffer* buffer) { 58 | return buffer->n_entries == 0; 59 | } 60 | 61 | void advance_ring_buffer(struct ring_buffer* buffer) { 62 | buffer->head++; 63 | if (buffer->head == (buffer->entries + buffer->size)) { 64 | buffer->head = buffer->entries; 65 | } 66 | } 67 | 68 | static // buffer does not have to be full as long as it isn't empty, but it might return NULL if it isn't full 69 | ring_buffer_elem extract_oldest_entry_from_ring_buffer(struct ring_buffer* buffer) { // removes the entry too, and increments the head 70 | struct heap_element* entry = *buffer->head; 71 | *buffer->head = NULL; 72 | return entry; 73 | } 74 | 75 | static 76 | ring_buffer_elem* 
get_next_position_in_ring_buffer(struct ring_buffer* buffer) { 77 | return buffer->head; 78 | } 79 | 80 | static 81 | void xor_swap(void* a, void* b, size_t size) { // overkill hahah. do it byte by byte so that we are data-type agnostic 82 | char* a_bytes = a; 83 | char* b_bytes = b; 84 | for (size_t i = 0; i < size; i += 1) { 85 | a_bytes[i] ^= b_bytes[i]; 86 | b_bytes[i] ^= a_bytes[i]; 87 | a_bytes[i] ^= b_bytes[i]; 88 | } 89 | } 90 | 91 | static 92 | void plain_swap(void* a, void* b, size_t size) { 93 | char* a_bytes = a; 94 | char* b_bytes = b; 95 | for (size_t i = 0; i < size; i += 1) { 96 | char c = a_bytes[i]; 97 | a_bytes[i] = b_bytes[i]; 98 | b_bytes[i] = c; 99 | } 100 | } 101 | 102 | static 103 | void swap(void* a, void* b, size_t size) { 104 | plain_swap(a, b, size); 105 | } 106 | 107 | static 108 | void swap_elements_in_heap(struct heap_element* a, struct heap_element* b) { 109 | swap(&a->member, &b->member, sizeof(double)); 110 | /*if (a->loc_in_buffer && b->loc_in_buffer) { swap with actual addresses, rather than queue entries 111 | swap(a->loc_in_buffer, b->loc_in_buffer, sizeof(struct heap_element*)); 112 | } else*/ 113 | if (a->loc_in_buffer) { 114 | *a->loc_in_buffer = b; 115 | } 116 | if (b->loc_in_buffer) { 117 | *b->loc_in_buffer = a; 118 | } 119 | swap(&a->loc_in_buffer, &b->loc_in_buffer, sizeof(struct heap_element**)); 120 | } 121 | 122 | static 123 | void trickle_down(struct heap* heap, unsigned i) { // conscious of the tags in the queue that may be invalidated 124 | struct heap_element* node = heap->elements + i; 125 | struct heap_element* first_child = heap->elements + (2*i + 1); 126 | struct heap_element* second_child = heap->elements + (2*i + 2); 127 | struct heap_element* limit = heap->elements + heap->n_entries; 128 | if (heap->mode == MAX_HEAP) { 129 | if (first_child >= limit) { 130 | if (second_child >= limit) 131 | return; 132 | if (node->member < second_child->member) 133 | swap_elements_in_heap(second_child, node); 134 | return; 
135 | } 136 | if (second_child >= limit) { 137 | if (node->member < first_child->member) 138 | swap_elements_in_heap(first_child, node); 139 | return; 140 | } 141 | if ((node->member > first_child->member) && (node->member > second_child->member)) 142 | return; 143 | if (first_child->member > second_child->member) { 144 | swap_elements_in_heap(first_child, node); 145 | trickle_down(heap, 2*i + 1); 146 | } else { 147 | swap_elements_in_heap(second_child, node); 148 | trickle_down(heap, 2*i + 2); // tail-call optimized 149 | } 150 | } else if (heap->mode == MIN_HEAP) { 151 | if (first_child >= limit) { // redundant, since the first child is always right before the second child. 152 | if (second_child >= limit) 153 | return; 154 | if (node->member > second_child->member) 155 | swap_elements_in_heap(second_child, node); 156 | return; 157 | } 158 | if (second_child >= limit) { 159 | if (node->member > first_child->member) 160 | swap_elements_in_heap(first_child, node); 161 | return; 162 | } 163 | if ((node->member < first_child->member) && (node->member < second_child->member)) 164 | return; 165 | if (first_child->member < second_child->member) { 166 | swap_elements_in_heap(first_child, node); 167 | trickle_down(heap, 2*i + 1); 168 | } else { 169 | swap_elements_in_heap(second_child, node); 170 | trickle_down(heap, 2*i + 2); 171 | } 172 | } 173 | } 174 | 175 | static 176 | unsigned trickle_up(struct heap* heap, unsigned i) { 177 | if (i == 0) return 0; 178 | unsigned pos = i; 179 | unsigned parent_index = (i-1) / 2; 180 | struct heap_element* node = &heap->elements[i]; 181 | struct heap_element* parent = &heap->elements[parent_index]; // division should be efficient 182 | if (heap->mode == MAX_HEAP) { 183 | if (node->member > parent->member) { 184 | swap_elements_in_heap(parent, node); 185 | return trickle_up(heap, parent_index); 186 | } 187 | } else if (heap->mode == MIN_HEAP) { 188 | if (node->member < parent->member) { 189 | swap_elements_in_heap(parent, node); 190 | 
return trickle_up(heap, parent_index); 191 | } 192 | } 193 | return pos; 194 | } 195 | 196 | bool belongs_to_this_heap(struct heap* heap, struct heap_element* elem) { // when we come from a queue connected to many heaps, we need to locate the heap that contains each element 197 | return (elem >= heap->elements) && (elem < (heap->elements + heap->n_entries)); 198 | } 199 | 200 | void remove_front_element_from_heap(struct heap* heap, struct heap_element* dest) { // the circular queue still maintains its order, and simply skips over the entries that have already been extracted when it's their time to expire 201 | if (heap->n_entries == 0) { 202 | *dest = (struct heap_element) { .member = NAN, .loc_in_buffer = NULL }; 203 | return; 204 | } 205 | struct heap_element* last_node = heap->elements + heap->n_entries - 1; 206 | struct heap_element* root_node = heap->elements; 207 | //struct heap_element extremum = *root_node; 208 | swap_elements_in_heap(root_node, last_node); 209 | heap->n_entries -= 1; 210 | trickle_down(heap, 0); 211 | //*extremum.loc_in_buffer = NULL; // clear our entry in the queue so that it doesn't mess up the guy that takes our address. keep track of loc_in_buffer so that it can be updated later. 212 | swap_elements_in_heap(last_node, dest); // `last_node` cannot be affected by the trickler 213 | if (last_node->loc_in_buffer != NULL) { 214 | // since `swap_elements_in_heap` is quite aggressive with restoring previously-null queue entries, clear this out for the case that it is never brought back 215 | // honestly, the (teleological! post hoc?) 
reasoning behind this line is a little confusing 216 | *last_node->loc_in_buffer = NULL; 217 | } 218 | } 219 | 220 | double view_front_of_heap(struct heap* heap) { 221 | if (heap->n_entries == 0) 222 | return NAN; 223 | return heap->elements[0].member; 224 | } 225 | 226 | struct heap_element* add_value_to_heap(struct heap* heap, double value) { 227 | // note: cannot swap into this local variable, even though its own `loc_in_buffer` is empty 228 | struct heap_element new_entry = { 229 | .member = value, 230 | .loc_in_buffer = NULL }; 231 | return add_element_to_heap(heap, new_entry); 232 | } 233 | 234 | // there is a shortcut path for inserting and then immediately extracting. Consider implementing that as a special case. 235 | struct heap_element* add_element_to_heap(struct heap* heap, struct heap_element new_elem) { // returns new heap element, not -> if it popped its oldest member in order to make space, then it returns heap_element with loc_in_buffer repurposed to act like an optional value's flag 236 | if (heap->n_entries == heap->size) 237 | return NULL; // (struct heap_element) { .member = NAN, .loc_in_buffer = NULL }; 238 | unsigned index_to_place = heap->n_entries; 239 | heap->elements[index_to_place] = new_elem; 240 | if (new_elem.loc_in_buffer != NULL) { // if this element was taken from a different heap, rectify its stale pointer. do it before trickling up, so that the correct pointer is propagated 241 | *new_elem.loc_in_buffer = heap->elements + index_to_place; 242 | } 243 | heap->n_entries += 1; 244 | unsigned index_placed = trickle_up(heap, index_to_place); 245 | return heap->elements + index_placed; 246 | } 247 | 248 | void register_in_queue(struct ring_buffer* queue, struct heap_element* elem) { 249 | queue->n_entries += 1; 250 | elem->loc_in_buffer = get_next_position_in_ring_buffer(queue); 251 | *elem->loc_in_buffer = elem; 252 | } 253 | 254 | /* 255 | Return value. 
256 | -> if -1, the queue was already empty 257 | -> if 0, the expired entry did not belong to a heap 258 | -> if positive, then the index of the expired entry's heap (1-based) 259 | */ 260 | int expire_stale_entry_in_queue(struct ring_buffer* queue, unsigned n_heaps, ...) { 261 | //if (!is_ring_buffer_full(queue)) drastic change of behavior since this... 262 | // return true; 263 | if (is_ring_buffer_empty(queue)) 264 | return -1; 265 | struct heap_element* oldest_elem = extract_oldest_entry_from_ring_buffer(queue); 266 | if (oldest_elem == NULL) 267 | return -1; 268 | if (queue->n_entries > 0) { // this better not happen, but have a safeguard just in case... 269 | queue->n_entries -= 1; 270 | } 271 | va_list heaps; 272 | va_start(heaps, n_heaps); 273 | unsigned i; 274 | for (i = 0; i < n_heaps; i += 1) { 275 | struct heap* heap = va_arg(heaps, struct heap*); 276 | if (!belongs_to_this_heap(heap, oldest_elem)) 277 | continue; 278 | //*oldest_elem->loc_in_buffer = NULL; // signal that it's already been removed. since we already advanced the buffer, we may not have to do this in practice. 279 | struct heap_element* last_elem = heap->elements + heap->n_entries - 1; 280 | heap->n_entries -= 1; 281 | if (last_elem != oldest_elem) { 282 | double oldest_value = oldest_elem->member; 283 | double last_value = last_elem->member; 284 | *oldest_elem = *last_elem; // last_entry will stay in the queue after another entry is added, since oldest_entry will be thrown instead. 
no need to void last_entry since we'll immediately add a new one on top of it 285 | *last_elem->loc_in_buffer = oldest_elem; // in the end, we are swapping without care for the ultimate contents of the old last_elem 286 | unsigned index_of_oldest = oldest_elem - heap->elements; 287 | if ((heap->mode == MIN_HEAP && oldest_value < last_value) || 288 | (heap->mode == MAX_HEAP && oldest_value > last_value)) { 289 | trickle_down(heap, index_of_oldest); // we moved the last guy on top of the oldest, so we may have to trickle it down again 290 | } else { 291 | trickle_up(heap, index_of_oldest); // DID THIS: can I somehow avoid having to do both? as it is, the last element that got transplanted may have to go up or down depending on which parent it lands. I know! If this really becomes an issue, I may compare this element to the previous occupant to know which direction it should take. 292 | } 293 | } 294 | break; 295 | } 296 | va_end(heaps); 297 | if (i < n_heaps) { // did we locate an owner heap? 
298 | return (int)(i + 1); 299 | } else { 300 | return 0; 301 | } 302 | } 303 | 304 | bool verify_heap(struct heap* heap) { 305 | for (unsigned i = 0; i < heap->n_entries; i += 1) { 306 | unsigned left_child = 2*i + 1; 307 | unsigned right_child = 2*i + 2; 308 | if (heap->mode == MAX_HEAP && 309 | ((left_child < heap->n_entries && heap->elements[i].member < heap->elements[left_child].member) || 310 | (right_child < heap->n_entries && heap->elements[i].member < heap->elements[right_child].member))) { 311 | return false; 312 | } 313 | if (heap->mode == MIN_HEAP && 314 | ((left_child < heap->n_entries && heap->elements[i].member > heap->elements[left_child].member) || 315 | (right_child < heap->n_entries && heap->elements[i].member > heap->elements[right_child].member))) { 316 | return false; 317 | } 318 | } 319 | return true; 320 | } 321 | -------------------------------------------------------------------------------- /src/heap.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #ifndef HEAP_H 18 | #define HEAP_H 19 | 20 | #include // no need to hassle over the myriad of different data types provided here, which seem to matter most in the stylized abstract world of the C standard 21 | #include 22 | 23 | enum heap_mode { 24 | MAX_HEAP, MIN_HEAP 25 | }; 26 | 27 | typedef struct heap_element* ring_buffer_elem; 28 | 29 | struct heap_element { 30 | double member; 31 | ring_buffer_elem* loc_in_buffer; // element is marked as nonexistent when this is set to null 32 | }; 33 | 34 | struct ring_buffer { 35 | unsigned size; // could've called this the capacity 36 | unsigned n_entries; 37 | ring_buffer_elem* head; 38 | ring_buffer_elem entries[]; // the alternative would be preprocessor magic with fixed sizes, but I don't think that gives us much benefit for the cost it bears. 39 | }; 40 | 41 | struct heap { // I like this simple naming scheme best. 42 | enum heap_mode mode; 43 | unsigned size; 44 | unsigned n_entries; // multiple heaps may share a queue, so we need to maintain our own set of counting statistics 45 | struct ring_buffer* queue; // sadly, this must be a pointer in order to remain standard C because ring_buffer is also variably sized. 46 | struct heap_element elements[]; // keep all data in one contiguous block---one less layer of indirection (funny grammer, since we would otherwise say "fewer layers") 47 | }; 48 | 49 | 50 | // Let's see how rusty my C(++) is. This shall take advantage of the most elegant parts of C17 (i.e. C11.) Feels nice to get back into the groove! 51 | // Const-correctness is a pain in the ass. Instead, I shall trust myself to properly use my interfaces. 
52 | 53 | bool belongs_to_this_heap(struct heap* heap, struct heap_element* elem); 54 | struct heap_element* add_value_to_heap(struct heap* heap, double value); 55 | struct heap_element* add_element_to_heap(struct heap* heap, struct heap_element new_elem); // this and the below should not remove from the conveyor-belt queue, since adding it back would cause it to lose its original position. 56 | void remove_front_element_from_heap(struct heap* heap, struct heap_element* destination); // swaps into the destination slot. no longer returns by value to signal transfer of ownership. all these methods exposed gives granular control to the operator 57 | double view_front_of_heap(struct heap* heap); 58 | bool is_ring_buffer_full(struct ring_buffer* queue); 59 | bool is_ring_buffer_empty(struct ring_buffer* queue); 60 | void advance_ring_buffer(struct ring_buffer* queue); 61 | void register_in_queue(struct ring_buffer* queue, struct heap_element* elem); // modifies element to point to a fresh spot on the queue. will expire on its own after some time. 62 | int expire_stale_entry_in_queue(struct ring_buffer* queue, unsigned n_heaps, ...); // pass pointers to all of the heaps attached to this queue 63 | struct ring_buffer* create_queue(unsigned size); 64 | struct heap* create_heap(enum heap_mode mode, unsigned size, struct ring_buffer* queue); 65 | bool verify_heap(struct heap* heap); 66 | void destroy_queue(struct ring_buffer* queue); 67 | void destroy_heap(struct heap* heap); 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/python.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #define PY_SSIZE_T_CLEAN 18 | #include "Python.h" 19 | #include "structmember.h" 20 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 21 | #include "numpy/ndarrayobject.h" 22 | #include "numpy/ufuncobject.h" 23 | 24 | #include "filter.h" 25 | 26 | #include 27 | 28 | // Bypass the need for highly scalable storage of overwhelming data streams! 29 | // Highly verbose, "bare metal" Python bindings. 30 | 31 | struct description { 32 | PyObject_HEAD 33 | // can I just compose this with the actual underlying type `struct cascade_description`? 34 | unsigned window; 35 | unsigned portion; 36 | unsigned subsample_rate; 37 | double quantile; 38 | double alpha; 39 | double beta; 40 | }; 41 | 42 | static PyMemberDef description_members[] = { // base class of HighPass and LowPass 43 | { 44 | "window", T_UINT, offsetof(struct description, window), 0, 45 | "window size" 46 | }, { 47 | "portion", T_UINT, offsetof(struct description, portion), 0, 48 | "rank for the quantile element out of the window size" 49 | }, { 50 | "subsample_rate", T_UINT, offsetof(struct description, subsample_rate), 0, 51 | "every how many data points to subsample" 52 | }, { 53 | "quantile", T_DOUBLE, offsetof(struct description, quantile), 0, 54 | "target quantile to achieve by linear interpolation; setting this ignores `portion`" 55 | }, { 56 | "alpha", T_DOUBLE, offsetof(struct description, alpha), 0, 57 | "interpolation parameter, 0 <= alpha <= 1" 58 | }, { 59 | "beta", T_DOUBLE, offsetof(struct description, beta), 0, 60 | "interpolation parameter, 0 <= beta <= 
1" 61 | }, {NULL} 62 | }; 63 | 64 | static int description_init(struct description* self, PyObject* args, PyObject* kwds) { 65 | static char* keyword_list[] = { 66 | "window", "portion", "subsample_rate", "quantile", "alpha", "beta", NULL}; 67 | unsigned window = 0; 68 | unsigned portion = 0; 69 | unsigned subsample_rate = 1; 70 | double quantile = NAN; 71 | double alpha = 1.0; 72 | double beta = 1.0; 73 | // specify optional '|' and then keyword-only '$' arguments 74 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|$IIIddd", keyword_list, 75 | &window, &portion, &subsample_rate, &quantile, &alpha, &beta)) { 76 | PyErr_SetString(PyExc_TypeError, 77 | "invalid arguments passed to Description (either LowPass or HighPass) constructor"); 78 | return -1; 79 | } 80 | if (window == 0) { 81 | PyErr_SetString(PyExc_ValueError, "please set a positive window size"); 82 | return -1; 83 | } 84 | self->window = window; 85 | self->portion = portion; 86 | self->subsample_rate = subsample_rate; 87 | self->quantile = quantile; 88 | self->alpha = alpha; 89 | self->beta = beta; // my current setup is a little redundant; for instance, I could pass &self->beta directly 90 | return 0; 91 | } 92 | 93 | static PyTypeObject description_type = { 94 | PyVarObject_HEAD_INIT(NULL, 0) // funky macro 95 | .tp_name = "triton.Description", 96 | .tp_doc = "Base filter description. 
Do not use this directly; it enables subclasses that act like algebraic data types.", 97 | .tp_basicsize = sizeof(struct description), 98 | .tp_itemsize = 0, // for variably sized objects 99 | .tp_flags = Py_TPFLAGS_DEFAULT, 100 | .tp_new = PyType_GenericNew, 101 | .tp_members = description_members, 102 | .tp_init = (initproc)description_init, 103 | }; 104 | 105 | bool init_description(PyObject* self) { 106 | if (PyType_Ready(&description_type) < 0) 107 | return false; 108 | Py_INCREF(&description_type); 109 | if (PyModule_AddObject(self, "Description", (PyObject*) &description_type) < 0) { 110 | Py_DECREF(&description_type); 111 | return false; 112 | } 113 | return true; 114 | } 115 | 116 | struct high_pass { 117 | // first member defines and enables subclassing. now this type is polymorphic and may be cast as a `struct description` 118 | struct description description; 119 | // I probably do not even need a new struct type, but I keep it here in case I wish to extend it later. 120 | }; 121 | 122 | static PyTypeObject high_pass_type = { 123 | PyVarObject_HEAD_INIT(NULL, 0) // funky macro 124 | .tp_name = "triton.HighPass", 125 | .tp_doc = "High-pass filter description.", 126 | .tp_basicsize = sizeof(struct high_pass), 127 | .tp_itemsize = 0, // for variably sized objects 128 | .tp_flags = Py_TPFLAGS_DEFAULT, 129 | .tp_new = PyType_GenericNew, 130 | .tp_members = description_members, // just reuse the description struct 131 | }; 132 | 133 | bool init_high_pass(PyObject* self) { 134 | high_pass_type.tp_base = &description_type; // must be set at runtime, not statically 135 | if (PyType_Ready(&high_pass_type) < 0) 136 | return false; 137 | Py_INCREF(&high_pass_type); 138 | if (PyModule_AddObject(self, "HighPass", (PyObject*) &high_pass_type) < 0) { 139 | Py_DECREF(&high_pass_type); 140 | return false; 141 | } 142 | return true; 143 | } 144 | 145 | struct low_pass { 146 | struct description description; 147 | }; 148 | 149 | static PyTypeObject low_pass_type = { 150 | 
PyVarObject_HEAD_INIT(NULL, 0) // funky macro 151 | .tp_name = "triton.LowPass", 152 | .tp_doc = "Low-pass filter description.", 153 | .tp_basicsize = sizeof(struct low_pass), // consistency fix: size the instance by its own struct, as high_pass_type does — currently identical to sizeof(struct description), but this stays correct if low_pass ever grows fields 154 | .tp_itemsize = 0, // for variably sized objects 155 | .tp_flags = Py_TPFLAGS_DEFAULT, 156 | .tp_new = PyType_GenericNew, 157 | .tp_members = description_members, // just reuse the description struct 158 | }; 159 | 160 | bool init_low_pass(PyObject* self) { 161 | low_pass_type.tp_base = &description_type; // must be set at runtime, not statically 162 | if (PyType_Ready(&low_pass_type) < 0) 163 | return false; 164 | Py_INCREF(&low_pass_type); 165 | if (PyModule_AddObject(self, "LowPass", (PyObject*) &low_pass_type) < 0) { 166 | Py_DECREF(&low_pass_type); 167 | return false; 168 | } 169 | return true; 170 | } 171 | 172 | /* 173 | I have decided against providing a `ufunc` method to the Pipeline object for feeding, 174 | not only because that would be a pain in the wrong place, but also because the semantics 175 | are mismatched. I do not want to vectorize over arbitrary dimensions. I shall take in either 176 | a single value, a generator of values, or a unidimensional array of values. No more, no less. 177 | */ 178 | 179 | struct pipeline { 180 | PyObject_HEAD 181 | struct filter_pipeline* filters; 182 | unsigned stride; 183 | double lag; // in agnostic time units, increments of one half (since we bisect the window) 184 | }; 185 | 186 | static PyMemberDef pipeline_members[] = { // read-only attributes exposed on Pipeline instances (previous comment was copy-pasted from the description types) 187 | { 188 | "stride", T_UINT, offsetof(struct pipeline, stride), READONLY, 189 | "the total stride between subsamples: unit if no subsampling occurs" 190 | }, { 191 | "lag", T_DOUBLE, offsetof(struct pipeline, lag), READONLY, 192 | "the effective lag time between the pipeline's output and its input, for a balanced filter" 193 | // the moment it's received. balanced -> zero-phase or something like that?
194 | }, {NULL} 195 | }; 196 | 197 | static PyObject* pipeline_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { 198 | struct pipeline* self = (struct pipeline*)type->tp_alloc(type, 0); 199 | if (self == NULL) 200 | return NULL; 201 | self->filters = NULL; 202 | return (PyObject*)self; 203 | } 204 | 205 | /* 206 | Construct with keyword arguments. 207 | Do I need to call INCREF or DECREF on the arguments here? I'm following the philosophy that they should flow right through me. 208 | */ 209 | static int pipeline_init(struct pipeline* self, PyObject* args, PyObject* kwds) { 210 | if (!PyTuple_Check(args)) 211 | return -1; 212 | Py_ssize_t n_filters = PyTuple_Size(args); 213 | struct cascade_description* descriptions = malloc(n_filters * sizeof(struct cascade_description)); 214 | unsigned stride = 1; 215 | double lag = 0.0; 216 | // double cascading_rate = 1.0; do the whole real-units shebang with a higher-level description structure 217 | for (Py_ssize_t i = 0; i < n_filters; i += 1) { 218 | PyObject* item = PyTuple_GetItem(args, i); 219 | if (item == NULL) { 220 | PyErr_SetString(PyExc_TypeError, "encountered a null description"); 221 | return -1; 222 | } 223 | struct description* desc_item = (struct description*)item; 224 | if (PyObject_TypeCheck(item, &description_type)) { // can I just access it straight? 
225 | descriptions[i].window = desc_item->window; 226 | descriptions[i].portion = desc_item->portion; 227 | descriptions[i].subsample_rate = desc_item->subsample_rate; 228 | descriptions[i].interpolation = (struct interpolation) { 229 | .target_quantile = desc_item->quantile, 230 | .alpha = desc_item->alpha, 231 | .beta = desc_item->beta }; 232 | lag += 0.5 * (double)(desc_item->window * stride); // buildup/cascade/waterfall of lags 233 | stride *= desc_item->subsample_rate; 234 | } 235 | //switch (item->ob_type) { 236 | // case &high_pass_type: { 237 | if (PyObject_TypeCheck(item, &high_pass_type)) { // allows for subtypes as well, as opposed to item->ob_type equality checks 238 | descriptions[i].mode = HIGH_PASS; 239 | } else if (PyObject_TypeCheck(item, &low_pass_type)) { 240 | descriptions[i].mode = LOW_PASS; 241 | } else { 242 | PyErr_SetString(PyExc_TypeError, "one of the descriptions is neither a HighPass nor a LowPass"); 243 | return -1; 244 | } 245 | } 246 | self->filters = create_filter_pipeline((unsigned)n_filters, descriptions); 247 | if (self->filters == NULL) { 248 | PyErr_SetString(PyExc_ValueError, "invalid descriptions passed to pipeline constructor"); 249 | return -1; 250 | } 251 | self->stride = stride; 252 | self->lag = lag; 253 | return 0; 254 | } 255 | 256 | // there is also .tp_finalize that is better suited to deconstructors that perform complex interactions with Python objects 257 | static void pipeline_dealloc(struct pipeline* self) { 258 | destroy_filter_pipeline(self->filters); 259 | Py_TYPE(self)->tp_free(self); // why is the TYPE macro needed? in case of multiple inheritance (composition)? 
260 | } 261 | 262 | static PyObject* pipeline_repr(struct pipeline* self) { 263 | static const char* format = "FilterPipeline(<%d cascades>)"; // each cascade consits of a filter and a subsample 264 | return PyUnicode_FromFormat(format, self->filters->n_filters); 265 | } 266 | 267 | // use the fastcall convention, because why the heck not (Python 3.7+). take in a constant array of PyObject pointers. 268 | /* 269 | Currently I accept a scalar or an NumPy array. In the future, I would like to consume a boolean `inplace` parameter 270 | for the latter instance to allow me to modify the array in place without creating a new one. 271 | 272 | I should consider checking the Python version with macros, and falling back to a traditional-style (not fastcall) 273 | method definition for versions prior to 3.7. 274 | */ 275 | static PyObject* pipeline_feed(struct pipeline* self, PyObject* const* args, Py_ssize_t n_args) { 276 | if (n_args != 1) { 277 | PyErr_SetString(PyExc_NotImplementedError, "pipeline.feed(*) only accepts a singular argument"); // ValueError? 
278 | return NULL; 279 | } 280 | if (PyFloat_Check(args[0]) || PyLong_Check(args[0])) { 281 | double input = PyFloat_AsDouble(args[0]); // implicitly converts integers and other related types 282 | double output = feed_filter_pipeline(self->filters, input); 283 | return PyFloat_FromDouble(output); 284 | } 285 | if (PyArray_Check(args[0])) { 286 | PyArrayObject* array = (PyArrayObject*)args[0]; 287 | if (PyArray_NDIM(array) > 1) { 288 | PyErr_SetString(PyExc_ValueError, "array can't have multiple dimensions"); 289 | return NULL; 290 | } 291 | //PyArrayObject* output_array = PyArray_NewLikeArray(array, NPY_KEEPORDER, NULL, 1); 292 | if (PyArray_Size((PyObject*)array) == 0) { 293 | return (PyObject*)array; // nothing to do 294 | } 295 | PyArrayObject* array_operands[2]; 296 | array_operands[0] = array; 297 | array_operands[1] = NULL; // second operand will be designated as the output, and allocated automatically by the iterator 298 | npy_uint32 op_flags[2]; 299 | op_flags[0] = NPY_ITER_READONLY; 300 | op_flags[1] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; 301 | PyArray_Descr* op_desc[2]; 302 | op_desc[0] = PyArray_DescrFromType(NPY_DOUBLE); 303 | op_desc[1] = PyArray_DescrFromType(NPY_DOUBLE); // cast to double and output double 304 | NpyIter* iterator = NpyIter_MultiNew(2, array_operands, 305 | // no seperate external "inner loop", as we treat it all like a flat array. 306 | // is there any impact on efficiency for our use-case, to keep advancing the iterator for each element? 307 | NPY_ITER_REFS_OK|NPY_ITER_BUFFERED, // buffered to allow casting on the fly 308 | // is KEEPORDER the right thing here (or significant), when I treat the array as a 1D ordered sequence? 
309 | NPY_KEEPORDER, NPY_SAME_KIND_CASTING, op_flags, op_desc); 310 | // for NpyIter_New (not the above), the final `NULL` is for an error-message output argument 311 | Py_DECREF(op_desc[0]); 312 | Py_DECREF(op_desc[1]); 313 | if (iterator == NULL) { 314 | PyErr_SetString(PyExc_ValueError, "could not initialize an iterator on the array"); 315 | return NULL; 316 | } 317 | NpyIter_IterNextFunc* iter_next = NpyIter_GetIterNext(iterator, NULL); 318 | if (iter_next == NULL) { 319 | NpyIter_Deallocate(iterator); 320 | PyErr_SetString(PyExc_ValueError, "could not initialize the iterator `next function` on the array"); 321 | return NULL; 322 | } 323 | double** data = (double**)NpyIter_GetDataPtrArray(iterator); 324 | do { 325 | double input = *data[0]; 326 | double* output = data[1]; 327 | // interspersed with NaNs to maintain harmony and consistency with the general API 328 | *output = feed_filter_pipeline(self->filters, input); 329 | } while (iter_next(iterator)); 330 | PyArrayObject* output_array = NpyIter_GetOperandArray(iterator)[1]; 331 | Py_INCREF(output_array); 332 | // only call this after incrementing its output's reference count 333 | if (NpyIter_Deallocate(iterator) != NPY_SUCCEED) { 334 | Py_DECREF(output_array); 335 | return NULL; 336 | } 337 | return (PyObject*)output_array; 338 | } 339 | // numeric lists are not supported yet. at this point, just do generators and comprehensions. 340 | // no extra performance benefits would be afforded. 341 | PyErr_SetString(PyExc_TypeError, "please pass a number or unidimensional np.array to pipeline.feed(*)"); 342 | return NULL; 343 | } 344 | 345 | static struct PyMethodDef pipeline_methods[] = { 346 | {"feed", (PyCFunction)pipeline_feed, METH_FASTCALL, // not truly a PyCFunction, due to METH_FASTCALL ...? 
347 | "Feed a value, or a series thereof (array, list, generator,) into the filter pipeline."}, 348 | {NULL, NULL, 0, NULL} // sentinel 349 | }; 350 | 351 | static PyTypeObject pipeline_type = { 352 | PyVarObject_HEAD_INIT(NULL, 0) 353 | .tp_name = "triton.Pipeline", 354 | .tp_doc = "A filter pipeline.", 355 | .tp_basicsize = sizeof(struct pipeline), 356 | .tp_itemsize = 0, // for variably sized objects 357 | .tp_flags = Py_TPFLAGS_DEFAULT, 358 | .tp_methods = pipeline_methods, 359 | .tp_members = pipeline_members, 360 | .tp_init = (initproc)pipeline_init, 361 | .tp_new = pipeline_new, 362 | .tp_dealloc = (destructor)pipeline_dealloc, // BUGFIX: was .tp_del — the legacy tp_del slot is never invoked for ordinary (non-Py_TPFLAGS_HAVE_FINALIZE) types, so pipeline_dealloc never ran and every Pipeline leaked its filters; tp_dealloc is the slot the interpreter actually calls on refcount zero 363 | .tp_repr = (reprfunc)pipeline_repr, 364 | }; 365 | 366 | bool init_pipeline(PyObject* self) { 367 | if (PyType_Ready(&pipeline_type) < 0) 368 | return false; 369 | Py_INCREF(&pipeline_type); 370 | if (PyModule_AddObject(self, "Pipeline", (PyObject*) &pipeline_type) < 0) { 371 | Py_DECREF(&pipeline_type); 372 | return false; 373 | } 374 | return true; 375 | } 376 | 377 | 378 | // unused in `module` structure below. things can be added dynamically upon initialization 379 | static struct PyMethodDef methods[] = { 380 | {NULL, NULL, 0, NULL} // sentinel 381 | }; 382 | 383 | static struct PyModuleDef module = { 384 | PyModuleDef_HEAD_INIT, 385 | .m_name = "triton", // is this triton or rolling_quantiles.triton ? 386 | .m_doc = "The blazing-fast filter implementation.", // docs 387 | .m_size = 0, // memory required for global state. we don't use any.
388 | .m_methods = methods, 389 | }; 390 | 391 | 392 | PyMODINIT_FUNC PyInit_triton(void) { 393 | PyObject* self = PyModule_Create(&module); 394 | import_array(); 395 | static bool (*type_initializers[])(PyObject*) = { // array of function pointers 396 | init_description, init_high_pass, init_low_pass, init_pipeline, NULL 397 | }; 398 | bool (**init)(PyObject*) = &type_initializers[0]; 399 | while (*init != NULL) { 400 | if (!(*init)(self)) { 401 | Py_DECREF(self); 402 | return NULL; 403 | } 404 | ++init; 405 | } 406 | return self; 407 | } 408 | -------------------------------------------------------------------------------- /src/quantile.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #include "quantile.h" 18 | #include "heap.h" 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | const struct interpolation NO_INTERPOLATION = { .target_quantile = NAN }; 27 | 28 | struct rolling_quantile create_rolling_quantile_monitor(unsigned window, unsigned portion, struct interpolation interp) { 29 | //if (window % 2 == 0) this only makes sense for the median special case. 
30 | // return NULL; 31 | struct ring_buffer* queue = create_queue(window); 32 | struct rolling_quantile monitor = { 33 | .queue = queue, 34 | .left_heap = create_heap(MAX_HEAP, portion + 1, queue), 35 | .right_heap = create_heap(MIN_HEAP, window - portion, queue), // - 1 and then + 1 36 | .current_value = (struct heap_element) {.member = NAN, .loc_in_buffer = NULL}, // to keep track of queue position 37 | .window = window, 38 | .portion = portion, 39 | .count = 0, 40 | .interpolation = interp, 41 | }; 42 | return monitor; 43 | } 44 | 45 | void destroy_rolling_quantile_monitor(struct rolling_quantile* monitor) { 46 | destroy_heap(monitor->left_heap); 47 | destroy_heap(monitor->right_heap); 48 | destroy_queue(monitor->queue); 49 | } 50 | 51 | static bool is_between_zero_and_one(double val) { // null and unit 52 | return (val >= 0.0) && (val <= 1.0); 53 | } 54 | 55 | bool validate_interpolation(struct interpolation interp) { 56 | return isnan(interp.target_quantile) || ( 57 | is_between_zero_and_one(interp.target_quantile) && 58 | is_between_zero_and_one(interp.alpha) && 59 | is_between_zero_and_one(interp.beta)); 60 | } 61 | 62 | double compute_interpolation_target(unsigned window, struct interpolation interp) { 63 | double real_portion = (double)window * interp.target_quantile; 64 | double correction = interp.alpha + 65 | interp.target_quantile*(1.0 - interp.alpha - interp.beta); 66 | return real_portion + correction; 67 | } 68 | 69 | static double interpolate_current_rolling_quantile(struct rolling_quantile* monitor) { 70 | struct interpolation interp = monitor->interpolation; // is copy worth the locality? 
71 | double target = compute_interpolation_target(monitor->window, interp); 72 | double gamma = target - floor(target); // must be between 0 and 1, but avoid checking for the sake of performance 73 | int index = (int)floor(target) - 1; // subtract one because `portion` refers to the number of items in the left heap (but `target_portion` does *not*) 74 | int portion = (int)monitor->portion; 75 | double current = monitor->current_value.member; 76 | if (index == portion) { 77 | if (monitor->right_heap->n_entries == 0) 78 | return current; 79 | double next = view_front_of_heap(monitor->right_heap); 80 | return (1.0-gamma)*current + gamma*next; 81 | } else if (index == (portion-1)) { 82 | if (monitor->left_heap->n_entries == 0) 83 | return current; 84 | double previous = view_front_of_heap(monitor->left_heap); 85 | return (1.0-gamma)*previous + gamma*current; 86 | } 87 | return NAN; // monitor.portion is uncalibrated/corrupted 88 | } 89 | 90 | /* 91 | Game plan. 92 | We shall first expel the stale entry, then add the new entry to its rightful receptacle based on its ordering wrt the current value. 93 | If a NaN is added, we will simply count it as a cycle without a new observation: old will be expelled with no replenishing. 94 | *Do not* contaminate the heaps with NaNs. That may cause their rebalancing to spiral out of control. 95 | Flushing. If the whole window empties, effectively reset the filter and revert `current_value` to its initial state. 
96 | */ 97 | double update_rolling_quantile(struct rolling_quantile* monitor, double next_entry) { 98 | //unsigned left_entries = monitor->left_heap->n_entries; 99 | unsigned right_entries = monitor->right_heap->n_entries; 100 | //unsigned total_entries = left_entries + right_entries + 1; 101 | // we control the advancement ourselves, since it must happen exactly once per call to this method 102 | // this makes life much easier than engineering an overly clever ring-buffer interface 103 | advance_ring_buffer(monitor->queue); 104 | if (isnan(monitor->current_value.member)) { // total_entries will be 1 regardless of whether current_value has anything in it. we want to be careful, since NaNs will also signal missing values coming in 105 | if (isnan(next_entry)) 106 | return NAN; 107 | monitor->current_value.member = next_entry; 108 | register_in_queue(monitor->queue, &monitor->current_value); 109 | monitor->count += 1; 110 | return next_entry; 111 | } 112 | int expired_in_heap = expire_stale_entry_in_queue(monitor->queue, 2, monitor->left_heap, monitor->right_heap); 113 | if (expired_in_heap == 0) { // expired, but did not belong to a heap 114 | if (monitor->queue->n_entries == 0) { // there do not exist other entries 115 | // basically reset and go again 116 | monitor->current_value.member = NAN; 117 | return update_rolling_quantile(monitor, next_entry); // a delicate corner case, looping us back to the top. tread carefully 118 | } 119 | struct heap* some_heap = (right_entries > 0)? monitor->right_heap : monitor->left_heap; // pick arbitrarily 120 | remove_front_element_from_heap(some_heap, &monitor->current_value); 121 | } // else if (expired_in_heap == -1) { ... } // there was nothing to expire 122 | if (!isnan(next_entry)) { 123 | struct heap* heap_for_next = (next_entry > monitor->current_value.member)? 
monitor->right_heap : monitor->left_heap; 124 | struct heap_element* next_elem = add_value_to_heap(heap_for_next, next_entry); 125 | if (next_elem == NULL) // BY DESIGN SHOULD NEVER HAPPEN 126 | printf("TRIED TO ADD TO A FULL HEAP\n"); 127 | register_in_queue(monitor->queue, next_elem); 128 | } 129 | monitor->count += 1; 130 | rebalance_rolling_quantile(monitor); // should run a provably deterministic number of times (once?) 131 | if (!isnan(monitor->interpolation.target_quantile)) 132 | return interpolate_current_rolling_quantile(monitor); 133 | return monitor->current_value.member; 134 | } 135 | 136 | int rebalance_rolling_quantile(struct rolling_quantile* monitor) { 137 | unsigned left_entries = monitor->left_heap->n_entries; 138 | unsigned right_entries = monitor->right_heap->n_entries; 139 | unsigned total_entries = left_entries + right_entries + 1; 140 | unsigned left_target = (monitor->portion * total_entries) / monitor->window; // builds up gradually when the pipeline is not yet saturated 141 | if (left_entries == left_target) 142 | return 0; // if-clauses with lone return statements don't need brackets in my book 143 | struct heap* overdue_heap = (left_entries < left_target)? monitor->right_heap : monitor->left_heap; 144 | struct heap_element holdover = monitor->current_value; 145 | remove_front_element_from_heap(overdue_heap, &monitor->current_value); // take from the correct heap to restore balance. expelled element is transferred into our current slot 146 | struct heap* other_heap = (overdue_heap == monitor->right_heap)? monitor->left_heap : monitor->right_heap; // is it worth avoiding two separate branches of slightly redundant code? 
147 | if (!isnan(holdover.member)) { 148 | // this part does not rely on the actual address of `holdover`/`current_value`, thankfully 149 | add_element_to_heap(other_heap, holdover); // the method knows that `*holdover.loc_in_buffer` is stale after copying 150 | } 151 | return rebalance_rolling_quantile(monitor) + 1; // is non-tail-call recursion *always* dangerous? each round performs one set of "remove and add" 152 | } 153 | 154 | /* 155 | Consists of various sanity checks and tests on integrity. 156 | */ 157 | bool verify_monitor(struct rolling_quantile* monitor) { 158 | double left = view_front_of_heap(monitor->left_heap); 159 | if (!isnan(left) && (left > monitor->current_value.member)) 160 | return false; 161 | double right = view_front_of_heap(monitor->right_heap); 162 | if (!isnan(right) && (right < monitor->current_value.member)) 163 | return false; 164 | return verify_heap(monitor->left_heap) && verify_heap(monitor->right_heap); 165 | } 166 | -------------------------------------------------------------------------------- /src/quantile.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #ifndef QUANTILE_H 18 | #define QUANTILE_H 19 | 20 | #include "heap.h" 21 | 22 | #include 23 | 24 | /* 25 | Composable (in pipelines/chains) rolling quantiles of arbitrary time scales. 
26 | */ 27 | 28 | /* 29 | Optional interpolation with (alpha, beta) parameters as post-processing to refine 30 | the estimate. See the following: 31 | https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample 32 | https://github.com/scipy/scipy/blob/v1.6.1/scipy/stats/mstats_basic.py#L2607-L2732 33 | 34 | We assume that `target_quantile` and `monitor.portion` have been set such 35 | that they are in agreement with one another. 36 | */ 37 | struct interpolation { 38 | double target_quantile; // NaN if no interpolation is to be performed 39 | double alpha; 40 | double beta; 41 | }; 42 | 43 | extern const struct interpolation NO_INTERPOLATION; 44 | 45 | // Can't hide this structure's implementation in quantile.c because we want to be able to handle it by value. Comprise other structures of it without having many layers of indirection. 46 | struct rolling_quantile { 47 | struct heap_element current_value; 48 | unsigned window; 49 | unsigned portion; 50 | struct ring_buffer* queue; 51 | struct heap* left_heap; 52 | struct heap* right_heap; 53 | unsigned count; 54 | struct interpolation interpolation; // store this optional setting without indirection. 55 | }; 56 | 57 | struct rolling_quantile create_rolling_quantile_monitor(unsigned window, unsigned portion, struct interpolation interp); // window should be an odd number. portion is how much probability mass goes to the left side, so (portion+0.5)/window gives the quantile. 
58 | bool validate_interpolation(struct interpolation interp); 59 | double compute_interpolation_target(unsigned window, struct interpolation interp); 60 | double update_rolling_quantile(struct rolling_quantile* monitor, double entry); 61 | int rebalance_rolling_quantile(struct rolling_quantile* monitor); // returns the number of sifts and shifts it had to perform 62 | bool verify_monitor(struct rolling_quantile* monitor); 63 | void destroy_rolling_quantile_monitor(struct rolling_quantile* monitor); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/test.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 Myrl Marmarelis 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #include "heap.h" 18 | #include "quantile.h" 19 | #include "filter.h" 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | void test_single_heap(void) { 27 | struct ring_buffer* queue = create_queue(9); 28 | struct heap* heap = create_heap(MAX_HEAP, 10, queue); 29 | struct heap_element* elem; 30 | for (double i = 1.0; i < 15.0; i += 1.0) { 31 | elem = add_value_to_heap(heap, i); 32 | expire_stale_entry_in_queue(queue, 1, heap); 33 | register_in_queue(queue, elem); 34 | } 35 | struct heap_element output; 36 | for (unsigned i = 0; i < 10; i += 1) { 37 | remove_front_element_from_heap(heap, &output); 38 | printf("%f\n", output.member); 39 | } 40 | } 41 | 42 | void test_multiple_heaps(void) { 43 | struct ring_buffer* queue = create_queue(9); 44 | struct heap* heap1 = create_heap(MAX_HEAP, 10, queue); 45 | struct heap* heap2 = create_heap(MAX_HEAP, 10, queue); 46 | struct heap* heap = heap1; 47 | for (double i = 1.0; i < 50.0; i += 1.0) { 48 | heap = heap==heap1? 
heap2 : heap1; 49 | struct heap_element* elem = add_value_to_heap(heap, i); 50 | expire_stale_entry_in_queue(queue, 2, heap1, heap2); 51 | register_in_queue(queue, elem); 52 | } 53 | struct heap_element output; 54 | for (unsigned i = 0; i < 10; i += 1) { 55 | remove_front_element_from_heap(heap, &output); 56 | printf("%f\n", output.member); 57 | } 58 | } 59 | 60 | double generate_random_value(void) { 61 | return (double)rand() / (double)RAND_MAX; 62 | } 63 | 64 | void test_quantile(void) { 65 | printf("Testing...\n"); 66 | struct rolling_quantile monitor = create_rolling_quantile_monitor(5, 2, NO_INTERPOLATION); 67 | double test_entries[] = {4.0, 2.0, 3.0, 2.5, 4.5, 3.5, 2.7, 3.9, 3.8, 3.1}; 68 | unsigned test_size = sizeof(test_entries) / sizeof(double); 69 | for (unsigned i = 0; i < test_size; i += 1) { 70 | double quantile = update_rolling_quantile(&monitor, test_entries[i]); 71 | printf("%f\n", quantile); 72 | } 73 | } 74 | 75 | void stress_test_quantile_for_correctness(unsigned size, unsigned n_iterations) { 76 | printf("Stress-testing...\n"); 77 | if (size % 2 == 0) size += 1; 78 | unsigned middle = (size-1)/2; 79 | struct rolling_quantile monitor = create_rolling_quantile_monitor(size, middle, NO_INTERPOLATION); 80 | double* window = malloc(size*sizeof(double)); 81 | double* buffer = malloc(size*sizeof(double)); 82 | bool* unsorted = malloc(size*sizeof(bool)); 83 | unsigned window_pos = 0; 84 | for (unsigned i = 0; i < size; i += 1) { 85 | double value = generate_random_value(); 86 | update_rolling_quantile(&monitor, value); 87 | window[i] = value; 88 | } 89 | for (unsigned t = 0; t < n_iterations; t += 1) { 90 | double value = generate_random_value(); 91 | struct timespec timespec; 92 | clock_gettime(CLOCK_REALTIME, ×pec); 93 | double begin_time = (double)timespec.tv_sec + ((double)timespec.tv_nsec / 1e9); 94 | double pred_median = update_rolling_quantile(&monitor, value); 95 | clock_gettime(CLOCK_REALTIME, ×pec); 96 | double end_time = 
(double)timespec.tv_sec + ((double)timespec.tv_nsec / 1e9); 97 | printf("%.3e seconds; ", end_time - begin_time); 98 | window[window_pos++] = value; 99 | if (window_pos == size) 100 | window_pos = 0; 101 | // perform selection sort now, building up our one buffer 102 | for (unsigned i = 0; i < size; i += 1) 103 | unsorted[i] = true; 104 | for (unsigned i = 0; i < size; i += 1) { 105 | double min = INFINITY; 106 | unsigned min_ind; // UNINITIALIZED 107 | for (unsigned j = 0; j < size; j += 1) { 108 | if ((window[j] <= min) && unsorted[j]) { 109 | min = window[j]; 110 | min_ind = j; 111 | } 112 | } 113 | buffer[i] = min; 114 | unsorted[min_ind] = false; 115 | } 116 | // now buffer is sorted 117 | double median = buffer[middle]; 118 | //for (unsigned i = 0; i < size; i += 1) printf(" %f ", window[i]); 119 | //for (unsigned i = 0; i < monitor->left_heap->n_entries; i += 1) printf("\n%f", monitor->left_heap->elements[i].member); 120 | //printf("\n %f\n", monitor->current_value.member); 121 | //for (unsigned i = 0; i < monitor->right_heap->n_entries; i += 1) printf("%f\n", monitor->right_heap->elements[i].member); 122 | printf("%f %f %f %d %d\n", value, pred_median, median, pred_median==median, verify_monitor(&monitor)); 123 | } 124 | } 125 | 126 | void test_pipeline(void) { 127 | struct cascade_description descriptions[] = { 128 | {.window = 5, .portion = 2, .subsample_rate = 2, 129 | .mode = LOW_PASS, .interpolation = NO_INTERPOLATION}, 130 | {.window = 3, .portion = 2, .subsample_rate = 1, 131 | .mode = HIGH_PASS, .interpolation = NO_INTERPOLATION}, 132 | }; 133 | struct filter_pipeline* pipeline = create_filter_pipeline(2, descriptions); 134 | double test_entries[] = {4.0, 2.0, 3.0, 2.5, 1.5, 1.2, 1.7, 0.9, 0.8, 1.1, 0.1, 0.3}; 135 | unsigned test_size = sizeof(test_entries) / sizeof(double); 136 | for (unsigned i = 0; i < test_size; i += 1) { 137 | double output = feed_filter_pipeline(pipeline, test_entries[i]); 138 | printf("%f\n", output); 139 | } 140 | 
destroy_filter_pipeline(pipeline); 141 | } 142 | 143 | void test_interpolating_pipeline(void) { 144 | struct cascade_description descriptions[] = { 145 | {.window = 3, .portion = 0, .subsample_rate = 1, 146 | .mode = LOW_PASS, .interpolation = { 147 | .target_quantile = 0.4, .alpha = 1.0, .beta = 1.0, 148 | }}, 149 | }; 150 | struct filter_pipeline* pipeline = create_filter_pipeline(1, descriptions); 151 | double test_entries[] = {4.0, 2.0, 3.0, 2.5, 1.5, 1.2, 1.7, 0.9, 0.8, 1.1, 0.1, 0.3}; 152 | unsigned test_size = sizeof(test_entries) / sizeof(double); 153 | for (unsigned i = 0; i < test_size; i += 1) { 154 | if (!verify_pipeline(pipeline)) { 155 | printf("INVALID PIPELINE\n"); 156 | } 157 | double output = feed_filter_pipeline(pipeline, test_entries[i]); 158 | printf("%f\n", output); 159 | } 160 | destroy_filter_pipeline(pipeline); 161 | } 162 | 163 | int main(void) { 164 | test_quantile(); 165 | stress_test_quantile_for_correctness(3001, 10000); 166 | //test_interpolating_pipeline(); 167 | } 168 | --------------------------------------------------------------------------------