├── .flake8
├── .dockerignore
├── .gitignore
├── .gitattributes
├── .vscode
    ├── settings.json
    └── tasks.json
├── Dockerfile
├── rapidhrv
    ├── __init__.py
    ├── visualization.py
    ├── data.py
    ├── preprocessing.py
    └── analysis.py
├── .github
    ├── actions
    │   └── setup
    │   │   └── action.yaml
    └── workflows
    │   ├── verify.yaml
    │   └── publish.yml
├── tests
    └── test_rapidhrv.py
├── LICENSE
├── pyproject.toml
├── README.md
└── resources
    └── tutorial.ipynb


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E203
3 | max-line-length = 99
4 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | *.
2 | Dockerfile
3 | README.md
4 | LICENSE
5 | notebooks/


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | __pycache__/
 3 | .ipynb_checkpoints/
 4 | .mypy_cache/
 5 | .pytest_cache/
 6 | dist/
 7 | 
 8 | *.hdf5
 9 | *.csv
10 | .coverage
11 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.npy filter=lfs diff=lfs merge=lfs -text
2 | *.hdf5 filter=lfs diff=lfs merge=lfs -text
3 | 
4 | *.ipynb filter=nbstripout
5 | *.zpln filter=nbstripout
6 | *.ipynb diff=ipynb
7 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.formatting.provider": "black",
3 |     "python.testing.pytestArgs": [
4 |         "tests"
5 |     ],
6 |     "python.testing.unittestEnabled": false,
7 |     "python.testing.pytestEnabled": true
8 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.9
 2 | 
 3 | RUN pip install "poetry==1.1.7"
 4 | RUN poetry config virtualenvs.create false
 5 | 
 6 | WORKDIR /app
 7 | COPY poetry.lock pyproject.toml /app/
 8 | 
 9 | RUN poetry install --no-interaction --no-ansi
10 | 
11 | COPY . /app


--------------------------------------------------------------------------------
/rapidhrv/__init__.py:
--------------------------------------------------------------------------------
 1 | from .analysis import analyze
 2 | from .data import OutlierDetectionSettings, Signal, get_example_data
 3 | from .preprocessing import preprocess
 4 | from .visualization import visualize
 5 | 
 6 | __all__ = (
 7 |     "analyze",
 8 |     "OutlierDetectionSettings",
 9 |     "Signal",
10 |     "get_example_data",
11 |     "preprocess",
12 |     "visualize",
13 | )
14 | 


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // See https://go.microsoft.com/fwlink/?LinkId=733558
 3 |     // for the documentation about the tasks.json format
 4 |     "version": "2.0.0",
 5 |     "tasks": [
 6 |         {
 7 |             "label": "verify",
 8 |             "type": "shell",
 9 |             "command": "poetry run poe test",
10 |             "problemMatcher": []
11 |         }
12 |     ]
13 | }


--------------------------------------------------------------------------------
/.github/actions/setup/action.yaml:
--------------------------------------------------------------------------------
 1 | name: Setup RapidHRV
 2 | description: Setup RapidHRV
 3 | runs:
 4 |   using: "composite"
 5 |   steps:
 6 |     - name: Setup Python
 7 |       uses: actions/setup-python@v2
 8 |       with:
 9 |         python-version: "3.12"
10 |     - name: Install Poetry
11 |       run: pip install poetry
12 |       shell: bash
13 |     - name: Install Dependencies in Virtual Environment
14 |       run: poetry install
15 |       shell: bash
16 | 


--------------------------------------------------------------------------------
/.github/workflows/verify.yaml:
--------------------------------------------------------------------------------
 1 | name: verify
 2 | on: [push, pull_request]
 3 | jobs:
 4 |   test:
 5 |     runs-on: ubuntu-latest
 6 |     steps:
 7 |       - uses: actions/checkout@v2
 8 |       - uses: ./.github/actions/setup
 9 |       - name: Run Tests
10 |         run: poetry run poe test
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v2
15 |       - uses: ./.github/actions/setup
16 |       - name: Build project
17 |         run: poetry build
18 | 


--------------------------------------------------------------------------------
/tests/test_rapidhrv.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import rapidhrv as rhv
 4 | 
 5 | 
 6 | def test_pipeline():
 7 |     """Basic smoke test"""
 8 |     signal = rhv.get_example_data()
 9 |     preprocessed = rhv.preprocess(signal)
10 |     result = rhv.analyze(preprocessed)
11 |     bpm = np.nanmean(result["BPM"])
12 |     rmssd = np.nanmean(result["RMSSD"])
13 | 
14 |     assert result is not None
15 |     assert len(result) > 0
16 |     assert not np.all(np.isnan(result["BPM"]))
17 |     assert 59 < bpm < 61
18 |     assert 26 < rmssd < 28
19 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: publish
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 | 
 7 | permissions:
 8 |   contents: read
 9 | 
10 | jobs:
11 |   pypi-publish:
12 |     runs-on: ubuntu-latest
13 |     environment:
14 |       name: pypi
15 |       url: https://pypi.org/project/rapidhrv
16 |     permissions:
17 |       id-token: write
18 |     steps:
19 |       - uses: actions/checkout@v4
20 |       - uses: ./.github/actions/setup
21 |       - name: Build project
22 |         run: poetry build
23 |       - name: Publish package distributions to PyPI
24 |         uses: pypa/gh-action-pypi-publish@release/v1
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Peter A. Kirk, Sarah N. Garfinkel, & Oliver J. Robinson.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "rapidhrv"
 3 | version = "0.2.7"
 4 | description = "A package for preprocessing, analyzing and visualizing cardiac data"
 5 | authors = [
 6 |   "Peter Kirk <p.kirk@ucl.ac.uk>",
 7 |   "Alexander Davidson Bryan <alxdb@pm.me>",
 8 | ]
 9 | license = "MIT"
10 | readme = "README.md"
11 | 
12 | [tool.poe.tasks]
13 | format = [{ cmd = "black ." }, { cmd = "isort ." }]
14 | pytest = "pytest --cov=rapidhrv ."
15 | test = [
16 |   { cmd = "black --check ." },
17 |   { cmd = "isort --check ." },
18 |   { cmd = "flake8 ." },
19 |   { cmd = "mypy ." },
20 |   { ref = "pytest" },
21 | ]
22 | 
23 | [tool.mypy]
24 | ignore_missing_imports = true
25 | 
26 | [tool.black]
27 | line-length = 99
28 | 
29 | [tool.poetry.dependencies]
30 | python = ">=3.11,<3.13"
31 | numpy = "^2.3.2"
32 | scipy = "^1.16.1"
33 | scikit-learn = "^1.7.1"
34 | pandas = "^2.3.2"
35 | jupyter = { version = "^1.0.0", optional = true }
36 | matplotlib = { version = "^3.4.2", optional = true }
37 | h5py = "^3.3.0"
38 | dash = "^3.2.0"
39 | 
40 | [tool.poetry.group.dev.dependencies]
41 | black = "^25.1.0"
42 | isort = "^6.0.1"
43 | mypy = "^1.17.1"
44 | pytest = "^8.4.1"
45 | nbstripout = "^0.8.1"
46 | flake8 = "^7.3.0"
47 | poethepoet = "^0.37.0"
48 | pytest-cov = "^6.2.1"
49 | 
50 | [tool.poetry.extras]
51 | notebooks = ["jupyter", "matplotlib"]
52 | 
53 | [build-system]
54 | requires = ["poetry-core>=1.0.0"]
55 | build-backend = "poetry.core.masonry.api"
56 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RapidHRV
 2 | 
 3 | RapidHRV is a data processing pipeline for the analysis and visualization of cardiac data.
 4 | 
 5 | Please provide credit where appropriate:
 6 | 
 7 | Kirk, P. A., Bryan, A. D., Garfinkel, S. N., & Robinson, O. J. (2022). RapidHRV: An open-source toolbox for extracting heart rate and heart rate variability. PeerJ, 10, e13147. https://doi.org/10.7717/peerj.13147
 8 | 
 9 | This library is distributed under an 
10 | [MIT License](https://raw.githubusercontent.com/peterakirk/RapidHRV/main/LICENSE)
11 | 
12 | ## Installation
13 | 
14 | ```shell
15 | pip install rapidhrv
16 | ```
17 | 
18 | ## Usage
19 | 
 20 | Given a numpy array, or something convertible to it (such as a list),
21 | `rapidhrv.preprocess` can generate input suitable for analysis with
22 | `rapidhrv.analyze`, which will return a pandas dataframe containing HRV data.
23 | 
24 | ```python
25 | import numpy as np
26 | import rapidhrv as rhv
27 | 
28 | my_data = np.load("my_data.npy")  # Load data
29 | data = rhv.Signal(my_data, sample_rate=50)  # Convert to rhv Signal class
30 | preprocessed = rhv.preprocess(data)  # Preprocess: may interpolate data, check the docstring on `rapidhrv.preprocess`
31 | result = rhv.analyze(preprocessed)  # Analyze signal
32 | ```
33 | 
34 | ## Documentation
35 | 
36 | Please see the included [tutorial notebook](https://github.com/peterakirk/RapidHRV/blob/main/resources/tutorial.ipynb).
37 | 
38 | ## Development
39 | 
40 | In order to get a working development environment,
41 | please install [Poetry](https://python-poetry.org/) for your platform,
42 | and run `poetry install` to generate a virtual environment.
43 | 
44 | If you plan on making any changes to the included notebooks,
45 | please run `nbstripout --install` from within the poetry venv before committing any changes.
46 | 
47 | To run said notebooks from the environment provided by poetry,
48 | install the required dependencies with `poetry install --extras notebooks`.
49 | 
50 | 


--------------------------------------------------------------------------------
/rapidhrv/visualization.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, List
  2 | 
  3 | import dash
  4 | import pandas as pd
  5 | import plotly.graph_objects as go
  6 | from dash import dcc, html
  7 | from dash.dependencies import Input, Output
  8 | 
  9 | import rapidhrv as rhv
 10 | 
 11 | # Define the type for an option
 12 | OptionType = Dict[str, str]
 13 | 
 14 | 
 15 | def results_graph(non_outliers, outliers, selected_column):
 16 |     fig = go.Figure(
 17 |         [
 18 |             go.Scatter(
 19 |                 x=non_outliers["Time"],
 20 |                 y=non_outliers[selected_column],
 21 |                 name=selected_column,
 22 |                 mode="lines+markers",
 23 |             ),
 24 |             go.Scatter(
 25 |                 x=outliers["Time"],
 26 |                 y=outliers[selected_column],
 27 |                 name="Outliers",
 28 |                 mode="markers",
 29 |             ),
 30 |         ]
 31 |     )
 32 | 
 33 |     fig.update_layout(template="plotly_white", clickmode="event+select")
 34 |     fig.update_traces(marker_size=10)
 35 | 
 36 |     return fig
 37 | 
 38 | 
 39 | def window_graph(window_data):
 40 |     signal, peaks, properties = window_data
 41 |     fig = go.Figure(
 42 |         [
 43 |             go.Scatter(y=signal),
 44 |             go.Scatter(
 45 |                 x=peaks,
 46 |                 y=properties["peak_heights"],
 47 |                 mode="markers",
 48 |                 marker=dict(line_color="darkorange", symbol="x-thin", size=24, line_width=1.5),
 49 |             ),
 50 |         ],
 51 |     )
 52 |     fig.update_layout(showlegend=False, template="plotly_white")
 53 | 
 54 |     return fig
 55 | 
 56 | 
 57 | def visualize(analyzed: pd.DataFrame, debug=False):
 58 |     app = dash.Dash()
 59 | 
 60 |     options: List[OptionType] = [{"label": col, "value": col} for col in rhv.analysis.DATA_COLUMNS]
 61 | 
 62 |     non_outlier_data = analyzed.loc[~analyzed["Outlier"]]
 63 |     outlier_data = analyzed.loc[analyzed["Outlier"]]
 64 | 
 65 |     selected_column = "BPM"
 66 |     results = results_graph(non_outlier_data, outlier_data, selected_column)
 67 | 
 68 |     app.layout = html.Div(
 69 |         [
 70 |             dcc.Dropdown(
 71 |                 id="column-dropdown",
 72 |                 options=options,  # type: ignore
 73 |                 value=selected_column,
 74 |                 clearable=False,
 75 |             ),
 76 |             dcc.Graph(id="results-graph", figure=results),
 77 |             html.Div(id="window-container"),
 78 |         ]
 79 |     )
 80 | 
 81 |     @app.callback(Output("results-graph", "figure"), Input("column-dropdown", "value"))
 82 |     def update_results_graph(column):
 83 |         return results_graph(non_outlier_data, outlier_data, column)
 84 | 
 85 |     @app.callback(Output("window-container", "children"), Input("results-graph", "clickData"))
 86 |     def update_window_graph(click_data):
 87 |         if click_data is None:
 88 |             return []
 89 | 
 90 |         selected_point = click_data["points"][0]
 91 | 
 92 |         if selected_point["curveNumber"] == 0:
 93 |             window_data = non_outlier_data.iloc[selected_point["pointNumber"]]["Window"]
 94 |         elif selected_point["curveNumber"] == 1:
 95 |             window_data = outlier_data.iloc[selected_point["pointNumber"]]["Window"]
 96 | 
 97 |         return [dcc.Graph(figure=window_graph(window_data))]
 98 | 
 99 |     app.run(debug=debug, dev_tools_silence_routes_logging=True)
100 | 
101 | 
if __name__ == "__main__":
    # Demo entry point: run the example data through the default pipeline and
    # open the visualizer with debug enabled.
    example_signal = rhv.data.get_example_data()
    analysis_result = rhv.analyze(rhv.preprocess(example_signal))
    visualize(analysis_result, debug=True)
107 | 


--------------------------------------------------------------------------------
/rapidhrv/data.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import dataclasses
  4 | 
  5 | import h5py
  6 | import numpy as np
  7 | import pandas as pd
  8 | 
  9 | 
 10 | @dataclasses.dataclass
 11 | class OutlierDetectionSettings:
 12 |     """Settings for outlier detection.
 13 | 
 14 |     Attributes
 15 |     ----------
 16 |     bpm_range:
 17 |         Range of acceptable bpm values.
 18 |     rmssd_range:
 19 |         Range of acceptable rmssd values.
 20 |     mad_threshold:
 21 |         Threshold for peak heights and prominences to register as outliers.
 22 |         (in median absolute deviation units)
 23 |     ibi_mad_threshold:
 24 |         Threshold for peak intervals to register as an outliers.
 25 |         (in median absolute deviation units)
 26 |     min_total_peak_distance:
 27 |         Acceptable ratio between total width to distance between first and last peaks in window.
 28 |     """
 29 | 
 30 |     bpm_range: tuple[int, int]
 31 |     rmssd_range: tuple[int, int]
 32 |     mad_threshold: int
 33 |     ibi_mad_threshold: int
 34 |     min_total_peak_distance: float = 0.5
 35 | 
 36 |     @classmethod
 37 |     def from_method(cls, method: str) -> OutlierDetectionSettings:
 38 |         """Generate settings from method name.
 39 | 
 40 |         Method names are: "liberal", "moderate", "conservative".
 41 |         "conservative" is the most stringent, "liberal" is the least and "moderate" is in-between.
 42 |         """
 43 |         if method == "liberal":
 44 |             return OutlierDetectionSettings(
 45 |                 bpm_range=(20, 200), rmssd_range=(0, 300), mad_threshold=7, ibi_mad_threshold=7
 46 |             )
 47 |         elif method == "moderate":
 48 |             return OutlierDetectionSettings(
 49 |                 bpm_range=(30, 190), rmssd_range=(5, 262), mad_threshold=5, ibi_mad_threshold=5
 50 |             )
 51 |         elif method == "conservative":
 52 |             return OutlierDetectionSettings(
 53 |                 bpm_range=(40, 180), rmssd_range=(10, 200), mad_threshold=4, ibi_mad_threshold=4
 54 |             )
 55 |         else:
 56 |             raise RuntimeError(f"Invalid outlier detection method: {method}.")
 57 | 
 58 | 
 59 | @dataclasses.dataclass
 60 | class Signal:
 61 |     """Raw signal with associated metadata.
 62 | 
 63 |     Attributes
 64 |     ----------
 65 |     data:
 66 |         Raw signal data.
 67 |     sample_rate:
 68 |         Signal rate in Hertz of raw signal.
 69 |     """
 70 | 
 71 |     data: np.ndarray
 72 |     sample_rate: int
 73 | 
 74 |     def __post_init__(self):
 75 |         self.data = self.data if isinstance(self.data, np.ndarray) else np.array(self.data)
 76 | 
 77 |     def save(self, filename: str) -> None:
 78 |         """Save as filename.hdf5"""
 79 |         with h5py.File(filename, "w") as f:
 80 |             f["data"] = self.data
 81 |             f.attrs["sample_rate"] = self.sample_rate
 82 | 
 83 |     @classmethod
 84 |     def load(cls, filename: str) -> Signal:
 85 |         """Load from filename.hdf5"""
 86 |         with h5py.File(filename, "r") as f:
 87 |             return cls(
 88 |                 data=f["data"],
 89 |                 sample_rate=int(f.attrs["sample_rate"]),
 90 |             )
 91 | 
 92 |     @classmethod
 93 |     def from_csv(cls, filename: str, sample_rate: int):
 94 |         data = pd.read_csv(filename).to_numpy()[0]
 95 |         return cls(data=data, sample_rate=sample_rate)
 96 | 
 97 |     @classmethod
 98 |     def from_txt(cls, filename: str, sample_rate: int):
 99 |         data = np.loadtxt(filename)
100 |         return cls(data=data, sample_rate=sample_rate)
101 | 
102 | 
def get_example_data() -> Signal:
    """Download the example data hosted on `OSF <https://osf.io>`.

    Returns
    -------
    Signal
        Example data, loaded from CSV with a sample rate of 20 Hz.
    """
    example_url = "https://osf.io/wqnjh/download"
    example_sample_rate = 20
    return Signal.from_csv(example_url, sample_rate=example_sample_rate)
112 | 


--------------------------------------------------------------------------------
/rapidhrv/preprocessing.py:
--------------------------------------------------------------------------------
  1 | import dataclasses
  2 | from typing import Literal, Optional
  3 | 
  4 | import numpy as np
  5 | import scipy.interpolate
  6 | import scipy.signal
  7 | 
  8 | from .data import Signal
  9 | 
 10 | 
 11 | def cubic_spline_interpolation(signal: Signal, resample_rate: int) -> Signal:
 12 |     if resample_rate % signal.sample_rate != 0:
 13 |         raise RuntimeError(
 14 |             f"Cannot resample from {signal.sample_rate=}Hz to {resample_rate=}Hz: "
 15 |             f"{resample_rate % signal.sample_rate=} must be zero."
 16 |         )
 17 | 
 18 |     sample_ratio = resample_rate / signal.sample_rate
 19 |     result_size = len(signal.data) * sample_ratio
 20 |     b_spline = scipy.interpolate.make_interp_spline(
 21 |         np.arange(0, result_size, sample_ratio), signal.data
 22 |     )
 23 |     return Signal(data=b_spline(np.arange(0, result_size)), sample_rate=resample_rate)
 24 | 
 25 | 
 26 | def butterworth_filter(
 27 |     signal: Signal,
 28 |     cutoff_freq: float,
 29 |     filter_type: Literal["highpass", "lowpass"],
 30 | ) -> Signal:
 31 |     nyquist_freq = signal.sample_rate / 2
 32 |     sos = scipy.signal.butter(
 33 |         N=5, Wn=(cutoff_freq / nyquist_freq), btype=filter_type, output="sos"
 34 |     )
 35 |     return dataclasses.replace(signal, data=scipy.signal.sosfiltfilt(sos, signal.data))
 36 | 
 37 | 
 38 | def sg_filter(signal: Signal, sg_settings: tuple[int, int]) -> Signal:
 39 |     poly_order, smoothing_window_ms = sg_settings
 40 |     smoothing_window = (smoothing_window_ms / 1000) * signal.sample_rate
 41 |     smoothing_window = round(smoothing_window)
 42 | 
 43 |     # smoothing_window must be odd
 44 |     if smoothing_window % 2 == 0:
 45 |         smoothing_window += 1
 46 | 
 47 |     return dataclasses.replace(
 48 |         signal, data=scipy.signal.savgol_filter(signal.data, smoothing_window, poly_order)
 49 |     )
 50 | 
 51 | 
 52 | def preprocess(
 53 |     signal: Signal,
 54 |     resample_rate: Optional[int] = 1000,
 55 |     highpass_cutoff: Optional[float] = 0.5,
 56 |     lowpass_cutoff: Optional[float] = None,
 57 |     sg_settings: Optional[tuple[int, int]] = (3, 100),
 58 | ) -> Signal:
 59 |     """Prepares cardiac data for analysis using global functions.
 60 | 
 61 |     Applies in order:
 62 |     cubic spline interpolation,
 63 |     highpass and lowpass Butterworth filters
 64 |     and Savitzky-Golay smoothing.
 65 | 
 66 |     Parameters set to None imply that aspect of the pipeline will not be applied.
 67 |     For example, the default value for `lowpass_cutoff` is None,
 68 |     which implies that by default the lowpass filter will not be applied.
 69 | 
 70 |     Parameters
 71 |     ----------
 72 |     signal : Signal
 73 |         Cardiac signal to be processed.
 74 |     resample_rate : int, default: 1000
 75 |         If greater than `signal.sample_rate`,
 76 |         will be used as the target sample rate (hertz) for cubic spline interpolation.
 77 |         Must be divisible by `signal.sample_rate`.
 78 |     highpass_cutoff : float, default: 0.5
 79 |         Butterworth highpass filter cutoff frequency in hertz.
 80 |     lowpass_cutoff : float, optional
 81 |         Butterworth lowpass filter cutoff frequency in Hertz, filter is off by default.
 82 |     sg_settings : (int, int), default: (3, 100)
 83 |         Savitzky-Golay smoothing parameters,
 84 |         where the first element of the tuple is the polynomial order
 85 |         and the second is the window size in milliseconds.
 86 | 
 87 |     Returns
 88 |     -------
 89 |     array_like
 90 |         Preprocessed signal
 91 |     """
 92 |     nans = np.isnan(signal.data)
 93 |     if np.any(nans):
 94 |         raise RuntimeError(
 95 |             "Cannot preprocess data containing NaN values. "
 96 |             f"First NaN found at index {nans.nonzero()[0][0]}."
 97 |         )
 98 | 
 99 |     if resample_rate is not None and resample_rate > signal.sample_rate:
100 |         result = cubic_spline_interpolation(signal, resample_rate)
101 |     else:
102 |         result = signal
103 | 
104 |     if highpass_cutoff is not None:
105 |         result = butterworth_filter(result, highpass_cutoff, "highpass")
106 | 
107 |     if lowpass_cutoff is not None:
108 |         result = butterworth_filter(result, lowpass_cutoff, "lowpass")
109 | 
110 |     if sg_settings:
111 |         result = sg_filter(result, sg_settings)
112 | 
113 |     return result
114 | 


--------------------------------------------------------------------------------
/resources/tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# RapidHRV Tutorial\n",
  8 |     "\n",
  9 |     "## Basic guide"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import rapidhrv as rhv"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "We have included an example timeseries. This is a class containing a 5 minute segment of simulated PPG data and a specified sampling rate of 20Hz. This can be downloaded using the function below. When using your own data with RapidHRV however, the timeseries should be loaded into RapidHRV's Signal class with the specified sampling rate, e.g. Signal.from_csv('my_data.csv', sample_rate=100)."
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "# Download example signal\n",
 35 |     "\n",
 36 |     "signal = rhv.get_example_data()"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "Using default preprocessing, a signal will then be upsampled to 1000Hz, high-pass filtered, and smoothed. The default analysis will use 'moderate' outlier rejection constraints. This can be altered via the outlier_detection_settings argument (accepting e.g. 'liberal', 'moderate', and 'conservative'; see our manuscript for further details)."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 3,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "# Preprocess and analyze data\n",
 53 |     "\n",
 54 |     "preprocessed = rhv.preprocess(signal)\n",
 55 |     "analyzed = rhv.analyze(preprocessed)"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "The data has now been fully analyzed and can be plotted using rapidhrv's visualization tool. The GUI will allow you to select HR/HRV metrics and inspect the analyzed timeseries. Individual datapoints can then be selected to view the specified windows of extraction. To access the GUI, click on the URL link outputted in the console."
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "# Visualize data\n",
 72 |     "\n",
 73 |     "rhv.visualize(analyzed)"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "markdown",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "## Adjusting preprocessing parameters"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "The preprocessing steps implemented with RapidHRV will suffice for many uses. However, we have provided arguments which enable the user to alter preprocessing parameters. We list some examples below."
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "# As RapidHRV upsamples to 1000Hz by default, this will need to be adjusted if your original sampling rate is \n",
 97 |     "# not a factor of 1000\n",
 98 |     "preprocessed = rhv.preprocess(signal, resample_rate=500) \n",
 99 |     "\n",
100 |     "# The high-pass filter is implemented with a cutoff of 0.5Hz by default, which can be changed with highpass_cutoff.\n",
101 |     "preprocessed = rhv.preprocess(signal, highpass_cutoff=0.05)\n",
102 |     "\n",
103 |     "# To clean high frequency noise, RapidHRV implements Savitzky-Golay smoothing. The polynomial (default 3rd order)\n",
104 |     "# and window (default 100ms) can be changed. E.g. 4th order polynomial and a window width of 200ms:\n",
105 |     "preprocessed = rhv.preprocess(signal, sg_settings=(4,200))\n",
106 |     "\n",
107 |     "# If you still want to apply low-pass filtering nonetheless (which is off by default), the user can specify a \n",
108 |     "# frequency in the lowpass_cutoff argument to turn this on.\n",
109 |     "preprocessed = rhv.preprocess(signal, lowpass_cutoff=50)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "## Adjusting analysis parameters"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "While we have strived to make RapidHRV a modality-general package, tweaks in analysis parameters may yield more sensitivity/accuracy, particularly the stringency of outlier rejection. To make this more user friendly we allow the use of semantically-labelled arguments. Below we list recommendations for dealing with different modalities."
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": null,
129 |    "metadata": {},
130 |    "outputs": [],
131 |    "source": [
132 |     "# For dealing with very clean ECG data, outlier rejection does not need to be too stringent. The user can thus \n",
133 |     "# instruct RapidHRV to take a 'liberal' approach (default=\"moderate\").  \n",
134 |     "analyzed = rhv.analyze(preprocessed, outlier_detection_settings=\"liberal\")\n",
135 |     "\n",
136 |     "# This should suit in most instances. However, for dealing with atypical ECG signals, we have also enabled a k-means\n",
137 |     "# clustering algorithm to help delineate P, R, and T waves.\n",
138 |     "analyzed = rhv.analyze(preprocessed, outlier_detection_settings=\"liberal\", ecg_prt_clustering=True)\n",
139 |     "\n",
140 |     "# For dealing with data with moderate artifacts, RapidHRV's default settings should typically suffice. However,\n",
141 |     "# if the user wishes to have shorter/longer windows, with overlap between them, this can be specified. \n",
142 |     "# For instance, to use 5 second windows with 1 second increments (4 second overlap), this can be specified by:\n",
143 |     "analyzed = rhv.analyze(preprocessed, window_width=5, window_overlap=4)\n",
144 |     "\n",
145 |     "# When dealing with highly noisy data, such as PPG measures, more rigorous measures may be necessary. Here are\n",
146 |     "# recommended parameters for dealing with such data. These reduce the minimum amplitude for peak detection to 30%\n",
147 |     "# (from a default of 50%), increase window overlap, and apply more stringent outlier cleaning tools.\n",
148 |     "analyzed = rhv.analyze(preprocessed, outlier_detection_settings=\"conservative\", amplitude_threshold=30, window_overlap=9)"
149 |    ]
150 |   }
151 |  ],
152 |  "metadata": {
153 |   "kernelspec": {
154 |    "display_name": "Python 3",
155 |    "language": "python",
156 |    "name": "python3"
157 |   },
158 |   "language_info": {
159 |    "codemirror_mode": {
160 |     "name": "ipython",
161 |     "version": 3
162 |    },
163 |    "file_extension": ".py",
164 |    "mimetype": "text/x-python",
165 |    "name": "python",
166 |    "nbconvert_exporter": "python",
167 |    "pygments_lexer": "ipython3",
168 |    "version": "3.9.9"
169 |   }
170 |  },
171 |  "nbformat": 4,
172 |  "nbformat_minor": 1
173 | }
174 | 


--------------------------------------------------------------------------------
/rapidhrv/analysis.py:
--------------------------------------------------------------------------------
  1 | from typing import Union
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | import scipy.interpolate
  6 | import scipy.signal
  7 | import scipy.stats
  8 | import sklearn.cluster
  9 | import sklearn.preprocessing
 10 | 
 11 | from .data import OutlierDetectionSettings, Signal
 12 | 
# Names of the per-window metrics computed by `analyze`, in output column order.
DATA_COLUMNS = ["BPM", "RMSSD", "SDNN", "SDSD", "pNN20", "pNN50", "HF"]
# Full column layout of the DataFrame returned by `analyze`: the window start time,
# the metrics above, the outlier flag and the raw per-window detection data.
DATAFRAME_COLUMNS = ["Time", *DATA_COLUMNS, "Outlier", "Window"]
 15 | 
 16 | 
 17 | def analyze(
 18 |     signal: Signal,
 19 |     window_width: int = 10,
 20 |     window_overlap: int = 0,
 21 |     ecg_prt_clustering: bool = False,
 22 |     amplitude_threshold: int = 50,
 23 |     distance_threshold: int = 250,
 24 |     n_required_peaks: int = 3,
 25 |     outlier_detection_settings: Union[str, OutlierDetectionSettings] = "moderate",
 26 | ) -> pd.DataFrame:
 27 |     """Analyzes cardiac data.
 28 | 
 29 |     Extracts BPM, RMSSD and SDNN from `input_data`.
 30 | 
 31 |     Parameters
 32 |     ----------
 33 |     signal : Signal
 34 |         Cardiac signal to be analyzed.
 35 |     window_width : int, default: 10
 36 |         Width of the sliding window in seconds.
 37 |     window_overlap: int, default: 0
 38 |         Amount of overlap between windows in seconds.
 39 |         Accepts negative values, interpreted as space between windows.
 40 |     ecg_prt_clustering: bool, default: False
 41 |         Use k-means clustering to detect P, R and T waves in the data.
 42 |         Useful for atypical morphologies (e.g. T amplitude > R amplitude).
 43 |         If enabled, `amplitude_threshold` and `distance_threshold` will be ignored.
 44 |     amplitude_threshold: int, default: 50
 45 |         Minimum signal amplitude for a peak to be registered.
 46 |         For PPG data, the recommended value is 30.
 47 |     distance_threshold: int, default: 250
 48 |         Minimum time in milliseconds since last peak for a new peak to be registered.
 49 |     n_required_peaks: int, default: 3
 50 |         Minimum number of peaks in a window required to record analysis for that window.
 51 |         Values less than three are invalid.
 52 |     outlier_detection_settings: str or OutlierDetectionSettings, default: "moderate"
 53 |         Settings for the Outlier detection algorithm.
 54 |         Accepts either an `OutlierDetectionSettings` object, or a string specifying a method.
 55 |         Refer to :class:`OutlierDetectionSettings` for details.
 56 | 
 57 |     Returns
 58 |     -------
 59 |     Dataframe containing Extracted heart data.
 60 |     """
 61 |     # Validate arguments
 62 |     outlier_detection_settings = (
 63 |         OutlierDetectionSettings.from_method(outlier_detection_settings)
 64 |         if isinstance(outlier_detection_settings, str)
 65 |         else outlier_detection_settings
 66 |     )
 67 | 
 68 |     if n_required_peaks < 3:
 69 |         raise ValueError("Parameter 'n_required_peaks' must be greater than three.")
 70 | 
 71 |     # Peak detection settings
 72 |     if ecg_prt_clustering:
 73 |         distance = 1
 74 |         prominence = 5
 75 |     else:
 76 |         distance = int((distance_threshold / 1000) * signal.sample_rate)
 77 |         prominence = amplitude_threshold
 78 | 
 79 |     # Windowing function
 80 |     results = []
 81 |     for sample_start in range(
 82 |         0, len(signal.data), (window_width - window_overlap) * signal.sample_rate
 83 |     ):
 84 |         timestamp = sample_start / signal.sample_rate
 85 | 
 86 |         segment = signal.data[sample_start : sample_start + (window_width * signal.sample_rate)]
 87 |         normalized = sklearn.preprocessing.minmax_scale(segment, (0, 100))
 88 |         peaks, properties = peak_detection(normalized, distance, prominence, ecg_prt_clustering)
 89 |         window_data = (normalized, peaks, properties)
 90 | 
 91 |         ibi = np.diff(peaks) * 1000 / signal.sample_rate
 92 |         sd = np.diff(ibi)
 93 | 
 94 |         if len(peaks) <= n_required_peaks:
 95 |             results.append([timestamp, *[np.nan] * len(DATA_COLUMNS), True, window_data])
 96 |         else:
 97 |             # Time-domain metrics
 98 |             bpm = ((len(peaks) - 1) / ((peaks[-1] - peaks[0]) / signal.sample_rate)) * 60
 99 |             rmssd = np.sqrt(np.mean(np.square(sd)))
100 |             sdnn = np.std(ibi)
101 |             sdsd = np.std(sd)  # Standard deviation of successive differences
102 |             p_nn20 = np.sum(sd > 20) / len(sd)  # Proportion of successive differences > 20ms
103 |             p_nn50 = np.sum(sd > 50) / len(sd)  # Proportion of successive differences > 50ms
104 | 
105 |             # Frequency-domain metrics
106 |             hf = frequency_domain(x=ibi, sfreq=signal.sample_rate)
107 | 
108 |             is_outlier = outlier_detection(
109 |                 peaks,
110 |                 properties,
111 |                 ibi,
112 |                 signal.sample_rate,
113 |                 window_width,
114 |                 bpm,
115 |                 rmssd,
116 |                 outlier_detection_settings,
117 |             )
118 | 
119 |             results.append(
120 |                 [timestamp, bpm, rmssd, sdnn, sdsd, p_nn20, p_nn50, hf, is_outlier, window_data]
121 |             )
122 | 
123 |     return pd.DataFrame(
124 |         results,
125 |         columns=DATAFRAME_COLUMNS,
126 |     )
127 | 
128 | 
def peak_detection(
    segment: np.ndarray, distance: int, prominence: int, use_clustering: bool
) -> tuple[np.ndarray, dict]:
    """Returns the indexes of detected peaks and associated properties.

    Parameters
    ----------
    segment : np.ndarray
        Signal window in which to look for peaks.
    distance : int
        Minimum number of samples between neighbouring peaks.
    prominence : int
        Minimum prominence for a peak to be registered.
    use_clustering : bool
        If True and at least three peaks were found, cluster the peaks with
        k-means (three clusters) and keep only the cluster selected as the wave
        of interest.

    Returns
    -------
    tuple of (np.ndarray, dict)
        Indexes of the retained peaks and the `scipy.signal.find_peaks`
        properties restricted to those peaks.
    """
    peaks, properties = scipy.signal.find_peaks(
        segment, distance=distance, prominence=prominence, height=0, width=0
    )

    # Attempt to determine correct peaks by distinguishing the R wave from P and T waves
    if len(peaks) >= 3 and use_clustering:
        features = np.column_stack(
            (properties["widths"], properties["peak_heights"], properties["prominences"])
        )
        k_means = sklearn.cluster.KMeans(n_clusters=3).fit(features)

        # Use width centroids to determine correct wave (least width, most prominence)
        # If the two lowest values are too close (< 5), use prominence to distinguish them
        width_cen = k_means.cluster_centers_[:, 0]
        labels_sort_width = np.argsort(width_cen)
        narrowest = labels_sort_width[:2]  # Labels of the two narrowest clusters
        if width_cen[narrowest[1]] - width_cen[narrowest[0]] < 5:
            # Label of maximum prominence for lowest two widths.
            # argsort works on the two-element subset, so its result is a position
            # (0 or 1) that has to be mapped back to the actual cluster label.
            prom_cen = k_means.cluster_centers_[:, 2]
            wave_label = narrowest[np.argsort(prom_cen[narrowest])[1]]
        else:
            wave_label = narrowest[0]

        is_wave_peak = k_means.labels_ == wave_label

        wave_peaks = peaks[is_wave_peak]
        wave_props = {k: v[is_wave_peak] for k, v in properties.items()}
    else:
        wave_peaks = peaks
        wave_props = properties

    # @PeterKirk does this need to be > 3 or >= 3?
    # Also, should this potentially be done before clustering?
    if len(wave_peaks) > 3:
        # Approximate prominences at edges of window: the first and last peak reuse
        # the base height of their nearest neighbour instead of their own.
        base_height = segment[wave_peaks] - wave_props["prominences"]
        wave_props["prominences"][0] = wave_props["peak_heights"][0] - base_height[1]
        wave_props["prominences"][-1] = wave_props["peak_heights"][-1] - base_height[-2]

    return wave_peaks, wave_props
173 | 
174 | 
def frequency_domain(x, sfreq: int = 5):
    """Extracts the high-frequency (HF) power of heart rate variability.

    This function and docstring was modified from Systole
    (https://github.com/embodied-computation-group/systole).

    Parameters
    ----------
    x : np.ndarray or list
        Interval time-series (R-R, beat-to-beat...), in milliseconds.
    sfreq : int
        The sampling frequency (Hz) at which the interval series is resampled
        before the power spectral density is estimated.

    Returns
    -------
    float
        Power (ms**2) in the high-frequency band (0.15 Hz inclusive to 0.4 Hz
        exclusive). NaN is returned when `x` holds fewer than four intervals or
        when no spectral estimate falls inside the band.
    """
    if len(x) < 4:  # RapidHRV edit: Can't run with less than 4 IBIs
        return np.nan

    # Interpolate R-R interval onto an evenly spaced time axis (step of 1000 / sfreq ms)
    time = np.cumsum(x)
    f = scipy.interpolate.interp1d(time, x, kind="cubic")
    new_time = np.arange(time[0], time[-1], 1000 / sfreq)
    x = f(new_time)

    # Define window length, capped at the number of available samples
    nperseg = min(256 * sfreq, len(x))

    # Compute Power Spectral Density
    freq, psd = scipy.signal.welch(x=x, fs=sfreq, nperseg=nperseg, nfft=nperseg)
    # This scaling is undone when the power is computed below; it is kept so the
    # numerical result stays identical to the original implementation.
    psd = psd / 1000000

    # Restrict the spectrum to the high-frequency band
    hf_low, hf_high = 0.15, 0.4
    in_band = (freq >= hf_low) & (freq < hf_high)
    this_psd = psd[in_band]
    this_freq = freq[in_band]

    if len(this_psd) == 0:  # RapidHRV edit: if no power
        return np.nan

    # np.trapezoid only exists in NumPy >= 2.0; older releases call it np.trapz
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

    # Power (ms**2)
    return float(integrate(x=this_freq, y=this_psd) * 1000000)
240 | 
241 | 
def outlier_detection(
    peaks: np.ndarray,
    peak_properties: dict,
    ibi: np.ndarray,
    sample_rate: int,
    window_width: int,
    bpm: float,
    rmssd: float,
    settings: OutlierDetectionSettings,
) -> bool:
    """Returns True if the window should be flagged as an outlier.

    The checks run in this order and the first failing one flags the window:

    1. `bpm` and `rmssd` must lie strictly inside `settings.bpm_range` and
       `settings.rmssd_range`.
    2. The time between the first and last peak must be at least
       `window_width * settings.min_total_peak_distance`.
    3. No peak prominence, peak height or inter-beat interval may deviate from
       its median by more than the median absolute deviation multiplied by
       the matching threshold (`settings.mad_threshold` for prominences and
       heights, `settings.ibi_mad_threshold` for `ibi`).
    """
    bpm_ok = settings.bpm_range[0] < bpm < settings.bpm_range[1]
    rmssd_ok = settings.rmssd_range[0] < rmssd < settings.rmssd_range[1]
    if not bpm_ok or not rmssd_ok:
        return True

    # Time covered between the first and the last detected peak
    peak_span = (peaks[-1] - peaks[0]) / sample_rate
    required_span = window_width * settings.min_total_peak_distance
    if peak_span < required_span:
        return True

    def has_mad_outlier(values: np.ndarray, threshold: float) -> bool:
        # Center on the median, then compare each deviation against the scaled MAD
        centered = values - np.median(values)
        limit = scipy.stats.median_abs_deviation(centered) * threshold
        return bool(np.any(np.abs(centered) > limit))

    # `or` short-circuits, so later series are only inspected if earlier ones are clean
    return (
        has_mad_outlier(peak_properties["prominences"], settings.mad_threshold)
        or has_mad_outlier(peak_properties["peak_heights"], settings.mad_threshold)
        or has_mad_outlier(ibi, settings.ibi_mad_threshold)
    )
283 | 


--------------------------------------------------------------------------------