├── .flake8 ├── .dockerignore ├── .gitignore ├── .gitattributes ├── .vscode ├── settings.json └── tasks.json ├── Dockerfile ├── rapidhrv ├── __init__.py ├── visualization.py ├── data.py ├── preprocessing.py └── analysis.py ├── .github ├── actions │ └── setup │ │ └── action.yaml └── workflows │ ├── verify.yaml │ └── publish.yml ├── tests └── test_rapidhrv.py ├── LICENSE ├── pyproject.toml ├── README.md └── resources └── tutorial.ipynb /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203 3 | max-line-length = 99 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *. 2 | Dockerfile 3 | README.md 4 | LICENSE 5 | notebooks/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | .ipynb_checkpoints/ 4 | .mypy_cache/ 5 | .pytest_cache/ 6 | dist/ 7 | 8 | *.hdf5 9 | *.csv 10 | .coverage 11 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.npy filter=lfs diff=lfs merge=lfs -text 2 | *.hdf5 filter=lfs diff=lfs merge=lfs -text 3 | 4 | *.ipynb filter=nbstripout 5 | *.zpln filter=nbstripout 6 | *.ipynb diff=ipynb 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black", 3 | "python.testing.pytestArgs": [ 4 | "tests" 5 | ], 6 | "python.testing.unittestEnabled": false, 7 | "python.testing.pytestEnabled": true 8 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | RUN pip install "poetry==1.1.7" 4 | RUN poetry config virtualenvs.create false 5 | 6 | WORKDIR /app 7 | COPY poetry.lock pyproject.toml /app/ 8 | 9 | RUN poetry install --no-interaction --no-ansi 10 | 11 | COPY . /app -------------------------------------------------------------------------------- /rapidhrv/__init__.py: -------------------------------------------------------------------------------- 1 | from .analysis import analyze 2 | from .data import OutlierDetectionSettings, Signal, get_example_data 3 | from .preprocessing import preprocess 4 | from .visualization import visualize 5 | 6 | __all__ = ( 7 | "analyze", 8 | "OutlierDetectionSettings", 9 | "Signal", 10 | "get_example_data", 11 | "preprocess", 12 | "visualize", 13 | ) 14 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "label": "verify", 8 | "type": "shell", 9 | "command": "poetry run poe test", 10 | "problemMatcher": [] 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /.github/actions/setup/action.yaml: -------------------------------------------------------------------------------- 1 | name: Setup RapidHRV 2 | description: Setup RapidHRV 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Setup Python 7 | uses: actions/setup-python@v2 8 | with: 9 | python-version: "3.12" 10 | - name: Install Poetry 11 | run: pip install poetry 12 | shell: bash 13 | - name: Install Dependencies in Virtual Environment 14 | run: poetry install 15 | shell: bash 16 | -------------------------------------------------------------------------------- /.github/workflows/verify.yaml: -------------------------------------------------------------------------------- 1 | name: verify 2 | on: [push, pull_request] 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v2 8 | - uses: ./.github/actions/setup 9 | - name: Run Tests 10 | run: poetry run poe test 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: ./.github/actions/setup 16 | - name: Build project 17 | run: poetry build 18 | -------------------------------------------------------------------------------- /tests/test_rapidhrv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import rapidhrv as rhv 4 | 5 | 6 | def test_pipeline(): 7 | """Basic smoke test""" 8 | signal = rhv.get_example_data() 9 | preprocessed = rhv.preprocess(signal) 10 | result = rhv.analyze(preprocessed) 11 | bpm = np.nanmean(result["BPM"]) 12 | rmssd = np.nanmean(result["RMSSD"]) 13 | 14 | assert result is not None 15 | assert len(result) > 0 16 | assert not np.all(np.isnan(result["BPM"])) 17 | assert 59 < bpm < 61 18 | assert 26 < rmssd < 28 19 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | pypi-publish: 12 | runs-on: ubuntu-latest 13 | environment: 14 | name: pypi 15 | url: https://pypi.org/project/rapidhrv 16 | permissions: 17 | id-token: write 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: ./.github/actions/setup 21 | - name: Build project 22 | run: poetry build 23 | - name: Publish package distributions to PyPI 24 | uses: pypa/gh-action-pypi-publish@release/v1 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Peter A. Kirk, Sarah N. Garfinkel, & Oliver J. Robinson. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "rapidhrv" 3 | version = "0.2.7" 4 | description = "A package for preprocessing, analyzing and visualizing cardiac data" 5 | authors = [ 6 | "Peter Kirk ", 7 | "Alexander Davidson Bryan ", 8 | ] 9 | license = "MIT" 10 | readme = "README.md" 11 | 12 | [tool.poe.tasks] 13 | format = [{ cmd = "black ." }, { cmd = "isort ." }] 14 | pytest = "pytest --cov=rapidhrv ." 15 | test = [ 16 | { cmd = "black --check ." }, 17 | { cmd = "isort --check ." }, 18 | { cmd = "flake8 ." }, 19 | { cmd = "mypy ." }, 20 | { ref = "pytest" }, 21 | ] 22 | 23 | [tool.mypy] 24 | ignore_missing_imports = true 25 | 26 | [tool.black] 27 | line-length = 99 28 | 29 | [tool.poetry.dependencies] 30 | python = ">=3.11,<3.13" 31 | numpy = "^2.3.2" 32 | scipy = "^1.16.1" 33 | scikit-learn = "^1.7.1" 34 | pandas = "^2.3.2" 35 | jupyter = { version = "^1.0.0", optional = true } 36 | matplotlib = { version = "^3.4.2", optional = true } 37 | h5py = "^3.3.0" 38 | dash = "^3.2.0" 39 | 40 | [tool.poetry.group.dev.dependencies] 41 | black = "^25.1.0" 42 | isort = "^6.0.1" 43 | mypy = "^1.17.1" 44 | pytest = "^8.4.1" 45 | nbstripout = "^0.8.1" 46 | flake8 = "^7.3.0" 47 | poethepoet = "^0.37.0" 48 | pytest-cov = "^6.2.1" 49 | 50 | [tool.poetry.extras] 51 | notebooks = ["jupyter", "matplotlib"] 52 | 53 | [build-system] 54 | requires = ["poetry-core>=1.0.0"] 55 | build-backend = "poetry.core.masonry.api" 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RapidHRV 2 | 3 | RapidHRV is a data processing pipeline for the analysis and visualization of cardiac data. 4 | 5 | Please provide credit where appropriate: 6 | 7 | Kirk, P. A., Bryan, A. D., Garfinkel, S. N., & Robinson, O. J. (2022). RapidHRV: An open-source toolbox for extracting heart rate and heart rate variability. PeerJ, 10, e13147. https://doi.org/10.7717/peerj.13147 8 | 9 | This library is distributed under an 10 | [MIT License](https://raw.githubusercontent.com/peterakirk/RapidHRV/main/LICENSE) 11 | 12 | ## Installation 13 | 14 | ```shell 15 | pip install rapidhrv 16 | ``` 17 | 18 | ## Usage 19 | 20 | Given a numpy array, or something convertable to it (such as a list), 21 | `rapidhrv.preprocess` can generate input suitable for analysis with 22 | `rapidhrv.analyze`, which will return a pandas dataframe containing HRV data. 23 | 24 | ```python 25 | import numpy as np 26 | import rapidhrv as rhv 27 | 28 | my_data = np.load("my_data.npy") # Load data 29 | data = rhv.Signal(my_data, sample_rate=50) # Convert to rhv Signal class 30 | preprocessed = rhv.preprocess(data) # Preprocess: may interpolate data, check the docstring on `rapidhrv.preprocess` 31 | result = rhv.analyze(preprocessed) # Analyze signal 32 | ``` 33 | 34 | ## Documentation 35 | 36 | Please see the included [tutorial notebook](https://github.com/peterakirk/RapidHRV/blob/main/resources/tutorial.ipynb). 37 | 38 | ## Development 39 | 40 | In order to get a working development environment, 41 | please install [Poetry](https://python-poetry.org/) for your platform, 42 | and run `poetry install` to generate a virtual environment. 43 | 44 | If you plan on making any changes to the included notebooks, 45 | please run `nbstripout --install` from within the poetry venv before committing any changes. 46 | 47 | To run said notebooks from the environment provided by poetry, 48 | install the required dependencies with `poetry install --extras notebooks`. 49 | 50 | -------------------------------------------------------------------------------- /rapidhrv/visualization.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import dash 4 | import pandas as pd 5 | import plotly.graph_objects as go 6 | from dash import dcc, html 7 | from dash.dependencies import Input, Output 8 | 9 | import rapidhrv as rhv 10 | 11 | # Define the type for an option 12 | OptionType = Dict[str, str] 13 | 14 | 15 | def results_graph(non_outliers, outliers, selected_column): 16 | fig = go.Figure( 17 | [ 18 | go.Scatter( 19 | x=non_outliers["Time"], 20 | y=non_outliers[selected_column], 21 | name=selected_column, 22 | mode="lines+markers", 23 | ), 24 | go.Scatter( 25 | x=outliers["Time"], 26 | y=outliers[selected_column], 27 | name="Outliers", 28 | mode="markers", 29 | ), 30 | ] 31 | ) 32 | 33 | fig.update_layout(template="plotly_white", clickmode="event+select") 34 | fig.update_traces(marker_size=10) 35 | 36 | return fig 37 | 38 | 39 | def window_graph(window_data): 40 | signal, peaks, properties = window_data 41 | fig = go.Figure( 42 | [ 43 | go.Scatter(y=signal), 44 | go.Scatter( 45 | x=peaks, 46 | y=properties["peak_heights"], 47 | mode="markers", 48 | marker=dict(line_color="darkorange", symbol="x-thin", size=24, line_width=1.5), 49 | ), 50 | ], 51 | ) 52 | fig.update_layout(showlegend=False, template="plotly_white") 53 | 54 | return fig 55 | 56 | 57 | def visualize(analyzed: pd.DataFrame, debug=False): 58 | app = dash.Dash() 59 | 60 | options: List[OptionType] = [{"label": col, "value": col} for col in rhv.analysis.DATA_COLUMNS] 61 | 62 | non_outlier_data = analyzed.loc[~analyzed["Outlier"]] 63 | outlier_data = analyzed.loc[analyzed["Outlier"]] 64 | 65 | selected_column = "BPM" 66 | results = results_graph(non_outlier_data, outlier_data, selected_column) 67 | 68 | app.layout = html.Div( 69 | [ 70 | dcc.Dropdown( 71 | id="column-dropdown", 72 | options=options, # type: ignore 73 | value=selected_column, 74 | clearable=False, 75 | ), 76 | dcc.Graph(id="results-graph", figure=results), 77 | html.Div(id="window-container"), 78 | ] 79 | ) 80 | 81 | @app.callback(Output("results-graph", "figure"), Input("column-dropdown", "value")) 82 | def update_results_graph(column): 83 | return results_graph(non_outlier_data, outlier_data, column) 84 | 85 | @app.callback(Output("window-container", "children"), Input("results-graph", "clickData")) 86 | def update_window_graph(click_data): 87 | if click_data is None: 88 | return [] 89 | 90 | selected_point = click_data["points"][0] 91 | 92 | if selected_point["curveNumber"] == 0: 93 | window_data = non_outlier_data.iloc[selected_point["pointNumber"]]["Window"] 94 | elif selected_point["curveNumber"] == 1: 95 | window_data = outlier_data.iloc[selected_point["pointNumber"]]["Window"] 96 | 97 | return [dcc.Graph(figure=window_graph(window_data))] 98 | 99 | app.run(debug=debug, dev_tools_silence_routes_logging=True) 100 | 101 | 102 | if __name__ == "__main__": 103 | signal = rhv.data.get_example_data() 104 | preprocessed = rhv.preprocess(signal) 105 | analyzed = rhv.analyze(preprocessed) 106 | visualize(analyzed, debug=True) 107 | -------------------------------------------------------------------------------- /rapidhrv/data.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import dataclasses 4 | 5 | import h5py 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | @dataclasses.dataclass 11 | class OutlierDetectionSettings: 12 | """Settings for outlier detection. 13 | 14 | Attributes 15 | ---------- 16 | bpm_range: 17 | Range of acceptable bpm values. 18 | rmssd_range: 19 | Range of acceptable rmssd values. 20 | mad_threshold: 21 | Threshold for peak heights and prominences to register as outliers. 22 | (in median absolute deviation units) 23 | ibi_mad_threshold: 24 | Threshold for peak intervals to register as an outliers. 25 | (in median absolute deviation units) 26 | min_total_peak_distance: 27 | Acceptable ratio between total width to distance between first and last peaks in window. 28 | """ 29 | 30 | bpm_range: tuple[int, int] 31 | rmssd_range: tuple[int, int] 32 | mad_threshold: int 33 | ibi_mad_threshold: int 34 | min_total_peak_distance: float = 0.5 35 | 36 | @classmethod 37 | def from_method(cls, method: str) -> OutlierDetectionSettings: 38 | """Generate settings from method name. 39 | 40 | Method names are: "liberal", "moderate", "conservative". 41 | "conservative" is the most stringent, "liberal" is the least and "moderate" is in-between. 42 | """ 43 | if method == "liberal": 44 | return OutlierDetectionSettings( 45 | bpm_range=(20, 200), rmssd_range=(0, 300), mad_threshold=7, ibi_mad_threshold=7 46 | ) 47 | elif method == "moderate": 48 | return OutlierDetectionSettings( 49 | bpm_range=(30, 190), rmssd_range=(5, 262), mad_threshold=5, ibi_mad_threshold=5 50 | ) 51 | elif method == "conservative": 52 | return OutlierDetectionSettings( 53 | bpm_range=(40, 180), rmssd_range=(10, 200), mad_threshold=4, ibi_mad_threshold=4 54 | ) 55 | else: 56 | raise RuntimeError(f"Invalid outlier detection method: {method}.") 57 | 58 | 59 | @dataclasses.dataclass 60 | class Signal: 61 | """Raw signal with associated metadata. 62 | 63 | Attributes 64 | ---------- 65 | data: 66 | Raw signal data. 67 | sample_rate: 68 | Signal rate in Hertz of raw signal. 69 | """ 70 | 71 | data: np.ndarray 72 | sample_rate: int 73 | 74 | def __post_init__(self): 75 | self.data = self.data if isinstance(self.data, np.ndarray) else np.array(self.data) 76 | 77 | def save(self, filename: str) -> None: 78 | """Save as filename.hdf5""" 79 | with h5py.File(filename, "w") as f: 80 | f["data"] = self.data 81 | f.attrs["sample_rate"] = self.sample_rate 82 | 83 | @classmethod 84 | def load(cls, filename: str) -> Signal: 85 | """Load from filename.hdf5""" 86 | with h5py.File(filename, "r") as f: 87 | return cls( 88 | data=f["data"], 89 | sample_rate=int(f.attrs["sample_rate"]), 90 | ) 91 | 92 | @classmethod 93 | def from_csv(cls, filename: str, sample_rate: int): 94 | data = pd.read_csv(filename).to_numpy()[0] 95 | return cls(data=data, sample_rate=sample_rate) 96 | 97 | @classmethod 98 | def from_txt(cls, filename: str, sample_rate: int): 99 | data = np.loadtxt(filename) 100 | return cls(data=data, sample_rate=sample_rate) 101 | 102 | 103 | def get_example_data() -> Signal: 104 | """Function to get example data from `OSF ` 105 | 106 | Returns 107 | ------- 108 | array_like 109 | example data 110 | """ 111 | return Signal.from_csv("https://osf.io/wqnjh/download", sample_rate=20) 112 | -------------------------------------------------------------------------------- /rapidhrv/preprocessing.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | from typing import Literal, Optional 3 | 4 | import numpy as np 5 | import scipy.interpolate 6 | import scipy.signal 7 | 8 | from .data import Signal 9 | 10 | 11 | def cubic_spline_interpolation(signal: Signal, resample_rate: int) -> Signal: 12 | if resample_rate % signal.sample_rate != 0: 13 | raise RuntimeError( 14 | f"Cannot resample from {signal.sample_rate=}Hz to {resample_rate=}Hz: " 15 | f"{resample_rate % signal.sample_rate=} must be zero." 16 | ) 17 | 18 | sample_ratio = resample_rate / signal.sample_rate 19 | result_size = len(signal.data) * sample_ratio 20 | b_spline = scipy.interpolate.make_interp_spline( 21 | np.arange(0, result_size, sample_ratio), signal.data 22 | ) 23 | return Signal(data=b_spline(np.arange(0, result_size)), sample_rate=resample_rate) 24 | 25 | 26 | def butterworth_filter( 27 | signal: Signal, 28 | cutoff_freq: float, 29 | filter_type: Literal["highpass", "lowpass"], 30 | ) -> Signal: 31 | nyquist_freq = signal.sample_rate / 2 32 | sos = scipy.signal.butter( 33 | N=5, Wn=(cutoff_freq / nyquist_freq), btype=filter_type, output="sos" 34 | ) 35 | return dataclasses.replace(signal, data=scipy.signal.sosfiltfilt(sos, signal.data)) 36 | 37 | 38 | def sg_filter(signal: Signal, sg_settings: tuple[int, int]) -> Signal: 39 | poly_order, smoothing_window_ms = sg_settings 40 | smoothing_window = (smoothing_window_ms / 1000) * signal.sample_rate 41 | smoothing_window = round(smoothing_window) 42 | 43 | # smoothing_window must be odd 44 | if smoothing_window % 2 == 0: 45 | smoothing_window += 1 46 | 47 | return dataclasses.replace( 48 | signal, data=scipy.signal.savgol_filter(signal.data, smoothing_window, poly_order) 49 | ) 50 | 51 | 52 | def preprocess( 53 | signal: Signal, 54 | resample_rate: Optional[int] = 1000, 55 | highpass_cutoff: Optional[float] = 0.5, 56 | lowpass_cutoff: Optional[float] = None, 57 | sg_settings: Optional[tuple[int, int]] = (3, 100), 58 | ) -> Signal: 59 | """Prepares cardiac data for analysis using global functions. 60 | 61 | Applies in order: 62 | cubic spline interpolation, 63 | highpass and lowpass Butterworth filters 64 | and Savitzky-Golay smoothing. 65 | 66 | Parameters set to None imply that aspect of the pipeline will not be applied. 67 | For example, the default value for `lowpass_cutoff` is None, 68 | which implies that by default the lowpass filter will not be applied. 69 | 70 | Parameters 71 | ---------- 72 | signal : Signal 73 | Cardiac signal to be processed. 74 | resample_rate : int, default: 1000 75 | If greater than `signal.sample_rate`, 76 | will be used as the target sample rate (hertz) for cubic spline interpolation. 77 | Must be divisible by `signal.sample_rate`. 78 | highpass_cutoff : float, default: 0.5 79 | Butterworth highpass filter cutoff frequency in hertz. 80 | lowpass_cutoff : float, optional 81 | Butterworth lowpass filter cutoff frequency in Hertz, filter is off by default. 82 | sg_settings : (int, int), default: (3, 100) 83 | Savitzky-Golay smoothing parameters, 84 | where the first element of the tuple is the polynomial order 85 | and the second is the window size in milliseconds. 86 | 87 | Returns 88 | ------- 89 | array_like 90 | Preprocessed signal 91 | """ 92 | nans = np.isnan(signal.data) 93 | if np.any(nans): 94 | raise RuntimeError( 95 | "Cannot preprocess data containing NaN values. " 96 | f"First NaN found at index {nans.nonzero()[0][0]}." 97 | ) 98 | 99 | if resample_rate is not None and resample_rate > signal.sample_rate: 100 | result = cubic_spline_interpolation(signal, resample_rate) 101 | else: 102 | result = signal 103 | 104 | if highpass_cutoff is not None: 105 | result = butterworth_filter(result, highpass_cutoff, "highpass") 106 | 107 | if lowpass_cutoff is not None: 108 | result = butterworth_filter(result, lowpass_cutoff, "lowpass") 109 | 110 | if sg_settings: 111 | result = sg_filter(result, sg_settings) 112 | 113 | return result 114 | -------------------------------------------------------------------------------- /resources/tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RapidHRV Tutorial\n", 8 | "\n", 9 | "## Basic guide" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import rapidhrv as rhv" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "We have included an example timeseries. This is a class containing a 5 minute segment of simulated PPG data and a specified sampling rate of 20Hz. This can be downloaded using the function below. When using your own data with RapidHRV however, the timeseries should be loaded into RapidHRV's Signal class with the specified sampling rate, e.g. Signal.from_csv('my_data.csv', sampling_rate=100)." 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# Download example signal\n", 35 | "\n", 36 | "signal = rhv.get_example_data()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Using default preprocessing, a signal will then be upsampled to 1000Hz, high-pass filtered, and smoothed. The default analysis will use 'moderate' outlier rejection constraints. This can be altered via the outlier_detection_settings argument (accepting e.g. 'liberal', 'moderate', and 'conservative'; see our manuscript for further details)." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Preprocess and analyze data\n", 53 | "\n", 54 | "preprocessed = rhv.preprocess(signal)\n", 55 | "analyzed = rhv.analyze(preprocessed)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "The data has now been fully analyzed and be plotted using rapidhrv's visualization tool. The GUI will allow you to select HR/HRV metrics and inspect the analyzed timeseries. Individual datapoints can then be selected to view the specified windows of extraction. To access the GUI, click on the URL link outputted in the console." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# Visualize data\n", 72 | "\n", 73 | "rhv.visualize(analyzed)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Adjusting preprocessing parameters" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "The preprocessing steps implemented with RapidhRV will suffice for many uses. However, we have provided arguments which enable the user to alter preprocessing parameters. We list some examples below." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# As RapidHRV upsamples to 1000Hz by default, this will need to be adjusted if your original sampling rate is \n", 97 | "# not a factor of 1000\n", 98 | "preprocessed = rhv.preprocess(signal, resample_rate=500) \n", 99 | "\n", 100 | "# The high-pass filter is implemented with a cutoff of 0.5Hz by default, which can be changed with highpass_cutoff.\n", 101 | "preprocessed = rhv.preprocess(signal, highpass_cutoff=0.05)\n", 102 | "\n", 103 | "# To clean high frequency noise, RapidHRV imeplements Savitzky-Golay smoothing. The polynomial (default 3rd order)\n", 104 | "# and window (default 100ms) can be changed. E.g. 4th order polynomial and a window width of 200ms:\n", 105 | "preprocessed = rhv.preprocess(signal, sg_settings=(4,200))\n", 106 | "\n", 107 | "# If you still want to apply low-pass filtering nonetheless (which is off by default), the user can specify a \n", 108 | "# frequency in the lowpass_cutoff argument to turn this on.\n", 109 | "preprocessed = rhv.preprocess(signal, lowpass_cutoff=50)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## Adjusting analysis parameters" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "While we have strived to make RapidHRV a modality-general package, tweaks in analysis parameters may yield more sensitivty/accuracy, particularly the stringency of outlier rejection. To make this more user friendly we allow the use of semantically-labelled arguments. Below we list recommendations for dealing with different modalities." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# For dealing with very clean ECG data, outlier rejection does not need to be too stringent. The user can thus \n", 133 | "# instruct RapidHRV to take a 'liberal' approach (default=\"moderate\"). \n", 134 | "analyzed = rhv.analyze(preprocessed, outlier_detection_settings=\"liberal\")\n", 135 | "\n", 136 | "# This should suit in most instances. However, for dealing atypical ECG signals, we have also enabled a k-means\n", 137 | "# clustering algorithm to help delineate P, R, and T waves.\n", 138 | "analyzed = rhv.analyze(preprocessed, outlier_detection_settings=\"liberal\", ecg_prt_clustering=True)\n", 139 | "\n", 140 | "# For dealing with data with moderate artifacts, RapidHRV's default settings should typically suffice. However,\n", 141 | "# if the user wishes to have shorter/longer windows, with overlap between them, this can be specified. \n", 142 | "# For instance, to use 5 second windows with 1 second increments (4 second overlap), this can be specified by:\n", 143 | "analyzed = rhv.analyze(preprocessed, window_width=5, window_overlap=4)\n", 144 | "\n", 145 | "# When dealing with highly noisy data, such as PPG measures, more rigorous measures may be necessary. Here are\n", 146 | "# recommended parameters for dealing with such data. These reduce the minimum amplitude for peak detection to 30%\n", 147 | "# (from a default of 50%), increase window overlap, and apply more stringent outlier cleaning tools.\n", 148 | "analyzed = rhv.analyze(preprocessed, outlier_detection_settings=\"conservative\", amplitude_threshold=30, window_overlap=9)" 149 | ] 150 | } 151 | ], 152 | "metadata": { 153 | "kernelspec": { 154 | "display_name": "Python 3", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.9.9" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 1 173 | } 174 | -------------------------------------------------------------------------------- /rapidhrv/analysis.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import scipy.interpolate 6 | import scipy.signal 7 | import scipy.stats 8 | import sklearn.cluster 9 | import sklearn.preprocessing 10 | 11 | from .data import OutlierDetectionSettings, Signal 12 | 13 | DATA_COLUMNS = ["BPM", "RMSSD", "SDNN", "SDSD", "pNN20", "pNN50", "HF"] 14 | DATAFRAME_COLUMNS = ["Time", *DATA_COLUMNS, "Outlier", "Window"] 15 | 16 | 17 | def analyze( 18 | signal: Signal, 19 | window_width: int = 10, 20 | window_overlap: int = 0, 21 | ecg_prt_clustering: bool = False, 22 | amplitude_threshold: int = 50, 23 | distance_threshold: int = 250, 24 | n_required_peaks: int = 3, 25 | outlier_detection_settings: Union[str, OutlierDetectionSettings] = "moderate", 26 | ) -> pd.DataFrame: 27 | """Analyzes cardiac data. 28 | 29 | Extracts BPM, RMSSD and SDNN from `input_data`. 30 | 31 | Parameters 32 | ---------- 33 | signal : Signal 34 | Cardiac signal to be analyzed. 35 | window_width : int, default: 10 36 | Width of the sliding window in seconds. 37 | window_overlap: int, default: 0 38 | Amount of overlap between windows in seconds. 39 | Accepts negative values, interpreted as space between windows. 40 | ecg_prt_clustering: bool, default: False 41 | Use k-means clustering to detect P, R and T waves in the data. 42 | Useful for atypical morphologies (e.g. T amplitude > R amplitude). 43 | If enabled, `amplitude_threshold` and `distance_threshold` will be ignored. 44 | amplitude_threshold: int, default: 50 45 | Minimum signal amplitude for a peak to be registered. 46 | For PPG data, the recommended value is 30. 47 | distance_threshold: int, default: 250 48 | Minimum time in milliseconds since last peak for a new peak to be registered. 49 | n_required_peaks: int, default: 3 50 | Minimum number of peaks in a window required to record analysis for that window. 51 | Values less than three are invalid. 52 | outlier_detection_settings: str or OutlierDetectionSettings, default: "moderate" 53 | Settings for the Outlier detection algorithm. 54 | Accepts either an `OutlierDetectionSettings` object, or a string specifying a method. 55 | Refer to :class:`OutlierDetectionSettings` for details. 56 | 57 | Returns 58 | ------- 59 | Dataframe containing Extracted heart data. 60 | """ 61 | # Validate arguments 62 | outlier_detection_settings = ( 63 | OutlierDetectionSettings.from_method(outlier_detection_settings) 64 | if isinstance(outlier_detection_settings, str) 65 | else outlier_detection_settings 66 | ) 67 | 68 | if n_required_peaks < 3: 69 | raise ValueError("Parameter 'n_required_peaks' must be greater than three.") 70 | 71 | # Peak detection settings 72 | if ecg_prt_clustering: 73 | distance = 1 74 | prominence = 5 75 | else: 76 | distance = int((distance_threshold / 1000) * signal.sample_rate) 77 | prominence = amplitude_threshold 78 | 79 | # Windowing function 80 | results = [] 81 | for sample_start in range( 82 | 0, len(signal.data), (window_width - window_overlap) * signal.sample_rate 83 | ): 84 | timestamp = sample_start / signal.sample_rate 85 | 86 | segment = signal.data[sample_start : sample_start + (window_width * signal.sample_rate)] 87 | normalized = sklearn.preprocessing.minmax_scale(segment, (0, 100)) 88 | peaks, properties = peak_detection(normalized, distance, prominence, ecg_prt_clustering) 89 | window_data = (normalized, peaks, properties) 90 | 91 | ibi = np.diff(peaks) * 1000 / signal.sample_rate 92 | sd = np.diff(ibi) 93 | 94 | if len(peaks) <= n_required_peaks: 95 | results.append([timestamp, *[np.nan] * len(DATA_COLUMNS), True, window_data]) 96 | else: 97 | # Time-domain metrics 98 | bpm = ((len(peaks) - 1) / ((peaks[-1] - peaks[0]) / signal.sample_rate)) * 60 99 | rmssd = np.sqrt(np.mean(np.square(sd))) 100 | sdnn = np.std(ibi) 101 | sdsd = np.std(sd) # Standard deviation of successive differences 102 | p_nn20 = np.sum(sd > 20) / len(sd) # Proportion of successive differences > 20ms 103 | p_nn50 = np.sum(sd > 50) / len(sd) # Proportion of successive differences > 50ms 104 | 105 | # Frequency-domain metrics 106 | hf = frequency_domain(x=ibi, sfreq=signal.sample_rate) 107 | 108 | is_outlier = outlier_detection( 109 | peaks, 110 | properties, 111 | ibi, 112 | signal.sample_rate, 113 | window_width, 114 | bpm, 115 | rmssd, 116 | outlier_detection_settings, 117 | ) 118 | 119 | results.append( 120 | [timestamp, bpm, rmssd, sdnn, sdsd, p_nn20, p_nn50, hf, is_outlier, window_data] 121 | ) 122 | 123 | return pd.DataFrame( 124 | results, 125 | columns=DATAFRAME_COLUMNS, 126 | ) 127 | 128 | 129 | def peak_detection( 130 | segment: np.ndarray, distance: int, prominence: int, use_clustering: bool 131 | ) -> tuple[np.ndarray, dict]: 132 | """Returns the indexes of detected peaks and associated properties.""" 133 | peaks, properties = scipy.signal.find_peaks( 134 | segment, distance=distance, prominence=prominence, height=0, width=0 135 | ) 136 | 137 | # Attempt to determine correct peaks by distinguishing the R wave from P and T waves 138 | if len(peaks) >= 3 and use_clustering: 139 | k_means = sklearn.cluster.KMeans(n_clusters=3).fit( 140 | np.column_stack( 141 | (properties["widths"], properties["peak_heights"], properties["prominences"]) 142 | ) 143 | ) 144 | 145 | # Use width centroids to determine correct wave (least width, most prominence) 146 | # If the two lowest values are too close (< 5), use prominence to distinguish them 147 | width_cen = k_means.cluster_centers_[:, 0] 148 | labels_sort_width = np.argsort(width_cen) 149 | if width_cen[labels_sort_width[1]] - width_cen[labels_sort_width[0]] < 5: 150 | # Label of maximum prominence for lowest two widths 151 | prom_cen = k_means.cluster_centers_[:, 2] 152 | wave_label = np.argsort(prom_cen[labels_sort_width[:2]])[1] 153 | else: 154 | wave_label = labels_sort_width[0] 155 | 156 | is_wave_peak = k_means.labels_ == wave_label 157 | 158 | wave_peaks = peaks[is_wave_peak] 159 | wave_props = {k: v[is_wave_peak] for k, v in properties.items()} 160 | else: 161 | wave_peaks = peaks 162 | wave_props = properties 163 | 164 | # @PeterKirk does this need to be > 3 or >= 3? 165 | # Also, should this potentially be done before clustering? 166 | if len(wave_peaks) > 3: 167 | # Approximate prominences at edges of window 168 | base_height = segment[wave_peaks] - wave_props["prominences"] 169 | wave_props["prominences"][0] = wave_props["peak_heights"][0] - base_height[1] 170 | wave_props["prominences"][-1] = wave_props["peak_heights"][-1] - base_height[-2] 171 | 172 | return wave_peaks, wave_props 173 | 174 | 175 | def frequency_domain(x, sfreq: int = 5): 176 | """This function and docstring was modified from Systole 177 | (https://github.com/embodied-computation-group/systole) 178 | Extracts the frequency domain features of heart rate variability. 179 | Parameters 180 | ---------- 181 | x : np.ndarray or list 182 | Interval time-series (R-R, beat-to-beat...), in miliseconds. 183 | sfreq : int 184 | The sampling frequency (Hz). 185 | Returns 186 | ------- 187 | stats : :py:class:`pandas.DataFrame` 188 | Frequency domain summary statistics. 189 | * ``'power_hf_per'`` : High frequency power (%). 190 | Notes 191 | ----- 192 | The dataframe containing the summary statistics is returned in the long 193 | format to facilitate the creation of group summary data frame that can 194 | easily be transferred to other plotting or statistics library. You can 195 | easily convert it into a wide format for a subject-level inline report 196 | using the py:pandas.pivot_table() function: 197 | >>> pd.pivot_table(stats, values='Values', columns='Metric') 198 | """ 199 | if len(x) < 4: # RapidHRV edit: Can't run with less than 4 IBIs 200 | return np.nan 201 | 202 | # Interpolate R-R interval 203 | time = np.cumsum(x) 204 | f = scipy.interpolate.interp1d(time, x, kind="cubic") 205 | new_time = np.arange(time[0], time[-1], 1000 / sfreq) # sfreq = 5 Hz 206 | x = f(new_time) 207 | 208 | # Define window length 209 | nperseg = 256 * sfreq 210 | if nperseg > len(x): 211 | nperseg = len(x) 212 | 213 | # Compute Power Spectral Density 214 | freq, psd = scipy.signal.welch(x=x, fs=sfreq, nperseg=nperseg, nfft=nperseg) 215 | psd = psd / 1000000 216 | fbands = {"hf": ("High frequency", (0.15, 0.4), "r")} 217 | 218 | # Extract HRV parameters 219 | ######################## 220 | stats = pd.DataFrame(columns=["Values", "Metric"]) 221 | band = "hf" 222 | 223 | this_psd = psd[(freq >= fbands[band][1][0]) & (freq < fbands[band][1][1])] 224 | this_freq = freq[(freq >= fbands[band][1][0]) & (freq < fbands[band][1][1])] 225 | 226 | if (len(this_psd) == 0) | (len(this_psd) == 0): # RapidHRV edit: if no power 227 | return np.nan 228 | 229 | # Peaks (Hz) 230 | peak = round(this_freq[np.argmax(this_psd)], 4) 231 | stats.loc[len(stats) + 1, :] = [peak, band + "_peak"] 232 | 233 | # Power (ms**2) 234 | power = np.trapezoid(x=this_freq, y=this_psd) * 1000000 235 | stats.loc[len(stats) + 1, :] = [power, band + "_power"] 236 | 237 | hf = stats.Values[stats.Metric == "hf_power"].values[0] 238 | 239 | return hf 240 | 241 | 242 | def outlier_detection( 243 | peaks: np.ndarray, 244 | peak_properties: dict, 245 | ibi: np.ndarray, 246 | sample_rate: int, 247 | window_width: int, 248 | bpm: float, 249 | rmssd: float, 250 | settings: OutlierDetectionSettings, 251 | ) -> bool: 252 | bpm_in_range = settings.bpm_range[0] < bpm < settings.bpm_range[1] 253 | rmssd_in_range = settings.rmssd_range[0] < rmssd < settings.rmssd_range[1] 254 | if not (bpm_in_range and rmssd_in_range): 255 | return True 256 | 257 | max_peak_distance = (peaks[-1] - peaks[0]) / sample_rate 258 | if max_peak_distance < (window_width * settings.min_total_peak_distance): 259 | return True 260 | 261 | def mad_outlier_detection(x: np.ndarray, threshold: float) -> np.ndarray: 262 | x = x - np.median(x) 263 | mad = scipy.stats.median_abs_deviation(x) * threshold 264 | return (x > mad) | (x < -mad) 265 | 266 | prominence_outliers = mad_outlier_detection( 267 | peak_properties["prominences"], settings.mad_threshold 268 | ) 269 | if np.any(prominence_outliers): 270 | return True 271 | 272 | height_outliers = mad_outlier_detection( 273 | peak_properties["peak_heights"], settings.mad_threshold 274 | ) 275 | if np.any(height_outliers): 276 | return True 277 | 278 | ibi_outliers = mad_outlier_detection(ibi, settings.ibi_mad_threshold) 279 | if np.any(ibi_outliers): 280 | return True 281 | 282 | return False 283 | --------------------------------------------------------------------------------