├── content
├── about
│ ├── GitHub-Mark-32px_source.txt
│ ├── GitHub-Mark-32px.png
│ ├── about-contributing.md
│ └── about-maintenance.md
├── icon.png
├── images
│ ├── access.png
│ ├── bidmc.png
│ ├── bidmc3.png
│ ├── lightwave.png
│ ├── mimicdua.png
│ ├── physionet.png
│ ├── google_cloud.png
│ ├── icu_patient.png
│ ├── eicu_discovery.png
│ ├── examplepatient.jpg
│ ├── mimic_workflow.png
│ ├── sccm_datathon3.png
│ ├── springer_nature2.png
│ └── waveform_viewer.png
├── tutorials.md
├── tutorials-for-the-future.md
├── tutorial
│ ├── data-modelling.md
│ ├── data-interpretation.md
│ ├── signal-quality-assessment.md
│ ├── data-analysis.md
│ └── notebooks
│ │ ├── fiducial_point_functions.py
│ │ ├── data-extraction.ipynb
│ │ ├── data-exploration.ipynb
│ │ └── beat_detection_functions.py
├── about.md
├── workshop
│ ├── schedule.md
│ ├── prep.md
│ ├── aims.md
│ └── synopsis.md
├── mimic-database.md
├── mimic
│ ├── context.md
│ ├── wfdb-toolbox.md
│ ├── physionet.md
│ ├── structure.md
│ └── formatting.md
├── _toc.yml
├── intro.md
├── _config.yml
├── additional-resources.md
├── case-study.md
├── workshop.md
└── references.bib
├── requirements.txt
├── .github
└── workflows
│ ├── deploy.yml
│ └── run-tests.yml
├── .all-contributorsrc
├── .gitignore
└── README.md
/content/about/GitHub-Mark-32px_source.txt:
--------------------------------------------------------------------------------
1 | https://github.com/logos
2 |
3 | 08-Apr-2022
--------------------------------------------------------------------------------
/content/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/icon.png
--------------------------------------------------------------------------------
/content/images/access.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/access.png
--------------------------------------------------------------------------------
/content/images/bidmc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/bidmc.png
--------------------------------------------------------------------------------
/content/images/bidmc3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/bidmc3.png
--------------------------------------------------------------------------------
/content/images/lightwave.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/lightwave.png
--------------------------------------------------------------------------------
/content/images/mimicdua.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/mimicdua.png
--------------------------------------------------------------------------------
/content/images/physionet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/physionet.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Jinja2==3.1.4
2 | jupyter-book==1.0.2
3 | matplotlib==3.5.2
4 | numpy>=1.23.1
5 | wfdb==3.4.1
6 |
--------------------------------------------------------------------------------
/content/images/google_cloud.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/google_cloud.png
--------------------------------------------------------------------------------
/content/images/icu_patient.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/icu_patient.png
--------------------------------------------------------------------------------
/content/images/eicu_discovery.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/eicu_discovery.png
--------------------------------------------------------------------------------
/content/images/examplepatient.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/examplepatient.jpg
--------------------------------------------------------------------------------
/content/images/mimic_workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/mimic_workflow.png
--------------------------------------------------------------------------------
/content/images/sccm_datathon3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/sccm_datathon3.png
--------------------------------------------------------------------------------
/content/about/GitHub-Mark-32px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/about/GitHub-Mark-32px.png
--------------------------------------------------------------------------------
/content/images/springer_nature2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/springer_nature2.png
--------------------------------------------------------------------------------
/content/images/waveform_viewer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/waveform_viewer.png
--------------------------------------------------------------------------------
/content/tutorials.md:
--------------------------------------------------------------------------------
1 | # Tutorials
2 |
3 | Follow the links to explore tutorials on Biomedical Signal Processing using the MIMIC Waveform Database.
4 |
--------------------------------------------------------------------------------
/content/tutorials-for-the-future.md:
--------------------------------------------------------------------------------
1 | # Future Tutorials
2 |
3 | Follow the links to explore ideas for possible tutorials which could be added in the future.
4 |
--------------------------------------------------------------------------------
/content/tutorial/data-modelling.md:
--------------------------------------------------------------------------------
1 | # Data modelling
2 |
3 | _Tutorial on data modelling - i.e. training a ML algorithm to estimate BP from PPG pulse wave features._
--------------------------------------------------------------------------------
/content/about.md:
--------------------------------------------------------------------------------
1 | About this Book
2 | =======================
3 |
4 | - [Contributors](https://github.com/wfdb/mimic_wfdb_tutorials#contributors-)
5 | - [How to contribute](./about/about-contributing)
6 | - [Maintenance](./about/about-maintenance)
7 |
--------------------------------------------------------------------------------
/content/tutorial/data-interpretation.md:
--------------------------------------------------------------------------------
1 | # Data interpretation
2 |
3 | _Tutorial on data interpretation_
4 |
5 | ```{admonition} Suggestions
6 | This involves interpreting the results, _i.e._ how well does the PPG-based approach to estimating BP perform? Perhaps it would be helpful to compare the performance against standards for BP monitors (_e.g._ the AAMI standard).
7 | ```
--------------------------------------------------------------------------------
/content/tutorial/signal-quality-assessment.md:
--------------------------------------------------------------------------------
1 | # Signal Quality Assessment
2 |
3 | _Tutorial on signal quality assessment_
4 |
5 | ```{admonition} Suggestions
6 | I think this step is optional because we can still estimate BP without it.
7 |
8 | Similarly to the beat detection tutorial, I'd suggest we firstly see if HeartPy contains some signal quality assessment functionality, and if so, use it.
9 | ```
--------------------------------------------------------------------------------
/content/tutorial/data-analysis.md:
--------------------------------------------------------------------------------
1 | # Data analysis
2 |
3 | _Tutorial on data analysis_
4 |
5 | ```{admonition} Suggestions
6 | This could involve:
7 | - Identifying pairs of PPG-estimated BPs and corresponding reference BPs.
8 | - Calculating error statistics (_e.g._ mean absolute error, bias and limits of agreement, r^2)
9 | - Making plots (_e.g._ Bland-Altman, correlation plot).
10 | - Repeating this process for SBP and DBP.
11 | ```
--------------------------------------------------------------------------------
/content/workshop/schedule.md:
--------------------------------------------------------------------------------
1 | # Schedule
2 |
3 | The workshop will last 3.5 hours.
4 |
5 | | Time | Content |
6 | | :--- | :--- |
7 | | 5 mins | [Welcome and overview](../workshop) |
8 | | 20 mins | [Introduction to the MIMIC Waveform Database](../workshop) |
9 | | 5 mins | [Overview of case study](../case-study) |
10 | | 45 mins | [Interactive Tutorials](../tutorials) |
11 | | 2 hours | [Case Study](../case-study) |
12 | | 15 mins | Group presentations |
13 |
14 |
--------------------------------------------------------------------------------
/content/workshop/prep.md:
--------------------------------------------------------------------------------
1 | # Preparation
2 |
3 | ## In advance
4 |
5 | You must be registered for the workshop in order to attend, as it is not included in the standard conference registration. You can register for the workshop at the [IEEE EMBC conference website](https://embc.embs.org/2022/).
6 |
7 | Whilst no preparation is required for the workshop, attendees are encouraged to:
8 | 1. Familiarise yourself with these resources.
9 | 2. Ensure that you can run the tutorials and case study in Google Colab.
10 |
11 | ## On the day
12 |
13 | - Bring your laptop
--------------------------------------------------------------------------------
/content/workshop/aims.md:
--------------------------------------------------------------------------------
1 | # Aims
2 |
3 | This interactive workshop provides key knowledge, skills, and tools for conducting open research in the field of biomedical signal processing.
4 |
5 | Broadly, the workshop:
6 | 1. Introduces publicly accessible datasets of physiological signals, focusing on those on _PhysioNet_ ([https://www.physionet.org](https://www.physionet.org)).
7 | 2. Teaches essential skills for conducting high quality research using open source software.
8 | 3. Offers an opportunity to work in groups on a cuffless blood pressure estimation case study.
9 |
--------------------------------------------------------------------------------
/content/mimic-database.md:
--------------------------------------------------------------------------------
1 | # MIMIC Database
2 |
3 | The [MIMIC Database](https://mimic.mit.edu/) is a publicly accessible critical care database. It is widely used in biomedical signal processing research because it contains a variety of physiological signals collected from many thousands of patients.
4 |
5 | The database is a valuable resource for open research in biomedical signal processing because:
6 | - the signals (_a.k.a_ waveforms) are openly available ([here](https://physionet.org/content/mimic4wdb/0.1.0/))
7 | - open software is provided to analyse the waveforms ([here](https://wfdb.readthedocs.io/en/stable/))
8 |
9 | Follow the links to find out more about the MIMIC Database.
10 |
--------------------------------------------------------------------------------
/content/mimic/context.md:
--------------------------------------------------------------------------------
1 | # Clinical Context
2 |
3 | ## MIMIC-IV
4 |
5 | - Publicly accessible critical care database
6 | - Developed in collaboration with Beth Israel Deaconess Medical Center
7 | - \>50,000 ICU stays and \>400,000 ED stays between 2008-2019
8 | - Modular (structured EHR, X-rays, waveforms, clinical reports, echos)
9 |
10 | 
11 |
12 | ---
13 |
14 | ## Reproducible workflow
15 |
16 | 
17 |
18 | ---
19 |
20 | ## Critical care
21 |
22 | 
23 |
24 | ---
25 |
26 | ## Example patient
27 |
28 | 
29 | _Reproduced under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) from: Johnson AEW et al. MIMIC-III, a freely accessible critical care database. Sci Data 2016; 3: 160035. https://doi.org/10.1038/sdata.2016.35_
30 |
--------------------------------------------------------------------------------
/content/about/about-contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | To contribute to this book, either:
4 | ```{dropdown} **1. Propose edits:** Follow these instructions to propose a specific change
5 | - Go to the page you would like to edit
6 | - Hover over the  button at the top of the page, and click 'suggest edit' from the dropdown list that appears.
7 | - This will take you to GitHub, where you can edit the page directly and submit the proposed edits for approval. You will require a GitHub login to do this.
8 | ```
9 | ```{dropdown} **2. Suggest areas for improvement:** Follow these instructions to make a general suggestion
10 | - If your suggestion relates to a specific page, then go to that page.
11 | - Hover over the  button at the top of the page, and click 'open issue' from the dropdown list that appears.
12 | - This will take you to GitHub, where you can post suggestions for improvement. You will require a GitHub login to do this.
13 | ```
14 | When contributing, please either contribute your own ideas/text, or clearly acknowledge the original source of the ideas/text.
15 |
--------------------------------------------------------------------------------
/content/_toc.yml:
--------------------------------------------------------------------------------
1 | format: jb-article
2 | root: intro
3 | sections:
4 | - file: workshop
5 | sections:
6 | - file: workshop/aims
7 | - file: workshop/synopsis
8 | - file: workshop/schedule
9 | - file: workshop/prep
10 | - file: mimic-database
11 | sections:
12 | - file: mimic/physionet
13 | - file: mimic/context
14 | - file: mimic/structure
15 | - file: mimic/formatting
16 | - file: mimic/wfdb-toolbox
17 | - file: tutorials
18 | sections:
19 | - file: tutorial/notebooks/data-exploration
20 | - file: tutorial/notebooks/data-extraction
21 | - file: tutorial/notebooks/data-visualisation
22 | - file: tutorial/notebooks/signal-filtering
23 | - file: tutorial/notebooks/differentiation
24 | - file: tutorial/notebooks/beat-detection
25 | - file: tutorial/notebooks/pulse-wave-analysis
26 | - file: tutorial/notebooks/extracting-reference-bp
27 | - file: case-study
28 | - file: additional-resources
29 | - file: about
30 | sections:
31 | - file: about/about-contributing
32 | - file: about/about-maintenance
33 | - file: tutorials-for-the-future
34 | sections:
35 | - file: tutorial/notebooks/qrs-detection
36 | - file: tutorial/signal-quality-assessment
37 | - file: tutorial/data-modelling
38 | - file: tutorial/data-analysis
39 | - file: tutorial/data-interpretation
40 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: deploy-book
2 |
3 | # Only run this when the main branch changes
4 | on:
5 | push:
6 | branches:
7 | - main
8 | # If your git repository has the Jupyter Book within some-subfolder next to
9 | # unrelated files, you can make this run only if a file within that specific
10 | # folder has been modified.
11 | #
12 | # paths:
13 | # - some-subfolder/**
14 |
15 | # This job installs dependencies, builds the book, and pushes it to `gh-pages`
16 | jobs:
17 | deploy-book:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - uses: actions/checkout@v4
21 |
22 | # Install dependencies
23 | - name: Set up Python
24 | uses: actions/setup-python@v5
25 | with:
26 | python-version: '3.10'
27 |
28 | - name: Install dependencies
29 | run: |
30 | sudo apt-get install libsndfile1
31 | pip install -r requirements.txt
32 |
33 | # Build the book
34 | - name: Build the book
35 | run: |
36 | jupyter book build content --all
37 |
38 | # Push the book's HTML to github-pages
39 | - name: GitHub Pages action
40 | uses: peaceiris/actions-gh-pages@v3.6.1
41 | if: ${{ github.ref == 'refs/heads/main' }}
42 | with:
43 | github_token: ${{ secrets.GITHUB_TOKEN }}
44 | publish_dir: ./content/_build/html
--------------------------------------------------------------------------------
/.github/workflows/run-tests.yml:
--------------------------------------------------------------------------------
1 | # Link repository with GitHub Actions
2 | # https://docs.github.com/en/actions/learn-github-actions/introduction-to-github-actions
3 |
4 | name: run-tests
5 |
6 | on:
7 | push:
8 | branches:
9 | - main
10 | pull_request:
11 | branches:
12 | - main
13 |
14 | jobs:
15 | test:
16 | runs-on: ${{ matrix.os }}
17 | strategy:
18 | matrix:
19 | os: [windows-latest, ubuntu-latest, macos-latest]
20 | python-version: ["3.10"]
21 | steps:
22 | # Checkout the latest code from the repo
23 | # https://github.com/actions/checkout
24 | - name: Checkout repo
25 | uses: actions/checkout@v4
26 | # Setup which version of Python to use
27 | # https://github.com/actions/setup-python
28 | - name: Set up Python ${{ matrix.python-version }}
29 | uses: actions/setup-python@v5
30 | with:
31 | python-version: ${{ matrix.python-version }}
32 | # Display the Python version being used
33 | - name: Display Python version
34 | run: python -c "import sys; print(sys.version)"
35 | # Install the dependencies for the package.
36 | - name: Install dependencies
37 | run: |
38 | python -m pip install --upgrade pip
39 | pip install -r requirements.txt
40 | # Build the book
41 | - name: Build the book
42 | run: jupyter-book build content/
43 |
--------------------------------------------------------------------------------
/content/intro.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | This book presents tutorials on using the MIMIC Waveform Database for Biomedical Signal Processing.
4 |
5 | ```{note}
6 | These resources are currently being developed.
7 | ```
8 |
9 | The book includes:
10 |
11 | - [Workshop](./workshop): Details of the workshop for which these resources were designed.
12 | - [MIMIC Database](./mimic-database): An overview of the MIMIC Waveform Database.
13 | - [Case Study](./case-study): A case study on cuffless blood pressure estimation using the MIMIC Database.
14 | - [Additional Resources](./additional-resources): Additional resources on the topics covered in these tutorials.
15 |
16 | ---
17 |
18 | # Contributing
19 |
20 | All are welcome to contribute to this project (as described in [About this Book](./about)). You may wish to contribute:
21 | - **Content on new topics:** You may have your own ideas for new topics to be included in the book, or you might want to contribute towards writing on [these topics](https://github.com/peterhcharlton/mimic_wfdb_tutorials/issues/1) which we think should be included.
22 | - **Modifications to existing content:** You may propose edits to existing content, by following the instructions under 'Propose edits' [here](./about/about-contributing).
23 | - **General suggestions for improvements:** You may make a general suggestion by following the instructions under 'Suggest areas for improvement' [here](./about/about-contributing).
24 |
--------------------------------------------------------------------------------
/content/_config.yml:
--------------------------------------------------------------------------------
1 | # Book settings
2 | # Learn more at https://jupyterbook.org/customize/config.html
3 |
4 | title: MIMIC WFDB Tutorials
5 | author: Peter H Charlton
6 | copyright: "2022"
7 | logo: icon.png
8 | exclude_patterns: [README.md]
9 |
10 | # Force re-execution of notebooks on each build.
11 | # See https://jupyterbook.org/content/execute.html
12 | execute:
13 | execute_notebooks: force
14 |
15 | # Define the name of the latex output file for PDF builds
16 | latex:
17 | latex_documents:
18 | targetname: mimic_wfdb_tutorials.tex
19 |
20 | # Add a bibtex file so that we can create citations
21 | bibtex_bibfiles:
22 | - references.bib
23 |
24 | # Information about where the book exists on the web
25 | repository:
26 | url: https://github.com/wfdb/mimic_wfdb_tutorials # Online location of your book
27 | branch: main # Which branch of the repository should be used when creating links (optional)
28 | path_to_book: content
29 |
30 | # Add GitHub buttons to your book
31 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository
32 | html:
33 | use_issues_button: true
34 | use_repository_button: true
35 | use_edit_page_button: true
36 |
37 | # Add launch buttons
38 | # See https://jupyterbook.org/en/stable/interactive/launchbuttons.html
39 | launch_buttons:
40 | # Google Colab links will only work for pages that have the .ipynb extension.
41 | colab_url: "https://colab.research.google.com"
42 | binderhub_url: "https://mybinder.org"
43 |
44 |
--------------------------------------------------------------------------------
/content/mimic/wfdb-toolbox.md:
--------------------------------------------------------------------------------
1 | # WFDB Toolbox
2 |
3 | ## Overview
4 |
5 | The Waveform Database (WFDB) is a set of file standards designed for reading and storing physiologic signal data, and associated annotations. See the [WFDB Spec repository](https://github.com/wfdb/wfdb-spec/) for the specification details.
6 |
7 | Example signal types include ECG and EEG. Example annotation types include automated machine-labelled heart-beats, and clinician comments regarding specific signal artifacts.
8 |
9 | There are several available software packages that implement the WFDB specifications. Consider using one of them if you want to conduct research or build algorithms using physiologic data.
10 |
11 | ## Software Packages
12 |
13 | The WFDB specification is openly-licensed, so anyone can implement and modify software according to the spec. Here are the main packages and implementations:
14 |
15 | - [WFDB Software Package](https://doi.org/10.13026/gjvw-1m31): The original software package written in C. Contains the core library, command line tools, and WAVE. See also the PhysioNet publication. Associated documents:
16 | - [WFDB Python Package](https://wfdb.readthedocs.io/en/stable/): A native Python implementation of WFDB.
17 | - [WFDB Toolbox for Matlab](https://archive.physionet.org/physiotools/matlab/wfdb-swig-matlab/new_version.shtml): A set of Java, GUI, and m-code wrapper functions, which make system calls to WFDB Software Package and other applications.
18 |
19 | ## WFDB-Python
20 |
21 | For the purposes of this workshop, we will be using the [WFDB Python Package](https://wfdb.readthedocs.io/en/stable/), a library of tools for reading, writing, and processing physiological signals and annotations.
22 |
23 | The distribution is hosted on PyPI, the package manager for Python. The software can be installed directly from PyPI using the following command:
24 |
25 | ```bash
26 | $ pip install wfdb
27 | ```
28 |
29 |
30 |
--------------------------------------------------------------------------------
/.all-contributorsrc:
--------------------------------------------------------------------------------
1 | {
2 | "files": [
3 | "README.md"
4 | ],
5 | "imageSize": 100,
6 | "commit": false,
7 | "contributors": [
8 | {
9 | "login": "peterhcharlton",
10 | "name": "Peter H Charlton",
11 | "avatar_url": "https://avatars.githubusercontent.com/u/9865941?v=4",
12 | "profile": "https://peterhcharlton.github.io/",
13 | "contributions": [
14 | "content"
15 | ]
16 | },
17 | {
18 | "login": "tompollard",
19 | "name": "Tom Pollard",
20 | "avatar_url": "https://avatars.githubusercontent.com/u/822601?v=4",
21 | "profile": "https://github.com/tompollard",
22 | "contributions": [
23 | "design"
24 | ]
25 | },
26 | {
27 | "login": "elisamejia",
28 | "name": "Elisa Mejía",
29 | "avatar_url": "https://avatars.githubusercontent.com/u/10887584?v=4",
30 | "profile": "https://github.com/elisamejia",
31 | "contributions": [
32 | "design"
33 | ]
34 | },
35 | {
36 | "login": "bemoody",
37 | "name": "bemoody",
38 | "avatar_url": "https://avatars.githubusercontent.com/u/7748246?v=4",
39 | "profile": "https://github.com/bemoody",
40 | "contributions": [
41 | "design"
42 | ]
43 | },
44 | {
45 | "login": "briangow",
46 | "name": "Brian Gow",
47 | "avatar_url": "https://avatars.githubusercontent.com/u/4754434?v=4",
48 | "profile": "https://github.com/briangow",
49 | "contributions": [
50 | "design"
51 | ]
52 | },
53 | {
54 | "login": "danamouk",
55 | "name": "danamouk",
56 | "avatar_url": "https://avatars.githubusercontent.com/u/49573192?v=4",
57 | "profile": "https://github.com/danamouk",
58 | "contributions": [
59 | "design"
60 | ]
61 | }
62 | ],
63 | "contributorsPerLine": 7,
64 | "projectName": "mimic_wfdb_tutorials",
65 | "projectOwner": "wfdb",
66 | "repoType": "github",
67 | "repoHost": "https://github.com",
68 | "skipCi": true
69 | }
70 |
--------------------------------------------------------------------------------
/content/mimic/physionet.md:
--------------------------------------------------------------------------------
1 | # PhysioNet
2 |
3 | ## Overview
4 |
5 | - [PhysioNet](https://physionet.org/) is a data sharing platform built and maintained at the Laboratory of Computational Physiology at MIT
6 | - Established as an outreach component of a research project in 1999
7 | - Rebuilt from scratch in 2019 following "[FAIR principles](https://www.go-fair.org/fair-principles/)"
8 | - \>50,000 registered, active users
9 | - Supports access control for sensitive data via data use agreements and training
10 |
11 | 
12 |
13 | ---
14 |
15 | ## Recommended repository
16 |
17 | - PhysioNet is a recommended repository for a number of journals:
18 | - Springer Nature
19 | - PLOS
20 | - eLife
21 |
22 | 
23 | _Source: [Springer Nature](https://www.springernature.com/gp/authors/research-data-policy/repositories-health/12327108)_
24 |
25 | ---
26 |
27 | ## Access control
28 |
29 | - **Open data**
30 | - **Restricted**:
31 | - Data Use Agreement
32 | - **Credentialed**:
33 | - Data Use Agreement
34 | - Training in human subject research
35 | - Identity check
36 | - **Contributor-managed**:
37 | - Data Use Agreement
38 | - Approval of the contributor
39 |
40 | 
41 |
42 | ---
43 |
44 | ## Enhanced discovery
45 |
46 | - Structured metadata is distributed to search indexes
47 | - Project reuse can be tracked through unique identifiers or DOIs
48 | - Strongly support data objects as research outputs in their own right
49 |
50 | 
51 |
52 | ---
53 |
54 | ## Integrated viewers
55 |
56 | - Data such as waveforms can be viewed directly in the browser
57 |
58 | 
59 |
60 | ---
61 |
62 | ## Cloud integration
63 |
64 | - Enables analysis without the need to download files
65 |
66 | 
67 |
68 | ---
69 |
70 | ## Diverse, active community
71 |
72 | - Regular workshops, challenges, and datathons based around PhysioNet datasets
73 |
74 | 
--------------------------------------------------------------------------------
/content/workshop/synopsis.md:
--------------------------------------------------------------------------------
1 | # Synopsis
2 |
3 | The field of Biomedical Signal Processing stands to benefit greatly from open research. Reproducible studies, accompanied by code and data, allow others to build on the state-of-the-art and to quickly translate between academia and industry. Openly available tools are widely used. Indeed, the MIMIC Waveform Database (WFDB) was referenced in 125 EMBC papers between 2016 and 2020.
4 |
5 | The aim of this interactive workshop is to provide participants with the knowledge, skills and tools required to conduct open research in the field of Biomedical Signal Processing. It will include a formal announcement of the release of the MIMIC-IV Waveform Database, and hands-on experience of using MIMIC data for cuffless blood pressure estimation.
6 |
7 | Firstly, the workshop will provide participants with an understanding of publicly available datasets containing physiological signals, focusing on those on PhysioNet. This will include an overview of the MIMIC Waveform Database, including its clinical context, structure and formatting.
8 |
9 | Secondly, the workshop will provide participants with essential skills for conducting high quality research with openly available data. Participants will work through interactive tutorials in the Python programming language using the WFDB Toolbox, a library of Biomedical Signal Processing tools. The tutorials will introduce key aspects of signal processing, including: data exploration, selection and extraction; pre-processing; feature extraction; modelling; analysis; and interpretation.
10 |
11 | Finally, participants will work in groups on a cuffless blood pressure estimation case study. This will provide hands-on experience and opportunity for networking. Example code will be provided, and researchers will be on hand to answer questions.
12 |
13 | The workshop will be led by researchers who develop MIMIC and the WFDB toolboxes, and who have a track-record in reproducible research. It will be of great interest to students, researchers, and engineers: bring a laptop to participate fully.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/content/mimic/structure.md:
--------------------------------------------------------------------------------
1 | # Database Structure
2 |
3 | ## MIMIC-IV modules
4 |
5 | MIMIC-IV is a modular dataset, comprising a core set of clinical data (MIMIC-IV Clinical) that can be linked to datasets such as:
6 |
7 | - MIMIC-IV-ED: emergency department data;
8 | - MIMIC-CXR: chest X-ray images;
9 | - MIMIC-IV-ECG: 10-second 12-lead diagnostic ECGs;
10 | - MIMIC-IV-waveform: varying-length, high-time-resolution waveforms such as ECG and PPG.
11 |
12 | Typically the datasets are linked by unique patient ID (`subject_id`) and unique hospital stay ID (`hadm_id`). For the purposes of this workshop, we will focus on the MIMIC-IV waveform dataset.
13 |
14 | ## Available monitor data
15 |
16 | The MIMIC-IV Waveform Database consists of raw data that is sampled by the bedside monitor. The available types of data vary from one patient to another.
17 |
18 | ### ECG
19 |
20 | Virtually all patients have a continuous ECG monitor, measuring electrical activity in the heart. For MIMIC-IV patients, typically two or three channels are measured (one or two limb leads, one chest lead). Each channel is sampled at 250 samples per second.
21 |
22 | Measurements derived from the ECG include:
23 | - Heart rate (averaged once per 1.024 seconds)
24 | - Instantaneous ("beat to beat") heart rate
25 | - ST elevation
26 | - QT interval
27 |
28 | The same electrodes are also used to measure impedance across the chest ("Resp", 62.5 samples per second), which is used to derive respiration rate ("RR").
29 |
30 | ### PPG
31 |
32 | Virtually all patients have a PPG (photoplethysmogram) sensor, measuring blood oxygen in the fingertip or other extremity. This sensor provides:
33 | - A continuous waveform ("Pleth", 125 samples per second)
34 | - Average oxygen saturation ("SpO2", once per 1.024 seconds)
35 | - Perfusion index ("Perf")
36 | - Pulse rate ("Pulse (SpO2)")
37 |
38 | ### Blood pressure
39 |
40 | Blood pressure is measured using an automatic cuff at set intervals (e.g. every 5, 15, 30, or 60 minutes). This is recorded as "NBPs", "NBPd", and "NBPm" (systolic, diastolic, and mean).
41 |
42 | Some patients also have a continuous, invasive arterial pressure sensor, which provides:
43 | - A pressure waveform ("ABP", 125 samples per second)
44 | - Systolic pressure ("ABPs", once per 1.024 seconds)
45 | - Diastolic pressure ("ABPd")
46 | - Mean pressure ("ABPm")
47 | - Pulse rate ("Pulse (ABP)")
48 |
49 | ### Other measurements
50 |
51 | Other measurements may be collected depending on the patient, such as:
52 |
53 | - Temperature ("Tblood", "Tcore", "Tesoph", etc.)
54 | - Other pressure waveforms ("CVP", "ICP", etc.)
55 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # vscode
132 | .vscode/
133 |
134 | # MACOS DS_Store
135 | .DS_Store
136 |
137 | # Jupyter Book build directory
138 | _build/
139 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MIMIC WFDB Tutorials
2 |
3 | [](#contributors-)
4 |
5 |
6 | This repository contains a Jupyter book called 'MIMIC WFDB Tutorials', which presents tutorials on using the MIMIC Waveform Database for Biomedical Signal Processing.
7 |
8 | The book is available [here](https://wfdb.github.io/mimic_wfdb_tutorials/intro.html).
9 |
10 | ## Contributors ✨
11 |
12 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
13 |
14 |
15 |
16 |
17 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
34 |
35 | ## Development
36 |
37 | This website was created with [JupyterBook](https://jupyterbook.org/). To set up a local development environment, follow the steps below:
38 |
39 | 1. Navigate to the project directory (e.g. `mimic_wfdb_tutorials`)
40 | 2. Install the required packages with `pip install -r requirements.txt` (preferably in a virtual environment using something like venv, virtualenv, conda etc.)
41 | 3. Change to the directory with the content (e.g. `cd content`)
42 | 4. Run `jupyter-book build --all ./` from within this directory to build the book.
43 | 5. The HTML bookfiles should have been created in a `_build` folder.
44 |
--------------------------------------------------------------------------------
/content/additional-resources.md:
--------------------------------------------------------------------------------
1 | # Additional Resources
2 |
3 | This page provides some additional resources for further reading after the Workshop which may be of interest.
4 |
5 | ## MIMIC
6 |
7 | Further information on the MIMIC Database is available at:
8 | - [The MIMIC Database homepage](https://mimic.mit.edu/)
9 | - [A brief introduction to MIMIC on the Laboratory for Computational Physiology's website](https://lcp.mit.edu/mimic)
10 | - [A textbook on MIMIC](https://doi.org/10.1007/978-3-319-43742-2)
11 | - [The article describing MIMIC-III](https://doi.org/10.1038/sdata.2016.35)
12 |
13 | ## The WFDB Specifications and Toolbox
14 |
15 | - [WFDB Specifications](https://github.com/wfdb/wfdb-spec/): Documentation of the specifications for Waveform Database (WFDB) files and concepts.
16 | - [WFDB Software Package](https://doi.org/10.13026/gjvw-1m31): The original software package written in C.
17 | - [WFDB Python Package](https://wfdb.readthedocs.io/en/stable/)
18 | - [WFDB Toolbox for Matlab](https://archive.physionet.org/physiotools/matlab/wfdb-swig-matlab/new_version.shtml)
19 |
20 | ## Open Research
21 |
22 | - [The Turing Way handbook to reproducible, ethical and collaborative data science](https://the-turing-way.netlify.app/), which includes guidelines on [Open Research](https://the-turing-way.netlify.app/reproducible-research/open.html).
23 | - [The Software Sustainability Institute](https://www.software.ac.uk/), including [Guides](https://www.software.ac.uk/resources/guides) on ensuring software sustainability.
24 |
25 | ## Software development and management
26 |
27 | - An introduction to Python is available [here](https://prodigiouspython.github.io/ProdigiousPython/intro.html).
28 | - Tutorials on Git and GitHub, which are used for version control and collaborative code development, are available [here](https://swcarpentry.github.io/git-novice/) and [here](https://www.youtube.com/playlist?list=PL4cUxeGkcC9goXbgTDQ0n_4TBzOO0ocPR).
29 |
30 | ## Cuffless Blood Pressure estimation
31 |
32 | - [Review of Cuffless Blood Pressure Measurement](https://doi.org/10.1146/annurev-bioeng-110220-014644): a review of the topic by leaders in the field.
33 | - [Evaluating Cuffless Blood Pressure Devices](https://doi.org/10.1161/HYPERTENSIONAHA.121.17747): a review on "the capabilities and limitations of emerging cuffless BP measurement devices", with proposals of how to evaluate such devices.
34 |
35 | ## Photoplethysmography
36 |
37 | - [Photoplethysmography Signal Processing and Synthesis](https://peterhcharlton.github.io/publication/ppg_sig_proc_chapter/): a textbook chapter providing a comprehensive overview of PPG signal processing.
38 | - [Wearable photoplethysmography for cardiovascular monitoring](https://doi.org/10.1109/JPROC.2022.3149785): a review paper detailing different aspects of wearable photoplethysmography, including signal processing and clinical applications.
39 | - [Establishing best practices in photoplethysmography signal acquisition and processing](https://doi.org/10.1088/1361-6579/ac6cc4): an article discussing whether it would be possible and beneficial to establish best practices for photoplethysmography signal acquisition and processing.
40 |
41 | ## Datasets containing physiological signals
42 |
43 | - [Physionet](https://www.physionet.org/): PhysioNet hosts many datasets containing physiological signals, listed [here](https://www.physionet.org/about/database/).
44 | - [Photoplethysmography datasets](https://peterhcharlton.github.io/post/ppg_datasets/): There are several publicly available datasets containing photoplethysmogram signals, many of which are listed [here](https://peterhcharlton.github.io/post/ppg_datasets/).
45 |
--------------------------------------------------------------------------------
/content/case-study.md:
--------------------------------------------------------------------------------
1 | # Case Study
2 |
3 | In the tutorials we have explored different steps which would commonly be encountered when developing techniques for cuffless blood pressure estimation.
4 |
5 | For the remainder of the workshop we would like you to work together in groups to train and test a model for estimating BP from the PPG.
6 |
7 | Forming groups:
8 | - Please could any coders spread themselves between groups
9 | - We recommend groups of between 4 and 6 people
10 |
11 | A suggested workflow is provided below - feel free to use this or ignore it!
12 |
13 | We will be on hand to help, and we will ask groups to share their experiences shortly before the end of the session.
14 |
15 | # Suggested workflow
16 |
17 | I would suggest the following:
18 | 1. Loop through ICU stays, determining whether each stay meets the inclusion criteria for the study (contains at least 10 minutes of simultaneous PPG and ABP signals). The [Data Visualisation tutorial](https://wfdb.io/mimic_wfdb_tutorials/tutorial/notebooks/data-visualisation.html) provides scripts for doing this (except that it only runs on a specified ICU stay, and doesn't loop through stays). Continue looping until 60 ICU stays have been identified which meet the inclusion criteria.
19 | 2. Extract 10 minutes of simultaneous PPG and ABP signals from each ICU stay which meets the inclusion criteria.
20 | 3. Run signal processing scripts to extract a parameter from the shape of each PPG pulse wave (let's call the parameter the stiffness index - SI). This will produce a vector of values for each ICU stay (with a length of approximately 600 - i.e. one value per heart beat - which varies from one stay to the next), and a vector of corresponding time stamps.
21 | 4. Run signal processing scripts to extract systolic and diastolic blood pressure (SBP and DBP) values from each ABP pulse wave. Similarly, this will produce two vectors of values for each ICU stay, one for systolic blood pressure, and one for diastolic blood pressure, and a vector of corresponding time stamps.
22 | 5. Calculate an average (e.g. median) value of the SI for each 30 second window, and repeat for SBP and DBP, ensuring that the same timings are used for the SI, SBP and DBP windows. For each ICU stay, this will produce three vectors each of length 20 (because the 10 minute segments can be split into 20 non-overlapping 30-second windows). The three vectors will contain SI, SBP and DBP respectively.
23 | 6. Create 'overall' vectors by concatenating each of the three vectors across all ICU stays. This will generate three vectors each of length 1200 (i.e. 20 values for 60 ICU stays). In addition, create a vector of ICU stays (i.e. a vector of length 1200 which contains the ICU stay ID from which each window was obtained).
24 | 7. Split the data into training and testing data, at the ICU stay level. E.g. the first 600 values (corresponding to the first 30 ICU stays) are designated as training data, and the remaining 600 values are designated as testing data.
25 | 8. Train a linear regression model on the training data to estimate either SBP (or DBP) from SI. The default behaviour should be to use SBP, but it would be nice to include the option to change this to DBP.
26 | 9. Test the performance of the model on the testing data:
27 | - Use the model to estimate SBP (or DBP) from each SI value in the testing data. This should produce a vector of estimated SBP (or DBP) values of length 600.
28 | - Calculate the errors between the estimated and reference SBP (or DBP) values (using error = estimated - reference).
29 | - Calculate error statistics for the entire testing dataset. e.g. mean absolute error, bias (i.e. mean error), limits of agreement (i.e. 1.96 * standard deviation of errors).
30 |
--------------------------------------------------------------------------------
/content/workshop.md:
--------------------------------------------------------------------------------
1 | # Workshop
2 |
3 | ## Welcome
4 |
5 | ### Introductions
6 |
7 | This workshop is being run by researchers from MIT, the University of Cambridge, and City, University of London. You can find details of the individuals who have contributed [here](https://github.com/wfdb/mimic_wfdb_tutorials#contributors-).
8 |
9 | ### Outline
10 |
11 | The workshop brings together three themes:
12 |
13 | **Open research:** Facilitating high quality research through the sharing of data and code. This may include the use of online repositories such as [PhysioNet](https://physionet.org/), collaborative code development using tools and platforms such as Git and [GitHub](https://github.com/), and making research reproducible through the sharing of the data and code used in studies. Indeed, the tools used in this workshop are hosted in a GitHub repository [here](https://github.com/wfdb/mimic_wfdb_tutorials).
14 |
15 | **Biomedical signal processing:** Research into techniques to derive information from physiological signals, often for health or well-being purposes. In this workshop, we'll be referring to physiological signals which are sampled many times per second, such as the electrocardiogram (ECG).
16 |
17 | 
18 |
19 | Source: _Charlton PH, [Photoplethysmogram signals at rest and during exercise](https://commons.wikimedia.org/wiki/File:Photoplethysmogram_signals_at_rest_and_during_exercise.svg), Wikimedia Commons (CC BY 4.0)._
20 |
21 | **Cuffless blood pressure estimation:** Estimating blood pressure from physiological signals which can be acquired unobtrusively without the use of a blood pressure cuff. Broadly, there are three approaches for cuffless blood pressure estimation. These are based on the observation that the speed with which the pulse wave propagates increases with blood pressure:
22 | - measuring the pulse transit time (PTT) between two arterial pulse waves (one closer to the heart and one further away).
23 | - measuring the pulse arrival time (PAT) between a marker of ventricular contraction, and the arrival of a pulse wave (preferably at a peripheral site)
24 | - estimating BP from the shape of a pulse wave (such as a photoplethysmogram (PPG) pulse wave), since changes in pulse wave velocity and BP influence the shapes of pulse waves.
25 |
26 | 
27 |
28 | Source: _Charlton PH et al., [Assessing hemodynamics from the photoplethysmogram to gain insights into vascular age: a review from VascAgeNet](https://doi.org/10.1152/ajpheart.00392.2021), AJP Heart Circ, 2022 (CC BY 4.0)._
29 |
30 | ### Questions
31 |
32 | Consider the following to familiarise yourself with the concepts of this workshop:
33 | - Have you used publicly available data or code before?
34 | - Have you shared data or code from your research?
35 | - How would you rate your biomedical signal processing skills?
36 | - Have you ever worked on estimating BP from PPG signals?
37 |
38 | ## In-person event
39 |
40 | These resources were designed for the following workshop at the [2022 IEEE EMBC Conference](https://embc.embs.org/2022/) in Glasgow, UK:
41 |
42 | **'Open research in Biomedical Signal Processing: Cuffless Blood Pressure Estimation Using the MIMIC-IV Database'**
43 |
44 | - 11th July 2022
45 | - 08.30 - 12.30
46 | - Boisdale-2, Ground Floor (as shown in the Ground Level floorplan [here](https://www.sec.co.uk/organise-an-event/capacities-dimensions))
47 | - Scottish Event Campus (SEC) Centre
48 |
49 | Follow the links on the left for further details of the workshop.
50 |
--------------------------------------------------------------------------------
/content/about/about-maintenance.md:
--------------------------------------------------------------------------------
1 | # Maintenance
2 |
3 | ## Editing the book
4 |
5 | There are two ways to edit the book:
6 | ```{dropdown} **1. Edit online:** Open to all
7 | - Submit a proposed edit using the instructions provided above under ['Contributing to the book'](#contributing-to-the-book).
8 | - This will be reviewed in due course.
9 | ```
10 | ```{dropdown} **2. Edit on a local computer:** Only open to project administrators
11 | - If you don't already have the _mimic_wfdb_tutorials_ repository on your computer, then clone the repository
12 |
13 | `cd /Users/petercharlton/Documents/GitHub/; git clone https://github.com/wfdb/mimic_wfdb_tutorials`
14 | - If you do already have the repository, then pull the latest version:
15 |
16 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials; git pull https://github.com/wfdb/mimic_wfdb_tutorials main`
17 | - Make edits to the files on a local computer:
18 | - `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials` - make the current directory the repo directory.
19 |   - `git checkout -b ` - Creates a new branch on which to make the edits (specified by ``), and makes it the current branch.
20 | - edit the files ([Atom](https://atom.io/) is a helpful text editor for this).
21 | - `git add .` - adds all changed files to the staging area.
22 | - `git commit -m ""` - commit the changes to the current branch.
23 | - `git push https://github.com/wfdb/mimic_wfdb_tutorials ` - pushes the changes to the remote repo on GitHub.
24 | - Log in to GitHub via a web browser, and go to the [repo home page](https://github.com/wfdb/mimic_wfdb_tutorials). Assuming you have access, then you should see a message at the top of the page allowing you to create a pull request, to pull the changes from your new branch over to the main branch.
25 |
26 | _The following are legacy instructions, which may or may not still be required when making changes to a Jupyter notebook:_
27 |
28 | - Upload the files through a git push (as detailed [here](https://jupyterbook.org/start/publish.html#create-an-online-repository-for-your-book)):
29 |
30 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials; git add ./*; git commit -m "brief edit"; git push`
31 | - Build the book locally (as detailed [here](https://jupyterbook.org/start/build.html#build-your-books-html)):
32 |
33 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials/; jupyter-book build --path-output . content`
34 | - Upload the built book to GitHub pages (as detailed [here](https://jupyterbook.org/start/publish.html#publish-your-book-online-with-github-pages)):
35 |
36 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials/; ghp-import -n -p -f _build/html`
37 | ```
38 |
39 | ## Recognising contributors
40 |
41 | Contributors to the Book who have GitHub accounts can be recognised using the 'All Contributors' app (see details [here](https://allcontributors.org/docs/en/bot/usage)).
42 |
43 | ## Creating the book
44 |
45 | The book was created as follows (largely following the instructions provided [here](https://jupyterbook.org/start/your-first-book.html)):
46 | ```{dropdown} **Steps to create the book:**
47 | 1. Install Jupyter book via conda-forge (as detailed [here](https://jupyterbook.org/start/overview.html))
48 | 2. Create a template book (as detailed [here](https://jupyterbook.org/start/create.html))
49 | 3. Modify the template to include content from Peter Charlton's original project guidelines (available [here](https://peterhcharlton.github.io/info/tools/project_guidelines.html)).
50 | 4. Build the book (as detailed [here](https://jupyterbook.org/start/build.html)).
51 | 5. Publish the book online (storing the source files in a GitHub repository, and publishing the book using GitHub pages, as detailed [here](https://jupyterbook.org/start/publish.html)).
52 | ```
53 |
--------------------------------------------------------------------------------
/content/mimic/formatting.md:
--------------------------------------------------------------------------------
1 | # Waveform Data Formats
2 |
3 | The waveform database is organized into "records". Each record represents a single patient and roughly corresponds to a single ICU stay (not always, because the bedside monitor may be temporarily shut off.) Each record is stored in a separate subdirectory.
4 |
5 | To avoid providing information that could identify individual patients, the record does not include any actual date or time information. Instead, measurements are recorded according to the "elapsed time" from the beginning of the record. To allow cross-referencing events with the other MIMIC-IV modules, the *surrogate date and time* for the start of the record are also provided.
6 |
7 | An example of the file structure is shown below. Here there are two patients (`subject_id` 10014354 and 10039708). There is one record (81739927) belonging to the first patient, and two records (83411188 and 85583557) belonging to the second.
8 |
9 | ```
10 | waves
11 | └── p100
12 | ├── p10014354
13 | │ └── 81739927
14 | │ ├── 81739927.dat
15 | │ ├── 81739927_0000.hea
16 | │ ├── 81739927_0001.hea
17 | │ ├── 81739927_0001e.dat
18 | │ ├── 81739927_0001r.dat
19 | │ ├── 81739927_0002.hea
20 | │ ├── 81739927_0002e.dat
21 | │ ├── 81739927_0002p.dat
22 | │ ├── 81739927_0002r.dat
23 | │ ├── ...
24 | │ └── 81739927n.csv.gz
25 | └── p10039708
26 | ├── 83411188
27 | │ ├── 83411188.hea
28 | │ ├── ...
29 | │ └── 83411188n.csv.gz
30 | └── 85583557
31 | ├── 85583557.hea
32 | ├── ...
33 | └── 85583557n.csv.gz
34 | ```
35 |
36 | ## Numerics
37 |
38 | "Numerics" are defined as measurements that are sampled irregularly or infrequently (less than once per second.) These measurements are stored as a single table, such as [83411188n.csv.gz](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188n.csv.gz).
39 |
40 | This file is a gzip-compressed CSV file, which can be loaded using software packages such as [Pandas](https://pandas.pydata.org/), or it can be unpacked using [gzip](https://www.gnu.org/software/gzip/) and parsed as you would parse any CSV file. Note that in contrast to most other MIMIC-IV data tables, the list of *columns* in this table are not the same from one patient to another.
41 |
42 | Note that "elapsed time" for numeric values is measured in counter ticks (1/999.52 second, or about one millisecond.)
43 |
44 | ## Waveforms
45 |
46 | "Waveforms" are defined as measurements that are sampled regularly at high resolution (62.47 samples per second or more.) These measurements are stored as a set of files in WFDB (Waveform Database) format.
47 |
48 | For the sake of storage and processing efficiency, waveforms are broken into multiple *segments* representing different time intervals. It's common for some signals not to be available for the entire duration of a patient's ICU stay, but within a given segment, the available signals are sampled continuously and the list of available signals doesn't change.
49 |
50 | A segment, in turn, consists of a *header file* (such as [83411188_0001.hea](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188_0001.hea)) and one or more *signal files* (such as [83411188_0001e.dat](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188_0001e.dat) and [83411188_0001r.dat](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188_0001r.dat)).
51 |
52 | In general, you do not need to parse these files yourself, and it is easiest to use one of the existing software packages for doing so: the [WFDB Python Package](https://github.com/MIT-LCP/wfdb-python) or the original [WFDB Software Package](https://physionet.org/content/wfdb/). Data can also be converted into other formats using tools such as `rdsamp` or `wfdb2mat` from the WFDB Software Package.
53 |
--------------------------------------------------------------------------------
/content/references.bib:
--------------------------------------------------------------------------------
1 | ---
2 | ---
3 |
4 | @inproceedings{holdgraf_evidence_2014,
5 | address = {Brisbane, Australia, Australia},
6 | title = {Evidence for {Predictive} {Coding} in {Human} {Auditory} {Cortex}},
7 | booktitle = {International {Conference} on {Cognitive} {Neuroscience}},
8 | publisher = {Frontiers in Neuroscience},
9 | author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Knight, Robert T.},
10 | year = {2014}
11 | }
12 |
13 | @article{holdgraf_rapid_2016,
14 | title = {Rapid tuning shifts in human auditory cortex enhance speech intelligibility},
15 | volume = {7},
16 | issn = {2041-1723},
17 | url = {http://www.nature.com/doifinder/10.1038/ncomms13654},
18 | doi = {10.1038/ncomms13654},
19 | number = {May},
20 | journal = {Nature Communications},
21 | author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Rieger, Jochem W. and Crone, Nathan and Lin, Jack J. and Knight, Robert T. and Theunissen, Frédéric E.},
22 | year = {2016},
23 | pages = {13654},
24 | file = {Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:C\:\\Users\\chold\\Zotero\\storage\\MDQP3JWE\\Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:application/pdf}
25 | }
26 |
27 | @inproceedings{holdgraf_portable_2017,
28 | title = {Portable learning environments for hands-on computational instruction using container-and cloud-based technology to teach data science},
29 | volume = {Part F1287},
30 | isbn = {978-1-4503-5272-7},
31 | doi = {10.1145/3093338.3093370},
32 | abstract = {© 2017 ACM. There is an increasing interest in learning outside of the traditional classroom setting. This is especially true for topics covering computational tools and data science, as both are challenging to incorporate in the standard curriculum. These atypical learning environments offer new opportunities for teaching, particularly when it comes to combining conceptual knowledge with hands-on experience/expertise with methods and skills. Advances in cloud computing and containerized environments provide an attractive opportunity to improve the effciency and ease with which students can learn. This manuscript details recent advances towards using commonly-Available cloud computing services and advanced cyberinfrastructure support for improving the learning experience in bootcamp-style events. We cover the benets (and challenges) of using a server hosted remotely instead of relying on student laptops, discuss the technology that was used in order to make this possible, and give suggestions for how others could implement and improve upon this model for pedagogy and reproducibility.},
33 | booktitle = {{ACM} {International} {Conference} {Proceeding} {Series}},
34 | author = {Holdgraf, Christopher Ramsay and Culich, A. and Rokem, A. and Deniz, F. and Alegro, M. and Ushizima, D.},
35 | year = {2017},
36 | keywords = {Teaching, Bootcamps, Cloud computing, Data science, Docker, Pedagogy}
37 | }
38 |
39 | @article{holdgraf_encoding_2017,
40 | title = {Encoding and decoding models in cognitive electrophysiology},
41 | volume = {11},
42 | issn = {16625137},
43 | doi = {10.3389/fnsys.2017.00061},
44 | abstract = {© 2017 Holdgraf, Rieger, Micheli, Martin, Knight and Theunissen. Cognitive neuroscience has seen rapid growth in the size and complexity of data recorded from the human brain as well as in the computational tools available to analyze this data. This data explosion has resulted in an increased use of multivariate, model-based methods for asking neuroscience questions, allowing scientists to investigate multiple hypotheses with a single dataset, to use complex, time-varying stimuli, and to study the human brain under more naturalistic conditions. These tools come in the form of “Encoding” models, in which stimulus features are used to model brain activity, and “Decoding” models, in which neural features are used to generated a stimulus output. Here we review the current state of encoding and decoding models in cognitive electrophysiology and provide a practical guide toward conducting experiments and analyses in this emerging field. Our examples focus on using linear models in the study of human language and audition. We show how to calculate auditory receptive fields from natural sounds as well as how to decode neural recordings to predict speech. The paper aims to be a useful tutorial to these approaches, and a practical introduction to using machine learning and applied statistics to build models of neural activity. The data analytic approaches we discuss may also be applied to other sensory modalities, motor systems, and cognitive systems, and we cover some examples in these areas. In addition, a collection of Jupyter notebooks is publicly available as a complement to the material covered in this paper, providing code examples and tutorials for predictive modeling in python. The aimis to provide a practical understanding of predictivemodeling of human brain data and to propose best-practices in conducting these analyses.},
45 | journal = {Frontiers in Systems Neuroscience},
46 | author = {Holdgraf, Christopher Ramsay and Rieger, J.W. and Micheli, C. and Martin, S. and Knight, R.T. and Theunissen, F.E.},
47 | year = {2017},
48 | keywords = {Decoding models, Encoding models, Electrocorticography (ECoG), Electrophysiology/evoked potentials, Machine learning applied to neuroscience, Natural stimuli, Predictive modeling, Tutorials}
49 | }
50 |
51 | @book{ruby,
52 | title = {The Ruby Programming Language},
53 | author = {Flanagan, David and Matsumoto, Yukihiro},
54 | year = {2008},
55 | publisher = {O'Reilly Media}
56 | }
57 |
--------------------------------------------------------------------------------
/content/tutorial/notebooks/fiducial_point_functions.py:
--------------------------------------------------------------------------------
1 | import scipy.signal as sp
2 | import numpy as np
3 | from matplotlib import pyplot as plt
4 |
def fiducial_points(x: np.ndarray, pks: np.ndarray, fs: float, vis: bool) -> dict:
    """
    Description: Pulse detection and correction from pulsatile signals
    Inputs:  x,   array with pulsatile signal [user defined units]
             pks, array with the position of the peaks [number of samples]
             fs,  sampling rate of signal [Hz]
             vis, visualisation option [True, False]
    Outputs: fidp, dictionary with the positions of several fiducial points for
             the cardiac cycles [number of samples]

    Fiducial points:  1: Systolic peak (pks)
                      2: Onset, as the minimum before the systolic peak (ons)
                      3: Onset, using the tangent intersection method (ti)
                      4: Diastolic peak (dpk)
                      5: Maximum slope (m1d)
                      6: a point from second derivative PPG (a2d)
                      7: b point from second derivative PPG (b2d)
                      8: c point from second derivative PPG (c2d)
                      9: d point from second derivative PPG (d2d)
                      10: e point from second derivative PPG (e2d)
                      11: p1 from the third derivative PPG (p1)
                      12: p2 from the third derivative PPG (p2)

    Libraries: NumPy (as np), SciPy (Signal, as sp), Matplotlib (PyPlot, as plt)

    Version: 1.0 - June 2022

    Developed by: Elisa Mejía-Mejía
                  City, University of London

    Edited by: Peter Charlton (see "Added by PC")

    Notes (review):
    - Several per-beat loops assume exactly one systolic peak lies between a
      pair of consecutive onsets; the single-element unpacking
      ``ind_pks, = np.intersect1d(...)`` raises ValueError otherwise — TODO
      confirm the upstream beat detector guarantees this.
    - ``fs`` is documented but not used in the computations below.
    """
    # First, second and third derivatives of the pulse wave, computed with a
    # Savitzky-Golay filter (window = 9 samples, polynomial order = 5).
    d1x = sp.savgol_filter(x, 9, 5, deriv = 1)
    d2x = sp.savgol_filter(x, 9, 5, deriv = 2)
    d3x = sp.savgol_filter(x, 9, 5, deriv = 3)

    #plt.figure()
    #plt.plot(x/np.max(x))
    #plt.plot(d1x/np.max(d1x))
    #plt.plot(d2x/np.max(d2x))
    #plt.plot(d3x/np.max(d3x))

    # Search in time series: Onsets between consecutive peaks.
    # The onset of each beat is taken as the global minimum of the signal
    # between two consecutive systolic peaks.
    ons = np.empty(0)
    for i in range(len(pks) - 1):
        start = pks[i]
        stop = pks[i + 1]
        ibi = x[start:stop]  # inter-beat interval of the raw signal
        #plt.figure()
        #plt.plot(ibi, color = 'black')
        aux_ons, = np.where(ibi == np.min(ibi))
        if len(aux_ons) > 1:
            # Ties are resolved by keeping the earliest minimum
            aux_ons = aux_ons[0]
        ind_ons = aux_ons.astype(int)
        ons = np.append(ons, ind_ons + start)  # back to absolute sample index
        #plt.plot(ind_ons, ibi[ind_ons], marker = 'o', color = 'red')
    ons = ons.astype(int)
    #print('Onsets: ' + str(ons))
    #plt.figure()
    #plt.plot(x, color = 'black')
    #plt.scatter(pks, x[pks], marker = 'o', color = 'red')
    #plt.scatter(ons, x[ons], marker = 'o', color = 'blue')

    # Search in time series: Diastolic peak and dicrotic notch between
    # consecutive onsets. Both are located on the decaying limb (peak -> next
    # onset) using extrema of the second derivative.
    dia = np.empty(0)
    dic = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        # NOTE(review): assumes exactly one peak in (start, stop); the
        # single-element unpacking fails otherwise — confirm upstream.
        ind_pks, = np.intersect1d(np.where(pks < stop), np.where(pks > start))
        ind_pks = pks[ind_pks]
        ibi_portion = x[ind_pks:stop]      # systolic peak -> next onset
        ibi_2d_portion = d2x[ind_pks:stop]
        #plt.figure()
        #plt.plot(ibi_portion/np.max(ibi_portion))
        #plt.plot(ibi_2d_portion/np.max(ibi_2d_portion))
        aux_dic, _ = sp.find_peaks(ibi_2d_portion)   # maxima of d2x: notch candidates
        aux_dic = aux_dic.astype(int)
        aux_dia, _ = sp.find_peaks(-ibi_2d_portion)  # minima of d2x: diastolic-peak candidates
        aux_dia = aux_dia.astype(int)
        if len(aux_dic) != 0:
            # Dicrotic notch: candidate with the largest d2x value
            ind_max, = np.where(ibi_2d_portion[aux_dic] == np.max(ibi_2d_portion[aux_dic]))
            aux_dic_max = aux_dic[ind_max]
            if len(aux_dia) != 0:
                # Diastolic peak: the d2x minimum closest after the notch
                nearest = aux_dia - aux_dic_max
                aux_dic = aux_dic_max
                dic = np.append(dic, (aux_dic + ind_pks).astype(int))
                #plt.scatter(aux_dic, ibi_portion[aux_dic]/np.max(ibi_portion), marker = 'o')
                ind_dia, = np.where(nearest > 0)  # keep only candidates after the notch
                aux_dia = aux_dia[ind_dia]
                nearest = nearest[ind_dia]
                if len(nearest) != 0:
                    ind_nearest, = np.where(nearest == np.min(nearest))
                    aux_dia = aux_dia[ind_nearest]
                    dia = np.append(dia, (aux_dia + ind_pks).astype(int))
                    #plt.scatter(aux_dia, ibi_portion[aux_dia]/np.max(ibi_portion), marker = 'o')
                    #break
            else:
                # No diastolic-peak candidate: store only the dicrotic notch
                dic = np.append(dic, (aux_dic_max + ind_pks).astype(int))
                #plt.scatter(aux_dia, ibi_portion[aux_dia]/np.max(ibi_portion), marker = 'o')
    dia = dia.astype(int)
    dic = dic.astype(int)
    #plt.scatter(dia, x[dia], marker = 'o', color = 'orange')
    #plt.scatter(dic, x[dic], marker = 'o', color = 'green')

    # Search in D1: Maximum slope point, i.e. the largest first-derivative
    # peak on the rising limb (onset -> systolic peak) of each beat.
    m1d = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        # NOTE(review): same single-peak-per-beat assumption as above.
        ind_pks, = np.intersect1d(np.where(pks < stop), np.where(pks > start))
        ind_pks = pks[ind_pks]
        ibi_portion = x[start:ind_pks]      # onset -> systolic peak
        ibi_1d_portion = d1x[start:ind_pks]
        #plt.figure()
        #plt.plot(ibi_portion/np.max(ibi_portion))
        #plt.plot(ibi_1d_portion/np.max(ibi_1d_portion))
        aux_m1d, _ = sp.find_peaks(ibi_1d_portion)
        aux_m1d = aux_m1d.astype(int)
        if len(aux_m1d) != 0:
            # Keep the first-derivative peak with the largest slope value
            ind_max, = np.where(ibi_1d_portion[aux_m1d] == np.max(ibi_1d_portion[aux_m1d]))
            aux_m1d_max = aux_m1d[ind_max]
            if len(aux_m1d_max) > 1:
                aux_m1d_max = aux_m1d_max[0]
            m1d = np.append(m1d, (aux_m1d_max + start).astype(int))
            #plt.scatter(aux_m1d, ibi_portion[aux_dic]/np.max(ibi_portion), marker = 'o')
            #break
    m1d = m1d.astype(int)
    #plt.scatter(m1d, x[m1d], marker = 'o', color = 'purple')

    # Search in time series: Tangent intersection points. The onset estimate
    # is where the tangent at the maximum-slope point crosses the horizontal
    # line through the preceding onset amplitude.
    tip = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        ibi_portion = x[start:stop]
        ibi_1d_portion = d1x[start:stop]
        # NOTE(review): assumes exactly one m1d point inside this beat.
        ind_m1d, = np.intersect1d(np.where(m1d < stop), np.where(m1d > start))
        ind_m1d = m1d[ind_m1d] - start  # relative to beat start
        #plt.figure()
        #plt.plot(ibi_portion/np.max(ibi_portion))
        #plt.plot(ibi_1d_portion/np.max(ibi_1d_portion))
        #plt.scatter(ind_m1d, ibi_portion[ind_m1d]/np.max(ibi_portion), marker = 'o')
        #plt.scatter(ind_m1d, ibi_1d_portion[ind_m1d]/np.max(ibi_1d_portion), marker = 'o')
        # x-coordinate where the tangent line reaches the onset amplitude
        aux_tip = np.round(((ibi_portion[0] - ibi_portion[ind_m1d])/ibi_1d_portion[ind_m1d]) + ind_m1d)
        aux_tip = aux_tip.astype(int)
        tip = np.append(tip, (aux_tip + start).astype(int))
        #plt.scatter(aux_tip, ibi_portion[aux_tip]/np.max(ibi_portion), marker = 'o')
        #break
    tip = tip.astype(int)
    #plt.scatter(tip, x[tip], marker = 'o', color = 'aqua')

    # Search in D2: A, B, C, D and E points of the second-derivative PPG
    # (a: max of d2x before max slope; b: min after a; e: d2x peak after max
    # slope within the first 60% of the beat; c, d: extrema between b/e).
    a2d = np.empty(0)
    b2d = np.empty(0)
    c2d = np.empty(0)
    d2d = np.empty(0)
    e2d = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        ibi_portion = x[start:stop]
        ibi_1d_portion = d1x[start:stop]
        ibi_2d_portion = d2x[start:stop]
        ind_m1d = np.intersect1d(np.where(m1d > start),np.where(m1d < stop))
        ind_m1d = m1d[ind_m1d]
        #plt.figure()
        #plt.plot(ibi_portion/np.max(ibi_portion))
        #plt.plot(ibi_1d_portion/np.max(ibi_1d_portion))
        #plt.plot(ibi_2d_portion/np.max(ibi_2d_portion))
        aux_m2d_pks, _ = sp.find_peaks(ibi_2d_portion)   # maxima of d2x
        aux_m2d_ons, _ = sp.find_peaks(-ibi_2d_portion)  # minima of d2x
        # a point: largest d2x maximum, kept only if before the max-slope point
        ind_a, = np.where(ibi_2d_portion[aux_m2d_pks] == np.max(ibi_2d_portion[aux_m2d_pks]))
        ind_a = aux_m2d_pks[ind_a]
        if (ind_a < ind_m1d):
            a2d = np.append(a2d, ind_a + start)
            #plt.scatter(ind_a, ibi_2d_portion[ind_a]/np.max(ibi_2d_portion), marker = 'o')
        # b point: deepest d2x minimum after the a point
        # NOTE(review): no trailing comma here (unlike ind_a above), so ind_b
        # is a 1-tuple used for fancy indexing — confirm this is intentional.
        ind_b = np.where(ibi_2d_portion[aux_m2d_ons] == np.min(ibi_2d_portion[aux_m2d_ons]))
        ind_b = aux_m2d_ons[ind_b]
        if (ind_b > ind_a) and (ind_b < len(ibi_2d_portion)):
            b2d = np.append(b2d, ind_b + start)
            #plt.scatter(ind_b, ibi_2d_portion[ind_b]/np.max(ibi_2d_portion), marker = 'o')
        # e point: d2x maximum after the max-slope point, within the first
        # 60% of the beat; the second such candidate is preferred if present
        ind_e, = np.where(aux_m2d_pks > ind_m1d - start)
        aux_m2d_pks = aux_m2d_pks[ind_e]
        ind_e, = np.where(aux_m2d_pks < 0.6*len(ibi_2d_portion))
        ind_e = aux_m2d_pks[ind_e]
        if len(ind_e) >= 1:
            if len(ind_e) >= 2:
                ind_e = ind_e[1]
            e2d = np.append(e2d, ind_e + start)
            #plt.scatter(ind_e, ibi_2d_portion[ind_e]/np.max(ibi_2d_portion), marker = 'o')
        # c point: largest d2x maximum between the b and e points; if none,
        # fall back to first-derivative minima after b
        ind_c, = np.where(aux_m2d_pks < ind_e)
        if len(ind_c) != 0:
            ind_c_aux = aux_m2d_pks[ind_c]
            ind_c, = np.where(ibi_2d_portion[ind_c_aux] == np.max(ibi_2d_portion[ind_c_aux]))
            ind_c = ind_c_aux[ind_c]
            if len(ind_c) != 0:
                c2d = np.append(c2d, ind_c + start)
                #plt.scatter(ind_c, ibi_2d_portion[ind_c]/np.max(ibi_2d_portion), marker = 'o')
        else:
            # Fallback: use minima of the first derivative before e, after b
            aux_m1d_ons, _ = sp.find_peaks(-ibi_1d_portion)
            ind_c, = np.where(aux_m1d_ons < ind_e)
            ind_c_aux = aux_m1d_ons[ind_c]
            if len(ind_c) != 0:
                ind_c, = np.where(ind_c_aux > ind_b)
                ind_c = ind_c_aux[ind_c]
                if len(ind_c) > 1:
                    ind_c = ind_c[0]
                c2d = np.append(c2d, ind_c + start)
                #plt.scatter(ind_c, ibi_2d_portion[ind_c]/np.max(ibi_2d_portion), marker = 'o')
        # d point: deepest d2x minimum between c and e; coincides with c when
        # no minimum exists in between
        if len(ind_c) != 0:
            ind_d = np.intersect1d(np.where(aux_m2d_ons < ind_e), np.where(aux_m2d_ons > ind_c))
            if len(ind_d) != 0:
                ind_d_aux = aux_m2d_ons[ind_d]
                ind_d, = np.where(ibi_2d_portion[ind_d_aux] == np.min(ibi_2d_portion[ind_d_aux]))
                ind_d = ind_d_aux[ind_d]
                if len(ind_d) != 0:
                    d2d = np.append(d2d, ind_d + start)
                    #plt.scatter(ind_d, ibi_2d_portion[ind_d]/np.max(ibi_2d_portion), marker = 'o')
            else:
                ind_d = ind_c
                d2d = np.append(d2d, ind_d + start)
                #plt.scatter(ind_d, ibi_2d_portion[ind_d]/np.max(ibi_2d_portion), marker = 'o')
    a2d = a2d.astype(int)
    b2d = b2d.astype(int)
    c2d = c2d.astype(int)
    d2d = d2d.astype(int)
    e2d = e2d.astype(int)
    #plt.figure()
    #plt.plot(d2x, color = 'black')
    #plt.scatter(a2d, d2x[a2d], marker = 'o', color = 'red')
    #plt.scatter(b2d, d2x[b2d], marker = 'o', color = 'blue')
    #plt.scatter(c2d, d2x[c2d], marker = 'o', color = 'green')
    #plt.scatter(d2d, d2x[d2d], marker = 'o', color = 'orange')
    #plt.scatter(e2d, d2x[e2d], marker = 'o', color = 'purple')

    # Search in D3: P1 and P2 points of the third-derivative PPG
    # (P1: first d3x maximum after b; P2: d3x minimum anchored around d,
    # optionally corrected using pulse-wave peaks near the dicrotic notch).
    p1p = np.empty(0)
    p2p = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        ibi_portion = x[start:stop]
        ibi_1d_portion = d1x[start:stop]
        ibi_2d_portion = d2x[start:stop]
        ibi_3d_portion = d3x[start:stop]
        # Fiducial points already found for this beat (absolute indices)
        ind_b = np.intersect1d(np.where(b2d > start),np.where(b2d < stop))
        ind_b = b2d[ind_b]
        ind_c = np.intersect1d(np.where(c2d > start),np.where(c2d < stop))
        ind_c = c2d[ind_c]
        ind_d = np.intersect1d(np.where(d2d > start),np.where(d2d < stop))
        ind_d = d2d[ind_d]
        ind_dic = np.intersect1d(np.where(dic > start),np.where(dic < stop))
        ind_dic = dic[ind_dic]
        #plt.figure()
        #plt.plot(ibi_portion/np.max(ibi_portion))
        #plt.plot(ibi_1d_portion/np.max(ibi_1d_portion))
        #plt.plot(ibi_2d_portion/np.max(ibi_2d_portion))
        #plt.plot(ibi_3d_portion/np.max(ibi_3d_portion))
        #plt.scatter(ind_b - start, ibi_3d_portion[ind_b - start]/np.max(ibi_3d_portion), marker = 'o')
        #plt.scatter(ind_c - start, ibi_3d_portion[ind_c - start]/np.max(ibi_3d_portion), marker = 'o')
        #plt.scatter(ind_d - start, ibi_3d_portion[ind_d - start]/np.max(ibi_3d_portion), marker = 'o')
        #plt.scatter(ind_dic - start, ibi_3d_portion[ind_dic - start]/np.max(ibi_3d_portion), marker = 'o')
        aux_p3d_pks, _ = sp.find_peaks(ibi_3d_portion)   # maxima of d3x
        aux_p3d_ons, _ = sp.find_peaks(-ibi_3d_portion)  # minima of d3x
        # P1: first d3x maximum after the b point
        if (len(aux_p3d_pks) != 0 and len(ind_b) != 0):
            ind_p1, = np.where(aux_p3d_pks > ind_b - start)
            if len(ind_p1) != 0:
                ind_p1 = aux_p3d_pks[ind_p1[0]]
                p1p = np.append(p1p, ind_p1 + start)
                #plt.scatter(ind_p1, ibi_3d_portion[ind_p1]/np.max(ibi_3d_portion), marker = 'o')
        # P2: first d3x minimum after d when c == d, otherwise last d3x
        # minimum before d; refined to the nearest pulse-wave peak between
        # P2 and the dicrotic notch when one exists
        if (len(aux_p3d_ons) != 0 and len(ind_c) != 0 and len(ind_d) != 0):
            if ind_c == ind_d:
                ind_p2, = np.where(aux_p3d_ons > ind_d - start)
                ind_p2 = aux_p3d_ons[ind_p2[0]]
            else:
                ind_p2, = np.where(aux_p3d_ons < ind_d - start)
                ind_p2 = aux_p3d_ons[ind_p2[-1]]
            if len(ind_dic) != 0:
                aux_x_pks, _ = sp.find_peaks(ibi_portion)
                if ind_p2 > ind_dic - start:
                    ind_between = np.intersect1d(np.where(aux_x_pks < ind_p2), np.where(aux_x_pks > ind_dic - start))
                else:
                    ind_between = np.intersect1d(np.where(aux_x_pks > ind_p2), np.where(aux_x_pks < ind_dic - start))
                if len(ind_between) != 0:
                    ind_p2 = aux_x_pks[ind_between[0]]
            p2p = np.append(p2p, ind_p2 + start)
            #plt.scatter(ind_p2, ibi_3d_portion[ind_p2]/np.max(ibi_3d_portion), marker = 'o')
    p1p = p1p.astype(int)
    p2p = p2p.astype(int)
    #plt.figure()
    #plt.plot(d3x, color = 'black')
    #plt.scatter(p1p, d3x[p1p], marker = 'o', color = 'green')
    #plt.scatter(p2p, d3x[p2p], marker = 'o', color = 'orange')

    # Added by PC: Magnitudes of second derivative points, normalised by the
    # a-point amplitude of the same beat.
    # NOTE(review): the output arrays are sized by len(b2d) but the loop runs
    # over len(d2d), and a2d/b2d/c2d/e2d can have different lengths when a
    # point was missed in some beat — potential IndexError / misalignment;
    # confirm beats always yield a full set of points.
    bmag2d = np.zeros(len(b2d))
    cmag2d = np.zeros(len(b2d))
    dmag2d = np.zeros(len(b2d))
    emag2d = np.zeros(len(b2d))
    for beat_no in range(0,len(d2d)):
        bmag2d[beat_no] = d2x[b2d[beat_no]]/d2x[a2d[beat_no]]
        cmag2d[beat_no] = d2x[c2d[beat_no]]/d2x[a2d[beat_no]]
        dmag2d[beat_no] = d2x[d2d[beat_no]]/d2x[a2d[beat_no]]
        emag2d[beat_no] = d2x[e2d[beat_no]]/d2x[a2d[beat_no]]

    # Added by PC: Refine the list of fiducial points to only include those
    # corresponding to beats for which a full set of points is available
    # (pair each onset with the following onset as the beat's 'off', then
    # drop peaks falling outside the first/last complete beat).
    off = ons[1:]
    ons = ons[:-1]
    if pks[0] < ons[0]:
        pks = pks[1:]
    if pks[-1] > off[-1]:
        pks = pks[:-1]

    # Visualise results: signal + first/second/third derivatives, each with
    # its associated fiducial points overlaid.
    if vis == True:
        fig, (ax1,ax2,ax3,ax4) = plt.subplots(4, 1, sharex = True, sharey = False, figsize=(10,10))
        fig.suptitle('Fiducial points')

        ax1.plot(x, color = 'black')
        ax1.scatter(pks, x[pks.astype(int)], color = 'orange', label = 'pks')
        ax1.scatter(ons, x[ons.astype(int)], color = 'green', label = 'ons')
        ax1.scatter(off, x[off.astype(int)], marker = '*', color = 'green', label = 'off')
        ax1.scatter(dia, x[dia.astype(int)], color = 'yellow', label = 'dia')
        ax1.scatter(dic, x[dic.astype(int)], color = 'blue', label = 'dic')
        # NOTE(review): label duplicates 'dic'; presumably intended 'tip'
        ax1.scatter(tip, x[tip.astype(int)], color = 'purple', label = 'dic')
        ax1.legend()
        ax1.set_ylabel('x')

        ax2.plot(d1x, color = 'black')
        ax2.scatter(m1d, d1x[m1d.astype(int)], color = 'orange', label = 'm1d')
        ax2.legend()
        ax2.set_ylabel('d1x')

        ax3.plot(d2x, color = 'black')
        ax3.scatter(a2d, d2x[a2d.astype(int)], color = 'orange', label = 'a')
        ax3.scatter(b2d, d2x[b2d.astype(int)], color = 'green', label = 'b')
        ax3.scatter(c2d, d2x[c2d.astype(int)], color = 'yellow', label = 'c')
        ax3.scatter(d2d, d2x[d2d.astype(int)], color = 'blue', label = 'd')
        ax3.scatter(e2d, d2x[e2d.astype(int)], color = 'purple', label = 'e')
        ax3.legend()
        ax3.set_ylabel('d2x')

        ax4.plot(d3x, color = 'black')
        ax4.scatter(p1p, d3x[p1p.astype(int)], color = 'orange', label = 'p1')
        ax4.scatter(p2p, d3x[p2p.astype(int)], color = 'green', label = 'p2')
        ax4.legend()
        ax4.set_ylabel('d3x')

        plt.subplots_adjust(left = 0.1,
                            bottom = 0.1,
                            right = 0.9,
                            top = 0.9,
                            wspace = 0.4,
                            hspace = 0.4)

    # Creation of dictionary collecting all fiducial-point index arrays
    fidp = {'pks': pks.astype(int),
            'ons': ons.astype(int),
            'off': off.astype(int),  # Added by PC
            'tip': tip.astype(int),
            'dia': dia.astype(int),
            'dic': dic.astype(int),
            'm1d': m1d.astype(int),
            'a2d': a2d.astype(int),
            'b2d': b2d.astype(int),
            'c2d': c2d.astype(int),
            'd2d': d2d.astype(int),
            'e2d': e2d.astype(int),
            'bmag2d': bmag2d,
            'cmag2d': cmag2d,
            'dmag2d': dmag2d,
            'emag2d': emag2d,
            'p1p': p1p.astype(int),
            'p2p': p2p.astype(int)
            }

    return fidp
391 |
--------------------------------------------------------------------------------
/content/tutorial/notebooks/data-extraction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "5d037743",
6 | "metadata": {
7 | "id": "5d037743"
8 | },
9 | "source": [
10 | "# Data Extraction\n",
11 | "\n",
12 | "In this tutorial we'll extract data from the MIMIC-IV Waveform Database.\n",
13 | "\n",
14 | "Our **objectives** are to:\n",
15 | "- Extract signals from one segment of a record.\n",
16 | "- Limit the segment to only the required duration of relevant signals (_i.e._ 10 min of photoplethysmography and blood pressure signals)."
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "fe20dd08",
22 | "metadata": {
23 | "id": "fe20dd08"
24 | },
25 | "source": [
26 | "\n",
27 | "
Context:\n",
28 | " In the Data Exploration tutorial we learnt how to identify segments of waveform data which are suitable for a particular research study (i.e. which have the required duration of the required signals). We extracted metadata for such a segment, providing high-level details of what is contained in the segment (e.g. which signals, their sampling frequency, and their duration). Now we will go a step further to extract signals for analysis.
\n",
29 | "
"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "id": "fd8a0055",
35 | "metadata": {
36 | "id": "fd8a0055"
37 | },
38 | "source": [
39 | "---\n",
40 | "## Setup\n",
41 | ""
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "id": "f4e37777",
48 | "metadata": {
49 | "id": "f4e37777"
50 | },
51 | "source": [
52 | "- Specify the required Python packages"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "id": "10fdf08b",
59 | "metadata": {
60 | "id": "10fdf08b"
61 | },
62 | "outputs": [],
63 | "source": [
64 | "import sys\n",
65 | "from pathlib import Path"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "id": "ccce3426",
71 | "metadata": {
72 | "id": "ccce3426"
73 | },
74 | "source": [
75 | "- Install and import the WFDB toolbox"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 4,
81 | "id": "06c8cc1f",
82 | "metadata": {
83 | "id": "06c8cc1f",
84 | "outputId": "747c5f42-e691-4981-fb53-c6f38007e456",
85 | "colab": {
86 | "base_uri": "https://localhost:8080/"
87 | }
88 | },
89 | "outputs": [
90 | {
91 | "output_type": "stream",
92 | "name": "stdout",
93 | "text": [
94 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
95 | "Requirement already satisfied: wfdb==4.0.0 in /usr/local/lib/python3.7/dist-packages (4.0.0)\n",
96 | "Requirement already satisfied: SoundFile<0.12.0,>=0.10.0 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (0.10.3.post1)\n",
97 | "Requirement already satisfied: requests<3.0.0,>=2.8.1 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (2.23.0)\n",
98 | "Requirement already satisfied: pandas<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (1.3.5)\n",
99 | "Requirement already satisfied: scipy<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (1.4.1)\n",
100 | "Requirement already satisfied: matplotlib<4.0.0,>=3.2.2 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (3.2.2)\n",
101 | "Requirement already satisfied: numpy<2.0.0,>=1.10.1 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (1.21.6)\n",
102 | "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (2.8.2)\n",
103 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (1.4.3)\n",
104 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (3.0.9)\n",
105 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (0.11.0)\n",
106 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (4.1.1)\n",
107 | "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas<2.0.0,>=1.0.0->wfdb==4.0.0) (2022.1)\n",
108 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (1.15.0)\n",
109 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (2022.6.15)\n",
110 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (1.24.3)\n",
111 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (3.0.4)\n",
112 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (2.10)\n",
113 | "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.7/dist-packages (from SoundFile<0.12.0,>=0.10.0->wfdb==4.0.0) (1.15.0)\n",
114 | "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0->SoundFile<0.12.0,>=0.10.0->wfdb==4.0.0) (2.21)\n"
115 | ]
116 | }
117 | ],
118 | "source": [
119 | "!pip install wfdb==4.0.0\n",
120 | "import wfdb"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "id": "524ed046",
126 | "metadata": {
127 | "id": "524ed046"
128 | },
129 | "source": [
130 | "- Specify the settings for the MIMIC-IV database"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 5,
136 | "id": "2915e121",
137 | "metadata": {
138 | "id": "2915e121"
139 | },
140 | "outputs": [],
141 | "source": [
142 | "# The name of the MIMIC-IV Waveform Database on PhysioNet\n",
143 | "database_name = 'mimic4wdb/0.1.0'"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "id": "3ea79319",
149 | "metadata": {
150 | "id": "3ea79319"
151 | },
152 | "source": [
153 | "- Provide a list of segments which meet the requirements for the study (NB: these are copied from the end of the [Data Exploration Tutorial](https://wfdb.io/mimic_wfdb_tutorials/tutorial/notebooks/data-exploration.html))."
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 6,
159 | "id": "0ee58931",
160 | "metadata": {
161 | "id": "0ee58931"
162 | },
163 | "outputs": [],
164 | "source": [
165 | "segment_names = ['83404654_0005']\n",
166 | "segment_dirs = ['mimic4wdb/0.1.0/waves/p100/p10020306/83404654']"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "id": "0e90110a",
172 | "metadata": {
173 | "id": "0e90110a"
174 | },
175 | "source": [
176 | "- Specify a segment from which to extract data"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 9,
182 | "id": "05fb68d0",
183 | "metadata": {
184 | "id": "05fb68d0",
185 | "outputId": "776068c7-a586-4f0a-a6b3-3154bde5459a",
186 | "colab": {
187 | "base_uri": "https://localhost:8080/"
188 | }
189 | },
190 | "outputs": [
191 | {
192 | "output_type": "stream",
193 | "name": "stdout",
194 | "text": [
195 | "Specified segment '83404654_0005' in directory: 'mimic4wdb/0.1.0/waves/p100/p10020306/83404654'\n"
196 | ]
197 | }
198 | ],
199 | "source": [
200 | "rel_segment_no = 0\n",
201 | "rel_segment_name = segment_names[rel_segment_no]\n",
202 | "rel_segment_dir = segment_dirs[rel_segment_no]\n",
203 | "print(f\"Specified segment '{rel_segment_name}' in directory: '{rel_segment_dir}'\")"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "id": "d00513bd",
209 | "metadata": {
210 | "id": "d00513bd"
211 | },
212 | "source": [
213 | "\n",
214 | "
Extension: Have a look at the files which make up this record here (NB: you will need to scroll to the bottom of the page).
\n",
215 | "
"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "id": "3b2e6adb",
221 | "metadata": {
222 | "id": "3b2e6adb"
223 | },
224 | "source": [
225 | "---\n",
226 | "## Extract data for this segment"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "id": "e8810358",
232 | "metadata": {
233 | "id": "e8810358"
234 | },
235 | "source": [
236 | "- Use the [`rdrecord`](https://wfdb.readthedocs.io/en/latest/io.html#wfdb.io.rdrecord) function from the WFDB toolbox to read the data for this segment."
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 8,
242 | "id": "8626ebac",
243 | "metadata": {
244 | "id": "8626ebac",
245 | "outputId": "8c0d3d8e-fb01-4a3a-e75f-6502941fad70",
246 | "colab": {
247 | "base_uri": "https://localhost:8080/"
248 | }
249 | },
250 | "outputs": [
251 | {
252 | "output_type": "stream",
253 | "name": "stdout",
254 | "text": [
255 | "Data loaded from segment: 83404654_0005\n"
256 | ]
257 | }
258 | ],
259 | "source": [
260 | "segment_data = wfdb.rdrecord(record_name=rel_segment_name, pn_dir=rel_segment_dir) \n",
261 | "print(f\"Data loaded from segment: {rel_segment_name}\")"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "id": "5032d6c4",
267 | "metadata": {
268 | "id": "5032d6c4"
269 | },
270 | "source": [
271 | "- Look at class type of the object in which the data are stored:"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 10,
277 | "id": "967fa4ef",
278 | "metadata": {
279 | "id": "967fa4ef",
280 | "outputId": "9e9b7857-dfd6-470c-9722-9a3a196687c3",
281 | "colab": {
282 | "base_uri": "https://localhost:8080/"
283 | }
284 | },
285 | "outputs": [
286 | {
287 | "output_type": "stream",
288 | "name": "stdout",
289 | "text": [
290 | "Data stored in class of type: \n"
291 | ]
292 | }
293 | ],
294 | "source": [
295 | "print(f\"Data stored in class of type: {type(segment_data)}\")"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "id": "cf2d5ed7",
301 | "metadata": {
302 | "id": "cf2d5ed7"
303 | },
304 | "source": [
305 | "\n",
306 | "
Resource: You can find out more about the class representing single segment WFDB records here.
\n",
307 | "
"
308 | ]
309 | },
310 | {
311 | "cell_type": "markdown",
312 | "id": "85a0d656",
313 | "metadata": {
314 | "id": "85a0d656"
315 | },
316 | "source": [
317 | "- Find out about the signals which have been extracted"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 13,
323 | "id": "6d5416b6",
324 | "metadata": {
325 | "id": "6d5416b6",
326 | "outputId": "e72883c0-0675-4e32-f814-a2bc84e03259",
327 | "colab": {
328 | "base_uri": "https://localhost:8080/"
329 | }
330 | },
331 | "outputs": [
332 | {
333 | "output_type": "stream",
334 | "name": "stdout",
335 | "text": [
336 | "This segment contains waveform data for the following 6 signals: ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp']\n",
337 | "The signals are sampled at a base rate of 62.4725 Hz (and some are sampled at multiples of this)\n",
338 | "They last for 52.4 minutes\n"
339 | ]
340 | }
341 | ],
342 | "source": [
343 | "print(f\"This segment contains waveform data for the following {segment_data.n_sig} signals: {segment_data.sig_name}\")\n",
344 | "print(f\"The signals are sampled at a base rate of {segment_data.fs} Hz (and some are sampled at multiples of this)\")\n",
345 | "print(f\"They last for {segment_data.sig_len/(60*segment_data.fs):.1f} minutes\")"
346 | ]
347 | },
348 | {
349 | "cell_type": "markdown",
350 | "id": "0d40fab4",
351 | "metadata": {
352 | "id": "0d40fab4"
353 | },
354 | "source": [
355 | "\n",
356 | "
Question: Can you find out which signals are sampled at multiples of the base sampling frequency by looking at the following contents of the 'segment_data' variable?
\n",
357 | "
"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 16,
363 | "id": "b0903fcf",
364 | "metadata": {
365 | "id": "b0903fcf",
366 | "outputId": "cee5ffd3-aaf5-4e46-9b3e-af94a844a9da",
367 | "colab": {
368 | "base_uri": "https://localhost:8080/"
369 | }
370 | },
371 | "outputs": [
372 | {
373 | "output_type": "stream",
374 | "name": "stdout",
375 | "text": [
376 | "{'adc_gain': [200.0, 200.0, 200.0, 16.0, 4096.0, 4093.0],\n",
377 | " 'adc_res': [14, 14, 14, 13, 12, 12],\n",
378 | " 'adc_zero': [8192, 8192, 8192, 4096, 2048, 2048],\n",
379 | " 'base_counter': 10219520.0,\n",
380 | " 'base_date': None,\n",
381 | " 'base_time': None,\n",
382 | " 'baseline': [8192, 8192, 8192, 800, 0, 2],\n",
383 | " 'block_size': [0, 0, 0, 0, 0, 0],\n",
384 | " 'byte_offset': [None, None, None, None, None, None],\n",
385 | " 'checksum': [10167, 1300, 56956, 35887, 29987, 21750],\n",
386 | " 'comments': ['signal 0 (II): channel=0 bandpass=[0.5,35]',\n",
387 | " 'signal 1 (V): channel=1 bandpass=[0.5,35]',\n",
388 | " 'signal 2 (aVR): channel=2 bandpass=[0.5,35]'],\n",
389 | " 'counter_freq': 999.56,\n",
390 | " 'd_signal': None,\n",
391 | " 'e_d_signal': None,\n",
392 | " 'e_p_signal': None,\n",
393 | " 'file_name': ['83404654_0005e.dat',\n",
394 | " '83404654_0005e.dat',\n",
395 | " '83404654_0005e.dat',\n",
396 | " '83404654_0005p.dat',\n",
397 | " '83404654_0005p.dat',\n",
398 | " '83404654_0005r.dat'],\n",
399 | " 'fmt': ['516', '516', '516', '516', '516', '516'],\n",
400 | " 'fs': 62.4725,\n",
401 | " 'init_value': [0, 0, 0, 0, 0, 0],\n",
402 | " 'n_sig': 6,\n",
403 | " 'p_signal': array([[ 0.00000000e+00, -6.50000000e-02, -5.00000000e-03,\n",
404 | " nan, 5.02929688e-01, 1.56120205e-01],\n",
405 | " [ 5.00000000e-03, -4.50000000e-02, -5.00000000e-03,\n",
406 | " nan, 5.02929688e-01, 1.56853164e-01],\n",
407 | " [ 1.50000000e-02, -2.50000000e-02, 5.00000000e-03,\n",
408 | " nan, 5.02929688e-01, 1.57097484e-01],\n",
409 | " ...,\n",
410 | " [-1.50000000e-02, 7.00000000e-02, -4.00000000e-02,\n",
411 | " 7.25000000e+01, 5.74951172e-01, 3.57683850e-01],\n",
412 | " [-1.50000000e-02, 5.50000000e-02, -4.50000000e-02,\n",
413 | " 7.25000000e+01, 5.70800781e-01, 3.61104324e-01],\n",
414 | " [ 0.00000000e+00, 9.00000000e-02, -5.50000000e-02,\n",
415 | " 7.25000000e+01, 5.62255859e-01, 3.63791840e-01]]),\n",
416 | " 'record_name': '83404654_0005',\n",
417 | " 'samps_per_frame': [4, 4, 4, 2, 2, 1],\n",
418 | " 'sig_len': 196480,\n",
419 | " 'sig_name': ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp'],\n",
420 | " 'skew': [None, None, None, None, None, None],\n",
421 | " 'units': ['mV', 'mV', 'mV', 'mmHg', 'NU', 'Ohm']}\n"
422 | ]
423 | }
424 | ],
425 | "source": [
426 | "from pprint import pprint\n",
427 | "pprint(vars(segment_data))"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "source": [
433 | ""
434 | ],
435 | "metadata": {
436 | "id": "gKtupgmahzpt"
437 | },
438 | "id": "gKtupgmahzpt",
439 | "execution_count": null,
440 | "outputs": []
441 | }
442 | ],
443 | "metadata": {
444 | "kernelspec": {
445 | "display_name": "Python 3",
446 | "language": "python",
447 | "name": "python3"
448 | },
449 | "language_info": {
450 | "codemirror_mode": {
451 | "name": "ipython",
452 | "version": 3
453 | },
454 | "file_extension": ".py",
455 | "mimetype": "text/x-python",
456 | "name": "python",
457 | "nbconvert_exporter": "python",
458 | "pygments_lexer": "ipython3",
459 | "version": "3.8.8"
460 | },
461 | "toc": {
462 | "base_numbering": 1,
463 | "nav_menu": {},
464 | "number_sections": true,
465 | "sideBar": true,
466 | "skip_h1_title": true,
467 | "title_cell": "Table of Contents",
468 | "title_sidebar": "Contents",
469 | "toc_cell": false,
470 | "toc_position": {},
471 | "toc_section_display": true,
472 | "toc_window_display": false
473 | },
474 | "colab": {
475 | "name": "data-extraction.ipynb",
476 | "provenance": []
477 | }
478 | },
479 | "nbformat": 4,
480 | "nbformat_minor": 5
481 | }
--------------------------------------------------------------------------------
/content/tutorial/notebooks/data-exploration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "5d037743",
6 | "metadata": {
7 | "id": "5d037743"
8 | },
9 | "source": [
10 | "# Data Exploration"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "id": "fbae8e9b",
16 | "metadata": {
17 | "id": "fbae8e9b"
18 | },
19 | "source": [
20 | "Let's begin by exploring data in the MIMIC Waveform Database.\n",
21 | "\n",
22 | "Our **objectives** are to:\n",
23 | "- Review the structure of the MIMIC Waveform Database (considering subjects, studies, records, and segments).\n",
24 | "- Load waveforms using the WFDB toolbox.\n",
25 | "- Find out which signals are present in selected records and segments, and how long the signals last.\n",
26 | "- Search for records that contain signals of interest."
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "id": "0b240726",
32 | "metadata": {
33 | "id": "0b240726"
34 | },
35 | "source": [
36 | "\n",
37 | "<div class=\"alert alert-block alert-info\">Resource: You can find out more about the MIMIC Waveform Database <a href=\"https://physionet.org/content/mimic4wdb/0.1.0/\">here</a>.\n",
38 | "</div>"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "id": "28b8e213",
44 | "metadata": {
45 | "id": "28b8e213"
46 | },
47 | "source": [
48 | "---\n",
49 | "## Setup"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "id": "5dac032e",
55 | "metadata": {
56 | "id": "5dac032e"
57 | },
58 | "source": [
59 | "### Specify the required Python packages\n",
60 | "We'll import the following:\n",
61 | "- _sys_: an essential python package\n",
62 | "- _pathlib_ (well a particular function from _pathlib_, called _Path_)"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "id": "ce3cdfde",
69 | "metadata": {
70 | "id": "ce3cdfde"
71 | },
72 | "outputs": [],
73 | "source": [
74 | "import sys\n",
75 | "from pathlib import Path"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "id": "9976c5e4",
81 | "metadata": {
82 | "id": "9976c5e4"
83 | },
84 | "source": [
85 | "### Specify a particular version of the WFDB Toolbox"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "id": "6533154b",
91 | "metadata": {
92 | "id": "6533154b"
93 | },
94 | "source": [
95 | "- _wfdb_: For this workshop we will be using version 4 of the WaveForm DataBase (WFDB) Toolbox package. The package contains tools for processing waveform data such as those found in MIMIC:"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "id": "5fdfa989",
102 | "metadata": {
103 | "id": "5fdfa989"
104 | },
105 | "outputs": [],
106 | "source": [
107 | "!pip install wfdb==4.0.0\n",
108 | "import wfdb"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "id": "e11ce5b6",
114 | "metadata": {
115 | "id": "e11ce5b6"
116 | },
117 | "source": [
118 | "\n",
119 | "<div class=\"alert alert-block alert-info\">Resource: You can find out more about the WFDB package <a href=\"https://wfdb.readthedocs.io/en/latest/\">here</a>.\n",
120 | "</div>"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "id": "d492e49f",
126 | "metadata": {
127 | "id": "d492e49f"
128 | },
129 | "source": [
130 | "Now that we have imported these packages (_i.e._ toolboxes) we have a set of tools (functions) ready to use."
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "id": "e7d38297",
136 | "metadata": {
137 | "id": "e7d38297"
138 | },
139 | "source": [
140 | "### Specify the name of the MIMIC Waveform Database"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "id": "68491718",
146 | "metadata": {
147 | "id": "68491718"
148 | },
149 | "source": [
150 | "- Specify the name of the MIMIC IV Waveform Database on Physionet, which comes from the URL: https://physionet.org/content/mimic4wdb/0.1.0/"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "id": "982b8154",
157 | "metadata": {
158 | "id": "982b8154"
159 | },
160 | "outputs": [],
161 | "source": [
162 | "database_name = 'mimic4wdb/0.1.0'"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "id": "e49196a6",
168 | "metadata": {
169 | "id": "e49196a6"
170 | },
171 | "source": [
172 | "---\n",
173 | "## Identify the records in the database"
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "id": "b476f9b7",
179 | "metadata": {
180 | "id": "b476f9b7"
181 | },
182 | "source": [
183 | "### Get a list of records\n",
184 | "\n",
185 | "- Use the [`get_record_list`](https://wfdb.readthedocs.io/en/latest/io.html#wfdb.io.get_record_list) function from the WFDB toolbox to get a list of records in the database."
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "id": "d91aa6a7",
192 | "metadata": {
193 | "id": "d91aa6a7",
194 | "outputId": "db8e3169-76ac-4bdd-bbaa-91cf626c1a6b",
195 | "colab": {
196 | "base_uri": "https://localhost:8080/"
197 | }
198 | },
199 | "outputs": [
200 | {
201 | "output_type": "stream",
202 | "name": "stdout",
203 | "text": [
204 | "The 'mimic4wdb/0.1.0' database contains data from 198 subjects\n"
205 | ]
206 | }
207 | ],
208 | "source": [
209 | "# each subject may be associated with multiple records\n",
210 | "subjects = wfdb.get_record_list(database_name)\n",
211 | "print(f\"The '{database_name}' database contains data from {len(subjects)} subjects\")\n",
212 | "\n",
213 | "# set max number of records to load\n",
214 | "max_records_to_load = 200"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "source": [
220 | "# iterate the subjects to get a list of records\n",
221 | "records = []\n",
222 | "for subject in subjects:\n",
223 | " studies = wfdb.get_record_list(f'{database_name}/{subject}')\n",
224 | " for study in studies:\n",
225 | " records.append(Path(f'{subject}{study}'))\n",
226 | " # stop if we've loaded enough records\n",
227 | " if len(records) >= max_records_to_load:\n",
228 | " print(\"Reached maximum required number of records.\")\n",
229 | " break\n",
230 | "\n",
231 | "print(f\"Loaded {len(records)} records from the '{database_name}' database.\")"
232 | ],
233 | "metadata": {
234 | "id": "0RzQmqjiQ9LD",
235 | "outputId": "31eb6067-de92-4424-b32b-f292623215a5",
236 | "colab": {
237 | "base_uri": "https://localhost:8080/"
238 | }
239 | },
240 | "id": "0RzQmqjiQ9LD",
241 | "execution_count": null,
242 | "outputs": [
243 | {
244 | "output_type": "stream",
245 | "name": "stdout",
246 | "text": [
247 | "Reached maximum required number of records.\n",
248 | "Loaded 200 records from the 'mimic4wdb/0.1.0' database.\n"
249 | ]
250 | }
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "id": "fc82d67e",
256 | "metadata": {
257 | "id": "fc82d67e"
258 | },
259 | "source": [
260 | "### Look at the records"
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "id": "29552f5a",
266 | "metadata": {
267 | "id": "29552f5a"
268 | },
269 | "source": [
270 | "- Display the first few records"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "id": "bb5745a7",
277 | "metadata": {
278 | "id": "bb5745a7",
279 | "outputId": "8fe32e59-c542-4a40-bd06-0c04fdcfbbfe",
280 | "colab": {
281 | "base_uri": "https://localhost:8080/"
282 | }
283 | },
284 | "outputs": [
285 | {
286 | "output_type": "stream",
287 | "name": "stdout",
288 | "text": [
289 | "First five records: \n",
290 | " - waves/p100/p10014354/81739927/81739927\n",
291 | " - waves/p100/p10019003/87033314/87033314\n",
292 | " - waves/p100/p10020306/83404654/83404654\n",
293 | " - waves/p100/p10039708/83411188/83411188\n",
294 | " - waves/p100/p10039708/85583557/85583557\n",
295 | "\n",
296 | "Note the formatting of these records:\n",
297 | " - intermediate directory ('p100' in this case)\n",
298 | " - subject identifier (e.g. 'p10014354')\n",
299 | " - record identifier (e.g. '81739927'\n",
300 | " \n"
301 | ]
302 | }
303 | ],
304 | "source": [
305 | "# format and print first five records\n",
306 | "first_five_records = [str(x) for x in records[0:5]]\n",
307 | "first_five_records = \"\\n - \".join(first_five_records)\n",
308 | "print(f\"First five records: \\n - {first_five_records}\")\n",
309 | "\n",
310 | "print(\"\"\"\n",
311 | "Note the formatting of these records:\n",
312 | " - intermediate directory ('p100' in this case)\n",
313 | " - subject identifier (e.g. 'p10014354')\n",
314 | " - record identifier (e.g. '81739927'\n",
315 | " \"\"\")"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "id": "b56c29d5",
321 | "metadata": {
322 | "id": "b56c29d5"
323 | },
324 | "source": [
325 | "\n",
326 | "<div class=\"alert alert-block alert-info\">Q: Can you print the names of the last five records?<br>Hint: in Python, the last five elements can be specified using '[-5:]'\n",
327 | "</div>"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "id": "cb21a93b",
333 | "metadata": {
334 | "id": "cb21a93b"
335 | },
336 | "source": [
337 | "---\n",
338 | "## Extract metadata for a record"
339 | ]
340 | },
341 | {
342 | "cell_type": "markdown",
343 | "id": "c39dc9f3",
344 | "metadata": {
345 | "id": "c39dc9f3"
346 | },
347 | "source": [
348 | "Each record contains metadata stored in a header file, named \"`.hea`\""
349 | ]
350 | },
351 | {
352 | "cell_type": "markdown",
353 | "id": "3b2e6adb",
354 | "metadata": {
355 | "id": "3b2e6adb"
356 | },
357 | "source": [
358 | "### Specify the online directory containing a record's data"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": null,
364 | "id": "86eed39f",
365 | "metadata": {
366 | "id": "86eed39f",
367 | "outputId": "5cfa40d0-b4d4-4605-b677-164d9b603f90",
368 | "colab": {
369 | "base_uri": "https://localhost:8080/"
370 | }
371 | },
372 | "outputs": [
373 | {
374 | "output_type": "stream",
375 | "name": "stdout",
376 | "text": [
377 | "PhysioNet directory specified for record: mimic4wdb/0.1.0/waves/p100/p10039708/83411188\n"
378 | ]
379 | }
380 | ],
381 | "source": [
382 | "# Specify the 4th record (note, in Python indexing begins at 0)\n",
383 | "idx = 3\n",
384 | "record = records[idx]\n",
385 | "record_dir = f'{database_name}/{record.parent}'\n",
386 | "print(\"PhysioNet directory specified for record: {}\".format(record_dir))"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "id": "b5220ad3",
392 | "metadata": {
393 | "id": "b5220ad3"
394 | },
395 | "source": [
396 | "### Specify the subject identifier"
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "id": "d7a5bbef",
402 | "metadata": {
403 | "id": "d7a5bbef"
404 | },
405 | "source": [
406 | "Extract the record name (e.g. '83411188') from the record (e.g. 'p100/p10039708/83411188/83411188'):"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": null,
412 | "id": "b4bc247b",
413 | "metadata": {
414 | "id": "b4bc247b",
415 | "outputId": "a74ca902-ca05-496a-fd5d-2dbb0d95f998",
416 | "colab": {
417 | "base_uri": "https://localhost:8080/"
418 | }
419 | },
420 | "outputs": [
421 | {
422 | "output_type": "stream",
423 | "name": "stdout",
424 | "text": [
425 | "Record name: 83411188\n"
426 | ]
427 | }
428 | ],
429 | "source": [
430 | "record_name = record.name\n",
431 | "print(\"Record name: {}\".format(record_name))"
432 | ]
433 | },
434 | {
435 | "cell_type": "markdown",
436 | "id": "742071da",
437 | "metadata": {
438 | "id": "742071da"
439 | },
440 | "source": [
441 | "### Load the metadata for this record\n",
442 | "- Use the [`rdheader`](https://wfdb.readthedocs.io/en/latest/io.html#wfdb.io.rdheader) function from the WFDB toolbox to load metadata from the record header file"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": null,
448 | "id": "c5a0afc5",
449 | "metadata": {
450 | "id": "c5a0afc5",
451 | "outputId": "13b3dfa2-d489-4a77-c07d-a5116d67b4ec",
452 | "colab": {
453 | "base_uri": "https://localhost:8080/"
454 | }
455 | },
456 | "outputs": [
457 | {
458 | "output_type": "stream",
459 | "name": "stdout",
460 | "text": [
461 | "Done: metadata loaded for record '83411188' from the header file at:\n",
462 | "https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188.hea\n"
463 | ]
464 | }
465 | ],
466 | "source": [
467 | "record_data = wfdb.rdheader(record_name, pn_dir=record_dir, rd_segments=True)\n",
468 | "remote_url = \"https://physionet.org/content/\" + record_dir + \"/\" + record_name + \".hea\"\n",
469 | "print(f\"Done: metadata loaded for record '{record_name}' from the header file at:\\n{remote_url}\")"
470 | ]
471 | },
472 | {
473 | "cell_type": "markdown",
474 | "id": "f7a4d25d",
475 | "metadata": {
476 | "id": "f7a4d25d"
477 | },
478 | "source": [
479 | "---\n",
480 | "## Inspect details of physiological signals recorded in this record\n",
481 | "- Printing a few details of the signals from the extracted metadata"
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": null,
487 | "id": "58630149",
488 | "metadata": {
489 | "id": "58630149",
490 | "outputId": "e19d66b1-690c-4cc5-c754-c4b5d1b16d38",
491 | "colab": {
492 | "base_uri": "https://localhost:8080/"
493 | }
494 | },
495 | "outputs": [
496 | {
497 | "output_type": "stream",
498 | "name": "stdout",
499 | "text": [
500 | "- Number of signals: 6\n",
501 | "- Duration: 14.2 hours\n",
502 | "- Base sampling frequency: 62.4725 Hz\n"
503 | ]
504 | }
505 | ],
506 | "source": [
507 | "print(f\"- Number of signals: {record_data.n_sig}\".format())\n",
508 | "print(f\"- Duration: {record_data.sig_len/(record_data.fs*60*60):.1f} hours\") \n",
509 | "print(f\"- Base sampling frequency: {record_data.fs} Hz\")"
510 | ]
511 | },
512 | {
513 | "cell_type": "markdown",
514 | "id": "7b3da17f",
515 | "metadata": {
516 | "id": "7b3da17f"
517 | },
518 | "source": [
519 | "---\n",
520 | "## Inspect the segments making up a record\n",
521 | "Each record is typically made up of several segments"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": null,
527 | "id": "b127c857",
528 | "metadata": {
529 | "id": "b127c857",
530 | "outputId": "4fe5a2b3-b95b-4bbe-db18-fabb199f0584",
531 | "colab": {
532 | "base_uri": "https://localhost:8080/"
533 | }
534 | },
535 | "outputs": [
536 | {
537 | "output_type": "stream",
538 | "name": "stdout",
539 | "text": [
540 | "The 6 segments from record 83411188 are:\n",
541 | "['83411188_0000', '83411188_0001', '83411188_0002', '83411188_0003', '83411188_0004', '83411188_0005']\n"
542 | ]
543 | }
544 | ],
545 | "source": [
546 | "segments = record_data.seg_name\n",
547 | "print(f\"The {len(segments)} segments from record {record_name} are:\\n{segments}\")"
548 | ]
549 | },
550 | {
551 | "cell_type": "markdown",
552 | "id": "b379eaaf",
553 | "metadata": {
554 | "id": "b379eaaf"
555 | },
556 | "source": [
557 | "The format of filename for each segment is: `record directory, \"_\", segment number`"
558 | ]
559 | },
560 | {
561 | "cell_type": "markdown",
562 | "id": "f19d231b",
563 | "metadata": {
564 | "id": "f19d231b"
565 | },
566 | "source": [
567 | "---\n",
568 | "## Inspect an individual segment\n",
569 | "### Read the metadata for this segment\n",
570 | "- Read the metadata from the header file"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": null,
576 | "id": "7f70d34f",
577 | "metadata": {
578 | "id": "7f70d34f",
579 | "outputId": "d1bd96de-09d9-4cf2-fa35-1bbcb5ddced4",
580 | "colab": {
581 | "base_uri": "https://localhost:8080/"
582 | }
583 | },
584 | "outputs": [
585 | {
586 | "output_type": "stream",
587 | "name": "stdout",
588 | "text": [
589 | "Header metadata loaded for: \n",
590 | "- the segment '83411188_0001'\n",
591 | "- in record '83411188'\n",
592 | "- for subject 'p10039708'\n",
593 | "\n"
594 | ]
595 | }
596 | ],
597 | "source": [
598 | "segment_metadata = wfdb.rdheader(record_name=segments[2], pn_dir=record_dir)\n",
599 | "\n",
600 | "print(f\"\"\"Header metadata loaded for: \n",
601 | "- the segment '{segments[2]}'\n",
602 | "- in record '{record_name}'\n",
603 | "- for subject '{str(Path(record_dir).parent.parts[-1])}'\n",
604 | "\"\"\")"
605 | ]
606 | },
607 | {
608 | "cell_type": "markdown",
609 | "id": "d28771ac",
610 | "metadata": {
611 | "id": "d28771ac"
612 | },
613 | "source": [
614 | "### Find out what signals are present"
615 | ]
616 | },
617 | {
618 | "cell_type": "code",
619 | "execution_count": null,
620 | "id": "324727df",
621 | "metadata": {
622 | "id": "324727df",
623 | "outputId": "223bdb49-5023-453d-f2b7-a016a603fec9",
624 | "colab": {
625 | "base_uri": "https://localhost:8080/"
626 | }
627 | },
628 | "outputs": [
629 | {
630 | "output_type": "stream",
631 | "name": "stdout",
632 | "text": [
633 | "This segment contains the following signals: ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp']\n",
634 | "The signals are measured in units of: ['mV', 'mV', 'mV', 'mmHg', 'NU', 'Ohm']\n"
635 | ]
636 | }
637 | ],
638 | "source": [
639 | "print(f\"This segment contains the following signals: {segment_metadata.sig_name}\")\n",
640 | "print(f\"The signals are measured in units of: {segment_metadata.units}\")"
641 | ]
642 | },
643 | {
644 | "cell_type": "markdown",
645 | "id": "f09b3f37",
646 | "metadata": {
647 | "id": "f09b3f37"
648 | },
649 | "source": [
650 | "See [here](https://archive.physionet.org/mimic2/mimic2_waveform_overview.shtml#signals-125-samplessecond) for definitions of signal abbreviations."
651 | ]
652 | },
653 | {
654 | "cell_type": "markdown",
655 | "id": "3f56dd61",
656 | "metadata": {
657 | "id": "3f56dd61"
658 | },
659 | "source": [
660 | "\n",
661 | "<div class=\"alert alert-block alert-info\">Q: Which of these signals is no longer present in segment '83411188_0005'?\n",
662 | "</div>"
663 | ]
664 | },
665 | {
666 | "cell_type": "markdown",
667 | "id": "9f921f27",
668 | "metadata": {
669 | "id": "9f921f27"
670 | },
671 | "source": [
672 | "### Find out how long each signal lasts"
673 | ]
674 | },
675 | {
676 | "cell_type": "markdown",
677 | "id": "d217b764",
678 | "metadata": {
679 | "id": "d217b764"
680 | },
681 | "source": [
682 | "All signals in a segment are time-aligned, measured at the same sampling frequency, and last the same duration:"
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": null,
688 | "id": "c44f00a7",
689 | "metadata": {
690 | "id": "c44f00a7",
691 | "outputId": "1cfa789e-b66b-4c8e-805b-4197c663ba18",
692 | "colab": {
693 | "base_uri": "https://localhost:8080/"
694 | }
695 | },
696 | "outputs": [
697 | {
698 | "output_type": "stream",
699 | "name": "stdout",
700 | "text": [
701 | "The signals have a base sampling frequency of 62.5 Hz\n",
702 | "and they last for 0.9 minutes\n"
703 | ]
704 | }
705 | ],
706 | "source": [
707 | "print(f\"The signals have a base sampling frequency of {segment_metadata.fs:.1f} Hz\")\n",
708 | "print(f\"and they last for {segment_metadata.sig_len/(segment_metadata.fs*60):.1f} minutes\")"
709 | ]
710 | },
711 | {
712 | "cell_type": "markdown",
713 | "id": "d2a80895",
714 | "metadata": {
715 | "id": "d2a80895"
716 | },
717 | "source": [
718 | "## Identify records suitable for analysis"
719 | ]
720 | },
721 | {
722 | "cell_type": "markdown",
723 | "id": "1a3218d3",
724 | "metadata": {
725 | "id": "1a3218d3"
726 | },
727 | "source": [
728 | "- The signals and their durations vary from one record (and segment) to the next. \n",
729 | "- Since most studies require specific types of signals (e.g. blood pressure and photoplethysmography signals), we need to be able to identify which records (or segments) contain the required signals and duration."
730 | ]
731 | },
732 | {
733 | "cell_type": "markdown",
734 | "id": "b02c0b4e",
735 | "metadata": {
736 | "id": "b02c0b4e"
737 | },
738 | "source": [
739 | "### Setup"
740 | ]
741 | },
742 | {
743 | "cell_type": "code",
744 | "execution_count": null,
745 | "id": "5bb47556",
746 | "metadata": {
747 | "id": "5bb47556"
748 | },
749 | "outputs": [],
750 | "source": [
751 | "import pandas as pd\n",
752 | "from pprint import pprint"
753 | ]
754 | },
755 | {
756 | "cell_type": "code",
757 | "execution_count": null,
758 | "id": "95181681",
759 | "metadata": {
760 | "id": "95181681",
761 | "outputId": "544c69db-59d9-432c-ee6c-10e1b0f54318",
762 | "colab": {
763 | "base_uri": "https://localhost:8080/"
764 | }
765 | },
766 | "outputs": [
767 | {
768 | "output_type": "stream",
769 | "name": "stdout",
770 | "text": [
771 | "Earlier, we loaded 200 records from the 'mimic4wdb/0.1.0' database.\n"
772 | ]
773 | }
774 | ],
775 | "source": [
776 | "print(f\"Earlier, we loaded {len(records)} records from the '{database_name}' database.\")"
777 | ]
778 | },
779 | {
780 | "cell_type": "markdown",
781 | "id": "7f2b5955",
782 | "metadata": {
783 | "id": "7f2b5955"
784 | },
785 | "source": [
786 | "### Specify requirements"
787 | ]
788 | },
789 | {
790 | "cell_type": "markdown",
791 | "id": "83f8611c",
792 | "metadata": {
793 | "id": "83f8611c"
794 | },
795 | "source": [
796 | "- Required signals"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": null,
802 | "id": "3d1505ab",
803 | "metadata": {
804 | "id": "3d1505ab"
805 | },
806 | "outputs": [],
807 | "source": [
808 | "required_sigs = ['ABP', 'Pleth']"
809 | ]
810 | },
811 | {
812 | "cell_type": "markdown",
813 | "id": "03920810",
814 | "metadata": {
815 | "id": "03920810"
816 | },
817 | "source": [
818 | "- Required duration"
819 | ]
820 | },
821 | {
822 | "cell_type": "code",
823 | "execution_count": null,
824 | "id": "568a93c1",
825 | "metadata": {
826 | "id": "568a93c1"
827 | },
828 | "outputs": [],
829 | "source": [
830 | "# convert from minutes to seconds\n",
831 | "req_seg_duration = 10*60 "
832 | ]
833 | },
834 | {
835 | "cell_type": "markdown",
836 | "id": "d49187cd",
837 | "metadata": {
838 | "id": "d49187cd"
839 | },
840 | "source": [
841 | "### Find out how many records meet the requirements"
842 | ]
843 | },
844 | {
845 | "cell_type": "markdown",
846 | "id": "65f2cdce",
847 | "metadata": {
848 | "id": "65f2cdce"
849 | },
850 | "source": [
851 | "_NB: This step may take a while. The results are copied below to save running it yourself._"
852 | ]
853 | },
854 | {
855 | "cell_type": "code",
856 | "execution_count": null,
857 | "id": "015b47d3",
858 | "metadata": {
859 | "id": "015b47d3"
860 | },
861 | "outputs": [],
862 | "source": [
863 | "matching_recs = {'dir':[], 'seg_name':[], 'length':[]}\n",
864 | "\n",
865 | "for record in records:\n",
866 | " print('Record: {}'.format(record), end=\"\", flush=True)\n",
867 | " record_dir = f'{database_name}/{record.parent}'\n",
868 | " record_name = record.name\n",
869 | " print(' (reading data)')\n",
870 | " record_data = wfdb.rdheader(record_name,\n",
871 | " pn_dir=record_dir,\n",
872 | " rd_segments=True)\n",
873 | "\n",
874 | " # Check whether the required signals are present in the record\n",
875 | " sigs_present = record_data.sig_name\n",
876 | " if not all(x in sigs_present for x in required_sigs):\n",
877 | " print(' (missing signals)')\n",
878 | " continue\n",
879 | "\n",
880 | " # Get the segments for the record\n",
881 | " segments = record_data.seg_name\n",
882 | "\n",
883 | " # Check to see if the segment is 10 min long\n",
884 | " # If not, move to the next one\n",
885 | " gen = (segment for segment in segments if segment != '~')\n",
886 | " for segment in gen:\n",
887 | " print(' - Segment: {}'.format(segment), end=\"\", flush=True)\n",
888 | " segment_metadata = wfdb.rdheader(record_name=segment,\n",
889 | " pn_dir=record_dir)\n",
890 | " seg_length = segment_metadata.sig_len/(segment_metadata.fs)\n",
891 | "\n",
892 | " if seg_length < req_seg_duration:\n",
893 | " print(f' (too short at {seg_length/60:.1f} mins)')\n",
894 | " continue\n",
895 | "\n",
896 | " # Next check that all required signals are present in the segment\n",
897 | " sigs_present = segment_metadata.sig_name\n",
898 | " \n",
899 | " if all(x in sigs_present for x in required_sigs):\n",
900 | " matching_recs['dir'].append(record_dir)\n",
901 | " matching_recs['seg_name'].append(segment)\n",
902 | " matching_recs['length'].append(seg_length)\n",
903 | " print(' (met requirements)')\n",
904 | " # Since we only need one segment per record break out of loop\n",
905 | " break\n",
906 | " else:\n",
907 | " print(' (long enough, but missing signal(s))')\n",
908 | "\n",
909 | "print(f\"A total of {len(matching_recs['dir'])} records met the requirements:\")\n",
910 | "\n",
911 | "#df_matching_recs = pd.DataFrame(data=matching_recs)\n",
912 | "#df_matching_recs.to_csv('matching_records.csv', index=False)\n",
913 | "#p=1"
914 | ]
915 | },
916 | {
917 | "cell_type": "code",
918 | "execution_count": null,
919 | "id": "75ec15f4",
920 | "metadata": {
921 | "id": "75ec15f4",
922 | "outputId": "3ea832cd-4a4b-4265-bc2b-275d0f6c1802",
923 | "colab": {
924 | "base_uri": "https://localhost:8080/"
925 | }
926 | },
927 | "outputs": [
928 | {
929 | "output_type": "stream",
930 | "name": "stdout",
931 | "text": [
932 | "A total of 52 out of 200 records met the requirements.\n",
933 | "\n",
934 | "The relevant segment names are:\n",
935 | " - 83404654_0005\n",
936 | " - 82924339_0007\n",
937 | " - 84248019_0005\n",
938 | " - 82439920_0004\n",
939 | " - 82800131_0002\n",
940 | " - 84304393_0001\n",
941 | " - 89464742_0001\n",
942 | " - 88958796_0004\n",
943 | " - 88995377_0001\n",
944 | " - 85230771_0004\n",
945 | " - 86643930_0004\n",
946 | " - 81250824_0005\n",
947 | " - 87706224_0003\n",
948 | " - 83058614_0005\n",
949 | " - 82803505_0017\n",
950 | " - 88574629_0001\n",
951 | " - 87867111_0012\n",
952 | " - 84560969_0001\n",
953 | " - 87562386_0001\n",
954 | " - 88685937_0001\n",
955 | " - 86120311_0001\n",
956 | " - 89866183_0014\n",
957 | " - 89068160_0002\n",
958 | " - 86380383_0001\n",
959 | " - 85078610_0008\n",
960 | " - 87702634_0007\n",
961 | " - 84686667_0002\n",
962 | " - 84802706_0002\n",
963 | " - 81811182_0004\n",
964 | " - 84421559_0005\n",
965 | " - 88221516_0007\n",
966 | " - 80057524_0005\n",
967 | " - 84209926_0018\n",
968 | " - 83959636_0010\n",
969 | " - 89989722_0016\n",
970 | " - 89225487_0007\n",
971 | " - 84391267_0001\n",
972 | " - 80889556_0002\n",
973 | " - 85250558_0011\n",
974 | " - 84567505_0005\n",
975 | " - 85814172_0007\n",
976 | " - 88884866_0005\n",
977 | " - 80497954_0012\n",
978 | " - 80666640_0014\n",
979 | " - 84939605_0004\n",
980 | " - 82141753_0018\n",
981 | " - 86874920_0014\n",
982 | " - 84505262_0010\n",
983 | " - 86288257_0001\n",
984 | " - 89699401_0001\n",
985 | " - 88537698_0013\n",
986 | " - 83958172_0001\n",
987 | "\n",
988 | "The corresponding directories are: \n",
989 | " - mimic4wdb/0.1.0/waves/p100/p10020306/83404654\n",
990 | " - mimic4wdb/0.1.0/waves/p101/p10126957/82924339\n",
991 | " - mimic4wdb/0.1.0/waves/p102/p10209410/84248019\n",
992 | " - mimic4wdb/0.1.0/waves/p109/p10952189/82439920\n",
993 | " - mimic4wdb/0.1.0/waves/p111/p11109975/82800131\n",
994 | " - mimic4wdb/0.1.0/waves/p113/p11392990/84304393\n",
995 | " - mimic4wdb/0.1.0/waves/p121/p12168037/89464742\n",
996 | " - mimic4wdb/0.1.0/waves/p121/p12173569/88958796\n",
997 | " - mimic4wdb/0.1.0/waves/p121/p12188288/88995377\n",
998 | " - mimic4wdb/0.1.0/waves/p128/p12872596/85230771\n",
999 | " - mimic4wdb/0.1.0/waves/p129/p12933208/86643930\n",
1000 | " - mimic4wdb/0.1.0/waves/p130/p13016481/81250824\n",
1001 | " - mimic4wdb/0.1.0/waves/p132/p13240081/87706224\n",
1002 | " - mimic4wdb/0.1.0/waves/p136/p13624686/83058614\n",
1003 | " - mimic4wdb/0.1.0/waves/p137/p13791821/82803505\n",
1004 | " - mimic4wdb/0.1.0/waves/p141/p14191565/88574629\n",
1005 | " - mimic4wdb/0.1.0/waves/p142/p14285792/87867111\n",
1006 | " - mimic4wdb/0.1.0/waves/p143/p14356077/84560969\n",
1007 | " - mimic4wdb/0.1.0/waves/p143/p14363499/87562386\n",
1008 | " - mimic4wdb/0.1.0/waves/p146/p14695840/88685937\n",
1009 | " - mimic4wdb/0.1.0/waves/p149/p14931547/86120311\n",
1010 | " - mimic4wdb/0.1.0/waves/p151/p15174162/89866183\n",
1011 | " - mimic4wdb/0.1.0/waves/p153/p15312343/89068160\n",
1012 | " - mimic4wdb/0.1.0/waves/p153/p15342703/86380383\n",
1013 | " - mimic4wdb/0.1.0/waves/p155/p15552902/85078610\n",
1014 | " - mimic4wdb/0.1.0/waves/p156/p15649186/87702634\n",
1015 | " - mimic4wdb/0.1.0/waves/p158/p15857793/84686667\n",
1016 | " - mimic4wdb/0.1.0/waves/p158/p15865327/84802706\n",
1017 | " - mimic4wdb/0.1.0/waves/p158/p15896656/81811182\n",
1018 | " - mimic4wdb/0.1.0/waves/p159/p15920699/84421559\n",
1019 | " - mimic4wdb/0.1.0/waves/p160/p16034243/88221516\n",
1020 | " - mimic4wdb/0.1.0/waves/p165/p16566444/80057524\n",
1021 | " - mimic4wdb/0.1.0/waves/p166/p16644640/84209926\n",
1022 | " - mimic4wdb/0.1.0/waves/p167/p16709726/83959636\n",
1023 | " - mimic4wdb/0.1.0/waves/p167/p16715341/89989722\n",
1024 | " - mimic4wdb/0.1.0/waves/p168/p16818396/89225487\n",
1025 | " - mimic4wdb/0.1.0/waves/p170/p17032851/84391267\n",
1026 | " - mimic4wdb/0.1.0/waves/p172/p17229504/80889556\n",
1027 | " - mimic4wdb/0.1.0/waves/p173/p17301721/85250558\n",
1028 | " - mimic4wdb/0.1.0/waves/p173/p17325001/84567505\n",
1029 | " - mimic4wdb/0.1.0/waves/p174/p17490822/85814172\n",
1030 | " - mimic4wdb/0.1.0/waves/p177/p17738824/88884866\n",
1031 | " - mimic4wdb/0.1.0/waves/p177/p17744715/80497954\n",
1032 | " - mimic4wdb/0.1.0/waves/p179/p17957832/80666640\n",
1033 | " - mimic4wdb/0.1.0/waves/p180/p18080257/84939605\n",
1034 | " - mimic4wdb/0.1.0/waves/p181/p18109577/82141753\n",
1035 | " - mimic4wdb/0.1.0/waves/p183/p18324626/86874920\n",
1036 | " - mimic4wdb/0.1.0/waves/p187/p18742074/84505262\n",
1037 | " - mimic4wdb/0.1.0/waves/p188/p18824975/86288257\n",
1038 | " - mimic4wdb/0.1.0/waves/p191/p19126489/89699401\n",
1039 | " - mimic4wdb/0.1.0/waves/p193/p19313794/88537698\n",
1040 | " - mimic4wdb/0.1.0/waves/p196/p19619764/83958172\n"
1041 | ]
1042 | }
1043 | ],
1044 | "source": [
1045 | "print(f\"A total of {len(matching_recs['dir'])} out of {len(records)} records met the requirements.\")\n",
1046 | "\n",
1047 | "relevant_segments_names = \"\\n - \".join(matching_recs['seg_name'])\n",
1048 | "print(f\"\\nThe relevant segment names are:\\n - {relevant_segments_names}\")\n",
1049 | "\n",
1050 | "relevant_dirs = \"\\n - \".join(matching_recs['dir'])\n",
1051 | "print(f\"\\nThe corresponding directories are: \\n - {relevant_dirs}\")"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "markdown",
1056 | "id": "719f20f8",
1057 | "metadata": {
1058 | "id": "719f20f8"
1059 | },
1060 | "source": [
1061 | "\n",
1062 | "<div class=\"alert alert-block alert-info\">Question: Is this enough data for a study? Consider different types of studies, e.g. assessing the performance of a previously proposed algorithm to estimate BP from the PPG signal, vs. developing a deep learning approach to estimate BP from the PPG.\n",
1063 | "</div>"
1064 | ]
1065 | },
1066 | {
1067 | "cell_type": "code",
1068 | "execution_count": null,
1069 | "id": "6fccda20",
1070 | "metadata": {
1071 | "id": "6fccda20"
1072 | },
1073 | "outputs": [],
1074 | "source": [
1075 | ""
1076 | ]
1077 | }
1078 | ],
1079 | "metadata": {
1080 | "kernelspec": {
1081 | "display_name": "Python 3",
1082 | "language": "python",
1083 | "name": "python3"
1084 | },
1085 | "language_info": {
1086 | "codemirror_mode": {
1087 | "name": "ipython",
1088 | "version": 3
1089 | },
1090 | "file_extension": ".py",
1091 | "mimetype": "text/x-python",
1092 | "name": "python",
1093 | "nbconvert_exporter": "python",
1094 | "pygments_lexer": "ipython3",
1095 | "version": "3.8.8"
1096 | },
1097 | "toc": {
1098 | "base_numbering": 1,
1099 | "nav_menu": {},
1100 | "number_sections": true,
1101 | "sideBar": true,
1102 | "skip_h1_title": true,
1103 | "title_cell": "Table of Contents",
1104 | "title_sidebar": "Contents",
1105 | "toc_cell": false,
1106 | "toc_position": {
1107 | "height": "calc(100% - 180px)",
1108 | "left": "10px",
1109 | "top": "150px",
1110 | "width": "306px"
1111 | },
1112 | "toc_section_display": true,
1113 | "toc_window_display": false
1114 | },
1115 | "colab": {
1116 | "name": "data-exploration.ipynb",
1117 | "provenance": []
1118 | }
1119 | },
1120 | "nbformat": 4,
1121 | "nbformat_minor": 5
1122 | }
1123 |
--------------------------------------------------------------------------------
/content/tutorial/notebooks/beat_detection_functions.py:
--------------------------------------------------------------------------------
1 | import scipy.signal as sp
2 | import numpy as np
3 |
def pulse_detect(x,fs,w,alg):
    """
    Description: Pulse detection and correction from pulsatile signals
    Inputs:  x, array with pulsatile signal [user defined units]
             fs, sampling rate of signal [Hz]
             w, window length for analysis [s]
             alg, string with the name of the algorithm to apply ['heartpy','d2max','upslopes','delineator']
    Outputs: ibis, location of cardiac cycles as detected by the selected algorithm [number of samples]

    Algorithms:       1: HeartPy (van Gent et al, 2019, DOI: 10.1016/j.trf.2019.09.015)
                      2: 2nd derivative maxima (Elgendi et al, 2013, DOI: 10.1371/journal.pone.0076585)
                      3: Systolic upslopes (Arguello Prada and Serna Maldonado, 2018,
                         DOI: 10.1080/03091902.2019.1572237)
                      4: Delineator (Li et al, 2010, DOI: 10.1109/TBME.2005.855725)

    Libraries: NumPy (as np), SciPy (Signal, as sp)

    Version: 1.0 - June 2022

    Developed by: Elisa Mejía-Mejía
                  City, University of London

    """

    # Check selected algorithm; fall back to D2max for unknown names
    pos_alg = ['heartpy','d2max','upslopes','delineator']
    if alg not in pos_alg:
        print('Unknown algorithm determined. Using D2max as default')
        alg = 'd2max'

    # Pre-processing: remove linear trend, then band-pass 0.5 - 10 Hz
    x_d = sp.detrend(x)
    sos = sp.butter(10, [0.5, 10], btype = 'bp', analog = False, output = 'sos', fs = fs)
    x_f = sp.sosfiltfilt(sos, x_d)

    # Run the detector on consecutive non-overlapping windows of length w.
    # Window bounds are forced to int (a float fs would break indexing) and
    # the windows are now contiguous: the original used
    # range(start, (i+1)*fs*w - 1), which silently skipped one sample at
    # every window boundary.
    win_len = int(np.floor(w*fs))
    n_int = len(x_f)//win_len
    ibis = np.empty(0).astype(int)
    for i in range(n_int):
        start = i*win_len
        aux = x_f[start:start + win_len]
        locs = _apply_beat_detector(aux, fs, alg)
        ibis = np.append(ibis, locs + start)

    # Remaining tail shorter than one full window (the original raised a
    # NameError here when the signal was shorter than a single window)
    start = n_int*win_len
    if start < len(x_f):
        aux = x_f[start:]
        if len(aux) > 20:   # ignore tails too short for meaningful detection
            locs = _apply_beat_detector(aux, fs, alg)
            ibis = np.append(ibis, locs + start)

    # Keep only locations that can index the signal (was <= len(x_f), which
    # let one out-of-range sample through)
    ind, = np.where(ibis < len(x_f))
    ibis = ibis[ind]

    # Correct the raw detections (IBI length, peakness and amplitude checks)
    ibis = peak_correction(x,ibis,fs,20,5,[0.5, 1.5])

    return ibis

def _apply_beat_detector(seg, fs, alg):
    # Dispatch one pre-filtered segment to the selected beat detector.
    if alg == 'heartpy':
        return heartpy(seg,fs,40,180,5)
    elif alg == 'd2max':
        return d2max(seg,fs)
    elif alg == 'upslopes':
        return upslopes(seg)
    else:
        return delineator(seg,fs)
99 |
def peak_correction(x,locs,fs,t,stride,th_len):
    """
    Correction of peaks detected from pulsatile signals

    Inputs:  x, pulsatile signal [user defined units]
             locs, location of the detected interbeat intervals [number of samples]
             fs, sampling rate [Hz]
             t, duration of intervals for the correction [s]
             stride, stride between consecutive intervals for the correction [s]
             th_len, array with the percentage of lower and higher thresholds for comparing the
                 duration of IBIs [proportions]
    Outputs: ibis, array with the corrected points related to the start of the inter-beat
                 intervals [number of samples]

    The correction runs three passes: (1) removal/insertion of peaks that
    produce too-short or too-long IBIs, (2) relocation of points that are not
    local maxima, and (3) removal of peaks with implausible amplitude.

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version:      1.0 - June, 2022

    """

    # ---- Pass 1: correction of long and short IBIs in sliding windows ----
    len_window = np.round(t*fs)
    first_i = 0
    second_i = len_window - 1
    while second_i < len(x):
        # Peaks that fall inside the current window
        ind1, = np.where(locs >= first_i)
        ind2, = np.where(locs <= second_i)
        ind = np.intersect1d(ind1, ind2)

        win = locs[ind]
        dif = np.diff(win)

        # Skip windows with fewer than two peaks (median of an empty array
        # would yield nan thresholds)
        if len(dif) != 0:
            # Duration thresholds, relative to the median IBI of the window
            th_dif = np.zeros(2)
            th_dif[0] = th_len[0]*np.median(dif)
            th_dif[1] = th_len[1]*np.median(dif)

            # Amplitude thresholds, relative to the median peak amplitude
            th_amp = np.zeros(2)
            th_amp[0] = 0.75*np.median(x[win])
            th_amp[1] = 1.25*np.median(x[win])

            j = 0
            while j < len(dif):
                if dif[j] <= th_dif[0]:
                    # IBI too short: one of the two fused peaks must go. Keep
                    # the one whose spacing best matches the median IBI.
                    # NOTE(review): for j == 0 the candidates are absolute
                    # positions rather than spacings — confirm intent.
                    if j == 0:
                        opt = np.append(win[j], win[j + 1])
                    else:
                        opt = np.append(win[j], win[j + 1]) - win[j - 1]
                    dif_abs = np.abs(opt - np.median(dif))
                    min_val = np.min(dif_abs)
                    ind_min, = np.where(dif_abs == min_val)
                    ind_min = ind_min[0]   # guard against ties
                    # np.delete takes an index, not a value
                    # (was np.delete(win, win[j]) / np.delete(win, win[j+1]))
                    if ind_min == 0:
                        win = np.delete(win, j + 1)
                    else:
                        win = np.delete(win, j)
                    dif = np.diff(win)
                elif dif[j] >= th_dif[1]:
                    # IBI too long: look for a missed peak in between
                    aux_x = x[win[j]:win[j + 1]]
                    locs_pks, _ = sp.find_peaks(aux_x)
                    locs_pks = locs_pks + win[j]

                    # Keep only candidates with plausible amplitude
                    ind1, = np.where(x[locs_pks] >= th_amp[0])
                    ind2, = np.where(x[locs_pks] <= th_amp[1])
                    ind = np.intersect1d(ind1, ind2)
                    locs_pks = locs_pks[ind]

                    if len(locs_pks) != 0:
                        # Insert the candidate whose spacing is closest to the
                        # median IBI
                        opt = locs_pks - win[j]
                        dif_abs = np.abs(opt - np.median(dif))
                        min_val = np.min(dif_abs)
                        ind_min, = np.where(dif_abs == min_val)
                        win = np.append(win, locs_pks[ind_min])
                        win = np.sort(win)
                        dif = np.diff(win)
                        j = j + 1
                    else:
                        # No candidate found: estimate the location from the
                        # median IBI (the original appended locs_pks[ind_min],
                        # indexing an empty array)
                        opt = np.round(win[j] + np.median(dif))
                        if opt < win[j + 1]:
                            win = np.append(win, int(opt))
                            win = np.sort(win)
                            dif = np.diff(win)
                        j = j + 1
                else:
                    j = j + 1

            locs = np.append(win, locs)
            locs = np.sort(locs)

        first_i = first_i + stride*fs - 1
        second_i = second_i + stride*fs - 1

    # Remove duplicated locations introduced by the overlapping windows.
    # The sentinel prepended to the diff must be non-zero: the original
    # prepended 0, which unconditionally discarded the first peak.
    if len(locs) != 0:
        dif = np.diff(locs)
        dif = np.append(1, dif)
        ind, = np.where(dif != 0)
        locs = locs[ind]

    # ---- Pass 2: relocation of points that are not local maxima ----
    i = 0
    pre_loc = 0
    while i < len(locs):
        if locs[i] == 0:
            # np.delete takes an index, not a value
            # (was np.delete(locs, locs[i]))
            locs = np.delete(locs, i)
        elif locs[i] >= len(x) - 1:
            # Last sample or beyond: peakness cannot be assessed without
            # reading past the end of x
            locs = np.delete(locs, i)
        else:
            # A valid peak is not smaller than both neighbours
            cond = (x[locs[i]] >= x[locs[i] - 1]) and (x[locs[i]] >= x[locs[i] + 1])
            if cond:
                i = i + 1
            else:
                if locs[i] == pre_loc:
                    i = i + 1
                else:
                    # Extract the segment bounded by the neighbouring peaks
                    if i == 0:
                        aux = x[0:locs[i + 1] - 1]
                        aux_loc = locs[i] - 1
                        aux_start = 0
                    elif i == len(locs) - 1:
                        aux = x[locs[i - 1]:len(x) - 1]
                        aux_loc = locs[i] - locs[i - 1]
                        aux_start = locs[i - 1]
                    else:
                        aux = x[locs[i - 1]:locs[i + 1]]
                        aux_loc = locs[i] - locs[i - 1]
                        aux_start = locs[i - 1]

                    # Closest local maxima before and after the current point
                    pre = find_closest_peak(aux, aux_loc, 'backward')
                    pos = find_closest_peak(aux, aux_loc, 'forward')

                    # IBIs each alternative would produce within the segment
                    ibi_pre = np.append(pre - 1, len(aux) - pre)
                    ibi_pos = np.append(pos - 1, len(aux) - pos)
                    ibi_act = np.append(aux_loc - 1, len(aux) - aux_loc)

                    # Deviation of each alternative from the mean IBI
                    dif_pre = np.abs(ibi_pre - np.mean(np.diff(locs)))
                    dif_pos = np.abs(ibi_pos - np.mean(np.diff(locs)))
                    dif_act = np.abs(ibi_act - np.mean(np.diff(locs)))

                    avgs = [np.mean(dif_pre), np.mean(dif_pos), np.mean(dif_act)]
                    min_avg = np.min(avgs)
                    ind, = np.where(min_avg == avgs)
                    if len(ind) != 0:
                        ind = ind[0]

                    # Relocate the point to the best alternative
                    if ind == 0:
                        locs[i] = pre + aux_start - 1
                    elif ind == 1:
                        locs[i] = pos + aux_start - 1
                    elif ind == 2:
                        locs[i] = aux_loc + aux_start - 1
                    i = i + 1

    # ---- Pass 3: removal of peaks with implausible amplitude ----
    len_window = np.round(t*fs)
    keep = np.empty(0)
    first_i = 0
    second_i = len_window - 1
    while second_i < len(x):
        ind1, = np.where(locs >= first_i)
        ind2, = np.where(locs <= second_i)
        ind = np.intersect1d(ind1, ind2)
        win = locs[ind]
        if len(win) != 0:
            # Amplitude limits relative to the median amplitude in the window
            if np.median(x[win]) > 0:
                th_amp_low = 0.5*np.median(x[win])
                th_amp_high = 3*np.median(x[win])
            else:
                th_amp_low = -3*np.median(x[win])
                th_amp_high = 1.5*np.median(x[win])
            ind1, = np.where(x[win] >= th_amp_low)
            ind2, = np.where(x[win] <= th_amp_high)
            aux_keep = np.intersect1d(ind1,ind2)
            # Map window-relative indices back to indices into locs: aux_keep
            # indexes win, not locs (the original appended aux_keep directly,
            # which truncated locs to its first few entries)
            keep = np.append(keep, ind[aux_keep])

        first_i = second_i + 1
        second_i = second_i + stride*fs - 1

    if len(keep) != 0:
        keep = np.unique(keep)
        locs = locs[keep.astype(int)]

    return locs
318 |
def find_closest_peak(x, loc, dir_search):
    """
    Finds the closest peak to the initial location in x

    Inputs:  x, signal of interest [user defined units]
             loc, initial location [number of samples]
             dir_search, direction of search ['backward','forward']
    Outputs: pos, location of the first peak detected in the specified direction
                 [number of samples]; loc itself if no peak is found, or -1 for
                 an unknown direction

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version:      1.0 - June, 2022

    """

    pos = -1
    if dir_search == 'backward':
        # Walk from two samples before loc down to index 1, stopping at the
        # first sample larger than both of its neighbours
        for k in range(loc - 2, 0, -1):
            if x[k] > x[k - 1] and x[k] > x[k + 1]:
                pos = k
                break
        if pos == -1:
            pos = loc
    elif dir_search == 'forward':
        # Walk from one sample after loc up to the penultimate index
        for k in range(loc + 1, len(x) - 1):
            if x[k] > x[k - 1] and x[k] > x[k + 1]:
                pos = k
                break
        if pos == -1:
            pos = loc

    return pos
357 |
def seek_local(x, start, end):
    """
    Locate the extreme samples of x over the half-open interval [start, end).

    Inputs:  x, signal of interest [user defined units]
             start, first index of the search interval [number of samples]
             end, index one past the last sample examined [number of samples]
    Outputs: (val_min, ind_min, val_max, ind_max) — the minimum value and its
             index, and the maximum value and its index. Ties keep the earliest
             occurrence; both extremes default to x[start].
    """

    ind_min = ind_max = start
    val_min = val_max = x[start]

    # x[start] is the baseline, so scanning can begin one sample later
    for idx in range(start + 1, end):
        sample = x[idx]
        if sample > val_max:
            val_max, ind_max = sample, idx
        elif sample < val_min:
            val_min, ind_min = sample, idx

    return val_min, ind_min, val_max, ind_max
374 |
def heartpy(x, fs, min_ihr, max_ihr, w):
    """
    Detects inter-beat intervals using HeartPy
    Citation: van Gent P, Farah H, van Nes N, van Arem B (2019) Heartpy: A novel heart rate algorithm
              for the analysis of noisy signals. Transp Res Part F, vol. 66, pp. 368-378.
              DOI: 10.1016/j.trf.2019.09.015

    Inputs:  x, pulsatile signal [user defined units]
             fs, sampling rate [Hz]
             min_ihr, minimum value of instantaneous heart rate to be accepted [bpm]
             max_ihr, maximum value of instantaneous heart rate to be accepted [bpm]
             w, length of segments for correction of peaks [s]
    Outputs: ibis, position of the starting points of inter-beat intervals [number of samples]

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version:      1.0 - June, 2022

    """

    # Identification of peaks
    is_roi = 0                          # flag: currently inside a region of interest
    n_rois = 0                          # number of ROIs found so far
    pos_pks = np.empty(0).astype(int)   # candidate peaks of the current ROI
    locs = np.empty(0).astype(int)      # accepted peak locations

    # Pad the signal so the moving average is defined at both ends
    len_ma = int(np.round(0.75*fs))
    sig = np.append(x[0]*np.ones(len_ma), x)
    sig = np.append(sig, x[-1]*np.ones(len_ma))

    i = len_ma
    while i < len(sig) - len_ma:
        # Moving average approximately centred on the current sample
        ma = np.mean(sig[i - len_ma:i + len_ma - 1])

        # If it is the beginning of a new ROI:
        if is_roi == 0 and sig[i] >= ma:
            is_roi = 1
            n_rois = n_rois + 1
            # If it is a peak:
            if sig[i] >= sig[i - 1] and sig[i] >= sig[i + 1]:
                pos_pks = np.append(pos_pks, int(i))

        # If it is part of a ROI which is not over:
        elif is_roi == 1 and sig[i] > ma:
            # If it is a peak:
            if sig[i] >= sig[i - 1] and sig[i] >= sig[i + 1]:
                pos_pks = np.append(pos_pks, int(i))

        # If the ROI is over:
        # NOTE(review): the i == len(sig) - len_ma clause is unreachable
        # (the while condition excludes it), so a ROI still open at the end
        # of the signal is silently discarded — confirm intent.
        elif is_roi == 1 and (sig[i] < ma or i == (len(sig) - len_ma)):
            is_roi = 0  # Lowers flag

            # End of the first ROI: no previous peak, keep the largest candidate
            if n_rois == 1:
                if len(pos_pks) != 0:
                    max_pk = np.max(sig[pos_pks])
                    # Compare the amplitudes against the maximum; the original
                    # compared the maximum against itself (scalar == scalar),
                    # which always selected index 0 instead of the max peak
                    ind, = np.where(sig[pos_pks] == max_pk)
                    locs = np.append(locs, pos_pks[ind[0]])
                else:
                    # No peak found: this ROI does not count
                    n_rois = n_rois - 1

            # End of the second ROI: filter by instantaneous HR, keep the largest
            elif n_rois == 2:
                if len(pos_pks) != 0:
                    # Instantaneous HR of candidates w.r.t. the previous peak
                    ihr = 60/((pos_pks - locs[-1])/fs)
                    # Element-wise AND: Python 'and' on arrays raises ValueError
                    good_ihr, = np.where(np.logical_and(ihr <= max_ihr, ihr >= min_ihr))
                    pos_pks = pos_pks[good_ihr].astype(int)

                    if len(pos_pks) != 0:
                        max_pk = np.max(sig[pos_pks])
                        ind, = np.where(sig[pos_pks] == max_pk)
                        locs = np.append(locs, pos_pks[ind[0]])
                    else:
                        n_rois = n_rois - 1
                else:
                    # No candidate at all: this ROI does not count (added for
                    # consistency with the first-ROI branch)
                    n_rois = n_rois - 1

            # End of any further ROI: filter by HR, keep the min-SDNN candidate
            else:
                if len(pos_pks) != 0:
                    ihr = 60/((pos_pks - locs[-1])/fs)
                    good_ihr, = np.where(np.logical_and(ihr <= max_ihr, ihr >= min_ihr))
                    pos_pks = pos_pks[good_ihr].astype(int)

                    if len(pos_pks) != 0:
                        # SDNN of the IBI series extended with each candidate
                        sdnn = np.zeros(len(pos_pks))
                        for j in range(len(pos_pks)):
                            sdnn[j] = np.std(np.append(locs/fs, pos_pks[j]/fs))
                        # Keep the candidate that minimises the SDNN
                        min_pk = np.min(sdnn)
                        ind, = np.where(sdnn == min_pk)
                        locs = np.append(locs, pos_pks[ind[0]])
                    else:
                        n_rois = n_rois - 1
                else:
                    n_rois = n_rois - 1

            # Reset candidates for the next ROI; keep integer dtype so the
            # array stays usable as an index into sig (the original reset to
            # a float array, which crashes fancy indexing)
            pos_pks = np.empty(0).astype(int)

        i = i + 1

    # Map the locations back to the coordinates of the unpadded signal
    locs = locs - len_ma

    # Correction of peaks: within windows of length w, discard IBIs deviating
    # more than 30% from the window's mean IBI
    c_locs = np.empty(0)
    n_int = np.floor(len(x)/(w*fs))
    for i in range(int(n_int)):
        ind1, = np.where(locs >= i*w*fs)
        ind2, = np.where(locs < (i + 1)*w*fs)
        ind = np.intersect1d(ind1, ind2)
        int_locs = locs[ind]

        if i == 0:
            aux_ibis = np.diff(int_locs)
        else:
            ind, = np.where(locs >= i*w*fs)
            if len(ind) != 0 and ind[0] > 0:
                # Include the interval from the last peak of the previous
                # window (the original wrapped to locs[-1] when ind[0] == 0
                # and crashed when ind was empty)
                last = locs[ind[0] - 1]
                aux_ibis = np.diff(np.append(last, int_locs))
            else:
                aux_ibis = np.diff(int_locs)
        avg_ibis = np.mean(aux_ibis)
        th = np.append((avg_ibis - 0.3*avg_ibis), (avg_ibis + 0.3*avg_ibis))
        ind1, = np.where(aux_ibis > th[0])
        ind2, = np.where(aux_ibis < th[1])
        ind = np.intersect1d(ind1, ind2)

        c_locs = np.append(c_locs, int_locs[ind]).astype(int)

    # Fall back to the uncorrected locations if the correction removed all
    if len(c_locs) != 0:
        ibis = c_locs
    else:
        ibis = locs

    return ibis
555 |
def d2max(x, fs):
    """
    Detects inter-beat intervals using D2Max
    Citation: Elgendi M, Norton I, Brearley M, Abbott D, Schuurmans D (2013) Systolic Peak Detection
              in Acceleration Photoplethysmograms Measured from Emergency Responders in Tropical
              Conditions. PLoS ONE, vol. 8, no. 10, pp. e76585. DOI: 10.1371/journal.pone.0076585

    Inputs:  x, pulsatile signal [user defined units]
             fs, sampling rate [Hz]
    Outputs: ibis, position of the starting points of inter-beat intervals [number of samples]

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version:      1.0 - June, 2022

    """

    # Zero-pad short signals so the filter has enough samples; the original
    # left x_z undefined (NameError) for signals of 4098 samples or more
    if len(x) < 4098:
        z_fill = np.zeros(4098 - len(x) + 1)
        x_z = np.append(x, z_fill)
    else:
        x_z = x

    # Band-pass filter, 0.5 - 8 Hz
    sos = sp.butter(10, [0.5, 8], btype = 'bp', analog = False, output = 'sos', fs = fs)
    x_f = sp.sosfiltfilt(sos, x_z)

    # Signal clipping: keep the positive lobes only (copy to avoid silently
    # mutating x_f through aliasing)
    x_c = x_f.copy()
    ind, = np.where(x_c < 0)
    x_c[ind] = 0

    # Signal squaring
    x_s = x_c**2

    # Moving average emphasising systolic peaks (window ~111 ms, odd length)
    w1 = (111e-3)*fs
    w1 = int(2*np.floor(w1/2) + 1)
    b = (1/w1)*np.ones(w1)
    ma_pk = sp.filtfilt(b,1,x_s)

    # Moving average emphasising whole beats (window ~667 ms, odd length);
    # the original built this kernel with length w1 instead of w2
    w2 = (667e-3)*fs
    w2 = int(2*np.floor(w2/2) + 1)
    b = (1/w2)*np.ones(w2)
    ma_bpm = sp.filtfilt(b,1,x_s)

    # Thresholding: blocks of interest are where the peak-scale MA exceeds
    # the beat-scale MA plus a small data-driven offset
    alpha = 0.02*np.mean(ma_pk)
    th_1 = ma_bpm + alpha
    th_2 = w1                       # minimum acceptable block length [samples]
    boi = (ma_pk > th_1).astype(int)

    blocks_init, = np.where(np.diff(boi) > 0)
    blocks_init = blocks_init + 1
    blocks_end, = np.where(np.diff(boi) < 0)
    blocks_end = blocks_end + 1
    # No block of interest at all: no beats detected (the original raised an
    # IndexError here)
    if len(blocks_init) == 0 or len(blocks_end) == 0:
        return np.empty(0).astype(int)
    if blocks_init[0] > blocks_end[0]:
        blocks_init = np.append(1, blocks_init)
    if blocks_init[-1] > blocks_end[-1]:
        blocks_end = np.append(blocks_end, len(x_s))

    # Search for the maximum of x inside each sufficiently long block
    len_blks = np.zeros(len(blocks_init))
    ibis = np.zeros(len(blocks_init))
    for i in range(len(blocks_init)):
        # First block end that closes this block
        ind, = np.where(blocks_end > blocks_init[i])
        ind = ind[0]
        len_blks[i] = blocks_end[ind] - blocks_init[i]
        if len_blks[i] >= th_2:
            aux = x[blocks_init[i]:blocks_end[ind]]
            if len(aux) != 0:
                max_val = np.max(aux)
                max_ind, = np.where(max_val == aux)
                # Use the first maximum: max_ind can hold several ties, and
                # assigning an array to a scalar element is an error
                ibis[i] = max_ind[0] + blocks_init[i] - 1

    # Clear blocks that are too short; index through ind (the original
    # zeroed the first len(ind) blocks regardless of which were short)
    ind, = np.where(len_blks < th_2)
    if len(ind) != 0:
        for i in range(len(ind)):
            boi[blocks_init[ind[i]]:blocks_end[ind[i]]] = 0
    ind, = np.where(ibis == 0)
    ibis = (np.delete(ibis, ind)).astype(int)

    return ibis
660 |
def upslopes(x):
    """
    Detects inter-beat intervals using Upslopes
    Citation: Arguello Prada EJ, Serna Maldonado RD (2018) A novel and low-complexity peak detection
              algorithm for heart rate estimation from low-amplitude photoplethysmographic (PPG)
              signals. J Med Eng Technol, vol. 42, no. 8, pp. 569-577.
              DOI: 10.1080/03091902.2019.1572237

    Inputs:  x, pulsatile signal [user defined units]
    Outputs: ibis, position of the starting points of inter-beat intervals [number of samples]

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version:      1.0 - June, 2022

    """

    rise_th = 6             # adaptive threshold on the upslope length
    beats = np.empty(0)     # confirmed beat locations
    cand = np.empty(0)      # candidate peak locations
    have_cand = 0           # flag: an unconfirmed candidate exists
    n_cand = 0              # number of candidates recorded so far
    rise_len = 0            # length of the current run of rising samples
    prev_rise = 0           # length of the upslope of the last candidate

    for i in range(1, len(x)):
        # Rising sample: just extend the current upslope
        if x[i] > x[i - 1]:
            rise_len = rise_len + 1
            continue

        # Falling/flat sample: decide what the preceding upslope meant
        if rise_len > rise_th:
            # Long enough upslope: record a candidate peak at this sample
            cand = np.append(cand, i)
            have_cand = 1
            n_cand = n_cand + 1
            prev_rise = rise_len
        else:
            cand = cand.astype(int)
            if have_cand == 1:
                if x[i - 1] > x[cand[n_cand - 1]]:
                    # The signal kept growing: move the candidate forward
                    cand[n_cand - 1] = i - 1
                else:
                    # Confirm the candidate and adapt the threshold to 60%
                    # of its upslope length
                    beats = np.append(beats, cand[n_cand - 1])
                    rise_th = 0.6*prev_rise
                    have_cand = 0
        rise_len = 0

    ibis = beats.astype(int)

    return ibis
713 |
def delineator(x, fs):
    """
    Detects inter-beat intervals using Delineator
    Citation: Li BN, Dong MC, Vai MI (2010) On an automatic delineator for arterial blood pressure
              waveforms. Biomed Signal Process Control, vol. 5, no. 1, pp. 76-81.
              DOI: 10.1016/j.bspc.2009.06.002

    Inputs:  x, pulsatile signal [user defined units]
             fs, sampling rate [Hz]
    Outputs: ibis, position of the starting points of inter-beat intervals [number of samples];
             -1 if no peak was found

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version:      1.0 - June, 2022

    """

    # Lowpass filter (25 Hz) and amplitude scaling
    od = 3
    sos = sp.butter(od, 25, btype = 'low', analog = False, output = 'sos', fs = fs)
    x_f = sp.sosfiltfilt(sos, x)
    x_m = 1000*x_f

    # Moving average
    n = 5
    b = (1/n)*np.ones(n)
    x_ma = sp.filtfilt(b,1,x_m)

    # Compute differentials (smoothed first derivative)
    dif = np.diff(x_ma)
    dif = 100*np.append(dif[0], dif)
    dif_ma = sp.filtfilt(b,1,dif)

    # Search-window constants. These must be defined BEFORE the threshold
    # estimation below: the original assigned close_win after the n == 1
    # branch that reads it, raising NameError for signals of 4 s or less.
    step_win = 2*fs                 # window length to look for peaks/onsets
    close_win = np.floor(0.1*fs)    # value of what is considered too close

    # Average amplitude thresholds estimated from n short intervals of the
    # smoothed signal
    x_len = len(x)
    if x_len > 12*fs:
        n = 10
    elif x_len > 7*fs:
        n = 5
    elif x_len > 4*fs:
        n = 2
    else:
        n = 1

    max_min = np.empty(0)
    if n > 1:
        n_int = np.floor(x_len/(n + 2))
        for j in range(n):
            # Searches for max and min in 1 s intervals
            amp_min, ind_min, amp_max, ind_max = seek_local(x_ma, int(j*n_int), int(j*n_int + fs))
            max_min = np.append(max_min, (amp_max - amp_min))
        max_min_avg = np.mean(max_min)
    else:
        amp_min, ind_min , amp_max, ind_max = seek_local(x_ma, int(close_win), int(x_len))
        max_min_avg = amp_max - amp_min

    max_min_lt = 0.4*max_min_avg    # lower limit for the adaptive threshold

    # Seek pulse beats by min-max method
    pks = np.empty(0)               # Location of peaks
    ons = np.empty(0)               # Location of onsets
    dic = np.empty(0)               # Location of dicrotic notches (search disabled)

    pk_index = -1                   # Number of peaks found
    on_index = -1                   # Number of onsets found
    dn_index = -1                   # Number of dicrotic notches found

    i = int(close_win)              # Initializes counter
    while i < x_len:                # Iterates through the signal
        amp_min = x_ma[i]           # Gets the initial value for the minimum amplitude
        amp_max = x_ma[i]           # Gets the initial value for the maximum amplitude

        ind = i                     # Initializes the temporal location of the index
        aux_pks = i                 # Initializes the temporal location of the peak
        aux_ons = i                 # Initializes the temporal location of the onset

        # Iterates while ind is lower than the length of the signal
        while ind < x_len - 1:
            # Verifies if no peak has been found in 2 seconds
            if (ind - i) > step_win:
                ind = i                         # Refreshes the temporal location of the index
                max_min_avg = 0.6*max_min_avg   # Refreshes the threshold for the amplitude
                # Verifies if the threshold is lower than the lower limit
                if max_min_avg <= max_min_lt:
                    max_min_avg = 2.5*max_min_lt    # Refreshes the threshold
                break

            # Verifies if the location is a candidate peak (sign change of the
            # smoothed derivative)
            if (dif_ma[ind - 1]*dif_ma[ind + 1]) <= 0:
                # Determines initial and end points of a window to search for
                # local peaks and onsets
                if (ind + 5) < x_len:
                    i_stop = ind + 5
                else:
                    i_stop = x_len - 1
                if (ind - 5) >= 0:
                    i_start = ind - 5
                else:
                    i_start = 0

                # Checks for artifacts of saturation or signal loss.
                # NOTE(review): j == i_stop can never hold because range()
                # excludes its stop value, so this check never fires — confirm
                # whether j == i_stop - 1 was intended.
                if (i_stop - ind) >= 5:
                    for j in range(ind, i_stop):
                        if dif_ma[j] != 0:
                            break
                    if j == i_stop:
                        break

                # Candidate onset: derivative goes from negative to positive
                if dif_ma[i_start] < 0:
                    if dif_ma[i_stop] > 0:
                        aux_min, ind_min, _, _ = seek_local(x_ma, int(i_start), int(i_stop))
                        if np.abs(ind_min - ind) <= 2:
                            amp_min = aux_min
                            aux_ons = ind_min
                # Candidate peak: derivative goes from positive to negative
                if dif_ma[i_start] > 0:
                    if dif_ma[i_stop] < 0:
                        _, _, aux_max, ind_max = seek_local(x_ma, int(i_start), int(i_stop))
                        if np.abs(ind_max - ind) <= 2:
                            amp_max = aux_max
                            aux_pks = ind_max

                # Verifies if the amplitude of the pulse is larger than 0.4 times the mean value:
                if (amp_max - amp_min) > 0.4*max_min_avg:
                    # Verifies if the amplitude of the pulse is lower than 2 times the mean value:
                    if (amp_max - amp_min) < 2*max_min_avg:
                        if aux_pks > aux_ons:
                            # Refine onset: true minimum between onset and peak.
                            # NOTE(review): this reversed range stops at
                            # aux_ons + 2 (exclusive stop); confirm whether
                            # aux_ons - 1 was meant as the stop value.
                            aux_min = x_ma[aux_ons]
                            temp_ons = aux_ons
                            for j in range(aux_pks, aux_ons + 1, -1):
                                if x_ma[j] < aux_min:
                                    aux_min = x_ma[j]
                                    temp_ons = j
                            amp_min = aux_min
                            aux_ons = temp_ons

                            # If there is at least one peak found before:
                            if pk_index >= 0:
                                # If the duration of the pulse is too short, rewind and
                                # reset the amplitude threshold:
                                if (aux_ons - pks[pk_index]) < 3*close_win:
                                    ind = i
                                    max_min_avg = 2.5*max_min_lt
                                    break
                                # If the time difference between consecutive peaks is too
                                # long, drop the previous detection:
                                if (aux_pks - pks[pk_index]) > step_win:
                                    pk_index = pk_index - 1
                                    on_index = on_index - 1
                                # If there are still peaks, add the new peak:
                                if pk_index >= 0:
                                    pk_index = pk_index + 1
                                    on_index = on_index + 1
                                    pks = np.append(pks, aux_pks)
                                    ons = np.append(ons, aux_ons)

                                    # Timing bounds kept from the (currently
                                    # disabled) dicrotic-notch search
                                    tf = ons[pk_index] - ons[pk_index - 1]

                                    to = np.floor(fs/20)
                                    tff = np.floor(0.1*tf)
                                    if tff < to:
                                        to = tff
                                    to = pks[pk_index - 1] + to

                                    te = np.floor(fs/20)
                                    tff = np.floor(0.5*tf)
                                    if tff < te:
                                        te = tff
                                    te = pks[pk_index - 1] + te

                                    ind = ind + close_win
                                    break
                            # If it is the first peak:
                            if pk_index < 0:
                                pk_index = pk_index + 1
                                on_index = on_index + 1
                                pks = np.append(pks, aux_pks)
                                ons = np.append(ons, aux_ons)
                                ind = ind + close_win
                                break

            ind = ind + 1
        i = int(ind + 1)

    # No beat at all: keep the original -1 sentinel for callers
    if len(pks) == 0:
        return -1

    # Compensate the delay introduced by the filtering (od samples), clamping
    # the first location to the start of the signal
    x_len = len(pks)
    temp_p = np.empty(0)
    for i in range(x_len):
        temp_p = np.append(temp_p, pks[i] - od)
    ttk = temp_p[0]
    if ttk < 0:
        temp_p[0] = 0
    pks = temp_p

    x_len = len(ons)
    temp_o = np.empty(0)
    for i in range(x_len):
        temp_o = np.append(temp_o, ons[i] - od)
    ttk = temp_o[0]
    if ttk < 0:
        temp_o[0] = 0
    ons = temp_o

    pks = pks + 5
    ibis = pks.astype(int)

    return ibis
984 |
--------------------------------------------------------------------------------