├── content ├── about │ ├── GitHub-Mark-32px_source.txt │ ├── GitHub-Mark-32px.png │ ├── about-contributing.md │ └── about-maintenance.md ├── icon.png ├── images │ ├── access.png │ ├── bidmc.png │ ├── bidmc3.png │ ├── lightwave.png │ ├── mimicdua.png │ ├── physionet.png │ ├── google_cloud.png │ ├── icu_patient.png │ ├── eicu_discovery.png │ ├── examplepatient.jpg │ ├── mimic_workflow.png │ ├── sccm_datathon3.png │ ├── springer_nature2.png │ └── waveform_viewer.png ├── tutorials.md ├── tutorials-for-the-future.md ├── tutorial │ ├── data-modelling.md │ ├── data-interpretation.md │ ├── signal-quality-assessment.md │ ├── data-analysis.md │ └── notebooks │ │ ├── fiducial_point_functions.py │ │ ├── data-extraction.ipynb │ │ ├── data-exploration.ipynb │ │ └── beat_detection_functions.py ├── about.md ├── workshop │ ├── schedule.md │ ├── prep.md │ ├── aims.md │ └── synopsis.md ├── mimic-database.md ├── mimic │ ├── context.md │ ├── wfdb-toolbox.md │ ├── physionet.md │ ├── structure.md │ └── formatting.md ├── _toc.yml ├── intro.md ├── _config.yml ├── additional-resources.md ├── case-study.md ├── workshop.md └── references.bib ├── requirements.txt ├── .github └── workflows │ ├── deploy.yml │ └── run-tests.yml ├── .all-contributorsrc ├── .gitignore └── README.md /content/about/GitHub-Mark-32px_source.txt: -------------------------------------------------------------------------------- 1 | https://github.com/logos 2 | 3 | 08-Apr-2022 -------------------------------------------------------------------------------- /content/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/icon.png -------------------------------------------------------------------------------- /content/images/access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/access.png 
-------------------------------------------------------------------------------- /content/images/bidmc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/bidmc.png -------------------------------------------------------------------------------- /content/images/bidmc3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/bidmc3.png -------------------------------------------------------------------------------- /content/images/lightwave.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/lightwave.png -------------------------------------------------------------------------------- /content/images/mimicdua.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/mimicdua.png -------------------------------------------------------------------------------- /content/images/physionet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/physionet.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==3.1.4 2 | jupyter-book==1.0.2 3 | matplotlib==3.5.2 4 | numpy>=1.23.1 5 | wfdb==3.4.1 6 | -------------------------------------------------------------------------------- /content/images/google_cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/google_cloud.png 
-------------------------------------------------------------------------------- /content/images/icu_patient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/icu_patient.png -------------------------------------------------------------------------------- /content/images/eicu_discovery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/eicu_discovery.png -------------------------------------------------------------------------------- /content/images/examplepatient.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/examplepatient.jpg -------------------------------------------------------------------------------- /content/images/mimic_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/mimic_workflow.png -------------------------------------------------------------------------------- /content/images/sccm_datathon3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/sccm_datathon3.png -------------------------------------------------------------------------------- /content/about/GitHub-Mark-32px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/about/GitHub-Mark-32px.png -------------------------------------------------------------------------------- /content/images/springer_nature2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/springer_nature2.png -------------------------------------------------------------------------------- /content/images/waveform_viewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wfdb/mimic_wfdb_tutorials/HEAD/content/images/waveform_viewer.png -------------------------------------------------------------------------------- /content/tutorials.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | Follow the links to explore tutorials on Biomedical Signal Processing using the MIMIC Waveform Database. 4 | -------------------------------------------------------------------------------- /content/tutorials-for-the-future.md: -------------------------------------------------------------------------------- 1 | # Future Tutorials 2 | 3 | Follow the links to explore ideas for possible tutorials which could be added in the future. 4 | -------------------------------------------------------------------------------- /content/tutorial/data-modelling.md: -------------------------------------------------------------------------------- 1 | # Data modelling 2 | 3 | _Tutorial on data modelling - i.e. 
training a ML algorithm to estimate BP from PPG pulse wave features._ -------------------------------------------------------------------------------- /content/about.md: -------------------------------------------------------------------------------- 1 | About this Book 2 | ======================= 3 | 4 | - [Contributors](https://github.com/wfdb/mimic_wfdb_tutorials#contributors-) 5 | - [How to contribute](./about/about-contributing) 6 | - [Maintenance](./about/about-maintenance) 7 | -------------------------------------------------------------------------------- /content/tutorial/data-interpretation.md: -------------------------------------------------------------------------------- 1 | # Data interpretation 2 | 3 | _Tutorial on data interpretation_ 4 | 5 | ```{admonition} Suggestions 6 | This involves interpreting the results, _i.e._ how well does the PPG-based approach to estimating BP perform? Perhaps it would be helpful to compare the performance against standards for BP monitors (_e.g._ the AAMI standard). 7 | ``` -------------------------------------------------------------------------------- /content/tutorial/signal-quality-assessment.md: -------------------------------------------------------------------------------- 1 | # Signal Quality Assessment 2 | 3 | _Tutorial on signal quality assessment_ 4 | 5 | ```{admonition} Suggestions 6 | I think this step is optional because we can still estimate BP without it. 7 | 8 | Similarly to the beat detection tutorial, I'd suggest we firstly see if HeartPy contains some signal quality assessment functionality, and if so, use it. 
9 | ``` -------------------------------------------------------------------------------- /content/tutorial/data-analysis.md: -------------------------------------------------------------------------------- 1 | # Data analysis 2 | 3 | _Tutorial on data analysis_ 4 | 5 | ```{admonition} Suggestions 6 | This could involve: 7 | - Identifying pairs of PPG-estimated BPs and corresponding reference BPs. 8 | - Calculating error statistics (_e.g._ mean absolute error, bias and limits of agreement, r^2) 9 | - Making plots (_e.g._ Bland-Altman, correlation plot). 10 | - Repeating this process for SBP and DBP. 11 | ``` -------------------------------------------------------------------------------- /content/workshop/schedule.md: -------------------------------------------------------------------------------- 1 | # Schedule 2 | 3 | The workshop will last 3.5 hours. 4 | 5 | | Time | Content | 6 | | :--- | :--- | 7 | | 5 mins | [Welcome and overview](../workshop) | 8 | | 20 mins | [Introduction to the MIMIC Waveform Database](../workshop) | 9 | | 5 mins | [Overview of case study](../case-study) | 10 | | 45 mins | [Interactive Tutorials](../tutorials) | 11 | | 2 hours | [Case Study](../case-study) | 12 | | 15 mins | Group presentations | 13 | 14 | -------------------------------------------------------------------------------- /content/workshop/prep.md: -------------------------------------------------------------------------------- 1 | # Preparation 2 | 3 | ## In advance 4 | 5 | You must be registered for the workshop in order to attend, as it is not included in the standard conference registration. You can register for the workshop at the [IEEE EMBC conference website](https://embc.embs.org/2022/). 6 | 7 | Whilst no preparation is required for the workshop, attendees are encouraged to: 8 | 1. Familiarise themselves with these resources. 9 | 2. Ensure that they can run the tutorials and case study in Google Colab. 
10 | 11 | ## On the day 12 | 13 | - Bring your laptop -------------------------------------------------------------------------------- /content/workshop/aims.md: -------------------------------------------------------------------------------- 1 | # Aims 2 | 3 | This interactive workshop provides key knowledge, skills, and tools for conducting open research in the field of biomedical signal processing. 4 | 5 | Broadly, the workshop: 6 | 1. Introduces publicly accessible datasets of physiological signals, focusing on those on _PhysioNet_ ([https://www.physionet.org](https://www.physionet.org)). 7 | 2. Teaches essential skills for conducting high quality research using open source software. 8 | 3. Offers an opportunity to work in groups on a cuffless blood pressure estimation case study. 9 | -------------------------------------------------------------------------------- /content/mimic-database.md: -------------------------------------------------------------------------------- 1 | # MIMIC Database 2 | 3 | The [MIMIC Database](https://mimic.mit.edu/) is a publicly accessible critical care database. It is widely used in biomedical signal processing research because it contains a variety of physiological signals collected from many thousands of patients. 4 | 5 | The database is a valuable resource for open research in biomedical signal processing because: 6 | - the signals (_a.k.a_ waveforms) are openly available ([here](https://physionet.org/content/mimic4wdb/0.1.0/)) 7 | - open software is provided to analyse the waveforms ([here](https://wfdb.readthedocs.io/en/stable/)) 8 | 9 | Follow the links to find out more about the MIMIC Database. 
10 | -------------------------------------------------------------------------------- /content/mimic/context.md: -------------------------------------------------------------------------------- 1 | # Clinical Context 2 | 3 | ## MIMIC-IV 4 | 5 | - Publicly accessible critical care database 6 | - Developed in collaboration with Beth Israel Deaconess Medical Center 7 | - \>50,000 ICU stays and \>400,000 ED stays between 2008-2019 8 | - Modular (structured EHR, X-rays, waveforms, clinical reports, echos) 9 | 10 | ![](../images/bidmc3.png) 11 | 12 | --- 13 | 14 | ## Reproducible workflow 15 | 16 | ![](../images/mimic_workflow.png) 17 | 18 | --- 19 | 20 | ## Critical care 21 | 22 | ![](../images/icu_patient.png) 23 | 24 | --- 25 | 26 | ## Example patient 27 | 28 | ![](../images/examplepatient.jpg) 29 | _Reproduced under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) from: Johnson AEW et al. MIMIC-III, a freely accessible critical care database. Sci Data 2016; 3: 160035. https://doi.org/10.1038/sdata.2016.35_ 30 | -------------------------------------------------------------------------------- /content/about/about-contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | To contribute to this book, either: 4 | ```{dropdown} **1. Propose edits:** Follow these instructions to propose a specific change 5 | - Go to the page you would like to edit 6 | - Hover over the ![GitHub-Mark](GitHub-Mark-32px.png) button at the top of the page, and click 'suggest edit' from the dropdown list that appears. 7 | - This will take you to GitHub, where you can edit the page directly and submit the proposed edits for approval. You will require a GitHub login to do this. 8 | ``` 9 | ```{dropdown} **2. Suggest areas for improvement:** Follow these instructions to make a general suggestion 10 | - If your suggestion relates to a specific page, then go to that page. 
11 | - Hover over the ![GitHub-Mark](GitHub-Mark-32px.png) button at the top of the page, and click 'open issue' from the dropdown list that appears. 12 | - This will take you to GitHub, where you can post suggestions for improvement. You will require a GitHub login to do this. 13 | ``` 14 | When contributing, please either contribute your own ideas/text, or clearly acknowledge the original source of the ideas/text. 15 | -------------------------------------------------------------------------------- /content/_toc.yml: -------------------------------------------------------------------------------- 1 | format: jb-article 2 | root: intro 3 | sections: 4 | - file: workshop 5 | sections: 6 | - file: workshop/aims 7 | - file: workshop/synopsis 8 | - file: workshop/schedule 9 | - file: workshop/prep 10 | - file: mimic-database 11 | sections: 12 | - file: mimic/physionet 13 | - file: mimic/context 14 | - file: mimic/structure 15 | - file: mimic/formatting 16 | - file: mimic/wfdb-toolbox 17 | - file: tutorials 18 | sections: 19 | - file: tutorial/notebooks/data-exploration 20 | - file: tutorial/notebooks/data-extraction 21 | - file: tutorial/notebooks/data-visualisation 22 | - file: tutorial/notebooks/signal-filtering 23 | - file: tutorial/notebooks/differentiation 24 | - file: tutorial/notebooks/beat-detection 25 | - file: tutorial/notebooks/pulse-wave-analysis 26 | - file: tutorial/notebooks/extracting-reference-bp 27 | - file: case-study 28 | - file: additional-resources 29 | - file: about 30 | sections: 31 | - file: about/about-contributing 32 | - file: about/about-maintenance 33 | - file: tutorials-for-the-future 34 | sections: 35 | - file: tutorial/notebooks/qrs-detection 36 | - file: tutorial/signal-quality-assessment 37 | - file: tutorial/data-modelling 38 | - file: tutorial/data-analysis 39 | - file: tutorial/data-interpretation 40 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: 
-------------------------------------------------------------------------------- 1 | name: deploy-book 2 | 3 | # Only run this when the main branch changes 4 | on: 5 | push: 6 | branches: 7 | - main 8 | # If your git repository has the Jupyter Book within some-subfolder next to 9 | # unrelated files, you can make this run only if a file within that specific 10 | # folder has been modified. 11 | # 12 | # paths: 13 | # - some-subfolder/** 14 | 15 | # This job installs dependencies, builds the book, and pushes it to `gh-pages` 16 | jobs: 17 | deploy-book: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | # Install dependencies 23 | - name: Set up Python 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: '3.10' 27 | 28 | - name: Install dependencies 29 | run: | 30 | sudo apt-get install libsndfile1 31 | pip install -r requirements.txt 32 | 33 | # Build the book 34 | - name: Build the book 35 | run: | 36 | jupyter book build content --all 37 | 38 | # Push the book's HTML to github-pages 39 | - name: GitHub Pages action 40 | uses: peaceiris/actions-gh-pages@v3.6.1 41 | if: ${{ github.ref == 'refs/heads/main' }} 42 | with: 43 | github_token: ${{ secrets.GITHUB_TOKEN }} 44 | publish_dir: ./content/_build/html -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | # Link repository with GitHub Actions 2 | # https://docs.github.com/en/actions/learn-github-actions/introduction-to-github-actions 3 | 4 | name: run-tests 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | pull_request: 11 | branches: 12 | - main 13 | 14 | jobs: 15 | test: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [windows-latest, ubuntu-latest, macos-latest] 20 | python-version: ["3.10"] 21 | steps: 22 | # Checkout the latest code from the repo 23 | # https://github.com/actions/checkout 24 | - 
name: Checkout repo 25 | uses: actions/checkout@v4 26 | # Setup which version of Python to use 27 | # https://github.com/actions/setup-python 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | # Display the Python version being used 33 | - name: Display Python version 34 | run: python -c "import sys; print(sys.version)" 35 | # Install the dependencies for the package. 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install --upgrade pip 39 | pip install -r requirements.txt 40 | # Build the book 41 | - name: Build the book 42 | run: jupyter-book build content/ 43 | -------------------------------------------------------------------------------- /content/intro.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This book presents tutorials on using the MIMIC Waveform Database for Biomedical Signal Processing. 4 | 5 | ```{note} 6 | These resources are currently being developed. 7 | ``` 8 | 9 | The book includes: 10 | 11 | - [Workshop](../workshop): Details of the workshop for which these resources were designed. 12 | - [MIMIC Database](../mimic-database): An overview of the MIMIC Waveform Database. 13 | - [Case Study](../case-study): A case study on cuffless blood pressure estimation using the MIMIC Database. 14 | - [Additional Resources](../additional-resources): Additional resources on the topics covered in these tutorials. 15 | 16 | --- 17 | 18 | # Contributing 19 | 20 | All are welcome to contribute to this project (as described in [About this Book](../about)). You may wish to contribute: 21 | - **Content on new topics:** You may have your own ideas for new topics to be included in the book, or you might want to contribute towards writing on [these topics](https://github.com/peterhcharlton/mimic_wfdb_tutorials/issues/1) which we think should be included. 
22 | - **Modifications to existing content:** You may propose edits to existing content, by following the instructions under 'Propose edits' [here](./about/about-contributing). 23 | - **General suggestions for improvements:** You may make a general suggestion by following the instructions under 'Suggest areas for improvement' [here](./about/about-contributing). 24 | -------------------------------------------------------------------------------- /content/_config.yml: -------------------------------------------------------------------------------- 1 | # Book settings 2 | # Learn more at https://jupyterbook.org/customize/config.html 3 | 4 | title: MIMIC WFDB Tutorials 5 | author: Peter H Charlton 6 | copyright: "2022" 7 | logo: icon.png 8 | exclude_patterns: [README.md] 9 | 10 | # Force re-execution of notebooks on each build. 11 | # See https://jupyterbook.org/content/execute.html 12 | execute: 13 | execute_notebooks: force 14 | 15 | # Define the name of the latex output file for PDF builds 16 | latex: 17 | latex_documents: 18 | targetname: mimic_wfdb_tutorials.tex 19 | 20 | # Add a bibtex file so that we can create citations 21 | bibtex_bibfiles: 22 | - references.bib 23 | 24 | # Information about where the book exists on the web 25 | repository: 26 | url: https://github.com/wfdb/mimic_wfdb_tutorials # Online location of your book 27 | branch: main # Which branch of the repository should be used when creating links (optional) 28 | path_to_book: content 29 | 30 | # Add GitHub buttons to your book 31 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository 32 | html: 33 | use_issues_button: true 34 | use_repository_button: true 35 | use_edit_page_button: true 36 | 37 | # Add launch buttons 38 | # See https://jupyterbook.org/en/stable/interactive/launchbuttons.html 39 | launch_buttons: 40 | # Google Colab links will only work for pages that have the .ipynb extension. 
41 | colab_url: "https://colab.research.google.com" 42 | binderhub_url: "https://mybinder.org" 43 | 44 | -------------------------------------------------------------------------------- /content/mimic/wfdb-toolbox.md: -------------------------------------------------------------------------------- 1 | # WFDB Toolbox 2 | 3 | ## Overview 4 | 5 | The Waveform Database (WFDB) is a set of file standards designed for reading and storing physiologic signal data, and associated annotations. See the [WFDB Spec repository](https://github.com/wfdb/wfdb-spec/) for the specification details. 6 | 7 | Example signal types include ECG and EEG. Example annotation types include automated machine-labelled heart-beats, and clinician comments regarding specific signal artifacts. 8 | 9 | There are several available software packages that implement the WFDB specifications. Consider using one of them if you want to conduct research or build algorithms using physiologic data. 10 | 11 | ## Software Packages 12 | 13 | The WFDB specification is openly-licensed, so anyone can implement and modify software according to the spec. Here are the main packages and implementations: 14 | 15 | - [WFDB Software Package](https://doi.org/10.13026/gjvw-1m31): The original software package written in C. Contains the core library, command line tools, and WAVE. See also the PhysioNet publication. Associated documents: 16 | - [WFDB Python Package](https://wfdb.readthedocs.io/en/stable/): A native Python implementation of WFDB. 17 | - [WFDB Toolbox for Matlab](https://archive.physionet.org/physiotools/matlab/wfdb-swig-matlab/new_version.shtml): A set of Java, GUI, and m-code wrapper functions, which make system calls to WFDB Software Package and other applications. 18 | 19 | ## WFDB-Python 20 | 21 | For the purposes of this workshop, we will be using the [WFDB Python Package](https://wfdb.readthedocs.io/en/stable/), a library of tools for reading, writing, and processing physiological signals and annotations. 
22 | 23 | The distribution is hosted on PyPI, the package manager for Python. The software can be installed directly from PyPI using the following command: 24 | 25 | ```python 26 | $ pip install wfdb 27 | ``` 28 | 29 | 30 | -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "files": [ 3 | "README.md" 4 | ], 5 | "imageSize": 100, 6 | "commit": false, 7 | "contributors": [ 8 | { 9 | "login": "peterhcharlton", 10 | "name": "Peter H Charlton", 11 | "avatar_url": "https://avatars.githubusercontent.com/u/9865941?v=4", 12 | "profile": "https://peterhcharlton.github.io/", 13 | "contributions": [ 14 | "content" 15 | ] 16 | }, 17 | { 18 | "login": "tompollard", 19 | "name": "Tom Pollard", 20 | "avatar_url": "https://avatars.githubusercontent.com/u/822601?v=4", 21 | "profile": "https://github.com/tompollard", 22 | "contributions": [ 23 | "design" 24 | ] 25 | }, 26 | { 27 | "login": "elisamejia", 28 | "name": "Elisa Mejía", 29 | "avatar_url": "https://avatars.githubusercontent.com/u/10887584?v=4", 30 | "profile": "https://github.com/elisamejia", 31 | "contributions": [ 32 | "design" 33 | ] 34 | }, 35 | { 36 | "login": "bemoody", 37 | "name": "bemoody", 38 | "avatar_url": "https://avatars.githubusercontent.com/u/7748246?v=4", 39 | "profile": "https://github.com/bemoody", 40 | "contributions": [ 41 | "design" 42 | ] 43 | }, 44 | { 45 | "login": "briangow", 46 | "name": "Brian Gow", 47 | "avatar_url": "https://avatars.githubusercontent.com/u/4754434?v=4", 48 | "profile": "https://github.com/briangow", 49 | "contributions": [ 50 | "design" 51 | ] 52 | }, 53 | { 54 | "login": "danamouk", 55 | "name": "danamouk", 56 | "avatar_url": "https://avatars.githubusercontent.com/u/49573192?v=4", 57 | "profile": "https://github.com/danamouk", 58 | "contributions": [ 59 | "design" 60 | ] 61 | } 62 | ], 63 | "contributorsPerLine": 7, 64 | "projectName": 
"mimic_wfdb_tutorials", 65 | "projectOwner": "wfdb", 66 | "repoType": "github", 67 | "repoHost": "https://github.com", 68 | "skipCi": true 69 | } 70 | -------------------------------------------------------------------------------- /content/mimic/physionet.md: -------------------------------------------------------------------------------- 1 | # PhysioNet 2 | 3 | ## Overview 4 | 5 | - [PhysioNet](https://physionet.org/) is a data sharing platform built and maintained at the Laboratory of Computational Physiology at MIT 6 | - Established as an outreach component of a research project in 1999 7 | - Rebuilt from scratch in 2019 following "[FAIR principles](https://www.go-fair.org/fair-principles/)" 8 | - \>50,000 registered, active users 9 | - Supports access control for sensitive data via data use agreements and training 10 | 11 | ![](../images/physionet.png) 12 | 13 | --- 14 | 15 | ## Recommended repository 16 | 17 | - PhysioNet is a recommended repository for a number of journals: 18 | - Springer Nature 19 | - PLOS 20 | - eLife 21 | 22 | ![](../images/springer_nature2.png) 23 | _Source: [Springer Nature](https://www.springernature.com/gp/authors/research-data-policy/repositories-health/12327108)_ 24 | 25 | --- 26 | 27 | ## Access control 28 | 29 | - **Open data** 30 | - **Restricted**: 31 | - Data Use Agreement 32 | - **Credentialed**: 33 | - Data Use Agreement 34 | - Training in human subject research 35 | - Identity check 36 | - **Contributor-managed**: 37 | - Data Use Agreement 38 | - Approval of the contributor 39 | 40 | ![](../images/access.png) 41 | 42 | --- 43 | 44 | ## Enhanced discovery 45 | 46 | - Structured metadata is distributed to search indexes 47 | - Project reuse can be tracked through unique identifiers or DOI's 48 | - Strongly support data objects as research outputs in their own right 49 | 50 | ![](../images/eicu_discovery.png) 51 | 52 | --- 53 | 54 | ## Integrated viewers 55 | 56 | - Data such as waveforms can be viewed directly in the browser 
57 | 58 | ![](../images/lightwave.png) 59 | 60 | --- 61 | 62 | ## Cloud integration 63 | 64 | - Enables analysis without the need to download files 65 | 66 | ![](../images/google_cloud.png) 67 | 68 | --- 69 | 70 | ## Diverse, active community 71 | 72 | - Regular workshops, challenges, and datathons based around PhysioNet datasets 73 | 74 | ![](../images/sccm_datathon3.png) -------------------------------------------------------------------------------- /content/workshop/synopsis.md: -------------------------------------------------------------------------------- 1 | # Synopsis 2 | 3 | The field of Biomedical Signal Processing stands to benefit greatly from open research. Reproducible studies, accompanied by code and data, allow others to build on the state-of-the-art and to quickly translate between academia and industry. Openly available tools are widely used. Indeed, the MIMIC Waveform Database (WFDB) was referenced in 125 EMBC papers between 2016 and 2020. 4 | 5 | The aim of this interactive workshop is to provide participants with the knowledge, skills and tools required to conduct open research in the field of Biomedical Signal Processing. It will include a formal announcement of the release of the MIMIC-IV Waveform Database, and hands-on experience of using MIMIC data for cuffless blood pressure estimation. 6 | 7 | Firstly, the workshop will provide participants with an understanding of publicly available datasets containing physiological signals, focusing on those on PhysioNet. This will include an overview of the MIMIC Waveform Database, including its clinical context, structure and formatting. 8 | 9 | Secondly, the workshop will provide participants with essential skills for conducting high quality research with openly available data. Participants will work through interactive tutorials in the Python programming language using the WFDB Toolbox, a library of Biomedical Signal Processing tools. 
The tutorials will introduce key aspects of signal processing, including: data exploration, selection and extraction; pre-processing; feature extraction; modelling; analysis; and interpretation. 10 | 11 | Finally, participants will work in groups on a cuffless blood pressure estimation case study. This will provide hands-on experience and opportunity for networking. Example code will be provided, and researchers will be on hand to answer questions. 12 | 13 | The workshop will be led by researchers who develop MIMIC and the WFDB toolboxes, and who have a track-record in reproducible research. It will be of great interest to students, researchers, and engineers: bring a laptop to participate fully. 14 | 15 | 16 | -------------------------------------------------------------------------------- /content/mimic/structure.md: -------------------------------------------------------------------------------- 1 | # Database Structure 2 | 3 | ## MIMIC-IV modules 4 | 5 | MIMIC-IV is a modular dataset, comprising a core set of clinical data (MIMIC-IV Clinical) that can be linked to datasets such as: 6 | 7 | - MIMIC-IV-ED: emergency department data; 8 | - MIMIC-CXR: chest X-ray images; 9 | - MIMIC-IV-ECG: 10-second 12-lead diagnostic ECGs; 10 | - MIMIC-IV-waveform: varying-length, high-time-resolution waveforms such as ECG and PPG. 11 | 12 | Typically the datasets are linked by unique patient ID (`subject_id`) and unique hospital stay ID (`hadm_id`). For the purposes of this workshop, we will focus on the MIMIC-IV waveform dataset. 13 | 14 | ## Available monitor data 15 | 16 | The MIMIC-IV Waveform Database consists of raw data that is sampled by the bedside monitor. The available types of data vary from one patient to another. 17 | 18 | ### ECG 19 | 20 | Virtually all patients have a continuous ECG monitor, measuring electrical activity in the heart. For MIMIC-IV patients, typically two or three channels are measured (one or two limb leads, one chest lead.) 
Each channel is sampled at 250 samples per second. 21 | 22 | Measurements derived from the ECG include: 23 | - Heart rate (averaged once per 1.024 seconds) 24 | - Instantaneous ("beat to beat") heart rate 25 | - ST elevation 26 | - QT interval 27 | 28 | The same electrodes are also used to measure impedance across the chest ("Resp", 62.5 samples per second), which is used to derive respiration rate ("RR"). 29 | 30 | ### PPG 31 | 32 | Virtually all patients have a PPG (photoplethysmogram) sensor, measuring blood oxygen in the fingertip or other extremity. This sensor provides: 33 | - A continuous waveform ("Pleth", 125 samples per second) 34 | - Average oxygen saturation ("SpO2", once per 1.024 seconds) 35 | - Perfusion index ("Perf") 36 | - Pulse rate ("Pulse (SpO2)") 37 | 38 | ### Blood pressure 39 | 40 | Blood pressure is measured using an automatic cuff at set intervals (e.g. every 5, 15, 30, or 60 minutes). This is recorded as "NBPs", "NBPd", and "NBPm" (systolic, diastolic, and mean). 41 | 42 | Some patients also have a continuous, invasive arterial pressure sensor, which provides: 43 | - A pressure waveform ("ABP", 125 samples per second) 44 | - Systolic pressure ("ABPs", once per 1.024 seconds) 45 | - Diastolic pressure ("ABPd") 46 | - Mean pressure ("ABPm") 47 | - Pulse rate ("Pulse (ABP)") 48 | 49 | ### Other measurements 50 | 51 | Other measurements may be collected depending on the patient, such as: 52 | 53 | - Temperature ("Tblood", "Tcore", "Tesoph", etc.) 54 | - Other pressure waveforms ("CVP", "ICP", etc.) 
55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # vscode 132 | .vscode/ 133 | 134 | # MACOS DS_Store 135 | .DS_Store 136 | 137 | # Jupyter Book build directory 138 | _build/ 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MIMIC WFDB Tutorials 2 | 3 | [![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-) 4 | 5 | 6 | This repository contains a Jupyter book called 'MIMIC WFDB Tutorials', which presents tutorials on using the MIMIC Waveform Database for Biomedical Signal Processing. 7 | 8 | The book is available [here](https://wfdb.github.io/mimic_wfdb_tutorials/intro.html). 9 | 10 | ## Contributors ✨ 11 | 12 | Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |

Peter H Charlton

🎨

Tom Pollard

🎨

Elisa Mejía

🎨

bemoody

🎨

Brian Gow

🎨

danamouk

🎨
27 | 28 | 29 | 30 | 31 | 32 | 33 | This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! 34 | 35 | ## Development 36 | 37 | This website was created with [JupyterBook](https://jupyterbook.org/). To set up a local development environment, follow the steps below: 38 | 39 | 1. Navigate to the project directory (e.g. `mimic_wfdb_tutorials`) 40 | 2. Install the required packages with `pip install -r requirements.txt` (preferably in a virtual environment using something like venv, virtualenv, conda etc.) 41 | 3. Change to the directory with the content (e.g. `cd content`) 42 | 4. Run `jupyter-book build --all ./` from within this directory to build the book. 43 | 5. The HTML bookfiles should have been created in a `_build` folder. 44 | -------------------------------------------------------------------------------- /content/additional-resources.md: -------------------------------------------------------------------------------- 1 | # Additional Resources 2 | 3 | This page provides some additional resources for further reading after the Workshop which may be of interest. 4 | 5 | ## MIMIC 6 | 7 | Further information on the MIMIC Database is available at: 8 | - [The MIMIC Database homepage](https://mimic.mit.edu/) 9 | - [A brief introduction to MIMIC on the Laboratory for Computational Physiology's website](https://lcp.mit.edu/mimic) 10 | - [A textbook on MIMIC](https://doi.org/10.1007/978-3-319-43742-2) 11 | - [The article describing MIMIC-III](https://doi.org/10.1038/sdata.2016.35) 12 | 13 | ## The WFDB Specifications and Toolbox 14 | 15 | - [WFDB Specifications](https://github.com/wfdb/wfdb-spec/): Documentation of the specifications for Waveform Database (WFDB) files and concepts. 16 | - [WFDB Software Package](https://doi.org/10.13026/gjvw-1m31): The original software package written in C. 
17 | - [WFDB Python Package](https://wfdb.readthedocs.io/en/stable/) 18 | - [WFDB Toolbox for Matlab](https://archive.physionet.org/physiotools/matlab/wfdb-swig-matlab/new_version.shtml) 19 | 20 | ## Open Research 21 | 22 | - [The Turing Way handbook to reproducible, ethical and collaborative data science](https://the-turing-way.netlify.app/), which includes guidelines on [Open Research](https://the-turing-way.netlify.app/reproducible-research/open.html). 23 | - [The Software Sustainability Institute](https://www.software.ac.uk/), including [Guides](https://www.software.ac.uk/resources/guides) on ensuring software sustainability. 24 | 25 | ## Software development and management 26 | 27 | - An introduction to Python is available [here](https://prodigiouspython.github.io/ProdigiousPython/intro.html). 28 | - Tutorials on Git and GitHub, which are used for version control and collaborative code development, are available [here](https://swcarpentry.github.io/git-novice/) and [here](https://www.youtube.com/playlist?list=PL4cUxeGkcC9goXbgTDQ0n_4TBzOO0ocPR). 29 | 30 | ## Cuffless Blood Pressure estimation 31 | 32 | - [Review of Cuffless Blood Pressure Measurement](https://doi.org/10.1146/annurev-bioeng-110220-014644): a review of the topic by leaders in the field. 33 | - [Evaluating Cuffless Blood Pressure Devices](https://doi.org/10.1161/HYPERTENSIONAHA.121.17747): a review on "the capabilities and limitations of emerging cuffless BP measurement devices", with proposals of how to evaluate such devices. 34 | 35 | ## Photoplethysmography 36 | 37 | - [Photoplethysmography Signal Processing and Synthesis](https://peterhcharlton.github.io/publication/ppg_sig_proc_chapter/): a textbook chapter providing a comprehensive overview of PPG signal processing. 
38 | - [Wearable photoplethysmography for cardiovascular monitoring](https://doi.org/10.1109/JPROC.2022.3149785): a review paper detailing different aspects of wearable photoplethysmography, including signal processing and clinical applications. 39 | - [Establishing best practices in photoplethysmography signal acquisition and processing](https://doi.org/10.1088/1361-6579/ac6cc4): an article discussing whether it would be possible and beneficial to establish best practices for photoplethysmography signal acquisition and processing. 40 | 41 | ## Datasets containing physiological signals 42 | 43 | - [Physionet](https://www.physionet.org/): PhysioNet hosts many datasets containing physiological signals, listed [here](https://www.physionet.org/about/database/). 44 | - [Photoplethysmography datasets](https://peterhcharlton.github.io/post/ppg_datasets/): There are several publicly available datasets containing photoplethysmogram signals, many of which are listed [here](https://peterhcharlton.github.io/post/ppg_datasets/). 45 | -------------------------------------------------------------------------------- /content/case-study.md: -------------------------------------------------------------------------------- 1 | # Case Study 2 | 3 | In the tutorials we have explored different steps which would commonly be encountered when developing techniques for cuffless blood pressure estimation. 4 | 5 | For the remainder of the workshop we would like you to work together in groups to train and test a model for estimating BP from the PPG. 6 | 7 | Forming groups: 8 | - Please could any coders spread themselves between groups 9 | - We recommend groups of between 4 and 6 people 10 | 11 | A suggested workflow is provided below - feel free to use this or ignore it! 12 | 13 | We will be on hand to help, and we will ask groups to share their experiences shortly before the end of the session. 14 | 15 | # Suggested workflow 16 | 17 | I would suggest the following: 18 | 1. 
Loop through ICU stays, determining whether each stay meets the inclusion criteria for the study (contains at least 10 minutes of simultaneous PPG and ABP signals). The [Data Visualisation tutorial](https://wfdb.io/mimic_wfdb_tutorials/tutorial/notebooks/data-visualisation.html) provides scripts for doing this (except that it only runs on a specified ICU stay, and doesn't loop through stays). Continue looping until 60 ICU stays have been identified which meet the inclusion criteria. 19 | 2. Extract 10 minutes of simultaneous PPG and ABP signals from each ICU stay which meets the inclusion criteria. 20 | 3. Run signal processing scripts to extract a parameter from the shape of each PPG pulse wave (let's call the parameter the stiffness index - SI). This will produce a vector of values for each ICU stay (with a length of approximately 600 - i.e. one value per heart beat - which varies from one stay to the next), and a vector of corresponding time stamps. 21 | 4. Run signal processing scripts to extract systolic and diastolic blood pressure (SBP and DBP) values from each ABP pulse wave. Similarly, this will produce two vectors of values for each ICU stay, one for systolic blood pressure, and one for diastolic blood pressure, and a vector of corresponding time stamps. 22 | 5. Calculate an average (e.g. median) value of the SI for each 30 second window, and repeat for SBP and DBP, ensuring that the same timings are used for the SI, SBP and DBP windows. For each ICU stay, this will produce three vectors each of length 20 (because the 10 minute segments can be split into 20 non-overlapping 30-second windows). The three vectors will contain SI, SBP and DBP respectively. 23 | 6. Create 'overall' vectors by concatenating each of the three vectors across all ICU stays. This will generate three vectors each of length 1200 (i.e. 20 values for 60 ICU stays). In addition, create a vector of ICU stays (i.e. a vector of length 1200 which contains the ICU stay ID from which each window was obtained).
24 | 7. Split the data into training and testing data, at the ICU stay level. E.g. the first 600 values (corresponding to the first 30 ICU stays) are designated as training data, and the remaining 600 values are designated as testing data. 25 | 8. Train a linear regression model on the training data to estimate either SBP (or DBP) from SI. The default behaviour should be to use SBP, but it would be nice to include the option to change this to DBP. 26 | 9. Test the performance of the model on the testing data: 27 | - Use the model to estimate SBP (or DBP) from each SI value in the testing data. This should produce a vector of estimated SBP (or DBP) values of length 600. 28 | - Calculate the errors between the estimated and reference SBP (or DBP) values (using error = estimated - reference). 29 | - Calculate error statistics for the entire testing dataset. e.g. mean absolute error, bias (i.e. mean error), limits of agreement (i.e. 1.96 * standard deviation of errors). 30 | -------------------------------------------------------------------------------- /content/workshop.md: -------------------------------------------------------------------------------- 1 | # Workshop 2 | 3 | ## Welcome 4 | 5 | ### Introductions 6 | 7 | This workshop is being run by researchers from MIT, the University of Cambridge, and City, University of London. You can find details of the individuals who have contributed [here](https://github.com/wfdb/mimic_wfdb_tutorials#contributors-). 8 | 9 | ### Outline 10 | 11 | The workshop brings together three themes: 12 | 13 | **Open research:** Facilitating high quality research through the sharing of data and code. This may include the use of online repositories such as [PhysioNet](https://physionet.org/), collaborative code development using tools and platforms such as Git and [GitHub](https://github.com/), and making research reproducible through the sharing of the data and code used in studies. 
Indeed, the tools used in this workshop are hosted in a GitHub repository [here](https://github.com/wfdb/mimic_wfdb_tutorials). 14 | 15 | **Biomedical signal processing:** Research into techniques to derive information from physiological signals, often for health or well-being purposes. In this workshop, we'll be referring to physiological signals which are sampled many times per second, such as the electrocardiogram (ECG). 16 | 17 | ![PPG signals at rest and during exercise](https://upload.wikimedia.org/wikipedia/commons/8/8a/Photoplethysmogram_signals_at_rest_and_during_exercise.svg) 18 | 19 | Source: _Charlton PH, [Photoplethysmogram signals at rest and during exercise](https://commons.wikimedia.org/wiki/File:Photoplethysmogram_signals_at_rest_and_during_exercise.svg), Wikimedia Commons (CC BY 4.0)._ 20 | 21 | **Cuffless blood pressure estimation:** Estimating blood pressure from physiological signals which can be acquired unobtrusively without the use of a blood pressure cuff. Broadly, there are three approaches for cuffless blood pressure estimation. These are based on the observation that the speed with which the pulse wave propagates increases with blood pressure: 22 | - measuring the pulse transit time (PTT) between two arterial pulse waves (one closer to the heart and one further away). 23 | - measuring the pulse arrival time (PAT) between a marker of ventricular contraction, and the arrival of a pulse wave (preferably at a peripheral site) 24 | - estimating BP from the shape of a pulse wave (such as a photoplethysmography, PPG, pulse wave), since changes in pulse wave velocity and BP influence the shapes of pulse waves. 
25 | 26 | ![Deriving indicators of BP](https://journals.physiology.org/cms/10.1152/ajpheart.00392.2021/asset/images/large/ajpheart.00392.2021_f003.jpeg) 27 | 28 | Source: _Charlton PH et al., [Assessing hemodynamics from the photoplethysmogram to gain insights into vascular age: a review from VascAgeNet](https://doi.org/10.1152/ajpheart.00392.2021), AJP Heart Circ, 2022 (CC BY 4.0)._ 29 | 30 | ### Questions 31 | 32 | Consider the following to familiarise yourself with the concepts of this workshop: 33 | - Have you used publicly available data or code before? 34 | - Have you shared data or code from your research? 35 | - How would you rate your biomedical signal processing skills? 36 | - Have you ever worked on estimating BP from PPG signals? 37 | 38 | ## In-person event 39 | 40 | These resources were designed for the following workshop at the [2022 IEEE EMBC Conference](https://embc.embs.org/2022/) in Glasgow, UK: 41 | 42 | **'Open research in Biomedical Signal Processing: Cuffless Blood Pressure Estimation Using the MIMIC-IV Database'** 43 | 44 | - 11th July 2022 45 | - 08.30 - 12.30 46 | - Boisdale-2, Ground Floor (as shown in the Ground Level floorplan [here](https://www.sec.co.uk/organise-an-event/capacities-dimensions)) 47 | - Scottish Event Campus (SEC) Centre 48 | 49 | Follow the links on the left for further details of the workshop. 50 | -------------------------------------------------------------------------------- /content/about/about-maintenance.md: -------------------------------------------------------------------------------- 1 | # Maintenance 2 | 3 | ## Editing the book 4 | 5 | There are two ways to edit the book: 6 | ```{dropdown} **1. Edit online:** Open to all 7 | - Submit a proposed edit using the instructions provided above under ['Contributing to the book'](#contributing-to-the-book). 8 | - This will be reviewed in due course. 9 | ``` 10 | ```{dropdown} **2. 
Edit on a local computer:** Only open to project administrators 11 | - If you don't already have the _mimic_wfdb_tutorials_ repository on your computer, then clone the repository 12 | 13 | `cd /Users/petercharlton/Documents/GitHub/; git clone https://github.com/wfdb/mimic_wfdb_tutorials` 14 | - If you do already have the repository, then pull the latest version: 15 | 16 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials; git pull https://github.com/wfdb/mimic_wfdb_tutorials main` 17 | - Make edits to the files on a local computer: 18 | - `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials` - make the current directory the repo directory. 19 | - `git checkout -b <branch-name>` - Creates a new branch on which to make the edits (specified by `<branch-name>`), and makes it the current branch. 20 | - edit the files ([Atom](https://atom.io/) is a helpful text editor for this). 21 | - `git add .` - adds all changed files to the staging area. 22 | - `git commit -m "<commit message>"` - commit the changes to the current branch. 23 | - `git push https://github.com/wfdb/mimic_wfdb_tutorials <branch-name>` - pushes the changes to the remote repo on GitHub. 24 | - Log in to GitHub via a web browser, and go to the [repo home page](https://github.com/wfdb/mimic_wfdb_tutorials). Assuming you have access, then you should see a message at the top of the page allowing you to create a pull request, to pull the changes from your new branch over to the main branch.
25 | 26 | _The following are legacy instructions, which may or may not still be required when making changes to a Jupyter notebook:_ 27 | 28 | - Upload the files through a git push (as detailed [here](https://jupyterbook.org/start/publish.html#create-an-online-repository-for-your-book)): 29 | 30 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials; git add ./*; git commit -m "brief edit"; git push` 31 | - Build the book locally (as detailed [here](https://jupyterbook.org/start/build.html#build-your-books-html)): 32 | 33 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials/; jupyter-book build --path-output . content` 34 | - Upload the built book to GitHub pages (as detailed [here](https://jupyterbook.org/start/publish.html#publish-your-book-online-with-github-pages)): 35 | 36 | `cd /Users/petercharlton/Documents/GitHub/mimic_wfdb_tutorials/; ghp-import -n -p -f _build/html` 37 | ``` 38 | 39 | ## Recognising contributors 40 | 41 | Contributors to the Book who have GitHub accounts can be recognised using the 'All Contributors' app (see details [here](https://allcontributors.org/docs/en/bot/usage)). 42 | 43 | ## Creating the book 44 | 45 | The book was created as follows (largely following the instructions provided [here](https://jupyterbook.org/start/your-first-book.html)): 46 | ```{dropdown} **Steps to create the book:** 47 | 1. Install Jupyter book via conda-forge (as detailed [here](https://jupyterbook.org/start/overview.html)) 48 | 2. Create a template book (as detailed [here](https://jupyterbook.org/start/create.html)) 49 | 3. Modify the template to include content from Peter Charlton's original project guidelines (available [here](https://peterhcharlton.github.io/info/tools/project_guidelines.html)). 50 | 4. Build the book (as detailed [here](https://jupyterbook.org/start/build.html)). 51 | 5. 
Publish the book online (storing the source files in a GitHub repository, and publishing the book using GitHub pages, as detailed [here](https://jupyterbook.org/start/publish.html)). 52 | ``` 53 | -------------------------------------------------------------------------------- /content/mimic/formatting.md: -------------------------------------------------------------------------------- 1 | # Waveform Data Formats 2 | 3 | The waveform database is organized into "records". Each record represents a single patient and roughly corresponds to a single ICU stay (not always, because the bedside monitor may be temporarily shut off.) Each record is stored in a separate subdirectory. 4 | 5 | To avoid providing information that could identify individual patients, the record does not include any actual date or time information. Instead, measurements are recorded according to the "elapsed time" from the beginning of the record. To allow cross-referencing events with the other MIMIC-IV modules, the *surrogate date and time* for the start of the record are also provided. 6 | 7 | An example of the file structure is shown below. Here there are two patients (`subject_id` 10014354 and 10039708). There is one record (81739927) belonging to the first patient, and two records (83411188 and 85583557) belonging to the second. 8 | 9 | ``` 10 | waves 11 | └── p100 12 | ├── p10014354 13 | │ └── 81739927 14 | │ ├── 81739927.dat 15 | │ ├── 81739927_0000.hea 16 | │ ├── 81739927_0001.hea 17 | │ ├── 81739927_0001e.dat 18 | │ ├── 81739927_0001r.dat 19 | │ ├── 81739927_0002.hea 20 | │ ├── 81739927_0002e.dat 21 | │ ├── 81739927_0002p.dat 22 | │ ├── 81739927_0002r.dat 23 | │ ├── ... 24 | │ └── 81739927n.csv.gz 25 | └── p10039708 26 | ├── 83411188 27 | │ ├── 83411188.hea 28 | │ ├── ... 29 | │ └── 83411188n.csv.gz 30 | └── 85583557 31 | ├── 85583557.hea 32 | ├── ... 
33 | └── 85583557n.csv.gz 34 | ``` 35 | 36 | ## Numerics 37 | 38 | "Numerics" are defined as measurements that are sampled irregularly or infrequently (less than once per second.) These measurements are stored as a single table, such as [83411188n.csv.gz](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188n.csv.gz). 39 | 40 | This file is a gzip-compressed CSV file, which can be loaded using software packages such as [Pandas](https://pandas.pydata.org/), or it can be unpacked using [gzip](https://www.gnu.org/software/gzip/) and parsed as you would parse any CSV file. Note that in contrast to most other MIMIC-IV data tables, the list of *columns* in this table are not the same from one patient to another. 41 | 42 | Note that "elapsed time" for numeric values is measured in counter ticks (1/999.52 second, or about one millisecond.) 43 | 44 | ## Waveforms 45 | 46 | "Waveforms" are defined as measurements that are sampled regularly at high resolution (62.47 samples per second or more.) These measurements are stored as a set of files in WFDB (Waveform Database) format. 47 | 48 | For the sake of storage and processing efficiency, waveforms are broken into multiple *segments* representing different time intervals. It's common for some signals not to be available for the entire duration of a patient's ICU stay, but within a given segment, the available signals are sampled continuously and the list of available signals doesn't change. 49 | 50 | A segment, in turn, consists of a *header file* (such as [83411188_0001.hea](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188_0001.hea)) and one or more *signal files* (such as [83411188_0001e.dat](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188_0001e.dat) and [83411188_0001r.dat](https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188_0001r.dat)).
51 | 52 | In general, you do not need to parse these files yourself, and it is easiest to use one of the existing software packages for doing so: the [WFDB Python Package](https://github.com/MIT-LCP/wfdb-python) or the original [WFDB Software Package](https://physionet.org/content/wfdb/). Data can also be converted into other formats using tools such as `rdsamp` or `wfdb2mat` from the WFDB Software Package. 53 | -------------------------------------------------------------------------------- /content/references.bib: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | @inproceedings{holdgraf_evidence_2014, 5 | address = {Brisbane, Australia, Australia}, 6 | title = {Evidence for {Predictive} {Coding} in {Human} {Auditory} {Cortex}}, 7 | booktitle = {International {Conference} on {Cognitive} {Neuroscience}}, 8 | publisher = {Frontiers in Neuroscience}, 9 | author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Knight, Robert T.}, 10 | year = {2014} 11 | } 12 | 13 | @article{holdgraf_rapid_2016, 14 | title = {Rapid tuning shifts in human auditory cortex enhance speech intelligibility}, 15 | volume = {7}, 16 | issn = {2041-1723}, 17 | url = {http://www.nature.com/doifinder/10.1038/ncomms13654}, 18 | doi = {10.1038/ncomms13654}, 19 | number = {May}, 20 | journal = {Nature Communications}, 21 | author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Rieger, Jochem W. and Crone, Nathan and Lin, Jack J. and Knight, Robert T. and Theunissen, Frédéric E.}, 22 | year = {2016}, 23 | pages = {13654}, 24 | file = {Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:C\:\\Users\\chold\\Zotero\\storage\\MDQP3JWE\\Holdgraf et al. 
- 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:application/pdf} 25 | } 26 | 27 | @inproceedings{holdgraf_portable_2017, 28 | title = {Portable learning environments for hands-on computational instruction using container-and cloud-based technology to teach data science}, 29 | volume = {Part F1287}, 30 | isbn = {978-1-4503-5272-7}, 31 | doi = {10.1145/3093338.3093370}, 32 | abstract = {© 2017 ACM. There is an increasing interest in learning outside of the traditional classroom setting. This is especially true for topics covering computational tools and data science, as both are challenging to incorporate in the standard curriculum. These atypical learning environments offer new opportunities for teaching, particularly when it comes to combining conceptual knowledge with hands-on experience/expertise with methods and skills. Advances in cloud computing and containerized environments provide an attractive opportunity to improve the effciency and ease with which students can learn. This manuscript details recent advances towards using commonly-Available cloud computing services and advanced cyberinfrastructure support for improving the learning experience in bootcamp-style events. We cover the benets (and challenges) of using a server hosted remotely instead of relying on student laptops, discuss the technology that was used in order to make this possible, and give suggestions for how others could implement and improve upon this model for pedagogy and reproducibility.}, 33 | booktitle = {{ACM} {International} {Conference} {Proceeding} {Series}}, 34 | author = {Holdgraf, Christopher Ramsay and Culich, A. and Rokem, A. and Deniz, F. and Alegro, M. 
and Ushizima, D.}, 35 | year = {2017}, 36 | keywords = {Teaching, Bootcamps, Cloud computing, Data science, Docker, Pedagogy} 37 | } 38 | 39 | @article{holdgraf_encoding_2017, 40 | title = {Encoding and decoding models in cognitive electrophysiology}, 41 | volume = {11}, 42 | issn = {16625137}, 43 | doi = {10.3389/fnsys.2017.00061}, 44 | abstract = {© 2017 Holdgraf, Rieger, Micheli, Martin, Knight and Theunissen. Cognitive neuroscience has seen rapid growth in the size and complexity of data recorded from the human brain as well as in the computational tools available to analyze this data. This data explosion has resulted in an increased use of multivariate, model-based methods for asking neuroscience questions, allowing scientists to investigate multiple hypotheses with a single dataset, to use complex, time-varying stimuli, and to study the human brain under more naturalistic conditions. These tools come in the form of “Encoding” models, in which stimulus features are used to model brain activity, and “Decoding” models, in which neural features are used to generated a stimulus output. Here we review the current state of encoding and decoding models in cognitive electrophysiology and provide a practical guide toward conducting experiments and analyses in this emerging field. Our examples focus on using linear models in the study of human language and audition. We show how to calculate auditory receptive fields from natural sounds as well as how to decode neural recordings to predict speech. The paper aims to be a useful tutorial to these approaches, and a practical introduction to using machine learning and applied statistics to build models of neural activity. The data analytic approaches we discuss may also be applied to other sensory modalities, motor systems, and cognitive systems, and we cover some examples in these areas. 
In addition, a collection of Jupyter notebooks is publicly available as a complement to the material covered in this paper, providing code examples and tutorials for predictive modeling in python. The aimis to provide a practical understanding of predictivemodeling of human brain data and to propose best-practices in conducting these analyses.}, 45 | journal = {Frontiers in Systems Neuroscience}, 46 | author = {Holdgraf, Christopher Ramsay and Rieger, J.W. and Micheli, C. and Martin, S. and Knight, R.T. and Theunissen, F.E.}, 47 | year = {2017}, 48 | keywords = {Decoding models, Encoding models, Electrocorticography (ECoG), Electrophysiology/evoked potentials, Machine learning applied to neuroscience, Natural stimuli, Predictive modeling, Tutorials} 49 | } 50 | 51 | @book{ruby, 52 | title = {The Ruby Programming Language}, 53 | author = {Flanagan, David and Matsumoto, Yukihiro}, 54 | year = {2008}, 55 | publisher = {O'Reilly Media} 56 | } 57 | -------------------------------------------------------------------------------- /content/tutorial/notebooks/fiducial_point_functions.py: --------------------------------------------------------------------------------
import scipy.signal as sp
import numpy as np
from matplotlib import pyplot as plt

def fiducial_points(x: np.ndarray, pks: np.ndarray, fs: float, vis: bool) -> dict:
    """
    Description: Pulse detection and correction from pulsatile signals
    Inputs:  x, array with pulsatile signal [user defined units]
             pks, array with the position of the peaks [number of samples]
             fs, sampling rate of signal [Hz]
             vis, visualisation option [True, False]
    Outputs: fidp, dictionary with the positions of several fiducial points for the cardiac cycles [number of samples]

    Fiducial points:  1: Systolic peak (pks)
                      2: Onset, as the minimum before the systolic peak (ons)
                      3: Onset, using the tangent intersection method (ti)
                      4: Diastolic peak (dpk)
                      5: Maximum slope (m1d)
                      6: a point from second derivative PPG (a2d)
                      7: b point from second derivative PPG (b2d)
                      8: c point from second derivative PPG (c2d)
                      9: d point from second derivative PPG (d2d)
                      10: e point from second derivative PPG (e2d)
                      11: p1 from the third derivative PPG (p1)
                      12: p2 from the third derivative PPG (p2)

    Libraries: NumPy (as np), SciPy (Signal, as sp), Matplotlib (PyPlot, as plt)

    Version: 1.0 - June 2022

    Developed by: Elisa Mejía-Mejía
                  City, University of London

    Edited by: Peter Charlton (see "Added by PC")

    NOTE(review): the parameter fs is accepted but never referenced in the body;
    confirm whether the derivative filters below were meant to be scaled by it.
    """
    # First, second and third derivatives of the pulse wave, estimated with a
    # Savitzky-Golay filter (window = 9 samples, polynomial order = 5).
    # NOTE(review): the window length is fixed in samples, so the effective
    # smoothing varies with the (unused) sampling rate fs -- confirm intended.
    d1x = sp.savgol_filter(x, 9, 5, deriv = 1)
    d2x = sp.savgol_filter(x, 9, 5, deriv = 2)
    d3x = sp.savgol_filter(x, 9, 5, deriv = 3)

    # Search in time series: Onsets between consecutive peaks.
    # Each onset is the global minimum of x between two consecutive systolic
    # peaks (ties resolved by keeping the earliest sample).
    ons = np.empty(0)
    for i in range(len(pks) - 1):
        start = pks[i]
        stop = pks[i + 1]
        ibi = x[start:stop]                    # signal over one inter-beat interval
        aux_ons, = np.where(ibi == np.min(ibi))
        if len(aux_ons) > 1:
            aux_ons = aux_ons[0]               # non-unique minimum: keep the first
        ind_ons = aux_ons.astype(int)
        ons = np.append(ons, ind_ons + start)  # convert to an index into x
    ons = ons.astype(int)

    # Search in time series: Diastolic peak and dicrotic notch between
    # consecutive onsets.  The dicrotic notch (dic) is placed at the largest
    # maximum of the second derivative after the systolic peak; the diastolic
    # peak (dia) at the nearest following minimum of the second derivative.
    dia = np.empty(0)
    dic = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        # NOTE(review): the trailing comma unpacks a single element, so this
        # assumes exactly one detected peak strictly between consecutive
        # onsets; it raises ValueError otherwise.
        ind_pks, = np.intersect1d(np.where(pks < stop), np.where(pks > start))
        ind_pks = pks[ind_pks]
        ibi_portion = x[ind_pks:stop]          # systolic peak -> next onset
        ibi_2d_portion = d2x[ind_pks:stop]
        aux_dic, _ = sp.find_peaks(ibi_2d_portion)
        aux_dic = aux_dic.astype(int)
        aux_dia, _ = sp.find_peaks(-ibi_2d_portion)
        aux_dia = aux_dia.astype(int)
        if len(aux_dic) != 0:
            ind_max, = np.where(ibi_2d_portion[aux_dic] == np.max(ibi_2d_portion[aux_dic]))
            aux_dic_max = aux_dic[ind_max]
            if len(aux_dia) != 0:
                # Signed distance of each d2x minimum from the notch candidate.
                nearest = aux_dia - aux_dic_max
                aux_dic = aux_dic_max
                dic = np.append(dic, (aux_dic + ind_pks).astype(int))
                ind_dia, = np.where(nearest > 0)   # keep only minima after the notch
                aux_dia = aux_dia[ind_dia]
                nearest = nearest[ind_dia]
                if len(nearest) != 0:
                    ind_nearest, = np.where(nearest == np.min(nearest))
                    aux_dia = aux_dia[ind_nearest]
                    dia = np.append(dia, (aux_dia + ind_pks).astype(int))
            else:
                dic = np.append(dic, (aux_dic_max + ind_pks).astype(int))
    dia = dia.astype(int)
    dic = dic.astype(int)

    # Search in D1: Maximum slope point, as the largest first-derivative peak
    # between each onset and the following systolic peak.
    m1d = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        # Same single-peak-per-beat assumption as above (see NOTE in dia/dic loop).
        ind_pks, = np.intersect1d(np.where(pks < stop), np.where(pks > start))
        ind_pks = pks[ind_pks]
        ibi_portion = x[start:ind_pks]
        ibi_1d_portion = d1x[start:ind_pks]
        aux_m1d, _ = sp.find_peaks(ibi_1d_portion)
        aux_m1d = aux_m1d.astype(int)
        if len(aux_m1d) != 0:
            ind_max, = np.where(ibi_1d_portion[aux_m1d] == np.max(ibi_1d_portion[aux_m1d]))
            aux_m1d_max = aux_m1d[ind_max]
            if len(aux_m1d_max) > 1:
                aux_m1d_max = aux_m1d_max[0]   # non-unique maximum: keep the first
            m1d = np.append(m1d, (aux_m1d_max + start).astype(int))
    m1d = m1d.astype(int)

    # Search in time series: Tangent intersection points.
    # The tangent drawn at the maximum-slope point is intersected with the
    # horizontal line through the onset amplitude:
    #   tip = m1d + (x[ons] - x[m1d]) / d1x[m1d]
    tip = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        ibi_portion = x[start:stop]
        ibi_1d_portion = d1x[start:stop]
        # Assumes exactly one m1d point per beat (single-element unpack).
        ind_m1d, = np.intersect1d(np.where(m1d < stop), np.where(m1d > start))
        ind_m1d = m1d[ind_m1d] - start         # beat-relative index of max slope
        aux_tip = np.round(((ibi_portion[0] - ibi_portion[ind_m1d])/ibi_1d_portion[ind_m1d]) + ind_m1d)
        aux_tip = aux_tip.astype(int)
        tip = np.append(tip, (aux_tip + start).astype(int))
    tip = tip.astype(int)

    # Search in D2: A, B, C, D and E points of the second-derivative pulse wave.
    a2d = np.empty(0)
    b2d = np.empty(0)
    c2d = np.empty(0)
    d2d = np.empty(0)
    e2d = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        ibi_portion = x[start:stop]
        ibi_1d_portion = d1x[start:stop]
        ibi_2d_portion = d2x[start:stop]
        # Here ind_m1d is an ABSOLUTE sample index into x (not beat-relative).
        ind_m1d = np.intersect1d(np.where(m1d > start),np.where(m1d < stop))
        ind_m1d = m1d[ind_m1d]
        aux_m2d_pks, _ = sp.find_peaks(ibi_2d_portion)
        aux_m2d_ons, _ = sp.find_peaks(-ibi_2d_portion)
        # a point: largest maximum of d2x within the beat, expected before the
        # maximum-slope point.
        ind_a, = np.where(ibi_2d_portion[aux_m2d_pks] == np.max(ibi_2d_portion[aux_m2d_pks]))
        ind_a = aux_m2d_pks[ind_a]
        # NOTE(review): ind_a is beat-relative but ind_m1d is absolute here,
        # whereas the e-point test below uses `ind_m1d - start`; this
        # comparison looks like it was meant to be `ind_a < ind_m1d - start`
        # -- confirm before relying on the a-point gating.
        if (ind_a < ind_m1d):
            a2d = np.append(a2d, ind_a + start)
        # b point: deepest minimum of d2x, required to lie after the a point.
        ind_b = np.where(ibi_2d_portion[aux_m2d_ons] == np.min(ibi_2d_portion[aux_m2d_ons]))
        ind_b = aux_m2d_ons[ind_b]
        if (ind_b > ind_a) and (ind_b < len(ibi_2d_portion)):
            b2d = np.append(b2d, ind_b + start)
        # e point: a d2x maximum after the maximum-slope point and within the
        # first 60% of the beat.
        ind_e, = np.where(aux_m2d_pks > ind_m1d - start)
        aux_m2d_pks = aux_m2d_pks[ind_e]
        ind_e, = np.where(aux_m2d_pks < 0.6*len(ibi_2d_portion))
        ind_e = aux_m2d_pks[ind_e]
        if len(ind_e) >= 1:
            if len(ind_e) >= 2:
                ind_e = ind_e[1]               # two or more candidates: keep the SECOND
            e2d = np.append(e2d, ind_e + start)
        # c point: largest d2x maximum before the e point; if none, fall back
        # to d1x minima after the b point.
        ind_c, = np.where(aux_m2d_pks < ind_e)
        if len(ind_c) != 0:
            ind_c_aux = aux_m2d_pks[ind_c]
            ind_c, = np.where(ibi_2d_portion[ind_c_aux] == np.max(ibi_2d_portion[ind_c_aux]))
            ind_c = ind_c_aux[ind_c]
            if len(ind_c) != 0:
                c2d = np.append(c2d, ind_c + start)
        else:
            aux_m1d_ons, _ = sp.find_peaks(-ibi_1d_portion)
            ind_c, = np.where(aux_m1d_ons < ind_e)
            ind_c_aux = aux_m1d_ons[ind_c]
            if len(ind_c) != 0:
                ind_c, = np.where(ind_c_aux > ind_b)
                ind_c = ind_c_aux[ind_c]
                if len(ind_c) > 1:
                    # NOTE(review): this reduces ind_c to a scalar, so the
                    # `len(ind_c)` test in the d-point block below raises
                    # TypeError whenever this fallback path is taken -- confirm.
                    ind_c = ind_c[0]
                c2d = np.append(c2d, ind_c + start)
        # d point: deepest d2x minimum between the c and e points; if none
        # exists, the d point coincides with the c point.
        if len(ind_c) != 0:
            ind_d = np.intersect1d(np.where(aux_m2d_ons < ind_e), np.where(aux_m2d_ons > ind_c))
            if len(ind_d) != 0:
                ind_d_aux = aux_m2d_ons[ind_d]
                ind_d, = np.where(ibi_2d_portion[ind_d_aux] == np.min(ibi_2d_portion[ind_d_aux]))
                ind_d = ind_d_aux[ind_d]
                if len(ind_d) != 0:
                    d2d = np.append(d2d, ind_d + start)
            else:
                ind_d = ind_c
                d2d = np.append(d2d, ind_d + start)
    a2d = a2d.astype(int)
    b2d = b2d.astype(int)
    c2d = c2d.astype(int)
    d2d = d2d.astype(int)
    e2d = e2d.astype(int)

    # Search in D3: P1 and P2 points of the third-derivative pulse wave.
    p1p = np.empty(0)
    p2p = np.empty(0)
    for i in range(len(ons) - 1):
        start = ons[i]
        stop = ons[i + 1]
        ibi_portion = x[start:stop]
        ibi_1d_portion = d1x[start:stop]
        ibi_2d_portion = d2x[start:stop]
        ibi_3d_portion = d3x[start:stop]
        # Absolute sample positions of the b, c, d and dicrotic-notch points
        # falling inside this beat (may be empty arrays).
        ind_b = np.intersect1d(np.where(b2d > start),np.where(b2d < stop))
        ind_b = b2d[ind_b]
        ind_c = np.intersect1d(np.where(c2d > start),np.where(c2d < stop))
        ind_c = c2d[ind_c]
        ind_d = np.intersect1d(np.where(d2d > start),np.where(d2d < stop))
        ind_d = d2d[ind_d]
        ind_dic = np.intersect1d(np.where(dic > start),np.where(dic < stop))
        ind_dic = dic[ind_dic]
        aux_p3d_pks, _ = sp.find_peaks(ibi_3d_portion)
        aux_p3d_ons, _ = sp.find_peaks(-ibi_3d_portion)
        # P1: first d3x maximum after the b point.
        if (len(aux_p3d_pks) != 0 and len(ind_b) != 0):
            ind_p1, = np.where(aux_p3d_pks > ind_b - start)
            if len(ind_p1) != 0:
                ind_p1 = aux_p3d_pks[ind_p1[0]]
                p1p = np.append(p1p, ind_p1 + start)
        # P2: d3x minimum selected relative to the d point, then optionally
        # snapped to a peak of x between P2 and the dicrotic notch.
        if (len(aux_p3d_ons) != 0 and len(ind_c) != 0 and len(ind_d) != 0):
            if ind_c == ind_d:
                # c and d coincide: first d3x minimum after d.
                ind_p2, = np.where(aux_p3d_ons > ind_d - start)
                ind_p2 = aux_p3d_ons[ind_p2[0]]
            else:
                # Otherwise: last d3x minimum before d.
                ind_p2, = np.where(aux_p3d_ons < ind_d - start)
                ind_p2 = aux_p3d_ons[ind_p2[-1]]
            if len(ind_dic) != 0:
                aux_x_pks, _ = sp.find_peaks(ibi_portion)
                if ind_p2 > ind_dic - start:
                    ind_between = np.intersect1d(np.where(aux_x_pks < ind_p2), np.where(aux_x_pks > ind_dic - start))
                else:
                    ind_between = np.intersect1d(np.where(aux_x_pks > ind_p2), np.where(aux_x_pks < ind_dic - start))
                if len(ind_between) != 0:
                    ind_p2 = aux_x_pks[ind_between[0]]
            p2p = np.append(p2p, ind_p2 + start)
    p1p = p1p.astype(int)
    p2p = p2p.astype(int)

    # Added by PC: Magnitudes of second derivative points, normalised by the
    # a-point amplitude (b/a, c/a, d/a, e/a ratios).
    # NOTE(review): the output arrays are sized len(b2d) but the loop runs over
    # len(d2d) and also indexes a2d, c2d and e2d; unless all five arrays have
    # equal length this raises IndexError (or pairs points from different
    # beats) -- confirm the intended common length.
    bmag2d = np.zeros(len(b2d))
    cmag2d = np.zeros(len(b2d))
    dmag2d = np.zeros(len(b2d))
    emag2d = np.zeros(len(b2d))
    for beat_no in range(0,len(d2d)):
        bmag2d[beat_no] = d2x[b2d[beat_no]]/d2x[a2d[beat_no]]
        cmag2d[beat_no] = d2x[c2d[beat_no]]/d2x[a2d[beat_no]]
        dmag2d[beat_no] = d2x[d2d[beat_no]]/d2x[a2d[beat_no]]
        emag2d[beat_no] = d2x[e2d[beat_no]]/d2x[a2d[beat_no]]

    # Added by PC: Refine the list of fiducial points to only include those
    # corresponding to beats for which a full set of points is available.
    # Each beat runs from ons[i] to off[i]; peaks outside [ons[0], off[-1]]
    # are dropped.
    off = ons[1:]
    ons = ons[:-1]
    if pks[0] < ons[0]:
        pks = pks[1:]
    if pks[-1] > off[-1]:
        pks = pks[:-1]

    # Visualise results: signal, first, second and third derivatives with the
    # detected fiducial points overlaid.
    if vis == True:
        fig, (ax1,ax2,ax3,ax4) = plt.subplots(4, 1, sharex = True, sharey = False, figsize=(10,10))
        fig.suptitle('Fiducial points')

        ax1.plot(x, color = 'black')
        ax1.scatter(pks, x[pks.astype(int)], color = 'orange', label = 'pks')
        ax1.scatter(ons, x[ons.astype(int)], color = 'green', label = 'ons')
        ax1.scatter(off, x[off.astype(int)], marker = '*', color = 'green', label = 'off')
        ax1.scatter(dia, x[dia.astype(int)], color = 'yellow', label = 'dia')
        ax1.scatter(dic, x[dic.astype(int)], color = 'blue', label = 'dic')
        # NOTE(review): copy-paste -- this series is the tangent-intersection
        # points, so the label should presumably read 'tip', not 'dic'.
        ax1.scatter(tip, x[tip.astype(int)], color = 'purple', label = 'dic')
        ax1.legend()
        ax1.set_ylabel('x')

        ax2.plot(d1x, color = 'black')
        ax2.scatter(m1d, d1x[m1d.astype(int)], color = 'orange', label = 'm1d')
        ax2.legend()
        ax2.set_ylabel('d1x')

        ax3.plot(d2x, color = 'black')
        ax3.scatter(a2d, d2x[a2d.astype(int)], color = 'orange', label = 'a')
        ax3.scatter(b2d, d2x[b2d.astype(int)], color = 'green', label = 'b')
        ax3.scatter(c2d, d2x[c2d.astype(int)], color = 'yellow', label = 'c')
        ax3.scatter(d2d, d2x[d2d.astype(int)], color = 'blue', label = 'd')
        ax3.scatter(e2d, d2x[e2d.astype(int)], color = 'purple', label = 'e')
        ax3.legend()
        ax3.set_ylabel('d2x')

        ax4.plot(d3x, color = 'black')
        ax4.scatter(p1p, d3x[p1p.astype(int)], color = 'orange', label = 'p1')
        ax4.scatter(p2p, d3x[p2p.astype(int)], color = 'green', label = 'p2')
        ax4.legend()
        ax4.set_ylabel('d3x')

        plt.subplots_adjust(left = 0.1,
                            bottom = 0.1,
                            right = 0.9,
                            top = 0.9,
                            wspace = 0.4,
                            hspace = 0.4)

    # Creation of dictionary of fiducial-point positions (sample indices into
    # x), plus the normalised second-derivative magnitudes.
    fidp = {'pks': pks.astype(int),
            'ons': ons.astype(int),
            'off': off.astype(int),  # Added by PC
            'tip': tip.astype(int),
            'dia': dia.astype(int),
            'dic': dic.astype(int),
            'm1d': m1d.astype(int),
            'a2d': a2d.astype(int),
            'b2d': b2d.astype(int),
            'c2d': c2d.astype(int),
            'd2d': d2d.astype(int),
            'e2d': e2d.astype(int),
            'bmag2d': bmag2d,
            'cmag2d': cmag2d,
            'dmag2d': dmag2d,
            'emag2d': emag2d,
            'p1p': p1p.astype(int),
            'p2p': p2p.astype(int)
            }

    return fidp
-------------------------------------------------------------------------------- /content/tutorial/notebooks/data-extraction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5d037743", 6 | "metadata": { 7 | "id": "5d037743" 8 | }, 9 | "source": [ 10 | "# Data Extraction\n", 11 | "\n", 12 | "In this tutorial we'll extract data from the MIMIC-IV Waveform Database.\n", 13 | "\n", 14 | "Our **objectives** are to:\n", 15 | "- Extract signals from one segment of a record.\n", 16 | "- Limit the segment to only the required duration of relevant signals (_i.e._ 10 min of
photoplethysmography and blood pressure signals)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "fe20dd08", 22 | "metadata": { 23 | "id": "fe20dd08" 24 | }, 25 | "source": [ 26 | "
\n", 27 | "

Context:\n", 28 | " In the Data Exploration tutorial we learnt how to identify segments of waveform data which are suitable for a particular research study (i.e. which have the required duration of the required signals). We extracted metadata for such a segment, providing high-level details of what is contained in the segment (e.g. which signals, their sampling frequency, and their duration). Now we will go a step further to extract signals for analysis.

\n", 29 | "
" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "fd8a0055", 35 | "metadata": { 36 | "id": "fd8a0055" 37 | }, 38 | "source": [ 39 | "---\n", 40 | "## Setup\n", 41 | "
\n", 42 | "

Resource: These steps are taken from the Data Exploration tutorial.

" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "f4e37777", 48 | "metadata": { 49 | "id": "f4e37777" 50 | }, 51 | "source": [ 52 | "- Specify the required Python packages" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "id": "10fdf08b", 59 | "metadata": { 60 | "id": "10fdf08b" 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "import sys\n", 65 | "from pathlib import Path" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "id": "ccce3426", 71 | "metadata": { 72 | "id": "ccce3426" 73 | }, 74 | "source": [ 75 | "- Install and import the WFDB toolbox" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "id": "06c8cc1f", 82 | "metadata": { 83 | "id": "06c8cc1f", 84 | "outputId": "747c5f42-e691-4981-fb53-c6f38007e456", 85 | "colab": { 86 | "base_uri": "https://localhost:8080/" 87 | } 88 | }, 89 | "outputs": [ 90 | { 91 | "output_type": "stream", 92 | "name": "stdout", 93 | "text": [ 94 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 95 | "Requirement already satisfied: wfdb==4.0.0 in /usr/local/lib/python3.7/dist-packages (4.0.0)\n", 96 | "Requirement already satisfied: SoundFile<0.12.0,>=0.10.0 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (0.10.3.post1)\n", 97 | "Requirement already satisfied: requests<3.0.0,>=2.8.1 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (2.23.0)\n", 98 | "Requirement already satisfied: pandas<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (1.3.5)\n", 99 | "Requirement already satisfied: scipy<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (1.4.1)\n", 100 | "Requirement already satisfied: matplotlib<4.0.0,>=3.2.2 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (3.2.2)\n", 101 | "Requirement already satisfied: numpy<2.0.0,>=1.10.1 in /usr/local/lib/python3.7/dist-packages (from wfdb==4.0.0) (1.21.6)\n", 102 
| "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (2.8.2)\n", 103 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (1.4.3)\n", 104 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (3.0.9)\n", 105 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (0.11.0)\n", 106 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (4.1.1)\n", 107 | "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas<2.0.0,>=1.0.0->wfdb==4.0.0) (2022.1)\n", 108 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib<4.0.0,>=3.2.2->wfdb==4.0.0) (1.15.0)\n", 109 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (2022.6.15)\n", 110 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (1.24.3)\n", 111 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (3.0.4)\n", 112 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.8.1->wfdb==4.0.0) (2.10)\n", 113 | "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.7/dist-packages (from SoundFile<0.12.0,>=0.10.0->wfdb==4.0.0) (1.15.0)\n", 114 | "Requirement already satisfied: pycparser in 
/usr/local/lib/python3.7/dist-packages (from cffi>=1.0->SoundFile<0.12.0,>=0.10.0->wfdb==4.0.0) (2.21)\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "!pip install wfdb==4.0.0\n", 120 | "import wfdb" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "524ed046", 126 | "metadata": { 127 | "id": "524ed046" 128 | }, 129 | "source": [ 130 | "- Specify the settings for the MIMIC-IV database" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 5, 136 | "id": "2915e121", 137 | "metadata": { 138 | "id": "2915e121" 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# The name of the MIMIC-IV Waveform Database on PhysioNet\n", 143 | "database_name = 'mimic4wdb/0.1.0'" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "id": "3ea79319", 149 | "metadata": { 150 | "id": "3ea79319" 151 | }, 152 | "source": [ 153 | "- Provide a list of segments which meet the requirements for the study (NB: these are copied from the end of the [Data Exploration Tutorial](https://wfdb.io/mimic_wfdb_tutorials/tutorial/notebooks/data-exploration.html))." 
154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 6, 159 | "id": "0ee58931", 160 | "metadata": { 161 | "id": "0ee58931" 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "segment_names = ['83404654_0005']\n", 166 | "segment_dirs = ['mimic4wdb/0.1.0/waves/p100/p10020306/83404654']" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "id": "0e90110a", 172 | "metadata": { 173 | "id": "0e90110a" 174 | }, 175 | "source": [ 176 | "- Specify a segment from which to extract data" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 9, 182 | "id": "05fb68d0", 183 | "metadata": { 184 | "id": "05fb68d0", 185 | "outputId": "776068c7-a586-4f0a-a6b3-3154bde5459a", 186 | "colab": { 187 | "base_uri": "https://localhost:8080/" 188 | } 189 | }, 190 | "outputs": [ 191 | { 192 | "output_type": "stream", 193 | "name": "stdout", 194 | "text": [ 195 | "Specified segment '83404654_0005' in directory: 'mimic4wdb/0.1.0/waves/p100/p10020306/83404654'\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "rel_segment_no = 0\n", 201 | "rel_segment_name = segment_names[rel_segment_no]\n", 202 | "rel_segment_dir = segment_dirs[rel_segment_no]\n", 203 | "print(f\"Specified segment '{rel_segment_name}' in directory: '{rel_segment_dir}'\")" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "id": "d00513bd", 209 | "metadata": { 210 | "id": "d00513bd" 211 | }, 212 | "source": [ 213 | "
\n", 214 | "

Extension: Have a look at the files which make up this record here (NB: you will need to scroll to the bottom of the page).

\n", 215 | "
" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "id": "3b2e6adb", 221 | "metadata": { 222 | "id": "3b2e6adb" 223 | }, 224 | "source": [ 225 | "---\n", 226 | "## Extract data for this segment" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "id": "e8810358", 232 | "metadata": { 233 | "id": "e8810358" 234 | }, 235 | "source": [ 236 | "- Use the [`rdrecord`](https://wfdb.readthedocs.io/en/latest/io.html#wfdb.io.rdrecord) function from the WFDB toolbox to read the data for this segment." 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 8, 242 | "id": "8626ebac", 243 | "metadata": { 244 | "id": "8626ebac", 245 | "outputId": "8c0d3d8e-fb01-4a3a-e75f-6502941fad70", 246 | "colab": { 247 | "base_uri": "https://localhost:8080/" 248 | } 249 | }, 250 | "outputs": [ 251 | { 252 | "output_type": "stream", 253 | "name": "stdout", 254 | "text": [ 255 | "Data loaded from segment: 83404654_0005\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "segment_data = wfdb.rdrecord(record_name=rel_segment_name, pn_dir=rel_segment_dir) \n", 261 | "print(f\"Data loaded from segment: {rel_segment_name}\")" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "id": "5032d6c4", 267 | "metadata": { 268 | "id": "5032d6c4" 269 | }, 270 | "source": [ 271 | "- Look at class type of the object in which the data are stored:" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 10, 277 | "id": "967fa4ef", 278 | "metadata": { 279 | "id": "967fa4ef", 280 | "outputId": "9e9b7857-dfd6-470c-9722-9a3a196687c3", 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | } 284 | }, 285 | "outputs": [ 286 | { 287 | "output_type": "stream", 288 | "name": "stdout", 289 | "text": [ 290 | "Data stored in class of type: \n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "print(f\"Data stored in class of type: {type(segment_data)}\")" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "id": "cf2d5ed7", 301 
| "metadata": { 302 | "id": "cf2d5ed7" 303 | }, 304 | "source": [ 305 | "
\n", 306 | "

Resource: You can find out more about the class representing single segment WFDB records here.

\n", 307 | "
" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "id": "85a0d656", 313 | "metadata": { 314 | "id": "85a0d656" 315 | }, 316 | "source": [ 317 | "- Find out about the signals which have been extracted" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 13, 323 | "id": "6d5416b6", 324 | "metadata": { 325 | "id": "6d5416b6", 326 | "outputId": "e72883c0-0675-4e32-f814-a2bc84e03259", 327 | "colab": { 328 | "base_uri": "https://localhost:8080/" 329 | } 330 | }, 331 | "outputs": [ 332 | { 333 | "output_type": "stream", 334 | "name": "stdout", 335 | "text": [ 336 | "This segment contains waveform data for the following 6 signals: ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp']\n", 337 | "The signals are sampled at a base rate of 62.4725 Hz (and some are sampled at multiples of this)\n", 338 | "They last for 52.4 minutes\n" 339 | ] 340 | } 341 | ], 342 | "source": [ 343 | "print(f\"This segment contains waveform data for the following {segment_data.n_sig} signals: {segment_data.sig_name}\")\n", 344 | "print(f\"The signals are sampled at a base rate of {segment_data.fs} Hz (and some are sampled at multiples of this)\")\n", 345 | "print(f\"They last for {segment_data.sig_len/(60*segment_data.fs):.1f} minutes\")" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "id": "0d40fab4", 351 | "metadata": { 352 | "id": "0d40fab4" 353 | }, 354 | "source": [ 355 | "
\n", 356 | "

Question: Can you find out which signals are sampled at multiples of the base sampling frequency by looking at the following contents of the 'segment_data' variable?

\n", 357 | "
" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 16, 363 | "id": "b0903fcf", 364 | "metadata": { 365 | "id": "b0903fcf", 366 | "outputId": "cee5ffd3-aaf5-4e46-9b3e-af94a844a9da", 367 | "colab": { 368 | "base_uri": "https://localhost:8080/" 369 | } 370 | }, 371 | "outputs": [ 372 | { 373 | "output_type": "stream", 374 | "name": "stdout", 375 | "text": [ 376 | "{'adc_gain': [200.0, 200.0, 200.0, 16.0, 4096.0, 4093.0],\n", 377 | " 'adc_res': [14, 14, 14, 13, 12, 12],\n", 378 | " 'adc_zero': [8192, 8192, 8192, 4096, 2048, 2048],\n", 379 | " 'base_counter': 10219520.0,\n", 380 | " 'base_date': None,\n", 381 | " 'base_time': None,\n", 382 | " 'baseline': [8192, 8192, 8192, 800, 0, 2],\n", 383 | " 'block_size': [0, 0, 0, 0, 0, 0],\n", 384 | " 'byte_offset': [None, None, None, None, None, None],\n", 385 | " 'checksum': [10167, 1300, 56956, 35887, 29987, 21750],\n", 386 | " 'comments': ['signal 0 (II): channel=0 bandpass=[0.5,35]',\n", 387 | " 'signal 1 (V): channel=1 bandpass=[0.5,35]',\n", 388 | " 'signal 2 (aVR): channel=2 bandpass=[0.5,35]'],\n", 389 | " 'counter_freq': 999.56,\n", 390 | " 'd_signal': None,\n", 391 | " 'e_d_signal': None,\n", 392 | " 'e_p_signal': None,\n", 393 | " 'file_name': ['83404654_0005e.dat',\n", 394 | " '83404654_0005e.dat',\n", 395 | " '83404654_0005e.dat',\n", 396 | " '83404654_0005p.dat',\n", 397 | " '83404654_0005p.dat',\n", 398 | " '83404654_0005r.dat'],\n", 399 | " 'fmt': ['516', '516', '516', '516', '516', '516'],\n", 400 | " 'fs': 62.4725,\n", 401 | " 'init_value': [0, 0, 0, 0, 0, 0],\n", 402 | " 'n_sig': 6,\n", 403 | " 'p_signal': array([[ 0.00000000e+00, -6.50000000e-02, -5.00000000e-03,\n", 404 | " nan, 5.02929688e-01, 1.56120205e-01],\n", 405 | " [ 5.00000000e-03, -4.50000000e-02, -5.00000000e-03,\n", 406 | " nan, 5.02929688e-01, 1.56853164e-01],\n", 407 | " [ 1.50000000e-02, -2.50000000e-02, 5.00000000e-03,\n", 408 | " nan, 5.02929688e-01, 1.57097484e-01],\n", 409 | " ...,\n", 410 | " [-1.50000000e-02, 
7.00000000e-02, -4.00000000e-02,\n", 411 | " 7.25000000e+01, 5.74951172e-01, 3.57683850e-01],\n", 412 | " [-1.50000000e-02, 5.50000000e-02, -4.50000000e-02,\n", 413 | " 7.25000000e+01, 5.70800781e-01, 3.61104324e-01],\n", 414 | " [ 0.00000000e+00, 9.00000000e-02, -5.50000000e-02,\n", 415 | " 7.25000000e+01, 5.62255859e-01, 3.63791840e-01]]),\n", 416 | " 'record_name': '83404654_0005',\n", 417 | " 'samps_per_frame': [4, 4, 4, 2, 2, 1],\n", 418 | " 'sig_len': 196480,\n", 419 | " 'sig_name': ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp'],\n", 420 | " 'skew': [None, None, None, None, None, None],\n", 421 | " 'units': ['mV', 'mV', 'mV', 'mmHg', 'NU', 'Ohm']}\n" 422 | ] 423 | } 424 | ], 425 | "source": [ 426 | "from pprint import pprint\n", 427 | "pprint(vars(segment_data))" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "source": [ 433 | "" 434 | ], 435 | "metadata": { 436 | "id": "gKtupgmahzpt" 437 | }, 438 | "id": "gKtupgmahzpt", 439 | "execution_count": null, 440 | "outputs": [] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.8.8" 460 | }, 461 | "toc": { 462 | "base_numbering": 1, 463 | "nav_menu": {}, 464 | "number_sections": true, 465 | "sideBar": true, 466 | "skip_h1_title": true, 467 | "title_cell": "Table of Contents", 468 | "title_sidebar": "Contents", 469 | "toc_cell": false, 470 | "toc_position": {}, 471 | "toc_section_display": true, 472 | "toc_window_display": false 473 | }, 474 | "colab": { 475 | "name": "data-extraction.ipynb", 476 | "provenance": [] 477 | } 478 | }, 479 | "nbformat": 4, 480 | "nbformat_minor": 5 481 | } 
-------------------------------------------------------------------------------- /content/tutorial/notebooks/data-exploration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5d037743", 6 | "metadata": { 7 | "id": "5d037743" 8 | }, 9 | "source": [ 10 | "# Data Exploration" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "fbae8e9b", 16 | "metadata": { 17 | "id": "fbae8e9b" 18 | }, 19 | "source": [ 20 | "Let's begin by exploring data in the MIMIC Waveform Database.\n", 21 | "\n", 22 | "Our **objectives** are to:\n", 23 | "- Review the structure of the MIMIC Waveform Database (considering subjects, studies, records, and segments).\n", 24 | "- Load waveforms using the WFDB toolbox.\n", 25 | "- Find out which signals are present in selected records and segments, and how long the signals last.\n", 26 | "- Search for records that contain signals of interest." 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "0b240726", 32 | "metadata": { 33 | "id": "0b240726" 34 | }, 35 | "source": [ 36 | "
\n", 37 | "

Resource: You can find out more about the MIMIC Waveform Database here.

\n", 38 | "
" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "28b8e213", 44 | "metadata": { 45 | "id": "28b8e213" 46 | }, 47 | "source": [ 48 | "---\n", 49 | "## Setup" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "5dac032e", 55 | "metadata": { 56 | "id": "5dac032e" 57 | }, 58 | "source": [ 59 | "### Specify the required Python packages\n", 60 | "We'll import the following:\n", 61 | "- _sys_: an essential python package\n", 62 | "- _pathlib_ (well a particular function from _pathlib_, called _Path_)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "ce3cdfde", 69 | "metadata": { 70 | "id": "ce3cdfde" 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "import sys\n", 75 | "from pathlib import Path" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "9976c5e4", 81 | "metadata": { 82 | "id": "9976c5e4" 83 | }, 84 | "source": [ 85 | "### Specify a particular version of the WFDB Toolbox" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "6533154b", 91 | "metadata": { 92 | "id": "6533154b" 93 | }, 94 | "source": [ 95 | "- _wfdb_: For this workshop we will be using version 4 of the WaveForm DataBase (WFDB) Toolbox package. The package contains tools for processing waveform data such as those found in MIMIC:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "id": "5fdfa989", 102 | "metadata": { 103 | "id": "5fdfa989" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "!pip install wfdb==4.0.0\n", 108 | "import wfdb" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "e11ce5b6", 114 | "metadata": { 115 | "id": "e11ce5b6" 116 | }, 117 | "source": [ 118 | "
\n", 119 | "

Resource: You can find out more about the WFDB package here.

\n", 120 | "
" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "d492e49f", 126 | "metadata": { 127 | "id": "d492e49f" 128 | }, 129 | "source": [ 130 | "Now that we have imported these packages (_i.e._ toolboxes) we have a set of tools (functions) ready to use." 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "e7d38297", 136 | "metadata": { 137 | "id": "e7d38297" 138 | }, 139 | "source": [ 140 | "### Specify the name of the MIMIC Waveform Database" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "68491718", 146 | "metadata": { 147 | "id": "68491718" 148 | }, 149 | "source": [ 150 | "- Specify the name of the MIMIC IV Waveform Database on Physionet, which comes from the URL: https://physionet.org/content/mimic4wdb/0.1.0/" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "982b8154", 157 | "metadata": { 158 | "id": "982b8154" 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "database_name = 'mimic4wdb/0.1.0'" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "id": "e49196a6", 168 | "metadata": { 169 | "id": "e49196a6" 170 | }, 171 | "source": [ 172 | "---\n", 173 | "## Identify the records in the database" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "b476f9b7", 179 | "metadata": { 180 | "id": "b476f9b7" 181 | }, 182 | "source": [ 183 | "### Get a list of records\n", 184 | "\n", 185 | "- Use the [`get_record_list`](https://wfdb.readthedocs.io/en/latest/io.html#wfdb.io.get_record_list) function from the WFDB toolbox to get a list of records in the database." 
186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "d91aa6a7", 192 | "metadata": { 193 | "id": "d91aa6a7", 194 | "outputId": "db8e3169-76ac-4bdd-bbaa-91cf626c1a6b", 195 | "colab": { 196 | "base_uri": "https://localhost:8080/" 197 | } 198 | }, 199 | "outputs": [ 200 | { 201 | "output_type": "stream", 202 | "name": "stdout", 203 | "text": [ 204 | "The 'mimic4wdb/0.1.0' database contains data from 198 subjects\n" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "# each subject may be associated with multiple records\n", 210 | "subjects = wfdb.get_record_list(database_name)\n", 211 | "print(f\"The '{database_name}' database contains data from {len(subjects)} subjects\")\n", 212 | "\n", 213 | "# set max number of records to load\n", 214 | "max_records_to_load = 200" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "source": [ 220 | "# iterate the subjects to get a list of records\n", 221 | "records = []\n", 222 | "for subject in subjects:\n", 223 | " studies = wfdb.get_record_list(f'{database_name}/{subject}')\n", 224 | " for study in studies:\n", 225 | " records.append(Path(f'{subject}{study}'))\n", 226 | " # stop if we've loaded enough records\n", 227 | " if len(records) >= max_records_to_load:\n", 228 | " print(\"Reached maximum required number of records.\")\n", 229 | " break\n", 230 | "\n", 231 | "print(f\"Loaded {len(records)} records from the '{database_name}' database.\")" 232 | ], 233 | "metadata": { 234 | "id": "0RzQmqjiQ9LD", 235 | "outputId": "31eb6067-de92-4424-b32b-f292623215a5", 236 | "colab": { 237 | "base_uri": "https://localhost:8080/" 238 | } 239 | }, 240 | "id": "0RzQmqjiQ9LD", 241 | "execution_count": null, 242 | "outputs": [ 243 | { 244 | "output_type": "stream", 245 | "name": "stdout", 246 | "text": [ 247 | "Reached maximum required number of records.\n", 248 | "Loaded 200 records from the 'mimic4wdb/0.1.0' database.\n" 249 | ] 250 | } 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 
| "id": "fc82d67e", 256 | "metadata": { 257 | "id": "fc82d67e" 258 | }, 259 | "source": [ 260 | "### Look at the records" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "id": "29552f5a", 266 | "metadata": { 267 | "id": "29552f5a" 268 | }, 269 | "source": [ 270 | "- Display the first few records" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "id": "bb5745a7", 277 | "metadata": { 278 | "id": "bb5745a7", 279 | "outputId": "8fe32e59-c542-4a40-bd06-0c04fdcfbbfe", 280 | "colab": { 281 | "base_uri": "https://localhost:8080/" 282 | } 283 | }, 284 | "outputs": [ 285 | { 286 | "output_type": "stream", 287 | "name": "stdout", 288 | "text": [ 289 | "First five records: \n", 290 | " - waves/p100/p10014354/81739927/81739927\n", 291 | " - waves/p100/p10019003/87033314/87033314\n", 292 | " - waves/p100/p10020306/83404654/83404654\n", 293 | " - waves/p100/p10039708/83411188/83411188\n", 294 | " - waves/p100/p10039708/85583557/85583557\n", 295 | "\n", 296 | "Note the formatting of these records:\n", 297 | " - intermediate directory ('p100' in this case)\n", 298 | " - subject identifier (e.g. 'p10014354')\n", 299 | " - record identifier (e.g. '81739927'\n", 300 | " \n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "# format and print first five records\n", 306 | "first_five_records = [str(x) for x in records[0:5]]\n", 307 | "first_five_records = \"\\n - \".join(first_five_records)\n", 308 | "print(f\"First five records: \\n - {first_five_records}\")\n", 309 | "\n", 310 | "print(\"\"\"\n", 311 | "Note the formatting of these records:\n", 312 | " - intermediate directory ('p100' in this case)\n", 313 | " - subject identifier (e.g. 'p10014354')\n", 314 | " - record identifier (e.g. '81739927'\n", 315 | " \"\"\")" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "id": "b56c29d5", 321 | "metadata": { 322 | "id": "b56c29d5" 323 | }, 324 | "source": [ 325 | "
\n", 326 | "

Q: Can you print the names of the last five records?
Hint: in Python, the last five elements can be specified using '[-5:]'

\n", 327 | "
" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "id": "cb21a93b", 333 | "metadata": { 334 | "id": "cb21a93b" 335 | }, 336 | "source": [ 337 | "---\n", 338 | "## Extract metadata for a record" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "id": "c39dc9f3", 344 | "metadata": { 345 | "id": "c39dc9f3" 346 | }, 347 | "source": [ 348 | "Each record contains metadata stored in a header file, named \"`.hea`\"" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "id": "3b2e6adb", 354 | "metadata": { 355 | "id": "3b2e6adb" 356 | }, 357 | "source": [ 358 | "### Specify the online directory containing a record's data" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "id": "86eed39f", 365 | "metadata": { 366 | "id": "86eed39f", 367 | "outputId": "5cfa40d0-b4d4-4605-b677-164d9b603f90", 368 | "colab": { 369 | "base_uri": "https://localhost:8080/" 370 | } 371 | }, 372 | "outputs": [ 373 | { 374 | "output_type": "stream", 375 | "name": "stdout", 376 | "text": [ 377 | "PhysioNet directory specified for record: mimic4wdb/0.1.0/waves/p100/p10039708/83411188\n" 378 | ] 379 | } 380 | ], 381 | "source": [ 382 | "# Specify the 4th record (note, in Python indexing begins at 0)\n", 383 | "idx = 3\n", 384 | "record = records[idx]\n", 385 | "record_dir = f'{database_name}/{record.parent}'\n", 386 | "print(\"PhysioNet directory specified for record: {}\".format(record_dir))" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "id": "b5220ad3", 392 | "metadata": { 393 | "id": "b5220ad3" 394 | }, 395 | "source": [ 396 | "### Specify the subject identifier" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "id": "d7a5bbef", 402 | "metadata": { 403 | "id": "d7a5bbef" 404 | }, 405 | "source": [ 406 | "Extract the record name (e.g. '83411188') from the record (e.g. 
'p100/p10039708/83411188/83411188'):" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "id": "b4bc247b", 413 | "metadata": { 414 | "id": "b4bc247b", 415 | "outputId": "a74ca902-ca05-496a-fd5d-2dbb0d95f998", 416 | "colab": { 417 | "base_uri": "https://localhost:8080/" 418 | } 419 | }, 420 | "outputs": [ 421 | { 422 | "output_type": "stream", 423 | "name": "stdout", 424 | "text": [ 425 | "Record name: 83411188\n" 426 | ] 427 | } 428 | ], 429 | "source": [ 430 | "record_name = record.name\n", 431 | "print(\"Record name: {}\".format(record_name))" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "id": "742071da", 437 | "metadata": { 438 | "id": "742071da" 439 | }, 440 | "source": [ 441 | "### Load the metadata for this record\n", 442 | "- Use the [`rdheader`](https://wfdb.readthedocs.io/en/latest/io.html#wfdb.io.rdheader) function from the WFDB toolbox to load metadata from the record header file" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "id": "c5a0afc5", 449 | "metadata": { 450 | "id": "c5a0afc5", 451 | "outputId": "13b3dfa2-d489-4a77-c07d-a5116d67b4ec", 452 | "colab": { 453 | "base_uri": "https://localhost:8080/" 454 | } 455 | }, 456 | "outputs": [ 457 | { 458 | "output_type": "stream", 459 | "name": "stdout", 460 | "text": [ 461 | "Done: metadata loaded for record '83411188' from the header file at:\n", 462 | "https://physionet.org/content/mimic4wdb/0.1.0/waves/p100/p10039708/83411188/83411188.hea\n" 463 | ] 464 | } 465 | ], 466 | "source": [ 467 | "record_data = wfdb.rdheader(record_name, pn_dir=record_dir, rd_segments=True)\n", 468 | "remote_url = \"https://physionet.org/content/\" + record_dir + \"/\" + record_name + \".hea\"\n", 469 | "print(f\"Done: metadata loaded for record '{record_name}' from the header file at:\\n{remote_url}\")" 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "id": "f7a4d25d", 475 | "metadata": { 476 | "id": "f7a4d25d" 477 | 
}, 478 | "source": [ 479 | "---\n", 480 | "## Inspect details of physiological signals recorded in this record\n", 481 | "- Printing a few details of the signals from the extracted metadata" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "id": "58630149", 488 | "metadata": { 489 | "id": "58630149", 490 | "outputId": "e19d66b1-690c-4cc5-c754-c4b5d1b16d38", 491 | "colab": { 492 | "base_uri": "https://localhost:8080/" 493 | } 494 | }, 495 | "outputs": [ 496 | { 497 | "output_type": "stream", 498 | "name": "stdout", 499 | "text": [ 500 | "- Number of signals: 6\n", 501 | "- Duration: 14.2 hours\n", 502 | "- Base sampling frequency: 62.4725 Hz\n" 503 | ] 504 | } 505 | ], 506 | "source": [ 507 | "print(f\"- Number of signals: {record_data.n_sig}\".format())\n", 508 | "print(f\"- Duration: {record_data.sig_len/(record_data.fs*60*60):.1f} hours\") \n", 509 | "print(f\"- Base sampling frequency: {record_data.fs} Hz\")" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "id": "7b3da17f", 515 | "metadata": { 516 | "id": "7b3da17f" 517 | }, 518 | "source": [ 519 | "---\n", 520 | "## Inspect the segments making up a record\n", 521 | "Each record is typically made up of several segments" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "id": "b127c857", 528 | "metadata": { 529 | "id": "b127c857", 530 | "outputId": "4fe5a2b3-b95b-4bbe-db18-fabb199f0584", 531 | "colab": { 532 | "base_uri": "https://localhost:8080/" 533 | } 534 | }, 535 | "outputs": [ 536 | { 537 | "output_type": "stream", 538 | "name": "stdout", 539 | "text": [ 540 | "The 6 segments from record 83411188 are:\n", 541 | "['83411188_0000', '83411188_0001', '83411188_0002', '83411188_0003', '83411188_0004', '83411188_0005']\n" 542 | ] 543 | } 544 | ], 545 | "source": [ 546 | "segments = record_data.seg_name\n", 547 | "print(f\"The {len(segments)} segments from record {record_name} are:\\n{segments}\")" 548 | ] 549 | }, 550 | { 
551 | "cell_type": "markdown", 552 | "id": "b379eaaf", 553 | "metadata": { 554 | "id": "b379eaaf" 555 | }, 556 | "source": [ 557 | "The format of filename for each segment is: `record directory, \"_\", segment number`" 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "id": "f19d231b", 563 | "metadata": { 564 | "id": "f19d231b" 565 | }, 566 | "source": [ 567 | "---\n", 568 | "## Inspect an individual segment\n", 569 | "### Read the metadata for this segment\n", 570 | "- Read the metadata from the header file" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "id": "7f70d34f", 577 | "metadata": { 578 | "id": "7f70d34f", 579 | "outputId": "d1bd96de-09d9-4cf2-fa35-1bbcb5ddced4", 580 | "colab": { 581 | "base_uri": "https://localhost:8080/" 582 | } 583 | }, 584 | "outputs": [ 585 | { 586 | "output_type": "stream", 587 | "name": "stdout", 588 | "text": [ 589 | "Header metadata loaded for: \n", 590 | "- the segment '83411188_0001'\n", 591 | "- in record '83411188'\n", 592 | "- for subject 'p10039708'\n", 593 | "\n" 594 | ] 595 | } 596 | ], 597 | "source": [ 598 | "segment_metadata = wfdb.rdheader(record_name=segments[2], pn_dir=record_dir)\n", 599 | "\n", 600 | "print(f\"\"\"Header metadata loaded for: \n", 601 | "- the segment '{segments[2]}'\n", 602 | "- in record '{record_name}'\n", 603 | "- for subject '{str(Path(record_dir).parent.parts[-1])}'\n", 604 | "\"\"\")" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "id": "d28771ac", 610 | "metadata": { 611 | "id": "d28771ac" 612 | }, 613 | "source": [ 614 | "### Find out what signals are present" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": null, 620 | "id": "324727df", 621 | "metadata": { 622 | "id": "324727df", 623 | "outputId": "223bdb49-5023-453d-f2b7-a016a603fec9", 624 | "colab": { 625 | "base_uri": "https://localhost:8080/" 626 | } 627 | }, 628 | "outputs": [ 629 | { 630 | "output_type": "stream", 631 | "name": "stdout", 
632 | "text": [ 633 | "This segment contains the following signals: ['II', 'V', 'aVR', 'ABP', 'Pleth', 'Resp']\n", 634 | "The signals are measured in units of: ['mV', 'mV', 'mV', 'mmHg', 'NU', 'Ohm']\n" 635 | ] 636 | } 637 | ], 638 | "source": [ 639 | "print(f\"This segment contains the following signals: {segment_metadata.sig_name}\")\n", 640 | "print(f\"The signals are measured in units of: {segment_metadata.units}\")" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "id": "f09b3f37", 646 | "metadata": { 647 | "id": "f09b3f37" 648 | }, 649 | "source": [ 650 | "See [here](https://archive.physionet.org/mimic2/mimic2_waveform_overview.shtml#signals-125-samplessecond) for definitions of signal abbreviations." 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "id": "3f56dd61", 656 | "metadata": { 657 | "id": "3f56dd61" 658 | }, 659 | "source": [ 660 | "
\n", 661 | "

Q: Which of these signals is no longer present in segment '83411188_0005'?

\n", 662 | "
" 663 | ] 664 | }, 665 | { 666 | "cell_type": "markdown", 667 | "id": "9f921f27", 668 | "metadata": { 669 | "id": "9f921f27" 670 | }, 671 | "source": [ 672 | "### Find out how long each signal lasts" 673 | ] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "id": "d217b764", 678 | "metadata": { 679 | "id": "d217b764" 680 | }, 681 | "source": [ 682 | "All signals in a segment are time-aligned, measured at the same sampling frequency, and last the same duration:" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": null, 688 | "id": "c44f00a7", 689 | "metadata": { 690 | "id": "c44f00a7", 691 | "outputId": "1cfa789e-b66b-4c8e-805b-4197c663ba18", 692 | "colab": { 693 | "base_uri": "https://localhost:8080/" 694 | } 695 | }, 696 | "outputs": [ 697 | { 698 | "output_type": "stream", 699 | "name": "stdout", 700 | "text": [ 701 | "The signals have a base sampling frequency of 62.5 Hz\n", 702 | "and they last for 0.9 minutes\n" 703 | ] 704 | } 705 | ], 706 | "source": [ 707 | "print(f\"The signals have a base sampling frequency of {segment_metadata.fs:.1f} Hz\")\n", 708 | "print(f\"and they last for {segment_metadata.sig_len/(segment_metadata.fs*60):.1f} minutes\")" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "id": "d2a80895", 714 | "metadata": { 715 | "id": "d2a80895" 716 | }, 717 | "source": [ 718 | "## Identify records suitable for analysis" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "id": "1a3218d3", 724 | "metadata": { 725 | "id": "1a3218d3" 726 | }, 727 | "source": [ 728 | "- The signals and their durations vary from one record (and segment) to the next. \n", 729 | "- Since most studies require specific types of signals (e.g. blood pressure and photoplethysmography signals), we need to be able to identify which records (or segments) contain the required signals and duration." 
730 | ] 731 | }, 732 | { 733 | "cell_type": "markdown", 734 | "id": "b02c0b4e", 735 | "metadata": { 736 | "id": "b02c0b4e" 737 | }, 738 | "source": [ 739 | "### Setup" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "id": "5bb47556", 746 | "metadata": { 747 | "id": "5bb47556" 748 | }, 749 | "outputs": [], 750 | "source": [ 751 | "import pandas as pd\n", 752 | "from pprint import pprint" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": null, 758 | "id": "95181681", 759 | "metadata": { 760 | "id": "95181681", 761 | "outputId": "544c69db-59d9-432c-ee6c-10e1b0f54318", 762 | "colab": { 763 | "base_uri": "https://localhost:8080/" 764 | } 765 | }, 766 | "outputs": [ 767 | { 768 | "output_type": "stream", 769 | "name": "stdout", 770 | "text": [ 771 | "Earlier, we loaded 200 records from the 'mimic4wdb/0.1.0' database.\n" 772 | ] 773 | } 774 | ], 775 | "source": [ 776 | "print(f\"Earlier, we loaded {len(records)} records from the '{database_name}' database.\")" 777 | ] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "id": "7f2b5955", 782 | "metadata": { 783 | "id": "7f2b5955" 784 | }, 785 | "source": [ 786 | "### Specify requirements" 787 | ] 788 | }, 789 | { 790 | "cell_type": "markdown", 791 | "id": "83f8611c", 792 | "metadata": { 793 | "id": "83f8611c" 794 | }, 795 | "source": [ 796 | "- Required signals" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": null, 802 | "id": "3d1505ab", 803 | "metadata": { 804 | "id": "3d1505ab" 805 | }, 806 | "outputs": [], 807 | "source": [ 808 | "required_sigs = ['ABP', 'Pleth']" 809 | ] 810 | }, 811 | { 812 | "cell_type": "markdown", 813 | "id": "03920810", 814 | "metadata": { 815 | "id": "03920810" 816 | }, 817 | "source": [ 818 | "- Required duration" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": null, 824 | "id": "568a93c1", 825 | "metadata": { 826 | "id": "568a93c1" 827 | }, 828 | "outputs": [], 829 | 
"source": [ 830 | "# convert from minutes to seconds\n", 831 | "req_seg_duration = 10*60 " 832 | ] 833 | }, 834 | { 835 | "cell_type": "markdown", 836 | "id": "d49187cd", 837 | "metadata": { 838 | "id": "d49187cd" 839 | }, 840 | "source": [ 841 | "### Find out how many records meet the requirements" 842 | ] 843 | }, 844 | { 845 | "cell_type": "markdown", 846 | "id": "65f2cdce", 847 | "metadata": { 848 | "id": "65f2cdce" 849 | }, 850 | "source": [ 851 | "_NB: This step may take a while. The results are copied below to save running it yourself._" 852 | ] 853 | }, 854 | { 855 | "cell_type": "code", 856 | "execution_count": null, 857 | "id": "015b47d3", 858 | "metadata": { 859 | "id": "015b47d3" 860 | }, 861 | "outputs": [], 862 | "source": [ 863 | "matching_recs = {'dir':[], 'seg_name':[], 'length':[]}\n", 864 | "\n", 865 | "for record in records:\n", 866 | " print('Record: {}'.format(record), end=\"\", flush=True)\n", 867 | " record_dir = f'{database_name}/{record.parent}'\n", 868 | " record_name = record.name\n", 869 | " print(' (reading data)')\n", 870 | " record_data = wfdb.rdheader(record_name,\n", 871 | " pn_dir=record_dir,\n", 872 | " rd_segments=True)\n", 873 | "\n", 874 | " # Check whether the required signals are present in the record\n", 875 | " sigs_present = record_data.sig_name\n", 876 | " if not all(x in sigs_present for x in required_sigs):\n", 877 | " print(' (missing signals)')\n", 878 | " continue\n", 879 | "\n", 880 | " # Get the segments for the record\n", 881 | " segments = record_data.seg_name\n", 882 | "\n", 883 | " # Check to see if the segment is 10 min long\n", 884 | " # If not, move to the next one\n", 885 | " gen = (segment for segment in segments if segment != '~')\n", 886 | " for segment in gen:\n", 887 | " print(' - Segment: {}'.format(segment), end=\"\", flush=True)\n", 888 | " segment_metadata = wfdb.rdheader(record_name=segment,\n", 889 | " pn_dir=record_dir)\n", 890 | " seg_length = segment_metadata.sig_len/(segment_metadata.fs)\n", 
891 | "\n", 892 | " if seg_length < req_seg_duration:\n", 893 | " print(f' (too short at {seg_length/60:.1f} mins)')\n", 894 | " continue\n", 895 | "\n", 896 | " # Next check that all required signals are present in the segment\n", 897 | " sigs_present = segment_metadata.sig_name\n", 898 | " \n", 899 | " if all(x in sigs_present for x in required_sigs):\n", 900 | " matching_recs['dir'].append(record_dir)\n", 901 | " matching_recs['seg_name'].append(segment)\n", 902 | " matching_recs['length'].append(seg_length)\n", 903 | " print(' (met requirements)')\n", 904 | " # Since we only need one segment per record break out of loop\n", 905 | " break\n", 906 | " else:\n", 907 | " print(' (long enough, but missing signal(s))')\n", 908 | "\n", 909 | "print(f\"A total of {len(matching_recs['dir'])} records met the requirements:\")\n", 910 | "\n", 911 | "#df_matching_recs = pd.DataFrame(data=matching_recs)\n", 912 | "#df_matching_recs.to_csv('matching_records.csv', index=False)\n", 913 | "#p=1" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": null, 919 | "id": "75ec15f4", 920 | "metadata": { 921 | "id": "75ec15f4", 922 | "outputId": "3ea832cd-4a4b-4265-bc2b-275d0f6c1802", 923 | "colab": { 924 | "base_uri": "https://localhost:8080/" 925 | } 926 | }, 927 | "outputs": [ 928 | { 929 | "output_type": "stream", 930 | "name": "stdout", 931 | "text": [ 932 | "A total of 52 out of 200 records met the requirements.\n", 933 | "\n", 934 | "The relevant segment names are:\n", 935 | " - 83404654_0005\n", 936 | " - 82924339_0007\n", 937 | " - 84248019_0005\n", 938 | " - 82439920_0004\n", 939 | " - 82800131_0002\n", 940 | " - 84304393_0001\n", 941 | " - 89464742_0001\n", 942 | " - 88958796_0004\n", 943 | " - 88995377_0001\n", 944 | " - 85230771_0004\n", 945 | " - 86643930_0004\n", 946 | " - 81250824_0005\n", 947 | " - 87706224_0003\n", 948 | " - 83058614_0005\n", 949 | " - 82803505_0017\n", 950 | " - 88574629_0001\n", 951 | " - 87867111_0012\n", 952 | " - 
84560969_0001\n", 953 | " - 87562386_0001\n", 954 | " - 88685937_0001\n", 955 | " - 86120311_0001\n", 956 | " - 89866183_0014\n", 957 | " - 89068160_0002\n", 958 | " - 86380383_0001\n", 959 | " - 85078610_0008\n", 960 | " - 87702634_0007\n", 961 | " - 84686667_0002\n", 962 | " - 84802706_0002\n", 963 | " - 81811182_0004\n", 964 | " - 84421559_0005\n", 965 | " - 88221516_0007\n", 966 | " - 80057524_0005\n", 967 | " - 84209926_0018\n", 968 | " - 83959636_0010\n", 969 | " - 89989722_0016\n", 970 | " - 89225487_0007\n", 971 | " - 84391267_0001\n", 972 | " - 80889556_0002\n", 973 | " - 85250558_0011\n", 974 | " - 84567505_0005\n", 975 | " - 85814172_0007\n", 976 | " - 88884866_0005\n", 977 | " - 80497954_0012\n", 978 | " - 80666640_0014\n", 979 | " - 84939605_0004\n", 980 | " - 82141753_0018\n", 981 | " - 86874920_0014\n", 982 | " - 84505262_0010\n", 983 | " - 86288257_0001\n", 984 | " - 89699401_0001\n", 985 | " - 88537698_0013\n", 986 | " - 83958172_0001\n", 987 | "\n", 988 | "The corresponding directories are: \n", 989 | " - mimic4wdb/0.1.0/waves/p100/p10020306/83404654\n", 990 | " - mimic4wdb/0.1.0/waves/p101/p10126957/82924339\n", 991 | " - mimic4wdb/0.1.0/waves/p102/p10209410/84248019\n", 992 | " - mimic4wdb/0.1.0/waves/p109/p10952189/82439920\n", 993 | " - mimic4wdb/0.1.0/waves/p111/p11109975/82800131\n", 994 | " - mimic4wdb/0.1.0/waves/p113/p11392990/84304393\n", 995 | " - mimic4wdb/0.1.0/waves/p121/p12168037/89464742\n", 996 | " - mimic4wdb/0.1.0/waves/p121/p12173569/88958796\n", 997 | " - mimic4wdb/0.1.0/waves/p121/p12188288/88995377\n", 998 | " - mimic4wdb/0.1.0/waves/p128/p12872596/85230771\n", 999 | " - mimic4wdb/0.1.0/waves/p129/p12933208/86643930\n", 1000 | " - mimic4wdb/0.1.0/waves/p130/p13016481/81250824\n", 1001 | " - mimic4wdb/0.1.0/waves/p132/p13240081/87706224\n", 1002 | " - mimic4wdb/0.1.0/waves/p136/p13624686/83058614\n", 1003 | " - mimic4wdb/0.1.0/waves/p137/p13791821/82803505\n", 1004 | " - mimic4wdb/0.1.0/waves/p141/p14191565/88574629\n", 1005 
| " - mimic4wdb/0.1.0/waves/p142/p14285792/87867111\n", 1006 | " - mimic4wdb/0.1.0/waves/p143/p14356077/84560969\n", 1007 | " - mimic4wdb/0.1.0/waves/p143/p14363499/87562386\n", 1008 | " - mimic4wdb/0.1.0/waves/p146/p14695840/88685937\n", 1009 | " - mimic4wdb/0.1.0/waves/p149/p14931547/86120311\n", 1010 | " - mimic4wdb/0.1.0/waves/p151/p15174162/89866183\n", 1011 | " - mimic4wdb/0.1.0/waves/p153/p15312343/89068160\n", 1012 | " - mimic4wdb/0.1.0/waves/p153/p15342703/86380383\n", 1013 | " - mimic4wdb/0.1.0/waves/p155/p15552902/85078610\n", 1014 | " - mimic4wdb/0.1.0/waves/p156/p15649186/87702634\n", 1015 | " - mimic4wdb/0.1.0/waves/p158/p15857793/84686667\n", 1016 | " - mimic4wdb/0.1.0/waves/p158/p15865327/84802706\n", 1017 | " - mimic4wdb/0.1.0/waves/p158/p15896656/81811182\n", 1018 | " - mimic4wdb/0.1.0/waves/p159/p15920699/84421559\n", 1019 | " - mimic4wdb/0.1.0/waves/p160/p16034243/88221516\n", 1020 | " - mimic4wdb/0.1.0/waves/p165/p16566444/80057524\n", 1021 | " - mimic4wdb/0.1.0/waves/p166/p16644640/84209926\n", 1022 | " - mimic4wdb/0.1.0/waves/p167/p16709726/83959636\n", 1023 | " - mimic4wdb/0.1.0/waves/p167/p16715341/89989722\n", 1024 | " - mimic4wdb/0.1.0/waves/p168/p16818396/89225487\n", 1025 | " - mimic4wdb/0.1.0/waves/p170/p17032851/84391267\n", 1026 | " - mimic4wdb/0.1.0/waves/p172/p17229504/80889556\n", 1027 | " - mimic4wdb/0.1.0/waves/p173/p17301721/85250558\n", 1028 | " - mimic4wdb/0.1.0/waves/p173/p17325001/84567505\n", 1029 | " - mimic4wdb/0.1.0/waves/p174/p17490822/85814172\n", 1030 | " - mimic4wdb/0.1.0/waves/p177/p17738824/88884866\n", 1031 | " - mimic4wdb/0.1.0/waves/p177/p17744715/80497954\n", 1032 | " - mimic4wdb/0.1.0/waves/p179/p17957832/80666640\n", 1033 | " - mimic4wdb/0.1.0/waves/p180/p18080257/84939605\n", 1034 | " - mimic4wdb/0.1.0/waves/p181/p18109577/82141753\n", 1035 | " - mimic4wdb/0.1.0/waves/p183/p18324626/86874920\n", 1036 | " - mimic4wdb/0.1.0/waves/p187/p18742074/84505262\n", 1037 | " - 
mimic4wdb/0.1.0/waves/p188/p18824975/86288257\n", 1038 | " - mimic4wdb/0.1.0/waves/p191/p19126489/89699401\n", 1039 | " - mimic4wdb/0.1.0/waves/p193/p19313794/88537698\n", 1040 | " - mimic4wdb/0.1.0/waves/p196/p19619764/83958172\n" 1041 | ] 1042 | } 1043 | ], 1044 | "source": [ 1045 | "print(f\"A total of {len(matching_recs['dir'])} out of {len(records)} records met the requirements.\")\n", 1046 | "\n", 1047 | "relevant_segments_names = \"\\n - \".join(matching_recs['seg_name'])\n", 1048 | "print(f\"\\nThe relevant segment names are:\\n - {relevant_segments_names}\")\n", 1049 | "\n", 1050 | "relevant_dirs = \"\\n - \".join(matching_recs['dir'])\n", 1051 | "print(f\"\\nThe corresponding directories are: \\n - {relevant_dirs}\")" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "markdown", 1056 | "id": "719f20f8", 1057 | "metadata": { 1058 | "id": "719f20f8" 1059 | }, 1060 | "source": [ 1061 | "
\n", 1062 | "

Question: Is this enough data for a study? Consider different types of studies, e.g. assessing the performance of a previously proposed algorithm to estimate BP from the PPG signal, vs. developing a deep learning approach to estimate BP from the PPG.

\n", 1063 | "
import scipy.signal as sp
import numpy as np

def pulse_detect(x,fs,w,alg):
    """
    Pulse detection and correction from pulsatile signals.

    Inputs:   x, array with pulsatile signal [user defined units]
              fs, sampling rate of signal [Hz]
              w, window length for analysis [s]
              alg, string with the name of the algorithm to apply
                   ['heartpy','d2max','upslopes','delineator']
    Outputs:  ibis, location of cardiac cycles as detected by the selected
              algorithm [number of samples]

    Algorithms: 1: HeartPy (van Gent et al, 2019, DOI: 10.1016/j.trf.2019.09.015)
                2: 2nd derivative maxima (Elgendi et al, 2013, DOI: 10.1371/journal.pone.0076585)
                3: Systolic upslopes (Arguello Prada and Serna Maldonado, 2018,
                   DOI: 10.1080/03091902.2019.1572237)
                4: Delineator (Li et al, 2010, DOI: 10.1109/TBME.2005.855725)

    Libraries: NumPy (as np), SciPy (Signal, as sp)

    Version: 1.1 - based on 1.0 (June 2022) by Elisa Mejía-Mejía,
             City, University of London.
             Fixes: windows now tile the signal with no dropped samples,
             non-integer fs*w is supported, and signals shorter than one
             window no longer raise NameError.
    """

    # Check selected algorithm; fall back to D2max for unknown names
    pos_alg = ['heartpy','d2max','upslopes','delineator']
    if not(alg in pos_alg):
        print('Unknown algorithm determined. Using D2max as default')
        alg = 'd2max'

    # Pre-processing: remove linear trend, then band-pass 0.5-10 Hz
    x_d = sp.detrend(x)
    sos = sp.butter(10, [0.5, 10], btype = 'bp', analog = False, output = 'sos', fs = fs)
    x_f = sp.sosfiltfilt(sos, x_d)

    def _detect(segment):
        # Dispatch one window of the filtered signal to the chosen detector
        if alg == 'heartpy':
            return heartpy(segment, fs, 40, 180, 5)
        elif alg == 'd2max':
            return d2max(segment, fs)
        elif alg == 'upslopes':
            return upslopes(segment)
        else:
            return delineator(segment, fs)

    # Peak detection in consecutive windows of length w seconds.
    # Windows are [i*win_len, (i+1)*win_len) so no sample is skipped.
    win_len = int(w*fs)
    n_int = len(x_f)//win_len
    ibis = np.empty(0, dtype = int)
    for i in range(n_int):
        start = i*win_len
        stop = (i + 1)*win_len
        locs = _detect(x_f[start:stop])
        ibis = np.append(ibis, locs + start)

    # Remainder shorter than one window; skipped when too short to hold a beat
    start = n_int*win_len
    if start < len(x_f):
        aux = x_f[start:len(x_f)]
        if len(aux) > 20:
            locs = _detect(aux)
            ibis = np.append(ibis, locs + start)

    # Discard detections beyond the end of the signal
    ind, = np.where(ibis <= len(x_f))
    ibis = ibis[ind]

    # Post-hoc correction of missed/false detections
    ibis = peak_correction(x,ibis,fs,20,5,[0.5, 1.5])

    return ibis
def peak_correction(x,locs,fs,t,stride,th_len):
    """
    Correction of peaks detected from pulsatile signals.

    Inputs:   x, pulsatile signal [user defined units]
              locs, location of the detected interbeat intervals [number of samples]
              fs, sampling rate [Hz]
              t, duration of intervals for the correction [s]
              stride, stride between consecutive intervals for the correction [s]
              th_len, array with the percentage of lower and higher thresholds for
              comparing the duration of IBIs [proportions]
    Outputs:  ibis, array with the corrected points related to the start of the
              inter-beat intervals [number of samples]

    Developed by: Elisa Mejía Mejía, City, University of London
    Version: 1.1 - based on 1.0 (June, 2022).
             Fixes: np.delete/np.append now receive element *indices* instead of
             element values, the deduplication step no longer drops the first
             detection, the amplitude filter maps window-relative indices back to
             global ones, and a peak on the final sample no longer indexes out of
             bounds.
    """

    # --- Correction of long and short IBIs ------------------------------------
    len_window = np.round(t*fs)
    first_i = 0
    second_i = len_window - 1
    while second_i < len(x):
        # Detections that fall inside the current analysis window
        ind1, = np.where(locs >= first_i)
        ind2, = np.where(locs <= second_i)
        ind = np.intersect1d(ind1, ind2)

        win = locs[ind]
        dif = np.diff(win)

        # Duration thresholds relative to the median IBI of the window
        th_dif = np.zeros(2)
        th_dif[0] = th_len[0]*np.median(dif)
        th_dif[1] = th_len[1]*np.median(dif)

        # Amplitude thresholds relative to the median peak amplitude
        th_amp = np.zeros(2)
        th_amp[0] = 0.75*np.median(x[win])
        th_amp[1] = 1.25*np.median(x[win])

        j = 0
        while j < len(dif):
            if dif[j] <= th_dif[0]:
                # IBI too short: remove whichever of the two peaks keeps the
                # remaining interval closest to the median IBI
                if j == 0:
                    opt = np.append(win[j], win[j + 1])
                else:
                    opt = np.append(win[j], win[j + 1]) - win[j - 1]
                #print('Optional: ' + str(opt))
                dif_abs = np.abs(opt - np.median(dif))
                min_val = np.min(dif_abs)
                ind_min, = np.where(dif_abs == min_val)
                #print('Minimum: ' + str(min_val) + ', index: ' + str(ind_min))
                if ind_min == 0:
                    # Keep win[j], drop the *element at position* j + 1
                    win = np.delete(win, j + 1)
                else:
                    # Keep win[j + 1], drop the element at position j
                    win = np.delete(win, j)
                dif = np.diff(win)
            elif dif[j] >= th_dif[1]:
                # IBI too long: look for a missed peak inside the interval
                aux_x = x[win[j]:win[j + 1]]
                locs_pks, _ = sp.find_peaks(aux_x)
                locs_pks = locs_pks + win[j]
                # Keep only candidates with plausible amplitude
                ind1, = np.where(x[locs_pks] >= th_amp[0])
                ind2, = np.where(x[locs_pks] <= th_amp[1])
                ind = np.intersect1d(ind1, ind2)
                locs_pks = locs_pks[ind]

                if len(locs_pks) != 0:
                    # Insert the candidate whose IBI is closest to the median
                    opt = locs_pks - win[j]
                    dif_abs = np.abs(opt - np.median(dif))
                    min_val = np.min(dif_abs)
                    ind_min, = np.where(dif_abs == min_val)
                    win = np.append(win, locs_pks[ind_min])
                    win = np.sort(win)
                    dif = np.diff(win)
                    j = j + 1
                else:
                    # No candidate found: insert an estimated location one
                    # median IBI after win[j], if it fits inside the interval
                    opt = np.round(win[j] + np.median(dif))
                    if opt < win[j + 1]:
                        win = np.append(win, int(opt))
                        win = np.sort(win)
                        dif = np.diff(win)
                        j = j + 1
                    else:
                        j = j + 1
            else:
                j = j + 1

        # Merge the corrected window back into the full list of detections
        locs = np.append(win, locs)
        locs = np.sort(locs)

        first_i = first_i + stride*fs - 1
        second_i = second_i + stride*fs - 1

    # Deduplicate while keeping the first occurrence (prepend a non-zero so the
    # first detection is never discarded)
    dif = np.diff(locs)
    dif = np.append(1, dif)
    ind, = np.where(dif != 0)
    locs = locs[ind]

    # --- Correction of points that are not peaks ------------------------------
    i = 0
    pre_loc = 0
    while i < len(locs):
        if locs[i] == 0:
            # First sample cannot be verified as a peak: drop it (by index)
            locs = np.delete(locs, i)
        elif locs[i] >= len(x) - 1:
            # Last sample (or beyond) cannot be verified as a peak: drop it
            locs = np.delete(locs, i)
        else:
            cond = (x[locs[i]] >= x[locs[i] - 1]) and (x[locs[i]] >= x[locs[i] + 1])
            if cond:
                i = i + 1
            else:
                if locs[i] == pre_loc:
                    i = i + 1
                else:
                    # Build a local segment around the detection
                    if i == 0:
                        aux = x[0:locs[i + 1] - 1]
                        aux_loc = locs[i] - 1
                        aux_start = 0
                    elif i == len(locs) - 1:
                        aux = x[locs[i - 1]:len(x) - 1]
                        aux_loc = locs[i] - locs[i - 1]
                        aux_start = locs[i - 1]
                    else:
                        aux = x[locs[i - 1]:locs[i + 1]]
                        aux_loc = locs[i] - locs[i - 1]
                        aux_start = locs[i - 1]

                    # Nearest true local maxima on either side
                    pre = find_closest_peak(aux, aux_loc, 'backward')
                    pos = find_closest_peak(aux, aux_loc, 'forward')

                    # Pick the relocation that best preserves the mean IBI
                    ibi_pre = np.append(pre - 1, len(aux) - pre)
                    ibi_pos = np.append(pos - 1, len(aux) - pos)
                    ibi_act = np.append(aux_loc - 1, len(aux) - aux_loc)

                    dif_pre = np.abs(ibi_pre - np.mean(np.diff(locs)))
                    dif_pos = np.abs(ibi_pos - np.mean(np.diff(locs)))
                    dif_act = np.abs(ibi_act - np.mean(np.diff(locs)))

                    avgs = [np.mean(dif_pre), np.mean(dif_pos), np.mean(dif_act)]
                    min_avg = np.min(avgs)
                    ind, = np.where(min_avg == avgs)
                    if len(ind) != 0:
                        ind = ind[0]

                    if ind == 0:
                        locs[i] = pre + aux_start - 1
                    elif ind == 1:
                        locs[i] = pos + aux_start - 1
                    elif ind == 2:
                        locs[i] = aux_loc + aux_start - 1
                    i = i + 1

    # --- Correction of peaks according to amplitude ---------------------------
    len_window = np.round(t*fs)
    keep = np.empty(0)
    first_i = 0
    second_i = len_window - 1
    while second_i < len(x):
        ind1, = np.where(locs >= first_i)
        ind2, = np.where(locs <= second_i)
        ind = np.intersect1d(ind1, ind2)
        win = locs[ind]
        # Sign-aware amplitude band around the median peak amplitude
        if np.median(x[win]) > 0:
            th_amp_low = 0.5*np.median(x[win])
            th_amp_high = 3*np.median(x[win])
        else:
            th_amp_low = -3*np.median(x[win])
            th_amp_high = 1.5*np.median(x[win])
        ind1, = np.where(x[win] >= th_amp_low)
        ind2, = np.where(x[win] <= th_amp_high)
        aux_keep = np.intersect1d(ind1,ind2)
        # aux_keep indexes positions within win; map back to indices of locs
        keep = np.append(keep, ind[aux_keep])

        first_i = second_i + 1
        second_i = second_i + stride*fs - 1

    if len(keep) != 0:
        keep = np.unique(keep)
        locs = locs[keep.astype(int)]

    return locs
def find_closest_peak(x, loc, dir_search):
    """
    Finds the closest peak to the initial location in x.

    Inputs:   x, signal of interest [user defined units]
              loc, initial location [number of samples]
              dir_search, direction of search ['backward','forward']
    Outputs:  pos, location of the first peak detected in specified direction
              [number of samples]; loc itself when no peak exists

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version: 1.0 - June, 2022
    """

    pos = -1
    if dir_search == 'backward':
        # Walk from loc-2 toward the start; stop at the first local maximum
        for idx in range(loc - 2, 0, -1):
            if x[idx] > x[idx - 1] and x[idx] > x[idx + 1]:
                pos = idx
                break
        if pos == -1:
            pos = loc
    elif dir_search == 'forward':
        # Walk from loc+1 toward the end; stop at the first local maximum
        for idx in range(loc + 1, len(x) - 1):
            if x[idx] > x[idx - 1] and x[idx] > x[idx + 1]:
                pos = idx
                break
        if pos == -1:
            pos = loc

    return pos

def seek_local(x, start, end):
    # Locate the extreme values of x over the index range [start, end).
    # Returns (val_min, ind_min, val_max, ind_max); ties keep the earliest index.
    val_min = val_max = x[start]
    ind_min = ind_max = start

    for idx in range(start, end):
        sample = x[idx]
        if sample > val_max:
            val_max, ind_max = sample, idx
        elif sample < val_min:
            val_min, ind_min = sample, idx

    return val_min, ind_min, val_max, ind_max
def heartpy(x, fs, min_ihr, max_ihr, w):
    """
    Detects inter-beat intervals using HeartPy.
    Citation: van Gent P, Farah H, van Nes N, van Arem B (2019) Heartpy: A novel
    heart rate algorithm for the analysis of noisy signals. Transp Res Part F,
    vol. 66, pp. 368-378. DOI: 10.1016/j.trf.2019.09.015

    Inputs:   x, pulsatile signal [user defined units]
              fs, sampling rate [Hz]
              min_ihr, minimum value of instantaneous heart rate to be accepted [bpm]
              max_ihr, maximum value of instantaneous heart rate to be accepted [bpm]
              w, length of segments for correction of peaks [s]
    Outputs:  ibis, position of the starting points of inter-beat intervals
              [number of samples]

    Developed by: Elisa Mejía Mejía, City, University of London
    Version: 1.1 - based on 1.0 (June, 2022).
             Fix: the IHR range test now uses element-wise '&' instead of the
             Python 'and' operator, which raised ValueError whenever a ROI held
             more than one candidate peak.
    """

    # Identification of peaks
    is_roi = 0
    n_rois = 0
    pos_pks = np.empty(0).astype(int)
    locs = np.empty(0).astype(int)

    # Pad with edge values so the moving average is defined at the boundaries
    len_ma = int(np.round(0.75*fs))
    sig = np.append(x[0]*np.ones(len_ma), x)
    sig = np.append(sig, x[-1]*np.ones(len_ma))

    i = len_ma
    while i < len(sig) - len_ma:
        ma = np.mean(sig[i - len_ma:i + len_ma - 1])

        # If it is the beginning of a new ROI:
        if is_roi == 0 and sig[i] >= ma:
            is_roi = 1
            n_rois = n_rois + 1
            # If it is a peak:
            if sig[i] >= sig[i - 1] and sig[i] >= sig[i + 1]:
                pos_pks = np.append(pos_pks, int(i))

        # If it is part of a ROI which is not over:
        elif is_roi == 1 and sig[i] > ma:
            # If it is a peak:
            if sig[i] >= sig[i - 1] and sig[i] >= sig[i + 1]:
                pos_pks = np.append(pos_pks, int(i))

        # If the ROI is over or the end of the signal has been reached:
        elif is_roi == 1 and (sig[i] < ma or i == (len(sig) - len_ma)):
            is_roi = 0 # Lowers flag

            # If it is the end of the first ROI:
            if n_rois == 1:
                # If at least one peak has been found:
                if len(pos_pks) != 0:
                    # Keep the maximum-amplitude candidate
                    max_pk = np.max(sig[pos_pks])
                    ind, = np.where(max_pk == np.max(sig[pos_pks]))
                    locs = np.append(locs, pos_pks[ind])
                # If no peak was found:
                else:
                    # Counter for ROIs is reset to previous value:
                    n_rois = n_rois - 1

            # If it is the end of the second ROI:
            elif n_rois == 2:
                # If at least one peak has been found:
                if len(pos_pks) != 0:
                    # Keep only candidates whose instantaneous HR (relative to
                    # the previous beat) lies within [min_ihr, max_ihr]
                    ihr = 60/((pos_pks - locs[-1])/fs)
                    good_ihr, = np.where((ihr <= max_ihr) & (ihr >= min_ihr))
                    pos_pks = pos_pks[good_ihr].astype(int)

                    # If at least one peak is between HR limits:
                    if len(pos_pks) != 0:
                        # Keep the maximum-amplitude candidate
                        max_pk = np.max(sig[pos_pks])
                        ind, = np.where(max_pk == np.max(sig[pos_pks]))
                        locs = np.append(locs, pos_pks[ind])
                # If no peak was found:
                else:
                    # Counter for ROIs is reset to previous value:
                    n_rois = n_rois - 1

            # If it is the end of any further ROI:
            else:
                # If at least one peak has been found:
                if len(pos_pks) != 0:
                    # Keep only candidates whose instantaneous HR lies in range
                    ihr = 60/((pos_pks - locs[-1])/fs)
                    good_ihr, = np.where((ihr <= max_ihr) & (ihr >= min_ihr))
                    pos_pks = pos_pks[good_ihr].astype(int)

                    # If at least one peak is between HR limits:
                    if len(pos_pks) != 0:
                        # Choose the candidate that minimises the SDNN of the
                        # beat series built so far
                        sdnn = np.zeros(len(pos_pks))
                        for j in range(len(pos_pks)):
                            sdnn[j] = np.std(np.append(locs/fs, pos_pks[j]/fs))
                        min_pk = np.min(sdnn)
                        ind, = np.where(min_pk == np.min(sdnn))
                        locs = np.append(locs, pos_pks[ind])
                # If no peak was found:
                else:
                    # Counter for ROIs is reset to previous value:
                    n_rois = n_rois - 1

            # Resets possible peaks for next ROI (keep integer dtype):
            pos_pks = np.empty(0).astype(int)

        i = i + 1

    # Remove the padding offset
    locs = locs - len_ma

    # Correction of peaks: keep IBIs within +/-30% of each window's mean IBI
    c_locs = np.empty(0)
    n_int = np.floor(len(x)/(w*fs))
    for i in range(int(n_int)):
        ind1, = np.where(locs >= i*w*fs)
        ind2, = np.where(locs < (i + 1)*w*fs)
        ind = np.intersect1d(ind1, ind2)
        int_locs = locs[ind]

        if i == 0:
            aux_ibis = np.diff(int_locs)
        else:
            # Include the interval from the last beat of the previous window
            ind, = np.where(locs >= i*w*fs)
            last = locs[ind[0] - 1]
            aux_ibis = np.diff(np.append(last, int_locs))
        avg_ibis = np.mean(aux_ibis)
        th = np.append((avg_ibis - 0.3*avg_ibis), (avg_ibis + 0.3*avg_ibis))
        ind1, = np.where(aux_ibis > th[0])
        ind2, = np.where(aux_ibis < th[1])
        ind = np.intersect1d(ind1, ind2)

        c_locs = np.append(c_locs, int_locs[ind]).astype(int)
        #print(c_locs)

    # Fall back to the uncorrected beats when the correction removed everything
    if len(c_locs) != 0:
        ibis = c_locs
    else:
        ibis = locs

    return ibis
def d2max(x, fs):
    """
    Detects inter-beat intervals using D2Max.
    Citation: Elgendi M, Norton I, Brearley M, Abbott D, Schuurmans D (2013)
    Systolic Peak Detection in Acceleration Photoplethysmograms Measured from
    Emergency Responders in Tropical Conditions. PLoS ONE, vol. 8, no. 10,
    pp. e76585. DOI: 10.1371/journal.pone.0076585

    Inputs:   x, pulsatile signal [user defined units]
              fs, sampling rate [Hz]
    Outputs:  ibis, position of the starting points of inter-beat intervals
              [number of samples]

    Developed by: Elisa Mejía Mejía, City, University of London
    Version: 1.1 - based on 1.0 (June, 2022).
             Fixes: signals of 4098 samples or more no longer raise NameError
             (x_z was unassigned), the beat moving average uses a window of
             length w2 (was mistakenly w1), short-block clearing indexes the
             correct blocks, and an empty block list returns an empty result
             instead of crashing.
    """

    # Bandpass filter; short signals are zero-padded so the order-10 filter
    # has enough samples to operate on
    if len(x) < 4098:
        z_fill = np.zeros(4098 - len(x) + 1)
        x_z = np.append(x, z_fill)
    else:
        x_z = x
    sos = sp.butter(10, [0.5, 8], btype = 'bp', analog = False, output = 'sos', fs = fs)
    x_f = sp.sosfiltfilt(sos, x_z)

    # Signal clipping: keep only the positive part (x_c aliases x_f)
    ind, = np.where(x_f < 0)
    x_c = x_f
    x_c[ind] = 0

    # Signal squaring
    x_s = x_c**2

    # Blocks of interest: moving average emphasising peaks (w1 ~ 111 ms) ...
    w1 = (111e-3)*fs
    w1 = int(2*np.floor(w1/2) + 1)
    b = (1/w1)*np.ones(w1)
    ma_pk = sp.filtfilt(b,1,x_s)

    # ... and moving average emphasising whole beats (w2 ~ 667 ms)
    w2 = (667e-3)*fs
    w2 = int(2*np.floor(w2/2) + 1)
    b = (1/w2)*np.ones(w2)
    ma_bpm = sp.filtfilt(b,1,x_s)

    # Thresholding: a block of interest is where the peak MA exceeds the
    # beat MA plus an offset; th_2 is the minimum block length
    alpha = 0.02*np.mean(ma_pk)
    th_1 = ma_bpm + alpha
    th_2 = w1
    boi = (ma_pk > th_1).astype(int)

    blocks_init, = np.where(np.diff(boi) > 0)
    blocks_init = blocks_init + 1
    blocks_end, = np.where(np.diff(boi) < 0)
    blocks_end = blocks_end + 1
    if len(blocks_init) == 0 or len(blocks_end) == 0:
        # Nothing rose above threshold: no beats detected
        return np.empty(0).astype(int)
    if blocks_init[0] > blocks_end[0]:
        blocks_init = np.append(1, blocks_init)
    if blocks_init[-1] > blocks_end[-1]:
        blocks_end = np.append(blocks_end, len(x_s))

    # Search for peaks inside BOIs: the beat is the maximum of the original
    # signal inside each sufficiently long block
    len_blks = np.zeros(len(blocks_init))
    ibis = np.zeros(len(blocks_init))
    for i in range(len(blocks_init)):
        ind, = np.where(blocks_end > blocks_init[i])
        ind = ind[0]
        len_blks[i] = blocks_end[ind] - blocks_init[i]
        if len_blks[i] >= th_2:
            aux = x[blocks_init[i]:blocks_end[ind]]
            if len(aux) != 0:
                max_val = np.max(aux)
                max_ind, = np.where(max_val == aux)
                ibis[i] = max_ind + blocks_init[i] - 1

    # Clear blocks that were too short (indexing via ind, not the loop counter)
    ind, = np.where(len_blks < th_2)
    if len(ind) != 0:
        for i in range(len(ind)):
            boi[blocks_init[ind[i]]:blocks_end[ind[i]]] = 0
    ind, = np.where(ibis == 0)
    ibis = (np.delete(ibis, ind)).astype(int)

    return ibis
def upslopes(x):
    """
    Detects inter-beat intervals using Upslopes.
    Citation: Arguello Prada EJ, Serna Maldonado RD (2018) A novel and
    low-complexity peak detection algorithm for heart rate estimation from
    low-amplitude photoplethysmographic (PPG) signals. J Med Eng Technol,
    vol. 42, no. 8, pp. 569-577. DOI: 10.1080/03091902.2019.1572237

    Inputs:   x, pulsatile signal [user defined units]
    Outputs:  ibis, position of the starting points of inter-beat intervals
              [number of samples]

    Developed by: Elisa Mejía Mejía
                  City, University of London
    Version: 1.0 - June, 2022
    """

    # Peak detection driven by the length of consecutive rising-sample runs;
    # the run-length threshold adapts to 60% of the last confirmed upslope
    th = 6
    pks = np.empty(0)       # confirmed peak locations
    pos_pk = np.empty(0)    # candidate peak locations
    pos_pk_b = 0            # flag: an unconfirmed candidate exists
    n_pos_pk = 0            # number of candidates recorded so far
    n_up = 0                # length of the current upslope run

    for i in range(1, len(x)):
        if x[i] > x[i - 1]:
            # Still climbing: extend the current upslope run
            n_up = n_up + 1
            continue
        if n_up > th:
            # The upslope was long enough: register sample i as a candidate
            pos_pk = np.append(pos_pk, i)
            pos_pk_b = 1
            n_pos_pk = n_pos_pk + 1
            n_up_pre = n_up
        else:
            pos_pk = pos_pk.astype(int)
            if pos_pk_b == 1:
                last = pos_pk[n_pos_pk - 1]
                if x[i - 1] > x[last]:
                    # A higher sample followed: move the candidate there
                    pos_pk[n_pos_pk - 1] = i - 1
                else:
                    # Confirm the candidate and adapt the threshold
                    pks = np.append(pks, last)
                    th = 0.6*n_up_pre
                pos_pk_b = 0
        # Any non-rising sample terminates the current upslope run
        n_up = 0

    return pks.astype(int)
def delineator(x, fs):
    """
    Detects inter-beat intervals using Delineator.
    Citation: Li BN, Dong MC, Vai MI (2010) On an automatic delineator for
    arterial blood pressure waveforms. Biomed Signal Process Control, vol. 5,
    no. 1, pp. 76-81. DOI: 10.1016/j.bspc.2009.06.002

    Inputs:   x, pulsatile signal [user defined units]
              fs, sampling rate [Hz]
    Outputs:  ibis, position of the starting points of inter-beat intervals
              [number of samples], or -1 when no beat was found

    Developed by: Elisa Mejía Mejía, City, University of London
    Version: 1.1 - based on 1.0 (June, 2022).
             Fix: step_win/close_win are now defined before the amplitude
             averaging section; previously, signals no longer than 4*fs samples
             hit the n == 1 branch and raised NameError on close_win.
    """

    # Lowpass filter, then scale (the algorithm works on a magnified signal)
    od = 3
    sos = sp.butter(od, 25, btype = 'low', analog = False, output = 'sos', fs = fs)
    x_f = sp.sosfiltfilt(sos, x)
    x_m = 1000*x_f

    # Moving average
    n = 5
    b = (1/n)*np.ones(n)
    x_ma = sp.filtfilt(b,1,x_m)

    # Smoothed first differential, used for zero-crossing detection
    dif = np.diff(x_ma)
    dif = 100*np.append(dif[0], dif)
    dif_ma = sp.filtfilt(b,1,dif)

    # Windowing parameters (defined before either averaging branch uses them)
    step_win = 2*fs              # window length to look for peaks/onsets
    close_win = np.floor(0.1*fs) # value of what is considered too close

    # Average pulse amplitude thresholds estimated from n 1-second intervals
    x_len = len(x)
    if x_len > 12*fs:
        n = 10
    elif x_len > 7*fs:
        n = 5
    elif x_len > 4*fs:
        n = 2
    else:
        n = 1

    max_min = np.empty(0)
    if n > 1:
        n_int = np.floor(x_len/(n + 2))
        for j in range(n):
            # Searches for max and min in 1 s intervals
            amp_min, ind_min, amp_max, ind_max = seek_local(x_ma, int(j*n_int), int(j*n_int + fs))
            max_min = np.append(max_min, (amp_max - amp_min))
        max_min_avg = np.mean(max_min)
    else:
        amp_min, ind_min , amp_max, ind_max = seek_local(x_ma, int(close_win), int(x_len))
        max_min_avg = amp_max - amp_min

    max_min_lt = 0.4*max_min_avg

    # Seek pulse beats by min-max method
    pks = np.empty(0)   # Location of peaks
    ons = np.empty(0)   # Location of onsets
    dic = np.empty(0)   # Location of dicrotic notches (detection disabled below)

    pk_index = -1       # Number of peaks found (as last index)
    on_index = -1       # Number of onsets found (as last index)
    dn_index = -1       # Number of dicrotic notches found (as last index)

    i = int(close_win)  # Initializes counter
    while i < x_len:    # Iterates through the signal
        amp_min = x_ma[i]   # Initial value for the minimum amplitude
        amp_max = x_ma[i]   # Initial value for the maximum amplitude

        ind = i             # Temporal location of the index
        aux_pks = i         # Temporal location of the peak
        aux_ons = i         # Temporal location of the onset

        # Iterates while ind is lower than the length of the signal
        while ind < x_len - 1:
            # Verifies if no peak has been found in 2 seconds
            if (ind - i) > step_win:
                ind = i                         # Refreshes the index
                max_min_avg = 0.6*max_min_avg   # Relaxes the amplitude threshold
                if max_min_avg <= max_min_lt:
                    max_min_avg = 2.5*max_min_lt
                break

            # Verifies if the location is a candidate peak (sign change of the
            # smoothed derivative)
            if (dif_ma[ind - 1]*dif_ma[ind + 1]) <= 0:
                # Window to search for local peaks and onsets
                if (ind + 5) < x_len:
                    i_stop = ind + 5
                else:
                    i_stop = x_len - 1
                if (ind - 5) >= 0:
                    i_start = ind - 5
                else:
                    i_start = 0

                # Checks for artifacts of saturation or signal loss
                if (i_stop - ind) >= 5:
                    for j in range(ind, i_stop):
                        if dif_ma[j] != 0:
                            break
                    if j == i_stop:
                        break

                # Candidate onset: derivative rises through zero in the window
                if dif_ma[i_start] < 0:
                    if dif_ma[i_stop] > 0:
                        aux_min, ind_min, _, _ = seek_local(x_ma, int(i_start), int(i_stop))
                        if np.abs(ind_min - ind) <= 2:
                            amp_min = aux_min
                            aux_ons = ind_min

                # Candidate peak: derivative falls through zero in the window
                if dif_ma[i_start] > 0:
                    if dif_ma[i_stop] < 0:
                        _, _, aux_max, ind_max = seek_local(x_ma, int(i_start), int(i_stop))
                        if np.abs(ind_max - ind) <= 2:
                            amp_max = aux_max
                            aux_pks = ind_max

                # Accept the pulse only when its amplitude lies between 0.4 and
                # 2 times the running average amplitude
                if (amp_max - amp_min) > 0.4*max_min_avg:
                    if (amp_max - amp_min) < 2*max_min_avg:
                        if aux_pks > aux_ons:
                            # Refine onsets: true minimum between onset and peak
                            aux_min = x_ma[aux_ons]
                            temp_ons = aux_ons
                            for j in range(aux_pks, aux_ons + 1, -1):
                                if x_ma[j] < aux_min:
                                    aux_min = x_ma[j]
                                    temp_ons = j
                            amp_min = aux_min
                            aux_ons = temp_ons

                            # If there is at least one peak found before:
                            if pk_index >= 0:
                                # If the inter-beat interval is too short, restart
                                # the search with a relaxed threshold
                                if (aux_ons - pks[pk_index]) < 3*close_win:
                                    ind = i
                                    max_min_avg = 2.5*max_min_lt
                                    break
                                # If it is too long, discard the previous detection
                                if (aux_pks - pks[pk_index]) > step_win:
                                    pk_index = pk_index - 1
                                    on_index = on_index - 1
                                # If there are still peaks, add the new peak:
                                if pk_index >= 0:
                                    pk_index = pk_index + 1
                                    on_index = on_index + 1
                                    pks = np.append(pks, aux_pks)
                                    ons = np.append(ons, aux_ons)

                                    # Candidate dicrotic-notch search window
                                    # (detection itself is disabled)
                                    tf = ons[pk_index] - ons[pk_index - 1]

                                    to = np.floor(fs/20)
                                    tff = np.floor(0.1*tf)
                                    if tff < to:
                                        to = tff
                                    to = pks[pk_index - 1] + to

                                    te = np.floor(fs/20)
                                    tff = np.floor(0.5*tf)
                                    if tff < te:
                                        te = tff
                                    te = pks[pk_index - 1] + te

                                    #tff = seek_dicrotic(dif_ma[to:te])
                                    #if tff == 0:
                                    #    tff = te - pks[pk_index - 1]
                                    #    tff = np.floor(tff/3)
                                    #dn_index = dn_index + 1
                                    #dic[dn_index] = to + tff

                                    ind = ind + close_win
                                    break
                            # If it is the first peak:
                            if pk_index < 0:
                                pk_index = pk_index + 1
                                on_index = on_index + 1
                                pks = np.append(pks, aux_pks)
                                ons = np.append(ons, aux_ons)
                                ind = ind + close_win
                                break

            ind = ind + 1
        i = int(ind + 1)

    if len(pks) == 0:
        return -1
    else:
        # Compensate the filter delay; clamp the first location at zero
        x_len = len(pks)
        temp_p = np.empty(0)
        for i in range(x_len):
            temp_p = np.append(temp_p, pks[i] - od)
        ttk = temp_p[0]
        if ttk < 0:
            temp_p[0] = 0
        pks = temp_p

        x_len = len(ons)
        temp_o = np.empty(0)
        for i in range(x_len):
            temp_o = np.append(temp_o, ons[i] - od)
        ttk = temp_o[0]
        if ttk < 0:
            temp_o[0] = 0
        ons = temp_o

    pks = pks + 5
    ibis = pks.astype(int)

    return ibis