├── .github ├── release.yml └── workflows │ └── build_publish.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── doc ├── Makefile ├── environment.yml ├── make.bat └── source │ ├── _static │ ├── custom.css │ ├── favicon.png │ ├── flowchart.svg │ └── logo.png │ ├── conf.py │ ├── examples │ ├── n400.qmd │ └── ucap.qmd │ ├── index.rst │ ├── inputs_py.rst │ ├── installation_py.rst │ ├── installation_r.rst │ ├── outputs_py.rst │ ├── outputs_r.rst │ ├── processing_group.rst │ ├── processing_overview.rst │ ├── processing_participant.rst │ ├── processing_tfr.rst │ ├── quickstart_py.rst │ ├── quickstart_r.rst │ ├── reference_py.rst │ ├── references.bib │ └── tables_py │ ├── averaging.csv │ ├── epoching.csv │ ├── inputs.csv │ ├── outputs.csv │ ├── performance.csv │ ├── perm.csv │ ├── preprocessing.csv │ ├── ride.csv │ └── tfr.csv ├── pipeline ├── __init__.py ├── averaging.py ├── boilerplate.py ├── datasets │ ├── __init__.py │ ├── erpcore.py │ ├── erpcore_manifest.csv │ ├── ucap.py │ ├── ucap_manifest.csv │ └── utils.py ├── epoching.py ├── group.py ├── io.py ├── participant.py ├── perm.py ├── preprocessing.py ├── report.py ├── ride.py └── tfr.py ├── pyproject.toml └── setup.py /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - ignore-for-release 5 | categories: 6 | - title: 🛠 Breaking changes 7 | labels: 8 | - breaking-change 9 | - title: 🎉 Exciting new features 10 | labels: 11 | - enhancement 12 | - title: 👎 Deprecations 13 | labels: 14 | - deprecation 15 | - title: 🐛 Bug fixes 16 | labels: 17 | - bug 18 | - title: 📄 Documentation 19 | labels: 20 | - documentation 21 | - title: Other Changes 22 | labels: 23 | - "*" 24 | -------------------------------------------------------------------------------- /.github/workflows/build_publish.yml: -------------------------------------------------------------------------------- 1 | name: Build & publish package 📦 2 | 3 | 
on: 4 | release: 5 | types: [published] 6 | push: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | id-token: write 12 | 13 | jobs: 14 | build: 15 | name: Build package ⚙️ 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout source 19 | uses: actions/checkout@v2 20 | with: 21 | fetch-depth: 0 22 | 23 | - name: Install pypa/build 24 | run: | 25 | pip install build 26 | python -m build 27 | 28 | - uses: actions/upload-artifact@v4 29 | with: 30 | name: package 31 | path: dist/ 32 | 33 | publish: 34 | name: Publish package 🌍 35 | runs-on: ubuntu-latest 36 | needs: build 37 | steps: 38 | - name: Checkout source 39 | uses: actions/checkout@v2 40 | with: 41 | fetch-depth: 0 42 | 43 | - name: Download built artifact 44 | uses: actions/download-artifact@v4 45 | with: 46 | name: package 47 | path: dist 48 | 49 | - name: Publish on TestPyPI 50 | if: startsWith(github.ref, 'refs/tags') != true 51 | uses: pypa/gh-action-pypi-publish@release/v1 52 | with: 53 | repository-url: https://test.pypi.org/legacy/ 54 | 55 | - name: Publish on PyPI 56 | if: startsWith(github.ref, 'refs/tags') 57 | uses: pypa/gh-action-pypi-publish@release/v1 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | .DS_Store 3 | .Rhistory 4 | __pycache__/ 5 | /scratch/ 6 | 7 | # Packaging 8 | *.egg-info/ 9 | _version.py 10 | /dist/ 11 | 12 | # Documentation 13 | /doc/_build/ 14 | /doc/source/generated/ 15 | /doc/source/examples/output/ 16 | /doc/source/tables_r/ 17 | /doc/source/inputs_r.rst 18 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # 
Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "mambaforge-4.10" 12 | 13 | # Build documentation in the "doc/source/" directory with Sphinx 14 | sphinx: 15 | configuration: doc/source/conf.py 16 | 17 | # # Optionally build your docs in additional formats such as PDF and ePub 18 | # formats: all 19 | 20 | # Optional but recommended, declare the Python requirements required 21 | # to build your documentation 22 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 23 | python: 24 | install: 25 | - method: pip 26 | path: . 27 | conda: 28 | environment: doc/environment.yml 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Alexander Enge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hu-neuro-pipeline 2 | 3 | ![PyPI](https://img.shields.io/pypi/v/hu-neuro-pipeline) 4 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/hu-neuro-pipeline) 5 | ![GitHub](https://img.shields.io/github/license/alexenge/hu-neuro-pipeline) 6 | 7 | Single trial EEG pipeline at the [Abdel Rahman Lab for Neurocognitive Psychology](https://abdelrahmanlab.com), Humboldt-Universität zu Berlin 8 | 9 | Based on Frömer, R., Maier, M., & Abdel Rahman, R. (2018). 10 | Group-level EEG-processing pipeline for flexible single trial-based analyses including linear mixed models. 11 | *Frontiers in Neuroscience*, *12*, 48. 12 | 13 | ## 1. 
Installation 14 | 15 | ### 1.1 For Python users 16 | 17 | Install the pipeline via `pip` from the [Python Package Index (PyPI)](https://pypi.org/project/hu-neuro-pipeline/): 18 | 19 | ```bash 20 | pip install hu-neuro-pipeline 21 | ``` 22 | 23 | Alternatively, you can install the latest development version from [GitHub](https://github.com/alexenge/hu-neuro-pipeline.git): 24 | 25 | ```bash 26 | pip install git+https://github.com/alexenge/hu-neuro-pipeline.git 27 | ``` 28 | 29 | ### 1.2 For R users 30 | 31 | First install and load [reticulate](https://rstudio.github.io/reticulate/) (an R package for accessing Python functionality from within R): 32 | 33 | ```r 34 | install.packages("reticulate") 35 | library("reticulate") 36 | ``` 37 | 38 | Check if you already have [conda](https://docs.conda.io/en/latest/) (a scientific Python distribution) installed on your system: 39 | 40 | ```r 41 | conda_exe() 42 | ``` 43 | 44 | If this shows you the path to a conda executable, you can skip the next step. 45 | If instead it shows you an error, you need to install conda: 46 | 47 | ```r 48 | install_miniconda() 49 | ``` 50 | 51 | Then install the pipeline from the [Python Package Index (PyPI)](https://pypi.org/project/hu-neuro-pipeline/): 52 | 53 | ```r 54 | py_install("hu-neuro-pipeline", pip = TRUE) 55 | ``` 56 | 57 | Alternatively, you can install the latest development version from [GitHub](https://github.com/alexenge/hu-neuro-pipeline.git): 58 | 59 | ```r 60 | py_install("git+https://github.com/alexenge/hu-neuro-pipeline.git", pip = TRUE) 61 | ``` 62 | 63 | ## 2. Usage 64 | 65 | ### 2.1 For Python users 66 | 67 | Here is a fairly minimal example for a (fictional) N400/P600 experiment with two experimental factors: `semantics` (e.g., related versus unrelated words) and emotional `context` (e.g., emotionally negative versus neutral). 
68 | 69 | ```python 70 | from pipeline import group_pipeline 71 | 72 | trials, evokeds, config = group_pipeline( 73 | raw_files='Results/EEG/raw', 74 | log_files='Results/RT', 75 | output_dir='Results/EEG/export', 76 | besa_files='Results/EEG/cali', 77 | triggers=[201, 202, 211, 212], 78 | skip_log_conditions={'semantics': 'filler'}, 79 | components={'name': ['N400', 'P600'], 80 | 'tmin': [0.3, 0.5], 81 | 'tmax': [0.5, 0.9], 82 | 'roi': [['C1', 'Cz', 'C2', 'CP1', 'CPz', 'CP2'], 83 | ['Fz', 'FC1', 'FC2', 'C1', 'Cz', 'C2']]}, 84 | average_by={'related': 'semantics == "related"', 85 | 'unrelated': 'semantics == "unrelated"'}) 86 | ``` 87 | 88 | In this example we have specified: 89 | 90 | * The paths to the raw EEG data, to the behavioral log files, to the desired output directory, and to the BESA files for ocular correction 91 | 92 | * Four different EEG `triggers` corresponding to each of the four cells in the 2 × 2 design 93 | 94 | * The fact that log files contain additional trials from a semantic `'filler'` condition (which we want to skip because they don't have corresponding EEG triggers) 95 | 96 | * The *a priori* defined time windows and regions of interest for the N400 and P600 `components` 97 | 98 | * The log file columns (`average_by`) for which we want to obtain by-participant averaged waveforms (i.e., for all main and interaction effects) 99 | 100 | ### 2.2 For R users 101 | 102 | Here is the same example as above but for using the pipeline from R: 103 | 104 | ```R 105 | # Import Python module 106 | pipeline <- reticulate::import("pipeline") 107 | 108 | # Run the group level pipeline 109 | res <- pipeline$group_pipeline( 110 | raw_files = "Results/EEG/raw", 111 | log_files = "Results/RT", 112 | output_dir = "Results/EEG/export", 113 | besa_files = "Results/EEG/cali", 114 | triggers = c(201, 202, 211, 212), 115 | skip_log_conditions = list("semantics" = "filler"), 116 | components = list( 117 | "name" = list("N400", "P600"), 118 | "tmin" = list(0.3, 0.5), 
119 | "tmax" = list(0.5, 0.9), 120 | "roi" = list( 121 | c("C1", "Cz", "C2", "CP1", "CPz", "CP2"), 122 | c("Fz", "FC1", "FC2", "C1", "Cz", "C2") 123 | ) 124 | ), 125 | average_by = list( 126 | related = "semantics == 'related'", 127 | unrelated = "semantics == 'unrelated'" 128 | ) 129 | ) 130 | 131 | # Extract results 132 | trials <- res[[1]] 133 | evokeds <- res[[2]] 134 | config <- res[[3]] 135 | ``` 136 | 137 | ## 3. Processing details 138 | 139 | 140 | 141 | See the [documentation](https://hu-neuro-pipeline.readthedocs.io/en/latest/) for more details about how to use the pipeline and how it works under the hood. 142 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/environment.yml: -------------------------------------------------------------------------------- 1 | name: hu-neuro-pipeline 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - ipykernel 7 | - jupytext 8 | - myst-nb 9 | - pip 10 | - python=3.11 11 | - quarto=1.4.550 12 | - r-base 13 | - r-dplyr 14 | - r-ggplot2 15 | - r-irkernel 16 | - r-lme4 17 | - r-matrix=1.6_5 18 | - r-reticulate 19 | - r-rmisc 20 | - seaborn 21 | - sphinx-book-theme=0.3.3 22 | - sphinx-copybutton 23 | - sphinx-gallery=0.7.0 24 | - sphinxcontrib-bibtex 25 | - pip: 26 | - pybtex-apa-style 27 | - sphinxcontrib-apa 28 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --pst-color-link: 38, 98, 157; 3 | --pst-color-inline-code: 73, 143, 37; 4 | } 5 | 6 | a.footnote-reference { 7 | vertical-align: baseline; 8 | font-size: 100%; 9 | } 10 | 11 | .output.text_html { 12 | overflow: auto; 13 | } 14 | -------------------------------------------------------------------------------- /doc/source/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexenge/hu-neuro-pipeline/45aeaa180051e9b8b3c1cafa90f366651d200dcf/doc/source/_static/favicon.png -------------------------------------------------------------------------------- /doc/source/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexenge/hu-neuro-pipeline/45aeaa180051e9b8b3c1cafa90f366651d200dcf/doc/source/_static/logo.png -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import inspect 7 | import os 8 | import sys 9 | from pathlib import Path 10 | 11 | import pandas as pd 12 | 13 | # Make sure the pipeline package is available 14 | sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix()) 15 | import pipeline 16 | 17 | # Make sure Quarto and its dependencies are available 18 | # This seems to be necessary when install Quarto via conda -- it doesn't by 19 | # itself find the `share` directory or `deno` in the correct places 20 | bin_path = Path(sys.executable).parent 21 | share_path = bin_path.parent.joinpath('share') 22 | os.environ['QUARTO_SHARE_PATH'] = share_path.joinpath('quarto').resolve().as_posix() 23 | os.environ['DENO_DIR'] = bin_path.resolve().as_posix() 24 | os.environ['DENO_BIN'] = bin_path.joinpath('deno').resolve().as_posix() 25 | os.environ['QUARTO_DENO'] = bin_path.joinpath('deno').resolve().as_posix() 26 | 27 | # -- Project information ----------------------------------------------------- 28 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 29 | 30 | project = 'hu-neuro-pipeline' 31 | copyright = '2024, Abdel Rahman Lab for Neurocognitive Psychology' 32 | author = 'Abdel Rahman Lab for Neurocognitive Psychology' 33 | version = '.'.join(pipeline.__version__.split('.', 2)[:2]) 34 | release = pipeline.__version__ 35 | 36 | # -- General configuration --------------------------------------------------- 37 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 38 | 39 | extensions = ['sphinx.ext.autodoc', 40 | 'sphinx.ext.autosummary', 41 | 'sphinx.ext.intersphinx', 42 | 'sphinx.ext.linkcode', 43 | 'sphinx.ext.napoleon', 44 | 'sphinxcontrib.bibtex', 45 | 'sphinxcontrib.apa', 46 | 'myst_nb', 47 | 'sphinx_copybutton', 48 | 'sphinx_gallery.load_style'] 49 | templates_path = 
['_templates'] 50 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] 51 | source_suffix = { 52 | '.rst': 'restructuredtext', 53 | '.qmd': 'myst-nb' 54 | } 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 58 | 59 | html_title = 'hu-neuro-pipeline' 60 | html_logo = '_static/logo.png' 61 | html_favicon = '_static/favicon.png' 62 | html_theme = 'sphinx_book_theme' 63 | html_theme_options = { 64 | 'path_to_docs': 'doc', 65 | 'repository_url': 'https://github.com/alexenge/hu-neuro-pipeline', 66 | 'repository_branch': 'main', 67 | 'use_repository_button': True, 68 | 'use_issues_button': True, 69 | 'use_edit_page_button': True, 70 | 'use_fullscreen_button': False, 71 | 'extra_navbar': ''} 72 | html_static_path = ['_static'] 73 | html_css_files = ['custom.css'] 74 | pygments_style = 'tango' 75 | 76 | # -- Options for sphinx.linkscode -------------------------------------------- 77 | # https://www.sphinx-doc.org/en/master/usage/extensions/linkcode.html 78 | 79 | def linkcode_resolve(domain, info): 80 | def find_source(): 81 | # try to find the file and line number, based on code from numpy: 82 | # https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286 83 | obj = sys.modules[info['module']] 84 | for part in info['fullname'].split('.'): 85 | obj = getattr(obj, part) 86 | fn = inspect.getsourcefile(obj) 87 | fn = os.path.relpath(fn, start=os.path.dirname(pipeline.__file__)) 88 | source, lineno = inspect.getsourcelines(obj) 89 | return fn, lineno, lineno + len(source) - 1 90 | 91 | if domain != 'py' or not info['module']: 92 | return None 93 | try: 94 | filename = 'pipeline/%s#L%d-L%d' % find_source() 95 | except Exception: 96 | filename = info['module'].replace('.', '/') + '.py' 97 | tag = 'main' if 'dev' in release else ('v' + release) 98 | 99 | return 
"https://github.com/alexenge/hu-neuro-pipeline/blob/%s/%s" % (tag, filename) 100 | 101 | # -- InterSphinx options ----------------------------------------------------- 102 | # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration 103 | 104 | intersphinx_mapping = { 105 | 'python': ('https://docs.python.org/3', None), 106 | 'mne': ('https://mne.tools/stable', None), 107 | 'numpy': ('https://numpy.org/doc/stable', None), 108 | 'scipy': ('https://docs.scipy.org/doc/scipy', None), 109 | 'matplotlib': ('https://matplotlib.org/stable', None), 110 | 'sklearn': ('https://scikit-learn.org/stable', None), 111 | 'pandas': ('https://pandas.pydata.org/pandas-docs/stable', None), 112 | } 113 | 114 | # -- Napoleon options -------------------------------------------------------- 115 | # https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html#configuration 116 | 117 | napoleon_preprocess_types = True 118 | 119 | ## -- sphinxcontrib-bibtex options ------------------------------------------- 120 | # https://sphinxcontrib-bibtex.readthedocs.io/en/latest/usage.html#configuration 121 | 122 | bibtex_bibfiles = ['references.bib'] 123 | bibtex_default_style = 'apa' 124 | 125 | # -- nbsphinx options -------------------------------------------------------- 126 | # https://nbsphinx.readthedocs.io/en/latest/configuration.html 127 | 128 | nb_execution_timeout = 600 129 | nb_custom_formats = { 130 | '.pct.py': ['jupytext.reads', {'fmt': 'py:percent'}], 131 | '.qmd': ['jupytext.reads', {'fmt': 'quarto'}], 132 | '.Rmd': ['jupytext.reads', {'fmt': 'Rmd'}] 133 | } 134 | nb_render_image_options = {'width': '70%', 'align': 'center'} 135 | 136 | # -- Convert Python syntax examples to R syntax examples --------------------- 137 | 138 | 139 | def convert_input_tables(): 140 | """Converts tables with Python syntax examples to R syntax examples.""" 141 | 142 | input_dir = Path(__file__).parent / 'tables_py' 143 | input_files = input_dir.glob('*.csv') 144 | 145 | 
output_dir = Path(__file__).parent / 'tables_r' 146 | output_dir.mkdir(exist_ok=True) 147 | 148 | for input_file in input_files: 149 | 150 | df = pd.read_csv(input_file) 151 | 152 | df.to_csv(input_file, index=False) 153 | 154 | for col_name in ['Argument', 'Example']: 155 | 156 | python_strings = list(df[col_name]) 157 | 158 | r_strings = [] 159 | 160 | for python_string in python_strings: 161 | 162 | if not isinstance(python_string, str): 163 | 164 | r_strings.append(python_string) 165 | 166 | continue 167 | 168 | r_string = python_string.\ 169 | replace('\'', 'PLACEHOLDER').\ 170 | replace('\"', '\'').\ 171 | replace('PLACEHOLDER', '\"').\ 172 | replace('[(', 'list(c(').\ 173 | replace(')]', '))').\ 174 | replace('[[', 'list(c(').\ 175 | replace(': [', ' = list(').\ 176 | replace('[', 'c(').\ 177 | replace(']', ')').\ 178 | replace('{', 'list(').\ 179 | replace('":', '" =').\ 180 | replace('}', ')').\ 181 | replace('``(', '``c(').\ 182 | replace('True', 'TRUE').\ 183 | replace('False', 'FALSE').\ 184 | replace('None', 'NULL').\ 185 | replace('np.arange', 'seq').\ 186 | replace('np.linspace', 'seq').\ 187 | replace('step=', 'by = ').\ 188 | replace('num=', 'length.out = ').\ 189 | replace(r'^nan$', '') 190 | r_strings.append(r_string) 191 | 192 | df[col_name] = r_strings 193 | 194 | output_file = output_dir / input_file.name 195 | df.to_csv(output_file, index=False) 196 | 197 | 198 | def convert_input_page(): 199 | 200 | input_file = Path(__file__).parent / 'inputs_py.rst' 201 | 202 | with open(input_file, 'r') as file: 203 | input = file.read() 204 | output = input.\ 205 | replace('Python syntax', 'R syntax').\ 206 | replace(' tables_py/', ' tables_r/') 207 | 208 | output_file = Path(__file__).parent / 'inputs_r.rst' 209 | 210 | with open(output_file, 'w') as file: 211 | file.write(output) 212 | 213 | 214 | def run_before_docs(app): 215 | """Runs some functions before the documentation is built.""" 216 | 217 | convert_input_tables() 218 | convert_input_page() 
219 | 220 | 221 | def setup(app): 222 | """Controls the setup of the Sphinx documentation build process.""" 223 | 224 | app.connect('builder-inited', run_before_docs) 225 | -------------------------------------------------------------------------------- /doc/source/examples/n400.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | text_representation: 5 | extension: .qmd 6 | format_name: quarto 7 | format_version: '1.0' 8 | jupytext_version: 1.14.7 9 | kernelspec: 10 | display_name: Python 3 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Example: ERP CORE data 16 | 17 | ## Loading Python modules 18 | 19 | ```{python} 20 | import json 21 | import seaborn as sns 22 | from pipeline import group_pipeline 23 | from pipeline.datasets import get_erpcore 24 | ``` 25 | 26 | ## Downloading example data 27 | 28 | The pipeline comes with a function to download example data from the ERP CORE dataset. 29 | This dataset contains data from 40 participants who completed six different EEG experiments which were specifically designed to elicit seven common ERP components. 30 | 31 | For example, in the N400 experiment, participants viewed pairs of prime and target words that were either semantically related (EEG triggers `211` and `212`) or unrelated (EEG triggers `221` and `222`). 32 | 33 | The raw data are stored in the [Open Science Framework](https://osf.io/29xpq) and more details about the study are in [Kappenman et al. (2021)](https://doi.org/10.1016/j.neuroimage.2020.117465). 34 | 35 | ```{python} 36 | #| tags: [hide-output] 37 | n400_files = get_erpcore('N400', participants=4) 38 | ``` 39 | 40 | ```{python} 41 | def print_dict(d): print(json.dumps(d, indent=4)) 42 | print_dict(n400_files) 43 | ``` 44 | 45 | To save time, we only download and process data from the first four participants. 
Feel free to re-run the example with more participants by increasing or removing the `participants` argument.
97 | 98 | ## Checking the results 99 | 100 | This pipeline returns three objects: A dataframe of single trial ERP amplitudes, a dataframe of by-participant condition averages, and a dictionary of pipeline metadata. 101 | 102 | ### Single trial amplitudes 103 | These are basically just the log files, concatenated for all participants, with two added columns for the two ERP components of interest. 104 | Each value in these columns reflects the single trial ERP amplitude, averaged across time points and channels of interest. 105 | 106 | Here are the first couple of lines of the dataframe: 107 | 108 | ```{python} 109 | trials.head() 110 | ``` 111 | 112 | Since we only have four participant and relatively few trials per participant, we can show all the single trial amplitudes in one plot (color-coded by condition): 113 | 114 | ```{python, tags=c("nbsphinx-thumbnail")} 115 | trials['condition'] = trials['value'].map({211: 'related', 212: 'related', 116 | 221: 'unrelated', 222: 'unrelated'}) 117 | trials['participant'] = trials['participant_id'].str.extract(r'(sub-\d+)') 118 | 119 | _ = sns.swarmplot(data=trials, x='participant', y='N400', hue='condition') 120 | ``` 121 | 122 | We could also use this dataframe for statistical analysis on the single trial level, e.g., using linear mixed-effects models with the `lme4` package in R (see [UCAP example](ucap.qmd)) or the `statsmodels` package in Python. 123 | 124 | ### By-participant averages 125 | This is one big data frame which, unlike `trials`, is averaged across trials (i.e., losing any single trial information) but *not* averaged across time points or channels (i.e., retaining the millisecond-wise ERP waveform at all electrodes). 
126 | 127 | ```{python} 128 | evokeds.head() 129 | ``` 130 | 131 | We can use it to display the grand-averaged ERP waveforms for different conditions as a timecourse plot at a single channel or ROI (here for the N400 ROI): 132 | 133 | ```{python} 134 | _ = sns.lineplot(data=evokeds, x='time', y='N400', hue='label', errorbar=None) 135 | ``` 136 | 137 | Note that we're explicitly disabling error bars here because they would be invalid due to the fact that our condition effect (related vs. unrelated) is a within-participant factor. 138 | See the [UCAP example](ucap.qmd) for how to compute and plot valid within-participant error bars around the grand-averged evoked waveform. 139 | 140 | ### Pipeline metadata 141 | 142 | This is a dictionary with various metadata about the pipeline run. 143 | It contains: 144 | 145 | - The user-specified input arguments 146 | - The default values for those arguments that were not specified 147 | - Some descriptive statistics that were computed by the pipeline along the way 148 | (e.g., the number of indices of rejecected epochs based on peak-to-peak amplitude) 149 | - The software version of Python, the pipeline and its most important dependency packages 150 | 151 | ```{python} 152 | print_dict(config) 153 | ``` 154 | -------------------------------------------------------------------------------- /doc/source/examples/ucap.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | text_representation: 5 | extension: .qmd 6 | format_name: quarto 7 | format_version: '1.0' 8 | jupytext_version: 1.14.7 9 | kernelspec: 10 | display_name: R 11 | language: R 12 | name: ir 13 | --- 14 | 15 | # Example: UCAP data 16 | 17 | ## Loading R packages 18 | 19 | ```{r} 20 | #| tags: [remove-output] 21 | library("reticulate") 22 | library("Rmisc") 23 | library("dplyr") 24 | library("ggplot2") 25 | library("lme4") 26 | ``` 27 | 28 | ## Loading the pipeline 29 | 30 | After following the 
[installation instructions for R users](../installation_r.rst), we can use the [reticulate](https://rstudio.github.io/reticulate/) Package to load the Python pipeline package directly from R. 31 | 32 | ```{r} 33 | #| tags: [remove-output] 34 | pipeline <- import("pipeline") 35 | ``` 36 | 37 | ## Downloading example data 38 | 39 | The pipeline comes with a function to download example data from the Abdel Rahman Lab's UCAP study. 40 | In this EEG experiment, participants performed a visual search task with visual objects that were either presented visually intact (factor `n_b == "normal"`) or blurred (factor `n_b == "blurr"`). 41 | 42 | The raw data are stored in the [Open Science Framework](https://osf.io/hdxvb) and more details about the study are in [Frömer et al. (2018)](https://doi.org/10.3389/fnins.2018.00048). 43 | 44 | ```{r} 45 | ucap_files <- pipeline$datasets$get_ucap(participants = 2) 46 | print(ucap_files) 47 | ``` 48 | 49 | To save time, we only download and process data from the first two participants. 50 | Feel free to re-run the example with more participants by increasing or removing the `n_participants` argument. 51 | 52 | The paths of the downloaded EEG header files (`.vhdr`), behavioral log files (`.txt`), and ocular correction files (`.matrix`) can now be fed into pipeline. 
53 | 54 | ## Running the pipeline 55 | 56 | We run a simple pipeline for single-trial ERP analysis with the following steps: 57 | 58 | - Downsampling to 250 Hz 59 | - Re-referencing to common average (per default) 60 | - Ocular correction with BESA/MSEC matrices 61 | - Default bandpass filtering between 0.1 and 40 Hz (per default) 62 | - Segmentation to epochs around stimulus triggers 63 | - Baseline correction (per default) 64 | - Rejecting bad epochs based on peak-to-peak amplitudes > 200 µV (per default) 65 | - Computing single trial N2 and P3b amplitudes by averaging across time windows and channels of interest 66 | - Creating by-participant averages for the blurred and normal conditions 67 | 68 | ```{r} 69 | res <- pipeline$group_pipeline( 70 | 71 | # Input/output paths 72 | raw_files = ucap_files$raw_files, 73 | log_files = ucap_files$log_files, 74 | output_dir = "output", 75 | 76 | # Preprocessing options 77 | downsample_sfreq = 250.0, 78 | besa_files = ucap_files$besa_files, 79 | 80 | # Epoching options 81 | triggers = c(201:208, 211:218), 82 | components = list( 83 | "name" = list("N2", "P3b"), 84 | "tmin" = list(0.25, 0.4), 85 | "tmax" = list(0.35, 0.55), 86 | "roi" = list( 87 | c("FC1", "FC2", "C1", "C2", "Cz"), 88 | c("CP3", "CP1", "CPz", "CP2", "CP4", "P3", "Pz", "P4", "PO3", "POz", "PO4") 89 | ) 90 | ), 91 | 92 | # Averaging options 93 | average_by = list( 94 | blurr = "n_b == 'blurr'", 95 | normal = "n_b == 'normal'" 96 | ) 97 | ) 98 | ``` 99 | 100 | See the [Pipeline inputs](../inputs_r.rst) page for a list of all available processing options. 101 | 102 | ## Checking the results 103 | 104 | The resulting object (`res`) is a list with three components: A dataframe of single trial ERP amplitudes, a dataframe of by-participant condition averages, and a dictionary of pipeline metadata. 
105 | 106 | ```{r} 107 | str(res, max.level = 1) 108 | ``` 109 | 110 | ### Single-trial ERP amplitudes 111 | 112 | These are basically just the log files, concatenated for all participants, with two added columns for the two ERP components of interest. 113 | Each value in these columns reflects the single trial ERP amplitude, averaged across time points and channels of interest. 114 | 115 | Here are the first couple of lines of the dataframe: 116 | 117 | ```{r} 118 | trials <- res[[1]] 119 | head(trials) 120 | ``` 121 | 122 | We can plot the single trial ERP amplitudes (here for the N2 component), separately for the blurred and normal conditions, e.g., as a density plot: 123 | 124 | ```{r} 125 | trials |> 126 | ggplot(aes(x = N2, fill = n_b)) + 127 | geom_density(color = NA, alpha = 0.5) + 128 | labs(x = "N2 amplitude (µV)", y = "Density", fill = "Condition") + 129 | theme_minimal(base_size = 25.0) + 130 | theme(legend.position = "top") 131 | ``` 132 | 133 | Raincloud plots ([Allen et al., 2021](https://doi.org/10.12688/wellcomeopenres.15191.2)) would be a fancier alternative (e.g., using the [ggrain](https://github.com/njudd/ggrain) package). 134 | 135 | Note that these kinds of plots do not take into account the fact that the single trial amplitudes are nested within participants (and/or items). 136 | To do this, and to quantify if any descriptive differences between conditions are statistically reliable, we can run a linear mixed-effects model: 137 | 138 | ```{r} 139 | mod <- lmer(N2 ~ n_b + (1 | participant_id), data = trials) 140 | summary(mod) 141 | ``` 142 | 143 | Here we predict the single trial N2 amplitude based on the fixed effect of blurred vs. normal, and we allow for random variation in the intercept between participants. 
144 | 145 | Note that for sound inference on the full dataset, we would want to: 146 | 147 | - apply proper contrast coding to the `n_b` factor (e.g., [Schad et al., 2020](https://doi.org/10.1016/j.jml.2019.104038)), 148 | - include random effects not just for participants, but also for items (e.g., [Judd et al., 2012](https://doi.org/10.1037/a0028347)), and 149 | - include not just random intercepts, but also random slopes (e.g., [Barr et al., 2013](https://doi.org/10.1016/j.jml.2012.11.001)). 150 | 151 | ### By-participant condition averages 152 | 153 | This is one big data frame which, unlike `trials`, is averaged across trials (i.e., losing any single trial information) but *not* averaged across time points or channels (i.e., retaining the millisecond-wise ERP waveform at all electrodes). 154 | 155 | ```{r} 156 | evokeds <- res[[2]] 157 | head(evokeds) 158 | ``` 159 | 160 | We can use it to display the grand-averaged ERP waveforms for different conditions as a timecourse plot at a single channel or ROI (here for the N2 ROI): 161 | 162 | ```{r} 163 | evokeds |> 164 | ggplot(aes(x = time, y = N2, color = label)) + 165 | stat_summary(geom = "line", fun = mean) + 166 | labs(x = "Time (s)", y = "N2 amplitude (µV)", color = "Condition") + 167 | theme_minimal(base_size = 25.0) + 168 | theme(legend.position = "top") 169 | ``` 170 | 171 | We can add error bars to the waveforms using the appropriate [standard error for within-participant variables](http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#error-bars-for-within-subjects-variables): 172 | 173 | ```{r, tags=c("nbsphinx-thumbnail")} 174 | evokeds |> 175 | summarySEwithin( 176 | measurevar = "N2", 177 | withinvars = c("label", "time"), 178 | idvar = "participant_id" 179 | ) |> 180 | mutate(time = as.numeric(as.character(time))) |> 181 | ggplot(aes(x = time, y = N2)) + 182 | geom_ribbon(aes(ymin = N2 - se, ymax = N2 + se, fill = label), alpha = 0.2) + 183 | geom_line(aes(color = label)) + 184 | 
labs( 185 | x = "Time (s)", 186 | y = "N2 amplitude (µV)", 187 | color = "Condition", 188 | fill = "Condition" 189 | ) + 190 | theme_minimal(base_size = 25.0) + 191 | theme(legend.position = "top") 192 | ``` 193 | 194 | Note that (a) these error bars do not necessarily have to agree with the mixed model inference above, since one is performed on data averaged across trials and the other on data averaged across time, and (b) that the error bars in this example are very large and noisy because they are based on only two participants. 195 | 196 | ### Pipeline metadata 197 | 198 | This is a dictionary (i.e., a named list) with various metadata about the pipeline run. 199 | 200 | ```{r} 201 | config <- res[[3]] 202 | names(config) 203 | ``` 204 | 205 | It includes any input arguments that were used by the pipeline (either user-specified or default values). 206 | Additionally, it contains some statistics that were automatically computed by the pipeline along the way, such as the number of rejected epochs (based on a peak-to-peak amplitude threshold) per participant: 207 | 208 | ```{r} 209 | lengths(config$auto_rejected_epochs) 210 | ``` 211 | 212 | Finally, it records the Python version and the versions of the most important Python packages that were used by the pipeline: 213 | 214 | ```{r} 215 | config$package_versions 216 | ``` 217 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. hu-neuro-pipeline documentation master file, created by 2 | sphinx-quickstart on Fri Jan 27 11:23:58 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | hu-neuro-pipeline 7 | ================= 8 | 9 | .. image:: https://img.shields.io/pypi/v/hu-neuro-pipeline 10 | :target: https://pypi.org/project/hu-neuro-pipeline 11 | :alt: Latest Version 12 | 13 | .. 
image:: https://img.shields.io/pypi/pyversions/hu-neuro-pipeline.svg 14 | :target: https://img.shields.io/pypi/pyversions/hu-neuro-pipeline 15 | :alt: PyPI - Python Version 16 | 17 | .. image:: https://img.shields.io/github/license/alexenge/hu-neuro-pipeline 18 | :target: https://github.com/alexenge/hu-neuro-pipeline/blob/main/LICENSE 19 | :alt: License 20 | 21 | | 22 | 23 | Single trial EEG pipeline at the `Abdel Rahman Lab for Neurocognitive Psychology `_, Humboldt-Universität zu Berlin 24 | 25 | Based on Frömer, R., Maier, M., & Abdel Rahman, R. (2018). 26 | Group-level EEG-processing pipeline for flexible single trial-based analyses including linear mixed models. 27 | *Frontiers in Neuroscience*, *12*, 48. `https://doi.org/10.3389/fnins.2018.00048 `_ 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | :caption: For Python users 32 | 33 | installation_py 34 | quickstart_py 35 | inputs_py 36 | outputs_py 37 | examples/n400 38 | reference_py 39 | 40 | .. toctree:: 41 | :maxdepth: 1 42 | :caption: For R users 43 | 44 | installation_r 45 | quickstart_r 46 | inputs_r 47 | outputs_r 48 | examples/ucap 49 | 50 | .. toctree:: 51 | :maxdepth: 1 52 | :caption: Processing details 53 | 54 | processing_overview 55 | processing_participant 56 | processing_group 57 | processing_tfr 58 | -------------------------------------------------------------------------------- /doc/source/inputs_py.rst: -------------------------------------------------------------------------------- 1 | Pipeline inputs 2 | =============== 3 | 4 | Input file paths 5 | ---------------- 6 | 7 | .. csv-table:: 8 | :file: tables_py/inputs.csv 9 | :widths: 20, 45, 35 10 | :header-rows: 1 11 | 12 | Output file paths 13 | ----------------- 14 | 15 | .. csv-table:: 16 | :file: tables_py/outputs.csv 17 | :widths: 20, 45, 35 18 | :header-rows: 1 19 | 20 | Preprocessing options 21 | --------------------- 22 | 23 | .. 
csv-table:: 24 | :file: tables_py/preprocessing.csv 25 | :widths: 20, 45, 35 26 | :header-rows: 1 27 | 28 | Epoching options 29 | ---------------- 30 | 31 | .. csv-table:: 32 | :file: tables_py/epoching.csv 33 | :widths: 20, 45, 35 34 | :header-rows: 1 35 | 36 | RIDE correction options 37 | ----------------------- 38 | 39 | .. csv-table:: 40 | :file: tables_py/ride.csv 41 | :widths: 20, 45, 35 42 | :header-rows: 1 43 | 44 | Averaging options 45 | ----------------- 46 | 47 | .. csv-table:: 48 | :file: tables_py/averaging.csv 49 | :widths: 20, 45, 35 50 | :header-rows: 1 51 | 52 | Time-frequency analysis options 53 | ------------------------------- 54 | 55 | .. csv-table:: 56 | :file: tables_py/tfr.csv 57 | :widths: 20, 45, 35 58 | :header-rows: 1 59 | 60 | Permutation test options 61 | ------------------------ 62 | 63 | .. csv-table:: 64 | :file: tables_py/perm.csv 65 | :widths: 20, 45, 35 66 | :header-rows: 1 67 | 68 | Performance options 69 | ------------------- 70 | 71 | .. csv-table:: 72 | :file: tables_py/performance.csv 73 | :widths: 20, 45, 35 74 | :header-rows: 1 75 | -------------------------------------------------------------------------------- /doc/source/installation_py.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | The pipeline can be installed from the `Python Package Index (PyPI) `_ via the command line: 5 | 6 | .. code-block:: bash 7 | 8 | pip install hu-neuro-pipeline 9 | 10 | Alternatively, to install the latest development version directly from `GitHub `_: 11 | 12 | .. code-block:: bash 13 | 14 | pip install git+https://github.com/alexenge/hu-neuro-pipeline.git 15 | 16 | The pipeline requires Python Version ≥ 3.8 and a number of `dependency packages `_, which will get installed automatically when running the commands above. 17 | 18 | 19 | What next? 20 | ---------- 21 | 22 | To jump right into how to use the pipeline, see :doc:`Quickstart `. 
23 | 24 | To learn about the different steps that the pipeline is carrying out, see :doc:`Processing details `. 25 | 26 | If you have questions or need help with using the pipeline, please `create an issue on GitHub `_. 27 | -------------------------------------------------------------------------------- /doc/source/installation_r.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | First install and load `reticulate `_ (an R package for accessing Python functionality from within R): 5 | 6 | .. code-block:: r 7 | 8 | install.packages("reticulate") 9 | library("reticulate") 10 | 11 | Check if you already have `conda `_ (a scientific Python distribution) installed on your system: 12 | 13 | .. code-block:: r 14 | 15 | conda_exe() 16 | 17 | If this shows you the path to a conda executable, you can skip the next step. 18 | If instead it shows you an error, you need to install conda: 19 | 20 | .. code-block:: r 21 | 22 | install_miniconda() 23 | 24 | Then install the pipeline from the `Python Package Index (PyPI) `_: 25 | 26 | .. code-block:: r 27 | 28 | py_install("hu-neuro-pipeline", pip = TRUE) 29 | 30 | Alternatively, you can install the latest development version from `GitHub `_: 31 | 32 | .. code-block:: r 33 | 34 | py_install("git+https://github.com/alexenge/hu-neuro-pipeline.git", pip = TRUE) 35 | 36 | What next? 37 | ---------- 38 | 39 | To jump right into how to use the pipeline, see :doc:`Quickstart `. 40 | 41 | To learn about the different steps that the pipeline is carrying out, see :doc:`Processing details `. 42 | 43 | If you have questions or need help with using the pipeline, please `create an issue on GitHub `_. 
44 | -------------------------------------------------------------------------------- /doc/source/outputs_py.rst: -------------------------------------------------------------------------------- 1 | Pipeline outputs 2 | ================ 3 | 4 | *Work in progress* 5 | -------------------------------------------------------------------------------- /doc/source/outputs_r.rst: -------------------------------------------------------------------------------- 1 | Pipeline outputs 2 | ================ 3 | 4 | *Work in progress* 5 | -------------------------------------------------------------------------------- /doc/source/processing_group.rst: -------------------------------------------------------------------------------- 1 | Group level 2 | =========== 3 | 4 | Combine trials 5 | -------------- 6 | 7 | The single trial amplitude dataframes from all participants, containing the log file data plus the computed single trial mean amplitudes for the ERP components of interest, are concatenated and saved in ``.csv`` format. 8 | This dataframe can be viewed with spreadsheet software and/or directly be used as the input into a statistical model (e.g., an LMM in R with lme4). 9 | 10 | Combine evokeds 11 | --------------- 12 | 13 | The by-participant condition averages (evokeds) from all participants are concatenated across participants and saved as a dataframe in ``.csv`` format and/or as an MNE-Python evoked object in ``.fif`` format. 14 | This dataframe can be used for plotting time courses and scalp topographies. 15 | 16 | Compute grand averages 17 | ---------------------- 18 | 19 | The evokeds for each condition are averaged across participants and saved as a dataframe in ``.csv`` format and/or as an MNE-Python evoked object in ``.fif`` format. 20 | This dataframe can be used for plotting time courses and scalp topographies. 
21 | 22 | Cluster-based permutation tests 23 | ------------------------------- 24 | 25 | *Work in progress* 26 | -------------------------------------------------------------------------------- /doc/source/processing_overview.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | .. image:: _static/flowchart.svg 5 | :alt: Flow diagram of the pipeline steps 6 | :align: center 7 | -------------------------------------------------------------------------------- /doc/source/processing_participant.rst: -------------------------------------------------------------------------------- 1 | Participant level 2 | ================= 3 | 4 | Read raw data 5 | ------------- 6 | 7 | Reads the raw data (currently assumed to be in BrainVision format) into MNE-Python. 8 | 9 | Downsample 10 | ---------- 11 | 12 | Optionally reduces the sampling rate of the data. 13 | No downsampling is performed by default but moderate downsampling (e.g., from 500 to 250 Hz) will make subsequent computations faster and reduce the amount of disc space needed (especially relevant for time-frequency analysis). 14 | 15 | Read channel locations 16 | ---------------------- 17 | 18 | Reads the 2D and/or 3D positions of the EEG sensors based on a known standard montage or based on a custom channel location file. 19 | The channel locations are written into an output file (``channel_locations.csv``) in such a format that they can easily be used for plotting using the R package `eegUtils `_. 20 | 21 | Interpolate bad channels 22 | ------------------------ 23 | 24 | Optionally replaces the data from EEG channels known to be "bad" (e.g., noisy or flat) with an interpolated value based on their neighboring channels. 25 | The interpolation is done using spherical splines, the default in MNE-Python [#]_. 
26 | 27 | Re-reference to average 28 | ----------------------- 29 | 30 | The EEG channels are re-referenced from the online reference (e.g., M1, the left mastoid) to a different channel or set of channels. 31 | The most common choice is to re-reference to an average reference, meaning that at each time point, the average of all EEG channels is subtracted from each channel. 32 | This is done to reduce the impact of any noise or spatial bias that may be present in the online reference electrode. 33 | It has the effect that the average of all EEG channels is zero at each time point but preserves any relative difference between channels at different areas of the scalp. 34 | 35 | Ocular correction 36 | ----------------- 37 | 38 | Optionally performs a correction of eye blink and eye movement artifacts using multiple source eye correction (MSEC/BESA) or independent component analysis (ICA). 39 | 40 | * **MSEC/BESA** requires one custom correction matrix file for each participant, created using the commercial BESA software based on calibration data. This is a channels × channels matrix with correction weights that gets multiplied with the continuous EEG data to correct for eye blinks and eye movements. 41 | 42 | * **ICA** is computed based on an initial principal component analysis (PCA) of a high-pass filtered (cutoff = 1 Hz) copy of the continuous EEG data. A fixed or adaptive number of principal components will be used and the flavor of the ICA algorithm can be selected (``'fastica'`` seems to be a reasonable default). The pipeline then automatically detects and removes independent components (ICs) that are likely to reflect eye blinks or eye movements, indicated by a significant correlation between the time course of the IC and either of two virtual EOG channels (VEOG or HEOG). 
The quality of the ICA and the selected components can be inspected in the quality control HTML reports that are optionally generated for each participant when setting the ``report_dir`` argument. 43 | 44 | Frequency filter 45 | ---------------- 46 | 47 | By default applies a band-pass filter between 0.1 and 40 Hz to the data. 48 | This removes low-frequency noise (e.g., electrode drifts due to sweat) and high-frequency noise (e.g., line noise and muscle artifacts). 49 | Either or both of the cutoff frequencies can be changed or disabled so that data will only be low-pass filtered, high-pass filtered, or not filtered at all. 50 | The default filter from MNE-Python is used which, at the time of writing, is a one-pass, zero-phase, non-causal finite impulse response (FIR) filter with a Hamming window [#]_. 51 | More information about the filter (e.g., the transition bandwidth and filter length) is also printed to the console while the pipeline is running. 52 | Note that excessive filtering (esp. high-pass filtering > 0.1 Hz) can introduce artifactual "bumps" in the ERP [#]_. 53 | 54 | Segment to epochs 55 | ----------------- 56 | 57 | The continuous EEG data is segmented into discontinuous epochs around the events (typically stimuli or responses) of interest. 58 | Each event of interest needs to have a numerical EEG trigger value associated with it. 59 | Epochs should typically be one to two seconds long and include a couple of hundred milliseconds before event onset (default: -0.5 s to 1.5 s). 60 | An interval before stimulus onset (default: -0.2 to 0.0 s) is typically used for baseline correction to remove any voltage offset between trials. 61 | At each channel and for each epoch, the average voltage during this time window is subtracted from all time points in the epoch. 
62 | 63 | Read + match log file 64 | --------------------- 65 | 66 | The pipeline assumes that there is a text file (called the "log file") that contains tabular information about each EEG trial, containing information such as the stimulus that was presented, the experimental condition(s) to which it belonged, and the reaction time of the participant. 67 | Such files are typically written automatically by the software that was used to display the experiment, such as Presentation or PsychoPy. 68 | **It is super important that there are the same number of trials (rows) in the log file as there are triggers (epochs) in the EEG data.** 69 | If this is not the case, the log file can be manipulated (e.g., in R or pandas) to exclude any trials or entire conditions without corresponding triggers. 70 | It is also possible to let the pipeline search for and delete behavioral log file trials with missing EEG data automatically, as long as you have a log file column with the (expected) EEG trigger for every trial. 71 | 72 | Reject bad epochs 73 | ----------------- 74 | 75 | The pipeline will declare epochs as "bad" if the peak-to-peak amplitude (i.e., the difference between the highest voltage and the lowest voltage) at any channel exceeds a certain threshold (default: 200 µV). 76 | Declaring epochs as "bad" means that their single trial mean ERP amplitude will be set to ``NaN`` for all components in the single trial data frame, and that these epochs will not enter the computation of the by-participant condition averages (evokeds). 77 | 78 | .. _ride-details: 79 | 80 | RIDE speech artifact correction 81 | ------------------------------- 82 | 83 | RIDE (Residue Iteration Decomposition) is a method to decompose event-related potentials into separate component clusters [#]_ [#]_. 
84 | One typical use case (and the only one implemented here) is to separate the ERP into a stimulus-related component (the "S" component) and a response-related component (the "R" component), and then subtract the R component (containing the speech artifact) from each single trial, based on its individual response latency (e.g., measured with a voice onset trigger in a language production experiment). 85 | The result are single trial ERPs that are "cleaned" from any response-related artifacts (e.g., the speech artifacts that occur during language production tasks; see [#]_ for details). 86 | 87 | There are a few things to note when you want to use RIDE for speech artifact correction: 88 | 89 | * RIDE should be applied separately for each experimental condition, so you will need to specify a ``ride_condition_column`` from your log files. 90 | 91 | * Your epochs need to be long enough to cover the entire speech artifact, so you may want to adjust your ``epochs_tmin`` and ``epochs_tmax`` arguments. 92 | 93 | * In case you want to shorten your epochs after RIDE correction (e.g., to save disk space), you can specify the ``ride_epochs_tmin_after_ride`` and ``ride_epochs_tmax_after_ride`` arguments. 94 | 95 | * RIDE expects that the epochs were cleaned from all other artifacts beforehand, which is why we apply artifact rejection (based on peak-to-peak amplitude) *before* RIDE (see above). However, when using a relatively stringent peak-to-peak threshold, many epochs will be rejected because of the speech artifacts. When using RIDE, we therefore suggest to use a relatively lenient rejection threshold for ``reject_peak_to_peak`` (e.g., 250 µV) and then enable a second, more stringent rejection threshold *after* RIDE using the ``ride_reject_peak_to_peak`` argument, to deal with remaining non-speech artifacts. 
96 | 97 | If you've enabled the visual HTML reports using the ``report_dir`` argument, plots of the results of the RIDE correction for all experimental conditions will be included in the report. 98 | 99 | Compute single trial amplitudes 100 | ------------------------------- 101 | 102 | For each ERP component of interest, the pipeline computes one value per trial. 103 | This value reflects the mean ERP amplitude for this component (in µV) averaged across (a) the time window of interest (e.g., 300--500 ms for the N400 component) and (b) the channels in the region of interest (e.g., channels C1, Cz, C2, CP1, CPz, and CP2 for the N400 component). 104 | Performing this step on the single trial level is the main advantage of the Frömer et al. (2018) [#]_ pipeline compared to more traditional ERP analysis approach, where the amplitudes are additionally averaged across trials from the same condition, thereby losing any information available on the single trial level (e.g., item-level confounds or random effects). 105 | 106 | Compute by-participant condition averages 107 | ----------------------------------------- 108 | 109 | In addition to the single trial amplitudes (usually used for statistical modeling), the pipeline computes average waveforms for each participant and experimental condition. 110 | Unlike the single trial amplitudes, these averages are computed by averaging across trials from the same condition, but they retain the temporal information (all time points in the epoch) and spatial information (all channels) of the epoched data. 111 | These averages are typically used for visualization as time course plots or scalp topographies or for cluster-based permutation tests. 112 | They could also be used for "traditional" statistical modeling such as repeated measures ANOVAs, but this is not recommended because it discards the single trial information and makes more questionable assumptions than the single trial mixed modeling approach. 
113 | 114 | Create quality control reports 115 | ------------------------------ 116 | 117 | Optionally, the pipeline creates one quality control (QC) report file in HTML format for each participant. 118 | This contains plots of the data before and after preprocessing as well as some summary statistics and metadata. 119 | It is especially recommended to check these reports when using ICA for artifact correction, to confirm that the automatic component detection algorithm has indeed identified plausible eye blink and eye movement components. 120 | 121 | Notes 122 | ----- 123 | 124 | .. [#] https://mne.tools/stable/generated/mne.io.Raw.html#mne.io.Raw.interpolate_bads 125 | .. [#] https://mne.tools/stable/auto_tutorials/preprocessing/25_background_filtering.html 126 | .. [#] Tanner, D., Morgan-Short, K., & Luck, S. J. (2015). How inappropriate high-pass filters can produce artifactual effects and incorrect conclusions in ERP studies of language and cognition. *Psychophysiology*, 52(8), 997–1009. https://doi.org/10.1111/psyp.12437 127 | .. [#] Ouyang, G., Herzmann, G., Zhou, C., & Sommer, W. (2011). Residue iteration decomposition (RIDE): A new method to separate ERP components on the basis of latency variability in single trials. *Psychophysiology*, 48(12), 1631–1647. https://doi.org/10.1111/j.1469-8986.2011.01269.x 128 | .. [#] Ouyang, G., Sommer, W., & Zhou, C. (2015). A toolbox for residue iteration decomposition (RIDE)—A method for the decomposition, reconstruction, and single trial analysis of event related potentials. *Journal of Neuroscience Methods*, 250, 7–21. https://doi.org/10.1016/j.jneumeth.2014.10.009 129 | .. [#] Ouyang, G., Sommer, W., Zhou, C., Aristei, S., Pinkpank, T., & Abdel Rahman, R. (2016). Articulation artifacts during overt language production in event-related brain potentials: Description and correction. *Brain Topography*, 29(6), 791–813. https://doi.org/10.1007/s10548-016-0515-1 130 | .. [#] Frömer, R., Maier, M., & Abdel Rahman, R. 
(2018). Group-level EEG-processing pipeline for flexible single trial-based analyses including linear mixed models. *Frontiers in Neuroscience*, 12, 48. https://doi.org/10.3389/fnins.2018.00048 131 | -------------------------------------------------------------------------------- /doc/source/processing_tfr.rst: -------------------------------------------------------------------------------- 1 | Time-frequency analysis 2 | ======================= 3 | 4 | *Work in progress* 5 | -------------------------------------------------------------------------------- /doc/source/quickstart_py.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ========== 3 | 4 | The pipeline provides a single high-level function, ``group_pipeline()``, to carry out a full EEG analysis on a group of participants. 5 | 6 | Here is a fairly minimal example for a (fictional) N400/P600 experiment with two experimental factors: ``semantics`` (e.g., related versus unrelated words) and emotional ``context`` (e.g., emotionally negative versus neutral). 7 | 8 | .. 
code-block:: python 9 | 10 | from pipeline import group_pipeline 11 | 12 | trials, evokeds, config = group_pipeline( 13 | raw_files='Results/EEG/raw', 14 | log_files='Results/RT', 15 | output_dir='Results/EEG/export', 16 | besa_files='Results/EEG/cali', 17 | triggers=[201, 202, 211, 212], 18 | skip_log_conditions={'semantics': 'filler'}, 19 | components={ 20 | 'name': ['N400', 'P600'], 21 | 'tmin': [0.3, 0.5], 22 | 'tmax': [0.5, 0.9], 23 | 'roi': [['C1', 'Cz', 'C2', 'CP1', 'CPz', 'CP2'], 24 | ['Fz', 'FC1', 'FC2', 'C1', 'Cz', 'C2']]}, 25 | average_by={ 26 | 'related_negative': 'semantics == "related" and context == "negative"', 27 | 'related_neutral': 'semantics == "related" and context == "neutral"', 28 | 'unrelated_negative': 'semantics == "unrelated" and context == "negative"', 29 | 'unrelated_neutral': 'semantics == "unrelated" and context == "neutral"'}) 30 | 31 | In this example we have specified: 32 | 33 | - ``raw_files``, ``log_files``, ``output_dir``, ``besa_files``: The paths to the raw EEG data, to the behavioral log files, to the desired output directory, and to the BESA files for ocular correction 34 | 35 | - ``triggers``: The four different numerical EEG trigger codes corresponding to each of the four cells in the 2 × 2 design 36 | 37 | - ``skip_log_conditions``: Our log files may contain additional trials from a "filler" condition without corresponding EEG trials/triggers. These filler trials are marked with the condition label ``'filler'`` in the log file column ``semantics`` 38 | 39 | - ``components``: The *a priori* defined time windows and regions of interest for the relevant ERP components (N400 and P600) 40 | 41 | - ``average_by``: The relevant groupings of trials for which by-participant averaged waveforms should be created. The keys (e.g., ``'related_negative'``) are custom labels of our choice; the values are the corresponding logical conditions that must be met for a trial to be included in the average. 
42 | 43 | For (way) more options, see :doc:`Pipeline inputs `. 44 | -------------------------------------------------------------------------------- /doc/source/quickstart_r.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ========== 3 | 4 | The pipeline provides a single high-level function, ``group_pipeline()``, to carry out a full EEG analysis on a group of participants. 5 | 6 | Here is a fairly minimal example for a (fictional) N400/P600 experiment with two experimental factors: ``semantics`` (e.g., related versus unrelated words) and emotional ``context`` (e.g., emotionally negative versus neutral). 7 | 8 | .. code-block:: r 9 | 10 | pipeline <- reticulate::import("pipeline") 11 | 12 | res <- pipeline$group_pipeline( 13 | raw_files = "Results/EEG/raw", 14 | log_files = "Results/RT", 15 | output_dir = "Results/EEG/export", 16 | besa_files = "Results/EEG/cali", 17 | triggers = c(201, 202, 211, 212), 18 | skip_log_conditions = list("semantics" = "filler"), 19 | components = list( 20 | "name" = list("N400", "P600"), 21 | "tmin" = list(0.3, 0.5), 22 | "tmax" = list(0.5, 0.9), 23 | "roi" = list( 24 | c("C1", "Cz", "C2", "CP1", "CPz", "CP2"), 25 | c("Fz", "FC1", "FC2", "C1", "Cz", "C2") 26 | ) 27 | ), 28 | average_by = list( 29 | "related_negative" = "semantics == 'related' & context == 'negative'", 30 | "related_neutral" = "semantics == 'related' & context == 'neutral'", 31 | "unrelated_negative" = "semantics == 'unrelated' & context == 'negative'", 32 | "unrelated_neutral" = "semantics == 'unrelated' & context == 'neutral'" 33 | ) 34 | ) 35 | 36 | trials <- res[[1]] 37 | evokeds <- res[[2]] 38 | config <- res[[3]] 39 | 40 | In this example we have specified: 41 | 42 | - ``raw_files``, ``log_files``, ``output_dir``, ``besa_files``: The paths to the raw EEG data, to the behavioral log files, to the desired output directory, and to the BESA files for ocular correction 43 | 44 | - ``triggers``: The four different 
numerical EEG trigger codes corresponding to each of the four cells in the 2 × 2 design 45 | 46 | - ``skip_log_conditions``: Our log files may contain additional trials from a "filler" condition without corresponding EEG trials/triggers. These filler trials are marked with the condition label ``'filler'`` in the log file column ``semantics`` 47 | 48 | - ``components``: The *a priori* defined time windows and regions of interest for the relevant ERP components (N400 and P600) 49 | 50 | - ``average_by``: The relevant groupings of trials for which by-participant averaged waveforms should be created. The keys (e.g., ``"related_negative"``) are custom labels of our choice; the values are the corresponding logical conditions that must be met for a trial to be included in the average. 51 | 52 | For (way) more options, see :doc:`Pipeline inputs `. 53 | -------------------------------------------------------------------------------- /doc/source/reference_py.rst: -------------------------------------------------------------------------------- 1 | Function reference 2 | ================== 3 | 4 | EEG processing 5 | -------------- 6 | 7 | .. autosummary:: 8 | :toctree: generated 9 | 10 | pipeline.group_pipeline 11 | pipeline.participant_pipeline 12 | 13 | Sample datasets 14 | --------------- 15 | 16 | .. 
autosummary:: 17 | :toctree: generated 18 | 19 | pipeline.datasets.get_erpcore 20 | pipeline.datasets.get_ucap 21 | -------------------------------------------------------------------------------- /doc/source/references.bib: -------------------------------------------------------------------------------- 1 | @article{fromer2018, 2 | title = {Group-Level {{EEG-processing}} Pipeline for Flexible Single Trial-Based Analyses Including Linear Mixed Models}, 3 | author = {Fr{\"o}mer, Romy and Maier, Martin and Abdel Rahman, Rasha}, 4 | year = {2018}, 5 | journal = {Frontiers in Neuroscience}, 6 | volume = {12}, 7 | pages = {48}, 8 | publisher = {{Frontiers}}, 9 | doi = {10.3389/fnins.2018.00048}, 10 | urldate = {2021-11-06} 11 | } 12 | 13 | @article{kappenman2021, 14 | title = {{{ERP CORE}}: {{An}} Open Resource for Human Event-Related Potential Research}, 15 | author = {Kappenman, Emily S. and Farrens, Jaclyn L. and Zhang, Wendy and Stewart, Andrew X. and Luck, Steven J.}, 16 | year = {2021}, 17 | journal = {NeuroImage}, 18 | volume = {225}, 19 | pages = {117465}, 20 | doi = {10.1016/j.neuroimage.2020.117465}, 21 | urldate = {2022-10-04}, 22 | langid = {english} 23 | } 24 | -------------------------------------------------------------------------------- /doc/source/tables_py/averaging.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | "``average_by`` (recommended, default: ``None``)","Selection of (combinations of) conditions to create by-participant averages for (keys = custom condition labels, values = `Pandas query `_)","``{'neg_unrel': 'context == ""negative"" & semantics == ""unrelated"" & rt < 3000', ...}``" 3 | -------------------------------------------------------------------------------- /doc/source/tables_py/epoching.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | "``triggers`` (recommended, default: 
``None``)",Numerical EEG triggers for events of interest,"``[201, 202]``" 3 | ``triggers_column`` (default: ``None``),Name of log file column with EEG triggers for automatic matching,``'trigger'`` 4 | ``epochs_tmin`` (default: ``-0.5``),Start of epochs relative to event onset (in s),``-0.5`` 5 | ``epochs_tmax`` (default: ``1.5``),End of epochs relative to event onset (in s),``1.5`` 6 | "``baseline`` (default: ``(-0.2, 0.0)``)",Time window for baseline correction (in s) or,"``(-0.2, 0.0)``" 7 | ,Use entire prestimulus interval or,"``(None, 0.0)``" 8 | ,Do not perform baseline correction,``None`` 9 | ``reject_peak_to_peak`` (default: ``200.0``),Peak-to-peak threshold for rejecting epochs (in µV) or,``200.0`` 10 | ,Do not reject epochs based on peak-to-peak amplitude,``None`` 11 | "``components`` (recommended, default: ``None``)",Definition of single trial ERP components of interest,"``{'name': ['P1', 'N170'], 'tmin': [0.08, 0.15], 'tmax': [0.13, 0.2], 'roi': [['PO3', ...], ['P7', ...]]}``" 12 | -------------------------------------------------------------------------------- /doc/source/tables_py/inputs.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``vhdr_files`` (required),List of raw EEG file paths,"``['raw/Vp01.vhdr', 'raw/Vp02.vhdr', ...]``" 3 | ,Nested list if (some) participants have multiple EEG files or,"``[['raw/Vp01_a.vhdr', 'raw/Vp01_b.vhdr'], 'raw/Vp02.vhdr', ...]``" 4 | ,Directory path with raw EEG files,``'raw'`` 5 | ``log_files`` (required),List of behavioral log file paths,"``['log/Vp01.txt', 'log/Vp02.txt', ...]``" 6 | ,Directory of raw EEG files,``'log'`` 7 | -------------------------------------------------------------------------------- /doc/source/tables_py/outputs.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``output_dir`` (required),Output directory,``'data/output'`` 3 | ``clean_dir`` 
(default: ``None``),Cleaned (continuous) data output directory,``'data/clean'`` 4 | ``epochs_dir`` (default: ``None``),Epoched data output directory,``'data/clean'`` 5 | ``report_dir`` (default: ``None``),HTML quality control report output directory,``'data/reports'`` 6 | ``to_df`` (default: ``True``),Save outputs as data frames with comma-separated values or,``True`` 7 | ,Save outputs as MNE-Python (``.fif``) files or,``False`` 8 | ,Save outputs as data frames *and* MNE-Python files,``'both'`` 9 | -------------------------------------------------------------------------------- /doc/source/tables_py/performance.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``n_jobs`` (default: ``1``),"Number of jobs (i.e., participants) to be processed in parallel","``4`` or ``-1`` (i.e., use all CPUs)" 3 | -------------------------------------------------------------------------------- /doc/source/tables_py/perm.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``perm_contrasts`` (default: ``None``),Contrast(s) between condition labels (see ``average_by``) to compute `cluster-based permutation tests `_ for,"``[('related', 'unrelated')]``" 3 | ``perm_tmin`` (default: ``0.0``),Start of time window (in s relative to stimulus onset) for restricting the permutation test,"``0.0`` or ``None`` (i.e., use entire epoch)" 4 | ``perm_tmax`` (default: ``1.0``),End of time window (in s relative to stimulus onset) for restricting the permutation test,"``1.0`` or ``None`` (i.e., use entire epoch)" 5 | ``perm_channels`` (default: ``None``),Selection of channels for restricting the permutation test,"``['C1', 'Cz', 'C2', ...]`` or ``None`` (i.e., use all channels)" 6 | ``perm_fmin`` (default: ``None``),Lowest frequency (in Hz) for restricting the permutation test (event-related power only),"``8.0`` or ``None`` (i.e., use all frequencies)" 7 | 
``perm_fmax`` (default: ``None``),Highest frequency (in Hz) for restricting the permutation test (event-related power only),"``30.0`` or ``None`` (i.e., use all frequencies)" 8 | -------------------------------------------------------------------------------- /doc/source/tables_py/preprocessing.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``downsample_sfreq`` (default: ``None``),Downsample to lower sampling rate or,``250.0`` 3 | ,Do not downsample,``None`` 4 | ``veog_channels`` (default: ``'auto'``),Construct bipolar VEOG from two EEG or EOG channels or,"``['Fp1', 'IO1']``" 5 | ,Construct VEOG from default channels or,``'auto'`` 6 | ,Do not construct a new VEOG channel,``None`` 7 | ``heog_channels`` (default: ``'auto'``),Construct bipolar HEOG from two EEG or EOG channels or,"``['F9', 'F10']``" 8 | ,Construct HEOG from default channels or,``'auto'`` 9 | ,Do not construct a new HEOG channel,``None`` 10 | ``montage`` (default: ``'easycap-M1'``),`Standard EEG montage name `_ or,``'easycap-M1'`` 11 | ,`Custom EEG montage file path `_,``'data/chanlocs.elc'`` 12 | ``ref_channels`` (default: ``'average'``),List of channels to re-reference EEG channels to or,"``['M1', 'M2']``" 13 | ,Re-reference EEG channels to an average reference or,``'average'`` 14 | ,Use the `Reference Electrode Standardization Technique (REST) `_,``'REST'`` 15 | ``bad_channels`` (default: ``None``),Lists of bad channels for each participant or,"``[['P7', 'Oz'], ['Fp2'], ...]``" 16 | ,Dict with participant labels and their list of bad channels or,"``{'Vp05': ['PO8', 'O2'], ...}``" 17 | ,Auto-detect bad channels based on standard error across epochs or,``'auto'`` 18 | ,Don't interpolate any bad channels,``None`` 19 | ``besa_files`` (default: ``None``),Directory of BESA/MSEC correction matrix files or,``'data/cali'`` 20 | ,List of BESA/MSEC correction matrix file paths or,"``['data/cali/Vp01.matrix', 'data/cali/Vp02.matrix', 
...]``" 21 | ,Don't use BESA/MSEC ocular correction,``None`` 22 | ``ica_method`` (default: ``None``),`ICA method `_ or,``'fastica'`` or ``'infomax'`` or ``'picard'`` 23 | ,Don't apply ICA,``None`` 24 | ``ica_n_components`` (default: ``None``),Number of ICA components to use or,``15`` 25 | ,Proportion of variance explained by ICA components or,``0.99`` 26 | ,Use (`almost `_) all possible ICA components,``None`` 27 | ``highpass_freq`` (default: ``0.1``),High-pass filter cutoff frequency or,``0.1`` 28 | ,Do not apply high-pass filter,``None`` 29 | ``lowpass_freq`` (default: ``40.0``),Low-pass filter cutoff frequency or,``40.0`` 30 | ,Do not apply low-pass filter,``None`` 31 | -------------------------------------------------------------------------------- /doc/source/tables_py/ride.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``perform_ride`` (default: ``False``),"Enable RIDE for speech-artifact correction (`Ouyang et al, 2015 `_)", ``True`` or ``False`` 3 | ``ride_condition_column`` (default: ``None``),Name of log file column coding each experimental condition (may be identical to triggers_column),``'ride_condition'`` 4 | ``ride_rt_column`` (default: ``'RT'``),Name of log file column containing the voice (key) onset times,``'RT'`` 5 | "``ride_s_twd`` (default: ``(0.0, 0.6)``)",Time window in which stimulus-locked component should occur (in s),"``(0.0, 0.6)``" 6 | "``ride_r_twd`` (default: ``(-0.3, 0.3)``)",Time window around RT (voice onset time) in wich response-locked component should occur (in s). 
Default is `'supposed to be efficient to cover RT-locked component cluster' `_,"``(-0.3, 0.3)``" 7 | ``ride_epochs_tmin_after_ride`` (default: ``None``),Crop epochs t_min and t_max if longer epochs needed for ride (in s) (usual analyses time window can be used after RIDE) or,``-0.5`` 8 | ,Do not crop time window after RIDE,``None`` 9 | ``ride_epochs_tmax_after_ride`` (default: ``None``),Crop epochs t_min and t_max if longer epochs needed for ride (in s) or,``1.5`` 10 | ,Do not crop time window after RIDE,``None`` 11 | ,Note: ``epochs_tmin`` and ``epochs_tmax`` must be adjusted to contain entire speech artifact (min 1s)!, 12 | ``reject_peak_to_peak`` (default: ``None``),Peak-to-peak threshold for rejecting epochs after RIDE correction (in µV) or,``200.0`` 13 | ,Do not re-reject epochs after RIDE,``None`` 14 | ,Note: you may use a relatively lenient ``reject_peak_to_peak`` threshold before RIDE and a more stringent one here (see :ref:`ride-details`), 15 | -------------------------------------------------------------------------------- /doc/source/tables_py/tfr.csv: -------------------------------------------------------------------------------- 1 | Argument,Description,Example 2 | ``perform_tfr`` (default: ``False``),Enable time-frequency analysis,``True`` or ``False`` 3 | ``tfr_subtract_evoked`` (default: ``False``),Subtract evoked activity from epochs before time-frequency analysis,``True`` or ``False`` 4 | "``tfr_freqs`` (default: ``np.linspace(4.0, 40.0, num=37)``)",Frequencies for the family of Morlet wavelets,"``np.arange(6.0, 41.0, step=2.0)``" 5 | "``tfr_cycles`` (default: ``np.linspace(2.0, 20.0, num=37)``)",Numbers of cycles for the family of Morlet wavelets,"``np.arange(2.0, 21.0, step=1.0)``" 6 | ``tfr_mode`` (default: ``'percent'``),"`Method for divisive baseline correction `_ of event-related power using the full epoch interval (`Delorme & Grandchamp, 2012 `_)",``'percent'`` or ``'ratio'`` or ``'logratio'`` or ``'zscore'`` or ``'zlogratio'`` or ``None`` 
def compute_evokeds(epochs, average_by=None, bad_ixs=[], participant_id=None):
    """Dispatches to the matching condition-averaging strategy.

    Depending on the type of `average_by`, condition averages (evokeds) are
    computed from the epochs' trigger codes (`None`), from log file queries
    (dict of labels -> Pandas query strings; recommended), or from log file
    column names (list of str; deprecated legacy interface).

    Returns a tuple of (list of evokeds, long-format data frame).
    """

    # No grouping provided: fall back to averaging by trigger codes
    if average_by is None:
        return compute_evokeds_triggers(epochs, bad_ixs, participant_id)

    # Recommended interface: dict mapping condition labels to queries
    if isinstance(average_by, dict):
        return compute_evokeds_queries(
            epochs, average_by, bad_ixs, participant_id)

    # Legacy interface: list of metadata column names (deprecated)
    warn('Passing a list of column names to `average_by` will ' +
         'be deprecated in a future version of the pipeline. ' +
         'Please use a dict of labels and log file queries ' +
         'instead (see https://github.com/alexenge/hu-neuro-pipeline/blob/main/docs/inputs.md#average_by-recommended-default-none)')
    return compute_evokeds_cols(epochs, average_by, bad_ixs, participant_id)
def compute_evokeds_cols(
        epochs, average_by=None, bad_ixs=[], participant_id=None):
    """Computes condition averages (evokeds) based on log file columns.

    Legacy interface (see `compute_evokeds`): each entry of `average_by` is a
    metadata column name, or several column names joined by '/' to denote an
    interaction (one average per combination of their values).

    Parameters
    ----------
    epochs : epochs object with a `.metadata` data frame (one row per trial).
    average_by : str or list of str
        Column name(s) to average by; '/'-separated names are interactions.
    bad_ixs : list of int
        Indices of epochs to exclude from averaging.
    participant_id : str or None
        If given, added as the first column of the output data frame.

    Returns
    -------
    Tuple of (list of evokeds, one concatenated long-format data frame).
    """

    # Make sure that provided values are stored in a list
    if isinstance(average_by, str):
        average_by = [average_by]

    # Get indices of good epochs (everything not listed in `bad_ixs`)
    good_ixs = [ix for ix in range(len(epochs)) if ix not in bad_ixs]

    # Prepare empty lists to collect results across effects
    all_evokeds = []
    all_evokeds_dfs = []

    # Iterate over the provided main effects and interactions
    for cols in average_by:

        # Parse interaction effects ('a/b') into a list of column names
        cols = cols.split('/')

        # Recode events by the metadata columns, then average per event type
        epochs_update = update_events(epochs, cols)[good_ixs]
        evokeds = average_by_events(epochs_update)
        all_evokeds = all_evokeds + evokeds

        # Convert to DataFrame, attaching the condition columns
        trials = epochs_update.metadata
        evokeds_df = create_evokeds_df(
            evokeds, cols, trials, participant_id)

        # Append info about averaging (the original '/'-joined spec)
        value = '/'.join(cols)
        evokeds_df.insert(loc=1, column='average_by', value=value)
        all_evokeds_dfs.append(evokeds_df)

    # Combine DataFrames from all main effects/interactions
    all_evokeds_df = pd.concat(all_evokeds_dfs, ignore_index=True)

    # Move condition columns back to the front
    # They might have been moved to the end while concatenating
    if average_by is not None:
        time_ix = all_evokeds_df.columns.get_loc('time')
        for cols in reversed(average_by):
            # Only main-effect specs name an actual column; interaction
            # specs ('a/b') have no single column of that name
            if not '/' in cols:
                all_evokeds_df.insert(
                    time_ix - 1, column=cols, value=all_evokeds_df.pop(cols))

                # Convert NaNs to empty strings so that R can represent them
                all_evokeds_df[cols] = all_evokeds_df[cols].fillna('')

    return all_evokeds, all_evokeds_df
def update_events(epochs, cols):
    """Rewrites the events/event_id of a copy of `epochs` from metadata.

    One combined condition label is built per trial by '/'-joining the
    string values of the requested metadata columns; each distinct label
    gets an integer event code. The input epochs are left untouched.
    """

    # Build one combined condition label per trial ('col1/col2/...')
    labels = (pd.DataFrame(epochs.metadata[cols])
              .astype('str')
              .agg('/'.join, axis=1))

    # Assign an integer code to every distinct label
    codes = labels.astype('category').cat.codes

    # Work on a copy so the caller's epochs keep their original events
    updated = epochs.copy()
    updated.events[:, 2] = codes
    updated.event_id = dict(zip(labels, codes))

    return updated
def compute_grands_df(evokeds_df):
    """Collapses the per-participant evokeds data frame into grand averages.

    Column 0 is assumed to be `participant_id` (averaged over); all columns
    up to and including 'time' -- or 'freq' for time-frequency data -- are
    treated as grouping columns identifying one condition/sample.
    """

    # Everything between participant_id and the last grouping column
    # ('freq' if present, else 'time') defines one group
    last_col = 'freq' if 'freq' in evokeds_df.columns else 'time'
    stop = evokeds_df.columns.get_loc(last_col) + 1
    group_cols = list(evokeds_df.columns[1:stop])

    # Mean over participants within each group, then restore the grouping
    # columns from the index back to regular columns
    grouped = evokeds_df.groupby(group_cols, dropna=False)
    return grouped.mean(numeric_only=True).reset_index()
def boilerplate(config):
    """Auto-creates part of the methods section based on pipeline options.

    Reads the keys 'raw_files', 'downsample_sfreq', 'bad_channels', and
    'ica_method' from `config` and prints a paper-ready methods paragraph
    to stdout. Returns nothing.

    NOTE(review): the `references` list is populated throughout but never
    printed or returned -- presumably meant to be emitted too; confirm.
    """

    # Prepare empty lists (the local `boilerplate` shadows this function's
    # own name; harmless here since the function is not called recursively)
    boilerplate = []
    references = []

    # Pipeline intro
    text = (
        'The continuous EEG from {n_participants} participants was processed '
        'offline using the single trial EEG pipeline proposed by Frömer et '
        'al. (2018). The pipeline was implemented with the packages MNE '
        '(Version {mne_version}; Gramfort et al., 2013) and STEP-MNE '
        '({step_mne_url}) for Python (Version {python_version}; Van Rossum & '
        'Drake, 2009).\n')
    text = text.format(
        n_participants=len(config['raw_files']),
        mne_version=mne.__version__,
        step_mne_url='https://github.com/alexenge/step-mne',  # TODO: Add version
        python_version=python_version()
    )
    boilerplate.append(text)
    references.append(
        'Frömer, R., Maier, M., & Abdel Rahman, R. (2018). Group-level EEG-'
        'processing pipeline for flexible single trial-based analyses '
        'including linear mixed models. Frontiers in Neuroscience, 12, 48. '
        'https://doi.org/10.3389/fnins.2018.00048')
    references.append(
        'Gramfort, A., Luessi, M., Larson, E., Engemann, D. A., Strohmeier, '
        'D., Brodbeck, C., Goj, R., Jas, M., Brooks, T., Parkkonen, L., & '
        'Hämäläinen, M. (2013). MEG and EEG data analysis with MNE-Python. '
        'Frontiers in Neuroscience, 7. '
        'https://doi.org/10.3389/fnins.2013.00267')
    references.append(
        'Van Rossum, G., & Drake, F. L. (2009). Python 3 reference manual. '
        'CreateSpace.')

    # Downsampling
    if config['downsample_sfreq'] is not None:
        text = (
            'The data from each participant were downsampled to '
            '{downsample_sfreq} Hz. '
        )
        text = text.format(downsample_sfreq = int(config['downsample_sfreq']))
        boilerplate.append(text)

    # Bad channels
    # NOTE(review): this assumes `config['bad_channels']` is a list of
    # per-participant lists. It raises TypeError for None and, for the
    # string 'auto', iterates over its characters (ns_bads == [1, 1, 1, 1]),
    # which makes the statistics below meaningless -- confirm that the
    # config has been normalized to per-participant lists before this runs.
    ns_bads = [len(l) for l in config['bad_channels']]
    if sum(ns_bads) > 0:
        if config['bad_channels'] == 'auto':
            text = (
                'An average of {mean_bads} EEG channels per participant '
                '(Mdn = {mdn_bads}, range {min_bads} to {max_bads}) were '
                'automatically flagged for bad data quality. Channels were '
                'flagged as bad if their inclusion would have led to the '
                'rejection of at least 5% of all available epochs for the '
                'participant, according to the artifact rejection threshold '
                'defined below. The signal for these channels was replaced by '
                'the signal of the neighboring channels using spherical '
                'spline interpolation (Perrin et al., 1989). '
            )
        else:
            text = (
                'An average of {mean_bads} EEG channels per participant '
                '(Mdn = {mdn_bads}, range {min_bads} to {max_bads}) were '
                'manually flagged for bad data quality. The signal for these '
                'channels was replaced by the signal of the neighboring '
                'channels using spherical spline interpolation (Perrin et '
                'al., 1989). '
            )
        text = text.format(
            mean_bads = '{:.1f}'.format(mean(ns_bads)),
            mdn_bads = median(ns_bads),
            min_bads = min(ns_bads),
            max_bads = max(ns_bads)
        )
        boilerplate.append(text)
        references.append(
            'Perrin, F., Pernier, J., Bertrand, O., & Echallier, J. F. '
            '(1989). Spherical splines for scalp potential and current '
            'density mapping. Electroencephalography and Clinical '
            'Neurophysiology, 72(2), 184–187. '
            'https://doi.org/10.1016/0013-4694(89)90180-6'
        )
    else:
        boilerplate.append(
            'No EEG channels were excluded or interpolated based on bad data '
            'quality. '
        )

    # Re-referencing
    boilerplate.append(
        'Next, the data were re-referenced to the common average of all EEG '
        'channels.'
    )

    # Ocular correction
    # NOTE(review): this text hard-codes '1 Hz', 'first 15 components', and
    # 'FastICA' regardless of the actual `ica_method`/`ica_n_components`
    # settings, and contains typos ('iteratievly', 'signifcantly', stray
    # ')' after 'MNE-Python') -- the generated string should be fixed and
    # parameterized from `config`.
    if config['ica_method'] is not None:
        boilerplate.append(
            'Artifacts resulting from blinks and eye movements were corrected '
            'using independent component analysis (ICA). For this, we '
            'temporarily low-pass filtered the data at 1 Hz and extracted the '
            'first 15 independent components using the FastICA algorithm '
            '(Hyvärinen, 1999). Components were then removed automatically '
            'using `find_bads_eog` function in MNE-Python). This function '
            'iteratievly removes components if they are signifcantly '
            'correlated (z > 3.0) with either of two virtual EOG channels '
            '(VEOG: Fp1 - IO1, HEOG: F9 - F10).'
        )
        references.append(
            'Hyvärinen, A. (1999). Fast and robust fixed-point algorithms for '
            'independent component analysis. IEEE Transactions on Neural '
            'Networks, 10(3), 626–634. https://doi.org/10.1109/72.761722'
        )


    # Combine and print
    boilerplate = ''.join(boilerplate)
    print(boilerplate)
def _write_erpcore_manifest():
    """Writes a CSV table containing the file paths of the ERP CORE datasets.

    For every ERP CORE component, lists the files of its BIDS-compatible
    directory on the OSF (excluding the 'stimuli' folder) and collects, per
    file, its local cache path, download URL, MD5 hash, participant ID,
    file type, and size. The combined table is written to `MANIFEST_FILE`.
    Performs network requests; intended as a maintainer utility.
    """

    dfs = []
    for component, osf_id in OSF_IDS.items():

        base_url = f'https://files.de-1.osf.io/v1/resources/{osf_id}/providers/osfstorage/'

        # Locate the BIDS-compatible directory within the OSF project
        bids_suffix = _find_bids_remote_path(base_url)

        # Recursively list all files, skipping the (large) stimuli folder
        files = _list_files(base_url, bids_suffix, exclude_dirs=['stimuli'])

        attributes = [file['attributes'] for file in files]
        df = pd.DataFrame.from_dict(attributes)

        df.insert(0, 'component', component)

        # Participant ID is the part of the file name before the first '_'
        # or '.'; use a raw string since '\.' is an invalid escape sequence
        # in a regular string literal (SyntaxWarning on modern Python)
        participants = df['name'].str.split(r'_|\.').str[0]
        # Non-participant files (e.g., dataset-level metadata) get an empty ID
        participants = [p if p.startswith('sub') else '' for p in participants]
        df.insert(1, 'participant_id', participants)
        df = df.sort_values('participant_id')

        # Map the remote BIDS path to the local cache layout
        local_paths = df['materialized'].str.\
            replace(f'/{component} Raw Data BIDS-Compatible/',
                    f'erpcore/{component}/')
        df.insert(2, 'local_path', local_paths)

        # Store hashes in pooch-style 'md5:...' format
        hashes = df['extra'].apply(lambda x: f'md5:{x["hashes"]["md5"]}')
        df.insert(3, 'hash', hashes)

        urls = df['path'].apply(lambda x: f'{base_url}{x}')
        df.insert(4, 'url', urls)

        # Classify files (e.g., 'eeg.set' -> 'raw_files') by name suffix
        file_exts = df['name'].str.split('_').str[-1]
        file_types = file_exts.map(FILE_TYPE_DICT)
        df.insert(5, 'file_type', file_types)

        df = df[['component', 'local_path', 'url', 'hash', 'participant_id',
                 'file_type', 'size']]

        dfs.append(df)

    df = pd.concat(dfs, ignore_index=True)

    df.to_csv(MANIFEST_FILE, index=False)
import json
from pathlib import Path
from urllib.request import urlopen

import pandas as pd

from .utils import get_dataset

BASE_URL = 'https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage'
MANIFEST_FILE = Path(__file__).parent.joinpath('ucap_manifest.csv')

# Maps file extensions to the keys of the dict returned by `get_ucap`
FILE_TYPE_DICT = {'vhdr': 'raw_files',
                  'txt': 'log_files',
                  'matrix': 'besa_files'}


def get_ucap(participants=40, path=None):
    """Get sample data from the UCAP dataset.

    Data that are not yet available locally will be downloaded from the OSF.
    See :footcite:`fromer2018` for details on the UCAP dataset.

    Parameters
    ----------
    participants : int or list of str, optional
        Which participants to download. By default, downloads all 40
        participants available in the dataset. If an integer, downloads that
        many participants starting from the first participant. If a list of
        strings, downloads the participants with the given IDs (e.g.,
        ``['05', '07']``).
    path : str or Path, optional
        Local directory path to download the data to. By default, uses the
        user's local cache directory. An alternative way to specify the
        download path is to set the environment variable ``PIPELINE_DATA_DIR``.

    Returns
    -------
    dict
        A dictionary with the file paths of the downloaded data:

        - ``'raw_files'``: A list with the paths of the raw EEG files
          (``.vhdr``)
        - ``'log_files'``: A list with the paths of the log files (``.txt``)
        - ``'besa_files'``: A list with the paths of the BESA calibration
          files (``.matrix``)

    See Also
    --------
    pipeline.datasets.get_erpcore

    References
    ----------
    .. footbibliography::
    """

    manifest_df = pd.read_csv(MANIFEST_FILE, dtype={'participant_id': str})

    return get_dataset(manifest_df, BASE_URL, participants, path)


def _write_ucap_manifest():
    """Writes a CSV table containing the file paths of the UCAP dataset.

    Regenerates ``ucap_manifest.csv`` by listing the EEG, log, and BESA
    calibration folders on OSF and recording, for every file of participants
    with a complete set of files, its local target path, download URL, MD5
    hash, participant ID, file type, and size.
    """

    # OSF folder IDs of the EEG, log, and BESA calibration directories
    eeg_url = '59cf07fa6c613b02958f3364/'
    log_url = '59cf12259ad5a102cc5c4b93/'
    cali_url = '59cf089e6c613b02968f5724/'

    files = []
    for folder_url in [eeg_url, log_url, cali_url]:
        # Use a separate name for the response so it doesn't shadow the
        # loop variable (the original `as url` clobbered it)
        with urlopen(f'{BASE_URL}/{folder_url}') as response:
            files += json.loads(response.read().decode())['data']

    attributes = [file['attributes'] for file in files]

    df = pd.DataFrame.from_dict(attributes)

    # Participant ID is the first chunk of the file name (split on `_` or
    # `.`), zero-padded to two digits since log files use unpadded IDs
    # (e.g. `5_test.txt`). Raw string avoids the invalid `\.` escape warning.
    participants = df['name'].str.split(r'_|\.').str[0].str.zfill(2)

    n_expected_files = 5  # Complete participants have 3 x EEG, 1 x log, 1 x cali
    n_files = participants.value_counts()
    good_participant_ids = n_files[n_files == n_expected_files].index.to_list()

    df.insert(0, 'participant_id', participants)
    df = df.sort_values(by=['participant_id', 'name'])
    df = df[df['participant_id'].isin(good_participant_ids)]

    local_paths = df['materialized'].str.replace('/UCAP/Data/', 'ucap/')
    df.insert(1, 'local_path', local_paths)

    hashes = df['extra'].apply(lambda x: f'md5:{x["hashes"]["md5"]}')
    df.insert(2, 'hash', hashes)

    urls = df['path'].apply(lambda x: f'{BASE_URL}{x}')
    df.insert(3, 'url', urls)

    file_exts = df['name'].apply(lambda x: Path(x).suffix[1:])
    file_types = file_exts.map(FILE_TYPE_DICT)
    df.insert(4, 'file_type', file_types)

    df = df[['local_path', 'url', 'hash', 'participant_id',
             'file_type', 'size']]

    df.to_csv(MANIFEST_FILE, index=False)
-------------------------------------------------------------------------------- 1 | local_path,url,hash,participant_id,file_type,size 2 | ucap/raw/05.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf3ae2594d9002c47faefd,md5:5c30e88ce15a64f908e81ea4ad75d138,05,,268325120 3 | ucap/raw/05.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf16ec6c613b02968f5acc,md5:8551c4d64f53c239500eb78c063decc1,05,raw_files,11635 4 | ucap/raw/05.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf16ff9ad5a102cb5c38ba,md5:75f0b138b1f9219e21732c1dacbccaf6,05,,391137 5 | ucap/cali/05_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d3c9ad5a102cc5c4df7,md5:330dc47e0e05d9882a438ae4638a6e8f,05,besa_files,47335 6 | ucap/log/5_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bbeb83f6902b3a664a3,md5:1d7a02bfc2322e27617b3317c9f9a919,05,log_files,504978 7 | ucap/raw/07.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf467d594d9002c47fb129,md5:264814714af35f2681251d52d6924db2,07,,272064000 8 | ucap/raw/07.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf16ed9ad5a102ce5c83a3,md5:293978391d7729e827ef87c52a057d13,07,raw_files,11774 9 | ucap/raw/07.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1701b83f6902b1a626ec,md5:6940ca784f2bb871f1c2dfad71d91984,07,,389309 10 | ucap/cali/07_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d3c6c613b02968f5cca,md5:40224fd852ae80251d1eb1e8c4d26b82,07,besa_files,47302 11 | ucap/log/7_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bcd6c613b02958f37c3,md5:cd4c01a586390a4f5d4508681769dada,07,log_files,501162 12 | ucap/raw/08.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf46a89ad5a102ce5c93ea,md5:7eb581b819555cfdbea4d27ce448bfd7,08,,248824320 13 | 
ucap/raw/08.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf17026c613b02968f5ad2,md5:f3c150a37520047ec2350b451606a18c,08,raw_files,11774 14 | ucap/raw/08.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf17129ad5a102ce5c83b2,md5:95ac3d402a8ca2678929c185e4545ddc,08,,389918 15 | ucap/cali/08_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d3fb83f6902b2a63acb,md5:3936d61643de3adb5c57406496fa9376,08,besa_files,47611 16 | ucap/log/8_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bcc594d9002c57fb90f,md5:b38fa7045aaf79f8069fde066194c83a,08,log_files,501162 17 | ucap/raw/09.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf46586c613b02968f68d0,md5:294453b8b3f9e426aa49953e5be21cda,09,,246005760 18 | ucap/raw/09.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf17186c613b02978f427e,md5:1910df2cb36fbc5373da2c7ae607dea7,09,raw_files,11774 19 | ucap/raw/09.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf17209ad5a102cd5c5b9b,md5:65caf0211aa3054b5e081ac28838dc3c,09,,389009 20 | ucap/cali/09_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d40b83f6902b0a6223c,md5:af36707efa1ba6f40e51cc22f87f1bd8,09,besa_files,47536 21 | ucap/log/9_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bcf594d9002c57fb913,md5:3cb0fce820de4d9676785f786c9bfe90,09,log_files,505029 22 | ucap/raw/12.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf46b4b83f6902b2a64412,md5:24a54d345803c5a8a74f368f4de97dec,12,,240920320 23 | ucap/raw/12.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1a489ad5a102cd5c5c6b,md5:7c4c1f4b5fafd8ab7451801af245b9fa,12,raw_files,11774 24 | 
ucap/raw/12.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1a52594d9002c47fa870,md5:dc507467e03b0b3026f0ef4abff0e68c,12,,389070 25 | ucap/cali/12_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d43b83f6902b3a66531,md5:521ebfecc783084718d6a95470b4a1cc,12,besa_files,47709 26 | ucap/log/12_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bd76c613b02948f3566,md5:dfb2bdd90f0e52da49d725fa86f4cbc8,12,log_files,503073 27 | ucap/raw/13.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf46a2594d9002c67ff74f,md5:35385b2896ba3fb1d9befe744832ce5d,13,,256258560 28 | ucap/raw/13.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf08a06c613b02978f3eed,md5:6cbd3efc55e76d3b719e2d2ef817e208,13,raw_files,11774 29 | ucap/raw/13.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf08a8b83f6902b1a62426,md5:eb2fc1cc29975d82fb501302ef5ded10,13,,389814 30 | ucap/cali/13_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d44594d9002c47fa930,md5:fe716a2ed1671a1ef16a71200b6606e2,13,besa_files,47595 31 | ucap/log/13_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bd56c613b02968f5c2f,md5:25dd0a2555ce5219db8d140c96f658d1,13,log_files,506929 32 | ucap/raw/14.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf48d86c613b02958f426b,md5:cfc098f748e76bc5751d0d6cd8ed93fa,14,,234369280 33 | ucap/raw/14.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56dfb83f6902b3a67936,md5:aa3828eaffba4bfc43978bce6c86ff88,14,raw_files,11774 34 | ucap/raw/14.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f5594d9002c67ffc5b,md5:c5b15af644fc2a01817f005ff89a8700,14,,388482 35 | 
ucap/cali/14_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d46b83f6902b3a66534,md5:25f4372b3f2e509a7399476c61221957,14,besa_files,47407 36 | ucap/log/14_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bd66c613b02968f5c32,md5:5d6d89761ed2813c2ec711d2b15572bf,14,log_files,506905 37 | ucap/raw/16.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf48f4b83f6902b2a64496,md5:e6210bde8fec7144d749d36771edfb5e,16,,239178240 38 | ucap/raw/16.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e06c613b02968f6d1a,md5:0f606cb8ff12782a90e288653ac7d075,16,raw_files,11774 39 | ucap/raw/16.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f89ad5a102cd5c6b29,md5:eba124228ae4df5f42e583bd78233f4e,16,,389448 40 | ucap/cali/16_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d466c613b02978f4426,md5:91d4a0ed5b6c499c8c15de7e53a344ac,16,besa_files,48124 41 | ucap/log/16_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bd76c613b02958f37ca,md5:54d82ddf16f7eafb2abb3572241c28ae,16,log_files,503068 42 | ucap/raw/17.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4894b83f6902b3a67498,md5:467b7c036db7fcd7e1e1c43882fab468,17,,249309440 43 | ucap/raw/17.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e0b83f6902b0a62ce5,md5:0954951c5a8aff131c4e1c7b11c51614,17,raw_files,11774 44 | ucap/raw/17.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f66c613b02948f4267,md5:a7406ae7a8335bcd7bc965eb383e2da5,17,,390163 45 | ucap/cali/17_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d46594d9002c37fa093,md5:61bf3ec4d3eb6a7174545f2a9f222e14,17,besa_files,47801 46 | 
ucap/log/17_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1be9b83f6902b2a63a67,md5:4a0180dfa417d63afd573fd6d65355a8,17,log_files,506906 47 | ucap/raw/18.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf48ba6c613b02948f3f23,md5:cd96049bbc799ba3e28793f622935b2c,18,,228142080 48 | ucap/raw/18.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e29ad5a102cc5c59ac,md5:13490420b759f1b051ca4f83e7afef9b,18,raw_files,11774 49 | ucap/raw/18.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f8594d9002c47fb46a,md5:efeae4c507a395cb187ba346a1a14471,18,,396154 50 | ucap/cali/18_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d489ad5a102cd5c5d49,md5:aa655d8d7ae25cdd954b10d01d69e646,18,besa_files,47958 51 | ucap/log/18_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1be7b83f6902b0a621f3,md5:b6abedf16e0b7d364f8d6eefe9555019,18,log_files,506904 52 | ucap/raw/19.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4918b83f6902b0a62aa0,md5:0ab014f506e02a423003c0d225947f14,19,,285811200 53 | ucap/raw/19.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e2b83f6902b2a647a3,md5:eed7e8febe6d3b48b11a077bc913f963,19,raw_files,11774 54 | ucap/raw/19.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f96c613b02968f6d38,md5:02d5b5b1c08920f0c3378afc691cd784,19,,391941 55 | ucap/cali/19_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d4ab83f6902b3a6653b,md5:02dc9b5c58e4e8a4f8006504e698d90e,19,besa_files,47911 56 | ucap/log/19_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1be76c613b02948f3575,md5:4ba467b2e73d434a8ca0a08b3634b116,19,log_files,503064 57 | 
ucap/raw/20.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf49a06c613b02968f69a8,md5:f0e8ff9bbabbc3fbb5ecaa0121d2cc98,20,,232780800 58 | ucap/raw/20.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e29ad5a102cd5c6b12,md5:73054b3c4e170cf4532d4416ebf5d454,20,raw_files,11774 59 | ucap/raw/20.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f8b83f6902b3a67979,md5:70910d067d571dea7b135c6f7ac2b670,20,,389884 60 | ucap/cali/20_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d496c613b02948f35d5,md5:0456d4eef81d993b9830456a3ff7fae9,20,besa_files,47458 61 | ucap/log/20_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1be9594d9002c67fe88c,md5:ffe619ac39ea0fa7e1b6d462c1ee46e6,20,log_files,503079 62 | ucap/raw/21.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4a059ad5a102cc5c574f,md5:1dc4476cf275c4e99863000a69c4db19,21,,233470720 63 | ucap/raw/21.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e2b83f6902b3a67941,md5:ab37fcc87b87f793081e190d2f299f52,21,raw_files,11774 64 | ucap/raw/21.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56fcb83f6902b0a62cf3,md5:905853f5cc8a639ef52cde90aa263d11,21,,391026 65 | ucap/cali/21_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d4a594d9002c37fa097,md5:a65537ffca0dc10605badacf1f44578f,21,besa_files,47794 66 | ucap/log/21_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1be69ad5a102cb5c3997,md5:532e7a6485f0eafb4fd64d1e39d7e7c0,21,log_files,506905 67 | ucap/raw/22.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4a97b83f6902b2a644e6,md5:113789ce32fbf08acb3d5847f0c7598f,22,,262647040 68 | 
ucap/raw/22.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e4b83f6902b3a67945,md5:d4fe398f6ee8402cb51cd9994be862f7,22,raw_files,11774 69 | ucap/raw/22.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56fc594d9002c57fc71b,md5:4b11713ab8c5d2feca27a1eac3197966,22,,391395 70 | ucap/cali/22_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d4a6c613b02948f35d8,md5:84fb2e984b6cebefcb1728aae1d3f24c,22,besa_files,47571 71 | ucap/log/22_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bf96c613b02948f3582,md5:831d068ed686d0da378758ebbf4b24a2,22,log_files,506918 72 | ucap/raw/23.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4a839ad5a102ce5c951f,md5:54dc62503fb24a164087db11c7930786,23,,252595200 73 | ucap/raw/23.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e5594d9002c57fc70d,md5:640475be2ddb7b356baac314d514ca5f,23,raw_files,11774 74 | ucap/raw/23.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56fb6c613b02978f51a6,md5:55b223fd2772301f99e04d23a8826e0d,23,,386330 75 | ucap/cali/23_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d4c6c613b02968f5cde,md5:fd888c7217011b0c131ebff7a794ced5,23,besa_files,47698 76 | ucap/log/23_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bf76c613b02978f43b6,md5:463f3b964cfaac66a7f028b6a06343f5,23,log_files,502955 77 | ucap/raw/24.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4a6b594d9002c67ff870,md5:57412836093e11db49c19c337975b218,24,,249995520 78 | ucap/raw/24.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e5594d9002c67ffc4a,md5:5f23fa39b22ea9c079aaaa150b172ffd,24,raw_files,11774 79 | 
ucap/raw/24.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56fb6c613b02948f426a,md5:36954cc5e23be08bb8b40d51d3e85343,24,,389820 80 | ucap/cali/24_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d506c613b02968f5ce2,md5:b7aeef15329f6c4d9fceeb7249ce9647,24,besa_files,47688 81 | ucap/log/24_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bfab83f6902b3a664bd,md5:97b8107d6179d9c33dabfe44a6c1d8e0,24,log_files,503031 82 | ucap/raw/25.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4aa3594d9002c47fb1e1,md5:641ba4b041a3ed0012ac846a0bb0cf88,25,,245283840 83 | ucap/raw/25.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e69ad5a102cc5c59b5,md5:a814c72d782aeae63b0e55c27eb46450,25,raw_files,11774 84 | ucap/raw/25.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56ff6c613b02968f6d42,md5:27b65a888f7ec90b9d003a03eaa05732,25,,389454 85 | ucap/cali/25_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d54b83f6902b1a62887,md5:2aee4340bd0abdab35eec6c0efb41c3e,25,besa_files,47879 86 | ucap/log/25_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bfd6c613b02948f3589,md5:a14c48f17eb6aa017bb1ffe530f933d6,25,log_files,506907 87 | ucap/raw/26.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4c95594d9002c37faa86,md5:8a5c7cd8cb2dbbcb39bbad29ea7ba928,26,,235558400 88 | ucap/raw/26.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e66c613b02948f425c,md5:f1fef39fddb761f995d7169882e0b8d2,26,raw_files,11774 89 | ucap/raw/26.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56fe9ad5a102cc5c59c4,md5:91068b0b069d52b32d4ecfa1c56773a6,26,,388122 90 | 
ucap/cali/26_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d54594d9002c67fe92a,md5:2b4e66bda0455a5c49a616a9cc717366,26,besa_files,47540 91 | ucap/log/26_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1bfa6c613b02968f5c40,md5:a47d9dcca8df4cacb21caf85bc851b01,26,log_files,506790 92 | ucap/raw/27.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4c8d6c613b02978f4f33,md5:537393d12790b45cbedeeaf189d84118,27,,247159040 93 | ucap/raw/27.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56e7b83f6902b2a647a7,md5:1da287fcf22e8e99975e1a4e1c0bccea,27,raw_files,11774 94 | ucap/raw/27.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56ffb83f6902b3a6798b,md5:fe4b984ceeef8be7e8401652cb7c872b,27,,389379 95 | ucap/cali/27_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d539ad5a102ce5c860c,md5:3d65e0024094a5b708c2755e7ff3575e,27,besa_files,47629 96 | ucap/log/27_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c079ad5a102cb5c39a1,md5:4515d97965cd4cfc025cfddb3415dfce,27,log_files,503105 97 | ucap/raw/28.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4ca0b83f6902b1a631bc,md5:e38b6dee5a3a78ce3845f474138d7116,28,,257712640 98 | ucap/raw/28.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56eb594d9002c57fc710,md5:78cf33c816cab2e2da3b5ce7a4c4c62f,28,raw_files,11774 99 | ucap/raw/28.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5702b83f6902b1a633be,md5:67178dc22ffadecb0f7885eed235dacf,28,,390937 100 | ucap/cali/28_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d549ad5a102ce5c860f,md5:e1f631082684aea8e44458ee03f3f7d5,28,besa_files,47674 101 | 
ucap/log/28_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c07b83f6902b3a664c8,md5:434a044e1775b0e23eff8b14b9ff06df,28,log_files,503120 102 | ucap/raw/29.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4c8e594d9002c47fb23c,md5:f920ac35796e74dab4bdb87cf400d96f,29,,248382720 103 | ucap/raw/29.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56ec9ad5a102cb5c43f7,md5:d4dbe5f4752d0c141df3d6cfabea07ff,29,raw_files,11774 104 | ucap/raw/29.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5700594d9002c67ffc68,md5:ab25fa3d46313c46964a180b451d4f29,29,,388634 105 | ucap/cali/29_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d54594d9002c67fe927,md5:974c4cb37dfbff4db2a500e759561799,29,besa_files,47768 106 | ucap/log/29_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c076c613b02968f5c4c,md5:4937289dad17903aff89eae2c22f1ad5,29,log_files,506896 107 | ucap/raw/30.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4ce9594d9002c57fc4c2,md5:c90e2f9d5bc64e874c6e3b2be2f78796,30,,291279360 108 | ucap/raw/30.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56ed6c613b02968f6d29,md5:0f787dde9ee997140a0249d825effb19,30,raw_files,11774 109 | ucap/raw/30.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5700b83f6902b3a67990,md5:8788607d18010603417a367cfb9fedc5,30,,390019 110 | ucap/cali/30_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d566c613b02948f35dd,md5:bdcd278dfc146f2e891c458623269ad3,30,besa_files,47598 111 | ucap/log/30_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c08b83f6902b1a62821,md5:31d7aa39d9c4f113bd9db9a176eb6ddf,30,log_files,506950 112 | 
ucap/raw/31.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4fb29ad5a102ce5c96c6,md5:829ce23b46ab13fd128d742c968a8b18,31,,262684160 113 | ucap/raw/31.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56dab83f6902b0a62cd9,md5:20a184cd7a79ddd8330e6f35da7ffc04,31,raw_files,11635 114 | ucap/raw/31.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5701594d9002c67ffc6b,md5:34b39adc58c1774895f4c3230f95e45a,31,,390140 115 | ucap/cali/31_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d599ad5a102cc5c4e07,md5:7c90424ca54e7293b9f3acdb3946f138,31,besa_files,47379 116 | ucap/log/31_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c07594d9002c57fb932,md5:6185fad458b61baccd4fce28976624b4,31,log_files,503135 117 | ucap/raw/32.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4fa46c613b02968f6b33,md5:2add57f74cbb1ffd329d025299736e9c,32,,247575040 118 | ucap/raw/32.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56db594d9002c47fb452,md5:03ba99eb1c3d637a663bf0d5baa5c6a7,32,raw_files,11774 119 | ucap/raw/32.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf57026c613b02948f426f,md5:9d63fb79bcef6621d859febdd3ddec7e,32,,388911 120 | ucap/cali/32_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d59b83f6902b1a6288a,md5:4a4e2acc259bf58c3b5ca30671720b6c,32,besa_files,47613 121 | ucap/log/32_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c1d9ad5a102ce5c8587,md5:b68fde26a062c89b6b367f7b16799353,32,log_files,503066 122 | ucap/raw/33.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4f92b83f6902b3a676a6,md5:e00e2b421547ef08ccebe81b682a2cde,33,,235572480 123 | 
ucap/raw/33.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56da6c613b02978f5192,md5:f583ecff5b3e436c17180846fc01149c,33,raw_files,11774 124 | ucap/raw/33.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56ed594d9002c67ffc55,md5:b63ffd1ad4e3cfb02eaaf8c80957f452,33,,389186 125 | ucap/cali/33_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5a594d9002c57fb999,md5:bae9d3943d9c85b189e3b12a2f9c475d,33,besa_files,47393 126 | ucap/log/33_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c1d9ad5a102cd5c5cf3,md5:eccbc62f65217103af618199e0b0b874,33,log_files,506904 127 | ucap/raw/34.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4f9f9ad5a102cb5c42a5,md5:4bc1f4d3ab4d1b92fe2aff4a73e49b45,34,,261277440 128 | ucap/raw/34.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56d96c613b02978f518e,md5:4b09dfad5af6acbc52f914f09d890154,34,raw_files,11774 129 | ucap/raw/34.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56ec6c613b02968f6d26,md5:36b5bd5038743a0b614c8987dbae4828,34,,391704 130 | ucap/cali/34_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5a6c613b02978f442f,md5:fcb5211a3344b9801caf0a5c2e549f09,34,besa_files,47433 131 | ucap/log/34_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c1d9ad5a102cc5c4da7,md5:10c0b4d5adbc4051754a29523fa78692,34,log_files,506861 132 | ucap/raw/35.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf4f56594d9002c67ff9da,md5:56f81c061e501d61b5565d81e91c0aa0,35,,240788480 133 | ucap/raw/35.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56da9ad5a102cd5c6b0c,md5:20ae499a011e995570488a4ba9f43ae9,35,raw_files,11774 134 | 
ucap/raw/35.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f4b83f6902b3a67969,md5:c8f036b793a160b04bd116b1868b489d,35,,389908 135 | ucap/cali/35_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5ab83f6902b1a6288d,md5:5350382f4003eb4c842af8406fde8c30,35,besa_files,47645 136 | ucap/log/35_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c1d9ad5a102cc5c4da4,md5:8fca5f050af416e194f5039928bc4f7a,35,log_files,503085 137 | ucap/raw/36.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf51acb83f6902b3a6776d,md5:e2f442754acc032f452caa0a837d070c,36,,247325440 138 | ucap/raw/36.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56dc9ad5a102ce5c9911,md5:4cf34582f4bf35b822977122a3d9043c,36,raw_files,11774 139 | ucap/raw/36.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f26c613b02978f519e,md5:bed4f7af23ffacd4c2c0807ee8253983,36,,388568 140 | ucap/cali/36_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5e6c613b02978f4433,md5:764d882b651ce04ce63bf843aee066f4,36,besa_files,48185 141 | ucap/log/36_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c1c594d9002c67fe8ad,md5:0212e9207800b2a2c0a1ba6b88a5c1d2,36,log_files,503080 142 | ucap/raw/37.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf51de6c613b02948f411e,md5:4a6ea3820eb56db85488e2e5a7b39871,37,,256417280 143 | ucap/raw/37.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56de594d9002c37fac7e,md5:e91fabedf61b416ce607a0fa65061d6e,37,raw_files,11774 144 | ucap/raw/37.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f1b83f6902b0a62cee,md5:f7b9faf8ab3a5b5f6a7d582ed353d8cb,37,,391342 145 | 
ucap/cali/37_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5e594d9002c57fb9a0,md5:86f9cf25a52b29ad3138ecf72c8cbb53,37,besa_files,47586 146 | ucap/log/37_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c306c613b02948f3594,md5:2b2e0ed461cd2c7332a07377ac776a02,37,log_files,506912 147 | ucap/raw/38.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf51ac594d9002c37fab5b,md5:34a37beda71e07d88fe528cd353d653c,38,,230406400 148 | ucap/raw/38.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56dc6c613b02948f424c,md5:b2200fc01730cb253a5c56a815b1bf4b,38,raw_files,11774 149 | ucap/raw/38.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f36c613b02978f51a2,md5:abd41fa26321229481be5f48a015d184,38,,389068 150 | ucap/cali/38_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5f6c613b02978f443c,md5:c9f7ac0df079f1dcaf693f16e0c750f2,38,besa_files,47657 151 | ucap/log/38_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c31594d9002c57fb943,md5:942df1dc886b525fee260c4d1bc08fe4,38,log_files,506899 152 | ucap/raw/39.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf52166c613b02978f507c,md5:9df4886fbb9be085ae223f99d9c925cb,39,,253369600 153 | ucap/raw/39.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56deb83f6902b0a62ce1,md5:a267c25bf21bd04bf4ca45fad65c67f6,39,raw_files,11774 154 | ucap/raw/39.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f39ad5a102cd5c6b20,md5:2215a398d45ed4ab122297dfc492f1f7,39,,390121 155 | ucap/cali/39_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5e9ad5a102cb5c3a00,md5:1ead9acab2439d116a4b7ce2fc4d21cf,39,besa_files,47640 156 | 
ucap/log/39_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c329ad5a102cd5c5cfa,md5:83bd51b398871c541e01fedba8b57483,39,log_files,503076 157 | ucap/raw/40.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf51beb83f6902b3a67777,md5:f53ecbf7a1c71359aed6d2840269b2d1,40,,244153600 158 | ucap/raw/40.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56deb83f6902b3a6792f,md5:6437171b179caf6679d019c2362815f4,40,raw_files,11635 159 | ucap/raw/40.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf56f3594d9002c47fb467,md5:29f61a0f835d54da85d78dc3776024cb,40,,389408 160 | ucap/cali/40_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d5f6c613b02948f35e6,md5:08a92e9ce1c39397d80529070d8e2047,40,besa_files,47849 161 | ucap/log/40_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c319ad5a102ce5c8599,md5:734f1632bae54ae243c51988f8a1112d,40,log_files,503030 162 | ucap/raw/41.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf537c6c613b02968f6c4b,md5:bf3f4f2260239210785a3abb15512445,41,,252418560 163 | ucap/raw/41.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13e8b83f6902b2a6387b,md5:d05ff8479106f6f0608587a590c81e24,41,raw_files,11774 164 | ucap/raw/41.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13ef6c613b02968f5a04,md5:74d651278b2e0521decba01805e425df,41,,388889 165 | ucap/cali/41_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d62594d9002c37fa0a9,md5:0c5ac59d28273fb8be5bd18e5747bda0,41,besa_files,47744 166 | ucap/log/41_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c32b83f6902b3a664df,md5:9b7943b5a91ab87757792ee23c065498,41,log_files,506900 167 | 
ucap/raw/42.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf53a06c613b02948f417c,md5:6a60a272e6ba4b0c739b2176e05b92ee,42,,241088000 168 | ucap/raw/42.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13deb83f6902b1a62640,md5:b84ff379d0a352f776bcc1bfd210d4d8,42,raw_files,11774 169 | ucap/raw/42.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13e49ad5a102ce5c827c,md5:fdff06c66b7f6cb2745edd6efda2488e,42,,389918 170 | ucap/cali/42_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d639ad5a102cc5c4e0e,md5:2a69cfcef85e1fa3881be417c59edbc0,42,besa_files,47398 171 | ucap/log/42_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c459ad5a102cd5c5d02,md5:25c9945d954fe3c2df7a7f50d69092bd,42,log_files,506842 172 | ucap/raw/43.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf53a86c613b02958f4488,md5:566e7858f4df37d58d28037388bd587d,43,,254544640 173 | ucap/raw/43.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13c29ad5a102ce5c826c,md5:74eabfaeb448199929ca0257550bfe54,43,raw_files,11774 174 | ucap/raw/43.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13d79ad5a102ce5c8275,md5:ea47a112dbcfab492585e7767948f014,43,,391143 175 | ucap/cali/43_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d636c613b02958f3831,md5:6ff038f6edb7fa81b6d2dadefd559eba,43,besa_files,47957 176 | ucap/log/43_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c45b83f6902b3a664e4,md5:c312f4bb8bc585262c87c9095a3f63fd,43,log_files,503160 177 | ucap/raw/44.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf53ac594d9002c57fc64c,md5:d2379ebb69d9bf03562baaff98f2446b,44,,226494720 178 | 
ucap/raw/44.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13bd6c613b02978f4173,md5:6ca95518cb0fed5ce5023f9cb72cf555,44,raw_files,11774 179 | ucap/raw/44.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf13d86c613b02978f417d,md5:f61ae8e23fdc0494fa561be923bf130b,44,,388600 180 | ucap/cali/44_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d639ad5a102cc5c4e12,md5:d0b20bd8b89992eaf9cc4e73a982ca32,44,besa_files,48069 181 | ucap/log/44_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c466c613b02968f5c6c,md5:53cddf2623d2bfb03fcae75f73c0c5c1,44,log_files,503058 182 | ucap/raw/46.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf53909ad5a102cd5c6a50,md5:dde6f58c99aee9fe4857e16e72b4b23b,46,,224920320 183 | ucap/raw/46.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf0e12b83f6902b2a63726,md5:5787adebb26c624a947e0027d80754b9,46,raw_files,11774 184 | ucap/raw/46.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf0e1e9ad5a102ce5c808e,md5:03d6446bcab148fd30611f5508fbc77b,46,,389971 185 | ucap/cali/46_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d646c613b02978f4443,md5:5944eb5955b28a291f258cd098c55d6f,46,besa_files,47736 186 | ucap/log/46_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c466c613b02958f37ef,md5:4501718598edf8448226e5a91b7e32e5,46,log_files,506891 187 | ucap/raw/47.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5689594d9002c67ffc1b,md5:7a4f34c38932b748c5ec982ed1ae42c1,47,,267520000 188 | ucap/raw/47.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf0889b83f6902b2a63609,md5:871c26bbe195fc797c4343cfbbc845eb,47,raw_files,11774 189 | 
ucap/raw/47.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf08979ad5a102ce5c7ed1,md5:62a9633c19fc35fccf0ff1843e7bfdf9,47,,390177 190 | ucap/cali/47_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d699ad5a102cd5c5d5f,md5:d485eaa980e4d039d5fad75f8d4d604a,47,besa_files,47757 191 | ucap/log/47_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c47b83f6902b2a63a88,md5:b8d742098e07312c103d0f48ecd94b9b,47,log_files,503120 192 | ucap/raw/50.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5520594d9002c37fac1f,md5:ad75ff138d22536e5865717eafea179c,50,,246759680 193 | ucap/raw/50.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf08746c613b02948f3136,md5:4f33b1d928e414040a4e8aa2279777ba,50,raw_files,11774 194 | ucap/raw/50.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf08869ad5a102cb5c3657,md5:755640eebd85db6ca6d61202fa9b0e1b,50,,391006 195 | ucap/cali/50_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d69594d9002c37fa0b0,md5:c9087ad5f7ee399c8f77f774c4490809,50,besa_files,47726 196 | ucap/log/50_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c519ad5a102ce5c85a4,md5:db2234d11ed3b7b5ad6b99457457577c,50,log_files,506897 197 | ucap/raw/51.eeg,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf5579594d9002c67ffbcd,md5:108eb489d2f4d790b97c3e3df486edfa,51,,271640320 198 | ucap/raw/51.vhdr,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf0870b83f6902b2a63600,md5:662dd97b2efb04b1e791de5a00607c93,51,raw_files,11774 199 | ucap/raw/51.vmrk,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf08859ad5a102ce5c7eca,md5:b9b9a7d8638a09437962f80b3e2be302,51,,389889 200 | 
ucap/cali/51_cali.matrix,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1d68b83f6902b1a62893,md5:39ec9a5a8a4056bf76566a5726f09105,51,besa_files,47785 201 | ucap/log/51_test.txt,https://files.de-1.osf.io/v1/resources/hdxvb/providers/osfstorage/59cf1c516c613b02948f359d,md5:d4a49760807125d052c5ed8f683de230,51,log_files,503074 202 | -------------------------------------------------------------------------------- /pipeline/datasets/utils.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | import pandas as pd 4 | import pooch 5 | from numpy import nan 6 | from pandas.api.types import is_list_like 7 | 8 | LOCAL_CACHE = 'hu-neuro-pipeline' 9 | 10 | 11 | def get_dataset(manifest_df, base_url, participants, path): 12 | 13 | if path is None: 14 | path = pooch.os_cache(LOCAL_CACHE) 15 | env = 'PIPELINE_DATA_DIR' 16 | else: 17 | env = None 18 | 19 | fetcher = pooch.create(path=path, base_url=base_url, env=env) 20 | local_dir = fetcher.abspath 21 | 22 | manifest_df = _select_participants(manifest_df, participants) 23 | 24 | file_types = manifest_df['file_type'].unique() 25 | file_types = file_types[~pd.isnull(file_types)] 26 | paths = {file_type: [] for file_type in file_types} 27 | 28 | for ix, row in manifest_df.iterrows(): 29 | 30 | local_file = local_dir.joinpath(row['local_path']) 31 | 32 | if not local_file.exists(): 33 | fetcher.registry[row['local_path']] = row['hash'] 34 | fetcher.urls[row['local_path']] = row['url'] 35 | _ = fetcher.fetch(row['local_path']) 36 | 37 | if row['file_type'] in paths: 38 | paths[row['file_type']].append(str(local_file)) 39 | 40 | return paths 41 | 42 | 43 | def _select_participants(df, participants): 44 | """Selects a subset of participants by their IDs or total number.""" 45 | 46 | all_participants = df['participant_id'].str.zfill(2).unique().tolist() 47 | if nan in all_participants: # Ignore general (e.g., BIDS) files for now 48 | 
all_participants.remove(nan) 49 | 50 | if isinstance(participants, float): 51 | warn(f'Converting `participants` from float ({participants}) to ' + 52 | f'int ({int(participants)})') 53 | selected_participants = all_participants[:int(participants)] 54 | 55 | if isinstance(participants, int): 56 | assert participants in range(1, len(all_participants) + 1), \ 57 | '`participants` must be an integer between 1 and ' + \ 58 | f'{len(all_participants)}' 59 | selected_participants = all_participants[:participants] 60 | 61 | if isinstance(participants, str): 62 | assert participants in all_participants, \ 63 | f'Participant \'{participants}\' not found in the dataset. ' + \ 64 | f'Valid participants are {all_participants}' 65 | selected_participants = [participants] 66 | 67 | if is_list_like(participants): 68 | missing_participants = list(set(participants) - set(all_participants)) 69 | assert not missing_participants, \ 70 | f'Participants {missing_participants} not found in the ' + \ 71 | f'dataset. 
def get_events(raw, triggers=None):
    """Extracts events from raw data based on a list of numeric triggers.

    Parameters
    ----------
    raw : mne.io.Raw
        The continuous raw data with annotations.
    triggers : list of int | None
        If given, restrict the event dict to these trigger values.

    Returns
    -------
    tuple
        `(events, event_id)` as returned by `events_from_annotations`, with
        `event_id` optionally restricted to `triggers`.
    """

    events, event_id = events_from_annotations(raw, verbose=False)

    if triggers is not None:
        if isinstance(raw, RawBrainVision):
            # BrainVision annotation labels are the trigger numbers themselves
            event_id = {str(trigger): int(trigger) for trigger in triggers}
        else:
            event_id = {key: value for key, value in event_id.items()
                        if int(key) in triggers}

    return events, event_id


def update_skip_log_rows(skip_log_rows, epochs):
    """Updates log file rows to skip, based on dropped epochs.

    Returns the union of the user-provided `skip_log_rows` and the indices
    of epochs that MNE dropped (e.g., due to 'NO_DATA'), as a sorted list.

    Fix: previously the function implicitly returned `None` when no epochs
    had been dropped, discarding any user-provided `skip_log_rows`.
    """

    dropped_ixs = get_dropped_epochs(epochs)

    # Nothing was dropped: keep the user-provided rows unchanged
    if not dropped_ixs:
        return skip_log_rows

    if skip_log_rows is None:
        return dropped_ixs

    return sorted(set(skip_log_rows) | set(dropped_ixs))


def get_dropped_epochs(epochs):
    """Gets indices of dropped epochs (e.g., due to 'NO_DATA').

    Only considers the drop reasons 'NO_DATA' and 'TOO_SHORT'; epochs marked
    'IGNORED' (events not in `event_id`) are excluded from the indexing so
    that the returned indices match the log file rows. Returns a sorted list
    for deterministic downstream behavior.
    """

    drop_log = [elem for elem in epochs.drop_log if elem != ('IGNORED',)]

    reasons = ['NO_DATA', 'TOO_SHORT']
    dropped_ixs = set()
    for reason in reasons:
        if ixs := [ix for ix, elem in enumerate(drop_log) if reason in elem]:
            dropped_ixs.update(ixs)
            message = f'Dropped {len(ixs)} epochs ({ixs}) for reason ' + \
                f'"{reason}". They will also be dropped from the log file.'
            if reason == 'TOO_SHORT':
                message += ' You may want to reduce `epochs_tmax` to ' + \
                    'avoid this.'
            warn(message)

    return sorted(dropped_ixs)
def read_log(log_file, skip_log_rows=None, skip_log_conditions=None):
    """Reads the behavioral log file with information about each EEG trial.

    `log_file` may be a path to a `.csv` (comma-separated) or other
    (tab-separated) text file with auto-detected encoding, or an existing
    DataFrame. Rows can be excluded either by index (`skip_log_rows`) or by
    condition values per column (`skip_log_conditions`).
    """

    # A DataFrame can be passed through directly; otherwise read from disk
    if isinstance(log_file, pd.DataFrame):
        log = log_file
    else:

        # Detect the file encoding from the raw bytes
        with open(log_file, 'rb') as f:
            raw_bytes = f.read()
        encoding = chardet.detect(raw_bytes)['encoding']

        # `.csv` files are comma-separated, everything else tab-separated
        delimiter = ',' if Path(log_file).suffix == '.csv' else '\t'
        log = pd.read_csv(log_file, delimiter=delimiter, encoding=encoding)

    # Remove rows via indices (e.g., if the EEG was paused accidentally)
    if skip_log_rows is not None:
        log = log.drop(skip_log_rows)

    # Remove rows via conditions (e.g., for filler stimuli without triggers)
    if skip_log_conditions is not None:
        assert isinstance(skip_log_conditions, dict), \
            '"skip_log_conditions" must be a dict ({column: [conditions]})'
        for col, values in skip_log_conditions.items():
            if is_list_like(values):
                keep_mask = ~log[col].isin(values)
            else:
                keep_mask = log[col] != values
            log = log[keep_mask]

    return log
def match_log_to_epochs(epochs, log, triggers_column, depth=10):
    """Auto-detects missing EEG trials and removes them from the log file.

    Walks through the log file's trigger column and the triggers actually
    present in the epochs in parallel. Whenever they diverge, the EEG trial
    is assumed to be missing and a `nan` placeholder is inserted into the
    epoch trigger list; the corresponding log rows are dropped at the end.

    Parameters
    ----------
    epochs : mne.Epochs
        Epochs whose `event_id` and `events` encode the recorded triggers.
    log : pd.DataFrame
        Behavioral log file with one row per intended trial.
    triggers_column : str
        Name of the log column containing the (numeric) trigger per trial.
    depth : int
        Number of subsequent trials that must match before a previously
        repaired misalignment is considered resolved.

    Returns
    -------
    tuple
        `(log, missing_ixs)` where `log` has the rows with missing EEG
        epochs removed (index reset beforehand) and `missing_ixs` lists the
        dropped row indices.
    """

    # Make sure that the requested column is available in the log file
    assert triggers_column in log.columns, \
        f'Column \'{triggers_column}\' is not in the log file'

    # Read lists of triggers from log file
    events_log = log[triggers_column].tolist()

    # Read lists of triggers from EEG epochs; map the numeric event codes in
    # `events` back to their (integer-valued) labels via `event_id`
    event_id_keys = list(epochs.event_id.keys())
    event_id_values = list(epochs.event_id.values())
    events_epochs = [int(event_id_keys[event_id_values.index(event)])
                     for event in epochs.events[:, 2]]

    # Check for each row in the log file
    previous_repaired = False
    for ix in range(len(events_log)):

        # Add `nan` in case trials are missing at the end of the EEG...
        if len(events_epochs) <= ix:
            print(f'Log file (row index {ix}): Found missing EEG epoch')
            events_epochs.insert(ix, np.nan)

        # ... or if the log and EEG triggers don't match up
        elif events_log[ix] != events_epochs[ix]:
            print(f'Log file (row index {ix}): Found missing EEG epoch')
            events_epochs.insert(ix, np.nan)
            previous_repaired = True

        # If they do match up, we check that the next trials do match as well
        # NOTE(review): a slice containing an inserted `nan` can never
        # compare equal (nan != nan), which keeps the repair mode active —
        # presumably intended as a conservative lookahead; confirm
        elif previous_repaired:
            if events_log[ix:ix + depth] != events_epochs[ix:ix + depth]:
                print(f'Log file (row index {ix}): Assuming missing EEG epoch')
                events_epochs.insert(ix, np.nan)
            else:
                previous_repaired = False

    # Remove trials with missing EEG epochs from the log file; reset the
    # index first so positional indices line up with the repaired list
    missing_ixs = np.where(np.isnan(events_epochs))[0].tolist()
    print(f'Dropping rows from the log file data: {missing_ixs}')
    log = log.reset_index(drop=True)
    log = log.drop(index=missing_ixs)

    return log, missing_ixs
def get_bad_channels(epochs, threshold=3., by_event_type=True):
    """Automatically detects bad channels using their average standard error.

    Channels whose time-averaged standard error (z-scored across channels)
    exceeds `threshold` are flagged as bad.

    Parameters
    ----------
    epochs : mne.Epochs
        The epoched data.
    threshold : float
        z score cutoff on the per-channel standard error.
    by_event_type : bool
        If True, compute standard errors per condition first and combine
        them weighted by the number of averages (`nave`).

    Returns
    -------
    list of str
        Labels of the detected bad channels (possibly empty).
    """

    # Compute standard error for each condition separately, then average...
    if by_event_type:
        ses = epochs.standard_error(by_event_type=True)
        ses = combine_evoked(ses, weights='nave')

    # ... or directly compute standard error across all epochs
    else:
        ses = epochs.standard_error()

    # Average across time points for each channel
    ses = ses.data.mean(axis=1)

    # Convert to z scores across channels
    zs = zscore(ses)

    # Look up bad channel labels
    ixs = np.where(zs > threshold)[0]
    bad_channels = [epochs.ch_names[ix] for ix in ixs]
    if bad_channels != []:
        print(f'Automatically detected bad channels {bad_channels} with '
              f'z_SE > {threshold}')
    else:
        print(f'Didn\'t detect any bad channels with z_SE > {threshold}')

    return bad_channels


def compute_single_trials(epochs, components, bad_ixs=None):
    """Computes single trial mean amplitudes a dict of multiple components.

    Parameters
    ----------
    epochs : mne.Epochs
        The epoched data; its `metadata` gets one new column per component.
    components : dict
        Must contain the keys 'name', 'tmin', 'tmax', and 'roi' (scalars or
        equal-length lists); an optional 'se' flag requests standard errors.
    bad_ixs : int | list of int | None
        Indices of bad epochs whose amplitudes are set to NaN.

    Returns
    -------
    pd.DataFrame
        The updated `epochs.metadata`.
    """

    # Check dict keys
    for key in ['name', 'tmin', 'tmax', 'roi']:
        assert key in components, \
            f'Key \'{key}\' is missing from the `components` argument'

    # Fix: work on a shallow copy so the normalization below cannot mutate
    # the caller's dict (which may be a shared mutable default argument of
    # `group_pipeline`, leaking state across calls)
    components = dict(components)
    if 'se' not in components:
        components['se'] = False

    # Broadcast scalar values to lists matching the longest list-like value
    # (assumes at least one value is list-like — TODO confirm upstream)
    for key in ['name', 'tmin', 'tmax', 'roi', 'se']:
        if not is_list_like(components[key]):
            max_len = max([len(value) for value in components.values()
                           if is_list_like(value)])
            components[key] = [components[key]] * max_len

    # Loop over components
    components_df = pd.DataFrame(components)
    for _, component in components_df.iterrows():

        # Compute single trial mean ERP amplitudes
        compute_component(
            epochs, component['name'], component['tmin'],
            component['tmax'], component['roi'], component['se'],
            bad_ixs)

    return epochs.metadata


def compute_component(epochs, name, tmin, tmax, roi, se, bad_ixs=None):
    """Computes single trial mean amplitudes for single component.

    Adds a virtual ROI-average channel named `name` (as a 'misc' channel) to
    `epochs` and appends the per-trial mean amplitude (and optionally its
    standard error, `<name>_se`) to `epochs.metadata`.
    """

    # Check that requested region of interest channels are present in the data
    if not is_list_like(roi):
        roi = [roi]
    for ch in roi:
        assert ch in epochs.ch_names, f'ROI channel \'{ch}\' not in the data'

    # Create virtual channel for the average in the region of interest
    print(f'Computing single trial ERP amplitudes for \'{name}\'')
    set_log_level('ERROR')
    roi_dict = {name: pick_channels(epochs.ch_names, roi)}
    epochs_roi = combine_channels(epochs, roi_dict)
    epochs.add_channels([epochs_roi], force_update_info=True)
    epochs.set_channel_types({name: 'misc'})

    # Compute mean amplitude in the region and time window of interest
    data = epochs.\
        copy().\
        pick_channels(roi).\
        crop(tmin, tmax).\
        get_data(units='uV')
    mean_amp = data.mean(axis=(1, 2))

    # Optionally compute the standard error across channels x time points
    if se:
        name_se = f'{name}_se'
        sd_amp = data.std(axis=(1, 2), ddof=1)
        n_samples = data.shape[1] * data.shape[2]
        se_amp = sd_amp / np.sqrt(n_samples)

    # Set ERPs for bad epochs to NaN
    if bad_ixs is not None:
        if isinstance(bad_ixs, int):
            bad_ixs = [bad_ixs]
        mean_amp[bad_ixs] = np.nan
        if se:
            se_amp[bad_ixs] = np.nan

    # Add as a new column to the original metadata
    epochs.metadata.reset_index(drop=True, inplace=True)
    epochs.metadata[name] = mean_amp
    if se:
        epochs.metadata[name_se] = se_amp
    set_log_level('INFO')
def group_pipeline(
    raw_files=None,
    log_files=None,
    output_dir=None,
    clean_dir=None,
    epochs_dir=None,
    report_dir=None,
    to_df=True,
    downsample_sfreq=None,
    veog_channels='auto',
    heog_channels='auto',
    montage='easycap-M1',
    bad_channels=None,
    ref_channels='average',
    besa_files=None,
    ica_method=None,
    ica_n_components=None,
    highpass_freq=0.1,
    lowpass_freq=40.0,
    triggers=None,
    triggers_column=None,
    epochs_tmin=-0.5,
    epochs_tmax=1.5,
    baseline=(-0.2, 0.0),
    skip_log_rows=None,
    skip_log_conditions=None,
    reject_peak_to_peak=200.0,
    perform_ride=False,
    ride_condition_column=None,
    ride_rt_column='RT',
    ride_s_twd=(0.0, 0.6),
    ride_r_twd=(-0.3, 0.3),
    ride_epochs_tmin_after_ride=None,
    ride_epochs_tmax_after_ride=None,
    ride_reject_peak_to_peak=None,
    components={'name': [], 'tmin': [], 'tmax': [], 'roi': []},
    average_by=None,
    perform_tfr=False,
    tfr_subtract_evoked=False,
    tfr_freqs=np.linspace(4.0, 40.0, num=37),
    tfr_cycles=np.linspace(2.0, 20.0, num=37),
    tfr_mode='percent',
    tfr_baseline=(-0.45, -0.05),
    tfr_components={
        'name': [], 'tmin': [], 'tmax': [], 'fmin': [], 'fmax': [], 'roi': []},
    perm_contrasts=[],
    perm_tmin=0.0,
    perm_tmax=1.0,
    perm_channels=None,
    perm_fmin=None,
    perm_fmax=None,
    n_jobs=1,
    vhdr_files=None
):
    """Process EEG data for a group of participants.

    Performs preprocessing and computes single trial mean amplitudes for ERP
    components of interest as well as by-participant averaged waveforms.
    Optionally, performs time-frequency analysis and/or cluster-based
    permutation tests.

    Parameters & returns
    --------------------
    See `Usage <../usage.html>`_ for the pipeline input arguments and outputs.
    """

    # Convert input types
    tfr_freqs = list(tfr_freqs)
    tfr_cycles = list(tfr_cycles)

    # Work on shallow copies of the dict inputs: `compute_single_trials`
    # normalizes/extends `components` downstream, which would otherwise leak
    # into the caller's dict and into the shared mutable default arguments
    # across repeated calls
    components = dict(components)
    tfr_components = dict(tfr_components)

    # Backup input arguments for re-use (must happen while locals() still
    # only contains the input arguments)
    config = locals().copy()

    # Create partial function with arguments shared across participants
    partial_pipeline = partial(
        participant_pipeline,
        skip_log_conditions=skip_log_conditions,
        downsample_sfreq=downsample_sfreq,
        veog_channels=veog_channels,
        heog_channels=heog_channels,
        montage=montage,
        ref_channels=ref_channels,
        ica_method=ica_method,
        ica_n_components=ica_n_components,
        highpass_freq=highpass_freq,
        lowpass_freq=lowpass_freq,
        triggers=triggers,
        triggers_column=triggers_column,
        epochs_tmin=epochs_tmin,
        epochs_tmax=epochs_tmax,
        baseline=baseline,
        reject_peak_to_peak=reject_peak_to_peak,
        perform_ride=perform_ride,
        ride_condition_column=ride_condition_column,
        ride_rt_column=ride_rt_column,
        ride_s_twd=ride_s_twd,
        ride_r_twd=ride_r_twd,
        ride_epochs_tmin_after_ride=ride_epochs_tmin_after_ride,
        ride_epochs_tmax_after_ride=ride_epochs_tmax_after_ride,
        ride_reject_peak_to_peak=ride_reject_peak_to_peak,
        components=components,
        average_by=average_by,
        perform_tfr=perform_tfr,
        tfr_subtract_evoked=tfr_subtract_evoked,
        tfr_freqs=tfr_freqs,
        tfr_cycles=tfr_cycles,
        tfr_mode=tfr_mode,
        tfr_baseline=tfr_baseline,
        tfr_components=tfr_components,
        clean_dir=clean_dir,
        epochs_dir=epochs_dir,
        chanlocs_dir=output_dir,
        report_dir=report_dir,
        to_df=to_df)

    # Imported locally once instead of inside each deprecation branch
    from warnings import warn

    # Handle the deprecated `vhdr_files` alias for `raw_files`
    if raw_files is None:
        if vhdr_files is not None:
            warn('⚠️ The `vhdr_files` argument has been renamed to `raw_files` ' +
                 'and will cease to work in a future version of the pipeline. ' +
                 'Please update your code accordingly.')
            raw_files = vhdr_files

    if ica_method is not None and ica_n_components is None:
        warn('The default value of `ica_n_components` has changed from ' +
             '`0.99` (i.e., 99% explained variance) to `None` (i.e., ' +
             'extract as many components as possible). To reproduce ' +
             'previous results, explicitly set `ica_n_components=0.99`.')

    # Get input file paths if directories were provided
    if isinstance(raw_files, (str, Path)):
        raw_files = files_from_dir(raw_files, eeg_extensions)
    if isinstance(log_files, (str, Path)):
        log_files = files_from_dir(log_files, log_extensions)
        assert len(log_files) == len(raw_files), \
            f'Number of `log_files` ({len(log_files)}) does not match ' + \
            f'number of `raw_files` ({len(raw_files)})'

    # Get input BESA matrix files if necessary
    if isinstance(besa_files, (str, Path)):
        besa_files = files_from_dir(besa_files, besa_extensions)
    elif besa_files is None:
        besa_files = [None] * len(raw_files)
    assert len(besa_files) == len(raw_files), \
        f'Number of `besa_files` ({len(besa_files)}) does not match ' + \
        f'number of `raw_files` ({len(raw_files)})'

    # Extract participant IDs from filenames
    participant_ids = [get_participant_id(f) for f in raw_files]

    # Construct lists of bad_channels and skip_log_rows per participant
    bad_channels = convert_participant_input(bad_channels, participant_ids)
    skip_log_rows = convert_participant_input(skip_log_rows, participant_ids)

    # Combine participant-specific inputs
    participant_args = zip(raw_files, log_files, besa_files,
                           bad_channels, skip_log_rows)

    # Do processing in parallel
    n_jobs = int(n_jobs)
    res = Parallel(n_jobs)(
        delayed(partial_pipeline)(*args) for args in participant_args)

    # Sort outputs into separate lists
    print('\n\n=== Processing group level ===')
    trials, evokeds, evokeds_dfs, configs = list(map(list, zip(*res)))[0:4]

    # Combine trials and save
    trials = pd.concat(trials, ignore_index=True)
    save_df(trials, output_dir, suffix='trials')

    # Combine evokeds_dfs and save
    evokeds_df = pd.concat(evokeds_dfs, ignore_index=True)
    save_df(evokeds_df, output_dir, suffix='ave')

    # Compute grand averaged ERPs and save
    grands = compute_grands(evokeds)
    grands_df = compute_grands_df(evokeds_df)
    save_evokeds(
        grands, grands_df, output_dir, participant_id='grand', to_df=to_df)

    # Update config with participant-specific inputs...
    config['raw_files'] = raw_files
    config['bad_channels'] = bad_channels
    config['besa_files'] = besa_files
    config['skip_log_rows'] = skip_log_rows

    # ... and outputs that might have been created along the way
    config['log_files'] = [p_config['log_file'] for p_config in configs]
    auto_keys = ['auto_bad_channels', 'auto_missing_epochs',
                 'auto_rejected_epochs', 'auto_rejected_epochs_before_ride',
                 'auto_ica_n_components', 'auto_ica_bad_components']
    for key in auto_keys:
        if any(key in p_config and p_config[key] is not None
               for p_config in configs):
            config[key] = {p_id: p_config[key] for p_id, p_config
                           in zip(participant_ids, configs)}

    # Save config
    config['package_versions'] = package_versions()
    save_config(config, output_dir)

    # Define standard returns
    returns = [trials, evokeds_df, config]

    # Cluster based permutation tests for ERPs
    if perm_contrasts != []:
        cluster_df = compute_perm(evokeds, perm_contrasts, perm_tmin,
                                  perm_tmax, perm_channels, n_jobs)
        save_df(cluster_df, output_dir, suffix='clusters')
        returns.append(cluster_df)

    # Combine time-frequency results
    if perform_tfr:

        # Sort outputs into separate lists
        tfr_evokeds, tfr_evokeds_dfs = list(map(list, zip(*res)))[4:6]

        # Combine evokeds_df for power and save
        tfr_evokeds_df = pd.concat(tfr_evokeds_dfs, ignore_index=True)
        save_df(tfr_evokeds_df, output_dir,
                suffix='tfr_ave')
        returns.append(tfr_evokeds_df)

        # Compute grand averaged power and save
        tfr_grands = compute_grands(tfr_evokeds)
        tfr_grands_df = compute_grands_df(tfr_evokeds_df)
        save_evokeds(tfr_grands, tfr_grands_df, output_dir,
                     participant_id='tfr_grand', to_df=to_df)

        # Cluster based permutation tests for TFR
        if perm_contrasts != []:
            tfr_cluster_df = compute_perm_tfr(
                tfr_evokeds, perm_contrasts, perm_tmin, perm_tmax,
                perm_channels, perm_fmin, perm_fmax, n_jobs)
            save_df(tfr_cluster_df, output_dir, suffix='tfr_clusters')
            returns.append(tfr_cluster_df)

    return returns
def read_eeg(raw_file_or_files):
    """Reads one or more raw EEG datasets from the same participant.

    Parameters
    ----------
    raw_file_or_files : str | Path | list
        A single raw EEG file path, or a list of paths whose recordings are
        concatenated in the given order.

    Returns
    -------
    tuple
        `(raw, participant_id)`: the preloaded (and possibly concatenated)
        MNE Raw object, and a participant ID derived from the file
        basename(s) via `get_participant_id`.
    """

    # Read raw datasets and combine if a list was provided
    if is_list_like(raw_file_or_files):
        raw_files = raw_file_or_files
        print(f'\n=== Reading and combining raw data from {raw_files} ===')
        raw_list = [read_raw(f, preload=True) for f in raw_files]
        raw = concatenate_raws(raw_list)
        # Joint ID from all file basenames (underscore-separated)
        participant_id = get_participant_id(raw_files)

    # Read raw dataset if only a single one was provided
    else:
        raw_file = raw_file_or_files
        print(f'\n=== Reading raw data from {raw_file} ===')
        raw = read_raw(raw_file, preload=True)
        participant_id = get_participant_id(raw_file)

    return raw, participant_id
raw_files = raw_file_or_files 50 | participant_id = [path.basename(f).split('.')[0] for f in raw_files] 51 | participant_id = '_'.join(participant_id) 52 | else: 53 | raw_file = raw_file_or_files 54 | participant_id = path.basename(raw_file).split('.')[0] 55 | 56 | return participant_id 57 | 58 | 59 | def files_from_dir(dir_path, extensions, natsort_files=True): 60 | """Retrieves files matching pattern(s) from a given parent directory.""" 61 | 62 | # Find all files with one of the right extensions 63 | assert path.isdir(dir_path), f'Didn\'t find directory `{dir_path}`!' 64 | files = [] 65 | for extension in extensions: 66 | files += glob(f'{dir_path}/*{extension}') 67 | 68 | # For BrainVision files, make sure to only return the header (`.vhdr`) file 69 | if any('.vhdr' in f for f in files): 70 | files = [f for f in files if '.eeg' not in f and '.vmrk' not in f] 71 | 72 | # Sort naturally because some files might not have leading zeros 73 | if natsort_files: 74 | files = sorted(files, key=natsort) 75 | 76 | return files 77 | 78 | 79 | # `readers` changed to `_get_supported` in MNE 1.6.0, we currently support both 80 | try: 81 | from mne.io._read_raw import readers 82 | except ImportError: 83 | from mne.io._read_raw import _get_supported 84 | readers = _get_supported() 85 | 86 | eeg_extensions = list(readers.keys()) 87 | log_extensions = ['.csv', '.tsv', '.txt'] 88 | besa_extensions = ['.matrix'] 89 | 90 | 91 | def natsort(s): 92 | """Natural-sort list of strings to handle (non-)leading zeros.""" 93 | 94 | return [int(t) if t.isdigit() else t.lower() for t in re.split('(\d+)', s)] 95 | 96 | 97 | def convert_participant_input(input, participant_ids): 98 | """Converts different inputs (e.g., dict) into a per-participant list.""" 99 | 100 | # If it's a dict, convert to list 101 | if isinstance(input, dict): 102 | participant_dict = {id: None for id in participant_ids} 103 | for id, values in input.items(): 104 | assert id in participant_ids, \ 105 | f'Participant ID 
def is_nested_list(input):
    """Checks if a list is nested, i.e., contains at least one other list."""

    # Anything that isn't list-like cannot be nested
    if not is_list_like(input):
        return False

    # Look for the first element that is itself list-like
    for elem in input:
        if is_list_like(elem):
            return True
    return False
format.""" 162 | 163 | # Create output folder 164 | makedirs(output_dir, exist_ok=True) 165 | 166 | # Re-format participant ID for filename 167 | participant_id_ = '' if participant_id == '' else f'{participant_id}_' 168 | suffix = 'epo' 169 | 170 | # Convert to DataFrame 171 | if to_df is True or to_df == 'both': 172 | scalings = {'eeg': 1e6, 'misc': 1e6} 173 | epochs_df = epochs.to_data_frame(scalings=scalings, time_format=None) 174 | epochs_df = epochs_df.rename(columns={'condition': 'event_id'}) 175 | 176 | # Add metadata from log file 177 | metadata_df = epochs.metadata.copy() 178 | metadata_df = metadata_df.drop([col for col in metadata_df.columns 179 | if col in epochs_df.columns], axis=1) 180 | n_samples = len(epochs.times) 181 | metadata_df = metadata_df.loc[metadata_df.index.repeat(n_samples)] 182 | metadata_df = metadata_df.reset_index(drop=True) 183 | epochs_df = pd.concat([metadata_df, epochs_df], axis=1) 184 | 185 | # Save DataFrame 186 | save_df(epochs_df, output_dir, participant_id, suffix) 187 | 188 | # Save as MNE object 189 | if to_df is False or to_df == 'both': 190 | fname = f'{output_dir}/{participant_id_}{suffix}.fif' 191 | epochs.save(fname, overwrite=True) 192 | 193 | 194 | def save_evokeds( 195 | evokeds, evokeds_df, output_dir, participant_id='', to_df=True): 196 | """Saves a list of mne.Evokeds in `.fif` and/or `.csv` format.""" 197 | 198 | # Re-format participant ID for filename 199 | participant_id_ = '' if participant_id == '' else f'{participant_id}_' 200 | suffix = 'ave' 201 | 202 | # Create output directory 203 | makedirs(output_dir, exist_ok=True) 204 | 205 | # Save evokeds as DataFrame 206 | if to_df is True or to_df == 'both': 207 | save_df(evokeds_df, output_dir, participant_id, suffix) 208 | 209 | # Save evokeds as MNE object 210 | if to_df is False or to_df == 'both': 211 | 212 | # Save evokeds for ERPs 213 | if isinstance(evokeds[0], Evoked): 214 | fname = f'{output_dir}/{participant_id_}{suffix}.fif' 215 | 
write_evokeds(fname, evokeds, overwrite=True, verbose=False) 216 | 217 | # Save vokeds for TFR 218 | elif isinstance(evokeds[0], AverageTFR): 219 | fname = f'{output_dir}/{participant_id_}{suffix}.h5' 220 | write_tfrs(fname, evokeds, overwrite=True, verbose=False) 221 | 222 | 223 | def save_montage(epochs, output_dir): 224 | """Saves channel locations in `.csv` format.""" 225 | 226 | # Create output directory 227 | makedirs(output_dir, exist_ok=True) 228 | 229 | # Get locations of EEG channels 230 | chs = epochs.copy().pick_types(eeg=True).info['chs'] 231 | coords = [ch['loc'][:3] for ch in chs] 232 | coords_df = pd.DataFrame( 233 | columns=['cart_x', 'cart_y', 'cart_z'], data=coords) 234 | 235 | # Add channel names 236 | ch_names = [ch['ch_name'] for ch in chs] 237 | coords_df.insert(loc=0, column='channel', value=ch_names) 238 | 239 | # Add 2D flattened coordinates 240 | # Multiplied to mm scale (with head radius =~ 95 mm as in R-eegUtils) 241 | coords_df[['x', 'y']] = \ 242 | _find_topomap_coords(epochs.info, ch_names, ignore_overlap=True) * 947 243 | 244 | # Save 245 | save_df(coords_df, output_dir, suffix='channel_locations') 246 | 247 | 248 | def save_config(config, output_dir): 249 | """Saves dict of pipeline config options in `.json` format.""" 250 | 251 | # Create output directory 252 | makedirs(output_dir, exist_ok=True) 253 | 254 | # Save 255 | fname = f'{output_dir}/config.json' 256 | with open(fname, 'w') as f: 257 | json.dump(stringify(config), f, indent=4) 258 | 259 | 260 | def stringify(inst, types=None): 261 | """Recursively converts list/dict entries from other types to str.""" 262 | 263 | if types is None: 264 | types = (Path, range) 265 | 266 | if is_list_like(inst): 267 | for ix, elem in enumerate(inst): 268 | if isinstance(elem, types): 269 | inst[ix] = str(elem) 270 | elif is_list_like(elem): 271 | inst[ix] = stringify(elem) 272 | elif isinstance(elem, dict): 273 | for key, value in elem.items(): 274 | inst[ix][key] = stringify(value) 275 | 
def participant_pipeline(
    raw_file,
    log_file,
    besa_file=None,
    bad_channels=None,
    auto_bad_channels=None,
    skip_log_rows=None,
    skip_log_conditions=None,
    downsample_sfreq=None,
    veog_channels='auto',
    heog_channels='auto',
    montage='easycap-M1',
    ref_channels='average',
    ica_method=None,
    ica_n_components=None,
    highpass_freq=0.1,
    lowpass_freq=40.0,
    triggers=None,
    triggers_column=None,
    epochs_tmin=-0.5,
    epochs_tmax=1.5,
    baseline=(-0.2, 0.0),
    reject_peak_to_peak=200.0,
    perform_ride=False,
    ride_condition_column=None,
    ride_rt_column='RT',
    ride_s_twd=(0.0, 0.6),
    ride_r_twd=(-0.3, 0.3),
    ride_epochs_tmin_after_ride=None,
    ride_epochs_tmax_after_ride=None,
    ride_reject_peak_to_peak=None,
    components={'name': [], 'tmin': [], 'tmax': [], 'roi': []},
    average_by=None,
    perform_tfr=False,
    tfr_subtract_evoked=False,
    tfr_freqs=np.linspace(4.0, 40.0, num=37),
    tfr_cycles=np.linspace(2.0, 20.0, num=37),
    tfr_mode='percent',
    tfr_baseline=(-0.45, -0.05),
    tfr_components={
        'name': [], 'tmin': [], 'tmax': [], 'fmin': [], 'fmax': [], 'roi': []},
    clean_dir=None,
    epochs_dir=None,
    trials_dir=None,
    evokeds_dir=None,
    chanlocs_dir=None,
    tfr_dir=None,
    report_dir=None,
    to_df=True,
):
    """Process EEG data for a single participant.

    Performs preprocessing and computes single trial mean amplitudes for ERP
    components of interest as well as averaged waveforms.

    Processing order: read raw -> downsample -> add bipolar EOG -> montage ->
    interpolate bad channels -> re-reference -> BESA/ICA ocular correction ->
    filter -> epoch -> match behavioral log -> reject bad epochs ->
    (optional RIDE correction) -> single trial amplitudes -> evokeds ->
    (optional time-frequency analysis). Outputs are written to each `*_dir`
    directory that is not None.

    Returns `(trials, evokeds, evokeds_df, config)`; when `perform_tfr` is
    True, additionally returns `(tfr_evokeds, tfr_evokeds_df)`.

    NOTE(review): `components` and `tfr_components` are mutable dict
    defaults — assumed to be read-only downstream; confirm that
    `compute_single_trials`/`compute_single_trials_tfr` never mutate them.

    Parameters & returns
    --------------------
    See `Usage <../usage.html>`_ for the pipeline input arguments and outputs.
    """

    # Backup input arguments for re-use. Must remain the FIRST statement so
    # that `locals()` captures exactly the arguments; `config` is also used
    # for the recursive restart after automatic bad channel detection below.
    config = locals()

    # Read raw data
    raw, participant_id = read_eeg(raw_file)

    # Create backup of the raw data for the HTML report
    if report_dir is not None:
        dirty = raw.copy()

    # Downsample
    if downsample_sfreq is not None:
        sfreq = raw.info['sfreq']
        downsample_sfreq = float(downsample_sfreq)
        print(f'Downsampling from {sfreq} Hz to {downsample_sfreq} Hz')
        raw.resample(downsample_sfreq)

    # Add EOG channels
    raw = add_heog_veog(raw, veog_channels, heog_channels)

    # Apply custom or standard montage
    apply_montage(raw, montage)

    # Handle any bad channels
    raw, interpolated_channels = interpolate_bad_channels(
        raw, bad_channels, auto_bad_channels)

    # Re-reference to a set of channels or the average
    _ = raw.set_eeg_reference(ref_channels)

    # Do ocular correction with BESA and/or ICA
    if besa_file is not None:
        raw = correct_besa(raw, besa_file)
    if ica_method is not None:
        raw, ica = correct_ica(raw, ica_method, ica_n_components)
    else:
        ica = None

    # Filtering (EEG picks only; the unfiltered `raw` is kept for TFR below)
    filt = raw.copy().filter(highpass_freq, lowpass_freq, n_jobs=1, picks='eeg')

    # Determine events and the corresponding (selection of) triggers
    events, event_id = get_events(filt, triggers)

    # Epoching including baseline correction
    if baseline is not None:
        baseline = tuple(baseline)
    epochs = Epochs(filt, events, event_id, epochs_tmin, epochs_tmax, baseline,
                    preload=True, on_missing='warn')

    # Automatically detect bad channels and interpolate if necessary
    # (restarts the whole pipeline once, with the detected channels filled in)
    if bad_channels == 'auto' and auto_bad_channels is None:
        auto_bad_channels = get_bad_channels(epochs)
        config['auto_bad_channels'] = auto_bad_channels
        if auto_bad_channels != []:
            print('Restarting with interpolation of bad channels')
            return participant_pipeline(**config)

    # Add bad ICA components to config
    if ica is not None:
        if ica_n_components is None or ica_n_components < 1.0:
            config['auto_ica_n_components'] = int(ica.n_components_)
        config['auto_ica_bad_components'] = [int(x) for x in ica.exclude]

    # Drop the last sample to produce a nice even number
    _ = epochs.crop(tmin=None, tmax=epochs_tmax, include_tmax=False)
    print(epochs.__str__().replace(u"\u2013", "-"))

    # Read behavioral log file and match to the epochs
    skip_log_rows = update_skip_log_rows(skip_log_rows, epochs)
    log = read_log(log_file, skip_log_rows, skip_log_conditions)
    if triggers_column is not None:
        log, missing_ixs = match_log_to_epochs(epochs, log, triggers_column)
        config['auto_missing_epochs'] = missing_ixs
    epochs.metadata = log
    epochs.metadata.insert(0, column='participant_id', value=participant_id)

    # If log file was provided as a DataFrame, convert to dict for config
    if isinstance(log_file, pd.DataFrame):
        log_file = log_file.astype(object)  # Convert NaNs to null for JSON
        log_file = log_file.where(pd.notnull(log_file), None)
        config['log_file'] = log_file.to_dict(orient='list')

    # Get indices of bad epochs
    bad_ixs = get_bad_epochs(epochs, reject_peak_to_peak)
    config['auto_rejected_epochs'] = bad_ixs

    # Perform RIDE to correct speech artifacts
    # (bad epochs are re-detected afterwards, since RIDE changes the data)
    if perform_ride:
        epochs, ride_results_conditions = \
            correct_ride(epochs, bad_ixs, ride_condition_column,
                         ride_rt_column, ride_s_twd, ride_r_twd)
        epochs.crop(ride_epochs_tmin_after_ride, ride_epochs_tmax_after_ride)
        config['auto_rejected_epochs_before_ride'] = bad_ixs
        bad_ixs = get_bad_epochs(epochs, ride_reject_peak_to_peak)
        config['auto_rejected_epochs'] = bad_ixs
    else:
        ride_results_conditions = None

    # Compute single trial mean ERP amplitudes and add to metadata
    trials = compute_single_trials(epochs, components, bad_ixs)

    # Compute evokeds
    evokeds, evokeds_df = compute_evokeds(
        epochs, average_by, bad_ixs, participant_id)

    # Save cleaned continuous data
    if clean_dir is not None:
        save_clean(filt, clean_dir, participant_id)

    # Save channel locations
    if chanlocs_dir is not None:
        save_montage(epochs, chanlocs_dir)

    # Save epochs as data frame and/or MNE object
    if epochs_dir is not None:
        save_epochs(epochs, epochs_dir, participant_id, to_df)

    # Save evokeds as data frame and/or MNE object
    if evokeds_dir is not None:
        save_evokeds(evokeds, evokeds_df, evokeds_dir, participant_id, to_df)

    # Create and save HTML report
    if report_dir is not None:
        dirty.info['bads'] = interpolated_channels
        report = create_report(participant_id, dirty, ica, filt, events,
                               event_id, epochs, ride_results_conditions,
                               evokeds)
        save_report(report, report_dir, participant_id)

    # Time-frequency analysis
    if perform_tfr:

        # Epoching again without filtering (uses the unfiltered `raw`)
        epochs_unfilt = Epochs(raw, events, event_id, epochs_tmin, epochs_tmax,
                               baseline, preload=True, on_missing='warn',
                               verbose=False)

        # Drop the last sample to produce a nice even number
        _ = epochs_unfilt.crop(tmin=None, tmax=epochs_tmax, include_tmax=False)

        # Copy original metadata
        epochs_unfilt.metadata = epochs.metadata.copy()

        # Optionally subtract evoked activity to isolate induced power
        # See, e.g., https://doi.org/10.1016/j.neuroimage.2006.02.034
        if tfr_subtract_evoked:
            epochs_unfilt = subtract_evoked(epochs_unfilt, average_by, evokeds)

        # Morlet wavelet convolution
        print('Doing time-frequency transform with Morlet wavelets')
        tfr = tfr_morlet(epochs_unfilt, tfr_freqs, tfr_cycles, use_fft=True,
                         return_itc=False, n_jobs=1, average=False)

        # First, divisive baseline correction using the full epoch
        # See https://doi.org/10.3389/fpsyg.2011.00236
        if tfr_mode is not None:
            tfr_modes = \
                ['ratio', 'logratio', 'percent', 'zscore', 'zlogratio']
            assert tfr_mode in tfr_modes, \
                f'`tfr_baseline_mode` must be one of {tfr_modes}'
            tfr.apply_baseline(baseline=(None, None), mode=tfr_mode)

        # Second, additive baseline correction using the prestimulus interval
        if tfr_baseline is not None:
            tfr_baseline = tuple(tfr_baseline)
            tfr.apply_baseline(baseline=tfr_baseline, mode='mean')

        # Reduce numerical precision to reduce object size
        tfr.data = np.float32(tfr.data)

        # Add single trial mean power to metadata
        trials = compute_single_trials_tfr(tfr, tfr_components, bad_ixs)

        # Save single trial data (again)
        if trials_dir is not None:
            save_df(trials, trials_dir, participant_id, suffix='trials')

        # Compute evoked power
        tfr_evokeds, tfr_evokeds_df = compute_evokeds(
            tfr, average_by, bad_ixs, participant_id)

        # Save evoked power
        if tfr_dir is not None:
            save_evokeds(
                tfr_evokeds, tfr_evokeds_df, tfr_dir, participant_id, to_df)

        return trials, evokeds, evokeds_df, config, tfr_evokeds, tfr_evokeds_df

    return trials, evokeds, evokeds_df, config
cluster based permutation test for a given contrast""" 10 | 11 | # Extract one example evoked for reading data dimensions 12 | example_evoked = evokeds_per_participant[0][0].copy() 13 | 14 | # Get relevant time samples 15 | times = example_evoked.times 16 | if tmin is not None: 17 | times = [t for t in times if t >= tmin] 18 | if tmax is not None: 19 | times = [t for t in times if t < tmax] 20 | 21 | # Get relevant channels 22 | if channels is None: 23 | channels = example_evoked.pick_types(eeg=True).ch_names 24 | else: 25 | assert all([ch in example_evoked.ch_names for ch in channels]), \ 26 | 'All channels in `perm_channels` must be present in the data!' 27 | 28 | # Get dimensions of data for the permutation test 29 | n_participants = len(evokeds_per_participant) 30 | n_times = len(times) 31 | n_channels = len(channels) 32 | 33 | # Prepare emtpy list for results 34 | cluster_dfs = [] 35 | 36 | # Sequentially handle each contrast 37 | for contrast in contrasts: 38 | 39 | # Prepare empty array 40 | X = np.zeros((n_participants, n_times, n_channels)) 41 | 42 | # Compute a difference wave for each participant 43 | for ix, evokeds in enumerate(evokeds_per_participant): 44 | 45 | # Extract evoked data for the two conditions of interest 46 | data_conditions = [] 47 | for condition in contrast: 48 | evoked = [ev for ev in evokeds if ev.comment == condition][0] 49 | evoked = evoked.copy().crop( 50 | tmin, tmax, include_tmax=False).pick_channels(channels) 51 | data_conditions.append(evoked.data) 52 | 53 | # Compute difference between conditions 54 | data_diff = data_conditions[0] - data_conditions[1] 55 | data_diff = data_diff.swapaxes(1, 0) # Time points, channels 56 | X[ix] = data_diff 57 | 58 | # Compute channel adjacency matrix 59 | ch_adjacency, _ = find_ch_adjacency(evoked.info, 'eeg') 60 | 61 | # Run permutation test 62 | t_obs, clusters, cluster_p_vals, H0 = permutation_cluster_1samp_test( 63 | X, n_permutations=n_permutations, adjacency=ch_adjacency, 64 | 
def compute_perm_tfr(
        evokeds_per_participant, contrasts, tmin=0.0, tmax=1.0, channels=None,
        fmin=None, fmax=None, n_jobs=1, n_permutations=5001, seed=1234):
    """Performs a cluster based permutation test on time-frequency data.

    For each contrast (a pair of condition labels), computes per-participant
    condition differences in the selected time/frequency/channel window and
    runs a one-sample cluster permutation test against zero. Returns one
    long-format DataFrame with columns `contrast`, `time`, `freq`, `channel`,
    `t_obs`, `cluster` (label such as 'pos_1'/'neg_2', 'NA' outside clusters),
    and `p_val` (cluster-level p value, 1.0 outside clusters).
    """

    # Extract one example evoked for reading data dimensions
    example_evoked = evokeds_per_participant[0][0].copy()

    # Get relevant time samples (note: tmax is exclusive)
    times = example_evoked.times
    if tmin is not None:
        times = [t for t in times if t >= tmin]
    if tmax is not None:
        times = [t for t in times if t < tmax]

    # Get relevant frequencies (note: fmax is exclusive)
    freqs = example_evoked.freqs
    if fmin is not None:
        freqs = [f for f in freqs if f >= fmin]
    if fmax is not None:
        freqs = [f for f in freqs if f < fmax]

    # Get relevant channels
    if channels is None:
        channels = example_evoked.pick_types(eeg=True).ch_names
    else:
        assert all([ch in example_evoked.ch_names for ch in channels]), \
            'All channels in `perm_channels` must be present in the data!'

    # Get dimensions of data for the permutation test
    n_participants = len(evokeds_per_participant)
    n_times = len(times)
    n_freqs = len(freqs)
    n_channels = len(channels)

    # Prepare empty list for results (one DataFrame per contrast)
    cluster_dfs = []

    # Sequentially handle each contrast
    for contrast in contrasts:

        # Prepare empty array: (participants, times, freqs, channels)
        X = np.zeros((n_participants, n_times, n_freqs, n_channels))

        # Compute a difference wave for each participant
        for ix, evokeds in enumerate(evokeds_per_participant):

            # Extract evoked data for the two conditions of interest
            data_conditions = []
            for condition in contrast:
                evoked = [ev for ev in evokeds if ev.comment == condition][0]
                evoked = evoked.copy().crop(
                    tmin, tmax, fmin, fmax, include_tmax=False).pick_channels(
                        channels)
                data_conditions.append(evoked.data)

            # Compute difference between conditions
            data_diff = data_conditions[0] - data_conditions[1]
            data_diff = data_diff.swapaxes(0, 2)  # Times, freqs, channels
            X[ix] = data_diff

        # Compute frequency and channel adjacency matrix
        # Based on channel locations and a lattice matrix for frequencies
        # (`evoked` is the last cropped evoked from the loop above)
        ch_adjacency, _ = find_ch_adjacency(evoked.info, 'eeg')
        adjacency = combine_adjacency(n_freqs, ch_adjacency)

        # Run permutation test
        t_obs, clusters, cluster_p_vals, H0 = permutation_cluster_1samp_test(
            X, n_permutations=n_permutations, adjacency=adjacency,
            n_jobs=n_jobs, seed=seed)

        # Sort clusters by p values (most significant first)
        cluster_ranks = cluster_p_vals.argsort()
        cluster_p_vals = cluster_p_vals[cluster_ranks]
        clusters = [clusters[rank] for rank in cluster_ranks]

        # Create cluster images with cluster labels and p values
        labels = np.full_like(t_obs, 'NA', dtype=object)
        p_vals = np.ones_like(t_obs)
        pos_ix = 0
        neg_ix = 0
        for ix, cluster in enumerate(clusters):

            # Check if the cluster is positive or negative
            # (sign of the first t value in the cluster determines the label)
            if t_obs[cluster][0] > 0:
                pos_ix += 1
                labels[cluster] = f'pos_{pos_ix}'
            else:
                neg_ix += 1
                labels[cluster] = f'neg_{neg_ix}'

            # Extract cluster level p value
            p_val = cluster_p_vals[ix]
            p_vals[cluster] = p_val

        # Prepare DataFrame for storing t values, cluster labels, and p values
        # The repeat/tile pattern matches a C-order flatten of
        # (times, freqs, channels): time varies slowest, channel fastest
        cluster_df = pd.DataFrame({
            'contrast': ' - '.join(contrast),
            'time': np.repeat(times, n_channels * n_freqs),
            'freq': np.repeat(np.tile(freqs, n_times), n_channels),
            'channel': np.tile(channels, n_times * n_freqs)})

        # Add t values, cluster labels, and p values
        arrs = [t_obs, labels, p_vals]
        stats = ['t_obs', 'cluster', 'p_val']
        for arr, stat in zip(arrs, stats):

            # Convert array to long format
            # Initial array has shape (times, freqs, channels)
            # New array has shape (times * freqs * channels,)
            arr_long = arr.flatten()

            # Add to DataFrame
            cluster_df[stat] = arr_long

        # Append to the list of all contrasts
        cluster_dfs.append(cluster_df)

    # Combine DataFrames of all contrasts
    cluster_df = pd.concat(cluster_dfs, ignore_index=True)

    return cluster_df
def apply_montage(raw, montage):
    """Reads channel locations from custom file or standard montage."""

    # A montage given as an existing file path is read from disk; anything
    # else is treated as the name of a standard montage
    if path.isfile(montage):
        print(f'Loading custom montage from {montage}')
        digmontage = read_custom_montage(montage)
    else:
        print(f'Loading standard montage {montage}')
        digmontage = make_standard_montage(montage)

    # Re-type known EOG channels (as `eog`) and mastoid/auxiliary channels
    # (as `misc`) before attaching the montage
    type_for_channels = {
        'eog': ['HEOG', 'VEOG', 'IO1', 'IO2', 'Afp9', 'Afp10', 'Auge_u',
                'VEOG_upper', 'VEOG_lower', 'HEOG_left', 'HEOG_right'],
        'misc': ['A1', 'A2', 'M1', 'M2', 'audio', 'sound', 'pulse'],
    }
    for ch_type, ch_names in type_for_channels.items():
        for ch_name in ch_names:
            if ch_name in raw.ch_names:
                raw.set_channel_types({ch_name: ch_type})

    # Attach the channel locations to the raw data
    raw.set_montage(digmontage, match_case=False, on_missing='warn')
def correct_ica(raw, method='fastica', n_components=None, random_seed=1234):
    """Corrects ocular artifacts using ICA and automatic component removal.

    Fits ICA on a 1 Hz high-pass filtered copy of the data and removes the
    components that correlate with the bipolar HEOG/VEOG channels. Returns
    the corrected raw data and the fitted ICA object (removed components are
    stored in `ica.exclude`).
    """

    # Convert number of components to integer
    # (per the MNE `ICA` API, float values < 1.0 select a fraction of
    # explained variance and are therefore passed through unchanged)
    if n_components is not None and n_components >= 1.0 \
            and not isinstance(n_components, int):
        warn(f'Converting `ica_n_components` to integer: {n_components} -> ' +
             f'{int(n_components)}')
        n_components = int(n_components)

    # Run ICA on a copy of the data, high-pass filtered at 1 Hz
    raw_filt_ica = raw.copy()
    raw_filt_ica.load_data().filter(l_freq=1, h_freq=None, verbose=False)
    ica = ICA(
        n_components, random_state=random_seed, method=method, max_iter='auto')
    ica.fit(raw_filt_ica)

    # Remove bad components from the (original, unfiltered) raw data;
    # components are flagged by their correlation with the EOG channels
    eog_indices, _ = ica.find_bads_eog(
        raw, ch_name=['HEOG', 'VEOG'], verbose=False)
    ica.exclude = eog_indices
    raw = ica.apply(raw)

    return raw, ica
def create_report(participant_id, raw, ica, clean, events, event_id, epochs,
                  ride_results_conditions, evokeds):
    """Creates a HTML report for the processing steps of one participant.

    Adds, in order: raw data info and time series, ICA (if any), cleaned data
    info and time series, events, epochs, RIDE correction figures (if any),
    and evokeds. `raw` is the uncleaned data, `clean` the filtered data;
    `ica` and `ride_results_conditions` may be None to skip those sections.
    """

    # Disable warnings about number of open figures
    mpl.rcParams.update({'figure.max_open_warning': 0})
    plt.ioff()

    # Initialize HTML report (log level lowered to suppress MNE chatter)
    print('Creating HTML report')
    set_log_level('ERROR')
    report = Report(title=f'Report for {participant_id}', verbose=False)

    # Add raw data info
    report.add_raw(raw, title='Raw data', butterfly=False)

    # Add raw time series plots
    n_figs = 10
    raw_figs = plot_time_series(raw, n_figs)
    captions = [f'Segment {i + 1} of {n_figs}' for i in range(n_figs)]
    report.add_figure(
        raw_figs, title='Raw time series', caption=captions, tags=('raw',))

    # Add ICA
    if ica is not None:
        raw.info['bads'] = []  # Else plotting ICA fails
        report.add_ica(ica, title='ICA', inst=raw)

    # Add cleaned data info
    report.add_raw(
        clean, title='Cleaned data', butterfly=False, tags=('clean',))

    # Add cleaned time series plots (same segments/captions as for raw)
    clean_figs = plot_time_series(clean, n_figs)
    report.add_figure(clean_figs, title='Cleaned time series',
                      caption=captions, tags=('clean',))

    # Add events
    sfreq = clean.info['sfreq']
    report.add_events(
        events, title='Event triggers', event_id=event_id, sfreq=sfreq)

    # Add epochs
    report.add_epochs(epochs, title='Epochs')

    # Add RIDE results (one figure per condition)
    if ride_results_conditions is not None:
        for condition, ride_results in ride_results_conditions.items():
            fig = ride_results.plot()
            _ = report.add_figure(fig,
                                  title=f'Condition "{condition}"',
                                  section='RIDE correction', tags=('ride',))

    # Add evokeds, then restore the default log level
    report.add_evokeds(evokeds)  # Automatically uses comments as titles
    set_log_level('INFO')

    return report
def correct_ride(epochs, bad_ixs, ride_condition_column, ride_rt_column,
                 ride_s_twd, ride_r_twd):
    """Estimates speech artifacts using RIDE and subtracts them from epochs.

    Parameters
    ----------
    epochs : mne.Epochs
        Baseline-corrected epochs with metadata (the log file) attached.
    bad_ixs : list of int
        Indices of bad epochs. These are excluded when *estimating* the RIDE
        components, but the correction is applied to all epochs.
    ride_condition_column : str
        Metadata column defining the conditions corrected separately.
    ride_rt_column : str
        Metadata column with single trial latencies for the response-locked
        component (passed straight to RIDE; presumably reaction times in ms —
        confirm against the log file).
    ride_s_twd : list of float
        Time window (in s) of the stimulus-locked component.
    ride_r_twd : list of float
        Time window (in s) of the response-locked component.

    Returns
    -------
    epochs_corr : mne.Epochs
        Copy of `epochs` with the RIDE R component subtracted.
    ride_results_conditions : dict
        The RIDE results object for each condition.
    """

    assert ride_condition_column in epochs.metadata.columns, \
        f'Column "{ride_condition_column}" not found in the log file'
    assert ride_rt_column in epochs.metadata.columns, \
        f'Column "{ride_rt_column}" not found in the log file'

    # RIDE needs the baseline interval below; fail early with a clear message
    # instead of an opaque `TypeError` if epochs aren't baseline-corrected
    assert epochs.baseline is not None, \
        'RIDE correction requires baseline-corrected epochs'

    # Prepare RIDE configuration
    comp_name = ['s', 'r']
    comp_twd = [[x * 1000 for x in ride_s_twd],  # Ride expects ms, not s
                [x * 1000 for x in ride_r_twd]]
    sfreq = epochs.info['sfreq']
    epoch_twd = [epochs.tmin * 1000, epochs.tmax * 1000]
    re_samp = 1000 / sfreq
    prg = 0
    bl = np.abs(epochs.baseline[0]) * 1000

    # Perform RIDE correction separately for each condition
    conditions = epochs.metadata[ride_condition_column].unique()
    ride_results_conditions = {}
    epochs_corr = epochs.copy()
    for condition in conditions:

        # Select epochs of the current condition
        print(f'Performing RIDE correction for condition "{condition}"')
        is_condition = epochs.metadata[ride_condition_column] == condition
        condition_ixs = np.where(is_condition)[0]
        epochs_condition = epochs[condition_ixs].copy()

        # Exclude bad epochs from the estimation of the RIDE components
        condition_good_ixs = [ix for ix in condition_ixs if ix not in bad_ixs]
        epochs_condition_good = epochs[condition_good_ixs].copy()
        comp_latency = [0.0,
                        epochs_condition_good.metadata[ride_rt_column].values]

        # Perform RIDE correction
        cfg = RideCfg(comp_name, comp_twd, comp_latency, sfreq, epoch_twd,
                      re_samp=re_samp, prg=prg, bl=bl)
        ride_results = ride_call(epochs_condition_good, cfg)
        ride_results_conditions[condition] = ride_results

        # Subtract RIDE R component from all (good + bad) epochs. Writes into
        # the private `_data` buffer so bad epochs stay part of the object
        rt = epochs_condition.metadata[ride_rt_column].values
        epochs_condition_corr = correct_trials(ride_results, epochs_condition,
                                               rt)
        epochs_corr._data[condition_ixs] = epochs_condition_corr._data

    return epochs_corr, ride_results_conditions
def subtract_evoked(epochs, average_by=None, evokeds=None):
    """Subtracts evoked activity (across or by conditions) from epochs.

    Parameters
    ----------
    epochs : mne.Epochs
        The epochs to subtract evoked activity from.
    average_by : dict or None
        Mapping from condition labels to metadata query strings. If None,
        one grand average is subtracted from all epochs.
    evokeds : list of mne.Evoked or None
        Evokeds matching the queries in `average_by` (ignored if
        `average_by` is None).

    Returns
    -------
    epochs : mne.Epochs
        The epochs with evoked activity subtracted.
    """

    # Silence MNE while subtracting; restore the previous log level even on
    # failure (previously it was hard-reset to 'INFO' at the end)
    old_log_level = set_log_level('ERROR', return_old_level=True)
    try:

        # If no columns were requested, subtract evoked activity across
        # conditions
        if average_by is None:
            print('Subtracting evoked activity')
            epochs = epochs.subtract_evoked()

        # Otherwise subtract separately for all (combinations of) conditions
        else:
            print('Subtracting evoked activity per condition in `average_by`')
            epochs = subtract_evoked_conditions(epochs, average_by, evokeds)

    finally:
        set_log_level(old_log_level)

    return epochs


def subtract_evoked_conditions(epochs, average_by, evokeds):
    """Subtracts evoked activity (separately by conditions) from epochs.

    Each epoch gets the evoked of its own condition — found via the metadata
    queries in `average_by` — subtracted from it.
    """

    # Loop over epochs (painfully slow), matching each to its condition query
    epochs_subtracted = []
    for ix, _ in enumerate(epochs):
        for query, evoked in zip(average_by.values(), evokeds):
            if len(epochs[ix][query]) > 0:
                epoch_subtracted = epochs[ix].subtract_evoked(evoked)
                epochs_subtracted.append(epoch_subtracted)

    return concatenate_epochs(epochs_subtracted)


def compute_single_trials_tfr(epochs, components, bad_ixs=None):
    """Computes single trial power for a dict of multiple components.

    Parameters
    ----------
    epochs : mne.time_frequency.EpochsTFR
        Single trial time-frequency data with metadata attached.
    components : dict
        With keys 'name', 'tmin', 'tmax', 'fmin', 'fmax', and 'roi'. Values
        may be scalars (one component) or same-length lists (multiple
        components).
    bad_ixs : int or list of int or None
        Indices of bad epochs whose power is set to NaN.

    Returns
    -------
    pandas.DataFrame
        The updated metadata with one new column per component.
    """

    # Check that values in the dict are lists so that a scalar definition of
    # a single component also works. 'fmin' and 'fmax' were previously left
    # out of this list and only worked via pandas' scalar broadcasting
    for key in ['name', 'tmin', 'tmax', 'fmin', 'fmax', 'roi']:
        if not is_list_like(components[key]):
            components[key] = [components[key]]

    # Loop over components
    components_df = pd.DataFrame(components)
    for _, component in components_df.iterrows():

        # Compute single trial power
        compute_component_tfr(
            epochs, component['name'], component['tmin'],
            component['tmax'], component['fmin'], component['fmax'],
            component['roi'], bad_ixs)

    return epochs.metadata


def compute_component_tfr(
        epochs, name, tmin, tmax, fmin, fmax, roi, bad_ixs=None):
    """Computes single trial power for a single component.

    Adds the mean power in the given time window, frequency band, and region
    of interest as a new metadata column `name` (modifies `epochs.metadata`
    in place).
    """

    # Check that requested region of interest channels are present in the data
    for ch in roi:
        assert ch in epochs.ch_names, f'ROI channel \'{ch}\' not in the data'

    # Select region, time window, and frequencies of interest
    print(f'Computing single trial power amplitudes for \'{name}\'')
    epochs_oi = epochs.copy().pick_channels(roi).crop(tmin, tmax, fmin, fmax)

    # Compute mean power per trial, averaging over the remaining axes
    # (assumed layout: epochs x channels x freqs x times — TFR convention)
    mean_power = epochs_oi.data.mean(axis=(1, 2, 3))

    # Set power for bad epochs to NaN
    if bad_ixs is not None:
        if isinstance(bad_ixs, int):
            bad_ixs = [bad_ixs]
        mean_power[bad_ixs] = np.nan

    # Add as a new column to the original metadata
    epochs.metadata[name] = mean_power
"""Computes single trial power for a single component.""" 63 | 64 | # Check that requested region of interest channels are present in the data 65 | for ch in roi: 66 | assert ch in epochs.ch_names, f'ROI channel \'{ch}\' not in the data' 67 | 68 | # Select region, time window, and frequencies of interest 69 | print(f'Computing single trial power amplitudes for \'{name}\'') 70 | epochs_oi = epochs.copy().pick_channels(roi).crop(tmin, tmax, fmin, fmax) 71 | 72 | # Compute mean power per trial 73 | mean_power = epochs_oi.data.mean(axis=(1, 2, 3)) 74 | 75 | # Set power for bad epochs to NaN 76 | if bad_ixs is not None: 77 | if isinstance(bad_ixs, int): 78 | bad_ixs = [bad_ixs] 79 | mean_power[bad_ixs] = np.nan 80 | 81 | # Add as a new column to the original metadata 82 | epochs.metadata[name] = mean_power 83 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools_scm>=6.2", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | local_scheme = "no-local-version" 7 | write_to = "pipeline/_version.py" 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as op 3 | 4 | import setuptools 5 | 6 | 7 | def package_tree(pkgroot): 8 | """Get the submodule list (adapted from MNE/VisPy).""" 9 | 10 | # Get all submodules based on their `__init__.py` 11 | path = op.dirname(__file__) 12 | subdirs = [op.relpath(i[0], path).replace(op.sep, '.') 13 | for i in os.walk(op.join(path, pkgroot)) 14 | if '__init__.py' in i[2]] 15 | 16 | return sorted(subdirs) 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | # Paste README as long description 22 | with open('README.md', 'r', encoding='utf-8') as fh: 23 | 
long_description = fh.read() 24 | 25 | # Actual setup 26 | setuptools.setup( 27 | name='hu-neuro-pipeline', 28 | author='Alexander Enge', 29 | author_email='alexander.enge@hu-berlin.de', 30 | description='Single trial EEG pipeline at the Abdel Rahman Lab for ' 31 | 'Neurocognitive Psychology, Humboldt-Universität zu Berlin', 32 | long_description=long_description, 33 | long_description_content_type='text/markdown', 34 | url='https://github.com/alexenge/hu-neuro-pipeline', 35 | project_urls={ 36 | 'Issue trackers': 'https://github.com/alexenge/hu-neuro-pipeline/issues', 37 | }, 38 | classifiers=[ 39 | 'Programming Language :: Python', 40 | 'Programming Language :: Python :: 3', 41 | 'License :: OSI Approved :: MIT License', 42 | 'Operating System :: OS Independent', 43 | 'Intended Audience :: Science/Research', 44 | 'Topic :: Scientific/Engineering' 45 | ], 46 | packages=package_tree('pipeline'), 47 | package_data={'pipeline.datasets': ['erpcore_manifest.csv', 48 | 'ucap_manifest.csv']}, 49 | install_requires=[ 50 | 'chardet', 51 | 'eeg-ride', 52 | 'joblib', 53 | 'matplotlib', 54 | 'mne>=0.24.0', 55 | 'pandas!=1.4.0', 56 | 'pooch>=1.5', 57 | 'scikit-learn' 58 | ], 59 | python_requires='>=3.8', 60 | ) 61 | --------------------------------------------------------------------------------