├── docs ├── requirements.txt ├── source │ ├── _static │ │ ├── favicon.ico │ │ ├── light-logo.svg │ │ └── dark-logo.svg │ ├── releases │ │ ├── si-0.100.7.rst │ │ ├── si-0.102.1.rst │ │ ├── 1.0.0.rst │ │ └── si-0.101.2.rst │ ├── releases.rst │ ├── index.rst │ ├── conf.py │ ├── troubleshooting.rst │ ├── introduction.rst │ ├── installation.rst │ ├── pipeline_steps.rst │ ├── deployments.rst │ ├── input_output.rst │ ├── customization.rst │ └── parameters.rst ├── make.bat └── Makefile ├── .codespellrc ├── .gitignore ├── environment ├── requirements.txt ├── Dockerfile_kilosort25 ├── Dockerfile_spykingcircus2 ├── Dockerfile_nwb ├── build_all.sh ├── Dockerfile_kilosort4 ├── Dockerfile_base └── push_all.sh ├── .readthedocs.yaml ├── .github └── workflows │ ├── codespell.yml │ ├── nextflow_test.config │ ├── test_pipeline.yml │ └── params_test.json ├── pipeline ├── capsule_versions.env ├── nextflow.config ├── slurm_submit.sh ├── nextflow_local.config ├── nextflow_slurm.config ├── default_params.json ├── main.nf └── main_multi_backend.nf ├── metadata └── metadata.yml ├── LICENSE ├── sample_dataset └── create_test_nwb.py ├── RELASE_NOTES.md ├── tests ├── test_pipeline_local.sh ├── test_pipeline_slurm.sh └── params_test.json ├── README.md └── pull_pipeline_images.sh /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | furo 3 | sphinx-design 4 | numpydoc 5 | sphinxcontrib-jquery -------------------------------------------------------------------------------- /docs/source/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenNeuralDynamics/aind-ephys-pipeline/HEAD/docs/source/_static/favicon.ico -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | # Ref: https://github.com/codespell-project/codespell#using-a-config-file 3 | skip = .git*,*.svg,.codespellrc 4 | check-hidden = true 5 | # ignore-regex = 6 | # ignore-words-list = 7 | -------------------------------------------------------------------------------- /docs/source/releases/si-0.100.7.rst: -------------------------------------------------------------------------------- 1 | .. _si-0.100.7: 2 | 3 | si-0.100.7 - February 2024 4 | ========================== 5 | 6 | First pipeline release with SpikeInterface 0.100.7. 
7 | 8 | Package versions 9 | ---------------- 10 | * ``spikeinterface``: 0.100.7 11 | * ``aind-data-schema``: 0.38.5 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data, builds, results, and work directories 2 | /data/ 3 | /*data*/ 4 | /work/ 5 | /*work*/ 6 | **/work/ 7 | /results/ 8 | /*results*/ 9 | /pipeline/work/ 10 | **/.nextflow* 11 | /docs/build/ 12 | tests/.nfs* 13 | 14 | # User-specific files 15 | pipeline/nextflow_slurm_custom.config 16 | -------------------------------------------------------------------------------- /environment/requirements.txt: -------------------------------------------------------------------------------- 1 | spikeinterface==0.103.0 2 | pyvips==2.2.1 3 | torch==2.6.0 4 | pynwb==2.8.2 5 | hdbscan==0.8.40 6 | neuroconv==0.6.7 7 | wavpack-numcodecs==0.2.2 8 | hdmf-zarr==0.9.0 9 | s3fs==2025.3.0 10 | seaborn==0.13.2 11 | aind-data-schema==1.3.0 12 | aind-qcportal-schema==0.4.0 13 | aind-metadata-upgrader==0.0.24 14 | aind-nwb-utils==0.1.0 -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | # Path to your Sphinx configuration file. 5 | configuration: docs/source/conf.py 6 | 7 | build: 8 | os: ubuntu-22.04 9 | tools: 10 | python: "3.11" 11 | 12 | 13 | # Optionally declare the Python requirements required to build your docs 14 | python: 15 | install: 16 | - requirements: docs/requirements.txt 17 | -------------------------------------------------------------------------------- /docs/source/releases.rst: -------------------------------------------------------------------------------- 1 | .. _releases: 2 | 3 | Releases 4 | ======== 5 | 6 | The AIND Ephys Pipeline releases are tagged with the SpikeInterface version, 7 | since the pipeline is mainly built on top of SpikeInterface. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | releases/1.0.0.rst 13 | releases/si-0.102.1.rst 14 | releases/si-0.101.2.rst 15 | releases/si-0.100.7.rst 16 | -------------------------------------------------------------------------------- /docs/source/releases/si-0.102.1.rst: -------------------------------------------------------------------------------- 1 | .. _si-0.102.1: 2 | 3 | si-0.102.1 - March 2025 4 | ======================= 5 | 6 | Package versions 7 | ---------------- 8 | * ``spikeinterface`` version: 0.102.1 9 | * ``aind-data-schema`` version: 1.3.0 10 | 11 | Bug fixes 12 | --------- 13 | * Fix NWB export with multi-shanks and multi-groups 14 | 15 | New features 16 | ------------ 17 | * Removed unit classifier process (moved to curation) 18 | * Centralized capsule versions 19 | * Added CI tests and automated deployments -------------------------------------------------------------------------------- /environment/Dockerfile_kilosort25: -------------------------------------------------------------------------------- 1 | FROM spikeinterface/kilosort2_5-compiled-base:latest 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Copy requirements.txt into the container 6 | COPY requirements.txt . 
7 | 8 | RUN pip install --upgrade pip && \ 9 | grep -E 'aind-data-schema' requirements.txt | xargs pip install 10 | 11 | # Extract the spikeinterface version and install with extras 12 | RUN pip install --upgrade pip && \ 13 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) && \ 14 | pip install "spikeinterface[full]==${SPIKEINTERFACE_VERSION}" 15 | -------------------------------------------------------------------------------- /environment/Dockerfile_spykingcircus2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:23.9.0-0 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Copy requirements.txt into the container 6 | COPY requirements.txt . 7 | 8 | RUN pip install --upgrade pip && \ 9 | grep -E 'aind-data-schema|hdbscan|torch' requirements.txt | xargs pip install 10 | 11 | # Extract the spikeinterface version and install with extras 12 | RUN pip install --upgrade pip && \ 13 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) && \ 14 | pip install "spikeinterface[full]==${SPIKEINTERFACE_VERSION}" 15 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | # Codespell configuration is within .codespellrc 2 | --- 3 | name: Codespell 4 | 5 | on: 6 | push: 7 | branches: [main] 8 | pull_request: 9 | branches: [main] 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | codespell: 16 | name: Check for spelling errors 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | - name: Annotate locations with typos 23 | uses: codespell-project/codespell-problem-matcher@v1 24 | - name: Codespell 25 | uses: codespell-project/actions-codespell@v2 26 | -------------------------------------------------------------------------------- /environment/Dockerfile_nwb: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:23.9.0-0 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Copy requirements.txt into the container 6 | COPY requirements.txt . 7 | 8 | RUN apt-get update \ 9 | && apt-get install -y --no-install-recommends \ 10 | build-essential \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | RUN pip install --upgrade pip && \ 14 | grep -E 'aind-data-schema|aind-nwb-utils|hdmf-zarr|pynwb|neuroconv|wavpack-numcodecs' requirements.txt | xargs pip install 15 | 16 | # Extract the spikeinterface version and install with extras 17 | RUN pip install --upgrade pip && \ 18 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) && \ 19 | pip install "spikeinterface[full]==${SPIKEINTERFACE_VERSION}" 20 | -------------------------------------------------------------------------------- /environment/build_all.sh: -------------------------------------------------------------------------------- 1 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) 2 | 3 | docker build -t ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:si-$SPIKEINTERFACE_VERSION -f Dockerfile_base . 4 | docker build -t ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25:si-$SPIKEINTERFACE_VERSION -f Dockerfile_kilosort25 . 5 | docker build -t ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4:si-$SPIKEINTERFACE_VERSION -f Dockerfile_kilosort4 . 
6 | docker build -t ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:si-$SPIKEINTERFACE_VERSION -f Dockerfile_nwb . 7 | 8 | # the base image also includes the nwb and spykingcircus2 requirements 9 | # docker build -t ghcr.io/allenneuraldynamics/aind-ephys-spikesort-spykingcircus2:si-$SPIKEINTERFACE_VERSION -f Dockerfile_spykingcircus2 . -------------------------------------------------------------------------------- /pipeline/capsule_versions.env: -------------------------------------------------------------------------------- 1 | JOB_DISPATCH=82103472153da7ca25a36f48173e52826825e8cf 2 | PREPROCESSING=9ef7b85c6d76ba84083f54e043ffe792ea16faa4 3 | SPIKESORT_KS4=f5893a8cf00a2a941b39a77e1ed72c7f0be59bf1 4 | SPIKESORT_KS25=15c59ac6d25b39aa5a8f6bdc48629aaac6c0a214 5 | SPIKESORT_SC2=76787c04994a1be96c518865543f75139423ad49 6 | POSTPROCESSING=eeaa6801ed9cf05d893a6beb96a20b617c9b59f7 7 | CURATION=4d8f38adecb21d640295a556a57446018249ab76 8 | VISUALIZATION=3ee85b89716d144ea4a5fef422c096409017d893 9 | RESULTS_COLLECTOR=3964e470f40ad8840f7c90f07f5f812774189220 10 | QUALITY_CONTROL=c18647a0246ab6f3d59b4aacf52b3462e56fa20c 11 | QUALITY_CONTROL_COLLECTOR=ab592569e17e2290d1005af559a1b49f6670bbb6 12 | NWB_ECEPHYS=fe426c2b9876690ef38c3f52f6e1ac4576dd0fb9 13 | NWB_UNITS=32a2afa8f35cb2f3a23983de00cf02e6fdecb051 14 | SPIKEINTERFACE_VERSION=0.103.0 15 | -------------------------------------------------------------------------------- /pipeline/nextflow.config: -------------------------------------------------------------------------------- 1 | process { 2 | executor = 'awsbatch' 3 | queueSize = 100 4 | } 5 | 6 | process.resourceLabels = ['allen-batch-pipeline': 'aind-ephys-pipeline-kilosort4'] 7 | 8 | process { 9 | withName: capsule_aind_ephys_postprocessing_5 { 10 | containerOptions = '--memory-swap 240000 --memory-swappiness 20 --shm-size 16000000' 11 | } 12 | withName: capsule_spikesort_kilosort_4_ecephys_7 { 13 | errorStrategy = 'retry' 14 | maxRetries = 3 15 | maxErrors = 5 16 | } 17 | withName: capsule_quality_control_ecephys_13 { 18 | containerOptions = '--memory-swap 240000 --memory-swappiness 20 --shm-size 16000000' 19 | errorStrategy = 'ignore' 20 | } 21 | } 22 | 23 | env.PIPELINE_URL = "https://github.com/AllenNeuralDynamics/aind-ephys-pipeline" 24 | env.PIPELINE_VERSION = "kilosort4_1.0.0" -------------------------------------------------------------------------------- /.github/workflows/nextflow_test.config: -------------------------------------------------------------------------------- 1 | process { 2 | executor = 'local' 3 | debug = true 4 | cpus = 2 5 | memory = '8 GB' 6 | } 7 | 8 | docker { 9 | enabled = true 10 | platform = 'linux/amd64' 11 | runOptions = '--volume $DATA_PATH:/tmp/data' 12 | envWhitelist = ['KACHERY_ZONE', 'KACHERY_API_KEY'] 13 | } 14 | 15 | dag { 16 | enabled = true 17 | file = RESULTS_PATH + '/nextflow/dag.html' 18 | overwrite = true 19 | } 20 | 21 | report { 22 | enabled = true 23 | file = RESULTS_PATH + '/nextflow/report.html' 24 | overwrite = true 25 | } 26 | 27 | timeline { 28 | enabled = true 29 | file = RESULTS_PATH + '/nextflow/timeline.html' 30 | overwrite = true 31 | } 32 | 33 | trace { 34 | enabled = true 35 | file = RESULTS_PATH + '/nextflow/trace.txt' 36 | overwrite = true 37 | } 38 | -------------------------------------------------------------------------------- /environment/Dockerfile_kilosort4: -------------------------------------------------------------------------------- 1 | FROM spikeinterface/kilosort4-base:4.0.18_cuda-12.0.0 2 | 3 | ARG 
DEBIAN_FRONTEND=noninteractive 4 | 5 | # Copy requirements.txt into the container 6 | COPY requirements.txt . 7 | 8 | RUN pip install --upgrade pip && \ 9 | grep -E 'aind-data-schema' requirements.txt | xargs pip install 10 | 11 | # Extract the spikeinterface version and install with extras 12 | RUN pip install --upgrade pip && \ 13 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) && \ 14 | pip install "spikeinterface[full]==${SPIKEINTERFACE_VERSION}" 15 | 16 | # Fix for multiple channel groups: https://github.com/SpikeInterface/spikeinterface/pull/3944 17 | RUN git clone https://github.com/alejoe91/spikeinterface.git && cd spikeinterface && \ 18 | git checkout 64cee6d2be24288ef803c9a686f137105f975655 && pip install . && \ 19 | cd .. && rm -rf spikeinterface -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /metadata/metadata.yml: -------------------------------------------------------------------------------- 1 | metadata_version: 1 2 | name: aind-ephys-pipeline (kilosort4) 3 | description: |- 4 | Electrophysiology analysis pipeline using Kilosort2.5 via SpikeInterface. 5 | 6 | The pipeline includes: 7 | 8 | - preprocessing: phase_shift, highpass filter, denoising (bad channel removal + common median reference ("cmr") or highpass spatial filter - "destripe"), and motion estimation (optionally correction) 9 | - spike sorting: with Kilosort2.5 10 | - postprocessing: remove duplicate units, compute amplitudes, spike/unit locations, PCA, correlograms, template similarity, template metrics, and quality metrics 11 | - curation: based on ISI violation ratio, presence ratio, and amplitude cutoff and unit labeling (noise, MUA, SUA) based on pre-trained classifier (UnitRefine) 12 | - visualization: timeseries, drift maps, and sorting output in sortingview 13 | - export session, subject, and units data to NWB 14 | tags: 15 | - ecephys 16 | authors: 17 | - name: AIND 18 | - name: Alessio Buccino 19 | -------------------------------------------------------------------------------- /docs/source/releases/1.0.0.rst: -------------------------------------------------------------------------------- 1 | .. _1.0.0: 2 | 3 | 1.0.0 - August 2025 4 | =================== 5 | 6 | Major release of the AIND Ephys Pipeline, which includes several new features and bug fixes. 
7 | 8 | The main changes in this release are: 9 | 10 | * Update of the nextflow scripts from DSL1 to DSL2 11 | * Unification of the pipeline script for multiple backends (``main_multi_backend.nf``) 12 | * Option to provide all parameters in a single JSON file 13 | * Addition of the ``spikeinterface`` input type (which supports any SpikeInterface-supported format) 14 | 15 | Package versions 16 | ---------------- 17 | * ``spikeinterface`` version: 0.103.0 18 | * ``aind-data-schema`` version: 1.3.0 19 | 20 | Bug fixes 21 | --------- 22 | * Additional fixes for NWB export with multi-shanks and multi-groups 23 | * Fix remapping of preprocessed and postprocessed data in the result collector 24 | * Fix output results: outputs are now copied instead of symlinked 25 | 26 | New features 27 | ------------ 28 | * Motion estimation: support for failed estimation and parameter tuning on probe span -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | AIND Ephys Pipeline Documentation 2 | ================================= 3 | 4 | Welcome to the AIND Ephys Pipeline documentation. This pipeline provides a comprehensive solution for electrophysiology data analysis using SpikeInterface. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | :caption: Contents: 9 | 10 | introduction 11 | installation 12 | pipeline_steps 13 | input_output 14 | parameters 15 | deployments 16 | customization 17 | troubleshooting 18 | releases 19 | 20 | Overview 21 | -------- 22 | 23 | The AIND Ephys Pipeline is built on ``Nextflow`` and integrates multiple tools for electrophysiology data processing, 24 | from preprocessing through spike sorting to visualization and NWB export. 25 | 26 | Quick Links 27 | ----------- 28 | 29 | - `GitHub Repository `_ 30 | - `SpikeInterface Documentation `_ 31 | - `Nextflow Documentation `_ 32 | -------------------------------------------------------------------------------- /pipeline/slurm_submit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH --mem=4GB 5 | #SBATCH --partition={your-partition} 6 | #SBATCH --time=2:00:00 7 | 8 | # modify this section to make the nextflow command available to your environment 9 | # e.g., using a conda environment with nextflow installed 10 | conda activate env_nf 11 | 12 | PIPELINE_PATH="path-to-your-cloned-repo" 13 | DATA_PATH="path-to-data-folder" 14 | RESULTS_PATH="path-to-results-folder" 15 | WORKDIR="path-to-workdir-folder" 16 | 17 | # check if nextflow_slurm_custom.config exists 18 | if [ -f "$PIPELINE_PATH/pipeline/nextflow_slurm_custom.config" ]; then 19 | CONFIG_FILE="$PIPELINE_PATH/pipeline/nextflow_slurm_custom.config" 20 | else 21 | CONFIG_FILE="$PIPELINE_PATH/pipeline/nextflow_slurm.config" 22 | fi 23 | echo "Using config file: $CONFIG_FILE" 24 | 25 | DATA_PATH=$DATA_PATH RESULTS_PATH=$RESULTS_PATH nextflow \ 26 | -C $CONFIG_FILE \ 27 | -log $RESULTS_PATH/nextflow/nextflow.log \ 28 | run $PIPELINE_PATH/pipeline/main_multi_backend.nf \ 29 | -work-dir $WORKDIR 30 | # additional parameters here 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Allen Institute for Neural Dynamics 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /sample_dataset/create_test_nwb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This script creates a 3-minute synthetic recording and saves it to NWB for testing the pipeline. 4 | 5 | Requirements: 6 | - spikeinterface 7 | - pynwb 8 | - neuroconv 9 | """ 10 | 11 | import spikeinterface as si 12 | from pathlib import Path 13 | 14 | from pynwb import NWBHDF5IO 15 | from pynwb.testing.mock.file import mock_NWBFile, mock_Subject 16 | from neuroconv.tools.spikeinterface import add_recording_to_nwbfile 17 | 18 | this_folder = Path(__file__).parent 19 | 20 | def generate_nwb(): 21 | duration = 180 22 | num_channels = 32 23 | num_units = 20 24 | output_folder = this_folder / "nwb" 25 | output_folder.mkdir(exist_ok=True) 26 | 27 | recording, _ = si.generate_ground_truth_recording( 28 | num_channels=num_channels, 29 | num_units=num_units, 30 | durations=[duration], 31 | ) 32 | 33 | nwbfile = mock_NWBFile() 34 | nwbfile.subject = mock_Subject() 35 | add_recording_to_nwbfile(recording, nwbfile=nwbfile) 36 | 37 | with NWBHDF5IO(output_folder / "sample.nwb", mode="w") as io: 38 | io.write(nwbfile) 39 | 40 | if __name__ == '__main__': 41 | generate_nwb() 42 | -------------------------------------------------------------------------------- /environment/Dockerfile_base: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:23.9.0-0 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Copy requirements.txt into the container 6 | COPY requirements.txt . 
7 | 8 | RUN apt-get update \ 9 | && apt-get install -y --no-install-recommends \ 10 | build-essential \ 11 | git \ 12 | fonts-freefont-ttf=20120503-10 \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # correct mapping to make libvips work 16 | ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libffi.so.7 17 | 18 | # install libvips 19 | RUN apt-get update \ 20 | && apt-get install -y libvips libvips-dev libvips-tools libtiff5-dev 21 | 22 | # install default fonts 23 | RUN apt-get install -y fonts-freefont-ttf 24 | 25 | RUN pip install --upgrade pip && \ 26 | grep -E 'aind-data-schema|aind-metadata-upgrader|aind-qcportal-schema|pyvips|wavpack-numcodecs|pynwb|hdmf-zarr|torch|hdbscan|s3fs|seaborn' requirements.txt | xargs pip install 27 | 28 | # Extract the spikeinterface version and install with extras 29 | RUN pip install --upgrade pip && \ 30 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) && \ 31 | pip install "spikeinterface[full,widgets]==${SPIKEINTERFACE_VERSION}" -------------------------------------------------------------------------------- /environment/push_all.sh: -------------------------------------------------------------------------------- 1 | SPIKEINTERFACE_VERSION=$(grep '^spikeinterface==' requirements.txt | cut -d'=' -f3) 2 | 3 | docker tag ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:si-$SPIKEINTERFACE_VERSION ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:latest 4 | docker push --all-tags ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base 5 | docker tag ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25:si-$SPIKEINTERFACE_VERSION ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25:latest 6 | docker push --all-tags ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25 7 | docker tag ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4:si-$SPIKEINTERFACE_VERSION ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4:latest 8 | docker push --all-tags ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4 9 | docker tag ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:si-$SPIKEINTERFACE_VERSION ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:latest 10 | docker push --all-tags ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb 11 | 12 | # docker tag ghcr.io/allenneuraldynamics/aind-ephys-spikesort-spykingcircus2:si-$SPIKEINTERFACE_VERSION ghcr.io/allenneuraldynamics/aind-ephys-spikesort-spykingcircus2:latest 13 | # docker push --all-tags ghcr.io/allenneuraldynamics/aind-ephys-spikesort-spykingcircus2 -------------------------------------------------------------------------------- /docs/source/releases/si-0.101.2.rst: -------------------------------------------------------------------------------- 1 | .. _si-0.101.2: 2 | 3 | si-0.101.2 - November 2024 4 | ========================== 5 | 6 | Package versions 7 | ---------------- 8 | * ``spikeinterface`` version: 0.101.2 9 | * ``aind-data-schema`` version: 1.0.0 10 | 11 | Bug fixes 12 | --------- 13 | * Fix handling of recordings with non-monotonically increasing timestamps, by resetting timestamps to start from 0.0. 14 | * Fix NWB LFP export in case of multiple channel groups: they are aggregated and saved as a single LFP electrical series in the NWB file. 15 | * Fixed a few bugs in dealing with multi-segment recordings (when `concatenate` option in job dispatch is enabled). 16 | 17 | New features 18 | ------------ 19 | * Use `SortingAnalyzer` zarr backend as postprocessed output format. 
This removes the need for the `postprocessed-sorting` folder since the `Sorting` object is saved by the `SortingAnalyzer` by default. 20 | * Remapping of recording JSON files in `preprocessed` and `postprocessed` folders to use the session name from the `data_description` file (if available). This enables reloading the preprocessed recording automatically, provided that the raw asset is attached. 21 | * Motion correction now uses `dredge_fast` as the default and handles multi-segment recordings (which are concatenated before motion correction). 22 | * Added a `split_groups` option to `job_dispatch` to process different groups independently. 23 | -------------------------------------------------------------------------------- /pipeline/nextflow_local.config: -------------------------------------------------------------------------------- 1 | params.executor = "local" 2 | 3 | process { 4 | executor = 'local' 5 | debug = true 6 | cpus = 8 7 | memory = '32 GB' 8 | containerOptions = '--memory-swap=-1 --memory-swappiness 20 --shm-size=4g' 9 | 10 | // change max forks for specific processes to allow multiple forks 11 | withName: preprocessing { 12 | maxForks = 1 13 | } 14 | withName: spikesort_kilosort4 { 15 | maxForks = 1 16 | containerOptions = '--gpus all' 17 | } 18 | withName: spikesort_kilosort25 { 19 | maxForks = 1 20 | containerOptions = '--gpus all' 21 | } 22 | withName: spikesort_spykingcircus2 { 23 | maxForks = 1 24 | } 25 | withName: postprocessing { 26 | maxForks = 1 27 | } 28 | withName: quality_control { 29 | maxForks = 1 30 | } 31 | } 32 | 33 | docker { 34 | enabled = true 35 | platform = 'linux/amd64' 36 | runOptions = '--volume $DATA_PATH:/tmp/data' 37 | envWhitelist = ['KACHERY_ZONE', 'KACHERY_API_KEY'] 38 | } 39 | 40 | dag { 41 | enabled = true 42 | file = RESULTS_PATH + '/nextflow/dag.html' 43 | overwrite = true 44 | } 45 | 46 | report { 47 | enabled = true 48 | file = RESULTS_PATH + '/nextflow/report.html' 49 | overwrite = true 50 | } 51 | 52 | timeline { 53 | enabled = true 54 | file = RESULTS_PATH + '/nextflow/timeline.html' 55 | overwrite = true 56 | } 57 | 58 | trace { 59 | enabled = true 60 | file = RESULTS_PATH + '/nextflow/trace.txt' 61 | overwrite = true 62 | } 63 | -------------------------------------------------------------------------------- /.github/workflows/test_pipeline.yml: -------------------------------------------------------------------------------- 1 | name: Test Nextflow Pipeline 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | types: [synchronize, opened, reopened] 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.11" 19 | 20 | - name: Free up disk space 21 | run: | 22 | echo "Before cleanup:" 23 | df -h 24 | sudo rm -rf /usr/local/lib/android 25 | sudo rm -rf /opt/ghc 26 | sudo rm -rf /usr/share/dotnet 27 | sudo rm -rf /opt/hostedtoolcache 28 | echo "After cleanup:" 29 | df -h 30 | 31 | - name: Install packages 32 | run: | 33 | python -m pip install --upgrade pip 34 | pip install spikeinterface neuroconv "pynwb<3" 35 | 36 | - name: Generate test NWB files 37 | run: | 38 | python sample_dataset/create_test_nwb.py 39 | 40 | - name: Set up Nextflow 41 | uses: nf-core/setup-nextflow@v1 42 | with: 43 | version: latest-stable 44 | 45 | - name: Run Nextflow pipeline 46 | run: | 47 | DATA_PATH="$(pwd)/sample_dataset/nwb" RESULTS_PATH="$(pwd)/sample_dataset/nwb/results" \ 48
| nextflow -C "$(pwd)/.github/workflows/nextflow_test.config" run pipeline/main_multi_backend.nf \ 49 | --params_file "$(pwd)/.github/workflows/params_test.json" -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | 3 | # You can set these variables from the command line, and also 4 | # from the environment for the first two. 5 | SPHINXOPTS ?= 6 | SPHINXBUILD ?= sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 15 | 16 | # Clean build directory 17 | clean: 18 | rm -rf $(BUILDDIR)/* 19 | 20 | # Target to build HTML documentation 21 | html: 22 | @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 23 | @echo 24 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 25 | 26 | # Target for PDF generation via LaTeX 27 | latexpdf: 28 | @$(SPHINXBUILD) -M latexpdf "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | @echo 30 | @echo "Build finished. The PDF file is in $(BUILDDIR)/latex." 31 | 32 | # Target to check external links 33 | linkcheck: 34 | @$(SPHINXBUILD) -M linkcheck "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 35 | @echo 36 | @echo "Link check complete." 37 | 38 | # Build HTML documentation and open it in the default browser 39 | livehtml: 40 | @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 41 | @echo "Opening documentation in browser..." 42 | @python -m webbrowser -t "file://$(shell pwd)/$(BUILDDIR)/html/index.html" 43 | 44 | # Catch-all target: route all unknown targets to Sphinx 45 | %: Makefile 46 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 47 | -------------------------------------------------------------------------------- /RELASE_NOTES.md: -------------------------------------------------------------------------------- 1 | # Releases 2 | 3 | ## si-0.102.1 - March 2025 4 | 5 | ## Package versions 6 | * `spikeinterface` version: 0.102.1 7 | * `aind-data-schema` version: 1.3.0 8 | 9 | ### Bug fixes: 10 | * Fix NWB export with multi-shanks and multi-groups 11 | 12 | ### New features: 13 | * Removed unit classifier process (moved to curation) 14 | * Centralized capsule versions 15 | * Added CI tests and automated deployments 16 | 17 | 18 | ## si-0.101.2 - November 2024 19 | 20 | ### Package versions: 21 | * `spikeinterface` version: 0.101.2 22 | * `aind-data-schema` version: 1.0.0 23 | 24 | ### Bug fixes: 25 | * Fix handling of recordings with non-monotonically increasing timestamps, by resetting timestamps to start from 0.0. 26 | * Fix NWB LFP export in case of multiple channel groups: they are aggregated and saved as a single LFP electrical series in the NWB file. 27 | * Fixed a few bugs in dealing with multi-segment recordings (when `concatenate` option in job dispatch is enabled). 28 | 29 | ### New features: 30 | * Use `SortingAnalyzer` zarr backend as postprocessed output format. This removes the need of the `postprocessed-sorting` folder since the `Sorting` object is saved by the `SortingAnalyzer` by default. 
31 | * Remapping of recording JSON files in `preprocessed` and `postprocessed` folders to use the session name from the `data_description` file (if available). This enables reloading the preprocessed recording automatically, provided that the raw asset is attached. 32 | * Motion correction now uses `dredge_fast` as the default and handles multi-segment recordings (which are concatenated before motion correction). 33 | * Added a `split_groups` option to `job_dispatch` to process different groups independently. 34 | 35 | ## si-0.100.7 - February 2024 36 | 37 | ### Package versions: 38 | * `spikeinterface`: 0.100.7 39 | * `aind-data-schema`: 0.38.5 -------------------------------------------------------------------------------- /tests/test_pipeline_local.sh: -------------------------------------------------------------------------------- 1 | # test pipeline with sample_nwb file 2 | # DOCKER_IMAGE="ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:si-0.102.1" 3 | NXF_VERSION="25.04.1" 4 | 5 | # Check if arguments are passed 6 | if [ "$#" -gt 0 ]; then 7 | ARGS="$@" 8 | echo "Arguments passed: $ARGS" 9 | else 10 | ARGS="" 11 | echo "No arguments passed" 12 | fi 13 | 14 | SCRIPT_PATH="$(realpath "$0")" 15 | echo "Running script at: $SCRIPT_PATH" 16 | 17 | SAMPLE_DATASET_PATH="$(realpath $(dirname "$SCRIPT_PATH")/../sample_dataset)" 18 | echo "Sample dataset path: $SAMPLE_DATASET_PATH" 19 | 20 | PIPELINE_PATH="$(realpath $(dirname "$SCRIPT_PATH")/..)" 21 | echo "Pipeline path: $PIPELINE_PATH" 22 | 23 | # check if sample_dataset/nwb/sample.nwb exists 24 | if [ ! -f "$SAMPLE_DATASET_PATH/nwb/sample.nwb" ]; then 25 | echo "$SAMPLE_DATASET_PATH/nwb/sample.nwb not found" 26 | # this needs to run in an env with spikeinterface/pynwb/neuroconv installed 27 | # docker run --name create_nwb -t -d $DOCKER_IMAGE 28 | # docker cp $SAMPLE_DATASET_PATH/create_test_nwb.py create_nwb:/create_test_nwb.py 29 | # docker exec create_nwb python /create_test_nwb.py 30 | # mkdir $SAMPLE_DATASET_PATH/nwb 31 | # docker cp create_nwb:/nwb/sample.nwb $SAMPLE_DATASET_PATH/nwb/sample.nwb 32 | python $SAMPLE_DATASET_PATH/create_test_nwb.py 33 | fi 34 | 35 | # define INPUT and OUTPUT directories 36 | DATA_PATH="$SAMPLE_DATASET_PATH/nwb" 37 | RESULTS_PATH="$SAMPLE_DATASET_PATH/nwb_results" 38 | 39 | # check if nextflow_local_custom.config exists 40 | if [ -f "$PIPELINE_PATH/pipeline/nextflow_local_custom.config" ]; then 41 | CONFIG_FILE="$PIPELINE_PATH/pipeline/nextflow_local_custom.config" 42 | else 43 | CONFIG_FILE="$PIPELINE_PATH/pipeline/nextflow_local.config" 44 | fi 45 | echo "Using config file: $CONFIG_FILE" 46 | 47 | # run pipeline 48 | NXF_VER=$NXF_VERSION DATA_PATH=$DATA_PATH RESULTS_PATH=$RESULTS_PATH nextflow \ 49 | -C $CONFIG_FILE -log $RESULTS_PATH/nextflow/nextflow.log \ 50 | run $PIPELINE_PATH/pipeline/main_multi_backend.nf \ 51 | --params_file params_test.json $ARGS 52 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder.
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ------------------------------------------------------ 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'AIND Ephys Pipeline' 10 | copyright = '2025, AIND' 11 | author = 'AIND' 12 | 13 | version = '1.0' 14 | release = '1.0' 15 | 16 | # -- General configuration ---------------------------------------------------- 17 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 18 | 19 | extensions = [ 20 | 'sphinx.ext.autodoc', 21 | 'sphinx.ext.viewcode', 22 | 'sphinx.ext.napoleon', 23 | 'sphinx.ext.intersphinx', 24 | ] 25 | 26 | # Napoleon settings 27 | napoleon_google_docstring = True 28 | napoleon_numpy_docstring = True 29 | napoleon_include_init_with_doc = False 30 | napoleon_include_private_with_doc = False 31 | napoleon_include_special_with_doc = True 32 | napoleon_use_admonition_for_examples = False 33 | napoleon_use_admonition_for_notes = False 34 | napoleon_use_admonition_for_references = False 35 | napoleon_use_ivar = False 36 | napoleon_use_param = True 37 | napoleon_use_rtype = True 38 | napoleon_type_aliases = None 39 | 40 | # Intersphinx settings 41 | intersphinx_mapping = { 42 | 'python': ('https://docs.python.org/3', None), 43 | 'numpy': ('https://numpy.org/doc/stable/', None), 44 | 'scipy': ('https://docs.scipy.org/doc/scipy/', None), 45 | } 46 | 47 | templates_path = ['_templates'] 48 | exclude_patterns = [] 49 | 50 | language = 'en' 51 | 52 | # -- Options for HTML output --------------------------------------------------- 53 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 54 | 55 | html_theme = 'furo' 56 | html_static_path = ['_static'] 57 | html_favicon = "_static/favicon.ico" 58 | html_theme_options = { 59 | "light_logo": "light-logo.svg", 60 | "dark_logo": "dark-logo.svg", 61 | } 62 | -------------------------------------------------------------------------------- /docs/source/troubleshooting.rst: -------------------------------------------------------------------------------- 1 | .. _troubleshooting: 2 | 3 | Troubleshooting 4 | =============== 5 | 6 | This section provides solutions to common issues encountered while using the AIND Ephys Pipeline. 7 | If you encounter a problem not listed here, please consider opening an issue on our GitHub repository. 8 | 9 | 10 | NUMBA cache issue: ``RuntimeError: cannot cache function`` 11 | ---------------------------------------------------------- 12 | 13 | The curation step may fail because NUMBA cannot cache the compiled functions to the location where the 14 | Python environment is installed. This can happen if the environment is installed in a read-only location, such as an 15 | Apptainer/Singularity container. 16 | 17 | To resolve this issue, you can create a folder where your user has write access and set the environment variable 18 | ``NUMBA_CACHE_DIR`` to it. 19 | 20 | .. note:: 21 | 22 | To make this change persistent, you can add the following line to your ``.bashrc`` or ``.bash_profile`` file: 23 | .. code-block:: bash 24 | 25 | export NUMBA_CACHE_DIR=/path/to/your/cache/dir 26 | 27 | This environment variable is already in the apptainer/singularity ``envWhitelist`` of the 28 | `nextflow_slurm.config `_ 29 | file, so it will be used automatically if defined.
30 | 31 | ``OSError: Read-only file system`` error 32 | ---------------------------------------- 33 | 34 | The curation and visualization steps may also fail because of similar caching issues. 35 | In this case, the easiest solution is to bind your home directory to the container, so that the 36 | pipeline can write to a folder in your home directory. 37 | 38 | You can do this by simply uncommenting the 39 | `this line `_ 40 | in the ``nextflow_slurm.config`` file: 41 | 42 | .. code-block:: bash 43 | 44 | // containerOptions = "--bind \$HOME:\$HOME" 45 | 46 | 47 | ``OSError: Unable to synchronously open file`` 48 | ---------------------------------------------- 49 | 50 | This error can occur when using NWB with HDF5 backend as input to the pipeline on a filesystem that does not support file locking, such as 51 | NFS or certain cloud storage solutions (mainly SLURM clusters). 52 | 53 | To resolve this issue, you can set the environment variable ``HDF5_USE_FILE_LOCKING`` to ``FALSE``. -------------------------------------------------------------------------------- /tests/test_pipeline_slurm.sh: -------------------------------------------------------------------------------- 1 | # Test pipeline with sample_nwb file on slurm 2 | 3 | # Steps to run the test script on SLURM: 4 | # * Appropriate resources allocation, e.g. 5 | # `salloc -N 1 -n 12 -t 00:30:00 --gres=gpu:1 --mem=32G` 6 | 7 | # * Environment with nextflow and the required dependencies installed 8 | # e.g., create an environment.yml file with the following content: 9 | # ``` 10 | # name: aind-ephys-pipeline 11 | # channels: 12 | # - conda-forge 13 | # - defaults 14 | # dependencies: 15 | # - python=3.10 16 | # - pip 17 | # - pip: 18 | # - kachery-cloud 19 | # - spikeinterface[full] 20 | # - aind-data-schema 21 | # - pynwb 22 | # - neuroconv 23 | # ``` 24 | # then create the environment with: 25 | # ``` 26 | # module load miniforge/24.3.0-0 # or your preferred conda version 27 | # mamba env create -f environment.yml 28 | # ``` 29 | # Follow the instructions to install Nextflow. 30 | 31 | # * Load the required modules, activate the conda environment, and run the script: 32 | # ```bash 33 | # module load miniforge/24.3.0-0 apptainer/1.1.9 34 | # conda activate aind-ephys-pipeline 35 | # export NXF_SINGULARITY_CACHEDIR=/path/to/cache 36 | # cd tests 37 | # bash test_pipeline_slurm.sh 38 | # ``` 39 | 40 | SCRIPT_PATH="$(realpath "$0")" 41 | echo "Running script at: $SCRIPT_PATH" 42 | 43 | SAMPLE_DATASET_PATH="$(realpath $(dirname "$SCRIPT_PATH")/../sample_dataset)" 44 | echo "Sample dataset path: $SAMPLE_DATASET_PATH" 45 | 46 | PIPELINE_PATH="$(realpath $(dirname "$SCRIPT_PATH")/..)" 47 | echo "Pipeline path: $PIPELINE_PATH" 48 | 49 | # check if sample_dataset/nwb/sample.nwb exists 50 | if [ ! 
-f "$SAMPLE_DATASET_PATH/nwb/sample.nwb" ]; then 51 | echo "$SAMPLE_DATASET_PATH/nwb/sample.nwb not found" 52 | python $SAMPLE_DATASET_PATH/create_test_nwb.py 53 | fi 54 | 55 | # define INPUT and OUTPUT directories 56 | DATA_PATH="$SAMPLE_DATASET_PATH/nwb" 57 | RESULTS_PATH="$SAMPLE_DATASET_PATH/nwb_results" 58 | 59 | # check if nextflow_local_custom.config exists 60 | if [ -f "$PIPELINE_PATH/pipeline/nextflow_slurm_custom.config" ]; then 61 | CONFIG_FILE="$PIPELINE_PATH/pipeline/nextflow_slurm_custom.config" 62 | else 63 | CONFIG_FILE="$PIPELINE_PATH/pipeline/nextflow_slurm.config" 64 | fi 65 | echo "Using config file: $CONFIG_FILE" 66 | 67 | # run pipeline 68 | DATA_PATH=$DATA_PATH RESULTS_PATH=$RESULTS_PATH nextflow \ 69 | -C $CONFIG_FILE -log $RESULTS_PATH/nextflow/nextflow.log \ 70 | run $PIPELINE_PATH/pipeline/main_multi_backend.nf \ 71 | --params_file params_test.json $ARGS 72 | -------------------------------------------------------------------------------- /docs/source/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | The AIND Ephys Pipeline is an electrophysiology analysis pipeline built with `SpikeInterface `_ and `Nextflow `_. 5 | It provides a comprehensive suite of tools for processing and analyzing electrophysiology data. 6 | 7 | Key concepts 8 | ------------ 9 | 10 | The pipeline is designed to process raw electrophysiology data through a series of "discrete" steps 11 | (e.g., preprocessing, spike sorting, and postprocessing -- see :ref:`pipeline_steps` for more details). 12 | 13 | Each step is a `Nextflow process `_ that runs independently 14 | in a containerized environment. 15 | The `script` of each process (or capsule) -- i.e. the part that actually runs code -- is implemented 16 | in a separate GitHub repository and pinned to a specific `git` commit/version. The combination of containerized environments and pinned versions for each step 17 | **ensures full reproducibility**. 18 | 19 | ``Nextflow`` orchestrates the pipeline, managing the flow of data between processes and ensuring that each step is 20 | executed in the correct order. In addition, ``Nextflow`` provides built-in support for parallel processing, which 21 | is achieved by running the key pipeline steps (preprocessing/spike sorting/postprocessing/curation/visualization) in parallel 22 | across multiple *blocks* (i.e., *experiments* in ``Open Ephys``), *streams* (i.e., probes), 23 | *groups* (i.e., individual shanks for multi-shank probes), and optionally *segments* (i.e., *recordings* in ``Open Ephys``). 24 | 25 | .. note:: 26 | 27 | With **parallel** we do not mean using parallel processes/threads on the same machine, but rather running multiple 28 | independent *nodes* (e.g., on a cluster or on a cloud batch) in parallel. 29 | 30 | For example, if you have recorded from 3 Neuropixels 2.0 multi-shank probes, each with 4 shanks, 3 experiments with 31 | 3 recordings each, the pipeline will process 3 (blocks) x 3 (segments) x 3 (streams) x 4 (groups) = 108 jobs in parallel! 
32 | 33 | 34 | Key Features 35 | ------------ 36 | 37 | - Parallel processing capabilities 38 | - Multiple spike sorter support 39 | - Comprehensive preprocessing options 40 | - Advanced quality control and curation 41 | - Standardized NWB output 42 | - Interactive visualization tools 43 | - Container-based deployment 44 | - Support for multiple platforms (local, SLURM, AWS batch) 45 | 46 | 47 | The pipeline is designed to be modular and flexible, allowing for deployment across various platforms while maintaining 48 | consistent processing standards and output formats. 49 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Requirements 5 | ------------ 6 | 7 | The pipeline has different requirements depending on your deployment target. 8 | Here are the core requirements for each deployment option: 9 | 10 | Local Deployment 11 | ~~~~~~~~~~~~~~~~ 12 | 13 | For local deployment, you need: 14 | 15 | * ``nextflow`` (version 22.10.8 recommended) 16 | * ``docker`` (19.03+ if going to use GPUs, e.g. for spikesort_kilosort* workflows) 17 | * ``figurl`` (optional, for cloud visualization) 18 | 19 | SLURM Deployment 20 | ~~~~~~~~~~~~~~~~ 21 | 22 | For SLURM cluster deployment: 23 | 24 | * ``nextflow`` (version 22.10.8 recommended) 25 | * ``apptainer`` or ``singularity`` 26 | * Access to a SLURM cluster 27 | * ``figurl`` (optional, for cloud visualization) 28 | 29 | Installation Steps 30 | ------------------ 31 | 32 | Local Setup 33 | ~~~~~~~~~~~ 34 | 35 | 1. Install Nextflow: 36 | 37 | Follow the `Nextflow installation guide `_ 38 | 39 | 2. Install Docker: 40 | 41 | Follow the `Docker installation instructions `_ 42 | 43 | 3. (Optional) Set up Figurl: 44 | 45 | a. Initialize Kachery Client: 46 | 47 | i. Register at `kachery.vercel.app `_ using your GitHub account. 48 | ii. Go to settings and provide your name, an email address and a short description of your research purpose. 49 | iii. Set the ``KACHERY_API_KEY`` environment variable with your assigned API key. 50 | 51 | b. Set credentials: 52 | 53 | * Click on settings and generate a new API key. 54 | * Set environment variables: 55 | 56 | .. code-block:: bash 57 | 58 | export KACHERY_API_KEY="your-client-id" 59 | # Optional: Set custom Kachery zone 60 | export KACHERY_ZONE="your-zone" 61 | 62 | c. (optional) Set up a custom kachery zone: 63 | 64 | If you plan to use the Figurl service extensively, please consider creating your own "zone". 65 | Follow the instructions in the `Kachery documentation `_. 66 | 67 | SLURM Setup 68 | ~~~~~~~~~~~ 69 | 70 | 1. Install Nextflow on your cluster environment 71 | 2. Ensure Apptainer/Singularity is available 72 | 3. Set up environment variables: 73 | 74 | .. code-block:: bash 75 | 76 | # Optional: Set custom Apptainer (or Singularity) cache directory 77 | export NXF_APPTAINER_CACHEDIR="/path/to/cache" 78 | # export NXF_SINGULARITY_CACHEDIR="/path/to/cache" 79 | 80 | 4. (Optional) Follow the same Figurl setup steps as in the local deployment 81 | 82 | Clone the Repository 83 | -------------------- 84 | 85 | Clone the pipeline repository: 86 | 87 | .. code-block:: bash 88 | 89 | git clone https://github.com/AllenNeuralDynamics/aind-ephys-pipeline.git 90 | cd aind-ephys-pipeline 91 | cd pipeline 92 | 93 | The pipeline is now ready to be configured and run on your chosen platform. 
94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AIND Ephys Pipeline 2 | ## aind-ephys-pipeline 3 | 4 | Electrophysiology analysis pipeline with [SpikeInterface](https://github.com/SpikeInterface/spikeinterface). 5 | 6 | # Overview 7 | 8 | The pipeline is based on [Nextflow](https://www.nextflow.io/) and it includes the following steps: 9 | 10 | - [job-dispatch](https://github.com/AllenNeuralDynamics/aind-ephys-job-dispatch/): generates a list of JSON files to be processed in parallel. Parallelization is performed over multiple probes and multiple shanks (e.g., for NP2-4shank probes). The steps from `preprocessing` to `visualization` are run in parallel. 11 | - [preprocessing](https://github.com/AllenNeuralDynamics/aind-ephys-preprocessing/): phase_shift, highpass filter, denoising (bad channel removal + common median reference ("cmr") or highpass spatial filter - "destripe"), and motion estimation (optionally correction) 12 | - spike sorting: several spike sorters are available: 13 | - [kilosort2.5](https://github.com/AllenNeuralDynamics/aind-ephys-spikesort-kilosort25/) 14 | - [kilosort4](https://github.com/AllenNeuralDynamics/aind-ephys-spikesort-kilosort4/) 15 | - [spykingcircus2](https://github.com/AllenNeuralDynamics/aind-ephys-spikesort-spykingcircus2/) 16 | - [postprocessing](https://github.com/AllenNeuralDynamics/aind-ephys-postprocessing/): remove duplicate units, compute amplitudes, spike/unit locations, PCA, correlograms, template similarity, template metrics, and quality metrics 17 | - [curation](https://github.com/AllenNeuralDynamics/aind-ephys-curation/): based on ISI violation ratio, presence ratio, and amplitude cutoff and pretrained unit classifier (UnitRefine) 18 | - [visualization](https://github.com/AllenNeuralDynamics/aind-ephys-visualization/): timeseries, drift maps, and sorting output in [figurl](https://github.com/flatironinstitute/figurl/blob/main/README.md) 19 | - [result collection](https://github.com/AllenNeuralDynamics/aind-ephys-result-collector/): this step collects the output of all parallel jobs and copies the output folders to the results folder 20 | - export to NWB: creates NWB output files. Each file can contain multiple streams (e.g., probes), but only a continuous chunk of data (such as an Open Ephys experiment+recording or an NWB `ElectricalSeries`). This step includes additional sub-steps: 21 | - [ecephys](https://github.com/AllenNeuralDynamics/aind-ecephys-nwb) 22 | - [units](https://github.com/AllenNeuralDynamics/aind-units-nwb) 23 | 24 | 25 | # Documentation 26 | 27 | The documentation is available at [ReadTheDocs](https://aind-ephys-pipeline.readthedocs.io/en/latest/). 28 | 29 | 30 | ## Code Ocean Deployment (AIND) 31 | 32 | At AIND, the pipeline is deployed on the Code Ocean platform. Since currently Code Ocean does not support conditional processes, pipelines running different sorters and AIND-specific options are implemented in separate branches. 
33 | 34 | This is a list of the available pipeline branches that are deployed in Code Ocean: 35 | 36 | - `main`/`co_kilosort4`: pipeline with Kilosort4 sorter 37 | - `co_kilosort25`: pipeline with Kilosort2.5 sorter 38 | - `co_spykingcircus2`: pipeline with Spyking Circus 2 sorter 39 | - `co_kilosort25_opto`: pipeline with Kilosort2.5 sorter and optogenetics artifact removal 40 | - `co_kilosort4_opto`: pipeline with Kilosort4 sorter and optogenetics artifact removal 41 | - `co_spykingcircus2_opto`: pipeline with Spyking Circus 2 sorter and optogenetics artifact removal -------------------------------------------------------------------------------- /pull_pipeline_images.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Pre-pull AIND ephys pipeline containers into a Nextflow Singularity cache dir 3 | # 4 | # Usage: 5 | # pull_aind_images.sh [--cache CACHE_DIR] [--tag si-X.Y.Z] [--sorter SORTER] 6 | # 7 | # Defaults: 8 | # --cache : $NXF_SINGULARITY_CACHEDIR 9 | # --tag : resolved from pipeline/capsule_versions.env (SPIKEINTERFACE_VERSION) 10 | # or defaults to si-0.103.0 11 | # --sorter : kilosort4 (options: kilosort25, kilosort4, spykingcircus2, all) 12 | # 13 | 14 | set -euo pipefail 15 | 16 | CACHE_DIR="${NXF_APPTAINER_CACHEDIR:-${NXF_SINGULARITY_CACHEDIR:-}}" 17 | TAG="" 18 | SORTER="kilosort4" 19 | 20 | # ------------------------------ 21 | # Parse arguments 22 | # ------------------------------ 23 | while [[ $# -gt 0 ]]; do 24 | case "$1" in 25 | --cache) 26 | CACHE_DIR="$2"; shift 2;; 27 | --tag) 28 | TAG="$2"; shift 2;; 29 | --sorter) 30 | SORTER="$2"; shift 2;; 31 | *) 32 | echo "Unknown arg: $1" >&2; exit 64;; 33 | esac 34 | done 35 | 36 | if [[ -z "$CACHE_DIR" ]]; then 37 | echo "ERROR: must provide --cache or set \$NXF_SINGULARITY_CACHEDIR" >&2 38 | exit 64 39 | fi 40 | 41 | # ------------------------------ 42 | # Resolve tag 43 | # ------------------------------ 44 | if [[ -z "$TAG" ]]; then 45 | if [[ -f "pipeline/capsule_versions.env" ]]; then 46 | SPIKEINTERFACE_VERSION=$(grep -E '^SPIKEINTERFACE_VERSION=' pipeline/capsule_versions.env | cut -d= -f2 | tr -d '[:space:]') 47 | if [[ -n "$SPIKEINTERFACE_VERSION" ]]; then 48 | TAG="si-$SPIKEINTERFACE_VERSION" 49 | fi 50 | fi 51 | fi 52 | 53 | if [[ -z "$TAG" ]]; then 54 | TAG="si-0.103.0" 55 | fi 56 | 57 | # ------------------------------ 58 | # Setup cache 59 | # ------------------------------ 60 | mkdir -p "$CACHE_DIR" 61 | export SINGULARITY_CACHEDIR="$CACHE_DIR" 62 | export APPTAINER_CACHEDIR="$CACHE_DIR" 63 | 64 | echo "Cache dir : $CACHE_DIR" 65 | echo "Tag : $TAG" 66 | echo "Sorter : $SORTER" 67 | 68 | # ------------------------------ 69 | # Base + non-sorter images 70 | # ------------------------------ 71 | IMAGES=( 72 | "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base" 73 | "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb" 74 | ) 75 | 76 | # ------------------------------ 77 | # Sorter selection 78 | # ------------------------------ 79 | case "$SORTER" in 80 | kilosort25) 81 | IMAGES+=("ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25");; 82 | kilosort4) 83 | IMAGES+=("ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4");; 84 | spykingcircus2) 85 | IMAGES+=("ghcr.io/allenneuraldynamics/aind-ephys-spikesort-spykingcircus2");; 86 | all) 87 | IMAGES+=( 88 | "ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25" 89 | "ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4" 90 | 
"ghcr.io/allenneuraldynamics/aind-ephys-spikesort-spykingcircus2" 91 | );; 92 | *) 93 | echo "ERROR: invalid --sorter value '$SORTER' (valid: kilosort25, kilosort4, spykingcircus2, all)" >&2 94 | exit 65;; 95 | esac 96 | 97 | # ------------------------------ 98 | # Pull images 99 | # ------------------------------ 100 | 101 | 102 | for img in "${IMAGES[@]}"; do 103 | IMG_NAME="$img:$TAG" 104 | IMG_NAME="${IMG_NAME//\//-}" 105 | 106 | echo "[pull] $img:$TAG to $CACHE_DIR/$IMG_NAME.img" 107 | if command -v singularity >/dev/null 2>&1; then 108 | singularity pull --name "$IMG_NAME.img" --dir "$CACHE_DIR" "docker://$img:$TAG" || true 109 | elif command -v apptainer >/dev/null 2>&1; then 110 | apptainer pull --name "$IMG_NAME.img" --dir "$CACHE_DIR" "docker://$img:$TAG" || true 111 | else 112 | echo "ERROR: neither singularity nor apptainer found in PATH" >&2 113 | exit 127 114 | fi 115 | done 116 | 117 | echo "Done. Images are cached in $CACHE_DIR" 118 | -------------------------------------------------------------------------------- /docs/source/pipeline_steps.rst: -------------------------------------------------------------------------------- 1 | .. _pipeline_steps: 2 | 3 | Pipeline Steps 4 | ============== 5 | 6 | The AIND Ephys Pipeline consists of several key processing steps that are executed in sequence. Here's a detailed look at each step: 7 | 8 | Job Dispatch 9 | ------------ 10 | 11 | The `job-dispatch `_ step: 12 | 13 | * Generates JSON files for parallel processing 14 | * Enables parallelization across: 15 | * Multiple probes 16 | * Multiple shanks (e.g., for NP2-4shank probes) 17 | * Creates independent processing jobs for parallel execution 18 | 19 | Preprocessing 20 | ------------- 21 | 22 | The `preprocessing `_ step handles several critical data preparation tasks: 23 | 24 | * Phase shift correction 25 | * Highpass filtering 26 | * Denoising 27 | * Bad channel removal 28 | * Common median reference ("cmr") or highpass spatial filter ("destripe") 29 | * Motion estimation and correction (optional) 30 | 31 | Spike Sorting 32 | ------------- 33 | 34 | The pipeline supports multiple spike sorting algorithms: 35 | 36 | * `Kilosort2.5 `_ 37 | * `Kilosort4 `_ 38 | * `SpykingCircus2 `_ 39 | 40 | Each sorter can be selected based on your specific needs and data characteristics. 41 | 42 | Postprocessing 43 | -------------- 44 | 45 | The `postprocessing `_ step performs additional processing on the 46 | combined preprocessed recording and sorted data: 47 | 48 | * Removal of duplicate units 49 | * Computations of *extensions*: 50 | 51 | * Waveforms extraction 52 | * Templates 53 | * Spike amplitudes 54 | * Unit locations 55 | * Principal Component Analysis (PCA) projections 56 | * Spike locations 57 | * Correlograms 58 | * Template similarity 59 | * Template metrics 60 | * Quality metrics 61 | 62 | Curation 63 | -------- 64 | 65 | The `curation `_ step applies quality control by: 66 | 67 | * Quality metrics-based filtering using thresholds on: 68 | * ISI violation ratio 69 | * Presence ratio 70 | * Amplitude cutoff 71 | * Unit classification as noise, MUA, or SUA using pretrained classifier (`UnitRefine `_) 72 | 73 | The *recipe* for quality metrics can be customized to suit your specific needs. 
74 |
75 | Visualization
76 | -------------
77 |
78 | The `visualization `_ step generates static figures and interactive Figurl links for each probe:
79 |
80 | * *timeseries*: including snippets of raw data, drift map, and motion visualizations
81 | * *sorting_summary*: for spike sorting results inspection and curation
82 |
83 | Each plot of the *timeseries* is also saved as a static image in the ``visualization/`` folder.
84 |
85 | Result Collection
86 | -----------------
87 |
88 | The `result collection `_ step:
89 |
90 | * Aggregates outputs from all parallel jobs
91 | * Copies output folders to the results directory
92 | * Organizes results in a standardized structure
93 |
94 | NWB Export
95 | ----------
96 |
97 | The final step creates standardized NWB output files, including:
98 |
99 | * Session and subject information from `aind-subject-nwb `_
100 | * Ecephys data from `aind-ecephys-nwb `_
101 | * Unit data from `aind-units-nwb `_
102 |
103 | Features:
104 |
105 | * Supports multiple streams (e.g., probes) per file
106 | * Optional raw data and LFP data writing
107 |
--------------------------------------------------------------------------------
/docs/source/deployments.rst:
--------------------------------------------------------------------------------
1 | Deployment Options
2 | ==================
3 |
4 | The pipeline can be deployed in several environments:
5 |
6 |
7 | SLURM Deployment
8 | ----------------
9 |
10 | Deploying on a SLURM cluster provides better performance and resource management.
11 |
12 | Requirements
13 | ~~~~~~~~~~~~
14 | * Access to a SLURM cluster
15 | * Nextflow installation
16 | * Singularity/Apptainer installation
17 | * Optional: Figurl setup for visualizations
18 |
19 | Configuration
20 | ~~~~~~~~~~~~~
21 |
22 | 1. Clone the repository:
23 |
24 | .. code-block:: bash
25 |
26 | git clone https://github.com/AllenNeuralDynamics/aind-ephys-pipeline.git
27 | cd aind-ephys-pipeline
28 | cd pipeline
29 |
30 | 2. Copy and modify the SLURM configuration:
31 |
32 | .. code-block:: bash
33 |
34 | cp nextflow_slurm.config nextflow_slurm_custom.config
35 |
36 | 3. Update the ``params.default_queue`` and ``params.gpu_queue`` parameters in ``nextflow_slurm_custom.config`` to match your cluster's partitions.
37 | The latter is only needed if it differs from the default queue.
38 |
39 | 4. Create a new submission script or modify the existing one (``slurm_submit.sh``):
40 |
41 | .. code-block:: bash
42 |
43 | #!/bin/bash
44 | #SBATCH --nodes=1
45 | #SBATCH --ntasks-per-node=1
46 | #SBATCH --mem=4GB
47 | #SBATCH --partition={your-partition}
48 | #SBATCH --time=2:00:00
49 |
50 | # Load required environment (if nextflow is installed in a conda environment)
51 | conda activate env_nf
52 |
53 | PIPELINE_PATH="path-to-your-cloned-repo"
54 | DATA_PATH="path-to-data-folder"
55 | RESULTS_PATH="path-to-results-folder"
56 | WORKDIR="path-to-large-workdir"
57 |
58 | DATA_PATH=$DATA_PATH RESULTS_PATH=$RESULTS_PATH nextflow \
59 | -C $PIPELINE_PATH/pipeline/nextflow_slurm_custom.config \
60 | -log $RESULTS_PATH/nextflow/nextflow.log \
61 | run $PIPELINE_PATH/pipeline/main_multi_backend.nf \
62 | -work-dir $WORKDIR \
63 | -resume
64 |
65 | 5. (Optional) Pre-build the required Apptainer/Singularity images for faster startup:
66 |
67 | .. code-block:: bash
68 |
69 | ./pull_pipeline_images.sh --sorter kilosort4
70 |
71 | This will pull and build the necessary Apptainer/Singularity images for the Kilosort4 sorter.
Adjust the ``--sorter`` argument as
72 | needed (e.g., to ``kilosort25``, ``spykingcircus2``, or ``all``).
73 | Note that this step requires you to set the ``NXF_APPTAINER_CACHEDIR``/``NXF_SINGULARITY_CACHEDIR`` environment variable to a directory with
74 | enough space to store the images. Images used by the Nextflow script will be cached automatically if not pre-built.
75 |
76 | 6. Submit the pipeline job:
77 |
78 | .. code-block:: bash
79 |
80 | sbatch slurm_submit.sh
81 |
82 |
83 | Local Deployment
84 | ----------------
85 |
86 | .. warning::
87 | While local deployment is possible, it's recommended to use SLURM or batch processing systems for better performance.
88 | Local deployment limits parallelization of resource-intensive processes to avoid system overload.
89 |
90 | Requirements
91 | ~~~~~~~~~~~~
92 | See the :doc:`installation` page for detailed setup instructions.
93 |
94 | Running Locally
95 | ~~~~~~~~~~~~~~~
96 |
97 | 1. Clone the repository:
98 |
99 | .. code-block:: bash
100 |
101 | git clone https://github.com/AllenNeuralDynamics/aind-ephys-pipeline.git
102 | cd aind-ephys-pipeline
103 | cd pipeline
104 |
105 | 2. Run the pipeline:
106 |
107 | .. code-block:: bash
108 |
109 | DATA_PATH=$PWD/../data RESULTS_PATH=$PWD/../results \
110 | nextflow -C nextflow_local.config -log $RESULTS_PATH/nextflow/nextflow.log \
111 | run main_multi_backend.nf \
112 | --n_jobs 8 -resume
113 |
114 |
115 | Code Ocean Deployment (AIND)
116 | ----------------------------
117 |
118 | For AIND internal use, the pipeline is deployed on Code Ocean with different branches for various configurations:
119 |
120 | Main Branches
121 | ~~~~~~~~~~~~~
122 | * ``main``/``co_kilosort4``: Kilosort4 sorter
123 | * ``co_kilosort25``: Kilosort2.5 sorter
124 | * ``co_spykingcircus2``: Spyking Circus 2 sorter
125 |
126 | Optogenetics Branches
127 | ~~~~~~~~~~~~~~~~~~~~~
128 | * ``co_kilosort25_opto``: Kilosort2.5 with opto artifact removal
129 | * ``co_kilosort4_opto``: Kilosort4 with opto artifact removal
130 | * ``co_spykingcircus2_opto``: Spyking Circus 2 with opto artifact removal
131 |
--------------------------------------------------------------------------------
/docs/source/input_output.rst:
--------------------------------------------------------------------------------
1 | Input and Output
2 | ================
3 |
4 | Input Data Types
5 | ----------------
6 |
7 | The pipeline supports several input data formats:
8 |
9 | SpikeGLX
10 | ~~~~~~~~
11 | * Input folder should contain a SpikeGLX saved folder
12 | * Recommended: Include ``subject.json`` and ``data_description.json`` following `aind-data-schema `_ specification
13 |
14 | Open Ephys
15 | ~~~~~~~~~~
16 | * Input folder should contain an Open Ephys folder
17 | * Recommended: Include ``subject.json`` and ``data_description.json`` following `aind-data-schema `_ specification
18 |
19 | NWB
20 | ~~~
21 | * Input folder should contain a single NWB file
22 | * Supports both HDF5 and Zarr backends
23 |
24 | SpikeInterface
25 | ~~~~~~~~~~~~~~
26 | * Input folder should contain a SpikeInterface recording object
27 | * The ``spikeinterface_info`` parameter can be used to specify the recording type and additional parameters
28 |
29 | AIND
30 | ~~~~
31 | * Used for AIND-specific data ingestion
32 | * Input folder structure (as defined in `aind-file-standards `_):
33 | * ``ecephys/`` directory containing:
34 | * ``ecephys_clipped/`` (clipped Open Ephys folder)
35 | * ``ecephys_compressed/`` (compressed traces with Zarr)
36 | * JSON files following
`aind-data-schema `_ specification 37 | 38 | 39 | Pipeline Output 40 | --------------- 41 | 42 | The pipeline output is organized in the ``RESULTS_PATH`` directory with the following structure: 43 | 44 | ``preprocessed/`` 45 | ~~~~~~~~~~~~~~~~~ 46 | Contains preprocessing outputs: 47 | 48 | * Preprocessed JSON files for each stream 49 | * Motion folders with estimated motion 50 | * Can be loaded with SpikeInterface: 51 | 52 | .. code-block:: python 53 | 54 | import spikeinterface as si 55 | recording_preprocessed = si.load( 56 | "path-to-preprocessed.json", 57 | base_folder="path-to-raw-data-parent" 58 | ) 59 | 60 | # Load motion data 61 | import spikeinterface.preprocessing as spre 62 | motion_info = spre.load_motion_info("path-to-motion-folder") 63 | 64 | ``spikesorted/`` 65 | ~~~~~~~~~~~~~~~~ 66 | Contains raw spike sorting output: 67 | 68 | * One folder per stream 69 | * Can be loaded as: 70 | 71 | .. code-block:: python 72 | 73 | import spikeinterface as si 74 | sorting_raw = si.load("path-to-spikesorted-folder") 75 | 76 | ``curated/`` 77 | ~~~~~~~~~~~~ 78 | Contains curated spike sorting outputs: 79 | 80 | * Includes unit deduplication and quality metric-based curation 81 | * Unit classification results 82 | * Load with SpikeInterface: 83 | 84 | .. code-block:: python 85 | 86 | import spikeinterface as si 87 | sorting_curated = si.load("path-to-curated-folder") 88 | 89 | # Access curation properties 90 | default_qc = sorting_curated.get_property("default_qc") # True/False for QC pass 91 | decoder_label = sorting_curated.get_property("decoder_label") # noise/MUA/SUA 92 | 93 | ``postprocessed/`` 94 | ~~~~~~~~~~~~~~~~~~ 95 | Contains postprocessing output in Zarr format: 96 | 97 | * One folder per stream 98 | * Load with SpikeInterface as a ``SortingAnalyzer`` object: 99 | 100 | .. code-block:: python 101 | 102 | import spikeinterface as si 103 | sorting_analyzer = si.load("path-to-postprocessed-folder.zarr") 104 | 105 | # Access extensions 106 | unit_locations = sorting_analyzer.get_extension("unit_locations").get_data() 107 | qm = sorting_analyzer.get_extension("quality_metrics").get_data() 108 | 109 | .. note:: 110 | The ``default_qc`` and ``decoder_label`` properties are also included in the ``SortingAnalyzer``! 111 | You can access them using: ``sorting_analyzer.get_sorting_property("default_qc"/"decoder_label")``. 112 | 113 | ``nwb/`` 114 | ~~~~~~~~ 115 | Contains generated NWB files: 116 | 117 | * One NWB file per block/segment 118 | * Includes all streams for that block/segment 119 | * Contains: 120 | * Session/subject information 121 | * Ecephys metadata 122 | * LFP signals (optional) 123 | * Units data 124 | 125 | ``visualization/`` 126 | ~~~~~~~~~~~~~~~~~~ 127 | Contains generated visualizations: 128 | 129 | * Drift maps 130 | * Motion plots 131 | * Sample traces for all streams 132 | 133 | Additional Files 134 | ---------------- 135 | 136 | * ``visualization_output.json``: Contains Figurl links for each stream 137 | * ``processing.json``: Logs processing steps, parameters, and execution times 138 | * ``nextflow/``: Contains all Nextflow-generated files 139 | -------------------------------------------------------------------------------- /docs/source/customization.rst: -------------------------------------------------------------------------------- 1 | Customization 2 | ============= 3 | 4 | The pipeline is designed to be flexible and customizable, given its modular structure. 
5 | 6 | There are two main customization points that we foresee: custom data ingestion and custom spike sorting. 7 | 8 | 9 | 1. Custom Data Ingestion 10 | ------------------------ 11 | 12 | To support additional data formats: 13 | 14 | 1. Create a custom job dispatch implementation (see `job dispatch README `_) 15 | 2. If additional dependencies are needed, create a custom Docker image that includes them 16 | 3. Modify ``main_multi_backend.nf`` to use your custom job dispatch repo and container 17 | 18 | This allows for flexible adaptation to different data formats while maintaining the pipeline's core functionality. 19 | 20 | 2. Custom Spike Sorting 21 | ----------------------- 22 | 23 | To add a new spike sorting algorithm: 24 | 25 | 1. | Create a GitHub repo (e.g., ``https://github.com/new-sorter-capsule-repo.git``) with the custom spike sorting implementation. 26 | | You can follow existing sorters as a starting point 27 | | (e.g., from the `Kilosort4 capsule `_) 28 | | and just change the `run spike sorting section `_, 29 | | the `sorter info `_, 30 | | and the `default parameters `_. 31 | 2. Create a new Docker image for your spike sorter, which should include the sorter installation and SpikeInterface. The image should be pushed to a container registry (e.g., Docker Hub, Singularity Hub, etc.). 32 | You can use the existing Dockerfiles as a reference. 33 | 3. Add the commit hash of the version of the sorter you want to use in the ``capsule_versions.env`` file: ``SPIKESORT_NEWSORTER=commit_hash``. 34 | This file is used to define the versions of the sorter and the capsule. The commit hash should be the one you want to use for your sorter. 35 | 4. Add a new process. This can also be defined in a different file, e.g. ``new_sorter.nf`` and imported in the main workflow. 36 | 37 | 38 | .. code:: java 39 | 40 | process spikesort_newsorter { 41 | tag 'spikesort-newsorter' 42 | def container_name = "my-new-sorter-container" 43 | container container_name 44 | 45 | input: 46 | val max_duration_minutes 47 | path preprocessing_results, stageAs: 'capsule/data/*' 48 | 49 | output: 50 | path 'capsule/results/*', emit: results 51 | 52 | script: 53 | """ 54 | #!/usr/bin/env bash 55 | set -e 56 | 57 | mkdir -p capsule 58 | mkdir -p capsule/data 59 | mkdir -p capsule/results 60 | mkdir -p capsule/scratch 61 | 62 | if [[ ${params.executor} == "slurm" ]]; then 63 | echo "[${task.tag}] allocated task time: ${task.time}" 64 | fi 65 | 66 | echo "[${task.tag}] cloning git repo..." 67 | git clone "https://github.com/new-sorter-capsule-repo.git" capsule-repo 68 | git -C capsule-repo -c core.fileMode=false checkout ${versions['SPIKESORT_NEWSORTER']} --quiet 69 | mv capsule-repo/code capsule/code 70 | rm -rf capsule-repo 71 | 72 | echo "[${task.tag}] running capsule..." 73 | cd capsule/code 74 | chmod +x run 75 | ./run ${spikesorting_args} ${job_args} 76 | 77 | echo "[${task.tag}] completed!" 78 | """ 79 | } 80 | 81 | 5. Modify the ``main_multi_backend.nf`` to add a new channel: 82 | 83 | .. code:: bash 84 | 85 | ... in the workflow definition ... 
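    // the resolved sorter name (from --sorter or the params file) selects which process runs below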
86 | 87 | if (sorter == 'kilosort25') { 88 | spikesort_out = spikesort_kilosort25( 89 | max_duration_minutes, 90 | preprocessing_out.results 91 | ) 92 | } else if (sorter == 'kilosort4') { 93 | spikesort_out = spikesort_kilosort4( 94 | max_duration_minutes, 95 | preprocessing_out.results 96 | ) 97 | } else if (sorter == 'spykingcircus2') { 98 | spikesort_out = spikesort_spykingcircus2( 99 | max_duration_minutes, 100 | preprocessing_out.results 101 | ) 102 | } else if (sorter == 'new_sorter') { 103 | spikesort_out = spikesort_new_sorter( 104 | max_duration_minutes, 105 | preprocessing_out.results 106 | ) 107 | } 108 | -------------------------------------------------------------------------------- /pipeline/nextflow_slurm.config: -------------------------------------------------------------------------------- 1 | params.executor = "slurm" 2 | 3 | // define your preferred queues here 4 | params.default_queue = 'my-queue-name' // Default queue 5 | params.gpu_queue = null // Optional: Only set if GPU queue is needed and different from default queue 6 | 7 | // Detect engine: prefer apptainer if available, else singularity 8 | def hasApptainer = ['bash','-c','command -v apptainer'].execute().waitFor() == 0 9 | def hasSingularity = ['bash','-c','command -v singularity'].execute().waitFor() == 0 10 | 11 | if( hasApptainer ) { 12 | apptainer { 13 | enabled = true 14 | autoMounts = true 15 | apptainer.enabled = true 16 | apptainer.autoMounts = true 17 | platform = 'linux/amd64' 18 | envWhitelist = ['KACHERY_ZONE', 'KACHERY_API_KEY', 'NUMBA_CACHE_DIR', 'HDF5_USE_FILE_LOCKING'] 19 | } 20 | } 21 | else if( hasSingularity ) { 22 | singularity { 23 | enabled = true 24 | autoMounts = true 25 | singularity.enabled = true 26 | singularity.autoMounts = true 27 | platform = 'linux/amd64' 28 | envWhitelist = ['KACHERY_ZONE', 'KACHERY_API_KEY', 'NUMBA_CACHE_DIR', 'HDF5_USE_FILE_LOCKING'] 29 | } 30 | } 31 | else { 32 | throw new RuntimeException("Neither apptainer nor singularity found in PATH") 33 | } 34 | 35 | process { 36 | executor = 'slurm' 37 | queue = params.default_queue // Default queue assignment 38 | debug = true 39 | 40 | // this could be needed if the NextFlow workdir is outside of the user home folder 41 | // containerOptions = "--bind \$HOME:\$HOME" 42 | 43 | // change max forks for specific processes to allow multiple forks 44 | withName: job_dispatch { 45 | cpus=4 46 | memory='32 GB' 47 | time='1h' 48 | } 49 | // time can be specified as absolute time (e.g. '1h') or as relative to the recording duration 50 | // e.g. 
time={ max_duration_minutes.toFloat()*4 + 'm' } means 4x recording duration 51 | withName: preprocessing { 52 | cpus=16 53 | memory='64 GB' 54 | // Allocate 4x recording duration 55 | time={ max_duration_minutes.toFloat()*4 + 'm' } 56 | } 57 | withName: spikesort_kilosort4 { 58 | cpus=16 59 | memory='64 GB' 60 | containerOptions='--nv' 61 | clusterOptions='--gres=gpu:1' 62 | queue=params.gpu_queue ?: params.default_queue 63 | // Some systems may require 'module cuda' directive 64 | // module cuda 65 | // Allocate 4x recording duration 66 | time={ max_duration_minutes.toFloat()*4 + 'm' } 67 | } 68 | withName: spikesort_kilosort25 { 69 | cpus=16 70 | memory='64 GB' 71 | containerOptions='--nv' 72 | clusterOptions='--gres=gpu:1' 73 | queue=params.gpu_queue ?: params.default_queue 74 | // Some systems may require 'module cuda' directive 75 | // module cuda 76 | // Allocate 4x recording duration 77 | time={ max_duration_minutes.toFloat()*4 + 'm' } 78 | } 79 | withName: spikesort_spykingcircus2 { 80 | cpus=16 81 | memory='64 GB' 82 | // Allocate 4x recording duration 83 | time={ max_duration_minutes.toFloat()*4 + 'm' } 84 | } 85 | withName: postprocessing { 86 | cpus=16 87 | memory='64 GB' 88 | // Allocate 4x recording duration 89 | time={ max_duration_minutes.toFloat()*4 + 'm' } 90 | } 91 | withName: curation { 92 | cpus=4 93 | memory='32 GB' 94 | // Allocate 10min per recording hour. Minimum 10m 95 | time={ max_duration_minutes.toFloat()/6 > 10 ? max_duration_minutes.value.toFloat()/6 + 'm' : '10m' } 96 | } 97 | withName: visualization { 98 | cpus=4 99 | memory='32 GB' 100 | // Allocate 2h per recording hour 101 | time={ max_duration_minutes.toFloat()*2 + 'm' } 102 | } 103 | withName: results_collector { 104 | cpus=4 105 | memory='32 GB' 106 | // Allocate 1x recording duration 107 | time={ max_duration_minutes.toFloat() > 10 ? max_duration_minutes.toFloat() + 'm' : '10m' } 108 | } 109 | withName: quality_control { 110 | cpus=16 111 | memory='64 GB' 112 | // Allocate 2h per recording hour 113 | time={ max_duration_minutes.toFloat()*2 + 'm' } 114 | } 115 | withName: quality_control_collector { 116 | cpus=4 117 | memory='32 GB' 118 | // Allocate 10min per recording hour. Minimum 10m 119 | time={ max_duration_minutes.toFloat()/6 > 10 ? max_duration_minutes.toFloat()/6 + 'm' : '10m' } 120 | } 121 | withName: nwb_subject { 122 | cpus=4 123 | memory='32 GB' 124 | // Allocate 10min per recording hour. Minimum 10m 125 | time={ max_duration_minutes.toFloat()/6 > 10 ? 
max_duration_minutes.toFloat()/6 + 'm' : '10m' } 126 | } 127 | withName: nwb_ecephys { 128 | cpus=16 129 | memory='64 GB' 130 | // Allocate 2x recording duration 131 | time={ max_duration_minutes.toFloat()*2 + 'm' } 132 | } 133 | withName: nwb_units { 134 | cpus=4 135 | memory='32 GB' 136 | // Allocate 2x recording duration 137 | time={ max_duration_minutes.toFloat()*2 + 'm' } 138 | } 139 | } 140 | 141 | 142 | dag { 143 | enabled = true 144 | file = RESULTS_PATH + '/nextflow/dag.html' 145 | overwrite = true 146 | } 147 | 148 | report { 149 | enabled = true 150 | file = RESULTS_PATH + '/nextflow/report.html' 151 | overwrite = true 152 | } 153 | 154 | timeline { 155 | enabled = true 156 | file = RESULTS_PATH + '/nextflow/timeline.html' 157 | overwrite = true 158 | } 159 | 160 | trace { 161 | enabled = true 162 | file = RESULTS_PATH + '/nextflow/trace.txt' 163 | overwrite = true 164 | } 165 | -------------------------------------------------------------------------------- /docs/source/parameters.rst: -------------------------------------------------------------------------------- 1 | Pipeline Parameters 2 | =================== 3 | 4 | Global Parameters 5 | ----------------- 6 | 7 | The pipeline accepts several global parameters that control its overall behavior: 8 | 9 | .. code-block:: bash 10 | 11 | --n_jobs N_JOBS Number of parallel jobs (for local deployment) 12 | --runmode {full,fast} Processing mode ('fast' skips some steps like motion correction) 13 | --sorter {kilosort25,kilosort4,spykingcircus2} Spike sorter selection 14 | 15 | 16 | Parameter File 17 | -------------- 18 | 19 | A parameter file can be used to set all parameters at once. 20 | This is the recommended way to configure the pipeline, especially for complex setups. 21 | The parameter file should be in JSON format and you can use the ``pipeline/default_params.json`` file as a template. 22 | 23 | To use a parameter file, specify it with the ``--params_file`` option: 24 | 25 | .. code-block:: bash 26 | 27 | --params_file PATH_TO_PARAMS_FILE 28 | # Example: --params_file pipeline/default_params.json 29 | 30 | Note that the parameter file will override any command line parameters specified. 31 | 32 | .. note:: 33 | 34 | In the ``spikesorting`` section of the parameter file, you can specify the sorter and its parameters. 35 | The ``sorter`` field, if specified and not null, will override the command line ``--sorter`` parameter. 36 | 37 | 38 | Process-Specific Command Line Arguments 39 | --------------------------------------- 40 | 41 | Each pipeline step can be configured with specific parameters using the format: 42 | 43 | .. code-block:: bash 44 | 45 | --{step_name}_args "{args}" 46 | 47 | Job Dispatch Parameters 48 | ~~~~~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. code-block:: bash 51 | 52 | --job_dispatch_args " 53 | --no-split-segments # Whether to concatenate or split recording segments or not. Default: split segments 54 | --no-split-groups # Whether to process different groups separately. Default: split groups 55 | --debug # Whether to run in DEBUG mode. Default: False 56 | --debug-duration DURATION # Duration of clipped recording in debug mode. Only used if debug is enabled. Default: 30 seconds 57 | --skip-timestamps-check # Skip timestamps check. 
Default: False 58 | --input {aind,spikeglx,openephys,nwb,spikeinterface} 59 | # Which 'loader' to use (aind | spikeglx | openephys | nwb | spikeinterface) 60 | --spikeinterface-info SPIKEINTERFACE_INFO 61 | # A JSON path or string to specify how to parse the recording in spikeinterface, including: 62 | - 1. reader_type (required): string with the reader type (e.g. 'plexon', 'neuralynx', 'intan' etc.). 63 | - 2. reader_kwargs (optional): dictionary with the reader kwargs (e.g. {'folder': '/path/to/folder'}). 64 | - 3. keep_stream_substrings (optional): string or list of strings with the stream names to load (e.g. 'AP' or ['AP', 'LFP']). 65 | - 4. skip_stream_substrings (optional): string (or list of strings) with substrings used to skip streams (e.g. 'NIDQ' or ['USB', 'EVENTS']). 66 | - 5. probe_paths (optional): string or dict the probe paths to a ProbeInterface JSON file (e.g. '/path/to/probe.json'). If a dict is provided, the key is the stream name and the value is the probe path. If reader_kwargs is not provided, the reader will be created with default parameters. The probe_path is required if the reader doesn't load the probe automatically. 67 | 68 | " 69 | 70 | Preprocessing Parameters 71 | ~~~~~~~~~~~~~~~~~~~~~~~~ 72 | 73 | .. code-block:: bash 74 | 75 | --preprocessing_args " 76 | --denoising {cmr,destripe} # Denoising strategy 77 | --filter-type {highpass,bandpass} # Filter type 78 | --no-remove-out-channels # Skip out-channels removal 79 | --no-remove-bad-channels # Skip bad-channels removal 80 | --max-bad-channel-fraction FRACTION # Max fraction of bad channels 81 | --motion {skip,compute,apply} # Motion correction mode 82 | --motion-preset PRESET # Motion correction preset 83 | --motion-temporal-bin-s # Temporal bin size (seconds) 84 | --t-start START # Recording start time (seconds) 85 | --t-stop STOP # Recording stop time (seconds) 86 | --min-duration DURATION # Minimum recording duration (seconds) to run preprocessing 87 | " 88 | 89 | Available motion presets: 90 | * ``dredge`` 91 | * ``dredge_fast`` (default) 92 | * ``nonrigid_accurate`` 93 | * ``nonrigid_fast_and_accurate`` 94 | * ``rigid_fast`` 95 | * ``kilosort_like`` 96 | 97 | Spike Sorting Parameters 98 | ~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. code-block:: bash 101 | 102 | --spikesort_args " 103 | --raise-if-fails # Raise error on failure 104 | --skip-motion-correction # Skip sorter motion correction 105 | --min-drift-channels N # Min channels for motion correction 106 | --clear-cache # Force PyTorch memory cleanup (Kilosort4) 107 | " 108 | 109 | 110 | NWB Ecephys Parameters 111 | ~~~~~~~~~~~~~~~~~~~~~~ 112 | 113 | .. code-block:: bash 114 | 115 | --nwb_ecephys_args " 116 | --backend {zarr,hdf5} # Backend to use for NWB writing (default: zarr) 117 | --skip-lfp # Skip LFP electrical series 118 | --write-raw # Write RAW electrical series 119 | --lfp_temporal_factor N # Temporal subsampling factor 120 | --lfp_spatial_factor N # Spatial subsampling factor 121 | --lfp_highpass_freq_min F # LFP highpass filter cutoff (Hz) 122 | " 123 | 124 | Example Usage of CLI Arguments 125 | ------------------------------ 126 | 127 | Here's an example of running the pipeline with custom parameters: 128 | 129 | .. 
code-block:: bash 130 | 131 | DATA_PATH=$DATA RESULTS_PATH=$RESULTS \ 132 | nextflow -C nextflow_local.config run main_multi_backend.nf \ 133 | --n_jobs 16 \ 134 | --sorter kilosort4 \ 135 | --job_dispatch_args "--input spikeglx --debug --debug-duration 120" \ 136 | --preprocessing_args "--motion compute --motion-preset nonrigid_fast_and_accurate" \ 137 | --nwb_ecephys_args "--skip-lfp" 138 | 139 | This example: 140 | * Runs 16 parallel jobs 141 | * Uses Kilosort4 for spike sorting 142 | * Processes SpikeGLX data in debug mode 143 | * Computes nonrigid motion correction 144 | * Skips LFP export in NWB files 145 | -------------------------------------------------------------------------------- /docs/source/_static/light-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 9 | 10 | 11 | 12 | 17 | 18 | 19 | 21 | 22 | 23 | 25 | 26 | 27 | 31 | 32 | 33 | 42 | 43 | 44 | 49 | 50 | 51 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 63 | 64 | 65 | 67 | 68 | 69 | 70 | 71 | 72 | 74 | 75 | 76 | 88 | 89 | 90 | 92 | 93 | 94 | 95 | 96 | 97 | 99 | 100 | 101 | 102 | 103 | 104 | 106 | 108 | 110 | 111 | 113 | 114 | 115 | 117 | 119 | 120 | 123 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /docs/source/_static/dark-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 10 | 11 | 12 | 13 | 18 | 19 | 20 | 22 | 23 | 24 | 26 | 27 | 28 | 32 | 33 | 34 | 43 | 44 | 45 | 50 | 51 | 52 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 64 | 65 | 66 | 68 | 69 | 70 | 71 | 72 | 73 | 75 | 76 | 77 | 89 | 90 | 91 | 93 | 94 | 95 | 96 | 97 | 98 | 100 | 101 | 102 | 103 | 104 | 105 | 107 | 109 | 111 | 112 | 114 | 115 | 116 | 118 | 120 | 121 | 124 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /.github/workflows/params_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_dispatch": { 3 | "concatenate": false, 4 | "split_groups": true, 5 | "debug": false, 6 | "debug_duration": 30, 7 | "skip_timestamps_check": false, 8 | "multi_session": false, 9 | "input": "nwb" 10 | }, 11 | "preprocessing": { 12 | "job_kwargs": { 13 | "chunk_duration": "1s", 14 | "progress_bar": false 15 | }, 16 | "denoising_strategy": "cmr", 17 | "min_preprocessing_duration": 120, 18 | "filter_type": "bandpass", 19 | "highpass_filter": { 20 | "freq_min": 300.0, 21 | "margin_ms": 5.0 22 | }, 23 | "bandpass_filter": { 24 | "freq_min": 300.0, 25 | "freq_max": 6000.0, 26 | "margin_ms": 5.0 27 | }, 28 | "phase_shift": { 29 | "margin_ms": 100.0 30 | }, 31 | "detect_bad_channels": { 32 | "method": "coherence+psd", 33 | "dead_channel_threshold": -0.5, 34 | "noisy_channel_threshold": 1.0, 35 | "outside_channel_threshold": -0.3, 36 | "outside_channels_location": "top", 37 | "n_neighbors": 11, 38 | "seed": 0 39 | }, 40 | "remove_out_channels": true, 41 | "remove_bad_channels": true, 42 | "max_bad_channel_fraction": 0.5, 43 | "common_reference": { 44 | "reference": "global", 45 | "operator": "median" 46 | }, 47 | "highpass_spatial_filter": { 48 | "n_channel_pad": 60, 49 | "n_channel_taper": null, 50 | "direction": "y", 51 | "apply_agc": true, 52 | "agc_window_length_s": 0.01, 53 | "highpass_butter_order": 3, 54 | "highpass_butter_wn": 0.01 55 | }, 56 | "motion_correction": { 57 | "compute": true, 58 | "apply": false, 59 | "preset": "dredge_fast", 60 | "detect_kwargs": {}, 61 | "select_kwargs": {}, 62 | 
"localize_peaks_kwargs": {}, 63 | "estimate_motion_kwargs": { 64 | "win_step_norm": 0.1, 65 | "win_scale_norm": 0.1 66 | }, 67 | "interpolate_motion_kwargs": {} 68 | } 69 | }, 70 | "postprocessing": { 71 | "job_kwargs": { 72 | "chunk_duration": "1s", 73 | "progress_bar": false 74 | }, 75 | "use_motion_corrected": false, 76 | "sparsity": { 77 | "method": "radius", 78 | "radius_um": 100 79 | }, 80 | "duplicate_threshold": 0.9, 81 | "return_scaled": true, 82 | "random_spikes": { 83 | "max_spikes_per_unit": 500, 84 | "method": "uniform", 85 | "margin_size": null, 86 | "seed": null 87 | }, 88 | "noise_levels": { 89 | "num_chunks_per_segment": 20, 90 | "chunk_size": 10000, 91 | "seed": null 92 | }, 93 | "waveforms": { 94 | "ms_before": 3.0, 95 | "ms_after": 4.0, 96 | "dtype": null 97 | }, 98 | "templates": {}, 99 | "spike_amplitudes": { 100 | "peak_sign": "neg" 101 | }, 102 | "template_similarity": { 103 | "method": "cosine_similarity" 104 | }, 105 | "correlograms": { 106 | "window_ms": 50.0, 107 | "bin_ms": 1.0 108 | }, 109 | "isi_histograms": { 110 | "window_ms": 100.0, 111 | "bin_ms": 5.0 112 | }, 113 | "unit_locations": { 114 | "method": "monopolar_triangulation" 115 | }, 116 | "spike_locations": { 117 | "method": "grid_convolution" 118 | }, 119 | "template_metrics": { 120 | "upsampling_factor": 10, 121 | "sparsity": null, 122 | "include_multi_channel_metrics": true 123 | }, 124 | "principal_components": { 125 | "n_components": 5, 126 | "mode": "by_channel_local", 127 | "whiten": true 128 | }, 129 | "quality_metrics_names": [ 130 | "num_spikes", 131 | "firing_rate", 132 | "presence_ratio", 133 | "snr", 134 | "isi_violation", 135 | "rp_violation", 136 | "sliding_rp_violation", 137 | "amplitude_cutoff", 138 | "amplitude_median", 139 | "amplitude_cv", 140 | "synchrony", 141 | "firing_range", 142 | "drift", 143 | "isolation_distance", 144 | "l_ratio", 145 | "d_prime", 146 | "nearest_neighbor", 147 | "silhouette" 148 | ], 149 | "quality_metrics": { 150 | "presence_ratio": { 151 | "bin_duration_s": 60 152 | }, 153 | "snr": { 154 | "peak_sign": "neg", 155 | "peak_mode": "extremum" 156 | }, 157 | "isi_violation": { 158 | "isi_threshold_ms": 1.5, 159 | "min_isi_ms": 0 160 | }, 161 | "rp_violation": { 162 | "refractory_period_ms": 1, 163 | "censored_period_ms": 0.0 164 | }, 165 | "sliding_rp_violation": { 166 | "bin_size_ms": 0.25, 167 | "window_size_s": 1, 168 | "exclude_ref_period_below_ms": 0.5, 169 | "max_ref_period_ms": 10, 170 | "contamination_values": null 171 | }, 172 | "amplitude_cutoff": { 173 | "peak_sign": "neg", 174 | "num_histogram_bins": 100, 175 | "histogram_smoothing_value": 3, 176 | "amplitudes_bins_min_ratio": 5 177 | }, 178 | "amplitude_median": { 179 | "peak_sign": "neg" 180 | }, 181 | "amplitude_cv": { 182 | "average_num_spikes_per_bin": 50, 183 | "percentiles": [5, 95], 184 | "min_num_bins": 10, 185 | "amplitude_extension": "spike_amplitudes" 186 | }, 187 | "firing_range": { 188 | "bin_size_s": 5, 189 | "percentiles": [5, 95] 190 | }, 191 | "synchrony": { 192 | "synchrony_sizes": [2, 4, 8] 193 | }, 194 | "nearest_neighbor": { 195 | "max_spikes": 10000, 196 | "n_neighbors": 4 197 | }, 198 | "nn_isolation": { 199 | "max_spikes": 10000, 200 | "min_spikes": 10, 201 | "n_neighbors": 4, 202 | "n_components": 10, 203 | "radius_um": 100 204 | }, 205 | "nn_noise_overlap": { 206 | "max_spikes": 10000, 207 | "min_spikes": 10, 208 | "n_neighbors": 4, 209 | "n_components": 10, 210 | "radius_um": 100 211 | }, 212 | "silhouette": { 213 | "method": ["simplified"] 214 | } 215 | } 216 | }, 217 | 
"curation": { 218 | "job_kwargs": { 219 | "chunk_duration": "1s", 220 | "progress_bar": false 221 | }, 222 | "query": "isi_violations_ratio < 0.5 and presence_ratio > 0.8 and amplitude_cutoff < 0.1", 223 | "noise_neural_classifier": "SpikeInterface/UnitRefine_noise_neural_classifier", 224 | "sua_mua_classifier": "SpikeInterface/UnitRefine_sua_mua_classifier" 225 | }, 226 | "visualization": { 227 | "job_kwargs": { 228 | "chunk_duration": "1s", 229 | "progress_bar": false 230 | }, 231 | "timeseries": { 232 | "n_snippets_per_segment": 2, 233 | "snippet_duration_s": 0.5 234 | }, 235 | "drift": { 236 | "detection": { 237 | "peak_sign": "neg", 238 | "detect_threshold": 5, 239 | "exclude_sweep_ms": 0.1 240 | }, 241 | "localization": { 242 | "ms_before": 0.1, 243 | "ms_after": 0.3, 244 | "radius_um": 100.0 245 | }, 246 | "n_skip": 30, 247 | "alpha": 0.15, 248 | "vmin": -200, 249 | "vmax": 0, 250 | "cmap": "Greys_r", 251 | "figsize": [10, 10] 252 | }, 253 | "motion": { 254 | "cmap": "Greys_r", 255 | "scatter_decimate": 15, 256 | "figsize": [15, 10] 257 | } 258 | }, 259 | "nwb": { 260 | "backend": "zarr", 261 | "ecephys": { 262 | "stub": false, 263 | "stub_seconds": 10, 264 | "write_lfp": true, 265 | "write_raw": false, 266 | "lfp_temporal_factor": 2, 267 | "lfp_spatial_factor": 4, 268 | "lfp_highpass_freq_min": 0.1, 269 | "surface_channel_agar_probes_indices": "", 270 | "lfp": { 271 | "filter": { 272 | "freq_min": 0.1, 273 | "freq_max": 500 274 | }, 275 | "sampling_rate": 2500 276 | } 277 | } 278 | }, 279 | "spikesorting": { 280 | "sorter": "spykingcircus2", 281 | "spykingcircus2": { 282 | "job_kwargs": { 283 | "chunk_duration": "1s", 284 | "progress_bar": false 285 | }, 286 | "sorter": { 287 | "general": {"ms_before": 2, "ms_after": 2, "radius_um": 100}, 288 | "sparsity": {"method": "snr", "amplitude_mode": "peak_to_peak", "threshold": 0.25}, 289 | "filtering": {"freq_min": 150, "freq_max": 7000, "ftype": "bessel", "filter_order": 2, "margin_ms": 10}, 290 | "whitening": {"mode": "local", "regularize": false}, 291 | "detection": {"peak_sign": "neg", "detect_threshold": 5}, 292 | "selection": { 293 | "method": "uniform", 294 | "n_peaks_per_channel": 5000, 295 | "min_n_peaks": 100000, 296 | "select_per_channel": false, 297 | "seed": 42 298 | }, 299 | "apply_motion_correction": true, 300 | "motion_correction": {"preset": "dredge_fast"}, 301 | "merging": {"max_distance_um": 50}, 302 | "clustering": {"legacy": true}, 303 | "matching": {"method": "wobble"}, 304 | "apply_preprocessing": false, 305 | "templates_from_svd": true, 306 | "cache_preprocessing": {"mode": "no-cache", "memory_limit": 0.5, "delete_cache": true}, 307 | "multi_units_only": false, 308 | "job_kwargs": {"n_jobs": 0.9}, 309 | "seed": 42, 310 | "debug": false 311 | } 312 | } 313 | } 314 | } 315 | -------------------------------------------------------------------------------- /tests/params_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_dispatch": { 3 | "concatenate": false, 4 | "split_groups": true, 5 | "debug": false, 6 | "debug_duration": 30, 7 | "skip_timestamps_check": false, 8 | "multi_session": false, 9 | "input": "nwb" 10 | }, 11 | "preprocessing": { 12 | "job_kwargs": { 13 | "chunk_duration": "1s", 14 | "progress_bar": false 15 | }, 16 | "denoising_strategy": "cmr", 17 | "min_preprocessing_duration": 120, 18 | "filter_type": "bandpass", 19 | "highpass_filter": { 20 | "freq_min": 300.0, 21 | "margin_ms": 5.0 22 | }, 23 | "bandpass_filter": { 24 | "freq_min": 300.0, 25 | 
"freq_max": 6000.0, 26 | "margin_ms": 5.0 27 | }, 28 | "phase_shift": { 29 | "margin_ms": 100.0 30 | }, 31 | "detect_bad_channels": { 32 | "method": "coherence+psd", 33 | "dead_channel_threshold": -0.5, 34 | "noisy_channel_threshold": 1.0, 35 | "outside_channel_threshold": -0.3, 36 | "outside_channels_location": "top", 37 | "n_neighbors": 11, 38 | "seed": 0 39 | }, 40 | "remove_out_channels": true, 41 | "remove_bad_channels": true, 42 | "max_bad_channel_fraction": 0.5, 43 | "common_reference": { 44 | "reference": "global", 45 | "operator": "median" 46 | }, 47 | "highpass_spatial_filter": { 48 | "n_channel_pad": 60, 49 | "n_channel_taper": null, 50 | "direction": "y", 51 | "apply_agc": true, 52 | "agc_window_length_s": 0.01, 53 | "highpass_butter_order": 3, 54 | "highpass_butter_wn": 0.01 55 | }, 56 | "motion_correction": { 57 | "compute": true, 58 | "apply": false, 59 | "preset": "dredge_fast", 60 | "detect_kwargs": {}, 61 | "select_kwargs": {}, 62 | "localize_peaks_kwargs": {}, 63 | "estimate_motion_kwargs": { 64 | "win_step_norm": 0.1, 65 | "win_scale_norm": 0.1 66 | }, 67 | "interpolate_motion_kwargs": {} 68 | } 69 | }, 70 | "postprocessing": { 71 | "job_kwargs": { 72 | "chunk_duration": "1s", 73 | "progress_bar": false 74 | }, 75 | "use_motion_corrected": false, 76 | "sparsity": { 77 | "method": "radius", 78 | "radius_um": 100 79 | }, 80 | "duplicate_threshold": 0.9, 81 | "return_scaled": true, 82 | "random_spikes": { 83 | "max_spikes_per_unit": 500, 84 | "method": "uniform", 85 | "margin_size": null, 86 | "seed": null 87 | }, 88 | "noise_levels": { 89 | "num_chunks_per_segment": 20, 90 | "chunk_size": 10000, 91 | "seed": null 92 | }, 93 | "waveforms": { 94 | "ms_before": 3.0, 95 | "ms_after": 4.0, 96 | "dtype": null 97 | }, 98 | "templates": {}, 99 | "spike_amplitudes": { 100 | "peak_sign": "neg" 101 | }, 102 | "template_similarity": { 103 | "method": "cosine_similarity" 104 | }, 105 | "correlograms": { 106 | "window_ms": 50.0, 107 | "bin_ms": 1.0 108 | }, 109 | "isi_histograms": { 110 | "window_ms": 100.0, 111 | "bin_ms": 5.0 112 | }, 113 | "unit_locations": { 114 | "method": "monopolar_triangulation" 115 | }, 116 | "spike_locations": { 117 | "method": "grid_convolution" 118 | }, 119 | "template_metrics": { 120 | "upsampling_factor": 10, 121 | "sparsity": null, 122 | "include_multi_channel_metrics": true 123 | }, 124 | "principal_components": { 125 | "n_components": 5, 126 | "mode": "by_channel_local", 127 | "whiten": true 128 | }, 129 | "quality_metrics_names": [ 130 | "num_spikes", 131 | "firing_rate", 132 | "presence_ratio", 133 | "snr", 134 | "isi_violation", 135 | "rp_violation", 136 | "sliding_rp_violation", 137 | "amplitude_cutoff", 138 | "amplitude_median", 139 | "amplitude_cv", 140 | "synchrony", 141 | "firing_range", 142 | "drift", 143 | "isolation_distance", 144 | "l_ratio", 145 | "d_prime", 146 | "nearest_neighbor", 147 | "silhouette" 148 | ], 149 | "quality_metrics": { 150 | "presence_ratio": { 151 | "bin_duration_s": 60 152 | }, 153 | "snr": { 154 | "peak_sign": "neg", 155 | "peak_mode": "extremum" 156 | }, 157 | "isi_violation": { 158 | "isi_threshold_ms": 1.5, 159 | "min_isi_ms": 0 160 | }, 161 | "rp_violation": { 162 | "refractory_period_ms": 1, 163 | "censored_period_ms": 0.0 164 | }, 165 | "sliding_rp_violation": { 166 | "bin_size_ms": 0.25, 167 | "window_size_s": 1, 168 | "exclude_ref_period_below_ms": 0.5, 169 | "max_ref_period_ms": 10, 170 | "contamination_values": null 171 | }, 172 | "amplitude_cutoff": { 173 | "peak_sign": "neg", 174 | "num_histogram_bins": 
100, 175 | "histogram_smoothing_value": 3, 176 | "amplitudes_bins_min_ratio": 5 177 | }, 178 | "amplitude_median": { 179 | "peak_sign": "neg" 180 | }, 181 | "amplitude_cv": { 182 | "average_num_spikes_per_bin": 50, 183 | "percentiles": [5, 95], 184 | "min_num_bins": 10, 185 | "amplitude_extension": "spike_amplitudes" 186 | }, 187 | "firing_range": { 188 | "bin_size_s": 5, 189 | "percentiles": [5, 95] 190 | }, 191 | "synchrony": { 192 | "synchrony_sizes": [2, 4, 8] 193 | }, 194 | "nearest_neighbor": { 195 | "max_spikes": 10000, 196 | "n_neighbors": 4 197 | }, 198 | "nn_isolation": { 199 | "max_spikes": 10000, 200 | "min_spikes": 10, 201 | "n_neighbors": 4, 202 | "n_components": 10, 203 | "radius_um": 100 204 | }, 205 | "nn_noise_overlap": { 206 | "max_spikes": 10000, 207 | "min_spikes": 10, 208 | "n_neighbors": 4, 209 | "n_components": 10, 210 | "radius_um": 100 211 | }, 212 | "silhouette": { 213 | "method": ["simplified"] 214 | } 215 | } 216 | }, 217 | "curation": { 218 | "job_kwargs": { 219 | "chunk_duration": "1s", 220 | "progress_bar": false 221 | }, 222 | "query": "isi_violations_ratio < 0.5 and presence_ratio > 0.8 and amplitude_cutoff < 0.1", 223 | "noise_neural_classifier": "SpikeInterface/UnitRefine_noise_neural_classifier", 224 | "sua_mua_classifier": "SpikeInterface/UnitRefine_sua_mua_classifier" 225 | }, 226 | "visualization": { 227 | "job_kwargs": { 228 | "chunk_duration": "1s", 229 | "progress_bar": false 230 | }, 231 | "timeseries": { 232 | "n_snippets_per_segment": 2, 233 | "snippet_duration_s": 0.5 234 | }, 235 | "drift": { 236 | "detection": { 237 | "peak_sign": "neg", 238 | "detect_threshold": 5, 239 | "exclude_sweep_ms": 0.1 240 | }, 241 | "localization": { 242 | "ms_before": 0.1, 243 | "ms_after": 0.3, 244 | "radius_um": 100.0 245 | }, 246 | "n_skip": 30, 247 | "alpha": 0.15, 248 | "vmin": -200, 249 | "vmax": 0, 250 | "cmap": "Greys_r", 251 | "figsize": [10, 10] 252 | }, 253 | "motion": { 254 | "cmap": "Greys_r", 255 | "scatter_decimate": 15, 256 | "figsize": [15, 10] 257 | } 258 | }, 259 | "nwb": { 260 | "ecephys": { 261 | "backend": "zarr", 262 | "stub": false, 263 | "stub_seconds": 10, 264 | "write_lfp": true, 265 | "write_raw": false, 266 | "lfp_temporal_factor": 2, 267 | "lfp_spatial_factor": 4, 268 | "lfp_highpass_freq_min": 0.1, 269 | "surface_channel_agar_probes_indices": "", 270 | "lfp": { 271 | "filter": { 272 | "freq_min": 0.1, 273 | "freq_max": 500 274 | }, 275 | "sampling_rate": 2500 276 | } 277 | } 278 | }, 279 | "spikesorting": { 280 | "sorter": null, 281 | "kilosort4": { 282 | "job_kwargs": { 283 | "chunk_duration": "1s", 284 | "progress_bar": false 285 | }, 286 | "skip_motion_correction": false, 287 | "min_drift_channels": 96, 288 | "raise_if_fails": true, 289 | "clear_cache": false, 290 | "sorter": { 291 | "batch_size": 60000, 292 | "nblocks": 5, 293 | "Th_universal": 9, 294 | "Th_learned": 8, 295 | "do_CAR": true, 296 | "invert_sign": false, 297 | "nt": 61, 298 | "shift": null, 299 | "scale": null, 300 | "artifact_threshold": null, 301 | "nskip": 25, 302 | "whitening_range": 32, 303 | "highpass_cutoff": 300, 304 | "binning_depth": 5, 305 | "sig_interp": 20, 306 | "drift_smoothing": [0.5, 0.5, 0.5], 307 | "nt0min": null, 308 | "dmin": null, 309 | "dminx": 32, 310 | "min_template_size": 10, 311 | "template_sizes": 5, 312 | "nearest_chans": 10, 313 | "nearest_templates": 100, 314 | "max_channel_distance": null, 315 | "templates_from_data": true, 316 | "n_templates": 6, 317 | "n_pcs": 6, 318 | "Th_single_ch": 6, 319 | "acg_threshold": 0.2, 320 | 
"ccg_threshold": 0.25, 321 | "cluster_downsampling": 20, 322 | "x_centers": null, 323 | "duplicate_spike_ms": 0.25, 324 | "save_preprocessed_copy": false, 325 | "torch_device": "auto", 326 | "bad_channels": null, 327 | "clear_cache": false, 328 | "save_extra_vars": false, 329 | "do_correction": true, 330 | "keep_good_only": false, 331 | "skip_kilosort_preprocessing": false, 332 | "use_binary_file": null, 333 | "delete_recording_dat": true 334 | } 335 | }, 336 | "kilosort25": { 337 | "job_kwargs": { 338 | "chunk_duration": "1s", 339 | "progress_bar": false 340 | }, 341 | "skip_motion_correction": false, 342 | "min_drift_channels": 96, 343 | "raise_if_fails": true, 344 | "sorter": { 345 | "detect_threshold": 6, 346 | "projection_threshold": [10, 4], 347 | "preclust_threshold": 8, 348 | "car": true, 349 | "minFR": 0.1, 350 | "minfr_goodchannels": 0.1, 351 | "nblocks": 5, 352 | "sig": 20, 353 | "freq_min": 150, 354 | "sigmaMask": 30, 355 | "nPCs": 3, 356 | "ntbuff": 64, 357 | "nfilt_factor": 4, 358 | "NT": null, 359 | "AUCsplit": 0.9, 360 | "do_correction": true, 361 | "wave_length": 61, 362 | "keep_good_only": false, 363 | "skip_kilosort_preprocessing": false, 364 | "scaleproc": null, 365 | "save_rez_to_mat": false, 366 | "delete_tmp_files": ["matlab_files"], 367 | "delete_recording_dat": false 368 | } 369 | }, 370 | "spykingcircus2": { 371 | "job_kwargs": { 372 | "chunk_duration": "1s", 373 | "progress_bar": false 374 | }, 375 | "sorter": { 376 | "general": {"ms_before": 2, "ms_after": 2, "radius_um": 100}, 377 | "sparsity": {"method": "snr", "amplitude_mode": "peak_to_peak", "threshold": 0.25}, 378 | "filtering": {"freq_min": 150, "freq_max": 7000, "ftype": "bessel", "filter_order": 2, "margin_ms": 10}, 379 | "whitening": {"mode": "local", "regularize": false}, 380 | "detection": {"peak_sign": "neg", "detect_threshold": 5}, 381 | "selection": { 382 | "method": "uniform", 383 | "n_peaks_per_channel": 5000, 384 | "min_n_peaks": 100000, 385 | "select_per_channel": false, 386 | "seed": 42 387 | }, 388 | "apply_motion_correction": true, 389 | "motion_correction": {"preset": "dredge_fast"}, 390 | "merging": {"max_distance_um": 50}, 391 | "clustering": {"legacy": true}, 392 | "matching": {"method": "wobble"}, 393 | "apply_preprocessing": false, 394 | "templates_from_svd": true, 395 | "cache_preprocessing": {"mode": "no-cache", "memory_limit": 0.5, "delete_cache": true}, 396 | "multi_units_only": false, 397 | "job_kwargs": {"n_jobs": 0.9}, 398 | "seed": 42, 399 | "debug": false 400 | } 401 | } 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /pipeline/default_params.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_dispatch": { 3 | "split_segments": true, 4 | "split_groups": true, 5 | "debug": false, 6 | "debug_duration": 30, 7 | "skip_timestamps_check": false, 8 | "multi_session": false, 9 | "input": "aind" 10 | }, 11 | "preprocessing": { 12 | "job_kwargs": { 13 | "chunk_duration": "1s", 14 | "progress_bar": false 15 | }, 16 | "denoising_strategy": "cmr", 17 | "min_preprocessing_duration": 120, 18 | "filter_type": "highpass", 19 | "highpass_filter": { 20 | "freq_min": 300.0, 21 | "margin_ms": 5.0 22 | }, 23 | "bandpass_filter": { 24 | "freq_min": 300.0, 25 | "freq_max": 6000.0, 26 | "margin_ms": 5.0 27 | }, 28 | "phase_shift": { 29 | "margin_ms": 100.0 30 | }, 31 | "detect_bad_channels": { 32 | "method": "coherence+psd", 33 | "dead_channel_threshold": -0.5, 34 | "noisy_channel_threshold": 1.0, 
35 | "outside_channel_threshold": -0.3, 36 | "outside_channels_location": "top", 37 | "n_neighbors": 11, 38 | "seed": 0 39 | }, 40 | "remove_out_channels": true, 41 | "remove_bad_channels": true, 42 | "max_bad_channel_fraction": 0.5, 43 | "common_reference": { 44 | "reference": "global", 45 | "operator": "median" 46 | }, 47 | "highpass_spatial_filter": { 48 | "n_channel_pad": 60, 49 | "n_channel_taper": null, 50 | "direction": "y", 51 | "apply_agc": true, 52 | "agc_window_length_s": 0.01, 53 | "highpass_butter_order": 3, 54 | "highpass_butter_wn": 0.01 55 | }, 56 | "motion_correction": { 57 | "compute": true, 58 | "apply": false, 59 | "preset": "dredge_fast", 60 | "detect_kwargs": {}, 61 | "select_kwargs": {}, 62 | "localize_peaks_kwargs": {}, 63 | "estimate_motion_kwargs": { 64 | "win_step_norm": 0.1, 65 | "win_scale_norm": 0.1 66 | }, 67 | "interpolate_motion_kwargs": {} 68 | } 69 | }, 70 | "postprocessing": { 71 | "job_kwargs": { 72 | "chunk_duration": "1s", 73 | "progress_bar": false 74 | }, 75 | "use_motion_corrected": false, 76 | "sparsity": { 77 | "method": "radius", 78 | "radius_um": 100 79 | }, 80 | "duplicate_threshold": 0.9, 81 | "return_scaled": true, 82 | "random_spikes": { 83 | "max_spikes_per_unit": 500, 84 | "method": "uniform", 85 | "margin_size": null, 86 | "seed": null 87 | }, 88 | "noise_levels": { 89 | "num_chunks_per_segment": 20, 90 | "chunk_size": 10000, 91 | "seed": null 92 | }, 93 | "waveforms": { 94 | "ms_before": 3.0, 95 | "ms_after": 4.0, 96 | "dtype": null 97 | }, 98 | "templates": {}, 99 | "spike_amplitudes": { 100 | "peak_sign": "neg" 101 | }, 102 | "template_similarity": { 103 | "method": "cosine_similarity" 104 | }, 105 | "correlograms": { 106 | "window_ms": 50.0, 107 | "bin_ms": 1.0 108 | }, 109 | "isi_histograms": { 110 | "window_ms": 100.0, 111 | "bin_ms": 5.0 112 | }, 113 | "unit_locations": { 114 | "method": "monopolar_triangulation" 115 | }, 116 | "spike_locations": { 117 | "method": "grid_convolution" 118 | }, 119 | "template_metrics": { 120 | "upsampling_factor": 10, 121 | "sparsity": null, 122 | "include_multi_channel_metrics": true 123 | }, 124 | "principal_components": { 125 | "n_components": 5, 126 | "mode": "by_channel_local", 127 | "whiten": true 128 | }, 129 | "quality_metrics_names": [ 130 | "num_spikes", 131 | "firing_rate", 132 | "presence_ratio", 133 | "snr", 134 | "isi_violation", 135 | "rp_violation", 136 | "sliding_rp_violation", 137 | "amplitude_cutoff", 138 | "amplitude_median", 139 | "amplitude_cv", 140 | "synchrony", 141 | "firing_range", 142 | "drift", 143 | "isolation_distance", 144 | "l_ratio", 145 | "d_prime", 146 | "nearest_neighbor", 147 | "silhouette" 148 | ], 149 | "quality_metrics": { 150 | "presence_ratio": { 151 | "bin_duration_s": 60 152 | }, 153 | "snr": { 154 | "peak_sign": "neg", 155 | "peak_mode": "extremum" 156 | }, 157 | "isi_violation": { 158 | "isi_threshold_ms": 1.5, 159 | "min_isi_ms": 0 160 | }, 161 | "rp_violation": { 162 | "refractory_period_ms": 1, 163 | "censored_period_ms": 0.0 164 | }, 165 | "sliding_rp_violation": { 166 | "bin_size_ms": 0.25, 167 | "window_size_s": 1, 168 | "exclude_ref_period_below_ms": 0.5, 169 | "max_ref_period_ms": 10, 170 | "contamination_values": null 171 | }, 172 | "amplitude_cutoff": { 173 | "peak_sign": "neg", 174 | "num_histogram_bins": 100, 175 | "histogram_smoothing_value": 3, 176 | "amplitudes_bins_min_ratio": 5 177 | }, 178 | "amplitude_median": { 179 | "peak_sign": "neg" 180 | }, 181 | "amplitude_cv": { 182 | "average_num_spikes_per_bin": 50, 183 | "percentiles": [5, 
95], 184 | "min_num_bins": 10, 185 | "amplitude_extension": "spike_amplitudes" 186 | }, 187 | "firing_range": { 188 | "bin_size_s": 5, 189 | "percentiles": [5, 95] 190 | }, 191 | "synchrony": { 192 | "synchrony_sizes": [2, 4, 8] 193 | }, 194 | "nearest_neighbor": { 195 | "max_spikes": 10000, 196 | "n_neighbors": 4 197 | }, 198 | "nn_isolation": { 199 | "max_spikes": 10000, 200 | "min_spikes": 10, 201 | "n_neighbors": 4, 202 | "n_components": 10, 203 | "radius_um": 100 204 | }, 205 | "nn_noise_overlap": { 206 | "max_spikes": 10000, 207 | "min_spikes": 10, 208 | "n_neighbors": 4, 209 | "n_components": 10, 210 | "radius_um": 100 211 | }, 212 | "silhouette": { 213 | "method": ["simplified"] 214 | } 215 | } 216 | }, 217 | "curation": { 218 | "job_kwargs": { 219 | "chunk_duration": "1s", 220 | "progress_bar": false 221 | }, 222 | "query": "isi_violations_ratio < 0.5 and presence_ratio > 0.8 and amplitude_cutoff < 0.1", 223 | "noise_neural_classifier": "SpikeInterface/UnitRefine_noise_neural_classifier", 224 | "sua_mua_classifier": "SpikeInterface/UnitRefine_sua_mua_classifier" 225 | }, 226 | "visualization": { 227 | "job_kwargs": { 228 | "chunk_duration": "1s", 229 | "progress_bar": false 230 | }, 231 | "timeseries": { 232 | "n_snippets_per_segment": 2, 233 | "snippet_duration_s": 0.5 234 | }, 235 | "drift": { 236 | "detection": { 237 | "peak_sign": "neg", 238 | "detect_threshold": 5, 239 | "exclude_sweep_ms": 0.1 240 | }, 241 | "localization": { 242 | "ms_before": 0.1, 243 | "ms_after": 0.3, 244 | "radius_um": 100.0 245 | }, 246 | "n_skip": 30, 247 | "alpha": 0.15, 248 | "vmin": -200, 249 | "vmax": 0, 250 | "cmap": "Greys_r", 251 | "figsize": [10, 10] 252 | }, 253 | "motion": { 254 | "cmap": "Greys_r", 255 | "scatter_decimate": 15, 256 | "figsize": [15, 10] 257 | } 258 | }, 259 | "nwb": { 260 | "ecephys": { 261 | "backend": "zarr", 262 | "stub": false, 263 | "stub_seconds": 10, 264 | "write_lfp": true, 265 | "write_raw": false, 266 | "lfp_temporal_factor": 2, 267 | "lfp_spatial_factor": 4, 268 | "lfp_highpass_freq_min": 0.1, 269 | "surface_channel_agar_probes_indices": "", 270 | "lfp": { 271 | "filter": { 272 | "freq_min": 0.1, 273 | "freq_max": 500 274 | }, 275 | "sampling_rate": 2500 276 | } 277 | } 278 | }, 279 | "spikesorting": { 280 | "sorter": null, 281 | "kilosort4": { 282 | "job_kwargs": { 283 | "chunk_duration": "1s", 284 | "progress_bar": false 285 | }, 286 | "skip_motion_correction": false, 287 | "min_drift_channels": 96, 288 | "raise_if_fails": true, 289 | "clear_cache": false, 290 | "sorter": { 291 | "batch_size": 60000, 292 | "nblocks": 5, 293 | "Th_universal": 9, 294 | "Th_learned": 8, 295 | "do_CAR": true, 296 | "invert_sign": false, 297 | "nt": 61, 298 | "shift": null, 299 | "scale": null, 300 | "artifact_threshold": null, 301 | "nskip": 25, 302 | "whitening_range": 32, 303 | "highpass_cutoff": 300, 304 | "binning_depth": 5, 305 | "sig_interp": 20, 306 | "drift_smoothing": [0.5, 0.5, 0.5], 307 | "nt0min": null, 308 | "dmin": null, 309 | "dminx": 32, 310 | "min_template_size": 10, 311 | "template_sizes": 5, 312 | "nearest_chans": 10, 313 | "nearest_templates": 100, 314 | "max_channel_distance": null, 315 | "templates_from_data": true, 316 | "n_templates": 6, 317 | "n_pcs": 6, 318 | "Th_single_ch": 6, 319 | "acg_threshold": 0.2, 320 | "ccg_threshold": 0.25, 321 | "cluster_downsampling": 20, 322 | "x_centers": null, 323 | "duplicate_spike_ms": 0.25, 324 | "save_preprocessed_copy": false, 325 | "torch_device": "auto", 326 | "bad_channels": null, 327 | "clear_cache": false, 328 | 
"save_extra_vars": false, 329 | "do_correction": true, 330 | "keep_good_only": false, 331 | "skip_kilosort_preprocessing": false, 332 | "use_binary_file": null, 333 | "delete_recording_dat": true 334 | } 335 | }, 336 | "kilosort25": { 337 | "job_kwargs": { 338 | "chunk_duration": "1s", 339 | "progress_bar": false 340 | }, 341 | "skip_motion_correction": false, 342 | "min_drift_channels": 96, 343 | "raise_if_fails": true, 344 | "sorter": { 345 | "detect_threshold": 6, 346 | "projection_threshold": [10, 4], 347 | "preclust_threshold": 8, 348 | "car": true, 349 | "minFR": 0.1, 350 | "minfr_goodchannels": 0.1, 351 | "nblocks": 5, 352 | "sig": 20, 353 | "freq_min": 150, 354 | "sigmaMask": 30, 355 | "nPCs": 3, 356 | "ntbuff": 64, 357 | "nfilt_factor": 4, 358 | "NT": null, 359 | "AUCsplit": 0.9, 360 | "do_correction": true, 361 | "wave_length": 61, 362 | "keep_good_only": false, 363 | "skip_kilosort_preprocessing": false, 364 | "scaleproc": null, 365 | "save_rez_to_mat": false, 366 | "delete_tmp_files": ["matlab_files"], 367 | "delete_recording_dat": false 368 | } 369 | }, 370 | "spykingcircus2": { 371 | "job_kwargs": { 372 | "chunk_duration": "1s", 373 | "progress_bar": false 374 | }, 375 | "sorter": { 376 | "general": {"ms_before": 2, "ms_after": 2, "radius_um": 100}, 377 | "sparsity": {"method": "snr", "amplitude_mode": "peak_to_peak", "threshold": 0.25}, 378 | "filtering": {"freq_min": 150, "freq_max": 7000, "ftype": "bessel", "filter_order": 2, "margin_ms": 10}, 379 | "whitening": {"mode": "local", "regularize": false}, 380 | "detection": {"peak_sign": "neg", "detect_threshold": 5}, 381 | "selection": { 382 | "method": "uniform", 383 | "n_peaks_per_channel": 5000, 384 | "min_n_peaks": 100000, 385 | "select_per_channel": false, 386 | "seed": 42 387 | }, 388 | "apply_motion_correction": true, 389 | "motion_correction": {"preset": "dredge_fast"}, 390 | "merging": {"max_distance_um": 50}, 391 | "clustering": {"legacy": true}, 392 | "matching": {"method": "wobble"}, 393 | "apply_preprocessing": false, 394 | "templates_from_svd": true, 395 | "cache_preprocessing": {"mode": "no-cache", "memory_limit": 0.5, "delete_cache": true}, 396 | "multi_units_only": false, 397 | "job_kwargs": {"n_jobs": 0.9}, 398 | "seed": 42, 399 | "debug": false 400 | } 401 | } 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /pipeline/main.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | // hash:sha256:e2bd9e3d200127ccbadf2f10fcb7fe9a3524ca3f0502228c21e0f7fa61990679 3 | 4 | nextflow.enable.dsl = 1 5 | 6 | params.ecephys_url = 's3://aind-ephys-data/ecephys_713593_2024-02-08_14-10-37' 7 | 8 | capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_preprocessing_1_1 = channel.create() 9 | ecephys_to_preprocess_ecephys_2 = channel.fromPath(params.ecephys_url + "/", type: 'any') 10 | capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_curation_2_3 = channel.create() 11 | ecephys_to_job_dispatch_ecephys_4 = channel.fromPath(params.ecephys_url + "/", type: 'any') 12 | ecephys_to_postprocess_ecephys_5 = channel.fromPath(params.ecephys_url + "/", type: 'any') 13 | capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_postprocessing_5_6 = channel.create() 14 | capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_postprocessing_5_7 = channel.create() 15 | capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_postprocessing_5_8 = channel.create() 16 | 
capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_visualization_6_9 = channel.create() 17 | capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_visualization_6_10 = channel.create() 18 | capsule_aind_ephys_curation_2_to_capsule_aind_ephys_visualization_6_11 = channel.create() 19 | capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_visualization_6_12 = channel.create() 20 | capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_visualization_6_13 = channel.create() 21 | ecephys_to_visualize_ecephys_14 = channel.fromPath(params.ecephys_url + "/", type: 'any') 22 | capsule_aind_ephys_preprocessing_1_to_capsule_spikesort_kilosort_4_ecephys_7_15 = channel.create() 23 | capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_results_collector_9_16 = channel.create() 24 | capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_results_collector_9_17 = channel.create() 25 | capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_results_collector_9_18 = channel.create() 26 | capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_results_collector_9_19 = channel.create() 27 | capsule_aind_ephys_curation_2_to_capsule_aind_ephys_results_collector_9_20 = channel.create() 28 | capsule_aind_ephys_visualization_6_to_capsule_aind_ephys_results_collector_9_21 = channel.create() 29 | ecephys_to_collect_results_ecephys_22 = channel.fromPath(params.ecephys_url + "/", type: 'any') 30 | capsule_aind_ephys_job_dispatch_4_to_capsule_nwb_packaging_units_11_23 = channel.create() 31 | capsule_nwb_packaging_ecephys_capsule_12_to_capsule_nwb_packaging_units_11_24 = channel.create() 32 | capsule_aind_ephys_results_collector_9_to_capsule_nwb_packaging_units_11_25 = channel.create() 33 | ecephys_to_nwb_packaging_units_26 = channel.fromPath(params.ecephys_url + "/", type: 'any') 34 | capsule_aind_ephys_job_dispatch_4_to_capsule_nwb_packaging_ecephys_capsule_12_27 = channel.create() 35 | ecephys_to_nwb_packaging_ecephys_28 = channel.fromPath(params.ecephys_url + "/", type: 'any') 36 | capsule_aind_ephys_job_dispatch_4_to_capsule_quality_control_ecephys_13_29 = channel.create() 37 | capsule_aind_ephys_results_collector_9_to_capsule_quality_control_ecephys_13_30 = channel.create() 38 | ecephys_to_quality_control_ecephys_31 = channel.fromPath(params.ecephys_url + "/", type: 'any') 39 | capsule_quality_control_ecephys_13_to_capsule_quality_control_collector_ecephys_14_32 = channel.create() 40 | 41 | // capsule - Preprocess Ecephys 42 | process capsule_aind_ephys_preprocessing_1 { 43 | tag 'capsule-0331265' 44 | container "$REGISTRY_HOST/published/49b76676-d1f6-4202-9473-c763b2b83563:v6" 45 | 46 | cpus 16 47 | memory '60 GB' 48 | 49 | input: 50 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_preprocessing_1_1.flatten() 51 | path 'capsule/data/ecephys_session' from ecephys_to_preprocess_ecephys_2.collect() 52 | 53 | output: 54 | path 'capsule/results/*' into capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_postprocessing_5_7 55 | path 'capsule/results/*' into capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_visualization_6_10 56 | path 'capsule/results/*' into capsule_aind_ephys_preprocessing_1_to_capsule_spikesort_kilosort_4_ecephys_7_15 57 | path 'capsule/results/*' into capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_results_collector_9_17 58 | 59 | script: 60 | """ 61 | #!/usr/bin/env bash 62 | set -e 63 | 64 | export CO_CAPSULE_ID=49b76676-d1f6-4202-9473-c763b2b83563 65 | export CO_CPUS=16 66 | export CO_MEMORY=64424509440 67 | 68 
| mkdir -p capsule 69 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 70 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 71 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 72 | 73 | echo "[${task.tag}] cloning git repo..." 74 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 75 | git clone --filter=tree:0 --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-0331265.git" capsule-repo 76 | else 77 | git clone --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-0331265.git" capsule-repo 78 | fi 79 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 80 | rm -rf capsule-repo 81 | 82 | echo "[${task.tag}] running capsule..." 83 | cd capsule/code 84 | chmod +x run 85 | ./run ${params.capsule_aind_ephys_preprocessing_1_args} 86 | 87 | echo "[${task.tag}] completed!" 88 | """ 89 | } 90 | 91 | // capsule - Curate Ecephys 92 | process capsule_aind_ephys_curation_2 { 93 | tag 'capsule-3565647' 94 | container "$REGISTRY_HOST/published/da74428e-26f9-4f08-a9bf-898dfca44722:v5" 95 | 96 | cpus 8 97 | memory '60 GB' 98 | 99 | input: 100 | path 'capsule/data/' from capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_curation_2_3 101 | 102 | output: 103 | path 'capsule/results/*' into capsule_aind_ephys_curation_2_to_capsule_aind_ephys_visualization_6_11 104 | path 'capsule/results/*' into capsule_aind_ephys_curation_2_to_capsule_aind_ephys_results_collector_9_20 105 | 106 | script: 107 | """ 108 | #!/usr/bin/env bash 109 | set -e 110 | 111 | export CO_CAPSULE_ID=da74428e-26f9-4f08-a9bf-898dfca44722 112 | export CO_CPUS=8 113 | export CO_MEMORY=64424509440 114 | 115 | mkdir -p capsule 116 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 117 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 118 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 119 | 120 | echo "[${task.tag}] cloning git repo..." 121 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 122 | git clone --filter=tree:0 --branch v5.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-3565647.git" capsule-repo 123 | else 124 | git clone --branch v5.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-3565647.git" capsule-repo 125 | fi 126 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 127 | rm -rf capsule-repo 128 | 129 | echo "[${task.tag}] running capsule..." 130 | cd capsule/code 131 | chmod +x run 132 | ./run 133 | 134 | echo "[${task.tag}] completed!" 
135 | """ 136 | } 137 | 138 | // capsule - Job Dispatch Ecephys 139 | process capsule_aind_ephys_job_dispatch_4 { 140 | tag 'capsule-6237826' 141 | container "$REGISTRY_HOST/published/d75d79c4-8f21-4d17-83ec-13b2a43dcaa0:v6" 142 | 143 | cpus 4 144 | memory '30 GB' 145 | 146 | input: 147 | path 'capsule/data/ecephys_session' from ecephys_to_job_dispatch_ecephys_4.collect() 148 | 149 | output: 150 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_preprocessing_1_1 151 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_postprocessing_5_8 152 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_visualization_6_9 153 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_results_collector_9_16 154 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_nwb_packaging_units_11_23 155 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_nwb_packaging_ecephys_capsule_12_27 156 | path 'capsule/results/*' into capsule_aind_ephys_job_dispatch_4_to_capsule_quality_control_ecephys_13_29 157 | 158 | script: 159 | """ 160 | #!/usr/bin/env bash 161 | set -e 162 | 163 | export CO_CAPSULE_ID=d75d79c4-8f21-4d17-83ec-13b2a43dcaa0 164 | export CO_CPUS=4 165 | export CO_MEMORY=32212254720 166 | 167 | mkdir -p capsule 168 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 169 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 170 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 171 | 172 | echo "[${task.tag}] cloning git repo..." 173 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 174 | git clone --filter=tree:0 --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-6237826.git" capsule-repo 175 | else 176 | git clone --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-6237826.git" capsule-repo 177 | fi 178 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 179 | rm -rf capsule-repo 180 | 181 | echo "[${task.tag}] running capsule..." 182 | cd capsule/code 183 | chmod +x run 184 | ./run ${params.capsule_aind_ephys_job_dispatch_4_args} 185 | 186 | echo "[${task.tag}] completed!" 
187 | """ 188 | } 189 | 190 | // capsule - Postprocess Ecephys 191 | process capsule_aind_ephys_postprocessing_5 { 192 | tag 'capsule-4319008' 193 | container "$REGISTRY_HOST/published/1639e98a-74dc-4b37-9464-1b6a3868c9b0:v6" 194 | 195 | cpus 16 196 | memory '60 GB' 197 | 198 | input: 199 | path 'capsule/data/ecephys_session' from ecephys_to_postprocess_ecephys_5.collect() 200 | path 'capsule/data/' from capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_postprocessing_5_6.collect() 201 | path 'capsule/data/' from capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_postprocessing_5_7.collect() 202 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_postprocessing_5_8.flatten() 203 | 204 | output: 205 | path 'capsule/results/*' into capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_curation_2_3 206 | path 'capsule/results/*' into capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_visualization_6_13 207 | path 'capsule/results/*' into capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_results_collector_9_19 208 | 209 | script: 210 | """ 211 | #!/usr/bin/env bash 212 | set -e 213 | 214 | export CO_CAPSULE_ID=1639e98a-74dc-4b37-9464-1b6a3868c9b0 215 | export CO_CPUS=16 216 | export CO_MEMORY=64424509440 217 | 218 | mkdir -p capsule 219 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 220 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 221 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 222 | 223 | echo "[${task.tag}] cloning git repo..." 224 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 225 | git clone --filter=tree:0 --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-4319008.git" capsule-repo 226 | else 227 | git clone --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-4319008.git" capsule-repo 228 | fi 229 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 230 | rm -rf capsule-repo 231 | 232 | echo "[${task.tag}] running capsule..." 233 | cd capsule/code 234 | chmod +x run 235 | ./run 236 | 237 | echo "[${task.tag}] completed!" 
238 | """ 239 | } 240 | 241 | // capsule - Visualize Ecephys 242 | process capsule_aind_ephys_visualization_6 { 243 | tag 'capsule-6869873' 244 | container "$REGISTRY_HOST/published/e7af8ddc-08ca-418b-9e36-8249e363404e:v6" 245 | 246 | cpus 4 247 | memory '30 GB' 248 | 249 | input: 250 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_visualization_6_9.collect() 251 | path 'capsule/data/' from capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_visualization_6_10 252 | path 'capsule/data/' from capsule_aind_ephys_curation_2_to_capsule_aind_ephys_visualization_6_11.collect() 253 | path 'capsule/data/' from capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_visualization_6_12.collect() 254 | path 'capsule/data/' from capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_visualization_6_13.collect() 255 | path 'capsule/data/ecephys_session' from ecephys_to_visualize_ecephys_14.collect() 256 | 257 | output: 258 | path 'capsule/results/*' into capsule_aind_ephys_visualization_6_to_capsule_aind_ephys_results_collector_9_21 259 | 260 | script: 261 | """ 262 | #!/usr/bin/env bash 263 | set -e 264 | 265 | export CO_CAPSULE_ID=e7af8ddc-08ca-418b-9e36-8249e363404e 266 | export CO_CPUS=4 267 | export CO_MEMORY=32212254720 268 | 269 | mkdir -p capsule 270 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 271 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 272 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 273 | 274 | echo "[${task.tag}] cloning git repo..." 275 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 276 | git clone --filter=tree:0 --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-6869873.git" capsule-repo 277 | else 278 | git clone --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-6869873.git" capsule-repo 279 | fi 280 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 281 | rm -rf capsule-repo 282 | 283 | echo "[${task.tag}] running capsule..." 284 | cd capsule/code 285 | chmod +x run 286 | ./run 287 | 288 | echo "[${task.tag}] completed!" 289 | """ 290 | } 291 | 292 | // capsule - Spikesort Kilosort4 Ecephys 293 | process capsule_spikesort_kilosort_4_ecephys_7 { 294 | tag 'capsule-4110207' 295 | container "$REGISTRY_HOST/published/3372ccfd-0388-4e1e-8c4f-46b470fcf871:v4" 296 | 297 | cpus 16 298 | memory '60 GB' 299 | accelerator 1 300 | label 'gpu' 301 | 302 | input: 303 | path 'capsule/data/' from capsule_aind_ephys_preprocessing_1_to_capsule_spikesort_kilosort_4_ecephys_7_15 304 | 305 | output: 306 | path 'capsule/results/*' into capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_postprocessing_5_6 307 | path 'capsule/results/*' into capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_visualization_6_12 308 | path 'capsule/results/*' into capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_results_collector_9_18 309 | 310 | script: 311 | """ 312 | #!/usr/bin/env bash 313 | set -e 314 | 315 | export CO_CAPSULE_ID=3372ccfd-0388-4e1e-8c4f-46b470fcf871 316 | export CO_CPUS=16 317 | export CO_MEMORY=64424509440 318 | 319 | mkdir -p capsule 320 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 321 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 322 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 323 | 324 | echo "[${task.tag}] cloning git repo..." 
325 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 326 | git clone --filter=tree:0 --branch v4.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-4110207.git" capsule-repo 327 | else 328 | git clone --branch v4.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-4110207.git" capsule-repo 329 | fi 330 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 331 | rm -rf capsule-repo 332 | 333 | echo "[${task.tag}] running capsule..." 334 | cd capsule/code 335 | chmod +x run 336 | ./run ${params.capsule_spikesort_kilosort_4_ecephys_7_args} 337 | 338 | echo "[${task.tag}] completed!" 339 | """ 340 | } 341 | 342 | // capsule - Collect Results Ecephys 343 | process capsule_aind_ephys_results_collector_9 { 344 | tag 'capsule-0338545' 345 | container "$REGISTRY_HOST/published/5b7e48bb-8123-4b4c-b7bf-ebaa2de8555e:v6" 346 | 347 | cpus 4 348 | memory '30 GB' 349 | 350 | publishDir "$RESULTS_PATH", saveAs: { filename -> new File(filename).getName() } 351 | 352 | input: 353 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_aind_ephys_results_collector_9_16.collect() 354 | path 'capsule/data/' from capsule_aind_ephys_preprocessing_1_to_capsule_aind_ephys_results_collector_9_17.collect() 355 | path 'capsule/data/' from capsule_spikesort_kilosort_4_ecephys_7_to_capsule_aind_ephys_results_collector_9_18.collect() 356 | path 'capsule/data/' from capsule_aind_ephys_postprocessing_5_to_capsule_aind_ephys_results_collector_9_19.collect() 357 | path 'capsule/data/' from capsule_aind_ephys_curation_2_to_capsule_aind_ephys_results_collector_9_20.collect() 358 | path 'capsule/data/' from capsule_aind_ephys_visualization_6_to_capsule_aind_ephys_results_collector_9_21.collect() 359 | path 'capsule/data/ecephys_session' from ecephys_to_collect_results_ecephys_22.collect() 360 | 361 | output: 362 | path 'capsule/results/*' 363 | path 'capsule/results/*' into capsule_aind_ephys_results_collector_9_to_capsule_nwb_packaging_units_11_25 364 | path 'capsule/results/*' into capsule_aind_ephys_results_collector_9_to_capsule_quality_control_ecephys_13_30 365 | 366 | script: 367 | """ 368 | #!/usr/bin/env bash 369 | set -e 370 | 371 | export CO_CAPSULE_ID=5b7e48bb-8123-4b4c-b7bf-ebaa2de8555e 372 | export CO_CPUS=4 373 | export CO_MEMORY=32212254720 374 | 375 | mkdir -p capsule 376 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 377 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 378 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 379 | 380 | echo "[${task.tag}] cloning git repo..." 381 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 382 | git clone --filter=tree:0 --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-0338545.git" capsule-repo 383 | else 384 | git clone --branch v6.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-0338545.git" capsule-repo 385 | fi 386 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 387 | rm -rf capsule-repo 388 | 389 | echo "[${task.tag}] running capsule..." 390 | cd capsule/code 391 | chmod +x run 392 | ./run ${params.capsule_aind_ephys_results_collector_9_args} 393 | 394 | echo "[${task.tag}] completed!" 
395 | """ 396 | } 397 | 398 | // capsule - NWB Packaging Units 399 | process capsule_nwb_packaging_units_11 { 400 | tag 'capsule-5841110' 401 | container "$REGISTRY_HOST/published/b9333ffe-ae7c-4b67-882f-ea71054889dd:v8" 402 | 403 | cpus 4 404 | memory '30 GB' 405 | 406 | publishDir "$RESULTS_PATH/nwb", saveAs: { filename -> new File(filename).getName() } 407 | 408 | input: 409 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_nwb_packaging_units_11_23.collect() 410 | path 'capsule/data/' from capsule_nwb_packaging_ecephys_capsule_12_to_capsule_nwb_packaging_units_11_24.collect() 411 | path 'capsule/data/' from capsule_aind_ephys_results_collector_9_to_capsule_nwb_packaging_units_11_25.collect() 412 | path 'capsule/data/ecephys_session' from ecephys_to_nwb_packaging_units_26.collect() 413 | 414 | output: 415 | path 'capsule/results/*' 416 | 417 | script: 418 | """ 419 | #!/usr/bin/env bash 420 | set -e 421 | 422 | export CO_CAPSULE_ID=b9333ffe-ae7c-4b67-882f-ea71054889dd 423 | export CO_CPUS=4 424 | export CO_MEMORY=32212254720 425 | 426 | mkdir -p capsule 427 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 428 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 429 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 430 | 431 | echo "[${task.tag}] cloning git repo..." 432 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 433 | git clone --filter=tree:0 --branch v8.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-5841110.git" capsule-repo 434 | else 435 | git clone --branch v8.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-5841110.git" capsule-repo 436 | fi 437 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 438 | rm -rf capsule-repo 439 | 440 | echo "[${task.tag}] running capsule..." 441 | cd capsule/code 442 | chmod +x run 443 | ./run ${params.capsule_nwb_packaging_units_11_args} 444 | 445 | echo "[${task.tag}] completed!" 446 | """ 447 | } 448 | 449 | // capsule - NWB Packaging Ecephys 450 | process capsule_nwb_packaging_ecephys_capsule_12 { 451 | tag 'capsule-3438484' 452 | container "$REGISTRY_HOST/published/b16dfc92-eab4-425d-978f-0ba61632c413:v9" 453 | 454 | cpus 8 455 | memory '60 GB' 456 | 457 | input: 458 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_nwb_packaging_ecephys_capsule_12_27.collect() 459 | path 'capsule/data/ecephys_session' from ecephys_to_nwb_packaging_ecephys_28.collect() 460 | 461 | output: 462 | path 'capsule/results/*' into capsule_nwb_packaging_ecephys_capsule_12_to_capsule_nwb_packaging_units_11_24 463 | 464 | script: 465 | """ 466 | #!/usr/bin/env bash 467 | set -e 468 | 469 | export CO_CAPSULE_ID=b16dfc92-eab4-425d-978f-0ba61632c413 470 | export CO_CPUS=8 471 | export CO_MEMORY=64424509440 472 | 473 | mkdir -p capsule 474 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 475 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 476 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 477 | 478 | echo "[${task.tag}] cloning git repo..." 
479 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 480 | git clone --filter=tree:0 --branch v9.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-3438484.git" capsule-repo 481 | else 482 | git clone --branch v9.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-3438484.git" capsule-repo 483 | fi 484 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 485 | rm -rf capsule-repo 486 | 487 | echo "[${task.tag}] running capsule..." 488 | cd capsule/code 489 | chmod +x run 490 | ./run ${params.capsule_nwb_packaging_ecephys_capsule_12_args} 491 | 492 | echo "[${task.tag}] completed!" 493 | """ 494 | } 495 | 496 | // capsule - Quality Control Ecephys 497 | process capsule_quality_control_ecephys_13 { 498 | tag 'capsule-0625308' 499 | container "$REGISTRY_HOST/published/56a55c84-3013-4683-be83-14d607d2cfe6:v7" 500 | 501 | cpus 8 502 | memory '60 GB' 503 | 504 | input: 505 | path 'capsule/data/' from capsule_aind_ephys_job_dispatch_4_to_capsule_quality_control_ecephys_13_29.flatten() 506 | path 'capsule/data/' from capsule_aind_ephys_results_collector_9_to_capsule_quality_control_ecephys_13_30.collect() 507 | path 'capsule/data/ecephys_session' from ecephys_to_quality_control_ecephys_31.collect() 508 | 509 | output: 510 | path 'capsule/results/*' into capsule_quality_control_ecephys_13_to_capsule_quality_control_collector_ecephys_14_32 511 | 512 | script: 513 | """ 514 | #!/usr/bin/env bash 515 | set -e 516 | 517 | export CO_CAPSULE_ID=56a55c84-3013-4683-be83-14d607d2cfe6 518 | export CO_CPUS=8 519 | export CO_MEMORY=64424509440 520 | 521 | mkdir -p capsule 522 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 523 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 524 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 525 | 526 | echo "[${task.tag}] cloning git repo..." 527 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 528 | git clone --filter=tree:0 --branch v7.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-0625308.git" capsule-repo 529 | else 530 | git clone --branch v7.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-0625308.git" capsule-repo 531 | fi 532 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 533 | rm -rf capsule-repo 534 | 535 | echo "[${task.tag}] running capsule..." 536 | cd capsule/code 537 | chmod +x run 538 | ./run 539 | 540 | echo "[${task.tag}] completed!" 
541 | """ 542 | } 543 | 544 | // capsule - Quality Control Collector Ecephys 545 | process capsule_quality_control_collector_ecephys_14 { 546 | tag 'capsule-8310834' 547 | container "$REGISTRY_HOST/published/324399bc-41bd-43f2-8da4-954bd243973f:v1" 548 | 549 | cpus 1 550 | memory '7.5 GB' 551 | 552 | publishDir "$RESULTS_PATH", saveAs: { filename -> new File(filename).getName() } 553 | 554 | input: 555 | path 'capsule/data/' from capsule_quality_control_ecephys_13_to_capsule_quality_control_collector_ecephys_14_32.collect() 556 | 557 | output: 558 | path 'capsule/results/*' 559 | 560 | script: 561 | """ 562 | #!/usr/bin/env bash 563 | set -e 564 | 565 | export CO_CAPSULE_ID=324399bc-41bd-43f2-8da4-954bd243973f 566 | export CO_CPUS=1 567 | export CO_MEMORY=8053063680 568 | 569 | mkdir -p capsule 570 | mkdir -p capsule/data && ln -s \$PWD/capsule/data /data 571 | mkdir -p capsule/results && ln -s \$PWD/capsule/results /results 572 | mkdir -p capsule/scratch && ln -s \$PWD/capsule/scratch /scratch 573 | 574 | echo "[${task.tag}] cloning git repo..." 575 | if [[ "\$(printf '%s\n' "2.20.0" "\$(git version | awk '{print \$3}')" | sort -V | head -n1)" = "2.20.0" ]]; then 576 | git clone --filter=tree:0 --branch v1.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-8310834.git" capsule-repo 577 | else 578 | git clone --branch v1.0 "https://\$GIT_ACCESS_TOKEN@\$GIT_HOST/capsule-8310834.git" capsule-repo 579 | fi 580 | mv capsule-repo/code capsule/code && ln -s \$PWD/capsule/code /code 581 | rm -rf capsule-repo 582 | 583 | echo "[${task.tag}] running capsule..." 584 | cd capsule/code 585 | chmod +x run 586 | ./run 587 | 588 | echo "[${task.tag}] completed!" 589 | """ 590 | } 591 | -------------------------------------------------------------------------------- /pipeline/main_multi_backend.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | nextflow.enable.dsl = 2 3 | 4 | params.ecephys_path = DATA_PATH 5 | params.params_file = null 6 | 7 | // Git repository prefix - can be overridden via command line or environment variable 8 | params.git_repo_prefix = System.getenv('GIT_REPO_PREFIX') ?: 'https://github.com/AllenNeuralDynamics/aind-' 9 | 10 | // Helper function for git cloning 11 | def gitCloneFunction = ''' 12 | clone_repo() { 13 | local repo_url="$1" 14 | local commit_hash="$2" 15 | 16 | echo "cloning git repo: \${repo_url} (commit: \${commit_hash})..." 
17 | 18 | git clone "\${repo_url}" capsule-repo 19 | git -C capsule-repo -c core.fileMode=false checkout "\${commit_hash}" --quiet 20 | 21 | mv capsule-repo/code capsule/code 22 | rm -rf capsule-repo 23 | } 24 | ''' 25 | 26 | println "DATA_PATH: ${DATA_PATH}" 27 | println "RESULTS_PATH: ${RESULTS_PATH}" 28 | 29 | // Load parameters from JSON file if provided 30 | def json_params = [:] 31 | if (params.params_file) { 32 | json_params = new groovy.json.JsonSlurper().parseText(new File(params.params_file).text) 33 | println "Loaded parameters from ${params.params_file}" 34 | } 35 | 36 | println "PARAMS: ${params}" 37 | 38 | // get commit hashes for capsules 39 | params.capsule_versions = "${baseDir}/capsule_versions.env" 40 | def versions = [:] 41 | file(params.capsule_versions).eachLine { line -> 42 | def (key, value) = line.tokenize('=') 43 | versions[key] = value 44 | } 45 | 46 | // container tag 47 | params.container_tag = "si-${versions['SPIKEINTERFACE_VERSION']}" 48 | println "CONTAINER TAG: ${params.container_tag}" 49 | 50 | params_keys = params.keySet() 51 | 52 | // if not specified, assume local executor 53 | if (!params_keys.contains('executor')) { 54 | params.executor = "local" 55 | } 56 | // set global n_jobs for local executor 57 | if (params.executor == "local") 58 | { 59 | if ("n_jobs" in params_keys) { 60 | n_jobs = params.n_jobs 61 | } 62 | else { 63 | n_jobs = -1 64 | } 65 | println "N JOBS: ${n_jobs}" 66 | job_args=" --n-jobs ${n_jobs}" 67 | } 68 | else { 69 | job_args="" 70 | } 71 | 72 | // set runmode 73 | if ("runmode" in params_keys) { 74 | runmode = params.runmode 75 | } 76 | else { 77 | runmode = "full" 78 | } 79 | println "Using RUNMODE: ${runmode}" 80 | 81 | if (params.params_file) { 82 | println "Using parameters from JSON file: ${params.params_file}" 83 | } else { 84 | println "No parameters file provided, using command line arguments." 
85 | } 86 | 87 | // Initialize args variables with params from JSON file or command line args 88 | def job_dispatch_args = "" 89 | if (params.params_file && json_params.job_dispatch) { 90 | job_dispatch_args = "--params '${groovy.json.JsonOutput.toJson(json_params.job_dispatch)}'" 91 | } else if ("job_dispatch_args" in params_keys) { 92 | job_dispatch_args = params.job_dispatch_args 93 | } 94 | 95 | def preprocessing_args = "" 96 | if (params.params_file && json_params.preprocessing) { 97 | preprocessing_args = "--params '${groovy.json.JsonOutput.toJson(json_params.preprocessing)}'" 98 | } else if ("preprocessing_args" in params_keys) { 99 | preprocessing_args = params.preprocessing_args 100 | } 101 | 102 | def postprocessing_args = "" 103 | if (params.params_file && json_params.postprocessing) { 104 | postprocessing_args = "--params '${groovy.json.JsonOutput.toJson(json_params.postprocessing)}'" 105 | } else if ("postprocessing_args" in params_keys) { 106 | postprocessing_args = params.postprocessing_args 107 | } 108 | 109 | def curation_args = "" 110 | if (params.params_file && json_params.curation) { 111 | curation_args = "--params '${groovy.json.JsonOutput.toJson(json_params.curation)}'" 112 | } else if ("curation_args" in params_keys) { 113 | curation_args = params.curation_args 114 | } 115 | 116 | def visualization_kwargs = "" 117 | if (params.params_file && json_params.visualization) { 118 | visualization_kwargs = "--params '${groovy.json.JsonOutput.toJson(json_params.visualization)}'" 119 | } else if ("visualization_kwargs" in params_keys) { 120 | visualization_kwargs = params.visualization_kwargs 121 | } 122 | 123 | def nwb_ecephys_args = "" 124 | if (params.params_file && json_params.nwb?.ecephys) { 125 | nwb_ecephys_args = "--params '${groovy.json.JsonOutput.toJson(json_params.nwb.ecephys)}'" 126 | } else if ("nwb_ecephys_args" in params_keys) { 127 | nwb_ecephys_args = params.nwb_ecephys_args 128 | } 129 | 130 | // For spikesorting, use the parameters for the selected sorter 131 | def sorter = null 132 | if (params.params_file && json_params.spikesorting) { 133 | sorter = json_params.spikesorting.sorter ?: null 134 | } 135 | 136 | if (sorter == null && "sorter" in params_keys) { 137 | sorter = params.sorter ?: "kilosort4" 138 | } 139 | 140 | def spikesorting_args = "" 141 | if (params.params_file && json_params.spikesorting) { 142 | def sorter_params = json_params.spikesorting[sorter] 143 | if (sorter_params) { 144 | spikesorting_args = "--params '${groovy.json.JsonOutput.toJson(sorter_params)}'" 145 | } 146 | } else if ("spikesorting_args" in params_keys) { 147 | spikesorting_args = params.spikesorting_args 148 | } 149 | 150 | if (sorter == null) { 151 | println "No sorter specified, defaulting to kilosort4" 152 | sorter = "kilosort4" 153 | } 154 | 155 | println "Using SORTER: ${sorter} with args: ${spikesorting_args}" 156 | 157 | if (runmode == 'fast'){ 158 | preprocessing_args = "--motion skip" 159 | postprocessing_args = "--skip-extensions spike_locations,principal_components" 160 | nwb_ecephys_args = "--skip-lfp" 161 | println "Running in fast mode. 
Setting parameters:" 162 | println "preprocessing_args: ${preprocessing_args}" 163 | println "postprocessing_args: ${postprocessing_args}" 164 | println "nwb_ecephys_args: ${nwb_ecephys_args}" 165 | } 166 | 167 | // Process definitions 168 | process job_dispatch { 169 | tag 'job-dispatch' 170 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 171 | container container_name 172 | 173 | input: 174 | path input_folder, stageAs: 'capsule/data/ecephys_session' 175 | 176 | output: 177 | path 'capsule/results/*', emit: results 178 | path 'max_duration.txt', emit: max_duration_file // file containing the value 179 | 180 | 181 | script: 182 | """ 183 | #!/usr/bin/env bash 184 | set -e 185 | 186 | mkdir -p capsule 187 | mkdir -p capsule/data 188 | mkdir -p capsule/results 189 | mkdir -p capsule/scratch 190 | 191 | if [[ ${params.executor} == "slurm" ]]; then 192 | echo "[${task.tag}] allocated task time: ${task.time}" 193 | fi 194 | 195 | TASK_DIR=\$(pwd) 196 | 197 | echo "[${task.tag}] cloning git repo..." 198 | ${gitCloneFunction} 199 | clone_repo "${params.git_repo_prefix}ephys-job-dispatch.git" "${versions['JOB_DISPATCH']}" 200 | 201 | echo "[${task.tag}] running capsule..." 202 | cd capsule/code 203 | chmod +x run 204 | ./run ${job_dispatch_args} 205 | 206 | MAX_DURATION_MIN=\$(python get_max_recording_duration_min.py) 207 | 208 | cd \$TASK_DIR 209 | echo "\$MAX_DURATION_MIN" > max_duration.txt 210 | 211 | echo "[${task.tag}] completed!" 212 | 213 | """ 214 | } 215 | 216 | process preprocessing { 217 | tag 'preprocessing' 218 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 219 | container container_name 220 | 221 | input: 222 | val max_duration_minutes 223 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 224 | path job_dispatch_results, stageAs: 'capsule/data/*' 225 | 226 | output: 227 | path 'capsule/results/*', emit: results 228 | 229 | script: 230 | """ 231 | #!/usr/bin/env bash 232 | set -e 233 | 234 | mkdir -p capsule 235 | mkdir -p capsule/data 236 | mkdir -p capsule/results 237 | mkdir -p capsule/scratch 238 | 239 | if [[ ${params.executor} == "slurm" ]]; then 240 | echo "[${task.tag}] allocated task time: ${task.time}" 241 | fi 242 | 243 | echo "[${task.tag}] cloning git repo..." 244 | ${gitCloneFunction} 245 | clone_repo "${params.git_repo_prefix}ephys-preprocessing.git" "${versions['PREPROCESSING']}" 246 | 247 | echo "[${task.tag}] running capsule..." 248 | cd capsule/code 249 | chmod +x run 250 | ./run ${preprocessing_args} ${job_args} 251 | 252 | echo "[${task.tag}] completed!" 253 | """ 254 | } 255 | 256 | process spikesort_kilosort25 { 257 | tag 'spikesort-kilosort25' 258 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort25:${params.container_tag}" 259 | container container_name 260 | 261 | input: 262 | val max_duration_minutes 263 | path preprocessing_results, stageAs: 'capsule/data/*' 264 | 265 | output: 266 | path 'capsule/results/*', emit: results 267 | 268 | script: 269 | """ 270 | #!/usr/bin/env bash 271 | set -e 272 | 273 | mkdir -p capsule 274 | mkdir -p capsule/data 275 | mkdir -p capsule/results 276 | mkdir -p capsule/scratch 277 | 278 | if [[ ${params.executor} == "slurm" ]]; then 279 | echo "[${task.tag}] allocated task time: ${task.time}" 280 | fi 281 | 282 | echo "[${task.tag}] cloning git repo..." 
283 | ${gitCloneFunction} 284 | clone_repo "${params.git_repo_prefix}ephys-spikesort-kilosort25.git" "${versions['SPIKESORT_KS25']}" 285 | 286 | echo "[${task.tag}] running capsule..." 287 | cd capsule/code 288 | chmod +x run 289 | ./run ${spikesorting_args} ${job_args} 290 | 291 | echo "[${task.tag}] completed!" 292 | """ 293 | } 294 | 295 | process spikesort_kilosort4 { 296 | tag 'spikesort-kilosort4' 297 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-spikesort-kilosort4:${params.container_tag}" 298 | container container_name 299 | 300 | input: 301 | val max_duration_minutes 302 | path preprocessing_results, stageAs: 'capsule/data/*' 303 | 304 | output: 305 | path 'capsule/results/*', emit: results 306 | 307 | script: 308 | """ 309 | #!/usr/bin/env bash 310 | set -e 311 | 312 | mkdir -p capsule 313 | mkdir -p capsule/data 314 | mkdir -p capsule/results 315 | mkdir -p capsule/scratch 316 | 317 | if [[ ${params.executor} == "slurm" ]]; then 318 | echo "[${task.tag}] allocated task time: ${task.time}" 319 | fi 320 | 321 | echo "[${task.tag}] cloning git repo..." 322 | ${gitCloneFunction} 323 | clone_repo "${params.git_repo_prefix}ephys-spikesort-kilosort4.git" "${versions['SPIKESORT_KS4']}" 324 | 325 | echo "[${task.tag}] running capsule..." 326 | cd capsule/code 327 | chmod +x run 328 | ./run ${spikesorting_args} ${job_args} 329 | 330 | echo "[${task.tag}] completed!" 331 | """ 332 | } 333 | 334 | process spikesort_spykingcircus2 { 335 | tag 'spikesort-spykingcircus2' 336 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 337 | container container_name 338 | 339 | input: 340 | val max_duration_minutes 341 | path preprocessing_results, stageAs: 'capsule/data/*' 342 | 343 | output: 344 | path 'capsule/results/*', emit: results 345 | 346 | script: 347 | """ 348 | #!/usr/bin/env bash 349 | set -e 350 | 351 | mkdir -p capsule 352 | mkdir -p capsule/data 353 | mkdir -p capsule/results 354 | mkdir -p capsule/scratch 355 | 356 | if [[ ${params.executor} == "slurm" ]]; then 357 | echo "[${task.tag}] allocated task time: ${task.time}" 358 | fi 359 | 360 | echo "[${task.tag}] cloning git repo..." 361 | ${gitCloneFunction} 362 | clone_repo "${params.git_repo_prefix}ephys-spikesort-spykingcircus2.git" "${versions['SPIKESORT_SC2']}" 363 | 364 | echo "[${task.tag}] running capsule..." 365 | cd capsule/code 366 | chmod +x run 367 | ./run ${spikesorting_args} ${job_args} 368 | 369 | echo "[${task.tag}] completed!" 370 | """ 371 | } 372 | 373 | process postprocessing { 374 | tag 'postprocessing' 375 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 376 | container container_name 377 | 378 | input: 379 | val max_duration_minutes 380 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 381 | path job_dispatch_results, stageAs: 'capsule/data/*' 382 | path preprocessing_results, stageAs: 'capsule/data/*' 383 | path spikesort_results, stageAs: 'capsule/data/*' 384 | 385 | output: 386 | path 'capsule/results/*', emit: results 387 | 388 | script: 389 | """ 390 | #!/usr/bin/env bash 391 | set -e 392 | 393 | mkdir -p capsule 394 | mkdir -p capsule/data 395 | mkdir -p capsule/results 396 | mkdir -p capsule/scratch 397 | 398 | if [[ ${params.executor} == "slurm" ]]; then 399 | echo "[${task.tag}] allocated task time: ${task.time}" 400 | fi 401 | 402 | echo "[${task.tag}] cloning git repo..." 
403 | ${gitCloneFunction} 404 | clone_repo "${params.git_repo_prefix}ephys-postprocessing.git" "${versions['POSTPROCESSING']}" 405 | 406 | echo "[${task.tag}] running capsule..." 407 | cd capsule/code 408 | chmod +x run 409 | ./run ${postprocessing_args} ${job_args} 410 | 411 | echo "[${task.tag}] completed!" 412 | """ 413 | } 414 | 415 | process curation { 416 | tag 'curation' 417 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 418 | container container_name 419 | 420 | input: 421 | val max_duration_minutes 422 | path postprocessing_results, stageAs: 'capsule/data/*' 423 | 424 | output: 425 | path 'capsule/results/*', emit: results 426 | 427 | script: 428 | """ 429 | #!/usr/bin/env bash 430 | set -e 431 | 432 | mkdir -p capsule 433 | mkdir -p capsule/data 434 | mkdir -p capsule/results 435 | mkdir -p capsule/scratch 436 | 437 | if [[ ${params.executor} == "slurm" ]]; then 438 | echo "[${task.tag}] allocated task time: ${task.time}" 439 | fi 440 | 441 | echo "[${task.tag}] cloning git repo..." 442 | ${gitCloneFunction} 443 | clone_repo "${params.git_repo_prefix}ephys-curation.git" "${versions['CURATION']}" 444 | 445 | echo "[${task.tag}] running capsule..." 446 | cd capsule/code 447 | chmod +x run 448 | ./run ${curation_args} ${job_args} 449 | 450 | echo "[${task.tag}] completed!" 451 | """ 452 | } 453 | 454 | process visualization { 455 | tag 'visualization' 456 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 457 | container container_name 458 | 459 | input: 460 | val max_duration_minutes 461 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 462 | path job_dispatch_results, stageAs: 'capsule/data/*' 463 | path preprocessing_results, stageAs: 'capsule/data/*' 464 | path spikesort_results, stageAs: 'capsule/data/*' 465 | path postprocessing_results, stageAs: 'capsule/data/*' 466 | path curation_results, stageAs: 'capsule/data/*' 467 | 468 | output: 469 | path 'capsule/results/*', emit: results 470 | 471 | script: 472 | """ 473 | #!/usr/bin/env bash 474 | set -e 475 | 476 | mkdir -p capsule 477 | mkdir -p capsule/data 478 | mkdir -p capsule/results 479 | mkdir -p capsule/scratch 480 | 481 | if [[ ${params.executor} == "slurm" ]]; then 482 | echo "[${task.tag}] allocated task time: ${task.time}" 483 | fi 484 | 485 | echo "[${task.tag}] cloning git repo..." 486 | ${gitCloneFunction} 487 | clone_repo "${params.git_repo_prefix}ephys-visualization.git" "${versions['VISUALIZATION']}" 488 | 489 | echo "[${task.tag}] running capsule..." 490 | cd capsule/code 491 | chmod +x run 492 | ./run ${visualization_kwargs} 493 | 494 | echo "[${task.tag}] completed!" 
495 | """ 496 | } 497 | 498 | process results_collector { 499 | tag 'result-collector' 500 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 501 | container container_name 502 | 503 | publishDir "$RESULTS_PATH", saveAs: { filename -> new File(filename).getName() }, mode: 'copy' 504 | 505 | input: 506 | val max_duration_minutes 507 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 508 | path job_dispatch_results, stageAs: 'capsule/data/*' 509 | path preprocessing_results, stageAs: 'capsule/data/*' 510 | path spikesort_results, stageAs: 'capsule/data/*' 511 | path postprocessing_results, stageAs: 'capsule/data/*' 512 | path curation_results, stageAs: 'capsule/data/*' 513 | path visualization_results, stageAs: 'capsule/data/*' 514 | 515 | output: 516 | path 'capsule/results/*', emit: results 517 | path 'capsule/results/*', emit: nwb_data 518 | path 'capsule/results/*', emit: qc_data 519 | 520 | script: 521 | """ 522 | #!/usr/bin/env bash 523 | set -e 524 | 525 | mkdir -p capsule 526 | mkdir -p capsule/data 527 | mkdir -p capsule/results 528 | mkdir -p capsule/scratch 529 | 530 | if [[ ${params.executor} == "slurm" ]]; then 531 | echo "[${task.tag}] allocated task time: ${task.time}" 532 | fi 533 | 534 | echo "[${task.tag}] cloning git repo..." 535 | ${gitCloneFunction} 536 | clone_repo "${params.git_repo_prefix}ephys-results-collector.git" "${versions['RESULTS_COLLECTOR']}" 537 | 538 | echo "[${task.tag}] running capsule..." 539 | cd capsule/code 540 | chmod +x run 541 | ./run --pipeline-data-path ${DATA_PATH} --pipeline-results-path ${RESULTS_PATH} 542 | 543 | echo "[${task.tag}] completed!" 544 | """ 545 | } 546 | 547 | process quality_control { 548 | tag 'quality-control' 549 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 550 | container container_name 551 | 552 | input: 553 | val max_duration_minutes 554 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 555 | path job_dispatch_results, stageAs: 'capsule/data/*' 556 | path results_data, stageAs: 'capsule/data/*' 557 | 558 | output: 559 | path 'capsule/results/*', emit: results 560 | 561 | script: 562 | """ 563 | #!/usr/bin/env bash 564 | set -e 565 | 566 | mkdir -p capsule 567 | mkdir -p capsule/data 568 | mkdir -p capsule/results 569 | mkdir -p capsule/scratch 570 | 571 | if [[ ${params.executor} == "slurm" ]]; then 572 | echo "[${task.tag}] allocated task time: ${task.time}" 573 | fi 574 | 575 | echo "[${task.tag}] cloning git repo..." 576 | ${gitCloneFunction} 577 | clone_repo "${params.git_repo_prefix}ephys-processing-qc.git" "${versions['QUALITY_CONTROL']}" 578 | 579 | echo "[${task.tag}] running capsule..." 580 | cd capsule/code 581 | chmod +x run 582 | ./run 583 | 584 | echo "[${task.tag}] completed!" 
585 | """ 586 | } 587 | 588 | process quality_control_collector { 589 | tag 'qc-collector' 590 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-base:${params.container_tag}" 591 | container container_name 592 | 593 | publishDir "$RESULTS_PATH", saveAs: { filename -> new File(filename).getName() }, mode: 'copy' 594 | 595 | input: 596 | val max_duration_minutes 597 | path quality_control_results, stageAs: 'capsule/data/*' 598 | 599 | output: 600 | path 'capsule/results/*' 601 | 602 | script: 603 | """ 604 | #!/usr/bin/env bash 605 | set -e 606 | 607 | mkdir -p capsule 608 | mkdir -p capsule/data 609 | mkdir -p capsule/results 610 | mkdir -p capsule/scratch 611 | 612 | if [[ ${params.executor} == "slurm" ]]; then 613 | echo "[${task.tag}] allocated task time: ${task.time}" 614 | fi 615 | 616 | echo "[${task.tag}] cloning git repo..." 617 | ${gitCloneFunction} 618 | clone_repo "${params.git_repo_prefix}ephys-qc-collector.git" "${versions['QUALITY_CONTROL_COLLECTOR']}" 619 | 620 | echo "[${task.tag}] running capsule..." 621 | cd capsule/code 622 | chmod +x run 623 | ./run 624 | 625 | echo "[${task.tag}] completed!" 626 | """ 627 | } 628 | 629 | process nwb_subject { 630 | tag 'nwb-subject' 631 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:${params.container_tag}" 632 | container container_name 633 | 634 | input: 635 | val max_duration_minutes 636 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 637 | 638 | output: 639 | path 'capsule/results/*', emit: results 640 | 641 | script: 642 | """ 643 | #!/usr/bin/env bash 644 | set -e 645 | 646 | mkdir -p capsule 647 | mkdir -p capsule/data 648 | mkdir -p capsule/results 649 | mkdir -p capsule/scratch 650 | 651 | if [[ ${params.executor} == "slurm" ]]; then 652 | echo "[${task.tag}] allocated task time: ${task.time}" 653 | fi 654 | 655 | echo "[${task.tag}] cloning git repo..." 656 | ${gitCloneFunction} 657 | clone_repo "${params.git_repo_prefix}subject-nwb.git" "${versions['NWB_SUBJECT']}" 658 | 659 | echo "[${task.tag}] running capsule..." 660 | cd capsule/code 661 | chmod +x run 662 | ./run ${nwb_subject_args} 663 | 664 | echo "[${task.tag}] completed!" 665 | """ 666 | } 667 | 668 | process nwb_ecephys { 669 | tag 'nwb-ecephys' 670 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:${params.container_tag}" 671 | container container_name 672 | 673 | input: 674 | val max_duration_minutes 675 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 676 | path job_dispatch_results, stageAs: 'capsule/data/*' 677 | 678 | output: 679 | path 'capsule/results/*', emit: results 680 | 681 | script: 682 | """ 683 | #!/usr/bin/env bash 684 | set -e 685 | 686 | mkdir -p capsule 687 | mkdir -p capsule/data 688 | mkdir -p capsule/results 689 | mkdir -p capsule/scratch 690 | 691 | if [[ ${params.executor} == "slurm" ]]; then 692 | echo "[${task.tag}] allocated task time: ${task.time}" 693 | fi 694 | 695 | echo "[${task.tag}] cloning git repo..." 696 | ${gitCloneFunction} 697 | clone_repo "${params.git_repo_prefix}ecephys-nwb.git" "${versions['NWB_ECEPHYS']}" 698 | 699 | echo "[${task.tag}] running capsule..." 700 | cd capsule/code 701 | chmod +x run 702 | ./run ${nwb_ecephys_args} 703 | 704 | echo "[${task.tag}] completed!" 
705 | """ 706 | } 707 | 708 | process nwb_units { 709 | tag 'nwb-units' 710 | def container_name = "ghcr.io/allenneuraldynamics/aind-ephys-pipeline-nwb:${params.container_tag}" 711 | container container_name 712 | 713 | publishDir "$RESULTS_PATH/nwb", saveAs: { filename -> new File(filename).getName() }, mode: 'copy' 714 | 715 | input: 716 | val max_duration_minutes 717 | path ecephys_session_input, stageAs: 'capsule/data/ecephys_session' 718 | path job_dispatch_results, stageAs: 'capsule/data/*' 719 | path results_data, stageAs: 'capsule/data/*' 720 | path nwb_ecephys_results, stageAs: 'capsule/data/*' 721 | 722 | output: 723 | path 'capsule/results/*' 724 | 725 | script: 726 | """ 727 | #!/usr/bin/env bash 728 | set -e 729 | 730 | mkdir -p capsule 731 | mkdir -p capsule/data 732 | mkdir -p capsule/results 733 | mkdir -p capsule/scratch 734 | 735 | echo "[${task.tag}] cloning git repo..." 736 | ${gitCloneFunction} 737 | clone_repo "${params.git_repo_prefix}units-nwb.git" "${versions['NWB_UNITS']}" 738 | 739 | if [[ ${params.executor} == "slurm" ]]; then 740 | echo "[${task.tag}] allocated task time: ${task.time}" 741 | fi 742 | 743 | echo "[${task.tag}] running capsule..." 744 | cd capsule/code 745 | chmod +x run 746 | ./run 747 | 748 | echo "[${task.tag}] completed!" 749 | """ 750 | } 751 | 752 | workflow { 753 | // Input channel from ecephys path 754 | ecephys_ch = Channel.fromPath(params.ecephys_path + "/", type: 'any') 755 | 756 | // Job dispatch 757 | job_dispatch_out = job_dispatch(ecephys_ch.collect()) 758 | 759 | max_duration_file = job_dispatch_out.max_duration_file 760 | max_duration_minutes = max_duration_file.map { it.text.trim() } 761 | max_duration_minutes.view { "Max recording duration: ${it}min" } 762 | 763 | // Preprocessing 764 | preprocessing_out = preprocessing( 765 | max_duration_minutes, 766 | ecephys_ch.collect(), 767 | job_dispatch_out.results.flatten() 768 | ) 769 | 770 | // Spike sorting based on selected sorter 771 | // def spikesort 772 | if (sorter == 'kilosort25') { 773 | spikesort_out = spikesort_kilosort25( 774 | max_duration_minutes, 775 | preprocessing_out.results 776 | ) 777 | } else if (sorter == 'kilosort4') { 778 | spikesort_out = spikesort_kilosort4( 779 | max_duration_minutes, 780 | preprocessing_out.results 781 | ) 782 | } else if (sorter == 'spykingcircus2') { 783 | spikesort_out = spikesort_spykingcircus2( 784 | max_duration_minutes, 785 | preprocessing_out.results 786 | ) 787 | } 788 | 789 | // Postprocessing 790 | postprocessing_out = postprocessing( 791 | max_duration_minutes, 792 | ecephys_ch.collect(), 793 | job_dispatch_out.results.flatten(), 794 | preprocessing_out.results.collect(), 795 | spikesort_out.results.collect() 796 | ) 797 | 798 | // Curation 799 | curation_out = curation( 800 | max_duration_minutes, 801 | postprocessing_out.results 802 | ) 803 | 804 | // Visualization 805 | visualization_out = visualization( 806 | max_duration_minutes, 807 | ecephys_ch.collect(), 808 | job_dispatch_out.results.collect(), 809 | preprocessing_out.results, 810 | spikesort_out.results.collect(), 811 | postprocessing_out.results.collect(), 812 | curation_out.results.collect() 813 | ) 814 | 815 | // Results collection 816 | results_collector_out = results_collector( 817 | max_duration_minutes, 818 | ecephys_ch.collect(), 819 | job_dispatch_out.results.collect(), 820 | preprocessing_out.results.collect(), 821 | spikesort_out.results.collect(), 822 | postprocessing_out.results.collect(), 823 | curation_out.results.collect(), 824 | 
visualization_out.results.collect() 825 | ) 826 | 827 | // Quality control 828 | quality_control_out = quality_control( 829 | max_duration_minutes, 830 | ecephys_ch.collect(), 831 | job_dispatch_out.results.flatten(), 832 | results_collector_out.qc_data.collect() 833 | ) 834 | 835 | // Quality control collection 836 | quality_control_collector( 837 | max_duration_minutes, 838 | quality_control_out.results.collect() 839 | ) 840 | 841 | // NWB ecephys 842 | nwb_ecephys_out = nwb_ecephys( 843 | max_duration_minutes, 844 | ecephys_ch.collect(), 845 | job_dispatch_out.results.collect(), 846 | ) 847 | 848 | // NWB units 849 | nwb_units( 850 | max_duration_minutes, 851 | ecephys_ch.collect(), 852 | job_dispatch_out.results.collect(), 853 | results_collector_out.nwb_data.collect(), 854 | nwb_ecephys_out.results.collect() 855 | ) 856 | } 857 | --------------------------------------------------------------------------------