├── requirements.txt ├── datalad_run.bash ├── LICENSE ├── singularity_run.bash ├── TODO ├── README.md ├── .gitignore └── fmriprep-slurm └── main.py /requirements.txt: -------------------------------------------------------------------------------- 1 | pybids==0.11.1 2 | templateflow==0.6.0 -------------------------------------------------------------------------------- /datalad_run.bash: -------------------------------------------------------------------------------- 1 | ### datalad container version 2 | 3 | # add the container to datalad 4 | CONTAINER_VERSION=20.2.1lts 5 | datalad containers-add --url docker://poldracklab/fmriprep:${CONTAINER_VERSION} -i ./derivatives/fmriprep fmriprep-${CONTAINER_VERSION} 6 | 7 | PROJECT_PATH="/lustre03/project/6003287" 8 | DATASET_PATH=$1 9 | DATASET_FOLDER="${DATASET_PATH%/*}" 10 | PYTHON_CMD="python /fmriprep-slurm/fmriprep-slurm/main.py "$@ 11 | 12 | echo "Running fmriprep-slurm "$@ 13 | export SINGULARITYENV_TEMPLATEFLOW_HOME=/templateflow 14 | module load singularity/3.6 15 | 16 | echo $PYTHON_CMD | xargs singularity exec -B $PROJECT_PATH/fmriprep-slurm:/fmriprep-slurm \ 17 | -B $DATASET_FOLDER:/DATA -B /etc/pki:/etc/pki \ 18 | -B /home/$USER/.cache/templateflow:/templateflow \ 19 | $PROJECT_PATH/derivatives/fmriprep/fmriprep-${CONTAINER_VERSION} 20 | 21 | # now you can use it 22 | datalad containers-run --name fmriprep-${CONTAINER_VERSION} 23 | 24 | ### datalad run version 25 | datalad run ${PROJECT_PATH}/fmriprep-slurm/singularity_run.bash $@ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 SIMEXP 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /singularity_run.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -- "${1%/}" "${@:2}" # trick to remove last path char from first arg (dataset path) 4 | PROJECT_PATH="/project/rrg-pbellec" 5 | DATASET_PATH=$1 6 | DATASET_FOLDER="${DATASET_PATH%/*}" 7 | DATASET_NAME=${DATASET_PATH##*/} 8 | OUTPUT_DIR=$SCRATCH/$DATASET_NAME/$(date +%s) 9 | TEMPLATEFLOW_DIR=/home/$USER/.cache/templateflow 10 | PYTHON_CMD="python /fmriprep-slurm/fmriprep-slurm/main.py "$@" --output-path "$OUTPUT_DIR 11 | 12 | echo "###" 13 | echo "singularity_run.bash "$@ 14 | export APPTAINERENV_TEMPLATEFLOW_HOME=/templateflow 15 | module load apptainer/1.3.5 16 | 17 | echo "Creating templateflow directory in "$TEMPLATEFLOW_DIR 18 | mkdir -p $TEMPLATEFLOW_DIR 19 | 20 | echo "Running fmriprep-slurm inside apptainer with the following command" 21 | echo $PYTHON_CMD 22 | mkdir -p $OUTPUT_DIR 23 | echo $PYTHON_CMD | xargs apptainer exec -B $PROJECT_PATH/fmriprep-slurm:/fmriprep-slurm \ 24 | -B $DATASET_FOLDER:/DATA \ 25 | -B 
/etc/pki:/etc/pki \ 26 | -B $TEMPLATEFLOW_DIR:/templateflow \ 27 | -B $OUTPUT_DIR:/OUTPUT \ 28 | $PROJECT_PATH/containers/fmriprep-20.2.8.sif 29 | 30 | echo "" 31 | echo "Finished!" 32 | echo "" 33 | echo "Please review your slurm scripts in "$OUTPUT_DIR" before submitting them with:" 34 | echo "find "$OUTPUT_DIR"/.slurm/smriprep_sub-*.sh -type f | while read file; do sbatch \"\$file\"; done" 35 | echo "###" 36 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | * fmriprep with datalad: https://handbook.datalad.org/en/latest/beyond_basics/101-171-enki.html 2 | pros: 3 | - capture preproc output provenance 4 | cons: 5 | - Manage a lot (lot) of branch within the beluga filesystem is suicidal 6 | - Much much more complex 7 | - Accessing preproc output for qc not obvious (need to switch branch, because qc is done before merging) 8 | `datalad-run-container` is not sufficient, we need `datalad-run slurm_script` so it can be re-run. 9 | 10 | Instead, we don't create branch, we generate the scripts with `datalad run fmriprep-slurm`, preproc each output with for each subject , do the qc and `datalad save` them at the end (directly on master branch) 11 | pros: 12 | - no need to manage multiple branches 13 | - Less complex, git log cleaner 14 | - qc can be done normally (each output exists on the same branch, no checkout needed) 15 | cons: 16 | - preproc output not captured, (but anyway the advantage of capture would be able to re-run them which is not possible even with the previous solution) 17 | output is captured independently through the slurm scripts (which are run with `datalad run fmriprep-slurm ...`) 18 | 19 | * if folder .datalad exists, then we should copy (to SSD) just the actual subject, else, we can hard-copy as now. 
20 | * better management of anat vs func data 21 | * submit a sample script (one subject) to have a heuristic of the total pre-processing time, and hardware requirements. (--resource-monitor) 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fmriprep-slurm 2 | Generate and run [fMRIPrep](https://fmriprep.org/en/stable/) [SLURM](https://slurm.schedmd.com/documentation.html) jobs on HPCs. 3 | 4 | It is carefully designed and optimized to run the preprocessing on a [BIDS](https://bids-specification.readthedocs.io/en/stable/) dataset. 5 | It also has the advantage of preparing the dataset by checking its integrity and caching some of the BIDS database artifacts. 6 | 7 | 8 | Originally from https://github.com/courtois-neuromod/ds_prep/blob/master/derivatives/fmriprep/fmriprep.py 9 | 10 | ## Usage 11 | 12 | ### Arguments 13 | ``` 14 | positional arguments: 15 | bids_path BIDS folder to run fmriprep on. 16 | 17 | derivatives_name name of the output folder in derivatives. 18 | 19 | optional arguments: 20 | -h, --help show this help message and exit 21 | 22 | --preproc PREPROC anat, func or all (default: all) 23 | 24 | --slurm-account SLURM_ACCOUNT 25 | SLURM account for job submission (default: rrg-pbellec) 26 | 27 | --email EMAIL email for SLURM notifications 28 | 29 | --container CONTAINER 30 | name of the fmriprep singularity container under the default container location (default: fmriprep-20.2.1lts) 31 | 32 | --participant-label PARTICIPANT_LABEL [PARTICIPANT_LABEL ...] 33 | a space delimited list of participant identifiers or a single identifier (the sub- prefix can be removed) 34 | 35 | --output-spaces OUTPUT_SPACES [OUTPUT_SPACES ...] 
36 | a space delimited list of templates as defined by templateflow (default: ["MNI152NLin2009cAsym", "MNI152NLin6Asym"]) 37 | 38 | --fmriprep-args FMRIPREP_ARGS 39 | additional arguments to the fmriprep command as a string (ex: --fmriprep-args="--fs-no-reconall --use-aroma") 40 | 41 | --session-label SESSION_LABEL [SESSION_LABEL ...] 42 | a space delimited list of session identifiers or a single identifier (the ses- prefix can be removed) 43 | 44 | --force-reindex Force pyBIDS reset_database and reindexing 45 | 46 | --submit Submit SLURM jobs 47 | 48 | --bids-filter BIDS_FILTER 49 | Path to an optional bids_filter.json template 50 | 51 | --time TIME Time duration for the slurm job in slurm format (dd-)hh:mm:ss (default: 24h structural, 12h functional) 52 | 53 | --mem-per-cpu MEM_PER_CPU 54 | upper bound memory limit for fMRIPrep processes (default: 4096MB) 55 | 56 | --cpus CPUS maximum number of cpus for all processes (default: 16) 57 | ``` 58 | Templateflow valid identifiers can be found at https://github.com/templateflow/templateflow 59 | 60 | ## Default fmriprep command 61 | 62 | By default, we use the following fMRIPrep arguments: 63 | 64 | `--participant-label` depending on the user argument choice. 65 | 66 | `--bids-database-dir` which takes as input the database cached by pybids. 67 | 68 | `--bids-filter-file` depending on the user argument choice. 69 | 70 | `--notrack` since beluga compute nodes do not have access to the internet, and to reduce the computational burden. 71 | 72 | `--skip_bids_validation` since it was already done inside `fmriprep-slurm`. 73 | 74 | `--write-graph` for debugging. 75 | 76 | `--omp-nthreads`, `--nprocs` and `--mem_mb` depending on the user argument choice. 77 | 78 | `--resource-monitor` for debugging. 79 | 80 | For more information on each of these options, you can check the [documentation](https://simexp-documentation.readthedocs.io/en/latest/giga_preprocessing/preprocessing.html). 
81 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # poetry 100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 101 | # This is especially recommended for binary packages to ensure reproducibility, and is more 102 | # commonly ignored for libraries. 103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 104 | #poetry.lock 105 | 106 | # pdm 107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 108 | #pdm.lock 109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 110 | # in version control. 111 | # https://pdm.fming.dev/#use-with-ide 112 | .pdm.toml 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged 
into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /fmriprep-slurm/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import bids 7 | import subprocess 8 | import json 9 | import re 10 | import fnmatch 11 | import templateflow.api as tf_api 12 | 13 | SCRIPT_DIR = os.path.dirname(__file__) 14 | 15 | SLURM_JOB_DIR = ".slurm" 16 | 17 | SMRIPREP_REQ = {"cpus": 1, "mem_per_cpu": 16384, 18 | "time": "24:00:00", "omp_nthreads": 1} 19 | FMRIPREP_REQ = {"cpus": 1, "mem_per_cpu": 16384, 20 | "time": "24:00:00", "omp_nthreads": 1} 21 | 22 | APPTAINER_DATA_PATH = "/DATA" 23 | APPTAINER_OUTPUT_PATH = "/OUTPUT" 24 | FMRIPREP_DEFAULT_VERSION = "fmriprep-20.2.8lts" 25 | FMRIPREP_DEFAULT_APPTAINER_FOLDER = "/project/rrg-pbellec/hwang1/containers" 26 | OUTPUT_SPACES_DEFAULT = ["MNI152NLin2009cAsym", "MNI152NLin6Asym"] 27 | SLURM_ACCOUNT_DEFAULT = "rrg-pbellec" 28 | PREPROC_DEFAULT = "all" 29 | TEMPLATEFLOW_HOME = os.path.join(os.path.join( 30 | os.environ["HOME"], ".cache"), "templateflow",) 31 | APPTAINER_CMD_BASE = " ".join( 32 | [ 33 | "apptainer run", 34 | "--cleanenv", 35 | f"-B $SLURM_TMPDIR:{APPTAINER_DATA_PATH}", 36 | f"-B {TEMPLATEFLOW_HOME}:/templateflow", 37 | "-B /etc/pki:/etc/pki/", 38 | "-B {original_output}:{APPTAINER_OUTPUT_PATH}" 39 | ] 40 | ) 41 | 42 | slurm_preamble = """#!/bin/bash 43 | #SBATCH --account={slurm_account} 44 | #SBATCH --job-name={jobname}.job 45 | #SBATCH --output={original_output}/{jobname}.out 46 | #SBATCH --error={original_output}/{jobname}.err 47 | #SBATCH --time={time} 48 | #SBATCH --cpus-per-task={cpus} 49 | #SBATCH --mem-per-cpu={mem_per_cpu}M 50 | #SBATCH --mail-user={email} 51 | #SBATCH --mail-type=BEGIN 52 | #SBATCH --mail-type=END 53 | 
#SBATCH --mail-type=FAIL 54 | 55 | export APPTAINERENV_FS_LICENSE=$HOME/.freesurfer.txt 56 | export APPTAINERENV_TEMPLATEFLOW_HOME=/templateflow 57 | 58 | module load apptainer/1.3.5 59 | 60 | #copying input dataset into local scratch space 61 | rsync -rltv --info=progress2 --exclude="sub*" --exclude="derivatives" {bids_root} $SLURM_TMPDIR 62 | rsync -rltv --info=progress2 {bids_root}/{participant} $SLURM_TMPDIR/{bids_basename} 63 | 64 | """ 65 | 66 | copy_anat = """ 67 | mkdir -p $SLURM_TMPDIR/anat/derivatives/fmriprep 68 | mkdir -p $SLURM_TMPDIR/anat/derivatives/freesurfer 69 | 70 | rsync -rltv --info=progress2 {fmriprep_path}/{participant} $SLURM_TMPDIR/anat/derivatives/fmriprep 71 | rsync -rltv --info=progress2 {freesurfer_path}/{participant} $SLURM_TMPDIR/anat/derivatives/freesurfer 72 | rsync -rltv --info=progress2 {freesurfer_path}/fsaverage $SLURM_TMPDIR/anat/derivatives/freesurfer 73 | 74 | """ 75 | 76 | #default fmriprep bids filter 77 | BIDS_FILTERS = { 78 | 'fmap': {'datatype': 'fmap'}, 79 | 'bold': {'datatype': 'func', 'suffix': 'bold'}, 80 | 'sbref': {'datatype': 'func', 'suffix': 'sbref'}, 81 | 'flair': {'datatype': 'anat', 'suffix': 'FLAIR'}, 82 | 't2w': {'datatype': 'anat', 'suffix': 'T2w'}, 83 | 't1w': {'datatype': 'anat', 'suffix': 'T1w'}, 84 | 'roi': {'datatype': 'anat', 'suffix': 'roi'}, 85 | } 86 | # old bids filter 87 | # BIDS_FILTERS = {"t1w": {"reconstruction": None, "acquisition": None}, "t2w": { 88 | # "reconstruction": None, "acquisition": None}, "bold": {}} 89 | 90 | 91 | def load_bidsignore(bids_root): 92 | """Load .bidsignore file from a BIDS dataset, returns list of regexps""" 93 | bids_ignore_path = os.path.join(bids_root, ".bidsignore") 94 | if os.path.exists(bids_ignore_path): 95 | with open(bids_ignore_path) as f: 96 | bids_ignores = f.read().splitlines() 97 | return tuple( 98 | [ 99 | re.compile(fnmatch.translate(bi)) 100 | for bi in bids_ignores 101 | if len(bi) and bi.strip()[0] != "#" 102 | ] 103 | ) 104 | return tuple() 105 
def _strip_prefix(label, prefix):
    """Return *label* without a leading *prefix* (e.g. ``sub-01`` -> ``01``)."""
    label = str(label)
    return label[len(prefix):] if label.startswith(prefix) else label


def _extra_fmriprep_args(args):
    """Return the user supplied extra fMRIPrep arguments as a single string.

    ``--fmriprep-args`` is declared with ``nargs="+"`` and a ``""`` default, so
    the parsed value is either an empty string or a list of strings.
    """
    extra = args.fmriprep_args
    if not extra:
        return ""
    if isinstance(extra, str):
        return extra
    return " ".join(extra)


def _wants_resource_monitor(args):
    """Tell whether ``--resource-monitor`` is among the extra fMRIPrep arguments.

    The flags are usually given as one quoted string
    (``--fmriprep-args="--fs-no-reconall --resource-monitor"``), so the test is
    done on the individual tokens and not on the list elements.
    """
    return "--resource-monitor" in _extra_fmriprep_args(args).split()


def _apply_resource_overrides(job_specs, args):
    """Override the default SLURM resources with the ones given on the CLI."""
    if args.time:
        job_specs["time"] = args.time
    if args.mem_per_cpu:
        job_specs["mem_per_cpu"] = int(args.mem_per_cpu)
    if args.cpus:
        job_specs["cpus"] = int(args.cpus)


def _load_bids_filters(args):
    """Return a private copy of the BIDS filters to use for one job.

    The user file is used when ``--bids-filter`` points to an existing file,
    otherwise the default ``BIDS_FILTERS`` are used. The defaults are copied
    through a json round-trip so that callers can modify the result without
    altering the module level dictionary.
    """
    if args.bids_filter:
        if os.path.exists(args.bids_filter):
            with open(args.bids_filter) as f:
                return json.load(f)
        print(
            f"bids filter file {args.bids_filter} not found, using default filters")
    return json.loads(json.dumps(BIDS_FILTERS))


def _render_preamble(job_specs):
    """Format the SLURM preamble, dropping mail directives without an email."""
    preamble = slurm_preamble.format(**job_specs)
    if job_specs.get("email"):
        return preamble
    # without an address the template would produce "--mail-user=None"
    return "".join(
        line
        for line in preamble.splitlines(keepends=True)
        if not line.startswith("#SBATCH --mail-")
    )


def write_job_footer(fd, jobname, bids_path, fmriprep_workdir, derivatives_name, output_path, resource_monitor=False):
    """Write the shell epilogue of a job script to the open file *fd*.

    The epilogue stores the exit code of the previous command (fMRIPrep), then:
    on failure, copies the working directory to ``{output_path}/{jobname}.workdir``;
    on success, copies the derivatives (and, if *resource_monitor* is set, the
    resource monitor json) to *output_path*. The script finally exits with the
    stored exit code.
    """
    fd.write("fmriprep_exitcode=$?\n")
    # normpath so that a trailing "/" does not give an empty dataset name
    dataset_name = os.path.basename(os.path.normpath(bids_path))
    local_derivative_dir = os.path.join(
        "$SLURM_TMPDIR", dataset_name, "derivatives", derivatives_name)
    fd.write(
        f"if [ $fmriprep_exitcode -ne 0 ] ; then rsync -rltv --info=progress2 {fmriprep_workdir} {output_path}/{jobname}.workdir ; fi \n"
    )
    if resource_monitor:
        fd.write(
            f"if [ $fmriprep_exitcode -eq 0 ] ; then rsync -rltv --info=progress2 {fmriprep_workdir}/fmriprep_wf/resource_monitor.json {output_path}/{jobname}_resource_monitor.json ; fi \n"
        )
    fd.write(
        f"if [ $fmriprep_exitcode -eq 0 ] ; then mkdir -p {output_path}/{derivatives_name} ; fi \n"
    )
    fd.write(
        f"if [ $fmriprep_exitcode -eq 0 ] ; then rsync -rltv --info=progress2 {local_derivative_dir}/* {output_path}/{derivatives_name}/ ; fi \n"
    )
    fd.write("exit $fmriprep_exitcode \n")


def write_fmriprep_job(layout, subject, args, anat_only=True):
    """Write the SLURM script running fMRIPrep on one subject.

    Parameters
    ----------
    layout : pyBIDS layout of the dataset, only ``layout.root`` is read here.
    subject : participant label, without the ``sub-`` prefix.
    args : parsed command line arguments (see ``parse_args``).
    anat_only : when true ``--anat-only`` is added to the fMRIPrep command.

    Returns
    -------
    Path of the script, ``{APPTAINER_OUTPUT_PATH}/{SLURM_JOB_DIR}/smriprep_sub-{subject}.sh``.
    The BIDS filters are written next to it in ``bids_filters.json``.
    """
    bids_root = os.path.realpath(args.bids_path)
    job_specs = dict(
        original_output=args.output_path,
        slurm_account=args.slurm_account,
        jobname=f"smriprep_sub-{subject}",
        email=args.email,
        bids_root=bids_root,
        bids_basename=os.path.basename(bids_root),
        participant=f"sub-{subject}",
    )
    job_specs.update(SMRIPREP_REQ)
    _apply_resource_overrides(job_specs, args)

    job_path = os.path.join(APPTAINER_OUTPUT_PATH,
                            SLURM_JOB_DIR, f"{job_specs['jobname']}.sh")

    derivatives_path = os.path.join(
        layout.root,
        "derivatives",
        args.derivatives_name
    )

    # use json load/dump to copy filters (and validate json in the meantime)
    bids_filters_path = os.path.join(
        APPTAINER_OUTPUT_PATH,
        "bids_filters.json")
    bids_filters = _load_bids_filters(args)
    with open(bids_filters_path, "w") as f:
        json.dump(bids_filters, f)

    fmriprep_APPTAINER_path = os.path.join(
        FMRIPREP_DEFAULT_APPTAINER_FOLDER, args.container + ".sif")
    sing_fmriprep_workdir = os.path.join(
        APPTAINER_DATA_PATH, "fmriprep_work")
    resource_monitor = _wants_resource_monitor(args)

    with open(job_path, "w") as f:
        f.write(_render_preamble(job_specs))
        f.write(
            " ".join(
                [
                    APPTAINER_CMD_BASE.format(
                        original_output=args.output_path, APPTAINER_OUTPUT_PATH=APPTAINER_OUTPUT_PATH),
                    fmriprep_APPTAINER_path,
                    f"-w {sing_fmriprep_workdir}",
                    f"--participant-label {subject}",
                    "--anat-only" if anat_only else "",
                    f" --bids-filter-file {bids_filters_path}",
                    _extra_fmriprep_args(args),
                    " --output-spaces",
                    *args.output_spaces,
                    "--output-layout bids",
                    "--notrack",
                    "--skip_bids_validation",
                    "--write-graph",
                    f"--omp-nthreads {job_specs['omp_nthreads']}",
                    f"--nprocs {job_specs['cpus']}",
                    "--random-seed 0",
                    layout.root,
                    derivatives_path,
                    "participant",
                    "\n",
                ]
            )
        )
        # the footer must directly follow the fmriprep command: it reads $?
        fmriprep_workdir = os.path.join("$SLURM_TMPDIR", "fmriprep_work")
        write_job_footer(f, job_specs["jobname"], bids_root, fmriprep_workdir,
                         args.derivatives_name, args.output_path, resource_monitor)
    return job_path


def _derivative_name(entities, suffix):
    """Build a derivative file name from (entity, value) pairs and a suffix."""
    return "_".join(
        "%s-%s" % (k[:3] if k in ["subject", "session"] else k, v)
        for k, v in entities
    ) + suffix


def write_func_job(layout, subject, session, args):
    """Write the SLURM script running the functional preprocessing of a session.

    Parameters
    ----------
    layout : pyBIDS layout of the dataset (``layout.root`` and ``layout.get``).
    subject : participant label, without the ``sub-`` prefix.
    session : session label, without the ``ses-`` prefix.
    args : parsed command line arguments (see ``parse_args``).

    Returns
    -------
    (job_path, outputs_exist) where ``outputs_exist`` is true when the
    preprocessed volume (first output space) and the fsLR dtseries are already
    present for every BOLD run of the session.
    """
    study = os.path.basename(layout.root)
    anat_path = os.path.join(
        APPTAINER_DATA_PATH,
        "anat",
        "derivatives",
    )
    derivatives_path = os.path.join(
        layout.root,
        "derivatives",
        args.derivatives_name)

    bold_runs = layout.get(
        subject=subject, session=session, extension=[".nii", ".nii.gz"], suffix="bold"
    )

    func_path = os.path.join(
        derivatives_path,
        "fmriprep",
        f"sub-{subject}",
        f"ses-{session}",
        "func",
    )
    bold_derivatives = []
    for bold_run in bold_runs:
        run_entities = bold_run.entities
        entities = [
            (ent, run_entities[ent])
            for ent in ["subject", "session", "task", "run"]
            if ent in run_entities
        ]
        preproc_entities = entities + [
            ("space", args.output_spaces[0]),
            ("desc", "preproc"),
        ]
        dtseries_entities = entities + [("space", "fsLR"), ("den", "91k")]
        preproc_path = os.path.join(
            func_path, _derivative_name(preproc_entities, "_bold.nii.gz"))
        dtseries_path = os.path.join(
            func_path, _derivative_name(dtseries_entities, "_bold.dtseries.nii"))
        # test if file or symlink (even broken if git-annex and not pulled)
        bold_deriv = os.path.lexists(
            preproc_path) and os.path.lexists(dtseries_path)
        if bold_deriv:
            print(
                f"found existing derivatives for {bold_run.path} : {preproc_path}, {dtseries_path}"
            )
        bold_derivatives.append(bold_deriv)
    # NOTE: all([]) is True, a session without BOLD run is reported as done
    outputs_exist = all(bold_derivatives)

    bids_root = os.path.realpath(args.bids_path)
    output_root = os.path.realpath(args.output_path)
    job_specs = dict(
        original_output=args.output_path,
        slurm_account=args.slurm_account,
        jobname=f"fmriprep_study-{study}_sub-{subject}_ses-{session}",
        email=args.email,
        bids_root=bids_root,
        bids_basename=os.path.basename(bids_root),
        participant=f"sub-{subject}",
        fmriprep_path=os.path.join(output_root, args.derivatives_name),
        freesurfer_path=os.path.join(
            output_root, args.derivatives_name, "sourcedata", "freesurfer"),
    )
    job_specs.update(FMRIPREP_REQ)
    _apply_resource_overrides(job_specs, args)

    job_path = os.path.join(APPTAINER_OUTPUT_PATH,
                            SLURM_JOB_DIR, f"{job_specs['jobname']}.sh")
    bids_filters_path = os.path.join(
        APPTAINER_OUTPUT_PATH,
        f"{job_specs['jobname']}_bids_filters.json"
    )

    bids_filters = _load_bids_filters(args)
    # filter for session (a user supplied filter may have no "bold" entry)
    bids_filters.setdefault("bold", {}).update({"session": session})
    with open(bids_filters_path, "w") as f:
        json.dump(bids_filters, f)

    fmriprep_APPTAINER_path = os.path.join(
        FMRIPREP_DEFAULT_APPTAINER_FOLDER, args.container + ".sif")
    sing_fmriprep_workdir = os.path.join(
        APPTAINER_DATA_PATH, "fmriprep_work")
    resource_monitor = _wants_resource_monitor(args)
    with open(job_path, "w") as f:
        f.write(_render_preamble(job_specs))
        f.write(copy_anat.format(**job_specs))
        f.write(
            " ".join(
                [
                    APPTAINER_CMD_BASE.format(
                        original_output=args.output_path, APPTAINER_OUTPUT_PATH=APPTAINER_OUTPUT_PATH),
                    fmriprep_APPTAINER_path,
                    f"-w {sing_fmriprep_workdir}",
                    f"--participant-label {subject}",
                    f"--anat-derivatives {anat_path}/fmriprep",
                    f"--fs-subjects-dir {anat_path}/freesurfer",
                    f" --bids-filter-file {bids_filters_path}",
                    " --ignore slicetiming",
                    "--use-syn-sdc",
                    _extra_fmriprep_args(args),
                    "--output-spaces",
                    *args.output_spaces,
                    "--output-layout bids",
                    "--notrack",
                    "--write-graph",
                    "--skip_bids_validation",
                    f"--omp-nthreads {job_specs['omp_nthreads']}",
                    f"--nprocs {job_specs['cpus']}",
                    "--random-seed 0",
                    layout.root,
                    derivatives_path,
                    "participant",
                    "\n",
                ]
            )
        )
        # the footer must directly follow the fmriprep command: it reads $?
        fmriprep_workdir = os.path.join("$SLURM_TMPDIR", "fmriprep_work")
        write_job_footer(f, job_specs["jobname"], bids_root, fmriprep_workdir,
                         args.derivatives_name, args.output_path, resource_monitor)

    return job_path, outputs_exist


def submit_slurm_job(job_path):
    """Submit *job_path* with ``sbatch`` and return the completed process."""
    return subprocess.run(["sbatch", job_path])


def run_fmriprep(layout, args):
    """Write the job scripts and yield the path of each script to run.

    Subjects (and sessions for ``--preproc func``) come from the command line
    when given, from the layout otherwise; a leading ``sub-``/``ses-`` is
    removed from the labels. Functional jobs whose outputs all exist already
    are written but not yielded.
    """
    subjects = args.participant_label
    if not subjects:
        subjects = layout.get_subjects()

    for subject in subjects:
        subject = _strip_prefix(subject, "sub-")
        print(f"\t {subject}")

        if args.preproc == "func":
            if args.session_label:
                sessions = args.session_label
            else:
                sessions = layout.get_sessions(subject=subject)
            for session in sessions:
                session = _strip_prefix(session, "ses-")
                job_path, outputs_exist = write_func_job(
                    layout, subject, session, args)
                if outputs_exist:
                    print(
                        f"all output already exists for sub-{subject} ses-{session}, not rerunning"
                    )
                    continue
                yield job_path
        elif args.preproc == "anat":
            yield write_fmriprep_job(layout, subject, args, anat_only=True)
        elif args.preproc == "all":
            yield write_fmriprep_job(layout, subject, args, anat_only=False)


def parse_args(argv=None):
    """Parse the command line.

    Parameters
    ----------
    argv : list of arguments to parse, ``sys.argv[1:]`` when None.

    Returns
    -------
    The ``argparse.Namespace`` of the parsed arguments.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="create fmriprep jobs scripts",
    )
    parser.add_argument(
        "bids_path",
        help="BIDS folder to run fmriprep on."
    )
    parser.add_argument(
        "derivatives_name",
        help="name of the output folder in derivatives.",
    )
    parser.add_argument(
        "--output-path",
        action="store",
        type=str,
        # every code path needs it, fail here rather than after the indexing
        required=True,
        help="output path for SLURM files, logs and also bids filters"
    )
    parser.add_argument(
        "--preproc",
        action="store",
        default=PREPROC_DEFAULT,
        # any other value would silently generate no job at all
        choices=["anat", "func", "all"],
        help="anat, func or all (default: all)"
    )
    parser.add_argument(
        "--slurm-account",
        action="store",
        default=SLURM_ACCOUNT_DEFAULT,
        help="SLURM account for job submission (default: rrg-pbellec)",
    )
    parser.add_argument(
        "--email",
        action="store",
        help="email for SLURM notifications"
    )
    parser.add_argument(
        "--container",
        action="store",
        default=FMRIPREP_DEFAULT_VERSION,
        help="name of the fmriprep APPTAINER container under the default container location "
        f"(default: {FMRIPREP_DEFAULT_VERSION})"
    )
    parser.add_argument(
        "--participant-label",
        action="store",
        nargs="+",
        help="a space delimited list of participant identifiers or a single "
        "identifier (the sub- prefix can be removed)",
    )
    parser.add_argument(
        "--output-spaces",
        action="store",
        nargs="+",
        default=OUTPUT_SPACES_DEFAULT,
        help="a space delimited list of templates as defined by templateflow "
        "(default: [\"MNI152NLin2009cAsym\", \"MNI152NLin6Asym\"])",
    )
    parser.add_argument(
        "--fmriprep-args",
        action="store",
        type=str,
        nargs='+',
        default="",
        help="additionnal arguments to the fmriprep command as a string (ex: --fmriprep-args=\"--fs-no-reconall --use-aroma\") ",
    )
    parser.add_argument(
        "--session-label",
        action="store",
        nargs="+",
        help="a space delimited list of session identifiers or a single "
        "identifier (the ses- prefix can be removed)",
    )
    parser.add_argument(
        "--force-reindex",
        action="store_true",
        help="Force pyBIDS reset_database and reindexing",
    )
    parser.add_argument(
        "--submit",
        action="store_true",
        help="Submit SLURM jobs",
    )
    parser.add_argument(
        "--bids-filter",
        action="store",
        help="Path to an optionnal bids_filter.json template",
    )
    parser.add_argument(
        "--time",
        action="store",
        help="Time duration for the slurm job in slurm format (dd-)hh:mm:ss "
        "(default: 24h)",
    )
    parser.add_argument(
        "--mem-per-cpu",
        action="store",
        type=int,
        help="upper bound memory limit for fMRIPrep processes"
        "(default: 16384MB)",
    )
    parser.add_argument(
        "--cpus",
        action="store",
        type=int,
        help="maximum number of cpus for all processes"
        "(default: 1)",
    )

    return parser.parse_args(argv)


def main():
    """Index the dataset, prefetch the templates and write (or submit) the jobs."""
    args = parse_args()
    print("\n### Running fmriprep-slurm\n")
    print(vars(args))

    print("\n# Loading pyBIDS database (it might take few hours for a big dataset)...\n")
    # normpath so that a trailing "/" does not give an empty dataset name
    sing_bids_path = os.path.join(
        APPTAINER_DATA_PATH, os.path.basename(os.path.normpath(args.bids_path)))
    layout = bids.BIDSLayout(
        sing_bids_path,
        reset_database=args.force_reindex,
        ignore=(
            "code",
            "stimuli",
            "sourcedata",
            "models",
            re.compile(r"^\."),
        )
        + load_bidsignore(sing_bids_path),
    )
    job_path = os.path.join(APPTAINER_OUTPUT_PATH, SLURM_JOB_DIR)
    os.makedirs(job_path, exist_ok=True)

    print("\n# Prefectch templateflow templates ...\n")
    # prefetch templateflow templates
    # NOTE(review): templateflow is already imported at this point, it may have
    # read TEMPLATEFLOW_HOME at import time - confirm this assignment is used
    os.environ["TEMPLATEFLOW_HOME"] = TEMPLATEFLOW_HOME
    tf_api.get(args.output_spaces + ["OASIS30ANTs", "fsLR", "fsaverage"])

    print("\n# Processing slurm files into {}\n".format(
        os.path.join(args.output_path, SLURM_JOB_DIR)))
    for job_file in run_fmriprep(layout, args):
        if args.submit:
            submit_slurm_job(job_file)


if __name__ == "__main__":
    main()