├── requirements.txt
├── datalad_run.bash
├── LICENSE
├── singularity_run.bash
├── TODO
├── README.md
├── .gitignore
└── fmriprep-slurm
    └── main.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | pybids==0.11.1
2 | templateflow==0.6.0


--------------------------------------------------------------------------------
/datalad_run.bash:
--------------------------------------------------------------------------------
 1 | ### datalad container version
 2 | 
 3 | # add the container to datalad
 4 | CONTAINER_VERSION=20.2.1lts
 5 | datalad containers-add --url docker://poldracklab/fmriprep:${CONTAINER_VERSION} -i ./derivatives/fmriprep fmriprep-${CONTAINER_VERSION}
 6 | 
 7 | PROJECT_PATH="/lustre03/project/6003287"
 8 | DATASET_PATH=$1
 9 | DATASET_FOLDER="${DATASET_PATH%/*}"
10 | PYTHON_CMD="python /fmriprep-slurm/fmriprep-slurm/main.py "$@
11 | 
12 | echo "Running fmriprep-slurm "$@
13 | export SINGULARITYENV_TEMPLATEFLOW_HOME=/templateflow
14 | module load singularity/3.6
15 | 
16 | echo $PYTHON_CMD | xargs singularity exec -B $PROJECT_PATH/fmriprep-slurm:/fmriprep-slurm \
17 |   -B $DATASET_FOLDER:/DATA -B /etc/pki:/etc/pki \
18 |   -B /home/$USER/.cache/templateflow:/templateflow \
19 |   $PROJECT_PATH/derivatives/fmriprep/fmriprep-${CONTAINER_VERSION}
20 | 
21 | # now you can use it
22 | datalad containers-run --name fmriprep-${CONTAINER_VERSION}
23 | 
24 | ### datalad run version
25 | datalad run ${PROJECT_PATH}/fmriprep-slurm/singularity_run.bash $@


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 SIMEXP
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/singularity_run.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -- "${1%/}" "${@:2}" # trick to remove last path char from first arg (dataset path)
 4 | PROJECT_PATH="/project/rrg-pbellec"
 5 | DATASET_PATH=$1
 6 | DATASET_FOLDER="${DATASET_PATH%/*}"
 7 | DATASET_NAME=${DATASET_PATH##*/}
 8 | OUTPUT_DIR=$SCRATCH/$DATASET_NAME/$(date +%s)
 9 | TEMPLATEFLOW_DIR=/home/$USER/.cache/templateflow
10 | PYTHON_CMD="python /fmriprep-slurm/fmriprep-slurm/main.py "$@" --output-path "$OUTPUT_DIR
11 | 
12 | echo "###"
13 | echo "singularity_run.bash "$@
14 | export APPTAINERENV_TEMPLATEFLOW_HOME=/templateflow
15 | module load apptainer/1.3.5
16 | 
17 | echo "Creating templateflow directory in "$TEMPLATEFLOW_DIR
18 | mkdir -p $TEMPLATEFLOW_DIR
19 | 
20 | echo "Running fmriprep-slurm inside apptainer with the following command"
21 | echo $PYTHON_CMD
22 | mkdir -p $OUTPUT_DIR
23 | echo $PYTHON_CMD | xargs apptainer exec -B $PROJECT_PATH/fmriprep-slurm:/fmriprep-slurm \
24 |   -B $DATASET_FOLDER:/DATA \
25 |   -B /etc/pki:/etc/pki \
26 |   -B $TEMPLATEFLOW_DIR:/templateflow \
27 |   -B $OUTPUT_DIR:/OUTPUT \
28 |   $PROJECT_PATH/containers/fmriprep-20.2.8.sif
29 | 
30 | echo ""
31 | echo "Finished!"
32 | echo ""
33 | echo "Please review your slurm scripts in "$OUTPUT_DIR" before submitting them with:"
34 | echo "find "$OUTPUT_DIR"/.slurm/smriprep_sub-*.sh -type f | while read file; do sbatch \"\$file\"; done"
35 | echo "###"
36 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | * fmriprep with datalad: https://handbook.datalad.org/en/latest/beyond_basics/101-171-enki.html
 2 | pros:
 3 | - capture preproc output provenance
 4 | cons:
 5 | - Managing a very large number of branches within the beluga filesystem is impractical
 6 | - Much much more complex
 7 | - Accessing preproc output for qc not obvious (need to switch branch, because qc is done before merging)
 8 | `datalad-run-container` is not sufficient, we need `datalad-run slurm_script` so it can be re-run.
 9 | 
10 | Instead, we don't create branches: we generate the scripts with `datalad run fmriprep-slurm`, preprocess each subject, do the qc and `datalad save` the outputs at the end (directly on the master branch)
11 | pros:
12 | - no need to manage multiple branches
13 | - Less complex, git log cleaner
14 | - qc can be done normally (each output exists on the same branch, no checkout needed)
15 | cons:
16 | - preproc output is not captured (but the only advantage of capturing it would be the ability to re-run it, which is not possible even with the previous solution)
17 | output is captured independently through the slurm scripts (which are run with `datalad run fmriprep-slurm ...`)
18 | 
19 | * if folder .datalad exists, then we should copy (to SSD) just the actual subject, else, we can hard-copy as now.
20 | * better management of anat vs func data
21 | * submit a sample script (one subject) to get a heuristic of the total pre-processing time and hardware requirements. (--resource-monitor)
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # fmriprep-slurm
 2 | Generate and run [fMRIPrep](https://fmriprep.org/en/stable/) [SLURM](https://slurm.schedmd.com/documentation.html) jobs on HPCs.
 3 | 
 4 | It is carefully designed and optimized to run the preprocessing on a [BIDS](https://bids-specification.readthedocs.io/en/stable/) dataset.
 5 | It also prepares the dataset by checking its integrity and caching some of the BIDS database artifacts.
 6 | 
 7 | 
 8 | Originally from https://github.com/courtois-neuromod/ds_prep/blob/master/derivatives/fmriprep/fmriprep.py
 9 | 
10 | ## Usage
11 | 
12 | ### Arguments
13 | ```
14 | positional arguments:  
15 |   bids_path             BIDS folder to run fmriprep on.  
16 | 
17 |   derivatives_name      name of the output folder in derivatives.  
18 | 
19 | optional arguments:  
20 |   -h, --help            show this help message and exit
21 | 
22 |   --preproc PREPROC     anat, func or all (default: all)
23 | 
24 |   --slurm-account SLURM_ACCOUNT
25 |                         SLURM account for job submission (default: rrg-pbellec)
26 | 
27 |   --email EMAIL         email for SLURM notifications
28 | 
29 |   --container CONTAINER
30 |                         name of the fmriprep singularity container under the default container location (default: fmriprep-20.2.8lts)
31 | 
32 |   --participant-label PARTICIPANT_LABEL [PARTICIPANT_LABEL ...]
33 |                         a space delimited list of participant identifiers or a single identifier (the sub- prefix can be removed)
34 | 
35 |   --output-spaces OUTPUT_SPACES [OUTPUT_SPACES ...]
36 |                         a space delimited list of templates as defined by templateflow (default: ["MNI152NLin2009cAsym", "MNI152NLin6Asym"])
37 | 
38 |   --fmriprep-args FMRIPREP_ARGS
39 |                         additional arguments to the fmriprep command as a string (ex: --fmriprep-args="--fs-no-reconall --use-aroma")
40 | 
41 |   --session-label SESSION_LABEL [SESSION_LABEL ...]
42 |                         a space delimited list of session identifiers or a single identifier (the ses- prefix can be removed)
43 | 
44 |   --force-reindex       Force pyBIDS reset_database and reindexing
45 | 
46 |   --submit              Submit SLURM jobs
47 | 
48 |   --bids-filter BIDS_FILTER
49 |                         Path to an optional bids_filter.json template
50 | 
51 |   --time TIME           Time duration for the slurm job in slurm format (dd-)hh:mm:ss (default: 24h structural, 12h functional)
52 | 
53 |   --mem-per-cpu MEM_PER_CPU
54 |                         upper bound memory limit for fMRIPrep processes(default: 4096MB)
55 |                         
56 |   --cpus CPUS           maximum number of cpus for all processes(default: 16) 
57 | ```
58 | Templateflow valid identifiers can be found at https://github.com/templateflow/templateflow
59 | 
60 | ## Default fmriprep command
61 | 
62 | By default, we use the following fMRIPrep arguments:
63 | 
64 | `--participant-label` depending on the user's argument choice.
65 | 
66 | `--bids-database-dir` which takes as an input the cached from pybids.
67 | 
68 | `--bids-filter-file` depending on the user's argument choice.
69 | 
70 | `--notrack` since beluga compute nodes do not have access to the internet; it also reduces the computational burden.
71 | 
72 | `--skip_bids_validation` since it was already done inside `fmriprep-slurm`.
73 | 
74 | `--write-graph` for debugging.
75 | 
76 | `--omp-nthreads`, `--nprocs` and `--mem_mb` depending on the user's argument choice.
77 | 
78 | `--resource-monitor` for debugging.
79 | 
80 | For more information on each of these options, you can check the [documentation](https://simexp-documentation.readthedocs.io/en/latest/giga_preprocessing/preprocessing.html).
81 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .vscode
  2 | 
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | share/python-wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .nox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | *.py,cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | cover/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | .pybuilder/
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | #   For a library or package, you might want to ignore these files since the code is
 89 | #   intended to run in multiple environments; otherwise, check them in:
 90 | # .python-version
 91 | 
 92 | # pipenv
 93 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 94 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 95 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 96 | #   install all needed dependencies.
 97 | #Pipfile.lock
 98 | 
 99 | # poetry
100 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
102 | #   commonly ignored for libraries.
103 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104 | #poetry.lock
105 | 
106 | # pdm
107 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108 | #pdm.lock
109 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110 | #   in version control.
111 | #   https://pdm.fming.dev/#use-with-ide
112 | .pdm.toml
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 


--------------------------------------------------------------------------------
/fmriprep-slurm/main.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import os
  4 | import sys
  5 | import argparse
  6 | import bids
  7 | import subprocess
  8 | import json
  9 | import re
 10 | import fnmatch
 11 | import templateflow.api as tf_api
 12 | 
 13 | SCRIPT_DIR = os.path.dirname(__file__)
 14 | 
 15 | SLURM_JOB_DIR = ".slurm"
 16 | 
 17 | SMRIPREP_REQ = {"cpus": 1, "mem_per_cpu": 16384,
 18 |                 "time": "24:00:00", "omp_nthreads": 1}
 19 | FMRIPREP_REQ = {"cpus": 1, "mem_per_cpu": 16384,
 20 |                 "time": "24:00:00", "omp_nthreads": 1}
 21 | 
 22 | APPTAINER_DATA_PATH = "/DATA"
 23 | APPTAINER_OUTPUT_PATH = "/OUTPUT"
 24 | FMRIPREP_DEFAULT_VERSION = "fmriprep-20.2.8lts"
 25 | FMRIPREP_DEFAULT_APPTAINER_FOLDER = "/project/rrg-pbellec/hwang1/containers"
 26 | OUTPUT_SPACES_DEFAULT = ["MNI152NLin2009cAsym", "MNI152NLin6Asym"]
 27 | SLURM_ACCOUNT_DEFAULT = "rrg-pbellec"
 28 | PREPROC_DEFAULT = "all"
 29 | TEMPLATEFLOW_HOME = os.path.join(os.path.join(
 30 |     os.environ["HOME"], ".cache"), "templateflow",)
 31 | APPTAINER_CMD_BASE = " ".join(
 32 |     [
 33 |         "apptainer run",
 34 |         "--cleanenv",
 35 |         f"-B $SLURM_TMPDIR:{APPTAINER_DATA_PATH}",
 36 |         f"-B {TEMPLATEFLOW_HOME}:/templateflow",
 37 |         "-B /etc/pki:/etc/pki/",
 38 |         "-B {original_output}:{APPTAINER_OUTPUT_PATH}"
 39 |     ]
 40 | )
 41 | 
 42 | slurm_preamble = """#!/bin/bash
 43 | #SBATCH --account={slurm_account}
 44 | #SBATCH --job-name={jobname}.job
 45 | #SBATCH --output={original_output}/{jobname}.out
 46 | #SBATCH --error={original_output}/{jobname}.err
 47 | #SBATCH --time={time}
 48 | #SBATCH --cpus-per-task={cpus}
 49 | #SBATCH --mem-per-cpu={mem_per_cpu}M
 50 | #SBATCH --mail-user={email}
 51 | #SBATCH --mail-type=BEGIN
 52 | #SBATCH --mail-type=END
 53 | #SBATCH --mail-type=FAIL
 54 | 
 55 | export APPTAINERENV_FS_LICENSE=$HOME/.freesurfer.txt
 56 | export APPTAINERENV_TEMPLATEFLOW_HOME=/templateflow
 57 | 
 58 | module load apptainer/1.3.5
 59 | 
 60 | #copying input dataset into local scratch space
 61 | rsync -rltv --info=progress2 --exclude="sub*" --exclude="derivatives" {bids_root} $SLURM_TMPDIR
 62 | rsync -rltv --info=progress2 {bids_root}/{participant} $SLURM_TMPDIR/{bids_basename}
 63 | 
 64 | """
 65 | 
 66 | copy_anat = """
 67 | mkdir -p $SLURM_TMPDIR/anat/derivatives/fmriprep
 68 | mkdir -p $SLURM_TMPDIR/anat/derivatives/freesurfer
 69 | 
 70 | rsync -rltv --info=progress2 {fmriprep_path}/{participant} $SLURM_TMPDIR/anat/derivatives/fmriprep
 71 | rsync -rltv --info=progress2 {freesurfer_path}/{participant} $SLURM_TMPDIR/anat/derivatives/freesurfer
 72 | rsync -rltv --info=progress2 {freesurfer_path}/fsaverage $SLURM_TMPDIR/anat/derivatives/freesurfer
 73 | 
 74 | """
 75 | 
 76 | #default fmriprep bids filter
 77 | BIDS_FILTERS = {
 78 |   'fmap': {'datatype': 'fmap'},
 79 |   'bold': {'datatype': 'func', 'suffix': 'bold'},
 80 |   'sbref': {'datatype': 'func', 'suffix': 'sbref'},
 81 |   'flair': {'datatype': 'anat', 'suffix': 'FLAIR'},
 82 |   't2w': {'datatype': 'anat', 'suffix': 'T2w'},
 83 |   't1w': {'datatype': 'anat', 'suffix': 'T1w'},
 84 |   'roi': {'datatype': 'anat', 'suffix': 'roi'},
 85 | }
 86 | # old bids filter
 87 | # BIDS_FILTERS = {"t1w": {"reconstruction": None, "acquisition": None}, "t2w": {
 88 | #     "reconstruction": None, "acquisition": None}, "bold": {}}
 89 | 
 90 | 
 91 | def load_bidsignore(bids_root):
 92 |     """Load .bidsignore file from a BIDS dataset, returns list of regexps"""
 93 |     bids_ignore_path = os.path.join(bids_root, ".bidsignore")
 94 |     if os.path.exists(bids_ignore_path):
 95 |         with open(bids_ignore_path) as f:
 96 |             bids_ignores = f.read().splitlines()
 97 |         return tuple(
 98 |             [
 99 |                 re.compile(fnmatch.translate(bi))
100 |                 for bi in bids_ignores
101 |                 if len(bi) and bi.strip()[0] != "#"
102 |             ]
103 |         )
104 |     return tuple()
105 | 
106 | 
def write_job_footer(fd, jobname, bids_path, fmriprep_workdir, derivatives_name, output_path, resource_monitor=False):
    """Write the closing shell lines of a job script to ``fd``.

    The lines capture the exit code of the preceding command, copy the fmriprep
    working directory to ``output_path`` when that command failed, copy the
    derivatives (and, when ``resource_monitor`` is set, the resource monitor
    json) when it succeeded, and finally exit with the captured code.
    """
    rsync = "rsync -rltv --info=progress2"
    on_failure = "if [ $fmriprep_exitcode -ne 0 ] ; then {} ; fi \n"
    on_success = "if [ $fmriprep_exitcode -eq 0 ] ; then {} ; fi \n"

    # derivatives folder of the dataset copy living on the compute node
    node_derivatives = os.path.join(
        "$SLURM_TMPDIR", os.path.basename(bids_path), "derivatives", derivatives_name)
    final_derivatives = f"{output_path}/{derivatives_name}"

    footer = [
        "fmriprep_exitcode=$?\n",
        on_failure.format(
            f"{rsync} {fmriprep_workdir} {output_path}/{jobname}.workdir"),
    ]
    if resource_monitor:
        footer.append(on_success.format(
            f"{rsync} {fmriprep_workdir}/fmriprep_wf/resource_monitor.json {output_path}/{jobname}_resource_monitor.json"))
    footer.append(on_success.format(f"mkdir -p {final_derivatives}"))
    footer.append(on_success.format(
        f"{rsync} {node_derivatives}/* {final_derivatives}/"))
    footer.append("exit $fmriprep_exitcode \n")

    for line in footer:
        fd.write(line)
126 | 
127 | 
def write_fmriprep_job(layout, subject, args, anat_only=True):
    """Write the SLURM job script running fmriprep on one participant.

    Parameters
    ----------
    layout : object exposing ``root`` (root of the BIDS dataset)
    subject : str
        Participant label without the ``sub-`` prefix.
    args : argparse.Namespace
        Parsed command line; the attributes read here are output_path,
        slurm_account, email, bids_path, time, mem_per_cpu, cpus,
        derivatives_name, bids_filter, container, fmriprep_args and
        output_spaces.
    anat_only : bool
        Add ``--anat-only`` to the fmriprep command.

    Returns
    -------
    str
        Path of the written job script (under APPTAINER_OUTPUT_PATH).

    Side effects: writes ``bids_filters.json`` and the job script under
    APPTAINER_OUTPUT_PATH.
    """
    job_specs = dict(
        original_output=args.output_path,
        slurm_account=args.slurm_account,
        jobname=f"smriprep_sub-{subject}",
        email=args.email,
        bids_root=os.path.realpath(args.bids_path),
        bids_basename=os.path.basename(os.path.realpath(args.bids_path)),
        participant=f"sub-{subject}",
    )
    job_specs.update(SMRIPREP_REQ)
    # user supplied resources take precedence over the defaults
    if args.time:
        job_specs.update({"time": args.time, })
    if args.mem_per_cpu:
        job_specs.update({"mem_per_cpu": int(args.mem_per_cpu), })
    if args.cpus:
        job_specs.update({"cpus": int(args.cpus), })

    job_path = os.path.join(APPTAINER_OUTPUT_PATH,
                            SLURM_JOB_DIR, f"{job_specs['jobname']}.sh")

    derivatives_path = os.path.join(
        layout.root,
        "derivatives",
        args.derivatives_name
    )

    # use json load/dump to copy filters (and validate json in the meantime)
    bids_filters_path = os.path.join(
        APPTAINER_OUTPUT_PATH,
        "bids_filters.json")
    # Use the filter file given by the user when it exists; otherwise fall
    # back to the defaults (a missing file used to leave bids_filters unbound).
    bids_filters = BIDS_FILTERS
    if args.bids_filter:
        if os.path.exists(args.bids_filter):
            with open(args.bids_filter) as f:
                bids_filters = json.load(f)
        else:
            print(
                f"bids filter file {args.bids_filter} not found, using the default filters"
            )
    with open(bids_filters_path, "w") as f:
        json.dump(bids_filters, f)

    fmriprep_APPTAINER_path = os.path.join(
        FMRIPREP_DEFAULT_APPTAINER_FOLDER, args.container + ".sif")
    sing_fmriprep_workdir = os.path.join(
        APPTAINER_DATA_PATH, "fmriprep_work")
    # fmriprep_args is either the "" default or a list of strings; one list
    # item may hold several flags, so look for the flag in the joined string.
    extra_fmriprep_args = (
        args.fmriprep_args
        if isinstance(args.fmriprep_args, str)
        else " ".join(args.fmriprep_args)
    )
    resource_monitor = "--resource-monitor" in extra_fmriprep_args

    with open(job_path, "w") as f:
        f.write(slurm_preamble.format(**job_specs))
        f.write(
            " ".join(
                [
                    APPTAINER_CMD_BASE.format(
                        original_output=args.output_path, APPTAINER_OUTPUT_PATH=APPTAINER_OUTPUT_PATH),
                    fmriprep_APPTAINER_path,
                    f"-w {sing_fmriprep_workdir}",
                    f"--participant-label {subject}",
                    "--anat-only" if anat_only else "",
                    f" --bids-filter-file {bids_filters_path}",
                    extra_fmriprep_args,
                    " --output-spaces",
                    *args.output_spaces,
                    "--output-layout bids",
                    "--notrack",
                    "--skip_bids_validation",
                    "--write-graph",
                    f"--omp-nthreads {job_specs['omp_nthreads']}",
                    f"--nprocs {job_specs['cpus']}",
                    "--random-seed 0",
                    layout.root,
                    derivatives_path,
                    "participant",
                    "\n",
                ]
            )
        )
        # the work directory as seen from the job script (outside the container)
        fmriprep_workdir = os.path.join("$SLURM_TMPDIR", "fmriprep_work")
        write_job_footer(f, job_specs["jobname"], os.path.realpath(
            args.bids_path), fmriprep_workdir, args.derivatives_name, args.output_path, resource_monitor)
    return job_path
207 | 
208 | 
def write_func_job(layout, subject, session, args):
    """Write the SLURM job script running functional fmriprep on one session.

    Parameters
    ----------
    layout : object exposing ``root`` and ``get(...)`` (a pybids layout)
    subject : str
        Participant label without the ``sub-`` prefix.
    session : str
        Session label without the ``ses-`` prefix.
    args : argparse.Namespace
        Parsed command line; the attributes read here are output_path,
        slurm_account, email, bids_path, time, mem_per_cpu, cpus,
        derivatives_name, bids_filter, container, fmriprep_args and
        output_spaces.

    Returns
    -------
    (str, bool)
        Path of the written job script, and whether the preprocessed volume
        (first output space) and the fsLR 91k dtseries already exist for every
        bold run found (also True when no bold run is found).

    Side effects: writes ``<jobname>_bids_filters.json`` and the job script
    under APPTAINER_OUTPUT_PATH.
    """

    def _bids_stem(entity_pairs):
        # "subject"/"session" are shortened to "sub"/"ses", other keys kept
        return "_".join(
            "%s-%s" % (k[:3] if k in ["subject", "session"] else k, v)
            for k, v in entity_pairs
        )

    study = os.path.basename(layout.root)
    anat_path = os.path.join(
        APPTAINER_DATA_PATH,
        "anat",
        "derivatives",
    )
    derivatives_path = os.path.join(
        layout.root,
        "derivatives",
        args.derivatives_name)

    bold_runs = layout.get(
        subject=subject, session=session, extension=[".nii", ".nii.gz"], suffix="bold"
    )

    func_path = os.path.join(
        derivatives_path,
        "fmriprep",
        f"sub-{subject}",
        f"ses-{session}",
        "func",
    )
    bold_derivatives = []
    for bold_run in bold_runs:
        run_entities = bold_run.entities
        entities = [
            (ent, run_entities[ent])
            for ent in ["subject", "session", "task", "run"]
            if ent in run_entities
        ]
        preproc_entities = entities + [
            ("space", args.output_spaces[0]),
            ("desc", "preproc"),
        ]
        dtseries_entities = entities + [("space", "fsLR"), ("den", "91k")]
        preproc_path = os.path.join(
            func_path, _bids_stem(preproc_entities) + "_bold.nii.gz")
        dtseries_path = os.path.join(
            func_path, _bids_stem(dtseries_entities) + "_bold.dtseries.nii")
        # test if file or symlink (even broken if git-annex and not pulled)
        bold_deriv = os.path.lexists(
            preproc_path) and os.path.lexists(dtseries_path)
        if bold_deriv:
            print(
                f"found existing derivatives for {bold_run.path} : {preproc_path}, {dtseries_path}"
            )
        bold_derivatives.append(bold_deriv)
    outputs_exist = all(bold_derivatives)
    # n_runs = len(bold_runs)
    # run_shapes = [run.get_image().shape for run in bold_runs]
    # run_lengths = [rs[-1] for rs in run_shapes]
    bids_basename = os.path.basename(os.path.realpath(args.bids_path))
    job_specs = dict(
        original_output=args.output_path,
        slurm_account=args.slurm_account,
        jobname=f"fmriprep_study-{study}_sub-{subject}_ses-{session}",
        email=args.email,
        bids_root=os.path.realpath(args.bids_path),
        bids_basename=bids_basename,
        participant=f"sub-{subject}",
        fmriprep_path=os.path.join(os.path.realpath(args.output_path), args.derivatives_name),
        freesurfer_path=os.path.join(os.path.realpath(args.output_path), args.derivatives_name, "sourcedata", "freesurfer"),
    )
    job_specs.update(FMRIPREP_REQ)
    # user supplied resources take precedence over the defaults
    if args.time:
        job_specs.update({"time": args.time, })
    if args.mem_per_cpu:
        job_specs.update({"mem_per_cpu": int(args.mem_per_cpu), })
    if args.cpus:
        job_specs.update({"cpus": int(args.cpus), })

    job_path = os.path.join(APPTAINER_OUTPUT_PATH,
                            SLURM_JOB_DIR, f"{job_specs['jobname']}.sh")
    bids_filters_path = os.path.join(
        APPTAINER_OUTPUT_PATH,
        f"{job_specs['jobname']}_bids_filters.json"
    )

    # Use the filter file given by the user when it exists; otherwise fall
    # back to the defaults (a missing file used to leave bids_filters unbound).
    bids_filters = None
    if args.bids_filter:
        if os.path.exists(args.bids_filter):
            with open(args.bids_filter) as f:
                bids_filters = json.load(f)
        else:
            print(
                f"bids filter file {args.bids_filter} not found, using the default filters"
            )
    if bids_filters is None:
        # work on a copy so that the session does not leak into the
        # module-level defaults
        bids_filters = {name: dict(query)
                        for name, query in BIDS_FILTERS.items()}
    # filter for session (a user file may not define a "bold" entry)
    bids_filters.setdefault("bold", {}).update({"session": session})
    with open(bids_filters_path, "w") as f:
        json.dump(bids_filters, f)

    fmriprep_APPTAINER_path = os.path.join(
        FMRIPREP_DEFAULT_APPTAINER_FOLDER, args.container + ".sif")
    sing_fmriprep_workdir = os.path.join(
        APPTAINER_DATA_PATH, "fmriprep_work")
    # fmriprep_args is either the "" default or a list of strings; one list
    # item may hold several flags, so look for the flag in the joined string.
    extra_fmriprep_args = (
        args.fmriprep_args
        if isinstance(args.fmriprep_args, str)
        else " ".join(args.fmriprep_args)
    )
    resource_monitor = "--resource-monitor" in extra_fmriprep_args
    with open(job_path, "w") as f:
        f.write(slurm_preamble.format(**job_specs))
        f.write(copy_anat.format(**job_specs))
        f.write(
            " ".join(
                [
                    APPTAINER_CMD_BASE.format(
                        original_output=args.output_path, APPTAINER_OUTPUT_PATH=APPTAINER_OUTPUT_PATH),
                    fmriprep_APPTAINER_path,
                    f"-w {sing_fmriprep_workdir}",
                    f"--participant-label {subject}",
                    f"--anat-derivatives {anat_path}/fmriprep",
                    f"--fs-subjects-dir {anat_path}/freesurfer",
                    f" --bids-filter-file {bids_filters_path}",
                    " --ignore slicetiming",
                    "--use-syn-sdc",
                    extra_fmriprep_args,
                    "--output-spaces",
                    *args.output_spaces,
                    "--output-layout bids",
                    "--notrack",
                    "--write-graph",
                    "--skip_bids_validation",
                    f"--omp-nthreads {job_specs['omp_nthreads']}",
                    f"--nprocs {job_specs['cpus']}",
                    "--random-seed 0",
                    layout.root,
                    derivatives_path,
                    "participant",
                    "\n",
                ]
            )
        )
        # the work directory as seen from the job script (outside the container)
        fmriprep_workdir = os.path.join("$SLURM_TMPDIR", "fmriprep_work")
        write_job_footer(f, job_specs["jobname"], os.path.realpath(
            args.bids_path), fmriprep_workdir, args.derivatives_name, args.output_path, resource_monitor)

    return job_path, outputs_exist
359 | 
360 | 
def submit_slurm_job(job_path):
    """Submit the job script ``job_path`` with ``sbatch``.

    Returns the ``subprocess.CompletedProcess`` of the ``sbatch`` call; a
    failing submission does not raise.
    """
    sbatch_cmd = ["sbatch", job_path]
    completed = subprocess.run(sbatch_cmd)
    return completed
363 | 
364 | 
def run_fmriprep(layout, args):
    """Generate the job scripts and yield their paths one by one.

    Participants come from ``args.participant_label`` or, when that is empty,
    from ``layout.get_subjects()``; sessions come from ``args.session_label``
    or ``layout.get_sessions(subject=...)``. Depending on ``args.preproc``:

    - "func": one job per session, skipped (with a message) when all of its
      outputs already exist;
    - "anat": one anatomical-only job per participant;
    - "all": one full job per participant.

    Any other value yields nothing.
    """
    participants = args.participant_label or layout.get_subjects()

    for subject in participants:
        print(f"\t {subject}")
        sessions = args.session_label or layout.get_sessions(subject=subject)

        mode = args.preproc
        if mode in ("anat", "all"):
            yield write_fmriprep_job(
                layout, subject, args, anat_only=(mode == "anat"))
            continue
        if mode != "func":
            continue

        for session in sessions:
            job_path, outputs_exist = write_func_job(
                layout, subject, session, args)
            if not outputs_exist:
                yield job_path
            else:
                print(
                    f"all output already exists for sub-{subject} ses-{session}, not rerunning"
                )
392 | 
393 | 
def parse_args():
    """Build the command-line parser and parse the process arguments.

    Two positionals are required (``bids_path`` and ``derivatives_name``);
    everything else is optional. ``--preproc`` is restricted to the three
    modes handled by ``run_fmriprep`` so that a mistyped value is rejected
    by argparse instead of silently producing no job at all.

    Returns
    -------
    argparse.Namespace
        The parsed options, with dashes in option names turned into
        underscores (e.g. ``--output-path`` -> ``output_path``).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="create fmriprep jobs scripts",
    )
    parser.add_argument(
        "bids_path",
        help="BIDS folder to run fmriprep on."
    )
    parser.add_argument(
        "derivatives_name",
        help="name of the output folder in derivatives.",
    )
    parser.add_argument(
        "--output-path",
        action="store",
        type=str,
        help="output path for SLURM files, logs and also bids filters"
    )
    parser.add_argument(
        "--preproc",
        action="store",
        default=PREPROC_DEFAULT,
        # Only these modes are dispatched on; anything else would be a no-op.
        choices=("anat", "func", "all"),
        help="anat, func or all (default: all)"
    )
    parser.add_argument(
        "--slurm-account",
        action="store",
        default=SLURM_ACCOUNT_DEFAULT,
        help="SLURM account for job submission (default: rrg-pbellec)",
    )
    parser.add_argument(
        "--email",
        action="store",
        help="email for SLURM notifications"
    )
    parser.add_argument(
        "--container",
        action="store",
        default=FMRIPREP_DEFAULT_VERSION,
        help="name of the fmriprep APPTAINER container under the default container location (default: fmriprep-20.2.1lts)"
    )
    parser.add_argument(
        "--participant-label",
        action="store",
        nargs="+",
        help="a space delimited list of participant identifiers or a single "
        "identifier (the sub- prefix can be removed)",
    )
    parser.add_argument(
        "--output-spaces",
        action="store",
        nargs="+",
        default=OUTPUT_SPACES_DEFAULT,
        help="a space delimited list of templates as defined by templateflow "
        "(default: [\"MNI152NLin2009cAsym\", \"MNI152NLin6Asym\"])",
    )
    # NOTE(review): with nargs='+' a provided value is a list, whereas the
    # default is an empty string; consumers must cope with both - confirm.
    parser.add_argument(
        "--fmriprep-args",
        action="store",
        type=str,
        nargs='+',
        default="",
        help="additionnal arguments to the fmriprep command as a string (ex: --fmriprep-args=\"--fs-no-reconall --use-aroma\") ",
    )
    parser.add_argument(
        "--session-label",
        action="store",
        nargs="+",
        help="a space delimited list of session identifiers or a single "
        "identifier (the ses- prefix can be removed)",
    )
    parser.add_argument(
        "--force-reindex",
        action="store_true",
        help="Force pyBIDS reset_database and reindexing",
    )
    parser.add_argument(
        "--submit",
        action="store_true",
        help="Submit SLURM jobs",
    )
    parser.add_argument(
        "--bids-filter",
        action="store",
        help="Path to an optionnal bids_filter.json template",
    )
    # No default is set for the three resource options below; the values
    # quoted in their help text are not applied by this parser.
    parser.add_argument(
        "--time",
        action="store",
        help="Time duration for the slurm job in slurm format (dd-)hh:mm:ss "
        "(default: 24h)",
    )
    parser.add_argument(
        "--mem-per-cpu",
        action="store",
        help="upper bound memory limit for fMRIPrep processes "
        "(default: 16384MB)",
    )
    parser.add_argument(
        "--cpus",
        action="store",
        help="maximum number of cpus for all processes "
        "(default: 1)",
    )

    return parser.parse_args()
501 | 
502 | 
def main():
    """Command-line entry point: index the dataset, then write the jobs.

    Steps, in order:

    1. parse the command line and echo the options;
    2. build the pyBIDS layout of the dataset located under
       ``APPTAINER_DATA_PATH``;
    3. make sure the SLURM job directory exists under
       ``APPTAINER_OUTPUT_PATH``;
    4. fetch the templateflow templates for the requested output spaces;
    5. generate the job scripts and, with ``--submit``, hand each one to
       ``sbatch``. A non-zero ``sbatch`` status is reported but does not
       stop the remaining submissions.
    """
    args = parse_args()
    print("\n### Running fmriprep-slurm\n")
    print(vars(args))

    print("\n# Loading pyBIDS database (it might take few hours for a big dataset)...\n")
    # Only the last component of the given path is kept and re-rooted under
    # APPTAINER_DATA_PATH (presumably the container-side mount - confirm).
    sing_bids_path = os.path.join(
        APPTAINER_DATA_PATH, os.path.basename(args.bids_path))
    layout = bids.BIDSLayout(
        sing_bids_path,
        reset_database=args.force_reindex,
        ignore=(
            "code",
            "stimuli",
            "sourcedata",
            "models",
            re.compile(r"^\."),
        )
        + load_bidsignore(sing_bids_path),
    )
    job_path = os.path.join(APPTAINER_OUTPUT_PATH, SLURM_JOB_DIR)
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race and also
    # creates a missing parent directory.
    os.makedirs(job_path, exist_ok=True)

    print("\n# Prefectch templateflow templates ...\n")
    # TEMPLATEFLOW_HOME is set in the environment before the fetch call.
    os.environ["TEMPLATEFLOW_HOME"] = TEMPLATEFLOW_HOME
    tf_api.get(args.output_spaces + ["OASIS30ANTs", "fsLR", "fsaverage"])

    print("\n# Processing slurm files into {}\n".format(
        os.path.join(args.output_path, SLURM_JOB_DIR)))
    # run_fmriprep is a generator: each job file is written when it is pulled.
    for job_file in run_fmriprep(layout, args):
        if args.submit:
            result = submit_slurm_job(job_file)
            if result.returncode != 0:
                # Report the failure but keep submitting the remaining jobs.
                print(
                    f"WARNING: sbatch exited with status {result.returncode} for {job_file}"
                )
538 | 
539 | 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
542 | 


--------------------------------------------------------------------------------