├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ └── python-package-conda.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── Makefile ├── README.md ├── docs ├── Makefile ├── conf.py ├── examples.rst ├── icc_funcs.rst ├── img_png │ ├── atlases_ex-deterministic.jpg │ ├── atlases_ex-probabilistic.jpg │ ├── brainicc_fig.png │ ├── brainicc_fig.psd │ ├── est_difumo.png │ ├── example_voxelwiseicc.png │ ├── fake_cuelocktimes.png │ ├── fake_events-timeseries.png │ ├── fake_events.png │ ├── fake_timeseries.png │ ├── hcp_handfoot.png │ ├── intraclasscorr_example.png │ ├── maskedtimeseries_example.png │ ├── maskedtimeseries_example.psd │ ├── n3_nacctimeseries.png │ ├── pyrelimri_fig.png │ ├── pyrelimri_fig.psd │ ├── pyrelimri_logo.png │ ├── pyrelimri_logo.psd │ ├── roiicc_ex-shaefer400.jpg │ ├── roiicc_fig.png │ ├── roiicc_fig.psd │ ├── similarity_fig.png │ ├── similarity_fig.psd │ ├── trlock-gainsneutrals_mid.png │ └── voxelwise_example.png ├── index.rst ├── install.rst ├── introduction.rst ├── make.bat ├── requirements.txt ├── similarity_funcs.rst ├── timeseries_extract.rst └── usage.rst ├── pyrelimri ├── __init__.py ├── brain_icc.py ├── conn_icc.py ├── icc.py ├── masked_timeseries.py ├── mktestdata.py ├── similarity.py └── tetrachoric_correlation.py ├── requirements.txt ├── setup.py ├── test.npy ├── tests ├── test_brainicc.py ├── test_connicc.py ├── test_maskedtimeseries.py └── test_similarity-icc.py └── tools ├── local_gitignore └── make_gitignore /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/python-package-conda.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: ["3.8", "3.9", "3.10", "3.11"] 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 24 | - name: Lint with flake8 25 | run: | 26 | # stop the build if there are Python syntax errors or undefined names 27 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 28 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 29 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 30 | - name: Test with pytest 31 | run: | 32 | python -m pytest tests/*.py 33 | 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Auto-generated by make_gitignore on Sat Sep 16 11:55:45 PDT 2023 2 | # Add repository-specific ignores here 3 | .idea/ 4 | Reliability_calcs.ipynb 5 | tests/reliability_tests.ipynb 6 | pyrelimri/mktestdata.py 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | 168 | # -*- mode: gitignore; -*- 169 | *~ 170 | \#*\# 171 | /.emacs.desktop 172 | /.emacs.desktop.lock 173 | *.elc 174 | auto-save-list 175 | tramp 176 | .\#* 177 | 178 | # Org-mode 179 | .org-id-locations 180 | *_archive 181 | 182 | # flymake-mode 183 | *_flymake.* 184 | 185 | # eshell files 186 | /eshell/history 187 | /eshell/lastdir 188 | 189 | # elpa packages 190 | /elpa/ 191 | 192 | # reftex files 193 | *.rel 194 | 195 | # AUCTeX auto folder 196 | /auto/ 197 | 198 | # cask packages 199 | .cask/ 200 | dist/ 201 | 202 | # Flycheck 203 | flycheck_*.el 204 | 205 | # server auth directory 206 | /server/ 207 | 208 | # projectiles files 209 | .projectile 210 | 211 | # directory configuration 212 | .dir-locals.el 213 | 214 | # network security 215 | /network-security.data 216 | 217 | 218 | *~ 219 | 220 | # temporary files which can be created if a process still has a handle open of a deleted file 221 | .fuse_hidden* 222 | 223 | # KDE directory preferences 224 | .directory 225 | 226 | # Linux trash folder which might appear on any partition or disk 227 | .Trash-* 228 | 229 | # .nfs files are created when an open file is removed but is still being accessed 230 | .nfs* 231 | 232 | # Swap 233 | [._]*.s[a-v][a-z] 234 | !*.svg # comment out if you don't need vector files 235 | [._]*.sw[a-p] 236 | [._]s[a-rt-v][a-z] 237 | [._]ss[a-gi-z] 238 | [._]sw[a-p] 239 | 240 | # Session 241 | Session.vim 242 | Sessionx.vim 243 | 244 | # Temporary 245 | .netrwhist 246 | *~ 247 | # Auto-generated tag files 248 | tags 249 | # Persistent undo 250 | [._]*.un~ 251 | 252 | .vscode/* 253 | !.vscode/settings.json 254 | !.vscode/tasks.json 255 | !.vscode/launch.json 256 | !.vscode/extensions.json 257 | !.vscode/*.code-snippets 258 | 259 | # Local History for Visual Studio Code 260 | .history/ 261 | 262 | # Built Visual Studio Code Extensions 263 | *.vsix 264 | 265 | # Windows thumbnail cache files 266 | Thumbs.db 267 | Thumbs.db:encryptable 268 | ehthumbs.db 269 | ehthumbs_vista.db 270 | 271 | # Dump file 272 | *.stackdump 273 | 274 | # Folder config file 275 | [Dd]esktop.ini 276 | 277 | # Recycle Bin used on file shares 278 | $RECYCLE.BIN/ 279 | 280 | # Windows Installer files 281 | *.cab 282 | *.msi 283 | *.msix 284 | *.msm 285 | *.msp 286 | 287 | # Windows shortcuts 288 | *.lnk 289 | 290 | # General 291 | .DS_Store 292 | .AppleDouble 293 | .LSOverride 294 | 295 | # Icon must end with two \r 296 | Icon 297 | 298 | # Thumbnails 299 | ._* 300 | 301 | # Files that might appear in the root of a volume 302 | .DocumentRevisions-V100 303 | .fseventsd 304 | .Spotlight-V100 305 | .TemporaryItems 306 | .Trashes 307 | .VolumeIcon.icns 308 | .com.apple.timemachine.donotpresent 309 | 310 | # Directories potentially created on remote AFP share 311 | .AppleDB 312 | .AppleDesktop 313 | Network Trash Folder 314 | Temporary Items 315 | .apdisk 316 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Required 2 | version: 2 3 | 4 | # Set the version of Python and other tools you might need 5 | build: 6 | os: "ubuntu-22.04" 7 | tools: 8 | python: "3.11" 9 | 10 | # Build documentation in the docs/ directory with Sphinx 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | # Explicitly set the version of Python and its requirements & install local pyreli 15 | python: 16 | install: 17 | - method: setuptools 18 | path: . 
19 | - requirements: docs/requirements.txt 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Michael Demidenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for running tests with pytest 2 | # To run: make 3 | # Example: make brain_icc <- runs tests for all in test_brainicc.py 4 | 5 | # Default target 6 | .PHONY: brain_icc 7 | brain_icc: 8 | python -m pytest tests/test_brainicc.py 9 | 10 | # Default target 11 | .PHONY: conn_icc 12 | conn_icc: 13 | python -m pytest tests/test_connicc.py 14 | 15 | 16 | # Default target 17 | .PHONY: timeseries 18 | timeseries: 19 | python -m pytest tests/test_maskedtimeseries.py 20 | 21 | # Default target 22 | .PHONY: similarity_icc 23 | similarity_icc: 24 | python -m pytest tests/test_similarity-icc.py 25 | 26 | # Target to run all tests in the tests directory 27 | .PHONY: test-all 28 | test-all: 29 | python -m pytest -v tests/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyReliMRI: Python-based Reliability in MRI 2 | 3 | [![Python package](https://github.com/demidenm/PyReliMRI/actions/workflows/python-package-conda.yml/badge.svg?style=plastic&logo=Python)](https://github.com/demidenm/PyReliMRI/actions/workflows/python-package-conda.yml) 4 | [![Documentation Status](https://readthedocs.org/projects/pyrelimri/badge/?version=latest&style=plastic)](https://pyrelimri.readthedocs.io/en/latest/?badge=latest&style=plastic) 5 | [![PyPI](https://img.shields.io/pypi/v/PyReliMRI.svg)](https://pypi.org/project/PyReliMRI/) 6 | [![Funded By](https://img.shields.io/badge/NIDA-F32%20DA055334--01A1-yellowgreen?style=plastic)](https://reporter.nih.gov/project-details/10525501) 7 | [![DOI](https://zenodo.org/badge/576430868.svg)](https://zenodo.org/badge/latestdoi/576430868) 8 | 9 | Pyrelimri Logo 10 | 11 | ## Introduction 12 | 13 | PyReliMRI provides multiple reliability metrics for task fMRI and resting state fMRI data, essential \ 14 | for assessing the consistency and reproducibility of MRI-based research. 
The package is described and used in the [Preprint](https://www.doi.org/10.1101/2024.03.19.585755) 15 | Pyrelimri Features 16 | 17 | 18 | ## Authors 19 | 20 | - [Michael I. Demidenko](https://orcid.org/0000-0001-9270-0124) 21 | - [Jeanette A. Mumford](https://orcid.org/0000-0002-0926-3531) 22 | - [Russell A. Poldrack](https://orcid.org/0000-0001-6755-0259) 23 | 24 | ### Citation 25 | 26 | If you use PyReliMRI in your research, please cite it using the following DOI: 27 | 28 | Demidenko, M., Mumford, J., & Poldrack, R. (2024). PyReliMRI: An Open-source Python tool for Estimates of Reliability \ 29 | in MRI Data (2.1.0) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.12522260 30 | 31 | 32 | ## Purpose 33 | 34 | Reliability questions for [task fMRI](https://www.doi.org/10.1177/0956797620916786) and [resting state fMRI](https://www.doi.org/10.1016/j.neuroimage.2019.116157) are increasing. As described in [2010](https://www.doi.org/10.1111/j.1749-6632.2010.05446.x), there are various ways that researchers calculate reliability. Few open-source packages exist to calculate multiple individual and group reliability metrics using one tool. 35 | PyReliMRI offers comprehensive tools for calculating reliability metrics in MRI data at both individual and group levels. It supports various MRI analysis scenarios including multi-run and multi-session studies. 36 | 37 | ### Features 38 | 39 | - **Group Level:** 40 | - `similarity.py`: Calculates similarity coefficients between fMRI images. 41 | - `icc.py`: Computes Intraclass Correlation Coefficients (ICC) across subjects. 42 | 43 | - **Individual Level:** 44 | - `brain_icc.py`: Computes voxel-wise and atlas-based ICC. 45 | - `conn_icc.py`: Estimates ICC for precomputed correlation matrices. 46 | 47 | - **Utility:** 48 | - `masked_timeseries.py`: Extracts and processes timeseries data from ROI masks or coordinates. 49 | 50 | ## Scripts Overview 51 | 52 | | **Script Name** | **Functions** | **Inputs** | **Purpose** | 53 | |:-----------------------------------|:-------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 54 | | [brain_icc.py](/pyrelimri/brain_icc.py) | voxelwise_icc, roi_icc | See detailed descriptions for required and optional inputs. | Calculates intraclass correlation (ICC) metrics for voxel-wise and ROI-based data, supporting various ICC types and outputs. | 55 | | [icc.py](/pyrelimri/icc.py) | sumsq_total, sumsq, sumsq_btwn, icc_confint, sumsq_icc | Panda long dataframe with subject, session, scores, and ICC type inputs required. | Computes sum of squares and ICC estimates with confidence intervals, useful for assessing reliability across measurements. | 56 | | [similarity.py](/pyrelimri/similarity.py) | image_similarity, pairwise_similarity | Input paths for Nifti images and optional parameters for image similarity calculations. | Computes similarity coefficients between fMRI images, facilitating pairwise comparisons and similarity type selection. | 57 | | [conn_icc.py](/pyrelimri/conn_icc.py) | triang_to_fullmat, edgewise_icc | List of paths to precomputed correlation matrices as required inputs. 
| Calculates ICC for edge-wise correlations in precomputed matrices, enhancing reliability assessment in connectivity studies. | 58 | | [masked_timeseries.py](/pyrelimri/masked_timeseries.py) | extract_time_series, extract_postcue_trs_for_conditions | Detailed inputs required for various functions: extract_time_series, extract_postcue_trs_for_conditions, etc. | Extracts and processes timeseries data from BOLD images, supporting ROI-based analysis and event-locked responses for functional MRI studies. | 59 | 60 | 61 | ## Conclusion 62 | 63 | PyReliMRI simplifies the calculation of reliability metrics for MRI data, supporting both research and clinical applications. For detailed usage instructions, visit the [documentation](https://pyrelimri.readthedocs.io/en/latest/). 64 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # rapidtide documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Jun 16 15:27:19 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import os 17 | import sys 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath("sphinxext")) 23 | sys.path.insert(0, os.path.abspath("../pyrelimri")) 24 | 25 | 26 | # -- Project information ----------------------------------------------------- 27 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 28 | 29 | project = 'Python-based Reliability in MRI (PyReliMRI)' 30 | copyright = '2024, Michael I. Demidenko & Russell A. Poldrack' 31 | author = 'Michael I. Demidenko & Russell A. Poldrack' 32 | 33 | # -- General configuration --------------------------------------------------- 34 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 
37 | # needs_sphinx = '1.0' 38 | pdf_break_level = 2 39 | 40 | # generate autosummary even if no references 41 | autosummary_generate = True 42 | autodoc_default_flags = ["members", "inherited-members"] 43 | add_module_names = True 44 | 45 | # Add any Sphinx extension module names here, as strings. They can be 46 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 47 | # ones. 48 | extensions = [ 49 | 'sphinx.ext.autodoc', 50 | 'sphinx.ext.napoleon', 51 | 'sphinx.ext.intersphinx', 52 | 'sphinx.ext.viewcode', 53 | 'sphinx_rtd_theme', 54 | ] 55 | 56 | # -- meta tags for base URL --- 57 | html_context = { 58 | 'meta_tags': [ 59 | ('robots', 'index, follow'), 60 | ('googlebot', 'index, follow'), 61 | ('bingbot', 'index, follow'), 62 | ('slurp', 'index, follow'), 63 | ], 64 | 'canonical_url': 'https://pyrelimri.readthedocs.io/', 65 | } 66 | 67 | # -- Options for HTML output ------------------------------------------------- 68 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 69 | 70 | # -- Options for HTML output ---------------------------------------------- 71 | 72 | # The theme to use for HTML and HTML Help pages. See the documentation for 73 | # a list of builtin themes. 74 | # html_theme = 'sphinxdoc' 75 | import sphinx_rtd_theme 76 | 77 | html_theme = "sphinx_rtd_theme" 78 | 79 | # Theme options are theme-specific and customize the look and feel of a theme 80 | # further. For a list of options available for each theme, see the 81 | # documentation. 82 | # html_theme_options = {} 83 | 84 | # Add any paths that contain custom themes here, relative to this directory. 85 | # html_theme_path = [] 86 | 87 | # The name for this set of Sphinx documents. 88 | # " v documentation" by default. 89 | # html_title = 'rapidtide v0.1.0' 90 | 91 | # A shorter title for the navigation bar. Default is the same as html_title. 92 | # html_short_title = None 93 | 94 | # The name of an image file (relative to this directory) to place at the top 95 | # of the sidebar. 96 | # html_logo = None 97 | 98 | # The name of an image file (relative to this directory) to use as a favicon of 99 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 100 | # pixels large. 101 | # html_favicon = None 102 | 103 | # Add any paths that contain custom static files (such as style sheets) here, 104 | # relative to this directory. They are copied after the builtin static files, 105 | # so a file named "default.css" will overwrite the builtin "default.css". 106 | html_static_path = ["_static"] 107 | 108 | 109 | # Add any extra paths that contain custom files (such as robots.txt or 110 | # .htaccess) here, relative to this directory. These files are copied 111 | # directly to the root of the documentation. 112 | # html_extra_path = [] 113 | 114 | # If not None, a 'Last updated on:' timestamp is inserted at every page 115 | # bottom, using the given strftime format. 116 | # The empty string is equivalent to '%b %d, %Y'. 117 | # html_last_updated_fmt = None 118 | 119 | # If true, SmartyPants will be used to convert quotes and dashes to 120 | # typographically correct entities. 121 | # html_use_smartypants = True 122 | 123 | # Custom sidebar templates, maps document names to template names. 124 | # html_sidebars = {} 125 | 126 | # Additional templates that should be rendered to pages, maps page names to 127 | # template names. 128 | # html_additional_pages = {} 129 | 130 | # If false, no module index is generated. 
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
# html_search_language = 'en'

# A dictionary with options for the search language support, empty by default.
# 'ja' uses this config value.
# 'zh' user can custom change `jieba` dictionary path.
# html_search_options = {'type': 'default'}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = "PyReliMRIdoc"
--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
Brief Description
=================

The PyReliMRI package integrates several modules designed to facilitate reliability estimation on MRI data. \
The code is simplified by leveraging features from `Nilearn <https://nilearn.github.io/stable/index.html>`_. \
These modules can be categorized into three main groups:

Similarity and Tetrachoric Correlation
---------------------------------------

- `similarity.py`: Computes similarity coefficients (Dice, Jaccard, etc.) between 3D Nifti images. Includes functions like `image_similarity` for pairwise comparisons.

- `tetrachoric_correlation.py`: Calculates the tetrachoric correlation between binary vectors, useful for comparing binarized (thresholded) brain maps.

Intraclass Correlation
-----------------------

- `icc.py`: Computes various components used in ICC calculations, such as ICC(1), ICC(2,1), or ICC(3,1), along with confidence intervals and variance components.

- `brain_icc.py`: Calculates voxelwise and ROI-based ICCs across multiple runs/sessions. Integrates with Nilearn datasets for atlas options, facilitating quick atlas integration.

- `conn_icc.py`: Estimates ICC for precomputed correlation matrices, useful for connectivity analyses.

Stimulus-Locked TR-by-TR Timeseries
-------------------------------------

The `masked_timeseries.py` module provides functionality for extracting and processing stimulus-locked timeseries data from BOLD images. It includes methods for ROI-based analysis and event-locked responses.
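As a quick orientation, each module is imported directly from the package. A minimal sketch (the dataframe, path lists and mask file here are hypothetical placeholders; see the detailed pages for the full inputs):

.. code-block:: python

    from pyrelimri import icc, brain_icc, conn_icc, similarity, masked_timeseries

    # long-data level: ICC(3,1) from a long dataframe with subject, session and score columns
    est, lb, ub, btwnsub, wthnsub, btwnmeas = icc.sumsq_icc(
        df_long=long_df, sub_var="subid", sess_var="sess",
        value_var="vals", icc_type="icc_3")

    # individual level: voxelwise ICC across two sessions of z/t/beta maps
    icc_maps = brain_icc.voxelwise_icc(
        multisession_list=[sess1_paths, sess2_paths],
        mask="brain_mask.nii.gz", icc_type="icc_3")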
Combined, these modules support a wide range of reliability assessments in MRI studies, from basic similarity metrics to advanced ICC calculations and timeseries analysis.
--------------------------------------------------------------------------------
/docs/icc_funcs.rst:
--------------------------------------------------------------------------------
Intraclass Correlation Functions
================================

The intraclass correlation (ICC) estimates are a complement to the similarity functions. The variability/similarity \
in the data can be parsed in several ways. One can estimate how similar things are above a threshold (e.g., 1/0) or \
how similar specific continuous estimates are across subjects. The ICC is used for the latter.

Three modules with examples are reviewed here: the `icc`, `brain_icc` and `conn_icc` modules. The first covers the manual estimation \
of the ICC components, such as the (1) :math:`SS_{total}`, (2) :math:`SS_{within}` and (3) :math:`SS_{between}`, \
and their associated mean sums of squares (variance components 1-3 divided by their specific degrees of freedom), in the `icc` module. \
These ICC estimates are then calculated on a voxel-by-voxel basis using the `voxelwise_icc` function in the `brain_icc` module, \
and on an ROI-by-ROI basis using `roi_icc`. Finally, an example is provided using the `conn_icc` module to estimate edgewise ICC \
on matrices.


icc
---

While `icc` is within the package for MRI reliability estimates, it can still be used to calculate different values on dataframes. \
Below I describe the different components and use the ``anagrams`` dataset from `seaborn <https://seaborn.pydata.org>`_ \
as the example for each of these components.

The first 5 rows of the anagrams data are:

+--------+---------+------+------+------+
| subidr | attnr   | num1 | num2 | num3 |
+========+=========+======+======+======+
| 1      | divided | 2    | 4    | 7    |
+--------+---------+------+------+------+
| 2      | divided | 3    | 4    | 5    |
+--------+---------+------+------+------+
| 3      | divided | 3    | 5    | 6    |
+--------+---------+------+------+------+
| 4      | divided | 5    | 7    | 5    |
+--------+---------+------+------+------+
| 5      | divided | 4    | 5    | 8    |
+--------+---------+------+------+------+

We can load the example data, filter to only use the `divided` values and convert the data into a long format:

.. code-block:: python

    import pandas as pd
    import seaborn as sns

    data = sns.load_dataset('anagrams')  # load
    a_wd = data[data['attnr'] == 'divided']  # filter
    # convert to long: the subject/id variable stays `subidr`, and the values are stacked from `num1`, `num2`, `num3`;
    # the values will be stored in the column `vals` and the session labels (from num1-num3) in `sess`
    long_df = pd.DataFrame(
        pd.melt(a_wd,
                id_vars="subidr",
                value_vars=["num1", "num2", "num3"],
                var_name="sess",
                value_name="vals"))

**sumsq_total**

The sum of squares total is the estimate of the total variance across all subjects and measurement occasions. Expressed \
by the formula:
.. math::

   \text{sumsq\_total}(df_{long}, values) = \sum_{i=1}^{n}(x_i - \bar{x})^2

where:
 * :math:`df_{long}` = pandas DataFrame (df) in long format \
 * values = string name of the column containing the scores in df \
 * :math:`x_i` = each value in the column specified by `values` in df \
 * :math:`\bar x` = the global mean of the `values` column in df

Using the anagrams `long_df` I'll calculate the sum of squares total using:

.. code-block:: python

    from pyrelimri import icc

    icc.sumsq_total(df_long=long_df, values="vals")

We will get a result of 71.8 for the `total` sum of squares.

**sumsq_within**

.. math::

   \text{sumsq\_within}(df_{long}, sessions, values, n_{subjects}) = n_{subjects} \sum_{i=1}^m (\overline{x}_i - \overline{x})^2

where:
 * :math:`df_{long}` = pandas DataFrame in long format \
 * sessions = string name of the session (repeated measurement) column in df \
 * values = string name of the column containing the scores in df \
 * :math:`n_{subjects}` = the number of subjects in df \
 * :math:`\overline{x}_i` = the mean of the `values` column for session `i` in df \
 * :math:`\overline{x}` = the global mean of the `values` column in df \
 * m = the number of sessions


We can calculate the sum of squares within using the below:

.. code-block:: python

    # if you haven't imported the package already
    from pyrelimri import icc

    icc.sumsq_within(df_long=long_df, sessions="sess", values="vals", n_subjects=10)

We will get a result of 29.2 for the sum of squares of the `within`-subject (session) factor.

**sumsq_btwn**

.. math::

   \text{sumsq\_btwn}(df_{long}, subj, values, n_{sessions}) = n_{sessions} \sum_{i=1}^s (\overline{x}_i - \overline{x})^2

where:
 * :math:`df_{long}` = pandas DataFrame in long format \
 * subj = string name of the subject column in df \
 * values = string name of the column containing the scores in df \
 * :math:`n_{sessions}` = the number of sessions in df \
 * :math:`\overline{x}_i` = the mean of the `values` column for subject `i` in df \
 * :math:`\overline{x}` = the global mean of the `values` column in df \
 * s = the number of subjects

.. code-block:: python

    # if you haven't imported the package already
    from pyrelimri import icc

    icc.sumsq_btwn(df_long=long_df, subj="subidr", values="vals", n_sessions=3)  # 3 = num1-num3

We will get a result of 20.0 for the sum of squares of the `between`-subject factor.

Note: If you recall that ICC is a decomposition of the `total` variance, you'll notice that 29.2 + 20.0 \
do not sum to the total variance, 71.8. This is because there are also the subj*sess variance component \
and the residual variance (71.8 - 29.2 - 20.0 = 22.6, matching the `subidr * sess` row below). You can review this in an ANOVA table:
.. code-block:: python

    # if you haven't imported the package already
    from pingouin import anova

    round(anova(dv='vals', between=['subidr', 'sess'], data=long_df, detailed=True), 3)

+---------------+-----------+----+-----------+-----+
| Source        | SS        | DF | MS        | np2 |
+===============+===========+====+===========+=====+
| subidr        | 20.008    | 9  | 2.223     | 1.0 |
+---------------+-----------+----+-----------+-----+
| sess          | 29.217    | 2  | 14.608    | 1.0 |
+---------------+-----------+----+-----------+-----+
| subidr * sess | 22.617    | 18 | 1.256     | 1.0 |
+---------------+-----------+----+-----------+-----+
| Residual      | 0.000     | 0  | -         | -   |
+---------------+-----------+----+-----------+-----+


**icc_confint**

For each ICC estimate that can be requested, ICC(1), ICC(2,1) and ICC(3,1), a confidence interval \
is returned for each associated ICC estimate. The implementation for the confidence interval is the same as in \
the `pingouin <https://pingouin-stats.org>`_ package in Python and the `ICC()` function from the `psych` \
package in R.


**sumsq_icc**

Now that the internal calculations of the ICC have been reviewed, I will use the package to get the values of interest. \
The formulas for ICC(1), ICC(2,1) and ICC(3,1) are described below.

.. math:: \text{ICC(1)} = \frac{MS_{Btwn} - MS_{Wthn}}{MS_{Btwn} + (\text{sess} - 1) MS_{Wthn}}

.. math:: \text{ICC(2,1)} = \frac{MS_{Btwn} - MS_{Err}}{MS_{Btwn} + (\text{sess} - 1) MS_{Err} + \text{sess} \left( \frac{MS_{Sess} - MS_{Err}}{N_{subjs}} \right)}

.. math:: \text{ICC(3,1)} = \frac{MS_{Btwn} - MS_{Err}}{MS_{Btwn} + (\text{sess} - 1) MS_{Err}}


Where:

- :math:`MS_{Btwn}`: mean square between subjects
- :math:`MS_{Wthn}`: mean square within subjects
- :math:`MS_{Err}`: mean squared residual error
- :math:`MS_{Sess}`: mean square of sessions
- :math:`\text{sess}`: the number of sessions
- :math:`N_{subjs}`: the number of subjects

In terms of the above ICC(1), ICC(2,1) and ICC(3,1) formulas, these are also written in Table 1 in `Liljequist et al., 2019 `_
as below. These are in terms of between subject variance (deviation from mean for :math:`subject_i` = :math:`\sigma_r^2`), \
noise/within subject variance (variance in measurement :math:`j` for :math:`subject_i` = :math:`\sigma_v^2`), and \
measurement additive bias (bias in measurement :math:`j` = :math:`\sigma_c^2`):

.. math:: \text{ICC(1)} = \frac{\sigma_r^2}{\sigma_r^2 + \sigma_v^2}

.. math:: \text{ICC(2,1)} = \frac{\sigma_r^2}{\sigma_r^2 + \sigma_c^2 + \sigma_v^2}

.. math:: \text{ICC(3,1)} = \frac{\sigma_r^2}{\sigma_r^2 + \sigma_v^2}

Note that ICC(1) and ICC(3,1) take the same form in these variance components; they differ in the model (and mean squares) used to estimate them.

Hence, `sumsq_icc` can be used on a dataset with multiple subjects and 1+ measurement occasions. The ICC can be calculated \
for the anagrams data referenced above.
Note: the required inputs are a long dataframe, the subject variable, the \
session variable and the value scores variable contained in the long dataframe, plus the \
icc to return (options: icc_1, icc_2, icc_3; default: icc_3).
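Because the confidence intervals mirror the pingouin implementation, a quick, optional sanity check of the estimates below is to run the same long dataframe through pingouin's `intraclass_corr`, which reports all ICC variants in one table (a cross-check aside, not part of PyReliMRI):

.. code-block:: python

    import pingouin as pg

    # returns a table with ICC1, ICC2, ICC3 (single measure) and ICC1k/2k/3k
    # (average measure) rows, each with a 95% confidence interval
    pg.intraclass_corr(data=long_df, targets='subidr',
                       raters='sess', ratings='vals')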
The `sumsq_icc` function will return six values: the ICC estimate, the lower bound of the 95% confidence interval, \
the upper bound of the 95% confidence interval and, specific to each computation, the between-subject variance (:math:`\sigma_r^2`), \
the within-subject variance (:math:`\sigma_v^2`) and, in the case of ICC(2,1), the between-measure variance (:math:`\sigma_c^2`). \
This information will print to the terminal or can be saved to six variables. Example:

.. code-block:: python

    # if you haven't imported the package already
    from pyrelimri import icc

    icc3, icc3_lb, icc3_up, icc3_btwnsub, \
        icc3_withinsub, _ = icc.sumsq_icc(df_long=long_df, sub_var="subidr",
                                          sess_var="sess", value_var="vals", icc_type="icc_3")

This will store the six associated values in the six variables:
 - `icc3`: ICC estimate
 - `icc3_lb`: 95% lower bound CI for the ICC estimate
 - `icc3_up`: 95% upper bound CI for the ICC estimate
 - `icc3_btwnsub`: between-subject variance used for the ICC estimate (:math:`\sigma_r^2`)
 - `icc3_withinsub`: within-subject variance used for the ICC estimate (:math:`\sigma_v^2`)
 - `_`: placeholder for the between-measure variance (:math:`\sigma_c^2`), which is not computed for ICC(3,1)

Reminder: If there are NaN/missing values, `icc` implements a mean replacement of all column values. If this is not preferred, handle missing/unbalanced \
cases beforehand.


brain_icc
---------

The `brain_icc` module is a wrapper around the `icc` module. \
In short, the `voxelwise_icc` function within the `brain_icc` module calculates the ICC for 3D nifti brain images \
across subjects and sessions on a voxel-by-voxel basis.

Here are the steps it uses:

- Function takes a list of paths to the 3D nifti brain images for each session, the path to the nifti mask object, and the ICC type to be calculated.
- Function checks that there are the same number of files in each session (e.g., list[0], list[1], etc.) and raises an error if they are of different lengths.
- Function concatenates the 3D images into a 4D nifti image (the 4th dimension is subjects) using image.concat_imgs().
- Function uses the provided nifti mask to mask the images using NiftiMasker.
- Function loops over the voxels (i.e., `imgdata[0].shape[-1]`) and creates a pandas DataFrame with the voxel values for each subject and session for sumsq_icc().
- The function calculates and returns a dictionary with six 3D volumes: est, lower (lower_bound) and upper (upper_bound) of the ICC 95% confidence interval, and the between-subject, within-subject and between-measure variance from sumsq_icc().
- Note, the shape of the provided 3D volume is determined using inverse_transform from NiftiMasker.

**voxelwise_icc**

As mentioned above, `voxelwise_icc` calculates the ICC values for each voxel in the 3D volumes. \
Think of an image as having the dimensions [45, 45, 90]; it can be unraveled into a single vector \
for each subject that is 182,250 values long (the length in the voxelwise case is the number of voxels). \
`voxelwise_icc` returns an array of equal length that contains the ICC estimate for each voxel, \
between subjects across the measurement occasions. For example:

.. figure:: img_png/voxelwise_example.png
    :align: center
    :alt: Figure 1: HCP Left Hand (A) and Left Foot (B) Activation maps.
    :figclass: align-center
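To make the unraveling concrete, here is a tiny illustration (the array is a stand-in for one subject's 3D stat map, matching the numbers above):

.. code-block:: python

    import numpy as np

    fake_img = np.zeros((45, 45, 90))  # one subject's 3D volume
    flat = fake_img.reshape(-1)        # unraveled to a 1D vector of voxels
    print(flat.shape)                  # (182250,) -> one ICC is estimated per position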
To use the `voxelwise_icc` function, you have to provide the following information:
 - multisession_list: A list of lists of paths to the Nifti z-stat, t-stat or beta maps for sess 1, 2, 3, etc. (or run 1, 2, 3...)
 - mask: The binarized Nifti mask that will be used to mask the 3D volumes.
 - icc_type: The ICC estimate that will be calculated for each voxel. Options: `icc_1`, `icc_2`, `icc_3`. Default: `icc_3`

The function returns a dictionary with 3D volumes for:
 - ICC estimates ('est')
 - ICC lower bound 95% CI ('lowbound')
 - ICC upper bound 95% CI ('upbound')
 - Between Subject Variance ('btwnsub')
 - Within Subject Variance ('wthnsub')
 - Between Measure Variance ('btwnmeas')

The resulting stored variable will be a dictionary, e.g. "brain_output", from which you can view and save images such \
as the ICC estimates (brain_output['est']) and/or the within-subject variance (brain_output['wthnsub']).

Say I have stored paths to session 1 and session 2 in the following variables (Note: subjects in each list are in the same order!):

.. code-block:: python


    # session 1 paths
    scan1 = ["./scan1/sub-1_t-stat.nii.gz", "./scan1/sub-2_t-stat.nii.gz", "./scan1/sub-3_t-stat.nii.gz", "./scan1/sub-4_t-stat.nii.gz", "./scan1/sub-5_t-stat.nii.gz",
             "./scan1/sub-6_t-stat.nii.gz", "./scan1/sub-7_t-stat.nii.gz", "./scan1/sub-8_t-stat.nii.gz"]
    # session 2 paths
    scan2 = ["./scan2/sub-1_t-stat.nii.gz", "./scan2/sub-2_t-stat.nii.gz", "./scan2/sub-3_t-stat.nii.gz", "./scan2/sub-4_t-stat.nii.gz", "./scan2/sub-5_t-stat.nii.gz",
             "./scan2/sub-6_t-stat.nii.gz", "./scan2/sub-7_t-stat.nii.gz", "./scan2/sub-8_t-stat.nii.gz"]

Next, I can pass these image paths to the function and save the 3D volumes using:

.. code-block:: python

    from pyrelimri import brain_icc

    brain_icc_dict = brain_icc.voxelwise_icc(multisession_list=[scan1, scan2], mask="./mask/brain_mask.nii.gz", icc_type="icc_3")

This will return the associated dictionary with nifti 3D volumes which can be manipulated further.

Here I plot the ICC estimates (i.e. 'est') using nilearn's plotting:

.. code-block:: python

    from nilearn.plotting import view_img_on_surf

    view_img_on_surf(stat_map_img=brain_icc_dict["est"],
                     surf_mesh='fsaverage5', threshold=0,
                     title_fontsize=16, colorbar_height=.75,
                     colorbar_fontsize=14).open_in_browser()


Here I save the image using nibabel:

.. code-block:: python

    import os
    import nibabel as nib

    nib.save(brain_icc_dict["est"], os.path.join('output_dir', 'file_name.nii.gz'))

Here is a real-world example using a NeuroVault data collection for Precision Functional Mapping of Individual Brains. The \
collection is `2447 <https://neurovault.org/collections/2447/>`_. The collection provides data for ten subjects across \
ten sessions; I will use the first two sessions. I will use the block-design motor task and focus on the [Left] Hand univariate \
beta maps, which are listed under "other".

Let's use nilearn to load these data for 10 subjects and 2 sessions.
.. code-block:: python

    from nilearn.datasets import fetch_neurovault_ids

    # Fetch left hand motor IDs
    MSC01_ses1 = fetch_neurovault_ids(image_ids=[48068])  # MSC01 motor session 1 L Hand beta
    MSC01_ses2 = fetch_neurovault_ids(image_ids=[48073])  # MSC01 motor session 2 L Hand beta
    MSC02_ses1 = fetch_neurovault_ids(image_ids=[48118])
    MSC02_ses2 = fetch_neurovault_ids(image_ids=[48123])
    MSC03_ses1 = fetch_neurovault_ids(image_ids=[48168])
    MSC03_ses2 = fetch_neurovault_ids(image_ids=[48173])
    MSC04_ses1 = fetch_neurovault_ids(image_ids=[48218])
    MSC04_ses2 = fetch_neurovault_ids(image_ids=[48223])
    MSC05_ses1 = fetch_neurovault_ids(image_ids=[48268])
    MSC05_ses2 = fetch_neurovault_ids(image_ids=[48273])
    MSC06_ses1 = fetch_neurovault_ids(image_ids=[48318])
    MSC06_ses2 = fetch_neurovault_ids(image_ids=[48323])
    MSC07_ses1 = fetch_neurovault_ids(image_ids=[48368])
    MSC07_ses2 = fetch_neurovault_ids(image_ids=[48373])
    MSC08_ses1 = fetch_neurovault_ids(image_ids=[48418])
    MSC08_ses2 = fetch_neurovault_ids(image_ids=[48423])
    MSC09_ses1 = fetch_neurovault_ids(image_ids=[48468])
    MSC09_ses2 = fetch_neurovault_ids(image_ids=[48473])
    MSC10_ses1 = fetch_neurovault_ids(image_ids=[48518])
    MSC10_ses2 = fetch_neurovault_ids(image_ids=[48523])


Now that the data are loaded, I specify the session paths (recall, Nilearn saves the paths to the images on your computer) \
and then provide this information to the `voxelwise_icc` function within the `brain_icc` module.


.. code-block:: python

    # session 1 list from MSC
    sess1_paths = [MSC01_ses1.images[0], MSC02_ses1.images[0], MSC03_ses1.images[0],
                   MSC04_ses1.images[0], MSC05_ses1.images[0], MSC06_ses1.images[0],
                   MSC07_ses1.images[0], MSC08_ses1.images[0], MSC09_ses1.images[0],
                   MSC10_ses1.images[0]]
    # session 2 list from MSC
    sess2_paths = [MSC01_ses2.images[0], MSC02_ses2.images[0], MSC03_ses2.images[0],
                   MSC04_ses2.images[0], MSC05_ses2.images[0], MSC06_ses2.images[0],
                   MSC07_ses2.images[0], MSC08_ses2.images[0], MSC09_ses2.images[0],
                   MSC10_ses2.images[0]]


Notice, the function asks for a mask. These data do not have a mask provided on NeuroVault, \
so I will calculate one and save it to the filepath of these data using nilearn's multi-image masking option.

.. code-block:: python

    from nilearn.masking import compute_multi_brain_mask
    import nibabel as nib
    import os  # so I can use only the directory location of the MSC img path

    mask = compute_multi_brain_mask(target_imgs=sess1_paths)
    mask_path = os.path.join(os.path.dirname(MSC01_ses1.images[0]), 'mask.nii.gz')
    nib.save(mask, mask_path)

Okay, now I have everything I need: the paths to the images and to the mask.

.. code-block:: python

    from pyrelimri import brain_icc

    brain_icc_msc = brain_icc.voxelwise_icc(multisession_list=[sess1_paths, sess2_paths],
                                            mask=mask_path, icc_type='icc_1')

Since the dictionary is saved within the environment, you should see the dictionary with six items. On my mac (i9, 16GB memory), \
it took ~4 minutes to run this and get the results. Time will depend on the size of the data and your machine.

You can plot the volumes using your favorite plotting method in Python.
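For instance, a quick look with nilearn (a minimal sketch; the title here is an illustrative choice):

.. code-block:: python

    from nilearn import plotting

    # plot the voxelwise ICC(1) estimate map computed above
    plotting.plot_stat_map(stat_map_img=brain_icc_msc['est'],
                           title='MSC L-Hand ICC(1)')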
Figure 2A shows the three \
3D volumes for the ICC, 95% upper bound and 95% lower bound. Figure 2B shows the two variance components, \
the between-subject ('btwnsub') and within-subject ('wthnsub') variance. Note, the map will \
determine the thresholding you may want to use. Some voxels will have quite high variability, so here the example is thresholded \
at +2/-2. Alternatively, you can standardize the values within the image before plotting to avoid issues with outliers.

.. figure:: img_png/example_voxelwiseicc.png
    :align: center
    :alt: Figure 2: Information about the ICC (A) and different variance components (B) for ten subjects.
    :figclass: align-center

As before, you can save out the images using nibabel to a directory. Here I will save them to where the images are stored:

.. code-block:: python

    import nibabel as nib

    nib.save(brain_icc_msc["est"], os.path.join('output_dir', 'MSC-LHandbeta_estimate-icc.nii.gz'))
    nib.save(brain_icc_msc["btwnsub"], os.path.join('output_dir', 'MSC-LHandbeta_estimate-iccbtwnsub.nii.gz'))


**roi_icc**

Similar to the steps described for `voxelwise_icc` above, the ``brain_icc`` module includes the option to calculate \
ICC values based on a pre-specified probabilistic or deterministic Nilearn atlas. As mentioned elsewhere, the atlases \
are described on the `Nilearn datasets webpage <https://nilearn.github.io/stable/modules/datasets.html>`_.

The deterministic atlas options (visual ex. Figure 3):

- AAL, Destrieux 2009, Harvard-Oxford, Juelich, Pauli 2017, Shaefer 2018, Talairach

.. figure:: img_png/atlases_ex-deterministic.jpg
    :align: center
    :alt: Figure 3: MNI Display of Nilearn's Deterministic Atlases (Example).
    :figclass: align-center

The probabilistic atlas options (visual ex. Figure 4):

- Difumo, Harvard-Oxford, Juelich and Pauli 2017

.. figure:: img_png/atlases_ex-probabilistic.jpg
    :align: center
    :alt: Figure 4: MNI Display of Nilearn's Probabilistic Atlases (Example).
    :figclass: align-center

Using the same MSC Neurovault data from above, the method to calculate ROI-based ICCs is nearly identical to voxelwise_icc() \
with a few exceptions. First, since the data are masked by ROIs (e.g., the atlas), a mask is not necessary. Second, since \
the atlas and data may be in a different affine space, to preserve the boundaries of ROIs the data are resampled to the \
deterministic atlas (e.g., NiftiLabelsMasker(... resampling_target = 'labels')). However, as the boundaries are less clear for probabilistic atlases and \
it decreases the compute time, the probabilistic atlas is resampled to the data (e.g., in NiftiMapsMasker(... \
resampling_target = 'data')). Third, the resulting dictionary will contain 13 variables:

- Atlas ROI Labels ('roi_labels'): This contains the order of labels (e.g., pulled from atlas.labels)
- ICC estimates ('est'): 1D array that contains ICCs estimated for N ROIs in atlas (atlas.maps[1:] to skip background).
- ICC lower bound (lb) 95% CI ('lowbound'): 1D array that contains lb ICCs estimated for N ROIs in atlas.
- ICC upper bound (ub) 95% CI ('upbound'): 1D array that contains ub ICCs estimated for N ROIs in atlas.
- Between Subject Variance ('btwnsub'): 1D array that contains between subject variance estimated for N ROIs in atlas.
444 | - Within Subject Variance ('wthnsub'): 1D array that contains within subject variance estimated for N ROIs in atlas. 445 | - Between Measure Variance ('btwnmeas'): 1D array that contains between measure variance estimated for N ROIs in atlas (ICC[2,1] only, otherwise filled None) 446 | - ICC estimates transformed back to space of ROI mask ('est_3d'): Nifti 3D volume of ICC estimates 447 | - ICC lower bound 95% CI transformed back to space of ROI mask ('lowbound_3d'): Nifti 3D volume of lb ICC estimates 448 | - ICC upper bound 95% CI transformed back to space of ROI mask ('upbound_3d'): Nifti 3D volume of up ICC estimates 449 | - Between Subject Variance transformed back to space of ROI mask ('btwnsub_3d'): Nifti 3D volume of between subject variance estimates 450 | - Within Subject Variance transformed back to space of ROI mask ('wthnsub_3d'): Nifti 3D volume of within subject variance estimates 451 | - Between Measure Variance transformed back to space of ROI mask ('btwnmeas_3d'): Nifti 3D volume of between measure variance estimates 452 | 453 | An important caveat: Probabilistic atlases are 4D volumes for N ROIs. This is because each voxel has an associated probability \ 454 | that it belongs to ROI A and ROI B. Thus, ROIs may overlap and so the estimates (as in example below) will be more smooth. 455 | 456 | Here is an example to run `roi_icc` using the MSC data loaded above for the deterministic Shaefer 400 ROIs atlas. We call the \ 457 | `roi_icc` function within the `brain_icc` module, specify the multisession list of data, the atlas, defaults and/or requirements \ 458 | the atlas requires (e.g., here, I specify n_rois = 400 which is the default), the directory where I want to save the atlas \ 459 | (I chose '/tmp/' on Mac) and the icc type (similar as above, ICC[1]) 460 | 461 | .. code-block:: python 462 | 463 | from pyrelimri import brain_icc 464 | 465 | shaefer_icc_msc = brain_icc.roi_icc(multisession_list=[sess1_paths,sess2_paths], 466 | type_atlas='shaefer_2018', n_rois = 400, 467 | atlas_dir='/tmp/', icc_type='icc_1') 468 | 469 | 470 | This will run A LOT faster than the `voxelwise_icc` method as 'roi_icc' is reducing the voxel dimensions to ROI dimension (slower for probabilistic) and looping over \ 471 | the length of ROIs in the atlas. So in many cases it is reducing 200,000 voxel calculations to 400 ROI calculations. 472 | 473 | You can access the array of estimates and plot the Nifti image using: 474 | .. code-block:: python 475 | 476 | from nilearn import plotting 477 | 478 | # access estimates for ICC values 479 | shaefer_icc_msc['est'] 480 | 481 | # plot estimate nifti volume 482 | plotting.plot_stat_map(stat_map_img=shaefer_icc_msc['est_3d'], title='ICC(1) Estimate') 483 | 484 | Figure 5 is a visual example of `est_3d`, `lowerbound_3d`, `upperbound_3d`, `btwnsub_3d`, `wthnsub_3d`, 'btwnmeas_3d' for the 400 \ 485 | ROI Shaefer atlas. 486 | 487 | .. figure:: img_png/roiicc_ex-shaefer400.jpg 488 | :align: center 489 | :alt: Figure 5: Estimates from roi_icc for Shaefer 400 Atlas on MSC data. 490 | :figclass: align-center 491 | 492 | 493 | I can do the same for a probabilistic atlas -- say the 256 ROI Difumo atlas. 494 | 495 | .. 
code-block:: python

    from pyrelimri import brain_icc

    difumo_icc_msc = brain_icc.roi_icc(multisession_list=[sess1_paths, sess2_paths],
                                       type_atlas='difumo', dimension=256,  # notice, 'dimension' is unique to this atlas
                                       atlas_dir='/tmp/', icc_type='icc_1')


Figure 6 contains the estimates from the Difumo 256 atlas. Again, since this is a probabilistic atlas, each voxel has an \
associated probability of belonging to each ROI, so there are no clear boundaries. The data will have slightly different \
distributions and appear smoother, so the maps should be interpreted with this in mind.

.. figure:: img_png/est_difumo.png
    :align: center
    :alt: Figure 6: Estimates from roi_icc for Difumo Atlas on MSC data.
    :figclass: align-center



conn_icc
---------

The `conn_icc` module is a wrapper around the `icc` module. \
In short, the `edgewise_icc` function, like `voxelwise_icc` within the `brain_icc` module, calculates the ICC for an NxN matrix \
across subjects and sessions on a cell-by-cell basis (or edge-by-edge).

Here are the steps it uses:

- Function takes a list of subjects' a) paths to .npy, .txt or .csv correlation matrices or b) numpy arrays for each session, the number of columns in each matrix, the list of column names (if not provided, populated as 1 to the number of columns) and the ICC type to be calculated.
- Function checks that the list of names and number of columns match and confirms the N per session is the same.
- If the lists contain strings, the files are loaded based on the .npy, .txt or .csv extension with the provided separator. For .csv, pandas assumes header/index col = None (e.g. read_csv(matrix, sep=separator, header=None, index_col=False).values)
- Once loaded, only the lower triangle and diagonal are retained as a 1D numpy array.
- Function loops over each edge and creates a pandas DataFrame with the edge value for each subject and session used in sumsq_icc().
- The function calculates and returns a dictionary with six NxN matrices: est, lower (lower_bound) and upper (upper_bound) of the ICC 95% confidence interval, and the between-subject, within-subject and between-measure variance from sumsq_icc().
- Note, the number of columns is used to reshape the data from the NxN matrix to a lower-triangle 1D array and back to an NxN lower-triangle matrix.

**edgewise_icc**

As mentioned above, `edgewise_icc` estimates the ICC components for each edge in an NxN matrix. \
To use the `edgewise_icc` function, you have to provide the following information:

- multisession_list: A list of lists of paths to the .txt, .csv or .npy correlation matrices, or a list of lists of numpy arrays, for sess 1, 2, 3, etc. (or run 1, 2, 3...)
- n_cols: the number of columns (int) expected in the provided matrices
- col_names: A list of column names for the matrices.
- separator: If providing paths as strings, the separator to use to open the file (e.g., ',', '\t')
- icc_type: The ICC estimate that will be calculated for each edge. Options: `icc_1`, `icc_2`, `icc_3`. Default: `icc_3`
534 | **edgewise_icc**
535 |
536 | As mentioned above, `edgewise_icc` estimates the ICC components for each edge in an NxN matrix. \
537 | To use the `edgewise_icc` function, you have to provide the following information:
538 |
539 | - multisession_list: A list of lists of paths to the .txt, .csv or .npy correlation matrices (or of numpy arrays) for sess 1, 2, 3, etc. (or run 1, 2, 3, ...)
540 | - n_cols: The number of columns (int) expected in the provided matrices
541 | - col_names: A list of column names for the matrices.
542 | - separator: If providing paths as strings, the separator to use to open the file (e.g., ',', '\t')
543 | - icc_type: The ICC estimate that will be calculated for each edge. Options: `icc_1`, `icc_2`, `icc_3`. Default: `icc_3`
544 |
545 | The function returns a dictionary with an NxN matrix for:
546 |
547 | - ICC estimates ('est')
548 | - ICC lower-bound 95% CI ('lowbound')
549 | - ICC upper-bound 95% CI ('upbound')
550 | - Between Subject Variance ('btwnsub')
551 | - Within Subject Variance ('wthnsub')
552 | - Between Measure Variance ('btwnmeas')
553 |
554 | The resulting stored variable will be a dictionary, e.g. "icc_fcc_mat", from which you can view and plot matrices such \
555 | as the ICC estimates (icc_fcc_mat['est']) and/or the within-subject variance (icc_fcc_mat['wthnsub']).
556 |
557 | Say I have stored the paths to session 1 and session 2 in the following variables (note: the subjects in each list must be in the same order!):
558 |
559 | .. code-block:: python
560 |
561 |
562 |     # session 1 paths
563 |     ses1_matrices = ["./scan1/sub-1_ses-1_task-fake.csv", "./scan1/sub-2_ses-1_task-fake.csv", "./scan1/sub-3_ses-1_task-fake.csv", "./scan1/sub-4_ses-1_task-fake.csv"]
564 |     ses2_matrices = ["./scan2/sub-1_ses-2_task-fake.csv", "./scan2/sub-2_ses-2_task-fake.csv", "./scan2/sub-3_ses-2_task-fake.csv", "./scan2/sub-4_ses-2_task-fake.csv"]
565 |     two_sess_matrices = [ses1_matrices, ses2_matrices]
566 |
567 | Next, we can run the edgewise ICC function. Since `col_names` is not provided, it is populated with numbers 1 to `n_cols`.
568 |
569 | .. code-block:: python
570 |
571 |     from pyrelimri.conn_icc import edgewise_icc
572 |     icc_fcc_mat = edgewise_icc(multisession_list=two_sess_matrices, n_cols=96, icc_type='icc_3', separator=',')
573 |
574 |
575 | FAQ
576 | ---
577 |
578 | * `Why was a manual sum of squares used for ICC?` \
579 |
580 | The intraclass correlation can be calculated using an ANOVA or a Hierarchical Linear Model (HLM). In practice, ANOVA or HLM \
581 | packages could have been used to extract some of the parameters. However, the manual calculation was used because it was \
582 | found to be the most efficient and transparent. In addition, ANOVA & HLM packages calculate several additional parameters \
583 | that can cause warnings during the analyses. The goal was to make things more efficient (3x faster on average) \
584 | and alleviate warnings that may occur due to calculations in other packages for metrics that are not used. However, tests were used \
585 | to confirm that the ICC and the between- and within-subject variance components were consistent across the `icc.py` and HLM methods.
586 |
587 | * `Is the brain_icc module only limited to fMRI voxelwise data inputs?` \
588 |
589 | In theory, the `voxelwise_icc` function in the `brain_icc` module can work on alternative data that are not voxelwise. For example, \
590 | if you have converted your voxelwise data into a parcellation (e.g., reducing it from ~100,000 voxels with a beta estimate \
591 | to 900 ROIs with an estimate) that is a 3D .nii volume, you can give this information to the function, too. It simply \
592 | masks the 3D volumes, converts the 3D (x, y, z) data to 1D (length = x*y*z) and iterates over each value. Furthermore, you can \
593 | also provide it with any other normalized 3D .nii inputs that have voxels (e.g., T1w).
594 | In cases where you have ROI mean-signal intensity values already extracted per ROI, subject and session, you can use `sumsq_icc` \
595 | by looping over the ROIs, treating each ROI for the subjects and sessions as its own dataset (similar to ICC() in R or pingouin's ICC \
596 | in Python).
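A minimal sketch of that looping approach, assuming a long-format table with one column of mean signal per ROI (the file name and column names here are hypothetical):

.. code-block:: python

    import pandas as pd
    from pyrelimri.icc import sumsq_icc

    # hypothetical long-format data: one row per subject x session
    df_long = pd.read_csv('roi_mean_signals.csv')  # columns: subj, sess, roi_1, roi_2, ...
    roi_cols = [col for col in df_long.columns if col.startswith('roi_')]

    icc_by_roi = {}
    for roi in roi_cols:
        est, lowbound, upbound, btwn_sub, wthn_sub, btwn_meas = sumsq_icc(
            df_long=df_long, sub_var='subj', sess_var='sess',
            value_var=roi, icc_type='icc_3')
        icc_by_roi[roi] = est
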
597 | In future iterations of the `PyReliMRI` package, the option of running ICCs for one of the 18 specified \
598 | `Nilearn Atlases `_ may be expanded as well.
599 |
600 | * `How many sessions can I use with this package?` \
601 |
602 | In theory, you can add a wide range of sessions into `multisession_list = [sess1, sess2, sess3, sess4, sess5]`.
603 | As the code is currently written, it will restructure and label the sessions accordingly. The key requirement \
604 | is that the subjects and runs are in the required order, because the naming structure of the files cannot be assumed. \
605 | The function is flexible to inputs of 3D Nifti images and will not assume any naming rules for the files. As a result, the \
606 | order of subjects in session 1 = [1, 2, 3, 4, 5] must be the same in session 2 = [1, 2, 3, 4, 5]. If they are not, \
607 | the *resulting estimates will be incorrect*. They will be incorrect because across sessions you may be comparing \
608 | different subjects rather than the same subject across sessions.
--------------------------------------------------------------------------------
/docs/img_png/atlases_ex-deterministic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/atlases_ex-deterministic.jpg
--------------------------------------------------------------------------------
/docs/img_png/atlases_ex-probabilistic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/atlases_ex-probabilistic.jpg
--------------------------------------------------------------------------------
/docs/img_png/brainicc_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/brainicc_fig.png
--------------------------------------------------------------------------------
/docs/img_png/brainicc_fig.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/brainicc_fig.psd
--------------------------------------------------------------------------------
/docs/img_png/est_difumo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/est_difumo.png
--------------------------------------------------------------------------------
/docs/img_png/example_voxelwiseicc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/example_voxelwiseicc.png
--------------------------------------------------------------------------------
/docs/img_png/fake_cuelocktimes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/fake_cuelocktimes.png
--------------------------------------------------------------------------------
/docs/img_png/fake_events-timeseries.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/fake_events-timeseries.png -------------------------------------------------------------------------------- /docs/img_png/fake_events.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/fake_events.png -------------------------------------------------------------------------------- /docs/img_png/fake_timeseries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/fake_timeseries.png -------------------------------------------------------------------------------- /docs/img_png/hcp_handfoot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/hcp_handfoot.png -------------------------------------------------------------------------------- /docs/img_png/intraclasscorr_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/intraclasscorr_example.png -------------------------------------------------------------------------------- /docs/img_png/maskedtimeseries_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/maskedtimeseries_example.png -------------------------------------------------------------------------------- /docs/img_png/maskedtimeseries_example.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/maskedtimeseries_example.psd -------------------------------------------------------------------------------- /docs/img_png/n3_nacctimeseries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/n3_nacctimeseries.png -------------------------------------------------------------------------------- /docs/img_png/pyrelimri_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/pyrelimri_fig.png -------------------------------------------------------------------------------- /docs/img_png/pyrelimri_fig.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/pyrelimri_fig.psd -------------------------------------------------------------------------------- /docs/img_png/pyrelimri_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/pyrelimri_logo.png -------------------------------------------------------------------------------- /docs/img_png/pyrelimri_logo.psd: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/pyrelimri_logo.psd -------------------------------------------------------------------------------- /docs/img_png/roiicc_ex-shaefer400.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/roiicc_ex-shaefer400.jpg -------------------------------------------------------------------------------- /docs/img_png/roiicc_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/roiicc_fig.png -------------------------------------------------------------------------------- /docs/img_png/roiicc_fig.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/roiicc_fig.psd -------------------------------------------------------------------------------- /docs/img_png/similarity_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/similarity_fig.png -------------------------------------------------------------------------------- /docs/img_png/similarity_fig.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/similarity_fig.psd -------------------------------------------------------------------------------- /docs/img_png/trlock-gainsneutrals_mid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/trlock-gainsneutrals_mid.png -------------------------------------------------------------------------------- /docs/img_png/voxelwise_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/docs/img_png/voxelwise_example.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Python-based Reliability in MRI (PyReliMRI) documentation master file, created by 2 | sphinx-quickstart on Wed Mar 22 14:27:57 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to PyReliMRI's documentation! 7 | ======================================================================= 8 | .. figure:: img_png/pyrelimri_logo.png 9 | :align: center 10 | :figclass: align-center 11 | :scale: 40% 12 | 13 | Python-based Reliability in MRI (PyReliMRI) is an open-source Python tool to calculate multiple group- and individual-level reliability metrics. This package is designed for researchers using MRI data to easily report reliability estimates in their manuscripts, particularly for multi-run and/or multi-session data. 
14 | 15 | Several packages exist to address different aspects covered in this package. Specifically, tools are available for calculating similarity coefficients, intraclass correlations (e.g., 3dICC in AFNI), or both (e.g., in Python-based `nipype `_ or Matlab's `fmreli `_). Alternatively, Ting Xu offers a `Shiny App `_ for calculating univariate and multivariate ICCs from .csv data. However, some flexibility may be limited or certain features unavailable when working with preprocessed MRI data. For example, `ICC_rep_anova` is restricted to ICC(3,1), and `fmreli` requires a Matlab license and does not support tetrachoric correlation, pairwise comparisons across images, or atlas-based reliability estimates. 16 | 17 | Our goal is to integrate various functions (see Figure 1) into a single package that can be easily downloaded and imported into Python for universal use. 18 | 19 | .. figure:: img_png/pyrelimri_fig.png 20 | :align: center 21 | :alt: Figure 1: Available Features within PyReliMRI. 22 | :figclass: align-center 23 | 24 | Figure 1. Functions within the PyReliMRI Library 25 | 26 | 27 | PyReliMRI 28 | ==================================== 29 | 30 | .. toctree:: 31 | :maxdepth: 2 32 | :caption: Introduction: 33 | 34 | introduction.rst 35 | 36 | Module Documentation 37 | ==================================== 38 | 39 | .. toctree:: 40 | :maxdepth: 2 41 | :caption: Usage: 42 | 43 | install.rst 44 | usage.rst 45 | 46 | Usage Examples 47 | ==================== 48 | 49 | .. toctree:: 50 | :maxdepth: 2 51 | :caption: Examples: 52 | 53 | examples.rst 54 | similarity_funcs.rst 55 | icc_funcs.rst 56 | timeseries_extract.rst 57 | 58 | Citing PyReliMRI 59 | ------------------- 60 | 61 | Demidenko, M., Mumford, J., & Poldrack, R. (2024). PyReliMRI: An Open-source Python tool for Estimates of Reliability in MRI Data (2.1.0) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.12522260 62 | 63 | Indices and tables 64 | ================== 65 | 66 | * :ref:`genindex` 67 | * :ref:`modindex` 68 | * :ref:`search` 69 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | PyReliMRI Installation 2 | ----------------------- 3 | 4 | You can install the PyReliMRI package directly from your terminal using *pip install* for a (A) specific tagged release \ 5 | or (B) latest release that is on PyPI 6 | 7 | .. code-block:: bash 8 | 9 | # [A] specific tagged release 10 | pip install git+https://github.com/demidenm/PyReliMRI.git@2.0.0 11 | # [B] latest release on PyPI (note: new versions appear quicker on git than PyPI) 12 | pip install pyrelimri 13 | # [C] Updating previous version 14 | pip install --upgrade git+https://github.com/demidenm/PyReliMRI.git@2.1.0 15 | 16 | 17 | 18 | If the installation is successful, you will see something along the lines of *Successfully installed PyReliMRI-2.0.0* into your terminal. 19 | 20 | Once the package is installed, you can import the `pyrelimri` module using: 21 | 22 | .. code-block:: python 23 | 24 | import pyrelimri 25 | 26 | Alternatively, you can load a specific function from the available modules. For example, if you're only interested in \ 27 | calculating the similarity between 3D Nifti images, you can load 28 | 29 | .. code-block:: python 30 | 31 | from pyrelimri import similarity 32 | 33 | Once the module is loaded, the functions within the module can be used. You can check with suffix '?' 
what input \
34 | is required to run the function.
35 |
36 |
37 | Required dependencies
38 | `````````````````````
39 |
40 | While a number of calculations are performed manually, PyReliMRI depends on several packages that must be installed. \
41 | The effort here isn't to reinvent the wheel but to integrate existing tools. While the versions below are not strictly required, \
42 | they were sufficient to run each script during testing.
43 |
44 | - Python>=3.6
45 | - numpy>=1.2
46 | - scipy>=1.9
47 | - pandas>=1.4
48 | - nilearn>=0.9
49 | - nibabel>=4.0.2
50 | - sklearn>=1.0.2
51 |
--------------------------------------------------------------------------------
/docs/introduction.rst:
--------------------------------------------------------------------------------
1 | Python-based Reliability in MRI (PyReliMRI)
2 | ============================================
3 |
4 | .. image:: https://github.com/demidenm/PyReliMRI/actions/workflows/python-package-conda.yml/badge.svg
5 |     :target: https://github.com/demidenm/PyReliMRI/actions/workflows/python-package-conda.yml
6 |
7 | PyReliMRI is a Python package designed to address the increasing interest in reliability assessment in MRI research, \
8 | particularly in `task fMRI `_ and `resting state fMRI `_. \
9 | Researchers use various methods to calculate reliability, but there is a lack of open-source tools that integrate \
10 | multiple metrics for both individual and group analyses.
11 |
12 | Purpose of PyReliMRI
13 | ---------------------
14 |
15 | PyReliMRI (pronounced: Pi-Rely-MRI) aims to fill the gap by providing an open-source Python package for estimating \
16 | multiple reliability metrics on fMRI (or MRI) data in standard space. It supports analysis at both the group and \
17 | individual levels, facilitating comprehensive reporting in multi-run and/or multi-session MRI studies. \
18 | Even with single-session and single-run data, PyReliMRI remains useful. For example:
19 |
20 | - Assessing reliability or similarity metrics on individual files by splitting the run and modeling them separately.
21 | - Using group-level maps (e.g., from neurovault or across studies) to compute various similarity metrics.
22 |
23 | Modules Overview
24 | -----------------
25 |
26 | PyReliMRI comprises several modules tailored to different use cases:
27 |
28 | - **`icc`**: Computes various components used in ICC calculations, including ICC(1), ICC(2,1), or ICC(3,1), confidence intervals, between-subject variance, and within-subject variance.
29 | - **`brain_icc`**: Calculates voxelwise and ROI-based ICCs across multiple sessions, integrating with `Nilearn datasets `_ for atlas options.
30 | - **`conn_icc`**: Estimates ICC for precomputed correlation matrices, useful for connectivity studies.
31 | - **`similarity`**: Computes similarity coefficients (Dice, Jaccard, tetrachoric, Spearman) between 3D Nifti images, including pairwise comparisons across multiple images.
32 | - **`tetrachoric_correlation`**: Calculates tetrachoric correlation between binary vectors.
33 | - **`masked_timeseries`**: Extracts and processes timeseries data from BOLD image paths, facilitating ROI-based analysis and event-locked responses.
34 |
35 | Each module is designed to answer specific questions about data reliability, supporting a range of MRI analyses in standard spaces like MNI or Talairach.
36 |
37 | Citation
38 | ---------
39 |
40 | If you use PyReliMRI in your research, please cite it using the following Zenodo DOI:
41 |
42 | Demidenko, M., Mumford, J., & Poldrack, R. (2024).
PyReliMRI: An Open-source Python tool for Estimates of Reliability in MRI Data (2.1.0) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.12522260
43 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 |     set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 |     echo.
16 |     echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 |     echo.installed, then set the SPHINXBUILD environment variable to point
18 |     echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 |     echo.may add the Sphinx directory to PATH.
20 |     echo.
21 |     echo.If you don't have Sphinx installed, grab it from
22 |     echo.https://www.sphinx-doc.org/
23 |     exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx_rtd_theme
3 | numpy
4 | pandas
5 | pytest
6 | nilearn
7 | nibabel
8 | scipy
9 | seaborn
10 | scikit-learn
11 | hypothesis
12 | matplotlib
13 | joblib
14 | statsmodels
--------------------------------------------------------------------------------
/docs/similarity_funcs.rst:
--------------------------------------------------------------------------------
1 | Similarity Based Functions
2 | ===========================
3 |
4 | In this section, the similarity functions are reviewed with some examples for the `similarity` module. The purpose of the \
5 | `similarity` module is to calculate different types of similarity between \
6 | two or more 3D NifTi images. The types of similarity estimates include the Dice Coefficient, the Jaccard Coefficient, and the \
7 | Tetrachoric and Spearman Rank correlation coefficients.
8 |
9 | The formulas for each are as follows:
10 |
11 | .. math:: \text{Dice Similarity Coefficient} = \frac{2|A \cap B|}{|A| + |B|}
12 |
13 | .. math:: \text{Jaccard Similarity Coefficient} = \frac{|A \cap B|}{|A \cup B|}
14 |
15 | .. math:: \text{Tetrachoric Correlation} = \cos\left(\frac{\pi}{1+\sqrt{\frac{AD}{BC}}}\right)
16 |
17 | .. math:: \text{Spearman Rank Correlation} = 1 - \frac{6 \sum d_i^2}{n(n^2 - 1)}
18 |
19 |
20 | image_similarity
21 | ----------------
22 |
23 | The NifTi images that are used with this function can be *preprocessed* outputs from SPM, FSL, AFNI or Nilearn. The two requirements \
24 | the user has to confirm are:
25 |
26 | * Image Shape: The two images being compared have to be of the same shape. If the images are of different shapes, the comparison of the volumes will be wrong. The package will throw an error if the images are of mismatched shapes.
27 | * Normalization/Coregistration: The images should be in standard space for an appropriate comparison. While the shape error above should arise, the package will not check whether the images are in standard space.
28 |
29 | You can check the shape of the data using the following. However, standard space has to be confirmed via your preprocessing pipeline.
30 | .. code-block:: python
31 |
32 |     from nilearn import image
33 |     # checking image shape
34 |     img1 = image.load_img("/path/dir/path_to_img.nii.gz")
35 |     img1.shape
36 |
37 | If your data meet the requirements, you can easily use `image_similarity()`. The requirements for the function \
38 | are:
39 |
40 | * `imgfile1`: this is the string for the path to the first image (e.g., /path/dir/path_to_img1.nii.gz)
41 | * `imgfile2`: this is the string for the path to the second image (e.g., /path/dir/path_to_img2.nii.gz)
42 | * `mask`: The mask is optional, but it is the path to the mask (e.g., /path/dir/path_to_img_mask.nii.gz)
43 | * `thresh`: The threshold is optional but highly recommended for Dice/Jaccard/Tetrachoric; the similarity between unthresholded binary images will usually be one (unless they were thresholded before). Base the threshold on your input image type. For example, a beta.nii.gz/cope.nii.gz file would be thresholded using a different integer than a zstat.nii.gz
44 | * `similarity_type`: This is the similarity calculation you want returned. The options are: 'dice', 'jaccard', 'tetrachoric' or 'spearman'
45 |
46 |
47 | Let's say I want to fetch some data off of Neurovault and calculate the similarity between two images. For this example \
48 | I will use the `HCP task group activation maps `_. Instead of manually downloading and adding paths, \
49 | you can use Nilearn to fetch these maps.
50 |
51 | .. code-block:: python
52 |
53 |     from nilearn.datasets import fetch_neurovault_ids
54 |     # Fetch hand and foot left motor map IDs
55 |     L_foot_map = fetch_neurovault_ids(image_ids=[3156])
56 |     L_hand_map = fetch_neurovault_ids(image_ids=[3158])
57 |
58 | You can look at the images to see the activation maps for each image:
59 |
60 | .. figure:: img_png/hcp_handfoot.png
61 |     :align: center
62 |     :alt: Figure 1: HCP Left Hand (A) and Left Foot (B) Activation maps.
63 |     :figclass: align-center
64 |
65 | Now that the data are in the environment, load the similarity module from `pyrelimri` and calculate the Jaccard similarity coefficient \
66 | and tetrachoric correlation between the two images.
67 |
68 |
69 | .. code-block:: python
70 |
71 |     from pyrelimri import similarity
72 |
73 |     # calculate jaccard coefficient
74 |     similarity.image_similarity(imgfile1=L_hand_map.images[0], imgfile2=L_foot_map.images[0], thresh=1.5, similarity_type='jaccard')
75 |     similarity.image_similarity(imgfile1=L_hand_map.images[0], imgfile2=L_foot_map.images[0], thresh=1.5, similarity_type='tetrachoric')
76 |
77 |
78 | The Jaccard coefficient is 0.18 and the tetrachoric similarity is .776.
79 |
80 | Try changing the threshold to 1.0. What would happen? In this instance, the correlation will decrease and the Jaccard coefficient will increase. \
81 | Why? This is, in part, explained by the decreased overlap of zeros between the binary images \
82 | and the increased number of overlapping voxels in the Jaccard calculation.
83 |
84 |
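You can verify this yourself by re-running the same calls with the lower threshold (the resulting values depend on the fetched maps):

.. code-block:: python

    # same comparison, lower threshold
    similarity.image_similarity(imgfile1=L_hand_map.images[0], imgfile2=L_foot_map.images[0],
                                thresh=1.0, similarity_type='jaccard')
    similarity.image_similarity(imgfile1=L_hand_map.images[0], imgfile2=L_foot_map.images[0],
                                thresh=1.0, similarity_type='tetrachoric')
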
85 | pairwise_similarity
86 | -------------------
87 |
88 | The `pairwise_similarity()` function is a wrapper for the `image_similarity()` function \
89 | within the similarity module. It accepts similar values, except this time, instead of `imgfile1` and `imgfile2`, \
90 | the function takes in a list of paths to NifTi images. The inputs to the `pairwise_similarity()` function are:
91 |
92 | * `nii_filelist`: A list of Nifti files (e.g., ["/path/dir/path_to_img1.nii.gz", "/path/dir/path_to_img2.nii.gz", "/path/dir/path_to_img3.nii.gz"])
93 | * `mask`: The mask is optional, but it is the path to a Nifti brain mask (e.g., /path/dir/path_to_img_mask.nii.gz)
94 | * `thresh`: The threshold is optional but highly recommended for binary estimates. The similarity between unthresholded binary images is usually one (unless they were thresholded before). Base the threshold on your input image type. For example, a beta.nii.gz/cope.nii.gz file would be thresholded using a different integer than a zstat.nii.gz
95 | * `similarity_type`: This is the similarity estimate between images that you want returned. The options are: 'dice', 'jaccard', 'tetrachoric' or 'spearman'
96 |
97 | Using the HCP example from above, add two more images into the mix.
98 |
99 | .. code-block:: python
100 |
101 |     from nilearn.datasets import fetch_neurovault_ids
102 |     # Fetch left and right hand and foot motor map IDs
103 |     L_foot_map = fetch_neurovault_ids(image_ids=[3156])
104 |     L_hand_map = fetch_neurovault_ids(image_ids=[3158])
105 |     R_foot_map = fetch_neurovault_ids(image_ids=[3160])
106 |     R_hand_map = fetch_neurovault_ids(image_ids=[3162])
107 |
108 |
109 | I won't plot these images, but for reference there are now four image paths: `L_hand_map.images[0]`, `L_foot_map.images[0]`, \
110 | `R_hand_map.images[0]`, `R_foot_map.images[0]`. Now I can run the `pairwise_similarity()` function:
111 |
112 |
113 | .. code-block:: python
114 |
115 |     # If you hadn't already, import the package
116 |     from pyrelimri import similarity
117 |     similarity.pairwise_similarity(nii_filelist=[L_foot_map.images[0], L_hand_map.images[0],
118 |                                    R_foot_map.images[0], R_hand_map.images[0]], thresh=1.5, similarity_type='jaccard')
119 |
120 | As noted previously, the permutations are across the image combinations. This will return a pandas DataFrame, such as:
121 |
122 | +------+-----------------------+-------------------------------------------+
123 | |      | similar_coef          | image_labels                              |
124 | +======+=======================+===========================================+
125 | | 0    | 0.18380588591461908   | image_3156.nii.gz ~ image_3158.nii.gz     |
126 | +------+-----------------------+-------------------------------------------+
127 | | 1    | 0.681449273874364     | image_3156.nii.gz ~ image_3160.nii.gz     |
128 | +------+-----------------------+-------------------------------------------+
129 | | 2    | 0.3912509226509201    | image_3156.nii.gz ~ image_3162.nii.gz     |
130 | +------+-----------------------+-------------------------------------------+
131 | | 3    | 0.18500433729643165   | image_3158.nii.gz ~ image_3160.nii.gz     |
132 | +------+-----------------------+-------------------------------------------+
133 | | 4    | 0.2340488091737724    | image_3158.nii.gz ~ image_3162.nii.gz     |
134 | +------+-----------------------+-------------------------------------------+
135 | | 5    | 0.41910546659304254   | image_3160.nii.gz ~ image_3162.nii.gz     |
136 | +------+-----------------------+-------------------------------------------+
137 |
138 |
139 | FAQ
140 | ---
141 |
142 | * `Can I use these functions on output from FSL, AFNI or SPM?` \
143 |
144 | Yes, you can use these functions on any NifTi data that are of the same shape and in the same space. \
145 | You just need the paths to the locations of the .nii or .nii.gz files for the contrast beta, t-stat or z-stat maps.
146 |
147 | * `Are there restrictions on which data I should or shouldn't calculate similarity between?` \
148 |
149 | It all depends on the question. You can calculate similarity between group-level maps or individual maps. \
150 | There are two things to keep in mind: ensure the data are in the expected form, and be cautious about the \
151 | thresholding that is used, because a threshold of 2.3 on a t-stat.nii.gz may not be as restrictive on group maps \
152 | as it is on individual maps. Note, the Spearman estimate is intended to be used on raw values and not thresholded values. \
153 | In this case, `thresh` should remain at the default 'None'.
--------------------------------------------------------------------------------
/docs/timeseries_extract.rst:
--------------------------------------------------------------------------------
1 | TR-by-TR Cue-locked Timeseries
2 | ================================
3 |
4 |
5 | The `masked_timeseries` module extracts timeseries for a region of interest (ROI) mask or ROI coordinates and generates TR-by-TR cue-locked timeseries. \
6 | Below I cover three primary functions: `extract_time_series`, `extract_postcue_trs_for_conditions` and `plot_responses`. \
7 | `extract_time_series` extracts the values from BOLD files, `extract_postcue_trs_for_conditions` aligns event timings to TRs and generates TR-by-TR values, \
8 | and `plot_responses` uses the resulting values to generate a TR-by-TR figure.
9 |
10 | **extract_time_series**
11 |
12 | As mentioned above, the `extract_time_series` function extracts time series data from BOLD images for specified regions of interest (ROI) or coordinates. \
13 | The function uses either NiftiLabelsMasker or NiftiSpheresMasker from Nilearn to extract the timeseries, averaging the voxels within the ROI.
18 |
19 | To use the `extract_time_series` function, you have to provide the following information:
20 | - bold_paths: A list of paths to the BOLD image files for each subject, run, and/or session.
21 | - roi_type: The type of ROI ('mask' or 'coords').
22 | - high_pass_sec: The high-pass filter cutoff in seconds (optional).
23 | - roi_mask: The path to the ROI mask file. If this is provided, the function will use NiftiLabelsMasker (required if roi_type is 'mask').
24 | - roi_coords: A tuple of coordinates (x, y, z) for the center of the sphere ROI (required if roi_type is 'coords').
25 | - radius_mm: The radius of the sphere in millimeters (required if roi_type is 'coords').
26 | - detrend: Whether to detrend the BOLD signal using Nilearn's detrend function (optional, default is False).
27 | - fwhm_smooth: The full-width at half-maximum (FWHM) value for Gaussian smoothing of the BOLD data (optional).
28 | - n_jobs: The number of CPUs to use for parallel processing (optional, default is 1). Depending on data size, at least 16GB per CPU is recommended.
29 |
30 | The function returns:
31 | - If roi_type is 'mask':
32 |     - List of numpy arrays containing the extracted time series data for each subject/run.
33 |     - List of subject information strings formatted as 'sub-{sub_id}_run-{run_id}' reflecting the order of the list of timeseries arrays.
34 | - If roi_type is 'coords':
35 |     - List of numpy arrays containing the extracted time series data for each subject/run.
36 |     - Nifti1Image coordinate mask that was used in the timeseries extraction.
37 |     - List of subject information strings formatted as 'sub-{sub_id}_run-{run_id}' reflecting the order of the list of timeseries arrays.
38 |
39 |
40 | Utility
41 | -------
42 |
43 | Simply, in fMRI, the goal is to fit a design matrix (comprised of task-relevant stimulus onsets & nuisance regressors), :math:`X\beta`, \
44 | to timeseries data, :math:`Y`, for every given voxel or ROI, plus some noise, :math:`\epsilon`. The model being:
45 |
46 | .. math::
47 |
48 |     Y = X \beta + \epsilon
49 |
50 | A whole brain scan can be comprised of hundreds of thousands of voxels. However, we can use an ROI mask to get the average \
51 | of the voxels within a given mask for each volume (t). An example of what that may look like, from the AHRB study:
52 |
53 | .. figure:: img_png/n3_nacctimeseries.png
54 |     :align: center
55 |     :alt: Figure 1: Extracted timeseries for Harvard-Oxford Left NAcc for Four Subjects.
56 |
57 | In the timeseries, there is a lot of variability. It is also hard to see meaningful fluctuations, because we 1) can't easily see \
58 | when events occur and 2) the change in amplitude is often very small. See discussions in `Drew (2019) `_ \
59 | and `Drew (2022) `_. Nevertheless, by plotting the data \
60 | for a given ROI/voxel, you can observe the change. Below, the onsets of two cues, `LargeGain` and `NoMoneyStake`, \
61 | from a modified version of the MID task are locked to the TRs of the timeseries; a change *may* be observed.
62 |
63 | .. figure:: img_png/trlock-gainsneutrals_mid.png
64 |     :align: center
65 |     :alt: Figure 2: TR-by-TR plots locked to Gains & Neutral Anticipation from MID for Left NAcc.
66 |
67 |
68 | The purpose of the `masked_timeseries.py` module is to perform this sort of data extraction and plotting. Often, it is useful to \
69 | dump out your timeseries signal to confirm that you are getting a response in visual and/or motor regions around the times it should occur. \
70 | Second to the large-effect regions, it is useful to evaluate whether you observe a response in an ROI specific to the process/stimulus of interest.
71 |
72 | Example
73 | -------
74 |
75 | **dummy example**
76 | Let's start off with a conceptual example. Say you have a timeseries for a cue-evoked response: Happy & Sad photos. You extract the timeseries \
77 | for a region that is more sensitive to `Happy` > `Sad` (whatever that may be...). In this region, these two cues would be differentiated by the signal. \
78 | A priori, there is a hypothesis for how this may look. First, take a peek at the fake timeseries that includes both events.
79 |
80 | .. figure:: img_png/fake_timeseries.png
81 |     :align: center
82 |     :alt: Figure 3: Fake Timeseries.
83 |
84 | It is a bit challenging to make out which event is where. Let's add sticks at the times the events occur.
85 |
86 | .. figure:: img_png/fake_events.png
87 |     :align: center
88 |     :alt: Figure 4: Fake Happy & Sad Task Events.
89 |
90 | Now, let's combine the two to see how the fake BOLD response is delayed after each cue but follows some ordered structure. \
91 | The trend of the data becomes more apparent.
92 |
93 | .. figure:: img_png/fake_events-timeseries.png
94 |     :align: center
95 |     :alt: Figure 5: Fake Happy & Sad Event Sticks with Timeseries.
96 |
97 | In this example, the effect for `Happy` was intentionally made larger than `Sad`.
So it is easier to visualize, as the signal is generated \
98 | with Nilearn's SPM function (for the code of this end-to-end example, see the end of this doc). Nevertheless, we can extract the timeseries \
99 | locked to these conditions to make it especially apparent:
100 |
101 | .. figure:: img_png/fake_cuelocktimes.png
102 |     :align: center
103 |     :alt: Figure 6: Cue-locked TR-by-TR for Happy & Sad conditions.
104 |
105 |
106 | **masked_timeseries example**
107 | Now, I will review an example of how the timeseries for an ROI can be extracted, locked to event files and plotted TR-by-TR using \
108 | the `masked_timeseries.py` module.
109 |
110 | .. code-block:: python
111 |
112 |     from pyrelimri import masked_timeseries
113 |
114 |     n3_boldpaths = ["./sub-1-ses-1_task-kewl_run-01_bold.nii.gz", "./sub-2-ses-1_task-kewl_run-01_bold.nii.gz", "./sub-3-ses-1_task-kewl_run-01_bold.nii.gz"]
115 |     roi_mask_path = "./roi_mask.nii.gz"
116 |
117 |     # mask versus coordinates example
118 |     timeser_mask_n3, id_order = masked_timeseries.extract_time_series(bold_paths=n3_boldpaths, roi_type='mask',
119 |                                                                       high_pass_sec=128,  # cutoff in seconds (128 is illustrative)
120 |                                                                       roi_mask=roi_mask_path,
121 |                                                                       detrend=True, fwhm_smooth=4, n_jobs=2)
122 |     # Extract timeseries using ROI coordinates with a radius of 6mm
123 |     coords = [(30, -22, -18), (50, 30, 40)]
124 |     timeser_coord_n3, roi_sphere, id_order = masked_timeseries.extract_time_series(bold_paths=n3_boldpaths,
125 |                                                                                    roi_type='coords', high_pass_sec=128,
126 |                                                                                    roi_coords=coords, radius_mm=6,
127 |                                                                                    detrend=True, fwhm_smooth=4, n_jobs=2)
128 |
129 | **extract_postcue_trs_for_conditions**
130 |
131 | This function extracts the TR-by-TR cue-locked timeseries for different conditions at cue onset + TR delay.
132 |
133 | To use the `extract_postcue_trs_for_conditions` function, you have to provide the following information:
134 | - events_data: A list of paths to the behavioral data files. This should match the order of subjects/runs/tasks in the BOLD file list.
135 | - onset: The name of the column containing onset values in the behavioral data.
136 | - trial_name: The name of the column containing condition values in the behavioral data.
137 | - bold_tr: The repetition time (TR) for the acquisition of BOLD data in seconds.
138 | - bold_vols: The number of volumes for the BOLD acquisition.
139 | - time_series: The timeseries data extracted using the `extract_time_series` function.
140 | - conditions: A list of conditions to extract the post-cue timeseries for.
141 | - tr_delay: The number of TRs after the onset of the stimulus to extract and plot.
142 | - list_trpaths: The list of subject information strings formatted as 'sub-{sub_id}_run-{run_id}'.
143 |
144 | The function returns a pandas DataFrame containing mean signal intensity values, subject labels, trial labels, TR values, and cue labels for all specified conditions.
145 |
146 | Example:
147 |
148 | .. code-block:: python
149 |
150 |     from pyrelimri import masked_timeseries
151 |
152 |     # Paths to events files
153 |     events_data = ['./sub-1-ses-1_task-kewl_run-01_events.csv', './sub-2-ses-1_task-kewl_run-01_events.csv', './sub-3-ses-1_task-kewl_run-01_events.csv']
154 |
155 |     # Onset column name
156 |     onset = 'onset'
157 |
158 |     # Trial type column name for onset times and conditions, and list of conditions to plot
159 |     trial_name = 'trial_type'
160 |     conditions = ['Happy', 'Sad']
161 |
162 |     # TR delay: number of TRs after cue onset to extract
163 |     tr_delay = 5
164 |
165 |     # Extract post-cue timeseries for conditions. Notice, timeser_mask_n3 and id_order are from the example above
166 |     out_df = masked_timeseries.extract_postcue_trs_for_conditions(
167 |         events_data=events_data, onset=onset, trial_name=trial_name, bold_tr=2.0, bold_vols=150,
168 |         time_series=timeser_mask_n3, conditions=conditions, tr_delay=tr_delay, list_trpaths=id_order
169 |     )
170 |
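Since `out_df` is a long-format pandas DataFrame with `Mean_Signal`, `Subject`, `Trial`, `TR` and `Cue` columns, a quick sanity check of the cue-locked averages might look like this (an optional sketch):

.. code-block:: python

    # average signal at each TR within each cue condition
    out_df.groupby(['Cue', 'TR'])['Mean_Signal'].mean()
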
171 |
172 |
173 | **plot_responses**
174 |
175 | This function plots the average response for each condition using the post-cue timeseries.
176 |
177 | To use the `plot_responses` function, you need to provide:
178 | - postcue_timeseries_dict: The dictionary with post-cue timeseries for each condition.
179 | - conditions: The list of conditions to plot.
180 | - output_file: The path to save the plot image.
181 |
182 | The function does not return any value, but it saves the plot to the specified output file.
183 |
184 | Example:
185 |
186 | .. code-block:: python
187 |
188 |     # Path to save the plot image
189 |     output_file = "./responses_plot.png"
190 |
191 |     # Plot average responses for conditions
192 |     masked_timeseries.plot_responses(postcue_timeseries_dict=out_df, conditions=conditions, output_file=output_file)
193 |
194 |
195 | This will generate and save a plot of the average response for each condition to the specified output file.
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 | **Fake TR-by-TR code**
204 |
205 | Below, a couple of functions are defined. Some are from `masked_timeseries.py` and some are based on Russ `Poldrack's MID simulations `_
206 |
207 | .. code-block:: python
208 |
    import os
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from nilearn.glm.first_level import make_first_level_design_matrix, spm_hrf, glover_hrf
    # note: the imports above were missing from the original snippet; trlocked_events and
    # extract_time_series_values below are helper functions from masked_timeseries.py
209 |     def extract_postcue_trs_for_conditions(events_data: list, onset: str, trial_name: str,
210 |                                            bold_tr: float, bold_vols: int, time_series: np.ndarray,
211 |                                            conditions: list, tr_delay: int, list_trpaths: list):
212 |
213 |         dfs = []
214 |         id_list = []
215 |         # check array names first
216 |         for beh_path in events_data:
217 |             # create sub ID array to test against BOLD array
218 |             beh_name = os.path.basename(beh_path)
219 |             path_parts = beh_name.split('_')
220 |             sub_id, run_id = None, None
221 |             for val in path_parts:
222 |                 if 'sub-' in val:
223 |                     sub_id = val.split('-')[1]
224 |                 elif 'run-' in val:
225 |                     run_id = val.split('-')[1]
226 |             sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
227 |             id_list.append(sub_info)
228 |
229 |         assert len(id_list) == len(list_trpaths), f"Length of behavioral files {len(id_list)} does not match TR list {len(list_trpaths)}"
230 |         assert (np.array(id_list) == np.array(list_trpaths)).all(), "Mismatch in order of IDs between Beh/BOLD"
231 |
232 |         for cue in conditions:
233 |             cue_dfs = []  # creating separate cue dfs to accommodate different number of trials for cue types
234 |             sub_n = 0
235 |             for index, beh_path in enumerate(events_data):
236 |                 subset_df = trlocked_events(events_path=beh_path, onsets_column=onset,
237 |                                             trial_name=trial_name, bold_tr=bold_tr, bold_vols=bold_vols, separator='\t')
238 |                 trial_type = subset_df[subset_df[trial_name] == cue]
239 |                 out_trs_array = extract_time_series_values(behave_df=trial_type, time_series_array=time_series[index],
240 |                                                            delay=tr_delay)
241 |                 sub_n = sub_n + 1  # subject is equated to every event file N, subj n = 1 to len(events_data)
242 |
243 |                 # nth trial, list of TRs
244 |                 for n_trial, trs in enumerate(out_trs_array):
245 |                     num_delay = len(trs)  # Number of TRs for the current trial
246 |                     if num_delay != tr_delay:
247 |                         raise ValueError(f"Mismatch between tr_delay ({tr_delay}) and number of delay TRs ({num_delay})")
248 |
249 |                     reshaped_array = np.array(trs).reshape(-1, 1)
250 |                     df = pd.DataFrame(reshaped_array, columns=['Mean_Signal'])
251 |                     df['Subject'] = sub_n
252 |                     df['Trial'] = n_trial + 1
253 |                     tr_values = np.arange(1, tr_delay + 1)
254 |                     df['TR'] = tr_values
255 |                     cue_values = [cue] * num_delay
256 |                     df['Cue'] = cue_values
257 |                     cue_dfs.append(df)
258 |
259 |             dfs.append(pd.concat(cue_dfs, ignore_index=True))
260 |
261 |         return pd.concat(dfs, ignore_index=True)
262 |
263 |
264 |     def plot_responses(df, tr: int, delay: int, style: str = 'white', save_path: str = None,
265 |                        show_plot: bool = True, ylim: tuple = (-1, 1), plt_hrf: str = None):
266 |
267 |         plt.figure(figsize=(10, 8), dpi=300)
268 |         if style not in ['white', 'whitegrid']:
269 |             raise ValueError("Style should be white or whitegrid, provided:", style)
270 |
271 |         sns.set(style=style, font='DejaVu Serif')
272 |
273 |         sns.lineplot(x="TR", y="Mean_Signal", hue="Cue", style="Cue", palette="Set1",
274 |                      errorbar='se', err_style="band", err_kws={'alpha': 0.1}, n_boot=1000,
275 |                      legend="brief", data=df)
276 |
        # plt_hrf was referenced but never defined in the original snippet; it is added
        # above as an optional argument ('spm' or 'glover' overlays a canonical HRF)
277 |         if plt_hrf in ['spm', 'glover']:
278 |             if plt_hrf == 'spm':
279 |                 hrf = spm_hrf(tr=tr, oversampling=1, time_length=delay*2, onset=0)
280 |                 time_points = np.arange(1, delay + 1, 1)
281 |                 plt.plot(time_points, hrf, linewidth=2, linestyle='--', label='SPM HRF', color='black')
282 |             if plt_hrf == 'glover':
283 |                 hrf = glover_hrf(tr=tr, oversampling=1, time_length=delay*2, onset=0)
284 |                 time_points = np.arange(1, delay + 1, 1)
285 |                 plt.plot(time_points, hrf, linewidth=2, linestyle='--', label='Glover HRF', color='black')
286 |
287 |         # Set labels and title
288 |         plt.xlabel(f'Seconds (TR: {tr} sec)')
289 |         plt.ylabel('Avg. Signal Change')
290 |         plt.ylim(ylim[0], ylim[1])
291 |         plt.xlim(0, delay)
292 |         plt.xticks(np.arange(0, delay, 1),
293 |                    [f'{round((i * tr)-tr, 1)}' for i in range(0, delay)],
294 |                    rotation=45)
295 |
296 |         # Show legend
297 |         plt.legend(loc='upper right')
298 |
299 |         # Check if save_path is provided
300 |         if save_path:
301 |             # Get the directory path from save_path
302 |             directory = os.path.dirname(save_path)
303 |             # Check if directory exists, if not, create it
304 |             if not os.path.exists(directory):
305 |                 os.makedirs(directory)
306 |             # Save plot
307 |             plt.savefig(save_path)
308 |
309 |         # Show plot if show_plot is True
310 |         if not show_plot:
311 |             plt.close()
312 |
313 |
314 |     def make_stick_function(onsets, durations, length=.1, resolution=.1):
315 |         """
316 |         Create a stick function with onsets and durations
317 |
318 |         Parameters
319 |         ----------
320 |         onsets : list
321 |             List of onset times
322 |         durations : list
323 |             List of duration times
324 |         length : float
325 |             Length of the stick function (in seconds)
326 |         resolution : float
327 |             Resolution of the stick function (in seconds)
328 |             0.1 secs by default
329 |
330 |         Returns
331 |         -------
332 |         sf : np.array
333 |             Timepoints of the stick function
334 |         """
335 |         timepoints = np.arange(0, length, resolution)
336 |         df = np.zeros_like(timepoints)
337 |         for onset, duration in zip(onsets, durations):
338 |             df[(timepoints >= onset) & (timepoints < onset + duration)] = 1
339 |         sf_df = pd.DataFrame({'impulse': df})
340 |         sf_df.index = timepoints
341 |         return sf_df
342 |
343 |     def generate_data(desmtx_conv, beta_dict, noise_sd=.005, beta_sub_sd=.005):
344 |         """
345 |         Generate data based on the design matrix and beta values
346 |
347 |         Parameters
348 |         ----------
349 |
350 |         desmtx_conv : pd.DataFrame
351 |             Design matrix with convolved regressors
352 |         beta_dict : dict
353 |             Dictionary of beta values for each regressor of interest
354 |         noise_sd : float
355 |             Standard deviation of the noise
356 |         beta_sub_sd : float
357 |             Standard deviation of the betas across subjects
358 |         """
359 |         # check the beta dict
360 |         betas = np.zeros(desmtx_conv.shape[1])
361 |         for key in beta_dict.keys():
362 |             assert key in desmtx_conv.columns, f'{key} not in desmtx'
363 |         betas = np.array([beta_dict[key] if key in beta_dict.keys() else 0 for key in desmtx_conv.columns], dtype='float32')
364 |         if beta_sub_sd > 0:
365 |             betas += np.random.normal(0, beta_sub_sd, betas.shape)
366 |
367 |         data = np.dot(desmtx_conv.values, betas) + np.random.normal(0, noise_sd, desmtx_conv.shape[0])
368 |         data_df = pd.DataFrame({'data': data})
369 |         data_df.index = desmtx_conv.index
370 |         return data_df
371 |
372 |     def create_conv_mat(eventsdf, tr_dur=None, acq_dur=None):
373 |         vol_time = acq_dur
374 |         tr = tr_dur
375 |         design_mat = make_first_level_design_matrix(
376 |             frame_times=np.linspace(0, vol_time, int(vol_time/tr)),
377 |             events=eventsdf, hrf_model='spm',
378 |             drift_model=None, high_pass=None)
379 |         return design_mat
380 |
381 |
382 | Create a fake events file, convolve the events using Nilearn's function and generate a timeseries.
383 |
384 | .. code-block:: python
385 |
    tr = 0.8  # TR in seconds; not defined in the original snippet (0.8 yields the 225 volumes referenced below)
386 |     task_time = 180
387 |     onsets = np.arange(0, task_time, 10)
388 |
389 |     np.random.seed(11)
390 |     dur_opts = [1.5, 2, 2.5]
391 |     prob_durs = [.50, .25, .25]
392 |     durations = np.random.choice(dur_opts, size=len(onsets), p=prob_durs)
393 |
394 |     trial_types = ["Happy" if i % 2 == 0 else "Sad" for i in range(len(onsets))]
395 |
396 |     events_df = pd.DataFrame({
397 |         "onset": onsets,
398 |         "duration": durations,
399 |         "trial_type": trial_types
400 |     })
401 |
402 |     conv_vals = create_conv_mat(eventsdf=events_df, tr_dur=tr, acq_dur=task_time)
403 |
404 |     beta_dict = {'Happy': 1, 'Sad': .6}
405 |     data_fake = generate_data(conv_vals[['Happy', 'Sad']], beta_dict)
406 |
407 |
408 | Plot the 1) fake timeseries and 2) timeseries and events combined
409 |
410 | .. code-block:: python
411 |
412 |     plt.figure(figsize=(14, 4))
413 |     plt.plot(data_fake, color='black', linewidth=3)
414 |     plt.ylim(0, .5)
415 |     plt.xticks(fontsize=20)
416 |     plt.yticks(fontsize=20)
417 |     plt.legend(fontsize=20)
418 |
419 | .. code-block:: python
420 |
421 |     plt.figure(figsize=(14, 4))
422 |     plt.plot(data_fake, color='black')
423 |     # sticks at each onset
424 |     for onset in events_df[events_df['trial_type'] == 'Happy']['onset']:
425 |         plt.vlines(onset, ymin=0, ymax=conv_vals.values.max(), color='#1f77b4', linestyle='--',
426 |                    linewidth=3, label='Happy ~ 1')
427 |
428 |     for onset in events_df[events_df['trial_type'] == 'Sad']['onset']:
429 |         plt.vlines(onset, ymin=0, ymax=conv_vals.values.max(), color='#ff7f0e', linestyle='--',
430 |                    linewidth=3, label='Sad ~ .6')
431 |
432 |     # only getting main legends
433 |     handles, labels = plt.gca().get_legend_handles_labels()
434 |     by_label = dict(zip(labels, handles))
435 |     plt.legend(by_label.values(), by_label.keys(), fontsize=20)
436 |     plt.ylim(0, .5)
437 |     plt.xticks(fontsize=20)
438 |     plt.yticks(fontsize=20)
439 |
440 |     plt.show()
441 |
442 |
443 | First, reshape the file from volumes (225, 1) into the shape that is expected: number of subjects, volumes, 1 (1, 225, 1). \
444 | Save the fake events file path for this one fake subject and use it in the function from `masked_timeseries.py`.
445 |
446 | .. code-block:: python
447 |
448 |     timeseries_reshaped = np.reshape(data_fake, (1, len(data_fake), 1))
449 |     events_df.to_csv('/tmp/sub-01_run-01_test-events.csv', sep='\t')
450 |     events_file = ['/tmp/sub-01_run-01_test-events.csv']
451 |
453 |     conditions = ['Happy', 'Sad']
454 |     trdelay = int(15/tr)
455 |     df = extract_postcue_trs_for_conditions(events_data=events_file, onset='onset', trial_name='trial_type',
456 |                                             bold_tr=tr, bold_vols=len(timeseries_reshaped[0]), time_series=timeseries_reshaped,
457 |                                             conditions=conditions, tr_delay=trdelay, list_trpaths=['sub-01_run-01'])
458 |     plot_responses(df=df, tr=tr, delay=trdelay, save_path=None, style='whitegrid',
459 |                    show_plot=True, ylim=(-.05, .5))
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | PyReliMRI Usage
3 | ===============
4 |
5 | The `pyrelimri` package contains multiple modules for calculating image reliability measures.
6 |
7 | brain_icc
8 | ---------
9 |
10 | From `pyrelimri`, the `brain_icc` module contains functions for voxelwise and atlas-based intraclass correlation estimates on 3D volumes.
11 |
12 | voxelwise_icc
13 | ~~~~~~~~~~~~~
14 |
15 | .. autofunction:: pyrelimri.brain_icc.voxelwise_icc
16 |
17 | roi_icc
18 | ~~~~~~~
19 |
20 | .. autofunction:: pyrelimri.brain_icc.roi_icc
21 |
22 | .. figure:: img_png/brainicc_fig.png
23 |     :align: center
24 |     :alt: Voxelwise Intraclass Correlation
25 |     :figclass: align-center
26 |
27 |     Figure 1. Voxelwise Intraclass Correlation
28 |
29 |
30 | icc
31 | ---
32 |
33 | From `pyrelimri`, the `icc` module contains various functions related to intraclass correlation coefficient calculations.
34 |
35 | sumsq_total
36 | ~~~~~~~~~~~
37 |
38 | .. autofunction:: pyrelimri.icc.sumsq_total
39 |
40 | sumsq_within
41 | ~~~~~~~~~~~~
42 |
43 | .. autofunction:: pyrelimri.icc.sumsq_within
44 |
45 | sumsq_btwn
46 | ~~~~~~~~~~
47 |
48 | .. autofunction:: pyrelimri.icc.sumsq_btwn
49 |
50 | icc_confint
51 | ~~~~~~~~~~~
52 |
53 | .. autofunction:: pyrelimri.icc.icc_confint
54 |
55 | sumsq_icc
56 | ~~~~~~~~~
57 |
58 | .. autofunction:: pyrelimri.icc.sumsq_icc
59 |
60 | similarity
61 | ----------
62 |
63 | From `pyrelimri`, the `similarity` module contains functions to calculate image similarity coefficients.
64 |
65 | image_similarity
66 | ~~~~~~~~~~~~~~~~
67 |
68 | .. autofunction:: pyrelimri.similarity.image_similarity
69 |
70 | pairwise_similarity
71 | ~~~~~~~~~~~~~~~~~~~
72 |
73 | .. autofunction:: pyrelimri.similarity.pairwise_similarity
74 |
75 | .. figure:: img_png/similarity_fig.png
76 |     :align: center
77 |     :alt: Similarity Between Images
78 |     :figclass: align-center
79 |
80 |     Figure 3. Similarity Between Images
81 |
82 |
83 | tetrachoric_correlation
84 | -----------------------
85 |
86 | From `pyrelimri`, the `tetrachoric_correlation` module contains functions to calculate tetrachoric correlation between binary images.
87 |
88 | tetrachoric_corr
89 | ~~~~~~~~~~~~~~~~
90 |
91 | .. autofunction:: pyrelimri.tetrachoric_correlation.tetrachoric_corr
92 |
93 | conn_icc
94 | --------
95 |
96 | The `conn_icc` module is a wrapper for the `icc` module, specifically focusing on edge-wise intraclass correlation coefficient calculations.
97 |
98 | edgewise_icc
99 | ~~~~~~~~~~~~
100 |
101 | .. 
autofunction:: pyrelimri.conn_icc.edgewise_icc 102 | 103 | masked_timeseries 104 | ================= 105 | 106 | The `masked_timeseries` module extracts timeseries data from BOLD images for regions of interest (ROI). 107 | 108 | .. figure:: img_png/maskedtimeseries_example.png 109 | :align: center 110 | :alt: Masked Timeseries Example 111 | :figclass: align-center 112 | 113 | Figure 4. Masked Timeseries Illustration 114 | 115 | extract_time_series 116 | ~~~~~~~~~~~~~~~~~~~ 117 | 118 | .. autofunction:: pyrelimri.masked_timeseries.extract_time_series 119 | 120 | extract_postcue_trs_for_conditions 121 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 122 | 123 | .. autofunction:: pyrelimri.masked_timeseries.extract_postcue_trs_for_conditions 124 | 125 | plot_responses 126 | ~~~~~~~~~~~~~~ 127 | 128 | .. autofunction:: pyrelimri.masked_timeseries.plot_responses 129 | -------------------------------------------------------------------------------- /pyrelimri/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/pyrelimri/__init__.py -------------------------------------------------------------------------------- /pyrelimri/brain_icc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import nibabel as nib 3 | from pandas import DataFrame 4 | from sklearn.preprocessing import minmax_scale 5 | from pyrelimri.icc import sumsq_icc 6 | from nilearn import image 7 | from nilearn.maskers import (NiftiMasker, NiftiMapsMasker, NiftiLabelsMasker) 8 | from nilearn.datasets import ( 9 | fetch_atlas_aal, 10 | fetch_atlas_destrieux_2009, 11 | fetch_atlas_difumo, 12 | fetch_atlas_harvard_oxford, 13 | fetch_atlas_juelich, 14 | fetch_atlas_msdl, 15 | fetch_atlas_pauli_2017, 16 | fetch_atlas_schaefer_2018, 17 | fetch_atlas_talairach 18 | ) 19 | SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL = True 20 | 21 | 22 | def voxelwise_icc(multisession_list: list, mask: str, icc_type: str = 'icc_3') -> dict: 23 | """ 24 | Calculate the Intraclass Correlation Coefficient (ICC) along with lower and upper bound confidence intervals 25 | by voxel for specified input files using manual sum of squares calculations. 26 | 27 | Args: 28 | multisession_list (list of list of str): 29 | List of lists containing paths to subject 3D volumes for each session. 30 | 31 | Example: 32 | dat_ses1 = ["./ses1/sub-00_Contrast-A_bold.nii.gz", "./ses1/sub-01_Contrast-A_bold.nii.gz", "./ses1/sub-03_Contrast-A_bold.nii.gz"] 33 | dat_ses2 = ["./ses2/sub-00_Contrast-A_bold.nii.gz", "./ses2/sub-01_Contrast-A_bold.nii.gz", "./ses2/sub-03_Contrast-A_bold.nii.gz"] 34 | dat_ses3 = ["./ses3/sub-00_Contrast-A_bold.nii.gz", "./ses3/sub-01_Contrast-A_bold.nii.gz", "./ses3/sub-03_Contrast-A_bold.nii.gz"] 35 | The order of the subjects in each list has to be the same. 36 | 37 | mask (str): 38 | Path to 3D mask in NIfTI format. 39 | 40 | icc_type (str, optional): 41 | Type of ICC to compute, default is 'icc_3'. 42 | Options: 'icc_1', 'icc_2', 'icc_3'. 43 | 44 | Returns: 45 | dict: 46 | Dictionary containing the following 3D images: 47 | 'est' (nibabel.Nifti1Image): Estimated ICC values. 48 | 'lowbound' (nibabel.Nifti1Image): Lower bound of ICC confidence intervals. 49 | 'upbound' (nibabel.Nifti1Image): Upper bound of ICC confidence intervals. 50 | 'btwnsub' (nibabel.Nifti1Image): Between-subject variance. 51 | 'wthnsub' (nibabel.Nifti1Image): Within-subject variance. 
--------------------------------------------------------------------------------
/pyrelimri/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/pyrelimri/__init__.py
--------------------------------------------------------------------------------
/pyrelimri/brain_icc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import nibabel as nib
3 | from pandas import DataFrame
4 | from sklearn.preprocessing import minmax_scale
5 | from pyrelimri.icc import sumsq_icc
6 | from nilearn import image
7 | from nilearn.maskers import (NiftiMasker, NiftiMapsMasker, NiftiLabelsMasker)
8 | from nilearn.datasets import (
9 |     fetch_atlas_aal,
10 |     fetch_atlas_destrieux_2009,
11 |     fetch_atlas_difumo,
12 |     fetch_atlas_harvard_oxford,
13 |     fetch_atlas_juelich,
14 |     fetch_atlas_msdl,
15 |     fetch_atlas_pauli_2017,
16 |     fetch_atlas_schaefer_2018,
17 |     fetch_atlas_talairach
18 | )
19 | SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL = True
20 | 
21 | 
22 | def voxelwise_icc(multisession_list: list, mask: str, icc_type: str = 'icc_3') -> dict:
23 |     """
24 |     Calculate the Intraclass Correlation Coefficient (ICC) along with lower and upper bound confidence intervals
25 |     by voxel for specified input files using manual sum of squares calculations.
26 | 
27 |     Args:
28 |         multisession_list (list of list of str):
29 |             List of lists containing paths to subject 3D volumes for each session.
30 | 
31 |             Example:
32 |                 dat_ses1 = ["./ses1/sub-00_Contrast-A_bold.nii.gz", "./ses1/sub-01_Contrast-A_bold.nii.gz", "./ses1/sub-03_Contrast-A_bold.nii.gz"]
33 |                 dat_ses2 = ["./ses2/sub-00_Contrast-A_bold.nii.gz", "./ses2/sub-01_Contrast-A_bold.nii.gz", "./ses2/sub-03_Contrast-A_bold.nii.gz"]
34 |                 dat_ses3 = ["./ses3/sub-00_Contrast-A_bold.nii.gz", "./ses3/sub-01_Contrast-A_bold.nii.gz", "./ses3/sub-03_Contrast-A_bold.nii.gz"]
35 |             The order of the subjects in each list has to be the same.
36 | 
37 |         mask (str):
38 |             Path to 3D mask in NIfTI format.
39 | 
40 |         icc_type (str, optional):
41 |             Type of ICC to compute, default is 'icc_3'.
42 |             Options: 'icc_1', 'icc_2', 'icc_3'.
43 | 
44 |     Returns:
45 |         dict:
46 |             Dictionary containing the following 3D images:
47 |             'est' (nibabel.Nifti1Image): Estimated ICC values.
48 |             'lowbound' (nibabel.Nifti1Image): Lower bound of ICC confidence intervals.
49 |             'upbound' (nibabel.Nifti1Image): Upper bound of ICC confidence intervals.
50 |             'btwnsub' (nibabel.Nifti1Image): Between-subject variance.
51 |             'wthnsub' (nibabel.Nifti1Image): Within-subject variance.
52 |             'btwnmeas' (nibabel.Nifti1Image): Between-measurement variance.
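    Example:
        A minimal sketch of a call (hypothetical mask path; subjects ordered identically in each session list):
            brain_output = voxelwise_icc(multisession_list=[dat_ses1, dat_ses2],
                                         mask="./brain_mask.nii.gz", icc_type="icc_2")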
53 |     """
54 |     session_lengths = [len(session) for session in multisession_list]
55 |     session_all_same = all(length == session_lengths[0] for length in session_lengths)
56 | 
57 |     assert session_all_same, f"Not all lists in session_files have the same length. " \
58 |                              f"Mismatched lengths: {', '.join(str(length) for length in session_lengths)}"
59 | 
60 |     # concatenate the paths to 3D images into a 4D nifti image (4th dimension are subjs) using image concat
61 |     # iterates over list of lists
62 |     try:
63 |         session_data = [image.concat_imgs(i) for i in multisession_list]
64 |     except ValueError as e:
65 |         print(e)
66 |         print("Error when attempting to concatenate images. Confirm affine/size of images.")
            raise  # re-raise so execution doesn't continue with undefined session_data
67 | 
68 |     # mask images
69 |     masker = NiftiMasker(mask_img=mask)
70 |     imgdata = [masker.fit_transform(i) for i in session_data]
71 | 
72 |     # get subj details per session to use in pandas df
73 |     subj_n = imgdata[0].shape[:-1]
74 |     subj_list = np.arange(subj_n[0])
75 | 
76 |     # calculate number of session, creating session labels and number of voxels
77 |     num_sessions = len(imgdata)
78 |     sess_labels = [f"sess{i + 1}" for i in range(num_sessions)]
79 |     voxel_n = imgdata[0].shape[-1]
80 | 
81 |     # empty list for icc, low/upper bound 95% ICC, mean square between & within subject
82 |     est, lowbound, upbound, \
83 |         btwn_sub_var, within_sub_var, btwn_meas_var = np.empty((6, voxel_n))
84 | 
85 |     for voxel in range(voxel_n):
86 |         np_voxdata = np.column_stack((np.tile(subj_list, num_sessions),
87 |                                       np.hstack(
88 |                                           [[sess_labels[j]] * len(imgdata[j][:, voxel]) for j in range(num_sessions)]),
89 |                                       np.hstack([imgdata[j][:, voxel] for j in range(num_sessions)])
90 |                                       ))
91 | 
92 |         vox_pd = DataFrame(data=np_voxdata, columns=["subj", "sess", "vals"])
93 |         vox_pd = vox_pd.astype({"subj": int, "sess": "category", "vals": float})
94 | 
95 |         est[voxel], lowbound[voxel], upbound[voxel], \
96 |             btwn_sub_var[voxel], within_sub_var[voxel], \
97 |             btwn_meas_var[voxel] = sumsq_icc(df_long=vox_pd, sub_var="subj", sess_var="sess",
98 |                                              value_var="vals", icc_type=icc_type)
99 | 
100 |     # using unmask to reshape the 1D voxels back to 3D specified mask and saving to dictionary
101 |     result_dict = {
102 |         'est': masker.inverse_transform(np.array(est)),
103 |         'lowbound': masker.inverse_transform(np.array(lowbound)),
104 |         'upbound': masker.inverse_transform(np.array(upbound)),
105 |         'btwnsub': masker.inverse_transform(np.array(btwn_sub_var)),
106 |         'wthnsub': masker.inverse_transform(np.array(within_sub_var)),
107 |         'btwnmeas': masker.inverse_transform(np.array(btwn_meas_var))
108 |     }
109 | 
110 |     return result_dict
111 | 
112 | 
113 | def setup_atlas(name_atlas: str, **kwargs) -> nib.Nifti1Image:
114 |     """
115 |     Setup & fetch a brain atlas based on the provided atlas name & optional parameters via kwargs associated
116 |     with documentation from Nilearn.
117 | 
118 |     Args:
119 |         name_atlas (str):
120 |             Name of the atlas to fetch. Available options are:
121 |             'aal', 'destrieux_2009', 'difumo', 'harvard_oxford', 'juelich',
122 |             'msdl', 'pauli_2017', 'schaefer_2018', 'talairach'.
123 | 
124 |         **kwargs:
125 |             Additional parameters to customize the fetching process. Examples:
126 |             - 'data_dir' (str): Directory where the fetched atlas data will be stored. Default is '/tmp/'.
127 |             - 'verbose' (int): Verbosity level of the process. Default is 0.
128 | 
129 |     Returns:
130 |         nib.Nifti1Image:
131 |             Fetched brain atlas in NIfTI format.
132 |     """
133 |     default_params = {
134 |         'data_dir': '/tmp/',
135 |         'verbose': 0
136 |     }
137 | 
138 |     # Dictionary mapping atlas names to their corresponding fetch functions
139 |     grab_atlas = {
140 |         'aal': fetch_atlas_aal,
141 |         'destrieux_2009': fetch_atlas_destrieux_2009,
142 |         'difumo': fetch_atlas_difumo,
143 |         'harvard_oxford': fetch_atlas_harvard_oxford,
144 |         'juelich': fetch_atlas_juelich,
145 |         'msdl': fetch_atlas_msdl,
146 |         'pauli_2017': fetch_atlas_pauli_2017,
147 |         'schaefer_2018': fetch_atlas_schaefer_2018,
148 |         'talairach': fetch_atlas_talairach
149 |     }
150 |     # dict.get() cannot raise TypeError; missing required kwargs surface when the fetch function is called below
151 |     atlas_grabbed = grab_atlas.get(name_atlas)
152 | 
153 |     if atlas_grabbed is None:
154 |         raise ValueError(f"INCORRECT atlas name. PROVIDED: {name_atlas}\n"
155 |                          f"OPTIONS: {', '.join(grab_atlas.keys())}")
156 | 
157 |     default_params.update(kwargs)
158 |     try:
159 |         return atlas_grabbed(**default_params)
160 |     except TypeError as e:
161 |         raise TypeError(f"Additional parameters required for atlas: {name_atlas}. "
162 |                         f"Review: Nilearn Atlases for details.\nError: {e}")
163 | 
164 | 
165 | def prob_atlas_scale(nifti_map, estimate_array):
166 |     """
167 |     Rescales a probabilistic 3D Nifti map to match the range of estimated values.
168 | 
169 |     Args:
170 |         nifti_map (Nifti1Image):
171 |             Input 3D Nifti image to be rescaled.
172 | 
173 |         estimate_array (ndarray):
174 |             1D NumPy array containing the estimates used for scaling.
175 | 
176 |     Returns:
177 |         Nifti1Image:
178 |             Rescaled 3D image where non-zero values are scaled to match the range of `estimate_array`.
179 | 
180 |     Notes:
181 |         This function rescales the non-zero values in the input Nifti image `nifti_map` using the minimum and maximum
182 |         values of `estimate_array`. The spatial/header info from `nifti_map` is preserved.
183 |     """
184 |     temp_img_array = nifti_map.get_fdata().flatten()
185 |     non_zero_mask = temp_img_array != 0
186 | 
187 |     # Scale the non-zero values using minmax_scale from sklearn
188 |     scaled_values = minmax_scale(
189 |         temp_img_array[non_zero_mask],
190 |         feature_range=(min(estimate_array), max(estimate_array))
191 |     )
192 |     # New array w/ zeros & replace the non-zero values with the [new] scaled values
193 |     rescaled = np.zeros_like(temp_img_array, dtype=float)
194 |     rescaled[non_zero_mask] = scaled_values
195 |     new_img_shape = np.reshape(rescaled, nifti_map.shape)
196 | 
197 |     return image.new_img_like(nifti_map, new_img_shape)
198 | 
199 | 
200 | def roi_icc(multisession_list: list, type_atlas: str, atlas_dir: str, icc_type='icc_3', **kwargs):
201 |     """
202 |     Calculate the Intraclass Correlation Coefficient (ICC) for each ROI in a specified atlas
203 |     (+lower bound & upper bound CI) for input files using manual sum of squares calculations.
203 |     It also provides associated between subject variance, within subject variance and between
204 |     measure variance estimates.
205 | The function expects the subject's data paths to be provided as a list of lists for sessions: 206 | 207 | Example: 208 | dat_ses1 = ["./ses1/sub-00_Contrast-A_bold.nii.gz", "./ses1/sub-01_Contrast-A_bold.nii.gz", "./ses1/sub-03_Contrast-A_bold.nii.gz"] 209 | dat_ses2 = ["./ses2/sub-00_Contrast-A_bold.nii.gz", "./ses2/sub-01_Contrast-A_bold.nii.gz", "./ses2/sub-03_Contrast-A_bold.nii.gz"] 210 | dat_ses3 = ["./ses3/sub-00_Contrast-A_bold.nii.gz", "./ses3/sub-01_Contrast-A_bold.nii.gz", "./ses3/sub-03_Contrast-A_bold.nii.gz"] 211 | ** The order of the subjects in each list has to be the same ** 212 | 213 | Examples: 214 | # Two-session example: 215 | multisession_list = [dat_ses1, dat_ses2] 216 | # Three-session example: 217 | multisession_list = [dat_ses1, dat_ses2, dat_ses3] 218 | 219 | Inter-subject variance corresponds to variance between subjects across all sessions (1, 2, 3). 220 | Intra-subject variance corresponds to variance within subjects across all sessions (1, 2, 3). 221 | 222 | The atlas name should be one of the probabilistic and ROI parcellations listed: 223 | https://nilearn.github.io/dev/modules/datasets.html#atlases 224 | 225 | Args: 226 | multisession_list (list of list of str): List of lists containing paths to subject 3D volumes for each session. 227 | type_atlas (str): Name of the atlas type provided within Nilearn atlases. 228 | atlas_dir (str): Location to download/store downloaded atlas. Recommended: '/tmp/'. 229 | icc_type (str, optional): Type of ICC to compute, default is 'icc_3'. Options: 'icc_1', 'icc_2', 'icc_3'. 230 | **kwargs (optional): Additional parameters to customize the atlas fetching process and masker 231 | settings. 232 | - data_dir (str): Directory where the fetched atlas data will be stored. Default is '/tmp/'. 233 | - verbose (int): Verbosity level of the fetching process. Default is 0. 234 | 235 | Returns: 236 | dict: Dictionary containing the following arrays and values: 237 | - roi_labels (list): Labels of the ROIs in the atlas. 238 | - est (ndarray): Estimated ICC values for each ROI. 239 | - lowbound (ndarray): Lower bound of ICC confidence intervals for each ROI. 240 | - upbound (ndarray): Upper bound of ICC confidence intervals for each ROI. 241 | - btwnsub (ndarray): Between-subject variance for each ROI. 242 | - wthnsub (ndarray): Within-subject variance for each ROI. 243 | - btwnmeas (ndarray): Between-measurement variance for each ROI. 244 | - est_3d (nibabel.Nifti1Image): Estimated ICC values for each ROI. 245 | - lowbound_3d (nibabel.Nifti1Image): Lower bound of ICC confidence intervals for each ROI. 246 | - upbound_3d (nibabel.Nifti1Image): Upper bound of ICC confidence intervals for each ROI. 247 | - btwnsub_3d (nibabel.Nifti1Image): Between-subject variance for each ROI. 248 | - wthnsub_3d (nibabel.Nifti1Image): Within-subject variance for each ROI. 249 | - btwnmeas_3d (nibabel.Nifti1Image): Between-measurement variance for each ROI. 250 | 251 | Example: 252 | # Calculate ICC for ROIs using multisession data and AAL atlas 253 | result = roi_icc(multisession_list=multisession_list, type_atlas='aal', atlas_dir='/tmp/', icc_type='icc_2') 254 | """ 255 | # combine brain data 256 | session_lengths = [len(session) for session in multisession_list] 257 | session_all_same = all(length == session_lengths[0] for length in session_lengths) 258 | 259 | assert session_all_same, f"Not all lists in session_files have the same length. 
" \ 260 | f"Mismatched lengths: {', '.join(str(length) for length in session_lengths)}" 261 | 262 | # concatenate the paths to 3D images into a 4D nifti image (4th dimension are subjs) using image concat 263 | try: 264 | session_data = [image.concat_imgs(i) for i in multisession_list] 265 | except ValueError as e: 266 | print(e) 267 | print("Error when attempting to concatenate images. Confirm affine/size of images.") 268 | 269 | # grab atlas data 270 | try: 271 | atlas = setup_atlas(name_atlas=type_atlas, data_dir=atlas_dir, **kwargs) 272 | except TypeError as e: 273 | raise TypeError(f"Addition parameters required for atlas: {type_atlas}." 274 | f"Review: Nilearn Atlases for Details. \nError: {e}") 275 | 276 | # Atlases are either deterministic (3D) or probabilistic (4D). Try except to circumvent error 277 | # Get dimensions and then mask 278 | try: 279 | atlas_dim = len(atlas.maps.shape) 280 | except AttributeError: 281 | atlas_dim = len(nib.load(atlas.maps).shape) 282 | 283 | if atlas_dim == 3: 284 | masker = NiftiLabelsMasker( 285 | labels_img=atlas.maps, 286 | standardize=False, 287 | resampling_target='labels', 288 | verbose=0 289 | ).fit() 290 | elif atlas_dim == 4: 291 | masker = NiftiMapsMasker( 292 | maps_img=atlas.maps, 293 | allow_overlap=True, 294 | standardize=False, 295 | resampling_target='data', 296 | verbose=0 297 | ).fit() 298 | else: 299 | raise ValueError("Atlas maps isn't 3D or 4D, so incompatible with Nifti[Labels/Maps]Masker() .") 300 | 301 | imgdata = [masker.transform(i) for i in session_data] 302 | 303 | # get subj details per session to use in pandas df 304 | subj_n = imgdata[0].shape[:-1] 305 | subj_list = np.arange(subj_n[0]) 306 | 307 | # calculate number of session, creating session labels and number of voxels 308 | num_sessions = len(imgdata) 309 | sess_labels = [f"sess{i + 1}" for i in range(num_sessions)] 310 | roi_n = imgdata[0].shape[-1] 311 | 312 | # empty list for icc, low/upper bound 95% ICC, between sub, within sub and between measure var 313 | est, lowbound, upbound, \ 314 | btwn_sub_var, within_sub_var, btwn_meas_var = np.empty((6, roi_n)) 315 | 316 | for roi in range(roi_n): 317 | np_roidata = np.column_stack((np.tile(subj_list, num_sessions), 318 | np.hstack( 319 | [[sess_labels[j]] * len(imgdata[j][:, roi]) for j in range(num_sessions)]), 320 | np.hstack([imgdata[j][:, roi] for j in range(num_sessions)]) 321 | )) 322 | 323 | roi_pd = DataFrame(data=np_roidata, columns=["subj", "sess", "vals"]) 324 | roi_pd = roi_pd.astype({"subj": int, "sess": "category", "vals": float}) 325 | 326 | est[roi], lowbound[roi], upbound[roi], \ 327 | btwn_sub_var[roi], within_sub_var[roi], \ 328 | btwn_meas_var[roi] = sumsq_icc(df_long=roi_pd, sub_var="subj", sess_var="sess", 329 | value_var="vals", icc_type=icc_type) 330 | 331 | # using unmask to reshape the 1D ROI data back to 3D specified mask and saving to dictionary 332 | result_dict = { 333 | 'roi_labels': atlas.labels[1:], 334 | 'est': np.array(est), 335 | 'lowbound': np.array(lowbound), 336 | 'upbound': np.array(upbound), 337 | 'btwnsub': np.array(btwn_sub_var), 338 | 'wthnsub': np.array(within_sub_var), 339 | 'btwnmeas': np.array(btwn_meas_var) 340 | } 341 | 342 | est_string = {"est_3d": est, 343 | "lowbound_3d": lowbound, "upbound_3d": upbound, 344 | "btwnsub_3d": btwn_sub_var, "wthnsub_3d": within_sub_var, 345 | "btwnmeas_3d": btwn_meas_var 346 | } 347 | 348 | if atlas_dim == 4: 349 | for name, var in est_string.items(): 350 | est_img = masker.inverse_transform(np.array(var)) 351 | resample_img = 
prob_atlas_scale(est_img, np.array(var))
352 |             result_dict[name] = resample_img
353 |     else:
354 |         update_values = {
355 |             'est_3d': masker.inverse_transform(np.array(est)),
356 |             'lowbound_3d': masker.inverse_transform(np.array(lowbound)),
357 |             'upbound_3d': masker.inverse_transform(np.array(upbound)),
358 |             'btwnsub_3d': masker.inverse_transform(np.array(btwn_sub_var)),
359 |             'wthnsub_3d': masker.inverse_transform(np.array(within_sub_var)),
360 |             'btwnmeas_3d': masker.inverse_transform(np.array(btwn_meas_var))
361 |         }
362 |         result_dict.update(update_values)
363 | 
364 |     return result_dict
365 | 
--------------------------------------------------------------------------------
/pyrelimri/conn_icc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from pandas import read_csv, DataFrame
4 | from pyrelimri.icc import sumsq_icc
5 | 
6 | 
7 | def triang_to_fullmat(corr_1darray, size: int):
8 |     """
9 |     Convert a 1D array representing the lower triangular part of a correlation matrix (including diagonal)
10 |     into a full NxN correlation matrix.
11 | 
12 |     Parameters
13 |     ----------
14 |     corr_1darray : numpy.ndarray
15 |         A 1D array containing the elements of the lower triangular part of the correlation matrix,
16 |         including the diagonal elements.
17 | 
18 |     size : int
19 |         The number of variables (N), corresponding to the dimension of the resulting 2D square matrix.
20 | 
21 |     Returns
22 |     -------
23 |     numpy.ndarray
24 |         A 2D array representing the full NxN correlation matrix reconstructed from corr_1darray.
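    Example
    -------
    Illustrative: rebuild a 3x3 matrix from its 6 lower-triangle values (size*(size+1)//2 = 6);
    the result is an NxN array with the lower triangle (incl. diagonal) filled and zeros above it:
    full = triang_to_fullmat(corr_1darray=np.array([1.0, 0.5, 1.0, 0.3, 0.2, 1.0]), size=3)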
25 |     """
26 | 
27 |     # Check if the length of the input array matches the expected length
28 |     expected_2d = size * (size + 1) // 2
29 |     length_1d = len(corr_1darray)
30 | 
31 |     if length_1d != expected_2d:
32 |         raise ValueError(f"Expected length: {expected_2d}, but got {length_1d}")
33 | 
34 |     # Initialize a row-by-col matrix with zeros
35 |     full_matrix = np.zeros((size, size))
36 | 
37 |     # Fill in the lower triangular part including the diagonal
38 |     index = 0
39 |     for i in range(size):
40 |         for j in range(i + 1):
41 |             full_matrix[i, j] = corr_1darray[index]
42 |             index += 1
43 | 
44 |     return full_matrix
45 | 
46 | 
47 | def edgewise_icc(multisession_list: list, n_cols: int, col_names: list = None,
48 |                  separator=None, icc_type='icc_3'):
49 |     """
50 |     Calculates the Intraclass Correlation Coefficient (ICC), its confidence intervals (lower and upper bounds)
51 |     and between subject, within subject and between measure variance components for each edge within specified input
52 |     files or NDarrays using manual sum of squares calculations.
53 |     The path to the subject's data (or ndarrays) should be provided as a list of lists for each session.
54 | 
55 |     Example of input lists for three sessions:
56 |         dat_ses1 = ["./ses1/sub-01_ses-01_task-pilot_conn.csv", "./ses1/sub-02_ses-01_task-pilot_conn.csv",
57 |                     "./ses1/sub-03_ses-01_task-pilot_conn.csv"]
58 |         dat_ses2 = ["./ses2/sub-01_ses-02_task-pilot_conn.csv", "./ses2/sub-02_ses-02_task-pilot_conn.csv",
59 |                     "./ses2/sub-03_ses-02_task-pilot_conn.csv"]
60 |         dat_ses3 = ["./ses3/sub-01_ses-03_task-pilot_conn.csv", "./ses3/sub-02_ses-03_task-pilot_conn.csv",
61 |                     "./ses3/sub-03_ses-03_task-pilot_conn.csv"]
62 | 
63 |     The order of the subjects in each list must be the same.
64 | 
65 |     Two session example:
66 |         multisession_list = [dat_ses1, dat_ses2]
67 |     Three session example:
68 |         multisession_list = [dat_ses1, dat_ses2, dat_ses3]
69 | 
70 |     Inter-subject variance: between subjects in sessions 1, 2, and 3
71 |     Intra-subject variance: within subject across sessions 1, 2, and 3
72 | 
73 |     Parameters
74 |     ----------
75 |     multisession_list : list of lists
76 |         Contains paths to .npy files or NDarrays for subjects' connectivity MxN square matrices for each session.
77 | 
78 |     n_cols : int
79 |         Expected number of columns/rows in the NxN matrix.
80 | 
81 |     col_names : list of str, optional
82 |         List of column names corresponding to the MxN matrix. Defaults to None.
83 | 
84 |     separator : str, optional
85 |         If `multisession_list` contains paths to delimited files (.csv/.txt rather than .npy),
86 |         the delimiter used to load them, e.g., ',' or '\t'. Defaults to None.
87 | 
88 |     icc_type : str, optional
89 |         Specify ICC type. Default is 'icc_3'. Options: 'icc_1', 'icc_2', 'icc_3'.
90 | 
91 |     Returns
92 |     -------
93 |     dict
94 |         A dictionary with the following keys:
95 |         - 'roi_labels': List of column names representing the connectivity edges.
96 |         - 'est': Estimated ICCs as a 2D matrix.
97 |         - 'lowbound': Lower bounds of ICC confidence intervals as a 2D matrix.
98 |         - 'upbound': Upper bounds of ICC confidence intervals as a 2D matrix.
99 |         - 'btwnsub': Between-subject variance as a 2D matrix.
100 |         - 'wthnsub': Within-subject variance as a 2D matrix.
101 |         - 'btwnmeas': Between-measure variance as a 2D matrix.
102 | 
103 |     Example
104 |     -------
105 |     icc_results = edgewise_icc(multisession_list=[dat_ses1, dat_ses2, dat_ses3],
106 |                                n_cols=10, col_names=['left_pfc', 'right_pfc', ..., 'right_nacc'],
107 |                                separator=',', icc_type='icc_3')
108 |     """
109 | 
110 |     session_lengths = [len(session) for session in multisession_list]
111 |     session_all_same = all(length == session_lengths[0] for length in session_lengths)
112 | 
113 |     if col_names is None:
114 |         col_names = np.arange(1, n_cols + 1, 1)
115 | 
116 |     assert n_cols == len(
117 |         col_names), f"Specified number ({n_cols}) of columns doesn't match " \
118 |                     f"the length of column names ({len(col_names)})"
119 |     assert session_all_same, f"Not all lists in session_files have the same length. " \
120 |                              f"Mismatched lengths: {', '.join(str(length) for length in session_lengths)}"
121 | 
122 |     for i, list_set in enumerate(multisession_list):
123 |         if all(isinstance(item, str) for item in list_set):
124 |             print(f"All values in the list set {i} are strings")
125 |         elif all(isinstance(item, np.ndarray) for item in list_set):
126 |             print(f"All values in the list set {i} are ndarrays")
127 |         else:
128 |             raise TypeError(f"Values in the list {i} are not all NumPy ndarrays or strings. 
Check file types/names.")
129 | 
130 |     sub_n = np.array(multisession_list).shape[1]
131 |     subj_list = np.arange(sub_n)
132 |     sess_n = np.array(multisession_list).shape[0]
133 |     corr_cols = n_cols
134 |     sess_labels = [f"sess{i + 1}" for i in range(sess_n)]
135 | 
136 |     session_lowertriangle = []
137 |     for session in multisession_list:
138 |         session_vectors = []
139 |         for matrix in session:
140 |             if isinstance(matrix, str):
141 |                 file_extension = os.path.splitext(matrix)[1]
143 |                 try:
144 |                     if file_extension == '.npy':
145 |                         matrix = np.load(matrix)
146 |                     elif file_extension == '.csv':
147 |                         matrix = read_csv(matrix, sep=separator, header=None, index_col=False).values
148 |                     elif file_extension == '.txt':
149 |                         matrix = np.loadtxt(matrix, delimiter=separator)
150 |                     else:
151 |                         raise ValueError(
152 |                             f"Unsupported file extension for file {matrix}. Supported extensions are .npy, .csv, .txt")
153 |                 except Exception as e:
154 |                     print(f"Warning: Error loading file {matrix}: {e}")
155 | 
156 |             lower_triangle_indices_with_diag = np.tril_indices_from(matrix, k=0)
157 |             lower_triangle_vector_with_diag = matrix[lower_triangle_indices_with_diag]
158 |             session_vectors.append(lower_triangle_vector_with_diag)
159 |         session_lowertriangle.append(session_vectors)
160 | 
161 |     session_lowertriangle = [np.array(session) for session in session_lowertriangle]
162 | 
163 |     est, lowbound, upbound, \
164 |         btwn_sub, wthn_sub, btwn_meas = np.empty((6, session_lowertriangle[0].shape[-1]))
165 | 
166 |     for edge in range(session_lowertriangle[0].shape[-1]):
167 |         np_roidata = np.column_stack((
168 |             np.tile(subj_list, sess_n),
169 |             np.hstack([[sess_labels[j]] * len(session_lowertriangle[j][:, edge]) for j in range(sess_n)]),
170 |             np.hstack([session_lowertriangle[j][:, edge] for j in range(sess_n)])
171 |         ))
172 |         roi_pd = DataFrame(data=np_roidata, columns=["subj", "sess", "vals"])
173 |         roi_pd = roi_pd.astype({"subj": int, "sess": "category", "vals": float})
174 | 
175 |         est[edge], lowbound[edge], upbound[edge], \
176 |             btwn_sub[edge], wthn_sub[edge], \
177 |             btwn_meas[edge] = sumsq_icc(df_long=roi_pd, sub_var="subj", sess_var="sess",
178 |                                         value_var="vals", icc_type=icc_type)
179 | 
180 |     result_dict = {
181 |         'roi_labels': col_names,
182 |         'est': triang_to_fullmat(corr_1darray=np.array(est), size=corr_cols),
183 |         'lowbound': triang_to_fullmat(corr_1darray=np.array(lowbound), size=corr_cols),
184 |         'upbound': triang_to_fullmat(corr_1darray=np.array(upbound), size=corr_cols),
185 |         'btwnsub': triang_to_fullmat(corr_1darray=np.array(btwn_sub), size=corr_cols),
186 |         'wthnsub': triang_to_fullmat(corr_1darray=np.array(wthn_sub), size=corr_cols),
187 |         'btwnmeas': triang_to_fullmat(corr_1darray=np.array(btwn_meas), size=corr_cols)
188 |     }
189 | 
190 |     return result_dict
191 | 
--------------------------------------------------------------------------------
/pyrelimri/icc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pandas import DataFrame
3 | from scipy.stats import f
4 | from numpy.typing import NDArray
5 | 
6 | 
7 | def sumsq_total(df_long: DataFrame, values: str) -> NDArray:
8 |     """
9 |     Calculate the total sum of squares for a given column in a DataFrame.
10 |     The total sum of squares is the sum of the squared differences between each value in the column
11 |     and the overall mean of that column.
12 | 
12 |     Parameters
13 |     ----------
14 |     df_long : DataFrame
15 |         A pandas DataFrame in long format.
16 | values : str 17 | The name of the column containing the values for which to calculate the total sum of squares. 18 | 19 | Returns 20 | ------- 21 | NDArray 22 | The total sum of squares of the specified values column. 23 | 24 | """ 25 | return np.sum((df_long[values] - df_long[values].mean()) ** 2) 26 | 27 | 28 | def sumsq_within(df_long: DataFrame, sessions: str, values: str, n_subjects: int) -> NDArray: 29 | """ 30 | Calculate the sum of squared within-subject variance. 31 | This function computes the sum of the squared differences between the average session value and the overall average 32 | of values, multiplied by the number of subjects. 33 | 34 | Parameters 35 | ---------- 36 | df_long : DataFrame 37 | A pandas DataFrame in long format, e.g., scores across subjects and 1+ sessions. 38 | sessions : str 39 | The name of the column representing sessions (repeated measurements) in the DataFrame. 40 | values : str 41 | The name of the column containing the values for subjects across sessions. 42 | n_subjects : int 43 | The number of subjects. 44 | 45 | Returns 46 | ------- 47 | NDArray 48 | The sum of squared within-subject variance. 49 | 50 | """ 51 | 52 | return np.sum( 53 | ((df_long[values].mean() - 54 | df_long[[sessions, values]].groupby(by=sessions, observed=False)[values].mean()) ** 2) * n_subjects 55 | ) 56 | 57 | 58 | def sumsq_btwn(df_long: DataFrame, subj: str, values: str, n_sessions: int) -> NDArray: 59 | """ 60 | Calculate the sum of squared between-subject variance. 61 | This function computes the sum of the squared differences between the average subject value and the overall average 62 | of values, multiplied by the number of sessions. 63 | 64 | Parameters 65 | ---------- 66 | df_long : DataFrame 67 | A pandas DataFrame in long format, e.g. scores across subjects and 1+ sessions. 68 | subj : str 69 | The name of the column representing subjects (i.e. targets) in the DataFrame. 70 | values : str 71 | The name of the column containing the values for subjects (i.e. ratings) across sessions. 72 | n_sessions : int 73 | The number of sessions (i.e. raters) 74 | 75 | Returns 76 | ------- 77 | NDArray 78 | The sum of squared between-subject variance. 79 | 80 | """ 81 | return np.sum( 82 | ((df_long[values].mean() - df_long[[subj, values]].groupby(by=subj, observed=False)[values].mean()) ** 2) * n_sessions 83 | ) 84 | 85 | 86 | def check_icc_type(icc_type, allowed_types=None): 87 | if allowed_types is None: 88 | allowed_types = ['icc_1', 'icc_2', 'icc_3'] 89 | assert icc_type in allowed_types, \ 90 | f'ICC type should be in {",".join(allowed_types)}' \ 91 | f'{icc_type} entered' 92 | 93 | 94 | def icc_confint(msbs: float, msws: float, mserr: float, msc: float, 95 | n_subjs: int, n_sess: int, icc_2=None, alpha=0.05, icc_type='icc_3'): 96 | """ 97 | Calculate the confidence interval for ICC(1), ICC(2,1), or ICC(3,1) using the F-distribution method. 98 | This function computes the 95% confidence interval for the Intraclass Correlation Coefficient (ICC) based on 99 | the specified ICC type (1, 2, or 3). The technique is adopted from the Pinguin library, see: 100 | https://pingouin-stats.org/build/html/index.html, which is based on the ICC() function from Psych package in R: 101 | https://www.rdocumentation.org/packages/psych/versions/2.4.3/topics/ICC 102 | 103 | Parameters 104 | ---------- 105 | msbs : float 106 | The mean square between-subject. 107 | msws : float 108 | The mean square within-subject. 109 | mserr : float 110 | The mean square error. 
111 |     msc : float
112 |         The mean square for the rater/session effect.
113 |     n_subjs : int
114 |         The number of subjects/targets.
115 |     n_sess : int
116 |         The number of sessions/raters.
117 |     icc_2 : float, optional
118 |         ICC(2,1) estimate used in calculating the confidence interval. Default is None.
119 |     alpha : float, optional
120 |         The significance level for the confidence interval. Default is 0.05.
121 |     icc_type : str, optional
122 |         The type of ICC for which the confidence interval is to be calculated. Default is 'icc_3'.
123 |         Must be one of 'icc_1', 'icc_2', or 'icc_3'.
124 | 
125 |     Returns
126 |     -------
127 |     tuple
128 |         The lower and upper bounds of the 95% confidence interval for the specified ICC type.
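    Example
    -------
    Illustrative only (the mean squares below are made-up values):
    lb, ub = icc_confint(msbs=2.5, msws=0.9, mserr=0.7, msc=1.2,
                         n_subjs=20, n_sess=2, icc_type='icc_3')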
129 |     """
130 | 
131 |     check_icc_type(icc_type)
132 | 
133 |     # Calculate F, df, and p-values
134 |     f_stat1 = msbs / msws
135 |     df1 = n_subjs - 1
136 |     df1kd = n_subjs * (n_sess - 1)
137 | 
138 |     f_stat3 = msbs / mserr
139 |     df2kd = (n_subjs - 1) * (n_sess - 1)
140 | 
141 |     # Calculate ICC confidence interval
142 |     if icc_type == 'icc_1':
143 |         f_lb = f_stat1 / f.ppf(1 - alpha / 2, df1, df1kd)
144 |         f_ub = f_stat1 * f.ppf(1 - alpha / 2, df1kd, df1)
145 |         lb_ci = (f_lb - 1) / (f_lb + (n_sess - 1))
146 |         ub_ci = (f_ub - 1) / (f_ub + (n_sess - 1))
147 |     elif icc_type == 'icc_2':
148 |         fc = msc / mserr
149 |         vn = df2kd * (n_sess * icc_2 * fc + n_subjs * (1 + (n_sess - 1) * icc_2) - n_sess * icc_2) ** 2
150 |         vd = df1 * n_sess ** 2 * icc_2 ** 2 * fc ** 2 + (n_subjs * (1 + (n_sess - 1) * icc_2) - n_sess * icc_2) ** 2
151 |         v = vn / vd
152 |         f2u = f.ppf(1 - alpha / 2, n_subjs - 1, v)
153 |         f2l = f.ppf(1 - alpha / 2, v, n_subjs - 1)
154 |         lb_ci = n_subjs * (msbs - f2u * mserr) / (
155 |                 f2u * (n_sess * msc + (n_sess * n_subjs - n_sess - n_subjs) * mserr) + n_subjs * msbs)
156 |         ub_ci = n_subjs * (f2l * msbs - mserr) / (
157 |                 n_sess * msc + (n_sess * n_subjs - n_sess - n_subjs) * mserr + n_subjs * f2l * msbs)
158 |     elif icc_type == 'icc_3':
159 |         f_lb = f_stat3 / f.ppf(1 - alpha / 2, df1, df2kd)
160 |         f_ub = f_stat3 * f.ppf(1 - alpha / 2, df2kd, df1)
161 |         lb_ci = (f_lb - 1) / (f_lb + (n_sess - 1))
162 |         ub_ci = (f_ub - 1) / (f_ub + (n_sess - 1))
163 | 
164 |     return lb_ci, ub_ci
165 | 
166 | 
167 | def sumsq_icc(df_long: DataFrame, sub_var: str,
168 |               sess_var: str, value_var: str, icc_type: str = 'icc_3'):
169 |     """
170 |     Calculate the Intraclass Correlation Coefficient (ICC) using the sum of squares method.
171 |     This function calculates the ICC based on a long format DataFrame where subjects (targets) are repeated
172 |     for multiple sessions (raters). It decomposes the total variance into between-subject and within-subject
173 |     components and computes the ICC for the specified type (ICC(1), ICC(2,1), or ICC(3,1)).
174 | 
175 |     Parameters
176 |     ----------
177 |     df_long : DataFrame
178 |         A pandas DataFrame containing the data of subjects and sessions in long format (i.e., subjects repeating for 1+ sessions).
179 |     sub_var : str
180 |         The column name in the DataFrame representing the subject identifier.
181 |     sess_var : str
182 |         The column name in the DataFrame representing the session (repeated measurement) variable.
183 |     value_var : str
184 |         The column name in the DataFrame containing the values for each session (rater).
185 |     icc_type : str, optional
186 |         The type of ICC to calculate. Default is 'icc_3'. Must be one of 'icc_1', 'icc_2', or 'icc_3'.
187 | 
188 |     Returns
189 |     -------
190 |     estimate : float
191 |         The ICC estimate for the specified type.
192 |     lowerbound : float
193 |         The lower bound of the 95% confidence interval for the ICC estimate.
194 |     upperbound : float
195 |         The upper bound of the 95% confidence interval for the ICC estimate.
196 |     btwn_sub : float
197 |         The between-subject variance component.
198 |     within_sub : float
199 |         The within-subject variance component.
200 |     btwn_measure : float, optional
201 |         The between-measure variance component for ICC(2,1), otherwise None.
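    Example
    -------
    A sketch with hypothetical column names (df must be in long format); this mirrors how the
    function is called internally by brain_icc and conn_icc:
    est, lb, ub, bs_var, ws_var, bm_var = sumsq_icc(df_long=df, sub_var='subj',
                                                    sess_var='sess', value_var='vals',
                                                    icc_type='icc_3')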
202 |     """
203 |     assert sub_var in df_long.columns, \
204 |         f'sub_var {sub_var} must be a column in the data frame'
205 |     assert sess_var in df_long.columns, \
206 |         f'sess_var {sess_var} must be a column in the data frame'
207 |     assert value_var in df_long.columns, \
208 |         f'value_var {value_var} must be a column in the data frame'
209 | 
210 |     check_icc_type(icc_type)
211 | 
212 |     # check replace missing
213 |     nan_in_vals = df_long.isna().any().any()
214 |     if nan_in_vals:
215 |         # Using mean based replacement; calc mean of values column
216 |         # Note: pingouin in python & ICC in R converts data to wide --> listwise deletion --> convert to long
217 |         mean_vals = df_long[value_var].mean()
218 |         # Replace NaN or missing values with the column mean
219 |         df_long[value_var] = df_long[value_var].fillna(mean_vals)
220 | 
221 |     # num_subjs = number of subjs, num_sess = number of sessions/ratings
222 |     num_subjs = df_long[sub_var].nunique()
223 |     num_sess = df_long[sess_var].nunique()
224 |     DF_r = (num_subjs - 1) * (num_sess - 1)
225 | 
226 |     # Sum of square errors
227 |     SS_Total = sumsq_total(df_long=df_long, values=value_var)
228 |     SS_Btw = sumsq_btwn(df_long=df_long, subj=sub_var, values=value_var, n_sessions=num_sess)
229 |     SS_C = sumsq_within(df_long=df_long, sessions=sess_var, values=value_var, n_subjects=num_subjs)
230 |     SS_Err = SS_Total - SS_Btw - SS_C
231 |     SS_Wth = SS_C + SS_Err
232 | 
233 |     # Mean Sum of Squares
234 |     MSBtw = SS_Btw / (num_subjs - 1)
235 |     MSWtn = SS_Wth / (DF_r + (num_sess - 1))
236 |     MSc = SS_C / (num_sess - 1)
237 |     MSErr = SS_Err / DF_r
238 | 
239 |     # Calculate ICCs
240 |     lowerbound, upperbound = None, None  # set to None in case they are skipped
241 |     btwn_measure = None  # ICC(2,1) for absolute agreement includes a bias term for measures
242 | 
243 |     if icc_type == 'icc_1':
244 |         # ICC(1), Model 1
245 |         try:
246 |             estimate = (MSBtw - MSWtn) / (MSBtw + (num_sess - 1) * MSWtn)
247 |             btwn_sub = (MSBtw - MSWtn) / num_sess
248 |             within_sub = MSWtn
249 |         except RuntimeWarning:  # default all outputs so no local is left unbound
250 |             estimate, btwn_sub, within_sub = 0, None, None
251 | 
252 |         if MSWtn > 0 and MSErr > 0:
253 |             lowerbound, upperbound = icc_confint(msbs=MSBtw, msws=MSWtn, mserr=MSErr, msc=MSc,
254 |                                                  n_subjs=num_subjs, n_sess=num_sess, alpha=0.05, icc_type='icc_1')
255 |     elif icc_type == 'icc_2':
256 |         # ICC(2,1)
257 |         try:
258 |             estimate = (MSBtw - MSErr) / (MSBtw + (num_sess - 1) * MSErr + num_sess * (MSc - MSErr) / num_subjs)
259 |             btwn_sub = (MSBtw - MSErr) / num_sess
260 |             within_sub = MSErr
261 |             btwn_measure = (MSc - MSErr) / num_subjs
262 |         except RuntimeWarning:  # default all outputs so no local is left unbound
263 |             estimate, btwn_sub, within_sub, btwn_measure = 0, None, None, None
264 | 
265 |         if MSWtn > 0 and MSErr > 0:
266 |             lowerbound, upperbound = icc_confint(msbs=MSBtw, msws=MSWtn, mserr=MSErr, msc=MSc,
267 |                                                  n_subjs=num_subjs, n_sess=num_sess, icc_2=estimate, alpha=0.05,
268 |                                                  icc_type='icc_2')
269 |     elif icc_type == 'icc_3':
270 |         # ICC(3,1)
271 |         try:
272 |             estimate = (MSBtw - MSErr) / (MSBtw + (num_sess - 1) * MSErr)
273 |             btwn_sub = (MSBtw - MSErr) / num_sess
274 |             within_sub = MSErr
275 |         except RuntimeWarning:  # default all outputs so no local is left unbound
276 |             estimate, btwn_sub, within_sub = 0, None, None
277 | 
278 |         if MSWtn > 0 and MSErr > 0:
279 |             lowerbound, upperbound = icc_confint(msbs=MSBtw, msws=MSWtn, mserr=MSErr, msc=MSc,
280 |                                                  n_subjs=num_subjs, n_sess=num_sess, alpha=0.05, icc_type='icc_3')
281 | 
282 |     return estimate, lowerbound, upperbound, btwn_sub, within_sub, btwn_measure
283 | 
--------------------------------------------------------------------------------
/pyrelimri/masked_timeseries.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | import seaborn as sns
5 | import matplotlib.pyplot as plt
6 | from nibabel import Nifti1Image
7 | from nilearn.maskers import nifti_spheres_masker
8 | from nilearn.signal import clean
9 | from nilearn.masking import apply_mask, _unmask_3d, compute_brain_mask
10 | from nilearn.image import load_img, new_img_like
11 | from joblib import Parallel, delayed
12 | 
13 | 
14 | def round_cust(x):
15 |     return np.floor(x + 0.49)
16 | 
17 | 
18 | def trlocked_events(events_path: str, onsets_column: str, trial_name: str,
19 |                     bold_tr: float, bold_vols: int, separator: str = '\t'):
20 |     """
21 |     Loads behavior data and merges it into a TR-indexed dataframe matched to the length of the BOLD run.
22 |     Trial onsets are mapped to the nearest TR via a custom floor(x + 0.49) rounding (avoiding banker's
23 |     rounding) when acquisition is not locked to the TR.
24 | 
24 |     Parameters
25 |     ----------
26 |     events_path : str
27 |         Path to the events data files for given subject/run.
28 | 
29 |     onsets_column : str
30 |         Name of the column containing onset times for the event/condition.
31 | 
32 |     trial_name : str
33 |         Name of the column containing condition/trial labels.
34 | 
35 |     bold_tr : float
36 |         TR acquisition time (in seconds) of BOLD.
37 | 
38 |     bold_vols : int
39 |         Number of time points for BOLD acquisition.
40 | 
41 |     separator : str, optional
42 |         Separator used in the events data file. Default is '\t'.
43 | 
44 |     Returns
45 |     -------
46 |     pandas.DataFrame
47 |         Merged dataframe with time index and events data for each event + TR delays.
48 | 
49 |     Example
50 |     -------
51 |     tr_locked_events = trlocked_events(events_path='./sub-01_ses-01-task-fake_events.tsv', onsets_column='OnsetTime',
52 |                                        trial_name='TrialType', bold_tr=2.0, bold_vols=150)
53 |     """
54 |     if not os.path.exists(events_path):
55 |         raise FileNotFoundError(f"File '{events_path}' not found.")
56 | 
57 |     beh_df = pd.read_csv(events_path, sep=separator)
58 | 
59 |     missing_cols = [col for col in [onsets_column, trial_name] if col not in beh_df.columns]
60 |     if missing_cols:
61 |         raise KeyError(f"Missing columns: {', '.join(missing_cols)}")
62 | 
63 |     beh_df = beh_df[[onsets_column, trial_name]]
64 |     try:
65 |         beh_df["TimePoint"] = round_cust(
66 |             beh_df[onsets_column] / bold_tr).astype(int)  # Per Elizabeth, avoids banker's rounding in .round()
67 |     except Exception as e:
68 |         print(f"An error occurred: {e}. File contained NaN values, which were dropped: {events_path}")
69 |         beh_df.dropna(inplace=True)  # cannot perform operations on missing information
70 |         beh_df["TimePoint"] = round_cust(beh_df[onsets_column] / bold_tr).astype(int)
71 | 
72 |     time_index = pd.RangeIndex(start=0, stop=bold_vols, step=1)
73 |     time_index_df = pd.DataFrame(index=time_index)
74 |     # Merge behavior data with time index
75 |     merged_df = pd.merge(time_index_df, beh_df, how='left', left_index=True, right_on='TimePoint')
76 | 
77 |     if len(merged_df) != bold_vols:
78 |         raise ValueError(f"Merged data length ({len(merged_df)}) doesn't match volumes ({bold_vols}).")
79 | 
80 |     return merged_df
81 | 
82 | 
83 | def extract_time_series_values(behave_df: pd.DataFrame, time_series_array: np.ndarray, delay: int):
84 |     """
85 |     Extracts time series data from the provided time series BOLD data for associated behavioral data
86 |     acquired from `trlocked_events` with a specified delay.
87 | 
88 |     Parameters
89 |     ----------
90 |     behave_df : pandas.DataFrame
91 |         DataFrame containing behavioral data with a 'TimePoint' column indicating the starting point
92 |         for each time series extraction.
93 | 
94 |     time_series_array : np.ndarray
95 |         Numpy array containing time series data.
96 | 
97 |     delay : int
98 |         Number of data points to include in each extracted time series.
99 | 
100 |     Returns
101 |     -------
102 |     np.ndarray
103 |         Array containing the extracted time series data for each time point in the behavioral DataFrame.
104 |         Each row corresponds to a time point, and each column contains the extracted time series data.
105 | 
106 | 
107 |     Example
108 |     -------
109 |     trlocked_cuetimeseries = extract_time_series_values(behave_df, time_series_array, delay=15)
110 |     """
111 |     extracted_series_list = []
112 |     for row in behave_df['TimePoint']:
113 |         start = int(row)
114 |         end = start + delay
115 |         extracted_series = time_series_array[start:end]
116 |         if len(extracted_series) < delay:  # Check if extracted series is shorter than delay
117 |             extracted_series = np.pad(extracted_series, ((0, delay - len(extracted_series)), (0, 0)), mode='constant')
118 |         extracted_series_list.append(extracted_series)
119 |     return np.array(extracted_series_list, dtype=object)
120 | 
121 | 
122 | def process_bold_roi_mask(bold_path: str, roi_mask: str, high_pass_sec: int = None, detrend: bool = False,
123 |                           fwhm_smooth: float = None):
124 |     """
125 |     Processes BOLD data masked by a region of interest (ROI) mask file.
126 |     Loads the BOLD and ROI mask images, applies the mask to the BOLD data, performs preprocessing (optional)
127 |     steps including smoothing, cleaning (detrending and standardization), and averaging across time series.
128 | Standardizes BOLD signal using Nilearn's percent signal change ('psc') 129 | 130 | Parameters 131 | ---------- 132 | bold_path : str 133 | Path to the BOLD image file. 134 | 135 | roi_mask : str 136 | Path to the ROI mask image file. 137 | 138 | high_pass_sec : float 139 | High pass filter cutoff in seconds. If None, no high pass filtering is applied. 140 | 141 | detrend : bool 142 | If True, detrend the data during cleaning. 143 | 144 | fwhm_smooth : float 145 | Full-width at half-maximum (FWHM) value for Gaussian smoothing of the BOLD data. 146 | 147 | Returns 148 | ------- 149 | np.ndarray 150 | 2D array containing the averaged time series data after cleaning and preprocessing. 151 | 152 | str 153 | Subject information extracted from the BOLD file name, formatted as 'sub-{sub_id}_run-{run_id}'. 154 | 155 | Example 156 | ------- 157 | # Process BOLD data masked by ROI mask 158 | time_series_avg, sub_info = process_bold_roi_mask(bold_path='./sub-01_ses-01_task-fake_bold.nii.gz', 159 | roi_mask='./siq-region_mask.nii.gz', 160 | high_pass_sec=100.0, 161 | detrend=True, 162 | fwhm_smooth=5.0) 163 | """ 164 | 165 | img = [load_img(i) for i in [bold_path, roi_mask]] 166 | bold_name = os.path.basename(bold_path) 167 | path_parts = bold_name.split('_') 168 | sub_id, run_id = None, None 169 | for val in path_parts: 170 | if 'sub-' in val: 171 | sub_id = val.split('-')[1] 172 | elif 'run-' in val: 173 | run_id = val.split('-')[1] 174 | sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id 175 | 176 | assert img[0].shape[0:3] == img[1].shape, 'images of different shape, BOLD {} and ROI {}'.format( 177 | img[0].shape, img[1].shape) 178 | 179 | masked_data = apply_mask(bold_path, roi_mask, smoothing_fwhm=fwhm_smooth) 180 | clean_timeseries = clean(masked_data, standardize='psc', detrend=detrend, 181 | high_pass=1 / high_pass_sec if high_pass_sec is not None else None) 182 | time_series_avg = np.mean(clean_timeseries, axis=1)[:, None] 183 | 184 | return time_series_avg, sub_info 185 | 186 | 187 | def process_bold_roi_coords(bold_path: str, roi_mask: Nifti1Image, 188 | high_pass_sec: float, detrend: bool, fwhm_smooth: float): 189 | """ 190 | Processes BOLD data masked by a spherical region of interest (ROI) defined by coordinates. 191 | Loads the BOLD and ROI mask images, applies the spherical ROI mask to the BOLD data, performs preprocessing steps 192 | including smoothing, cleaning (detrending and standardization), and averaging across time series. 193 | Standardizes BOLD signal using Nilearn's percent signal change ('psc') 194 | 195 | Parameters 196 | ---------- 197 | bold_path : str 198 | Path to the BOLD image file. 199 | 200 | roi_mask : nibabel.Nifti1Image 201 | ROI created to mask data 202 | 203 | high_pass_sec : float 204 | High pass filter cutoff in seconds. If None, no high pass filtering is applied. 205 | 206 | detrend : bool 207 | If True, detrend the data during cleaning. 208 | 209 | fwhm_smooth : float 210 | Full-width at half-maximum (FWHM) value for Gaussian smoothing of the BOLD data. 211 | 212 | Returns 213 | ------- 214 | np.ndarray 215 | 2D array containing the averaged time series data after cleaning and preprocessing. 216 | 217 | str 218 | Subject information extracted from the BOLD file name, formatted as 'sub-{sub_id}_run-{run_id}'. 
219 | 
    nibabel.Nifti1Image
        The spherical ROI mask applied to the BOLD data (returned as the second of the three outputs,
        between the time series and subject info).
220 | 
221 |     Example
222 |     -------
223 |     # coord_mask_img is a hypothetical Nifti1Image sphere mask (e.g., built inside extract_time_series)
224 |     roi_timeseries_avg, coord_mask, sub_info = process_bold_roi_coords(
225 |         bold_path='/path/to/sub-01_run-01_bold.nii.gz',
226 |         roi_mask=coord_mask_img,
227 |         high_pass_sec=100.0,
228 |         detrend=True,
229 |         fwhm_smooth=5.0)
229 |     """
230 |     coord_mask = roi_mask
231 | 
232 |     img = [load_img(i) for i in [bold_path, coord_mask]]
233 |     bold_name = os.path.basename(bold_path)
234 |     path_parts = bold_name.split('_')
235 |     sub_id, run_id = None, None
236 |     for val in path_parts:
237 |         if 'sub-' in val:
238 |             sub_id = val.split('-')[1]
239 |         elif 'run-' in val:
240 |             run_id = val.split('-')[1]
241 |     sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
242 | 
243 |     assert img[0].shape[0:3] == img[1].shape, 'images of different shape, BOLD {} and ROI {}'.format(
244 |         img[0].shape[0:3], img[1].shape)
245 | 
246 |     masked_data = apply_mask(bold_path, coord_mask, smoothing_fwhm=fwhm_smooth)
247 |     clean_timeseries = clean(masked_data, standardize='psc', detrend=detrend,
248 |                              high_pass=1 / high_pass_sec if high_pass_sec is not None else None)
249 |     time_series_avg = np.mean(clean_timeseries, axis=1)[:, None]
250 | 
251 |     return time_series_avg, coord_mask, sub_info
252 | 
253 | def extract_time_series(bold_paths: list, roi_type: str, high_pass_sec: int = None, roi_mask: str = None,
254 |                         roi_coords: tuple = None, radius_mm: int = None,
255 |                         detrend: bool = False, fwhm_smooth: float = None, n_jobs=1):
256 |     """
257 |     Extracts time series data from BOLD images for specified regions of interest (ROI) or coordinates.
258 |     For each BOLD path, extracts time series either using a mask or ROI coordinates, leveraging
259 |     Nilearn's NiftiLabelsMasker (for mask) or nifti_spheres_masker (for coordinates).
260 |     The BOLD signal is standardized using Nilearn's percent signal change ('psc').
261 | 
262 | 
263 |     Parameters
264 |     ----------
265 |     bold_paths : list
266 |         List of paths to BOLD image files for subjects/runs/tasks. The order should match the order of events or
267 |         conditions for each subject.
268 | 
269 |     roi_type : str
270 |         Type of ROI ('mask' or 'coords').
271 | 
272 |     high_pass_sec : int, optional
273 |         High-pass filter cutoff in seconds. If provided, converted to frequency (1/high_pass_sec). Default is None.
274 | 
275 |     roi_mask : str or None, optional
276 |         Path to the ROI mask image. Required if roi_type is 'mask'. Default is None.
277 | 
278 |     roi_coords : tuple or None, optional
279 |         Coordinates (x, y, z) for the center of the sphere ROI. Required if roi_type is 'coords'. Default is None.
280 | 
281 |     radius_mm : int or None, optional
282 |         Radius of the sphere in millimeters. Required if roi_type is 'coords'. Default is None.
283 | 
284 |     detrend : bool, optional
285 |         Whether to detrend the BOLD signal using Nilearn's detrend function. Default is False.
286 | 
287 |     fwhm_smooth : float or None, optional
288 |         Full-width at half-maximum (FWHM) value for Gaussian smoothing of the BOLD data. Default is None.
289 | 
290 |     n_jobs : int, optional
291 |         Number of CPUs to use for parallel processing. Default is 1.
292 | 
293 |     Returns
294 |     -------
295 |     list or tuple
296 |         - If roi_type is 'mask':
297 |             - List of numpy arrays containing the time series (% mean signal change) data for each subject/run.
298 |             - List of subject information strings formatted as 'sub-{sub_id}_run-{run_id}'.
299 |         - If roi_type is 'coords':
300 |             - List of numpy arrays containing the averaged time series (% mean signal change) data for each subject/run.
301 |             - Nifti1Image object representing the coordinate mask used.
302 |             - List of subject information strings formatted as 'sub-{sub_id}_run-{run_id}'.
303 | 
304 | 
305 |     Example
306 |     -------
307 |     # Extract percent mean signal change time series for BOLD data using a mask ROI
308 |     roi_type = 'mask'
309 |     bold_paths = ['./sub-01_ses-01_task-lit_bold.nii.gz', './sub-02_ses-01_task-lit_bold.nii.gz']
310 |     roi_mask = './siq-roi_mask.nii.gz'
311 |     time_series_list, sub_info_list = extract_time_series(bold_paths, roi_type, roi_mask=roi_mask, high_pass_sec=100, detrend=True, fwhm_smooth=5.0)
312 | 
313 |     # Extract percent mean signal change time series for BOLD data using coordinates ROI
314 |     roi_type = 'coords'
315 |     bold_paths = ['./sub-01_ses-01_task-lit_bold.nii.gz', './sub-02_ses-01_task-lit_bold.nii.gz']
316 |     roi_coords = (30, -15, 0)
317 |     time_series_list, coord_mask, sub_info_list = extract_time_series(bold_paths, roi_type, roi_coords=roi_coords, radius_mm=5, high_pass_sec=100, detrend=True, fwhm_smooth=5.0)
318 |     """
319 |     roi_options = ['mask', 'coords']
320 | 
321 |     if roi_type not in roi_options:
322 |         raise ValueError("Invalid ROI type. Choose 'mask' or 'coords'.")
323 | 
324 |     if roi_type == 'mask':
325 |         results = Parallel(n_jobs=n_jobs)(delayed(process_bold_roi_mask)(
326 |             bold_path, roi_mask, high_pass_sec, detrend, fwhm_smooth) for bold_path in bold_paths)
327 |         roi_series_list, id_list = zip(*results)
328 |         return list(roi_series_list), list(id_list)
329 | 
330 |     elif roi_type == 'coords':
331 |         # get a wb_mask
332 |         wb_mask = compute_brain_mask(bold_paths[0])
333 | 
334 |         # create ROI
335 |         _, roi = nifti_spheres_masker._apply_mask_and_get_affinity(
336 |             seeds=[roi_coords], niimg=None, radius=radius_mm,
337 |             allow_overlap=False, mask_img=wb_mask)
338 |         coord_mask = _unmask_3d(X=roi.toarray().flatten(), mask=wb_mask.get_fdata().astype(bool))
339 |         coord_mask = new_img_like(wb_mask, coord_mask, wb_mask.affine)
340 | 
341 |         results = Parallel(n_jobs=n_jobs)(delayed(process_bold_roi_coords)(
342 |             bold_path, coord_mask, high_pass_sec, detrend, fwhm_smooth) for bold_path in bold_paths)
343 |         coord_series_list, _, id_list = zip(*results)  # per-run masks are identical; coord_mask is returned once below
344 | 
345 |         return list(coord_series_list), coord_mask, list(id_list)
346 | 
349 | 
350 | 
351 | def extract_postcue_trs_for_conditions(events_data: list, onset: str, trial_name: str,
352 |                                        bold_tr: float, bold_vols: int, time_series: np.ndarray,
353 |                                        conditions: list, tr_delay: int, list_trpaths: list):
354 |     """
355 |     Extracts time points coinciding with condition onsets plus specified delay TRs for each subject's behavioral/timeseries data.
356 |     Saves this information to a pandas DataFrame with associated mean signal values for each subject,
357 |     trial and cue across the range of TRs (1 to TR-delay).
358 | 
359 |     Parameters
360 |     ----------
361 |     events_data : list
362 |         List of paths to behavioral data files. Should match the order of subjects/runs/tasks as the BOLD file list.
363 | 
364 |     onset : str
365 |         Name of the column containing onset values in the behavioral data.
366 | 
367 |     trial_name : str
368 |         Name of the column containing condition values in the behavioral data.
369 | 
370 |     bold_tr : float
371 |         TR (Repetition Time) for acquisition of BOLD data in seconds.
372 | 
373 |     bold_vols : int
374 |         Number of volumes for BOLD acquisition.
375 | 
376 |     time_series : numpy.ndarray
377 |         series_list from extract_time_series()
378 | 
379 |     conditions : list
380 |         List of condition cues to iterate over. Must have at least one cue.
381 | 
382 |     tr_delay : int
383 |         Number of TRs after onset of stimulus to extract and plot.
384 | 
385 |     list_trpaths : list
386 |         id_list from extract_time_series()
387 | 
388 |     Returns
389 |     -------
390 |     pd.DataFrame
391 |         DataFrame containing percent mean signal change values, subject labels, trial labels, TR values,
392 |         and cue labels for all specified conditions.
393 | 
394 |     Example
395 |     -------
396 |     # Extract time points and mean signal values for conditions 'Up' and 'Down'
397 |     events_dfs = ['./sub-01_ses-01_task-siq-events.csv', './sub-02_ses-01_task-siq-events.csv']
398 |     onset = 'OnsetTime'
399 |     trial_name = 'TrialType'
400 |     # timeseries_2subs: the series list returned by extract_time_series()
401 |     conditions = ['Up', 'Down']
402 |     tr_delay = 12
403 |     # timeseries_order: the id_list returned by extract_time_series()
404 |     result_df = extract_postcue_trs_for_conditions(events_data=events_dfs, onset='OnsetTime', trial_name='TrialType',
405 |                                                    bold_tr=2.0, bold_vols=150, time_series=timeseries_2subs,
406 |                                                    conditions=['Up', 'Down'], tr_delay=12, list_trpaths=timeseries_order)
407 |     """
408 |     dfs = []
409 | 
410 |     # check array names first
411 |     beh_id_list = []
412 |     for beh_path in events_data:
413 |         # create sub ID array to test against the BOLD array order
414 |         beh_name = os.path.basename(beh_path)
415 |         path_parts = beh_name.split('_')
416 |         sub_id, run_id = None, None
417 |         for val in path_parts:
418 |             if 'sub-' in val:
419 |                 sub_id = val.split('-')[1]
420 |             elif 'run-' in val:
421 |                 run_id = val.split('-')[1]
422 |         sub_info = 'sub-' + sub_id + '_' + 'run-' + run_id
423 |         beh_id_list.append(sub_info)
424 | 
425 |     assert len(beh_id_list) == len(list_trpaths), f"Length of behavioral files {len(beh_id_list)} " \
426 |                                                   f"does not match TR list {len(list_trpaths)}"
427 |     assert (np.array(beh_id_list) == np.array(list_trpaths)).all(), "Provided list_trpaths does not match " \
428 |                                                                     f"beh path order {beh_id_list}"
429 | 
430 |     for cue in conditions:
431 |         cue_dfs = []  # creating separate cue dfs to accommodate different number of trials for cue types
432 |         sub_n = 0
433 |         for index, beh_path in enumerate(events_data):
434 |             subset_df = trlocked_events(events_path=beh_path, onsets_column=onset,
435 |                                         trial_name=trial_name, bold_tr=bold_tr, bold_vols=bold_vols, separator='\t')
436 |             trial_type = subset_df[subset_df[trial_name] == cue]
437 |             out_trs_array = extract_time_series_values(behave_df=trial_type, time_series_array=time_series[index],
438 |                                                        delay=tr_delay)
439 |             sub_n = sub_n + 1  # subject is equated to every event file N, subj n = 1 to len(events_data)
440 | 
441 |             # nth trial, list of TRs
442 |             for n_trial, trs in enumerate(out_trs_array):
443 |                 num_delay = len(trs)  # Number of TRs for the current trial
444 |                 if num_delay != tr_delay:
445 |                     raise ValueError(f"Mismatch between tr_delay ({tr_delay}) and number of delay TRs ({num_delay})")
446 | 
447 |                 reshaped_array = np.array(trs).reshape(-1, 1)
448 |                 df = pd.DataFrame(reshaped_array, columns=['Mean_Signal'])
449 |                 df['Subject'] = sub_n
450 |                 df['Trial'] = n_trial + 1
451 |                 tr_values = np.arange(1, tr_delay + 1)
452 |                 df['TR'] = tr_values
453 |                 cue_values = [cue] * num_delay
454 |                 df['Cue'] = cue_values
455 |                 cue_dfs.append(df)
456 | 
457 |         dfs.append(pd.concat(cue_dfs, ignore_index=True))
458 | 
459 |     return pd.concat(dfs, ignore_index=True)
460 | 
461 | 
462 | def plot_responses(df, tr: int, delay: int, style: str = 'white', save_path: str = None,
463 |                    show_plot: bool = False, ylim: tuple = (-1, 1)):
464 |     """
465 |     Plots the BOLD response (Mean_Signal ~ TR) across the specified delay for cues.
466 |     The plot uses an alpha of 0.1 with n = 1000 bootstraps for standard errors.
467 | 
468 |     Parameters
469 |     ----------
470 |     df : pandas.DataFrame
471 |         DataFrame containing the data to plot from extract_postcue_trs_for_conditions().
472 |         Should include columns 'TR', 'Mean_Signal', and 'Cue'.
473 | 
474 |     tr : int
475 |         TR value in seconds.
476 | 
477 |     delay : int
478 |         Delay value indicating the number of TRs to plot.
479 | 
480 |     style : str, optional
481 |         Style of the plot. Options are 'white' or 'whitegrid'. Default is 'white'.
482 | 
483 |     save_path : str, optional
484 |         Path and filename to save the plot. If None, the plot is not saved. Default is None.
485 | 
486 |     show_plot : bool, optional
487 |         Whether to display the plot. Default is False.
488 | 
489 |     ylim : tuple, optional
490 |         Y-axis limits for the plot. Default is (-1, 1).
491 | 
492 | 
493 |     Returns
494 |     -------
495 |     If show_plot is True, the figure is displayed via the active Matplotlib backend; otherwise it is closed.
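    Example
    -------
    # Mirrors the docs example; df comes from extract_postcue_trs_for_conditions()
    plot_responses(df=df, tr=2, delay=12, style='whitegrid',
                   save_path=None, show_plot=True, ylim=(-.05, .5))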
496 |     """
497 |     plt.figure(figsize=(10, 8), dpi=300)
498 |     if style not in ['white', 'whitegrid']:
499 |         raise ValueError(f"Style should be 'white' or 'whitegrid'; provided: {style}")
500 | 
501 |     sns.set(style=style, font='DejaVu Serif')
502 | 
503 |     sns.lineplot(x="TR", y="Mean_Signal", hue="Cue", style="Cue", palette="Set1",
504 |                  errorbar='se', err_style="band", err_kws={'alpha': 0.1}, n_boot=1000,
505 |                  legend="brief", data=df)
506 | 
507 |     # Set labels and title
508 |     plt.xlabel(f'Seconds (TR: {tr} sec)')
509 |     plt.ylabel('Avg. Signal Change')
510 |     plt.ylim(ylim[0], ylim[1])
511 |     plt.xlim(0, delay)
512 |     plt.xticks(np.arange(1, delay + 1, 1),
513 |                [f'{round(i * tr, 1)}' for i in range(1, delay + 1)],
514 |                rotation=45)
515 | 
516 |     # Show legend
517 |     plt.legend(loc='upper right')
518 | 
519 |     # Check if save_path is provided
520 |     if save_path:
521 |         # Get the directory path from save_path
522 |         directory = os.path.dirname(save_path)
523 |         # Create the directory if the path includes one that doesn't exist yet
524 |         if directory and not os.path.exists(directory):
525 |             os.makedirs(directory)
526 |         # Save plot
527 |         plt.savefig(save_path)
528 | 
529 |     # Display the figure when requested; otherwise close it to free memory
530 |     if show_plot:
531 |         plt.show()
532 |     else:
533 |         plt.close()
534 | 
--------------------------------------------------------------------------------
/pyrelimri/mktestdata.py:
--------------------------------------------------------------------------------
1 | import nibabel as nib
2 | import numpy as np
3 | 
4 | imgdata = np.random.randn(64, 64, 32)
5 | img = nib.Nifti1Image(imgdata, np.eye(4))
6 | nib.save(img, '../testdata/test1.nii.gz')
--------------------------------------------------------------------------------
/pyrelimri/similarity.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 | from pandas import concat, DataFrame
4 | import numpy as np
5 | from nilearn import image
6 | from itertools import combinations
7 | from nilearn.maskers import NiftiMasker
8 | from scipy.stats import spearmanr
9 | from pyrelimri.tetrachoric_correlation import tetrachoric_corr as tet_corr
10 | 
11 | 
12 | def image_similarity(imgfile1: str, imgfile2: str,
13 |                      mask: str = None, thresh: float = None,
14 |                      similarity_type: str = 'dice') -> float:
15 |     """
16 |     Calculate the similarity between two 3D images using a specified similarity metric. 
17 | The function computes the ratio of intersecting and union voxels based on the provided threshold and similarity type 18 | The result is a similarity coefficient indicating the overlap between the two images. 19 | 20 | Parameters 21 | ---------- 22 | imgfile1 : str 23 | Path to the first NIfTI image file. 24 | 25 | imgfile2 : str 26 | Path to the second NIfTI image file. 27 | 28 | mask : str, optional 29 | Path to a binarized mask image for voxel selection. Default is None. 30 | 31 | thresh : float, optional 32 | Threshold value for voxel selection. Positive values retain voxels greater than the threshold, 33 | and negative values retain voxels less than the threshold. Default is None. 34 | 35 | similarity_type : str, optional 36 | Similarity calculation method. Options are 'dice', 'jaccard', 'tetrachoric', or 'spearman'. Default is 'dice'. 37 | 38 | Returns 39 | ------- 40 | float 41 | Similarity coefficient based on the selected method. 42 | 43 | 44 | Example 45 | ------- 46 | # Example usage of image_similarity 47 | similarity = image_similarity(imgfile1='./img1.nii', imgfile2='./img2.nii', 48 | mask='./mask.nii', thresh=0.5, similarity_type='dice') 49 | """ 50 | assert similarity_type.casefold() in ['dice', 'jaccard', 51 | 'tetrachoric', 'spearman'], 'similarity_type must be ' \ 52 | '"Dice", "Jaccard", "Tetrachoric" or ' \ 53 | '"Spearman". Provided: {}"'.format(similarity_type) 54 | 55 | # load list of images 56 | imagefiles = [imgfile1, imgfile2] 57 | img = [image.load_img(i) for i in imagefiles] 58 | 59 | assert img[0].shape == img[1].shape, 'images of different shape, ' \ 60 | 'image 1 {} and image 2 {}'.format(img[0].shape, img[1].shape) 61 | 62 | # mask image 63 | masker = NiftiMasker(mask_img=mask) 64 | imgdata = masker.fit_transform(img) 65 | 66 | # threshold image, compatible for positive & negative values 67 | # (i.e., some may want similarity in (de)activation) 68 | if thresh is not None and similarity_type.casefold() != 'spearman': 69 | if thresh > 0: 70 | imgdata = imgdata > thresh 71 | 72 | elif thresh < 0: 73 | imgdata = imgdata < thresh 74 | 75 | if similarity_type.casefold() in ['dice', 'jaccard']: 76 | # Intersection of images 77 | intersect = np.logical_and(imgdata[0, :], imgdata[1, :]) 78 | union = np.logical_or(imgdata[0, :], imgdata[1, :]) 79 | dice_coeff = (intersect.sum()) / (float(union.sum()) + np.finfo(float).eps) 80 | if similarity_type.casefold() == 'dice': 81 | coeff = dice_coeff 82 | else: 83 | coeff = dice_coeff / (2 - dice_coeff) 84 | elif similarity_type.casefold() == 'tetrachoric': 85 | warnings.filterwarnings('ignore') 86 | coeff = tet_corr(vec1=imgdata[0, :], vec2=imgdata[1, :]) 87 | 88 | else: 89 | if thresh is not None: 90 | raise ValueError(f"Spearman rank should be for unthresholded images." 91 | f"/n Threshold is set to: {thresh}./n Advise: 'None'.") 92 | else: 93 | coeff = spearmanr(a=imgdata[0, :], b=imgdata[1, :])[0] 94 | 95 | return coeff 96 | 97 | 98 | def pairwise_similarity(nii_filelist: list, mask: str = None, 99 | thresh: float = None, similarity_type: str = 'Dice') -> DataFrame: 100 | """ 101 | Calculate pairwise similarity between a list of NIfTI images using a specified similarity metric. 102 | The function generates all possible combinations of the provided NIfTI images and computes the similarity 103 | coefficient for each pair. 104 | 105 | Parameters 106 | ---------- 107 | nii_filelist : list 108 | List of paths to NIfTI image files. 109 | 110 | mask : str, optional 111 | Path to the brain mask image for voxel selection. 
Default is None. 112 | 113 | thresh : float, optional 114 | Threshold value for voxel selection. Positive values retain voxels greater than the threshold, 115 | and negative values retain voxels less than the threshold. Default is None. 116 | 117 | similarity_type : str, optional 118 | Similarity calculation method. Options are 'dice', 'jaccard', 'tetrachoric', or 'spearman'. Default is 'dice'. 119 | 120 | Returns 121 | ------- 122 | DataFrame 123 | A pandas DataFrame containing the similarity coefficients and corresponding image labels for each pairwise comparison. 124 | 125 | 126 | Example 127 | ------- 128 | # Example usage of pairwise_similarity 129 | similarity_df = pairwise_similarity(['./img1.nii', './img2.nii', './img3.nii'], 130 | mask='mask.nii', thresh=0.5, similarity_type='dice') 131 | """ 132 | # check that similarity_type is one of the supported metrics, case-insensitive 133 | assert similarity_type.casefold() in ['dice', 'jaccard', 134 | 'tetrachoric', 'spearman'], 'similarity_type must be ' \ 135 | '"Dice", "Jaccard", "Tetrachoric" or ' \ 136 | '"Spearman". Provided: {}'.format(similarity_type) 137 | 138 | var_pairs = list(combinations(nii_filelist, 2)) 139 | coef_df = DataFrame(columns=['similar_coef', 'image_labels']) 140 | 141 | for img_comb in var_pairs: 142 | # select basename of file name(s) 143 | path = [os.path.basename(i) for i in img_comb] 144 | # calculate similarity for the current pair 145 | val = image_similarity(imgfile1=img_comb[0], imgfile2=img_comb[1], mask=mask, 146 | thresh=thresh, similarity_type=similarity_type) 147 | 148 | # append the coefficient + image-pair label for this pair to the pandas df 149 | similarity_data = DataFrame(np.column_stack((val, " ~ ".join([path[0], path[1]]))), 150 | columns=['similar_coef', 'image_labels']) 151 | coef_df = concat([coef_df, similarity_data], axis=0, ignore_index=True) 152 | 153 | return coef_df 154 | -------------------------------------------------------------------------------- /pyrelimri/tetrachoric_correlation.py: -------------------------------------------------------------------------------- 1 | from numpy import cos, pi, sqrt, logical_and, ndarray, nan 2 | 3 | 4 | def tetrachoric_corr(vec1: ndarray, vec2: ndarray) -> float: 5 | """ 6 | Calculates the tetrachoric correlation between two binary vectors, vec1 and vec2. 7 | 8 | :param vec1: A 1D binary numpy array of length n representing the 1st dichotomous (1/0) variable. 9 | :param vec2: A 1D binary numpy array of length n representing the 2nd dichotomous (1/0) variable. 10 | 11 | Returns: The tetrachoric correlation between the two binary variables. 12 | """ 13 | assert len(vec1) > 0, f"Image 1: ({vec1}) is empty, length should be > 0" 14 | assert len(vec2) > 0, f"Image 2: ({vec2}) is empty, length should be > 0" 15 | assert len(vec1) == len(vec2), ( 16 | 'Input vectors must have the same length. 
', 17 | f'vec1 length: {len(vec1)} and vec2 length: {len(vec2)}' 18 | ) 19 | 20 | # check for exact replicas 21 | if (vec1 == vec2).all(): 22 | 23 | return 1.0 24 | 25 | # frequencies of the four possible combinations of vec1 and vec2 26 | A = sum(logical_and(vec1 == 0, vec2 == 0)) 27 | B = sum(logical_and(vec1 == 0, vec2 == 1)) 28 | C = sum(logical_and(vec1 == 1, vec2 == 0)) 29 | D = sum(logical_and(vec1 == 1, vec2 == 1)) 30 | 31 | AD = A*D 32 | 33 | if B == 0 or C == 0: 34 | return nan 35 | 36 | return cos(pi/(1+sqrt(AD/B/C))) 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | pytest 4 | nilearn 5 | nibabel 6 | scipy 7 | seaborn 8 | scikit-learn 9 | hypothesis 10 | matplotlib 11 | joblib 12 | statsmodels 13 | 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name="PyReliMRI", 5 | version="2.1.0", 6 | description="A package for computing reliability of MRI/fMRI images", 7 | author="Michael Demidenko", 8 | author_email="demidenko.michael@gmail.com", 9 | url="https://github.com/demidenm/PyReliMRI", 10 | license="MIT", 11 | packages=setuptools.find_packages(), 12 | install_requires=[ 13 | "numpy", 14 | "pandas", 15 | "pytest", 16 | "nilearn", 17 | "nibabel", 18 | "scipy", 19 | "seaborn", 20 | "scikit-learn", 21 | "hypothesis", 22 | "matplotlib", 23 | "joblib", 24 | "statsmodels", 25 | ], 26 | classifiers=[ 27 | "Programming Language :: Python :: 3", 28 | "License :: OSI Approved :: MIT License", 29 | "Operating System :: OS Independent", 30 | ], 31 | python_requires='>=3.6', 32 | ) 33 | -------------------------------------------------------------------------------- /test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/demidenm/PyReliMRI/26fe3277a010b8b543d4669ea1b1d4250f13cf38/test.npy -------------------------------------------------------------------------------- /tests/test_brainicc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import numpy as np 4 | import nibabel as nib 5 | from pathlib import Path 6 | from pyrelimri.similarity import image_similarity 7 | from pyrelimri.brain_icc import (voxelwise_icc, setup_atlas, roi_icc) 8 | from nilearn.datasets import fetch_neurovault_ids 9 | from collections import namedtuple 10 | from nilearn.masking import compute_multi_brain_mask 11 | 12 | 13 | def generate_img_pair(r: float, dir: Path, use_mask: bool = False, tol: float = .001, 14 | imgdims: list = None, 15 | maskdims: list = None) -> namedtuple: 16 | """ 17 | r: correlation bw images 18 | dir: Path for saving files 19 | use_mask: bool, create mask and mask data 20 | tol: tolerance for correlation value - lower than .001 will make it really slow 21 | returns: 22 | images: path to two image files with specified correlation 23 | mask: path to mask image, all ones if use_mask==False 24 | """ 25 | imgpair = namedtuple("ImgPair", "tol r images maskimg") 26 | imgpair.images = [] 27 | imgpair.r = r 28 | imgpair.tol = tol 29 | 30 | rng = np.random.default_rng() 31 | 32 | if imgdims is None: 33 | imgdims = [64, 64, 32] 34 | nvox = np.prod(imgdims) 35 | if use_mask: 36 | if maskdims is None: 37 | maskdims = [round(i / 2) for 
i in imgdims] 38 | mask = np.zeros(imgdims).astype('int') 39 | mask[:maskdims[0], :maskdims[1], :maskdims[2]] = 1 40 | else: 41 | mask = np.ones(imgdims) 42 | 43 | maskvox = mask.reshape(nvox) 44 | 45 | empirical_r = 10 46 | while (np.abs(empirical_r - r) > tol): 47 | data = rng.multivariate_normal(mean=[0, 0], 48 | cov=[[1, r], [r, 1]], size=nvox) 49 | empirical_r = np.corrcoef(data[maskvox == 1, :].T)[0, 1] 50 | 51 | for i in range(2): 52 | imgpair.images.append(dir / f'testimg_{i}.nii.gz') 53 | tmpimg = nib.Nifti1Image((maskvox * data[:, i]).reshape(imgdims), 54 | affine=np.eye(4)) 55 | tmpimg.to_filename(imgpair.images[-1]) 56 | imgpair.mask = dir / 'mask.nii.gz' 57 | maskimg = nib.Nifti1Image(mask, affine=np.eye(4)) 58 | maskimg.to_filename(imgpair.mask) 59 | return imgpair 60 | 61 | 62 | @pytest.fixture(scope="session") 63 | def image_pair(tmp_path_factory): 64 | tmpdir = tmp_path_factory.mktemp("data") 65 | return generate_img_pair(r=0.5, dir=tmpdir) 66 | 67 | 68 | def test_image_pair_smoke(image_pair): 69 | assert image_pair.images is not None 70 | 71 | 72 | def test_image_pair_images(image_pair): 73 | for imgfile in image_pair.images + [image_pair.mask]: 74 | img = nib.load(imgfile) 75 | assert img is not None 76 | 77 | 78 | def create_dummy_nifti(shape, affine, filepath): 79 | data = np.random.rand(*shape) 80 | img = nib.Nifti1Image(data, affine) 81 | nib.save(img, str(filepath)) 82 | 83 | 84 | def test_session_lengths_mismatch(tmp_path_factory): 85 | tmpdir = tmp_path_factory.mktemp("data") 86 | 87 | # Test case with different session lengths 88 | multisession_list = [ 89 | [tmpdir / "sub-00_ses1_Contrast-A_bold.nii.gz", 90 | tmpdir / "sub-01_ses1_Contrast-A_bold.nii.gz"], 91 | [tmpdir / "sub-00_ses2_Contrast-A_bold.nii.gz", 92 | tmpdir / "sub-01_ses2_Contrast-A_bold.nii.gz", 93 | tmpdir / "sub-03_ses2_Contrast-A_bold.nii.gz"] 94 | ] 95 | 96 | icc_type = "icc_3" 97 | 98 | # Create dummy NIfTI files 99 | shape = (97, 115, 97) 100 | affine = np.eye(4) 101 | for session in multisession_list: 102 | for filepath in session: 103 | os.makedirs(os.path.dirname(filepath), exist_ok=True) 104 | create_dummy_nifti(shape, affine, filepath) 105 | 106 | mask = compute_multi_brain_mask(target_imgs=[ 107 | tmpdir / "sub-00_ses2_Contrast-A_bold.nii.gz", 108 | tmpdir / "sub-01_ses2_Contrast-A_bold.nii.gz", 109 | tmpdir / "sub-03_ses2_Contrast-A_bold.nii.gz" 110 | ]) 111 | 112 | mask_path = tmpdir / 'test_mask.nii.gz' 113 | nib.save(mask, mask_path) 114 | 115 | # The assertion should raise an exception 116 | with pytest.raises(AssertionError): 117 | voxelwise_icc(multisession_list, mask, icc_type) 118 | 119 | 120 | @pytest.mark.parametrize("measure", ['Dice', 'Jaccard']) 121 | def test_image_similarity(image_pair, measure): 122 | imgsim = image_similarity( 123 | image_pair.images[0], image_pair.images[1], image_pair.mask, 124 | thresh=1, similarity_type=measure 125 | ) 126 | assert imgsim is not None 127 | 128 | 129 | def test_spearman_similarity(image_pair): 130 | spearman_sim = image_similarity( 131 | image_pair.images[0], image_pair.images[1], similarity_type='spearman') 132 | assert abs(spearman_sim - 0.5) < 0.1, "The similarity is not close to 0.5." 
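
# A minimal consistency sketch (the test name and tolerance here are illustrative
# assumptions, not part of the original suite): for the same binarized image pair,
# Dice and Jaccard are linked by the closed-form identity J = D / (2 - D).
def test_dice_jaccard_identity(image_pair):
    dice = image_similarity(
        image_pair.images[0], image_pair.images[1], image_pair.mask,
        thresh=1, similarity_type='dice')
    jaccard = image_similarity(
        image_pair.images[0], image_pair.images[1], image_pair.mask,
        thresh=1, similarity_type='jaccard')
    # Jaccard should be recoverable from Dice for the same thresholded pair
    assert np.isclose(jaccard, dice / (2 - dice), atol=1e-6)
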
133 | 134 | 135 | def test_image_similarity_spearman_value_error(image_pair): 136 | imgfile1 = image_pair.images[0] 137 | imgfile2 = image_pair.images[1] 138 | mask = None 139 | thresh = 0.5 140 | similarity_type = "spearman" 141 | with pytest.raises(ValueError): 142 | image_similarity(imgfile1, imgfile2, mask=mask, thresh=thresh, similarity_type=similarity_type) 143 | 144 | 145 | # test roi based ICC 146 | def setup_atlas_valuerrror(): 147 | with pytest.raises(ValueError): 148 | setup_atlas(name_atlas='fake_atlas') 149 | 150 | @pytest.mark.parametrize("atlases", ['aal', 'difumo']) 151 | def setup_atlas_noerror(atlases): 152 | setup_atlas(name_atlas=atlases) 153 | 154 | def test_roiicc_msc(tmp_path_factory): 155 | 156 | # create temp dir 157 | tmpdir = tmp_path_factory.mktemp("data") 158 | 159 | # Test case w/ neurovault data 160 | MSC01_1 = fetch_neurovault_ids(image_ids=[48068], data_dir=tmpdir) # MSC01 motor session1 1 L Hand beta 161 | MSC01_2 = fetch_neurovault_ids(image_ids=[48073], data_dir=tmpdir) # MSC01 motor session2 1 L Hand beta 162 | MSC02_1 = fetch_neurovault_ids(image_ids=[48118], data_dir=tmpdir) 163 | MSC02_2 = fetch_neurovault_ids(image_ids=[48123], data_dir=tmpdir) 164 | MSC03_1 = fetch_neurovault_ids(image_ids=[48168], data_dir=tmpdir) 165 | MSC03_2 = fetch_neurovault_ids(image_ids=[48173], data_dir=tmpdir) 166 | 167 | ses1 = [MSC01_1['images'], MSC02_1['images'], MSC03_1['images']] 168 | ses2 = [MSC01_2['images'], MSC02_2['images'], MSC03_2['images']] 169 | 170 | # estimate ICC for roi = 200 in shaefer 171 | result = roi_icc(multisession_list=[ses1, ses2], type_atlas='shaefer_2018', 172 | atlas_dir=tmpdir, icc_type='icc_3') 173 | 174 | assert np.allclose(result['est'][200], .70, atol=.01) -------------------------------------------------------------------------------- /tests/test_connicc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from pyrelimri.conn_icc import (triang_to_fullmat, edgewise_icc) 4 | 5 | 6 | def test_triangtomat_valid(): 7 | size = 3 8 | corr_1darray = np.array([1, 2, 3, 4, 5, 6]) 9 | expected_output = np.array([[1, 0, 0], [2, 3, 0], [4, 5, 6]]) 10 | output = triang_to_fullmat(corr_1darray, size) 11 | assert np.array_equal(output, expected_output) 12 | 13 | 14 | def test_triangtomat_invalid(): 15 | size = 3 16 | corr_1darray_invalid = np.array([1, 2, 3, 4]) 17 | with pytest.raises(ValueError): 18 | triang_to_fullmat(corr_1darray_invalid, size) 19 | 20 | 21 | def test_edgewise_icc_n_cols_and_col_names_length_match(): 22 | multisession_list = [ 23 | [np.eye(3), np.eye(3)], 24 | [np.eye(3), np.eye(3)] 25 | ] 26 | n_cols = 3 27 | col_names = ["A", "B", "C"] 28 | result = edgewise_icc(multisession_list, n_cols, col_names=col_names) 29 | assert result['roi_labels'] == col_names 30 | 31 | 32 | def test_edgewise_icc_n_cols_and_col_names_length_mismatch(): 33 | multisession_list = [ 34 | [np.eye(3), np.eye(3)], 35 | [np.eye(3), np.eye(3)] 36 | ] 37 | n_cols = 3 38 | col_names_mismatch = ["A", "B"] 39 | with pytest.raises(AssertionError): 40 | edgewise_icc(multisession_list, n_cols, col_names=col_names_mismatch) 41 | 42 | 43 | def test_edgewise_difflength(): 44 | multisession_list = [ 45 | [np.eye(5)], 46 | [np.eye(5), np.eye(5)] 47 | ] 48 | n_cols = 3 49 | with pytest.raises(AssertionError): 50 | edgewise_icc(multisession_list, n_cols) 51 | 52 | 53 | def test_edgewise_wrongfiletype(): 54 | multisession_list = [ 55 | [np.eye(5), "testing.xls"], 56 | [np.eye(5), np.eye(5)] 57 | ] 
58 | n_cols = 3 59 | with pytest.raises(TypeError): 60 | edgewise_icc(multisession_list, n_cols) 61 | 62 | 63 | def test_edgewise_filetest_result(): 64 | mock_matrix = np.eye(3) 65 | np.save('test.npy', mock_matrix) 66 | multisession_list_files = [ 67 | ['test.npy', 'test.npy'], 68 | ['test.npy', 'test.npy'] 69 | ] 70 | n_cols = 3 71 | result = edgewise_icc(multisession_list_files, n_cols) 72 | assert 'est' in result 73 | 74 | 75 | def test_edgewise_wrong_ext(): 76 | multisession_list_invalid_ext = [ 77 | ['file.wrg', 'file.wrg'], 78 | ['file.wrg', 'file.wrg'] 79 | ] 80 | n_cols = 3 81 | with pytest.raises(Exception): 82 | edgewise_icc(multisession_list_invalid_ext, n_cols) 83 | -------------------------------------------------------------------------------- /tests/test_maskedtimeseries.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | import nibabel as nib 5 | from pyrelimri.masked_timeseries import (trlocked_events, extract_time_series, extract_postcue_trs_for_conditions) 6 | import warnings 7 | warnings.filterwarnings("ignore", message="The nilearn.glm module is experimental.", category=FutureWarning) 8 | from nilearn.glm.first_level import make_first_level_design_matrix 9 | 10 | 11 | def create_dummy_nifti(shape, affine, filepath): 12 | data = np.random.rand(*shape) 13 | img = nib.Nifti1Image(data, affine) 14 | nib.save(img, str(filepath)) 15 | 16 | 17 | def test_miss_sub_boldpath(): 18 | bold_paths = ['/tmp/NDA_run-01_bold.nii.gz'] 19 | roi_type = 'mask' 20 | roi_mask = '/tmp/roi_mask.nii.gz' 21 | 22 | with pytest.raises(ValueError): 23 | extract_time_series(bold_paths, roi_type, roi_mask=roi_mask) 24 | 25 | 26 | def test_mismatched_shapes_boldroi(tmp_path): 27 | # testing that error is thrown when mask and BOLD images are not similar shape 28 | bold_path = tmp_path / "sub-01_run-01_bold.nii.gz" 29 | roi_mask_path = tmp_path / "roi_mask.nii.gz" 30 | 31 | # Create dummy BOLD and ROI NIfTI files with mismatched shapes 32 | create_dummy_nifti((64, 64, 36, 2), np.eye(4), bold_path) 33 | create_dummy_nifti((64, 64, 34), np.eye(4), roi_mask_path) 34 | 35 | bold_paths = [bold_path] 36 | roi_type = 'mask' 37 | 38 | # Ensure that AssertionError is raised when shapes are mismatched 39 | with pytest.raises(AssertionError): 40 | extract_time_series(bold_paths, roi_type, roi_mask=str(roi_mask_path)) 41 | 42 | def test_wrongorder_behbold_ids(): 43 | # testing that order of sub & run paths for BOLD paths != Behavioral paths 44 | bold_path_list = [f"sub-0{i:02d}_run-01.nii.gz" for i in range(20)] + \ 45 | [f"sub-0{i:02d}_run-02.nii.gz" for i in range(20)] 46 | beh_path_list = [f"sub-0{i:02d}_run-01.nii.gz" for i in range(15)] + \ 47 | [f"sub-0{i:02d}_run-02.nii.gz" for i in range(20)] 48 | 49 | with pytest.raises(AssertionError): 50 | extract_postcue_trs_for_conditions(events_data=beh_path_list, onset='Test', 51 | trial_name='test', bold_tr=.800, bold_vols=200, 52 | time_series=[0, 1, 2, 3], conditions=['test'], tr_delay=15, 53 | list_trpaths=bold_path_list) 54 | 55 | def test_wrongroi_type(): 56 | # Define invalid ROI type 57 | wrong_roi_lab = 'Testinit' 58 | 59 | # Define other function arguments 60 | bold_paths = ["sub-01_run-01_bold.nii.gz"] 61 | high_pass_sec = 100 62 | roi_mask = "roi_mask.nii.gz" 63 | 64 | # Test if ValueError is raised for invalid ROI type 65 | with pytest.raises(ValueError): 66 | extract_time_series(bold_paths, wrong_roi_lab, high_pass_sec=high_pass_sec, roi_mask=roi_mask) 
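
# A happy-path sketch under stated assumptions (deliberately not prefixed with test_,
# so pytest will not collect it; the shapes and file names below are illustrative):
# extract_time_series with roi_type='mask' expects a 4D BOLD image whose spatial
# dimensions match the 3D binary mask, with sub-/run- identifiers in the file name.
def example_matched_mask_extraction(out_dir):
    bold_path = out_dir / "sub-01_run-01_bold.nii.gz"
    roi_mask_path = out_dir / "roi_mask.nii.gz"
    # 10 BOLD volumes with spatial dims (64, 64, 36) that agree with the mask
    create_dummy_nifti((64, 64, 36, 10), np.eye(4), bold_path)
    mask_img = nib.Nifti1Image(np.ones((64, 64, 36), dtype=np.int16), np.eye(4))
    nib.save(mask_img, str(roi_mask_path))
    return extract_time_series([bold_path], 'mask', roi_mask=str(roi_mask_path))
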
67 | 68 | def test_missing_file(): 69 | # test when events file is not found 70 | events_path = "missing_file_testin-it.csv" 71 | onsets_column = "onsets" 72 | trial_name = "trial" 73 | bold_tr = 2.0 74 | bold_vols = 10 75 | 76 | # Test if FileNotFoundError is raised when the file does not exist 77 | with pytest.raises(FileNotFoundError): 78 | trlocked_events(events_path, onsets_column, trial_name, bold_tr, bold_vols) 79 | 80 | 81 | def test_missing_eventscol(tmp_path): 82 | # testing missing column "trial" in events file 83 | events_path = tmp_path / "testin-it_events.csv" 84 | with open(events_path, "w") as f: 85 | f.write("onsets\n0.0\n1.0\n2.0\n") 86 | 87 | # Define function arguments 88 | onsets_column = "onsets" 89 | trial_name = "trial" 90 | bold_tr = 2.0 91 | bold_vols = 10 92 | 93 | # Test if KeyError is raised when columns are missing 94 | with pytest.raises(KeyError): 95 | trlocked_events(events_path, onsets_column, trial_name, bold_tr, bold_vols) 96 | 97 | 98 | def test_lenbold_mismatchtrlen(tmp_path): 99 | # The number of TR-locked rows must match the number of BOLD volumes; 100 | # bold_vols is deliberately mismatched here, so a ValueError is expected 101 | events_path = tmp_path / "testin-it_events.csv" 102 | onset_name = 'onsets' 103 | trial_name = 'trial' 104 | bold_tr = 2.0 105 | bold_vols = 5  # Mismatched bold_vols compared to the expected length of merged_df 106 | 107 | with open(events_path, "w") as f: 108 | f.write(f"{onset_name},{trial_name}\n0.0,Test1\n1.0,Test2\n2.0,Test1\n") 109 | 110 | with pytest.raises(ValueError): 111 | trlocked_events(events_path=events_path, onsets_column=onset_name, trial_name=trial_name, 112 | bold_tr=bold_tr, bold_vols=bold_vols, separator=',') 113 | 114 | def test_runtrlocked_events(tmp_path): 115 | # The number of TR-locked rows should match the number of BOLD volumes; 
116 | # assumed to hold in practice, but confirm the function runs end to end 117 | events_path = tmp_path / "testin-it_events.csv" 118 | onset_name = 'onsets' 119 | trial_name = 'trial' 120 | bold_tr = 2.0 121 | bold_vols = 3 122 | with open(events_path, "w") as f: 123 | f.write(f"{onset_name},{trial_name}\n0.0,Test1\n2.0,Test2\n4.0,Test1\n") 124 | 125 | trlocked_events(events_path=events_path, onsets_column=onset_name, trial_name=trial_name, 126 | bold_tr=bold_tr, bold_vols=bold_vols, separator=',') 127 | 128 | 129 | def create_conv_mat(eventsdf, tr_dur=None, acq_dur=None): 130 | vol_time = acq_dur 131 | tr = tr_dur 132 | design_mat = make_first_level_design_matrix( 133 | frame_times=np.linspace(0, vol_time, int(vol_time/tr)), 134 | events=eventsdf, hrf_model='spm', 135 | drift_model=None, high_pass=None) 136 | return design_mat 137 | 138 | 139 | @pytest.mark.parametrize("TR", [.8, 1.4, 2, 2.6]) 140 | @pytest.mark.parametrize("interval", [10, 15, 20]) 141 | def test_testsimtrpeak(tmp_path, TR, interval): 142 | onsets = np.arange(0, 160, interval) 143 | dur_opts = [1.5, 2, 2.5] 144 | prob_durs = [.50, .25, .25] 145 | np.random.seed(11) 146 | durations = np.random.choice( 147 | dur_opts, size=len(onsets), p=prob_durs 148 | ) 149 | 150 | events_df = pd.DataFrame({ 151 | "onset": onsets, 152 | "duration": durations, 153 | "trial_type": "Testinit" 154 | }) 155 | last_onset = events_df['onset'].iloc[-1] 156 | tr = TR 157 | conv_vals = create_conv_mat(eventsdf=events_df, tr_dur=tr, acq_dur=last_onset) 158 | 159 | # create n = 1 compatible timeseries for test 160 | convolved_stacked = np.vstack([conv_vals['Testinit']]) 161 | convolved_stacked = convolved_stacked.reshape((conv_vals.shape[0] * (conv_vals.shape[1] - 1), 1)) 162 | timeseries_reshaped = np.reshape(convolved_stacked, (1, len(convolved_stacked), 1)) 163 | 164 | events_file_name = tmp_path / "sub-01_run-01_test-events.csv" 165 | events_df.to_csv(events_file_name, sep='\t') 166 | 167 | conditions = ['Testinit'] 168 | trdelay = int(15 / tr) 169 | df = extract_postcue_trs_for_conditions(events_data=[events_file_name], onset='onset', trial_name='trial_type', 170 | bold_tr=TR, bold_vols=len(timeseries_reshaped[0]), 171 | time_series=timeseries_reshaped, 172 | conditions=conditions, tr_delay=trdelay, 173 | list_trpaths=['sub-01_run-01']) 174 | df['Mean_Signal'] = pd.to_numeric(df['Mean_Signal'], errors='coerce')  # to avoid argmax object error 175 | tr_peak = df.loc[df['Mean_Signal'].idxmax(), 'TR'] 176 | min_tr = np.floor(float(6 / tr)) 177 | max_tr = np.ceil(float(10 / tr)) 178 | peak_in_tr = np.arange(min_tr, max_tr, .1) 179 | is_in_array = np.any(np.isclose(peak_in_tr, tr_peak)) 180 | print(f"Checking whether the HRF peak at TR {tr_peak} falls between {min_tr} and {max_tr} TRs") 181 | assert is_in_array, f"Peak error: peak should occur between 6 and 10 sec; observed peak at {round(tr_peak * tr, 2)} sec" -------------------------------------------------------------------------------- /tests/test_similarity-icc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | import statsmodels.formula.api as lme 6 | from pyrelimri.tetrachoric_correlation import tetrachoric_corr 7 | from pyrelimri.icc import sumsq_icc 8 | from hypothesis import given, strategies as st 9 | from hypothesis.extra.numpy import arrays 10 | 11 | 12 | @pytest.mark.parametrize("rater", ['focused', 'divided']) 13 | def test_pyreli_v_lmer_icc3(rater): 14 | 15 | data = sns.load_dataset('anagrams')  # load 16 | 
sub_df = data[data['attnr'] == rater] # filter 17 | long_df = pd.DataFrame( 18 | pd.melt(sub_df, 19 | id_vars="subidr", 20 | value_vars=["num1", "num2", "num3"], 21 | var_name="sess", 22 | value_name="vals")) 23 | 24 | lmmod = lme.mixedlm("vals ~ sess", long_df, groups=long_df["subidr"], re_formula="~1") 25 | lmmod = lmmod.fit() 26 | lmmod_btwnvar = lmmod.cov_re.iloc[0, 0] 27 | lmmod_wthnvar = lmmod.scale 28 | lmmod_icc3 = lmmod_btwnvar / (lmmod_btwnvar + lmmod_wthnvar) 29 | icc3_test = sumsq_icc(df_long=long_df, sub_var='subidr', 30 | sess_var='sess', value_var='vals', icc_type='icc_3') 31 | iccmod_btwnvar = icc3_test[3] 32 | iccmod_withinvar = icc3_test[4] 33 | iccmod_icc3 = icc3_test[0] 34 | 35 | lm_out = np.array([lmmod_btwnvar, lmmod_wthnvar, lmmod_icc3]) 36 | pyreli_out = np.array([iccmod_btwnvar, iccmod_withinvar, iccmod_icc3]) 37 | 38 | assert np.allclose(a=lm_out, b=pyreli_out, atol=.001) 39 | 40 | 41 | def test_calculate_icc1(): 42 | data = sns.load_dataset('anagrams') 43 | # subset to only divided attnr measure occ 44 | a_wd = data[data['attnr'] == 'divided'] 45 | a_ld = pd.DataFrame( 46 | pd.melt(a_wd, 47 | id_vars="subidr", 48 | value_vars=["num1", "num2", "num3"], 49 | var_name="sess", 50 | value_name="vals")) 51 | 52 | icc = sumsq_icc(df_long=a_ld, sub_var="subidr", 53 | sess_var="sess", value_var="vals", icc_type='icc_1') 54 | 55 | assert np.allclose(icc[0], -0.05, atol=.01) 56 | 57 | 58 | def test_calculate_icc2(): 59 | data = sns.load_dataset('anagrams') 60 | # subset to only divided attnr measure occ 61 | a_wd = data[data['attnr'] == 'divided'] 62 | a_ld = pd.DataFrame( 63 | pd.melt(a_wd, 64 | id_vars="subidr", 65 | value_vars=["num1", "num2", "num3"], 66 | var_name="sess", 67 | value_name="vals")) 68 | 69 | icc = sumsq_icc(df_long=a_ld, sub_var="subidr", 70 | sess_var="sess", value_var="vals", icc_type='icc_2') 71 | assert np.allclose(icc[0], 0.11, atol=.01) 72 | 73 | 74 | def test_tetrachoric_corr(): 75 | assert np.allclose( 76 | tetrachoric_corr(np.array([0, 0, 1, 1]), 77 | np.array([0, 1, 0, 1])), 78 | 0.0) 79 | assert np.allclose( 80 | tetrachoric_corr(np.array([0, 0, 1, 1]), 81 | np.array([0, 0, 1, 1])), 82 | 1.0) 83 | assert np.allclose( 84 | tetrachoric_corr(np.array([0, 0, 1, 1]), 85 | np.array([1, 1, 0, 0])), 86 | -1.0) 87 | 88 | 89 | def test_tetrachoric_corr_nanhandling(): 90 | assert np.isnan( 91 | tetrachoric_corr(np.array([0, 0, 1, 1]), 92 | np.array([1, 1, 1, 1]))) 93 | 94 | 95 | # property based testing with a range of arrays 96 | @given(vec=arrays(np.int8, (2, 24), elements=st.integers(0, 100))) 97 | def test_tetrachoric_corr_hypothesis(vec): 98 | tc = tetrachoric_corr(vec[0, :], vec[1, :]) 99 | if (vec[0, :] == vec[1, :]).all(): 100 | assert tc == 1.0 101 | else: 102 | B = sum(np.logical_and(vec[0, :] == 0, vec[1, :] == 1)) 103 | C = sum(np.logical_and(vec[0, :] == 1, vec[1, :] == 0)) 104 | # should return nan in these cases 105 | if B == 0 or C == 0: 106 | assert np.isnan(tc) 107 | else: 108 | assert tc <= 1.0 and tc >= -1.0 109 | -------------------------------------------------------------------------------- /tools/local_gitignore: -------------------------------------------------------------------------------- 1 | # Add repository-specific ignores here 2 | .idea/ 3 | Reliability_calcs.ipynb 4 | tests/reliability_tests.ipynb 5 | pyrelimri/mktestdata.py -------------------------------------------------------------------------------- /tools/make_gitignore: -------------------------------------------------------------------------------- 1 | #!/bin/bash 
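# Usage: run `bash tools/make_gitignore` from anywhere inside the repository;
# the script resolves the repo root from its own location (see ROOT below),
# then merges the GitHub templates listed in SOURCES with tools/local_gitignore.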
2 | # 3 | # Generate .gitignore from GitHub gitignore templates 4 | 5 | BASE_URL="https://raw.githubusercontent.com/github/gitignore/main" 6 | ROOT=$( dirname $( dirname $( realpath $0 ) ) ) 7 | 8 | SOURCES=( 9 | Python 10 | Global/Emacs 11 | Global/Linux 12 | Global/Vim 13 | Global/VisualStudioCode 14 | Global/Windows 15 | Global/macOS 16 | ) 17 | 18 | cat >$ROOT/.gitignore <<EOF 19 | # Auto-generated by make_gitignore on $(date) 20 | EOF 21 | 22 | cat $ROOT/tools/local_gitignore >> $ROOT/.gitignore 23 | 24 | for SRC in ${SOURCES[@]}; do 25 | echo >> $ROOT/.gitignore 26 | curl -sSL ${BASE_URL}/${SRC}.gitignore >> $ROOT/.gitignore 27 | done 28 | --------------------------------------------------------------------------------
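
Quick-start sketch (hypothetical paths and toy values; pairwise_similarity and sumsq_icc are the documented entry points exercised in the tests above):

import pandas as pd
from pyrelimri.similarity import pairwise_similarity
from pyrelimri.icc import sumsq_icc

# pairwise overlap between three hypothetical thresholded stat maps
sim_df = pairwise_similarity(['./map1.nii.gz', './map2.nii.gz', './map3.nii.gz'],
                             thresh=1.0, similarity_type='jaccard')
print(sim_df)

# ICC(3,1) on a toy long-format table (one row per subject x session measurement)
long_df = pd.DataFrame({'subj': [1, 1, 2, 2, 3, 3],
                        'sess': ['a', 'b', 'a', 'b', 'a', 'b'],
                        'vals': [0.50, 0.55, 0.30, 0.28, 0.70, 0.66]})
icc3 = sumsq_icc(df_long=long_df, sub_var='subj', sess_var='sess',
                 value_var='vals', icc_type='icc_3')
print(icc3[0])  # point estimate; indices 3 and 4 hold between- and within-subject variance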