├── .github └── workflows │ └── deploy-book.yml ├── .gitignore ├── LICENSE ├── README.md ├── binder ├── data_requirement.json └── requirements.txt ├── content ├── _config.yml ├── _toc.yml ├── conf.py ├── encoding.ipynb ├── gcn_decoding.ipynb ├── gcn_decoding │ ├── Brain_connectivity_graph.png │ └── GCN_pipeline_main2022.png ├── graphics │ ├── brain_encoding_decoding_example.png │ ├── brain_encoding_pred_timeseries.png │ └── decoding_pipeline_example.png ├── haxby_data.ipynb ├── haxby_data │ ├── brain-encoding-decoding.png │ ├── masker.png │ └── masker.svg ├── intro.md ├── mlp_decoding.md ├── mlp_decoding │ ├── logistic_regression.png │ ├── multilayer-perceptron.png │ └── multilayer-perceptron.svg ├── references.bib ├── svm_decoding.md └── svm_decoding │ ├── optimal-hyperplane.png │ └── regularizations.png ├── data └── .gitkeep ├── environment.yml ├── images └── neurolibre-logo.png ├── notebooks ├── Brain_connectivity_graph.png ├── GCN_pipeline.png ├── GCN_pipeline_main2022.png ├── Haxby_stimuli.png ├── data_processing.ipynb ├── gcn_decoding.ipynb └── multiple_decoders_haxby_tutorial.ipynb ├── requirements.txt └── src ├── __init__.py ├── __pycache__ ├── gcn_model.cpython-36.pyc ├── gcn_windows_dataset.cpython-36.pyc ├── graph_construction.cpython-36.pyc └── visualization.cpython-36.pyc ├── gcn_model.py ├── gcn_windows_dataset.py ├── graph_construction.py └── visualization.py /.github/workflows/deploy-book.yml: -------------------------------------------------------------------------------- 1 | name: deploy-book 2 | 3 | # Only run this when the master branch changes 4 | on: 5 | push: 6 | branches: 7 | - main 8 | 9 | # This job installs dependencies, build the book, and pushes it to `gh-pages` 10 | jobs: 11 | deploy-book: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | # Install dependencies 17 | - name: Set up Python 3.11.10 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.11.10 21 | 22 | - name: Install pytorch 
23 | run: | 24 | pip install torch==1.13.0 25 | 26 | - name: Install dependencies 27 | run: | 28 | pip install -r binder/requirements.txt 29 | 30 | # Build the page 31 | - name: Build the book 32 | run: | 33 | jupyter-book build content/ 34 | 35 | # Push the book's HTML to github-pages 36 | - name: GitHub Pages action 37 | uses: peaceiris/actions-gh-pages@v3.6.1 38 | with: 39 | github_token: ${{ secrets.GITHUB_TOKEN }} 40 | publish_dir: ./content/_build/html 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | nilearn_cache/ 3 | _build/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 109 | __pypackages__/ 110 | 111 | # Celery stuff 112 | celerybeat-schedule 113 | celerybeat.pid 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # Environments 119 | .env 120 | .venv 121 | env/ 122 | venv/ 123 | ENV/ 124 | env.bak/ 125 | venv.bak/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # mkdocs documentation 135 | /site 136 | 137 | # mypy 138 | .mypy_cache/ 139 | .dmypy.json 140 | dmypy.json 141 | 142 | # Pyre type checker 143 | .pyre/ 144 | 145 | # pytype static type analyzer 146 | .pytype/ 147 | 148 | # Cython debug symbols 149 | cython_debug/ 150 | 151 | # PyCharm 152 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 153 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 154 | # and can be added to the global gitignore or merged into this file. For a more nuclear 155 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 156 | #.idea/ 157 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 main-educational 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # brain_encoding_decoding 2 | 3 | [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://main-educational.github.io/brain_encoding_decoding/intro.html) 4 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/main-educational/brain_encoding_decoding/HEAD) 5 | [![Docker Hub](https://img.shields.io/docker/pulls/user/repo)]() 6 | [![GitHub size](https://img.shields.io/github/repo-size/main-educational/brain_encoding_decoding)](https://github.com/main-educational/brain_encoding_decoding/archive/master.zip) 7 | [![GitHub issues](https://img.shields.io/github/issues/main-educational/brain_encoding_decoding?style=plastic)](https://github.com/main-educational/brain_encoding_decoding) 8 | [![GitHub PR](https://img.shields.io/github/issues-pr/main-educational/brain_encoding_decoding)](https://github.com/main-educational/brain_encoding_decoding/pulls) 9 | [![License](https://img.shields.io/github/license/main-educational/brain_encoding_decoding)](https://github.com/main-educational/brain_encoding_decoding) 10 | [![CONP](https://img.shields.io/badge/Supported%20by-%20CONP%2FPCNO-red)](https://conp.ca/) 11 | 12 | 13 | 14 | This is a jupyter book presenting an introduction to brain encoding and decoding using python. 
It is rendered on [main-educational.github.io/brain_encoding_decoding](https://main-educational.github.io/brain_encoding_decoding/intro.html). See the introduction of the jupyter book for more details, and acknowledgements. 15 | 16 | ### Build the book 17 | 18 | If you want to build the book locally: 19 | 20 | - Clone this repository 21 | - Run `pip install -r binder/requirements.txt` (it is recommended to run this command in a virtual environment) 22 | - For a clean build, remove `content/_build/` 23 | - Run `jb build content/` 24 | 25 | An html version of the jupyter book will be automatically generated in the folder `content/_build/html/`. 26 | 27 | ### Hosting the book 28 | 29 | The html version of the book is hosted on the `gh-pages` branch of this repo. Navigate to your local build and run, 30 | - `ghp-import -n -p -f content/_build/html` 31 | 32 | This will automatically push your build to the `gh-pages` branch. More information on this hosting process can be found [here](https://jupyterbook.org/publish/gh-pages.html#manually-host-your-book-with-github-pages). 
33 | -------------------------------------------------------------------------------- /binder/data_requirement.json: -------------------------------------------------------------------------------- 1 | { 2 | "haxby": { "src": "from nilearn import datasets; datasets.fetch_haxby(subjects=[4], fetch_stimuli=True, data_dir=_dst)", 3 | "dst": "./data", 4 | "projectName": "brain_decoding_main"}, 5 | "post-processed_data": { "src": "https://drive.google.com/uc?id=1LQO2hagRyImTqELh-VE44HDska43yo9Y", 6 | "dst": "./data", 7 | "projectName": "brain_decoding_main"} 8 | } 9 | -------------------------------------------------------------------------------- /binder/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.1.0 2 | accessible-pygments==0.0.5 3 | alabaster==0.7.16 4 | anyio==4.6.2.post1 5 | appnope==0.1.4 6 | argon2-cffi==23.1.0 7 | argon2-cffi-bindings==21.2.0 8 | arrow==1.3.0 9 | asttokens==2.4.1 10 | astunparse==1.6.3 11 | async-lru==2.0.4 12 | attrs==24.2.0 13 | babel==2.16.0 14 | beautifulsoup4==4.12.3 15 | bleach==6.1.0 16 | certifi==2024.8.30 17 | cffi==1.17.1 18 | charset-normalizer==3.4.0 19 | click==8.1.7 20 | comm==0.2.2 21 | contourpy==1.3.0 22 | cycler==0.12.1 23 | debugpy==1.8.7 24 | decorator==5.1.1 25 | defusedxml==0.7.1 26 | docutils==0.20.1 27 | executing==2.1.0 28 | fastjsonschema==2.20.0 29 | filelock==3.16.1 30 | flatbuffers==24.3.25 31 | fonttools==4.54.1 32 | fqdn==1.5.1 33 | fsspec==2024.9.0 34 | gast==0.6.0 35 | google-pasta==0.2.0 36 | greenlet==3.1.1 37 | grpcio==1.66.2 38 | h11==0.14.0 39 | h5py==3.12.1 40 | httpcore==1.0.6 41 | httpx==0.27.2 42 | idna==3.10 43 | imagesize==1.4.1 44 | importlib_metadata==8.5.0 45 | importlib_resources==6.4.5 46 | ipykernel==6.29.5 47 | ipython==8.28.0 48 | ipywidgets==8.1.5 49 | isoduration==20.11.0 50 | jedi==0.19.1 51 | Jinja2==3.1.4 52 | joblib==1.4.2 53 | json5==0.9.25 54 | jsonpointer==3.0.0 55 | jsonschema==4.23.0 56 | 
jsonschema-specifications==2024.10.1 57 | jupyter==1.1.1 58 | jupyter-book==1.0.3 59 | jupyter-cache==1.0.0 60 | jupyter-console==6.6.3 61 | jupyter-events==0.10.0 62 | jupyter-lsp==2.2.5 63 | jupyter_client==8.6.3 64 | jupyter_core==5.7.2 65 | jupyter_server==2.14.2 66 | jupyter_server_terminals==0.5.3 67 | jupyterlab==4.2.5 68 | jupyterlab_pygments==0.3.0 69 | jupyterlab_server==2.27.3 70 | jupyterlab_widgets==3.0.13 71 | keras==3.6.0 72 | kiwisolver==1.4.7 73 | latexcodec==3.0.0 74 | libclang==18.1.1 75 | linkify-it-py==2.0.3 76 | lxml==5.3.0 77 | Markdown==3.7 78 | markdown-it-py==3.0.0 79 | MarkupSafe==3.0.1 80 | matplotlib==3.9.2 81 | matplotlib-inline==0.1.7 82 | mdit-py-plugins==0.4.2 83 | mdurl==0.1.2 84 | mistune==3.0.2 85 | ml-dtypes==0.3.2 86 | mpmath==1.3.0 87 | myst-nb==1.1.2 88 | myst-parser==2.0.0 89 | namex==0.0.8 90 | nbclient==0.10.0 91 | nbconvert==7.16.4 92 | nbformat==5.10.4 93 | nest-asyncio==1.6.0 94 | networkx==3.4.1 95 | nibabel==5.3.0 96 | nilearn==0.10.4 97 | notebook==7.2.2 98 | notebook_shim==0.2.4 99 | numpy==1.26.4 100 | opt_einsum==3.4.0 101 | optree==0.13.0 102 | overrides==7.7.0 103 | packaging==24.1 104 | pandas==2.2.3 105 | pandocfilters==1.5.1 106 | parso==0.8.4 107 | pexpect==4.9.0 108 | pillow==10.4.0 109 | platformdirs==4.3.6 110 | prometheus_client==0.21.0 111 | prompt_toolkit==3.0.48 112 | protobuf==4.25.5 113 | psutil==6.0.0 114 | ptyprocess==0.7.0 115 | pure_eval==0.2.3 116 | pybtex==0.24.0 117 | pybtex-docutils==1.0.3 118 | pycparser==2.22 119 | pydata-sphinx-theme==0.15.4 120 | Pygments==2.18.0 121 | pyparsing==3.2.0 122 | python-dateutil==2.9.0.post0 123 | python-json-logger==2.0.7 124 | pytz==2024.2 125 | PyYAML==6.0.2 126 | pyzmq==26.2.0 127 | referencing==0.35.1 128 | requests==2.32.3 129 | rfc3339-validator==0.1.4 130 | rfc3986-validator==0.1.1 131 | rich==13.9.2 132 | rpds-py==0.20.0 133 | scikit-learn==1.5.2 134 | scipy==1.14.1 135 | seaborn==0.13.2 136 | Send2Trash==1.8.3 137 | six==1.16.0 138 | sniffio==1.3.1 
139 | snowballstemmer==2.2.0 140 | soupsieve==2.6 141 | Sphinx==7.4.7 142 | sphinx-book-theme==1.1.3 143 | sphinx-comments==0.0.3 144 | sphinx-copybutton==0.5.2 145 | sphinx-jupyterbook-latex==1.0.0 146 | sphinx-multitoc-numbering==0.1.3 147 | sphinx-thebe==0.3.1 148 | sphinx-togglebutton==0.3.2 149 | sphinx_design==0.6.1 150 | sphinx_external_toc==1.0.1 151 | sphinxcontrib-applehelp==2.0.0 152 | sphinxcontrib-bibtex==2.6.3 153 | sphinxcontrib-devhelp==2.0.0 154 | sphinxcontrib-htmlhelp==2.1.0 155 | sphinxcontrib-jsmath==1.0.1 156 | sphinxcontrib-qthelp==2.0.0 157 | sphinxcontrib-serializinghtml==2.0.0 158 | SQLAlchemy==2.0.35 159 | stack-data==0.6.3 160 | sympy==1.13.3 161 | tabulate==0.9.0 162 | tensorboard==2.16.2 163 | tensorboard-data-server==0.7.2 164 | tensorflow==2.16.2 165 | tensorflow-io-gcs-filesystem==0.37.1 166 | termcolor==2.5.0 167 | terminado==0.18.1 168 | threadpoolctl==3.5.0 169 | tinycss2==1.3.0 170 | torch==2.2.2 171 | tornado==6.4.1 172 | traitlets==5.14.3 173 | types-python-dateutil==2.9.0.20241003 174 | typing_extensions==4.12.2 175 | tzdata==2024.2 176 | uc-micro-py==1.0.3 177 | uri-template==1.3.0 178 | urllib3==2.2.3 179 | wcwidth==0.2.13 180 | webcolors==24.8.0 181 | webencodings==0.5.1 182 | websocket-client==1.8.0 183 | Werkzeug==3.0.4 184 | widgetsnbextension==4.0.13 185 | wrapt==1.16.0 186 | zipp==3.20.2 -------------------------------------------------------------------------------- /content/_config.yml: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # A default configuration that will be loaded for all jupyter books 3 | # Users are expected to override these values in their own `_config.yml` file. 4 | # This is also the "master list" of all allowed keys and values. 
 5 | 6 | ####################################################################################### 7 | # Book settings 8 | title : "Introduction to brain decoding in fMRI" # The title of the book. Will be placed in the left navbar. 9 | author : "The MAIN Educational Team" # The author of the book 10 | copyright : "2024" # Copyright year to be placed in the footer 11 | logo : "../images/neurolibre-logo.png" # A path to the book logo 12 | exclude_patterns : [] # Patterns to skip when building the book. Can be glob-style (e.g. "*skip.ipynb") 13 | 14 | ####################################################################################### 15 | # Execution settings 16 | execute: 17 | execute_notebooks : force # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off") 18 | cache : "" # A path to the jupyter cache that will be used to store execution artifacts. Defaults to `_build/.jupyter_cache/` 19 | exclude_patterns : [] # A list of patterns to *skip* in execution (e.g. a notebook that takes a really long time) 20 | timeout : -1 # remove restriction on execution time 21 | ####################################################################################### 22 | # HTML-specific settings 23 | html: 24 | favicon : "" # A path to a favicon image 25 | use_edit_page_button : true # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in 26 | use_repository_button : true # Whether to add a link to your repository button 27 | use_issues_button : true # Whether to add an "open an issue" button 28 | extra_navbar : Powered by Jupyter Book # Will be displayed underneath the left navbar. 29 | extra_footer : "" # Will be displayed underneath the footer. 30 | google_analytics_id : "" # A GA id that can be used to track book views. 
31 | home_page_in_navbar : true # Whether to include your home page in the left Navigation Bar 32 | baseurl : "https://main-educational.github.io/brain_encoding_decoding/" # The base URL where your book will be hosted. Used for creating image previews and social links. e.g.: https://mypage.com/mybook/ 33 | comments: 34 | hypothesis: true 35 | ####################################################################################### 36 | # Launch button settings 37 | launch_buttons: 38 | notebook_interface : classic # The interface interactive links will activate ["classic", "jupyterlab"] 39 | binderhub_url : https://mybinder.org # The URL of the Neurolibre BinderHub 40 | thebe : false # Add a thebe button to pages (requires the repository to run on Binder) 41 | 42 | repository: 43 | url : https://github.com/main-educational/brain_encoding_decoding # The URL to your book's repository 44 | path_to_book : "content" # A path to your book's folder, relative to the repository root. 45 | branch : main # Which branch of the repository should be used when creating links 46 | 47 | ####################################################################################### 48 | # Sphinx settings 49 | sphinx: 50 | extra_extensions: 51 | - sphinx.ext.autodoc 52 | - sphinx.ext.autosummary 53 | - sphinx.ext.napoleon 54 | - sphinx.ext.intersphinx 55 | - sphinx_design 56 | config: 57 | html_js_files: 58 | - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js 59 | nb_custom_formats: 60 | .Rmd: 61 | - jupytext.reads 62 | - fmt: Rmd 63 | mathjax3_config: 64 | TeX: 65 | Macros: 66 | "N": "\\mathbb{N}" 67 | "floor": ["\\lfloor#1\\rfloor", 1] 68 | "bmat": ["\\left[\\begin{array}"] 69 | "emat": ["\\end{array}\\right]"] 70 | latex_elements: 71 | preamble: | 72 | \newcommand\N{\mathbb{N}} 73 | \newcommand\floor[1]{\lfloor#1\rfloor} 74 | \newcommand{\bmat}{\left[\begin{array}} 75 | \newcommand{\emat}{\end{array}\right]} 76 | intersphinx_mapping: 77 | ebp: 78 | - 
"https://executablebooks.org/en/latest/" 79 | - null 80 | myst-parser: 81 | - "https://myst-parser.readthedocs.io/en/latest/" 82 | - null 83 | myst-nb: 84 | - "https://myst-nb.readthedocs.io/en/latest/" 85 | - null 86 | sphinx: 87 | - "https://www.sphinx-doc.org/en/master" 88 | - null 89 | nbformat: 90 | - "https://nbformat.readthedocs.io/en/latest" 91 | - null 92 | 93 | ####################################################################################### 94 | # parse settings 95 | parse: 96 | myst_enable_extensions: 97 | # don't forget to list any other extensions you want enabled, 98 | # including those that are enabled by default! 99 | - html_image 100 | - dollarmath 101 | - linkify 102 | - substitution 103 | - colon_fence 104 | - deflist 105 | 106 | # Add a bibtex file so that we can create citations 107 | bibtex_bibfiles: 108 | - references.bib 109 | -------------------------------------------------------------------------------- /content/_toc.yml: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | # Learn more at https://jupyterbook.org/customize/toc.html 3 | 4 | format: jb-book 5 | root: intro 6 | chapters: 7 | - file: haxby_data 8 | - file: svm_decoding 9 | - file: mlp_decoding 10 | - file: gcn_decoding 11 | - file: encoding -------------------------------------------------------------------------------- /content/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. 
If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | project = 'brain_encoding_decoding' 20 | copyright = '2024, The MAIN Educational Team ' 21 | author = '' 22 | 23 | master_doc = "index" 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | "myst_nb", 32 | "myst_parser", 33 | "sphinx_togglebutton", 34 | "sphinx_copybutton", 35 | "sphinx.ext.intersphinx", 36 | "sphinx.ext.autodoc", 37 | "sphinx.ext.viewcode", 38 | "sphinx_design" 39 | ] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ["_templates"] 43 | 44 | # List of patterns, relative to source directory, that match files and 45 | # directories to ignore when looking for source files. 46 | # This pattern also affects html_static_path and html_extra_path. 47 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] 48 | 49 | 50 | # -- Options for HTML output ------------------------------------------------- 51 | 52 | # The theme to use for HTML and HTML Help pages. See the documentation for 53 | # a list of builtin themes. 
54 | # 55 | html_title = "" 56 | html_theme = "sphinx_book_theme" 57 | html_logo = "images/neurolibre_logo.png" 58 | html_theme_options = { 59 | "github_url": "https://github.com/main-educational/brain_encoding_decoding", 60 | "repository_url": "https://github.com/main-educational/brain_encoding_decoding", 61 | "repository_branch": "main", 62 | "use_edit_page_button": True, 63 | "path_to_docs": "docs/", 64 | "expand_sections": ["use/index", "examples/index"], 65 | } 66 | 67 | intersphinx_mapping = { 68 | "python": ("https://docs.python.org/3.8", None), 69 | "jb": ("https://jupyterbook.org/", None), 70 | "myst": ("https://myst-parser.readthedocs.io/en/latest/", None), 71 | "markdown_it": ("https://markdown-it-py.readthedocs.io/en/latest", None), 72 | "nbclient": ("https://nbclient.readthedocs.io/en/latest", None), 73 | "nbformat": ("https://nbformat.readthedocs.io/en/latest", None), 74 | "sphinx": ("https://www.sphinx-doc.org/en/3.x", None), 75 | } 76 | 77 | intersphinx_cache_limit = 5 78 | 79 | nitpick_ignore = [ 80 | ("py:class", "docutils.nodes.document"), 81 | ("py:class", "docutils.nodes.Node"), 82 | ("py:class", "docutils.nodes.container"), 83 | ("py:class", "docutils.nodes.system_message"), 84 | ("py:class", "nbformat.notebooknode.NotebookNode"), 85 | ("py:class", "pygments.lexer.RegexLexer"), 86 | ] 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 
91 | html_static_path = ["static"] 92 | 93 | copybutton_selector = "div:not(.output) > div.highlight pre" 94 | 95 | nb_custom_formats = {".Rmd": ["jupytext.reads", {"fmt": "Rmd"}]} 96 | jupyter_execute_notebooks = "cache" 97 | execution_show_tb = "READTHEDOCS" in os.environ 98 | execution_timeout = 60 # Note: 30 was timing out on RTD 99 | 100 | myst_admonition_enable = True 101 | myst_amsmath_enable = True 102 | myst_html_img_enable = True 103 | myst_deflist_enable = True 104 | myst_url_schemes = ("http", "https", "mailto") 105 | panels_add_boostrap_css = False 106 | 107 | myst_enable_extensions = ["dollarmath", "colon_fence"] -------------------------------------------------------------------------------- /content/gcn_decoding/Brain_connectivity_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/gcn_decoding/Brain_connectivity_graph.png -------------------------------------------------------------------------------- /content/gcn_decoding/GCN_pipeline_main2022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/gcn_decoding/GCN_pipeline_main2022.png -------------------------------------------------------------------------------- /content/graphics/brain_encoding_decoding_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/graphics/brain_encoding_decoding_example.png -------------------------------------------------------------------------------- /content/graphics/brain_encoding_pred_timeseries.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/graphics/brain_encoding_pred_timeseries.png -------------------------------------------------------------------------------- /content/graphics/decoding_pipeline_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/graphics/decoding_pipeline_example.png -------------------------------------------------------------------------------- /content/haxby_data/brain-encoding-decoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/haxby_data/brain-encoding-decoding.png -------------------------------------------------------------------------------- /content/haxby_data/masker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/haxby_data/masker.png -------------------------------------------------------------------------------- /content/intro.md: -------------------------------------------------------------------------------- 1 | 2 | # Welcome 3 | 4 | **"Introduction to brain decoding in fMRI"** 5 | 6 | This `jupyter book` presents an introduction to `brain decoding` using `fMRI`. It was developed within the [educational courses](https://main-educational.github.io), conducted as part of the [Montreal AI and Neuroscience (MAIN) conference](https://www.main2024.org/) in October 2024. 
7 | 8 | [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://main-educational.github.io/brain_encoding_decoding/intro.html) 9 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/main-educational/brain_encoding_decoding/HEAD) 10 | [![Docker Hub](https://img.shields.io/docker/pulls/user/repo)]() 11 | [![GitHub size](https://img.shields.io/github/repo-size/main-educational/brain_encoding_decoding)](https://github.com/main-educational/brain_encoding_decoding/archive/master.zip) 12 | [![GitHub issues](https://img.shields.io/github/issues/main-educational/brain_encoding_decoding?style=plastic)](https://github.com/main-educational/brain_encoding_decoding) 13 | [![GitHub PR](https://img.shields.io/github/issues-pr/main-educational/brain_encoding_decoding)](https://github.com/main-educational/brain_encoding_decoding/pulls) 14 | [![License](https://img.shields.io/github/license/main-educational/brain_encoding_decoding)](https://github.com/main-educational/brain_encoding_decoding) 15 | [![CONP](https://img.shields.io/badge/Supported%20by-%20CONP%2FPCNO-red)](https://conp.ca/) 16 | 17 | Building upon the prior sections of the [educational courses](https://main-educational.github.io), the here presented resources aim to provide an overview of how `decoding models` can be applied to `fMRI` data in order to investigate `brain function`. Importantly, the respective methods cannot only be utilized to analyze data from `biological agents` (e.g. `humans`, `non-human primates`, etc.) but also `artificial neural networks`, as well as presenting the opportunity to compare processing in both. They are thus core approaches that are prominently used at the intersection of `neuroscience` and `AI`. 
18 | 19 | 20 | ```{figure} haxby_data/brain-encoding-decoding.png 21 | --- 22 | width: 800px 23 | name: brain-encoding-decoding-fig 24 | --- 25 | 26 | To test the consistency of representations in artificial neural networks (ANNs) and the brain, it is possible to **encode** brain activity based on ANN presented with similar stimuli, or **decode** brain activity by predicting the expected ANN activity and corresponding annotation of cognitive states. Figure from [Schrimpf et al. (2020)](https://doi.org/10.1101/407007) {cite:p}`Schrimpf2020-mc`, under a [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/) license. 27 | ``` 28 | 29 | The tutorials make heavy use of [nilearn](https://nilearn.github.io/stable/index.html) concerning 30 | manipulating and processing `fMRI` data, as well as [scikit-learn](https://scikit-learn.org/stable/) and [pytorch](https://pytorch.org/) to apply `decoding models` on the data. 31 | 32 | We used the [Jupyter Book](https://jupyterbook.org/en/stable/intro.html) framework to provide all materials in an open, structured and interactive manner. ALl pages and section you see here are built from `markdown` files or `jupyter notebooks`, allowing you to read through the materials and/or run them, locally or in the cloud. The three symbols on the top right allow to enable full screen mode, link to the underlying [GitHub repository](https://github.com/main-educational/brain_encoding_decoding) and allow you to download the respective sections as a `pdf` or `jupyter notebook` respectively. Some sections will additionally have a little rocket in that row which will allow you to interactively rerun certain parts via cloud computing (please see the [Binder](#Binder) section for more information). 33 | 34 | 35 | ## Brain decoding vs. encoding 36 | 37 | In short, `encoding` and `decoding` entail contrary operations that can yet be utilized in a complementary manner. `Encoding models` applied to `brain data`, e.g. 
`fMRI`, aim to predict `brain responses`/`activity` based on `annotations` or `features` of the `stimuli` perceived by the `participant`. These can be obtained from a multitude of options, including `artificial neural networks` which would allow to relate their `processing` of the `stimuli` to that of `biological agents`, ie `brains`. 38 | `Decoding models` on the other hand comprise `models` with which we aim to `estimate`/`predict` what a `participant` is `perceiving` or `doing` based on `recordings` of `brain responses`/`activity`, e.g. `fMRI`. 39 | 40 | ```{figure} graphics/brain_encoding_decoding_example.png 41 | --- 42 | width: 800px 43 | name: brain_encoding_decoding_example_fig 44 | --- 45 | 46 | `Encoding` and `decoding` present contrary, yet complementary operations. While the former targets the prediction of `brain activity`/`responses` based on stimulus percepts/features (e.g. vision & audition), cognitive states or behavior, the latter aims to predict those aspects based on `brain activity`/`responses`. 47 | ``` 48 | 49 | More information and their application can be found in the respective sections of this resource. You can either use the `ToC` on the left or the links below to navigate accordingly. 50 | 51 | 52 | ::::{card-carousel} 2 53 | 54 | :::{card} 55 | :margin: 3 56 | :class-body: text-center 57 | :class-header: bg-light text-center 58 | :link: https://main-educational.github.io/brain_encoding_decoding/haxby_data.html 59 | **An overview of the Haxby Dataset** 60 | ^^^ 61 | ```{image} https://main-educational.github.io/brain_encoding_decoding/_images/e7ee3822777de29b4978b40bd5d14627bd5d68417d8daea46364bd5f0662b01b.png 62 | :height: 100 63 | ``` 64 | 65 | Explore and prepare the tutorial dataset. 
66 | +++ 67 | Explore this tutorial {fas}`arrow-right` 68 | ::: 69 | 70 | :::{card} 71 | :margin: 3 72 | :class-body: text-center 73 | :class-header: bg-light text-center 74 | :link: https://main-educational.github.io/brain_encoding_decoding/svm_decoding.html 75 | 76 | **Brain decoding with SVM** 77 | ^^^ 78 | ```{image} https://main-educational.github.io/brain_encoding_decoding/_images/2021c085709559df545bf08eb2ee051f9098c2f5619e666dceeb879ff1801dfb.png 79 | :height: 100 80 | ``` 81 | 82 | Utilizing an SVM classifier to predict percepts from fMRI data. 83 | +++ 84 | Explore this tutorial {fas}`arrow-right` 85 | ::: 86 | :::: 87 | 88 | ::::{card-carousel} 2 89 | 90 | :::{card} 91 | :margin: 3 92 | :class-body: text-center 93 | :class-header: bg-light text-center 94 | :link: https://main-educational.github.io/brain_encoding_decoding/mlp_decoding.html 95 | 96 | **Brain decoding with MLP** 97 | ^^^ 98 | ```{image} https://main-educational.github.io/brain_encoding_decoding/_images/multilayer-perceptron.png 99 | :height: 100 100 | ``` 101 | 102 | Brain decoding using a basic artificial neural network. 103 | +++ 104 | Explore this tutorial {fas}`arrow-right` 105 | ::: 106 | 107 | :::{card} 108 | :margin: 3 109 | :class-body: text-center 110 | :class-header: bg-light text-center 111 | :link: https://main-educational.github.io/brain_encoding_decoding/gcn_decoding.html 112 | 113 | **Brain decoding with GCN** 114 | ^^^ 115 | ```{image} https://main-educational.github.io/brain_encoding_decoding/_images/GCN_pipeline_main2022.png 116 | :height: 100 117 | ``` 118 | 119 | Graph convolutional networks for brain decoding. 120 | +++ 121 | Explore this tutorial {fas}`arrow-right` 122 | ::: 123 | :::: 124 | 125 | 126 | ## Setup 127 | 128 | There are two ways to run the tutorials: `local installation` and using free cloud computing provided by [Binder](https://mybinder.org/). 
As noted below, we strongly recommend the `local installation`, as the `Binder` option comes with limited computational resources and does not allow you to further explore the `approaches` presented in this tutorial directly on your own machine.
158 | To do this, run the following commands in your terminal, it will create the 159 | `environment` in a folder named `main_edu_brain_decoding`: 160 | 161 | ```bash 162 | python3 -m venv main_edu_brain_decoding 163 | ``` 164 | Then the following `command` will `activate` the `environment`: 165 | 166 | ```bash 167 | source main_edu_brain_decoding/bin/activate 168 | ``` 169 | 170 | Finally, you can install the required `libraries`: 171 | 172 | ```bash 173 | pip install -r requirements.txt 174 | ``` 175 | 176 | 3. Navigate to the `content` of the `jupyter book`: 177 | ```bash 178 | cd content/ 179 | ``` 180 | 181 | Now that you are all set, you can run the notebooks with the command: 182 | 183 | ```bash 184 | jupyter notebook 185 | ``` 186 | Click on the `.md` files. They will be rendered as jupyter notebooks 🎉 187 | 188 | Alternatively, you can use [conda/miniconda](https://docs.conda.io/projects/conda/en/latest/index.html) to create the needed `python environment` like so: 189 | 190 | git clone https://github.com/main-educational/brain_encoding_decoding.git 191 | cd brain_encoding_decoding 192 | conda env create -f environment.yml 193 | 194 | 195 | ``` 196 | 197 | ```{tab-item} Cloud computing with Binder 198 | If you wish to run the tutorial in `Binder`, click on the rocket icon 🚀 in the top right of a given `notebook` to launch it on `Binder`. 199 | 200 | :::{warning} 201 | The computing resource on `Binder` is limited. 202 | Some cells might not execute correctly, or the data download will not be completed. 203 | For the full experience, we recommend using the local set up instruction. 
204 | ::: 205 | 206 | ``` 207 | ```` 208 | 209 | 210 | ## Instructors 211 | 212 | This tutorial was prepared and presented by 213 | 214 | ::::{card-carousel} 2 215 | 216 | :::{card} Pierre-Louis Barbarant 217 | :margin: 3 218 | :class-body: text-center 219 | :link: https://github.com/pbarbarant 220 | :img-top: https://avatars.githubusercontent.com/u/104081777?v=4 221 | ::: 222 | 223 | :::{card} Peer Herholz 224 | :margin: 3 225 | :class-body: text-center 226 | :link: https://github.com/PeerHerholz 227 | :img-top: https://avatars.githubusercontent.com/u/20129524?v=4?s=100 228 | ::: 229 | 230 | :::: 231 | 232 | It is based on earlier versions created by: 233 | 234 | ::::{card-carousel} 3 235 | :::{card} Isil Bilgin 236 | :margin: 3 237 | :class-body: text-center 238 | :link: https://github.com/complexbrains 239 | :img-top: https://avatars.githubusercontent.com/u/45263281?v=4 240 | ::: 241 | 242 | :::{card} Alexandre Pasquiou 243 | :margin: 3 244 | :class-body: text-center 245 | :link: https://twitter.com/a_pasquiou 246 | :img-top: https://pbs.twimg.com/profile_images/1542505896386764800/pyC2rgHp_400x400.jpg 247 | ::: 248 | 249 | :::{card} Pravish Sainath 250 | :margin: 3 251 | :class-body: text-center 252 | :link: https://github.com/pravishsainath 253 | :img-top: https://avatars.githubusercontent.com/u/13696562?v=4 254 | ::: 255 | :::: 256 | 257 | ## Thanks and acknowledgements 258 | Parts of the tutorial are directly adapted from a [nilearn tutorial](https://nilearn.github.io/auto_examples/plot_decoding_tutorial.html) on the so-called [Haxby dataset](). 259 | 260 | It was adapted from a prior version which was prepared and presented by 261 | [Pravish Sainath](https://github.com/pravishsainath) 262 | [Shima Rastegarnia](https://github.com/srastegarnia), 263 | [Hao-Ting Wang](https://github.com/htwangtw) 264 | [Loic Tetrel](https://github.com/ltetrel) and [Pierre Bellec](https://github.com/pbellec). 
265 | 266 | Furthermore, some `images` and `code` are used from a previous iteration of this `tutorial`, prepared by [Dr Yu Zhang](https://github.com/zhangyu2ustc). 267 | 268 | We would like to thank the Jupyter community, specifically, the Executable/Jupyter Book and mybinder project for enabling us to create this tutorial. Furthermore, we are grateful for the entire open neuroscience community and the amazing support and resources it provides. This includes the community driven development of data and processing standards, as well as unbelievable amount of software packages that make the here introduced approaches possible to begin with. 269 | 270 | The tutorial is rendered here using [Jupyter Book](https://github.com/jupyter/jupyter-book). 271 | 272 | ## References 273 | 274 | ```{bibliography} 275 | :filter: docname in docnames 276 | ``` 277 | --- 278 | 279 | -------------------------------------------------------------------------------- /content/mlp_decoding.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | cell_metadata_filter: -all 4 | formats: md:myst 5 | text_representation: 6 | extension: .md 7 | format_name: myst 8 | format_version: 0.13 9 | jupytext_version: 1.11.5 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | # Brain decoding with MLP 17 | 18 | This part of the `session` aims to make `participants` familiar with [Multilayer Peceptrons](https://en.wikipedia.org/wiki/Multilayer_perceptron) as one possible `decoding model` that can be applied to `brain data`. 
The objectives 📍 are: 19 | 20 | - get to know the basics of `Multilayer Peceptrons` 21 | 22 | - `model` creation 23 | - `model` `training` 24 | - `model` `testing` 25 | 26 | 27 | ## Multilayer Perceptron 28 | ```{figure} mlp_decoding/multilayer-perceptron.png 29 | --- 30 | width: 800px 31 | name: multilayer-perceptron-fig 32 | --- 33 | A multilayer perceptron with 25 units on the input layer, a single hidden layer with 17 units, and an output layer with 9 units. Figure generated with the [NN-SVG](http://alexlenail.me/NN-SVG/index.html) tool by [Alexander Lenail]. The figure is shared under a [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/) license. 34 | ``` 35 | We are going to train a `Multilayer Perceptron` (`MLP`) `classifier` for `brain decoding` on the [Haxby dataset](https://main-educational.github.io/brain_encoding_decoding/haxby_data.html). `MLP`s are one of the most basic architecture of [artificial neural networks](https://en.wikipedia.org/wiki/Artificial_neural_network). As such, `MLP`s consist of `input` and `output` `layers` as well as `hidden layers` that process the `input` through a succession of `transformations` towards the `output layer` that performs the task at hand, e.g. a `classification` or `regression`. Like other `machine learning models` for `supervised learning`, an `MLP` initially goes through a `training phase`. During this `supervised phase`, the `network` is taught what to look for and what is the desired output via its `objective function`. This refers to, minimizing the `loss`, ie the deviation of `predictions` from the "ground truth", and thus increasing its performance. 36 | 37 | `MLP`s were actually among the first `ANN`s to appear, specifically the [Mark I Peceptron](https://en.wikipedia.org/wiki/Perceptron) which you can see below. 
38 | 39 | ```{figure} https://preview.redd.it/wgzps0pvcny91.jpg?width=640&crop=smart&auto=webp&s=0b2e56dc4eaa886ebd01ac0cd8e51fc4efdb1d01 40 | --- 41 | width: 400px 42 | name: markI-perceptron-fig 43 | --- 44 | Frank Rosenblatt with a Mark I Perceptron computer in 1960. 45 | ``` 46 | 47 | 48 | In this tutorial, we are going to train the simplest `MLP` architecture featuring one `input layer`, one `output layer` and just one `hidden layer`. 49 | 50 | ## Theoretical motivation 51 | 52 | The previous tutorial on [brain decoding with SVM](https://main-educational.github.io/brain_encoding_decoding/svm_decoding.html) 53 | shows how to use a linear combination of brain features to train a predictor. 54 | 55 | Let's take a moment to consider this: a 1-layer perceptron with a sigmoid activation function 56 | models the relation between `X` (the input data) and `y` (the predicted data) 57 | the same way a logistic regression would: 58 | $\hat{y} = \sigma(X \beta + \beta_0)$ 59 | 60 | ```{figure} mlp_decoding/logistic_regression.png 61 | --- 62 | width: 200px 63 | name: logistic-regression-fig 64 | --- 65 | A fitted logistic regression function classifying two different classes. Courtesy of [Jérôme Dockès](https://jeromedockes.github.io/). 66 | ``` 67 | 68 | If one optimizes the parameters of this MLP to minimize a cross-entropy loss, 69 | they're actually optimizing for the same objective function as in a classical logistic regression problem: 70 | $\underset{\beta, \beta_0}{\min} \sum_k y_k \log(\hat{y_k}) + (1 - y_k) \log(1 - \hat{y_k})$ 71 | 72 | As a rule of thumb, one can consider that a 1-layer perceptron 73 | (and therefore any last layer of a multi-layer perceptron) 74 | works similarly to an SVC. 75 | 76 | A big motivation for using multiple-layer perceptrons is that they can introduce non-linearities 77 | in our data. 
When training such models, the hope is that the hidden layers of the model 78 | will find meaningful non-linear combinations of the input features which help us solve 79 | our decoding problem. 80 | 81 | ## Getting the data 82 | 83 | We are going to work with the Haxby dataset {cite:p}`Haxby2001-vt` again. You can check the section {ref}`haxby-dataset` for more details on that `dataset`. Here we are going to quickly `download` and prepare it for `machine learning applications` with a set of `predictive variables`, the `brain time series` `X`, and a `dependent variable`, the respective `cognitive processes`/`function`/`percepts` `y`. 84 | 85 | ```{code-cell} ipython3 86 | import os 87 | import warnings 88 | warnings.filterwarnings(action='once') 89 | 90 | from nilearn import datasets 91 | # We are fetching the data for subject 4 92 | data_dir = os.path.join('..', 'data') 93 | sub_no = 4 94 | haxby_dataset = datasets.fetch_haxby(subjects=[sub_no], fetch_stimuli=True, data_dir=data_dir) 95 | func_file = haxby_dataset.func[0] 96 | 97 | # mask the data 98 | from nilearn.input_data import NiftiMasker 99 | mask_filename = haxby_dataset.mask_vt[0] 100 | masker = NiftiMasker(mask_img=mask_filename, standardize=True, detrend=True) 101 | X = masker.fit_transform(func_file) 102 | 103 | # cognitive annotations 104 | import pandas as pd 105 | behavioral = pd.read_csv(haxby_dataset.session_target[0], delimiter=' ') 106 | y = behavioral['labels'] 107 | ``` 108 | 109 | As an initial check, we'll have a look at the size of `X` and `y`: 110 | 111 | ```{code-cell} ipython3 112 | categories = y.unique() 113 | print(categories) 114 | print(y.shape) 115 | print(X.shape) 116 | ``` 117 | 118 | So we have `1452` `time points`, with one `label` for the respective `stimulus percept` each, and for each `time point` we have `recordings` of `brain` activity obtained via `fMRI` across `675 voxels` (within the `VT` `mask`). 
We can also see that the `stimulus percept`s span `9` different `categories`. 119 | 120 | However, concerning our planned analyses, we need to convert our `categories` into a [one-hot encoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html): 121 | 122 | ```{code-cell} ipython3 123 | # creating instance of one-hot-encoder 124 | from sklearn.preprocessing import OneHotEncoder 125 | import numpy as np 126 | enc = OneHotEncoder(handle_unknown='ignore') 127 | y_onehot = enc.fit_transform(np.array(y).reshape(-1, 1)) 128 | # turn the sparse matrix into a pandas dataframe 129 | y = pd.DataFrame(y_onehot.toarray()) 130 | display(y[:10]) 131 | ``` 132 | 133 | ## Training a model 134 | 135 | As introduced in the prior `tutorials`, one of the most important aspects of `machine learning` is the split between `train` and `tests`. `MLP`s are no exception to that and thus we need to split our dataset accordingly. We will keep `20%` of the `time points` as `test`, and then set up a `10 fold cross validation` for `training/validation`. 136 | 137 | ```{code-cell} ipython3 138 | from sklearn.model_selection import train_test_split 139 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 140 | ``` 141 | 142 | With that, we can already build our `MLP`. Here, we are going to use [Tensorflow](https://www.tensorflow.org/) and [Keras](https://keras.io/). As with every other `ANN`, we need to `import` the respective components, here, the `model` and `layer` `type`. In our case we will use a [`Sequential` `model`](https://keras.io/guides/sequential_model/) and [`Dense`](https://keras.io/api/layers/core_layers/dense/) `layers`. 
143 | 144 | ```{code-cell} ipython3 145 | from keras.models import Sequential 146 | from keras.layers import Dense 147 | ``` 148 | 149 | `````{admonition} A note regarding our MLP 150 | :class: tip 151 | Please note that the example `MLP` we are going to `create` and `train` here is rather simple as we want to enable its application on machines with rather limited computational resources (ie your laptops or binder). "Real-world" models are usually more complex and might also entail different `types` and `layers`. 152 | ````` 153 | 154 | +++ 155 | 156 | Initially, we need to create our, so far, `empty model`. 157 | 158 | ```{code-cell} ipython3 159 | # number of unique conditions that we have 160 | model_mlp = Sequential() 161 | ``` 162 | 163 | Next, we can add the `layers` to our `model`, starting with the `input layer`. Given this is a rather short introduction to the topic and does not focus on `ANN`s, we are going to set the `kernel initialization` and `activation function` to appropriate defaults (Please have a look at the [Introduction to deep learning session](https://main-educational.github.io/material.html#introduction-to-deep-learning-using-pytorch) for more information.). 164 | 165 | ```{code-cell} ipython3 166 | model_mlp.add(Dense(50 , input_dim = 675, kernel_initializer="uniform", activation = 'relu')) 167 | ``` 168 | 169 | As noted above, we are using `Dense` `layers` and as you can see, we set the `input dimensions` to `675`. You might have already notices that this is the number of `voxels` we have `data` from. Setting the `input dimension` according to the `data dimensions` is rather important is referred to as the [semantic gap](https://en.wikipedia.org/wiki/Semantic_gap): the transformation of `actions` & `percepts` conducted/perceived by `human`s into `computational representations`. 
For example, pictures are "nothing" but a huge `array` for a computer and what will be submitted to the input layer of an `ANN` (note: this also holds true for basically any other type of `data`). Here, our `MLP` receives the extracted `brain activity patterns` as `input` which are already in the right `array` format thanks to `nilearn`. Thus, always carefully think about what your `input` `data` entails and how it is structured to then setup your `input layer` accordingly. 170 | 171 | Next, we are going to add one `hidden layer`. 172 | 173 | ```{code-cell} ipython3 174 | model_mlp.add(Dense(30, kernel_initializer="uniform", activation = 'relu')) 175 | ``` 176 | 177 | And because we are creating a very simple `MLP` with only three `layers`, we already add our `output layer`, using the `softmax` `activation function` given that we aim to `train` our `MLP` to `predict` the different `categories` that were perceived by the `participants` from their `brain activity patterns`. 178 | 179 | ```{code-cell} ipython3 180 | model_mlp.add(Dense(len(categories), activation = 'softmax')) 181 | ``` 182 | 183 | To get a nice overview of our `ANN`, we can now use the `.summary()` `function`, which will provide us with the `model type`, `model parameters` and for each `layer`, the its `type`, `shape` and `parameters`. 184 | 185 | ```{code-cell} ipython3 186 | model_mlp.summary() 187 | ``` 188 | 189 | With that, we already created our `MLP` `architecture`, which is now ready to be `compiled`! Within this step, we will set the `optimizer`, `loss function` and `metric`, ie `components` that define how our `MLP` will `learn`. 190 | 191 | ```{code-cell} ipython3 192 | model_mlp.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy']) 193 | ``` 194 | 195 | Now it's to `train` our `MLP`. Thus, we have to `fit` it to our `data`, specifically only the `training` `data`. 
Here, we are going to provide a few more `hyperparameters` that will define how our `MLP` is going to `learn`. This entails the `batch size`, the `epochs` and `split` of `validation sets`. We will assign the respective output to a variable so that we can investigate our `MLP`'s `learning process`. 196 | 197 | ```{code-cell} ipython3 198 | history = model_mlp.fit(X_train, y_train, batch_size = 10, 199 | epochs = 10, validation_split = 0.2) 200 | ``` 201 | 202 | This looks about and what we would expect the `learning process` to be: across `epochs`, the `loss` is decreasing and the `accuracy` is increasing. 203 | 204 | `````{admonition} A note regarding the learning process of our MLP 205 | :class: tip 206 | Comparable to its architecture, our `MLP`'s `learning process` is also not really what you would see on the "real world". Usually, `ANN`s are `trained` way more, for longer periods of times, more `epochs` and on more `data`. However, we keep it rather short as we want to enable its application on machines with rather limited computational resources (ie your laptops or binder). 207 | ````` 208 | 209 | While this is already informative, we can also plot the `loss` and `accuracy` in the `training` and `validation` `sets` respectively. Let's start with the `loss`. 210 | 211 | ```{code-cell} ipython3 212 | import matplotlib.pyplot as plt 213 | import seaborn as sns 214 | 215 | plt.plot(history.history['loss'], color='m') 216 | plt.plot(history.history['val_loss'], color='c') 217 | plt.title('MLP loss') 218 | plt.ylabel('loss') 219 | plt.xlabel('epoch') 220 | plt.legend(['train', 'validation'], loc = 'upper right') 221 | 222 | sns.despine(offset=5) 223 | 224 | plt.show() 225 | ``` 226 | 227 | And now the same for the `accuracy`. 
228 | 229 | ```{code-cell} ipython3 230 | import matplotlib.pyplot as plt 231 | import seaborn as sns 232 | 233 | plt.plot(history.history['accuracy'], color='m') 234 | plt.plot(history.history['val_accuracy'], color='c') 235 | plt.title('MLP accuracy') 236 | plt.ylabel('accuracy') 237 | plt.xlabel('epoch') 238 | plt.legend(['train', 'validation'], loc = 'upper left') 239 | 240 | sns.despine(offset=5) 241 | 242 | plt.show() 243 | ``` 244 | 245 | `````{admonition} How would you interpret these plots... 246 | :class: tip 247 | concerning our `MLP`'s `learning process`? Does it make sense? If not, how should it look like? Could you use these plots to evaluate certain aspects of the `learning process`, e.g. `regularization`? 248 | ````` 249 | 250 | +++ 251 | 252 | ## Assessing performance 253 | 254 | After evaluating the `training` of our `MLP`, we of course also need to evaluate its (`predictive`) `performance`. Here, this refers to the `accuracy` of our `MLP`'s outcomes, ie its `predictions`. We already saw this in the above plots and during the `training` across `epochs` but let's check the `accuracy` of the `prediction` on the `training set` again: 255 | 256 | ```{code-cell} ipython3 257 | from sklearn.metrics import classification_report 258 | y_train_pred = model_mlp.predict(X_train) 259 | print(classification_report(y_train.values.argmax(axis = 1), y_train_pred.argmax(axis=1))) 260 | ``` 261 | 262 | Why you might think: "Oh, that's awesome, great performance.", such outcomes are usually perceived as dangerously high and indicate that something is off... 263 | 264 | `````{admonition} Why should a close-to-perfect performance indicate that something is wrong? 265 | :class: tip 266 | What do you think is the rationale to say that very high `scores` are actually "suspicious" and tells us that something is most likely wrong? Try thinking about the things you've learned so far: `training`/`test`/`validation` `datasets` and their size, `models`, `predictions`, etc. . 
267 | ````` 268 | 269 | 270 | Luckily, we did `split` our `dataset` into **independent** `training` and `test` `sets`. So, let's check our `MLP`'s performance on the `test set`: 271 | 272 | ```{code-cell} ipython3 273 | y_test_pred = model_mlp.predict(X_test) 274 | print(classification_report(y_test.values.argmax(axis = 1), y_test_pred.argmax(axis=1))) 275 | ``` 276 | 277 | As you can see, the `scores`, ie `performance`, drops quite a bit. Do you know why and which you would report, e.g. in a `publication`? 278 | 279 | Beside checking the overall `scores`, there are other options to further evaluate our `MLP`'s (or basically any other model's) `performance`. One of the most commonly used ones is called `confusion matrix` (which you most likely have seen before in this course). A `confusion matrix` displays how often a given `sample` was `predicted` as a certain `label`, thus, for example, providing insights into differentiability, etc. . To implement this, we initially have to compute the `confusion matrix`: 280 | 281 | ```{code-cell} ipython3 282 | import numpy as np 283 | from sklearn.metrics import confusion_matrix 284 | 285 | cm_svm = confusion_matrix(y_test.values.argmax(axis = 1), y_test_pred.argmax(axis=1)) 286 | model_conf_matrix = cm_svm.astype('float') / cm_svm.sum(axis = 1)[:, np.newaxis] 287 | ``` 288 | 289 | After that, we can `plot` it for evaluation. 
290 | 291 | ```{code-cell} ipython3 292 | import pandas as pd 293 | import seaborn as sns 294 | 295 | df_cm = pd.DataFrame(model_conf_matrix, index = categories, 296 | columns = categories) 297 | 298 | plt.figure(figsize = (10,7)) 299 | sns.heatmap(df_cm, annot = True, cmap = 'Blues', square = True) 300 | plt.xticks(rotation = 45) 301 | plt.title('MLP decoding results - confusion matrix' , fontsize = 15, fontweight = 'bold') 302 | plt.xlabel("true labels", fontsize = 14, fontweight = 'bold') 303 | plt.ylabel("predicted labels", fontsize = 14, fontweight = 'bold') 304 | plt.show() 305 | ``` 306 | 307 | Based on this outcome: how would you interpret the `confusion matrix`? Are some `categories` better `"decodable"` than others? Could even make such a statement? 308 | 309 | +++ 310 | 311 | ## Summary 312 | 313 | With that, we already reached the end of this `tutorial` within which we talked about how to `create`, `train` and `evaluate` a `MLP` as one possible `decoding model` that can be applied to `brain data`. As mentioned before, the `MLP` utilized here is rather simple and `models` you see (and maybe use) out in the "real world" will most likely be way more complex. However, their application to `brain data` concerning `input`, `hidden` and `output layers` follows the same outline. 314 | 315 | ```{tip} 316 | Unfortunately, visualizing the features/transformations of an `ANN` is quite often not straightforward as it depends on the given `ANN` architecture. However, you can check this fantastic 317 | [distill article](https://distill.pub/2017/feature-visualization/) to learn more about `feature visualization` in `artificial neural networks`. 318 | ``` 319 | 320 | ## Exercises 321 | 322 | * What is the most difficult category to decode? Why? 323 | * The model seemed to overfit. Try adding a `Dropout` layer to regularize the model. 
You can read about dropout in keras in this [blog post](https://towardsdatascience.com/machine-learning-part-20-dropout-keras-layers-explained-8c9f6dc4c9ab). 324 | * Try to add layers or hidden units, and observe the impact on overfitting and training time. 325 | -------------------------------------------------------------------------------- /content/mlp_decoding/logistic_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/mlp_decoding/logistic_regression.png -------------------------------------------------------------------------------- /content/mlp_decoding/multilayer-perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/mlp_decoding/multilayer-perceptron.png -------------------------------------------------------------------------------- /content/references.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{grill-spector_functional_2014, 2 | title = "The functional architecture of the ventral temporal cortex and its role in categorization", 3 | volume = 15, 4 | issn = "1471-0048", 5 | url = "https://doi.org/10.1038/nrn3747", 6 | doi = "10.1038/nrn3747", 7 | abstract = "Understanding information processing in the visual system requires an understanding of the interplay among the system's computational goals and representations, and their physical implementation in the brain.Recent results indicate a consistent topology of functional representations relative to each other and anatomical landmarks in high-level visual cortex.The consistent topology of functional representations reveals that axes of representational spaces are physically implemented as axes in cortical space.Anatomical 
constraints might determine the topology of functional representations in the brain, which would explain the correspondence between representational and anatomical axes in the ventral temporal cortex (VTC).Superimposition and topology generate predictable spatial convergences and divergences among functional representations, which in turn enable information integration and parallel processing, respectively.Superimposition and topological organization in the VTC generates a series of nested functional representations, the arrangements of which generate a spatial hierarchy of category information.The spatial scale of functional representations may be tied to the level of category abstractness in which more abstract information is represented in larger spatial scales across the VTC.", 8 | number = 8, 9 | journal = "Nature Reviews Neuroscience", 10 | author = "Grill-Spector, Kalanit and Weiner, Kevin S.", 11 | month = aug, 12 | year = 2014, 13 | pages = "536--548", 14 | } 15 | 16 | 17 | @ARTICLE{Haxby2001-vt, 18 | title = "Distributed and overlapping representations of faces and objects 19 | in ventral temporal cortex", 20 | author = "Haxby, J V and Gobbini, M I and Furey, M L and Ishai, A and 21 | Schouten, J L and Pietrini, P", 22 | abstract = "The functional architecture of the object vision pathway in the 23 | human brain was investigated using functional magnetic resonance 24 | imaging to measure patterns of response in ventral temporal 25 | cortex while subjects viewed faces, cats, five categories of 26 | man-made objects, and nonsense pictures. A distinct pattern of 27 | response was found for each stimulus category. The 28 | distinctiveness of the response to a given category was not due 29 | simply to the regions that responded maximally to that category, 30 | because the category being viewed also could be identified on the 31 | basis of the pattern of response when those regions were excluded 32 | from the analysis. 
Patterns of response that discriminated among 33 | all categories were found even within cortical regions that 34 | responded maximally to only one category. These results indicate 35 | that the representations of faces and objects in ventral temporal 36 | cortex are widely distributed and overlapping.", 37 | journal = "Science", 38 | volume = 293, 39 | number = 5539, 40 | pages = "2425--2430", 41 | month = sep, 42 | year = 2001, 43 | language = "en" 44 | } 45 | 46 | @ARTICLE{Schrimpf2020-mc, 47 | title = "{Brain-Score}: Which Artificial Neural Network for Object 48 | Recognition is most {Brain-Like}?", 49 | author = "Schrimpf, Martin and Kubilius, Jonas and Hong, Ha and Majaj, 50 | Najib J and Rajalingham, Rishi and Issa, Elias B and Kar, Kohitij 51 | and Bashivan, Pouya and Prescott-Roy, Jonathan and Geiger, 52 | Franziska and Schmidt, Kailyn and Yamins, Daniel L K and DiCarlo, 53 | James J", 54 | journal = "bioRxiv", 55 | pages = "407007", 56 | month = jan, 57 | year = 2020, 58 | language = "en" 59 | } 60 | 61 | @ARTICLE{Zhang2021-fa, 62 | title = "Functional annotation of human cognitive states using deep graph convolution", 63 | volume = 231, 64 | journal = "NeuroImage", 65 | author = "Zhang, Yu and Tetrel, Loïc and Thirion, Bertrand and Bellec, Pierre", 66 | month = may, 67 | year = 2021, 68 | pages = 117847, 69 | language = "en" 70 | } 71 | -------------------------------------------------------------------------------- /content/svm_decoding.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | cell_metadata_filter: -all 4 | formats: md:myst 5 | text_representation: 6 | extension: .md 7 | format_name: myst 8 | format_version: 0.13 9 | jupytext_version: 1.11.5 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | # Brain decoding with SVM 17 | 18 | ## Support vector machines 19 | ```{figure} svm_decoding/optimal-hyperplane.png 20 | --- 21 | 
width: 500px 22 | name: optimal-hyperplane-fig 23 | --- 24 | An SVM aims at finding an optimal hyperplane to separate two classes in high-dimensional space, while maximizing the margin. Image from the [scikit-learn SVM documentation](https://scikit-learn.org/stable/modules/svm.html) under [BSD 3-Clause license](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING). 25 | ``` 26 | We are going to train a support vector machine (SVM) classifier for brain decoding on the Haxby dataset. SVM is often successful in high dimensional spaces, and it is a popular technique in neuroimaging. 27 | 28 | In the SVM algorithm, we plot each data item as a point in N-dimensional space where N depends on the number of features that distinctly classify the data points (e.g. when the number of features is 3 the hyperplane becomes a two-dimensional plane). The objective here is finding a hyperplane (decision boundaries that help classify the data points) with the maximum margin (i.e. the maximum distance between data points of both classes). Data points falling on either side of the hyperplane can be attributed to different classes. 29 | 30 | The scikit-learn [documentation](https://scikit-learn.org/stable/modules/svm.html) contains a detailed description of different variants of SVM, as well as examples of applications with simple datasets. 31 | 32 | ## Getting the data 33 | We are going to download the dataset from Haxby and colleagues (2001) {cite:p}`Haxby2001-vt`. You can check section {ref}`haxby-dataset` for more details on that dataset. Here we are going to quickly download it, and prepare it for machine learning applications with a set of predictive variables, the brain time series `X`, and a dependent variable, the annotation on cognition `y`. 
34 | 35 | ```{code-cell} ipython3 36 | import os 37 | import warnings 38 | warnings.filterwarnings(action='ignore') 39 | 40 | from nilearn import datasets 41 | # We are fetching the data for subject 4 42 | data_dir = os.path.join('..', 'data') 43 | sub_no = 4 44 | haxby_dataset = datasets.fetch_haxby(subjects=[sub_no], fetch_stimuli=True, data_dir=data_dir) 45 | func_file = haxby_dataset.func[0] 46 | 47 | # mask the data 48 | from nilearn.maskers import NiftiMasker 49 | mask_filename = haxby_dataset.mask_vt[0] 50 | masker = NiftiMasker(mask_img=mask_filename, standardize=True, detrend=True) 51 | X = masker.fit_transform(func_file) 52 | 53 | # cognitive annotations 54 | import pandas as pd 55 | behavioral = pd.read_csv(haxby_dataset.session_target[0], delimiter=' ') 56 | y = behavioral['labels'] 57 | ``` 58 | 59 | Let's check the size of `X` and `y`: 60 | 61 | ```{code-cell} ipython3 62 | categories = y.unique() 63 | print(categories) 64 | print(y.shape) 65 | print(X.shape) 66 | ``` 67 | 68 | So we have 1452 time points, with one cognitive annotation each, and for each time point we have recordings of fMRI activity across 675 voxels. We can also see that the cognitive annotations span 9 different categories. 69 | 70 | ## Training a model 71 | We are going to start by splitting our dataset between train and test. We will keep 20% of the time points as test, and then set up a 10-fold cross-validation for training/validation. 
72 | 73 | ```{code-cell} ipython3 74 | from sklearn.model_selection import train_test_split 75 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 76 | ``` 77 | 78 | Now we can initialize a SVM classifier, and train it: 79 | 80 | ```{code-cell} ipython3 81 | from sklearn.svm import SVC 82 | model_svm = SVC(random_state=0, kernel='linear', C=1) 83 | model_svm.fit(X_train, y_train) 84 | ``` 85 | 86 | ## Assessing performance 87 | Let's check the accuracy of the prediction on the training set: 88 | 89 | ```{code-cell} ipython3 90 | from sklearn.metrics import classification_report 91 | y_train_pred = model_svm.predict(X_train) 92 | print(classification_report(y_train, y_train_pred)) 93 | ``` 94 | 95 | This is dangerously high. Let's check on the test set: 96 | 97 | ```{code-cell} ipython3 98 | y_test_pred = model_svm.predict(X_test) 99 | print(classification_report(y_test, y_test_pred)) 100 | ``` 101 | 102 | We can have a look at the confusion matrix: 103 | 104 | ```{code-cell} ipython3 105 | # confusion matrix 106 | import sys 107 | import numpy as np 108 | from sklearn.metrics import confusion_matrix 109 | sys.path.append('../src') 110 | import visualization 111 | cm_svm = confusion_matrix(y_test, y_test_pred) 112 | model_conf_matrix = cm_svm.astype('float') / cm_svm.sum(axis=1)[:, np.newaxis] 113 | 114 | visualization.conf_matrix(model_conf_matrix, 115 | categories, 116 | title='SVM decoding results on Haxby') 117 | ``` 118 | 119 | ## Visualizing the weights 120 | Finally we can visualize the weights of the (linear) classifier to see which brain region seem to impact most the decision, for example for faces: 121 | 122 | ```{code-cell} ipython3 123 | from nilearn import plotting 124 | # first row of coef_ is comparing the first pair of class labels 125 | # with 9 classes, there are 9 * 8 / 2 distinct 126 | coef_img = masker.inverse_transform(model_svm.coef_[0, :]) 127 | plotting.view_img( 128 | coef_img, 
bg_img=haxby_dataset.anat[0], 129 | title="SVM weights", dim=-1, resampling_interpolation='nearest' 130 | ) 131 | ``` 132 | 133 | ## And now the easy way 134 | We can use the high-level `Decoder` object from Nilearn. See [Decoder object](https://nilearn.github.io/dev/modules/generated/nilearn.decoding.Decoder.html) for details. It reduces model specification and fit to two lines of code: 135 | 136 | ```{code-cell} ipython3 137 | from nilearn.decoding import Decoder 138 | # Specify the classifier to the decoder object. 139 | # With the decoder we can input the masker directly. 140 | # 141 | # cv=5 means that we use 5-fold cross-validation 142 | # 143 | # As a scoring scheme, one can use f1, accuracy or ROC-AUC 144 | # 145 | decoder = Decoder(estimator='svc', cv=5, mask=mask_filename, scoring='f1') 146 | decoder.fit(func_file, y) 147 | ``` 148 | 149 | That's it! 150 | We can now look at the results: F1 score and coefficient image: 151 | 152 | ```{code-cell} ipython3 153 | print('F1 scores') 154 | for category in categories: 155 | print(f"{category.ljust(15)} {np.mean(decoder.cv_scores_[category]):.2f}") 156 | plotting.view_img( 157 | decoder.coef_img_['face'], bg_img=haxby_dataset.anat[0], 158 | title="SVM weights for face", dim=-1, resampling_interpolation='nearest' 159 | ) 160 | ``` 161 | 162 | Note: the Decoder implements a one-vs-all strategy, which is in general a better choice than one-vs-one. 163 | 164 | 165 | ## Generating sharper weight maps with L1 regularization 166 | Nilearn offers different flavours of SVCs. While the default uses L2 regularization under the hood, 167 | we can obtain sharper weight maps by encouraging sparsity with L1 regularization. 168 | 169 | ```{figure} svm_decoding/regularizations.png 170 | --- 171 | width: 750px 172 | name: regularizations-fig 173 | --- 174 | L1 penalty promotes sparsity of the estimated coefficients, while L2 penalty promotes weight sharing among all 175 | components. 
One can combine both L1 and L2 regularization to obtain the [ElasticNet](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html) penalty. 176 | ``` 177 | 178 | ```{code-cell} ipython3 179 | # Let's swap the estimator with the svc_l1 180 | l1_decoder = Decoder(estimator='svc_l1', cv=5, mask=mask_filename, scoring='f1') 181 | l1_decoder.fit(func_file, y) 182 | plotting.view_img( 183 | l1_decoder.coef_img_['face'], bg_img=haxby_dataset.anat[0], 184 | title="L1-SVM weights for face", dim=-1, resampling_interpolation='nearest' 185 | ) 186 | ``` 187 | 188 | We can observe that far fewer components are selected and the information is much more localized. 189 | 190 | ## Getting more meaningful weight maps with FREM 191 | 192 | It is often tempting to interpret regions with high weights as 'important' for the prediction task. However, there is no statistical guarantee on these maps. Moreover, they often do not even exhibit very clear structure. To improve that, regularization can be introduced by using the so-called Fast Regularized Ensembles of Models (FREM), which rely on simple averaging and clustering tools to provide smoother maps, yet with minimal computational overhead. 
193 | 194 | ```{code-cell} ipython3 195 | from nilearn.decoding import FREMClassifier 196 | frem = FREMClassifier(estimator='svc', cv=5, mask=mask_filename, scoring='f1') 197 | frem.fit(func_file, y) 198 | plotting.view_img( 199 | frem.coef_img_['face'], bg_img=haxby_dataset.anat[0], 200 | title="SVM weights for face", dim=-1, resampling_interpolation='nearest' 201 | ) 202 | ``` 203 | 204 | Note that the resulting accuracy is in general slightly higher: 205 | 206 | ```{code-cell} ipython3 207 | print('F1 scores with FREM') 208 | for category in categories: 209 | print(f"{category.ljust(15)} {np.mean(frem.cv_scores_[category]):.2f}") 210 | ``` 211 | 212 | 213 | ## ⚡️ (Experimental) Running a surfacic analysis 214 | 215 | Nilearn recently expanded its surface API to enable surface-based decoding. We start by projecting our data onto the FreeSurfer `fsaverage4` template, which is a downsampled version of the standard FreeSurfer template containing approximately 2,562 vertices per hemisphere. This template serves as the common space for analysis. We then create a SurfaceImage object that combines the mesh geometry with functional data. This object maintains separate representations for left and right hemispheres while providing a unified interface for surface-based analysis. 
216 | ```{code-cell} ipython3 217 | from nilearn.surface import vol_to_surf 218 | from nilearn.experimental.surface._datasets import load_fsaverage 219 | from nilearn.experimental.surface._surface_image import SurfaceImage 220 | 221 | # We first load the fsaverage mesh 222 | mesh = load_fsaverage("fsaverage4")["pial"] 223 | 224 | # We then project the data on each hemisphere 225 | data_lh = vol_to_surf(func_file, mesh["left_hemisphere"]).T 226 | data_rh = vol_to_surf(func_file, mesh["right_hemisphere"]).T 227 | 228 | # Then we build the SurfaceImage object 229 | surf_img = SurfaceImage( 230 | mesh=mesh, 231 | data={ 232 | "left_hemisphere": data_lh, 233 | "right_hemisphere": data_rh, 234 | }, 235 | ) 236 | print(f"Image shape: {surf_img.shape}") 237 | ``` 238 | 239 | The decoder fitting process is similar to the previous ones, but with a key distinction: we implement the SurfaceMasker object for surface-based data processing. This masker is specifically designed to handle cortical surface information, allowing us to maintain the spatial structure of the brain's surface representation throughout the decoding analysis. 240 | ```{code-cell} ipython3 241 | # The following is just disabling a couple of checks performed by the decoder 242 | # that would force us to use a `NiftiMasker`. 
243 | from nilearn._utils import param_validation 244 | def monkeypatch_masker_checks(): 245 | def adjust_screening_percentile(screening_percentile, *args, **kwargs): 246 | return screening_percentile 247 | 248 | param_validation.adjust_screening_percentile = adjust_screening_percentile 249 | monkeypatch_masker_checks() 250 | 251 | from nilearn.experimental.surface import SurfaceMasker 252 | 253 | decoder = Decoder(mask=SurfaceMasker(), cv=3, screening_percentile=1) 254 | decoder.fit(surf_img, y) 255 | ``` 256 | 257 | We finally plot the resulting weight map for `face` using an interactive surface viewer: 258 | ```{code-cell} ipython3 259 | plotting.view_surf( 260 | decoder.coef_img_["face"].mesh["right_hemisphere"], 261 | decoder.coef_img_["face"].data["right_hemisphere"], 262 | cmap="coolwarm", 263 | ) 264 | ``` 265 | 266 | ## Exercises 267 | * What is the most difficult category to decode? Why? 268 | * The model seemed to overfit. Can you find a parameter value for `C` in `SVC` such that the model does not overfit as much? 269 | * Try a `'rbf'` kernel in `SVC`. Can you get a better test accuracy than with the `'linear'` kernel? 270 | * Try to explore the weights associated with other labels. 271 | * Instead of doing a 5-fold cross-validation, one should split the data by runs. 272 | Implement a leave-one-run-out and a leave-two-runs-out cross-validation. For that you will need to access the run information, which is stored in `behavioral['chunks']`. You will also need the [LeavePGroupsOut](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeavePGroupsOut.html#sklearn.model_selection.LeavePGroupsOut) object of scikit-learn. 273 | * Try implementing a random forest or k nearest neighbor classifier. 274 | * **Hard**: implement a systematic hyper-parameter optimization using nested cross-validation. 
Tip: check this [scikit-learn tutorial](https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_digits.html#sphx-glr-auto-examples-model-selection-plot-grid-search-digits-py). 275 | * **Hard**: try to account for class imbalance in the dataset. 276 | -------------------------------------------------------------------------------- /content/svm_decoding/optimal-hyperplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/svm_decoding/optimal-hyperplane.png -------------------------------------------------------------------------------- /content/svm_decoding/regularizations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/content/svm_decoding/regularizations.png -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/data/.gitkeep -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: main_edu_brain_decoding 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - bzip2=1.0.8 7 | - ca-certificates 8 | - libffi=3.4.2 9 | - libsqlite=3.40.0 10 | - libzlib=1.2.13 11 | - ncurses=6.3 12 | - openssl=3.0.7 13 | - pip=22.3.1 14 | - python=3.11.0 15 | - readline=8.1.2 16 | - setuptools=65.5.1 17 | - tk=8.6.12 18 | - wheel=0.38.4 19 | - xz=5.2.6 20 | - pip: 21 | - absl-py==2.1.0 22 | - anyio==4.6.2.post1 23 | - appnope==0.1.4 24 | - argon2-cffi==23.1.0 25 | - 
argon2-cffi-bindings==21.2.0 26 | - arrow==1.3.0 27 | - asttokens==2.4.1 28 | - astunparse==1.6.3 29 | - async-lru==2.0.4 30 | - attrs==24.2.0 31 | - babel==2.16.0 32 | - beautifulsoup4==4.12.3 33 | - bleach==6.1.0 34 | - certifi==2024.8.30 35 | - cffi==1.17.1 36 | - charset-normalizer==3.4.0 37 | - comm==0.2.2 38 | - contourpy==1.3.0 39 | - cycler==0.12.1 40 | - debugpy==1.8.7 41 | - decorator==5.1.1 42 | - defusedxml==0.7.1 43 | - executing==2.1.0 44 | - fastjsonschema==2.20.0 45 | - filelock==3.16.1 46 | - flatbuffers==24.3.25 47 | - fonttools==4.54.1 48 | - fqdn==1.5.1 49 | - fsspec==2024.9.0 50 | - gast==0.6.0 51 | - google-pasta==0.2.0 52 | - grpcio==1.66.2 53 | - h11==0.14.0 54 | - h5py==3.12.1 55 | - httpcore==1.0.6 56 | - httpx==0.27.2 57 | - idna==3.10 58 | - importlib-resources==6.4.5 59 | - ipykernel==6.29.5 60 | - ipython==8.28.0 61 | - ipywidgets==8.1.5 62 | - isoduration==20.11.0 63 | - jedi==0.19.1 64 | - jinja2==3.1.4 65 | - joblib==1.4.2 66 | - json5==0.9.25 67 | - jsonpointer==3.0.0 68 | - jsonschema==4.23.0 69 | - jsonschema-specifications==2024.10.1 70 | - jupyter==1.1.1 71 | - jupyter-client==8.6.3 72 | - jupyter-console==6.6.3 73 | - jupyter-core==5.7.2 74 | - jupyter-events==0.10.0 75 | - jupyter-lsp==2.2.5 76 | - jupyter-server==2.14.2 77 | - jupyter-server-terminals==0.5.3 78 | - jupyterlab==4.2.5 79 | - jupyterlab-pygments==0.3.0 80 | - jupyterlab-server==2.27.3 81 | - jupyterlab-widgets==3.0.13 82 | - keras==3.6.0 83 | - kiwisolver==1.4.7 84 | - libclang==18.1.1 85 | - lxml==5.3.0 86 | - markdown==3.7 87 | - markdown-it-py==3.0.0 88 | - markupsafe==3.0.1 89 | - matplotlib==3.9.2 90 | - matplotlib-inline==0.1.7 91 | - mdurl==0.1.2 92 | - mistune==3.0.2 93 | - ml-dtypes==0.3.2 94 | - mpmath==1.3.0 95 | - namex==0.0.8 96 | - nbclient==0.10.0 97 | - nbconvert==7.16.4 98 | - nbformat==5.10.4 99 | - nest-asyncio==1.6.0 100 | - networkx==3.4.1 101 | - nibabel==5.3.0 102 | - nilearn==0.10.4 103 | - notebook==7.2.2 104 | - notebook-shim==0.2.4 
105 | - numpy==1.26.4 106 | - opt-einsum==3.4.0 107 | - optree==0.13.0 108 | - overrides==7.7.0 109 | - packaging==24.1 110 | - pandas==2.2.3 111 | - pandocfilters==1.5.1 112 | - parso==0.8.4 113 | - pexpect==4.9.0 114 | - pillow==10.4.0 115 | - platformdirs==4.3.6 116 | - prometheus-client==0.21.0 117 | - prompt-toolkit==3.0.48 118 | - protobuf==4.25.5 119 | - psutil==6.0.0 120 | - ptyprocess==0.7.0 121 | - pure-eval==0.2.3 122 | - pycparser==2.22 123 | - pygments==2.18.0 124 | - pyparsing==3.2.0 125 | - python-dateutil==2.9.0.post0 126 | - python-json-logger==2.0.7 127 | - pytz==2024.2 128 | - pyyaml==6.0.2 129 | - pyzmq==26.2.0 130 | - referencing==0.35.1 131 | - requests==2.32.3 132 | - rfc3339-validator==0.1.4 133 | - rfc3986-validator==0.1.1 134 | - rich==13.9.2 135 | - rpds-py==0.20.0 136 | - scikit-learn==1.5.2 137 | - scipy==1.14.1 138 | - seaborn==0.13.2 139 | - send2trash==1.8.3 140 | - six==1.16.0 141 | - sniffio==1.3.1 142 | - soupsieve==2.6 143 | - stack-data==0.6.3 144 | - sympy==1.13.3 145 | - tensorboard==2.16.2 146 | - tensorboard-data-server==0.7.2 147 | - tensorflow==2.16.2 148 | - tensorflow-io-gcs-filesystem==0.37.1 149 | - termcolor==2.5.0 150 | - terminado==0.18.1 151 | - threadpoolctl==3.5.0 152 | - tinycss2==1.3.0 153 | - torch==2.2.2 154 | - tornado==6.4.1 155 | - traitlets==5.14.3 156 | - types-python-dateutil==2.9.0.20241003 157 | - typing-extensions==4.12.2 158 | - tzdata==2024.2 159 | - uri-template==1.3.0 160 | - urllib3==2.2.3 161 | - wcwidth==0.2.13 162 | - webcolors==24.8.0 163 | - webencodings==0.5.1 164 | - websocket-client==1.8.0 165 | - werkzeug==3.0.4 166 | - widgetsnbextension==4.0.13 167 | - wrapt==1.16.0 -------------------------------------------------------------------------------- /images/neurolibre-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/images/neurolibre-logo.png -------------------------------------------------------------------------------- /notebooks/Brain_connectivity_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/notebooks/Brain_connectivity_graph.png -------------------------------------------------------------------------------- /notebooks/GCN_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/notebooks/GCN_pipeline.png -------------------------------------------------------------------------------- /notebooks/GCN_pipeline_main2022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/notebooks/GCN_pipeline_main2022.png -------------------------------------------------------------------------------- /notebooks/Haxby_stimuli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/notebooks/Haxby_stimuli.png -------------------------------------------------------------------------------- /notebooks/data_processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### *This notebook is just for showing all the steps of the data processing for GCN, but for saving time all the output data of this notebook have already been uploaded to the cloud 
and we don't need to run it during the educational session.* " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 8, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "from nilearn import datasets\n", 18 | "from nilearn.input_data import NiftiMasker\n", 19 | "import pandas as pd\n", 20 | "import glob\n", 21 | "import pathlib\n", 22 | "import numpy as np\n", 23 | "import csv" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "# Fetching Haxby dataset" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# We are fetching the data for subject 4\n", 40 | "data_dir = os.path.join('..', 'data')\n", 41 | "sub_no = 4\n", 42 | "haxby_ds = datasets.fetch_haxby(subjects=[sub_no], fetch_stimuli=True, data_dir=data_dir)\n", 43 | "\n", 44 | "func_file = haxby_ds.func[0]\n", 45 | "\n", 46 | "# Standardizing\n", 47 | "mask_vt_file = haxby_ds.mask_vt[0]\n", 48 | "masker = NiftiMasker(mask_img=mask_vt_file, standardize=True)\n", 49 | "\n", 50 | "labels = pd.read_csv(haxby_ds.session_target[0], sep=\" \")\n", 51 | "\n", 52 | "# Selecting data\n", 53 | "X = masker.fit_transform(func_file)\n", 54 | "y = labels['labels']\n", 55 | "\n", 56 | "categories = y.unique()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# Data paths" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "proc_path = os.path.join(data_dir, 'haxby_proc/')\n", 73 | "concat_path = os.path.join(data_dir, 'haxby_concat/')\n", 74 | "conn_path = os.path.join(data_dir, 'haxby_connectomes/')\n", 75 | "split_path = os.path.join(data_dir, 'haxby_split_win/')\n", 76 | "\n", 77 | "if not os.path.exists(proc_path):\n", 78 | " os.makedirs(proc_path)\n", 79 | "if not os.path.exists(concat_path):\n", 80 | " 
os.makedirs(concat_path)\n", 81 | "if not os.path.exists(conn_path):\n", 82 | " os.makedirs(conn_path)\n", 83 | "if not os.path.exists(split_path):\n", 84 | " os.makedirs(split_path)\n", 85 | " \n", 86 | "# delete the contents of a folder to avoid inconsistency\n", 87 | "old_files = glob.glob(concat_path + '/*')\n", 88 | "for f in old_files:\n", 89 | " os.remove(f) \n", 90 | "if os.path.exists(split_path):\n", 91 | " files = glob.glob(os.path.join(split_path, \"*\"))\n", 92 | " for f in files:\n", 93 | " os.remove(f)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "# Data processing" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 4, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "[]\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "old_dirContents = os.listdir(concat_path)\n", 118 | "print(old_dirContents)\n", 119 | "\n", 120 | "concat_bold_files = []\n", 121 | "if (len(old_dirContents) == 0 or len(old_dirContents) == 1): \n", 122 | " if ((len(X)) == len(y)):\n", 123 | " \n", 124 | " for i in range(0,len(y)):\n", 125 | " label = y[i]\n", 126 | " concat_bold_files = X[i:i+1]\n", 127 | " concat_file_name = concat_path + '{}_concat_fMRI.npy'.format(label)\n", 128 | " file = pathlib.Path(concat_file_name)\n", 129 | " \n", 130 | " if file.exists ():\n", 131 | " concat_file = np.load(concat_file_name, allow_pickle = True)\n", 132 | " concat_file = np.concatenate((concat_file, concat_bold_files), axis = 0)\n", 133 | " np.save(concat_file_name, concat_file)\n", 134 | " else:\n", 135 | " np.save(concat_file_name, concat_bold_files)\n", 136 | " \n", 137 | "else:\n", 138 | " print('Folder is Not Empty')" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 5, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "with open(concat_path + 'phenotypic_data.tsv', 'wt') as out_file:\n", 
148 | " \n", 149 | " tsv_writer = csv.writer(out_file, delimiter='\\t')\n", 150 | " tsv_writer.writerow(['label'])\n", 151 | " \n", 152 | " for category in categories: \n", 153 | " tsv_writer.writerow([category])" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# Time windows" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 6, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "window_length = 1\n", 170 | "\n", 171 | "# Path for saving the files\n", 172 | "pheno_file = os.path.join(concat_path, 'phenotypic_data.tsv')\n", 173 | "processed_bold_files = sorted(glob.glob(concat_path + '/*.npy'))\n", 174 | "out_file = os.path.join(split_path, '{}_{:04d}.npy')\n", 175 | "out_csv = os.path.join(split_path, 'labels.csv')" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Split fMRI data\n", 183 | "Now we are going to split bold input files to the desired windows lenght, then we will also create a csv file that will contain label for each splited data." 
184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 7, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "dic_labels = {'rest':0,'face':1,'chair':2,'scissors':3,'shoe':4,'scrambledpix':5,'house':6,'cat':7,'bottle':8}\n", 193 | "label_df = pd.DataFrame(columns=['label', 'filename'])\n", 194 | "\n", 195 | "for proc_bold in processed_bold_files:\n", 196 | " \n", 197 | " ts_data = np.load(proc_bold)\n", 198 | " ts_duration = len(ts_data)\n", 199 | "\n", 200 | " ts_filename = os.path.basename(proc_bold)\n", 201 | " ts_label = ts_filename.split('_', 1)[0]\n", 202 | "\n", 203 | " valid_label = dic_labels[ts_label]\n", 204 | " \n", 205 | " # Split the timeseries\n", 206 | " rem = ts_duration % window_length\n", 207 | " n_splits = int(np.floor(ts_duration / window_length))\n", 208 | "\n", 209 | " ts_data = ts_data[:(ts_duration-rem), :] \n", 210 | " \n", 211 | " for j, split_ts in enumerate(np.split(ts_data, n_splits)):\n", 212 | " ts_output_file_name = out_file.format(ts_filename, j)\n", 213 | "\n", 214 | " split_ts = np.swapaxes(split_ts, 0, 1)\n", 215 | " np.save(ts_output_file_name, split_ts)\n", 216 | " curr_label = {'label': valid_label, 'filename': os.path.basename(ts_output_file_name)}\n", 217 | " label_df = label_df.append(curr_label, ignore_index=True)\n", 218 | " \n", 219 | "label_df.to_csv(out_csv, index=False) " 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python 3", 226 | "language": "python", 227 | "name": "python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.8.10" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 4 244 | } 245 | -------------------------------------------------------------------------------- 
/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.1.0 2 | anyio==4.6.2.post1 3 | appnope==0.1.4 4 | argon2-cffi==23.1.0 5 | argon2-cffi-bindings==21.2.0 6 | arrow==1.3.0 7 | asttokens==2.4.1 8 | astunparse==1.6.3 9 | async-lru==2.0.4 10 | attrs==24.2.0 11 | babel==2.16.0 12 | beautifulsoup4==4.12.3 13 | bleach==6.1.0 14 | certifi==2024.8.30 15 | cffi==1.17.1 16 | charset-normalizer==3.4.0 17 | comm==0.2.2 18 | contourpy==1.3.0 19 | cycler==0.12.1 20 | debugpy==1.8.7 21 | decorator==5.1.1 22 | defusedxml==0.7.1 23 | executing==2.1.0 24 | fastjsonschema==2.20.0 25 | filelock==3.16.1 26 | flatbuffers==24.3.25 27 | fonttools==4.54.1 28 | fqdn==1.5.1 29 | fsspec==2024.9.0 30 | gast==0.6.0 31 | google-pasta==0.2.0 32 | grpcio==1.66.2 33 | h11==0.14.0 34 | h5py==3.12.1 35 | httpcore==1.0.6 36 | httpx==0.27.2 37 | idna==3.10 38 | importlib_resources==6.4.5 39 | ipykernel==6.29.5 40 | ipython==8.28.0 41 | ipywidgets==8.1.5 42 | isoduration==20.11.0 43 | jedi==0.19.1 44 | Jinja2==3.1.4 45 | joblib==1.4.2 46 | json5==0.9.25 47 | jsonpointer==3.0.0 48 | jsonschema==4.23.0 49 | jsonschema-specifications==2024.10.1 50 | jupyter==1.1.1 51 | jupyter-console==6.6.3 52 | jupyter-events==0.10.0 53 | jupyter-lsp==2.2.5 54 | jupyter_client==8.6.3 55 | jupyter_core==5.7.2 56 | jupyter_server==2.14.2 57 | jupyter_server_terminals==0.5.3 58 | jupyterlab==4.2.5 59 | jupyterlab_pygments==0.3.0 60 | jupyterlab_server==2.27.3 61 | jupyterlab_widgets==3.0.13 62 | keras==3.6.0 63 | kiwisolver==1.4.7 64 | libclang==18.1.1 65 | lxml==5.3.0 66 | Markdown==3.7 67 | markdown-it-py==3.0.0 68 | MarkupSafe==3.0.1 69 | matplotlib==3.9.2 70 | matplotlib-inline==0.1.7 71 | mdurl==0.1.2 72 | mistune==3.0.2 73 | ml-dtypes==0.3.2 74 | mpmath==1.3.0 75 | namex==0.0.8 76 | nbclient==0.10.0 77 | nbconvert==7.16.4 78 | nbformat==5.10.4 79 | nest-asyncio==1.6.0 80 | networkx==3.4.1 81 | nibabel==5.3.0 82 | nilearn==0.10.4 83 | notebook==7.2.2 84 | 
notebook_shim==0.2.4 85 | numpy==1.26.4 86 | opt_einsum==3.4.0 87 | optree==0.13.0 88 | overrides==7.7.0 89 | packaging==24.1 90 | pandas==2.2.3 91 | pandocfilters==1.5.1 92 | parso==0.8.4 93 | pexpect==4.9.0 94 | pillow==10.4.0 95 | platformdirs==4.3.6 96 | prometheus_client==0.21.0 97 | prompt_toolkit==3.0.48 98 | protobuf==4.25.5 99 | psutil==6.0.0 100 | ptyprocess==0.7.0 101 | pure_eval==0.2.3 102 | pycparser==2.22 103 | Pygments==2.18.0 104 | pyparsing==3.2.0 105 | python-dateutil==2.9.0.post0 106 | python-json-logger==2.0.7 107 | pytz==2024.2 108 | PyYAML==6.0.2 109 | pyzmq==26.2.0 110 | referencing==0.35.1 111 | requests==2.32.3 112 | rfc3339-validator==0.1.4 113 | rfc3986-validator==0.1.1 114 | rich==13.9.2 115 | rpds-py==0.20.0 116 | scikit-learn==1.5.2 117 | scipy==1.14.1 118 | seaborn==0.13.2 119 | Send2Trash==1.8.3 120 | six==1.16.0 121 | sniffio==1.3.1 122 | soupsieve==2.6 123 | stack-data==0.6.3 124 | sympy==1.13.3 125 | tensorboard==2.16.2 126 | tensorboard-data-server==0.7.2 127 | tensorflow==2.16.2 128 | tensorflow-io-gcs-filesystem==0.37.1 129 | termcolor==2.5.0 130 | terminado==0.18.1 131 | threadpoolctl==3.5.0 132 | tinycss2==1.3.0 133 | torch==2.2.2 134 | tornado==6.4.1 135 | traitlets==5.14.3 136 | types-python-dateutil==2.9.0.20241003 137 | typing_extensions==4.12.2 138 | tzdata==2024.2 139 | uri-template==1.3.0 140 | urllib3==2.2.3 141 | wcwidth==0.2.13 142 | webcolors==24.8.0 143 | webencodings==0.5.1 144 | websocket-client==1.8.0 145 | Werkzeug==3.0.4 146 | widgetsnbextension==4.0.13 147 | wrapt==1.16.0 148 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/main-educational/brain_encoding_decoding/390fc8a7b97f5db3b85efe9a6c78e33ee81638e4/src/__init__.py -------------------------------------------------------------------------------- /src/__pycache__/gcn_model.cpython-36.pyc: 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric as tg
import numpy as np


class GCN(torch.nn.Module):
    """Graph convolutional network for decoding brain states from fMRI windows.

    Three ChebNet (Chebyshev spectral graph convolution) layers over a fixed
    brain graph, followed by a three-layer fully connected classifier head.

    Parameters
    ----------
    edge_index : torch.Tensor
        Edge index tensor of the (shared) brain graph, as produced by
        `torch_geometric` sparse conversion.
    edge_weight : torch.Tensor
        Edge weights matching `edge_index`.
    n_roi : int
        Number of brain regions (nodes in the graph).
    batch_size : int, default=16
        NOTE(review): also used as `conv3`'s output channel count and in the
        flattened feature size fed to `fc1`, so the model architecture is
        coupled to the training batch size — confirm this is intended.
    n_timepoints : int, default=1
        Number of input features (timepoints) per node.
    n_classes : int, default=9
        Number of output classes (raw logits, no softmax applied).
    """

    def __init__(self, edge_index, edge_weight, n_roi, batch_size=16, n_timepoints=1, n_classes=9):
        super().__init__()
        # Graph structure is fixed at construction time and shared by every
        # forward pass (same connectome for all samples).
        self.edge_index = edge_index
        self.edge_weight = edge_weight
        self.n_roi = n_roi
        self.batch_size = batch_size

        # K=2 Chebyshev polynomial order: each conv aggregates up to
        # 2-hop neighbourhoods.
        self.conv1 = tg.nn.ChebConv(
            in_channels=n_timepoints, out_channels=32, K=2, bias=True
        )
        self.conv2 = tg.nn.ChebConv(in_channels=32, out_channels=32, K=2, bias=True)
        self.conv3 = tg.nn.ChebConv(in_channels=32, out_channels=batch_size, K=2, bias=True)
        # Classifier head: flattened node features -> 256 -> 128 -> logits.
        self.fc1 = nn.Linear(self.n_roi * batch_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, n_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # x: node features; assumes the leading dimension enumerates samples
        # (or sample*node groupings) sharing self.edge_index — TODO confirm
        # the exact input shape against the calling notebook.
        x = self.conv1(x, self.edge_index, self.edge_weight)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, self.edge_index, self.edge_weight)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.conv3(x, self.edge_index, self.edge_weight)
        x = F.relu(x)
        x = self.dropout(x)
        # NOTE(review): batch_vector gives every row its own pool id, so
        # global_mean_pool averages each row with itself only — effectively an
        # identity op here. Possibly kept for API symmetry; confirm.
        batch_vector = torch.arange(x.size(0), dtype=int)
        x = torch.flatten(x, 1)
        x = tg.nn.global_mean_pool(x, batch_vector)
        # Reshape to (n_samples, n_roi * batch_size) for the dense head;
        # relies on the conv3 output width matching batch_size (see __init__).
        x = x.view(-1, self.n_roi * self.batch_size)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        x = self.fc3(x)
        # Raw class logits; loss function is expected to apply softmax.
        return x
class TimeWindowsDataset(torch.utils.data.Dataset):
    """Dataset of fMRI time-window samples stored as individual ``.npy`` files.

    Each sample is one numpy file found (recursively) under `data_dir`. An
    optional ``labels.csv`` file with ``filename`` and ``label`` columns
    provides the targets; labels are sorted by filename so they align with
    the sorted data files.

    Parameters
    ----------
    data_dir: str
        Directory containing the ``.npy`` sample files.
    partition: str, default="train"
        One of "train", "valid" or "test".
    val_ratio: float, default=0.20
        Fraction of samples assigned to the validation partition.
    test_ratio: float, default=0.10
        Fraction of samples assigned to the test partition.
    shuffle: bool, default=False
        Shuffle sample order (seeded by `random_seed`) before partitioning.
    random_seed: int, default=0
        Seed for the shuffling RNG.
    normalize: bool, default=False
        Gaussian-normalize each sample on the fly in ``__getitem__``.
    pin_memory: bool, default=False
        Load the whole partition into RAM at construction time.
    autoencoder: bool, default=False
        Return each sample as its own target (input reconstruction).
    """

    def __init__(
        self,
        data_dir,
        partition="train",
        val_ratio=0.20,
        test_ratio=0.10,
        shuffle=False,
        random_seed=0,
        normalize=False,
        pin_memory=False,
        autoencoder=False,
    ):

        # parameters initialization and checks
        self.data_dir = data_dir
        self.partition = partition
        self.test_ratio = test_ratio
        self.val_ratio = val_ratio
        self.random_seed = random_seed
        self.shuffle = shuffle
        self.normalize = normalize
        self.pin_memory = pin_memory
        self.autoencoder = autoencoder
        if not os.path.exists(self.data_dir):
            raise ValueError("{} does not exist!".format(self.data_dir))
        if (self.test_ratio + self.val_ratio) >= 1.0:
            raise ValueError(
                "Test and validation ratio are greater than one: {:.2f} > 1.0 !".format(
                    self.test_ratio + self.val_ratio
                )
            )
        valid_partition_names = ["train", "valid", "test"]
        if self.partition not in valid_partition_names:
            raise ValueError(
                "Invalid partition name '{}', available partition names are {}.".format(
                    self.partition, valid_partition_names
                )
            )

        # read file paths
        self._data_filepaths, self._label_filepath = self._read_file_list()
        # define indexes for the current partition
        self._partition_indexes = self._set_indexes_partition()
        self._partition_filepaths = self._data_filepaths[self._partition_indexes]
        # read partition data: arrays in RAM (pin_memory) or just file paths
        if self.pin_memory:
            self.partition_data = [
                np.load(data_filepath) for data_filepath in self._partition_filepaths
            ]
            # warn if the cached arrays take a large share of available RAM
            avail_ram = psutil.virtual_memory().available
            predicted_ram = (
                len(self.partition_data)
                * self.partition_data[0].size
                * self.partition_data[0].itemsize
            )
            if (predicted_ram / avail_ram) > 0.2:
                warnings.warn(
                    "Data uses more than 20% of available RAM ({:.1f} MB), consider using `pin_memory=False`.".format(
                        predicted_ram / 1e6
                    )
                )
        else:
            self.partition_data = self._partition_filepaths
        # read partition targets; `or` short-circuits (the original bitwise `|`
        # always evaluated both operands)
        if (self._label_filepath is None) or self.autoencoder:
            if not self.autoencoder:
                warnings.warn("No labels file, assuming auto-encoder generator.")
            self.partition_targets = None
        else:
            self.partition_targets = self._read_labels()[self._partition_indexes]

    def __repr__(self):
        return "{}*({}, {})".format(
            self.__len__(), self.__getitem__(0)[0].shape, self.__getitem__(0)[1].shape
        )

    def __len__(self):
        """Return the number of samples in the current partition."""
        return len(self._partition_filepaths)

    def __getitem__(self, idx):
        """Return one (data, target) pair; data is a torch tensor."""
        # read numpy data (from RAM when pinned, from disk otherwise)
        if not self.pin_memory:
            np_data = np.load(self.partition_data[idx])
        else:
            np_data = self.partition_data[idx]
        # optional on-the-fly normalization
        if self.normalize:
            np_data = self._normalize_data(np_data)
        data = torch.from_numpy(np_data)
        # auto-encoder mode: the target is the input itself
        if self.partition_targets is None:
            return data, data
        return data, self.partition_targets[idx]

    def get_item_path(self, idx):
        """Return the file path of sample `idx` in the current partition."""
        return self._partition_filepaths[idx]

    def _set_indexes_partition(self):
        """Partition indexes into train/valid/test data."""
        n_samples = len(self._data_filepaths)
        # cumulative split points in [0, 1]
        train_index = 1 - self.test_ratio - self.val_ratio
        val_index = 1 - self.test_ratio
        indexes = np.arange(n_samples)
        if self.shuffle:
            rng = np.random.default_rng(self.random_seed)
            rng.shuffle(indexes)

        if self.partition == "train":
            range_idx = (0, int(train_index * n_samples))
        elif self.partition == "valid":
            range_idx = (int(train_index * n_samples), int(val_index * n_samples))
        else:  # "test" (partition name validated in __init__)
            range_idx = (int(val_index * n_samples), n_samples)

        return indexes[range_idx[0] : range_idx[1]]

    def _normalize_data(self, data):
        """Gaussian-normalization of the data, helps the training process for neural network models."""
        return (data - np.mean(data)) / np.std(data)

    def _read_file_list(self):
        """Return the sorted ``.npy`` data files and the labels file path (or None)."""
        list_files = []
        data_files = []
        label_file = None

        for root, _, files in os.walk(self.data_dir):
            for file in files:
                list_files += [os.path.join(root, file)]
        # sorted order keeps data files aligned with the sorted labels
        list_files = sorted(list_files)

        for f in list_files:
            if f.endswith(".npy"):
                data_files += [f]
            elif "labels.csv" in f:
                label_file = f

        return np.array(data_files), label_file

    def _read_labels(self):
        """Read the labels, sorted by filename to match the sorted data files."""
        labels = pd.read_csv(self._label_filepath)
        labels = labels.sort_values(by=["filename"])

        return np.array(labels["label"])
# print("Valid generator object: {}".format(valid_dataset)) 183 | # for ii, data in enumerate(valid_dataset): 184 | # print("\r\t#{} - ({}, {})".format(ii, data[0].shape, data[1].shape), end='') 185 | # print("") 186 | # print("Test generator object: {}".format(test_dataset)) 187 | # for ii, data in enumerate(test_dataset): 188 | # print("\r\t#{} - ({}, {})".format(ii, data[0].shape, data[1].shape), end='') 189 | # print("") 190 | # # test auto-encoder generator 191 | # data_gen = TimeWindowsDataset(data_dir=data_dir, partition="train", pin_memory=True, autoencoder=True) 192 | # print("Auto-encoder generator object: {}".format(data_gen)) 193 | # for ii, data in enumerate(data_gen): 194 | # print("\r\t#{} - ({}, {})".format(ii, data[0].shape, data[1].shape), end='') 195 | # print("") 196 | # # benchmark time gain with pin_memory 197 | # import time 198 | # start = time.time() 199 | # for data in data_gen: 200 | # continue 201 | # print("Memory not-pinned elapsed time: {}s".format(time.time() - start)) 202 | # data_gen = TimeWindowsDataset(data_dir=data_dir, partition="train", pin_memory=True) 203 | # start = time.time() 204 | # for data in data_gen: 205 | # continue 206 | # print("Memory pinned elapsed time: {}s".format(time.time() - start)) 207 | -------------------------------------------------------------------------------- /src/graph_construction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch_geometric as tg 4 | 5 | 6 | def _make_undirected(mat): 7 | """ 8 | Takes an input adjacency matrix and makes it undirected (symmetric). 9 | 10 | Parameter 11 | ---------- 12 | mat: array 13 | Square adjacency matrix. 
14 | """ 15 | if mat.shape[0] != mat.shape[1]: 16 | raise ValueError("Adjacency matrix must be square.") 17 | 18 | sym = (mat + mat.transpose()) / 2 19 | if len(np.unique(mat)) == 2: # if graph was unweighted, return unweighted 20 | return np.ceil(sym) # otherwise return average 21 | return sym 22 | 23 | 24 | def _knn_graph_quantile(mat, self_loops=False, k=8, symmetric=True): 25 | """ 26 | Takes an input correlation matrix and returns a k-Nearest 27 | Neighbour weighted undirected adjacency matrix. 28 | """ 29 | 30 | if mat.shape[0] != mat.shape[1]: 31 | raise ValueError("Adjacency matrix must be square.") 32 | dim = mat.shape[0] 33 | if (k <= 0) or (dim <= k): 34 | raise ValueError("k must be in range [1,n_nodes)") 35 | is_directed = not (mat == mat.transpose()).all() 36 | if is_directed: 37 | raise ValueError( 38 | "Input adjacency matrix must be undirected (matrix symmetric)!" 39 | ) 40 | 41 | # absolute correlation 42 | mat = np.abs(mat) 43 | adj = np.copy(mat) 44 | # get NN thresholds from quantile 45 | quantile_h = np.quantile(mat, (dim - k - 1) / dim, axis=0) 46 | mask_not_neighbours = mat < quantile_h[:, np.newaxis] 47 | adj[mask_not_neighbours] = 0 48 | if not self_loops: 49 | np.fill_diagonal(adj, 0) 50 | if symmetric: 51 | adj = _make_undirected(adj) 52 | return adj 53 | 54 | 55 | def make_group_graph(connectomes, k=8, self_loops=False, symmetric=True): 56 | """ 57 | Parameters 58 | ---------- 59 | connectomes: list of array 60 | List of connectomes in n_roi x n_roi format, connectomes must all be the same shape. 61 | k: int, default=8 62 | Number of neighbours. 63 | self_loops: bool, default=False 64 | Wether or not to keep self loops in graph, if set to False resulting adjacency matrix 65 | has zero along diagonal. 66 | symmetric: bool, default=True 67 | Wether or not to return a symmetric adjacency matrix. 
In cases where a node is in the neighbourhood 68 | of another node that is not its neighbour, the connection strength between the two will be halved. 69 | 70 | Returns 71 | ------- 72 | Torch geometric graph object of k-Nearest Neighbours graph for the group average connectome. 73 | """ 74 | if connectomes[0].shape[0] != connectomes[0].shape[1]: 75 | raise ValueError("Connectomes must be square.") 76 | 77 | # Group average connectome and nndirected 8 k-NN graph 78 | avg_conn = np.array(connectomes).mean(axis=0) 79 | avg_conn = np.round(avg_conn, 6) 80 | avg_conn_k = _knn_graph_quantile( 81 | avg_conn, k=k, self_loops=self_loops, symmetric=symmetric 82 | ) 83 | 84 | # Format matrix into graph for torch_geometric 85 | adj_sparse = tg.utils.dense_to_sparse(torch.from_numpy(avg_conn_k)) 86 | return tg.data.Data(edge_index=adj_sparse[0], edge_attr=adj_sparse[1]) 87 | -------------------------------------------------------------------------------- /src/visualization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sn 3 | import pandas as pd 4 | from sklearn.metrics import confusion_matrix 5 | from nilearn.plotting import plot_matrix 6 | 7 | 8 | def classifier_history(history, title): 9 | print(history.history.keys()) 10 | 11 | # summarize history for accuracy 12 | plt.plot(history.history['accuracy']) 13 | plt.plot(history.history['val_accuracy']) 14 | plt.title(title + 'model accuracy') 15 | plt.ylabel('accuracy') 16 | plt.xlabel('epoch') 17 | plt.legend(['train', 'validation'], loc = 'upper left') 18 | plt.show() 19 | 20 | 21 | # summarize history for loss 22 | plt.plot(history.history['loss']) 23 | plt.plot(history.history['val_loss']) 24 | plt.title(title + 'model loss') 25 | plt.ylabel('loss') 26 | plt.xlabel('epoch') 27 | plt.legend(['train', 'validation'], loc = 'upper left') 28 | plt.show() 29 | 30 | def conf_matrix(model_conf_matrix, unique_conditions, title): 31 | 32 | 
df_cm = pd.DataFrame(model_conf_matrix, index = unique_conditions, 33 | columns = unique_conditions) 34 | plt.figure(figsize = (10,7)) 35 | sn.heatmap(df_cm, annot = True, cmap = 'Blues', square = True) 36 | plt.xticks(rotation = 45) 37 | plt.title(title , fontsize = 15, fontweight = 'bold') 38 | plt.xlabel("true labels", fontsize = 14, fontweight = 'bold') 39 | plt.ylabel("predicted labels", fontsize = 14, fontweight = 'bold') 40 | plt.show() 41 | --------------------------------------------------------------------------------