├── .gitignore ├── LICENSE ├── README.md ├── html ├── 01-setup.html ├── 02-data-handling.html ├── 03-classification.html ├── 04-dimensionality-reduction.html ├── 05-classifier-optimization.html ├── 06-rsa.html ├── 07-searchlight.html ├── 08-connectivity.html ├── 09-fcma.html ├── 10-isc.html ├── 11-srm.html ├── 12-hmm.html ├── 13-real-time.html └── imgs │ ├── lab11 │ └── srm_time_segment_matching.png │ ├── lab12 │ └── hmm_schematics.png │ └── lab7 │ ├── mpi_openmp.jpg │ └── nodes_process.jpg └── tutorials ├── 00-all-imports-test.ipynb ├── 01-setup.ipynb ├── 02-data-handling.ipynb ├── 03-classification.ipynb ├── 04-dimensionality-reduction.ipynb ├── 05-classifier-optimization.ipynb ├── 06-rsa.ipynb ├── 07-searchlight.ipynb ├── 07-searchlight ├── avg18_whole_brain_SL.nii.gz ├── avg3_whole_brain_SL.nii.gz ├── rank_whole_brain_SL.nii.gz ├── run_searchlight.sh ├── searchlight.py ├── searchlight_rank.py └── searchlight_single_subject.py ├── 08-connectivity.ipynb ├── 09-fcma.ipynb ├── 09-fcma ├── fcma_classify.py ├── fcma_voxel_selection_cv.py ├── make_top_voxel_mask.sh ├── run_fcma_classify.sh └── run_fcma_voxel_selection_cv.sh ├── 10-isc.ipynb ├── 11-srm.ipynb ├── 12-hmm.ipynb ├── 13-real-time.ipynb ├── 13-real-time ├── fmrisim │ ├── ROI_A.nii.gz │ ├── ROI_B.nii.gz │ ├── mask.npy │ ├── sub_noise_dict.txt │ └── sub_template.nii.gz ├── generate_data.py └── run_generate_data.sh ├── colab-env-setup.ipynb ├── imgs ├── lab11 │ └── srm_time_segment_matching.png ├── lab12 │ └── hmm_schematics.png └── lab7 │ ├── mpi_openmp.jpg │ └── nodes_process.jpg ├── logs └── .gitkeep ├── requirements.txt ├── run_jupyter.sh ├── run_jupyter_docker.sh ├── run_jupyter_remote_cluster.sh ├── run_jupyter_remote_server.sh ├── setup_environment.sh └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # To be ignored 2 | .DS_Store 3 | .ipynb_checkpoints 4 | __MACOSX 5 | __pycache__ 6 | *.swp 7 | tutorials/logs/* 8 | tutorials/__pycache__ 9 | brainiak 10 | 11 | # To be excepted 12 | !tutorials/logs/.gitkeep 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | Welcome to the BrainIAK tutorial repository. 4 | 5 | Advanced fMRI analyses have the potential to answer questions that mainstream methods cannot. BrainIAK aims to integrate these cutting-edge techniques into a single, accessible Python environment. To help users get started, we have created the following set of tutorials based on courses taught at Princeton and Yale Universities. 6 | 7 | Detailed information is available here: https://brainiak.org/tutorials 8 | 9 | If you are an instructor interested in using these materials for a course, we would be happy to share our experiences from teaching these materials. You may contact any of the creators directly or via the [BrainIAK chat room on Gitter](https://gitter.im/brainiak/brainiak) or the [BrainIAK email list](mailto:brainiak@googlegroups.com). 10 | -------------------------------------------------------------------------------- /html/imgs/lab11/srm_time_segment_matching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/html/imgs/lab11/srm_time_segment_matching.png -------------------------------------------------------------------------------- /html/imgs/lab12/hmm_schematics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/html/imgs/lab12/hmm_schematics.png -------------------------------------------------------------------------------- /html/imgs/lab7/mpi_openmp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/html/imgs/lab7/mpi_openmp.jpg -------------------------------------------------------------------------------- /html/imgs/lab7/nodes_process.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/html/imgs/lab7/nodes_process.jpg -------------------------------------------------------------------------------- /tutorials/00-all-imports-test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Test All Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "scrolled": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import warnings\n", 19 | "import sys \n", 20 | "if not sys.warnoptions:\n", 21 | " warnings.simplefilter(\"ignore\")\n", 22 | "\n", 23 | "# The plotting tool we will be using in this course\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "\n", 26 | "# Module essential for data organization and manipulation\n", 27 | "import numpy as np #numpy's \"nickname\" is np\n", 28 | "\n", 29 | "# Import a function from BrainIAK to simulate fMRI data\n", 30 | "import brainiak.utils.fmrisim as sim \n", 31 | "\n", 32 | "import numpy as np\n", 33 | "import nibabel as nib\n", 34 | "from nilearn.input_data import NiftiMasker, MultiNiftiMasker\n", 35 | "from scipy import stats\n", 36 | "from sklearn 
import preprocessing\n", 37 | "import matplotlib.pyplot as plt \n", 38 | "import seaborn as sns \n", 39 | "\n", 40 | "import os \n", 41 | "import nibabel as nib\n", 42 | "import numpy as np\n", 43 | "from nilearn.input_data import NiftiMasker\n", 44 | "import scipy.io\n", 45 | "from scipy import stats\n", 46 | "import matplotlib.pyplot as plt\n", 47 | "import seaborn as sns \n", 48 | "\n", 49 | "from sklearn.svm import LinearSVC\n", 50 | "from sklearn.model_selection import PredefinedSplit\n", 51 | "from sklearn.preprocessing import StandardScaler\n", 52 | "from brainiak.utils.fmrisim import _double_gamma_hrf as hrf_func\n", 53 | "from brainiak.utils import fmrisim as sim\n", 54 | "\n", 55 | "# Import neuroimaging, analysis and general libraries\n", 56 | "import numpy as np\n", 57 | "from time import time\n", 58 | "import pandas as pd\n", 59 | "\n", 60 | "# Import plotting libraries\n", 61 | "import matplotlib.pyplot as plt\n", 62 | "import seaborn as sns\n", 63 | "\n", 64 | "# Machine learning libraries\n", 65 | "from sklearn.model_selection import cross_val_score, cross_validate, PredefinedSplit\n", 66 | "from sklearn.svm import SVC\n", 67 | "from sklearn.decomposition import PCA\n", 68 | "from sklearn.feature_selection import SelectKBest, RFECV, f_classif\n", 69 | "from sklearn.pipeline import Pipeline\n", 70 | "\n", 71 | "import nibabel as nib\n", 72 | "import numpy as np\n", 73 | "import scipy.io\n", 74 | "from scipy import stats\n", 75 | "import pandas as pd\n", 76 | "\n", 77 | "# Import plotting library\n", 78 | "import matplotlib.pyplot as plt\n", 79 | "import seaborn as sns \n", 80 | "# %matplotlib notebook\n", 81 | "\n", 82 | "# Import machine learning libraries\n", 83 | "from nilearn.input_data import NiftiMasker\n", 84 | "from sklearn import preprocessing\n", 85 | "from sklearn.model_selection import GridSearchCV, PredefinedSplit\n", 86 | "from sklearn.svm import SVC\n", 87 | "from sklearn.decomposition import PCA\n", 88 | "from sklearn.feature_selection import VarianceThreshold, f_classif, SelectKBest\n", 89 | "from sklearn.pipeline import Pipeline\n", 90 | "from sklearn.linear_model import LogisticRegression\n", 91 | "from scipy.stats import sem\n", 92 | "from copy import deepcopy\n", 93 | "\n", 94 | "import os\n", 95 | "import numpy as np\n", 96 | "import pandas as pd\n", 97 | "import scipy.io\n", 98 | "from scipy import stats\n", 99 | "from sklearn.manifold import MDS\n", 100 | "import scipy.spatial.distance as sp_distance\n", 101 | "\n", 102 | "import matplotlib.pyplot as plt\n", 103 | "import seaborn as sns \n", 104 | "from mpl_toolkits.mplot3d import Axes3D\n", 105 | "\n", 106 | "import nibabel as nib\n", 107 | "import numpy as np\n", 108 | "import os \n", 109 | "import time\n", 110 | "from nilearn import plotting\n", 111 | "from brainiak.searchlight.searchlight import Searchlight\n", 112 | "from brainiak.fcma.preprocessing import prepare_searchlight_mvpa_data\n", 113 | "from brainiak import io\n", 114 | "from pathlib import Path\n", 115 | "from shutil import copyfile\n", 116 | "\n", 117 | "# Import machine learning libraries\n", 118 | "from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score\n", 119 | "from sklearn.svm import SVC\n", 120 | "\n", 121 | "import matplotlib.pyplot as plt\n", 122 | "import seaborn as sns \n", 123 | "\n", 124 | "import numpy as np\n", 125 | "import os \n", 126 | "import nibabel as nib\n", 127 | "from nilearn.input_data import NiftiMasker, NiftiLabelsMasker\n", 128 | "from nilearn import plotting\n", 129 | "from 
nilearn import datasets\n", 130 | "from nilearn.connectome import ConnectivityMeasure\n", 131 | "from scipy import stats\n", 132 | "from scipy.ndimage.measurements import center_of_mass\n", 133 | "import matplotlib.pyplot as plt\n", 134 | "import seaborn as sns \n", 135 | "import pandas as pd\n", 136 | "import brainiak.utils.fmrisim as sim\n", 137 | "from brainiak.fcma.util import compute_correlation\n", 138 | "from nilearn import input_data\n", 139 | "import time\n", 140 | "from utils import shift_timing\n", 141 | "\n", 142 | "import nibabel as nib\n", 143 | "import numpy as np\n", 144 | "import time\n", 145 | "import os\n", 146 | "from scipy.stats import sem\n", 147 | "\n", 148 | "from nilearn import plotting\n", 149 | "from nilearn.image import coord_transform\n", 150 | "\n", 151 | "import brainiak.utils.fmrisim as sim\n", 152 | "from brainiak.fcma.voxelselector import VoxelSelector\n", 153 | "from brainiak.fcma.preprocessing import prepare_fcma_data\n", 154 | "from brainiak.fcma.preprocessing import RandomType\n", 155 | "from brainiak.fcma.util import compute_correlation\n", 156 | "from brainiak import io\n", 157 | "\n", 158 | "import networkx as nx\n", 159 | "from nxviz.plots import CircosPlot\n", 160 | "\n", 161 | "import matplotlib.pyplot as plt\n", 162 | "import seaborn as sns \n", 163 | "\n", 164 | "import os \n", 165 | "import glob\n", 166 | "import time\n", 167 | "from copy import deepcopy\n", 168 | "import numpy as np\n", 169 | "import pandas as pd \n", 170 | "\n", 171 | "from nilearn import datasets\n", 172 | "from nilearn import surface\n", 173 | "from nilearn import plotting\n", 174 | "from nilearn.input_data import NiftiMasker, NiftiLabelsMasker\n", 175 | "import nibabel as nib\n", 176 | "\n", 177 | "from brainiak import image, io\n", 178 | "from brainiak.isc import isc, isfc\n", 179 | "\n", 180 | "import matplotlib.pyplot as plt\n", 181 | "import seaborn as sns \n", 182 | "\n", 183 | "import os \n", 184 | "\n", 185 | "import numpy as np\n", 186 | "from scipy import stats\n", 187 | "import scipy.spatial.distance as sp_distance\n", 188 | "from sklearn.svm import NuSVC\n", 189 | "\n", 190 | "import brainiak.isc\n", 191 | "from brainiak.fcma.util import compute_correlation\n", 192 | "import brainiak.funcalign.srm\n", 193 | "\n", 194 | "import matplotlib.pyplot as plt\n", 195 | "\n", 196 | "import deepdish as dd\n", 197 | "import numpy as np\n", 198 | "\n", 199 | "import brainiak.eventseg.event\n", 200 | "import nibabel as nib\n", 201 | "from nilearn.input_data import NiftiMasker\n", 202 | "\n", 203 | "import scipy.io\n", 204 | "from scipy import stats\n", 205 | "from scipy.stats import norm, zscore, pearsonr\n", 206 | "from scipy.signal import gaussian, convolve\n", 207 | "from sklearn import decomposition\n", 208 | "from sklearn.model_selection import LeaveOneOut, KFold\n", 209 | "\n", 210 | "from matplotlib import pyplot as plt\n", 211 | "from mpl_toolkits.mplot3d import Axes3D\n", 212 | "import matplotlib.patches as patches\n", 213 | "import seaborn as sns \n", 214 | "\n", 215 | "import os\n", 216 | "import time\n", 217 | "import numpy as np # type: ignore\n", 218 | "import matplotlib.pyplot as plt\n", 219 | "%matplotlib inline\n", 220 | "from sklearn.linear_model import LogisticRegression # type: ignore\n", 221 | "from watchdog.events import PatternMatchingEventHandler # type: ignore\n", 222 | "from watchdog.observers import Observer # type: ignore\n", 223 | "from queue import Queue\n", 224 | "from sklearn import svm\n", 225 | "from sklearn import linear_model\n", 226 | 
"import scipy.stats\n", 227 | "from IPython import display" 228 | ] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.6.4" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /tutorials/01-setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to BrainIAK tutorials\n", 8 | "\n", 9 | "\n", 10 | "Congratulations, if you are viewing this Jupyter notebook, you have already acquired many of the skills necessary to excel in this course and you are well on your way to learning cutting-edge methods for cognitive neuroscience!\n", 11 | "\n", 12 | "For users on NeuroLibre, you are seeing a ready to run version of these tutorials that needed no installation or configuration on your part. If you would like to install and use these tutorials on your own machines, follow the instructions on [brainiak tutorials](http://brainiak.org/tutorials).\n", 13 | "\n", 14 | "In this course we will use a variety of tools, many of which will likely be new to you. Don't worry if you are having trouble wrapping your head around them now: by the end of this course you will be proficient in not only these useful skills but also the exciting analyses that use them. \n", 15 | "\n", 16 | "## Goal of this notebook\n", 17 | " 1. Familiarize yourself with the tools that will be used in these notebooks. \n", 18 | "\n", 19 | "\n", 20 | "## Table of Contents\n", 21 | "\n", 22 | "Exercises\n", 23 | ">[Exercise 1](#ex1) \n", 24 | "\n", 25 | "[Contributions](#contributions)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Resources \n", 33 | "\n", 34 | "Here are some resources (Python, fMRI and machine learning, etc.): \n", 35 | "BrainIAK tutorials resource page" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Import necessary packages\n", 43 | "\n", 44 | "While importing packages, you may see warning messages. It is safe to ignore these warnings as they will not impact your execution of the tutorials. 
" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# suppress warnings\n", 54 | "import warnings\n", 55 | "import sys \n", 56 | "if not sys.warnoptions:\n", 57 | " warnings.simplefilter(\"ignore\")\n", 58 | "\n", 59 | "# The plotting tool we will be using in this course\n", 60 | "import matplotlib.pyplot as plt\n", 61 | "\n", 62 | "# Module essential for data organization and manipulation\n", 63 | "import numpy as np #numpy's \"nickname\" is np\n", 64 | "\n", 65 | "# Import a function from BrainIAK to simulate fMRI data\n", 66 | "import brainiak.utils.fmrisim as sim \n", 67 | "\n", 68 | "# display the plots inline \n", 69 | "%matplotlib inline \n", 70 | "# autosave for every 5 secs\n", 71 | "%autosave 5" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Brain template \n", 79 | "\n", 80 | "We are now going to use some of the tools we just loaded. First we'll call a function from `brainiak` to load a gray matter mask from the MNI152 standard brain. Here's an article talking about different anatomical standards, including MNI152: [Structural Brain Atlases: Design, Rationale, and Applications in Normal and Pathological Cohorts](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4324755/)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# Set the size (in terms of X, Y, Z) of the volume we want to create\n", 90 | "dimensions = np.asarray([64, 64, 64])\n", 91 | "\n", 92 | "# Generate an anatomical image with the size above of brain voxels in gray matter\n", 93 | "# This outputs variables for two versions of the image, binary (mask) and probabilistic (template)\n", 94 | "mask, template = sim.mask_brain(dimensions, mask_self=False)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Congrats, you just ran a command from BrainIAK!!\n", 102 | "\n", 103 | "We are now going to take a slice from that template and display it." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Get an axial (a.k.a. transverse or horizontal) slice halfway through the brain\n", 113 | "mid_idx = dimensions[2] // 2\n", 114 | "axial_slice = template[:, :, mid_idx]\n", 115 | "\n", 116 | "# imshow can visualize a 2d array \n", 117 | "plt.imshow(axial_slice)\n", 118 | "plt.title('An axial brain slice');" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "There are great tools in Python for exploring brains in notebooks. One such tool is [niwidgets](http://nipy.org/niwidgets/examples.html). Below we use that tool to look at the brain interactively. 
If you cannot install it, there are other options to consider, like [nibabel.viewers.OrthoSlicer3D](https://nipy.org/nibabel/reference/nibabel.viewers.html#nibabel.viewers.OrthoSlicer3D)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "try:\n", 135 | " # Load in the new variable\n", 136 | " from niwidgets import NiftiWidget\n", 137 | "\n", 138 | " template_nii = nib.Nifti1Image(template, np.eye(4))\n", 139 | " viewer = NiftiWidget(template_nii)\n", 140 | " viewer.nifti_plotter();\n", 141 | "\n", 142 | "except:\n", 143 | " print('niwidgets cannot run, try installing it or some other viewing tool')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "### \"help()\"\n", 151 | "\n", 152 | "`help` is a very useful function in Python. If you type `help(function_name)` in Python, you will get some basic information about how to use this function. If you run the following line, you will see that `sim.mask_brain` takes the dimension of x, y, and z, and then outputs an MNI152 template with the specified dimensions. Note that you can also do this by typing [SHIFT] + [TAB] while the cursor is hovering over a function name. \n", 153 | "\n", 154 | "**Note:** The [SHIFT] + [TAB] shortcut works in Jupyter environments, but you will see small differences in this functionality when these notebooks are used in other environments such as NeuroLibre that use Binder." 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "help(sim.mask_brain)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "### Look at the source code\n", 171 | "If you want to see the source code, you can use the `getsource` function from the `inspect` package. \n", 172 | "\n", 173 | "Run the following code to see the source code of `sim.mask_brain`. " 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "import inspect # this \"inspect\" package can let you peek what's inside a function\n", 183 | "source_code = inspect.getsource(sim.mask_brain)\n", 184 | "print(source_code)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "#### Creating a Python function\n", 192 | "\n", 193 | "`sim.mask_brain()` is a Python \"function\". In general, a Python function has the following structure: \n", 194 | "\n", 195 | " def function_name(input_1, input_2, ..., input_m):\n", 196 | " some code \n", 197 | " some code\n", 198 | " ...\n", 199 | " some code\n", 200 | " return output1, output2, ... output_n" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "**Exercise 1:** Change the above script in at least 3 ways (examples: add a subplot of different slices, change the colors, show a histogram of values, etc.):" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "## Contributions\n", 215 | " \n", 216 | "M. Kumar, C. Ellis and N. Turk-Browne produced the initial notebook 01/2018 \n", 217 | "T. Meissner minor edits \n", 218 | "Q. Lu: switch to matplotlib, fix dead links, add resources, encapsulate brainiak fmrisim \n", 219 | "C. 
Ellis updated with comments from cmhn-s19" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "anaconda-cloud": {}, 225 | "kernelspec": { 226 | "display_name": "Python 3", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.4" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 2 245 | } 246 | -------------------------------------------------------------------------------- /tutorials/02-data-handling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# fMRI Data Loading and Normalization in Python \n", 8 | "\n", 9 | "In cognitive neuroscience, what was considered impossible a few decades ago is now doable. Today, we can infer certain aspects of a person's cognitive processes from measurements of brain activity. This progress has come about due to a confluence of improvements in three different areas: computing speeds, brain imaging methods, and efficient machine learning algorithms. Using all three of these aspects for our data analysis in an optimal manner involves learning a complex sequence of steps. Our overarching goal for these exercises is to provide a step-by-step walk-through on how to execute these analysis steps and infer cognitive states from brain activity.\n", 10 | "\n", 11 | "This process begins with running experiments and collecting fMRI data. Then, collected data undergo preprocessing, which involves corrections, transformations, and alignments. Only after these steps have been completed are the data ready for analysis by machine learning algorithms. Preprocessed fMRI data are commonly stored in the NIfTI format, the starting point for our analyses.\n", 12 | "\n", 13 | "In this notebook, our aim is to cover the very first step in the analysis: the extraction and normalization of pre-processed fMRI data. We will use a localizer dataset from [Kim et al. (2017)](https://doi.org/10.1523/JNEUROSCI.3272-16.2017). This dataset will henceforth be referred to as the \"vdc\" dataset. For the localizer, subjects were shown blocks of faces, scenes and objects in 3 separate runs.\n", 14 | "\n", 15 | "**Note:** If you need help understanding terms such as \"localizer\", \"blocks\", \"runs\", please read the glossary below and also do some self-study on fMRI terminology. This [talk series](https://cbmm.mit.edu/fmri-bootcamp) is a great starting place. If you would like to learn more about fMRI preprocessing, you can find some useful lecture material here: https://fsl.fmrib.ox.ac.uk/fslcourse/ (scroll down to 'Lecture slides & Practical Data').\n", 16 | "\n", 17 | "### Goal of this notebook\n", 18 | "This notebook will teach you how to visualize and normalize your data. Specifically, you will learn how to do the following:\n", 19 | " 1. Load fMRI data into Python.\n", 20 | " 2. Plot the timeseries for a voxel.\n", 21 | " 3. Normalize the data with z-scoring.\n", 22 | "\n", 23 | "## Table of Contents\n", 24 | "[1. Import necessary packages](#import) \n", 25 | "\n", 26 | "[2. Load in timing files](#load_timing) \n", 27 | ">[2.1 Timing file description](#load_timing_describe) \n", 28 | ">[2.2 Plot stimulus presentation](#plot) \n", 29 | "\n", 30 | "[3. 
Load fMRI](#load_fmri) \n", 31 | ">[3.1 Plot voxel time series](#plot_voxel) \n", 32 | "\n", 33 | "[4. Normalization](#zscore) \n", 34 | ">[4.1 Check the z scoring](#zscore_check) \n", 35 | ">[4.2 Exploring a new dataset](#zscore_test)\n", 36 | "\n", 37 | "[5. BIDS Formatted Data](#bids)\n", 38 | "\n", 39 | "\n", 40 | "\n", 41 | "Exercises\n", 42 | ">[Exercise 1](#ex1) [2](#ex2) [3](#ex3) [4](#ex4) [5](#ex5) [6](#ex6) [7](#ex7) [8](#ex8) [9](#ex9) \n", 43 | "\n", 44 | ">[Novel contribution](#novel) \n", 45 | "\n", 46 | "[Contributions](#contributions)\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## 1. Import necessary packages \n", 54 | "The following packages will be used: \n", 55 | ">nibabel: Read fMRI data into Python arrays. \n", 56 | ">numpy: Perform numerical operations in Python. \n", 57 | ">scipy: Scientific computing methods. \n", 58 | ">nilearn: Used to extract masked fMRI data from a NIfTI file. \n", 59 | ">sklearn: Machine learning methods. \n", 60 | ">matplotlib, sns: Plotting libraries. " 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "import warnings\n", 70 | "import sys \n", 71 | "if not sys.warnoptions:\n", 72 | " warnings.simplefilter(\"ignore\")\n", 73 | "import numpy as np\n", 74 | "import nibabel as nib\n", 75 | "from nilearn.input_data import NiftiMasker, MultiNiftiMasker\n", 76 | "from scipy import stats\n", 77 | "from sklearn import preprocessing\n", 78 | "import matplotlib.pyplot as plt \n", 79 | "import seaborn as sns \n", 80 | "import os\n", 81 | "%matplotlib inline \n", 82 | "%autosave 5\n", 83 | "sns.set(style = 'white', context='poster', rc={\"lines.linewidth\": 2.5})\n", 84 | "sns.set(palette=\"colorblind\")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### 1.1 Helper Functions\n", 92 | "\n", 93 | "To make it easier for you to achieve the goals of this notebook, we have created helper functions that do the heavy lifting in terms of data extraction. Thus, a complex set of steps to load data, across NIfTI and MATLAB file formats, from multiple runs, is executed in few lines of code in this notebook. In future notebooks, you will be exposed to more details of data extraction. The helper functions are all in the `utils.py` script in this folder." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# Load some helper functions\n", 103 | "from utils import load_vdc_stim_labels, load_vdc_mask, load_vdc_epi_data\n", 104 | "\n", 105 | "# Load some constants\n", 106 | "from utils import vdc_data_dir, vdc_all_ROIs, vdc_label_dict, vdc_n_runs, nb2_simulated_data\n", 107 | "\n", 108 | "print('Data dir = %s' % (vdc_data_dir))\n", 109 | "print('Regions of interest = %s' % (vdc_all_ROIs))\n", 110 | "print('Labels = %s' % (vdc_label_dict))\n", 111 | "print('Number of runs = %d' % (vdc_n_runs))" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "
\n", 119 | " Note on data file paths: If you are running this notebook on your local machines you will need to pay attention to the data directory path. The data directory path variable points to a specific location. You will need to make a change to the data file path variable `vdc_data_dir` in `utils.py` based on where you download the dataset on your computer. \n", 120 | "
\n", 121 | " No changes to the data path are needed if you are running on NeuroLibre or Google Colaboratory. \n", 122 | "
" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "**Exercise 1:** Describe the difference in functionality between 'import numpy' and 'from numpy import zeros':" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "**A:**" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## 2. Load in timing files\n", 144 | "\n", 145 | "The first way we are going to examine the vdc dataset is by looking at the timing of events in the experiment. The labels for each run of the experiment are stored in the localizer MATLAB file (e.g., `sub-01_localizer_01.mat`). We will read and plot the data that show which stimulus was presented at what time during the experiment. The columns in the localizer MATLAB file represent time in the experiment." 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "
\n", 153 | "Throughout these notebooks there will be self-study questions/suggestions. You are encouraged to explore these topics in order to expand your understanding of these topics and tools.
" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "**Self-study:** Navigate through the folder that contains these data (defined in `vdc_data_dir`) to get an understanding of the file structure. Open up the files and look at their contents. Be an explorer!\n", 161 | "\n", 162 | "**If you are using the NeuroLibre binder to run these tutorials, you will not have easy access to view and open the files. You can download the vdc dataset from here: https://brainiak.org/tutorials/, onto your local machine, to view the files.**" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "**Glossary of common terms used to describe experiments**\n", 170 | ">*stimulus*: The item that we wish to present to the participant in an experiment. Typically a picture or a sound. In the 'vdc' study, pictures of faces, places, and objects are shown to the participant. \n", 171 | ">*trial*: One presentation of the stimulus. \n", 172 | ">*block*: A sequence of trials of the same category. In the 'vdc' study, e.g. a block would consist of all face trials. Usually, multiple blocks are presented to the participant. \n", 173 | ">*run*: A sequence of blocks presented to the participant. Typically an experiment will have multiple runs. There are a few reasons for this:\n", 174 | ">> *counter balancing*: If we started an experiment with faces, we want to also start with other categories to ensure there is no effect of the order of the category. This can be accomplished by having multiple runs, and manipulating the order of categories within runs. \n", 175 | ">> *rest time*: Continuously performing a task can lead to fatigue. In fMRI studies, participants get to relax between runs. \n", 176 | "\n", 177 | ">*TR*: Also known as Repetition Time. It is the time interval at which pulses occur and signal is collected. It can thus be considered as the sampling period of the BOLD signal. More details can be found here: https://mriquestions.com/tr-and-te.html\n", 178 | "\n", 179 | ">*Localizers*: Every person has a unique brain anatomy. Thus, it becomes difficult to pinpoint an exact location in an individual that would be preferential to a stimulus e.g. faces, based on a study with other individuals. We could make an approximate guess of the location, but that would only be approximate. To get the precise location of a brain region in an individual that is sensitive to faces, we need to measure brain activity in that person when he/she is viewing faces. This helps localize the face preferred region in the brain of this individual and such experiments are called localizers. They are often used in conjunction with a main study to help localize a brain region preferential to a stimulus, in an individual, and this region is then used for further analysis in the main study. Localizers are typically block design experiments and are analyzed by contrasting the preferred stimuli with other stimuli. They are also used as independent determiners of the preferred stimulus to avoid circular inferences (more details will be covered in later notebooks). You can learn more about localizers here: \n", 180 | "Saxe, R., Brett, M., & Kanwisher, N. (2006). Divide and conquer: A defense of functional localizers. NeuroImage, 30(4), 1088–1096. 
https://doi.org/10.1016/j.neuroimage.2005.12.062\n" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "### 2.1 Timing file description \n", 188 | "The localizer task consists of 3 runs. Each run contained 15 blocks, with 5 blocks from each of 3 stimulus categories: faces, scenes and objects. There were 10 trials per block. Within a block, a stimulus was presented every 1.5s (1 TR). Between blocks, there were 15s (10 TRs) of fixation. Each run was approximately 310 TRs. \n", 189 | "\n", 190 | "There is a bunch of information about each run in the MATLAB stimulus file that we are about to load. Each row contains a different type of information (e.g., stimulus type, reaction time, etc.) and each column represents a different trial. \n", 191 | "\n", 192 | "Of most importance to us, the first row has the stimulus labels for the localizer runs; whereas the third row contains the time in seconds when the stimulus was presented (relative to the start of each run). The data were loaded in such a way that the three runs were concatenated in time, i.e., columns were added for each run.\n", 193 | "\n", 194 | "The stimulus labels used in row 1 and their corresponding categories are as follows: \n", 195 | "1= Faces \n", 196 | "2= Scenes \n", 197 | "3= Objects \n", 198 | "\n", 199 | "
\n", 200 | " Reminder: Python row and column indexing-- everything begins at [0], not [1].\n", 201 | "
\n", 202 | "When we refer to the first line of a file, and we call it row 1, it is indexed as row [0] in Python. Subsequently, each line number in a file (e.g., row n) will be indexed in Python as [n-1].\n", 203 | "
" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "sub = 'sub-01'\n", 213 | "stim_label_allruns = load_vdc_stim_labels(sub)\n", 214 | "print('stim_label_allruns has shape: ', np.shape(stim_label_allruns))" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "
\n", 222 | " Recommendation: Because Python suppresses output (when there is no error), you may want to include print statements at the end of cells to indicate to you when a cell has executed all lines of code. This also is a useful technique to debug your programs. In Jupyter, there is an indicator to show that a cell is running - the asterisk '*' on the left hand side of the cell. Once the cell execution is complete, this changes to a number.\n", 223 | "
" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "When analyzing data it is important to keep checking whether the code you wrote actually does what you intend it to do. For example, one possible thing to check is whether the created output variable stim_label_allruns indeed contains the expected number of rows and columns. We use a function from the imported numpy package for this purpose: np.shape(stim_label_allruns). Of course, there are plenty of other ways to check whether your code works the way you want it to work." 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "**Exercise 2:** How big is the dataset in terms of number of subjects?\n", 238 | "\n" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "**A:**" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "### 2.2 Plot stimulus presentation \n", 253 | "\n", 254 | "We want to look at the sequence of stimulus presentations. Of course, you can look at the timeline of the stimulus presentation by selecting these rows and output them as an array of values (stim_label_allruns[0,:] and stim_label_allruns[2,:])." 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "**Self-study:** Don't know what a Python dictionary is? Look it up!\n" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "# choose a run \n", 271 | "run_id = 1\n", 272 | "\n", 273 | "# select the corresponding time points for a given run\n", 274 | "# The 6th row is the run identifier for a particular run.\n", 275 | "time_points_mask = stim_label_allruns[5,:] == run_id\n", 276 | "\n", 277 | "# the 1st row (index =0) is the labels of the stimuli \n", 278 | "labels = stim_label_allruns[0, time_points_mask]\n", 279 | "\n", 280 | "# the 3rd row (index =2) is time in secs \n", 281 | "time_secs = stim_label_allruns[2, time_points_mask]\n", 282 | "\n", 283 | "print('labels:\\n\\n %s \\n\\n' % labels)\n", 284 | "print('time_secs:\\n\\n%s' % time_secs)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "However, a better way of visualizing the timeline of the stimulus presentation is by creating a plot, which we will now teach you by using the plotting package matplotlib (that you have imported earlier in this script). 
Here is one way to set up a plot:" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "# Make an empty plot\n", 301 | "plt.figure()\n", 302 | "\n", 303 | "# Plot the data\n", 304 | "plt.plot(time_secs, labels)\n", 305 | "\n", 306 | "# you can add plot title like this\n", 307 | "plt.title('stimulus presentation')\n", 308 | "\n", 309 | "# you can set xlabel like this\n", 310 | "plt.xlabel('time in secs');" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "We can set this up in a way that is a little more controllable (by capturing the figure and axis handles)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "n_conditions = len(vdc_label_dict)\n", 327 | "cur_pals = sns.color_palette('colorblind', n_colors=n_conditions)\n", 328 | "\n", 329 | "# create a plotting panel \n", 330 | "f, ax = plt.subplots(1,1, figsize = (14, 5))\n", 331 | "\n", 332 | "# plot the label for each condition\n", 333 | "for i_cond in range(n_conditions): \n", 334 | " label = list(vdc_label_dict.keys())[i_cond]\n", 335 | " temp_mask = label == labels\n", 336 | " ax.scatter(time_secs[temp_mask], labels[temp_mask], \n", 337 | " color = cur_pals[i_cond], marker = 'o')\n", 338 | "ax.legend(vdc_label_dict.values())\n", 339 | "\n", 340 | "# connect the dots \n", 341 | "ax.plot(time_secs, labels, color = 'black', alpha = .5)\n", 342 | "\n", 343 | "# mark the plot\n", 344 | "ax.set_title('Stimulus Presentation for Run %d' % (run_id))\n", 345 | "ax.set_yticks(list(vdc_label_dict.keys()))\n", 346 | "ax.set_yticklabels(vdc_label_dict.values())\n", 347 | "ax.set_xlabel('Time (seconds)');" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "**Exercise 3:** Plot the stimulus presentation for runs 2 and 3 for this subject." 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "# Insert code here" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "**Exercise 4:** Is the stimulus presented in the same order for all the three runs?" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "**A:**" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "## 3. Load fMRI data \n", 385 | "\n", 386 | "We'll load the data for one run. We will also extract a subset of the signal from the whole-brain data by using a mask for the \"fusiform face area\" ('FFA')." 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "print('Here\\'re the available ROIs: ', vdc_all_ROIs)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "#### Get voxels from an ROI\n", 403 | "\n", 404 | "We will extract BOLD data, only for voxels in a mask, by executing the following sequence of steps: \n", 405 | "1. Load whole brain fMRI data (for a given subject and a given run)\n", 406 | "2. Load the mask of FFA \n", 407 | "3. Use `NiftiMasker` to sub-select FFA voxels from the whole brain data. This is a function from nilearn. 
Here's an [example](https://nilearn.github.io/auto_examples/04_manipulating_images/plot_mask_computation.html) about how to use it, and here's the official [documentation](https://nilearn.github.io/modules/generated/nilearn.input_data.NiftiMasker.html)." 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "# choose FFA\n", 417 | "ROI_name = 'FFA'\n", 418 | "\n", 419 | "# 1. Load the fMRI data \n", 420 | "epi_data = load_vdc_epi_data(sub, run_id)\n", 421 | "\n", 422 | "# 2. Load the mask file \n", 423 | "mask = load_vdc_mask(ROI_name, sub)\n", 424 | "\n", 425 | "# 3. Apply ROI mask\n", 426 | "nifti_masker = NiftiMasker(mask_img=mask)\n", 427 | "maskedData = nifti_masker.fit_transform(epi_data)\n", 428 | "\n", 429 | "print('Data shape - before masking: ', epi_data.shape)\n", 430 | "print('Data shape - after masking: ', maskedData.shape)\n", 431 | "print('Mask name: ', ROI_name)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "**Self-study:** Not quite sure how the process of applying the mask worked in detail? Check out the documentation of the function we used (nilearn.input_data.NiftiMasker).\n", 439 | "\n", 440 | "**Self-study:** The PPA is listed as an ROI in vdc_all_ROIs. What is the parahippocampal place area (PPA)?" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": {}, 446 | "source": [ 447 | "### 3.1. Plot a voxel time-series \n", 448 | "\n", 449 | "After masking, the fMRI dataset at this stage (found in the variable maskedData that was created in the cell above) is in the format rows=time (i.e. 310 rows referring to 310 TRs) and columns=voxels (i.e. the number of voxels in your mask, FFA in this example)." 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "# Plot a voxel value (in this example, voxel 100) through time\n", 459 | "voxel_id = 100\n", 460 | "\n", 461 | "f, ax = plt.subplots(1,1, figsize=(14,5))\n", 462 | "ax.plot(maskedData[:, voxel_id])\n", 463 | "\n", 464 | "ax.set_title('Voxel time series, voxel id = %d' % voxel_id)\n", 465 | "ax.set_xlabel('TR')\n", 466 | "ax.set_ylabel('Voxel Intensity');" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "## 4. Normalization \n", 474 | "\n", 475 | "Sometimes it is necessary to rescale data in order to make different sets of data more comparable. In machine learning, normalization is a standard preprocessing step, as described in [scikit-learn](http://scikit-learn.org/stable/modules/preprocessing.html). In fMRI, we often normalize in order to remove differences that may exist between runs or subjects for uninteresting reasons, such as scanner drift, and to account for differences in variance.\n", 476 | "\n", 477 | "There are many ways to normalize data. Z-scoring is one of the most common approaches: we rescale the data to a mean of zero and a standard deviation of one ($\\mu=0, \\sigma = 1$). \n", 478 | "\n", 479 | "We will use the StandardScaler method for normalization. " 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "**Self-study:** Explore other normalization techniques in `scikit-learn` using the link above."
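As a concrete starting point for that self-study, here is a toy sketch (synthetic numbers, not the vdc data) comparing `StandardScaler` with one alternative, `MinMaxScaler`, which rescales each column to the [0, 1] range:

```python
# Toy comparison of two scikit-learn scalers on synthetic data
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler

rng = np.random.RandomState(0)
toy_data = rng.normal(loc=100, scale=10, size=(20, 3))  # 20 "TRs" x 3 "voxels"

z_scored = StandardScaler().fit_transform(toy_data)   # each column: mean ~0, std ~1
min_maxed = MinMaxScaler().fit_transform(toy_data)    # each column: min 0, max 1

print('z-scored column means: ', np.round(z_scored.mean(axis=0), 3))
print('min-max column minimums:', min_maxed.min(axis=0))
print('min-max column maximums:', min_maxed.max(axis=0))
```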
 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "`StandardScaler` is a function from `sklearn.preprocessing`. `sklearn` functions tend to have the following general usage: \n", 494 | "\n", 495 | "- Step 1: `sklearn_function.fit(some_data)`, which returns a fitted model\n", 496 | "- Step 2: `sklearn_function.transform(some_data)`, which returns the transformed data \n", 497 | " " 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "# The following procedure normalizes the response within voxels over time\n", 507 | "scaler = preprocessing.StandardScaler().fit(maskedData)\n", 508 | "maskedData_zscore = scaler.transform(maskedData)\n", 509 | "\n", 510 | "# another way\n", 511 | "scaler = preprocessing.StandardScaler()\n", 512 | "scaler.fit(maskedData)\n", 513 | "maskedData_zscore = scaler.transform(maskedData)\n", 514 | "\n", 515 | "# yet another way\n", 516 | "maskedData_zscore = preprocessing.StandardScaler().fit_transform(maskedData)\n" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "### 4.1 Check the z-scoring \n", 524 | "\n", 525 | "The mean values never equal exactly zero, and the standard deviation is never exactly 1. This happens because of rounding and precision limitations. These small deviations can be treated as zero for most practical purposes. Below we print out the mean and standard deviation of individual voxels. " 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "voxel_mean = np.mean(maskedData_zscore, axis=0)\n", 535 | "voxel_std = np.std(maskedData_zscore, axis=0)\n", 536 | "print('The number of voxels in the mask is %d' % len(voxel_mean));\n", 537 | "print('The mean of the first few voxels:\\n', voxel_mean[0:4])\n", 538 | "print('The std of the first few voxels:\\n', voxel_std[0:4])" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "**Exercise 5:** Compare the raw data vs. the z-scored data." 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "# Insert code here" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "**Exercise 6:** Plot the distribution of values for a z-scored voxel as a histogram. Sample histogram code can be found [here.](https://matplotlib.org/examples/statistics/histogram_demo_histtypes.html)" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": {}, 568 | "outputs": [], 569 | "source": [ 570 | "# Insert code here" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "**Exercise 7:** Z-score the data by writing your own code instead of using the StandardScaler() method.\n", 578 | "\n", 579 | "Assign the z-scored data to `maskedData_normalized` \n" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": null, 585 | "metadata": {}, 586 | "outputs": [], 587 | "source": [ 588 | "# Insert code here\n", 589 | "maskedData_normalized = None # modify this to compute the z-score."
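# (Added illustration, not part of the original exercise cell.) As a hint, here is
# the z-score formula applied column-wise to a small toy array; the same idea can
# be applied to maskedData along the time axis (axis=0):
toy = np.array([[1., 2.], [3., 4.], [5., 6.]])
toy_z = (toy - toy.mean(axis=0)) / toy.std(axis=0)
print('Toy z-scored columns:\n', toy_z)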
 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": null, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [ 598 | "if maskedData_normalized is not None:\n", 599 | " f, axes = plt.subplots(2,1,figsize = (14,9))\n", 600 | " v = 0\n", 601 | " t = 0 \n", 602 | "\n", 603 | " # Plot the normalized time course of one voxel (all TRs for voxel v)\n", 604 | " axes[0].plot(maskedData_normalized[:, v])\n", 605 | " axes[0].set_title('voxel-wise normalized BOLD time course (voxel id = %d)' % (v))\n", 606 | " axes[0].set_xlabel('TR')\n", 607 | "\n", 608 | " # Plot the normalized whole-brain pattern at one time point (all voxels at TR t)\n", 609 | " axes[1].plot(maskedData_normalized[t, :])\n", 610 | " axes[1].set_title('voxel-wise normalized BOLD whole brain pattern (time = %d)' % (t))\n", 611 | " axes[1].set_xlabel('Voxel index')\n", 612 | "\n", 613 | " f.tight_layout()\n", 614 | "else:\n", 615 | " print('Please assign Z-score values in the previous step to maskedData_normalized.')" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "### 4.2 Exploring a new dataset\n", 623 | "\n", 624 | "We have a new dataset that needs some investigating. In this dataset, found in the **'lab1'** folder of the dataset repository, we have two conditions (Face, Scene) of a single participant's data while they viewed short blocks of faces and scenes, respectively (well actually, this is simulated data to prove an analytic point, but the point is an important one!). Four runs of data were simulated. We extracted data for the two conditions from the PPA and averaged the activity of all the voxels in that ROI. In an initial analysis we looked at the mean PPA activity of both conditions after subtracting the baseline (activity when nothing was on screen) and found that the Face condition had greater activation than the Scene condition in this participant. We did this by taking the time point labels and averaging all time points belonging to each condition.\n", 625 | "\n", 626 | "The variables that are loaded in are: \n", 627 | "`activity`: the average activity of an ROI, stored time point by time point, and concatenated across runs \n", 628 | "`cond_labels`: which condition does this time point belong to (1 means Face condition, 2 means Scene condition, 0 means neither [baseline])? \n", 629 | "`run_labels`: what run does this time point belong to? " 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "# Load in the data\n", 639 | "activity = np.load(os.path.join(nb2_simulated_data, 'activity.npy'))\n", 640 | "cond_labels = np.load(os.path.join(nb2_simulated_data,'cond_labels.npy'))\n", 641 | "run_labels = np.load(os.path.join(nb2_simulated_data,'run_labels.npy'))\n", 642 | "\n", 643 | "# When are the face, scene and baseline time points?\n", 644 | "time_shift = 2 # We perform time shifting to account for the hemodynamic lag.
This will be explained in a future notebook\n", 645 | "face_labels = np.where(cond_labels == 1)[0] + time_shift \n", 646 | "scene_labels = np.where(cond_labels == 2)[0] + time_shift\n", 647 | "baseline_labels = np.where(cond_labels == 0)[0] + time_shift \n", 648 | "\n", 649 | "# Make sure the baseline labels don't exceed the maximum allowed (because of shifting)\n", 650 | "baseline_labels = baseline_labels[baseline_labels < len(activity)]\n", 651 | "\n", 652 | "# Pull out the time points corresponding to the block onset\n", 653 | "face_activity = activity[face_labels]\n", 654 | "scene_activity = activity[scene_labels]\n", 655 | "baseline_activity = activity[baseline_labels]\n", 656 | "\n", 657 | "# Get the difference from baseline for the two conditions\n", 658 | "face_diff = face_activity - np.mean(baseline_activity)\n", 659 | "scene_diff = scene_activity - np.mean(baseline_activity)\n", 660 | "\n", 661 | "# Plot the data\n", 662 | "plt.figure()\n", 663 | "plt.errorbar([1, 2], [np.mean(face_diff), np.mean(scene_diff)], [np.std(face_diff), np.std(scene_diff)]);\n", 664 | "plt.title('Averaged evoked response')\n", 665 | "plt.xticks(labels=['Face', 'Scene'], ticks = [1, 2])\n", 666 | "plt.ylabel('Average Y value')\n", 667 | "plt.xlabel('Condition')\n", 668 | "plt.xlim([0.5, 2.5]);" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "**Exercise 8:** However, there is a problem: there is a quirk in this design that makes z-scoring a necessary step. If you were designing the study from scratch it could be avoided with a better experimental design, but we can use normalization to correct the problem. Search through the labels and data to identify the problem. Then use z-scoring to fix the problem and re-plot the result above, describing what has changed.\n", 676 | "\n", 677 | "**Hint:** We *strongly* recommend that you plot the condition labels and the activity across the four runs." 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": null, 683 | "metadata": {}, 684 | "outputs": [], 685 | "source": [ 686 | "# Put your code here for showing your working" 687 | ] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "metadata": {}, 692 | "source": [ 693 | "**A**:" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "## 5. BIDS Formatted Data \n", 701 | "\n", 702 | "The BIDS standard for neuroimaging data is being increasingly used to share data across laboratories. It enables open science practices and reproducibility. Instead of using arbitrary names for files, the BIDS format enforces a file naming structure, making it easy for other people to understand the various files. Detailed information on BIDS can be found here: https://bids.neuroimaging.io\n", 703 | "\n", 704 | "Below, we show how to read in files written in the BIDS format. There are a few key elements in the file naming:\n", 705 | ">`root folder`: This is where the data for all subjects is stored. \n", 706 | ">`func`: This folder is where all the functional neuroimaging data (epi) is stored. The `anat` folder stores the structural data. \n", 707 | "\n", 708 | "Keywords that are part of the file name:\n", 709 | ">`ses`: The session in which the data were acquired. Typically, the data acquired between when a subject enters the scanner and when they leave is counted as one session. Some studies could have participants return on other days and these would be counted as new sessions, e.g., ses-01, ses-02, etc.
This keyword is used in the naming sub folders within a subject, as well as in the file name within a session. \n", 710 | ">`task`: This label typically describes what the experiment was about. \n", 711 | ">`space`: The orientation of the dataset. This could be T1w or MNI depending on the data. \n", 712 | ">`run`: The run id in which the volumes were acquired. \n", 713 | ">`bold`: This suffix denotes the type of data that is contained in the file. For fmri data it is 'bold'. \n", 714 | "The above keywords are always separated by underscores.\n", 715 | "\n", 716 | "\n", 717 | "Using the above elements we can construct the file name. Note you will first need to download BIDS formatted data into an accessible folder and specify the path in `bids_folder` below. Once you have generated the file name, you can use Nilearn's data loading functions to read in the data.\n" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": null, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [ 726 | "bids_folder= 'bids_root_directory/' #specify path to BIDS folder\n", 727 | "subj_bids='sub-01' # the subject id of the subject\n", 728 | "ses_bids='ses-01' # the session information\n", 729 | "#Using the above information, we can get the fMRI data file location.\n", 730 | "file_loc= os.path.join(bids_folder,subj_bids,ses_bids,'func')\n", 731 | "print('Directory where files are stored: %s' % file_loc)\n", 732 | "\n", 733 | "#To get a specific file, we will need to add the following.\n", 734 | "task_bids='faceplace' # the task name. change this for the specific dataset that you are using.\n", 735 | "space_bids='T1w' # space of the dataset\n", 736 | "run_bids='01' # the run id.\n", 737 | "bids_file_name= '%s_task-%s' % (subj_bids,task_bids) + '_space-%s' % space_bids + \\\n", 738 | " '_run-%s' % run_bids +'_bold.nii.gz'\n", 739 | "print('BIDS file name: %s' % bids_file_name)\n", 740 | "bids_full_path_to_file= os.path.join(file_loc,bids_file_name)\n", 741 | "\n", 742 | "print('Full path to file: %s' % bids_full_path_to_file)\n" 743 | ] 744 | }, 745 | { 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "**Novel contribution:** Be creative and make one new discovery by adding an analysis, visualization, or optimization." 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": {}, 756 | "outputs": [], 757 | "source": [ 758 | "# Put novel contribution here" 759 | ] 760 | }, 761 | { 762 | "cell_type": "markdown", 763 | "metadata": {}, 764 | "source": [ 765 | "## Contributions \n", 766 | "\n", 767 | "M. Kumar, C. Ellis and N. Turk-Browne produced the initial notebook 01/2018 \n", 768 | "T. Meissner minor edits \n", 769 | "Q. Lu switch to matplotlib, color blind friendly colors, encapsulate helper functions, del ex.3 (loop) \n", 770 | "M. Kumar: Added Exercise 10, deleted masking exercise. \n", 771 | "K.A. Norman provided suggestions on the overall content and made edits to this notebook. \n", 772 | "C. Ellis: Incorporated comments from cmhn-s19. \n", 773 | "A.K. Sahoo made minor edits to the notebook." 
774 | ] 775 | } 776 | ], 777 | "metadata": { 778 | "anaconda-cloud": {}, 779 | "kernelspec": { 780 | "display_name": "Python 3", 781 | "language": "python", 782 | "name": "python3" 783 | }, 784 | "language_info": { 785 | "codemirror_mode": { 786 | "name": "ipython", 787 | "version": 3 788 | }, 789 | "file_extension": ".py", 790 | "mimetype": "text/x-python", 791 | "name": "python", 792 | "nbconvert_exporter": "python", 793 | "pygments_lexer": "ipython3", 794 | "version": "3.6.8" 795 | } 796 | }, 797 | "nbformat": 4, 798 | "nbformat_minor": 2 799 | } 800 | -------------------------------------------------------------------------------- /tutorials/04-dimensionality-reduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dimensionality reduction\n", 8 | "[Contributions](#contributions)\n", 9 | "\n", 10 | "fMRI analysis often has a dimensionality problem: we get approximately 100,000 voxels (i.e., features) per volume, but only 100s of time points or trials (i.e., examples). This makes it very hard for machine learning algorithms to model how each voxel contributes. For more general information on this problem, also dubbed the [curse of dimensionality](https://en.wikipedia.org/wiki/Curse_of_dimensionality), see [these slides from the Texas A&M University Computer Science and Engineering Department](http://courses.cs.tamu.edu/choe/11spring/633/lectures/slide08.pdf). For a neuroimaging-specific view on the curse of dimensionality, you might want to take a look at [Mwangi et al.'s Neuroinformatics review from 2014](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4040248/).\n", 11 | "\n", 12 | "In this notebook we are going to learn various methods that can help us reduce the dimensionality of fMRI data.\n", 13 | "\n", 14 | "## Goal of this script\n", 15 | "1. Learn to compute the covariance of a dataset. \n", 16 | "2. Reduce the feature space using principal component analysis (PCA). \n", 17 | "3. Interpret the meaning of PCA components. \n", 18 | "4. Perform feature selection using cross-validation. \n", 19 | "\n", 20 | "## Pre-requisites\n", 21 | "You should be familiar with the functions in the data loading and classification notebooks.\n", 22 | "\n", 23 | "## Table of Contents\n", 24 | "[1. Load the data](#load-data) \n", 25 | "\n", 26 | "[2. Covariance](#covariance) \n", 27 | "\n", 28 | "[3. PCA](#pca) \n", 29 | ">[3.1 Plot PCA](#plot_pca) \n", 30 | ">[3.2 Scree Plots](#scree) \n", 31 | ">[3.3 Interpreting Components](#cog-relevance) \n", 32 | ">[3.4 Normalization](#pca-norm) \n", 33 | ">[3.5 PCA dimensionality reduction and classification](#wb-pca-class) \n", 34 | "\n", 35 | "[4. Feature Selection](#feat) \n", 36 | ">[4.1 Feature Selection: Pipelines](#pipeline) \n", 37 | ">[4.2 Feature Selection: Univariate](#univariate) \n", 38 | "\n", 39 | "Exercises\n", 40 | ">[Exercise 1](#ex1) [2](#ex2) [3](#ex3) [4](#ex4) [5](#ex5) [6](#ex6) [7](#ex7) [8](#ex8) [9](#ex9)\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "**Dataset:** For this script we will use a localizer dataset from [Kim et al. (2017)](https://doi.org/10.1523/JNEUROSCI.3272-16.2017) again. Just to recap: The localizer consisted of 3 runs with 5 blocks of each category (faces, scenes and objects) per run. Each block was presented for 15s. Within a block, a stimulus was presented every 1.5s (1 TR). Between blocks, there was 15s (10 TRs) of fixation. 
Each run was 310 TRs. In the matlab stimulus file, the first row codes for the stimulus category for each trial (1 = Faces, 2 = Scenes, 3 = Objects). The 3rd row contains the time (in seconds, relative to the start of the run) when the stimulus was presented for each trial.\n", 48 | "\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "import warnings\n", 58 | "import sys\n", 59 | "if not sys.warnoptions:\n", 60 | " warnings.simplefilter('ignore')\n", 61 | "\n", 62 | "# Import neuroimaging, analysis and general libraries\n", 63 | "import numpy as np\n", 64 | "from time import time\n", 65 | "import pandas as pd\n", 66 | "\n", 67 | "# Import plotting libraries\n", 68 | "import matplotlib.pyplot as plt\n", 69 | "import seaborn as sns\n", 70 | "\n", 71 | "# Machine learning libraries\n", 72 | "from sklearn.model_selection import cross_val_score, cross_validate, PredefinedSplit\n", 73 | "from sklearn.svm import SVC\n", 74 | "from sklearn.decomposition import PCA\n", 75 | "from sklearn.feature_selection import SelectKBest, RFECV, f_classif\n", 76 | "from sklearn.pipeline import Pipeline\n", 77 | "\n", 78 | "%matplotlib inline\n", 79 | "%autosave 5\n", 80 | "sns.set(style = 'white', context='poster', rc={'lines.linewidth': 2.5})\n", 81 | "sns.set(palette=\"colorblind\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# load some helper functions\n", 91 | "from utils import load_labels, load_data, blockwise_sampling, label2TR, shift_timing, reshape_data\n", 92 | "from utils import normalize, decode\n", 93 | "# load some constants\n", 94 | "from utils import vdc_data_dir, vdc_all_ROIs, vdc_label_dict, vdc_n_runs, vdc_hrf_lag, vdc_TR, vdc_TRs_run\n", 95 | "\n", 96 | "print('Here\\'re some constants, which is specific for VDC data:')\n", 97 | "print('data dir = %s' % (vdc_data_dir))\n", 98 | "print('ROIs = %s' % (vdc_all_ROIs))\n", 99 | "print('Labels = %s' % (vdc_label_dict))\n", 100 | "print('number of runs = %s' % (vdc_n_runs))\n", 101 | "print('1 TR = %.2f sec' % (vdc_TR))\n", 102 | "print('HRF lag = %.2f sec' % (vdc_hrf_lag))\n", 103 | "print('num TRs per run = %d' % (vdc_TRs_run))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## 1. Load the data \n", 111 | "\n", 112 | "Load the data for one participant using these helper functions." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "sub_id = 1\n", 122 | "mask_name = 'FFA' # This is set in order to reduce memory demands in order to run within 4Gb, however, if you want to make this run on whole brain, then set this to ''" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Specify the subject name\n", 132 | "sub = 'sub-%.2d' % (sub_id)\n", 133 | "# Convert the shift into TRs\n", 134 | "shift_size = int(vdc_hrf_lag / vdc_TR) \n", 135 | "\n", 136 | "# Load subject labels\n", 137 | "stim_label_allruns = load_labels(vdc_data_dir, sub)\n", 138 | "\n", 139 | "# Load run_ids\n", 140 | "run_ids_raw = stim_label_allruns[5,:] - 1 \n", 141 | "\n", 142 | "# Load the fMRI data using a mask\n", 143 | "epi_mask_data_all = load_data(vdc_data_dir, sub, mask_name=mask_name)[0]\n", 144 | "\n", 145 | "# This can differ per participant\n", 146 | "print(sub, '= TRs: ', epi_mask_data_all.shape[1], '; Voxels: ', epi_mask_data_all.shape[0])\n", 147 | "TRs_run = int(epi_mask_data_all.shape[1] / vdc_n_runs)\n", 148 | "\n", 149 | "# Convert the timing into TR indexes\n", 150 | "stim_label_TR = label2TR(stim_label_allruns, vdc_n_runs, vdc_TR, TRs_run)\n", 151 | "\n", 152 | "# Shift the data some amount\n", 153 | "stim_label_TR_shifted = shift_timing(stim_label_TR, shift_size)\n", 154 | "\n", 155 | "# Perform the reshaping of the data\n", 156 | "bold_data_raw, labels_raw = reshape_data(stim_label_TR_shifted, epi_mask_data_all)\n", 157 | "\n", 158 | "# Normalize raw data within each run\n", 159 | "bold_normalized_raw = normalize(bold_data_raw, run_ids_raw)\n", 160 | "\n", 161 | "# Down sample the data to be blockwise rather than trialwise. \n", 162 | "#We'll use the blockwise data for all the \n", 163 | "bold_data, labels, run_ids = blockwise_sampling(bold_data_raw, labels_raw, run_ids_raw)\n", 164 | "\n", 165 | "# Normalize blockwise data within each run\n", 166 | "bold_normalized = normalize(bold_data, run_ids)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "
\n", 174 | "Block Averaging\n", 175 | "
\n", 176 | "Previously, we have been using data from each trial. Within each block, the voxel activity is correlated across trials. Thus, it is common (and probably better) to take the average value of the activity within a block as your observation in decoding analyses in order to avoid concerns about non-independence. Mean values of activity or beta coefficients (from GLM) are commonly used in the literature.\n", 177 | "
" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "**Self-study:** We introduce a simple kind of debugging here, as we print both the number of expected and resampled blocks (resampled refers to the conversion from trialwise data to blockwise data). Thus, if something went wrong, we would be able to spot it the output. Learn about more ways of debugging your code by using assertions [here](https://wiki.python.org/moin/UsingAssertionsEffectively)." 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## 2. Covariance \n", 192 | "\n", 193 | "As a precursor to understanding dimensionality reduction techniques, we need to learn how to compute the covariance matrix because it is often used in these methods. \n", 194 | "\n", 195 | "By default, we used an FFA mask to reduce the memory demands in this notebook, but if possible we recommend that you use no mask in order to grapple with the memory issues of working with wholebrain data. There are nearly 1 million voxels in every volume we acquire, of which about 15% are in the brain. The data matrix of >100,000 voxels and <1000 time points is very large, making any computations on all of this data very intensive." 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "The covariance of two variables is calculated as follows: $$ Cov(X,Y) = \\frac{\\sum_{1}^{N}(X-\\bar{X})(Y-\\bar{Y})}{(N-1)}$$\n", 203 | "where $\\mbox{ } \\bar{X} = mean(X), \\mbox{ } \\bar{Y} = mean(Y), \\mbox{ } N = \\mbox{number of samples } $\n", 204 | "\n", 205 | "In fMRI, X and Y could be time-series data for two voxels (two columns in our time by voxels data matrix) or the pattern across voxels for two different time points (two rows in the data matrix). The choice of vectors depends on the application.\n", 206 | "\n", 207 | "**Exercise 1:** Compute the covariance between two blocks (i.e., their averaged patterns across voxels). The steps to do this are outlined below. You could just use a function but we want you to code the individual steps as described (refer [here]( https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.cov.html) for additional help)\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# Enter your code here\n", 217 | "\n", 218 | "# Compute the mean of one row of the block-averaged bold data called: X\n", 219 | "\n", 220 | "# Compute the mean of any other row of the block-averaged bold data called: Y\n", 221 | "\n", 222 | "# Compute the differences of individual voxel values in these rows from the corresponding mean for X or Y.\n", 223 | "\n", 224 | "# Compute the pointwise product of the difference vectors across the two rows.\n", 225 | "\n", 226 | "# Sum over the products of the differences.\n", 227 | "\n", 228 | "# Complete the covariance calculation with these values.\n", 229 | "\n", 230 | "# Compare your result to the answer obtained with np.cov(X,Y)\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "Covariance is dependent on the unit and scale of the measurement. Its value is thus not easily interpretable or comparable across datasets -- e.g. 
is there a strong relationship between X and Y if the covariance is 200 as compared to 2 or 2000?\n", 238 | "\n", 239 | "Correlation solves this problem by normalizing the range of the covariance from -1 to +1.\n", 240 | "\n", 241 | "$$ Corr(X,Y) = \\frac{Cov(X,Y)}{\\sqrt{\\frac{\\sum_{1}^{N}(X-\\bar{X})^2}{(N-1)}}\\sqrt{\\frac{\\sum_{1}^{N}(Y-\\bar{Y})^2}{(N-1)}}}$$\n", 242 | "\n", 243 | "**Exercise 2:** Compute the correlation between all pairs of blocks manually (one pair at a time) and compare the result with a numpy function that calculates the block-by-block correlation matrix in one step." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "# Compute the correlation manually\n", 253 | "\n", 254 | "# Now with a function \n" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "**Exercise 3**: Now compute the covariance between time-series across pairs of voxels (using the np.cov). Perform this compution on a group of 100 voxels in order to make a voxel-by-voxel covariance matrix in one step (no `for` loops allowed). Make sure the output is the correct shape (100, 100). " 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "# Insert your code here.\n", 271 | "\n", 272 | "# Subselect 100 voxels from bold_data into a matrix.\n", 273 | "\n", 274 | "# Use np.cov() to compute the covariance of this matrix." 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## 3. PCA \n", 282 | "\n", 283 | "We will use principal component analysis (PCA) to **reduce the dimensionality** of the data. Some voxels may contain correlated information or no information and so the original voxel-dimensional data matrix (time-by-voxels) can be projected into a lower-dimensional \"component\" matrix space (time-by-component) without losing much information.\n", 284 | "\n", 285 | "![image](https://cdn-images-1.medium.com/max/1200/1*Iri_LDMXuz2Qac-8KPeESA.png)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# We now use the PCA function in scikit-learn to reduce the dimensionality of the data\n", 295 | "# The number of components was chosen arbitrarily.\n", 296 | "pca = PCA(n_components=20)\n", 297 | "bold_pca = pca.fit_transform(bold_data)\n", 298 | "\n", 299 | "print('Original data shape:', bold_data.shape)\n", 300 | "print('PCA data shape:', bold_pca.shape)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### 3.1 Plot PCA \n", 308 | "\n", 309 | "Let's visualize the variance in the data along different component dimensions." 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "# Setting plotting parameter\n", 319 | "n_bins=75\n", 320 | "\n", 321 | "# Plot\n", 322 | "n_plots = 4\n", 323 | "components_to_plot = [0,1,2,19]\n", 324 | "f, axes = plt.subplots(1, n_plots, figsize=(14, 14/n_plots))\n", 325 | "st=f.suptitle(\"Figure 3.1. 
Histogram of values for each PC dimension \", fontsize=\"x-large\")\n", 326 | "\n", 327 | "for i in range(n_plots): \n", 328 | " axes[i].hist(bold_pca[:, components_to_plot[i]], \n", 329 | " bins=n_bins)\n", 330 | " # mark the plots \n", 331 | " axes[i].set_title('PC Dimension %d'%(components_to_plot[i]+1))\n", 332 | " axes[i].set_ylabel('Frequency')\n", 333 | " axes[i].set_xlabel('Value') \n", 334 | " axes[i].set_xticks([])\n", 335 | " axes[i].set_yticks([]) \n", 336 | "\n", 337 | "f.tight_layout()\n", 338 | "st.set_y(0.95)\n", 339 | "f.subplots_adjust(top=0.75)\n" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "Let's visualize the relationship between variances across pairs of components." 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "\"\"\"\n", 356 | "Plot the low dim representation of the bold data\n", 357 | "\"\"\"\n", 358 | "# Setting plotting parameters\n", 359 | "alpha_val = .8\n", 360 | "cur_pals = sns.color_palette('colorblind', n_colors=vdc_n_runs)\n", 361 | "\n", 362 | "# Plot\n", 363 | "n_plots = 3 \n", 364 | "f, axes = plt.subplots(1, n_plots, figsize=(14,5))\n", 365 | "st=f.suptitle(\"Figure 3.2. Scatter plots comparing PCA dimensions \", fontsize=\"x-large\")\n", 366 | "\n", 367 | "# plot data\n", 368 | "axes[0].scatter(bold_pca[:, 0], bold_pca[:, 1], \n", 369 | " alpha=alpha_val, marker='.', color = 'k')\n", 370 | "axes[1].scatter(bold_pca[:, 2], bold_pca[:, 3], \n", 371 | " alpha=alpha_val, marker='.', color = 'k')\n", 372 | "axes[2].scatter(bold_pca[:, 18], bold_pca[:, 19], \n", 373 | " alpha=alpha_val, marker='.', color = 'k')\n", 374 | "\n", 375 | "axes[0].set_title('PCA Dimensions\\n1 x 2')\n", 376 | "axes[1].set_title('PCA Dimensions\\n3 x 4')\n", 377 | "axes[2].set_title('PCA Dimensions\\n18 x 19')\n", 378 | "\n", 379 | "# modifications that are common to all plots \n", 380 | "for i in range(n_plots): \n", 381 | " axes[i].axis('equal')\n", 382 | " axes[i].set_xticks([])\n", 383 | " axes[i].set_yticks([])\n", 384 | "\n", 385 | "f.tight_layout()\n", 386 | "st.set_y(0.95)\n", 387 | "f.subplots_adjust(top=0.75)\n" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "### 3.2 Scree plots \n", 395 | "\n", 396 | "A [\"scree\" plot](https://www.theanalysisfactor.com/factor-analysis-how-many-factors/) can depict the amount of variance in the original data that is explained by each component.\n", 397 | "\n", 398 | "**Exercise 4:** Make a scree plot for the PCA above. How many components would be sufficient to account for most of the variance?" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "# Plot the scree plot" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "**A:**" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "### 3.3 Interpreting Components \n", 422 | "\n", 423 | "From the previous plot of the first and second PCA dimension, you can see you have three clusters. You might assume that they correspond to faces, scenes, and objects." 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "**Exercise 5:** Determine what the three clusters correspond to. 
First, create a new scatter plot of these two components and mark (e.g., in different symbols or colors) each point on the plot by visual category. Then, create a second scatter plot with points labeled in a way that better corresponds to the clusters (complete this exercise before reading further). (Hint: What else was there three of?)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "# Put answer\n", 440 | "\n", 441 | "\n", 442 | "\n", 443 | "\n", 444 | "\n", 445 | "\n", 446 | "\n", 447 | "\n", 448 | "\n", 449 | "\n", 450 | " " 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "### 3.4 Normalization \n", 458 | "\n", 459 | "We ran the PCA analysis without normalizing the data.\n", 460 | "\n", 461 | "**Exercise 6:** Using the variable `bold_normalized` re-compute the PCA (components=20). Plot the results with a scatter plot like **Figure 3.2**. What was the effect of normalization and why is this useful?" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "# Insert code here" 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": {}, 476 | "source": [ 477 | "### 3.5 PCA dimensionality reduction and classification \n", 478 | "As mentioned earlier, we use PCA to reduce the dimensionality of the data and thus minimize the 'curse of dimensionality'. Below we explore how PCA affects classification accuracy." 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "# Run a basic n-fold classification\n", 488 | "\n", 489 | "# Get baseline, whole-brain decoding accuracy without PCA\n", 490 | "print('Baseline classification')\n", 491 | "print('Original size: ', bold_normalized.shape)\n", 492 | "svc = SVC(kernel=\"linear\", C=1)\n", 493 | "\n", 494 | "start = time()\n", 495 | "models, scores = decode(bold_normalized, labels, run_ids, svc)\n", 496 | "end = time()\n", 497 | "print('Accuracy: ', scores)\n", 498 | "print('Run time: %0.4fs' %(end - start))" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "# Run the classifier on data in component space \n", 508 | "pca = PCA(n_components=20)\n", 509 | "bold_pca_normalized = pca.fit_transform(bold_normalized)\n", 510 | "print('PCA (c=%d) classification' % bold_pca_normalized.shape[1])\n", 511 | "print('New size after PCA: ', bold_pca_normalized.shape)\n", 512 | "\n", 513 | "start = time()\n", 514 | "models_pca, scores_pca = decode(bold_pca_normalized, labels, run_ids, svc)\n", 515 | "end = time()\n", 516 | "print('Accuracy: ', scores_pca)\n", 517 | "print('Run time: %0.4fs' %(end - start))" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "In this case PCA does not improve decoding accuracy. However, note that similar performance was achieved with 20 vs. 177,314 features, that the analysis ran 500x faster, and that the resulting model is likely to generalize better to new data (e.g., from a different subject)." 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "**Exercise 7:** We used an arbitrary number of components. How does decoding accuracy change with more or less components?" 
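One way to explore this (a sketch added for illustration, reusing the `decode` helper, `bold_normalized`, `labels`, `run_ids`, and `svc` defined above) is to loop over several component counts and compare the cross-validated accuracies:

# Compare decoding accuracy across different numbers of PCA components
for n in [2, 5, 10, 20, 30]:
    bold_pca_n = PCA(n_components=n).fit_transform(bold_normalized)
    _, scores_n = decode(bold_pca_n, labels, run_ids, svc)
    print('n_components = %d, mean accuracy = %0.3f' % (n, np.mean(scores_n)))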
532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": {}, 538 | "outputs": [], 539 | "source": [ 540 | "# Insert code" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "### 3.5.1 The PCA Challenge \n", 548 | "\n", 549 | "**Exercise 8:** Given that some of the early PCA dimensions may not be cognitively relevant, determine the smallest number of PCA components from which you can get the highest decoding accuracy. \n", 550 | "\n" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "## 4. Feature selection using cross-validation \n", 558 | "\n", 559 | "When we took a few PCA components instead of all voxels, we were performing feature selection. Feature selection is used to reduce noise and increase computational speed. However, a problem with the approach above is that feature selection is applied to all data (prior to division into training and test sets) and is thus a kind of double dipping.\n", 560 | "\n", 561 | "A better way to select features is during cross-validation. In this case, feature selection is only performed on the training set, and the same features are used on the test data. This way the classifier never sees the test data during training.\n", 562 | "\n", 563 | "
\n", 564 | "Aside: While doing PCA on the whole dataset violates the principle of “never touch your test data during training”, researchers have sometimes used this approach, justifying it on the grounds that — while PCA is using the fMRI data from the test set — it is not peeking at the class labels from the test set, and thus it will not bias classification accuracy. Is this OK? It’s difficult to say *a priori*. It is always safer to completely avoid touching the test data during training, so you should do this if at all possible. If you aren’t sure what problems might emerge from a particular analysis method, a good check of your method is to test on random noise; when you do this, classification should not exceed chance (if it does, you have a problem…)\n", 565 | "
\n", 566 | "\n", 567 | "We will perform feature selection during cross-validation in this section. The `Pipelines` method in scikit-learn provides an easy interface to perform these steps and we will use it extensively.\n" 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "\n", 575 | "### 4.1 Pipelines: Feature selection with cross-validation \n", 576 | "\n", 577 | "The scikit-learn has a method, [Pipeline](http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline), that simplifies running a sequence of steps in an automated fashion. Below we create a pipeline with the following steps:\n", 578 | " \n", 579 | ">Perform dimensionality reduction. \n", 580 | ">Run an SVM.\n", 581 | "\n", 582 | "To do this systematically during cross-validation, we will embed `Pipeline` in the `cross_validate` method in scikit-learn.\n", 583 | "\n", 584 | "The steps below are based on [this example in scikit-learn](https://scikit-learn.org/stable/auto_examples/compose/plot_compare_reduction.html#illustration-of-pipeline-and-gridsearchcv)." 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": null, 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [ 593 | "# Example: \n", 594 | "# Set up the pipeline\n", 595 | "pipe = Pipeline([\n", 596 | " ('reduce_dim', PCA(n_components=20)),\n", 597 | " ('classify', SVC(kernel=\"linear\", C=1)),\n", 598 | "])\n", 599 | "\n", 600 | "# Run the pipeline with cross-validation\n", 601 | "ps = PredefinedSplit(run_ids) # Provides train/test indices to split data in train/test sets\n", 602 | "clf_pipe = cross_validate(\n", 603 | " pipe,bold_normalized,labels,cv=ps,\n", 604 | " return_train_score=True\n", 605 | ")\n", 606 | "\n", 607 | "# Print results from this dimensionality reduction technique\n", 608 | "print(clf_pipe)\n", 609 | "print (\"Average Testing Accuracy: %0.2f\" % (np.mean(clf_pipe['test_score'])))" 610 | ] 611 | }, 612 | { 613 | "cell_type": "markdown", 614 | "metadata": {}, 615 | "source": [ 616 | "Print out the data indices that were used for training and testing. Ensure that they are different for each fold. " 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "metadata": {}, 623 | "outputs": [], 624 | "source": [ 625 | "# Print train/test split\n", 626 | "for cv_idx ,(train_index, test_index) in enumerate(ps.split(bold_normalized, labels)):\n", 627 | " print('CV iteration: %s' % cv_idx)\n", 628 | " print('Train_index: ')\n", 629 | " print(train_index)\n", 630 | " print('Test_index: ')\n", 631 | " print(test_index)\n", 632 | "\n", 633 | "# Print results from this dimensionality reduction technique\n", 634 | "print(clf_pipe)\n", 635 | "print (\"Average Testing Accuracy: %0.2f\" % (np.mean(clf_pipe['test_score'])))" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "### 4.2 Feature selection: Univariate \n", 643 | "\n", 644 | "We can also use a variety of univariate methods to do feature selection in scikit-learn. One commonly used technique is to compute an ANOVA on the data and pick voxels with large F values. The F value measures the ratio of the variance between conditions (signal) to the variance within condition (noise). You can learn more about the ANOVA here: [ANOVA F-value](http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_classif.html). 
Note that implementing this completely different feature selection approach requires changing only one line in the pipeline, demonstrating the usefulness of this framework." 645 | ] 646 | }, 647 | { 648 | "cell_type": "markdown", 649 | "metadata": {}, 650 | "source": [ 651 | "**Exercise 9:** Implement the pipeline using ANOVA F-value (imported as `f_classif`) and the [`SelectKBest` method](http://scikit-learn.org/stable/modules/feature_selection.html#univariate-feature-selection) pick the top 100 voxels with the highest F values." 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": null, 657 | "metadata": {}, 658 | "outputs": [], 659 | "source": [ 660 | "# Insert code" 661 | ] 662 | }, 663 | { 664 | "cell_type": "markdown", 665 | "metadata": {}, 666 | "source": [ 667 | "**Novel contribution:** be creative and make one new discovery by adding an analysis, visualization, or optimization. This week we encourage you to implement a different feature selection [approach](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection)." 668 | ] 669 | }, 670 | { 671 | "cell_type": "markdown", 672 | "metadata": {}, 673 | "source": [ 674 | "## Contributions \n", 675 | "\n", 676 | "M. Kumar, C. Ellis and N. Turk-Browne produced the initial notebook 02/2018 \n", 677 | "T. Meissner minor edits and added the ICA section \n", 678 | "Q. Lu revise PCA plots, cb colors, code style improvement, leverage exisiting funcs \n", 679 | "H. Zhang added pipeline section, use blockwise normalized data, other edits \n", 680 | "M. Kumar enhanced section introductions. \n", 681 | "K.A. Norman provided suggestions on the overall content and made edits to this notebook. \n", 682 | "C. Ellis implemented comments from cmhn-s19 \n", 683 | "A.K. Sahoo fixed broken links and did minor edits." 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [] 692 | } 693 | ], 694 | "metadata": { 695 | "anaconda-cloud": {}, 696 | "kernelspec": { 697 | "display_name": "Python 3", 698 | "language": "python", 699 | "name": "python3" 700 | }, 701 | "language_info": { 702 | "codemirror_mode": { 703 | "name": "ipython", 704 | "version": 3 705 | }, 706 | "file_extension": ".py", 707 | "mimetype": "text/x-python", 708 | "name": "python", 709 | "nbconvert_exporter": "python", 710 | "pygments_lexer": "ipython3", 711 | "version": "3.7.4" 712 | } 713 | }, 714 | "nbformat": 4, 715 | "nbformat_minor": 2 716 | } 717 | -------------------------------------------------------------------------------- /tutorials/06-rsa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Representational Similarity Analysis\n", 8 | "[Contributions](#contributions)\n", 9 | "\n", 10 | "Oranges and orange ping-pong balls are not kept next to each other in the grocery store. The oranges are usually placed with other fruits and ping-pong balls with other sport equipment. This helps us shop easily, as we usually group our shopping by categories: fruits and vegetables, meat and dairy, frozen foods, and, somewhere far away, kitchen supplies, toys and sports. Beyond grocery stores, are these meaningful conceptual groupings in the brain? 
Are patterns of neural activity for oranges and apples more similar to each other as compared to the patterns for oranges and ping-pong balls?\n", 11 | "\n", 12 | "One could conceivably group items based on other attributes such as shape and color. This would make the neural representations of oranges and orange colored ping-pong balls very similar to each other. In a brain region that cares about color, the neural similarity would be greater for oranges and orange ping-pong balls, compared to oranges and red apples. How can we determine the similarity between neural representations and which attributes are driving this similarity?\n", 13 | "\n", 14 | "Representational similarity analysis (RSA) is a way to compare and contrast different brain states and the stimuli that elicited them. In RSA, we compute a similarity measure (often a correlation) between patterns of neural activity for all items being compared. Then, to examine whether neural patterns in a brain region are grouped by color, shape, or category, we can order the similarity measure based on a model that groups by these attributes.\n", 15 | "\n", 16 | "RSA is a highly versatile tool: it can be used to compare brain activity to models, compare data across brain imaging techniques, and even to make cross-species comparisons. You can learn more about the RSA method [here](https://doi.org/10.3389/neuro.06.004.2008) and [here](https://doi.org/10.1016/j.tics.2013.06.007).\n", 17 | " \n", 18 | "## Goal of this script\n", 19 | "\n", 20 | ">1. Learn how to perform RSA on a dataset\n", 21 | " >> Calculate and plot Pearson and Spearman correlations in ROIs \n", 22 | " >> Order these similarity matrices in a meaningful way \n", 23 | " >> Interpret a (dis)similarity matrix \n", 24 | ">2. Visualize similarity with multi dimensional scaling (MDS)\n", 25 | "\n", 26 | "## Table of Contents \n", 27 | "[1. Prepare for RSA](#preprocessing)\n", 28 | ">[1.1 Load the data for one subject](#load_data) \n", 29 | ">[1.2 Helper Functions ](#helper) \n", 30 | ">[1.3 Visualize the data](#visualize_data) \n", 31 | "\n", 32 | "[2. Create a similarity matrix](#sim_mat) \n", 33 | ">[2.1 Reorder data into categories](#reorder) \n", 34 | ">[2.2 How to read a similarity matrix](#read_sim_mat) \n", 35 | ">[2.3 Representational dissimilarity](#read_dsim_mat) \n", 36 | ">[2.4 Comparison of representations in monkeys and humans](#rdm-monkey-human) \n", 37 | "\n", 38 | "[3. Manifolds and multi-dimensional scaling (MDS)](#mds) \n", 39 | ">[3.1 Plotting RDM in 2d](#plot_2d) \n", 40 | ">[3.2 Plotting RDM in 3d](#plot_3d) \n", 41 | "\n", 42 | "\n", 43 | "Exercises\n", 44 | ">[Exercise 1](#ex1) [2](#ex2) [3](#ex3) [4](#ex4) [5](#ex5) [6](#ex6) [7](#ex7) [8](#ex8) [9](#ex9) [10](#ex10)\n", 45 | "\n", 46 | "[Novel contribution](#novel) " 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Dataset \n", 54 | "\n", 55 | "The dataset we will be using for this exercise is from [Kriegeskorte et al. (2008)](https://www.sciencedirect.com/science/article/pii/S0896627308009434), called 'Ninetysix' in the `datasets` folder. 96 visual stimuli, from the 6 categories listed below, were presented to participants. The image stimuli are stored in the subfolder `Stimuli`.\n", 56 | "\n", 57 | "The data have 6 categories: \n", 58 | " 1. artificial inanimate (object/scene)\n", 59 | " 2. human bodypart \n", 60 | " 3. human face \n", 61 | " 4. natural inanimate (object/scene)\n", 62 | " 5. nonhuman bodypart\n", 63 | " 6. 
nonhuman face\n", 64 | "\n", 65 | "**Self-study:** Explore the data" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "import warnings\n", 75 | "import sys \n", 76 | "if not sys.warnoptions:\n", 77 | " warnings.simplefilter(\"ignore\")\n", 78 | "\n", 79 | "import os\n", 80 | "import numpy as np\n", 81 | "import pandas as pd\n", 82 | "import scipy.io\n", 83 | "from scipy import stats\n", 84 | "from sklearn.manifold import MDS\n", 85 | "import scipy.spatial.distance as sp_distance\n", 86 | "\n", 87 | "import matplotlib.pyplot as plt\n", 88 | "import seaborn as sns \n", 89 | "from mpl_toolkits.mplot3d import Axes3D\n", 90 | "\n", 91 | "# %matplotlib notebook\n", 92 | "%matplotlib inline\n", 93 | "%autosave 5\n", 94 | "sns.set(style = 'white', context='poster', rc={\"lines.linewidth\": 2.5})\n", 95 | "sns.set(palette=\"colorblind\")" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### 1.1 Load the data for one subject\n", 103 | "\n", 104 | "The data for these participants are stored as a matlab file (it was 2008 after all...). Python is able to load matlab files using a scipy utility. However, the file formats can be tricky and may require transformations to make compatible with typical Python organization. \n", 105 | "\n", 106 | "The data from matlab are stored as a dictionary where the variables in the matlab files are keys in the dictionary:\n", 107 | "\n", 108 | ">The ROI names are listed in **roinames**. \n", 109 | ">The category label indices for each condition are listed in **labels**. \n", 110 | ">The label indices correspond to entries in **categoryNames**. \n", 111 | ">The data for each ROI, in the order of roinames, is stored in **roi_data**. \n", 112 | "\n", 113 | "Each row of roi_data represents a stimulus (as defined in labels) and each column represents a voxel (there will be different numbers of voxels in different ROIs). These data have been preprocessed and each entry is stored in terms of t-values. There is no time information and no baseline signal to be used for reference.\n", 114 | "\n", 115 | "**Self-study:** What do you think these t-values reflect?\n", 116 | "\n", 117 | "The last 4 rows of the dataset have unknown labels (dun dun dunnnnn!). We'll use only the first 92 rows for analysis, for now.\n", 118 | "\n", 119 | "In the analyses that follow we are going to explore the data of subject 'BE'." 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### 1.2 Helper Functions \n", 127 | "\n", 128 | "To make it easy for you to achieve the main goals of this notebook, we have created helper functions that extract data from matlab files and convert into numpy arrays." 
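For reference, the kind of call these helpers wrap looks roughly like the sketch below (the file name here is hypothetical; `load_data_for_a_subj` handles the real paths and conversion for you):

import scipy.io

# Hypothetical example of reading a matlab file; keys starting with '__' are matlab metadata
mat = scipy.io.loadmat('some_subject_data.mat')
print([key for key in mat.keys() if not key.startswith('__')])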
129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "from utils import load_data_for_a_subj, digitize_rdm, ns_data_dir" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "# Load the data for one subject\n", 147 | "subj_initials = 'BE'\n", 148 | "data_pack = load_data_for_a_subj(subj_initials)\n", 149 | "images, roi_data, roi_names, n_rois, categories, n_categories, labels, label_dict = data_pack\n", 150 | "n_stimuli = len(images)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "**Exercise 1:** Inspect the data. \n", 158 | "- Print the name of each category.\n", 159 | "- Report the shape of the data for each ROI and what each dimension means" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# Insert code here" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "### 1.3 Visualize the data \n", 176 | "\n", 177 | "It is always a good idea to plot data as a sanity check before starting analysis. \n", 178 | "\n", 179 | "We also want to see the corresponding labels. Notice the category order is random." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "plt.figure()\n", 189 | "\n", 190 | "# Label plot\n", 191 | "plt.subplot(2,1,2)\n", 192 | "plt.plot(labels,'.-')\n", 193 | "plt.xlabel('Stimuli', fontsize=16)\n", 194 | "plt.ylabel('Category', fontsize=16)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "
\n", 202 | "Notice that the category order is random i.e. the stimuli at every point are from a different category compared to the neighbors.\n", 203 | "
" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "## 2. Create a similarity matrix \n", 211 | "\n", 212 | "Let's examine the similarity of the neural representations of each image with the neural patterns of every other image in the dataset. If the neural patterns are similar between images, we will see high values of similarity, but if the neural patterns are dissimilar, we will see low values of similarity. \n", 213 | "\n", 214 | "There are many ways to compute similarity. We start with one of the most common measures of similarity that you are already familiar with: Pearson correlation (see notebook-04). We compute the Pearson correlation on the neural pattern for each image with every other image. We can compute this on data for each of the ROIs that we have just loaded (left and right FFA, and left and right PPA). For each ROI, our computation will result in a 92 x 92 matrix (we only have labels for 92 images). This resulting matrix shows how similar the neural patterns of activity are between images and is called the representational **similarity** matrix (RSM)." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "print('ROI names: ', roi_names)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "# Choose your ROI here!\n", 233 | "roi_id = 1\n", 234 | "\n", 235 | "# Plot figure of these correlations\n", 236 | "f, ax = plt.subplots(1,1, figsize=(8, 7))\n", 237 | "\n", 238 | "plt.imshow(\n", 239 | " np.corrcoef(roi_data[roi_id]), \n", 240 | " cmap='bwr', \n", 241 | " vmin=-1,\n", 242 | " vmax=1,\n", 243 | ")\n", 244 | "plt.colorbar()\n", 245 | "ax.set_title('RSM, unsorted, %s' % (roi_names[roi_id])) \n", 246 | "ax.set_xlabel('stimuli id')\n", 247 | "ax.set_ylabel('stimuli id')" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "**Exercise 2:** In the plot above you used Pearson correlation to compute similarity. An alternative metric is a Spearman correlation. \n", 255 | "\n", 256 | "- Explain the difference between Pearson and Spearman correlation. \n", 257 | "- Redo the analysis above with Spearman correlation. \n", 258 | "- Visualize the RSM based on Spearman correlation. " 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "**A:**" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "# Insert code here" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "### 2.1 Reorder data into categories \n", 282 | "\n", 283 | "Although the plot above is useful, it is hard to observe any structure because the order of the stimuli is random. To simplify, let's reorganize into label groups." 
284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "# Add the stimulus condition labels so that we can sort the data, collecting rows from the same condition together.\n", 293 | "sort_ids = labels.argsort()\n", 294 | "lffa_sorted = roi_data[0][sort_ids, :]\n", 295 | "\n", 296 | "plt.figure(figsize=(9,7))\n", 297 | "\n", 298 | "# Plot the new sorted results\n", 299 | "plt.subplot(2,1,1)\n", 300 | "plt.plot(lffa_sorted[:,0])\n", 301 | "plt.ylabel('t-Value', fontsize=16)\n", 302 | "plt.xlim(0, 96)\n", 303 | "\n", 304 | "plt.subplot(2,1,2)\n", 305 | "plt.plot(labels[sort_ids],'.')\n", 306 | "plt.xlabel('Stimuli', fontsize=16)\n", 307 | "plt.ylabel('Category', fontsize=16)\n", 308 | "plt.show()" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "# Choose your ROI here! \n", 318 | "roi_id = 1\n", 319 | "\n", 320 | "# Calculate the RSM\n", 321 | "rsm = np.corrcoef(roi_data[roi_id][sort_ids, :][:92,])\n", 322 | "\n", 323 | "# Plot \n", 324 | "f, ax = plt.subplots(1,1, figsize=(10, 8))\n", 325 | "plt.imshow(\n", 326 | " rsm, \n", 327 | " cmap='bwr', \n", 328 | " vmin=-1,\n", 329 | " vmax=1,\n", 330 | ")\n", 331 | "plt.colorbar()\n", 332 | "\n", 333 | "# Pull out the bin edges between the different categories\n", 334 | "binsize = np.histogram(labels[:92,], 6)[0]\n", 335 | "edges = np.concatenate([np.asarray([0]), np.cumsum(binsize)])[:-1]\n", 336 | "ax.set_xticks(list(np.array(edges)+8))\n", 337 | "ax.set_xticklabels(categories, rotation = 30)\n", 338 | "ax.set_yticks(list(np.array(edges)+8))\n", 339 | "ax.set_yticklabels(categories)\n", 340 | "ax.vlines(edges,0,92)\n", 341 | "ax.hlines(edges,0,92)\n", 342 | "ax.set_title('RSM, sorted, %s' % roi_names[roi_id])" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "
\n", 350 | "Binning the data: In Figure 1 of Kriegeskorte et al. (2008), the raw correlation values were binned into ten bins based on the percentile score of the dissimilarity value, and the percentile value was plotted. We have created a function `digitize_rdm` to perform the same calculation here and make the plots similar to Figure 1 in Kriegeskorte et al. (2008). \n", 351 | "
\n", 352 | "
\n", 353 | "Note that Figure 1 used dissimilarity rather than similarity and that the data were from a different ROI (inferior temporal cortex, or IT). However, we can apply the same function to our RSM, the only difference being that the percentile will be based on similarity. \n", 354 | "
\n", 355 | "
\n", 356 | "The `digitize_rdm` function works in the following manner (see the sketch after this list): \n", 357 | "
\n", 358 | "
\n", 359 | "1. Create `n_bins` of percentile values.
\n", 360 | "2. Take in the matrix of correlations and reshape it into a single row.
\n", 361 | "3. Determine the percentile value of every correlation point and assign it to a bin (`np.digitize` does this).
\n", 362 | "4. Reshape the assigned percentile values into the original correlation matrix shape.
\n", 363 | "5. Finally, plot the percentile values.
\n", 364 | "
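For reference, here is a minimal sketch of how a percentile-binning function along these lines could look, built on `np.percentile` and `np.digitize`. The notebook's own `digitize_rdm` (defined earlier in the notebook and not reproduced here) may differ in detail, and step 5 (plotting) is handled with `plt.imshow`, as in the next cell.

```python
# Minimal sketch of a percentile-binning function like the one described above
# (assumption: the notebook's real digitize_rdm may differ in its exact choices)
import numpy as np

def digitize_rdm_sketch(rdm, n_bins=10):
    # 1. Create n_bins of percentile values (0%, 10%, ..., 90% for 10 bins)
    percentiles = np.percentile(rdm, 100 / n_bins * np.arange(n_bins))
    # 2. Reshape the matrix of correlations into a single row
    flat = np.asarray(rdm).reshape(-1)
    # 3. Determine each value's percentile bin (np.digitize does this)
    binned = np.digitize(flat, percentiles)
    # 4. Reshape the bin assignments back into the original matrix shape
    return binned.reshape(np.asarray(rdm).shape)
```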
\n", 365 | "
" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "# Plot the RSM\n", 375 | "f, ax = plt.subplots(1,1, figsize=(10, 8))\n", 376 | "plt.imshow(\n", 377 | "    digitize_rdm(rsm), cmap='jet', \n", 378 | ")\n", 379 | "plt.colorbar()\n", 380 | "\n", 381 | "# Pull out the bin edges between the different categories\n", 382 | "binsize = np.histogram(labels[:92,], 6)[0]\n", 383 | "edges = np.concatenate([np.asarray([0]), np.cumsum(binsize)])[:-1]\n", 384 | "ax.set_xticks(list(np.array(edges)+8))\n", 385 | "ax.set_xticklabels(categories, rotation = 30)\n", 386 | "ax.set_yticks(list(np.array(edges)+8))\n", 387 | "ax.set_yticklabels(categories)\n", 388 | "ax.vlines(edges,0,92)\n", 389 | "ax.hlines(edges,0,92)\n", 390 | "ax.set_title('RSM digitized %s' % roi_names[roi_id])" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "**Exercise 3:** This new organization is helpful but could be improved (based on our knowledge of the brain). Order the datapoints so that the categories are as follows: human face, human body part, non-human face, non-human body part, natural inanimate and artificial inanimate. This will make for a nicer looking correlation matrix and will help you see any structure within and between categories. \n", 398 | "\n", 399 | "- Write a function to re-order the data. \n", 400 | "- Recompute the RSM based on the re-ordered data and visualize it.\n", 401 | "- Visualize the digitized RSM using the digitization function provided earlier. " 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "# Insert code here" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "### 2.2 How to read a similarity matrix \n", 418 | "\n", 419 | "It is important to be able to read a similarity matrix at a glance. There are a couple of features to look out for, and we will highlight them with some toy data.\n", 420 | "\n", 421 | "Imagine a scenario where we have 12 trial types (e.g., images) grouped into 4 categories (e.g., faces, objects, body parts, and scenes). We are going to simulate some data that has similar activity within category but different activity between categories." 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "#### 2.2.1. Create simulated data " 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "trial_types = 12\n", 438 | "n_sim_categories = 4\n", 439 | "repetitions_per_trial = 24\n", 440 | "trial_noise = 0.5 # multiplying factor for the noise for each trial.\n", 441 | "trials_per_category = int(trial_types / n_sim_categories)\n", 442 | "\n", 443 | "# The template per category. One common signal per category. 
\n", 444 | "# This will be the similarity within category.\n", 445 | "category_templates = np.random.randn(n_sim_categories, repetitions_per_trial)\n", 446 | "\n", 447 | "# Add some noise to each trial and add the category template to each trial.\n", 448 | "# This will create the trial activity.\n", 449 | "trials = np.zeros((trial_types, repetitions_per_trial))\n", 450 | "for category_counter in range(n_sim_categories):\n", 451 | " category_trials = np.random.randn(trials_per_category, repetitions_per_trial) * trial_noise\n", 452 | " for trial_counter in range(trials_per_category):\n", 453 | " trials[(trials_per_category * category_counter) + trial_counter, :] = category_templates[category_counter, :] + category_trials[trial_counter, :]\n", 454 | "\n" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "#### 2.2.2. Compute correlation and plot the result" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "# Create the correlation matrix \n", 471 | "sim_r1 = np.corrcoef(trials)\n", 472 | "plt.figure(figsize=(8, 8))\n", 473 | "plt.imshow(sim_r1, \n", 474 | " interpolation='none',\n", 475 | " cmap='bwr', \n", 476 | " vmin=-1,\n", 477 | " vmax=1,\n", 478 | " )\n", 479 | "plt.colorbar()" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "In the above plot you should see some clear blocking structure along the diagonal: items within a category are more similar to one another than they are to items in different categories." 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "#### 2.2.3. Make two categories similar and observe changes in the similarity matrix \n", 494 | "\n", 495 | "Below we create a plot where there is off-diagonal structure. High similarity in off-diagonal parts of a similarity matrix means that elements that are far apart in the ordering have similar structure. In this toy simulation we create an example where the first and third categories are similar to one another (i.e., faces and body parts)." 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "# Overwrite the template for the 3rd category with the template for 1st category. 
\n", 505 | "# Python indexing begins at [0].\n", 506 | "category_templates[2, :] = category_templates[0, :]\n", 507 | "\n", 508 | "# Create the trial activity\n", 509 | "trials = np.zeros((trial_types, repetitions_per_trial))\n", 510 | "for category_counter in range(n_sim_categories):\n", 511 | " category_trials = np.random.randn(trials_per_category, repetitions_per_trial) * trial_noise\n", 512 | " for trial_counter in range(trials_per_category):\n", 513 | " trials[(trials_per_category * category_counter) + trial_counter, :] = category_templates[category_counter, :] + category_trials[trial_counter, :]\n", 514 | "\n", 515 | "# Create the correlation matrix \n", 516 | "sim_r2 = np.corrcoef(trials)\n", 517 | "plt.figure(figsize=(8, 8))\n", 518 | "plt.imshow(sim_r2, \n", 519 | " interpolation='none',\n", 520 | " cmap='bwr', \n", 521 | " vmin=-1,\n", 522 | " vmax=1,\n", 523 | " )\n", 524 | "plt.colorbar()" 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "**Exercise 4:** Now use your new knowledge about reading a similarity matrix to interpret the matrix you created for the real data [Exercise 3](#ex3)." 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": {}, 537 | "source": [ 538 | "**A:**" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "### 2.3 Representational dissimilarity \n", 546 | "\n", 547 | "The previous analyses framed everything in terms of similarity between the items. However people sometimes prefer to consider this type of data in terms of dissimilarity. This close cousin of the similarity matrix is called the representational dissimilarity matrix (RDM). The dissimilarity matrix is computed simply as 1 - correlation. " 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "**Exercise 5:** Plot the RDM for the right FFA ROI using the new order as you created in [Exercise 3](#ex3)." 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "# Insert code here" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "**Exercise 6:** For RDM plots based on correlation values, what does an RDM value greater than 1 correspond to in terms of a correlation?" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "**A:**" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": {}, 583 | "source": [ 584 | "### 2.4 Comparison of representations in monkeys and humans \n", 585 | "\n", 586 | "The RSA can be used to compare information not just in humans, but across species too. Below is comparison of the RDM for monkeys and humans, in the inferior temporal cortex (Color map altered to match Figure 1 in Kriegeskorte et al. (2008))." 
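To make the computations in Sections 2 and 2.3 concrete, here is a minimal sketch on made-up data (the array sizes are arbitrary and purely for illustration): correlating a [stimuli x voxels] pattern matrix yields a stimuli x stimuli similarity matrix, and subtracting it from 1 gives the corresponding dissimilarity matrix.

```python
# Toy sketch: an RSM from made-up patterns, and its RDM (1 - correlation)
import numpy as np

patterns = np.random.randn(92, 300)      # hypothetical [stimuli x voxels] patterns
rsm_toy = np.corrcoef(patterns)          # np.corrcoef treats each row as one pattern
print(rsm_toy.shape)                     # (92, 92) similarity matrix

rdm_toy = 1 - rsm_toy                    # representational dissimilarity matrix
print(np.allclose(np.diag(rdm_toy), 0))  # identical patterns have zero dissimilarity
```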
587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "# Load the data, and bin to percentile\n", 596 | "monkeyRDM = pd.read_csv(os.path.join(ns_data_dir, 'RDM_mIT_fig1.txt'), header=None)\n", 597 | "humanRDM = pd.read_csv(os.path.join(ns_data_dir, 'RDM_hIT_fig1.txt'), header=None)\n", 598 | "\n", 599 | "monkey_rdm_digitized = digitize_rdm(monkeyRDM)\n", 600 | "human_rdm_digitized = digitize_rdm(humanRDM)\n", 601 | "\n", 602 | "f, axes = plt.subplots(1,2, figsize = (14, 6))\n", 603 | "axes[0].imshow(\n", 604 | " monkey_rdm_digitized, \n", 605 | " cmap='jet', \n", 606 | ")\n", 607 | "axes[1].imshow(\n", 608 | " human_rdm_digitized, \n", 609 | " cmap='jet', \n", 610 | ")\n", 611 | "# plt.colorbar()\n", 612 | "axes[0].set_title('Monkey RDM')\n", 613 | "axes[1].set_title('Human RDM')\n", 614 | "\n", 615 | "#for i in range(2): \n", 616 | "# axes[i].set_xlabel('stimuli id')\n", 617 | "# axes[i].set_ylabel('stimuli id')" 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "metadata": {}, 623 | "source": [ 624 | "## 3. Multi-dimensional scaling (MDS) \n", 625 | "\n", 626 | "The correlation matrix for the 92 images describes how similar each item is to each other item. This means that if two items have a high positive correlation then they can be thought of as eliciting a very similar activation pattern across voxels. We can reframe this to be thought of as a distance in a high-dimensional space. From this perspective, items that are similar to one another will be grouped close together and far away from points that they are dissimilar to. \n", 627 | "\n", 628 | "MDS allows us to visualize the similarity of our data in a different way than plotting the matrices above. Specifically, it allows to generate a lower-dimensional image (e.g., 2-D or 3-D) in which the distances between points approximate the distances in the original high-dimensional data. There is an MDS [method](https://homepages.uni-tuebingen.de/florian.wickelmaier/pubs/Wickelmaier2003SQRU.pdf) built into [scikit-learn](http://scikit-learn.org/stable/modules/manifold.html#multidimensional-scaling)." 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": null, 634 | "metadata": {}, 635 | "outputs": [], 636 | "source": [ 637 | "# Create a 2-D MDS\n", 638 | "mds = MDS(n_components=2, dissimilarity=\"precomputed\", random_state=0) # Create the MDS object\n", 639 | "results = mds.fit(digitize_rdm(1 - rsm)) # Use the dissimilarity matrix" 640 | ] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "metadata": {}, 645 | "source": [ 646 | "**Exercise 7:** How does changing the order of the data (e.g., shuffling the rows/columns) in your RDM affect the distance between points calculated by MDS? " 647 | ] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "metadata": {}, 652 | "source": [ 653 | "**A:**" 654 | ] 655 | }, 656 | { 657 | "cell_type": "markdown", 658 | "metadata": {}, 659 | "source": [ 660 | "### 3.1 Plot the 2D structure of the RDM \n", 661 | "\n", 662 | "We'll plot the 92 images on a \"map\" signifying how close or far apart images are to each other. We use different colors for the 6 categories of images." 
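One informal way to check the claim that the MDS embedding distances approximate the original dissimilarities is to correlate the pairwise distances of the MDS coordinates with the entries of the input matrix. This is only a sketch: it assumes `rsm` and `digitize_rdm` are defined as earlier in this notebook, and it re-imports `MDS` and `scipy.spatial.distance` so the snippet is self-contained.

```python
# Sketch: how faithfully does the 2-D MDS embedding preserve the dissimilarities?
import numpy as np
import scipy.spatial.distance as sp_distance
from sklearn.manifold import MDS

rdm_binned = digitize_rdm(1 - rsm)      # the precomputed dissimilarity matrix
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=0)
coords = mds.fit_transform(rdm_binned)  # one 2-D coordinate per stimulus

embedded = sp_distance.pdist(coords)    # pairwise distances in the embedding
original = sp_distance.squareform(np.asarray(rdm_binned, dtype=float), checks=False)
print(np.corrcoef(embedded, original)[0, 1])  # closer to 1 = better preserved
```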
663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "coords = results.embedding_\n", 672 | "\n", 673 | "plt.figure(figsize=(10, 7))\n", 674 | "for label_counter in np.unique(labels[:92]):\n", 675 | "    label_idxs = (labels[:92] == label_counter)[:]\n", 676 | "    plt.scatter(\n", 677 | "        coords[label_idxs, 0], coords[label_idxs, 1], \n", 678 | "        marker = 'o', s = 50\n", 679 | "    )\n", 680 | "plt.legend(categories, bbox_to_anchor=(1, .8), loc=\"upper left\")\n", 681 | "plt.title('MDS, 2D');" 682 | ] 683 | }, 684 | { 685 | "cell_type": "markdown", 686 | "metadata": {}, 687 | "source": [ 688 | "**Self-study**: On the MDS plot you are currently plotting each item as a point. You could instead load in each image and plot that image on the MDS plot directly to get a feel for which stimuli are being grouped." 689 | ] 690 | }, 691 | { 692 | "cell_type": "markdown", 693 | "metadata": {}, 694 | "source": [ 695 | "### 3.2 Plot the 3D structure of the RDM \n", 696 | "\n", 697 | "MDS is just trying to find a k-dimensional embedding that minimizes the stress (something akin to the goodness of fit). This means we can actually plot it in arbitrarily high dimensions to try and capture the data structure. Below we make a 3D plot to complement the 2D plot above and see whether it reveals additional structure." 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": null, 703 | "metadata": {}, 704 | "outputs": [], 705 | "source": [ 706 | "# Multi-dimensional scaling\n", 707 | "mds = MDS(n_components=3, dissimilarity=\"precomputed\", random_state=0)\n", 708 | "results = mds.fit(digitize_rdm(1 - rsm))\n", 709 | "\n", 710 | "coords = results.embedding_\n", 711 | "\n", 712 | "fig = plt.figure(figsize=(10, 7))\n", 713 | "ax = fig.add_subplot(111, projection='3d')\n", 714 | "for label_counter in np.unique(labels[:92]):\n", 715 | "    label_idxs = (labels[:92] == label_counter)[:]\n", 716 | "    ax.scatter(\n", 717 | "        coords[label_idxs, 0], coords[label_idxs, 1], coords[label_idxs, 2], \n", 718 | "        marker = 'o', s = 50\n", 719 | "    )\n", 720 | "    \n", 721 | "plt.legend(categories, bbox_to_anchor=(1,.7), loc=\"upper left\")\n", 722 | "plt.title('MDS, 3D')\n", 723 | "plt.tight_layout()" 724 | ] 725 | }, 726 | { 727 | "cell_type": "markdown", 728 | "metadata": {}, 729 | "source": [ 730 | "There are tools available to evaluate the appropriate dimensionality for visualizing our data (i.e., what dimensionality has sufficiently low stress). We can look at the stress of the MDS fit with different numbers of components (i.e., different dimensionality) and determine what dimensionality of the data is appropriate.\n", 731 | "\n", 732 | "Let's start with a toy problem to get a handle on what this should look like. We are going to make points that lie on a line in 3D space. Because a line has only one dimension of information (along its length), the data ought to be reducible to a single dimension. We will run MDS on this data to see if that is the case." 
733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [ 741 | "coords = np.linspace(1, 30, 30)\n", 742 | "coords = np.vstack((coords, coords, coords)).T\n", 743 | "\n", 744 | "fig = plt.figure()\n", 745 | "ax = fig.add_subplot(111, projection='3d')\n", 746 | "ax.scatter(coords[:, 0], coords[:, 1], coords[:, 2])" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": null, 752 | "metadata": {}, 753 | "outputs": [], 754 | "source": [ 755 | "# Calculate the euclidean distance of every point from every other point\n", 756 | "dist = sp_distance.squareform(sp_distance.pdist(coords))\n", 757 | "\n", 758 | "# Iterate through different numbers of components\n", 759 | "stress = []\n", 760 | "for components in range(1, 6):\n", 761 | " mds = MDS(n_components=components, dissimilarity=\"precomputed\", random_state=0)\n", 762 | " # Pull out the stress of the MDS fit\n", 763 | " stress.append(mds.fit(dist).stress_)\n", 764 | "\n", 765 | "# Plot the stress\n", 766 | "plt.figure()\n", 767 | "plt.plot(range(1, 6), stress)\n", 768 | "plt.ylabel('Stress')\n", 769 | "plt.xlabel('Dimensionality')" 770 | ] 771 | }, 772 | { 773 | "cell_type": "markdown", 774 | "metadata": {}, 775 | "source": [ 776 | "**Exercise 8:** It is not typical for data to be able to be described accurately in low dimensionality: stress generally decreases with the number of components. Perform a similar analysis below to estimate the appropriate dimensionality to visualize the RDM of the right FFA data from this participant. What is the appropriate lower dimensional representation of the data? Note: Make sure you don't calculate the stress metric from the MDS embedding, calculate the MDS fit from the RDM.\n", 777 | "\n", 778 | "MDS documentation: https://scikit-learn.org/stable/modules/generated/sklearn.manifold.MDS.html \n", 779 | "\n", 780 | "Here's the list of arguments for MDS: \n", 781 | "```\n", 782 | "MDS(n_components=2, metric=True, n_init=4, max_iter=300, verbose=0, eps=0.001, n_jobs=None, random_state=None, dissimilarity=’euclidean’)\n", 783 | "```\n", 784 | "\n", 785 | "Empirically, more stringent convergence criteria (i.e. large `n_init` and `max_iter`, small `eps`) will lead to more stable results. " 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": {}, 791 | "source": [ 792 | "**A:**" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": null, 798 | "metadata": {}, 799 | "outputs": [], 800 | "source": [ 801 | "# Insert code here" 802 | ] 803 | }, 804 | { 805 | "cell_type": "markdown", 806 | "metadata": {}, 807 | "source": [ 808 | "**Exercise 9:** Compute RDMs and create MDS plots for the left PPA and right PPA using the reordering you created above." 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": null, 814 | "metadata": {}, 815 | "outputs": [], 816 | "source": [ 817 | "# Insert code here" 818 | ] 819 | }, 820 | { 821 | "cell_type": "markdown", 822 | "metadata": {}, 823 | "source": [ 824 | "**Exercise 10:** The last four rows in the dataset for subject BE have unmarked labels. One of them is a human face. Using the techniques outlined here and your knowledge of the category preference of each ROI, analyze the data to make a best guess of which one of the 4 missing labels is a human face. Show your work and reasoning that led you to this conclusion. Hint: It will help to visualize these 4 points amongst the points you already have. 
It would also help to compare the response of each of the missing data points with a 'canonical' face response." 825 | ] 826 | }, 827 | { 828 | "cell_type": "code", 829 | "execution_count": null, 830 | "metadata": {}, 831 | "outputs": [], 832 | "source": [ 833 | "# Insert code here" 834 | ] 835 | }, 836 | { 837 | "cell_type": "markdown", 838 | "metadata": {}, 839 | "source": [ 840 | "**Novel contribution:** be creative and make one new discovery by adding an analysis, visualization, or optimization.\n", 841 | "\n", 842 | "Some examples: \n", 843 | "\n", 844 | "- visualize the average RDM across subjects\n", 845 | "- compare the empirical RDM to some theoretical RDM\n", 846 | "- check the consistency between hierarchical clustering vs. ground truth label ordering \n", 847 | "- use other dimensionality reduction methods to visualize the data (PCA, tSNE, etc.)\n", 848 | "- perform some classification on this data set\n", 849 | "- apply RSA on previous datasets (e.g. VDC, the simulated dataset used in the 1st notebook)\n", 850 | "- apply RSA on previous datasets (e.g., VDC)\n", 851 | "- even better, your own ambitious ideas! " 852 | ] 853 | }, 854 | { 855 | "cell_type": "markdown", 856 | "metadata": {}, 857 | "source": [ 858 | "## Contributions \n", 859 | "\n", 860 | "M. Kumar, C. Ellis and N. Turk-Browne produced the initial notebook. 02/2018 \n", 861 | "The mystery label exercise was borrowed from a matlab version created by F. Pereira. \n", 862 | "T. Meissner minor edits. \n", 863 | "Q. Lu plot aesthetics, digitization func, modularize funcs, re-ordering, replicate the original paper. \n", 864 | "K.A. Norman provided suggestions on the overall content and made edits to this notebook. \n", 865 | "C. Ellis implemented updates from cmhn-s19.
\n", 866 | "X. Li edited stimuli from continuous line to discrete dots in section 1.3 and 2.1" 867 | ] 868 | } 869 | ], 870 | "metadata": { 871 | "anaconda-cloud": {}, 872 | "kernelspec": { 873 | "display_name": "Python 3", 874 | "language": "python", 875 | "name": "python3" 876 | }, 877 | "language_info": { 878 | "codemirror_mode": { 879 | "name": "ipython", 880 | "version": 3 881 | }, 882 | "file_extension": ".py", 883 | "mimetype": "text/x-python", 884 | "name": "python", 885 | "nbconvert_exporter": "python", 886 | "pygments_lexer": "ipython3", 887 | "version": "3.7.4" 888 | } 889 | }, 890 | "nbformat": 4, 891 | "nbformat_minor": 2 892 | } -------------------------------------------------------------------------------- /tutorials/07-searchlight/avg18_whole_brain_SL.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/07-searchlight/avg18_whole_brain_SL.nii.gz -------------------------------------------------------------------------------- /tutorials/07-searchlight/avg3_whole_brain_SL.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/07-searchlight/avg3_whole_brain_SL.nii.gz -------------------------------------------------------------------------------- /tutorials/07-searchlight/rank_whole_brain_SL.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/07-searchlight/rank_whole_brain_SL.nii.gz -------------------------------------------------------------------------------- /tutorials/07-searchlight/run_searchlight.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | # Input python command to be submitted as a job 3 | 4 | #SBATCH --output=../logs/searchlight-%j.out 5 | #SBATCH --job-name searchlight 6 | #SBATCH -t 30 # time limit: how many minutes 7 | #SBATCH --mem=4G # memory limit 8 | #SBATCH -n 2 # how many cores to use 9 | 10 | # Set up the environment 11 | source ../setup_environment.sh 12 | 13 | # Run the python script (use mpi if running on the cluster) 14 | if [ $configuration == "cluster" ] 15 | then 16 | srun --mpi=pmi2 python ./searchlight.py 17 | else 18 | python ./searchlight.py 19 | fi 20 | -------------------------------------------------------------------------------- /tutorials/07-searchlight/searchlight.py: -------------------------------------------------------------------------------- 1 | # Run a whole brain searchlight 2 | 3 | # Import libraries 4 | import nibabel as nib 5 | import numpy as np 6 | from mpi4py import MPI 7 | from brainiak.searchlight.searchlight import Searchlight 8 | from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV 9 | from sklearn.svm import SVC 10 | from scipy.spatial.distance import euclidean 11 | import os 12 | import pickle 13 | from utils import results_path 14 | 15 | # Import additional libraries you need 16 | fs_data_dir = os.path.expanduser(results_path + '/searchlight_data') 17 | 18 | num_subj = 3 19 | 20 | # Load and perpare data for one subject 21 | def load_fs_data(sub_id, mask=''): 22 | # find file path 23 | sub = 'sub-%.2d' % (sub_id) 24 | input_dir = os.path.join(fs_data_dir, sub) 25 | data_file = 
os.path.join(input_dir, 'data.nii.gz') 26 | 27 | if mask == '': 28 | mask_file = os.path.join(fs_data_dir, 'wb_mask.nii.gz') 29 | else: 30 | mask_file = os.path.join(fs_data_dir, '{}_mask.nii.gz'.format(mask)) 31 | 32 | # load bold data and some header information so that we can save searchlight results later 33 | data_file = nib.load(data_file) 34 | bold_data = data_file.get_data() 35 | affine_mat = data_file.affine 36 | dimsize = data_file.header.get_zooms() 37 | 38 | # load mask 39 | brain_mask = nib.load(mask_file) 40 | brain_mask = brain_mask.get_data() 41 | 42 | return bold_data, brain_mask, affine_mat, dimsize 43 | 44 | def load_fs_label(sub_id, mask=''): 45 | # find file path 46 | sub = 'sub-%.2d' % (sub_id) 47 | input_dir = os.path.join(fs_data_dir, sub) 48 | label_file = os.path.join(input_dir, 'label.npz') 49 | # load label 50 | label = np.load(label_file) 51 | label = label['label'] 52 | return label 53 | 54 | # Data Path 55 | data_path = os.path.expanduser(results_path + '/searchlight_results') 56 | # if not os.path.exists(data_path): 57 | # os.makedirs(data_path) 58 | 59 | # Pull out the MPI information 60 | comm = MPI.COMM_WORLD 61 | rank = comm.rank 62 | size = comm.size 63 | 64 | # load mask 65 | mask_file = os.path.join(fs_data_dir, 'wb_mask.nii.gz') 66 | mask = nib.load(mask_file) 67 | mask = mask.get_data() 68 | 69 | # Loop over subjects 70 | data = [] 71 | bcvar = [] 72 | for sub_id in range(1,num_subj+1): 73 | if rank == 0: 74 | data_i, mask, affine_mat, dimsize = load_fs_data(sub_id) 75 | data.append(data_i) 76 | else: 77 | data.append(None) 78 | bcvar_i = load_fs_label(sub_id) 79 | bcvar.append(bcvar_i) 80 | 81 | sl_rad = 1 82 | max_blk_edge = 5 83 | pool_size = 1 84 | 85 | coords = np.where(mask) 86 | 87 | 88 | # Create the searchlight object 89 | sl = Searchlight(sl_rad=sl_rad,max_blk_edge=max_blk_edge) 90 | # print("Setup searchlight inputs") 91 | # print("Number of subjects: " + str(len(data))) 92 | # print("Input data shape: " + str(data[0].shape)) 93 | # print("Input mask shape: " + str(mask.shape) + "\n") 94 | 95 | # Distribute the information to the searchlights (preparing it to run) 96 | sl.distribute(data, mask) 97 | 98 | # Broadcast variables 99 | sl.broadcast(bcvar) 100 | 101 | # Set up the kernel function, in this case an SVM 102 | def calc_svm(data, sl_mask, myrad, bcvar): 103 | accuracy = [] 104 | sl_num_vx = sl_mask.shape[0] * sl_mask.shape[1] * sl_mask.shape[2] 105 | num_epoch = data[0].shape[3] 106 | # Loop over subjects to leave each subject out once: 107 | for idx in range(len(data)): 108 | # Pull out the data 109 | # Testing data 110 | data4D_test = data[idx] 111 | labels_test = bcvar[idx] 112 | bolddata_sl_test = data4D_test.reshape(sl_num_vx, num_epoch).T 113 | 114 | # Training data 115 | labels_train = [] 116 | bolddata_sl_train = np.empty((0, sl_num_vx)) 117 | for train_id in range(len(data)): 118 | if train_id != idx: 119 | labels_train.extend(list(bcvar[train_id])) 120 | bolddata_sl_train = np.concatenate((bolddata_sl_train, data[train_id].reshape(sl_num_vx, num_epoch).T)) 121 | labels_train = np.array(labels_train) 122 | 123 | # Train classifier 124 | clf = SVC(kernel='linear', C=1) 125 | clf.fit(bolddata_sl_train, labels_train) 126 | 127 | # Test classifier 128 | score = clf.score(bolddata_sl_test, labels_test) 129 | accuracy.append(score) 130 | 131 | return accuracy 132 | 133 | # Run the searchlight analysis 134 | print("Begin SearchLight in rank %s\n" % rank) 135 | all_sl_result = sl.run_searchlight(calc_svm, pool_size=pool_size) 136 | 
print("End SearchLight in rank %s\n" % rank) 137 | 138 | # Only save the data if this is the first core 139 | if rank == 0: 140 | all_sl_result = all_sl_result[mask==1] 141 | all_sl_result = [num_subj*[0] if not n else n for n in all_sl_result] # replace all None 142 | # The average result 143 | avg_vol = np.zeros((mask.shape[0], mask.shape[1], mask.shape[2])) 144 | 145 | # Loop over subjects 146 | for sub_id in range(1,num_subj+1): 147 | sl_result = [r[sub_id-1] for r in all_sl_result] 148 | # reshape 149 | result_vol = np.zeros((mask.shape[0], mask.shape[1], mask.shape[2])) 150 | result_vol[coords[0], coords[1], coords[2]] = sl_result 151 | # Convert the output into what can be used 152 | result_vol = result_vol.astype('double') 153 | result_vol[np.isnan(result_vol)] = 0 # If there are nans we want this 154 | # Add the processed result_vol into avg_vol 155 | avg_vol += result_vol 156 | # Save the volume 157 | output_name = os.path.join(data_path, 'subj%s_whole_brain_SL.nii.gz' % (sub_id)) 158 | sl_nii = nib.Nifti1Image(result_vol, affine_mat) 159 | hdr = sl_nii.header 160 | hdr.set_zooms((dimsize[0], dimsize[1], dimsize[2])) 161 | nib.save(sl_nii, output_name) # Save 162 | 163 | # Save the average result 164 | output_name = os.path.join(data_path, 'avg%s_whole_brain_SL.nii.gz' % (num_subj)) 165 | sl_nii = nib.Nifti1Image(avg_vol/num_subj, affine_mat) 166 | hdr = sl_nii.header 167 | hdr.set_zooms((dimsize[0], dimsize[1], dimsize[2])) 168 | nib.save(sl_nii, output_name) # Save 169 | 170 | print('Finished searchlight') -------------------------------------------------------------------------------- /tutorials/07-searchlight/searchlight_rank.py: -------------------------------------------------------------------------------- 1 | # Run a whole brain searchlight 2 | 3 | # Import libraries 4 | import nibabel as nib 5 | import numpy as np 6 | from mpi4py import MPI 7 | from brainiak.searchlight.searchlight import Searchlight 8 | from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV 9 | from sklearn.svm import SVC 10 | from scipy.spatial.distance import euclidean 11 | import os 12 | import pickle 13 | 14 | # Import additional libraries you need 15 | fs_data_dir = os.path.expanduser('~/searchlight_data') 16 | 17 | num_subj = 1 18 | 19 | # Load and perpare data for one subject 20 | def load_fs_data(sub_id, mask=''): 21 | # find file path 22 | sub = 'sub-%.2d' % (sub_id) 23 | input_dir = os.path.join(fs_data_dir, sub) 24 | data_file = os.path.join(input_dir, 'data.nii.gz') 25 | label_file = os.path.join(input_dir, 'label.npz') 26 | if mask == '': 27 | mask_file = os.path.join(fs_data_dir, 'wb_mask.nii.gz') 28 | else: 29 | mask_file = os.path.join(fs_data_dir, '{}_mask.nii.gz'.format(mask)) 30 | 31 | # load bold data and some header information so that we can save searchlight results later 32 | data_file = nib.load(data_file) 33 | bold_data = data_file.get_data() 34 | affine_mat = data_file.affine 35 | dimsize = data_file.header.get_zooms() 36 | 37 | # load label 38 | label = np.load(label_file) 39 | label = label['label'] 40 | 41 | # load mask 42 | brain_mask = nib.load(mask_file) 43 | brain_mask = brain_mask.get_data() 44 | 45 | return bold_data, label, brain_mask, affine_mat, dimsize 46 | 47 | # Data Path 48 | data_path = os.path.expanduser('~/searchlight_results') 49 | # if not os.path.exists(data_path): 50 | # os.makedirs(data_path) 51 | 52 | # Loop over subjects 53 | data = [] 54 | bcvar = [] 55 | for sub_id in range(1,num_subj+1): 56 | data_i, bcvar_i, mask, affine_mat, 
dimsize = load_fs_data(sub_id) 57 | data.append(data_i) 58 | bcvar.append(bcvar_i) 59 | 60 | sl_rad = 1 61 | max_blk_edge = 5 62 | pool_size = 1 63 | 64 | coords = np.where(mask) 65 | 66 | # Pull out the MPI information 67 | comm = MPI.COMM_WORLD 68 | rank = comm.rank 69 | size = comm.size 70 | 71 | # Create the searchlight object 72 | sl = Searchlight(sl_rad=sl_rad,max_blk_edge=max_blk_edge) 73 | print("Setup searchlight inputs") 74 | print("Number of subjects: " + str(len(data))) 75 | print("Input data shape: " + str(data[0].shape)) 76 | print("Input mask shape: " + str(mask.shape) + "\n") 77 | 78 | # Distribute the information to the searchlights (preparing it to run) 79 | sl.distribute(data, mask) 80 | 81 | # Broadcast variables 82 | sl.broadcast(bcvar) 83 | 84 | # Set up the kernel function, in this case an SVM 85 | def calc_rank(data, sl_mask, myrad, bcvar): 86 | # Pull out the MPI information 87 | comm = MPI.COMM_WORLD 88 | rank = comm.rank 89 | size = comm.size 90 | return [rank] 91 | 92 | # Run the searchlight analysis 93 | print("Begin SearchLight in rank %s\n" % rank) 94 | all_sl_result = sl.run_searchlight(calc_rank, pool_size=pool_size) 95 | print("End SearchLight in rank %s\n" % rank) 96 | 97 | # Only save the data if this is the first core 98 | if rank == 0: 99 | all_sl_result = all_sl_result[mask==1] 100 | all_sl_result = [num_subj*[0] if not n else n for n in all_sl_result] # replace all None 101 | 102 | # Loop over subjects 103 | for sub_id in range(1,num_subj+1): 104 | sl_result = [r[sub_id-1] for r in all_sl_result] 105 | # reshape 106 | result_vol = np.zeros((mask.shape[0], mask.shape[1], mask.shape[2])) 107 | result_vol[coords[0], coords[1], coords[2]] = sl_result 108 | # Convert the output into what can be used 109 | result_vol = result_vol.astype('double') 110 | result_vol[np.isnan(result_vol)] = 0 # If there are nans we want this 111 | # Save the volume 112 | output_name = os.path.join(data_path, 'rank_whole_brain_SL.nii.gz' ) 113 | sl_nii = nib.Nifti1Image(result_vol, affine_mat) 114 | hdr = sl_nii.header 115 | hdr.set_zooms((dimsize[0], dimsize[1], dimsize[2])) 116 | nib.save(sl_nii, output_name) # Save 117 | 118 | print('Finished searchlight') -------------------------------------------------------------------------------- /tutorials/07-searchlight/searchlight_single_subject.py: -------------------------------------------------------------------------------- 1 | # Run a whole brain searchlight on a single subject in the VDC dataset 2 | 3 | # Import libraries 4 | import nibabel as nib 5 | import numpy as np 6 | from mpi4py import MPI 7 | from brainiak.searchlight.searchlight import Searchlight 8 | from sklearn.model_selection import PredefinedSplit 9 | from sklearn.svm import LinearSVC 10 | from scipy.stats import zscore 11 | import os, sys 12 | 13 | # Import additional libraries you need 14 | sys.path.append('../') 15 | # load some helper functions 16 | from utils import load_vdc_mask, load_vdc_epi_data, load_vdc_stim_labels, label2TR, shift_timing 17 | # load some constants 18 | from utils import vdc_data_dir, results_path,vdc_all_ROIs, vdc_label_dict, vdc_n_runs, vdc_hrf_lag, vdc_TR, vdc_TRs_run 19 | 20 | # parameters 21 | sub = 'sub-01' 22 | roi_name = 'FFA' 23 | 24 | # Pull out the MPI information 25 | comm = MPI.COMM_WORLD 26 | rank = comm.rank 27 | size = comm.size 28 | 29 | # Output data Path 30 | output_path = os.path.join(results_path,'searchlight_results') 31 | if rank == 0: 32 | if not os.path.exists(output_path): 33 | os.makedirs(output_path) 34 | 35 
| # load mask of the subject 36 | mask = load_vdc_mask(roi_name, sub) 37 | mask = mask.get_data() 38 | coords = np.where(mask) 39 | 40 | # load labels of the subject in all ranks 41 | stim_label_allruns = load_vdc_stim_labels(sub) 42 | stim_label_TR = label2TR(stim_label_allruns, vdc_n_runs, vdc_TR, vdc_TRs_run) 43 | shift_size = int(vdc_hrf_lag / vdc_TR) 44 | label = shift_timing(stim_label_TR, shift_size) 45 | # extract non-zero labels 46 | label_index = np.squeeze(np.nonzero(label)) 47 | # Pull out the indexes 48 | labels = label[label_index] 49 | 50 | # get run ids (works similarity to cv_ids) 51 | run_ids = stim_label_allruns[5,:] - 1 52 | # split data according to run ids 53 | ps = PredefinedSplit(run_ids) 54 | 55 | # Same them as the broadcast variables 56 | bcvar = [labels, ps] 57 | 58 | # load the data in rank 0 59 | if rank == 0: 60 | # Make a function to load the data 61 | def load_data(directory, subject_name): 62 | # Cycle through the runs 63 | for run in range(1, vdc_n_runs + 1): 64 | epi_data = load_vdc_epi_data(subject_name, run) 65 | bold_data = epi_data.get_data() 66 | affine_mat = epi_data.affine 67 | dimsize = epi_data.header.get_zooms() 68 | # Concatenate the data 69 | if run == 1: 70 | concatenated_data = bold_data 71 | else: 72 | concatenated_data = np.concatenate((concatenated_data, bold_data), axis=-1) 73 | return concatenated_data, affine_mat, dimsize 74 | 75 | data, affine_mat, dimsize = load_data(vdc_data_dir, sub) 76 | # extract bold data for non-zero labels 77 | data = data[:, :, :, label_index] 78 | # normalize the data within each run 79 | for r in range(vdc_n_runs): 80 | data[:, :, :, run_ids==r] = np.nan_to_num(zscore(data[:, :, :, run_ids==r], axis=3)) 81 | else: 82 | data = None 83 | 84 | # Set parameters 85 | sl_rad = 1 86 | max_blk_edge = 5 87 | pool_size = 1 88 | 89 | # Create the searchlight object 90 | sl = Searchlight(sl_rad=sl_rad,max_blk_edge=max_blk_edge) 91 | 92 | # Distribute the information to the searchlights (preparing it to run) 93 | sl.distribute([data], mask) 94 | 95 | # Broadcast variables 96 | sl.broadcast(bcvar) 97 | 98 | # Set up the kernel function, in this case an SVM 99 | def calc_svm(data, sl_mask, myrad, bcvar): 100 | if np.sum(sl_mask) < 14: 101 | return -1 102 | scores = [] 103 | labels, ps = bcvar[0], bcvar[1] 104 | 105 | # Reshape the data 106 | sl_num_vx = sl_mask.shape[0] * sl_mask.shape[1] * sl_mask.shape[2] 107 | num_epoch = data[0].shape[3] 108 | data_sl = data[0].reshape(sl_num_vx, num_epoch).T 109 | 110 | # Classifier: loop over all runs to leave each run out once 111 | model = LinearSVC() 112 | for train_index, test_index in ps.split(): 113 | X_train, X_test = data_sl[train_index], data_sl[test_index] 114 | y_train, y_test = labels[train_index], labels[test_index] 115 | # Fit a svm 116 | model.fit(X_train, y_train) 117 | # Calculate the accuracy for the hold out run 118 | score = model.score(X_test, y_test) 119 | scores.append(score) 120 | 121 | return np.mean(scores) 122 | 123 | # Run the searchlight analysis 124 | print("Begin SearchLight in rank %s\n" % rank) 125 | sl_result = sl.run_searchlight(calc_svm, pool_size=pool_size) 126 | print("End SearchLight in rank %s\n" % rank) 127 | 128 | # Only save the data if this is the first core 129 | if rank == 0: 130 | # Convert NaN to 0 in the output 131 | sl_result = np.nan_to_num(sl_result[mask==1]) 132 | # Reshape 133 | result_vol = np.zeros((mask.shape[0], mask.shape[1], mask.shape[2])) 134 | result_vol[coords[0], coords[1], coords[2]] = sl_result 135 | # Convert the 
output into what can be used 136 | result_vol = result_vol.astype('double') 137 | # Save the average result 138 | output_name = os.path.join(output_path, '%s_%s_SL.nii.gz' % (sub, roi_name)) 139 | sl_nii = nib.Nifti1Image(result_vol, affine_mat) 140 | hdr = sl_nii.header 141 | hdr.set_zooms((dimsize[0], dimsize[1], dimsize[2])) 142 | nib.save(sl_nii, output_name) # Save 143 | 144 | print('Finished searchlight') 145 | -------------------------------------------------------------------------------- /tutorials/09-fcma/fcma_classify.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Intel Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from brainiak.fcma.classifier import Classifier 16 | from brainiak.fcma.preprocessing import prepare_fcma_data 17 | from brainiak import io 18 | 19 | from sklearn.svm import SVC 20 | import sys 21 | import logging 22 | import numpy as np 23 | from sklearn import model_selection 24 | from mpi4py import MPI 25 | import os 26 | from utils import results_path 27 | 28 | format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 29 | # if want to output log to a file instead of outputting log to the console, 30 | # replace "stream=sys.stdout" with "filename='fcma.log'" 31 | logging.basicConfig(level=logging.INFO, format=format, stream=sys.stdout) 32 | logger = logging.getLogger(__name__) 33 | 34 | data_dir = sys.argv[1] 35 | suffix = sys.argv[2] 36 | top_n_mask_file = sys.argv[3] # This is not the whole brain mask! This is the voxel selection mask 37 | epoch_file = sys.argv[4] 38 | left_out_subj = sys.argv[5] 39 | if len(sys.argv)==7: 40 | second_mask = sys.argv[6] # Do you want to supply a second mask (for extrinsic analysis) 41 | else: 42 | second_mask = "None" 43 | 44 | # Where do you want to output the classification results? 45 | output_file = results_path + '/classify_result.txt' 46 | 47 | # Do you want to compute this in an easily understood way (0) or a memory efficient way (1)? 
48 | is_memory_efficient = 1 49 | 50 | # If a second mask was supplied then this is an extrinsic analysis and treat it as such 51 | if second_mask == "None": 52 | is_extrinsic = 0 53 | else: 54 | is_extrinsic = 1 55 | 56 | if __name__ == '__main__': 57 | 58 | # Send a message on the first node 59 | if MPI.COMM_WORLD.Get_rank()==0: 60 | logger.info( 61 | 'Testing for participant %d.\nProgramming starts in %d process(es)' % 62 | (int(left_out_subj), MPI.COMM_WORLD.Get_size()) 63 | ) 64 | 65 | # Load in the volumes, mask and labels 66 | images = io.load_images_from_dir(data_dir, suffix=suffix) 67 | top_n_mask = io.load_boolean_mask(top_n_mask_file) 68 | epoch_list = io.load_labels(epoch_file) 69 | 70 | # Parse the epoch data for useful dimensions 71 | epochs_per_subj = epoch_list[0].shape[1] 72 | num_subjs = len(epoch_list) 73 | 74 | # Prepare the data 75 | int_data, _, labels = prepare_fcma_data(images, epoch_list, top_n_mask) 76 | 77 | # What indexes pick out the left out participant? 78 | start_idx = int(int(left_out_subj) * epochs_per_subj) 79 | end_idx = int(start_idx + epochs_per_subj) 80 | 81 | # Take out the idxs corresponding to all participants but this one 82 | training_idx = list(set(range(len(labels))) - set(range(start_idx, end_idx))) 83 | testing_idx = list(range(start_idx, end_idx)) 84 | 85 | # Pull out the data 86 | int_data_training = [int_data[i] for i in training_idx] 87 | int_data_testing = [int_data[i] for i in testing_idx] 88 | 89 | # Pull out the labels 90 | labels_training = [labels[i] for i in training_idx] 91 | labels_testing = [labels[i] for i in testing_idx] 92 | 93 | # Prepare the data to be processed efficiently (albeit in a less easy to follow way) 94 | if is_memory_efficient == 1: 95 | rearranged_int_data = int_data_training + int_data_testing 96 | rearranged_labels = labels_training + labels_testing 97 | num_training_samples = epochs_per_subj * (num_subjs - 1) 98 | 99 | # Do you want to perform an intrinsic vs extrinsic analysis 100 | if is_extrinsic > 0 and is_memory_efficient == 1: 101 | 102 | # This needs to be reloaded every time you call prepare_fcma_data 103 | images = io.load_images_from_dir(data_dir, suffix=suffix) 104 | 105 | # Multiply the inverse of the top n mask by the whole brain mask to bound it 106 | second_mask = io.load_boolean_mask(second_mask) 107 | extrinsic_mask = ((top_n_mask == 0) * second_mask)==1 108 | 109 | # Prepare the data using the extrinsic data 110 | ext_data, _, _ = prepare_fcma_data(images, epoch_list, extrinsic_mask) 111 | 112 | # Pull out the appropriate extrinsic data 113 | ext_data_training = [ext_data[i] for i in training_idx] 114 | ext_data_testing = [ext_data[i] for i in testing_idx] 115 | 116 | # Set up data so that the internal mask is correlated with the extrinsic mask 117 | rearranged_ext_data = ext_data_training + ext_data_testing 118 | corr_obj = list(zip(rearranged_ext_data, rearranged_int_data)) 119 | else: 120 | 121 | # Set up data so that the internal mask is correlated with the internal mask 122 | if is_memory_efficient == 1: 123 | corr_obj = list(zip(rearranged_int_data, rearranged_int_data)) 124 | else: 125 | training_obj = list(zip(int_data_training, int_data_training)) 126 | testing_obj = list(zip(int_data_testing, int_data_testing)) 127 | 128 | # no shrinking, set C=1 129 | svm_clf = SVC(kernel='precomputed', shrinking=False, C=1) 130 | 131 | clf = Classifier(svm_clf, epochs_per_subj=epochs_per_subj) 132 | 133 | # Train the model on the training data 134 | if is_memory_efficient == 1: 135 | 
clf.fit(corr_obj, rearranged_labels, num_training_samples) 136 | else: 137 | clf.fit(training_obj, labels_training) 138 | 139 | # What is the cv accuracy? 140 | if is_memory_efficient == 0: 141 | cv_prediction = clf.predict(training_obj) 142 | 143 | # Test on the testing data 144 | if is_memory_efficient == 1: 145 | predict = clf.predict() 146 | else: 147 | predict = clf.predict(testing_obj) 148 | 149 | # Report results on the first rank core 150 | if MPI.COMM_WORLD.Get_rank()==0: 151 | print('--RESULTS--') 152 | print(clf.decision_function()) 153 | 154 | # How often does the prediction match the target 155 | num_correct = (np.asanyarray(predict) == np.asanyarray(labels_testing)).sum() 156 | 157 | # Print the CV accuracy 158 | if is_memory_efficient == 0: 159 | cv_accuracy = (np.asanyarray(cv_prediction) == np.asanyarray(labels_training)).sum() / len(labels_training) 160 | print('CV accuracy: %0.5f' % (cv_accuracy)) 161 | 162 | intrinsic_vs_extrinsic = ['intrinsic', 'extrinsic'] 163 | 164 | # Report accuracy 165 | logger.info( 166 | 'When leaving subject %d out for testing using the %s mask for an %s correlation, the accuracy is %d / %d = %.2f' % 167 | (int(left_out_subj), top_n_mask_file, intrinsic_vs_extrinsic[int(is_extrinsic)], num_correct, epochs_per_subj, num_correct / epochs_per_subj) 168 | ) 169 | 170 | # Append this accuracy on to a score sheet 171 | with open(output_file, 'a') as fp: 172 | fp.write(top_n_mask_file + ', ' + str(intrinsic_vs_extrinsic[int(is_extrinsic)]) + ': ' + str(num_correct / epochs_per_subj) + '\n') 173 | -------------------------------------------------------------------------------- /tutorials/09-fcma/fcma_voxel_selection_cv.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Intel Corporation 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from brainiak.fcma.voxelselector import VoxelSelector 16 | from brainiak.fcma.preprocessing import prepare_fcma_data 17 | from brainiak.fcma.preprocessing import RandomType 18 | from brainiak import io 19 | from sklearn.svm import SVC 20 | import sys 21 | from mpi4py import MPI 22 | import logging 23 | import numpy as np 24 | import nibabel as nib 25 | import os 26 | 27 | format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 28 | # if want to output log to a file instead of outputting log to the console, 29 | # replace "stream=sys.stdout" with "filename='fcma.log'" 30 | logging.basicConfig(level=logging.INFO, format=format, stream=sys.stdout) 31 | logger = logging.getLogger(__name__) 32 | 33 | """ 34 | Perform leave one participant out voxel selection with FCMA 35 | """ 36 | 37 | data_dir = sys.argv[1] # What is the directory containing data? 
38 | suffix = sys.argv[2] # What is the extension of the data you're loading 39 | mask_file = sys.argv[3] # What is the path to the whole brain mask 40 | epoch_file = sys.argv[4] # What is the path to the epoch file 41 | left_out_subj = sys.argv[5] # Which participant (as an integer) are you leaving out for this cv? 42 | output_dir = sys.argv[6] # What is the path to the folder you want to save this data in 43 | 44 | # Only run the following from the controller core 45 | if __name__ == '__main__': 46 | if MPI.COMM_WORLD.Get_rank()==0: 47 | logger.info( 48 | 'Testing for participant %d.\nProgramming starts in %d process(es)' % 49 | (int(left_out_subj), MPI.COMM_WORLD.Get_size()) 50 | ) 51 | # create output_dir 52 | if not os.path.exists(output_dir): 53 | os.makedirs(output_dir) 54 | 55 | # Load in the volumes, mask and labels 56 | images = io.load_images_from_dir(data_dir, suffix=suffix) 57 | mask = io.load_boolean_mask(mask_file) 58 | epoch_list = io.load_labels(epoch_file) 59 | 60 | # Parse the epoch data for useful dimensions 61 | epochs_per_subj = epoch_list[0].shape[1] 62 | num_subjs = len(epoch_list) 63 | 64 | # Preprocess the data and prepare for FCMA 65 | raw_data, _, labels = prepare_fcma_data(images, epoch_list, mask) 66 | 67 | # enforce left one out 68 | file_str = output_dir + '/fc_no' + str(left_out_subj) + '_' 69 | start_idx = int(int(left_out_subj) * epochs_per_subj) 70 | end_idx = int(start_idx + epochs_per_subj) 71 | 72 | # Take out the idxs corresponding to all participants but this one 73 | subsampled_idx = list(set(range(len(labels))) - set(range(start_idx, end_idx))) 74 | labels_subsampled = [labels[i] for i in subsampled_idx] 75 | raw_data_subsampled = [raw_data[i] for i in subsampled_idx] 76 | 77 | # Set up the voxel selection object for fcma 78 | vs = VoxelSelector(labels_subsampled, epochs_per_subj, num_subjs - 1, raw_data_subsampled) 79 | 80 | # for cross validation, use SVM with precomputed kernel 81 | clf = SVC(kernel='precomputed', shrinking=False, C=1) 82 | results = vs.run(clf) 83 | 84 | # this output is just for result checking 85 | if MPI.COMM_WORLD.Get_rank()==0: 86 | logger.info( 87 | 'correlation-based voxel selection is done' 88 | ) 89 | 90 | # Load in the mask with nibabel 91 | mask_img = nib.load(mask_file) 92 | mask = mask_img.get_data().astype(np.bool) 93 | 94 | # Preset the volumes 95 | score_volume = np.zeros(mask.shape, dtype=np.float32) 96 | score = np.zeros(len(results), dtype=np.float32) 97 | seq_volume = np.zeros(mask.shape, dtype=np.int) 98 | seq = np.zeros(len(results), dtype=np.int) 99 | 100 | # Write a text document of the voxel selection results 101 | with open(file_str + 'result_list.txt', 'w') as fp: 102 | for idx, tuple in enumerate(results): 103 | fp.write(str(tuple[0]) + ' ' + str(tuple[1]) + '\n') 104 | 105 | # Store the score for each voxel 106 | score[tuple[0]] = tuple[1] 107 | seq[tuple[0]] = idx 108 | 109 | # Convert the list into a volume 110 | score_volume[mask] = score 111 | seq_volume[mask] = seq 112 | 113 | # Save volume 114 | io.save_as_nifti_file(score_volume, mask_img.affine, 115 | file_str + 'result_score.nii.gz') 116 | io.save_as_nifti_file(seq_volume, mask_img.affine, 117 | file_str + 'result_seq.nii.gz') 118 | -------------------------------------------------------------------------------- /tutorials/09-fcma/make_top_voxel_mask.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | # 3 | # Takes in a results directory from fcma analysis and a certain number of 
voxels to threshold for a mask as input 4 | 5 | #You will need to load fsl module/conda for your cluster 6 | source ../setup_environment.sh 7 | 8 | 9 | # Take inputs 10 | input_dir=$1 # What is the path to thedata? 11 | voxel_number=$2 # What voxel threshold would you like to set 12 | output_dir=$3 # Where do you want to put the data 13 | 14 | # Create output_dir 15 | if [ ! -d ${output_dir} ]; then 16 | mkdir ${output_dir} 17 | fi 18 | 19 | # Iterate through each volume in the fcma directory 20 | for file in ${input_dir}/*_seq.nii.gz 21 | do 22 | # Preprocess the file name 23 | fbase=$(basename "$file") 24 | pref="${fbase%%.*}" 25 | 26 | # Create the voxel mask 27 | fslmaths $file -uthr $voxel_number -bin ${output_dir}/${pref}_top${voxel_number}.nii.gz 28 | 29 | done 30 | 31 | # Concatenate all of the masks from each volume 32 | fslmerge -t ${output_dir}/all_top${voxel_number} ${output_dir}/fc_no*top${voxel_number}.nii.gz 33 | 34 | # Create a probability map of each voxel being included across participants 35 | fslmaths ${output_dir}/all_top${voxel_number} -Tmean ${output_dir}/prop_top${voxel_number} -odt float 36 | -------------------------------------------------------------------------------- /tutorials/09-fcma/run_fcma_classify.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #SBATCH -t 15 3 | #SBATCH --mem-per-cpu=12G 4 | #SBATCH -n 2 5 | #SBATCH --job-name fcma_classify 6 | #SBATCH --output=../logs/fcma_classify-%j.out 7 | 8 | # Set up the environment. You will need to modify the module for your cluster. 9 | source ../setup_environment.sh 10 | 11 | # How many threads can you make 12 | export OMP_NUM_THREADS=32 13 | 14 | # set the current dir 15 | currentdir=`pwd` 16 | 17 | # Prepare inputs to voxel selection function 18 | data_dir=$1 # What is the directory containing data? 19 | suffix=$2 # What is the extension of the data you're loading 20 | mask_file=$3 #What is the path to the top N mask file (THIS IS NOT THE WHOLE BRAIN MASK) 21 | epoch_file=$4 # What is the path to the epoch file 22 | left_out_subj=$5 #Which participant (as an integer) are you using for testing? 23 | second_mask=$6 # Do you want to use a second mask to compare the data with? Necessary for extrinsic analyses. Otherwise ignore this input or set to None 24 | 25 | # Run the script 26 | # Run the python script (use mpi if running on the cluster) 27 | if [ $configuration == "cluster" ] 28 | then 29 | srun --mpi=pmi2 python ./fcma_classify.py $data_dir $suffix $mask_file $epoch_file $left_out_subj $second_mask 30 | else 31 | mpirun -np 2 python ./fcma_classify.py $data_dir $suffix $mask_file $epoch_file $left_out_subj $second_mask 32 | fi 33 | -------------------------------------------------------------------------------- /tutorials/09-fcma/run_fcma_voxel_selection_cv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #SBATCH -t 20 3 | #SBATCH --mem-per-cpu=8G 4 | #SBATCH -n 2 5 | #SBATCH --job-name fcma_voxel_select_cv 6 | #SBATCH --output=../logs/fcma_voxel_select_cv-%j.out 7 | 8 | # Set up the environment. You will need to modify the module for your cluster. 9 | source ../setup_environment.sh 10 | 11 | # How many threads can you make 12 | export OMP_NUM_THREADS=32 13 | 14 | # set the current dir 15 | currentdir=`pwd` 16 | 17 | # Prepare inputs to voxel selection function 18 | data_dir=$1 # What is the directory containing data? 
19 | suffix=$2 # What is the extension of the data you're loading 20 | mask_file=$3 # What is the path to the whole brain mask 21 | epoch_file=$4 # What is the path to the epoch file 22 | left_out_subj=$5 # Which participant (as an integer) are you leaving out for this cv? 23 | output_dir=$6 # Where do you want to save the data 24 | 25 | # Run the script 26 | if [ $configuration == "cluster" ] 27 | then 28 | srun --mpi=pmi2 python ./fcma_voxel_selection_cv.py $data_dir $suffix $mask_file $epoch_file $left_out_subj $output_dir 29 | else 30 | mpirun -np 2 python ./fcma_voxel_selection_cv.py $data_dir $suffix $mask_file $epoch_file $left_out_subj $output_dir 31 | fi 32 | -------------------------------------------------------------------------------- /tutorials/13-real-time/fmrisim/ROI_A.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/13-real-time/fmrisim/ROI_A.nii.gz -------------------------------------------------------------------------------- /tutorials/13-real-time/fmrisim/ROI_B.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/13-real-time/fmrisim/ROI_B.nii.gz -------------------------------------------------------------------------------- /tutorials/13-real-time/fmrisim/mask.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/13-real-time/fmrisim/mask.npy -------------------------------------------------------------------------------- /tutorials/13-real-time/fmrisim/sub_noise_dict.txt: -------------------------------------------------------------------------------- 1 | {'voxel_size': [3.0, 3.0, 3.5000014], 'max_activity': 1624.0952380952381, 'sfnr': 70.7171164884859, 'fwhm': 5.6615986148452109, 'snr': 69.511670001526468, 'auto_reg_sigma': 0.3953408689278336, 'drift_sigma': 0.6046591310721664} -------------------------------------------------------------------------------- /tutorials/13-real-time/fmrisim/sub_template.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/13-real-time/fmrisim/sub_template.nii.gz -------------------------------------------------------------------------------- /tutorials/13-real-time/generate_data.py: -------------------------------------------------------------------------------- 1 | # Generate a noise volume given a set of parameters 2 | 3 | import os 4 | import glob 5 | import time 6 | import random 7 | import inspect 8 | import typing 9 | import nibabel # type: ignore 10 | import numpy as np # type: ignore 11 | from brainiak.utils import fmrisim as sim # type: ignore 12 | import sys 13 | sys.path.append(os.getcwd()) 14 | import utils 15 | 16 | # Template input directory 17 | frame = inspect.currentframe() 18 | moduleFile = typing.cast(str, frame.f_code.co_filename) 19 | moduleDir = os.path.dirname(moduleFile) 20 | fmrisim_dir = os.path.join(moduleDir, "fmrisim/") 21 | 22 | # Data output directory 23 | data_dir = os.path.join(utils.results_path, "13-real-time/data/") 24 | 25 | # If the folder doesn't exist then make it 26 | if os.path.isdir(data_dir) is False: 27 
| os.makedirs(data_dir, exist_ok=True) 28 | 29 | # Specify the volume parameters 30 | trDuration = 2 # seconds 31 | numTRs = 200 # How many TRs will you generate? 32 | 33 | # Set up stimulus event time course parameters 34 | event_duration = 10 # How long is each event 35 | isi = 0 # What is the time between each event 36 | burn_in = 0 # How long before the first event 37 | 38 | # Specify signal magnitude parameters 39 | signal_change = 10 # How much change is there in intensity for the max of the patterns across participants 40 | multivariate_pattern = 0 # Do you want the signal to be a z scored pattern across voxels (1) or a univariate increase (0) 41 | switch_ROI = 0 # Do you want to switch the ROIs over part way through and if so, specify the proportion of TRs before this happens 42 | 43 | print('Load template of average voxel value') 44 | template_nii = nibabel.load(fmrisim_dir + 'sub_template.nii.gz') 45 | template = template_nii.get_data() 46 | 47 | dimensions = np.array(template.shape[0:3]) 48 | 49 | print('Create binary mask and normalize the template range') 50 | mask, template = sim.mask_brain(volume=template, 51 | mask_self=True, 52 | ) 53 | 54 | # Write out the mask as a numpy file 55 | np.save(data_dir + 'mask.npy', mask.astype(np.uint8)) 56 | 57 | # Load the noise dictionary 58 | print('Loading noise parameters') 59 | with open(fmrisim_dir + 'sub_noise_dict.txt', 'r') as f: 60 | noise_dict = f.read() 61 | noise_dict = eval(noise_dict) 62 | noise_dict['matched'] = 0 63 | 64 | print('Generating noise') 65 | noise = sim.generate_noise(dimensions=dimensions, 66 | stimfunction_tr=np.zeros((numTRs, 1)), 67 | tr_duration=int(trDuration), 68 | template=template, 69 | mask=mask, 70 | noise_dict=noise_dict, 71 | ) 72 | 73 | # Create the stimulus time course of the conditions 74 | total_time = int(numTRs * trDuration) 75 | events = int(total_time / event_duration) 76 | onsets_A = [] 77 | onsets_B = [] 78 | for event_counter in range(events): 79 | 80 | # Flip a coin for each epoch to determine whether it is A or B 81 | if np.random.randint(0, 2) == 1: 82 | onsets_A.append(event_counter * event_duration) 83 | else: 84 | onsets_B.append(event_counter * event_duration) 85 | 86 | temporal_res = 0.5 # How many timepoints per second of the stim function are to be generated? 
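# (Editor's note, illustrative only -- not part of the original script.)
# With the values above, total_time = numTRs * trDuration = 200 * 2 = 400 s,
# so at temporal_res = 0.5 samples per second each condition's stimulus
# function generated below is expected to have 400 * 0.5 = 200 rows,
# assuming fmrisim samples the time course at `temporal_resolution` points
# per second, e.g.:
#     assert stimfunc_A.shape[0] == int(total_time * temporal_res)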
87 | 88 | # Create a time course of events 89 | stimfunc_A = sim.generate_stimfunction(onsets=onsets_A, 90 | event_durations=[event_duration], 91 | total_time=total_time, 92 | temporal_resolution=temporal_res, 93 | ) 94 | 95 | stimfunc_B = sim.generate_stimfunction(onsets=onsets_B, 96 | event_durations=[event_duration], 97 | total_time=total_time, 98 | temporal_resolution=temporal_res, 99 | ) 100 | 101 | # Create a labels timecourse 102 | np.save(data_dir + 'labels.npy', (stimfunc_A + (stimfunc_B * 2))) 103 | 104 | 105 | print('Load ROIs') 106 | nii_A = nibabel.load(fmrisim_dir + 'ROI_A.nii.gz') 107 | nii_B = nibabel.load(fmrisim_dir + 'ROI_B.nii.gz') 108 | ROI_A = nii_A.get_data() 109 | ROI_B = nii_B.get_data() 110 | 111 | # How many voxels per ROI 112 | voxels_A = int(ROI_A.sum()) 113 | voxels_B = int(ROI_B.sum()) 114 | 115 | # Create a pattern of activity across the two voxels 116 | print('Creating signal pattern') 117 | if multivariate_pattern == 1: 118 | pattern_A = np.random.rand(voxels_A).reshape((voxels_A, 1)) 119 | pattern_B = np.random.rand(voxels_B).reshape((voxels_B, 1)) 120 | else: # Just make a univariate increase 121 | pattern_A = np.tile(1, voxels_A).reshape((voxels_A, 1)) 122 | pattern_B = np.tile(1, voxels_B).reshape((voxels_B, 1)) 123 | 124 | # Multiply each pattern by each voxel time course 125 | weights_A = np.tile(stimfunc_A, voxels_A) * pattern_A.T 126 | weights_B = np.tile(stimfunc_B, voxels_B) * pattern_B.T 127 | 128 | # Convolve the onsets with the HRF 129 | print('Creating signal time course') 130 | signal_func_A = sim.convolve_hrf(stimfunction=weights_A, 131 | tr_duration=trDuration, 132 | temporal_resolution=temporal_res, 133 | scale_function=1, 134 | ) 135 | 136 | signal_func_B = sim.convolve_hrf(stimfunction=weights_B, 137 | tr_duration=trDuration, 138 | temporal_resolution=temporal_res, 139 | scale_function=1, 140 | ) 141 | 142 | # Multiply the signal by the signal change 143 | signal_func_A *= signal_change 144 | signal_func_B *= signal_change 145 | 146 | # Combine the signal time course with the signal volume 147 | print('Creating signal volumes') 148 | signal_A = sim.apply_signal(signal_func_A, 149 | ROI_A, 150 | ) 151 | 152 | signal_B = sim.apply_signal(signal_func_B, 153 | ROI_B, 154 | ) 155 | 156 | # Do you want to switch the location of the signal 75% of the way through through? 157 | if switch_ROI > 0: 158 | 159 | # When does the switch occur? 160 | switch_point = int(numTRs * switch_ROI) 161 | 162 | part_1_A = sim.apply_signal(signal_func_A[:switch_point, :], 163 | ROI_A, 164 | ) 165 | 166 | part_2_A = sim.apply_signal(signal_func_A[switch_point:, :], 167 | ROI_B, 168 | ) 169 | 170 | part_1_B = sim.apply_signal(signal_func_B[:switch_point, :], 171 | ROI_B, 172 | ) 173 | 174 | part_2_B = sim.apply_signal(signal_func_B[switch_point:, :], 175 | ROI_A, 176 | ) 177 | 178 | # Concatenate the new volumes 179 | signal_A = np.concatenate((part_1_A, part_2_A), axis=3) 180 | signal_B = np.concatenate((part_1_B, part_2_B), axis=3) 181 | 182 | # # What will you name this file as? 
183 | # data_dir = fmrisim_dir + 'data_switched' 184 | 185 | # Combine the two signal timecourses 186 | signal = signal_A + signal_B 187 | 188 | print('Generating TRs in real time') 189 | for idx in range(numTRs): 190 | 191 | # Create the brain volume on this TR 192 | brain = noise[:, :, :, idx] + signal[:, :, :, idx] 193 | 194 | # Save the volume as a numpy file, with each TR as its own file 195 | output_file = data_dir + 'rt_' + format(idx, '03d') + '.npy' 196 | 197 | # Save file 198 | brain_float32 = brain.astype(np.float32) 199 | print("Generate {}".format(output_file)) 200 | np.save(output_file, brain_float32) 201 | 202 | # Sleep until next TR 203 | time.sleep(trDuration) 204 | -------------------------------------------------------------------------------- /tutorials/13-real-time/run_generate_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Input python command to be submitted as a job 3 | 4 | #SBATCH --output=generate_data-%j.out 5 | #SBATCH --job-name generate_data 6 | #SBATCH -t 30 7 | #SBATCH --mem=4G 8 | #SBATCH -n 1 9 | 10 | # Check you are in the correct directory 11 | if [ ${PWD##*/} == '13-real-time' ] 12 | then 13 | cd .. 14 | echo "Changing to the tutorials directory" 15 | fi 16 | 17 | 18 | # Set up the environment 19 | source ./setup_environment.sh 20 | 21 | # Run the python script 22 | python ./13-real-time/generate_data.py 23 | -------------------------------------------------------------------------------- /tutorials/colab-env-setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "yb-sHbwR4QO2" 8 | }, 9 | "source": [ 10 | "# Brainiak Tutorials Environment Setup for Google CoLab" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "colab_type": "text", 17 | "id": "dyFoGtMhg_kC" 18 | }, 19 | "source": [ 20 | "## Install Brainiak and code dependencies (Approx install time 1 minute)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 0, 26 | "metadata": { 27 | "colab": {}, 28 | "colab_type": "code", 29 | "collapsed": true, 30 | "id": "DZ_oWJwO2392" 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "!pip install deepdish ipython matplotlib nilearn notebook pandas seaborn watchdog\n", 35 | "!pip install pip\\<10\n", 36 | "!pip install git+https://github.com/brainiak/brainiak" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "colab_type": "text", 43 | "id": "327lTJCnhQyY" 44 | }, 45 | "source": [ 46 | "## Git-clone helper files for tutorials" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 0, 52 | "metadata": { 53 | "colab": {}, 54 | "colab_type": "code", 55 | "collapsed": true, 56 | "id": "7sfzHEQA4GJ-" 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "!git clone https://github.com/brainiak/brainiak-tutorials.git\n", 61 | "!cd brainiak-tutorials/tutorials/; cp -r 07-searchlight 09-fcma 13-real-time utils.py setup_environment.sh /content/\n", 62 | "# Make brainiak_datasets sub-directory in home directory\n", 63 | "!mkdir /root/brainiak_datasets" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "colab_type": "text", 70 | "id": "6yLqGuNihci9" 71 | }, 72 | "source": [ 73 | "## Download datasets for tutorials\n", 74 | "Pick one of the following datasets to download for the appropriate tutorial you will work on" 75 | ] 76 | }, 77 | { 78 | 
"cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "!mkdir -p /root/brainiak_datasets" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "colab_type": "text", 90 | "id": "20dULqJi1rPL" 91 | }, 92 | "source": [ 93 | "### Download VDC dataset - (tutorial notebooks 02-05) (Approx runtime 8 minutes)\n", 94 | "Dataset size: 5.3 GB, Estimated download time: 2.5 minutes, Estimated unzip time 6 minutes" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 0, 100 | "metadata": { 101 | "colab": {}, 102 | "colab_type": "code", 103 | "collapsed": true, 104 | "id": "iASMWeOdFxjI" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?id=1tiEjtp96zwIdnl3p726llj5KMETnNJ4A&export=download' -O '02-data-handling-simulated-dataset.zip'\n", 109 | "!cd /root/brainiak_datasets; unzip 02-data-handling-simulated-dataset.zip\n", 110 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=KlKd&id=1PrnucQ4hXqUY8gl6ysGJJiTreYu7KOLz' -O 'vdc.zip'\n", 111 | "!cd /root/brainiak_datasets; unzip vdc.zip" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "colab_type": "text", 118 | "id": "e23rxU432dms" 119 | }, 120 | "source": [ 121 | "### Download NinetySix dataset - (tutorial notebook 06-rsa) (Approx runtime 10 sec)\n", 122 | "Dataset size: 150 MB" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 0, 128 | "metadata": { 129 | "colab": {}, 130 | "colab_type": "code", 131 | "collapsed": true, 132 | "id": "1FFAkp5hJrNr" 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=Yqoe&id=14m-YY-N3mCjCdHGkeVlTS4uA7WJzbQS0' -O 'NinetySix.zip'\n", 137 | "!cd /root/brainiak_datasets/; unzip NinetySix.zip" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": { 143 | "colab_type": "text", 144 | "id": "ZXJvLs028xnW" 145 | }, 146 | "source": [ 147 | "### Download Face-scene dataset - (tutorial notebooks 07-searchlights, 09-fcma*) (Approx runtime 10 sec)\n", 148 | "*09-fcma is not supported on CoLab
\n", 149 | "Dataset size: 255 MB" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 0, 155 | "metadata": { 156 | "colab": {}, 157 | "colab_type": "code", 158 | "collapsed": true, 159 | "id": "5M8ulqZb9-H_" 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=GYuk&id=1LBhKWx5NSlndUlBev3jP997wNiM6HA9N' -O 'face_scene.zip'\n", 164 | "!cd /root/brainiak_datasets/; unzip face_scene.zip" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": { 170 | "colab_type": "text", 171 | "id": "3f77ezxd9WOl" 172 | }, 173 | "source": [ 174 | "### Download Latatt dataset - (tutorial notebook 08-connectivity) (Approx runtime 15 sec)\n", 175 | "Dataset size: 584 MB" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 0, 181 | "metadata": { 182 | "colab": {}, 183 | "colab_type": "code", 184 | "collapsed": true, 185 | "id": "p1mHpAbK-P0j" 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=jj9P&id=1iX5nLZvQsWuM5AmKeiBNoP8QkZjlOY7T' -O 'latatt.zip'\n", 190 | "!cd /root/brainiak_datasets/; unzip latatt.zip" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "colab_type": "text", 197 | "id": "vh5emd9j9hsH" 198 | }, 199 | "source": [ 200 | "### Download Pieman2 dataset - (tutorial notebook 10-isc, 11-srm) (Approx runtime 3 minutes)\n", 201 | "Dataset size: 2.65 GB, Estimated download time: 1 minute, Estimated unzip time: 2 minutes" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 0, 207 | "metadata": { 208 | "colab": {}, 209 | "colab_type": "code", 210 | "collapsed": true, 211 | "id": "V9HuQQHm-fdf" 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=5QPf&id=1IBA39ZZjeGS1u_DvZdiw1AZZQMS3K5q0' -O 'Pieman2.zip'\n", 216 | "!cd /root/brainiak_datasets/; unzip Pieman2.zip" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": { 222 | "colab_type": "text", 223 | "id": "3tmyB5aw9rgW" 224 | }, 225 | "source": [ 226 | "### Download Raider dataset - (tutorial notebook 11-srm) (Approx runtime 5 sec)\n", 227 | "Dataset size: 31 MB" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 0, 233 | "metadata": { 234 | "colab": {}, 235 | "colab_type": "code", 236 | "collapsed": true, 237 | "id": "p22qFy0n-tPW" 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "!cd /root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=SE4m&id=1zCQoulK_rlzzRb4n6YMVp2cI8vZpxnwx' -O 'raider.zip'\n", 242 | "!cd /root/brainiak_datasets/; unzip raider.zip" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": { 248 | "colab_type": "text", 249 | "id": "YK8ZESol9wME" 250 | }, 251 | "source": [ 252 | "### Download Sherlock_processed dataset - (tutorial notebook 12-hmm) (Approx runtime 10 sec)\n", 253 | "Dataset size: 255 MB" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 0, 259 | "metadata": { 260 | "colab": {}, 261 | "colab_type": "code", 262 | "collapsed": true, 263 | "id": "oAJrPZhP_B88" 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "!cd 
/root/brainiak_datasets; wget -q --show-progress --no-check-certificate -r 'https://drive.google.com/uc?export=download&confirm=wZk0&id=11y9iQUoNVyVLANllKebFUrqdvQt-vsXm' -O 'Sherlock_processed.zip'\n", 268 | "!cd /root/brainiak_datasets/; unzip Sherlock_processed.zip" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": { 274 | "colab_type": "text", 275 | "id": "moZ9nTzt4Ce3" 276 | }, 277 | "source": [ 278 | "### Download all datasets - (uncomment to run) (Approx runtime 14 minutes)\n", 279 | "Dataset size: 9.2 GB, Estimated download time: 5 minutes, Estimated unzip time: 9 minutes" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 0, 285 | "metadata": { 286 | "colab": {}, 287 | "colab_type": "code", 288 | "collapsed": true, 289 | "id": "k--4ayA25Uxa" 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "#!cd /root; wget -q --show-progress --no-check-certificate -r 'https://docs.google.com/uc?export=download&id=1ZglrmkYw8isBAfsL53n9JgHEucmrnm4E' -O 'tutorials.zip'\n", 294 | "#!cd /root; unzip tutorials.zip" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": { 300 | "colab_type": "text", 301 | "id": "aFpm6-Hbfg9L" 302 | }, 303 | "source": [ 304 | "### Remove downloaded zip files - (uncomment to run)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 0, 310 | "metadata": { 311 | "colab": {}, 312 | "colab_type": "code", 313 | "collapsed": true, 314 | "id": "E0kSNsP8fkUe" 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "#!cd /root/brainiak_datasets; rm *.zip" 319 | ] 320 | } 321 | ], 322 | "metadata": { 323 | "colab": { 324 | "collapsed_sections": [], 325 | "name": "Google CoLab Brainiak Tutorial Setup", 326 | "provenance": [], 327 | "toc_visible": true, 328 | "version": "0.3.2" 329 | }, 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.6.6" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 1 350 | } 351 | -------------------------------------------------------------------------------- /tutorials/imgs/lab11/srm_time_segment_matching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/imgs/lab11/srm_time_segment_matching.png -------------------------------------------------------------------------------- /tutorials/imgs/lab12/hmm_schematics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/imgs/lab12/hmm_schematics.png -------------------------------------------------------------------------------- /tutorials/imgs/lab7/mpi_openmp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/imgs/lab7/mpi_openmp.jpg -------------------------------------------------------------------------------- /tutorials/imgs/lab7/nodes_process.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/imgs/lab7/nodes_process.jpg -------------------------------------------------------------------------------- /tutorials/logs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brainiak/brainiak-tutorials/fb62ede943d9694fe703aee0df5f43ecf5558415/tutorials/logs/.gitkeep -------------------------------------------------------------------------------- /tutorials/requirements.txt: -------------------------------------------------------------------------------- 1 | deepdish 2 | ipython 3 | matplotlib 4 | networkx 5 | nilearn 6 | notebook 7 | nxviz 8 | pandas 9 | seaborn 10 | watchdog 11 | niwidgets 12 | -------------------------------------------------------------------------------- /tutorials/run_jupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | 3 | source ~/.bashrc 4 | 5 | # Setup the environment 6 | source setup_environment.sh 7 | 8 | # Launch a jupyter notebook 9 | jupyter notebook 10 | -------------------------------------------------------------------------------- /tutorials/run_jupyter_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 -m notebook --allow-root --no-browser --ip=0.0.0.0 --port 8899 3 | -------------------------------------------------------------------------------- /tutorials/run_jupyter_remote_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | #SBATCH --partition short 3 | #SBATCH --nodes 1 4 | #SBATCH --time 4:00:00 5 | #SBATCH --mem-per-cpu 12G 6 | #SBATCH --job-name tunnel 7 | #SBATCH --output logs/jupyter-log-%J.txt 8 | 9 | # setup the environment 10 | source setup_environment.sh 11 | 12 | ## get tunneling info 13 | XDG_RUNTIME_DIR="" 14 | ipnport=$(shuf -i8000-9999 -n1) 15 | ipnip=$(hostname -i) 16 | ## print tunneling instructions to jupyter-log-{jobid}.txt 17 | echo -e " 18 | Copy/Paste this in your local terminal to ssh tunnel with remote 19 | ----------------------------------------------------------------- 20 | ssh -N -L $ipnport:$ipnip:$ipnport $USER@${server} 21 | ----------------------------------------------------------------- 22 | 23 | Then open a browser on your local machine to the following address 24 | ------------------------------------------------------------------ 25 | localhost:$ipnport (prefix w/ https:// if using password) 26 | ------------------------------------------------------------------ 27 | " 28 | 29 | ## start an ipcluster instance and launch jupyter 30 | mpirun -n 1 jupyter-notebook --no-browser --port=$ipnport --ip=$ipnip 31 | -------------------------------------------------------------------------------- /tutorials/run_jupyter_remote_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | XDG_RUNTIME_DIR="" 3 | ipnport=$(shuf -i8000-9999 -n1) 4 | ipnip=$(hostname -i) 5 | server=$(hostname) 6 | 7 | echo -e " 8 | Copy/Paste this in your local terminal to ssh tunnel with remote 9 | ----------------------------------------------------------------- 10 | ssh -N -L $ipnport:$ipnip:$ipnport $USER@${server} 11 | ----------------------------------------------------------------- 12 | 13 | Then open a browser on your local 
machine to the following address 14 | ------------------------------------------------------------------ 15 | localhost:$ipnport (prefix w/ https:// if using password) 16 | ------------------------------------------------------------------ 17 | " 18 | 19 | ## start an ip instance and launch jupyter server 20 | 21 | # Setup environment 22 | source setup_environment.sh 23 | 24 | jupyter notebook --no-browser --port=$ipnport --ip=$ipnip 25 | 26 | # (prefix w/ https:// if using password) 27 | -------------------------------------------------------------------------------- /tutorials/setup_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | 3 | # Specify the code necessary to setup your environment to run BrainIAK on a Jupyter notebook. This could involve activating a conda environment (like below) or importing modules. 4 | CONDA_ENV=mybrainiak 5 | 6 | # How are you interacting with the notebooks? On a cluster, locally on a laptop, using docker, etc.? This will determine how some functions are launched, such as jupyter and some jobs 7 | configuration='server' # includes 'cluster' or 'local' or 'docker' 8 | 9 | # Also setup the environment to use some simple visualization tools, like FSL 10 | #module load FSL 11 | 12 | # If on a cluster, specify the server name you are going to use. This might be the address you use for your SSH key to log in to the cluster. The default is to use the host name that may be appropriate 13 | server=$(hostname) 14 | 15 | if [[ -n $CONDA_ENV ]]; then 16 | # Start the conda environment 17 | conda activate &> /dev/null 18 | if [[ $? -eq 0 ]]; then 19 | # conda activate command is present 20 | conda activate $CONDA_ENV 21 | else 22 | # older versions of conda use source activate instead 23 | source activate $CONDA_ENV 24 | fi 25 | 26 | # Check if the conda command succeeded 27 | if [[ $? -ne 0 ]]; then 28 | echo "Conda not initialized properly, check your conda environment" 29 | exit -1 30 | fi 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /tutorials/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import scipy.io 4 | import nibabel as nib 5 | from nilearn.input_data import NiftiMasker 6 | from nilearn.masking import compute_epi_mask 7 | from sklearn import preprocessing 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.model_selection import PredefinedSplit 10 | from copy import deepcopy 11 | 12 | # Data path: Where the data for the tutorials is stored. 13 | # Change this path only if you have saved the data to a different folder. 14 | data_path = os.path.join(os.path.expanduser('~'), 'brainiak_datasets') 15 | 16 | # Results path: Where the results and intermediate analyses of the tutorials are stored. 17 | # Change this path only if you wish to save your outputs to a different folder. 
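# (Illustrative, hypothetical example: to keep outputs on a scratch partition
#  instead of the home directory, one could point this at something like
#      results_path = os.path.join('/scratch', os.environ['USER'], 'brainiak_results')
#  before running the notebooks.)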
18 | results_path = os.path.join(os.path.expanduser('~'), 'brainiak_results') 19 | 20 | # Data path VDC dataset 21 | vdc_data_dir = os.path.join(data_path, 'vdc') 22 | 23 | # constants for the VDC dataset 24 | vdc_label_dict = {1: "Faces", 2: "Scenes", 3: "Objects"} 25 | vdc_all_ROIs = ['FFA', 'PPA'] 26 | vdc_n_runs = 3 27 | vdc_TR = 1.5 28 | vdc_hrf_lag = 4.5 # In seconds what is the lag between a stimulus onset and the peak bold response 29 | vdc_TRs_run = 310 30 | 31 | #constants for the simulated data in notebook 02-data-handling 32 | nb2_simulated_data = os.path.join(data_path, '02-data-handling-simulated-dataset') 33 | 34 | #constants for ninety six dataset 35 | ns_data_dir = os.path.join(data_path, 'NinetySix') 36 | 37 | all_subj_initials = {'BE', 'KO', 'SN', 'TI'} 38 | rois_to_remove = ['lLO', 'rLO'] 39 | rois_to_keep = ['lFFA', 'rFFA', 'lPPA', 'rPPA'] 40 | 41 | #constants for latatt dataset 42 | latatt_dir = os.path.join(data_path, 'latatt') 43 | 44 | # constants for the FCMA (face-scene) dataset 45 | fs_data_dir = os.path.join(data_path, 'face_scene') 46 | 47 | # for Pieman2 dataset 48 | pieman2_dir = os.path.join(data_path, 'Pieman2') 49 | 50 | # for Raider dataset 51 | raider_data_dir = os.path.join(data_path, 'raider') 52 | 53 | # for Sherlock dataset 54 | sherlock_h5_data = os.path.join(data_path, 'sherlock_h5') 55 | sherlock_dir = os.path.join(data_path, 'Sherlock_processed') 56 | 57 | 58 | 59 | def get_MNI152_template(dim_x, dim_y, dim_z): 60 | """get MNI152 template used in fmrisim 61 | Parameters 62 | ---------- 63 | dim_x: int 64 | dim_y: int 65 | dim_z: int 66 | - dims set the size of the volume we want to create 67 | 68 | Return 69 | ------- 70 | MNI_152_template: 3d array (dim_x, dim_y, dim_z) 71 | """ 72 | # Import the fmrisim from BrainIAK 73 | import brainiak.utils.fmrisim as sim 74 | # Make a grey matter mask into a 3d volume of a given size 75 | dimensions = np.asarray([dim_x, dim_y, dim_z]) 76 | _, MNI_152_template = sim.mask_brain(dimensions) 77 | return MNI_152_template 78 | 79 | 80 | def load_vdc_stim_labels(sub): 81 | """load the stimulus labels for the VDC data 82 | Parameters 83 | ---------- 84 | sub: string, subject id 85 | 86 | Return 87 | ---------- 88 | Stimulus labels for all runs 89 | """ 90 | stim_label = []; 91 | stim_label_allruns = []; 92 | for run in range(1, vdc_n_runs + 1): 93 | in_file = os.path.join(vdc_data_dir, sub ,'ses-day2','design_matrix','%s_localizer_0%d.mat' % (sub, run)) 94 | # Load in data from matlab 95 | stim_label = scipy.io.loadmat(in_file); 96 | stim_label = np.array(stim_label['data']); 97 | # Store the data 98 | if run == 1: 99 | stim_label_allruns = stim_label; 100 | else: 101 | stim_label_allruns = np.hstack((stim_label_allruns, stim_label)) 102 | return stim_label_allruns 103 | 104 | 105 | def load_vdc_mask(ROI_name, sub): 106 | """Load the mask for the VDC data 107 | Parameters 108 | ---------- 109 | ROI_name: string 110 | sub: string 111 | 112 | Return 113 | ---------- 114 | the requested mask 115 | """ 116 | assert ROI_name in vdc_all_ROIs 117 | maskdir = os.path.join(vdc_data_dir,sub,'preprocessed','masks') 118 | # load the mask 119 | maskfile = os.path.join(maskdir, sub + "_ventral_%s_locColl_to_epi1.nii.gz" % (ROI_name)) 120 | mask = nib.load(maskfile) 121 | print("Loaded %s mask" % (ROI_name)) 122 | return mask 123 | 124 | 125 | def load_vdc_epi_data(sub, run): 126 | # Load MRI file (in Nifti format) of one localizer run 127 | epi_in = os.path.join(vdc_data_dir, sub, 128 | 
"preprocessed","loc","%s_filtered2_d1_firstExampleFunc_r%d.nii" % (sub, run)) 129 | epi_data = nib.load(epi_in) 130 | print("Loading data from %s" % (epi_in)) 131 | return epi_data 132 | 133 | 134 | def mask_data(epi_data, mask): 135 | """mask the input data with the input mask 136 | Parameters 137 | ---------- 138 | epi_data 139 | mask 140 | 141 | Return 142 | ---------- 143 | masked data 144 | """ 145 | nifti_masker = NiftiMasker(mask_img=mask) 146 | epi_masked_data = nifti_masker.fit_transform(epi_data); 147 | return epi_masked_data 148 | 149 | 150 | def scale_data(data): 151 | data_scaled = preprocessing.StandardScaler().fit_transform(data) 152 | return data_scaled 153 | 154 | 155 | # Make a function to load the mask data 156 | def load_vdc_masked_data(directory, subject_name, mask_list): 157 | masked_data_all = [0] * len(mask_list) 158 | 159 | # Cycle through the masks 160 | for mask_counter in range(len(mask_list)): 161 | # load the mask for the corresponding ROI 162 | mask = load_vdc_mask(mask_list[mask_counter], subject_name) 163 | 164 | # Cycle through the runs 165 | for run in range(1, vdc_n_runs + 1): 166 | # load fMRI data 167 | epi_data = load_vdc_epi_data(subject_name, run) 168 | # mask the data 169 | epi_masked_data = mask_data(epi_data, mask) 170 | epi_masked_data = np.transpose(epi_masked_data) 171 | 172 | # concatenate data 173 | if run == 1: 174 | masked_data_all[mask_counter] = epi_masked_data 175 | else: 176 | masked_data_all[mask_counter] = np.hstack( 177 | (masked_data_all[mask_counter], epi_masked_data) 178 | ) 179 | return masked_data_all 180 | 181 | 182 | 183 | """""" 184 | 185 | 186 | # Make a function to load the mask data 187 | def load_data(directory, subject_name, mask_name='', num_runs=3, zscore_data=False): 188 | 189 | # Cycle through the masks 190 | print ("Processing Start ...") 191 | 192 | # If there is a mask supplied then load it now 193 | if mask_name is '': 194 | mask = None 195 | else: 196 | mask = load_vdc_mask(mask_name, subject_name) 197 | 198 | # Cycle through the runs 199 | for run in range(1, num_runs + 1): 200 | epi_data = load_vdc_epi_data(subject_name, run) 201 | 202 | # Mask the data if necessary 203 | if mask_name is not '': 204 | epi_mask_data = mask_data(epi_data, mask).T 205 | else: 206 | # Do a whole brain mask 207 | if run == 1: 208 | # Compute mask from epi 209 | mask = compute_epi_mask(epi_data).get_data() 210 | else: 211 | # Get the intersection mask 212 | # (set voxels that are within the mask on all runs to 1, set all other voxels to 0) 213 | mask *= compute_epi_mask(epi_data).get_data() 214 | 215 | # Reshape all of the data from 4D (X*Y*Z*time) to 2D (voxel*time): not great for memory 216 | epi_mask_data = epi_data.get_data().reshape( 217 | mask.shape[0] * mask.shape[1] * mask.shape[2], 218 | epi_data.shape[3] 219 | ) 220 | 221 | # Transpose and z-score (standardize) the data 222 | if zscore_data == True: 223 | scaler = preprocessing.StandardScaler().fit(epi_mask_data) 224 | preprocessed_data = scaler.transform(epi_mask_data) 225 | else: 226 | preprocessed_data = epi_mask_data 227 | 228 | # Concatenate the data 229 | if run == 1: 230 | concatenated_data = preprocessed_data 231 | else: 232 | concatenated_data = np.hstack((concatenated_data, preprocessed_data)) 233 | 234 | # Apply the whole-brain masking: First, reshape the mask from 3D (X*Y*Z) to 1D (voxel). 235 | # Second, get indices of non-zero voxels, i.e. voxels inside the mask. 236 | # Third, zero out all of the voxels outside of the mask. 
237 | if mask_name is '': 238 | mask_vector = np.nonzero(mask.reshape(mask.shape[0] * mask.shape[1] * mask.shape[2], ))[0] 239 | concatenated_data = concatenated_data[mask_vector, :] 240 | 241 | # Return the list of mask data 242 | return concatenated_data, mask 243 | 244 | 245 | # Make a function for loading in the labels 246 | def load_labels(directory, subject_name): 247 | stim_label = []; 248 | stim_label_concatenated = []; 249 | for run in range(1,4): 250 | in_file= os.path.join(directory, subject_name, 'ses-day2','design_matrix' ,"%s_localizer_0%d.mat" % (subject_name, run)) 251 | 252 | # Load in data from matlab 253 | stim_label = scipy.io.loadmat(in_file); 254 | stim_label = np.array(stim_label['data']); 255 | 256 | # Store the data 257 | if run == 1: 258 | stim_label_concatenated = stim_label; 259 | else: 260 | stim_label_concatenated = np.hstack((stim_label_concatenated, stim_label)) 261 | 262 | print("Loaded ", subject_name) 263 | return stim_label_concatenated 264 | 265 | 266 | # Convert the TR 267 | def label2TR(stim_label, num_runs, TR, TRs_run): 268 | 269 | # Calculate the number of events/run 270 | _, events = stim_label.shape 271 | events_run = int(events / num_runs) 272 | 273 | # Preset the array with zeros 274 | stim_label_TR = np.zeros((TRs_run * 3, 1)) 275 | 276 | # Cycle through the runs 277 | for run in range(0, num_runs): 278 | 279 | # Cycle through each element in a run 280 | for i in range(events_run): 281 | 282 | # What element in the concatenated timing file are we accessing 283 | time_idx = run * (events_run) + i 284 | 285 | # What is the time stamp 286 | time = stim_label[2, time_idx] 287 | 288 | # What TR does this timepoint refer to? 289 | TR_idx = int(time / TR) + (run * (TRs_run - 1)) 290 | 291 | # Add the condition label to this timepoint 292 | stim_label_TR[TR_idx]=stim_label[0, time_idx] 293 | 294 | return stim_label_TR 295 | 296 | # Create a function to shift the size 297 | def shift_timing(label_TR, TR_shift_size): 298 | 299 | # Create a short vector of extra zeros 300 | zero_shift = np.zeros((TR_shift_size, 1)) 301 | 302 | # Zero pad the column from the top. 303 | label_TR_shifted = np.vstack((zero_shift, label_TR)) 304 | 305 | # Don't include the last rows that have been shifted out of the time line. 306 | label_TR_shifted = label_TR_shifted[0:label_TR.shape[0],0] 307 | 308 | return label_TR_shifted 309 | 310 | 311 | # Extract bold data for non-zero labels. 
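# (Illustrative usage, assuming the TR-wise labels have first been shifted by
#  the hemodynamic lag of int(vdc_hrf_lag / vdc_TR) = int(4.5 / 1.5) = 3 TRs
#  via shift_timing above:
#      bold, labels = reshape_data(shifted_labels, masked_data)
#  keeps only the TRs whose label is non-zero, returning a (samples x voxels)
#  array and the matching label vector.)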
312 | def reshape_data(label_TR_shifted, masked_data_all): 313 | label_index = np.nonzero(label_TR_shifted) 314 | label_index = np.squeeze(label_index) 315 | 316 | # Pull out the indexes 317 | indexed_data = np.transpose(masked_data_all[:,label_index]) 318 | nonzero_labels = label_TR_shifted[label_index] 319 | 320 | return indexed_data, nonzero_labels 321 | 322 | # Take in a brain volume and label vector that is the length of the event number and convert it into a list the length of the block number 323 | def blockwise_sampling(eventwise_data, eventwise_labels, eventwise_run_ids, events_per_block=10): 324 | 325 | # How many events are expected 326 | expected_blocks = int(eventwise_data.shape[0] / events_per_block) 327 | 328 | # Average the BOLD data for each block of trials into blockwise_data 329 | blockwise_data = np.zeros((expected_blocks, eventwise_data.shape[1])) 330 | blockwise_labels = np.zeros(expected_blocks) 331 | blockwise_run_ids = np.zeros(expected_blocks) 332 | 333 | for i in range(0, expected_blocks): 334 | start_row = i * events_per_block 335 | end_row = start_row + events_per_block - 1 336 | 337 | blockwise_data[i,:] = np.mean(eventwise_data[start_row:end_row,:], axis = 0) 338 | blockwise_labels[i] = np.mean(eventwise_labels[start_row:end_row]) 339 | blockwise_run_ids[i] = np.mean(eventwise_run_ids[start_row:end_row]) 340 | 341 | # Report the new variable sizes 342 | print('Expected blocks: %d; Resampled blocks: %d' % (expected_blocks, blockwise_data.shape[0])) 343 | 344 | # Return the variables downsampled_data and downsampled_labels 345 | return blockwise_data, blockwise_labels, blockwise_run_ids 346 | 347 | 348 | 349 | 350 | def normalize(bold_data_, run_ids): 351 | """normalized the data within each run 352 | 353 | Parameters 354 | -------------- 355 | bold_data_: np.array, n_stimuli x n_voxels 356 | run_ids: np.array or a list 357 | 358 | Return 359 | -------------- 360 | normalized_data 361 | """ 362 | scaler = StandardScaler() 363 | data = [] 364 | for r in range(vdc_n_runs): 365 | data.append(scaler.fit_transform(bold_data_[run_ids == r, :])) 366 | normalized_data = np.vstack(data) 367 | return normalized_data 368 | 369 | 370 | def decode(X, y, cv_ids, model): 371 | """ 372 | Parameters 373 | -------------- 374 | X: np.array, n_stimuli x n_voxels 375 | y: np.array, n_stimuli, 376 | cv_ids: np.array - n_stimuli, 377 | 378 | Return 379 | -------------- 380 | models, scores 381 | """ 382 | scores = [] 383 | models = [] 384 | ps = PredefinedSplit(cv_ids) 385 | for train_index, test_index in ps.split(): 386 | # split the data 387 | X_train, X_test = X[train_index], X[test_index] 388 | y_train, y_test = y[train_index], y[test_index] 389 | # fit the model on the training set 390 | model.fit(X_train, y_train) 391 | # calculate the accuracy for the hold out run 392 | score = model.score(X_test, y_test) 393 | # save stuff 394 | models.append(deepcopy(model)) 395 | scores.append(score) 396 | return models, scores 397 | 398 | """helper funcs 399 | """ 400 | 401 | def load_data_for_a_subj(subj_initials): 402 | assert subj_initials in all_subj_initials 403 | images = scipy.io.loadmat( 404 | os.path.join(ns_data_dir, '%s_images.mat' % (subj_initials)) 405 | )['images'] 406 | data = scipy.io.loadmat( 407 | os.path.join(ns_data_dir, '%s_roi_data.mat' % (subj_initials)) 408 | ) 409 | 410 | # Unpack metadata 411 | roi_data_all = data['roi_data'] 412 | roi_names = data['roinames'] 413 | labels = np.array(data['labels']) 414 | categoryNames = data['categoryNames'] 415 | 416 | # 
Re-format metadata labels and ROIs 417 | n_categories = categoryNames.shape[1] 418 | n_rois = roi_names.shape[1] 419 | categories = [categoryNames[0, i][0] for i in range(n_categories)] 420 | roi_names = [roi_names[0, i][0] for i in range(n_rois)] 421 | labels = np.squeeze(labels) 422 | label_dict = {categories[i]: i+1 for i in range(len(categories))} 423 | 424 | # Remove r/lLO 425 | roi_data = [] 426 | for r in range(n_rois): 427 | if roi_names[r] in rois_to_keep: 428 | roi_data.append(roi_data_all[0, r]) 429 | roi_names = rois_to_keep 430 | n_rois = len(rois_to_keep) 431 | return images, roi_data, roi_names, n_rois, categories, n_categories, labels, label_dict 432 | 433 | 434 | def digitize_rdm(rdm_raw, n_bins = 10): 435 | """Digitize an input matrix to n bins (10 bins by default) 436 | rdm_raw: a square matrix 437 | """ 438 | # compute the bins 439 | 440 | rdm_bins = [np.percentile(np.ravel(rdm_raw), 100/n_bins * i) for i in range(n_bins)] 441 | # Compute the vectorized digitized value 442 | rdm_vec_digitized = np.digitize(np.ravel(rdm_raw), bins = rdm_bins) * (100 // n_bins) 443 | 444 | # Reshape to matrix 445 | rdm_digitized = np.reshape(rdm_vec_digitized, np.shape(rdm_raw)) 446 | 447 | # Force symmetry in the plot 448 | rdm_digitized = (rdm_digitized + rdm_digitized.T) / 2 449 | 450 | return rdm_digitized 451 | --------------------------------------------------------------------------------
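Editor's appendix -- a minimal, hypothetical sketch (not part of the repository) showing how the helpers defined in tutorials/utils.py above are typically combined for a single-subject VDC classification. The subject ID 'sub-01' is a placeholder, the data are assumed to follow the default ~/brainiak_datasets layout, and the tutorial notebooks themselves use the run-wise utils.decode helper (PredefinedSplit over runs) rather than the generic cross-validation shown here.

# Hypothetical end-to-end sketch built from the helpers in tutorials/utils.py
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import utils

sub = 'sub-01'  # placeholder subject ID

# Stimulus labels and FFA-masked BOLD data, concatenated across the 3 runs
stim_labels = utils.load_vdc_stim_labels(sub)
masked_data = utils.load_vdc_masked_data(utils.vdc_data_dir, sub, ['FFA'])[0]

# Event labels -> TR-wise labels, shifted by the hemodynamic lag
labels_TR = utils.label2TR(stim_labels, utils.vdc_n_runs,
                           utils.vdc_TR, utils.vdc_TRs_run)
shift_size = int(utils.vdc_hrf_lag / utils.vdc_TR)  # 4.5 s / 1.5 s = 3 TRs
labels_shifted = utils.shift_timing(labels_TR, shift_size)

# Keep only the labeled TRs and decode face/scene/object category
bold, labels = utils.reshape_data(labels_shifted, masked_data)
scores = cross_val_score(SVC(kernel='linear'), bold, labels, cv=3)
print('Mean accuracy: %0.2f' % np.mean(scores))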