├── .gitignore ├── HCP_fmripredict ├── .idea │ ├── HCP_fmripredict.iml │ ├── codeStyles │ │ ├── Project.xml │ │ └── codeStyleConfig.xml │ ├── dictionaries │ │ └── yu.xml │ ├── encodings.xml │ ├── inspectionProfiles │ │ └── Project_Default.xml │ ├── libraries │ │ └── R_User_Library.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── workspace.xml ├── .ipynb_checkpoints │ ├── HCP_rsfMRI_test-checkpoint.ipynb │ ├── HCP_task_fmri_test-checkpoint.ipynb │ ├── Untitled-checkpoint.ipynb │ └── Untitled1-checkpoint.ipynb ├── HCP_rsfMRI_test.ipynb ├── HCP_task_fmri_cnn_tensorpack.py ├── HCP_task_fmri_cnn_tensorpack_changesize.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk2.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk3.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_motor.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm_bk.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm_test.py ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm_test2.py ├── HCP_task_fmri_gcn_test.py ├── HCP_task_fmri_test.ipynb ├── cnn_graph_codes │ ├── HCP_tfmri_test_gcn.ipynb │ ├── HCP_tfmri_test_gcn_test.ipynb │ ├── LICENSE.txt │ ├── README.md │ ├── lib │ │ ├── coarsening.py │ │ ├── graph.py │ │ ├── models.py │ │ └── utils.py │ ├── makefile │ ├── my_test_mnist.ipynb │ ├── my_test_news.ipynb │ ├── rcv1.ipynb │ ├── requirements.txt │ ├── trials │ │ ├── 1_learning_filters.ipynb │ │ ├── 2_classification.ipynb │ │ ├── 3_tensorflow.ipynb │ │ ├── 4_coarsening.ipynb │ │ └── makefile │ └── usage.ipynb ├── config.py ├── extract_fmri_event_data.py ├── fmri_utils.py ├── hcp_test_mnist_gcn_cedar.log ├── model.py ├── my_test_mnist_gcn.py ├── requirements.txt ├── requirements_update.txt ├── tensorflow_test.sh ├── tensorflow_test_wholenode.sh ├── test_model_submit_bk.sh ├── test_model_submit_new.sh ├── test_module.py ├── utils.py └── utils_bk.py ├── README.md ├── fMRI_atlas_ROI_tc.py.ipynb ├── fmri_decoding ├── fmri_decoding_motor_svc_weights.nii.gz ├── fmri_decoding_readme.txt ├── midnight_task_fmri_decoding_CNN.ipynb ├── midnight_task_fmri_decoding_svm.ipynb ├── model_test_2dcnn.h5 ├── model_test_2dcnn.json └── test_gcn_training.ipynb ├── linear_model ├── midnight_project_resting.ipynb ├── project_update_readme.txt └── results_linear_model.txt └── plot_conn_matrix_surchs.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # IPython checkpoints 6 | .ipynb_checkpoints/ 7 | 8 | # Datasets 9 | data/ 10 | 11 | # Tensorflow summaries 12 | summaries/ 13 | 14 | # Model parameters 15 | checkpoints/ 16 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/HCP_fmripredict.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/codeStyles/Project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 
-------------------------------------------------------------------------------- /HCP_fmripredict/.idea/dictionaries/yu.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/libraries/R_User_Library.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /HCP_fmripredict/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /HCP_fmripredict/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /HCP_fmripredict/.ipynb_checkpoints/Untitled1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /HCP_fmripredict/HCP_task_fmri_cnn_tensorpack.py: -------------------------------------------------------------------------------- 1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6 2 | 3 | # Author: Yu Zhang 4 | # License: simplified BSD 5 | # coding: utf-8 6 | 7 | from pathlib import Path 8 | import glob 9 | import itertools 10 | import os 11 | import time 12 | import numpy as np 13 | import pandas as pd 14 | import nibabel as nib 15 | import matplotlib.pyplot as plt 16 | ###%matplotlib inline 17 | 18 | from nilearn import signal 19 | from nilearn import image 20 | from sklearn import preprocessing 21 | from keras.utils import np_utils 22 | 23 | from tensorpack import dataflow 24 | 25 | from keras.utils import to_categorical 26 | from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout,AveragePooling2D 27 | from keras.layers import Conv3D, MaxPooling3D, BatchNormalization, AveragePooling3D 28 | from keras.models import Model 29 | import keras.backend as K 30 | 31 | 32 | #####global variable settings 33 | ''' 34 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152 35 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1" 36 | from tensorflow.python.client import device_lib 37 | print(device_lib.list_local_devices()) 
38 | ''' 39 | import tensorflow as tf 40 | from keras import backend as K 41 | 42 | USE_GPU_CPU = 1 43 | num_cores = 4 44 | 45 | if not USE_GPU_CPU : 46 | num_GPU = num_cores 47 | num_CPU = 0 48 | else: 49 | num_CPU = 2 50 | num_GPU = 2 51 | 52 | config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,\ 53 | inter_op_parallelism_threads=num_cores, allow_soft_placement=True,\ 54 | device_count = {'CPU' : num_CPU, 'GPU' : num_GPU}) 55 | session = tf.Session(config=config) 56 | K.set_session(session) 57 | 58 | 59 | ######################################################### 60 | pathdata = Path('/project/6002071/yuzhang/HCP/aws_s3_HCP1200/FMRI/') 61 | pathout = '/project/6002071/yuzhang/HCP/' 62 | modality = 'MOTOR' # 'MOTOR' 63 | ###dict for different types of movement 64 | task_contrasts = {"rf": "foot", 65 | "lf": "foot", 66 | "rh": "hand", 67 | "lh": "hand", 68 | "t": "tongue"} 69 | target_name = np.unique(list(task_contrasts.values())) 70 | print(target_name) 71 | 72 | TR = 0.72 73 | nr_thread=5 74 | buffer_size=20 75 | Flag_CNN_Model = '2d' 76 | ######################## 77 | 78 | 79 | def load_fmri_data(pathdata,modality=None,confound_name=None): 80 | ###fMRI decoding: using event signals instead of activation pattern from glm 81 | ##collect task-fMRI signals 82 | 83 | if not modality: 84 | modality = 'MOTOR' # 'MOTOR' 85 | 86 | subjects = [] 87 | fmri_files = [] 88 | confound_files = [] 89 | for fmri_file in sorted(pathdata.glob('tfMRI_'+modality+'_??/*_tfMRI_'+modality+'_??.nii.gz')): 90 | subjects.append(Path(os.path.dirname(fmri_file)).parts[-3]) 91 | fmri_files.append(str(fmri_file)) 92 | 93 | for confound in sorted(pathdata.glob('tfMRI_'+modality+'_??/*_Movement_Regressors.txt')): 94 | confound_files.append(str(confound)) 95 | 96 | print('%d subjects included in the dataset' % len(fmri_files)) 97 | return fmri_files, confound_files, subjects 98 | 99 | 100 | def load_event_files(fmri_files,confound_files,ev_filename=None): 101 | ###collect the event design files 102 | tc_matrix = nib.load(fmri_files[0]) 103 | Subject_Num = len(fmri_files) 104 | Trial_Num = tc_matrix.shape[-1] 105 | print("Data samples including %d subjects with %d trials" % (Subject_Num, Trial_Num)) 106 | 107 | EVS_files = [] 108 | subj = 0 109 | for ev, sub_count in zip(sorted(pathdata.glob('tfMRI_' + modality + '_??/*_combined_events_spm_' + modality + '.csv')),range(Subject_Num)): 110 | ###remove fmri files if the event design is missing 111 | while os.path.dirname(fmri_files[subj]) < os.path.dirname(str(ev)): 112 | print("Event files and fmri data are miss-matching for subject: ") 113 | print(Path(os.path.dirname(str(ev))).parts[-3::2], ':', 114 | Path(os.path.dirname(fmri_files[subj])).parts[-3::2]) 115 | print("Due to missing event files for subject : %s" % os.path.dirname(fmri_files[subj])) 116 | fmri_files[subj] = [] 117 | confound_files[subj] = [] 118 | subj += 1 119 | if subj > Subject_Num: 120 | break 121 | if os.path.dirname(fmri_files[subj]) == os.path.dirname(str(ev)): 122 | EVS_files.append(str(ev)) 123 | subj += 1 124 | 125 | fmri_files = list(filter(None, fmri_files)) 126 | confound_files = list(filter(None, confound_files)) 127 | if len(EVS_files) != len(fmri_files): 128 | print('Miss-matching number of subjects between event:{} and fmri:{} files'.format(len(EVS_files), len(fmri_files))) 129 | 130 | ################################ 131 | ###loading all event designs 132 | if not ev_filename: 133 | ev_filename = "_event_labels_1200R_LR_RL.txt" 134 | 135 | events_all_subjects_file = 
pathout+modality+ev_filename 136 | if os.path.isfile(events_all_subjects_file): 137 | print('Collecting trial info from file:', events_all_subjects_file) 138 | subjects_trial_labels = pd.read_csv(events_all_subjects_file,sep="\t",encoding="utf8") 139 | ###print(subjects_trial_labels.keys()) 140 | 141 | subjects_trial_label_matrix = subjects_trial_labels.loc[:,'trial1':'trial'+str(Trial_Num)] 142 | sub_name = subjects_trial_labels['subject'] 143 | coding_direct = subjects_trial_labels['coding'] 144 | print(subjects_trial_label_matrix.shape,len(sub_name),len(np.unique(sub_name)),len(coding_direct)) 145 | else: 146 | print('Loading trial info for each task-fmri file and save to csv file:', events_all_subjects_file) 147 | subjects_trial_label_matrix = [] 148 | sub_name = [] 149 | coding_direct = [] 150 | for subj in np.arange(Subject_Num): 151 | pathsub = Path(os.path.dirname(EVS_files[subj])) 152 | sub_name.append(pathsub.parts[-3]) 153 | coding_direct.append(pathsub.parts[-1].split('_')[-1]) 154 | 155 | ##trial info in volume 156 | trial_infos = pd.read_csv(EVS_files[subj],sep="\t",encoding="utf8",header = None,names=['onset','duration','rep','task']) 157 | Onsets = np.ceil((trial_infos.onset/TR)).astype(int) #(trial_infos.onset/TR).astype(int) 158 | Duras = np.ceil((trial_infos.duration/TR)).astype(int) #(trial_infos.duration/TR).astype(int) 159 | Movetypes = trial_infos.task 160 | 161 | labels = ["rest"]*Trial_Num; 162 | for start,dur,move in zip(Onsets,Duras,Movetypes): 163 | for ti in range(start-1,start+dur): 164 | labels[ti]= task_contrasts[move] 165 | subjects_trial_label_matrix.append(labels) 166 | 167 | print(np.array(subjects_trial_label_matrix).shape) 168 | #print(np.array(subjects_trial_label_matrix[0])) 169 | subjects_trial_labels = pd.DataFrame(data=np.array(subjects_trial_label_matrix),columns=['trial'+str(i+1) for i in range(Trial_Num)]) 170 | subjects_trial_labels['subject'] = sub_name 171 | subjects_trial_labels['coding'] = coding_direct 172 | subjects_trial_labels.keys() 173 | #print(subjects_trial_labels['subject'],subjects_trial_labels['coding']) 174 | 175 | ##save the labels 176 | subjects_trial_labels.to_csv(events_all_subjects_file,sep='\t', encoding='utf-8',index=False) 177 | 178 | return subjects_trial_label_matrix, sub_name 179 | 180 | 181 | ############################# 182 | ####################################### 183 | ####tensorpack: multithread 184 | class gen_fmri_file(dataflow.DataFlow): 185 | """ Iterate through fmri filenames, confound filenames and labels 186 | """ 187 | def __init__(self, fmri_files,confound_files, label_matrix,data_type='train',train_percent=0.8): 188 | assert (len(fmri_files) == len(confound_files)) 189 | # self.data=zip(fmri_files,confound_files) 190 | self.fmri_files = fmri_files 191 | self.confound_files = confound_files 192 | self.label_matrix = label_matrix 193 | 194 | self.data_type=data_type 195 | self.train_percent=train_percent 196 | 197 | def size(self): 198 | split_num=int(len(self.fmri_files)*0.8) 199 | if self.data_type=='train': 200 | return split_num 201 | else: 202 | return len(self.fmri_files)-split_num 203 | 204 | def get_data(self): 205 | split_num=int(len(self.fmri_files)*0.8) 206 | if self.data_type=='train': 207 | while True: 208 | rand_pos=np.random.choice(split_num,1)[0] 209 | yield self.fmri_files[rand_pos],self.confound_files[rand_pos],self.label_matrix.iloc[rand_pos] 210 | else: 211 | for pos_ in range(split_num,len(self.fmri_files)): 212 | yield 
self.fmri_files[pos_],self.confound_files[pos_],self.label_matrix.iloc[pos_] 213 | 214 | 215 | class split_samples(dataflow.DataFlow): 216 | """ Iterate through fmri filenames, confound filenames and labels 217 | """ 218 | def __init__(self, ds): 219 | self.ds=ds 220 | 221 | def size(self): 222 | return 91*284 223 | 224 | def get_data(self): 225 | for data in self.ds.get_data(): 226 | for i in range(data[1].shape[0]): 227 | yield data[0][i],data[1][i] 228 | 229 | 230 | def map_load_fmri_image(dp,target_name): 231 | fmri_file=dp[0] 232 | confound_file=dp[1] 233 | label_trials=dp[2] 234 | 235 | ###remove confound effects 236 | confound = np.loadtxt(confound_file) 237 | fmri_data_clean = image.clean_img(fmri_file, detrend=True, standardize=True, confounds=confound) 238 | 239 | ##pre-select task types 240 | trial_mask = pd.Series(label_trials).isin(target_name) ##['hand', 'foot','tongue'] 241 | fmri_data_cnn = image.index_img(fmri_data_clean, np.where(trial_mask)[0]).get_data() 242 | ###use each slice along z-axis as one sample 243 | label_data_trial = np.array(label_trials.loc[trial_mask]) 244 | le = preprocessing.LabelEncoder() 245 | le.fit(target_name) 246 | label_data_cnn = le.transform(label_data_trial) ##np_utils.to_categorical(): convert label vector to matrix 247 | 248 | img_rows, img_cols, img_deps = fmri_data_cnn.shape[:-1] 249 | fmri_data_cnn_test = np.transpose(fmri_data_cnn.reshape(img_rows, img_cols, np.prod(fmri_data_cnn.shape[2:])), (2, 0, 1)) 250 | label_data_cnn_test = np.repeat(label_data_cnn, img_deps, axis=0).flatten() 251 | print(fmri_file, fmri_data_cnn_test.shape,label_data_cnn_test.shape) 252 | 253 | return fmri_data_cnn_test, label_data_cnn_test 254 | 255 | 256 | def map_load_fmri_image_3d(dp, target_name): 257 | fmri_file = dp[0] 258 | confound_file = dp[1] 259 | label_trials = dp[2] 260 | 261 | ###remove confound effects 262 | confound = np.loadtxt(confound_files[0]) 263 | fmri_data_clean = image.clean_img(fmri_files[0], detrend=True, standardize=True, confounds=confound) 264 | 265 | ##pre-select task types 266 | trial_mask = pd.Series(label_trials).isin(target_name) ##['hand', 'foot','tongue'] 267 | fmri_data_cnn = image.index_img(fmri_data_clean, np.where(trial_mask)[0]).get_data() 268 | ###use each slice along z-axis as one sample 269 | label_data_trial = np.array(label_trials.loc[trial_mask]) 270 | le = preprocessing.LabelEncoder() 271 | le.fit(target_name) 272 | label_data_cnn = le.transform(label_data_trial) ##np_utils.to_categorical(): convert label vector to matrix 273 | 274 | img_rows, img_cols, img_deps = fmri_data_cnn.shape[:-1] 275 | fmri_data_cnn_test = np.transpose(fmri_data_cnn, (3, 0, 1, 2)) 276 | label_data_cnn_test = label_data_cnn.flatten() 277 | print(fmri_file, fmri_data_cnn_test.shape, label_data_cnn_test.shape) 278 | 279 | return fmri_data_cnn_test, label_data_cnn_test 280 | 281 | 282 | def data_pipe(fmri_files,confound_files,label_matrix,target_name=None,batch_size=32,data_type='train', 283 | train_percent=0.8,nr_thread=nr_thread,buffer_size=buffer_size): 284 | assert data_type in ['train', 'val', 'test'] 285 | assert fmri_files is not None 286 | 287 | print('\n\nGenerating dataflow for %s datasets \n' % data_type) 288 | 289 | buffer_size = min(len(fmri_files),buffer_size) 290 | nr_thread = min(len(fmri_files),nr_thread) 291 | 292 | ds0 = gen_fmri_file(fmri_files,confound_files, label_matrix,data_type=data_type,train_percent=train_percent) 293 | print('dataflowSize is ' + str(ds0.size())) 294 | print('Loading data using %d threads with %d 
buffer_size ... \n' % (nr_thread, buffer_size)) 295 | 296 | if target_name is None: 297 | target_name = np.unique(label_matrix) 298 | 299 | ####running the model 300 | start_time = time.clock() 301 | ds1 = dataflow.MultiThreadMapData( 302 | ds0, nr_thread=nr_thread, 303 | map_func=lambda dp: map_load_fmri_image(dp,target_name), 304 | buffer_size=buffer_size, 305 | strict=True) 306 | 307 | ds1 = dataflow.PrefetchData(ds1, buffer_size,1) 308 | 309 | ds1 = split_samples(ds1) 310 | print('prefetch dataflowSize is ' + str(ds1.size())) 311 | 312 | ds1 = dataflow.LocallyShuffleData(ds1,buffer_size=ds1.size()*buffer_size) 313 | 314 | ds1 = dataflow.BatchData(ds1,batch_size=batch_size) 315 | print('Time Usage of loading data in seconds: {} \n'.format(time.clock() - start_time)) 316 | 317 | ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1) 318 | ds1._reset_once() 319 | ##ds1.reset_state() 320 | 321 | #return ds1.get_data() 322 | for df in ds1.get_data(): 323 | ##print(np.expand_dims(df[0].astype('float32'),axis=3).shape) 324 | yield (np.expand_dims(df[0].astype('float32'),axis=3),to_categorical(df[1].astype('int32'),len(target_name))) 325 | 326 | 327 | def data_pipe_3dcnn(fmri_files, confound_files, label_matrix, target_name=None, flag_cnn='3d', batch_size=32, 328 | data_type='train',train_percent=0.8, nr_thread=nr_thread, buffer_size=buffer_size): 329 | assert data_type in ['train', 'val', 'test'] 330 | assert flag_cnn in ['3d', '2d'] 331 | assert fmri_files is not None 332 | 333 | print('\n\nGenerating dataflow for %s datasets \n' % data_type) 334 | 335 | buffer_size = min(len(fmri_files), buffer_size) 336 | nr_thread = min(len(fmri_files), nr_thread) 337 | 338 | ds0 = gen_fmri_file(fmri_files, confound_files, label_matrix, data_type=data_type, train_percent=train_percent) 339 | print('dataflowSize is ' + str(ds0.size())) 340 | print('Loading data using %d threads with %d buffer_size ... 
\n' % (nr_thread, buffer_size)) 341 | 342 | if target_name is None: 343 | target_name = np.unique(label_matrix) 344 | 345 | ####running the model 346 | start_time = time.clock() 347 | if flag_cnn == '2d': 348 | ds1 = dataflow.MultiThreadMapData( 349 | ds0, nr_thread=nr_thread, 350 | map_func=lambda dp: map_load_fmri_image(dp, target_name), 351 | buffer_size=buffer_size, 352 | strict=True) 353 | elif flag_cnn == '3d': 354 | ds1 = dataflow.MultiThreadMapData( 355 | ds0, nr_thread=nr_thread, 356 | map_func=lambda dp: map_load_fmri_image_3d(dp, target_name), 357 | buffer_size=buffer_size, 358 | strict=True) 359 | 360 | ds1 = dataflow.PrefetchData(ds1, buffer_size, 1) 361 | 362 | ds1 = split_samples(ds1) 363 | print('prefetch dataflowSize is ' + str(ds1.size())) 364 | 365 | ds1 = dataflow.LocallyShuffleData(ds1, buffer_size=ds1.size() * buffer_size) 366 | 367 | ds1 = dataflow.BatchData(ds1, batch_size=batch_size) 368 | print('Time Usage of loading data in seconds: {} \n'.format(time.clock() - start_time)) 369 | 370 | ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1) 371 | ds1._reset_once() 372 | ##ds1.reset_state() 373 | 374 | ##return ds1.get_data() 375 | 376 | for df in ds1.get_data(): 377 | if flag_cnn == '2d': 378 | yield (np.expand_dims(df[0].astype('float32'), axis=3),to_categorical(df[1].astype('int32'), len(target_name))) 379 | elif flag_cnn == '3d': 380 | yield (np.expand_dims(df[0].astype('float32'), axis=4),to_categorical(df[1].astype('int32'), len(target_name))) 381 | 382 | 383 | ###end of tensorpack: multithread 384 | ############################################################## 385 | 386 | 387 | def plot_history(model_history): 388 | plt.figure() 389 | plt.subplot(121) 390 | plt.plot(model_history.history['acc'], color='r') 391 | plt.plot(model_history.history['val_acc'], color='b') 392 | plt.xlabel('Epochs') 393 | plt.ylabel('Accuracy') 394 | plt.legend(['Training', 'Validation']) 395 | 396 | plt.subplot(122) 397 | plt.plot(model_history.history['loss'], color='r') 398 | plt.plot(model_history.history['val_loss'], color='b') 399 | plt.xlabel('Epochs') 400 | plt.ylabel('Loss Function') 401 | plt.legend(['Training', 'Validation']) 402 | return None 403 | 404 | 405 | def build_cnn_model(input_shape, Nlabels, filters=32, convsize=3, poolsize=2, hidden_size=128, conv_layers=4): 406 | # import keras.backend as K 407 | # if K.image_data_format() == 'channels_first': 408 | # img_shape = (1,img_rows,img_cols) 409 | # elif K.image_data_format() == 'channels_last': 410 | # img_shape = (img_rows,img_cols,1) 411 | 412 | 413 | input0 = Input(shape=input_shape) 414 | drop1 = input0 415 | for li in range(conv_layers): 416 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(drop1) 417 | conv1 = BatchNormalization()(conv1) 418 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(conv1) 419 | conv1 = BatchNormalization()(conv1) 420 | pool1 = MaxPooling2D((poolsize, poolsize))(conv1) 421 | drop1 = Dropout(0.25)(pool1) 422 | if (li+1) % 2 == 0: 423 | filters *= 2 424 | 425 | drop2 = drop1 426 | avg1 = AveragePooling2D(pool_size=(5, 5))(drop2) 427 | flat = Flatten()(avg1) 428 | hidden = Dense(hidden_size, activation='relu')(flat) 429 | drop3 = Dropout(0.5)(hidden) 430 | # hidden = Dense((hidden_size/4).astype(int), activation='relu')(drop3) 431 | # drop4 = Dropout(0.5)(hidden) 432 | 433 | out = Dense(Nlabels, activation='softmax')(drop3) 434 | 435 | model = Model(inputs=input0, outputs=out) 436 | model.compile(loss='categorical_crossentropy', 
optimizer='adam', metrics=['accuracy']) 437 | model.summary() 438 | 439 | return model 440 | 441 | 442 | def build_cnn3d_model(input_shape, Nlabels, filters=32, convsize=3, poolsize=2, hidden_size=128, conv_layers=4): 443 | # import keras.backend as K 444 | # if K.image_data_format() == 'channels_first': 445 | # img_shape = (1,img_rows,img_cols,img_deps) 446 | # elif K.image_data_format() == 'channels_last': 447 | # img_shape = (img_rows,img_cols, img_deps,1) 448 | 449 | input0 = Input(shape=input_shape) 450 | drop1 = input0 451 | for li in range(conv_layers): 452 | conv1 = Conv3D(filters, (convsize, convsize, convsize), padding='same', activation='relu')(drop1) 453 | conv1 = BatchNormalization()(conv1) 454 | conv1 = Conv3D(filters, (convsize, convsize, convsize), padding='same', activation='relu')(conv1) 455 | conv1 = BatchNormalization()(conv1) 456 | pool1 = MaxPooling3D((poolsize, poolsize, poolsize))(conv1) 457 | drop1 = Dropout(0.25)(pool1) 458 | if (li+1) % 2 == 0: 459 | filters *= 2 460 | 461 | drop2 = drop1 462 | avg1 = AveragePooling3D(pool_size=(5, 5, 5))(drop2) 463 | flat = Flatten()(avg1) 464 | hidden = Dense(hidden_size, activation='relu')(flat) 465 | drop3 = Dropout(0.5)(hidden) 466 | 467 | out = Dense(Nlabels, activation='softmax')(drop3) 468 | 469 | model = Model(inputs=input0, outputs=out) 470 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 471 | model.summary() 472 | 473 | return model 474 | 475 | 476 | ##################### 477 | ##### 478 | if __name__ == '__main__': 479 | 480 | fmri_files, confound_files, subjects = load_fmri_data(pathdata,modality) 481 | print('including %d fmri files and %d confounds files \n\n' % (len(fmri_files), len(confound_files))) 482 | 483 | label_matrix, sub_name = load_event_files(fmri_files,confound_files) 484 | print('Collecting event design files for subjects and saved into matrix ...' 
, np.array(label_matrix).shape) 485 | 486 | nb_class = len(target_name) 487 | tc_matrix = nib.load(fmri_files[0]) 488 | img_rows, img_cols, img_deps = tc_matrix.shape[:-1] 489 | img_shape = [] 490 | if Flag_CNN_Model == '2d': 491 | if K.image_data_format() == 'channels_first': 492 | img_shape = (1, img_rows, img_cols) 493 | elif K.image_data_format() == 'channels_last': 494 | img_shape = (img_rows, img_cols, 1) 495 | elif Flag_CNN_Model == '3d': 496 | if K.image_data_format() == 'channels_first': 497 | img_shape = (1, img_rows, img_cols, img_deps) 498 | elif K.image_data_format() == 'channels_last': 499 | img_shape = (img_rows, img_cols, img_deps, 1) 500 | 501 | ######################################### 502 | ''' 503 | ##test whether dataflow from tensorpack works 504 | test_sub_num = 1000 505 | tst = data_pipe_3dcnn(fmri_files[:test_sub_num], confound_files[:test_sub_num], label_matrix.iloc[:test_sub_num], 506 | target_name=target_name, flag_cnn=Flag_CNN_Model, batch_size=16, data_type='train', buffer_size=5) 507 | out = next(tst) 508 | print(out[0].shape) 509 | print(out[1].shape) 510 | ''' 511 | #################### 512 | #####start 2dcnn model 513 | test_sub_num = len(fmri_files) 514 | ##xx = data_pipe(fmri_files,confound_files,label_matrix,target_name=target_name) 515 | train_gen = data_pipe(fmri_files[:test_sub_num],confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num], 516 | target_name=target_name,batch_size=32,data_type='train',nr_thread=4, buffer_size=20) 517 | val_set = data_pipe(fmri_files[:test_sub_num],confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num], 518 | target_name=target_name,batch_size=32,data_type='test',nr_thread=2, buffer_size=20) 519 | 520 | ''' 521 | ######################################### 522 | test_sub_num = len(fmri_files) 523 | ######start cnn model 524 | train_gen = data_pipe_3dcnn(fmri_files[:test_sub_num], confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num], 525 | target_name=target_name, flag_cnn=Flag_CNN_Model, 526 | batch_size=32, data_type='train', nr_thread=4, buffer_size=20) 527 | val_set = data_pipe_3dcnn(fmri_files[:test_sub_num], confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num], 528 | target_name=target_name, flag_cnn=Flag_CNN_Model, 529 | batch_size=32, data_type='test', nr_thread=2, buffer_size=20) 530 | ''' 531 | 532 | if Flag_CNN_Model == '2d': 533 | print('\nTraining the model using 2d-CNN \n') 534 | model_test = build_cnn_model(img_shape, nb_class) 535 | elif Flag_CNN_Model == '3d': 536 | print('\nTraining the model using 3d-CNN \n') 537 | model_test = build_cnn3d_model(img_shape, nb_class) 538 | 539 | ######start training the model 540 | model_test_history = model_test.fit_generator(train_gen, epochs=20, steps_per_epoch=100, verbose=1, shuffle=True) 541 | #validation_data=val_set,validation_steps=10, 542 | #workers=1, use_multiprocessing=False, shuffle=True) 543 | print(model_test_history.history) 544 | for key,val in model_test_history.history.items(): 545 | print(key, val) 546 | 547 | scores = model_test.evaluate_generator(val_set, validation_steps=100, workers=1, shuffle=False) 548 | print(scores) 549 | 550 | import pickle 551 | logfilename = pathout+'train_val_scores_dump2.txt' 552 | if os.path.isfile(logfilename): 553 | logfilename = logfilename.split('.')[0] + '2.txt' 554 | file = open(logfilename, 'w') 555 | pickle.dump(model_test_history.history, file) 556 | file.close() 557 | 558 | ''' 559 | from tensorpack import * 560 | from tensorpack.tfutils import summary 561 | from 
tensorpack.dataflow import dataset 562 | 563 | class Model(ModelDesc): 564 | def inputs(self,image_shape): 565 | """ 566 | Define all the inputs (with type, shape, name) that the graph will need. 567 | """ 568 | return [tf.placeholder(tf.float32, (None, image_shape.rval()), 'input'), 569 | tf.placeholder(tf.int32, (None,), 'label')] 570 | 571 | def build_graph(self, image, label): 572 | """This function should build the model which takes the input variables 573 | and return cost at the end""" 574 | 575 | # In tensorflow, inputs to convolution function are assumed to be 576 | # NHWC. Add a single channel here. 577 | image = tf.expand_dims(image, 3) 578 | 579 | image = image * 2 - 1 # center the pixels values at zero 580 | # The context manager `argscope` sets the default option for all the layers under 581 | # this context. Here we use 32 channel convolution with shape 3x3 582 | with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu, filters=32): 583 | logits = (LinearWrap(image) 584 | .Conv2D('conv0') 585 | .MaxPooling('pool0', 2) 586 | .Conv2D('conv1') 587 | .Conv2D('conv2') 588 | .MaxPooling('pool1', 2) 589 | .Conv2D('conv3') 590 | .FullyConnected('fc0', 512, activation=tf.nn.relu) 591 | .Dropout('dropout', rate=0.5) 592 | .FullyConnected('fc1', 10, activation=tf.identity)()) 593 | 594 | tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities 595 | 596 | # a vector of length B with loss of each sample 597 | cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) 598 | cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss 599 | 600 | correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') 601 | accuracy = tf.reduce_mean(correct, name='accuracy') 602 | 603 | # This will monitor training error (in a moving_average fashion): 604 | # 1. write the value to tensosrboard 605 | # 2. write the value to stat.json 606 | # 3. print the value after each epoch 607 | train_error = tf.reduce_mean(1 - correct, name='train_error') 608 | summary.add_moving_summary(train_error, accuracy) 609 | 610 | # Use a regex to find parameters to apply weight decay. 611 | # Here we apply a weight decay on all W (weight matrix) of all fc layers 612 | wd_cost = tf.multiply(1e-5, 613 | regularize_cost('fc.*/W', tf.nn.l2_loss), 614 | name='regularize_loss') 615 | total_cost = tf.add_n([wd_cost, cost], name='total_cost') 616 | summary.add_moving_summary(cost, wd_cost, total_cost) 617 | 618 | # monitor histogram of all weight (of conv and fc layers) in tensorboard 619 | summary.add_param_summary(('.*/W', ['histogram', 'rms'])) 620 | return total_cost 621 | 622 | def get_config(dataset_train,dataset_test): 623 | # How many iterations you want in each epoch. 
624 | # This is the default value, don't actually need to set it in the config 625 | steps_per_epoch = dataset_train.size() 626 | 627 | # get the config which contains everything necessary in a training 628 | return TrainConfig( 629 | model=Model(), 630 | dataflow=dataset_train, # the DataFlow instance for training 631 | callbacks=[ 632 | ModelSaver(), # save the model after every epoch 633 | MaxSaver('validation_accuracy'), # save the model with highest accuracy (prefix 'validation_') 634 | InferenceRunner( # run inference(for validation) after every epoch 635 | dataset_test, # the DataFlow instance used for validation 636 | ScalarStats(['cross_entropy_loss', 'accuracy'])), 637 | ], 638 | steps_per_epoch=steps_per_epoch, 639 | max_epoch=100, 640 | ) 641 | 642 | ##main function 643 | config = get_config() 644 | launch_train_with_config(config, SimpleTrainer()) 645 | ''' 646 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Michaël Defferrard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/README.md: -------------------------------------------------------------------------------- 1 | # Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering 2 | 3 | The code in this repository implements an efficient generalization of the 4 | popular Convolutional Neural Networks (CNNs) to arbitrary graphs, presented in 5 | our paper: 6 | 7 | Michaël Defferrard, Xavier Bresson, Pierre Vandergheynst, [Convolutional Neural 8 | Networks on Graphs with Fast Localized Spectral Filtering][arXiv], Neural 9 | Information Processing Systems (NIPS), 2016. 10 | 11 | Additional material: 12 | * [NIPS2016 spotlight video][video], 2016-11-22. 13 | * [Deep Learning on Graphs][slides_ntds], a lecture for EPFL's master course [A 14 | Network Tour of Data Science][ntds], 2016-12-21. 15 | * [Deep Learning on Graphs][slides_dlid], an invited talk at the [Deep Learning on 16 | Irregular Domains][dlid] workshop of BMVC, 2017-09-17. 
17 | 18 | [video]: https://www.youtube.com/watch?v=cIA_m7vwOVQ 19 | [slides_ntds]: https://doi.org/10.6084/m9.figshare.4491686 20 | [ntds]: https://github.com/mdeff/ntds_2016 21 | [slides_dlid]: https://doi.org/10.6084/m9.figshare.5394805 22 | [dlid]: http://dlid.swansea.ac.uk 23 | 24 | There is also implementations of the filters used in: 25 | * Joan Bruna, Wojciech Zaremba, Arthur Szlam, Yann LeCun, [Spectral Networks 26 | and Locally Connected Networks on Graphs][bruna], International Conference on 27 | Learning Representations (ICLR), 2014. 28 | * Mikael Henaff, Joan Bruna and Yann LeCun, [Deep Convolutional Networks on 29 | Graph-Structured Data][henaff], arXiv, 2015. 30 | 31 | [arXiv]: https://arxiv.org/abs/1606.09375 32 | [bruna]: https://arxiv.org/abs/1312.6203 33 | [henaff]: https://arxiv.org/abs/1506.05163 34 | 35 | ## Installation 36 | 37 | 1. Clone this repository. 38 | ```sh 39 | git clone https://github.com/mdeff/cnn_graph 40 | cd cnn_graph 41 | ``` 42 | 43 | 2. Install the dependencies. The code should run with TensorFlow 1.0 and newer. 44 | ```sh 45 | pip install -r requirements.txt # or make install 46 | ``` 47 | 48 | 3. Play with the Jupyter notebooks. 49 | ```sh 50 | jupyter notebook 51 | ``` 52 | 53 | ## Reproducing our results 54 | 55 | Run all the notebooks to reproduce the experiments on 56 | [MNIST](nips2016/mnist.ipynb) and [20NEWS](nips2016/20news.ipynb) presented in 57 | the paper. 58 | ```sh 59 | cd nips2016 60 | make 61 | ``` 62 | 63 | ## Using the model 64 | 65 | To use our graph ConvNet on your data, you need: 66 | 67 | 1. a data matrix where each row is a sample and each column is a feature, 68 | 2. a target vector, 69 | 3. optionally, an adjacency matrix which encodes the structure as a graph. 70 | 71 | See the [usage notebook][usage] for a simple example with fabricated data. 72 | Please get in touch if you are unsure about applying the model to a different 73 | setting. 74 | 75 | [usage]: http://nbviewer.jupyter.org/github/mdeff/cnn_graph/blob/outputs/usage.ipynb 76 | 77 | ## License & co 78 | 79 | The code in this repository is released under the terms of the [MIT license](LICENSE.txt). 80 | Please cite our [paper][arXiv] if you use it. 81 | 82 | ``` 83 | @inproceedings{cnn_graph, 84 | title = {Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering}, 85 | author = {Defferrard, Micha\"el and Bresson, Xavier and Vandergheynst, Pierre}, 86 | booktitle = {Advances in Neural Information Processing Systems}, 87 | year = {2016}, 88 | url = {https://arxiv.org/abs/1606.09375}, 89 | } 90 | ``` 91 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/lib/coarsening.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | 4 | 5 | def coarsen(A, levels, self_connections=False): 6 | """ 7 | Coarsen a graph, represented by its adjacency matrix A, at multiple 8 | levels. 
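# A minimal usage sketch, assuming the repository root is on the Python path.
# The function names and signatures below come from lib/graph.py and
# lib/coarsening.py in this repository; the data, sizes and variable names
# are fabricated purely for illustration (see also the usage notebook).
import numpy as np
from lib import graph, coarsening

X = np.random.uniform(size=(200, 64)).astype(np.float32)    # 200 samples x 64 features
dist, idx = graph.distance_sklearn_metrics(X.T, k=8)        # k-NN distances between the 64 features
A = graph.adjacency(dist, idx)                               # sparse 64 x 64 weighted adjacency
graphs, perm = coarsening.coarsen(A, levels=3)               # list of coarsened graphs + node permutation
X_perm = coarsening.perm_data(X, perm)                       # reorder/pad feature columns to match
laplacians = [graph.laplacian(g, normalized=True) for g in graphs]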
9 | """ 10 | graphs, parents = metis(A, levels) 11 | perms = compute_perm(parents) 12 | 13 | for i, A in enumerate(graphs): 14 | M, M = A.shape 15 | 16 | if not self_connections: 17 | A = A.tocoo() 18 | A.setdiag(0) 19 | 20 | if i < levels: 21 | A = perm_adjacency(A, perms[i]) 22 | 23 | A = A.tocsr() 24 | A.eliminate_zeros() 25 | graphs[i] = A 26 | 27 | Mnew, Mnew = A.shape 28 | print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added),' 29 | '|E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2)) 30 | 31 | return graphs, perms[0] if levels > 0 else None 32 | 33 | 34 | def metis(W, levels, rid=None): 35 | """ 36 | Coarsen a graph multiple times using the METIS algorithm. 37 | 38 | INPUT 39 | W: symmetric sparse weight (adjacency) matrix 40 | levels: the number of coarsened graphs 41 | 42 | OUTPUT 43 | graph[0]: original graph of size N_1 44 | graph[2]: coarser graph of size N_2 < N_1 45 | graph[levels]: coarsest graph of Size N_levels < ... < N_2 < N_1 46 | parents[i] is a vector of size N_i with entries ranging from 1 to N_{i+1} 47 | which indicate the parents in the coarser graph[i+1] 48 | nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i} 49 | 50 | NOTE 51 | if "graph" is a list of length k, then "parents" will be a list of length k-1 52 | """ 53 | 54 | N, N = W.shape 55 | if rid is None: 56 | rid = np.random.permutation(range(N)) 57 | parents = [] 58 | degree = W.sum(axis=0) - W.diagonal() 59 | graphs = [] 60 | graphs.append(W) 61 | #supernode_size = np.ones(N) 62 | #nd_sz = [supernode_size] 63 | #count = 0 64 | 65 | #while N > maxsize: 66 | for _ in range(levels): 67 | 68 | #count += 1 69 | 70 | # CHOOSE THE WEIGHTS FOR THE PAIRING 71 | # weights = ones(N,1) # metis weights 72 | weights = degree # graclus weights 73 | # weights = supernode_size # other possibility 74 | weights = np.array(weights).squeeze() 75 | 76 | # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR 77 | idx_row, idx_col, val = scipy.sparse.find(W) 78 | perm = np.argsort(idx_row) 79 | rr = idx_row[perm] 80 | cc = idx_col[perm] 81 | vv = val[perm] 82 | cluster_id = metis_one_level(rr,cc,vv,rid,weights) # rr is ordered 83 | parents.append(cluster_id) 84 | 85 | # TO DO 86 | # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE 87 | #supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) ) 88 | #print(cluster_id) 89 | #print(supernode_size) 90 | #nd_sz{count+1}=supernode_size; 91 | 92 | # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH 93 | nrr = cluster_id[rr] 94 | ncc = cluster_id[cc] 95 | nvv = vv 96 | Nnew = cluster_id.max() + 1 97 | # CSR is more appropriate: row,val pairs appear multiple times 98 | W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)), shape=(Nnew,Nnew)) 99 | W.eliminate_zeros() 100 | # Add new graph to the list of all coarsened graphs 101 | graphs.append(W) 102 | N, N = W.shape 103 | 104 | # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS) 105 | degree = W.sum(axis=0) 106 | #degree = W.sum(axis=0) - W.diagonal() 107 | 108 | # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS 109 | #[~, rid]=sort(ss); # arthur strategy 110 | #[~, rid]=sort(supernode_size); # thomas strategy 111 | #rid=randperm(N); # metis/graclus strategy 112 | ss = np.array(W.sum(axis=0)).squeeze() 113 | rid = np.argsort(ss) 114 | 115 | return graphs, parents 116 | 117 | 118 | # Coarsen a graph given by rr,cc,vv. 
rr is assumed to be ordered 119 | def metis_one_level(rr,cc,vv,rid,weights): 120 | 121 | nnz = rr.shape[0] 122 | N = rr[nnz-1] + 1 123 | 124 | marked = np.zeros(N, np.bool) 125 | rowstart = np.zeros(N, np.int32) 126 | rowlength = np.zeros(N, np.int32) 127 | cluster_id = np.zeros(N, np.int32) 128 | 129 | oldval = rr[0] 130 | count = 0 131 | clustercount = 0 132 | 133 | for ii in range(nnz): 134 | rowlength[count] = rowlength[count] + 1 135 | if rr[ii] > oldval: 136 | oldval = rr[ii] 137 | rowstart[count+1] = ii 138 | count = count + 1 139 | 140 | for ii in range(N): 141 | tid = rid[ii] 142 | if not marked[tid]: 143 | wmax = 0.0 144 | rs = rowstart[tid] 145 | marked[tid] = True 146 | bestneighbor = -1 147 | for jj in range(rowlength[tid]): 148 | nid = cc[rs+jj] 149 | if marked[nid]: 150 | tval = 0.0 151 | else: 152 | tval = vv[rs+jj] * (1.0/weights[tid] + 1.0/weights[nid]) 153 | if tval > wmax: 154 | wmax = tval 155 | bestneighbor = nid 156 | 157 | cluster_id[tid] = clustercount 158 | 159 | if bestneighbor > -1: 160 | cluster_id[bestneighbor] = clustercount 161 | marked[bestneighbor] = True 162 | 163 | clustercount += 1 164 | 165 | return cluster_id 166 | 167 | def compute_perm(parents): 168 | """ 169 | Return a list of indices to reorder the adjacency and data matrices so 170 | that the union of two neighbors from layer to layer forms a binary tree. 171 | """ 172 | 173 | # Order of last layer is random (chosen by the clustering algorithm). 174 | indices = [] 175 | if len(parents) > 0: 176 | M_last = max(parents[-1]) + 1 177 | indices.append(list(range(M_last))) 178 | 179 | for parent in parents[::-1]: 180 | #print('parent: {}'.format(parent)) 181 | 182 | # Fake nodes go after real ones. 183 | pool_singeltons = len(parent) 184 | 185 | indices_layer = [] 186 | for i in indices[-1]: 187 | indices_node = list(np.where(parent == i)[0]) 188 | assert 0 <= len(indices_node) <= 2 189 | #print('indices_node: {}'.format(indices_node)) 190 | 191 | # Add a node to go with a singelton. 192 | if len(indices_node) is 1: 193 | indices_node.append(pool_singeltons) 194 | pool_singeltons += 1 195 | #print('new singelton: {}'.format(indices_node)) 196 | # Add two nodes as children of a singelton in the parent. 197 | elif len(indices_node) is 0: 198 | indices_node.append(pool_singeltons+0) 199 | indices_node.append(pool_singeltons+1) 200 | pool_singeltons += 2 201 | #print('singelton childrens: {}'.format(indices_node)) 202 | 203 | indices_layer.extend(indices_node) 204 | indices.append(indices_layer) 205 | 206 | # Sanity checks. 207 | for i,indices_layer in enumerate(indices): 208 | M = M_last*2**i 209 | # Reduction by 2 at each layer (binary tree). 210 | assert len(indices[0] == M) 211 | # The new ordering does not omit an indice. 212 | assert sorted(indices_layer) == list(range(M)) 213 | 214 | return indices[::-1] 215 | 216 | assert (compute_perm([np.array([4,1,1,2,2,3,0,0,3]),np.array([2,1,0,1,0])]) 217 | == [[3,4,0,9,1,2,5,8,6,7,10,11],[2,4,1,3,0,5],[0,1,2]]) 218 | 219 | def perm_data(x, indices): 220 | """ 221 | Permute data matrix, i.e. exchange node ids, 222 | so that binary unions form the clustering tree. 223 | """ 224 | if indices is None: 225 | return x 226 | 227 | N, M = x.shape 228 | Mnew = len(indices) 229 | assert Mnew >= M 230 | xnew = np.empty((N, Mnew)) 231 | for i,j in enumerate(indices): 232 | # Existing vertex, i.e. real data. 233 | if j < M: 234 | xnew[:,i] = x[:,j] 235 | # Fake vertex because of singeltons. 236 | # They will stay 0 so that max pooling chooses the singelton. 
237 | # Or -infty ? 238 | else: 239 | xnew[:,i] = np.zeros(N) 240 | return xnew 241 | 242 | def perm_adjacency(A, indices): 243 | """ 244 | Permute adjacency matrix, i.e. exchange node ids, 245 | so that binary unions form the clustering tree. 246 | """ 247 | if indices is None: 248 | return A 249 | 250 | M, M = A.shape 251 | Mnew = len(indices) 252 | assert Mnew >= M 253 | A = A.tocoo() 254 | 255 | # Add Mnew - M isolated vertices. 256 | if Mnew > M: 257 | rows = scipy.sparse.coo_matrix((Mnew-M, M), dtype=np.float32) 258 | cols = scipy.sparse.coo_matrix((Mnew, Mnew-M), dtype=np.float32) 259 | A = scipy.sparse.vstack([A, rows]) 260 | A = scipy.sparse.hstack([A, cols]) 261 | 262 | # Permute the rows and the columns. 263 | perm = np.argsort(indices) 264 | A.row = np.array(perm)[A.row] 265 | A.col = np.array(perm)[A.col] 266 | 267 | # assert np.abs(A - A.T).mean() < 1e-9 268 | assert type(A) is scipy.sparse.coo.coo_matrix 269 | return A 270 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/lib/graph.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | import sklearn.neighbors 3 | import matplotlib.pyplot as plt 4 | import scipy.sparse 5 | import scipy.sparse.linalg 6 | import scipy.spatial.distance 7 | import numpy as np 8 | 9 | 10 | def grid(m, dtype=np.float32): 11 | """Return the embedding of a grid graph.""" 12 | M = m**2 13 | x = np.linspace(0, 1, m, dtype=dtype) 14 | y = np.linspace(0, 1, m, dtype=dtype) 15 | xx, yy = np.meshgrid(x, y) 16 | z = np.empty((M, 2), dtype) 17 | z[:, 0] = xx.reshape(M) 18 | z[:, 1] = yy.reshape(M) 19 | return z 20 | 21 | 22 | def distance_scipy_spatial(z, k=4, metric='euclidean'): 23 | """Compute exact pairwise distances.""" 24 | d = scipy.spatial.distance.pdist(z, metric) 25 | d = scipy.spatial.distance.squareform(d) 26 | # k-NN graph. 27 | idx = np.argsort(d)[:, 1:k+1] 28 | d.sort() 29 | d = d[:, 1:k+1] 30 | return d, idx 31 | 32 | 33 | def distance_sklearn_metrics(z, k=4, metric='euclidean'): 34 | """Compute exact pairwise distances.""" 35 | d = sklearn.metrics.pairwise.pairwise_distances( 36 | z, metric=metric, n_jobs=-2) 37 | # k-NN graph. 38 | idx = np.argsort(d)[:, 1:k+1] 39 | d.sort() 40 | d = d[:, 1:k+1] 41 | return d, idx 42 | 43 | 44 | def distance_lshforest(z, k=4, metric='cosine'): 45 | """Return an approximation of the k-nearest cosine distances.""" 46 | assert metric is 'cosine' 47 | lshf = sklearn.neighbors.LSHForest() 48 | lshf.fit(z) 49 | dist, idx = lshf.kneighbors(z, n_neighbors=k+1) 50 | assert dist.min() < 1e-10 51 | dist[dist < 0] = 0 52 | return dist, idx 53 | 54 | # TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN 55 | 56 | 57 | def adjacency(dist, idx): 58 | """Return the adjacency matrix of a kNN graph.""" 59 | M, k = dist.shape 60 | assert M, k == idx.shape 61 | assert dist.min() >= 0 62 | 63 | # Weights. 64 | sigma2 = np.mean(dist[:, -1])**2 65 | dist = np.exp(- dist**2 / sigma2) 66 | 67 | # Weight matrix. 68 | I = np.arange(0, M).repeat(k) 69 | J = idx.reshape(M*k) 70 | V = dist.reshape(M*k) 71 | W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M)) 72 | 73 | # No self-connections. 74 | W.setdiag(0) 75 | 76 | # Non-directed graph. 
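    # Symmetrize by keeping, for every pair (i, j), the larger of W[i, j] and
    # W[j, i], i.e. W = max(W, W.T) element-wise, so the kNN graph becomes undirected.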
77 | bigger = W.T > W 78 | W = W - W.multiply(bigger) + W.T.multiply(bigger) 79 | 80 | assert W.nnz % 2 == 0 81 | assert np.abs(W - W.T).mean() < 1e-10 82 | assert type(W) is scipy.sparse.csr.csr_matrix 83 | return W 84 | 85 | 86 | def replace_random_edges(A, noise_level): 87 | """Replace randomly chosen edges by random edges.""" 88 | M, M = A.shape 89 | n = int(noise_level * A.nnz // 2) 90 | 91 | indices = np.random.permutation(A.nnz//2)[:n] 92 | rows = np.random.randint(0, M, n) 93 | cols = np.random.randint(0, M, n) 94 | vals = np.random.uniform(0, 1, n) 95 | assert len(indices) == len(rows) == len(cols) == len(vals) 96 | 97 | A_coo = scipy.sparse.triu(A, format='coo') 98 | #assert A_coo.nnz == A.nnz // 2 99 | assert A_coo.nnz >= n 100 | A = A.tolil() 101 | 102 | for idx, row, col, val in zip(indices, rows, cols, vals): 103 | old_row = A_coo.row[idx] 104 | old_col = A_coo.col[idx] 105 | 106 | A[old_row, old_col] = 0 107 | A[old_col, old_row] = 0 108 | A[row, col] = 1 109 | A[col, row] = 1 110 | 111 | A.setdiag(0) 112 | A = A.tocsr() 113 | A.eliminate_zeros() 114 | return A 115 | 116 | 117 | def laplacian(W, normalized=True): 118 | """Return the Laplacian of the weigth matrix.""" 119 | 120 | # Degree matrix. 121 | d = W.sum(axis=0) 122 | 123 | # Laplacian matrix. 124 | if not normalized: 125 | D = scipy.sparse.diags(d.A.squeeze(), 0) 126 | L = D - W 127 | else: 128 | d += np.spacing(np.array(0, W.dtype)) 129 | d = 1 / np.sqrt(d) 130 | D = scipy.sparse.diags(d.A.squeeze(), 0) 131 | I = scipy.sparse.identity(d.size, dtype=W.dtype) 132 | L = I - D * W * D 133 | 134 | # assert np.abs(L - L.T).mean() < 1e-9 135 | assert type(L) is scipy.sparse.csr.csr_matrix 136 | return L 137 | 138 | 139 | def lmax(L, normalized=True): 140 | """Upper-bound on the spectrum.""" 141 | if normalized: 142 | return 2 143 | else: 144 | return scipy.sparse.linalg.eigsh( 145 | L, k=1, which='LM', return_eigenvectors=False)[0] 146 | 147 | 148 | def fourier(L, algo='eigh', k=1): 149 | """Return the Fourier basis, i.e. the EVD of the Laplacian.""" 150 | 151 | def sort(lamb, U): 152 | idx = lamb.argsort() 153 | return lamb[idx], U[:, idx] 154 | 155 | if algo is 'eig': 156 | lamb, U = np.linalg.eig(L.toarray()) 157 | lamb, U = sort(lamb, U) 158 | elif algo is 'eigh': 159 | lamb, U = np.linalg.eigh(L.toarray()) 160 | elif algo is 'eigs': 161 | lamb, U = scipy.sparse.linalg.eigs(L, k=k, which='SM') 162 | lamb, U = sort(lamb, U) 163 | elif algo is 'eigsh': 164 | lamb, U = scipy.sparse.linalg.eigsh(L, k=k, which='SM') 165 | 166 | return lamb, U 167 | 168 | 169 | def plot_spectrum(L, algo='eig'): 170 | """Plot the spectrum of a list of multi-scale Laplacians L.""" 171 | # Algo is eig to be sure to get all eigenvalues. 172 | plt.figure(figsize=(17, 5)) 173 | for i, lap in enumerate(L): 174 | lamb, U = fourier(lap, algo) 175 | step = 2**i 176 | x = range(step//2, L[0].shape[0], step) 177 | lb = 'L_{} spectrum in [{:1.2e}, {:1.2e}]'.format(i, lamb[0], lamb[-1]) 178 | plt.plot(x, lamb, '.', label=lb) 179 | plt.legend(loc='best') 180 | plt.xlim(0, L[0].shape[0]) 181 | plt.ylim(ymin=0) 182 | 183 | 184 | def lanczos(L, X, K): 185 | """ 186 | Given the graph Laplacian and a data matrix, return a data matrix which can 187 | be multiplied by the filter coefficients to filter X using the Lanczos 188 | polynomial approximation. 189 | """ 190 | M, N = X.shape 191 | assert L.dtype == X.dtype 192 | 193 | def basis(L, X, K): 194 | """ 195 | Lanczos algorithm which computes the orthogonal matrix V and the 196 | tri-diagonal matrix H. 
197 | """ 198 | a = np.empty((K, N), L.dtype) 199 | b = np.zeros((K, N), L.dtype) 200 | V = np.empty((K, M, N), L.dtype) 201 | V[0, ...] = X / np.linalg.norm(X, axis=0) 202 | for k in range(K-1): 203 | W = L.dot(V[k, ...]) 204 | a[k, :] = np.sum(W * V[k, ...], axis=0) 205 | W = W - a[k, :] * V[k, ...] - ( 206 | b[k, :] * V[k-1, ...] if k > 0 else 0) 207 | b[k+1, :] = np.linalg.norm(W, axis=0) 208 | V[k+1, ...] = W / b[k+1, :] 209 | a[K-1, :] = np.sum(L.dot(V[K-1, ...]) * V[K-1, ...], axis=0) 210 | return V, a, b 211 | 212 | def diag_H(a, b, K): 213 | """Diagonalize the tri-diagonal H matrix.""" 214 | H = np.zeros((K*K, N), a.dtype) 215 | H[:K**2:K+1, :] = a 216 | H[1:(K-1)*K:K+1, :] = b[1:, :] 217 | H.shape = (K, K, N) 218 | Q = np.linalg.eigh(H.T, UPLO='L')[1] 219 | Q = np.swapaxes(Q, 1, 2).T 220 | return Q 221 | 222 | V, a, b = basis(L, X, K) 223 | Q = diag_H(a, b, K) 224 | Xt = np.empty((K, M, N), L.dtype) 225 | for n in range(N): 226 | Xt[..., n] = Q[..., n].T.dot(V[..., n]) 227 | Xt *= Q[0, :, np.newaxis, :] 228 | Xt *= np.linalg.norm(X, axis=0) 229 | return Xt # Q[0, ...] 230 | 231 | 232 | def rescale_L(L, lmax=2): 233 | """Rescale the Laplacian eigenvalues in [-1,1].""" 234 | M, M = L.shape 235 | I = scipy.sparse.identity(M, format='csr', dtype=L.dtype) 236 | L /= lmax / 2 237 | L -= I 238 | return L 239 | 240 | 241 | def chebyshev(L, X, K): 242 | """Return T_k X where T_k are the Chebyshev polynomials of order up to K. 243 | Complexity is O(KMN).""" 244 | M, N = X.shape 245 | assert L.dtype == X.dtype 246 | 247 | # L = rescale_L(L, lmax) 248 | # Xt = T @ X: MxM @ MxN. 249 | Xt = np.empty((K, M, N), L.dtype) 250 | # Xt_0 = T_0 X = I X = X. 251 | Xt[0, ...] = X 252 | # Xt_1 = T_1 X = L X. 253 | if K > 1: 254 | Xt[1, ...] = L.dot(X) 255 | # Xt_k = 2 L Xt_k-1 - Xt_k-2. 256 | for k in range(2, K): 257 | Xt[k, ...] = 2 * L.dot(Xt[k-1, ...]) - Xt[k-2, ...] 258 | return Xt 259 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/lib/utils.py: -------------------------------------------------------------------------------- 1 | import gensim 2 | import sklearn, sklearn.datasets 3 | import sklearn.naive_bayes, sklearn.linear_model, sklearn.svm, sklearn.neighbors, sklearn.ensemble 4 | import matplotlib.pyplot as plt 5 | import scipy.sparse 6 | import numpy as np 7 | import time, re, sys 8 | 9 | 10 | # Helpers to process text documents. 11 | 12 | 13 | class TextDataset(object): 14 | def clean_text(self, num='substitute'): 15 | # TODO: stemming, lemmatisation 16 | for i,doc in enumerate(self.documents): 17 | # Digits. 18 | if num is 'spell': 19 | doc = doc.replace('0', ' zero ') 20 | doc = doc.replace('1', ' one ') 21 | doc = doc.replace('2', ' two ') 22 | doc = doc.replace('3', ' three ') 23 | doc = doc.replace('4', ' four ') 24 | doc = doc.replace('5', ' five ') 25 | doc = doc.replace('6', ' six ') 26 | doc = doc.replace('7', ' seven ') 27 | doc = doc.replace('8', ' eight ') 28 | doc = doc.replace('9', ' nine ') 29 | elif num is 'substitute': 30 | # All numbers are equal. Useful for embedding (countable words) ? 31 | doc = re.sub('(\\d+)', ' NUM ', doc) 32 | elif num is 'remove': 33 | # Numbers are uninformative (they are all over the place). Useful for bag-of-words ? 34 | # But maybe some kind of documents contain more numbers, e.g. finance. 35 | # Some documents are indeed full of numbers. At least in 20NEWS. 36 | doc = re.sub('[0-9]', ' ', doc) 37 | # Remove everything except a-z characters and single space. 
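            # '$' is spelled out as the word 'dollar' first so that currency
            # mentions survive the [^a-z] filter below.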
38 | doc = doc.replace('$', ' dollar ') 39 | doc = doc.lower() 40 | doc = re.sub('[^a-z]', ' ', doc) 41 | doc = ' '.join(doc.split()) # same as doc = re.sub('\s{2,}', ' ', doc) 42 | self.documents[i] = doc 43 | 44 | def vectorize(self, **params): 45 | # TODO: count or tf-idf. Or in normalize ? 46 | vectorizer = sklearn.feature_extraction.text.CountVectorizer(**params) 47 | self.data = vectorizer.fit_transform(self.documents) 48 | self.vocab = vectorizer.get_feature_names() 49 | assert len(self.vocab) == self.data.shape[1] 50 | 51 | def data_info(self, show_classes=False): 52 | N, M = self.data.shape 53 | sparsity = self.data.nnz / N / M * 100 54 | print('N = {} documents, M = {} words, sparsity={:.4f}%'.format(N, M, sparsity)) 55 | if show_classes: 56 | for i in range(len(self.class_names)): 57 | num = sum(self.labels == i) 58 | print(' {:5d} documents in class {:2d} ({})'.format(num, i, self.class_names[i])) 59 | 60 | def show_document(self, i): 61 | label = self.labels[i] 62 | name = self.class_names[label] 63 | try: 64 | text = self.documents[i] 65 | wc = len(text.split()) 66 | except AttributeError: 67 | text = None 68 | wc = 'N/A' 69 | print('document {}: label {} --> {}, {} words'.format(i, label, name, wc)) 70 | try: 71 | vector = self.data[i,:] 72 | for j in range(vector.shape[1]): 73 | if vector[0,j] != 0: 74 | print(' {:.2f} "{}" ({})'.format(vector[0,j], self.vocab[j], j)) 75 | except AttributeError: 76 | pass 77 | return text 78 | 79 | def keep_documents(self, idx): 80 | """Keep the documents given by the index, discard the others.""" 81 | self.documents = [self.documents[i] for i in idx] 82 | self.labels = self.labels[idx] 83 | self.data = self.data[idx,:] 84 | 85 | def keep_words(self, idx): 86 | """Keep the documents given by the index, discard the others.""" 87 | self.data = self.data[:,idx] 88 | self.vocab = [self.vocab[i] for i in idx] 89 | try: 90 | self.embeddings = self.embeddings[idx,:] 91 | except AttributeError: 92 | pass 93 | 94 | def remove_short_documents(self, nwords, vocab='selected'): 95 | """Remove a document if it contains less than nwords.""" 96 | if vocab is 'selected': 97 | # Word count with selected vocabulary. 98 | wc = self.data.sum(axis=1) 99 | wc = np.squeeze(np.asarray(wc)) 100 | elif vocab is 'full': 101 | # Word count with full vocabulary. 102 | wc = np.empty(len(self.documents), dtype=np.int) 103 | for i,doc in enumerate(self.documents): 104 | wc[i] = len(doc.split()) 105 | idx = np.argwhere(wc >= nwords).squeeze() 106 | self.keep_documents(idx) 107 | return wc 108 | 109 | def keep_top_words(self, M, Mprint=20): 110 | """Keep in the vocaluary the M words who appear most often.""" 111 | freq = self.data.sum(axis=0) 112 | freq = np.squeeze(np.asarray(freq)) 113 | idx = np.argsort(freq)[::-1] 114 | idx = idx[:M] 115 | self.keep_words(idx) 116 | print('most frequent words') 117 | for i in range(Mprint): 118 | print(' {:3d}: {:10s} {:6d} counts'.format(i, self.vocab[i], freq[idx][i])) 119 | return freq[idx] 120 | 121 | def normalize(self, norm='l1'): 122 | """Normalize data to unit length.""" 123 | # TODO: TF-IDF. 
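        # Cast the integer word counts to float before row-wise normalization;
        # with the default norm='l1' each document's counts then sum to 1.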
124 | data = self.data.astype(np.float64) 125 | self.data = sklearn.preprocessing.normalize(data, axis=1, norm=norm) 126 | 127 | def embed(self, filename=None, size=100): 128 | """Embed the vocabulary using pre-trained vectors.""" 129 | if filename: 130 | model = gensim.models.Word2Vec.load_word2vec_format(filename, binary=True) 131 | size = model.vector_size 132 | else: 133 | class Sentences(object): 134 | def __init__(self, documents): 135 | self.documents = documents 136 | def __iter__(self): 137 | for document in self.documents: 138 | yield document.split() 139 | model = gensim.models.Word2Vec(Sentences(self.documents), size) 140 | self.embeddings = np.empty((len(self.vocab), size)) 141 | keep = [] 142 | not_found = 0 143 | for i,word in enumerate(self.vocab): 144 | try: 145 | self.embeddings[i,:] = model[word] 146 | keep.append(i) 147 | except KeyError: 148 | not_found += 1 149 | print('{} words not found in corpus'.format(not_found, i)) 150 | self.keep_words(keep) 151 | 152 | class Text20News(TextDataset): 153 | def __init__(self, **params): 154 | dataset = sklearn.datasets.fetch_20newsgroups(**params) 155 | self.documents = dataset.data 156 | self.labels = dataset.target 157 | self.class_names = dataset.target_names 158 | assert max(self.labels) + 1 == len(self.class_names) 159 | N, C = len(self.documents), len(self.class_names) 160 | print('N = {} documents, C = {} classes'.format(N, C)) 161 | 162 | class TextRCV1(TextDataset): 163 | def __init__(self, **params): 164 | dataset = sklearn.datasets.fetch_rcv1(**params) 165 | self.data = dataset.data 166 | self.target = dataset.target 167 | self.class_names = dataset.target_names 168 | assert len(self.class_names) == 103 # 103 categories according to LYRL2004 169 | N, C = self.target.shape 170 | assert C == len(self.class_names) 171 | print('N = {} documents, C = {} classes'.format(N, C)) 172 | 173 | def remove_classes(self, keep): 174 | ## Construct a lookup table for labels. 175 | labels_row = [] 176 | labels_col = [] 177 | class_lookup = {} 178 | for i,name in enumerate(self.class_names): 179 | class_lookup[name] = i 180 | self.class_names = keep 181 | 182 | # Index of classes to keep. 183 | idx_keep = np.empty(len(keep)) 184 | for i,cat in enumerate(keep): 185 | idx_keep[i] = class_lookup[cat] 186 | self.target = self.target[:,idx_keep] 187 | assert self.target.shape[1] == len(keep) 188 | 189 | def show_doc_per_class(self, print_=False): 190 | """Number of documents per class.""" 191 | docs_per_class = np.array(self.target.astype(np.uint64).sum(axis=0)).squeeze() 192 | print('categories ({} assignments in total)'.format(docs_per_class.sum())) 193 | if print_: 194 | for i,cat in enumerate(self.class_names): 195 | print(' {:5s}: {:6d} documents'.format(cat, docs_per_class[i])) 196 | plt.figure(figsize=(17,5)) 197 | plt.plot(sorted(docs_per_class[::-1]),'.') 198 | 199 | def show_classes_per_doc(self): 200 | """Number of classes per document.""" 201 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze() 202 | plt.figure(figsize=(17,5)) 203 | plt.plot(sorted(classes_per_doc[::-1]),'.') 204 | 205 | def select_documents(self): 206 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze() 207 | self.target = self.target[classes_per_doc==1] 208 | self.data = self.data[classes_per_doc==1, :] 209 | 210 | # Convert labels from indicator form to single value. 
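        # Illustrative example: after filtering to single-label documents, every row of
        # the indicator matrix has exactly one nonzero column, so the COO column indices
        # extracted below recover the integer labels directly, e.g.
        #   [[0, 1, 0],
        #    [0, 0, 1],   ->  target.tocoo().col == array([1, 2, 0])
        #    [1, 0, 0]]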
211 | N, C = self.target.shape 212 | target = self.target.tocoo() 213 | self.labels = target.col 214 | assert self.labels.min() == 0 215 | assert self.labels.max() == C - 1 216 | 217 | # Bruna and Dropout used 2 * 201369 = 402738 documents. Probably the difference btw v1 and v2. 218 | #return classes_per_doc 219 | 220 | ### Helpers to quantify classifier's quality. 221 | 222 | 223 | def baseline(train_data, train_labels, test_data, test_labels, omit=[]): 224 | """Train various classifiers to get a baseline.""" 225 | clf, train_accuracy, test_accuracy, train_f1, test_f1, exec_time = [], [], [], [], [], [] 226 | clf.append(sklearn.neighbors.KNeighborsClassifier(n_neighbors=10)) 227 | clf.append(sklearn.linear_model.LogisticRegression()) 228 | clf.append(sklearn.naive_bayes.BernoulliNB(alpha=.01)) 229 | clf.append(sklearn.ensemble.RandomForestClassifier()) 230 | clf.append(sklearn.naive_bayes.MultinomialNB(alpha=.01)) 231 | clf.append(sklearn.linear_model.RidgeClassifier()) 232 | clf.append(sklearn.svm.LinearSVC()) 233 | for i,c in enumerate(clf): 234 | if i not in omit: 235 | t_start = time.process_time() 236 | c.fit(train_data, train_labels) 237 | train_pred = c.predict(train_data) 238 | test_pred = c.predict(test_data) 239 | train_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(train_labels, train_pred))) 240 | test_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(test_labels, test_pred))) 241 | train_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(train_labels, train_pred, average='weighted'))) 242 | test_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(test_labels, test_pred, average='weighted'))) 243 | exec_time.append('{:5.2f}'.format(time.process_time() - t_start)) 244 | print('Train accuracy: {}'.format(' '.join(train_accuracy))) 245 | print('Test accuracy: {}'.format(' '.join(test_accuracy))) 246 | print('Train F1 (weighted): {}'.format(' '.join(train_f1))) 247 | print('Test F1 (weighted): {}'.format(' '.join(test_f1))) 248 | print('Execution time: {}'.format(' '.join(exec_time))) 249 | 250 | def grid_search(params, grid_params, train_data, train_labels, val_data, 251 | val_labels, test_data, test_labels, model): 252 | """Explore the hyper-parameter space with an exhaustive grid search.""" 253 | params = params.copy() 254 | train_accuracy, test_accuracy, train_f1, test_f1 = [], [], [], [] 255 | grid = sklearn.model_selection.ParameterGrid(grid_params) 256 | print('grid search: {} combinations to evaluate'.format(len(grid))) 257 | for grid_params in grid: 258 | params.update(grid_params) 259 | name = '{}'.format(grid) 260 | print('\n\n {} \n\n'.format(grid_params)) 261 | m = model(params) 262 | m.fit(train_data, train_labels, val_data, val_labels) 263 | string, accuracy, f1, loss = m.evaluate(train_data, train_labels) 264 | train_accuracy.append('{:5.2f}'.format(accuracy)); train_f1.append('{:5.2f}'.format(f1)) 265 | print('train {}'.format(string)) 266 | string, accuracy, f1, loss = m.evaluate(test_data, test_labels) 267 | test_accuracy.append('{:5.2f}'.format(accuracy)); test_f1.append('{:5.2f}'.format(f1)) 268 | print('test {}'.format(string)) 269 | print('\n\n') 270 | print('Train accuracy: {}'.format(' '.join(train_accuracy))) 271 | print('Test accuracy: {}'.format(' '.join(test_accuracy))) 272 | print('Train F1 (weighted): {}'.format(' '.join(train_f1))) 273 | print('Test F1 (weighted): {}'.format(' '.join(test_f1))) 274 | for i,grid_params in enumerate(grid): 275 | print('{} --> {} {} {} {}'.format(grid_params, 
train_accuracy[i], test_accuracy[i], train_f1[i], test_f1[i])) 276 | 277 | 278 | class model_perf(object): 279 | 280 | def __init__(s): 281 | s.names, s.params = set(), {} 282 | s.fit_accuracies, s.fit_losses, s.fit_time = {}, {}, {} 283 | s.train_accuracy, s.train_f1, s.train_loss = {}, {}, {} 284 | s.test_accuracy, s.test_f1, s.test_loss = {}, {}, {} 285 | 286 | def test(s, model, name, params, train_data, train_labels, val_data, val_labels, test_data, test_labels): 287 | s.params[name] = params 288 | sess = s.fit_accuracies[name], s.fit_losses[name], s.fit_time[name] = \ 289 | model.fit(train_data, train_labels, val_data, val_labels) 290 | string, s.train_accuracy[name], s.train_f1[name], s.train_loss[name] = \ 291 | model.evaluate(train_data, train_labels, sess=sess) 292 | print('train {}'.format(string)) 293 | string, s.test_accuracy[name], s.test_f1[name], s.test_loss[name] = \ 294 | model.evaluate(test_data, test_labels, sess=sess) 295 | print('test {}'.format(string)) 296 | sys.stdout.flush() 297 | s.names.add(name) 298 | return s 299 | 300 | def show(s, fontsize=None): 301 | if fontsize: 302 | plt.rc('pdf', fonttype=42) 303 | plt.rc('ps', fonttype=42) 304 | plt.rc('font', size=fontsize) # controls default text sizes 305 | plt.rc('axes', titlesize=fontsize) # fontsize of the axes title 306 | plt.rc('axes', labelsize=fontsize) # fontsize of the x any y labels 307 | plt.rc('xtick', labelsize=fontsize) # fontsize of the tick labels 308 | plt.rc('ytick', labelsize=fontsize) # fontsize of the tick labels 309 | plt.rc('legend', fontsize=fontsize) # legend fontsize 310 | plt.rc('figure', titlesize=fontsize) # size of the figure title 311 | print(' accuracy F1 loss time [ms] name') 312 | print('test train test train test train') 313 | for name in sorted(s.names): 314 | print('{:5.2f} {:5.2f} {:5.2f} {:5.2f} {:.2e} {:.2e} {:3.0f} {}'.format( 315 | s.test_accuracy[name], s.train_accuracy[name], 316 | s.test_f1[name], s.train_f1[name], 317 | s.test_loss[name], s.train_loss[name], s.fit_time[name]*1000, name)) 318 | 319 | fig, ax = plt.subplots(1, 2, figsize=(15, 5)) 320 | for name in sorted(s.names): 321 | steps = np.arange(len(s.fit_accuracies[name])) + 1 322 | steps *= s.params[name]['eval_frequency'] 323 | ax[0].plot(steps, s.fit_accuracies[name], '.-', label=name) 324 | ax[1].plot(steps, s.fit_losses[name], '.-', label=name) 325 | ax[0].set_xlim(min(steps), max(steps)) 326 | ax[1].set_xlim(min(steps), max(steps)) 327 | ax[0].set_xlabel('step') 328 | ax[1].set_xlabel('step') 329 | ax[0].set_ylabel('validation accuracy') 330 | ax[1].set_ylabel('training loss') 331 | ax[0].legend(loc='lower right') 332 | ax[1].legend(loc='upper right') 333 | #fig.savefig('training.pdf') 334 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/makefile: -------------------------------------------------------------------------------- 1 | NB = $(sort $(wildcard *.ipynb)) 2 | DIRS = nips2016 trials 3 | 4 | CLEANDIRS = $(DIRS:%=clean-%) 5 | 6 | run: $(NB) $(DIRS) 7 | 8 | $(NB): 9 | jupyter nbconvert --inplace --execute --ExecutePreprocessor.timeout=-1 $@ 10 | 11 | $(DIRS): 12 | $(MAKE) -C $@ 13 | 14 | clean: $(CLEANDIRS) 15 | jupyter nbconvert --inplace --ClearOutputPreprocessor.enabled=True $(NB) 16 | #rm -rf **/*.pyc 17 | 18 | $(CLEANDIRS): 19 | $(MAKE) clean -C $(@:clean-%=%) 20 | 21 | install: 22 | pip install --upgrade pip 23 | pip install -r requirements.txt 24 | 25 | readme: 26 | grip README.md 27 | 28 | .PHONY: run $(NB) $(DIRS) clean 
$(CLEANDIRS) install readme 29 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/rcv1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "%autoreload 2\n", 13 | "\n", 14 | "from lib import models, graph, coarsening, utils\n", 15 | "\n", 16 | "import tensorflow as tf\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "import scipy.sparse\n", 19 | "import numpy as np\n", 20 | "import time, shutil\n", 21 | "\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "flags = tf.app.flags\n", 34 | "FLAGS = flags.FLAGS\n", 35 | "\n", 36 | "# Graphs.\n", 37 | "flags.DEFINE_integer('number_edges', 16, 'Graph: minimum number of edges per vertex.')\n", 38 | "flags.DEFINE_string('metric', 'cosine', 'Graph: similarity measure (between features).')\n", 39 | "# TODO: change cgcnn for combinatorial Laplacians.\n", 40 | "flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')\n", 41 | "flags.DEFINE_integer('coarsening_levels', 0, 'Number of coarsened graphs.')\n", 42 | "\n", 43 | "flags.DEFINE_string('dir_data', os.path.join('data', 'rcv1'), 'Directory to store data.')\n", 44 | "flags.DEFINE_integer('val_size', 400, 'Size of the validation set.')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Data\n", 52 | "\n", 53 | "**From Dropout (Bruna did the same).**\n", 54 | "We took the dataset and split it into 63 classes based on the the 63 categories at the second-level of the category tree. We removed 11 categories that did not have any data and one category that had only 4 training examples. We also removed one category that covered a huge chunk (25%) of the examples. This left us with 50 classes and 402,738 documents. We divided the documents into equal-sized training and test sets randomly. Each document was represented\n", 55 | "using the 2000 most frequent non-stopwords in the dataset." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Fetch dataset from Scikit-learn.\n", 67 | "dataset = utils.TextRCV1(data_home=FLAGS.dir_data)\n", 68 | "\n", 69 | "# Pre-processing: transform everything to a-z and whitespace.\n", 70 | "#print(train.show_document(1)[:400])\n", 71 | "#train.clean_text(num='substitute')\n", 72 | "\n", 73 | "# Analyzing / tokenizing: transform documents to bags-of-words.\n", 74 | "#stop_words = set(sklearn.feature_extraction.text.ENGLISH_STOP_WORDS)\n", 75 | "# Or stop words from NLTK.\n", 76 | "# Add e.g. 
don, ve.\n", 77 | "#train.vectorize(stop_words='english')\n", 78 | "#print(train.show_document(1)[:400])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# Selection of classes.\n", 90 | "keep = ['C11','C12','C13','C14','C15','C16','C17','C18','C21','C22','C23','C24',\n", 91 | " 'C31','C32','C33','C34','C41','C42','E11','E12','E13','E14','E21','E31',\n", 92 | " 'E41','E51','E61','E71','G15','GCRIM','GDEF','GDIP','GDIS','GENT','GENV',\n", 93 | " 'GFAS','GHEA','GJOB','GMIL','GOBIT','GODD','GPOL','GPRO','GREL','GSCI',\n", 94 | " 'GSPO','GTOUR','GVIO','GVOTE','GWEA','GWELF','M11','M12','M13','M14']\n", 95 | "assert len(keep) == 55 # There is 55 second-level categories according to LYRL2004.\n", 96 | "keep.remove('C15') # 151785 documents\n", 97 | "keep.remove('GMIL') # 5 documents only\n", 98 | "\n", 99 | "dataset.show_doc_per_class()\n", 100 | "dataset.show_classes_per_doc()\n", 101 | "dataset.remove_classes(keep)\n", 102 | "dataset.show_doc_per_class(True)\n", 103 | "dataset.show_classes_per_doc()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "# Remove documents with multiple classes.\n", 115 | "dataset.select_documents()\n", 116 | "dataset.data_info()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# Remove short documents.\n", 128 | "#train.data_info(True)\n", 129 | "#wc = train.remove_short_documents(nwords=20, vocab='full')\n", 130 | "#train.data_info()\n", 131 | "#print('shortest: {}, longest: {} words'.format(wc.min(), wc.max()))\n", 132 | "#plt.figure(figsize=(17,5))\n", 133 | "#plt.semilogy(wc, '.');" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "# Feature selection.\n", 145 | "# Other options include: mutual information or document count.\n", 146 | "#freq = train.keep_top_words(1000, 20)\n", 147 | "#train.data_info()\n", 148 | "#train.show_document(1)\n", 149 | "#plt.figure(figsize=(17,5))\n", 150 | "#plt.semilogy(freq);\n", 151 | "\n", 152 | "# Remove documents whose signal would be the zero vector.\n", 153 | "#wc = train.remove_short_documents(nwords=5, vocab='selected')\n", 154 | "#train.data_info(True)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "#dataset.normalize(norm='l1')\n", 166 | "dataset.show_document(1);" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# Word embedding\n", 178 | "#if True:\n", 179 | "# train.embed()\n", 180 | "#else:\n", 181 | "# train.embed('data_word2vec/GoogleNews-vectors-negative300.bin')\n", 182 | "#train.data_info()\n", 183 | "# Further feature selection. 
(TODO)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "perm = np.random.RandomState(seed=42).permutation(dataset.data.shape[0])\n", 195 | "Ntest = dataset.data.shape[0] // 2\n", 196 | "perm_test = perm[:Ntest]\n", 197 | "perm_train = perm[Ntest:]\n", 198 | "train_data = dataset.data[perm_train,:].astype(np.float32)\n", 199 | "test_data = dataset.data[perm_test,:].astype(np.float32)\n", 200 | "train_labels = dataset.labels[perm_train]\n", 201 | "test_labels = dataset.labels[perm_test]\n", 202 | "\n", 203 | "if False:\n", 204 | " graph_data = train.embeddings.astype(np.float32)\n", 205 | "else:\n", 206 | " graph_data = dataset.data.T.astype(np.float32)\n", 207 | "\n", 208 | "#del dataset" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "# Feature graph" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "t_start = time.process_time()\n", 227 | "dist, idx = graph.distance_lshforest(graph_data.astype(np.float64), k=FLAGS.number_edges, metric=FLAGS.metric)\n", 228 | "A = graph.adjacency(dist.astype(np.float32), idx)\n", 229 | "print(\"{} > {} edges\".format(A.nnz//2, FLAGS.number_edges*graph_data.shape[0]//2))\n", 230 | "A = graph.replace_random_edges(A, 0)\n", 231 | "graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False)\n", 232 | "L = [graph.laplacian(A, normalized=True) for A in graphs]\n", 233 | "print('Execution time: {:.2f}s'.format(time.process_time() - t_start))\n", 234 | "#graph.plot_spectrum(L)\n", 235 | "#del graph_data, A, dist, idx" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "assert FLAGS.coarsening_levels is 0\n", 247 | "#t_start = time.process_time()\n", 248 | "#train_data = scipy.sparse.csr_matrix(coarsening.perm_data(train_data.toarray(), perm))\n", 249 | "#test_data = scipy.sparse.csr_matrix(coarsening.perm_data(test_data.toarray(), perm))\n", 250 | "#print('Execution time: {:.2f}s'.format(time.process_time() - t_start))\n", 251 | "#del perm" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "# Classification" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "# Training set is shuffled already.\n", 270 | "#perm = np.random.permutation(train_data.shape[0])\n", 271 | "#train_data = train_data[perm,:]\n", 272 | "#train_labels = train_labels[perm]\n", 273 | "\n", 274 | "# Validation set.\n", 275 | "if False:\n", 276 | " val_data = train_data[:FLAGS.val_size,:]\n", 277 | " val_labels = train_labels[:FLAGS.val_size]\n", 278 | " train_data = train_data[FLAGS.val_size:,:]\n", 279 | " train_labels = train_labels[FLAGS.val_size:]\n", 280 | "else:\n", 281 | " val_data = test_data\n", 282 | " val_labels = test_labels" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "collapsed": false 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "if False:\n", 294 | " utils.baseline(train_data, train_labels, test_data, test_labels)" 295 | ] 296 | }, 297 | { 298 | 
"cell_type": "code", 299 | "execution_count": null, 300 | "metadata": { 301 | "collapsed": false 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "common = {}\n", 306 | "common['dir_name'] = 'rcv1/'\n", 307 | "common['num_epochs'] = 4\n", 308 | "common['batch_size'] = 100\n", 309 | "common['decay_steps'] = len(train_labels) / common['batch_size']\n", 310 | "common['eval_frequency'] = 200\n", 311 | "common['filter'] = 'chebyshev5'\n", 312 | "common['brelu'] = 'b1relu'\n", 313 | "common['pool'] = 'mpool1'\n", 314 | "C = max(train_labels) + 1 # number of classes\n", 315 | "\n", 316 | "model_perf = utils.model_perf()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": false 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "if True:\n", 328 | " name = 'softmax'\n", 329 | " params = common.copy()\n", 330 | " params['dir_name'] += name\n", 331 | " params['regularization'] = 0\n", 332 | " params['dropout'] = 1\n", 333 | " params['learning_rate'] = 1e3\n", 334 | " params['decay_rate'] = 0.95\n", 335 | " params['momentum'] = 0.9\n", 336 | " params['F'] = []\n", 337 | " params['K'] = []\n", 338 | " params['p'] = []\n", 339 | " params['M'] = [C]\n", 340 | " model_perf.test(models.cgcnn(L, **params), name, params,\n", 341 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "collapsed": false 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "if True:\n", 353 | " name = 'fc_softmax'\n", 354 | " params = common.copy()\n", 355 | " params['dir_name'] += name\n", 356 | " params['regularization'] = 0\n", 357 | " params['dropout'] = 1\n", 358 | " params['learning_rate'] = 0.1\n", 359 | " params['decay_rate'] = 0.95\n", 360 | " params['momentum'] = 0.9\n", 361 | " params['F'] = []\n", 362 | " params['K'] = []\n", 363 | " params['p'] = []\n", 364 | " params['M'] = [2500, C]\n", 365 | " model_perf.test(models.cgcnn(L, **params), name, params,\n", 366 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": { 373 | "collapsed": false 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "if True:\n", 378 | " name = 'fc_fc_softmax'\n", 379 | " params = common.copy()\n", 380 | " params['dir_name'] += name\n", 381 | " params['regularization'] = 0\n", 382 | " params['dropout'] = 1\n", 383 | " params['learning_rate'] = 0.1\n", 384 | " params['decay_rate'] = 0.95\n", 385 | " params['momentum'] = 0.9\n", 386 | " params['F'] = []\n", 387 | " params['K'] = []\n", 388 | " params['p'] = []\n", 389 | " params['M'] = [2500, 500, C]\n", 390 | " model_perf.test(models.cgcnn(L, **params), name, params,\n", 391 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": { 398 | "collapsed": false 399 | }, 400 | "outputs": [], 401 | "source": [ 402 | "if True:\n", 403 | " name = 'cgconv_softmax'\n", 404 | " params = common.copy()\n", 405 | " params['dir_name'] += name\n", 406 | " params['regularization'] = 1e-3\n", 407 | " params['dropout'] = 1\n", 408 | " params['learning_rate'] = 0.1\n", 409 | " params['decay_rate'] = 0.999\n", 410 | " params['momentum'] = 0\n", 411 | " params['F'] = [1]\n", 412 | " params['K'] = [5]\n", 413 | " params['p'] = [1]\n", 414 | " params['M'] = [C]\n", 
415 | " model_perf.test(models.cgcnn(L, **params), name, params,\n", 416 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": { 423 | "collapsed": false 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "if True:\n", 428 | " name = 'cgconv_fc_softmax'\n", 429 | " params = common.copy()\n", 430 | " params['dir_name'] += name\n", 431 | " params['regularization'] = 0\n", 432 | " params['dropout'] = 1\n", 433 | " params['learning_rate'] = 0.1\n", 434 | " params['decay_rate'] = 0.999\n", 435 | " params['momentum'] = 0\n", 436 | " params['F'] = [5]\n", 437 | " params['K'] = [15]\n", 438 | " params['p'] = [1]\n", 439 | " params['M'] = [100, C]\n", 440 | " model_perf.test(models.cgcnn(L, **params), name, params,\n", 441 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": { 448 | "collapsed": true 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "model_perf.show()" 453 | ] 454 | } 455 | ], 456 | "metadata": { 457 | "kernelspec": { 458 | "display_name": "Python 3", 459 | "language": "python", 460 | "name": "python3" 461 | }, 462 | "language_info": { 463 | "codemirror_mode": { 464 | "name": "ipython", 465 | "version": 3 466 | }, 467 | "file_extension": ".py", 468 | "mimetype": "text/x-python", 469 | "name": "python", 470 | "nbconvert_exporter": "python", 471 | "pygments_lexer": "ipython3", 472 | "version": "3.4.3" 473 | } 474 | }, 475 | "nbformat": 4, 476 | "nbformat_minor": 0 477 | } 478 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | scikit-learn 4 | matplotlib 5 | 6 | gensim 7 | tensorflow-gpu 8 | #tensorflow 9 | 10 | jupyter 11 | ipython 12 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/trials/3_tensorflow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Trial 3: TensorFlow\n", 8 | "\n", 9 | "Small experiment to familiarize myself with TensorFlow." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": false 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import tensorflow as tf" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Data" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "from tensorflow.examples.tutorials.mnist import input_data\n", 39 | "import os\n", 40 | "folder = os.path.join('..', 'data', 'mnist')\n", 41 | "mnist = input_data.read_data_sets(folder, one_hot=True)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "# Model" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "x = tf.placeholder(tf.float32, [None, 784])\n", 60 | "W = tf.Variable(tf.zeros([784, 10]))\n", 61 | "b = tf.Variable(tf.zeros([10]))\n", 62 | "y = tf.nn.softmax(tf.matmul(x, W) + b)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "# Training" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "y_ = tf.placeholder(tf.float32, [None, 10])\n", 81 | "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))\n", 82 | "train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)\n", 83 | "\n", 84 | "init = tf.initialize_all_variables()\n", 85 | "sess = tf.Session()\n", 86 | "sess.run(init)\n", 87 | "\n", 88 | "for i in range(1000):\n", 89 | " batch_xs, batch_ys = mnist.train.next_batch(100)\n", 90 | " sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# Evaluation" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))\n", 109 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 110 | "print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.5.2" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 0 135 | } 136 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/trials/4_coarsening.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Trial 4: graph coarsening\n", 10 | "\n", 11 | "* First Python implementation of the greedy Metis and Graclus coarsening algorithms.\n", 12 | "* Results comparison with a previously developed matlab implementation.\n", 
13 | "* Results comparison with the newer version in the `coarsening` module." 14 | ] 15 | }, 16 | { 17 | "cell_type": "raw", 18 | "metadata": {}, 19 | "source": [ 20 | "METIS COARSENING IMPLEMENTATION AS PROPOSED IN:\n", 21 | "An incremental reseeding strategy for clustering\n", 22 | "X Bresson, H Hu, T Laurent, A Szlam, J von Brecht\n", 23 | "arXiv preprint arXiv:1406.3837\n", 24 | "3 May 2016" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "import scipy.io\n", 37 | "import scipy.sparse\n", 38 | "import numpy as np" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "if False:\n", 50 | " # CHECK PYTHON RESULTS WITH MATLAB CODE\n", 51 | " folder = os.path.join('..', 'data', 'metis_matlab.mat')\n", 52 | " mat = scipy.io.loadmat(folder)\n", 53 | " W = mat['W']\n", 54 | " W = scipy.sparse.csr_matrix(W)\n", 55 | " rid = mat['rid']-1\n", 56 | " rid = rid.T\n", 57 | " rid = rid.squeeze()\n", 58 | " #print(type(W))\n", 59 | " #print(type(rid))\n", 60 | " print(W.shape)\n", 61 | " print(W.nnz)\n", 62 | " #print(rid.shape)\n", 63 | "\n", 64 | "else:\n", 65 | " N = 533\n", 66 | " #np.random.seed(0)\n", 67 | " rid = np.random.permutation(range(N))\n", 68 | " W = np.random.uniform(0.01, 0.99, size=(N,N))\n", 69 | " mask = np.random.uniform(size=(N,N))\n", 70 | " W[mask<0.99] = 0\n", 71 | " W = scipy.sparse.csr_matrix(W)\n", 72 | " print(W.nnz)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "# INPUT\n", 84 | "# W = symmetric sparse weight matrix\n", 85 | "# maxsize = the number of nodes for the coarsest graph\n", 86 | "# OUTPUT\n", 87 | "# graph{1}: original graph of size N_1\n", 88 | "# graph{2}: coarser graph of size N_2 < N_1\n", 89 | "# etc...\n", 90 | "# graph{k}: corsest graph of Size N_k <...< N_2 < N_1\n", 91 | "# parents{i} is a vector of size N_i with entries ranging from 1 to N_{i+1}\n", 92 | "# which indicate the parents in the coarser graph{i+1} \n", 93 | "# nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i}\n", 94 | "# NOTE\n", 95 | "# if \"graph\" is a cell of size k, then \"parents\" will be a cell of size k-1\n", 96 | "\n", 97 | "def metis_coarsening(W,maxsize,rid):\n", 98 | " \n", 99 | " N = W.shape[0]\n", 100 | " print('Size of original graph=',N)\n", 101 | " parents = []\n", 102 | " degree = W.sum(axis=0) - W.diagonal()\n", 103 | " graphs = []\n", 104 | " graphs.append(W)\n", 105 | " supernode_size = np.ones(N)\n", 106 | " nd_sz = [supernode_size]\n", 107 | " count = 0\n", 108 | " \n", 109 | " while N > maxsize:\n", 110 | " \n", 111 | " count = count + 1;\n", 112 | " print('level=',count)\n", 113 | " \n", 114 | " # CHOOSE THE WEIGHTS FOR THE PAIRING\n", 115 | " # weights = ones(N,1) # metis weights\n", 116 | " weights = degree # graclus weights\n", 117 | " # weights = supernode_size # other possibility\n", 118 | " weights = weights.T\n", 119 | " weights = np.array(weights)\n", 120 | " weights = weights.squeeze()\n", 121 | " \n", 122 | " # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR\n", 123 | " idx_row,idx_col,val = scipy.sparse.find(W) \n", 124 | " perm = np.argsort(idx_row)\n", 125 | " rr = idx_row[perm]\n", 126 | " cc = idx_col[perm]\n", 127 | " vv = 
val[perm]\n", 128 | " cluster_id = one_level_coarsening(rr,cc,vv,rid,weights) # rr is ordered \n", 129 | " parents.append(cluster_id)\n", 130 | " \n", 131 | " # TO DO\n", 132 | " # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE \n", 133 | " #supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) )\n", 134 | " #print(cluster_id)\n", 135 | " #print(supernode_size)\n", 136 | " #nd_sz{count+1}=supernode_size;\n", 137 | " \n", 138 | " # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH\n", 139 | " nrr = cluster_id[rr]\n", 140 | " ncc = cluster_id[cc]\n", 141 | " nvv = vv\n", 142 | " Nnew = int(cluster_id.max()) + 1\n", 143 | " print('Size of coarser graph=',Nnew)\n", 144 | " W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)),shape=(Nnew,Nnew))\n", 145 | " # Add new graph to the list of all coarsened graphs\n", 146 | " graphs.append(W)\n", 147 | " N = W.shape[0]\n", 148 | " \n", 149 | " # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS)\n", 150 | " degree = W.sum(axis=0)\n", 151 | " #degree = W.sum(axis=0) - W.diagonal()\n", 152 | " \n", 153 | " # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS\n", 154 | " #[~, rid]=sort(ss); # arthur strategy\n", 155 | " #[~, rid]=sort(supernode_size); # thomas strategy\n", 156 | " #rid=randperm(N); # metis/graclus strategy \n", 157 | " ss = W.sum(axis=0).T\n", 158 | " rid = [i[0] for i in sorted(enumerate(ss), key=lambda x:x[1])] # [~, rid]=sort(ss);\n", 159 | " \n", 160 | " \n", 161 | " # Remove all diagonal entries in similarity matrices\n", 162 | " for i in range(len(graphs)): \n", 163 | " csr_setdiag_val(graphs[i])\n", 164 | " scipy.sparse.csr_matrix.eliminate_zeros(graphs[i])\n", 165 | " \n", 166 | " \n", 167 | " return graphs,parents" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": true 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "#http://nbviewer.ipython.org/gist/Midnighter/9992103\n", 179 | "def csr_setdiag_val(csr, value=0):\n", 180 | " \"\"\"Set all diagonal nonzero elements\n", 181 | " (elements currently in the sparsity pattern)\n", 182 | " to the given value. Useful to set to 0 mostly.\n", 183 | " \"\"\"\n", 184 | " if csr.format != \"csr\":\n", 185 | " raise ValueError('Matrix given must be of CSR format.')\n", 186 | " csr.sort_indices()\n", 187 | " pointer = csr.indptr\n", 188 | " indices = csr.indices\n", 189 | " data = csr.data\n", 190 | " for i in range(min(csr.shape)):\n", 191 | " ind = indices[pointer[i]: pointer[i + 1]]\n", 192 | " j = ind.searchsorted(i)\n", 193 | " # matrix has only elements up until diagonal (in row i)\n", 194 | " if j == len(ind):\n", 195 | " continue\n", 196 | " j += pointer[i]\n", 197 | " # in case matrix has only elements after diagonal (in row i)\n", 198 | " if indices[j] == i:\n", 199 | " data[j] = value" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "# Coarsen a graph given by rr,cc,vv. 
rr is assumed to be ordered\n", 211 | "def one_level_coarsening(rr,cc,vv,rid,weights):\n", 212 | " \n", 213 | " nnz = rr.shape[0]\n", 214 | " N = rr[nnz-1]+1\n", 215 | " #print(nnz,N)\n", 216 | " \n", 217 | " marked = np.zeros(N)\n", 218 | " rowstart = np.zeros(N)\n", 219 | " rowlength = np.zeros(N)\n", 220 | " cluster_id = np.zeros(N)\n", 221 | " \n", 222 | " oldval = rr[0]\n", 223 | " count = 0\n", 224 | " clustercount = 0\n", 225 | " \n", 226 | " for ii in range(nnz):\n", 227 | " rowlength[count] = rowlength[count] + 1\n", 228 | " if rr[ii] > oldval:\n", 229 | " oldval = rr[ii]\n", 230 | " rowstart[count+1] = ii\n", 231 | " count = count + 1\n", 232 | " \n", 233 | " for ii in range(N):\n", 234 | " tid = rid[ii]\n", 235 | " if marked[tid]==0.0:\n", 236 | " wmax = 0.0\n", 237 | " rs = rowstart[tid]\n", 238 | " marked[tid] = 1.0\n", 239 | " bestneighbor = -1\n", 240 | " for jj in range(int(rowlength[tid])):\n", 241 | " nid = cc[rs+jj]\n", 242 | " tval = (1.0-marked[nid]) * vv[rs+jj] * (1.0/weights[tid]+ 1.0/weights[nid])\n", 243 | " if tval > wmax:\n", 244 | " wmax = tval\n", 245 | " bestneighbor = nid\n", 246 | " \n", 247 | " cluster_id[tid] = clustercount;\n", 248 | " \n", 249 | " if bestneighbor > -1:\n", 250 | " cluster_id[bestneighbor] = clustercount\n", 251 | " marked[bestneighbor] = 1.0\n", 252 | " \n", 253 | " clustercount = clustercount + 1\n", 254 | " \n", 255 | " return cluster_id" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "maxsize = 200\n", 267 | "N = W.shape[0]\n", 268 | "#rid = np.random.permutation(range(N))\n", 269 | "#print(N)\n", 270 | "#print(rid[0:10])\n", 271 | "\n", 272 | "graphs,parents = metis_coarsening(W.copy(),maxsize,rid)\n", 273 | "#print(graph)\n", 274 | "#print(parents)\n", 275 | "\n", 276 | "\n", 277 | "# CHECK RESULTS WITH MATLAB CODE\n", 278 | "graph0 = graphs[0]\n", 279 | "print(graph0.shape)\n", 280 | "print(graph0[0,:])\n", 281 | "\n", 282 | "graph1 = graphs[1]\n", 283 | "print(graph1.shape)\n", 284 | "print(graph1[0,:])\n", 285 | "\n", 286 | "graph2 = graphs[2]\n", 287 | "print(graph2.shape)\n", 288 | "print(graph2[0,:])\n", 289 | "\n", 290 | "parents0 = parents[0]\n", 291 | "print(parents0.shape)\n", 292 | "print(parents0[0:10])\n", 293 | "\n", 294 | "parents1 = parents[1]\n", 295 | "print(parents1.shape)\n", 296 | "print(parents1[0:10])" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "outputs": [], 306 | "source": [ 307 | "import sys\n", 308 | "sys.path.append('..')\n", 309 | "from lib import coarsening\n", 310 | "\n", 311 | "graphs, parents = coarsening.metis(W, 2, rid)\n", 312 | "\n", 313 | "for i,A in enumerate(graphs):\n", 314 | " M, M = A.shape\n", 315 | " A = A.tocoo()\n", 316 | " A.setdiag(0)\n", 317 | " A = A.tocsr()\n", 318 | " A.eliminate_zeros()\n", 319 | " graphs[i] = A\n", 320 | " print('Layer {0}: M_{0} = {1} nodes, {2} edges'.format(i, M, A.nnz))\n", 321 | "\n", 322 | "# CHECK RESULTS WITH MATLAB CODE\n", 323 | "graph0 = graphs[0]\n", 324 | "print(graph0.shape)\n", 325 | "print(graph0[0,:])\n", 326 | "\n", 327 | "graph1 = graphs[1].tocsr()\n", 328 | "print(graph1.shape)\n", 329 | "print(graph1[0,:])\n", 330 | "\n", 331 | "graph2 = graphs[2].tocsr()\n", 332 | "print(graph2.shape)\n", 333 | "print(graph2[0,:])\n", 334 | "\n", 335 | "parents0 = parents[0]\n", 336 | "print(parents0.shape)\n", 337 | 
"print(parents0[0:10])\n", 338 | "\n", 339 | "parents1 = parents[1]\n", 340 | "print(parents1.shape)\n", 341 | "print(parents1[0:10])" 342 | ] 343 | }, 344 | { 345 | "cell_type": "raw", 346 | "metadata": {}, 347 | "source": [ 348 | "# Python results\n", 349 | "\n", 350 | "Size of original graph= 533\n", 351 | "level= 1\n", 352 | "Size of coarser graph= 279\n", 353 | "level= 2\n", 354 | "Size of coarser graph= 147\n", 355 | "(533, 533)\n", 356 | " (0, 18)\t0.810464124165\n", 357 | " (0, 59)\t0.349678536711\n", 358 | " (0, 60)\t0.591336229831\n", 359 | " (0, 83)\t0.388420442335\n", 360 | " (0, 105)\t0.255134781894\n", 361 | " (0, 210)\t0.656852096558\n", 362 | " (0, 226)\t0.900257809833\n", 363 | " (0, 299)\t0.065093756932\n", 364 | " (0, 340)\t0.810464124165\n", 365 | " (0, 407)\t0.431454676752\n", 366 | "(279, 279)\n", 367 | " (0, 44)\t1.63660876872\n", 368 | " (0, 58)\t2.42459126058\n", 369 | " (0, 71)\t0.186153138092\n", 370 | " (0, 115)\t1.99313658383\n", 371 | " (0, 167)\t1.24818832639\n", 372 | " (0, 168)\t2.95891026039\n", 373 | " (0, 179)\t0.388420442335\n", 374 | " (0, 240)\t0.431454676752\n", 375 | "(147, 147)\n", 376 | " (0, 21)\t5.1886032791\n", 377 | " (0, 85)\t1.08484314421\n", 378 | " (0, 87)\t0.353738954483\n", 379 | " (0, 127)\t0.186153138092\n", 380 | " (0, 135)\t1.88273900708\n", 381 | " (0, 141)\t0.255134781894\n", 382 | "(533,)\n", 383 | "[ 57. 148. 184. 237. 93. 93. 47. 28. 133. 71.]\n", 384 | "(279,)\n", 385 | "[ 127. 4. 88. 128. 50. 120. 54. 123. 146. 26.]" 386 | ] 387 | }, 388 | { 389 | "cell_type": "raw", 390 | "metadata": { 391 | "collapsed": true 392 | }, 393 | "source": [ 394 | "# Matlab results\n", 395 | "\n", 396 | "ans =\n", 397 | "\n", 398 | " (1,19) 0.8105\n", 399 | " (1,60) 0.3497\n", 400 | " (1,61) 0.5913\n", 401 | " (1,84) 0.3884\n", 402 | " (1,106) 0.2551\n", 403 | " (1,211) 0.6569\n", 404 | " (1,227) 0.9003\n", 405 | " (1,300) 0.0651\n", 406 | " (1,341) 0.8105\n", 407 | " (1,408) 0.4315\n", 408 | "\n", 409 | "\n", 410 | "ans =\n", 411 | "\n", 412 | " (1,45) 1.6366\n", 413 | " (1,59) 2.4246\n", 414 | " (1,72) 0.1862\n", 415 | " (1,116) 1.9931\n", 416 | " (1,168) 1.2482\n", 417 | " (1,169) 2.9589\n", 418 | " (1,180) 0.3884\n", 419 | " (1,241) 0.4315\n", 420 | "\n", 421 | "\n", 422 | "ans =\n", 423 | "\n", 424 | " (1,22) 5.1886\n", 425 | " (1,86) 1.0848\n", 426 | " (1,88) 0.3537\n", 427 | " (1,128) 0.1862\n", 428 | " (1,136) 1.8827\n", 429 | " (1,142) 0.2551\n", 430 | "\n", 431 | "\n", 432 | "ans =\n", 433 | "\n", 434 | " 58\n", 435 | " 149\n", 436 | " 185\n", 437 | " 238\n", 438 | " 94\n", 439 | " 94\n", 440 | " 48\n", 441 | " 29\n", 442 | " 134\n", 443 | " 72\n", 444 | "\n", 445 | "\n", 446 | "ans =\n", 447 | "\n", 448 | " 128\n", 449 | " 5\n", 450 | " 89\n", 451 | " 129\n", 452 | " 51\n", 453 | " 121\n", 454 | " 55\n", 455 | " 124\n", 456 | " 147\n", 457 | " 27" 458 | ] 459 | } 460 | ], 461 | "metadata": { 462 | "kernelspec": { 463 | "display_name": "Python 3", 464 | "language": "python", 465 | "name": "python3" 466 | }, 467 | "language_info": { 468 | "codemirror_mode": { 469 | "name": "ipython", 470 | "version": 3 471 | }, 472 | "file_extension": ".py", 473 | "mimetype": "text/x-python", 474 | "name": "python", 475 | "nbconvert_exporter": "python", 476 | "pygments_lexer": "ipython3", 477 | "version": "3.5.2" 478 | } 479 | }, 480 | "nbformat": 4, 481 | "nbformat_minor": 0 482 | } 483 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/trials/makefile: 
-------------------------------------------------------------------------------- 1 | NB = $(sort $(wildcard *.ipynb)) 2 | 3 | run: $(NB) 4 | 5 | $(NB): 6 | jupyter nbconvert --inplace --execute --ExecutePreprocessor.timeout=-1 $@ 7 | 8 | clean: 9 | jupyter nbconvert --inplace --ClearOutputPreprocessor.enabled=True $(NB) 10 | 11 | .PHONY: run $(NB) clean 12 | -------------------------------------------------------------------------------- /HCP_fmripredict/cnn_graph_codes/usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "$\\newcommand{\\G}{\\mathcal{G}}$\n", 10 | "$\\newcommand{\\V}{\\mathcal{V}}$\n", 11 | "$\\newcommand{\\E}{\\mathcal{E}}$\n", 12 | "$\\newcommand{\\R}{\\mathbb{R}}$\n", 13 | "\n", 14 | "This notebook shows how to apply our graph ConvNet ([paper] & [code]), or any other, to your structured or unstructured data. For this example, we assume that we have $n$ samples $x_i \\in \\R^{d_x}$ arranged in a data matrix $$X = [x_1, ..., x_n]^T \\in \\R^{n \\times d_x}.$$ Each sample $x_i$ is associated with a vector $y_i \\in \\R^{d_y}$ for a regression task or a label $y_i \\in \\{0,\\ldots,C\\}$ for a classification task.\n", 15 | "\n", 16 | "[paper]: https://arxiv.org/abs/1606.09375\n", 17 | "[code]: https://github.com/mdeff/cnn_graph\n", 18 | "\n", 19 | "From there, we'll structure our data with a graph $\\G = (\\V, \\E, A)$ where $\\V$ is the set of $d_x = |\\V|$ vertices, $\\E$ is the set of edges and $A \\in \\R^{d_x \\times d_x}$ is the adjacency matrix. That matrix represents the weight of each edge, i.e. $A_{i,j}$ is the weight of the edge connecting $v_i \\in \\V$ to $v_j \\in \\V$. The weights of that feature graph thus represent pairwise relationships between features $i$ and $j$. We call that regime **signal classification / regression**, as the samples $x_i$ to be classified or regressed are graph signals.\n", 20 | "\n", 21 | "Other modelling possibilities include:\n", 22 | "1. Using a data graph, i.e. an adjacency matrix $A \\in \\R^{n \\times n}$ which represents pairwise relationships between samples $x_i \\in \\R^{d_x}$. The problem is here to predict a graph signal $y \\in \\R^{n \\times d_y}$ given a graph characterized by $A$ and some graph signals $X \\in \\R^{n \\times d_x}$. We call that regime **node classification / regression**, as we classify or regress nodes instead of signals.\n", 23 | "2. Another problem of interest is whole graph classification, with or without signals on top. We'll call that third regime **graph classification / regression**. The problem here is to classify or regress a whole graph $A_i \\in \\R^{n \\times n}$ (with or without an associated data matrix $X_i \\in \\R^{n \\times d_x}$) into $y_i \\in \\R^{d_y}$. In case we have no signal, we can use a constant vector $X_i = 1_n$ of size $n$." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "from lib import models, graph, coarsening, utils\n", 35 | "import numpy as np\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "%matplotlib inline" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "# 1 Data\n", 45 | "\n", 46 | "For the purpose of the demo, let's create a random data matrix $X \\in \\R^{n \\times d_x}$ and somehow infer a label $y_i = f(x_i)$." 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "d = 100 # Dimensionality.\n", 58 | "n = 10000 # Number of samples.\n", 59 | "c = 5 # Number of feature communities.\n", 60 | "\n", 61 | "# Data matrix, structured in communities (feature-wise).\n", 62 | "X = np.random.normal(0, 1, (n, d)).astype(np.float32)\n", 63 | "X += np.linspace(0, 1, c).repeat(d // c)\n", 64 | "\n", 65 | "# Noisy non-linear target.\n", 66 | "w = np.random.normal(0, .02, d)\n", 67 | "t = X.dot(w) + np.random.normal(0, .001, n)\n", 68 | "t = np.tanh(t)\n", 69 | "plt.figure(figsize=(15, 5))\n", 70 | "plt.plot(t, '.')\n", 71 | "\n", 72 | "# Classification.\n", 73 | "y = np.ones(t.shape, dtype=np.uint8)\n", 74 | "y[t > t.mean() + 0.4 * t.std()] = 0\n", 75 | "y[t < t.mean() - 0.4 * t.std()] = 2\n", 76 | "print('Class imbalance: ', np.unique(y, return_counts=True)[1])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "Then split this dataset into training, validation and testing sets." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "n_train = n // 2\n", 95 | "n_val = n // 10\n", 96 | "\n", 97 | "X_train = X[:n_train]\n", 98 | "X_val = X[n_train:n_train+n_val]\n", 99 | "X_test = X[n_train+n_val:]\n", 100 | "\n", 101 | "y_train = y[:n_train]\n", 102 | "y_val = y[n_train:n_train+n_val]\n", 103 | "y_test = y[n_train+n_val:]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# 2 Graph\n", 111 | "\n", 112 | "The second thing we need is a **graph between features**, i.e. an adjacency matrix $A \\in \\mathbb{R}^{d_x \\times d_x}$.\n", 113 | "Structuring data with graphs is very flexible: it can accomodate both structured and unstructured data.\n", 114 | "1. **Structured data**.\n", 115 | " 1. The data is structured by an Euclidean domain, e.g. $x_i$ represents an image, a sound or a video. We can use a classical ConvNet with 1D, 2D or 3D convolutions or a graph ConvNet with a line or grid graph (however losing the orientation).\n", 116 | " 2. The data is structured by a graph, e.g. the data lies on a transportation, energy, brain or social network.\n", 117 | "2. **Unstructured data**. We could use a fully connected network, but the learning and computational complexities are gonna be large. An alternative is to construct a sparse similarity graph between features (or between samples) and use a graph ConvNet, effectively structuring the data and drastically reducing the number of parameters through weight sharing. As for classical ConvNets, the number of parameters are independent of the input size.\n", 118 | "\n", 119 | "There are many ways, supervised or unsupervised, to construct a graph given some data. And better the graph, better the performance ! For this example we'll define the adjacency matrix as a simple similarity measure between features. Below are the choices one has to make when constructing such a graph.\n", 120 | "1. The distance function. We'll use the Euclidean distance $d_{ij} = \\|x_i - x_j\\|_2$.\n", 121 | "2. The kernel. We'll use the Gaussian kernel $a_{ij} = \\exp(d_{ij}^2 / \\sigma^2)$.\n", 122 | "3. The type of graph. We'll use a $k$ nearest neigbors (kNN) graph." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "dist, idx = graph.distance_scipy_spatial(X_train.T, k=10, metric='euclidean')\n", 134 | "A = graph.adjacency(dist, idx).astype(np.float32)\n", 135 | "\n", 136 | "assert A.shape == (d, d)\n", 137 | "print('d = |V| = {}, k|V| < |E| = {}'.format(d, A.nnz))\n", 138 | "plt.spy(A, markersize=2, color='black');" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "To be able to pool graph signals, we need first to coarsen the graph, i.e. to find which vertices to group together. At the end we'll have multiple graphs, like a pyramid, each at one level of resolution. The finest graph is where the input data lies, the coarsest graph is where the data at the output of the graph convolutional layers lie. That data, of reduced spatial dimensionality, can then be fed to a fully connected layer.\n", 146 | "\n", 147 | "The parameter here is the number of times to coarsen the graph. Each coarsening approximately reduces the size of the graph by a factor two. Thus if you want a pooling of size 4 in the first layer followed by a pooling of size 2 in the second, you'll need to coarsen $\\log_2(4+2) = 3$ times.\n", 148 | "\n", 149 | "After coarsening we rearrange the vertices (and add fake vertices) such that pooling a graph signal is analog to pooling a 1D signal. See the [paper] for details.\n", 150 | "\n", 151 | "[paper]: https://arxiv.org/abs/1606.09375" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)\n", 163 | "\n", 164 | "X_train = coarsening.perm_data(X_train, perm)\n", 165 | "X_val = coarsening.perm_data(X_val, perm)\n", 166 | "X_test = coarsening.perm_data(X_test, perm)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "We finally need to compute the graph Laplacian $L$ for each of our graphs (the original and the coarsened versions), defined by their adjacency matrices $A$. The sole parameter here is the type of Laplacian, e.g. the combinatorial Laplacian, the normalized Laplacian or the random walk Laplacian." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "L = [graph.laplacian(A, normalized=True) for A in graphs]\n", 185 | "graph.plot_spectrum(L)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "# 3 Graph ConvNet\n", 193 | "\n", 194 | "Here we apply the graph convolutional neural network to signals lying on graphs. After designing the architecture and setting the hyper-parameters, the model takes as inputs the data matrix $X$, the target $y$ and a list of graph Laplacians $L$, one per coarsening level.\n", 195 | "\n", 196 | "The data, architecture and hyper-parameters are absolutely *not engineered to showcase performance*. Its sole purpose is to illustrate usage and functionality." 
197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "params = dict()\n", 208 | "params['dir_name'] = 'demo'\n", 209 | "params['num_epochs'] = 40\n", 210 | "params['batch_size'] = 100\n", 211 | "params['eval_frequency'] = 200\n", 212 | "\n", 213 | "# Building blocks.\n", 214 | "params['filter'] = 'chebyshev5'\n", 215 | "params['brelu'] = 'b1relu'\n", 216 | "params['pool'] = 'apool1'\n", 217 | "\n", 218 | "# Number of classes.\n", 219 | "C = y.max() + 1\n", 220 | "assert C == np.unique(y).size\n", 221 | "\n", 222 | "# Architecture.\n", 223 | "params['F'] = [32, 64] # Number of graph convolutional filters.\n", 224 | "params['K'] = [20, 20] # Polynomial orders.\n", 225 | "params['p'] = [4, 2] # Pooling sizes.\n", 226 | "params['M'] = [512, C] # Output dimensionality of fully connected layers.\n", 227 | "\n", 228 | "# Optimization.\n", 229 | "params['regularization'] = 5e-4\n", 230 | "params['dropout'] = 1\n", 231 | "params['learning_rate'] = 1e-3\n", 232 | "params['decay_rate'] = 0.95\n", 233 | "params['momentum'] = 0.9\n", 234 | "params['decay_steps'] = n_train / params['batch_size']" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "model = models.cgcnn(L, **params)\n", 246 | "accuracy, loss, t_step = model.fit(X_train, y_train, X_val, y_val)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "# 4 Evaluation\n", 254 | "\n", 255 | "We often want to monitor:\n", 256 | "1. The convergence, i.e. the training loss and the classification accuracy on the validation set.\n", 257 | "2. The performance, i.e. the classification accuracy on the testing set (to be compared with the training set accuracy to spot overfitting).\n", 258 | "\n", 259 | "The `model_perf` class in [utils.py](utils.py) can be used to compactly evaluate multiple models." 
260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "fig, ax1 = plt.subplots(figsize=(15, 5))\n", 271 | "ax1.plot(accuracy, 'b.-')\n", 272 | "ax1.set_ylabel('validation accuracy', color='b')\n", 273 | "ax2 = ax1.twinx()\n", 274 | "ax2.plot(loss, 'g.-')\n", 275 | "ax2.set_ylabel('training loss', color='g')\n", 276 | "plt.show()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "collapsed": false 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "print('Time per step: {:.2f} ms'.format(t_step*1000))" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "collapsed": false 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "res = model.evaluate(X_test, y_test)\n", 299 | "print(res[0])" 300 | ] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "Python 3", 306 | "language": "python", 307 | "name": "python3" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.4.3" 320 | } 321 | }, 322 | "nbformat": 4, 323 | "nbformat_minor": 0 324 | } 325 | -------------------------------------------------------------------------------- /HCP_fmripredict/config.py: -------------------------------------------------------------------------------- 1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6 2 | 3 | # Author: Yu Zhang 4 | # License: simplified BSD 5 | # coding: utf-8 6 | 7 | ###default parameter settings 8 | class Config(): 9 | pathfmri = '/home/yuzhang/scratch/HCP/aws_s3_HCP1200/FMRI/' 10 | pathout = '/home/yuzhang/scratch/HCP/temp_res_new/' 11 | 12 | TR = 0.72 13 | lowcut = 0.01 14 | highcut = 0.08 15 | window_size = 6 16 | ##window_size_trial = math.ceil(window_size/TR) 17 | 18 | ##task info 19 | modality = 'MOTOR' 20 | ###dict for different types of movement 21 | task_contrasts = {"rf": "foot", 22 | "lf": "foot", 23 | "rh": "hand", 24 | "lh": "hand", 25 | "t": "tongue"} 26 | 27 | ##the chosen atlas to map fmri data 28 | # mmp_atlas = "/home/yuzhang/projects/rrg-pbellec/yuzhang/HCP/codes/HCP_S1200_GroupAvg_v1/Gordon333.32k_fs_LR.dlabel.nii" 29 | pathsource = "/home/yuzhang/scratch/HCP/codes/" 30 | mmp_atlas = pathsource + "HCP_S1200_GroupAvg_v1/" + "Q1-Q6_RelatedValidation210.CorticalAreas_dil_Final_Final_Areas_Group_Colors.32k_fs_LR.dlabel.nii" 31 | AtlasName = 'MMP' 32 | Subject_Num = 2400 33 | Trial_Num = 284 34 | Node_Num = 32000 35 | Region_Num = 200 36 | 37 | startsub = 0 38 | endsub = Subject_Num 39 | subjectlist = 'ALL' 40 | n_thread = 5 41 | n_buffersize = 50 42 | 43 | ##temp saving file 44 | fmri_filename = 'Atlas.dtseries.nii' 45 | confound_filename = 'Movement_Regressors.txt' 46 | rsfmri_filename = 'Atlas_hp2000_clean.dtseries.nii' 47 | 48 | ''' 49 | ###do not update paras in config 50 | ev_filename = 'event_labels_1200R' + '_test_' + subjectlist + '.h5' # '.txt' 51 | fmri_matrix_filename = AtlasName + '_ROI_act_1200R' + '_test_' + subjectlist + '.lmdb' #'.h5' # '.txt' 52 | #lmdb_filename = config_instance.pathout + hcp_fmri_instance.modality + '_' + fmri_matrix_filename 53 | ''' 54 | 55 | import os 56 | try: 57 | ###params for graph_cnn 58 | import tensorflow as tf 59 | gcnn 
= tf.app.flags 60 | FLAGS = gcnn.FLAGS 61 | 62 | # Graphs. 63 | gcnn.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.') 64 | gcnn.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.') 65 | gcnn.DEFINE_integer('coarsening_levels', 6, 'Number of coarsened graphs.') 66 | gcnn.DEFINE_string('adj_mat', os.path.join(pathsource, 'MMP_adjacency_mat_white.pconn.nii'), 'Directory to adj matrix on surface data.') 67 | except ImportError: 68 | print("Tensorflow is not avaliable in the current node!") 69 | 70 | gcnn_layers = 3 71 | gcnn_hidden = 256 72 | gcnn_pool = 4 73 | 74 | gcnn_coarsening_levels = 6 75 | gcnn_adj_mat_dict = {'surface': os.path.join(pathsource, 'MMP_adjacency_mat_white.pconn.nii'), 76 | 'SC': os.path.join(pathsource, 'HCP_S1200_GroupAvg_v1/S1200.All.corrThickness_MSMAll.32k_fs_LR.dscalar.nii'), 77 | 'FC': os.path.join(pathsource, 'HCP_S1200_GroupAvg_v1/S1200.All.corrThickness_MSMAll.32k_fs_LR.dscalar.nii')} 78 | gcnn_adj_mat_type = 'SC' 79 | -------------------------------------------------------------------------------- /HCP_fmripredict/extract_fmri_event_data.py: -------------------------------------------------------------------------------- 1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6 2 | 3 | # Author: Yu Zhang 4 | # License: simplified BSD 5 | # coding: utf-8 6 | import sys 7 | import os 8 | import warnings 9 | sys.path.append('/home/yuzhang/scratch/HCP/codes/HCP_fmripredict/') 10 | 11 | import argparse 12 | from tensorpack.utils import logger 13 | from tensorpack.utils.serialize import dumps, loads 14 | 15 | import numpy as np 16 | import importlib 17 | import lmdb 18 | from pathlib import Path 19 | import config, utils 20 | #importlib.reload(utils) 21 | 22 | if __name__ == '__main__': 23 | args = sys.argv[1:] 24 | logger.set_logger_dir("train_log/svc_simple_log",action="d") 25 | warnings.simplefilter("ignore") 26 | #warnings.filterwarnings(action='once') 27 | 28 | parser = argparse.ArgumentParser(description='The description of the parameters') 29 | 30 | parser.add_argument('--task_modality', '-c', help='(required, string) Modality name in Capital for fmri and event design files', type=str) 31 | parser.add_argument('--subject_to_start', '-f', help='(optional, int,default=0) The index of the first subject in the all_subjects_list for analysis', type=int) 32 | parser.add_argument('--subject_to_last', '-g', help='(optional, int,default=1086) The index of the last subject in the all_subjects_list for analysis', type=int) 33 | parser.add_argument('--subjectlist_index', '-l', help='(optional, string, default='') The index indicator of the selected subject list', type=str) 34 | 35 | parser.add_argument('--n_thread', '-t', help='(optional, int, default = 5) Number of threads from each cpu to be used', type=int) 36 | parser.add_argument('--n_buffersize', '-b', help='(optional, int, default = 50) Number of files to be read at once', type=int) 37 | parser.add_argument('--n_sessions', '-j', help='(optional, int, default = 0) Total number of session for the subject', type=int) 38 | parser.add_argument('--n_sessions_combined', '-x', help='(optional, int, default = 1) The number of sessions to combine', type=int) 39 | 40 | parsed, unknown = parser.parse_known_args(args) 41 | 42 | modality = parsed.task_modality 43 | 44 | startsub = parsed.subject_to_start 45 | endsub = parsed.subject_to_last 46 | subjectlist = parsed.subjectlist_index 47 | 48 | n_jobs = 1 49 | n_thread = parsed.n_thread 50 | n_buffersize = parsed.n_buffersize 51 | 
n_sessions = parsed.n_sessions 52 | n_sessions_combined = parsed.n_sessions_combined 53 | 54 | #####re-assign parameter settings in config 55 | config_instance = config.Config() 56 | 57 | if modality: 58 | config_instance.modality = modality 59 | if startsub: 60 | config_instance.startsub = startsub 61 | if endsub: 62 | config_instance.endsub = endsub 63 | if subjectlist: 64 | config_instance.subjectlist = subjectlist 65 | if n_thread: 66 | config_instance.n_thread = n_thread 67 | if n_buffersize: 68 | config_instance.n_buffersize = n_buffersize 69 | if not os.path.exists(config_instance.pathout): 70 | os.makedirs(config_instance.pathout) 71 | 72 | ###use config parameters to collect fmri data 73 | ''' 74 | config_instance = config.Config() 75 | modality = 'MOTOR' 76 | startsub = 0 77 | endsub = 2400 78 | subjectlist = 'ALL' 79 | ''' 80 | hcp_fmri_instance = utils.hcp_task_fmri(config_instance) 81 | 82 | ##prepare fmri data for analysis 83 | subjects_trial_label_matrix, sub_name, coding,trial_dura = hcp_fmri_instance.prepare_fmri_files_list() 84 | print(np.array(subjects_trial_label_matrix).shape) 85 | print("each trial contains %d volumes/TRs for task %s" % (trial_dura,modality)) 86 | ###updating information in the config settings 87 | config_instance.task_contrasts = hcp_fmri_instance.task_contrasts 88 | config_instance.Trial_dura = trial_dura 89 | config_instance.EVS_files = hcp_fmri_instance.EVS_files 90 | config_instance.fmri_files = hcp_fmri_instance.fmri_files 91 | config_instance.confound_files = hcp_fmri_instance.confound_files 92 | 93 | ############ 94 | fmri_files = hcp_fmri_instance.fmri_files 95 | confound_files = hcp_fmri_instance.confound_files 96 | print(np.array(subjects_trial_label_matrix).shape) 97 | #print(np.unique(sub_name), len(sub_name)) 98 | 99 | ###output logs 100 | print("--fmri_folder: ", config_instance.pathfmri) 101 | print('--temp_out:', config_instance.pathout) 102 | print('--atlas_filename: %s \n\n' % config_instance.AtlasName) 103 | 104 | mmp_atlas = config_instance.mmp_atlas 105 | #lmdb_filename = config_instance.pathout+hcp_fmri_instance.modality+'_'+config_instance.AtlasName + '_ROI_act_1200R' + '_test_' + subjectlist + '.lmdb' 106 | ##subjects_tc_matrix, subname_coding = utils.extract_mean_seris(fmri_files, confound_files, mmp_atlas, lmdb_filename, nr_proc=100, buffer_size=10) 107 | subjects_tc_matrix, subname_coding = utils.extract_mean_seris_thread(fmri_files, confound_files, mmp_atlas, 108 | hcp_fmri_instance.lmdb_filename, 109 | hcp_fmri_instance.Trial_Num, 110 | nr_thread=config_instance.n_thread, buffer_size=config_instance.n_buffersize) 111 | print(np.array(subjects_tc_matrix).shape) 112 | print('\n') 113 | 114 | ##### 115 | sub_name = [] 116 | for ss in subname_coding: 117 | sub_name.append(ss.split('_')[0]) 118 | hcp_fmri_instance.sub_name = sub_name 119 | subjects_tc_matrix, subjects_trial_label_matrix = utils.preclean_data_for_shape_match(subjects_tc_matrix,subjects_trial_label_matrix,subname_coding) 120 | config_instance.Subject_Num = np.array(subjects_tc_matrix).shape[0] 121 | print(np.array(subjects_trial_label_matrix).shape) 122 | print(np.array(subjects_tc_matrix).shape) 123 | 124 | ''' 125 | ##only using this for cnn, no need for svm or fc-nn 126 | ###use config parameters to collect rs-fmri data 127 | hcp_rsfmri_instance = utils.hcp_rsfmri(config_instance) 128 | ##prepare fmri data for analysis 129 | subjects_tc_matrix, mean_corr_matrix = hcp_rsfmri_instance.prepare_rsfmri_files_list(sub_name=sub_name,N_thread=4) 130 | 131 | ''' 132 | 
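    ##illustrative sketch only (not the helper implemented in utils.py): a "subject-specific split"
    ##keeps every trial of a given subject in the same fold, so the classifiers below are always
    ##evaluated on subjects never seen during training. With scikit-learn the same idea can be
    ##written as follows (trial_features/trial_labels/trial_subjects are placeholder names):
    '''
    from sklearn.model_selection import GroupShuffleSplit
    splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
    train_idx, test_idx = next(splitter.split(trial_features, trial_labels, groups=trial_subjects))
    '''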
print('\n Classify different tasks using simple-svm with rbf kernel...') 133 | target_name = np.unique(list(hcp_fmri_instance.task_contrasts.values())) 134 | ##scores= utils.my_svc_simple(subjects_tc_matrix, subjects_trial_label_matrix, target_name, sub_num=1500, block_dura=trial_dura, my_cv_fold=10,my_comp=20) 135 | ##print(scores) 136 | ''' 137 | print('\n Changing the validation process by subject-specific split and average within each trial......') 138 | scores= utils.my_svc_simple_subject_validation_new(subjects_tc_matrix,subjects_trial_label_matrix,target_name,block_dura=trial_dura,my_cv_fold=10,my_testsize=0.2,my_valsize=0.1) 139 | print(scores) 140 | 141 | print('\n Changing the validation process by subject-specific split...') 142 | scores= utils.my_svc_simple_subject_validation_new(subjects_tc_matrix,subjects_trial_label_matrix,target_name,block_dura=1,my_cv_fold=10,my_testsize=0.2,my_valsize=0.1) 143 | print(scores) 144 | ''' 145 | ############################## 146 | ####using fully-connected neural networks for classification of fmri tasks 147 | print('\n Classify different tasks using simple fc-nn...') 148 | ##utils.build_fc_nn_simple(subjects_tc_matrix, subjects_trial_label_matrix, target_name, layers=5, hidden_size=64,dropout=0.25,batch_size=128) 149 | 150 | print('\n Classify different tasks using simple fc-nn by subject-specific split...') 151 | utils.build_fc_nn_subject_validation(subjects_tc_matrix,subjects_trial_label_matrix,target_name,block_dura=trial_dura, 152 | layers=5, hidden_size=256,dropout=0.25,batch_size=128,nepochs=50) 153 | 154 | print('\n Classify different tasks using simple fc-nn by subject-specific split and average within each trial...') 155 | utils.build_fc_nn_subject_validation(subjects_tc_matrix,subjects_trial_label_matrix,target_name,sub_num=100, block_dura=1, 156 | layers=5,hidden_size=256,dropout=0.25,batch_size=128,nepochs=50) 157 | 158 | 159 | ###use config parameters to set parameters for graph convolution 160 | target_name = np.unique(list(hcp_fmri_instance.task_contrasts.values())) 161 | hcp_gcnn_instance = utils.hcp_gcnn_fmri(config_instance) 162 | print('\n Classify different tasks using gcn by subject-specific split...') 163 | train_acc, test_acc, val_acc = hcp_gcnn_instance.build_graph_cnn_subject_validation_new(subjects_tc_matrix, subjects_trial_label_matrix, target_name,block_dura=1, 164 | layers=config_instance.gcnn_layers,hidden_size=config_instance.gcnn_hidden, 165 | pool_size=config_instance.gcnn_pool,batch_size=128, nepochs=50) 166 | 167 | print('\n Classify different tasks using gcn by subject-specific split and average within each trial...') 168 | ##train_acc_trial, test_acc_trial, val_acc_trial = hcp_gcnn_instance.build_graph_cnn_subject_validation(subjects_tc_matrix, subjects_trial_label_matrix, target_name,block_dura=trial_dura,layers=config_instance.gcnn_layers,hidden_size=config_instance.gcnn_hidden,pool_size=config_instance.gcnn_pool,batch_size=128, nepochs=50) 169 | train_acc_trial, test_acc_trial, val_acc_trial = hcp_gcnn_instance.build_graph_cnn_subject_validation_new(subjects_tc_matrix, subjects_trial_label_matrix, target_name,block_dura=trial_dura, 170 | layers=config_instance.gcnn_layers,hidden_size=config_instance.gcnn_hidden, 171 | pool_size=config_instance.gcnn_pool,batch_size=128, nepochs=50) 172 | 173 | 174 | ''' 175 | ####for script testing: 176 | modality='MOTOR' 177 | startsub = 0 178 | endsub = 2400 179 | subjectlist = 'ALL' 180 | 181 | python ./extract_fmri_event_data.py --task_modality=$modality 
--subject_to_start=0 --subject_to_last=100 --subjectlist_index='t010' 182 | 183 | ''' 184 | -------------------------------------------------------------------------------- /HCP_fmripredict/model.py: -------------------------------------------------------------------------------- 1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6 2 | 3 | # Author: Yu Zhang 4 | # License: simplified BSD 5 | # coding: utf-8 6 | 7 | ###define model for training 8 | import sys 9 | sys.path.append('/home/yuzhang/projects/rrg-pbellec/yuzhang/HCP/codes/HCP_fmripredict') 10 | import utils 11 | 12 | import numpy as np 13 | import pandas as pd 14 | 15 | from sklearn import svm, metrics 16 | from sklearn import preprocessing 17 | from sklearn.model_selection import cross_val_score, train_test_split,ShuffleSplit 18 | from sklearn.decomposition import PCA, FastICA, FactorAnalysis, DictionaryLearning, KernelPCA 19 | 20 | try: 21 | from keras.utils import np_utils 22 | from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout 23 | from keras.models import Model 24 | except ImportError: 25 | print("Tensorflow is not avaliable in the current node!") 26 | print("deep learning models will not be running for this test!") 27 | 28 | 29 | def build_fc_nn_model(Nfeatures,Nlabels,layers=3,hidden_size=256,dropout=0.25): 30 | ######fully-connected neural networks 31 | input0 = Input(shape=(Nfeatures,)) 32 | drop1 = input0 33 | for li in np.arange(layers): 34 | hidden1 = Dense(hidden_size, activation='relu')(drop1) 35 | drop1 = Dropout(dropout)(hidden1) 36 | hidden_size = np.int32(hidden_size / 2) 37 | if hidden_size < 10: 38 | hidden_size = 16 39 | 40 | hidden2 = Dense(16, activation='relu')(drop1) 41 | drop2 = Dropout(0.5)(hidden2) 42 | out = Dense(Nlabels, activation='softmax')(drop2) 43 | 44 | model = Model(inputs=input0, outputs=out) 45 | #model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['mse','accuracy']) 46 | model.summary() 47 | 48 | return model 49 | 50 | 51 | def build_cnn_model(input_shape, Nlabels, filters=32, convsize=3, poolsize=2, hidden_size=128, conv_layers=2): 52 | # import keras.backend as K 53 | # if K.image_data_format() == 'channels_first': 54 | # img_shape = (1,img_rows,img_cols) 55 | # elif K.image_data_format() == 'channels_last': 56 | # img_shape = (img_rows,img_cols,1) 57 | 58 | 59 | input0 = Input(shape=input_shape) 60 | drop1 = input0 61 | for li in range(conv_layers): 62 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(drop1) 63 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(conv1) 64 | pool1 = MaxPooling2D((poolsize, poolsize))(conv1) 65 | drop1 = Dropout(0.25)(pool1) 66 | filters *= 2 67 | 68 | 69 | drop2 = drop1 70 | flat = Flatten()(drop2) 71 | hidden = Dense(hidden_size, activation='relu')(flat) 72 | drop3 = Dropout(0.5)(hidden) 73 | #hidden = Dense((hidden_size/4).astype(int), activation='relu')(drop3) 74 | #drop4 = Dropout(0.5)(hidden) 75 | out = Dense(Nlabels, activation='softmax')(drop3) 76 | 77 | model = Model(inputs=input0, outputs=out) 78 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 79 | model.summary() 80 | 81 | return model 82 | -------------------------------------------------------------------------------- /HCP_fmripredict/my_test_mnist_gcn.py: -------------------------------------------------------------------------------- 1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6 2 | 3 | # Author: Yu Zhang 4 | # License: simplified BSD 5 
| # coding: utf-8 6 | 7 | import numpy as np 8 | import time 9 | import sys, os 10 | sys.path.append('/home/yu/PycharmProjects/HCP_fmripredict/') 11 | from cnn_graph.lib import models, graph, coarsening, utils 12 | 13 | import tensorflow as tf 14 | from tensorflow.examples.tutorials.mnist import input_data 15 | 16 | num_CPU = 2 17 | config_TF = tf.ConfigProto(intra_op_parallelism_threads=num_CPU,\ 18 | inter_op_parallelism_threads=num_CPU, allow_soft_placement=True,\ 19 | device_count = {'CPU' : num_CPU}) 20 | session = tf.Session(config=config_TF) 21 | 22 | flags = tf.app.flags 23 | FLAGS = flags.FLAGS 24 | 25 | # Graphs. 26 | flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.') 27 | flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).') 28 | flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.') 29 | flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.') 30 | # Directories. 31 | flags.DEFINE_string('dir_data', os.path.join('..', 'data', 'mnist'), 'Directory to store data.') 32 | 33 | img_dim = 28 34 | 35 | 36 | def grid_graph(m, corners=False): 37 | ##build a graph on MNIST digit images 38 | z = graph.grid(m) 39 | dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric) 40 | A = graph.adjacency(dist, idx) 41 | 42 | # Connections are only vertical or horizontal on the grid. 43 | # Corner vertices are connected to 2 neighbors only. 44 | if corners: 45 | import scipy.sparse 46 | A = A.toarray() 47 | A[A < A.max() / 1.5] = 0 48 | A = scipy.sparse.csr_matrix(A) 49 | print('{} edges'.format(A.nnz)) 50 | 51 | ##plt.spy(A, markersize=2, color='black') 52 | print("{} > {} edges".format(A.nnz // 2, FLAGS.number_edges * m ** 2 // 2)) 53 | return A 54 | 55 | ##data preparation 56 | mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False) 57 | 58 | train_data = mnist.train.images.astype(np.float32) 59 | val_data = mnist.validation.images.astype(np.float32) 60 | test_data = mnist.test.images.astype(np.float32) 61 | train_labels = mnist.train.labels 62 | val_labels = mnist.validation.labels 63 | test_labels = mnist.test.labels 64 | 65 | ###calculate the adjacency matrix based on the euclidean distance of spatial locations 66 | A = grid_graph(img_dim, corners=False) 67 | A = graph.replace_random_edges(A, 0.01) 68 | ###build multi-level graph using coarsen (div by 2 at each level) 69 | graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False) 70 | L = [graph.laplacian(A, normalized=True) for A in graphs] 71 | 72 | ###parameters for the model 73 | common = {} 74 | common['dir_name'] = 'mnist/' 75 | common['num_epochs'] = 20 76 | common['batch_size'] = 100 77 | common['decay_steps'] = mnist.train.num_examples / common['batch_size'] 78 | common['eval_frequency'] = 30 * common['num_epochs'] 79 | common['brelu'] = 'b1relu' 80 | common['pool'] = 'mpool1' 81 | C = max(mnist.train.labels) + 1 # number of classes 82 | 83 | train_data_perm = coarsening.perm_data(train_data, perm) 84 | val_data_perm = coarsening.perm_data(val_data, perm) 85 | test_data_perm = coarsening.perm_data(test_data, perm) 86 | model_perf = utils.model_perf() 87 | 88 | ###test different param settings 89 | ##model1: no convolution 90 | name = 'softmax' 91 | params = common.copy() 92 | params['dir_name'] += name 93 | params['regularization'] = 5e-4 94 | params['dropout'] = 1 95 | params['learning_rate'] = 0.02 96 | params['decay_rate'] = 0.95 97 | params['momentum']
= 0.9 98 | params['F'] = [] 99 | params['K'] = [] 100 | params['p'] = [] 101 | params['M'] = [C] 102 | 103 | ####training and testing models 104 | print(L) 105 | 106 | t_start = time.process_time() 107 | model_perf.test(models.cgcnn(config_TF, L, **params), name, params, 108 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels) 109 | t_end_1 = time.process_time() - t_start 110 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_1)) 111 | 112 | ###model#2: one-layer convolution with fourier transform as filter 113 | common['regularization'] = 0 114 | common['dropout'] = 1 115 | common['learning_rate'] = 0.02 116 | common['decay_rate'] = 0.95 117 | common['momentum'] = 0.9 118 | common['F'] = [10] # Number of graph convolutional filters. 119 | common['K'] = [20] # Polynomial orders. 120 | common['p'] = [1] # Pooling sizes. 121 | common['M'] = [C] # Output dimensionality of fully connected layers. 122 | 123 | name = 'fgconv_softmax' 124 | params = common.copy() 125 | params['dir_name'] += name 126 | params['filter'] = 'fourier' 127 | params['K'] = [L[0].shape[0]] 128 | 129 | t_start = time.process_time() 130 | model_perf.test(models.cgcnn(config_TF, L, **params), name, params, 131 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels) 132 | t_end_2 = time.process_time() - t_start 133 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_2)) 134 | 135 | ##model#3: one-layer convolution with chebyshev5 and b1relu as filters 136 | name = 'cgconv_softmax' 137 | params = common.copy() 138 | params['dir_name'] += name 139 | params['filter'] = 'chebyshev5' 140 | # params['filter'] = 'chebyshev2' 141 | # params['brelu'] = 'b2relu' 142 | 143 | t_start = time.process_time() 144 | model_perf.test(models.cgcnn(config_TF,L, **params), name, params, 145 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels) 146 | t_end_3 = time.process_time() - t_start 147 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_3)) 148 | 149 | ##model#4: two convolutional layers with fourier transform as filters 150 | common['regularization'] = 5e-4 151 | common['dropout'] = 0.5 152 | common['learning_rate'] = 0.02 # 0.03 in the paper but sgconv_sgconv_fc_softmax has difficulty to converge 153 | common['decay_rate'] = 0.95 154 | common['momentum'] = 0.9 155 | common['F'] = [32, 64] # Number of graph convolutional filters. 156 | common['K'] = [25, 25] # Polynomial orders. 157 | common['p'] = [4, 4] # Pooling sizes. 158 | common['M'] = [512, C] # Output dimensionality of fully connected layers. 
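# Note on pooling vs. coarsening (illustrative, assuming the cnn_graph convention that a
# pooling size of p consumes log2(p) coarsening levels): with p = [4, 4] the two conv layers
# operate on the graphs of L[0] and L[2], which is why the Fourier filter sizes below are
# taken from L[0].shape[0] and L[2].shape[0], and why coarsening_levels is set to 4 at the
# top of this script. pool_levels is only a local illustration variable.
pool_levels = np.cumsum([0] + [int(np.log2(pp)) for pp in common['p']])[:-1]
print('conv layers use Laplacians at indices: {}'.format(list(pool_levels)))  # expected: [0, 2]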
159 | 160 | name = 'fgconv_fgconv_fc_softmax' # 'Non-Param' 161 | params = common.copy() 162 | params['dir_name'] += name 163 | params['filter'] = 'fourier' 164 | params['K'] = [L[0].shape[0], L[2].shape[0]] 165 | print([L[li].shape for li in range(len(L))]) 166 | 167 | t_start = time.process_time() 168 | model_perf.test(models.cgcnn(config_TF,L, **params), name, params, 169 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels) 170 | t_end_4 = time.process_time() - t_start 171 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_4)) 172 | 173 | 174 | 175 | ##model#5: two convolutional layers with Chebyshev polynomial as filters 176 | name = 'cgconv_cgconv_fc_softmax' # 'Non-Param' 177 | params = common.copy() 178 | params['dir_name'] += name 179 | params['filter'] = 'chebyshev5' 180 | print(params) 181 | print([L[li].shape for li in range(len(L))]) 182 | 183 | t_start = time.process_time() 184 | model_perf.test(models.cgcnn(config_TF,L, **params), name, params, 185 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels) 186 | t_end_5 = time.process_time() - t_start 187 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_5)) 188 | 189 | 190 | 191 | ###summary 192 | model_perf.show() 193 | print('Execution time for model1: {:.2f}s\n\n'.format(t_end_1)) 194 | print('Execution time for model2: {:.2f}s\n\n'.format(t_end_2)) 195 | print('Execution time for model3: {:.2f}s\n\n'.format(t_end_3)) 196 | print('Execution time for model4: {:.2f}s\n\n'.format(t_end_4)) 197 | print('Execution time for model5: {:.2f}s\n\n'.format(t_end_5)) 198 | -------------------------------------------------------------------------------- /HCP_fmripredict/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.1.10 2 | astor==0.7.1 3 | awscli==1.14.47 4 | bleach==1.5.0 5 | boto==2.49.0 6 | boto3==1.9.70 7 | botocore==1.12.70 8 | bz2file==0.98 9 | certifi==2018.11.29 10 | chardet==3.0.4 11 | colorama==0.3.7 12 | cycler==0.10.0 13 | decorator==4.2.1 14 | docopt==0.6.2 15 | docutils==0.14 16 | entrypoints==0.2.3 17 | gast==0.2.0 18 | gensim==3.6.0 19 | grpcio==1.17.1 20 | h5py==2.7.1 21 | html5lib==0.9999999 22 | idna==2.8 23 | ipykernel==4.8.1 24 | ipython==6.2.1 25 | ipython-genutils==0.2.0 26 | ipywidgets==7.1.1 27 | jedi==0.11.1 28 | jmespath==0.9.3 29 | jsonschema==2.6.0 30 | jupyter==1.0.0 31 | jupyter-client==5.2.2 32 | jupyter-console==5.2.0 33 | jupyter-core==4.4.0 34 | jupyterlab==0.31.8 35 | jupyterlab-launcher==0.10.5 36 | jupyterlmod==1.5.0 37 | Keras==2.1.4 38 | Keras-Applications==1.0.6 39 | Keras-Preprocessing==1.0.5 40 | lmdb==0.93 41 | Markdown==2.6.11 42 | MarkupSafe==1.0 43 | matplotlib==2.1.2 44 | mistune==0.8.3 45 | mock==2.0.0 46 | mpmath==1.1.0 47 | msgpack==0.5.6 48 | msgpack-numpy==0.4.3 49 | nbconvert==5.3.1 50 | nbformat==4.4.0 51 | nbrsessionproxy==0.6.1 52 | nbserverproxy==0.5.1 53 | nibabel==2.2.1 54 | numexpr==2.6.4 55 | numpy==1.14.1 56 | pandas==0.21.0 57 | pandocfilters==1.4.2 58 | parso==0.1.1 59 | pbr==5.1.1 60 | pexpect==4.4.0 61 | pickleshare==0.7.4 62 | Pillow==5.3.0 63 | pkg-resources==0.0.0 64 | prompt-toolkit==1.0.15 65 | protobuf==3.6.1 66 | ptyprocess==0.5.2 67 | pyasn1==0.4.2 68 | Pygments==2.2.0 69 | pyparsing==2.2.0 70 | python-dateutil==2.6.1 71 | pytz==2018.3 72 | pyzmq==17.0.0 73 | qtconsole==4.3.1 74 | requests==2.21.0 75 | rsa==3.4.2 76 | s3transfer==0.1.13 77 | scikit-build==0.6.1 78 | scikit-learn==0.19.1 79 | 
scipy==1.0.0 80 | seaborn==0.8.1 81 | Send2Trash==1.4.2 82 | simplegeneric==0.8.1 83 | six==1.11.0 84 | sklearn==0.0 85 | smart-open==1.7.1 86 | sympy==1.3 87 | tables==3.4.2 88 | tabulate==0.8.2 89 | tensorboard==1.12.1 90 | tensorflow-gpu==1.12.0 91 | tensorflow-tensorboard==1.5.1 92 | termcolor==1.1.0 93 | terminado==0.8.1 94 | testpath==0.3.1 95 | torch==1.0.0 96 | torchvision==0.2.1 97 | tornado==4.5.3 98 | tqdm==4.19.7 99 | traitlets==4.3.2 100 | urllib3==1.24.1 101 | wcwidth==0.1.7 102 | webencodings==0.5.1 103 | Werkzeug==0.14.1 104 | widgetsnbextension==3.1.3 105 | -------------------------------------------------------------------------------- /HCP_fmripredict/requirements_update.txt: -------------------------------------------------------------------------------- 1 | # packages in environment at /home/yuzhang/miniconda3/envs/tensorflow: 2 | # 3 | # Name Version Build Channel 4 | awscli 1.14.47 5 | bleach 1.5.0 6 | ca-certificates 2018.03.07 0 7 | certifi 2018.1.18 py36_0 8 | ciftify 1.0.1 9 | colorama 0.3.7 10 | cycler 0.10.0 11 | decorator 4.2.1 12 | docopt 0.6.2 13 | docutils 0.14 14 | entrypoints 0.2.3 15 | h5py 2.7.1 16 | html5lib 0.9999999 17 | ipykernel 4.8.1 18 | ipython 6.2.1 19 | ipython-genutils 0.2.0 20 | ipywidgets 7.1.1 21 | jedi 0.11.1 22 | Jinja2 2.10 23 | jmespath 0.9.3 24 | jsonschema 2.6.0 25 | jupyter 1.0.0 26 | jupyter-client 5.2.2 27 | jupyter-console 5.2.0 28 | jupyter-core 4.4.0 29 | jupyterlab 0.31.8 30 | jupyterlab-launcher 0.10.5 31 | jupyterlmod 1.5.0 32 | Keras 2.1.4 33 | libedit 3.1 heed3624_0 34 | libffi 3.2.1 hd88cf55_4 35 | libgcc-ng 7.2.0 hdf63c60_3 36 | libstdcxx-ng 7.2.0 hdf63c60_3 37 | lmdb 0.93 38 | Markdown 2.6.11 39 | MarkupSafe 1.0 40 | matplotlib 2.1.2 41 | mistune 0.8.3 42 | msgpack 0.5.6 43 | msgpack-numpy 0.4.3 44 | nbconvert 5.3.1 45 | nbformat 4.4.0 46 | nbrsessionproxy 0.6.1 47 | nbserverproxy 0.5.1 48 | ncurses 6.0 h9df7e31_2 49 | nibabel 2.2.1 50 | nilearn 0.4.0 51 | notebook 5.4.0 52 | numexpr 2.6.4 53 | openssl 1.0.2o h20670df_0 54 | pandas 0.21.0 55 | pandocfilters 1.4.2 56 | parso 0.1.1 57 | pexpect 4.4.0 58 | pickleshare 0.7.4 59 | pip 9.0.3 py36_0 60 | prompt-toolkit 1.0.15 61 | ptyprocess 0.5.2 62 | pyasn1 0.4.2 63 | Pygments 2.2.0 64 | pyparsing 2.2.0 65 | python 3.6.5 hc3d631a_0 66 | python-dateutil 2.6.1 67 | pytz 2018.3 68 | PyYAML 3.12 69 | pyzmq 17.0.0 70 | qtconsole 4.3.1 71 | readline 7.0 ha6073c6_4 72 | rsa 3.4.2 73 | s3transfer 0.1.13 74 | scikit-build 0.6.1 75 | scikit-learn 0.19.1 76 | scipy 1.0.0 77 | seaborn 0.8.1 78 | Send2Trash 1.4.2 79 | setuptools 39.0.1 py36_0 80 | simplegeneric 0.8.1 81 | six 1.11.0 82 | sklearn 0.0 83 | sqlite 3.22.0 h1bed415_0 84 | tables 3.4.2 85 | tabulate 0.8.2 86 | tensorboard 1.6.0 87 | tensorflow-tensorboard 1.5.1 88 | tensorpack 0.8.2 89 | termcolor 1.1.0 90 | terminado 0.8.1 91 | testpath 0.3.1 92 | tk 8.6.7 hc745277_3 93 | tornado 4.5.3 94 | tqdm 4.19.7 95 | traitlets 4.3.2 96 | wcwidth 0.1.7 97 | webencodings 0.5.1 98 | Werkzeug 0.14.1 99 | wheel 0.31.0 py36_0 100 | widgetsnbextension 3.1.3 101 | xz 5.2.3 h55aa19d_2 102 | zlib 1.2.11 ha838bed_2 103 | -------------------------------------------------------------------------------- /HCP_fmripredict/tensorflow_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --account=def-pbellec 3 | #SBATCH --job-name=cnn_graph 4 | #SBATCH --gres=gpu:2 # request GPU "generic resource" 5 | #SBATCH --cpus-per-task=6 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham. 
6 | #SBATCH --mem=120G # memory per node 7 | #SBATCH --time=00-15:00 # time (DD-HH:MM) 8 | #SBATCH --output=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j.out ###--output=%N-%j.out # %N for node name, %j for jobID 9 | ####SBATCH --error=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j_%A_%a.err 10 | #SBATCH --workdir="/home/yuzhang/scratch/HCP/codes/HCP_fmripredict/" 11 | 12 | module load cuda cudnn python/3.6.3 13 | source $HOME/tensorflow-py3.6/bin/activate 14 | ps | grep python; pkill python; 15 | 16 | #python ./tensorflow-test.py 17 | 18 | mod=$1 19 | list=$2 20 | if [ -z ${mod} ];then mod='WM';fi 21 | if [ -z ${list} ];then list='ALL';fi 22 | 23 | ###python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 24 | 25 | ##python -W ignore ./HCP_task_fmri_cnn_tensorpack.py 26 | python ./HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm.py 27 | -------------------------------------------------------------------------------- /HCP_fmripredict/tensorflow_test_wholenode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --account=def-pbellec 3 | #SBATCH --job-name=cnn_graph 4 | #SBATCH --nodes=1 5 | #SBATCH --gres=gpu:lgpu:4 ##request whole node 6 | #SBATCH --ntasks=1 7 | #SBATCH --cpus-per-task=24 # There are 24 CPU cores on Cedar GPU nodes 8 | #SBATCH --mem=0 # Request the full memory of the node 9 | #SBATCH --time=00-15:00 # time (DD-HH:MM) 10 | 11 | #SBATCH --output=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j.out ###--output=%N-%j.out # %N for node name, %j for jobID 12 | ####SBATCH --error=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j_%A_%a.err 13 | #SBATCH --workdir="/home/yuzhang/scratch/HCP/codes/HCP_fmripredict/" 14 | 15 | module load cuda cudnn python/3.6.3 16 | source $HOME/tensorflow-py3.6/bin/activate 17 | ps | grep python; pkill python; 18 | 19 | #python ./tensorflow-test.py 20 | 21 | mod=$1 22 | list=$2 23 | if [ -z ${mod} ];then mod='WM';fi 24 | if [ -z ${list} ];then list='ALL';fi 25 | 26 | ###python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 27 | 28 | ##python -W ignore ./HCP_task_fmri_cnn_tensorpack.py 29 | python ./HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm.py 30 | -------------------------------------------------------------------------------- /HCP_fmripredict/test_model_submit_bk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --account=rrg-pbellec 3 | #SBATCH --nodes=1 4 | #SBATCH --tasks-per-node=8 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham. 
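# Usage sketch (hypothetical invocation): the two optional positional arguments are read
# below as mod=$1 (task modality) and list=$2 (subject-list index), defaulting to MOTOR and ALL:
#   sbatch test_model_submit_bk.sh WM ALL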
5 | #SBATCH --mem=200G # memory per node 6 | #SBATCH --time=0-8:00 #0-12:00 # time (DD-HH:MM) 7 | #SBATCH --output=../train_log/hcp_loaddata_%x_%N-%j.out # %N for node name, %j for jobID 8 | 9 | #module load cuda cudnn python/3.6.3 10 | source activate tensorflow 11 | #mod='WM' 12 | #list='ALL' 13 | mod=$1 14 | list=$2 15 | if [ -z ${mod} ];then mod='MOTOR';fi 16 | if [ -z ${list} ];then list='ALL';fi 17 | 18 | ##python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 19 | 20 | python ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 --n_buffersize=30 21 | 22 | ##sbatch --mem=50G --time=0-10:0 --nodes=2 --ntasks-per-node=8 --account=rrg-pbellec --output=../../hcp_loaddata_WM_ALL_logs.txt ./extract_fmri_event_data.py --task_modality='WM' --subject_to_start=0 --subject_to_last=2400 --subjectlist_index='ALL' --n_thread=5 23 | -------------------------------------------------------------------------------- /HCP_fmripredict/test_model_submit_new.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --account=def-pbellec ##rrg-pbellec 3 | #SBATCH --gres=gpu:2 # request GPU "generic resource" 4 | #SBATCH --ntasks=1 5 | #SBATCH --cpus-per-task=8 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham. 6 | #SBATCH --mem=120G #200G # memory per node 7 | #SBATCH --time=0-15:00 #0-12:00 # time (DD-HH:MM) 8 | #SBATCH --output=../train_log/hcp_loaddata_%x_%N-%j.out # %N for node name, %j for jobID 9 | 10 | 11 | #module load cuda cudnn python/3.6.3 12 | source activate tensorflow 13 | #mod='WM' 14 | #list='ALL' 15 | mod=$1 16 | list=$2 17 | if [ -z ${mod} ];then mod='MOTOR';fi 18 | if [ -z ${list} ];then list='ALL';fi 19 | 20 | ##python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 21 | 22 | python ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 --n_buffersize=30 23 | 24 | ##sbatch --mem=50G --time=0-10:0 --nodes=2 --ntasks-per-node=8 --account=rrg-pbellec --output=../../hcp_loaddata_WM_ALL_logs.txt ./extract_fmri_event_data.py --task_modality='WM' --subject_to_start=0 --subject_to_last=2400 --subjectlist_index='ALL' --n_thread=5 25 | -------------------------------------------------------------------------------- /HCP_fmripredict/test_module.py: -------------------------------------------------------------------------------- 1 | #!/home/yuzhang/jupyter_py3/bin/python 2 | 3 | # Author: Yu Zhang 4 | # License: simplified BSD 5 | # coding: utf-8 6 | 7 | import sys 8 | sys.path.append('/home/yuzhang/projects/rrg-pbellec/yuzhang/HCP/codes/HCP_fmripredict') 9 | 10 | import config, utils 11 | 12 | config_instance = config.Config() 13 | print("--modality", config_instance.modality) 14 | print("--fmri_folder: ", config_instance.pathfmri) 15 | print('--temp_out:', config_instance.pathout) 16 | print('--atlas_filename:',config_instance.AtlasName) 17 | 18 | hcp_fmri_instance = utils.hcp_task_fmri(config_instance) 19 | 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fmri_predict 2 | predicting fmri activaties from connectome 3 | 4 | ##to work with 
git 5 | 1) git status: to check any changes in the repo 6 | 2) git add: to stage the changes 7 | 3) git commit -a: to commit all modified tracked files 8 | 4) git push: to upload any local changes to GitHub 9 | 5) git pull: to fetch and merge new changes from GitHub to the local computer 10 | 6) git log: to check the commit history of the repo 11 | 12 | ##to create virtual environment via conda 13 | 1) install miniconda: https://conda.io/miniconda.html , run "bash Miniconda3-latest-Linux-x86_64.sh" and "conda update conda" after downloading 14 | 2) create env: conda create -n tensorflow-py3.6 anaconda python=3.6 15 | 3) verify env is created: conda list 16 | 4) activate env: source activate tensorflow-py3.6 17 | 5) save package info from another env: pip3 freeze > requirements.txt 18 | 6) load all required packages: 19 | while read requirement; do conda install --yes $requirement || pip install $requirement; done < requirements.txt ; 20 | or simply use: pip install -r requirements.txt 21 | 7) install tensorflow for gpu: 22 | pip install --upgrade pip; 23 | pip3 install tensorflow-gpu; 24 | pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp36-cp36m-linux_x86_64.whl 25 | for validation: ipython -> 26 | import tensorflow as tf; 27 | hello = tf.constant('Hello, TensorFlow!'); 28 | sess = tf.Session(); 29 | print(sess.run(hello)); 30 | or simply run: python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)" 31 | 8) install tensorpack: pip3 install tensorpack; pip3 install --upgrade tensorpack 32 | 9) install notebook: conda install -c anaconda ipykernel ; conda install -c anaconda notebook; 33 | ipython kernel install --user --name tensorflow --display-name "Python3.6"; jupyter kernelspec list 34 | 35 | 36 | 37 | ##discussion with Pierre on Jan 29th 38 | 1) start with a simple model: predicting motor activation from functional connectivity using a sparse linear regression model 39 | 2) using atlas: 40 | group atlas: MIST with two resolutions (200/1000 regions) 41 | individual atlas 42 | 3) defining network structure: 7 functional networks (non-linear relationships could be learned through convolutional layers; thus no logical conflict) 43 | 4) for limited training samples: use sliding windows to generate dynamic functional connectivity (duration: 5min) 44 | 45 | ###Data 46 | 1) resting-state: 10 sessions under the folder: /data/cisl/raw_data/midnight/Rawdata/nii_data/preproc_fsl/sub01/rest 47 | using warped_F_sess*_res_ICA_filt_sm6.nii.gz, i.e. after ICA-AROMA, temporal filtering, spatial smoothing and registration 48 | 2) motor tasks: 10 sessions and 2 runs each, under the folder: /data/cisl/raw_data/midnight/Rawdata/nii_data/preproc_fsl/sub01/motor 49 | preprocessed fmri: filtered_func_data_ICA.nii.gz 50 | brain activation maps from contrasts: 51 | zstat1: foot movement 52 | zstat2: hand 53 | zstat3: tongue 54 | zstat4: foot_left 55 | zstat5: foot_right 56 | zstat6: hand_left 57 | zstat7: hand_right 58 | 59 | ###first practice: predicting task activation from RSFC using linear model 60 | ## codes in linear_model folder 61 | ## script: midnight_project_resting.ipynb 62 | 1) models: LinearRegression, RidgeRegression, Lasso, ElasticNetCV, LinearSVR 63 | for each region, the linear models are trained and the best model is chosen based on cross-validation 64 | 2) data: dynamic functional connectivity (window_size=10min), motor task (2 runs) for 10 sessions 65 | 3) atlas: we used the MIST_ROI atlas (210 regions) to extract mean fMRI signals or activations for model
training 66 | 4) regions: pre-select regions with moderate activity from the activation maps (z-score>1.9); 67 | after that, we trained the linear models for approximately 50 regions, independently 68 | 69 | 5) further considerations: 70 | a). combining multi-subject data and using multitask models during training 71 | b). running a statistical test on the z-maps first and converting the activation maps into binary maps. Thus, we could use classification models instead of regression, which might improve prediction accuracy 72 | -------------------------------------------------------------------------------- /fmri_decoding/fmri_decoding_motor_svc_weights.nii.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIMEXP/fmri_predict/238d9409ea4e8e35e82f87e1dedf95a78e47afcc/fmri_decoding/fmri_decoding_motor_svc_weights.nii.gz -------------------------------------------------------------------------------- /fmri_decoding/fmri_decoding_readme.txt: -------------------------------------------------------------------------------- 1 | Part I: classify between hand, foot and tongue movements using SVM 2 | 3 | 1. Multiclass SVM with RBF kernels on activation patterns: 4 | 20 samples (sessions*runs) with 210 features (ROIs) 5 | Results: 6 | SVM Scoring with 5-fold cross-validation: mean accuracy = 1.0 7 | after PCA decomposition into 20 components: mean accuracy = 0.916 8 | after kernel-PCA decomposition into 20 components: mean accuracy = 0.99 9 | after ICA decomposition: mean accuracy = 0.75 10 | after MDS decomposition: mean accuracy = 0.80 11 | [ 0.83333333 0.91666667 0.75 0.75 0.83333333] 12 | 13 | 14 | 2. Multiclass SVM with RBF kernels on fMRI signals: 15 | 1480 samples (sessions*runs*trials) with 210 features (ROIs) 16 | Results: 17 | SVM Scoring with 10-fold cross-validation: mean accuracy = 0.431 18 | Reduction into 10 components: 19 | PCA decomposition: mean accuracy = 0.441 20 | ICA decomposition: mean accuracy = 0.419 21 | Kernel-PCA decomposition: mean accuracy = 0.466 22 | MDS decomposition: mean accuracy = 0.415 23 | ANOVA feature selection based on F-test: mean accuracy = 0.438 24 | Reduction into 20 components: 25 | PCA decomposition: mean accuracy = 0.431 26 | ICA decomposition: mean accuracy = 0.419 27 | Kernel-PCA decomposition: mean accuracy = 0.479 28 | MDS decomposition: mean accuracy = 0.415 29 | ANOVA feature selection based on F-test: mean accuracy = 0.433 30 | Reduction into 50 components: 31 | PCA decomposition: mean accuracy = 0.433 32 | ICA decomposition: mean accuracy = 0.419 33 | Kernel-PCA decomposition: mean accuracy = 0.419 34 | MDS decomposition: mean accuracy = 0.452 35 | ANOVA feature selection based on F-test: mean accuracy = 0.439 36 | Reduction into 100 components: 37 | PCA decomposition: mean accuracy = 0.417 38 | ICA decomposition: mean accuracy = 0.419 39 | Kernel-PCA decomposition: mean accuracy = 0.419 40 | MDS decomposition: mean accuracy = 0.443 41 | ANOVA feature selection based on F-test: mean accuracy = 0.444 42 | Reduction into 150 components: 43 | PCA decomposition: mean accuracy = 0.416 44 | ICA decomposition: mean accuracy = 0.419 45 | Kernel-PCA decomposition: mean accuracy = 0.419 46 | MDS decomposition: mean accuracy = 0.453 47 | ANOVA feature selection based on F-test: mean accuracy = 0.444 48 | 49 | 50 | -------------------------------------------------------------------------------- /fmri_decoding/model_test_2dcnn.h5:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SIMEXP/fmri_predict/238d9409ea4e8e35e82f87e1dedf95a78e47afcc/fmri_decoding/model_test_2dcnn.h5 -------------------------------------------------------------------------------- /fmri_decoding/model_test_2dcnn.json: -------------------------------------------------------------------------------- 1 | {"keras_version": "2.1.1", "backend": "tensorflow", "config": {"input_layers": [["input_7", 0, 0]], "output_layers": [["dense_18", 0, 0]], "name": "model_6", "layers": [{"name": "input_7", "config": {"batch_input_shape": [null, 61, 73, 1], "sparse": false, "name": "input_7", "dtype": "float32"}, "inbound_nodes": [], "class_name": "InputLayer"}, {"name": "conv2d_9", "config": {"name": "conv2d_9", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 32, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["input_7", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "conv2d_10", "config": {"name": "conv2d_10", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 32, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_9", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "max_pooling2d_5", "config": {"pool_size": [2, 2], "strides": [2, 2], "name": "max_pooling2d_5", "padding": "valid", "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_10", 0, 0, {}]]], "class_name": "MaxPooling2D"}, {"name": "dropout_19", "config": {"rate": 0.25, "noise_shape": null, "name": "dropout_19", "trainable": true, "seed": null}, "inbound_nodes": [[["max_pooling2d_5", 0, 0, {}]]], "class_name": "Dropout"}, {"name": "conv2d_11", "config": {"name": "conv2d_11", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 64, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["dropout_19", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "conv2d_12", "config": {"name": "conv2d_12", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": 
"Zeros"}, "filters": 64, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_11", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "max_pooling2d_6", "config": {"pool_size": [2, 2], "strides": [2, 2], "name": "max_pooling2d_6", "padding": "valid", "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_12", 0, 0, {}]]], "class_name": "MaxPooling2D"}, {"name": "dropout_20", "config": {"rate": 0.25, "noise_shape": null, "name": "dropout_20", "trainable": true, "seed": null}, "inbound_nodes": [[["max_pooling2d_6", 0, 0, {}]]], "class_name": "Dropout"}, {"name": "flatten_3", "config": {"name": "flatten_3", "trainable": true}, "inbound_nodes": [[["dropout_20", 0, 0, {}]]], "class_name": "Flatten"}, {"name": "dense_17", "config": {"name": "dense_17", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "units": 128, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_constraint": null, "activity_regularizer": null, "bias_regularizer": null, "use_bias": true, "trainable": true}, "inbound_nodes": [[["flatten_3", 0, 0, {}]]], "class_name": "Dense"}, {"name": "dropout_21", "config": {"rate": 0.5, "noise_shape": null, "name": "dropout_21", "trainable": true, "seed": null}, "inbound_nodes": [[["dense_17", 0, 0, {}]]], "class_name": "Dropout"}, {"name": "dense_18", "config": {"name": "dense_18", "activation": "softmax", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "units": 3, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_constraint": null, "activity_regularizer": null, "bias_regularizer": null, "use_bias": true, "trainable": true}, "inbound_nodes": [[["dropout_21", 0, 0, {}]]], "class_name": "Dense"}]}, "class_name": "Model"} -------------------------------------------------------------------------------- /fmri_decoding/test_gcn_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from keras.layers import Input, Dropout\n", 18 | "from keras.models import Model\n", 19 | "from keras.optimizers import Adam\n", 20 | "from keras.regularizers import l2\n", 21 | "import time" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "###graph cnn model\n", 31 | "%matplotlib inline\n", 32 | "%run -i '/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/layers/graph.py'\n", 33 | "%run -i '/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/utils.py'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 6, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Loading cora dataset...\n", 46 | "Dataset has 2708 nodes, 5429 edges, 1433 features.\n" 47 | ] 48 | } 49 | 
], 50 | "source": [ 51 | "path=\"/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/data/cora/\"\n", 52 | "dataset='cora'\n", 53 | "print('Loading {} dataset...'.format(dataset))\n", 54 | "\n", 55 | "idx_features_labels = np.genfromtxt(\"{}{}.content\".format(path, dataset), dtype=np.dtype(str))\n", 56 | "features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)\n", 57 | "labels = encode_onehot(idx_features_labels[:, -1])\n", 58 | "\n", 59 | "# build graph\n", 60 | "idx = np.array(idx_features_labels[:, 0], dtype=np.int32)\n", 61 | "idx_map = {j: i for i, j in enumerate(idx)}\n", 62 | "edges_unordered = np.genfromtxt(\"{}{}.cites\".format(path, dataset), dtype=np.int32)\n", 63 | "edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),\n", 64 | " dtype=np.int32).reshape(edges_unordered.shape)\n", 65 | "adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),\n", 66 | " shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)\n", 67 | "\n", 68 | "# build symmetric adjacency matrix\n", 69 | "adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)\n", 70 | "\n", 71 | "print('Dataset has {} nodes, {} edges, {} features.'.format(adj.shape[0], edges.shape[0], features.shape[1]))\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 10, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "Loading cora dataset...\n", 84 | "Dataset has 2708 nodes, 5429 edges, 1433 features.\n", 85 | "(2708, 1433) (2708, 7) (2708, 7)\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "# Define parameters\n", 91 | "DATAPATH = \"/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/data/cora/\"\n", 92 | "DATASET = 'cora'\n", 93 | "FILTER = 'chebyshev'\n", 94 | "MAX_DEGREE = 2 # maximum polynomial degree\n", 95 | "SYM_NORM = True # symmetric (True) vs. 
left-only (False) normalization\n", 96 | "NB_EPOCH = 200\n", 97 | "PATIENCE = 10 # early stopping patience\n", 98 | "\n", 99 | "# Get data\n", 100 | "X, A, y = load_data(path=DATAPATH,dataset=DATASET)\n", 101 | "y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)\n", 102 | "\n", 103 | "# Normalize X\n", 104 | "X /= X.sum(1).reshape(-1, 1)\n", 105 | "print(X.shape,y.shape,y_train.shape)\n", 106 | "\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 12, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Using Chebyshev polynomial basis filters...\n", 119 | "Calculating largest eigenvalue of normalized graph Laplacian...\n", 120 | "Calculating Chebyshev polynomials up to order 2...\n" 121 | ] 122 | }, 123 | { 124 | "ename": "ValueError", 125 | "evalue": "Dimensions must be equal, but are 4299 and 1433 for 'graph_convolution_2/MatMul' (op: 'MatMul') with input shapes: [?,4299], [1433,16].", 126 | "output_type": "error", 127 | "traceback": [ 128 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 129 | "\u001b[0;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)", 130 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py\u001b[0m in \u001b[0;36m_call_cpp_shape_fn_impl\u001b[0;34m(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)\u001b[0m\n\u001b[1;32m 685\u001b[0m \u001b[0mgraph_def_version\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode_def_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_shapes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 686\u001b[0;31m input_tensors_as_shapes, status)\n\u001b[0m\u001b[1;32m 687\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mInvalidArgumentError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 131 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, type_arg, value_arg, traceback_arg)\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc_api\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_Message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 473\u001b[0;31m c_api.TF_GetCode(self.status.status))\n\u001b[0m\u001b[1;32m 474\u001b[0m \u001b[0;31m# Delete the underlying status object from memory otherwise it stays alive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 132 | "\u001b[0;31mInvalidArgumentError\u001b[0m: Dimensions must be equal, but are 4299 and 1433 for 'graph_convolution_2/MatMul' (op: 'MatMul') with input shapes: [?,4299], [1433,16].", 133 | "\nDuring handling of the above exception, another exception occurred:\n", 134 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 135 | "\u001b[0;32m/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/utils.py\u001b[0m in 
() in <module>()\n     26 # This is somewhat hacky, more elegant options would require rewriting the Layer base class.\n     27 H = Dropout(0.5)(X_in)\n---> 28 H = GraphConvolution(16, support, activation='relu', kernel_regularizer=l2(5e-4))([H]+G)\n     29 H = Dropout(0.5)(H)\n     30 Y = GraphConvolution(y.shape[1], support, activation='softmax')([H]+G)\n",
136 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/keras/engine/topology.py in __call__(self, inputs, **kwargs)\n    602 # Actually call the layer, collecting output(s), mask(s), and shape(s).\n--> 603 output = self.call(inputs, **kwargs)\n    604 output_mask = self.compute_mask(inputs, previous_mask)\n",
137 |     "/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/utils.py in call(self, inputs, mask)\n     71 supports.append(K.dot(basis[i], features))\n     72 supports = K.concatenate(supports, axis=1)\n---> 73 output = K.dot(supports, self.kernel)\n     75 if self.bias:\n",
138 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in dot(x, y)\n   1050 out = tf.sparse_tensor_dense_matmul(x, y)\n   1051 else:\n-> 1052 out = tf.matmul(x, y)\n   1053 return out\n",
139 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in matmul(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name)\n   1890 return gen_math_ops._mat_mul(\n-> 1891 a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)\n",
140 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py in _mat_mul(a, b, transpose_a, transpose_b, name)\n   2435 _, _, _op = _op_def_lib._apply_op_helper(\n   2436 \"MatMul\", a=a, b=b, transpose_a=transpose_a, transpose_b=transpose_b,\n-> 2437 name=name)\n   2438 _result = _op.outputs[:]\n   2439 _inputs_flat = _op.inputs\n",
141 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)\n    785 op = g.create_op(op_type_name, inputs, output_types, name=scope,\n    786 input_types=input_types, attrs=attr_protos,\n--> 787 op_def=op_def)\n    788 return output_structure, op_def.is_stateful, op\n",
142 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)\n   2957 if compute_shapes:\n-> 2958 set_shapes_for_outputs(ret)\n   2959 self._add_op(ret)\n",
143 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op)\n   2207 shape_func = _call_cpp_shape_fn_and_require_op\n-> 2209 shapes = shape_func(op)\n   2210 if shapes is None:\n   2211 raise RuntimeError(\n",
144 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in call_with_requiring(op)\n   2158 def call_with_requiring(op):\n-> 2159 return call_cpp_shape_fn(op, require_shape_fn=True)\n   2161 _call_cpp_shape_fn_and_require_op = call_with_requiring\n",
145 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)\n    625 res = _call_cpp_shape_fn_impl(op, input_tensors_needed,\n    626 input_tensors_as_shapes_needed,\n--> 627 require_shape_fn)\n    628 if not isinstance(res, dict):\n",
146 |     "/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)\n    689 missing_shape_fn = True\n    690 else:\n--> 691 raise ValueError(err.message)\n",
147 |     "ValueError: Dimensions must be equal, but are 4299 and 1433 for 'graph_convolution_2/MatMul' (op: 'MatMul') with input shapes: [?,4299], [1433,16]."
148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "if FILTER == 'localpool':\n", 153 | " \"\"\" Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016) \"\"\"\n", 154 | " print('Using local pooling filters...')\n", 155 | " A_ = preprocess_adj(A, SYM_NORM)\n", 156 | " support = 1\n", 157 | " graph = [X, A_]\n", 158 | " G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True)]\n", 159 | "\n", 160 | "elif FILTER == 'chebyshev':\n", 161 | " \"\"\" Chebyshev polynomial basis filters (Defferard et al., NIPS 2016) \"\"\"\n", 162 | " print('Using Chebyshev polynomial basis filters...')\n", 163 | " L = normalized_laplacian(A, SYM_NORM)\n", 164 | " L_scaled = rescale_laplacian(L)\n", 165 | " T_k = chebyshev_polynomial(L_scaled, MAX_DEGREE)\n", 166 | " support = MAX_DEGREE + 1\n", 167 | " graph = [X]+T_k\n", 168 | " G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(support)]\n", 169 | "\n", 170 | "else:\n", 171 | " raise Exception('Invalid filter type.')\n", 172 | "\n", 173 | "X_in = Input(shape=(X.shape[1],))\n", 174 | "\n", 175 | "# Define model architecture\n", 176 | "# NOTE: We pass arguments for graph convolutional layers as a list of tensors.\n", 177 | "# This is somewhat hacky, more elegant options would require rewriting the Layer base class.\n", 178 | "H = Dropout(0.5)(X_in)\n", 179 | "H = GraphConvolution(16, support, activation='relu', kernel_regularizer=l2(5e-4))([H]+G)\n", 180 | "H = Dropout(0.5)(H)\n", 181 | "Y = GraphConvolution(y.shape[1], support, activation='softmax')([H]+G)\n", 182 | "\n", 183 | "# Compile model\n", 184 | "model = Model(inputs=[X_in]+G, outputs=Y)\n", 185 | "model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))\n", 186 | "model.summary()\n", 187 | "\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# Helper variables for main training loop\n", 197 | "wait = 0\n", 198 | "preds = None\n", 199 | "best_val_loss = 99999\n", 200 | "\n", 201 | "# Fit\n", 202 | "for epoch in range(1, NB_EPOCH+1):\n", 203 | "\n", 204 | " # Log wall-clock time\n", 205 | " t = time.time()\n", 206 | "\n", 207 | " # Single training iteration (we mask nodes without labels for loss calculation)\n", 208 | " model.fit(graph, y_train, sample_weight=train_mask,\n", 209 | " batch_size=A.shape[0], epochs=1, shuffle=False, verbose=0)\n", 210 | "\n", 211 | " # Predict on full dataset\n", 212 | " preds = model.predict(graph, batch_size=A.shape[0])\n", 213 | "\n", 214 | " # Train / validation scores\n", 215 | " train_val_loss, train_val_acc = evaluate_preds(preds, [y_train, y_val],\n", 216 | " [idx_train, idx_val])\n", 217 | " print(\"Epoch: {:04d}\".format(epoch),\n", 218 | " \"train_loss= {:.4f}\".format(train_val_loss[0]),\n", 219 | " \"train_acc= {:.4f}\".format(train_val_acc[0]),\n", 220 | " \"val_loss= {:.4f}\".format(train_val_loss[1]),\n", 221 | " \"val_acc= {:.4f}\".format(train_val_acc[1]),\n", 222 | " \"time= {:.4f}\".format(time.time() - t))\n", 223 | "\n", 224 | " # Early stopping\n", 225 | " if train_val_loss[1] < best_val_loss:\n", 226 | " best_val_loss = train_val_loss[1]\n", 227 | " wait = 0\n", 228 | " else:\n", 229 | " if wait >= PATIENCE:\n", 230 | " print('Epoch {}: early stopping'.format(epoch))\n", 231 | " break\n", 232 | " wait += 1\n", 233 | "\n", 234 | "# Testing\n", 235 | "test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])\n", 236 | "print(\"Test set results:\",\n", 237 | " 
\"loss= {:.4f}\".format(test_loss[0]),\n", 238 | " \"accuracy= {:.4f}\".format(test_acc[0]))" 239 | ] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "tensorflow_gpu_test" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.5.2" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 2 263 | } 264 | -------------------------------------------------------------------------------- /linear_model/project_update_readme.txt: -------------------------------------------------------------------------------- 1 | Project Updates: 2 | website: https://github.com/SIMEXP/fmri_predict/blob/master/linear_model/ 3 | using linear models to predict fMRI activation patterns (motor task-hand movement) using resting-state functional connectivity 4 | 5 | The MIST_ROI atlas with 210 regions were used to extract fMRI signals 6 | 1) features: 210*210 region-to-region correlation matrix 7 | 2) output: 210 z-scores from GLM to indicate the probability of brain activation within each region 8 | 3) model: SVR with linear kernel (from sklearn), one seperate model for each region 9 | 4) data: 10 | 10 sessions of rs-fMRI scans from each subject, dynamic functional connectivity with spliding window size=10mins were used 11 | 10 session of two runs of task-fMRI scans, z-score maps from GLM 12 | 5) training: using both cross-validation (10-fold) and train-test-split from sklearn 13 | 14 | 6) estimation: either for each region (using MSE) or whole-brain (using correlation) 15 | for individual region: different models are trained, sometimes lasso/Enet performed better than SVR; mean MSE=0.5 16 | for whole-brain: correlation between estimated and true activation scores: r=0.3069 17 | -------------------------------------------------------------------------------- /plot_conn_matrix_surchs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import itertools\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import nibabel as nib\n", 14 | "import matplotlib.gridspec as gs\n", 15 | "from matplotlib import colors as mc\n", 16 | "from nilearn import plotting as nlp\n", 17 | "from matplotlib import pyplot as plt\n", 18 | "from matplotlib.colors import LinearSegmentedColormap" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "def make_boxes(mat, cl_def, pad=1, edge=False):\n", 28 | " \"\"\"\n", 29 | " mat: the matrix you want to do stuff to\n", 30 | " cl_def: a list of tuples where the first position is the\n", 31 | " index of the first element in the cluster. the\n", 32 | " second position is the index of the last element\n", 33 | " in the cluster\n", 34 | " pad: an integer value for the number of zero spaces to add\n", 35 | " around clusters\n", 36 | " edge: boolean argument. If True, clusters at the corners will\n", 37 | " be drawn full. 
If False, clusters will be only drawn on\n", 38 | " the inside edge (no white line around the matrix).\n", 39 | " \n", 40 | " returns:\n", 41 | " omat: the input matrix with the spaces added\n", 42 | " cmat_m: the overlayed cluster boxes in a masked array\n", 43 | " lmat_m: a mask of the added empty spaces\n", 44 | " ind: the new index positions for the data (for x_ticks...)\n", 45 | " \"\"\"\n", 46 | " # Sort the cluster definitions based on the start point\n", 47 | " order = np.argsort([i[0] for i in cl_def])\n", 48 | " cl_def = [(i[0], i[1]) for i in np.array(cl_def)[order]]\n", 49 | " # Extract the values\n", 50 | " if edge:\n", 51 | " starts = [i[0] for i in cl_def]\n", 52 | " stops = [i[1]+1 for i in cl_def]\n", 53 | " else:\n", 54 | " starts = [i[0] for i in cl_def if not i[0]==0]\n", 55 | " stops = [i[1]+1 for i in cl_def if not i[1]+1>=mat.shape[0]]\n", 56 | " \n", 57 | " # Find the breakpoints\n", 58 | " bkp = list(np.unique(starts + stops))\n", 59 | " n_bkp = len(bkp)\n", 60 | " # Convert to new indices\n", 61 | " run = 0\n", 62 | " ind = list()\n", 63 | " for i in np.arange(mat.shape[0]):\n", 64 | " if i in bkp:\n", 65 | " run += pad\n", 66 | " ind.append(i+run)\n", 67 | "\n", 68 | " # Make a grid index\n", 69 | " x = [i[0] for i in itertools.product(ind, ind)]\n", 70 | " y = [i[1] for i in itertools.product(ind, ind)]\n", 71 | "\n", 72 | " # Create the output matrices\n", 73 | " omat = np.zeros([i+n_bkp*pad for i in mat.shape])\n", 74 | " cmat = np.zeros_like(omat)\n", 75 | " lmat = np.zeros_like(omat, dtype=bool)\n", 76 | " \n", 77 | " # Assign input mat to grid index\n", 78 | " omat[x, y] = mat.flatten()\n", 79 | " # Mask grid index for the line mask\n", 80 | " lmat[x,y] = True\n", 81 | " lmat_m = np.ma.masked_where(lmat, lmat)\n", 82 | " \n", 83 | " # Convert the input based breakpoints to the new index\n", 84 | " starts_c = [ind[i[0]]-pad for i in cl_def]\n", 85 | " stops_c = [ind[i[1]]+1 for i in cl_def]\n", 86 | " # Loop through the breakpoints\n", 87 | " for i in np.arange(len(starts_c)):\n", 88 | " start = starts_c[i]\n", 89 | " stop = stops_c[i]\n", 90 | " # Select the range of rows and columns to paint\n", 91 | " start_ind = np.arange(start, start+pad)\n", 92 | " stop_ind = np.arange(stop, stop+pad)\n", 93 | " \n", 94 | " # If this isn't an edge cluster or we paint them\n", 95 | " if not start<=0 or edge:\n", 96 | " # Draw the top left corner first\n", 97 | " cmat[start_ind, start:stop] = i+1\n", 98 | " cmat[start:stop, start_ind] = i+1\n", 99 | " # if this is an edge cluster and we don't paint them\n", 100 | " # only paint the bottom right corner but from the start\n", 101 | " else:\n", 102 | " # Draw the bottom right corner next\n", 103 | " cmat[stop_ind, :stop+pad] = i+1\n", 104 | " cmat[:stop+pad, stop_ind] = i+1\n", 105 | " continue\n", 106 | " if not stop>=omat.shape[0] or edge:\n", 107 | " # Draw the bottom right corner next\n", 108 | " cmat[stop_ind, start:stop+pad] = i+1\n", 109 | " cmat[start:stop+pad, stop_ind] = i+1\n", 110 | " # Mask the cluster matrix\n", 111 | " cmat_m = np.ma.masked_where(cmat==0, cmat)\n", 112 | " return omat, cmat_m, lmat_m, ind" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Visualize\n", 122 | "low = 0\n", 123 | "high = -1\n", 124 | "f = plt.figure(figsize=(15, 15), frameon=False)\n", 125 | "ax = f.add_subplot(111)\n", 126 | "ab = ax.matshow(o7[low:high, low:high], vmin=0, vmax=0.8, cmap=plt.cm.viridis, aspect='auto')\n", 127 
| "ab = ax.matshow(l7[low:high, low:high], cmap=plt.cm.Greys_r, aspect='auto', alpha=1)\n", 128 | "ab = ax.matshow(l7[low:high, low:high], cmap=plt.cm.Greys, aspect='auto', alpha=1)\n", 129 | "ab = ax.matshow(c7[low:high, low:high], cmap=lin7, vmin=1, vmax=7, aspect='auto')\n", 130 | "\n", 131 | "ab = ax.set_xticks([])\n", 132 | "ab = ax.set_yticks([])\n", 133 | "ax.set_axis_off()\n", 134 | "f.savefig(os.path.join(fig_p, 's7_full.png'), dpi=300, bbox_inches='tight', pad_inches=0)" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "tensorflow_gpu_test" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.5.2" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | --------------------------------------------------------------------------------