├── .gitignore
├── HCP_fmripredict
│   ├── .idea
│   │   ├── HCP_fmripredict.iml
│   │   ├── codeStyles
│   │   │   ├── Project.xml
│   │   │   └── codeStyleConfig.xml
│   │   ├── dictionaries
│   │   │   └── yu.xml
│   │   ├── encodings.xml
│   │   ├── inspectionProfiles
│   │   │   └── Project_Default.xml
│   │   ├── libraries
│   │   │   └── R_User_Library.xml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   ├── vcs.xml
│   │   └── workspace.xml
│   ├── .ipynb_checkpoints
│   │   ├── HCP_rsfMRI_test-checkpoint.ipynb
│   │   ├── HCP_task_fmri_test-checkpoint.ipynb
│   │   ├── Untitled-checkpoint.ipynb
│   │   └── Untitled1-checkpoint.ipynb
│   ├── HCP_rsfMRI_test.ipynb
│   ├── HCP_task_fmri_cnn_tensorpack.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk2.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk3.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_motor.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm_bk.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm_test.py
│   ├── HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm_test2.py
│   ├── HCP_task_fmri_gcn_test.py
│   ├── HCP_task_fmri_test.ipynb
│   ├── cnn_graph_codes
│   │   ├── HCP_tfmri_test_gcn.ipynb
│   │   ├── HCP_tfmri_test_gcn_test.ipynb
│   │   ├── LICENSE.txt
│   │   ├── README.md
│   │   ├── lib
│   │   │   ├── coarsening.py
│   │   │   ├── graph.py
│   │   │   ├── models.py
│   │   │   └── utils.py
│   │   ├── makefile
│   │   ├── my_test_mnist.ipynb
│   │   ├── my_test_news.ipynb
│   │   ├── rcv1.ipynb
│   │   ├── requirements.txt
│   │   ├── trials
│   │   │   ├── 1_learning_filters.ipynb
│   │   │   ├── 2_classification.ipynb
│   │   │   ├── 3_tensorflow.ipynb
│   │   │   ├── 4_coarsening.ipynb
│   │   │   └── makefile
│   │   └── usage.ipynb
│   ├── config.py
│   ├── extract_fmri_event_data.py
│   ├── fmri_utils.py
│   ├── hcp_test_mnist_gcn_cedar.log
│   ├── model.py
│   ├── my_test_mnist_gcn.py
│   ├── requirements.txt
│   ├── requirements_update.txt
│   ├── tensorflow_test.sh
│   ├── tensorflow_test_wholenode.sh
│   ├── test_model_submit_bk.sh
│   ├── test_model_submit_new.sh
│   ├── test_module.py
│   ├── utils.py
│   └── utils_bk.py
├── README.md
├── fMRI_atlas_ROI_tc.py.ipynb
├── fmri_decoding
│   ├── fmri_decoding_motor_svc_weights.nii.gz
│   ├── fmri_decoding_readme.txt
│   ├── midnight_task_fmri_decoding_CNN.ipynb
│   ├── midnight_task_fmri_decoding_svm.ipynb
│   ├── model_test_2dcnn.h5
│   ├── model_test_2dcnn.json
│   └── test_gcn_training.ipynb
├── linear_model
│   ├── midnight_project_resting.ipynb
│   ├── project_update_readme.txt
│   └── results_linear_model.txt
└── plot_conn_matrix_surchs.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # IPython checkpoints
6 | .ipynb_checkpoints/
7 |
8 | # Datasets
9 | data/
10 |
11 | # Tensorflow summaries
12 | summaries/
13 |
14 | # Model parameters
15 | checkpoints/
16 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/HCP_fmripredict.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/codeStyles/Project.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/dictionaries/yu.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/libraries/R_User_Library.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/HCP_fmripredict/.ipynb_checkpoints/Untitled1-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/HCP_fmripredict/HCP_task_fmri_cnn_tensorpack.py:
--------------------------------------------------------------------------------
1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6
2 |
3 | # Author: Yu Zhang
4 | # License: simplified BSD
5 | # coding: utf-8
6 |
7 | from pathlib import Path
8 | import glob
9 | import itertools
10 | import os
11 | import time
12 | import numpy as np
13 | import pandas as pd
14 | import nibabel as nib
15 | import matplotlib.pyplot as plt
16 | ###%matplotlib inline
17 |
18 | from nilearn import signal
19 | from nilearn import image
20 | from sklearn import preprocessing
21 | from keras.utils import np_utils
22 |
23 | from tensorpack import dataflow
24 |
25 | from keras.utils import to_categorical
26 | from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout,AveragePooling2D
27 | from keras.layers import Conv3D, MaxPooling3D, BatchNormalization, AveragePooling3D
28 | from keras.models import Model
29 | import keras.backend as K
30 |
31 |
32 | #####global variable settings
33 | '''
34 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152
35 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
36 | from tensorflow.python.client import device_lib
37 | print(device_lib.list_local_devices())
38 | '''
39 | import tensorflow as tf
40 | from keras import backend as K
41 |
42 | USE_GPU_CPU = 1
43 | num_cores = 4
44 |
45 | if not USE_GPU_CPU :
46 | num_GPU = num_cores
47 | num_CPU = 0
48 | else:
49 | num_CPU = 2
50 | num_GPU = 2
51 |
52 | config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,\
53 | inter_op_parallelism_threads=num_cores, allow_soft_placement=True,\
54 | device_count = {'CPU' : num_CPU, 'GPU' : num_GPU})
55 | session = tf.Session(config=config)
56 | K.set_session(session)
57 |
58 |
59 | #########################################################
60 | pathdata = Path('/project/6002071/yuzhang/HCP/aws_s3_HCP1200/FMRI/')
61 | pathout = '/project/6002071/yuzhang/HCP/'
62 | modality = 'MOTOR' # 'MOTOR'
63 | ###dict for different types of movement
64 | task_contrasts = {"rf": "foot",
65 | "lf": "foot",
66 | "rh": "hand",
67 | "lh": "hand",
68 | "t": "tongue"}
69 | target_name = np.unique(list(task_contrasts.values()))
70 | print(target_name)
71 |
72 | TR = 0.72
73 | nr_thread=5
74 | buffer_size=20
75 | Flag_CNN_Model = '2d'
76 | ########################
77 |
78 |
79 | def load_fmri_data(pathdata,modality=None,confound_name=None):
80 | ###fMRI decoding: using event signals instead of activation pattern from glm
81 | ##collect task-fMRI signals
82 |
83 | if not modality:
84 | modality = 'MOTOR' # 'MOTOR'
85 |
86 | subjects = []
87 | fmri_files = []
88 | confound_files = []
89 | for fmri_file in sorted(pathdata.glob('tfMRI_'+modality+'_??/*_tfMRI_'+modality+'_??.nii.gz')):
90 | subjects.append(Path(os.path.dirname(fmri_file)).parts[-3])
91 | fmri_files.append(str(fmri_file))
92 |
93 | for confound in sorted(pathdata.glob('tfMRI_'+modality+'_??/*_Movement_Regressors.txt')):
94 | confound_files.append(str(confound))
95 |
96 | print('%d subjects included in the dataset' % len(fmri_files))
97 | return fmri_files, confound_files, subjects
98 |
99 |
100 | def load_event_files(fmri_files,confound_files,ev_filename=None):
101 | ###collect the event design files
102 | tc_matrix = nib.load(fmri_files[0])
103 | Subject_Num = len(fmri_files)
104 | Trial_Num = tc_matrix.shape[-1]
105 | print("Data samples including %d subjects with %d trials" % (Subject_Num, Trial_Num))
106 |
107 | EVS_files = []
108 | subj = 0
109 | for ev, sub_count in zip(sorted(pathdata.glob('tfMRI_' + modality + '_??/*_combined_events_spm_' + modality + '.csv')),range(Subject_Num)):
110 | ###remove fmri files if the event design is missing
111 | while os.path.dirname(fmri_files[subj]) < os.path.dirname(str(ev)):
112 |             print("Event files and fmri data are mismatched for subject: ")
113 | print(Path(os.path.dirname(str(ev))).parts[-3::2], ':',
114 | Path(os.path.dirname(fmri_files[subj])).parts[-3::2])
115 | print("Due to missing event files for subject : %s" % os.path.dirname(fmri_files[subj]))
116 | fmri_files[subj] = []
117 | confound_files[subj] = []
118 | subj += 1
119 | if subj > Subject_Num:
120 | break
121 | if os.path.dirname(fmri_files[subj]) == os.path.dirname(str(ev)):
122 | EVS_files.append(str(ev))
123 | subj += 1
124 |
125 | fmri_files = list(filter(None, fmri_files))
126 | confound_files = list(filter(None, confound_files))
127 | if len(EVS_files) != len(fmri_files):
128 |         print('Mismatched number of subjects between event:{} and fmri:{} files'.format(len(EVS_files), len(fmri_files)))
129 |
130 | ################################
131 | ###loading all event designs
132 | if not ev_filename:
133 | ev_filename = "_event_labels_1200R_LR_RL.txt"
134 |
135 | events_all_subjects_file = pathout+modality+ev_filename
136 | if os.path.isfile(events_all_subjects_file):
137 | print('Collecting trial info from file:', events_all_subjects_file)
138 | subjects_trial_labels = pd.read_csv(events_all_subjects_file,sep="\t",encoding="utf8")
139 | ###print(subjects_trial_labels.keys())
140 |
141 | subjects_trial_label_matrix = subjects_trial_labels.loc[:,'trial1':'trial'+str(Trial_Num)]
142 | sub_name = subjects_trial_labels['subject']
143 | coding_direct = subjects_trial_labels['coding']
144 | print(subjects_trial_label_matrix.shape,len(sub_name),len(np.unique(sub_name)),len(coding_direct))
145 | else:
146 | print('Loading trial info for each task-fmri file and save to csv file:', events_all_subjects_file)
147 | subjects_trial_label_matrix = []
148 | sub_name = []
149 | coding_direct = []
150 | for subj in np.arange(Subject_Num):
151 | pathsub = Path(os.path.dirname(EVS_files[subj]))
152 | sub_name.append(pathsub.parts[-3])
153 | coding_direct.append(pathsub.parts[-1].split('_')[-1])
154 |
155 | ##trial info in volume
156 | trial_infos = pd.read_csv(EVS_files[subj],sep="\t",encoding="utf8",header = None,names=['onset','duration','rep','task'])
157 | Onsets = np.ceil((trial_infos.onset/TR)).astype(int) #(trial_infos.onset/TR).astype(int)
158 | Duras = np.ceil((trial_infos.duration/TR)).astype(int) #(trial_infos.duration/TR).astype(int)
159 | Movetypes = trial_infos.task
160 |
161 | labels = ["rest"]*Trial_Num;
162 | for start,dur,move in zip(Onsets,Duras,Movetypes):
163 | for ti in range(start-1,start+dur):
164 | labels[ti]= task_contrasts[move]
165 | subjects_trial_label_matrix.append(labels)
166 |
167 | print(np.array(subjects_trial_label_matrix).shape)
168 | #print(np.array(subjects_trial_label_matrix[0]))
169 | subjects_trial_labels = pd.DataFrame(data=np.array(subjects_trial_label_matrix),columns=['trial'+str(i+1) for i in range(Trial_Num)])
170 | subjects_trial_labels['subject'] = sub_name
171 | subjects_trial_labels['coding'] = coding_direct
172 | subjects_trial_labels.keys()
173 | #print(subjects_trial_labels['subject'],subjects_trial_labels['coding'])
174 |
175 | ##save the labels
176 | subjects_trial_labels.to_csv(events_all_subjects_file,sep='\t', encoding='utf-8',index=False)
177 |
178 |     ##return a DataFrame in both branches so that .iloc works downstream
179 |     return subjects_trial_labels.loc[:, 'trial1':'trial' + str(Trial_Num)], sub_name
179 |
180 |
181 | #############################
182 | #######################################
183 | ####tensorpack: multithread
184 | class gen_fmri_file(dataflow.DataFlow):
185 | """ Iterate through fmri filenames, confound filenames and labels
186 | """
187 | def __init__(self, fmri_files,confound_files, label_matrix,data_type='train',train_percent=0.8):
188 | assert (len(fmri_files) == len(confound_files))
189 | # self.data=zip(fmri_files,confound_files)
190 | self.fmri_files = fmri_files
191 | self.confound_files = confound_files
192 | self.label_matrix = label_matrix
193 |
194 | self.data_type=data_type
195 | self.train_percent=train_percent
196 |
197 | def size(self):
198 |         split_num=int(len(self.fmri_files)*self.train_percent)
199 | if self.data_type=='train':
200 | return split_num
201 | else:
202 | return len(self.fmri_files)-split_num
203 |
204 | def get_data(self):
205 |         split_num=int(len(self.fmri_files)*self.train_percent)
206 | if self.data_type=='train':
207 | while True:
208 | rand_pos=np.random.choice(split_num,1)[0]
209 | yield self.fmri_files[rand_pos],self.confound_files[rand_pos],self.label_matrix.iloc[rand_pos]
210 | else:
211 | for pos_ in range(split_num,len(self.fmri_files)):
212 | yield self.fmri_files[pos_],self.confound_files[pos_],self.label_matrix.iloc[pos_]
213 |
214 |
215 | class split_samples(dataflow.DataFlow):
216 | """ Iterate through fmri filenames, confound filenames and labels
217 | """
218 | def __init__(self, ds):
219 | self.ds=ds
220 |
221 | def size(self):
222 | return 91*284
223 |
224 | def get_data(self):
225 | for data in self.ds.get_data():
226 | for i in range(data[1].shape[0]):
227 | yield data[0][i],data[1][i]
228 |
229 |
230 | def map_load_fmri_image(dp,target_name):
231 | fmri_file=dp[0]
232 | confound_file=dp[1]
233 | label_trials=dp[2]
234 |
235 | ###remove confound effects
236 | confound = np.loadtxt(confound_file)
237 | fmri_data_clean = image.clean_img(fmri_file, detrend=True, standardize=True, confounds=confound)
238 |
239 | ##pre-select task types
240 | trial_mask = pd.Series(label_trials).isin(target_name) ##['hand', 'foot','tongue']
241 | fmri_data_cnn = image.index_img(fmri_data_clean, np.where(trial_mask)[0]).get_data()
242 | ###use each slice along z-axis as one sample
243 | label_data_trial = np.array(label_trials.loc[trial_mask])
244 | le = preprocessing.LabelEncoder()
245 | le.fit(target_name)
246 | label_data_cnn = le.transform(label_data_trial) ##np_utils.to_categorical(): convert label vector to matrix
247 |
248 | img_rows, img_cols, img_deps = fmri_data_cnn.shape[:-1]
249 | fmri_data_cnn_test = np.transpose(fmri_data_cnn.reshape(img_rows, img_cols, np.prod(fmri_data_cnn.shape[2:])), (2, 0, 1))
250 | label_data_cnn_test = np.repeat(label_data_cnn, img_deps, axis=0).flatten()
251 | print(fmri_file, fmri_data_cnn_test.shape,label_data_cnn_test.shape)
252 |
253 | return fmri_data_cnn_test, label_data_cnn_test
254 |
255 |
256 | def map_load_fmri_image_3d(dp, target_name):
257 | fmri_file = dp[0]
258 | confound_file = dp[1]
259 | label_trials = dp[2]
260 |
261 | ###remove confound effects
262 |     confound = np.loadtxt(confound_file)
263 |     fmri_data_clean = image.clean_img(fmri_file, detrend=True, standardize=True, confounds=confound)
264 |
265 | ##pre-select task types
266 | trial_mask = pd.Series(label_trials).isin(target_name) ##['hand', 'foot','tongue']
267 | fmri_data_cnn = image.index_img(fmri_data_clean, np.where(trial_mask)[0]).get_data()
268 | ###use each slice along z-axis as one sample
269 | label_data_trial = np.array(label_trials.loc[trial_mask])
270 | le = preprocessing.LabelEncoder()
271 | le.fit(target_name)
272 | label_data_cnn = le.transform(label_data_trial) ##np_utils.to_categorical(): convert label vector to matrix
273 |
274 | img_rows, img_cols, img_deps = fmri_data_cnn.shape[:-1]
275 | fmri_data_cnn_test = np.transpose(fmri_data_cnn, (3, 0, 1, 2))
276 | label_data_cnn_test = label_data_cnn.flatten()
277 | print(fmri_file, fmri_data_cnn_test.shape, label_data_cnn_test.shape)
278 |
279 | return fmri_data_cnn_test, label_data_cnn_test
280 |
281 |
282 | def data_pipe(fmri_files,confound_files,label_matrix,target_name=None,batch_size=32,data_type='train',
283 | train_percent=0.8,nr_thread=nr_thread,buffer_size=buffer_size):
284 | assert data_type in ['train', 'val', 'test']
285 | assert fmri_files is not None
286 |
287 | print('\n\nGenerating dataflow for %s datasets \n' % data_type)
288 |
289 | buffer_size = min(len(fmri_files),buffer_size)
290 | nr_thread = min(len(fmri_files),nr_thread)
291 |
292 | ds0 = gen_fmri_file(fmri_files,confound_files, label_matrix,data_type=data_type,train_percent=train_percent)
293 | print('dataflowSize is ' + str(ds0.size()))
294 | print('Loading data using %d threads with %d buffer_size ... \n' % (nr_thread, buffer_size))
295 |
296 | if target_name is None:
297 | target_name = np.unique(label_matrix)
298 |
299 | ####running the model
300 | start_time = time.clock()
301 | ds1 = dataflow.MultiThreadMapData(
302 | ds0, nr_thread=nr_thread,
303 | map_func=lambda dp: map_load_fmri_image(dp,target_name),
304 | buffer_size=buffer_size,
305 | strict=True)
306 |
307 | ds1 = dataflow.PrefetchData(ds1, buffer_size,1)
308 |
309 | ds1 = split_samples(ds1)
310 | print('prefetch dataflowSize is ' + str(ds1.size()))
311 |
312 | ds1 = dataflow.LocallyShuffleData(ds1,buffer_size=ds1.size()*buffer_size)
313 |
314 | ds1 = dataflow.BatchData(ds1,batch_size=batch_size)
315 | print('Time Usage of loading data in seconds: {} \n'.format(time.clock() - start_time))
316 |
317 | ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1)
318 | ds1._reset_once()
319 | ##ds1.reset_state()
320 |
321 | #return ds1.get_data()
322 | for df in ds1.get_data():
323 | ##print(np.expand_dims(df[0].astype('float32'),axis=3).shape)
324 | yield (np.expand_dims(df[0].astype('float32'),axis=3),to_categorical(df[1].astype('int32'),len(target_name)))
325 |
326 |
327 | def data_pipe_3dcnn(fmri_files, confound_files, label_matrix, target_name=None, flag_cnn='3d', batch_size=32,
328 | data_type='train',train_percent=0.8, nr_thread=nr_thread, buffer_size=buffer_size):
329 | assert data_type in ['train', 'val', 'test']
330 | assert flag_cnn in ['3d', '2d']
331 | assert fmri_files is not None
332 |
333 | print('\n\nGenerating dataflow for %s datasets \n' % data_type)
334 |
335 | buffer_size = min(len(fmri_files), buffer_size)
336 | nr_thread = min(len(fmri_files), nr_thread)
337 |
338 | ds0 = gen_fmri_file(fmri_files, confound_files, label_matrix, data_type=data_type, train_percent=train_percent)
339 | print('dataflowSize is ' + str(ds0.size()))
340 | print('Loading data using %d threads with %d buffer_size ... \n' % (nr_thread, buffer_size))
341 |
342 | if target_name is None:
343 | target_name = np.unique(label_matrix)
344 |
345 | ####running the model
346 | start_time = time.clock()
347 | if flag_cnn == '2d':
348 | ds1 = dataflow.MultiThreadMapData(
349 | ds0, nr_thread=nr_thread,
350 | map_func=lambda dp: map_load_fmri_image(dp, target_name),
351 | buffer_size=buffer_size,
352 | strict=True)
353 | elif flag_cnn == '3d':
354 | ds1 = dataflow.MultiThreadMapData(
355 | ds0, nr_thread=nr_thread,
356 | map_func=lambda dp: map_load_fmri_image_3d(dp, target_name),
357 | buffer_size=buffer_size,
358 | strict=True)
359 |
360 | ds1 = dataflow.PrefetchData(ds1, buffer_size, 1)
361 |
362 | ds1 = split_samples(ds1)
363 | print('prefetch dataflowSize is ' + str(ds1.size()))
364 |
365 | ds1 = dataflow.LocallyShuffleData(ds1, buffer_size=ds1.size() * buffer_size)
366 |
367 | ds1 = dataflow.BatchData(ds1, batch_size=batch_size)
368 | print('Time Usage of loading data in seconds: {} \n'.format(time.clock() - start_time))
369 |
370 | ds1 = dataflow.PrefetchDataZMQ(ds1, nr_proc=1)
371 | ds1._reset_once()
372 | ##ds1.reset_state()
373 |
374 | ##return ds1.get_data()
375 |
376 | for df in ds1.get_data():
377 | if flag_cnn == '2d':
378 | yield (np.expand_dims(df[0].astype('float32'), axis=3),to_categorical(df[1].astype('int32'), len(target_name)))
379 | elif flag_cnn == '3d':
380 | yield (np.expand_dims(df[0].astype('float32'), axis=4),to_categorical(df[1].astype('int32'), len(target_name)))
381 |
382 |
383 | ###end of tensorpack: multithread
384 | ##############################################################
385 |
386 |
387 | def plot_history(model_history):
388 | plt.figure()
389 | plt.subplot(121)
390 | plt.plot(model_history.history['acc'], color='r')
391 | plt.plot(model_history.history['val_acc'], color='b')
392 | plt.xlabel('Epochs')
393 | plt.ylabel('Accuracy')
394 | plt.legend(['Training', 'Validation'])
395 |
396 | plt.subplot(122)
397 | plt.plot(model_history.history['loss'], color='r')
398 | plt.plot(model_history.history['val_loss'], color='b')
399 | plt.xlabel('Epochs')
400 | plt.ylabel('Loss Function')
401 | plt.legend(['Training', 'Validation'])
402 | return None
403 |
404 |
405 | def build_cnn_model(input_shape, Nlabels, filters=32, convsize=3, poolsize=2, hidden_size=128, conv_layers=4):
406 | # import keras.backend as K
407 | # if K.image_data_format() == 'channels_first':
408 | # img_shape = (1,img_rows,img_cols)
409 | # elif K.image_data_format() == 'channels_last':
410 | # img_shape = (img_rows,img_cols,1)
411 |
412 |
413 | input0 = Input(shape=input_shape)
414 | drop1 = input0
415 | for li in range(conv_layers):
416 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(drop1)
417 | conv1 = BatchNormalization()(conv1)
418 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(conv1)
419 | conv1 = BatchNormalization()(conv1)
420 | pool1 = MaxPooling2D((poolsize, poolsize))(conv1)
421 | drop1 = Dropout(0.25)(pool1)
422 | if (li+1) % 2 == 0:
423 | filters *= 2
424 |
425 | drop2 = drop1
426 | avg1 = AveragePooling2D(pool_size=(5, 5))(drop2)
427 | flat = Flatten()(avg1)
428 | hidden = Dense(hidden_size, activation='relu')(flat)
429 | drop3 = Dropout(0.5)(hidden)
430 | # hidden = Dense((hidden_size/4).astype(int), activation='relu')(drop3)
431 | # drop4 = Dropout(0.5)(hidden)
432 |
433 | out = Dense(Nlabels, activation='softmax')(drop3)
434 |
435 | model = Model(inputs=input0, outputs=out)
436 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
437 | model.summary()
438 |
439 | return model
440 |
441 |
442 | def build_cnn3d_model(input_shape, Nlabels, filters=32, convsize=3, poolsize=2, hidden_size=128, conv_layers=4):
443 | # import keras.backend as K
444 | # if K.image_data_format() == 'channels_first':
445 | # img_shape = (1,img_rows,img_cols,img_deps)
446 | # elif K.image_data_format() == 'channels_last':
447 | # img_shape = (img_rows,img_cols, img_deps,1)
448 |
449 | input0 = Input(shape=input_shape)
450 | drop1 = input0
451 | for li in range(conv_layers):
452 | conv1 = Conv3D(filters, (convsize, convsize, convsize), padding='same', activation='relu')(drop1)
453 | conv1 = BatchNormalization()(conv1)
454 | conv1 = Conv3D(filters, (convsize, convsize, convsize), padding='same', activation='relu')(conv1)
455 | conv1 = BatchNormalization()(conv1)
456 | pool1 = MaxPooling3D((poolsize, poolsize, poolsize))(conv1)
457 | drop1 = Dropout(0.25)(pool1)
458 | if (li+1) % 2 == 0:
459 | filters *= 2
460 |
461 | drop2 = drop1
462 | avg1 = AveragePooling3D(pool_size=(5, 5, 5))(drop2)
463 | flat = Flatten()(avg1)
464 | hidden = Dense(hidden_size, activation='relu')(flat)
465 | drop3 = Dropout(0.5)(hidden)
466 |
467 | out = Dense(Nlabels, activation='softmax')(drop3)
468 |
469 | model = Model(inputs=input0, outputs=out)
470 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
471 | model.summary()
472 |
473 | return model
474 |
475 |
476 | #####################
477 | #####
478 | if __name__ == '__main__':
479 |
480 | fmri_files, confound_files, subjects = load_fmri_data(pathdata,modality)
481 | print('including %d fmri files and %d confounds files \n\n' % (len(fmri_files), len(confound_files)))
482 |
483 | label_matrix, sub_name = load_event_files(fmri_files,confound_files)
484 | print('Collecting event design files for subjects and saved into matrix ...' , np.array(label_matrix).shape)
485 |
486 | nb_class = len(target_name)
487 | tc_matrix = nib.load(fmri_files[0])
488 | img_rows, img_cols, img_deps = tc_matrix.shape[:-1]
489 | img_shape = []
490 | if Flag_CNN_Model == '2d':
491 | if K.image_data_format() == 'channels_first':
492 | img_shape = (1, img_rows, img_cols)
493 | elif K.image_data_format() == 'channels_last':
494 | img_shape = (img_rows, img_cols, 1)
495 | elif Flag_CNN_Model == '3d':
496 | if K.image_data_format() == 'channels_first':
497 | img_shape = (1, img_rows, img_cols, img_deps)
498 | elif K.image_data_format() == 'channels_last':
499 | img_shape = (img_rows, img_cols, img_deps, 1)
500 |
501 | #########################################
502 | '''
503 | ##test whether dataflow from tensorpack works
504 | test_sub_num = 1000
505 | tst = data_pipe_3dcnn(fmri_files[:test_sub_num], confound_files[:test_sub_num], label_matrix.iloc[:test_sub_num],
506 | target_name=target_name, flag_cnn=Flag_CNN_Model, batch_size=16, data_type='train', buffer_size=5)
507 | out = next(tst)
508 | print(out[0].shape)
509 | print(out[1].shape)
510 | '''
511 | ####################
512 | #####start 2dcnn model
513 | test_sub_num = len(fmri_files)
514 | ##xx = data_pipe(fmri_files,confound_files,label_matrix,target_name=target_name)
515 | train_gen = data_pipe(fmri_files[:test_sub_num],confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num],
516 | target_name=target_name,batch_size=32,data_type='train',nr_thread=4, buffer_size=20)
517 | val_set = data_pipe(fmri_files[:test_sub_num],confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num],
518 | target_name=target_name,batch_size=32,data_type='test',nr_thread=2, buffer_size=20)
519 |
520 | '''
521 | #########################################
522 | test_sub_num = len(fmri_files)
523 | ######start cnn model
524 | train_gen = data_pipe_3dcnn(fmri_files[:test_sub_num], confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num],
525 | target_name=target_name, flag_cnn=Flag_CNN_Model,
526 | batch_size=32, data_type='train', nr_thread=4, buffer_size=20)
527 | val_set = data_pipe_3dcnn(fmri_files[:test_sub_num], confound_files[:test_sub_num],label_matrix.iloc[:test_sub_num],
528 | target_name=target_name, flag_cnn=Flag_CNN_Model,
529 | batch_size=32, data_type='test', nr_thread=2, buffer_size=20)
530 | '''
531 |
532 | if Flag_CNN_Model == '2d':
533 | print('\nTraining the model using 2d-CNN \n')
534 | model_test = build_cnn_model(img_shape, nb_class)
535 | elif Flag_CNN_Model == '3d':
536 | print('\nTraining the model using 3d-CNN \n')
537 | model_test = build_cnn3d_model(img_shape, nb_class)
538 |
539 | ######start training the model
540 | model_test_history = model_test.fit_generator(train_gen, epochs=20, steps_per_epoch=100, verbose=1, shuffle=True)
541 | #validation_data=val_set,validation_steps=10,
542 | #workers=1, use_multiprocessing=False, shuffle=True)
543 | print(model_test_history.history)
544 | for key,val in model_test_history.history.items():
545 | print(key, val)
546 |
547 |     scores = model_test.evaluate_generator(val_set, steps=100, workers=1)
548 | print(scores)
549 |
550 | import pickle
551 | logfilename = pathout+'train_val_scores_dump2.txt'
552 | if os.path.isfile(logfilename):
553 | logfilename = logfilename.split('.')[0] + '2.txt'
554 |     file = open(logfilename, 'wb')  ##pickle requires a binary-mode file handle
555 | pickle.dump(model_test_history.history, file)
556 | file.close()
557 |
558 | '''
559 | from tensorpack import *
560 | from tensorpack.tfutils import summary
561 | from tensorpack.dataflow import dataset
562 |
563 | class Model(ModelDesc):
564 | def inputs(self,image_shape):
565 | """
566 | Define all the inputs (with type, shape, name) that the graph will need.
567 | """
568 | return [tf.placeholder(tf.float32, (None, image_shape.rval()), 'input'),
569 | tf.placeholder(tf.int32, (None,), 'label')]
570 |
571 | def build_graph(self, image, label):
572 | """This function should build the model which takes the input variables
573 | and return cost at the end"""
574 |
575 | # In tensorflow, inputs to convolution function are assumed to be
576 | # NHWC. Add a single channel here.
577 | image = tf.expand_dims(image, 3)
578 |
579 | image = image * 2 - 1 # center the pixels values at zero
580 | # The context manager `argscope` sets the default option for all the layers under
581 | # this context. Here we use 32 channel convolution with shape 3x3
582 | with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu, filters=32):
583 | logits = (LinearWrap(image)
584 | .Conv2D('conv0')
585 | .MaxPooling('pool0', 2)
586 | .Conv2D('conv1')
587 | .Conv2D('conv2')
588 | .MaxPooling('pool1', 2)
589 | .Conv2D('conv3')
590 | .FullyConnected('fc0', 512, activation=tf.nn.relu)
591 | .Dropout('dropout', rate=0.5)
592 | .FullyConnected('fc1', 10, activation=tf.identity)())
593 |
594 | tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
595 |
596 | # a vector of length B with loss of each sample
597 | cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
598 | cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
599 |
600 | correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
601 | accuracy = tf.reduce_mean(correct, name='accuracy')
602 |
603 | # This will monitor training error (in a moving_average fashion):
604 | # 1. write the value to tensosrboard
605 | # 2. write the value to stat.json
606 | # 3. print the value after each epoch
607 | train_error = tf.reduce_mean(1 - correct, name='train_error')
608 | summary.add_moving_summary(train_error, accuracy)
609 |
610 | # Use a regex to find parameters to apply weight decay.
611 | # Here we apply a weight decay on all W (weight matrix) of all fc layers
612 | wd_cost = tf.multiply(1e-5,
613 | regularize_cost('fc.*/W', tf.nn.l2_loss),
614 | name='regularize_loss')
615 | total_cost = tf.add_n([wd_cost, cost], name='total_cost')
616 | summary.add_moving_summary(cost, wd_cost, total_cost)
617 |
618 | # monitor histogram of all weight (of conv and fc layers) in tensorboard
619 | summary.add_param_summary(('.*/W', ['histogram', 'rms']))
620 | return total_cost
621 |
622 | def get_config(dataset_train,dataset_test):
623 | # How many iterations you want in each epoch.
624 | # This is the default value, don't actually need to set it in the config
625 | steps_per_epoch = dataset_train.size()
626 |
627 | # get the config which contains everything necessary in a training
628 | return TrainConfig(
629 | model=Model(),
630 | dataflow=dataset_train, # the DataFlow instance for training
631 | callbacks=[
632 | ModelSaver(), # save the model after every epoch
633 | MaxSaver('validation_accuracy'), # save the model with highest accuracy (prefix 'validation_')
634 | InferenceRunner( # run inference(for validation) after every epoch
635 | dataset_test, # the DataFlow instance used for validation
636 | ScalarStats(['cross_entropy_loss', 'accuracy'])),
637 | ],
638 | steps_per_epoch=steps_per_epoch,
639 | max_epoch=100,
640 | )
641 |
642 | ##main function
643 | config = get_config()
644 | launch_train_with_config(config, SimpleTrainer())
645 | '''
646 |
--------------------------------------------------------------------------------
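Note: the snippet below is not part of the repository; it is a minimal sanity-check sketch for the 2D-CNN defined in HCP_task_fmri_cnn_tensorpack.py above. It assumes the script's dependencies (keras, tensorflow, tensorpack, nilearn) are installed and that the script is importable from the working directory; the slice shape (91 x 109, an assumed MNI 2 mm axial slice) and batch size are illustrative, not values read from the data.

```python
# Illustrative sketch: build build_cnn_model() from the script above and
# run one training step on a random batch shaped like the output of data_pipe().
import numpy as np
from keras.utils import to_categorical

from HCP_task_fmri_cnn_tensorpack import build_cnn_model  # importing also creates the TF session

img_shape = (91, 109, 1)   # one axial slice, channels_last (assumed grid size)
nb_class = 3               # foot / hand / tongue, matching target_name above

model = build_cnn_model(img_shape, nb_class)

x = np.random.rand(8, *img_shape).astype('float32')                     # (batch, rows, cols, 1)
y = to_categorical(np.random.randint(nb_class, size=8), nb_class)       # one-hot labels
loss, acc = model.train_on_batch(x, y)
print(loss, acc)
```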
/HCP_fmripredict/cnn_graph_codes/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Michaël Defferrard
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/HCP_fmripredict/cnn_graph_codes/README.md:
--------------------------------------------------------------------------------
1 | # Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering
2 |
3 | The code in this repository implements an efficient generalization of the
4 | popular Convolutional Neural Networks (CNNs) to arbitrary graphs, presented in
5 | our paper:
6 |
7 | Michaël Defferrard, Xavier Bresson, Pierre Vandergheynst, [Convolutional Neural
8 | Networks on Graphs with Fast Localized Spectral Filtering][arXiv], Neural
9 | Information Processing Systems (NIPS), 2016.
10 |
11 | Additional material:
12 | * [NIPS2016 spotlight video][video], 2016-11-22.
13 | * [Deep Learning on Graphs][slides_ntds], a lecture for EPFL's master course [A
14 | Network Tour of Data Science][ntds], 2016-12-21.
15 | * [Deep Learning on Graphs][slides_dlid], an invited talk at the [Deep Learning on
16 | Irregular Domains][dlid] workshop of BMVC, 2017-09-17.
17 |
18 | [video]: https://www.youtube.com/watch?v=cIA_m7vwOVQ
19 | [slides_ntds]: https://doi.org/10.6084/m9.figshare.4491686
20 | [ntds]: https://github.com/mdeff/ntds_2016
21 | [slides_dlid]: https://doi.org/10.6084/m9.figshare.5394805
22 | [dlid]: http://dlid.swansea.ac.uk
23 |
24 | There are also implementations of the filters used in:
25 | * Joan Bruna, Wojciech Zaremba, Arthur Szlam, Yann LeCun, [Spectral Networks
26 | and Locally Connected Networks on Graphs][bruna], International Conference on
27 | Learning Representations (ICLR), 2014.
28 | * Mikael Henaff, Joan Bruna and Yann LeCun, [Deep Convolutional Networks on
29 | Graph-Structured Data][henaff], arXiv, 2015.
30 |
31 | [arXiv]: https://arxiv.org/abs/1606.09375
32 | [bruna]: https://arxiv.org/abs/1312.6203
33 | [henaff]: https://arxiv.org/abs/1506.05163
34 |
35 | ## Installation
36 |
37 | 1. Clone this repository.
38 | ```sh
39 | git clone https://github.com/mdeff/cnn_graph
40 | cd cnn_graph
41 | ```
42 |
43 | 2. Install the dependencies. The code should run with TensorFlow 1.0 and newer.
44 | ```sh
45 | pip install -r requirements.txt # or make install
46 | ```
47 |
48 | 3. Play with the Jupyter notebooks.
49 | ```sh
50 | jupyter notebook
51 | ```
52 |
53 | ## Reproducing our results
54 |
55 | Run all the notebooks to reproduce the experiments on
56 | [MNIST](nips2016/mnist.ipynb) and [20NEWS](nips2016/20news.ipynb) presented in
57 | the paper.
58 | ```sh
59 | cd nips2016
60 | make
61 | ```
62 |
63 | ## Using the model
64 |
65 | To use our graph ConvNet on your data, you need:
66 |
67 | 1. a data matrix where each row is a sample and each column is a feature,
68 | 2. a target vector,
69 | 3. optionally, an adjacency matrix which encodes the structure as a graph.
70 |
71 | See the [usage notebook][usage] for a simple example with fabricated data.
72 | Please get in touch if you are unsure about applying the model to a different
73 | setting.
74 |
75 | [usage]: http://nbviewer.jupyter.org/github/mdeff/cnn_graph/blob/outputs/usage.ipynb
76 |
77 | ## License & co
78 |
79 | The code in this repository is released under the terms of the [MIT license](LICENSE.txt).
80 | Please cite our [paper][arXiv] if you use it.
81 |
82 | ```
83 | @inproceedings{cnn_graph,
84 | title = {Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering},
85 | author = {Defferrard, Micha\"el and Bresson, Xavier and Vandergheynst, Pierre},
86 | booktitle = {Advances in Neural Information Processing Systems},
87 | year = {2016},
88 | url = {https://arxiv.org/abs/1606.09375},
89 | }
90 | ```
91 |
--------------------------------------------------------------------------------
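The "Using the model" section above lists the three ingredients (data matrix, target vector, optional adjacency). The sketch below, loosely modeled on usage.ipynb, shows the graph-preparation half on fabricated data, using only helpers whose source is included below (lib/graph.py and lib/coarsening.py); the model construction in lib/models.py is not reproduced here because its code is not shown in this dump. It assumes the snippet is run from the cnn_graph_codes folder with the pinned requirements installed.

```python
# Illustrative sketch: fabricate data, build a kNN graph over the features,
# coarsen it, and compute one normalized Laplacian per coarsening level.
import numpy as np
from lib import graph, coarsening

n_samples, n_features = 200, 100
X = np.random.uniform(size=(n_samples, n_features)).astype(np.float32)  # data matrix
y = np.random.randint(0, 2, size=n_samples)                             # target vector

# 1. kNN graph over the feature dimension (columns of X).
dist, idx = graph.distance_scipy_spatial(X.T, k=8, metric='euclidean')
A = graph.adjacency(dist, idx)

# 2. Multilevel coarsening; permute the data columns so pooling acts as a
#    binary tree, then one normalized Laplacian per level.
graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)
X = coarsening.perm_data(X, perm)
L = [graph.laplacian(g, normalized=True) for g in graphs]
```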
/HCP_fmripredict/cnn_graph_codes/lib/coarsening.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.sparse
3 |
4 |
5 | def coarsen(A, levels, self_connections=False):
6 | """
7 | Coarsen a graph, represented by its adjacency matrix A, at multiple
8 | levels.
9 | """
10 | graphs, parents = metis(A, levels)
11 | perms = compute_perm(parents)
12 |
13 | for i, A in enumerate(graphs):
14 | M, M = A.shape
15 |
16 | if not self_connections:
17 | A = A.tocoo()
18 | A.setdiag(0)
19 |
20 | if i < levels:
21 | A = perm_adjacency(A, perms[i])
22 |
23 | A = A.tocsr()
24 | A.eliminate_zeros()
25 | graphs[i] = A
26 |
27 | Mnew, Mnew = A.shape
28 | print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added),'
29 | '|E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))
30 |
31 | return graphs, perms[0] if levels > 0 else None
32 |
33 |
34 | def metis(W, levels, rid=None):
35 | """
36 | Coarsen a graph multiple times using the METIS algorithm.
37 |
38 | INPUT
39 | W: symmetric sparse weight (adjacency) matrix
40 | levels: the number of coarsened graphs
41 |
42 | OUTPUT
43 | graph[0]: original graph of size N_1
44 |     graph[1]: coarser graph of size N_2 < N_1
45 |     graph[levels]: coarsest graph of size N_levels < ... < N_2 < N_1
46 | parents[i] is a vector of size N_i with entries ranging from 1 to N_{i+1}
47 | which indicate the parents in the coarser graph[i+1]
48 | nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i}
49 |
50 | NOTE
51 | if "graph" is a list of length k, then "parents" will be a list of length k-1
52 | """
53 |
54 | N, N = W.shape
55 | if rid is None:
56 | rid = np.random.permutation(range(N))
57 | parents = []
58 | degree = W.sum(axis=0) - W.diagonal()
59 | graphs = []
60 | graphs.append(W)
61 | #supernode_size = np.ones(N)
62 | #nd_sz = [supernode_size]
63 | #count = 0
64 |
65 | #while N > maxsize:
66 | for _ in range(levels):
67 |
68 | #count += 1
69 |
70 | # CHOOSE THE WEIGHTS FOR THE PAIRING
71 | # weights = ones(N,1) # metis weights
72 | weights = degree # graclus weights
73 | # weights = supernode_size # other possibility
74 | weights = np.array(weights).squeeze()
75 |
76 | # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR
77 | idx_row, idx_col, val = scipy.sparse.find(W)
78 | perm = np.argsort(idx_row)
79 | rr = idx_row[perm]
80 | cc = idx_col[perm]
81 | vv = val[perm]
82 | cluster_id = metis_one_level(rr,cc,vv,rid,weights) # rr is ordered
83 | parents.append(cluster_id)
84 |
85 | # TO DO
86 | # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE
87 | #supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) )
88 | #print(cluster_id)
89 | #print(supernode_size)
90 | #nd_sz{count+1}=supernode_size;
91 |
92 | # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH
93 | nrr = cluster_id[rr]
94 | ncc = cluster_id[cc]
95 | nvv = vv
96 | Nnew = cluster_id.max() + 1
97 | # CSR is more appropriate: row,val pairs appear multiple times
98 | W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)), shape=(Nnew,Nnew))
99 | W.eliminate_zeros()
100 | # Add new graph to the list of all coarsened graphs
101 | graphs.append(W)
102 | N, N = W.shape
103 |
104 | # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS)
105 | degree = W.sum(axis=0)
106 | #degree = W.sum(axis=0) - W.diagonal()
107 |
108 | # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS
109 | #[~, rid]=sort(ss); # arthur strategy
110 | #[~, rid]=sort(supernode_size); # thomas strategy
111 | #rid=randperm(N); # metis/graclus strategy
112 | ss = np.array(W.sum(axis=0)).squeeze()
113 | rid = np.argsort(ss)
114 |
115 | return graphs, parents
116 |
117 |
118 | # Coarsen a graph given by rr,cc,vv. rr is assumed to be ordered
119 | def metis_one_level(rr,cc,vv,rid,weights):
120 |
121 | nnz = rr.shape[0]
122 | N = rr[nnz-1] + 1
123 |
124 | marked = np.zeros(N, np.bool)
125 | rowstart = np.zeros(N, np.int32)
126 | rowlength = np.zeros(N, np.int32)
127 | cluster_id = np.zeros(N, np.int32)
128 |
129 | oldval = rr[0]
130 | count = 0
131 | clustercount = 0
132 |
133 | for ii in range(nnz):
134 | rowlength[count] = rowlength[count] + 1
135 | if rr[ii] > oldval:
136 | oldval = rr[ii]
137 | rowstart[count+1] = ii
138 | count = count + 1
139 |
140 | for ii in range(N):
141 | tid = rid[ii]
142 | if not marked[tid]:
143 | wmax = 0.0
144 | rs = rowstart[tid]
145 | marked[tid] = True
146 | bestneighbor = -1
147 | for jj in range(rowlength[tid]):
148 | nid = cc[rs+jj]
149 | if marked[nid]:
150 | tval = 0.0
151 | else:
152 | tval = vv[rs+jj] * (1.0/weights[tid] + 1.0/weights[nid])
153 | if tval > wmax:
154 | wmax = tval
155 | bestneighbor = nid
156 |
157 | cluster_id[tid] = clustercount
158 |
159 | if bestneighbor > -1:
160 | cluster_id[bestneighbor] = clustercount
161 | marked[bestneighbor] = True
162 |
163 | clustercount += 1
164 |
165 | return cluster_id
166 |
167 | def compute_perm(parents):
168 | """
169 | Return a list of indices to reorder the adjacency and data matrices so
170 | that the union of two neighbors from layer to layer forms a binary tree.
171 | """
172 |
173 | # Order of last layer is random (chosen by the clustering algorithm).
174 | indices = []
175 | if len(parents) > 0:
176 | M_last = max(parents[-1]) + 1
177 | indices.append(list(range(M_last)))
178 |
179 | for parent in parents[::-1]:
180 | #print('parent: {}'.format(parent))
181 |
182 | # Fake nodes go after real ones.
183 | pool_singeltons = len(parent)
184 |
185 | indices_layer = []
186 | for i in indices[-1]:
187 | indices_node = list(np.where(parent == i)[0])
188 | assert 0 <= len(indices_node) <= 2
189 | #print('indices_node: {}'.format(indices_node))
190 |
191 | # Add a node to go with a singelton.
192 | if len(indices_node) is 1:
193 | indices_node.append(pool_singeltons)
194 | pool_singeltons += 1
195 | #print('new singelton: {}'.format(indices_node))
196 | # Add two nodes as children of a singelton in the parent.
197 | elif len(indices_node) is 0:
198 | indices_node.append(pool_singeltons+0)
199 | indices_node.append(pool_singeltons+1)
200 | pool_singeltons += 2
201 | #print('singelton childrens: {}'.format(indices_node))
202 |
203 | indices_layer.extend(indices_node)
204 | indices.append(indices_layer)
205 |
206 | # Sanity checks.
207 | for i,indices_layer in enumerate(indices):
208 | M = M_last*2**i
209 | # Reduction by 2 at each layer (binary tree).
210 |         assert len(indices_layer) == M
211 |         # The new ordering does not omit an index.
212 | assert sorted(indices_layer) == list(range(M))
213 |
214 | return indices[::-1]
215 |
216 | assert (compute_perm([np.array([4,1,1,2,2,3,0,0,3]),np.array([2,1,0,1,0])])
217 | == [[3,4,0,9,1,2,5,8,6,7,10,11],[2,4,1,3,0,5],[0,1,2]])
218 |
219 | def perm_data(x, indices):
220 | """
221 | Permute data matrix, i.e. exchange node ids,
222 | so that binary unions form the clustering tree.
223 | """
224 | if indices is None:
225 | return x
226 |
227 | N, M = x.shape
228 | Mnew = len(indices)
229 | assert Mnew >= M
230 | xnew = np.empty((N, Mnew))
231 | for i,j in enumerate(indices):
232 | # Existing vertex, i.e. real data.
233 | if j < M:
234 | xnew[:,i] = x[:,j]
235 | # Fake vertex because of singeltons.
236 | # They will stay 0 so that max pooling chooses the singelton.
237 | # Or -infty ?
238 | else:
239 | xnew[:,i] = np.zeros(N)
240 | return xnew
241 |
242 | def perm_adjacency(A, indices):
243 | """
244 | Permute adjacency matrix, i.e. exchange node ids,
245 | so that binary unions form the clustering tree.
246 | """
247 | if indices is None:
248 | return A
249 |
250 | M, M = A.shape
251 | Mnew = len(indices)
252 | assert Mnew >= M
253 | A = A.tocoo()
254 |
255 | # Add Mnew - M isolated vertices.
256 | if Mnew > M:
257 | rows = scipy.sparse.coo_matrix((Mnew-M, M), dtype=np.float32)
258 | cols = scipy.sparse.coo_matrix((Mnew, Mnew-M), dtype=np.float32)
259 | A = scipy.sparse.vstack([A, rows])
260 | A = scipy.sparse.hstack([A, cols])
261 |
262 | # Permute the rows and the columns.
263 | perm = np.argsort(indices)
264 | A.row = np.array(perm)[A.row]
265 | A.col = np.array(perm)[A.col]
266 |
267 | # assert np.abs(A - A.T).mean() < 1e-9
268 | assert type(A) is scipy.sparse.coo.coo_matrix
269 | return A
270 |
--------------------------------------------------------------------------------
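To make the fake-node padding described in the coarsen()/compute_perm() docstrings above concrete, here is a toy run on a 5-node path graph. This is an illustrative snippet, not repository code; it assumes it is run from the cnn_graph_codes folder (so lib/ is importable) with the NumPy version pinned by requirements.txt, and the sizes in the comments are only indicative since the matching is randomized.

```python
# Toy illustration: coarsening a 5-node path graph by two levels pads each
# level with fake nodes so the reordered vertices form a binary tree for pooling.
import numpy as np
import scipy.sparse

from lib.coarsening import coarsen, perm_data

# Path graph 0-1-2-3-4 with unit weights (symmetric adjacency).
rows = np.array([0, 1, 1, 2, 2, 3, 3, 4])
cols = np.array([1, 0, 2, 1, 3, 2, 4, 3])
vals = np.ones(len(rows), dtype=np.float32)
A = scipy.sparse.csr_matrix((vals, (rows, cols)), shape=(5, 5))

graphs, perm = coarsen(A, levels=2, self_connections=False)
print([g.shape[0] for g in graphs])   # e.g. [8, 4, 2]: level 0 padded from 5 to 8 nodes

# Data columns must be permuted/padded the same way before pooling.
X = np.random.rand(10, 5)             # 10 samples, one feature per original node
print(perm_data(X, perm).shape)       # e.g. (10, 8)
```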
/HCP_fmripredict/cnn_graph_codes/lib/graph.py:
--------------------------------------------------------------------------------
1 | import sklearn.metrics
2 | import sklearn.neighbors
3 | import matplotlib.pyplot as plt
4 | import scipy.sparse
5 | import scipy.sparse.linalg
6 | import scipy.spatial.distance
7 | import numpy as np
8 |
9 |
10 | def grid(m, dtype=np.float32):
11 | """Return the embedding of a grid graph."""
12 | M = m**2
13 | x = np.linspace(0, 1, m, dtype=dtype)
14 | y = np.linspace(0, 1, m, dtype=dtype)
15 | xx, yy = np.meshgrid(x, y)
16 | z = np.empty((M, 2), dtype)
17 | z[:, 0] = xx.reshape(M)
18 | z[:, 1] = yy.reshape(M)
19 | return z
20 |
21 |
22 | def distance_scipy_spatial(z, k=4, metric='euclidean'):
23 | """Compute exact pairwise distances."""
24 | d = scipy.spatial.distance.pdist(z, metric)
25 | d = scipy.spatial.distance.squareform(d)
26 | # k-NN graph.
27 | idx = np.argsort(d)[:, 1:k+1]
28 | d.sort()
29 | d = d[:, 1:k+1]
30 | return d, idx
31 |
32 |
33 | def distance_sklearn_metrics(z, k=4, metric='euclidean'):
34 | """Compute exact pairwise distances."""
35 | d = sklearn.metrics.pairwise.pairwise_distances(
36 | z, metric=metric, n_jobs=-2)
37 | # k-NN graph.
38 | idx = np.argsort(d)[:, 1:k+1]
39 | d.sort()
40 | d = d[:, 1:k+1]
41 | return d, idx
42 |
43 |
44 | def distance_lshforest(z, k=4, metric='cosine'):
45 | """Return an approximation of the k-nearest cosine distances."""
46 | assert metric is 'cosine'
47 | lshf = sklearn.neighbors.LSHForest()
48 | lshf.fit(z)
49 | dist, idx = lshf.kneighbors(z, n_neighbors=k+1)
50 | assert dist.min() < 1e-10
51 | dist[dist < 0] = 0
52 | return dist, idx
53 |
54 | # TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN
55 |
56 |
57 | def adjacency(dist, idx):
58 | """Return the adjacency matrix of a kNN graph."""
59 | M, k = dist.shape
60 |     assert idx.shape == (M, k)
61 | assert dist.min() >= 0
62 |
63 | # Weights.
64 | sigma2 = np.mean(dist[:, -1])**2
65 | dist = np.exp(- dist**2 / sigma2)
66 |
67 | # Weight matrix.
68 | I = np.arange(0, M).repeat(k)
69 | J = idx.reshape(M*k)
70 | V = dist.reshape(M*k)
71 | W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M))
72 |
73 | # No self-connections.
74 | W.setdiag(0)
75 |
76 | # Non-directed graph.
77 | bigger = W.T > W
78 | W = W - W.multiply(bigger) + W.T.multiply(bigger)
79 |
80 | assert W.nnz % 2 == 0
81 | assert np.abs(W - W.T).mean() < 1e-10
82 | assert type(W) is scipy.sparse.csr.csr_matrix
83 | return W
84 |
85 |
86 | def replace_random_edges(A, noise_level):
87 | """Replace randomly chosen edges by random edges."""
88 | M, M = A.shape
89 | n = int(noise_level * A.nnz // 2)
90 |
91 | indices = np.random.permutation(A.nnz//2)[:n]
92 | rows = np.random.randint(0, M, n)
93 | cols = np.random.randint(0, M, n)
94 | vals = np.random.uniform(0, 1, n)
95 | assert len(indices) == len(rows) == len(cols) == len(vals)
96 |
97 | A_coo = scipy.sparse.triu(A, format='coo')
98 | #assert A_coo.nnz == A.nnz // 2
99 | assert A_coo.nnz >= n
100 | A = A.tolil()
101 |
102 | for idx, row, col, val in zip(indices, rows, cols, vals):
103 | old_row = A_coo.row[idx]
104 | old_col = A_coo.col[idx]
105 |
106 | A[old_row, old_col] = 0
107 | A[old_col, old_row] = 0
108 | A[row, col] = 1
109 | A[col, row] = 1
110 |
111 | A.setdiag(0)
112 | A = A.tocsr()
113 | A.eliminate_zeros()
114 | return A
115 |
116 |
117 | def laplacian(W, normalized=True):
118 |     """Return the Laplacian of the weight matrix."""
119 |
120 | # Degree matrix.
121 | d = W.sum(axis=0)
122 |
123 | # Laplacian matrix.
124 | if not normalized:
125 | D = scipy.sparse.diags(d.A.squeeze(), 0)
126 | L = D - W
127 | else:
128 | d += np.spacing(np.array(0, W.dtype))
129 | d = 1 / np.sqrt(d)
130 | D = scipy.sparse.diags(d.A.squeeze(), 0)
131 | I = scipy.sparse.identity(d.size, dtype=W.dtype)
132 | L = I - D * W * D
133 |
134 | # assert np.abs(L - L.T).mean() < 1e-9
135 | assert type(L) is scipy.sparse.csr.csr_matrix
136 | return L
137 |
138 |
139 | def lmax(L, normalized=True):
140 | """Upper-bound on the spectrum."""
141 | if normalized:
142 | return 2
143 | else:
144 | return scipy.sparse.linalg.eigsh(
145 | L, k=1, which='LM', return_eigenvectors=False)[0]
146 |
147 |
148 | def fourier(L, algo='eigh', k=1):
149 | """Return the Fourier basis, i.e. the EVD of the Laplacian."""
150 |
151 | def sort(lamb, U):
152 | idx = lamb.argsort()
153 | return lamb[idx], U[:, idx]
154 |
155 | if algo is 'eig':
156 | lamb, U = np.linalg.eig(L.toarray())
157 | lamb, U = sort(lamb, U)
158 | elif algo is 'eigh':
159 | lamb, U = np.linalg.eigh(L.toarray())
160 | elif algo is 'eigs':
161 | lamb, U = scipy.sparse.linalg.eigs(L, k=k, which='SM')
162 | lamb, U = sort(lamb, U)
163 | elif algo is 'eigsh':
164 | lamb, U = scipy.sparse.linalg.eigsh(L, k=k, which='SM')
165 |
166 | return lamb, U
167 |
168 |
169 | def plot_spectrum(L, algo='eig'):
170 | """Plot the spectrum of a list of multi-scale Laplacians L."""
171 | # Algo is eig to be sure to get all eigenvalues.
172 | plt.figure(figsize=(17, 5))
173 | for i, lap in enumerate(L):
174 | lamb, U = fourier(lap, algo)
175 | step = 2**i
176 | x = range(step//2, L[0].shape[0], step)
177 | lb = 'L_{} spectrum in [{:1.2e}, {:1.2e}]'.format(i, lamb[0], lamb[-1])
178 | plt.plot(x, lamb, '.', label=lb)
179 | plt.legend(loc='best')
180 | plt.xlim(0, L[0].shape[0])
181 | plt.ylim(ymin=0)
182 |
183 |
184 | def lanczos(L, X, K):
185 | """
186 | Given the graph Laplacian and a data matrix, return a data matrix which can
187 | be multiplied by the filter coefficients to filter X using the Lanczos
188 | polynomial approximation.
189 | """
190 | M, N = X.shape
191 | assert L.dtype == X.dtype
192 |
193 | def basis(L, X, K):
194 | """
195 | Lanczos algorithm which computes the orthogonal matrix V and the
196 | tri-diagonal matrix H.
197 | """
198 | a = np.empty((K, N), L.dtype)
199 | b = np.zeros((K, N), L.dtype)
200 | V = np.empty((K, M, N), L.dtype)
201 | V[0, ...] = X / np.linalg.norm(X, axis=0)
202 | for k in range(K-1):
203 | W = L.dot(V[k, ...])
204 | a[k, :] = np.sum(W * V[k, ...], axis=0)
205 | W = W - a[k, :] * V[k, ...] - (
206 | b[k, :] * V[k-1, ...] if k > 0 else 0)
207 | b[k+1, :] = np.linalg.norm(W, axis=0)
208 | V[k+1, ...] = W / b[k+1, :]
209 | a[K-1, :] = np.sum(L.dot(V[K-1, ...]) * V[K-1, ...], axis=0)
210 | return V, a, b
211 |
212 | def diag_H(a, b, K):
213 | """Diagonalize the tri-diagonal H matrix."""
214 | H = np.zeros((K*K, N), a.dtype)
215 | H[:K**2:K+1, :] = a
216 | H[1:(K-1)*K:K+1, :] = b[1:, :]
217 | H.shape = (K, K, N)
218 | Q = np.linalg.eigh(H.T, UPLO='L')[1]
219 | Q = np.swapaxes(Q, 1, 2).T
220 | return Q
221 |
222 | V, a, b = basis(L, X, K)
223 | Q = diag_H(a, b, K)
224 | Xt = np.empty((K, M, N), L.dtype)
225 | for n in range(N):
226 | Xt[..., n] = Q[..., n].T.dot(V[..., n])
227 | Xt *= Q[0, :, np.newaxis, :]
228 | Xt *= np.linalg.norm(X, axis=0)
229 | return Xt # Q[0, ...]
230 |
231 |
232 | def rescale_L(L, lmax=2):
233 | """Rescale the Laplacian eigenvalues in [-1,1]."""
234 | M, M = L.shape
235 | I = scipy.sparse.identity(M, format='csr', dtype=L.dtype)
236 | L /= lmax / 2
237 | L -= I
238 | return L
239 |
240 |
241 | def chebyshev(L, X, K):
242 | """Return T_k X where T_k are the Chebyshev polynomials of order up to K.
243 | Complexity is O(KMN)."""
244 | M, N = X.shape
245 | assert L.dtype == X.dtype
246 |
247 | # L = rescale_L(L, lmax)
248 | # Xt = T @ X: MxM @ MxN.
249 | Xt = np.empty((K, M, N), L.dtype)
250 | # Xt_0 = T_0 X = I X = X.
251 | Xt[0, ...] = X
252 | # Xt_1 = T_1 X = L X.
253 | if K > 1:
254 | Xt[1, ...] = L.dot(X)
255 | # Xt_k = 2 L Xt_k-1 - Xt_k-2.
256 | for k in range(2, K):
257 | Xt[k, ...] = 2 * L.dot(Xt[k-1, ...]) - Xt[k-2, ...]
258 | return Xt
259 |
--------------------------------------------------------------------------------
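Tying the helpers above together, the sketch below builds the 28x28 grid graph used in the MNIST-style notebooks, its normalized Laplacian, and the Chebyshev basis T_k X described in the chebyshev() docstring. Illustrative only; it assumes the snippet runs from the cnn_graph_codes folder with the pinned dependencies, and the choices k=8 and K=5 are arbitrary.

```python
# Illustrative sketch: grid graph -> kNN adjacency -> normalized Laplacian
# rescaled to [-1, 1] -> Chebyshev expansion of random graph signals.
import numpy as np
from lib import graph

z = graph.grid(28)                                   # 784 x 2 node coordinates
dist, idx = graph.distance_sklearn_metrics(z, k=8)   # exact kNN distances
A = graph.adjacency(dist, idx)                       # 784 x 784 sparse weight matrix
L = graph.laplacian(A, normalized=True)              # sparse normalized Laplacian
L = graph.rescale_L(L, lmax=graph.lmax(L))           # spectrum mapped into [-1, 1]

X = np.random.uniform(size=(784, 16)).astype(L.dtype)  # 16 random graph signals
Xt = graph.chebyshev(L, X, K=5)                         # T_0 X ... T_4 X
print(Xt.shape)                                         # (5, 784, 16)
```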
/HCP_fmripredict/cnn_graph_codes/lib/utils.py:
--------------------------------------------------------------------------------
1 | import gensim
2 | import sklearn, sklearn.datasets
3 | import sklearn.naive_bayes, sklearn.linear_model, sklearn.svm, sklearn.neighbors, sklearn.ensemble
4 | import matplotlib.pyplot as plt
5 | import scipy.sparse
6 | import numpy as np
7 | import time, re, sys
8 |
9 |
10 | # Helpers to process text documents.
11 |
12 |
13 | class TextDataset(object):
14 | def clean_text(self, num='substitute'):
15 | # TODO: stemming, lemmatisation
16 | for i,doc in enumerate(self.documents):
17 | # Digits.
18 | if num is 'spell':
19 | doc = doc.replace('0', ' zero ')
20 | doc = doc.replace('1', ' one ')
21 | doc = doc.replace('2', ' two ')
22 | doc = doc.replace('3', ' three ')
23 | doc = doc.replace('4', ' four ')
24 | doc = doc.replace('5', ' five ')
25 | doc = doc.replace('6', ' six ')
26 | doc = doc.replace('7', ' seven ')
27 | doc = doc.replace('8', ' eight ')
28 | doc = doc.replace('9', ' nine ')
29 | elif num is 'substitute':
30 | # All numbers are equal. Useful for embedding (countable words) ?
31 | doc = re.sub('(\\d+)', ' NUM ', doc)
32 | elif num is 'remove':
33 | # Numbers are uninformative (they are all over the place). Useful for bag-of-words ?
34 | # But maybe some kind of documents contain more numbers, e.g. finance.
35 | # Some documents are indeed full of numbers. At least in 20NEWS.
36 | doc = re.sub('[0-9]', ' ', doc)
37 | # Remove everything except a-z characters and single space.
38 | doc = doc.replace('$', ' dollar ')
39 | doc = doc.lower()
40 | doc = re.sub('[^a-z]', ' ', doc)
41 | doc = ' '.join(doc.split()) # same as doc = re.sub('\s{2,}', ' ', doc)
42 | self.documents[i] = doc
43 |
44 | def vectorize(self, **params):
45 | # TODO: count or tf-idf. Or in normalize ?
46 | vectorizer = sklearn.feature_extraction.text.CountVectorizer(**params)
47 | self.data = vectorizer.fit_transform(self.documents)
48 | self.vocab = vectorizer.get_feature_names()
49 | assert len(self.vocab) == self.data.shape[1]
50 |
51 | def data_info(self, show_classes=False):
52 | N, M = self.data.shape
53 | sparsity = self.data.nnz / N / M * 100
54 | print('N = {} documents, M = {} words, sparsity={:.4f}%'.format(N, M, sparsity))
55 | if show_classes:
56 | for i in range(len(self.class_names)):
57 | num = sum(self.labels == i)
58 | print(' {:5d} documents in class {:2d} ({})'.format(num, i, self.class_names[i]))
59 |
60 | def show_document(self, i):
61 | label = self.labels[i]
62 | name = self.class_names[label]
63 | try:
64 | text = self.documents[i]
65 | wc = len(text.split())
66 | except AttributeError:
67 | text = None
68 | wc = 'N/A'
69 | print('document {}: label {} --> {}, {} words'.format(i, label, name, wc))
70 | try:
71 | vector = self.data[i,:]
72 | for j in range(vector.shape[1]):
73 | if vector[0,j] != 0:
74 | print(' {:.2f} "{}" ({})'.format(vector[0,j], self.vocab[j], j))
75 | except AttributeError:
76 | pass
77 | return text
78 |
79 | def keep_documents(self, idx):
80 | """Keep the documents given by the index, discard the others."""
81 | self.documents = [self.documents[i] for i in idx]
82 | self.labels = self.labels[idx]
83 | self.data = self.data[idx,:]
84 |
85 | def keep_words(self, idx):
86 |         """Keep the words given by the index, discard the others."""
87 | self.data = self.data[:,idx]
88 | self.vocab = [self.vocab[i] for i in idx]
89 | try:
90 | self.embeddings = self.embeddings[idx,:]
91 | except AttributeError:
92 | pass
93 |
94 | def remove_short_documents(self, nwords, vocab='selected'):
95 |         """Remove a document if it contains fewer than nwords words."""
96 |         if vocab == 'selected':
97 | # Word count with selected vocabulary.
98 | wc = self.data.sum(axis=1)
99 | wc = np.squeeze(np.asarray(wc))
100 |         elif vocab == 'full':
101 | # Word count with full vocabulary.
102 |             wc = np.empty(len(self.documents), dtype=int)
103 | for i,doc in enumerate(self.documents):
104 | wc[i] = len(doc.split())
105 | idx = np.argwhere(wc >= nwords).squeeze()
106 | self.keep_documents(idx)
107 | return wc
108 |
109 | def keep_top_words(self, M, Mprint=20):
110 |         """Keep in the vocabulary the M words that appear most often."""
111 | freq = self.data.sum(axis=0)
112 | freq = np.squeeze(np.asarray(freq))
113 | idx = np.argsort(freq)[::-1]
114 | idx = idx[:M]
115 | self.keep_words(idx)
116 | print('most frequent words')
117 | for i in range(Mprint):
118 | print(' {:3d}: {:10s} {:6d} counts'.format(i, self.vocab[i], freq[idx][i]))
119 | return freq[idx]
120 |
121 | def normalize(self, norm='l1'):
122 | """Normalize data to unit length."""
123 | # TODO: TF-IDF.
124 | data = self.data.astype(np.float64)
125 | self.data = sklearn.preprocessing.normalize(data, axis=1, norm=norm)
126 |
127 | def embed(self, filename=None, size=100):
128 | """Embed the vocabulary using pre-trained vectors."""
129 | if filename:
130 | model = gensim.models.Word2Vec.load_word2vec_format(filename, binary=True)
131 | size = model.vector_size
132 | else:
133 | class Sentences(object):
134 | def __init__(self, documents):
135 | self.documents = documents
136 | def __iter__(self):
137 | for document in self.documents:
138 | yield document.split()
139 | model = gensim.models.Word2Vec(Sentences(self.documents), size)
140 | self.embeddings = np.empty((len(self.vocab), size))
141 | keep = []
142 | not_found = 0
143 | for i,word in enumerate(self.vocab):
144 | try:
145 | self.embeddings[i,:] = model[word]
146 | keep.append(i)
147 | except KeyError:
148 | not_found += 1
149 |         print('{} words not found in corpus'.format(not_found))
150 | self.keep_words(keep)
151 |
152 | class Text20News(TextDataset):
153 | def __init__(self, **params):
154 | dataset = sklearn.datasets.fetch_20newsgroups(**params)
155 | self.documents = dataset.data
156 | self.labels = dataset.target
157 | self.class_names = dataset.target_names
158 | assert max(self.labels) + 1 == len(self.class_names)
159 | N, C = len(self.documents), len(self.class_names)
160 | print('N = {} documents, C = {} classes'.format(N, C))
161 |
162 | class TextRCV1(TextDataset):
163 | def __init__(self, **params):
164 | dataset = sklearn.datasets.fetch_rcv1(**params)
165 | self.data = dataset.data
166 | self.target = dataset.target
167 | self.class_names = dataset.target_names
168 | assert len(self.class_names) == 103 # 103 categories according to LYRL2004
169 | N, C = self.target.shape
170 | assert C == len(self.class_names)
171 | print('N = {} documents, C = {} classes'.format(N, C))
172 |
173 | def remove_classes(self, keep):
174 | ## Construct a lookup table for labels.
175 | labels_row = []
176 | labels_col = []
177 | class_lookup = {}
178 | for i,name in enumerate(self.class_names):
179 | class_lookup[name] = i
180 | self.class_names = keep
181 |
182 | # Index of classes to keep.
183 |         idx_keep = np.empty(len(keep), dtype=int)
184 | for i,cat in enumerate(keep):
185 | idx_keep[i] = class_lookup[cat]
186 | self.target = self.target[:,idx_keep]
187 | assert self.target.shape[1] == len(keep)
188 |
189 | def show_doc_per_class(self, print_=False):
190 | """Number of documents per class."""
191 | docs_per_class = np.array(self.target.astype(np.uint64).sum(axis=0)).squeeze()
192 | print('categories ({} assignments in total)'.format(docs_per_class.sum()))
193 | if print_:
194 | for i,cat in enumerate(self.class_names):
195 | print(' {:5s}: {:6d} documents'.format(cat, docs_per_class[i]))
196 | plt.figure(figsize=(17,5))
197 | plt.plot(sorted(docs_per_class[::-1]),'.')
198 |
199 | def show_classes_per_doc(self):
200 | """Number of classes per document."""
201 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze()
202 | plt.figure(figsize=(17,5))
203 | plt.plot(sorted(classes_per_doc[::-1]),'.')
204 |
205 | def select_documents(self):
206 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze()
207 | self.target = self.target[classes_per_doc==1]
208 | self.data = self.data[classes_per_doc==1, :]
209 |
210 | # Convert labels from indicator form to single value.
211 | N, C = self.target.shape
212 | target = self.target.tocoo()
213 | self.labels = target.col
214 | assert self.labels.min() == 0
215 | assert self.labels.max() == C - 1
216 |
217 | # Bruna and Dropout used 2 * 201369 = 402738 documents. Probably the difference btw v1 and v2.
218 | #return classes_per_doc
219 |
220 | ### Helpers to quantify classifier's quality.
221 |
222 |
223 | def baseline(train_data, train_labels, test_data, test_labels, omit=[]):
224 | """Train various classifiers to get a baseline."""
225 | clf, train_accuracy, test_accuracy, train_f1, test_f1, exec_time = [], [], [], [], [], []
226 | clf.append(sklearn.neighbors.KNeighborsClassifier(n_neighbors=10))
227 | clf.append(sklearn.linear_model.LogisticRegression())
228 | clf.append(sklearn.naive_bayes.BernoulliNB(alpha=.01))
229 | clf.append(sklearn.ensemble.RandomForestClassifier())
230 | clf.append(sklearn.naive_bayes.MultinomialNB(alpha=.01))
231 | clf.append(sklearn.linear_model.RidgeClassifier())
232 | clf.append(sklearn.svm.LinearSVC())
233 | for i,c in enumerate(clf):
234 | if i not in omit:
235 | t_start = time.process_time()
236 | c.fit(train_data, train_labels)
237 | train_pred = c.predict(train_data)
238 | test_pred = c.predict(test_data)
239 | train_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(train_labels, train_pred)))
240 | test_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(test_labels, test_pred)))
241 | train_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(train_labels, train_pred, average='weighted')))
242 | test_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(test_labels, test_pred, average='weighted')))
243 | exec_time.append('{:5.2f}'.format(time.process_time() - t_start))
244 | print('Train accuracy: {}'.format(' '.join(train_accuracy)))
245 | print('Test accuracy: {}'.format(' '.join(test_accuracy)))
246 | print('Train F1 (weighted): {}'.format(' '.join(train_f1)))
247 | print('Test F1 (weighted): {}'.format(' '.join(test_f1)))
248 | print('Execution time: {}'.format(' '.join(exec_time)))
249 |
250 | def grid_search(params, grid_params, train_data, train_labels, val_data,
251 | val_labels, test_data, test_labels, model):
252 | """Explore the hyper-parameter space with an exhaustive grid search."""
253 | params = params.copy()
254 | train_accuracy, test_accuracy, train_f1, test_f1 = [], [], [], []
255 | grid = sklearn.model_selection.ParameterGrid(grid_params)
256 | print('grid search: {} combinations to evaluate'.format(len(grid)))
257 | for grid_params in grid:
258 | params.update(grid_params)
259 | name = '{}'.format(grid)
260 | print('\n\n {} \n\n'.format(grid_params))
261 | m = model(params)
262 | m.fit(train_data, train_labels, val_data, val_labels)
263 | string, accuracy, f1, loss = m.evaluate(train_data, train_labels)
264 | train_accuracy.append('{:5.2f}'.format(accuracy)); train_f1.append('{:5.2f}'.format(f1))
265 | print('train {}'.format(string))
266 | string, accuracy, f1, loss = m.evaluate(test_data, test_labels)
267 | test_accuracy.append('{:5.2f}'.format(accuracy)); test_f1.append('{:5.2f}'.format(f1))
268 | print('test {}'.format(string))
269 | print('\n\n')
270 | print('Train accuracy: {}'.format(' '.join(train_accuracy)))
271 | print('Test accuracy: {}'.format(' '.join(test_accuracy)))
272 | print('Train F1 (weighted): {}'.format(' '.join(train_f1)))
273 | print('Test F1 (weighted): {}'.format(' '.join(test_f1)))
274 | for i,grid_params in enumerate(grid):
275 | print('{} --> {} {} {} {}'.format(grid_params, train_accuracy[i], test_accuracy[i], train_f1[i], test_f1[i]))
276 |
277 |
278 | class model_perf(object):
279 |
280 | def __init__(s):
281 | s.names, s.params = set(), {}
282 | s.fit_accuracies, s.fit_losses, s.fit_time = {}, {}, {}
283 | s.train_accuracy, s.train_f1, s.train_loss = {}, {}, {}
284 | s.test_accuracy, s.test_f1, s.test_loss = {}, {}, {}
285 |
286 | def test(s, model, name, params, train_data, train_labels, val_data, val_labels, test_data, test_labels):
287 | s.params[name] = params
288 | sess = s.fit_accuracies[name], s.fit_losses[name], s.fit_time[name] = \
289 | model.fit(train_data, train_labels, val_data, val_labels)
290 | string, s.train_accuracy[name], s.train_f1[name], s.train_loss[name] = \
291 | model.evaluate(train_data, train_labels, sess=sess)
292 | print('train {}'.format(string))
293 | string, s.test_accuracy[name], s.test_f1[name], s.test_loss[name] = \
294 | model.evaluate(test_data, test_labels, sess=sess)
295 | print('test {}'.format(string))
296 | sys.stdout.flush()
297 | s.names.add(name)
298 | return s
299 |
300 | def show(s, fontsize=None):
301 | if fontsize:
302 | plt.rc('pdf', fonttype=42)
303 | plt.rc('ps', fonttype=42)
304 | plt.rc('font', size=fontsize) # controls default text sizes
305 | plt.rc('axes', titlesize=fontsize) # fontsize of the axes title
306 |             plt.rc('axes', labelsize=fontsize)    # fontsize of the x and y labels
307 | plt.rc('xtick', labelsize=fontsize) # fontsize of the tick labels
308 | plt.rc('ytick', labelsize=fontsize) # fontsize of the tick labels
309 | plt.rc('legend', fontsize=fontsize) # legend fontsize
310 | plt.rc('figure', titlesize=fontsize) # size of the figure title
311 | print(' accuracy F1 loss time [ms] name')
312 | print('test train test train test train')
313 | for name in sorted(s.names):
314 | print('{:5.2f} {:5.2f} {:5.2f} {:5.2f} {:.2e} {:.2e} {:3.0f} {}'.format(
315 | s.test_accuracy[name], s.train_accuracy[name],
316 | s.test_f1[name], s.train_f1[name],
317 | s.test_loss[name], s.train_loss[name], s.fit_time[name]*1000, name))
318 |
319 | fig, ax = plt.subplots(1, 2, figsize=(15, 5))
320 | for name in sorted(s.names):
321 | steps = np.arange(len(s.fit_accuracies[name])) + 1
322 | steps *= s.params[name]['eval_frequency']
323 | ax[0].plot(steps, s.fit_accuracies[name], '.-', label=name)
324 | ax[1].plot(steps, s.fit_losses[name], '.-', label=name)
325 | ax[0].set_xlim(min(steps), max(steps))
326 | ax[1].set_xlim(min(steps), max(steps))
327 | ax[0].set_xlabel('step')
328 | ax[1].set_xlabel('step')
329 | ax[0].set_ylabel('validation accuracy')
330 | ax[1].set_ylabel('training loss')
331 | ax[0].legend(loc='lower right')
332 | ax[1].legend(loc='upper right')
333 | #fig.savefig('training.pdf')
334 |
--------------------------------------------------------------------------------
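
The baseline() helper defined above only needs a feature matrix (dense or scipy-sparse) and integer labels, so it can be exercised outside the text experiments. A minimal sketch on synthetic data, assuming the cnn_graph_codes directory is on the Python path so that lib.utils imports; the shift to non-negative features is only there because MultinomialNB, one of the baselines, rejects negative values:

import sklearn.datasets
import sklearn.model_selection

from lib import utils  # assumes cnn_graph_codes/ is the working directory or on sys.path

# Synthetic stand-in for the bag-of-words matrices used in the notebooks.
X, y = sklearn.datasets.make_classification(
    n_samples=1000, n_features=50, n_informative=10, n_classes=3, random_state=0)
X = X - X.min()  # make features non-negative for MultinomialNB

X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, test_size=0.5, random_state=0)

# Compare the scikit-learn classifiers wired up in utils.baseline();
# pass e.g. omit=[0] to skip the (slow) k-nearest-neighbours model.
utils.baseline(X_train, y_train, X_test, y_test)
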
/HCP_fmripredict/cnn_graph_codes/makefile:
--------------------------------------------------------------------------------
1 | NB = $(sort $(wildcard *.ipynb))
2 | DIRS = nips2016 trials
3 |
4 | CLEANDIRS = $(DIRS:%=clean-%)
5 |
6 | run: $(NB) $(DIRS)
7 |
8 | $(NB):
9 | jupyter nbconvert --inplace --execute --ExecutePreprocessor.timeout=-1 $@
10 |
11 | $(DIRS):
12 | $(MAKE) -C $@
13 |
14 | clean: $(CLEANDIRS)
15 | jupyter nbconvert --inplace --ClearOutputPreprocessor.enabled=True $(NB)
16 | #rm -rf **/*.pyc
17 |
18 | $(CLEANDIRS):
19 | $(MAKE) clean -C $(@:clean-%=%)
20 |
21 | install:
22 | pip install --upgrade pip
23 | pip install -r requirements.txt
24 |
25 | readme:
26 | grip README.md
27 |
28 | .PHONY: run $(NB) $(DIRS) clean $(CLEANDIRS) install readme
29 |
--------------------------------------------------------------------------------
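
The makefile drives the notebooks through jupyter nbconvert. The same run/clean cycle can be scripted in Python where make is inconvenient; a sketch using only the standard library and the exact nbconvert flags from the makefile above:

import glob
import subprocess

notebooks = sorted(glob.glob('*.ipynb'))

def run(nb):
    # mirrors the makefile's "run" target
    subprocess.check_call(['jupyter', 'nbconvert', '--inplace', '--execute',
                           '--ExecutePreprocessor.timeout=-1', nb])

def clean(nb):
    # mirrors the makefile's "clean" target
    subprocess.check_call(['jupyter', 'nbconvert', '--inplace',
                           '--ClearOutputPreprocessor.enabled=True', nb])

for nb in notebooks:
    run(nb)
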
/HCP_fmripredict/cnn_graph_codes/rcv1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "%load_ext autoreload\n",
12 | "%autoreload 2\n",
13 | "\n",
14 | "from lib import models, graph, coarsening, utils\n",
15 | "\n",
16 | "import tensorflow as tf\n",
17 | "import matplotlib.pyplot as plt\n",
18 | "import scipy.sparse\n",
19 | "import numpy as np\n",
20 |     "import time, shutil, os\n",
21 | "\n",
22 | "%matplotlib inline"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {
29 | "collapsed": false
30 | },
31 | "outputs": [],
32 | "source": [
33 | "flags = tf.app.flags\n",
34 | "FLAGS = flags.FLAGS\n",
35 | "\n",
36 | "# Graphs.\n",
37 | "flags.DEFINE_integer('number_edges', 16, 'Graph: minimum number of edges per vertex.')\n",
38 | "flags.DEFINE_string('metric', 'cosine', 'Graph: similarity measure (between features).')\n",
39 | "# TODO: change cgcnn for combinatorial Laplacians.\n",
40 | "flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')\n",
41 | "flags.DEFINE_integer('coarsening_levels', 0, 'Number of coarsened graphs.')\n",
42 | "\n",
43 | "flags.DEFINE_string('dir_data', os.path.join('data', 'rcv1'), 'Directory to store data.')\n",
44 | "flags.DEFINE_integer('val_size', 400, 'Size of the validation set.')"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "# Data\n",
52 | "\n",
53 |     "**From the Dropout paper (Bruna et al. did the same).**\n",
54 |     "We took the dataset and split it into 63 classes based on the 63 categories at the second level of the category tree. We removed 11 categories that did not have any data and one category that had only 4 training examples. We also removed one category that covered a huge chunk (25%) of the examples. This left us with 50 classes and 402,738 documents. We divided the documents into equal-sized training and test sets randomly. Each document was represented\n",
55 | "using the 2000 most frequent non-stopwords in the dataset."
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "collapsed": false
63 | },
64 | "outputs": [],
65 | "source": [
66 | "# Fetch dataset from Scikit-learn.\n",
67 | "dataset = utils.TextRCV1(data_home=FLAGS.dir_data)\n",
68 | "\n",
69 | "# Pre-processing: transform everything to a-z and whitespace.\n",
70 | "#print(train.show_document(1)[:400])\n",
71 | "#train.clean_text(num='substitute')\n",
72 | "\n",
73 | "# Analyzing / tokenizing: transform documents to bags-of-words.\n",
74 | "#stop_words = set(sklearn.feature_extraction.text.ENGLISH_STOP_WORDS)\n",
75 | "# Or stop words from NLTK.\n",
76 | "# Add e.g. don, ve.\n",
77 | "#train.vectorize(stop_words='english')\n",
78 | "#print(train.show_document(1)[:400])"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {
85 | "collapsed": false
86 | },
87 | "outputs": [],
88 | "source": [
89 | "# Selection of classes.\n",
90 | "keep = ['C11','C12','C13','C14','C15','C16','C17','C18','C21','C22','C23','C24',\n",
91 | " 'C31','C32','C33','C34','C41','C42','E11','E12','E13','E14','E21','E31',\n",
92 | " 'E41','E51','E61','E71','G15','GCRIM','GDEF','GDIP','GDIS','GENT','GENV',\n",
93 | " 'GFAS','GHEA','GJOB','GMIL','GOBIT','GODD','GPOL','GPRO','GREL','GSCI',\n",
94 | " 'GSPO','GTOUR','GVIO','GVOTE','GWEA','GWELF','M11','M12','M13','M14']\n",
95 |     "assert len(keep) == 55 # There are 55 second-level categories according to LYRL2004.\n",
96 | "keep.remove('C15') # 151785 documents\n",
97 | "keep.remove('GMIL') # 5 documents only\n",
98 | "\n",
99 | "dataset.show_doc_per_class()\n",
100 | "dataset.show_classes_per_doc()\n",
101 | "dataset.remove_classes(keep)\n",
102 | "dataset.show_doc_per_class(True)\n",
103 | "dataset.show_classes_per_doc()"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "collapsed": false
111 | },
112 | "outputs": [],
113 | "source": [
114 | "# Remove documents with multiple classes.\n",
115 | "dataset.select_documents()\n",
116 | "dataset.data_info()"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {
123 | "collapsed": false
124 | },
125 | "outputs": [],
126 | "source": [
127 | "# Remove short documents.\n",
128 | "#train.data_info(True)\n",
129 | "#wc = train.remove_short_documents(nwords=20, vocab='full')\n",
130 | "#train.data_info()\n",
131 | "#print('shortest: {}, longest: {} words'.format(wc.min(), wc.max()))\n",
132 | "#plt.figure(figsize=(17,5))\n",
133 | "#plt.semilogy(wc, '.');"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {
140 | "collapsed": false
141 | },
142 | "outputs": [],
143 | "source": [
144 | "# Feature selection.\n",
145 | "# Other options include: mutual information or document count.\n",
146 | "#freq = train.keep_top_words(1000, 20)\n",
147 | "#train.data_info()\n",
148 | "#train.show_document(1)\n",
149 | "#plt.figure(figsize=(17,5))\n",
150 | "#plt.semilogy(freq);\n",
151 | "\n",
152 | "# Remove documents whose signal would be the zero vector.\n",
153 | "#wc = train.remove_short_documents(nwords=5, vocab='selected')\n",
154 | "#train.data_info(True)"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": false
162 | },
163 | "outputs": [],
164 | "source": [
165 | "#dataset.normalize(norm='l1')\n",
166 | "dataset.show_document(1);"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {
173 | "collapsed": false
174 | },
175 | "outputs": [],
176 | "source": [
177 | "# Word embedding\n",
178 | "#if True:\n",
179 | "# train.embed()\n",
180 | "#else:\n",
181 | "# train.embed('data_word2vec/GoogleNews-vectors-negative300.bin')\n",
182 | "#train.data_info()\n",
183 | "# Further feature selection. (TODO)"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {
190 | "collapsed": false
191 | },
192 | "outputs": [],
193 | "source": [
194 | "perm = np.random.RandomState(seed=42).permutation(dataset.data.shape[0])\n",
195 | "Ntest = dataset.data.shape[0] // 2\n",
196 | "perm_test = perm[:Ntest]\n",
197 | "perm_train = perm[Ntest:]\n",
198 | "train_data = dataset.data[perm_train,:].astype(np.float32)\n",
199 | "test_data = dataset.data[perm_test,:].astype(np.float32)\n",
200 | "train_labels = dataset.labels[perm_train]\n",
201 | "test_labels = dataset.labels[perm_test]\n",
202 | "\n",
203 | "if False:\n",
204 | " graph_data = train.embeddings.astype(np.float32)\n",
205 | "else:\n",
206 | " graph_data = dataset.data.T.astype(np.float32)\n",
207 | "\n",
208 | "#del dataset"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "# Feature graph"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": null,
221 | "metadata": {
222 | "collapsed": false
223 | },
224 | "outputs": [],
225 | "source": [
226 | "t_start = time.process_time()\n",
227 | "dist, idx = graph.distance_lshforest(graph_data.astype(np.float64), k=FLAGS.number_edges, metric=FLAGS.metric)\n",
228 | "A = graph.adjacency(dist.astype(np.float32), idx)\n",
229 | "print(\"{} > {} edges\".format(A.nnz//2, FLAGS.number_edges*graph_data.shape[0]//2))\n",
230 | "A = graph.replace_random_edges(A, 0)\n",
231 | "graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False)\n",
232 | "L = [graph.laplacian(A, normalized=True) for A in graphs]\n",
233 | "print('Execution time: {:.2f}s'.format(time.process_time() - t_start))\n",
234 | "#graph.plot_spectrum(L)\n",
235 | "#del graph_data, A, dist, idx"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {
242 | "collapsed": false
243 | },
244 | "outputs": [],
245 | "source": [
246 |     "assert FLAGS.coarsening_levels == 0\n",
247 | "#t_start = time.process_time()\n",
248 | "#train_data = scipy.sparse.csr_matrix(coarsening.perm_data(train_data.toarray(), perm))\n",
249 | "#test_data = scipy.sparse.csr_matrix(coarsening.perm_data(test_data.toarray(), perm))\n",
250 | "#print('Execution time: {:.2f}s'.format(time.process_time() - t_start))\n",
251 | "#del perm"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "# Classification"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {
265 | "collapsed": false
266 | },
267 | "outputs": [],
268 | "source": [
269 | "# Training set is shuffled already.\n",
270 | "#perm = np.random.permutation(train_data.shape[0])\n",
271 | "#train_data = train_data[perm,:]\n",
272 | "#train_labels = train_labels[perm]\n",
273 | "\n",
274 | "# Validation set.\n",
275 | "if False:\n",
276 | " val_data = train_data[:FLAGS.val_size,:]\n",
277 | " val_labels = train_labels[:FLAGS.val_size]\n",
278 | " train_data = train_data[FLAGS.val_size:,:]\n",
279 | " train_labels = train_labels[FLAGS.val_size:]\n",
280 | "else:\n",
281 | " val_data = test_data\n",
282 | " val_labels = test_labels"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": null,
288 | "metadata": {
289 | "collapsed": false
290 | },
291 | "outputs": [],
292 | "source": [
293 | "if False:\n",
294 | " utils.baseline(train_data, train_labels, test_data, test_labels)"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": null,
300 | "metadata": {
301 | "collapsed": false
302 | },
303 | "outputs": [],
304 | "source": [
305 | "common = {}\n",
306 | "common['dir_name'] = 'rcv1/'\n",
307 | "common['num_epochs'] = 4\n",
308 | "common['batch_size'] = 100\n",
309 | "common['decay_steps'] = len(train_labels) / common['batch_size']\n",
310 | "common['eval_frequency'] = 200\n",
311 | "common['filter'] = 'chebyshev5'\n",
312 | "common['brelu'] = 'b1relu'\n",
313 | "common['pool'] = 'mpool1'\n",
314 | "C = max(train_labels) + 1 # number of classes\n",
315 | "\n",
316 | "model_perf = utils.model_perf()"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {
323 | "collapsed": false
324 | },
325 | "outputs": [],
326 | "source": [
327 | "if True:\n",
328 | " name = 'softmax'\n",
329 | " params = common.copy()\n",
330 | " params['dir_name'] += name\n",
331 | " params['regularization'] = 0\n",
332 | " params['dropout'] = 1\n",
333 | " params['learning_rate'] = 1e3\n",
334 | " params['decay_rate'] = 0.95\n",
335 | " params['momentum'] = 0.9\n",
336 | " params['F'] = []\n",
337 | " params['K'] = []\n",
338 | " params['p'] = []\n",
339 | " params['M'] = [C]\n",
340 | " model_perf.test(models.cgcnn(L, **params), name, params,\n",
341 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": null,
347 | "metadata": {
348 | "collapsed": false
349 | },
350 | "outputs": [],
351 | "source": [
352 | "if True:\n",
353 | " name = 'fc_softmax'\n",
354 | " params = common.copy()\n",
355 | " params['dir_name'] += name\n",
356 | " params['regularization'] = 0\n",
357 | " params['dropout'] = 1\n",
358 | " params['learning_rate'] = 0.1\n",
359 | " params['decay_rate'] = 0.95\n",
360 | " params['momentum'] = 0.9\n",
361 | " params['F'] = []\n",
362 | " params['K'] = []\n",
363 | " params['p'] = []\n",
364 | " params['M'] = [2500, C]\n",
365 | " model_perf.test(models.cgcnn(L, **params), name, params,\n",
366 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)"
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": null,
372 | "metadata": {
373 | "collapsed": false
374 | },
375 | "outputs": [],
376 | "source": [
377 | "if True:\n",
378 | " name = 'fc_fc_softmax'\n",
379 | " params = common.copy()\n",
380 | " params['dir_name'] += name\n",
381 | " params['regularization'] = 0\n",
382 | " params['dropout'] = 1\n",
383 | " params['learning_rate'] = 0.1\n",
384 | " params['decay_rate'] = 0.95\n",
385 | " params['momentum'] = 0.9\n",
386 | " params['F'] = []\n",
387 | " params['K'] = []\n",
388 | " params['p'] = []\n",
389 | " params['M'] = [2500, 500, C]\n",
390 | " model_perf.test(models.cgcnn(L, **params), name, params,\n",
391 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": null,
397 | "metadata": {
398 | "collapsed": false
399 | },
400 | "outputs": [],
401 | "source": [
402 | "if True:\n",
403 | " name = 'cgconv_softmax'\n",
404 | " params = common.copy()\n",
405 | " params['dir_name'] += name\n",
406 | " params['regularization'] = 1e-3\n",
407 | " params['dropout'] = 1\n",
408 | " params['learning_rate'] = 0.1\n",
409 | " params['decay_rate'] = 0.999\n",
410 | " params['momentum'] = 0\n",
411 | " params['F'] = [1]\n",
412 | " params['K'] = [5]\n",
413 | " params['p'] = [1]\n",
414 | " params['M'] = [C]\n",
415 | " model_perf.test(models.cgcnn(L, **params), name, params,\n",
416 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": null,
422 | "metadata": {
423 | "collapsed": false
424 | },
425 | "outputs": [],
426 | "source": [
427 | "if True:\n",
428 | " name = 'cgconv_fc_softmax'\n",
429 | " params = common.copy()\n",
430 | " params['dir_name'] += name\n",
431 | " params['regularization'] = 0\n",
432 | " params['dropout'] = 1\n",
433 | " params['learning_rate'] = 0.1\n",
434 | " params['decay_rate'] = 0.999\n",
435 | " params['momentum'] = 0\n",
436 | " params['F'] = [5]\n",
437 | " params['K'] = [15]\n",
438 | " params['p'] = [1]\n",
439 | " params['M'] = [100, C]\n",
440 | " model_perf.test(models.cgcnn(L, **params), name, params,\n",
441 | " train_data, train_labels, val_data, val_labels, test_data, test_labels)"
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": null,
447 | "metadata": {
448 | "collapsed": true
449 | },
450 | "outputs": [],
451 | "source": [
452 | "model_perf.show()"
453 | ]
454 | }
455 | ],
456 | "metadata": {
457 | "kernelspec": {
458 | "display_name": "Python 3",
459 | "language": "python",
460 | "name": "python3"
461 | },
462 | "language_info": {
463 | "codemirror_mode": {
464 | "name": "ipython",
465 | "version": 3
466 | },
467 | "file_extension": ".py",
468 | "mimetype": "text/x-python",
469 | "name": "python",
470 | "nbconvert_exporter": "python",
471 | "pygments_lexer": "ipython3",
472 | "version": "3.4.3"
473 | }
474 | },
475 | "nbformat": 4,
476 | "nbformat_minor": 0
477 | }
478 |
--------------------------------------------------------------------------------
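
The feature-graph cell in the notebook above depends on graph.distance_lshforest and graph.adjacency from the lib package. As a rough, library-free sketch of the same idea, a k-nearest-neighbour graph over the features with a Gaussian kernel on the distances can be built with scikit-learn alone (the function name and defaults are illustrative, and exact neighbours replace the LSH-forest approximation):

import numpy as np
import scipy.sparse
from sklearn.neighbors import NearestNeighbors

def knn_feature_graph(X, k=16, metric='cosine'):
    """Sparse symmetric adjacency over the columns (features) of X."""
    Z = X.T                                     # one row per feature
    nn = NearestNeighbors(n_neighbors=k + 1, metric=metric).fit(Z)
    dist, idx = nn.kneighbors(Z)
    dist, idx = dist[:, 1:], idx[:, 1:]         # drop the self-match in column 0
    sigma2 = np.mean(dist[:, -1]) ** 2
    weights = np.exp(-dist ** 2 / sigma2)       # Gaussian kernel on the distances
    d = Z.shape[0]
    rows = np.repeat(np.arange(d), k)
    A = scipy.sparse.coo_matrix((weights.ravel(), (rows, idx.ravel())), shape=(d, d)).tocsr()
    return A.maximum(A.T)                       # symmetrize, keeping the larger weight

# Toy usage: 200 samples described by 30 features.
A = knn_feature_graph(np.random.rand(200, 30), k=5)
print(A.shape, A.nnz)
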
/HCP_fmripredict/cnn_graph_codes/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | scikit-learn
4 | matplotlib
5 |
6 | gensim
7 | tensorflow-gpu
8 | #tensorflow
9 |
10 | jupyter
11 | ipython
12 |
--------------------------------------------------------------------------------
/HCP_fmripredict/cnn_graph_codes/trials/3_tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Trial 3: TensorFlow\n",
8 | "\n",
9 | "Small experiment to familiarize myself with TensorFlow."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "collapsed": false
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import tensorflow as tf"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Data"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [],
37 | "source": [
38 | "from tensorflow.examples.tutorials.mnist import input_data\n",
39 | "import os\n",
40 | "folder = os.path.join('..', 'data', 'mnist')\n",
41 | "mnist = input_data.read_data_sets(folder, one_hot=True)"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "# Model"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "collapsed": true
56 | },
57 | "outputs": [],
58 | "source": [
59 | "x = tf.placeholder(tf.float32, [None, 784])\n",
60 | "W = tf.Variable(tf.zeros([784, 10]))\n",
61 | "b = tf.Variable(tf.zeros([10]))\n",
62 | "y = tf.nn.softmax(tf.matmul(x, W) + b)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "# Training"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "metadata": {
76 | "collapsed": true
77 | },
78 | "outputs": [],
79 | "source": [
80 | "y_ = tf.placeholder(tf.float32, [None, 10])\n",
81 | "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))\n",
82 | "train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)\n",
83 | "\n",
84 | "init = tf.initialize_all_variables()\n",
85 | "sess = tf.Session()\n",
86 | "sess.run(init)\n",
87 | "\n",
88 | "for i in range(1000):\n",
89 | " batch_xs, batch_ys = mnist.train.next_batch(100)\n",
90 | " sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "# Evaluation"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [],
107 | "source": [
108 | "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))\n",
109 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
110 | "print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))"
111 | ]
112 | }
113 | ],
114 | "metadata": {
115 | "kernelspec": {
116 | "display_name": "Python 3",
117 | "language": "python",
118 | "name": "python3"
119 | },
120 | "language_info": {
121 | "codemirror_mode": {
122 | "name": "ipython",
123 | "version": 3
124 | },
125 | "file_extension": ".py",
126 | "mimetype": "text/x-python",
127 | "name": "python",
128 | "nbconvert_exporter": "python",
129 | "pygments_lexer": "ipython3",
130 | "version": "3.5.2"
131 | }
132 | },
133 | "nbformat": 4,
134 | "nbformat_minor": 0
135 | }
136 |
--------------------------------------------------------------------------------
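
The trial above is the stock TensorFlow softmax-regression tutorial. For reference, the same recipe in plain numpy, with random data standing in for MNIST so the sketch stays self-contained, makes the cross-entropy gradient with respect to the logits (probabilities minus one-hot targets) explicit:

import numpy as np

def softmax(z):
    z = z - z.max(axis=1, keepdims=True)        # for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

rng = np.random.default_rng(0)
n, d, c = 1000, 784, 10                         # MNIST-like shapes, random data
X = rng.normal(size=(n, d)).astype(np.float32)
labels = rng.integers(0, c, size=n)
Y = np.eye(c, dtype=np.float32)[labels]         # one-hot targets

W = np.zeros((d, c), dtype=np.float32)
b = np.zeros(c, dtype=np.float32)
lr, batch = 0.5, 100

for step in range(200):
    i = rng.integers(0, n, size=batch)          # sample a mini-batch
    p = softmax(X[i] @ W + b)
    grad = p - Y[i]                             # d(cross-entropy)/d(logits)
    W -= lr * (X[i].T @ grad) / batch
    b -= lr * grad.mean(axis=0)

accuracy = (softmax(X @ W + b).argmax(axis=1) == labels).mean()
print('accuracy on the (random) training data: {:.2f}'.format(accuracy))
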
/HCP_fmripredict/cnn_graph_codes/trials/4_coarsening.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true
7 | },
8 | "source": [
9 | "# Trial 4: graph coarsening\n",
10 | "\n",
11 | "* First Python implementation of the greedy Metis and Graclus coarsening algorithms.\n",
12 | "* Results comparison with a previously developed matlab implementation.\n",
13 | "* Results comparison with the newer version in the `coarsening` module."
14 | ]
15 | },
16 | {
17 | "cell_type": "raw",
18 | "metadata": {},
19 | "source": [
20 | "METIS COARSENING IMPLEMENTATION AS PROPOSED IN:\n",
21 | "An incremental reseeding strategy for clustering\n",
22 | "X Bresson, H Hu, T Laurent, A Szlam, J von Brecht\n",
23 | "arXiv preprint arXiv:1406.3837\n",
24 | "3 May 2016"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {
31 | "collapsed": true
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import os\n",
36 | "import scipy.io\n",
37 | "import scipy.sparse\n",
38 | "import numpy as np"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {
45 | "collapsed": false
46 | },
47 | "outputs": [],
48 | "source": [
49 | "if False:\n",
50 | " # CHECK PYTHON RESULTS WITH MATLAB CODE\n",
51 | " folder = os.path.join('..', 'data', 'metis_matlab.mat')\n",
52 | " mat = scipy.io.loadmat(folder)\n",
53 | " W = mat['W']\n",
54 | " W = scipy.sparse.csr_matrix(W)\n",
55 | " rid = mat['rid']-1\n",
56 | " rid = rid.T\n",
57 | " rid = rid.squeeze()\n",
58 | " #print(type(W))\n",
59 | " #print(type(rid))\n",
60 | " print(W.shape)\n",
61 | " print(W.nnz)\n",
62 | " #print(rid.shape)\n",
63 | "\n",
64 | "else:\n",
65 | " N = 533\n",
66 | " #np.random.seed(0)\n",
67 | " rid = np.random.permutation(range(N))\n",
68 | " W = np.random.uniform(0.01, 0.99, size=(N,N))\n",
69 | " mask = np.random.uniform(size=(N,N))\n",
70 | " W[mask<0.99] = 0\n",
71 | " W = scipy.sparse.csr_matrix(W)\n",
72 | " print(W.nnz)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {
79 | "collapsed": true
80 | },
81 | "outputs": [],
82 | "source": [
83 | "# INPUT\n",
84 | "# W = symmetric sparse weight matrix\n",
85 | "# maxsize = the number of nodes for the coarsest graph\n",
86 | "# OUTPUT\n",
87 | "# graph{1}: original graph of size N_1\n",
88 | "# graph{2}: coarser graph of size N_2 < N_1\n",
89 | "# etc...\n",
90 |     "# graph{k}: coarsest graph of size N_k <...< N_2 < N_1\n",
91 | "# parents{i} is a vector of size N_i with entries ranging from 1 to N_{i+1}\n",
92 | "# which indicate the parents in the coarser graph{i+1} \n",
93 | "# nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i}\n",
94 | "# NOTE\n",
95 | "# if \"graph\" is a cell of size k, then \"parents\" will be a cell of size k-1\n",
96 | "\n",
97 | "def metis_coarsening(W,maxsize,rid):\n",
98 | " \n",
99 | " N = W.shape[0]\n",
100 | " print('Size of original graph=',N)\n",
101 | " parents = []\n",
102 | " degree = W.sum(axis=0) - W.diagonal()\n",
103 | " graphs = []\n",
104 | " graphs.append(W)\n",
105 | " supernode_size = np.ones(N)\n",
106 | " nd_sz = [supernode_size]\n",
107 | " count = 0\n",
108 | " \n",
109 | " while N > maxsize:\n",
110 | " \n",
111 | " count = count + 1;\n",
112 | " print('level=',count)\n",
113 | " \n",
114 | " # CHOOSE THE WEIGHTS FOR THE PAIRING\n",
115 | " # weights = ones(N,1) # metis weights\n",
116 | " weights = degree # graclus weights\n",
117 | " # weights = supernode_size # other possibility\n",
118 | " weights = weights.T\n",
119 | " weights = np.array(weights)\n",
120 | " weights = weights.squeeze()\n",
121 | " \n",
122 | " # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR\n",
123 | " idx_row,idx_col,val = scipy.sparse.find(W) \n",
124 | " perm = np.argsort(idx_row)\n",
125 | " rr = idx_row[perm]\n",
126 | " cc = idx_col[perm]\n",
127 | " vv = val[perm]\n",
128 | " cluster_id = one_level_coarsening(rr,cc,vv,rid,weights) # rr is ordered \n",
129 | " parents.append(cluster_id)\n",
130 | " \n",
131 | " # TO DO\n",
132 | " # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE \n",
133 | " #supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) )\n",
134 | " #print(cluster_id)\n",
135 | " #print(supernode_size)\n",
136 | " #nd_sz{count+1}=supernode_size;\n",
137 | " \n",
138 | " # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH\n",
139 | " nrr = cluster_id[rr]\n",
140 | " ncc = cluster_id[cc]\n",
141 | " nvv = vv\n",
142 | " Nnew = int(cluster_id.max()) + 1\n",
143 | " print('Size of coarser graph=',Nnew)\n",
144 | " W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)),shape=(Nnew,Nnew))\n",
145 | " # Add new graph to the list of all coarsened graphs\n",
146 | " graphs.append(W)\n",
147 | " N = W.shape[0]\n",
148 | " \n",
149 | " # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS)\n",
150 | " degree = W.sum(axis=0)\n",
151 | " #degree = W.sum(axis=0) - W.diagonal()\n",
152 | " \n",
153 |     "        # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISITED AT THE NEXT PASS\n",
154 | " #[~, rid]=sort(ss); # arthur strategy\n",
155 | " #[~, rid]=sort(supernode_size); # thomas strategy\n",
156 | " #rid=randperm(N); # metis/graclus strategy \n",
157 | " ss = W.sum(axis=0).T\n",
158 | " rid = [i[0] for i in sorted(enumerate(ss), key=lambda x:x[1])] # [~, rid]=sort(ss);\n",
159 | " \n",
160 | " \n",
161 | " # Remove all diagonal entries in similarity matrices\n",
162 | " for i in range(len(graphs)): \n",
163 | " csr_setdiag_val(graphs[i])\n",
164 | " scipy.sparse.csr_matrix.eliminate_zeros(graphs[i])\n",
165 | " \n",
166 | " \n",
167 | " return graphs,parents"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {
174 | "collapsed": true
175 | },
176 | "outputs": [],
177 | "source": [
178 | "#http://nbviewer.ipython.org/gist/Midnighter/9992103\n",
179 | "def csr_setdiag_val(csr, value=0):\n",
180 | " \"\"\"Set all diagonal nonzero elements\n",
181 | " (elements currently in the sparsity pattern)\n",
182 | " to the given value. Useful to set to 0 mostly.\n",
183 | " \"\"\"\n",
184 | " if csr.format != \"csr\":\n",
185 | " raise ValueError('Matrix given must be of CSR format.')\n",
186 | " csr.sort_indices()\n",
187 | " pointer = csr.indptr\n",
188 | " indices = csr.indices\n",
189 | " data = csr.data\n",
190 | " for i in range(min(csr.shape)):\n",
191 | " ind = indices[pointer[i]: pointer[i + 1]]\n",
192 | " j = ind.searchsorted(i)\n",
193 | " # matrix has only elements up until diagonal (in row i)\n",
194 | " if j == len(ind):\n",
195 | " continue\n",
196 | " j += pointer[i]\n",
197 | " # in case matrix has only elements after diagonal (in row i)\n",
198 | " if indices[j] == i:\n",
199 | " data[j] = value"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": null,
205 | "metadata": {
206 | "collapsed": false
207 | },
208 | "outputs": [],
209 | "source": [
210 | "# Coarsen a graph given by rr,cc,vv. rr is assumed to be ordered\n",
211 | "def one_level_coarsening(rr,cc,vv,rid,weights):\n",
212 | " \n",
213 | " nnz = rr.shape[0]\n",
214 | " N = rr[nnz-1]+1\n",
215 | " #print(nnz,N)\n",
216 | " \n",
217 | " marked = np.zeros(N)\n",
218 | " rowstart = np.zeros(N)\n",
219 | " rowlength = np.zeros(N)\n",
220 | " cluster_id = np.zeros(N)\n",
221 | " \n",
222 | " oldval = rr[0]\n",
223 | " count = 0\n",
224 | " clustercount = 0\n",
225 | " \n",
226 | " for ii in range(nnz):\n",
227 | " rowlength[count] = rowlength[count] + 1\n",
228 | " if rr[ii] > oldval:\n",
229 | " oldval = rr[ii]\n",
230 | " rowstart[count+1] = ii\n",
231 | " count = count + 1\n",
232 | " \n",
233 | " for ii in range(N):\n",
234 | " tid = rid[ii]\n",
235 | " if marked[tid]==0.0:\n",
236 | " wmax = 0.0\n",
237 | " rs = rowstart[tid]\n",
238 | " marked[tid] = 1.0\n",
239 | " bestneighbor = -1\n",
240 | " for jj in range(int(rowlength[tid])):\n",
241 | " nid = cc[rs+jj]\n",
242 | " tval = (1.0-marked[nid]) * vv[rs+jj] * (1.0/weights[tid]+ 1.0/weights[nid])\n",
243 | " if tval > wmax:\n",
244 | " wmax = tval\n",
245 | " bestneighbor = nid\n",
246 | " \n",
247 | " cluster_id[tid] = clustercount;\n",
248 | " \n",
249 | " if bestneighbor > -1:\n",
250 | " cluster_id[bestneighbor] = clustercount\n",
251 | " marked[bestneighbor] = 1.0\n",
252 | " \n",
253 | " clustercount = clustercount + 1\n",
254 | " \n",
255 | " return cluster_id"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "metadata": {
262 | "collapsed": false
263 | },
264 | "outputs": [],
265 | "source": [
266 | "maxsize = 200\n",
267 | "N = W.shape[0]\n",
268 | "#rid = np.random.permutation(range(N))\n",
269 | "#print(N)\n",
270 | "#print(rid[0:10])\n",
271 | "\n",
272 | "graphs,parents = metis_coarsening(W.copy(),maxsize,rid)\n",
273 | "#print(graph)\n",
274 | "#print(parents)\n",
275 | "\n",
276 | "\n",
277 | "# CHECK RESULTS WITH MATLAB CODE\n",
278 | "graph0 = graphs[0]\n",
279 | "print(graph0.shape)\n",
280 | "print(graph0[0,:])\n",
281 | "\n",
282 | "graph1 = graphs[1]\n",
283 | "print(graph1.shape)\n",
284 | "print(graph1[0,:])\n",
285 | "\n",
286 | "graph2 = graphs[2]\n",
287 | "print(graph2.shape)\n",
288 | "print(graph2[0,:])\n",
289 | "\n",
290 | "parents0 = parents[0]\n",
291 | "print(parents0.shape)\n",
292 | "print(parents0[0:10])\n",
293 | "\n",
294 | "parents1 = parents[1]\n",
295 | "print(parents1.shape)\n",
296 | "print(parents1[0:10])"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": null,
302 | "metadata": {
303 | "collapsed": false
304 | },
305 | "outputs": [],
306 | "source": [
307 | "import sys\n",
308 | "sys.path.append('..')\n",
309 | "from lib import coarsening\n",
310 | "\n",
311 | "graphs, parents = coarsening.metis(W, 2, rid)\n",
312 | "\n",
313 | "for i,A in enumerate(graphs):\n",
314 | " M, M = A.shape\n",
315 | " A = A.tocoo()\n",
316 | " A.setdiag(0)\n",
317 | " A = A.tocsr()\n",
318 | " A.eliminate_zeros()\n",
319 | " graphs[i] = A\n",
320 | " print('Layer {0}: M_{0} = {1} nodes, {2} edges'.format(i, M, A.nnz))\n",
321 | "\n",
322 | "# CHECK RESULTS WITH MATLAB CODE\n",
323 | "graph0 = graphs[0]\n",
324 | "print(graph0.shape)\n",
325 | "print(graph0[0,:])\n",
326 | "\n",
327 | "graph1 = graphs[1].tocsr()\n",
328 | "print(graph1.shape)\n",
329 | "print(graph1[0,:])\n",
330 | "\n",
331 | "graph2 = graphs[2].tocsr()\n",
332 | "print(graph2.shape)\n",
333 | "print(graph2[0,:])\n",
334 | "\n",
335 | "parents0 = parents[0]\n",
336 | "print(parents0.shape)\n",
337 | "print(parents0[0:10])\n",
338 | "\n",
339 | "parents1 = parents[1]\n",
340 | "print(parents1.shape)\n",
341 | "print(parents1[0:10])"
342 | ]
343 | },
344 | {
345 | "cell_type": "raw",
346 | "metadata": {},
347 | "source": [
348 | "# Python results\n",
349 | "\n",
350 | "Size of original graph= 533\n",
351 | "level= 1\n",
352 | "Size of coarser graph= 279\n",
353 | "level= 2\n",
354 | "Size of coarser graph= 147\n",
355 | "(533, 533)\n",
356 | " (0, 18)\t0.810464124165\n",
357 | " (0, 59)\t0.349678536711\n",
358 | " (0, 60)\t0.591336229831\n",
359 | " (0, 83)\t0.388420442335\n",
360 | " (0, 105)\t0.255134781894\n",
361 | " (0, 210)\t0.656852096558\n",
362 | " (0, 226)\t0.900257809833\n",
363 | " (0, 299)\t0.065093756932\n",
364 | " (0, 340)\t0.810464124165\n",
365 | " (0, 407)\t0.431454676752\n",
366 | "(279, 279)\n",
367 | " (0, 44)\t1.63660876872\n",
368 | " (0, 58)\t2.42459126058\n",
369 | " (0, 71)\t0.186153138092\n",
370 | " (0, 115)\t1.99313658383\n",
371 | " (0, 167)\t1.24818832639\n",
372 | " (0, 168)\t2.95891026039\n",
373 | " (0, 179)\t0.388420442335\n",
374 | " (0, 240)\t0.431454676752\n",
375 | "(147, 147)\n",
376 | " (0, 21)\t5.1886032791\n",
377 | " (0, 85)\t1.08484314421\n",
378 | " (0, 87)\t0.353738954483\n",
379 | " (0, 127)\t0.186153138092\n",
380 | " (0, 135)\t1.88273900708\n",
381 | " (0, 141)\t0.255134781894\n",
382 | "(533,)\n",
383 | "[ 57. 148. 184. 237. 93. 93. 47. 28. 133. 71.]\n",
384 | "(279,)\n",
385 | "[ 127. 4. 88. 128. 50. 120. 54. 123. 146. 26.]"
386 | ]
387 | },
388 | {
389 | "cell_type": "raw",
390 | "metadata": {
391 | "collapsed": true
392 | },
393 | "source": [
394 | "# Matlab results\n",
395 | "\n",
396 | "ans =\n",
397 | "\n",
398 | " (1,19) 0.8105\n",
399 | " (1,60) 0.3497\n",
400 | " (1,61) 0.5913\n",
401 | " (1,84) 0.3884\n",
402 | " (1,106) 0.2551\n",
403 | " (1,211) 0.6569\n",
404 | " (1,227) 0.9003\n",
405 | " (1,300) 0.0651\n",
406 | " (1,341) 0.8105\n",
407 | " (1,408) 0.4315\n",
408 | "\n",
409 | "\n",
410 | "ans =\n",
411 | "\n",
412 | " (1,45) 1.6366\n",
413 | " (1,59) 2.4246\n",
414 | " (1,72) 0.1862\n",
415 | " (1,116) 1.9931\n",
416 | " (1,168) 1.2482\n",
417 | " (1,169) 2.9589\n",
418 | " (1,180) 0.3884\n",
419 | " (1,241) 0.4315\n",
420 | "\n",
421 | "\n",
422 | "ans =\n",
423 | "\n",
424 | " (1,22) 5.1886\n",
425 | " (1,86) 1.0848\n",
426 | " (1,88) 0.3537\n",
427 | " (1,128) 0.1862\n",
428 | " (1,136) 1.8827\n",
429 | " (1,142) 0.2551\n",
430 | "\n",
431 | "\n",
432 | "ans =\n",
433 | "\n",
434 | " 58\n",
435 | " 149\n",
436 | " 185\n",
437 | " 238\n",
438 | " 94\n",
439 | " 94\n",
440 | " 48\n",
441 | " 29\n",
442 | " 134\n",
443 | " 72\n",
444 | "\n",
445 | "\n",
446 | "ans =\n",
447 | "\n",
448 | " 128\n",
449 | " 5\n",
450 | " 89\n",
451 | " 129\n",
452 | " 51\n",
453 | " 121\n",
454 | " 55\n",
455 | " 124\n",
456 | " 147\n",
457 | " 27"
458 | ]
459 | }
460 | ],
461 | "metadata": {
462 | "kernelspec": {
463 | "display_name": "Python 3",
464 | "language": "python",
465 | "name": "python3"
466 | },
467 | "language_info": {
468 | "codemirror_mode": {
469 | "name": "ipython",
470 | "version": 3
471 | },
472 | "file_extension": ".py",
473 | "mimetype": "text/x-python",
474 | "name": "python",
475 | "nbconvert_exporter": "python",
476 | "pygments_lexer": "ipython3",
477 | "version": "3.5.2"
478 | }
479 | },
480 | "nbformat": 4,
481 | "nbformat_minor": 0
482 | }
483 |
--------------------------------------------------------------------------------
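
In metis_coarsening above, the coarser adjacency is built with scipy.sparse.csr_matrix((nvv, (nrr, ncc)), shape=(Nnew, Nnew)), which sums duplicate (row, column) pairs; that implicit summation is the whole edge-aggregation step. A small sketch of just that step on a toy four-node path graph (the helper name is illustrative):

import numpy as np
import scipy.sparse

def aggregate_adjacency(W, cluster_id):
    """Collapse W according to a per-node cluster assignment; the weights of all
    edges that end up between (or inside) the same clusters are summed."""
    rr, cc, vv = scipy.sparse.find(W)
    n_new = int(cluster_id.max()) + 1
    return scipy.sparse.csr_matrix((vv, (cluster_id[rr], cluster_id[cc])),
                                   shape=(n_new, n_new))

# Toy example: a weighted 4-node path graph, paired as {0,1} -> 0 and {2,3} -> 1.
W = scipy.sparse.csr_matrix(np.array([[0, 1, 0, 0],
                                      [1, 0, 2, 0],
                                      [0, 2, 0, 3],
                                      [0, 0, 3, 0]], dtype=float))
cluster_id = np.array([0, 0, 1, 1])
print(aggregate_adjacency(W, cluster_id).toarray())
# [[2. 2.]
#  [2. 6.]]
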
/HCP_fmripredict/cnn_graph_codes/trials/makefile:
--------------------------------------------------------------------------------
1 | NB = $(sort $(wildcard *.ipynb))
2 |
3 | run: $(NB)
4 |
5 | $(NB):
6 | jupyter nbconvert --inplace --execute --ExecutePreprocessor.timeout=-1 $@
7 |
8 | clean:
9 | jupyter nbconvert --inplace --ClearOutputPreprocessor.enabled=True $(NB)
10 |
11 | .PHONY: run $(NB) clean
12 |
--------------------------------------------------------------------------------
/HCP_fmripredict/cnn_graph_codes/usage.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Introduction\n",
8 | "\n",
9 | "$\\newcommand{\\G}{\\mathcal{G}}$\n",
10 | "$\\newcommand{\\V}{\\mathcal{V}}$\n",
11 | "$\\newcommand{\\E}{\\mathcal{E}}$\n",
12 | "$\\newcommand{\\R}{\\mathbb{R}}$\n",
13 | "\n",
14 | "This notebook shows how to apply our graph ConvNet ([paper] & [code]), or any other, to your structured or unstructured data. For this example, we assume that we have $n$ samples $x_i \\in \\R^{d_x}$ arranged in a data matrix $$X = [x_1, ..., x_n]^T \\in \\R^{n \\times d_x}.$$ Each sample $x_i$ is associated with a vector $y_i \\in \\R^{d_y}$ for a regression task or a label $y_i \\in \\{0,\\ldots,C\\}$ for a classification task.\n",
15 | "\n",
16 | "[paper]: https://arxiv.org/abs/1606.09375\n",
17 | "[code]: https://github.com/mdeff/cnn_graph\n",
18 | "\n",
19 | "From there, we'll structure our data with a graph $\\G = (\\V, \\E, A)$ where $\\V$ is the set of $d_x = |\\V|$ vertices, $\\E$ is the set of edges and $A \\in \\R^{d_x \\times d_x}$ is the adjacency matrix. That matrix represents the weight of each edge, i.e. $A_{i,j}$ is the weight of the edge connecting $v_i \\in \\V$ to $v_j \\in \\V$. The weights of that feature graph thus represent pairwise relationships between features $i$ and $j$. We call that regime **signal classification / regression**, as the samples $x_i$ to be classified or regressed are graph signals.\n",
20 | "\n",
21 | "Other modelling possibilities include:\n",
22 | "1. Using a data graph, i.e. an adjacency matrix $A \\in \\R^{n \\times n}$ which represents pairwise relationships between samples $x_i \\in \\R^{d_x}$. The problem is here to predict a graph signal $y \\in \\R^{n \\times d_y}$ given a graph characterized by $A$ and some graph signals $X \\in \\R^{n \\times d_x}$. We call that regime **node classification / regression**, as we classify or regress nodes instead of signals.\n",
23 | "2. Another problem of interest is whole graph classification, with or without signals on top. We'll call that third regime **graph classification / regression**. The problem here is to classify or regress a whole graph $A_i \\in \\R^{n \\times n}$ (with or without an associated data matrix $X_i \\in \\R^{n \\times d_x}$) into $y_i \\in \\R^{d_y}$. In case we have no signal, we can use a constant vector $X_i = 1_n$ of size $n$."
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {
30 | "collapsed": false
31 | },
32 | "outputs": [],
33 | "source": [
34 | "from lib import models, graph, coarsening, utils\n",
35 | "import numpy as np\n",
36 | "import matplotlib.pyplot as plt\n",
37 | "%matplotlib inline"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "# 1 Data\n",
45 | "\n",
46 | "For the purpose of the demo, let's create a random data matrix $X \\in \\R^{n \\times d_x}$ and somehow infer a label $y_i = f(x_i)$."
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": false
54 | },
55 | "outputs": [],
56 | "source": [
57 | "d = 100 # Dimensionality.\n",
58 | "n = 10000 # Number of samples.\n",
59 | "c = 5 # Number of feature communities.\n",
60 | "\n",
61 | "# Data matrix, structured in communities (feature-wise).\n",
62 | "X = np.random.normal(0, 1, (n, d)).astype(np.float32)\n",
63 | "X += np.linspace(0, 1, c).repeat(d // c)\n",
64 | "\n",
65 | "# Noisy non-linear target.\n",
66 | "w = np.random.normal(0, .02, d)\n",
67 | "t = X.dot(w) + np.random.normal(0, .001, n)\n",
68 | "t = np.tanh(t)\n",
69 | "plt.figure(figsize=(15, 5))\n",
70 | "plt.plot(t, '.')\n",
71 | "\n",
72 | "# Classification.\n",
73 | "y = np.ones(t.shape, dtype=np.uint8)\n",
74 | "y[t > t.mean() + 0.4 * t.std()] = 0\n",
75 | "y[t < t.mean() - 0.4 * t.std()] = 2\n",
76 | "print('Class imbalance: ', np.unique(y, return_counts=True)[1])"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "Then split this dataset into training, validation and testing sets."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "collapsed": false
91 | },
92 | "outputs": [],
93 | "source": [
94 | "n_train = n // 2\n",
95 | "n_val = n // 10\n",
96 | "\n",
97 | "X_train = X[:n_train]\n",
98 | "X_val = X[n_train:n_train+n_val]\n",
99 | "X_test = X[n_train+n_val:]\n",
100 | "\n",
101 | "y_train = y[:n_train]\n",
102 | "y_val = y[n_train:n_train+n_val]\n",
103 | "y_test = y[n_train+n_val:]"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "# 2 Graph\n",
111 | "\n",
112 | "The second thing we need is a **graph between features**, i.e. an adjacency matrix $A \\in \\mathbb{R}^{d_x \\times d_x}$.\n",
113 |     "Structuring data with graphs is very flexible: it can accommodate both structured and unstructured data.\n",
114 | "1. **Structured data**.\n",
115 | " 1. The data is structured by an Euclidean domain, e.g. $x_i$ represents an image, a sound or a video. We can use a classical ConvNet with 1D, 2D or 3D convolutions or a graph ConvNet with a line or grid graph (however losing the orientation).\n",
116 | " 2. The data is structured by a graph, e.g. the data lies on a transportation, energy, brain or social network.\n",
117 |     "2. **Unstructured data**. We could use a fully connected network, but the learning and computational complexities are going to be large. An alternative is to construct a sparse similarity graph between features (or between samples) and use a graph ConvNet, effectively structuring the data and drastically reducing the number of parameters through weight sharing. As for classical ConvNets, the number of parameters is independent of the input size.\n",
118 | "\n",
119 |     "There are many ways, supervised or unsupervised, to construct a graph given some data. And the better the graph, the better the performance! For this example we'll define the adjacency matrix as a simple similarity measure between features. Below are the choices one has to make when constructing such a graph.\n",
120 | "1. The distance function. We'll use the Euclidean distance $d_{ij} = \\|x_i - x_j\\|_2$.\n",
121 |     "2. The kernel. We'll use the Gaussian kernel $a_{ij} = \\exp(-d_{ij}^2 / \\sigma^2)$.\n",
122 |     "3. The type of graph. We'll use a $k$ nearest neighbors (kNN) graph.\n"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {
129 | "collapsed": false
130 | },
131 | "outputs": [],
132 | "source": [
133 | "dist, idx = graph.distance_scipy_spatial(X_train.T, k=10, metric='euclidean')\n",
134 | "A = graph.adjacency(dist, idx).astype(np.float32)\n",
135 | "\n",
136 | "assert A.shape == (d, d)\n",
137 | "print('d = |V| = {}, k|V| < |E| = {}'.format(d, A.nnz))\n",
138 | "plt.spy(A, markersize=2, color='black');"
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {},
144 | "source": [
145 | "To be able to pool graph signals, we need first to coarsen the graph, i.e. to find which vertices to group together. At the end we'll have multiple graphs, like a pyramid, each at one level of resolution. The finest graph is where the input data lies, the coarsest graph is where the data at the output of the graph convolutional layers lie. That data, of reduced spatial dimensionality, can then be fed to a fully connected layer.\n",
146 | "\n",
147 |     "The parameter here is the number of times to coarsen the graph. Each coarsening approximately reduces the size of the graph by a factor of two. Thus if you want a pooling of size 4 in the first layer followed by a pooling of size 2 in the second, you'll need to coarsen $\\log_2(4 \\cdot 2) = 3$ times.\n",
148 | "\n",
149 |     "After coarsening we rearrange the vertices (and add fake vertices) such that pooling a graph signal is analogous to pooling a 1D signal. See the [paper] for details.\n",
150 | "\n",
151 | "[paper]: https://arxiv.org/abs/1606.09375"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {
158 | "collapsed": false
159 | },
160 | "outputs": [],
161 | "source": [
162 | "graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)\n",
163 | "\n",
164 | "X_train = coarsening.perm_data(X_train, perm)\n",
165 | "X_val = coarsening.perm_data(X_val, perm)\n",
166 | "X_test = coarsening.perm_data(X_test, perm)"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "We finally need to compute the graph Laplacian $L$ for each of our graphs (the original and the coarsened versions), defined by their adjacency matrices $A$. The sole parameter here is the type of Laplacian, e.g. the combinatorial Laplacian, the normalized Laplacian or the random walk Laplacian."
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {
180 | "collapsed": false
181 | },
182 | "outputs": [],
183 | "source": [
184 | "L = [graph.laplacian(A, normalized=True) for A in graphs]\n",
185 | "graph.plot_spectrum(L)"
186 | ]
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {},
191 | "source": [
192 | "# 3 Graph ConvNet\n",
193 | "\n",
194 | "Here we apply the graph convolutional neural network to signals lying on graphs. After designing the architecture and setting the hyper-parameters, the model takes as inputs the data matrix $X$, the target $y$ and a list of graph Laplacians $L$, one per coarsening level.\n",
195 | "\n",
196 |     "The data, architecture and hyper-parameters are absolutely *not engineered to showcase performance*. Their sole purpose is to illustrate usage and functionality.\n"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": null,
202 | "metadata": {
203 | "collapsed": false
204 | },
205 | "outputs": [],
206 | "source": [
207 | "params = dict()\n",
208 | "params['dir_name'] = 'demo'\n",
209 | "params['num_epochs'] = 40\n",
210 | "params['batch_size'] = 100\n",
211 | "params['eval_frequency'] = 200\n",
212 | "\n",
213 | "# Building blocks.\n",
214 | "params['filter'] = 'chebyshev5'\n",
215 | "params['brelu'] = 'b1relu'\n",
216 | "params['pool'] = 'apool1'\n",
217 | "\n",
218 | "# Number of classes.\n",
219 | "C = y.max() + 1\n",
220 | "assert C == np.unique(y).size\n",
221 | "\n",
222 | "# Architecture.\n",
223 | "params['F'] = [32, 64] # Number of graph convolutional filters.\n",
224 | "params['K'] = [20, 20] # Polynomial orders.\n",
225 | "params['p'] = [4, 2] # Pooling sizes.\n",
226 | "params['M'] = [512, C] # Output dimensionality of fully connected layers.\n",
227 | "\n",
228 | "# Optimization.\n",
229 | "params['regularization'] = 5e-4\n",
230 | "params['dropout'] = 1\n",
231 | "params['learning_rate'] = 1e-3\n",
232 | "params['decay_rate'] = 0.95\n",
233 | "params['momentum'] = 0.9\n",
234 | "params['decay_steps'] = n_train / params['batch_size']"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "metadata": {
241 | "collapsed": false
242 | },
243 | "outputs": [],
244 | "source": [
245 | "model = models.cgcnn(L, **params)\n",
246 | "accuracy, loss, t_step = model.fit(X_train, y_train, X_val, y_val)"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "# 4 Evaluation\n",
254 | "\n",
255 | "We often want to monitor:\n",
256 | "1. The convergence, i.e. the training loss and the classification accuracy on the validation set.\n",
257 | "2. The performance, i.e. the classification accuracy on the testing set (to be compared with the training set accuracy to spot overfitting).\n",
258 | "\n",
259 | "The `model_perf` class in [utils.py](utils.py) can be used to compactly evaluate multiple models."
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {
266 | "collapsed": false
267 | },
268 | "outputs": [],
269 | "source": [
270 | "fig, ax1 = plt.subplots(figsize=(15, 5))\n",
271 | "ax1.plot(accuracy, 'b.-')\n",
272 | "ax1.set_ylabel('validation accuracy', color='b')\n",
273 | "ax2 = ax1.twinx()\n",
274 | "ax2.plot(loss, 'g.-')\n",
275 | "ax2.set_ylabel('training loss', color='g')\n",
276 | "plt.show()"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {
283 | "collapsed": false
284 | },
285 | "outputs": [],
286 | "source": [
287 | "print('Time per step: {:.2f} ms'.format(t_step*1000))"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": null,
293 | "metadata": {
294 | "collapsed": false
295 | },
296 | "outputs": [],
297 | "source": [
298 | "res = model.evaluate(X_test, y_test)\n",
299 | "print(res[0])"
300 | ]
301 | }
302 | ],
303 | "metadata": {
304 | "kernelspec": {
305 | "display_name": "Python 3",
306 | "language": "python",
307 | "name": "python3"
308 | },
309 | "language_info": {
310 | "codemirror_mode": {
311 | "name": "ipython",
312 | "version": 3
313 | },
314 | "file_extension": ".py",
315 | "mimetype": "text/x-python",
316 | "name": "python",
317 | "nbconvert_exporter": "python",
318 | "pygments_lexer": "ipython3",
319 | "version": "3.4.3"
320 | }
321 | },
322 | "nbformat": 4,
323 | "nbformat_minor": 0
324 | }
325 |
--------------------------------------------------------------------------------
/HCP_fmripredict/config.py:
--------------------------------------------------------------------------------
1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6
2 |
3 | # Author: Yu Zhang
4 | # License: simplified BSD
5 | # coding: utf-8
6 |
7 | ###default parameter settings
8 | class Config():
9 | pathfmri = '/home/yuzhang/scratch/HCP/aws_s3_HCP1200/FMRI/'
10 | pathout = '/home/yuzhang/scratch/HCP/temp_res_new/'
11 |
12 | TR = 0.72
13 | lowcut = 0.01
14 | highcut = 0.08
15 | window_size = 6
16 | ##window_size_trial = math.ceil(window_size/TR)
17 |
18 | ##task info
19 | modality = 'MOTOR'
20 | ###dict for different types of movement
21 | task_contrasts = {"rf": "foot",
22 | "lf": "foot",
23 | "rh": "hand",
24 | "lh": "hand",
25 | "t": "tongue"}
26 |
27 | ##the chosen atlas to map fmri data
28 | # mmp_atlas = "/home/yuzhang/projects/rrg-pbellec/yuzhang/HCP/codes/HCP_S1200_GroupAvg_v1/Gordon333.32k_fs_LR.dlabel.nii"
29 | pathsource = "/home/yuzhang/scratch/HCP/codes/"
30 | mmp_atlas = pathsource + "HCP_S1200_GroupAvg_v1/" + "Q1-Q6_RelatedValidation210.CorticalAreas_dil_Final_Final_Areas_Group_Colors.32k_fs_LR.dlabel.nii"
31 | AtlasName = 'MMP'
32 | Subject_Num = 2400
33 | Trial_Num = 284
34 | Node_Num = 32000
35 | Region_Num = 200
36 |
37 | startsub = 0
38 | endsub = Subject_Num
39 | subjectlist = 'ALL'
40 | n_thread = 5
41 | n_buffersize = 50
42 |
43 | ##temp saving file
44 | fmri_filename = 'Atlas.dtseries.nii'
45 | confound_filename = 'Movement_Regressors.txt'
46 | rsfmri_filename = 'Atlas_hp2000_clean.dtseries.nii'
47 |
48 | '''
49 | ###do not update paras in config
50 | ev_filename = 'event_labels_1200R' + '_test_' + subjectlist + '.h5' # '.txt'
51 | fmri_matrix_filename = AtlasName + '_ROI_act_1200R' + '_test_' + subjectlist + '.lmdb' #'.h5' # '.txt'
52 | #lmdb_filename = config_instance.pathout + hcp_fmri_instance.modality + '_' + fmri_matrix_filename
53 | '''
54 |
55 | import os
56 | try:
57 | ###params for graph_cnn
58 | import tensorflow as tf
59 | gcnn = tf.app.flags
60 | FLAGS = gcnn.FLAGS
61 |
62 | # Graphs.
63 | gcnn.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
64 | gcnn.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
65 | gcnn.DEFINE_integer('coarsening_levels', 6, 'Number of coarsened graphs.')
66 | gcnn.DEFINE_string('adj_mat', os.path.join(pathsource, 'MMP_adjacency_mat_white.pconn.nii'), 'Directory to adj matrix on surface data.')
67 | except ImportError:
68 | print("Tensorflow is not available on the current node!")
69 |
70 | gcnn_layers = 3
71 | gcnn_hidden = 256
72 | gcnn_pool = 4
73 |
74 | gcnn_coarsening_levels = 6
75 | gcnn_adj_mat_dict = {'surface': os.path.join(pathsource, 'MMP_adjacency_mat_white.pconn.nii'),
76 | 'SC': os.path.join(pathsource, 'HCP_S1200_GroupAvg_v1/S1200.All.corrThickness_MSMAll.32k_fs_LR.dscalar.nii'),
77 | 'FC': os.path.join(pathsource, 'HCP_S1200_GroupAvg_v1/S1200.All.corrThickness_MSMAll.32k_fs_LR.dscalar.nii')}
78 | gcnn_adj_mat_type = 'SC'
79 |
--------------------------------------------------------------------------------
/HCP_fmripredict/extract_fmri_event_data.py:
--------------------------------------------------------------------------------
1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6
2 |
3 | # Author: Yu Zhang
4 | # License: simplified BSD
5 | # coding: utf-8
6 | import sys
7 | import os
8 | import warnings
9 | sys.path.append('/home/yuzhang/scratch/HCP/codes/HCP_fmripredict/')
10 |
11 | import argparse
12 | from tensorpack.utils import logger
13 | from tensorpack.utils.serialize import dumps, loads
14 |
15 | import numpy as np
16 | import importlib
17 | import lmdb
18 | from pathlib import Path
19 | import config, utils
20 | #importlib.reload(utils)
21 |
22 | if __name__ == '__main__':
23 | args = sys.argv[1:]
24 | logger.set_logger_dir("train_log/svc_simple_log",action="d")
25 | warnings.simplefilter("ignore")
26 | #warnings.filterwarnings(action='once')
27 |
28 | parser = argparse.ArgumentParser(description='The description of the parameters')
29 |
30 | parser.add_argument('--task_modality', '-c', help='(required, string) Modality name in Capital for fmri and event design files', type=str)
31 | parser.add_argument('--subject_to_start', '-f', help='(optional, int,default=0) The index of the first subject in the all_subjects_list for analysis', type=int)
32 | parser.add_argument('--subject_to_last', '-g', help='(optional, int,default=1086) The index of the last subject in the all_subjects_list for analysis', type=int)
33 | parser.add_argument('--subjectlist_index', '-l', help="(optional, string, default='') The index indicator of the selected subject list", type=str)
34 |
35 | parser.add_argument('--n_thread', '-t', help='(optional, int, default = 5) Number of threads from each cpu to be used', type=int)
36 | parser.add_argument('--n_buffersize', '-b', help='(optional, int, default = 50) Number of files to be read at once', type=int)
37 | parser.add_argument('--n_sessions', '-j', help='(optional, int, default = 0) Total number of session for the subject', type=int)
38 | parser.add_argument('--n_sessions_combined', '-x', help='(optional, int, default = 1) The number of sessions to combine', type=int)
39 |
40 | parsed, unknown = parser.parse_known_args(args)
41 |
42 | modality = parsed.task_modality
43 |
44 | startsub = parsed.subject_to_start
45 | endsub = parsed.subject_to_last
46 | subjectlist = parsed.subjectlist_index
47 |
48 | n_jobs = 1
49 | n_thread = parsed.n_thread
50 | n_buffersize = parsed.n_buffersize
51 | n_sessions = parsed.n_sessions
52 | n_sessions_combined = parsed.n_sessions_combined
53 |
54 | #####re-assign parameter settings in config
55 | config_instance = config.Config()
56 |
57 | if modality:
58 | config_instance.modality = modality
59 | if startsub:
60 | config_instance.startsub = startsub
61 | if endsub:
62 | config_instance.endsub = endsub
63 | if subjectlist:
64 | config_instance.subjectlist = subjectlist
65 | if n_thread:
66 | config_instance.n_thread = n_thread
67 | if n_buffersize:
68 | config_instance.n_buffersize = n_buffersize
69 | if not os.path.exists(config_instance.pathout):
70 | os.makedirs(config_instance.pathout)
71 |
72 | ###use config parameters to collect fmri data
73 | '''
74 | config_instance = config.Config()
75 | modality = 'MOTOR'
76 | startsub = 0
77 | endsub = 2400
78 | subjectlist = 'ALL'
79 | '''
80 | hcp_fmri_instance = utils.hcp_task_fmri(config_instance)
81 |
82 | ##prepare fmri data for analysis
83 | subjects_trial_label_matrix, sub_name, coding,trial_dura = hcp_fmri_instance.prepare_fmri_files_list()
84 | print(np.array(subjects_trial_label_matrix).shape)
85 | print("each trial contains %d volumes/TRs for task %s" % (trial_dura,modality))
86 | ###updating information in the config settings
87 | config_instance.task_contrasts = hcp_fmri_instance.task_contrasts
88 | config_instance.Trial_dura = trial_dura
89 | config_instance.EVS_files = hcp_fmri_instance.EVS_files
90 | config_instance.fmri_files = hcp_fmri_instance.fmri_files
91 | config_instance.confound_files = hcp_fmri_instance.confound_files
92 |
93 | ############
94 | fmri_files = hcp_fmri_instance.fmri_files
95 | confound_files = hcp_fmri_instance.confound_files
96 | print(np.array(subjects_trial_label_matrix).shape)
97 | #print(np.unique(sub_name), len(sub_name))
98 |
99 | ###output logs
100 | print("--fmri_folder: ", config_instance.pathfmri)
101 | print('--temp_out:', config_instance.pathout)
102 | print('--atlas_filename: %s \n\n' % config_instance.AtlasName)
103 |
104 | mmp_atlas = config_instance.mmp_atlas
105 | #lmdb_filename = config_instance.pathout+hcp_fmri_instance.modality+'_'+config_instance.AtlasName + '_ROI_act_1200R' + '_test_' + subjectlist + '.lmdb'
106 | ##subjects_tc_matrix, subname_coding = utils.extract_mean_seris(fmri_files, confound_files, mmp_atlas, lmdb_filename, nr_proc=100, buffer_size=10)
107 | subjects_tc_matrix, subname_coding = utils.extract_mean_seris_thread(fmri_files, confound_files, mmp_atlas,
108 | hcp_fmri_instance.lmdb_filename,
109 | hcp_fmri_instance.Trial_Num,
110 | nr_thread=config_instance.n_thread, buffer_size=config_instance.n_buffersize)
111 | print(np.array(subjects_tc_matrix).shape)
112 | print('\n')
113 |
114 | #####
115 | sub_name = []
116 | for ss in subname_coding:
117 | sub_name.append(ss.split('_')[0])
118 | hcp_fmri_instance.sub_name = sub_name
119 | subjects_tc_matrix, subjects_trial_label_matrix = utils.preclean_data_for_shape_match(subjects_tc_matrix,subjects_trial_label_matrix,subname_coding)
120 | config_instance.Subject_Num = np.array(subjects_tc_matrix).shape[0]
121 | print(np.array(subjects_trial_label_matrix).shape)
122 | print(np.array(subjects_tc_matrix).shape)
123 |
124 | '''
125 | ##only using this for cnn, no need for svm or fc-nn
126 | ###use config parameters to collect rs-fmri data
127 | hcp_rsfmri_instance = utils.hcp_rsfmri(config_instance)
128 | ##prepare fmri data for analysis
129 | subjects_tc_matrix, mean_corr_matrix = hcp_rsfmri_instance.prepare_rsfmri_files_list(sub_name=sub_name,N_thread=4)
130 |
131 | '''
132 | print('\n Classify different tasks using simple-svm with rbf kernel...')
133 | target_name = np.unique(list(hcp_fmri_instance.task_contrasts.values()))
134 | ##scores= utils.my_svc_simple(subjects_tc_matrix, subjects_trial_label_matrix, target_name, sub_num=1500, block_dura=trial_dura, my_cv_fold=10,my_comp=20)
135 | ##print(scores)
136 | '''
137 | print('\n Changing the validation process by subject-specific split and average within each trial......')
138 | scores= utils.my_svc_simple_subject_validation_new(subjects_tc_matrix,subjects_trial_label_matrix,target_name,block_dura=trial_dura,my_cv_fold=10,my_testsize=0.2,my_valsize=0.1)
139 | print(scores)
140 |
141 | print('\n Changing the validation process by subject-specific split...')
142 | scores= utils.my_svc_simple_subject_validation_new(subjects_tc_matrix,subjects_trial_label_matrix,target_name,block_dura=1,my_cv_fold=10,my_testsize=0.2,my_valsize=0.1)
143 | print(scores)
144 | '''
145 | ##############################
146 | ####using fully-connected neural networks for classification of fmri tasks
147 | print('\n Classify different tasks using simple fc-nn...')
148 | ##utils.build_fc_nn_simple(subjects_tc_matrix, subjects_trial_label_matrix, target_name, layers=5, hidden_size=64,dropout=0.25,batch_size=128)
149 |
150 | print('\n Classify different tasks using simple fc-nn by subject-specific split...')
151 | utils.build_fc_nn_subject_validation(subjects_tc_matrix,subjects_trial_label_matrix,target_name,block_dura=trial_dura,
152 | layers=5, hidden_size=256,dropout=0.25,batch_size=128,nepochs=50)
153 |
154 | print('\n Classify different tasks using simple fc-nn by subject-specific split and average within each trial...')
155 | utils.build_fc_nn_subject_validation(subjects_tc_matrix,subjects_trial_label_matrix,target_name,sub_num=100, block_dura=1,
156 | layers=5,hidden_size=256,dropout=0.25,batch_size=128,nepochs=50)
157 |
158 |
159 | ###use config parameters to set parameters for graph convolution
160 | target_name = np.unique(list(hcp_fmri_instance.task_contrasts.values()))
161 | hcp_gcnn_instance = utils.hcp_gcnn_fmri(config_instance)
162 | print('\n Classify different tasks using gcn by subject-specific split...')
163 | train_acc, test_acc, val_acc = hcp_gcnn_instance.build_graph_cnn_subject_validation_new(subjects_tc_matrix, subjects_trial_label_matrix, target_name,block_dura=1,
164 | layers=config_instance.gcnn_layers,hidden_size=config_instance.gcnn_hidden,
165 | pool_size=config_instance.gcnn_pool,batch_size=128, nepochs=50)
166 |
167 | print('\n Classify different tasks using gcn by subject-specific split and average within each trial...')
168 | ##train_acc_trial, test_acc_trial, val_acc_trial = hcp_gcnn_instance.build_graph_cnn_subject_validation(subjects_tc_matrix, subjects_trial_label_matrix, target_name,block_dura=trial_dura,layers=config_instance.gcnn_layers,hidden_size=config_instance.gcnn_hidden,pool_size=config_instance.gcnn_pool,batch_size=128, nepochs=50)
169 | train_acc_trial, test_acc_trial, val_acc_trial = hcp_gcnn_instance.build_graph_cnn_subject_validation_new(subjects_tc_matrix, subjects_trial_label_matrix, target_name,block_dura=trial_dura,
170 | layers=config_instance.gcnn_layers,hidden_size=config_instance.gcnn_hidden,
171 | pool_size=config_instance.gcnn_pool,batch_size=128, nepochs=50)
172 |
173 |
174 | '''
175 | ####for script testing:
176 | modality='MOTOR'
177 | startsub = 0
178 | endsub = 2400
179 | subjectlist = 'ALL'
180 |
181 | python ./extract_fmri_event_data.py --task_modality=$modality --subject_to_start=0 --subject_to_last=100 --subjectlist_index='t010'
182 |
183 | '''
184 |
--------------------------------------------------------------------------------
/HCP_fmripredict/model.py:
--------------------------------------------------------------------------------
1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6
2 |
3 | # Author: Yu Zhang
4 | # License: simplified BSD
5 | # coding: utf-8
6 |
7 | ###define model for training
8 | import sys
9 | sys.path.append('/home/yuzhang/projects/rrg-pbellec/yuzhang/HCP/codes/HCP_fmripredict')
10 | import utils
11 |
12 | import numpy as np
13 | import pandas as pd
14 |
15 | from sklearn import svm, metrics
16 | from sklearn import preprocessing
17 | from sklearn.model_selection import cross_val_score, train_test_split,ShuffleSplit
18 | from sklearn.decomposition import PCA, FastICA, FactorAnalysis, DictionaryLearning, KernelPCA
19 |
20 | try:
21 | from keras.utils import np_utils
22 | from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout
23 | from keras.models import Model
24 | except ImportError:
25 | print("Tensorflow is not available on the current node!")
26 | print("Deep learning models will not be run for this test!")
27 |
28 |
29 | def build_fc_nn_model(Nfeatures,Nlabels,layers=3,hidden_size=256,dropout=0.25):
30 | ######fully-connected neural networks
31 | input0 = Input(shape=(Nfeatures,))
32 | drop1 = input0
33 | for li in np.arange(layers):
34 | hidden1 = Dense(hidden_size, activation='relu')(drop1)
35 | drop1 = Dropout(dropout)(hidden1)
36 | hidden_size = np.int32(hidden_size / 2)
37 | if hidden_size < 10:
38 | hidden_size = 16
39 |
40 | hidden2 = Dense(16, activation='relu')(drop1)
41 | drop2 = Dropout(0.5)(hidden2)
42 | out = Dense(Nlabels, activation='softmax')(drop2)
43 |
44 | model = Model(inputs=input0, outputs=out)
45 | #model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['mse','accuracy'])
46 | model.summary()
47 |
48 | return model
49 |
50 |
51 | def build_cnn_model(input_shape, Nlabels, filters=32, convsize=3, poolsize=2, hidden_size=128, conv_layers=2):
52 | # import keras.backend as K
53 | # if K.image_data_format() == 'channels_first':
54 | # img_shape = (1,img_rows,img_cols)
55 | # elif K.image_data_format() == 'channels_last':
56 | # img_shape = (img_rows,img_cols,1)
57 |
58 |
59 | input0 = Input(shape=input_shape)
60 | drop1 = input0
61 | for li in range(conv_layers):
62 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(drop1)
63 | conv1 = Conv2D(filters, (convsize, convsize), padding='same', activation='relu')(conv1)
64 | pool1 = MaxPooling2D((poolsize, poolsize))(conv1)
65 | drop1 = Dropout(0.25)(pool1)
66 | filters *= 2
67 |
68 |
69 | drop2 = drop1
70 | flat = Flatten()(drop2)
71 | hidden = Dense(hidden_size, activation='relu')(flat)
72 | drop3 = Dropout(0.5)(hidden)
73 | #hidden = Dense((hidden_size/4).astype(int), activation='relu')(drop3)
74 | #drop4 = Dropout(0.5)(hidden)
75 | out = Dense(Nlabels, activation='softmax')(drop3)
76 |
77 | model = Model(inputs=input0, outputs=out)
78 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
79 | model.summary()
80 |
81 | return model
82 |
--------------------------------------------------------------------------------
/HCP_fmripredict/my_test_mnist_gcn.py:
--------------------------------------------------------------------------------
1 | #!/home/yuzhang/tensorflow-py3.6/bin/python3.6
2 |
3 | # Author: Yu Zhang
4 | # License: simplified BSD
5 | # coding: utf-8
6 |
7 | import numpy as np
8 | import time
9 | import sys, os
10 | sys.path.append('/home/yu/PycharmProjects/HCP_fmripredict/')
11 | from cnn_graph.lib import models, graph, coarsening, utils
12 |
13 | import tensorflow as tf
14 | from tensorflow.examples.tutorials.mnist import input_data
15 |
16 | num_CPU = 2
17 | config_TF = tf.ConfigProto(intra_op_parallelism_threads=num_CPU,\
18 | inter_op_parallelism_threads=num_CPU, allow_soft_placement=True,\
19 | device_count = {'CPU' : num_CPU})
20 | session = tf.Session(config=config_TF)
21 |
22 | flags = tf.app.flags
23 | FLAGS = flags.FLAGS
24 |
25 | # Graphs.
26 | flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
27 | flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')
28 | flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
29 | flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')
30 | # Directories.
31 | flags.DEFINE_string('dir_data', os.path.join('..', 'data', 'mnist'), 'Directory to store data.')
32 |
33 | img_dim = 28
34 |
35 |
36 | def grid_graph(m, corners=False):
37 | ##build a graph on MNIST digit images
38 | z = graph.grid(m)
39 | dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric)
40 | A = graph.adjacency(dist, idx)
41 |
42 | # Connections are only vertical or horizontal on the grid.
43 | # Corner vertices are connected to 2 neighbors only.
44 | if corners:
45 | import scipy.sparse
46 | A = A.toarray()
47 | A[A < A.max() / 1.5] = 0
48 | A = scipy.sparse.csr_matrix(A)
49 | print('{} edges'.format(A.nnz))
50 |
51 | ##plt.spy(A, markersize=2, color='black')
52 | print("{} > {} edges".format(A.nnz // 2, FLAGS.number_edges * m ** 2 // 2))
53 | return A
54 |
55 | ##data preparation
56 | mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)
57 |
58 | train_data = mnist.train.images.astype(np.float32)
59 | val_data = mnist.validation.images.astype(np.float32)
60 | test_data = mnist.test.images.astype(np.float32)
61 | train_labels = mnist.train.labels
62 | val_labels = mnist.validation.labels
63 | test_labels = mnist.test.labels
64 |
65 | ###compute the adjacency matrix based on Euclidean distance of spatial locations
66 | A = grid_graph(img_dim, corners=False)
67 | A = graph.replace_random_edges(A, 0.01)
68 | ###build multi-level graphs by coarsening (graph size divided by 2 at each level)
69 | graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False)
70 | L = [graph.laplacian(A, normalized=True) for A in graphs]
71 |
72 | ###parameters for the model
73 | common = {}
74 | common['dir_name'] = 'mnist/'
75 | common['num_epochs'] = 20
76 | common['batch_size'] = 100
77 | common['decay_steps'] = mnist.train.num_examples / common['batch_size']
78 | common['eval_frequency'] = 30 * common['num_epochs']
79 | common['brelu'] = 'b1relu'
80 | common['pool'] = 'mpool1'
81 | C = max(mnist.train.labels) + 1 # number of classes
82 |
83 | train_data_perm = coarsening.perm_data(train_data, perm)
84 | val_data_perm = coarsening.perm_data(val_data, perm)
85 | test_data_perm = coarsening.perm_data(test_data, perm)
86 | model_perf = utils.model_perf()
87 |
88 | ###test different parameter settings
89 | ##model1: no convolution
90 | name = 'softmax'
91 | params = common.copy()
92 | params['dir_name'] += name
93 | params['regularization'] = 5e-4
94 | params['dropout'] = 1
95 | params['learning_rate'] = 0.02
96 | params['decay_rate'] = 0.95
97 | params['momentum'] = 0.9
98 | params['F'] = []
99 | params['K'] = []
100 | params['p'] = []
101 | params['M'] = [C]
102 |
103 | ####training and testing models
104 | print(L)
105 |
106 | t_start = time.process_time()
107 | model_perf.test(models.cgcnn(config_TF, L, **params), name, params,
108 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels)
109 | t_end_1 = time.process_time() - t_start
110 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_1))
111 |
112 | ###model#2: one-layer convolution with fourier transform as filter
113 | common['regularization'] = 0
114 | common['dropout'] = 1
115 | common['learning_rate'] = 0.02
116 | common['decay_rate'] = 0.95
117 | common['momentum'] = 0.9
118 | common['F'] = [10] # Number of graph convolutional filters.
119 | common['K'] = [20] # Polynomial orders.
120 | common['p'] = [1] # Pooling sizes.
121 | common['M'] = [C] # Output dimensionality of fully connected layers.
122 |
123 | name = 'fgconv_softmax'
124 | params = common.copy()
125 | params['dir_name'] += name
126 | params['filter'] = 'fourier'
127 | params['K'] = [L[0].shape[0]]
128 |
129 | t_start = time.process_time()
130 | model_perf.test(models.cgcnn(config_TF, L, **params), name, params,
131 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels)
132 | t_end_2 = time.process_time() - t_start
133 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_2))
134 |
135 | ##model#3: one-layer convolution with chebyshev5 and b1relu as filters
136 | name = 'cgconv_softmax'
137 | params = common.copy()
138 | params['dir_name'] += name
139 | params['filter'] = 'chebyshev5'
140 | # params['filter'] = 'chebyshev2'
141 | # params['brelu'] = 'b2relu'
142 |
143 | t_start = time.process_time()
144 | model_perf.test(models.cgcnn(config_TF,L, **params), name, params,
145 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels)
146 | t_end_3 = time.process_time() - t_start
147 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_3))
148 |
149 | ##model#4: two convolutional layers with fourier transform as filters
150 | common['regularization'] = 5e-4
151 | common['dropout'] = 0.5
152 | common['learning_rate'] = 0.02 # 0.03 in the paper but sgconv_sgconv_fc_softmax has difficulty to converge
153 | common['decay_rate'] = 0.95
154 | common['momentum'] = 0.9
155 | common['F'] = [32, 64] # Number of graph convolutional filters.
156 | common['K'] = [25, 25] # Polynomial orders.
157 | common['p'] = [4, 4] # Pooling sizes.
158 | common['M'] = [512, C] # Output dimensionality of fully connected layers.
159 |
160 | name = 'fgconv_fgconv_fc_softmax' # 'Non-Param'
161 | params = common.copy()
162 | params['dir_name'] += name
163 | params['filter'] = 'fourier'
164 | params['K'] = [L[0].shape[0], L[2].shape[0]]
165 | print([L[li].shape for li in range(len(L))])
166 |
167 | t_start = time.process_time()
168 | model_perf.test(models.cgcnn(config_TF,L, **params), name, params,
169 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels)
170 | t_end_4 = time.process_time() - t_start
171 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_4))
172 |
173 |
174 |
175 | ##model#5: two convolutional layers with Chebyshev polynomial as filters
176 | name = 'cgconv_cgconv_fc_softmax' # 'Non-Param'
177 | params = common.copy()
178 | params['dir_name'] += name
179 | params['filter'] = 'chebyshev5'
180 | print(params)
181 | print([L[li].shape for li in range(len(L))])
182 |
183 | t_start = time.process_time()
184 | model_perf.test(models.cgcnn(config_TF,L, **params), name, params,
185 | train_data_perm, train_labels, val_data_perm, val_labels, test_data_perm, test_labels)
186 | t_end_5 = time.process_time() - t_start
187 | print('Model {}; Execution time: {:.2f}s\n\n'.format(name, t_end_5))
188 |
189 |
190 |
191 | ###summary
192 | model_perf.show()
193 | print('Execution time for model1: {:.2f}s\n\n'.format(t_end_1))
194 | print('Execution time for model2: {:.2f}s\n\n'.format(t_end_2))
195 | print('Execution time for model3: {:.2f}s\n\n'.format(t_end_3))
196 | print('Execution time for model4: {:.2f}s\n\n'.format(t_end_4))
197 | print('Execution time for model5: {:.2f}s\n\n'.format(t_end_5))
198 |
--------------------------------------------------------------------------------
/HCP_fmripredict/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.1.10
2 | astor==0.7.1
3 | awscli==1.14.47
4 | bleach==1.5.0
5 | boto==2.49.0
6 | boto3==1.9.70
7 | botocore==1.12.70
8 | bz2file==0.98
9 | certifi==2018.11.29
10 | chardet==3.0.4
11 | colorama==0.3.7
12 | cycler==0.10.0
13 | decorator==4.2.1
14 | docopt==0.6.2
15 | docutils==0.14
16 | entrypoints==0.2.3
17 | gast==0.2.0
18 | gensim==3.6.0
19 | grpcio==1.17.1
20 | h5py==2.7.1
21 | html5lib==0.9999999
22 | idna==2.8
23 | ipykernel==4.8.1
24 | ipython==6.2.1
25 | ipython-genutils==0.2.0
26 | ipywidgets==7.1.1
27 | jedi==0.11.1
28 | jmespath==0.9.3
29 | jsonschema==2.6.0
30 | jupyter==1.0.0
31 | jupyter-client==5.2.2
32 | jupyter-console==5.2.0
33 | jupyter-core==4.4.0
34 | jupyterlab==0.31.8
35 | jupyterlab-launcher==0.10.5
36 | jupyterlmod==1.5.0
37 | Keras==2.1.4
38 | Keras-Applications==1.0.6
39 | Keras-Preprocessing==1.0.5
40 | lmdb==0.93
41 | Markdown==2.6.11
42 | MarkupSafe==1.0
43 | matplotlib==2.1.2
44 | mistune==0.8.3
45 | mock==2.0.0
46 | mpmath==1.1.0
47 | msgpack==0.5.6
48 | msgpack-numpy==0.4.3
49 | nbconvert==5.3.1
50 | nbformat==4.4.0
51 | nbrsessionproxy==0.6.1
52 | nbserverproxy==0.5.1
53 | nibabel==2.2.1
54 | numexpr==2.6.4
55 | numpy==1.14.1
56 | pandas==0.21.0
57 | pandocfilters==1.4.2
58 | parso==0.1.1
59 | pbr==5.1.1
60 | pexpect==4.4.0
61 | pickleshare==0.7.4
62 | Pillow==5.3.0
63 | pkg-resources==0.0.0
64 | prompt-toolkit==1.0.15
65 | protobuf==3.6.1
66 | ptyprocess==0.5.2
67 | pyasn1==0.4.2
68 | Pygments==2.2.0
69 | pyparsing==2.2.0
70 | python-dateutil==2.6.1
71 | pytz==2018.3
72 | pyzmq==17.0.0
73 | qtconsole==4.3.1
74 | requests==2.21.0
75 | rsa==3.4.2
76 | s3transfer==0.1.13
77 | scikit-build==0.6.1
78 | scikit-learn==0.19.1
79 | scipy==1.0.0
80 | seaborn==0.8.1
81 | Send2Trash==1.4.2
82 | simplegeneric==0.8.1
83 | six==1.11.0
84 | sklearn==0.0
85 | smart-open==1.7.1
86 | sympy==1.3
87 | tables==3.4.2
88 | tabulate==0.8.2
89 | tensorboard==1.12.1
90 | tensorflow-gpu==1.12.0
91 | tensorflow-tensorboard==1.5.1
92 | termcolor==1.1.0
93 | terminado==0.8.1
94 | testpath==0.3.1
95 | torch==1.0.0
96 | torchvision==0.2.1
97 | tornado==4.5.3
98 | tqdm==4.19.7
99 | traitlets==4.3.2
100 | urllib3==1.24.1
101 | wcwidth==0.1.7
102 | webencodings==0.5.1
103 | Werkzeug==0.14.1
104 | widgetsnbextension==3.1.3
105 |
--------------------------------------------------------------------------------
/HCP_fmripredict/requirements_update.txt:
--------------------------------------------------------------------------------
1 | # packages in environment at /home/yuzhang/miniconda3/envs/tensorflow:
2 | #
3 | # Name Version Build Channel
4 | awscli 1.14.47
5 | bleach 1.5.0
6 | ca-certificates 2018.03.07 0
7 | certifi 2018.1.18 py36_0
8 | ciftify 1.0.1
9 | colorama 0.3.7
10 | cycler 0.10.0
11 | decorator 4.2.1
12 | docopt 0.6.2
13 | docutils 0.14
14 | entrypoints 0.2.3
15 | h5py 2.7.1
16 | html5lib 0.9999999
17 | ipykernel 4.8.1
18 | ipython 6.2.1
19 | ipython-genutils 0.2.0
20 | ipywidgets 7.1.1
21 | jedi 0.11.1
22 | Jinja2 2.10
23 | jmespath 0.9.3
24 | jsonschema 2.6.0
25 | jupyter 1.0.0
26 | jupyter-client 5.2.2
27 | jupyter-console 5.2.0
28 | jupyter-core 4.4.0
29 | jupyterlab 0.31.8
30 | jupyterlab-launcher 0.10.5
31 | jupyterlmod 1.5.0
32 | Keras 2.1.4
33 | libedit 3.1 heed3624_0
34 | libffi 3.2.1 hd88cf55_4
35 | libgcc-ng 7.2.0 hdf63c60_3
36 | libstdcxx-ng 7.2.0 hdf63c60_3
37 | lmdb 0.93
38 | Markdown 2.6.11
39 | MarkupSafe 1.0
40 | matplotlib 2.1.2
41 | mistune 0.8.3
42 | msgpack 0.5.6
43 | msgpack-numpy 0.4.3
44 | nbconvert 5.3.1
45 | nbformat 4.4.0
46 | nbrsessionproxy 0.6.1
47 | nbserverproxy 0.5.1
48 | ncurses 6.0 h9df7e31_2
49 | nibabel 2.2.1
50 | nilearn 0.4.0
51 | notebook 5.4.0
52 | numexpr 2.6.4
53 | openssl 1.0.2o h20670df_0
54 | pandas 0.21.0
55 | pandocfilters 1.4.2
56 | parso 0.1.1
57 | pexpect 4.4.0
58 | pickleshare 0.7.4
59 | pip 9.0.3 py36_0
60 | prompt-toolkit 1.0.15
61 | ptyprocess 0.5.2
62 | pyasn1 0.4.2
63 | Pygments 2.2.0
64 | pyparsing 2.2.0
65 | python 3.6.5 hc3d631a_0
66 | python-dateutil 2.6.1
67 | pytz 2018.3
68 | PyYAML 3.12
69 | pyzmq 17.0.0
70 | qtconsole 4.3.1
71 | readline 7.0 ha6073c6_4
72 | rsa 3.4.2
73 | s3transfer 0.1.13
74 | scikit-build 0.6.1
75 | scikit-learn 0.19.1
76 | scipy 1.0.0
77 | seaborn 0.8.1
78 | Send2Trash 1.4.2
79 | setuptools 39.0.1 py36_0
80 | simplegeneric 0.8.1
81 | six 1.11.0
82 | sklearn 0.0
83 | sqlite 3.22.0 h1bed415_0
84 | tables 3.4.2
85 | tabulate 0.8.2
86 | tensorboard 1.6.0
87 | tensorflow-tensorboard 1.5.1
88 | tensorpack 0.8.2
89 | termcolor 1.1.0
90 | terminado 0.8.1
91 | testpath 0.3.1
92 | tk 8.6.7 hc745277_3
93 | tornado 4.5.3
94 | tqdm 4.19.7
95 | traitlets 4.3.2
96 | wcwidth 0.1.7
97 | webencodings 0.5.1
98 | Werkzeug 0.14.1
99 | wheel 0.31.0 py36_0
100 | widgetsnbextension 3.1.3
101 | xz 5.2.3 h55aa19d_2
102 | zlib 1.2.11 ha838bed_2
103 |
--------------------------------------------------------------------------------
/HCP_fmripredict/tensorflow_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-pbellec
3 | #SBATCH --job-name=cnn_graph
4 | #SBATCH --gres=gpu:2 # request GPU "generic resource"
5 | #SBATCH --cpus-per-task=6 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham.
6 | #SBATCH --mem=120G # memory per node
7 | #SBATCH --time=00-15:00 # time (DD-HH:MM)
8 | #SBATCH --output=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j.out ###--output=%N-%j.out # %N for node name, %j for jobID
9 | ####SBATCH --error=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j_%A_%a.err
10 | #SBATCH --workdir="/home/yuzhang/scratch/HCP/codes/HCP_fmripredict/"
11 |
12 | module load cuda cudnn python/3.6.3
13 | source $HOME/tensorflow-py3.6/bin/activate
14 | ps | grep python; pkill python;
15 |
16 | #python ./tensorflow-test.py
17 |
18 | mod=$1
19 | list=$2
20 | if [ -z ${mod} ];then mod='WM';fi
21 | if [ -z ${list} ];then list='ALL';fi
22 |
23 | ###python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5
24 |
25 | ##python -W ignore ./HCP_task_fmri_cnn_tensorpack.py
26 | python ./HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm.py
27 |
--------------------------------------------------------------------------------
/HCP_fmripredict/tensorflow_test_wholenode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-pbellec
3 | #SBATCH --job-name=cnn_graph
4 | #SBATCH --nodes=1
5 | #SBATCH --gres=gpu:lgpu:4 ##request whole node
6 | #SBATCH --ntasks=1
7 | #SBATCH --cpus-per-task=24 # There are 24 CPU cores on Cedar GPU nodes
8 | #SBATCH --mem=0 # Request the full memory of the node
9 | #SBATCH --time=00-15:00 # time (DD-HH:MM)
10 |
11 | #SBATCH --output=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j.out ###--output=%N-%j.out # %N for node name, %j for jobID
12 | ####SBATCH --error=/home/yuzhang/scratch/HCP/codes/train_log/hcp_task_classify_%x_%N-%j_%A_%a.err
13 | #SBATCH --workdir="/home/yuzhang/scratch/HCP/codes/HCP_fmripredict/"
14 |
15 | module load cuda cudnn python/3.6.3
16 | source $HOME/tensorflow-py3.6/bin/activate
17 | ps | grep python; pkill python;
18 |
19 | #python ./tensorflow-test.py
20 |
21 | mod=$1
22 | list=$2
23 | if [ -z ${mod} ];then mod='WM';fi
24 | if [ -z ${list} ];then list='ALL';fi
25 |
26 | ###python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5
27 |
28 | ##python -W ignore ./HCP_task_fmri_cnn_tensorpack.py
29 | python ./HCP_task_fmri_cnn_tensorpack_changesize_bk4_wm.py
30 |
--------------------------------------------------------------------------------
/HCP_fmripredict/test_model_submit_bk.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=rrg-pbellec
3 | #SBATCH --nodes=1
4 | #SBATCH --tasks-per-node=8 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham.
5 | #SBATCH --mem=200G # memory per node
6 | #SBATCH --time=0-8:00 #0-12:00 # time (DD-HH:MM)
7 | #SBATCH --output=../train_log/hcp_loaddata_%x_%N-%j.out # %N for node name, %j for jobID
8 |
9 | #module load cuda cudnn python/3.6.3
10 | source activate tensorflow
11 | #mod='WM'
12 | #list='ALL'
13 | mod=$1
14 | list=$2
15 | if [ -z ${mod} ];then mod='MOTOR';fi
16 | if [ -z ${list} ];then list='ALL';fi
17 |
18 | ##python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5
19 |
20 | python ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 --n_buffersize=30
21 |
22 | ##sbatch --mem=50G --time=0-10:0 --nodes=2 --ntasks-per-node=8 --account=rrg-pbellec --output=../../hcp_loaddata_WM_ALL_logs.txt ./extract_fmri_event_data.py --task_modality='WM' --subject_to_start=0 --subject_to_last=2400 --subjectlist_index='ALL' --n_thread=5
23 |
--------------------------------------------------------------------------------
/HCP_fmripredict/test_model_submit_new.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #SBATCH --account=def-pbellec ##rrg-pbellec
3 | #SBATCH --gres=gpu:2 # request GPU "generic resource"
4 | #SBATCH --ntasks=1
5 | #SBATCH --cpus-per-task=8 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham.
6 | #SBATCH --mem=120G #200G # memory per node
7 | #SBATCH --time=0-15:00 #0-12:00 # time (DD-HH:MM)
8 | #SBATCH --output=../train_log/hcp_loaddata_%x_%N-%j.out # %N for node name, %j for jobID
9 |
10 |
11 | #module load cuda cudnn python/3.6.3
12 | source activate tensorflow
13 | #mod='WM'
14 | #list='ALL'
15 | mod=$1
16 | list=$2
17 | if [ -z ${mod} ];then mod='MOTOR';fi
18 | if [ -z ${list} ];then list='ALL';fi
19 |
20 | ##python -W ignore ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5
21 |
22 | python ./extract_fmri_event_data.py --task_modality=${mod} --subject_to_start=0 --subject_to_last=2400 --subjectlist_index=${list} --n_thread=5 --n_buffersize=30
23 |
24 | ##sbatch --mem=50G --time=0-10:0 --nodes=2 --ntasks-per-node=8 --account=rrg-pbellec --output=../../hcp_loaddata_WM_ALL_logs.txt ./extract_fmri_event_data.py --task_modality='WM' --subject_to_start=0 --subject_to_last=2400 --subjectlist_index='ALL' --n_thread=5
25 |
--------------------------------------------------------------------------------
/HCP_fmripredict/test_module.py:
--------------------------------------------------------------------------------
1 | #!/home/yuzhang/jupyter_py3/bin/python
2 |
3 | # Author: Yu Zhang
4 | # License: simplified BSD
5 | # coding: utf-8
6 |
7 | import sys
8 | sys.path.append('/home/yuzhang/projects/rrg-pbellec/yuzhang/HCP/codes/HCP_fmripredict')
9 |
10 | import config, utils
11 |
12 | config_instance = config.Config()
13 | print("--modality", config_instance.modality)
14 | print("--fmri_folder: ", config_instance.pathfmri)
15 | print('--temp_out:', config_instance.pathout)
16 | print('--atlas_filename:',config_instance.AtlasName)
17 |
18 | hcp_fmri_instance = utils.hcp_task_fmri(config_instance)
19 |
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fmri_predict
2 | predicting fMRI activities from the connectome
3 |
4 | ## To work with git
5 | 1) git status: to check any changes in the repo
6 | 2) git add: to save the changes
7 | 3) git commit -a: to save new updates and commit
8 | 4) git push: to upload any local changes to github
9 | 5) git pull: to clone new changes in github to local computers
10 | 6) git log: to check the log information in the repo
11 |
12 | ## To create a virtual environment via conda
13 | 1) install miniconda: https://conda.io/miniconda.html , run "bash Miniconda3-latest-Linux-x86_64.sh" and "conda update conda" after downloading
14 | 2) create env: conda create -n tensorflow-py3.6 anaconda python=3.6
15 | 3) verify env is created: conda list
16 | 4) activate env: source activate tensorflow-py3.6
17 | 5) save package info from another environment: pip3 freeze > requirements.txt
18 | 6) loading all required packages:
19 | while read requirement; do conda install --yes $requirement || pip install $requirement; done < requirements.txt ;
20 | or simply use: pip install -r requirements.txt
21 | 7) install tensorflow for gpu:
22 | pip install --upgrade pip;
23 | pip3 install tensorflow-gpu;
24 | pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp36-cp36m-linux_x86_64.whl
25 | for validation: ipython ->
26 | import tensorflow as tf;
27 | hello = tf.constant('Hello, TensorFlow!');
28 | sess = tf.Session();
29 | print(sess.run(hello));
30 | or simply run: python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
31 | 8) install tensorpack: pip3 install tensorpack; pip3 install --upgrade tensorpack
32 | 9) install notebook: conda install -c anaconda ipykernel ; conda install -c anaconda notebook;
33 | ipython kernel install --user --name tensorflow --display-name "Python3.6"; jupyter kernelspec list
34 |
35 |
36 |
37 | ## Discussion with Pierre on Jan 29th
38 | 1) start with a simple model: predicting motor activation from functional connectivity using sparse linear regression model
39 | 2) using atlas:
40 | group atlas: MIST with two resolution (200/1000 regions)
41 | individual atlas
42 | 3) defining network structure: 7 functional networks (non-linear relationships could be learned through convolutional layers; thus no logical conflict)
43 | 4) for limited training samples: use sliding windows to generate dynamic functional connectivity (window duration: 5 min); see the sketch below
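The sketch below is only a minimal illustration of item 4, not code from this repo: it assumes region-averaged time series have already been extracted with an atlas, and the helper name, window length and stride are placeholder values.

```python
import numpy as np

def sliding_window_fc(roi_ts, win_len, stride):
    """Dynamic functional connectivity from a (n_timepoints, n_regions) array.

    Returns one vectorized upper-triangular correlation matrix per window.
    """
    n_tp, n_rois = roi_ts.shape
    iu = np.triu_indices(n_rois, k=1)
    windows = []
    for start in range(0, n_tp - win_len + 1, stride):
        win = roi_ts[start:start + win_len]
        corr = np.corrcoef(win.T)          # (n_regions, n_regions) correlation matrix
        windows.append(corr[iu])           # keep the upper triangle only
    return np.array(windows)               # (n_windows, n_edges)

# e.g. with TR = 2 s, a 5-min window is about 150 volumes (hypothetical numbers):
# dyn_fc = sliding_window_fc(roi_ts, win_len=150, stride=10)
```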
44 |
45 | ### Data
46 | 1) resting-state: 10 sessions under the folder: /data/cisl/raw_data/midnight/Rawdata/nii_data/preproc_fsl/sub01/rest
47 | using warped_F_sess*_res_ICA_filt_sm6.nii.gz, i.e. after ICA-AROMA, temporal filtering, spatial smoothing and registration
48 | 2) motor tasks: 10 sessions and 2 runs for each, under the folder: /data/cisl/raw_data/midnight/Rawdata/nii_data/preproc_fsl/sub01/motor
49 | preprocessed fmri: filtered_func_data_ICA.nii.gz
50 | brain activation map from contrasts:
51 | zstat1: foot movement
52 | zstat2: hand
53 | zstat3: tongue
54 | zstat4: foot_left
55 | zstat5: foot_right
56 | zstat6: hand_left
57 | zstat7: hand_right
58 |
59 | ### First practice: predicting task activation from RSFC using a linear model
60 | ## codes in linear_model folder
61 | ## script: midnight_project_resting.ipynb
62 | 1) models: LinearRegression, RidgeRegression, Lasso, ElasticNetCV, LinearSVR
63 | for each region, the linear models are trained and the best model is chosen based on cross-validation (see the sketch at the end of this section)
64 | 2) data: dynamic functional connectivity (window_size=10min), motor task (2 runs) for 10 sessions
65 | 3) atlas: we used the MIST_ROI atlas (210 regions) to extract mean fMRI signals or activations for model training
66 | 4) regions: pre-select regions with moderate activity from the activation maps (z-score>1.9);
67 | after that, we trained the linear models for approximately 50 regions, independently
68 |
69 | 5) further considerations:
70 | a). combining multi-subject data and using multitask models during training
71 | b). run a statistical test on the z-maps first and convert the activation maps into binary maps; we could then use classification models instead of regression, which might improve prediction accuracy
72 |
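Below is a minimal sketch of the per-region model selection described in points 1-4 above; it is an illustration only, with random placeholder arrays, and the actual analysis lives in linear_model/midnight_project_resting.ipynb.

```python
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNetCV
from sklearn.svm import LinearSVR
from sklearn.model_selection import cross_val_score

# random placeholders standing in for dynamic-FC features and per-region activation targets
rng = np.random.default_rng(0)
fc_features = rng.standard_normal((200, 210))   # n_samples x n_connectivity_features
activation = rng.standard_normal((200, 50))     # n_samples x n_preselected_regions

candidates = {
    'ols': LinearRegression(),
    'ridge': Ridge(alpha=1.0),
    'lasso': Lasso(alpha=0.1),
    'enet': ElasticNetCV(cv=5),
    'svr': LinearSVR(C=1.0, max_iter=10000),
}

best_models = {}
for region in range(activation.shape[1]):
    y = activation[:, region]
    # keep the candidate with the best cross-validated R^2 for this region
    scores = {name: cross_val_score(est, fc_features, y, cv=5, scoring='r2').mean()
              for name, est in candidates.items()}
    best_name = max(scores, key=scores.get)
    best_models[region] = clone(candidates[best_name]).fit(fc_features, y)
```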
--------------------------------------------------------------------------------
/fmri_decoding/fmri_decoding_motor_svc_weights.nii.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SIMEXP/fmri_predict/238d9409ea4e8e35e82f87e1dedf95a78e47afcc/fmri_decoding/fmri_decoding_motor_svc_weights.nii.gz
--------------------------------------------------------------------------------
/fmri_decoding/fmri_decoding_readme.txt:
--------------------------------------------------------------------------------
1 | Part I: classify between hand, foot and tongue movements using SVM
2 |
3 | 1. Multiclass SVM with RBF kernels on activation patterns:
4 | 20 samples (sessions*runs) with 210 features (ROIs)
5 | Results:
6 | SVM Scoring with 5-fold cross-validation: mean accuracy = 1.0
7 | after PCA decomposition into 20 components: mean accuracy = 0.916
8 | after kernel-PCA decomposition into 20 components: mean accuracy = 0.99
9 | after ICA decomposition: mean accuracy = 0.75
10 | after MDS decomposition: mean accuracy = 0.80
11 | [ 0.83333333 0.91666667 0.75 0.75 0.83333333]
12 |
13 |
14 | 2. Multiclass SVM with RBF kernels on fMRI signals:
15 | 1480 samples (sessions*runs*trials) with 210 features (ROIs)
16 | Results:
17 | SVM Scoring with 10-fold cross-validation: mean accuracy = 0.431
18 | Reduction into 10 components:
19 | PCA decomposition: mean accuracy = 0.441
20 | ICA decomposition: mean accuracy = 0.419
21 | Kernel-PCA decomposition: mean accuracy = 0.466
22 | MDS decomposition: mean accuracy = 0.415
23 | ANOVA feature selection based on F-test: mean accuracy = 0.438
24 | Reduction into 20 components:
25 | PCA decomposition: mean accuracy = 0.431
26 | ICA decomposition: mean accuracy = 0.419
27 | Kernel-PCA decomposition: mean accuracy = 0.479
28 | MDS decomposition: mean accuracy = 0.415
29 | ANOVA feature selection based on F-test: mean accuracy = 0.433
30 | Reduction into 50 components:
31 | PCA decomposition: mean accuracy = 0.433
32 | ICA decomposition: mean accuracy = 0.419
33 | Kernel-PCA decomposition: mean accuracy = 0.419
34 | MDS decomposition: mean accuracy = 0.452
35 | ANOVA feature selection based on F-test: mean accuracy = 0.439
36 | Reduction into 100 components:
37 | PCA decomposition: mean accuracy = 0.417
38 | ICA decomposition: mean accuracy = 0.419
39 | Kernel-PCA decomposition: mean accuracy = 0.419
40 | MDS decomposition: mean accuracy = 0.443
41 | ANOVA feature selection based on F-test: mean accuracy = 0.444
42 | Reduction into 150 components:
43 | PCA decomposition: mean accuracy = 0.416
44 | ICA decomposition: mean accuracy = 0.419
45 | Kernel-PCA decomposition: mean accuracy = 0.419
46 | MDS decomposition: mean accuracy = 0.453
47 | ANOVA feature selection based on F-test: mean accuracy = 0.444
48 |
49 |
50 |
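A minimal sketch of the kind of pipeline summarized above (illustration only, not the original script: a scikit-learn pipeline with standardization, PCA to 20 components and an RBF-kernel SVC, scored by cross-validation; the arrays below are random placeholders with the same shapes as the fMRI-signal setting):

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# random placeholders: 1480 samples x 210 ROI features, 3 movement classes
rng = np.random.default_rng(0)
X = rng.standard_normal((1480, 210))
y = rng.integers(0, 3, size=1480)    # 0=foot, 1=hand, 2=tongue

clf = make_pipeline(StandardScaler(), PCA(n_components=20), SVC(kernel='rbf', C=1.0))
scores = cross_val_score(clf, X, y, cv=10)
print('mean accuracy = %.3f' % scores.mean())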
--------------------------------------------------------------------------------
/fmri_decoding/model_test_2dcnn.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SIMEXP/fmri_predict/238d9409ea4e8e35e82f87e1dedf95a78e47afcc/fmri_decoding/model_test_2dcnn.h5
--------------------------------------------------------------------------------
/fmri_decoding/model_test_2dcnn.json:
--------------------------------------------------------------------------------
1 | {"keras_version": "2.1.1", "backend": "tensorflow", "config": {"input_layers": [["input_7", 0, 0]], "output_layers": [["dense_18", 0, 0]], "name": "model_6", "layers": [{"name": "input_7", "config": {"batch_input_shape": [null, 61, 73, 1], "sparse": false, "name": "input_7", "dtype": "float32"}, "inbound_nodes": [], "class_name": "InputLayer"}, {"name": "conv2d_9", "config": {"name": "conv2d_9", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 32, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["input_7", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "conv2d_10", "config": {"name": "conv2d_10", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 32, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_9", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "max_pooling2d_5", "config": {"pool_size": [2, 2], "strides": [2, 2], "name": "max_pooling2d_5", "padding": "valid", "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_10", 0, 0, {}]]], "class_name": "MaxPooling2D"}, {"name": "dropout_19", "config": {"rate": 0.25, "noise_shape": null, "name": "dropout_19", "trainable": true, "seed": null}, "inbound_nodes": [[["max_pooling2d_5", 0, 0, {}]]], "class_name": "Dropout"}, {"name": "conv2d_11", "config": {"name": "conv2d_11", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 64, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["dropout_19", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "conv2d_12", "config": {"name": "conv2d_12", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "kernel_size": [3, 3], "strides": [1, 1], "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 64, "bias_constraint": null, "dilation_rate": [1, 1], "activity_regularizer": null, "padding": "same", "bias_regularizer": null, "use_bias": true, "data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_11", 0, 0, {}]]], "class_name": "Conv2D"}, {"name": "max_pooling2d_6", "config": {"pool_size": [2, 2], "strides": [2, 2], "name": "max_pooling2d_6", "padding": "valid", 
"data_format": "channels_last", "trainable": true}, "inbound_nodes": [[["conv2d_12", 0, 0, {}]]], "class_name": "MaxPooling2D"}, {"name": "dropout_20", "config": {"rate": 0.25, "noise_shape": null, "name": "dropout_20", "trainable": true, "seed": null}, "inbound_nodes": [[["max_pooling2d_6", 0, 0, {}]]], "class_name": "Dropout"}, {"name": "flatten_3", "config": {"name": "flatten_3", "trainable": true}, "inbound_nodes": [[["dropout_20", 0, 0, {}]]], "class_name": "Flatten"}, {"name": "dense_17", "config": {"name": "dense_17", "activation": "relu", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "units": 128, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_constraint": null, "activity_regularizer": null, "bias_regularizer": null, "use_bias": true, "trainable": true}, "inbound_nodes": [[["flatten_3", 0, 0, {}]]], "class_name": "Dense"}, {"name": "dropout_21", "config": {"rate": 0.5, "noise_shape": null, "name": "dropout_21", "trainable": true, "seed": null}, "inbound_nodes": [[["dense_17", 0, 0, {}]]], "class_name": "Dropout"}, {"name": "dense_18", "config": {"name": "dense_18", "activation": "softmax", "kernel_initializer": {"config": {"mode": "fan_avg", "scale": 1.0, "distribution": "uniform", "seed": null}, "class_name": "VarianceScaling"}, "kernel_constraint": null, "kernel_regularizer": null, "units": 3, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_constraint": null, "activity_regularizer": null, "bias_regularizer": null, "use_bias": true, "trainable": true}, "inbound_nodes": [[["dropout_21", 0, 0, {}]]], "class_name": "Dense"}]}, "class_name": "Model"}
--------------------------------------------------------------------------------
/fmri_decoding/test_gcn_training.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "Using TensorFlow backend.\n"
13 | ]
14 | }
15 | ],
16 | "source": [
17 | "from keras.layers import Input, Dropout\n",
18 | "from keras.models import Model\n",
19 | "from keras.optimizers import Adam\n",
20 | "from keras.regularizers import l2\n",
21 | "import time"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "###graph cnn model\n",
31 | "%matplotlib inline\n",
32 | "%run -i '/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/layers/graph.py'\n",
33 | "%run -i '/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/utils.py'"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 6,
39 | "metadata": {},
40 | "outputs": [
41 | {
42 | "name": "stdout",
43 | "output_type": "stream",
44 | "text": [
45 | "Loading cora dataset...\n",
46 | "Dataset has 2708 nodes, 5429 edges, 1433 features.\n"
47 | ]
48 | }
49 | ],
50 | "source": [
51 | "path=\"/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/data/cora/\"\n",
52 | "dataset='cora'\n",
53 | "print('Loading {} dataset...'.format(dataset))\n",
54 | "\n",
55 | "idx_features_labels = np.genfromtxt(\"{}{}.content\".format(path, dataset), dtype=np.dtype(str))\n",
56 | "features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)\n",
57 | "labels = encode_onehot(idx_features_labels[:, -1])\n",
58 | "\n",
59 | "# build graph\n",
60 | "idx = np.array(idx_features_labels[:, 0], dtype=np.int32)\n",
61 | "idx_map = {j: i for i, j in enumerate(idx)}\n",
62 | "edges_unordered = np.genfromtxt(\"{}{}.cites\".format(path, dataset), dtype=np.int32)\n",
63 | "edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),\n",
64 | " dtype=np.int32).reshape(edges_unordered.shape)\n",
65 | "adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),\n",
66 | " shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)\n",
67 | "\n",
68 | "# build symmetric adjacency matrix\n",
69 | "adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)\n",
70 | "\n",
71 | "print('Dataset has {} nodes, {} edges, {} features.'.format(adj.shape[0], edges.shape[0], features.shape[1]))\n"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 10,
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | "Loading cora dataset...\n",
84 | "Dataset has 2708 nodes, 5429 edges, 1433 features.\n",
85 | "(2708, 1433) (2708, 7) (2708, 7)\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "# Define parameters\n",
91 | "DATAPATH = \"/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/data/cora/\"\n",
92 | "DATASET = 'cora'\n",
93 | "FILTER = 'chebyshev'\n",
94 | "MAX_DEGREE = 2 # maximum polynomial degree\n",
95 | "SYM_NORM = True # symmetric (True) vs. left-only (False) normalization\n",
96 | "NB_EPOCH = 200\n",
97 | "PATIENCE = 10 # early stopping patience\n",
98 | "\n",
99 | "# Get data\n",
100 | "X, A, y = load_data(path=DATAPATH,dataset=DATASET)\n",
101 | "y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)\n",
102 | "\n",
103 | "# Normalize X\n",
104 | "X /= X.sum(1).reshape(-1, 1)\n",
105 | "print(X.shape,y.shape,y_train.shape)\n",
106 | "\n"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 12,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "Using Chebyshev polynomial basis filters...\n",
119 | "Calculating largest eigenvalue of normalized graph Laplacian...\n",
120 | "Calculating Chebyshev polynomials up to order 2...\n"
121 | ]
122 | },
123 | {
124 | "ename": "ValueError",
125 | "evalue": "Dimensions must be equal, but are 4299 and 1433 for 'graph_convolution_2/MatMul' (op: 'MatMul') with input shapes: [?,4299], [1433,16].",
126 | "output_type": "error",
127 | "traceback": [
128 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
129 | "\u001b[0;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)",
130 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py\u001b[0m in \u001b[0;36m_call_cpp_shape_fn_impl\u001b[0;34m(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)\u001b[0m\n\u001b[1;32m 685\u001b[0m \u001b[0mgraph_def_version\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode_def_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_shapes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 686\u001b[0;31m input_tensors_as_shapes, status)\n\u001b[0m\u001b[1;32m 687\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mInvalidArgumentError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
131 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, type_arg, value_arg, traceback_arg)\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc_api\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_Message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 473\u001b[0;31m c_api.TF_GetCode(self.status.status))\n\u001b[0m\u001b[1;32m 474\u001b[0m \u001b[0;31m# Delete the underlying status object from memory otherwise it stays alive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
132 | "\u001b[0;31mInvalidArgumentError\u001b[0m: Dimensions must be equal, but are 4299 and 1433 for 'graph_convolution_2/MatMul' (op: 'MatMul') with input shapes: [?,4299], [1433,16].",
133 | "\nDuring handling of the above exception, another exception occurred:\n",
134 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
135 | "\u001b[0;32m/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;31m# This is somewhat hacky, more elegant options would require rewriting the Layer base class.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mH\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDropout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_in\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0mH\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGraphConvolution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msupport\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactivation\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'relu'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkernel_regularizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0ml2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5e-4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mH\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 29\u001b[0m \u001b[0mH\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDropout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGraphConvolution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msupport\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactivation\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'softmax'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mH\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
136 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/keras/engine/topology.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs, **kwargs)\u001b[0m\n\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 602\u001b[0m \u001b[0;31m# Actually call the layer, collecting output(s), mask(s), and shape(s).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 603\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 604\u001b[0m \u001b[0moutput_mask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprevious_mask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 605\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
137 | "\u001b[0;32m/data/cisl/raw_data/midnight/Rawdata/nii_data/codes/keras-gcn-master/kegra/utils.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, inputs, mask)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0msupports\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mK\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbasis\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0msupports\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mK\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msupports\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mK\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msupports\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
138 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36mdot\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparse_tensor_dense_matmul\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1051\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1052\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatmul\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1053\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1054\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
139 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py\u001b[0m in \u001b[0;36mmatmul\u001b[0;34m(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name)\u001b[0m\n\u001b[1;32m 1889\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1890\u001b[0m return gen_math_ops._mat_mul(\n\u001b[0;32m-> 1891\u001b[0;31m a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)\n\u001b[0m\u001b[1;32m 1892\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
140 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py\u001b[0m in \u001b[0;36m_mat_mul\u001b[0;34m(a, b, transpose_a, transpose_b, name)\u001b[0m\n\u001b[1;32m 2435\u001b[0m _, _, _op = _op_def_lib._apply_op_helper(\n\u001b[1;32m 2436\u001b[0m \u001b[0;34m\"MatMul\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtranspose_a\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtranspose_a\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtranspose_b\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtranspose_b\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2437\u001b[0;31m name=name)\n\u001b[0m\u001b[1;32m 2438\u001b[0m \u001b[0m_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2439\u001b[0m \u001b[0m_inputs_flat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
141 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py\u001b[0m in \u001b[0;36m_apply_op_helper\u001b[0;34m(self, op_type_name, name, **keywords)\u001b[0m\n\u001b[1;32m 785\u001b[0m op = g.create_op(op_type_name, inputs, output_types, name=scope,\n\u001b[1;32m 786\u001b[0m \u001b[0minput_types\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_types\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattr_protos\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 787\u001b[0;31m op_def=op_def)\n\u001b[0m\u001b[1;32m 788\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput_structure\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_def\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_stateful\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 789\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
142 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36mcreate_op\u001b[0;34m(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)\u001b[0m\n\u001b[1;32m 2956\u001b[0m op_def=op_def)\n\u001b[1;32m 2957\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcompute_shapes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2958\u001b[0;31m \u001b[0mset_shapes_for_outputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2959\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_add_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2960\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_record_op_seen_by_control_dependencies\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
143 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36mset_shapes_for_outputs\u001b[0;34m(op)\u001b[0m\n\u001b[1;32m 2207\u001b[0m \u001b[0mshape_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_call_cpp_shape_fn_and_require_op\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2208\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2209\u001b[0;31m \u001b[0mshapes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mshape_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2210\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshapes\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2211\u001b[0m raise RuntimeError(\n",
144 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36mcall_with_requiring\u001b[0;34m(op)\u001b[0m\n\u001b[1;32m 2157\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2158\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcall_with_requiring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2159\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcall_cpp_shape_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequire_shape_fn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2160\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2161\u001b[0m \u001b[0m_call_cpp_shape_fn_and_require_op\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcall_with_requiring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
145 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py\u001b[0m in \u001b[0;36mcall_cpp_shape_fn\u001b[0;34m(op, require_shape_fn)\u001b[0m\n\u001b[1;32m 625\u001b[0m res = _call_cpp_shape_fn_impl(op, input_tensors_needed,\n\u001b[1;32m 626\u001b[0m \u001b[0minput_tensors_as_shapes_needed\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 627\u001b[0;31m require_shape_fn)\n\u001b[0m\u001b[1;32m 628\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 629\u001b[0m \u001b[0;31m# Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
146 | "\u001b[0;32m/mnt/home_sq/yzhang/python_venv/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py\u001b[0m in \u001b[0;36m_call_cpp_shape_fn_impl\u001b[0;34m(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)\u001b[0m\n\u001b[1;32m 689\u001b[0m \u001b[0mmissing_shape_fn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 690\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 691\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 692\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmissing_shape_fn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
147 | "\u001b[0;31mValueError\u001b[0m: Dimensions must be equal, but are 4299 and 1433 for 'graph_convolution_2/MatMul' (op: 'MatMul') with input shapes: [?,4299], [1433,16]."
148 | ]
149 | }
150 | ],
151 | "source": [
152 | "if FILTER == 'localpool':\n",
153 | " \"\"\" Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016) \"\"\"\n",
154 | " print('Using local pooling filters...')\n",
155 | " A_ = preprocess_adj(A, SYM_NORM)\n",
156 | " support = 1\n",
157 | " graph = [X, A_]\n",
158 | " G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True)]\n",
159 | "\n",
160 | "elif FILTER == 'chebyshev':\n",
161 | " \"\"\" Chebyshev polynomial basis filters (Defferard et al., NIPS 2016) \"\"\"\n",
162 | " print('Using Chebyshev polynomial basis filters...')\n",
163 | " L = normalized_laplacian(A, SYM_NORM)\n",
164 | " L_scaled = rescale_laplacian(L)\n",
165 | " T_k = chebyshev_polynomial(L_scaled, MAX_DEGREE)\n",
166 | " support = MAX_DEGREE + 1\n",
167 | " graph = [X]+T_k\n",
168 | " G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(support)]\n",
169 | "\n",
170 | "else:\n",
171 | " raise Exception('Invalid filter type.')\n",
172 | "\n",
173 | "X_in = Input(shape=(X.shape[1],))\n",
174 | "\n",
175 | "# Define model architecture\n",
176 | "# NOTE: We pass arguments for graph convolutional layers as a list of tensors.\n",
177 | "# This is somewhat hacky, more elegant options would require rewriting the Layer base class.\n",
178 | "H = Dropout(0.5)(X_in)\n",
179 | "H = GraphConvolution(16, support, activation='relu', kernel_regularizer=l2(5e-4))([H]+G)\n",
180 | "H = Dropout(0.5)(H)\n",
181 | "Y = GraphConvolution(y.shape[1], support, activation='softmax')([H]+G)\n",
182 | "\n",
183 | "# Compile model\n",
184 | "model = Model(inputs=[X_in]+G, outputs=Y)\n",
185 | "model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))\n",
186 | "model.summary()\n",
187 | "\n"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "# Helper variables for main training loop\n",
197 | "wait = 0\n",
198 | "preds = None\n",
199 | "best_val_loss = 99999\n",
200 | "\n",
201 | "# Fit\n",
202 | "for epoch in range(1, NB_EPOCH+1):\n",
203 | "\n",
204 | " # Log wall-clock time\n",
205 | " t = time.time()\n",
206 | "\n",
207 | " # Single training iteration (we mask nodes without labels for loss calculation)\n",
208 | " model.fit(graph, y_train, sample_weight=train_mask,\n",
209 | " batch_size=A.shape[0], epochs=1, shuffle=False, verbose=0)\n",
210 | "\n",
211 | " # Predict on full dataset\n",
212 | " preds = model.predict(graph, batch_size=A.shape[0])\n",
213 | "\n",
214 | " # Train / validation scores\n",
215 | " train_val_loss, train_val_acc = evaluate_preds(preds, [y_train, y_val],\n",
216 | " [idx_train, idx_val])\n",
217 | " print(\"Epoch: {:04d}\".format(epoch),\n",
218 | " \"train_loss= {:.4f}\".format(train_val_loss[0]),\n",
219 | " \"train_acc= {:.4f}\".format(train_val_acc[0]),\n",
220 | " \"val_loss= {:.4f}\".format(train_val_loss[1]),\n",
221 | " \"val_acc= {:.4f}\".format(train_val_acc[1]),\n",
222 | " \"time= {:.4f}\".format(time.time() - t))\n",
223 | "\n",
224 | " # Early stopping\n",
225 | " if train_val_loss[1] < best_val_loss:\n",
226 | " best_val_loss = train_val_loss[1]\n",
227 | " wait = 0\n",
228 | " else:\n",
229 | " if wait >= PATIENCE:\n",
230 | " print('Epoch {}: early stopping'.format(epoch))\n",
231 | " break\n",
232 | " wait += 1\n",
233 | "\n",
234 | "# Testing\n",
235 | "test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])\n",
236 | "print(\"Test set results:\",\n",
237 | " \"loss= {:.4f}\".format(test_loss[0]),\n",
238 | " \"accuracy= {:.4f}\".format(test_acc[0]))"
239 | ]
240 | }
241 | ],
242 | "metadata": {
243 | "kernelspec": {
244 | "display_name": "Python 3",
245 | "language": "python",
246 | "name": "tensorflow_gpu_test"
247 | },
248 | "language_info": {
249 | "codemirror_mode": {
250 | "name": "ipython",
251 | "version": 3
252 | },
253 | "file_extension": ".py",
254 | "mimetype": "text/x-python",
255 | "name": "python",
256 | "nbconvert_exporter": "python",
257 | "pygments_lexer": "ipython3",
258 | "version": "3.5.2"
259 | }
260 | },
261 | "nbformat": 4,
262 | "nbformat_minor": 2
263 | }
264 |
--------------------------------------------------------------------------------
/linear_model/project_update_readme.txt:
--------------------------------------------------------------------------------
1 | Project Updates:
2 | website: https://github.com/SIMEXP/fmri_predict/blob/master/linear_model/
3 | Using linear models to predict fMRI activation patterns (motor task: hand movement) from resting-state functional connectivity
4 |
5 | The MIST_ROI atlas with 210 regions was used to extract fMRI signals
6 | 1) features: 210*210 region-to-region correlation matrix
7 | 2) output: 210 z-scores from the GLM, indicating the level of brain activation within each region
8 | 3) model: SVR with linear kernel (from sklearn), one separate model for each region
9 | 4) data:
10 | 10 sessions of rs-fMRI scans from each subject; dynamic functional connectivity with a sliding window of 10 min was used
11 | 10 sessions of task-fMRI scans (two runs each), with z-score maps from the GLM
12 | 5) training: using both cross-validation (10-fold) and train-test-split from sklearn
13 |
14 | 6) estimation: either for each region (using MSE) or whole-brain (using correlation)
15 | for individual regions: different models were trained; lasso/Enet sometimes performed better than SVR; mean MSE=0.5
16 | for whole-brain: correlation between estimated and true activation scores: r=0.3069
17 |
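18 | A minimal sketch of the per-region pipeline described above (not the actual training
19 | script; variable names are hypothetical and random stand-in data replaces the real
20 | vectorized connectivity features and GLM z-score maps):
21 | 
22 |     import numpy as np
23 |     from sklearn.svm import SVR
24 |     from sklearn.model_selection import train_test_split
25 |     from sklearn.metrics import mean_squared_error
26 | 
27 |     rng = np.random.RandomState(0)
28 |     n_samples, n_regions = 60, 210
29 |     rsfc = rng.rand(n_samples, n_regions * n_regions)   # stand-in for vectorized 210*210 FC matrices
30 |     zmaps = rng.rand(n_samples, n_regions)               # stand-in for 210 GLM z-scores per session
31 | 
32 |     x_tr, x_te, y_tr, y_te = train_test_split(rsfc, zmaps, test_size=0.2, random_state=0)
33 | 
34 |     mse_per_region = []
35 |     y_pred = np.zeros_like(y_te)
36 |     for roi in range(n_regions):
37 |         model = SVR(kernel='linear', C=1.0)               # one separate model per region
38 |         model.fit(x_tr, y_tr[:, roi])
39 |         y_pred[:, roi] = model.predict(x_te)
40 |         mse_per_region.append(mean_squared_error(y_te[:, roi], y_pred[:, roi]))
41 | 
42 |     print("mean MSE over regions:", np.mean(mse_per_region))
43 |     # whole-brain score: correlation between predicted and true activation maps
44 |     r = np.mean([np.corrcoef(y_pred[i], y_te[i])[0, 1] for i in range(y_te.shape[0])])
45 |     print("mean whole-brain correlation:", r)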
--------------------------------------------------------------------------------
/plot_conn_matrix_surchs.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import itertools\n",
11 | "import numpy as np\n",
12 | "import pandas as pd\n",
13 | "import nibabel as nib\n",
14 | "import matplotlib.gridspec as gs\n",
15 | "from matplotlib import colors as mc\n",
16 | "from nilearn import plotting as nlp\n",
17 | "from matplotlib import pyplot as plt\n",
18 | "from matplotlib.colors import LinearSegmentedColormap"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 1,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "def make_boxes(mat, cl_def, pad=1, edge=False):\n",
28 | " \"\"\"\n",
29 | " mat: the matrix you want to do stuff to\n",
30 | " cl_def: a list of tuples where the first position is the\n",
31 | " index of the first element in the cluster. the\n",
32 | " second position is the index of the last element\n",
33 | " in the cluster\n",
34 | " pad: an integer value for the number of zero spaces to add\n",
35 | " around clusters\n",
36 | " edge: boolean argument. If True, clusters at the corners will\n",
37 | " be drawn full. If False, clusters will be only drawn on\n",
38 | " the inside edge (no white line around the matrix).\n",
39 | " \n",
40 | " returns:\n",
41 | " omat: the input matrix with the spaces added\n",
42 | " cmat_m: the overlayed cluster boxes in a masked array\n",
43 | " lmat_m: a mask of the added empty spaces\n",
44 | " ind: the new index positions for the data (for x_ticks...)\n",
45 | " \"\"\"\n",
46 | " # Sort the cluster definitions based on the start point\n",
47 | " order = np.argsort([i[0] for i in cl_def])\n",
48 | " cl_def = [(i[0], i[1]) for i in np.array(cl_def)[order]]\n",
49 | " # Extract the values\n",
50 | " if edge:\n",
51 | " starts = [i[0] for i in cl_def]\n",
52 | " stops = [i[1]+1 for i in cl_def]\n",
53 | " else:\n",
54 | " starts = [i[0] for i in cl_def if not i[0]==0]\n",
55 | " stops = [i[1]+1 for i in cl_def if not i[1]+1>=mat.shape[0]]\n",
56 | " \n",
57 | " # Find the breakpoints\n",
58 | " bkp = list(np.unique(starts + stops))\n",
59 | " n_bkp = len(bkp)\n",
60 | " # Convert to new indices\n",
61 | " run = 0\n",
62 | " ind = list()\n",
63 | " for i in np.arange(mat.shape[0]):\n",
64 | " if i in bkp:\n",
65 | " run += pad\n",
66 | " ind.append(i+run)\n",
67 | "\n",
68 | " # Make a grid index\n",
69 | " x = [i[0] for i in itertools.product(ind, ind)]\n",
70 | " y = [i[1] for i in itertools.product(ind, ind)]\n",
71 | "\n",
72 | " # Create the output matrices\n",
73 | " omat = np.zeros([i+n_bkp*pad for i in mat.shape])\n",
74 | " cmat = np.zeros_like(omat)\n",
75 | " lmat = np.zeros_like(omat, dtype=bool)\n",
76 | " \n",
77 | " # Assign input mat to grid index\n",
78 | " omat[x, y] = mat.flatten()\n",
79 | " # Mask grid index for the line mask\n",
80 | " lmat[x,y] = True\n",
81 | " lmat_m = np.ma.masked_where(lmat, lmat)\n",
82 | " \n",
83 | " # Convert the input based breakpoints to the new index\n",
84 | " starts_c = [ind[i[0]]-pad for i in cl_def]\n",
85 | " stops_c = [ind[i[1]]+1 for i in cl_def]\n",
86 | " # Loop through the breakpoints\n",
87 | " for i in np.arange(len(starts_c)):\n",
88 | " start = starts_c[i]\n",
89 | " stop = stops_c[i]\n",
90 | " # Select the range of rows and columns to paint\n",
91 | " start_ind = np.arange(start, start+pad)\n",
92 | " stop_ind = np.arange(stop, stop+pad)\n",
93 | " \n",
94 | " # If this isn't an edge cluster or we paint them\n",
95 | " if not start<=0 or edge:\n",
96 | " # Draw the top left corner first\n",
97 | " cmat[start_ind, start:stop] = i+1\n",
98 | " cmat[start:stop, start_ind] = i+1\n",
99 | " # if this is an edge cluster and we don't paint them\n",
100 | " # only paint the bottom right corner but from the start\n",
101 | " else:\n",
102 | " # Draw the bottom right corner next\n",
103 | " cmat[stop_ind, :stop+pad] = i+1\n",
104 | " cmat[:stop+pad, stop_ind] = i+1\n",
105 | " continue\n",
106 | " if not stop>=omat.shape[0] or edge:\n",
107 | " # Draw the bottom right corner next\n",
108 | " cmat[stop_ind, start:stop+pad] = i+1\n",
109 | " cmat[start:stop+pad, stop_ind] = i+1\n",
110 | " # Mask the cluster matrix\n",
111 | " cmat_m = np.ma.masked_where(cmat==0, cmat)\n",
112 | " return omat, cmat_m, lmat_m, ind"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "# Visualize\n",
122 | "low = 0\n",
123 | "high = -1\n",
124 | "f = plt.figure(figsize=(15, 15), frameon=False)\n",
125 | "ax = f.add_subplot(111)\n",
126 | "ab = ax.matshow(o7[low:high, low:high], vmin=0, vmax=0.8, cmap=plt.cm.viridis, aspect='auto')\n",
127 | "ab = ax.matshow(l7[low:high, low:high], cmap=plt.cm.Greys_r, aspect='auto', alpha=1)\n",
128 | "ab = ax.matshow(l7[low:high, low:high], cmap=plt.cm.Greys, aspect='auto', alpha=1)\n",
129 | "ab = ax.matshow(c7[low:high, low:high], cmap=lin7, vmin=1, vmax=7, aspect='auto')\n",
130 | "\n",
131 | "ab = ax.set_xticks([])\n",
132 | "ab = ax.set_yticks([])\n",
133 | "ax.set_axis_off()\n",
134 | "f.savefig(os.path.join(fig_p, 's7_full.png'), dpi=300, bbox_inches='tight', pad_inches=0)"
135 | ]
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python 3",
141 | "language": "python",
142 | "name": "tensorflow_gpu_test"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.5.2"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 2
159 | }
160 |
--------------------------------------------------------------------------------