├── .gitignore
├── 2020_materials
    ├── data
    │   ├── cpc_camcan_demographics.csv
    │   ├── cpc_camcan_demographics_nordan.csv
    │   ├── cpc_camcan_features.csv
    │   └── cpc_camcan_features_nordan.csv
    ├── tasks
    │   └── cpc_normative_modeling_instructions.ipynb
    └── tasks_key
    │   └── key_cpc_machinelearning.ipynb
├── README.md
├── data
    ├── Z_estimates.csv
    ├── Z_estimates_counts.csv
    ├── Z_long_format.csv
    ├── fcon1000_te.csv
    ├── fcon1000_te_Z.csv
    ├── fcon1000_tr.csv
    ├── nilearn_order.csv
    ├── phenotypes_lh.txt
    ├── phenotypes_rh.txt
    ├── phenotypes_sc.txt
    ├── sz_ct.npy
    ├── sz_labels.npy
    ├── sz_z.npy
    ├── task1_phenotypes.txt
    ├── test_data.csv
    └── train_data.csv
├── nm_utils.py
├── presentation
    ├── GPU.png
    ├── How_nm_compressed2020.pdf
    ├── Normative_Modeling_a_Framework_for_Clinical_Machinelearning.pdf
    ├── Runtime1.png
    ├── Runtime2.png
    ├── keyboard_pref.png
    ├── settings1.png
    └── settings2.png
└── tasks
    ├── 1_fit_normative_models.ipynb
    ├── 2_apply_normative_models.ipynb
    ├── 3_Visualizations.ipynb
    └── 4_post_hoc_analysis.ipynb


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .ipynb_checkpoints
3 | tasks/.ipynb_checkpoints/
4 | tasks_key/.ipynb_checkpoints/
5 | .DS_Store


--------------------------------------------------------------------------------
/2020_materials/data/cpc_camcan_demographics.csv:
--------------------------------------------------------------------------------
  1 | paricipants,age,sex_name,sex,IQ_random
  2 | CC110033,24,MALE,1,73
  3 | CC110037,18,MALE,1,103
  4 | CC110045,24,FEMALE,0,124
  5 | CC110056,22,FEMALE,0,124
  6 | CC110062,20,MALE,1,126
  7 | CC110069,28,FEMALE,0,140
  8 | CC110087,28,FEMALE,0,86
  9 | CC110098,23,MALE,1,108
 10 | CC110101,23,MALE,1,80
 11 | CC110126,22,FEMALE,0,89
 12 | CC110174,25,FEMALE,0,139
 13 | CC110182,18,FEMALE,0,80
 14 | CC110187,25,FEMALE,0,110
 15 | CC110319,28,FEMALE,0,110
 16 | CC110411,25,MALE,1,95
 17 | CC110606,20,MALE,1,124
 18 | CC112141,29,MALE,1,137
 19 | CC120008,26,MALE,1,114
 20 | CC120049,28,MALE,1,147
 21 | CC120061,19,MALE,1,130
 22 | CC120065,25,FEMALE,0,108
 23 | CC120120,25,MALE,1,123
 24 | CC120123,19,FEMALE,0,79
 25 | CC120137,18,MALE,1,99
 26 | CC120166,28,MALE,1,103
 27 | CC120182,26,MALE,1,89
 28 | CC120184,25,FEMALE,0,128
 29 | CC120208,24,FEMALE,0,98
 30 | CC120212,20,MALE,1,131
 31 | CC120218,27,FEMALE,0,86
 32 | CC120234,24,FEMALE,0,109
 33 | CC120264,28,MALE,1,131
 34 | CC120276,23,FEMALE,0,91
 35 | CC120286,22,MALE,1,138
 36 | CC120309,27,MALE,1,81
 37 | CC120313,28,MALE,1,132
 38 | CC120319,27,FEMALE,0,101
 39 | CC120347,21,FEMALE,0,124
 40 | CC120376,18,FEMALE,0,104
 41 | CC120409,18,MALE,1,137
 42 | CC120462,18,FEMALE,0,116
 43 | CC120469,25,FEMALE,0,88
 44 | CC120470,28,FEMALE,0,70
 45 | CC120550,19,MALE,1,120
 46 | CC120640,26,FEMALE,0,122
 47 | CC120727,23,FEMALE,0,135
 48 | CC120764,27,MALE,1,145
 49 | CC120795,24,MALE,1,121
 50 | CC120816,23,MALE,1,122
 51 | CC120987,20,FEMALE,0,81
 52 | CC121106,28,FEMALE,0,125
 53 | CC121111,18,MALE,1,102
 54 | CC121144,26,MALE,1,95
 55 | CC121158,28,FEMALE,0,117
 56 | CC121194,24,FEMALE,0,142
 57 | CC121200,26,FEMALE,0,87
 58 | CC121317,25,FEMALE,0,98
 59 | CC121397,27,MALE,1,129
 60 | CC121411,26,FEMALE,0,122
 61 | CC121428,26,FEMALE,0,146
 62 | CC121479,26,FEMALE,0,83
 63 | CC121685,20,MALE,1,129
 64 | CC121795,25,FEMALE,0,75
 65 | CC122016,23,MALE,1,136
 66 | CC122172,27,MALE,1,78
 67 | CC122405,27,MALE,1,88
 68 | CC122620,24,FEMALE,0,150
 69 | CC210023,31,MALE,1,71
 70 | CC210051,35,FEMALE,0,101
 71 | CC210088,36,FEMALE,0,92
 72 | CC210124,35,FEMALE,0,112
 73 | CC210148,30,FEMALE,0,106
 74 | CC210172,31,FEMALE,0,78
 75 | CC210174,30,MALE,1,104
 76 | CC210182,31,FEMALE,0,125
 77 | CC210250,29,MALE,1,123
 78 | CC210304,32,FEMALE,0,88
 79 | CC210314,34,FEMALE,0,110
 80 | CC210422,34,MALE,1,132
 81 | CC210519,29,FEMALE,0,122
 82 | CC210526,37,MALE,1,118
 83 | CC210617,34,MALE,1,88
 84 | CC210657,37,MALE,1,89
 85 | CC212153,29,FEMALE,0,76
 86 | CC220098,32,FEMALE,0,116
 87 | CC220107,34,MALE,1,108
 88 | CC220115,29,FEMALE,0,137
 89 | CC220132,31,MALE,1,126
 90 | CC220151,30,MALE,1,138
 91 | CC220198,37,FEMALE,0,144
 92 | CC220203,34,FEMALE,0,125
 93 | CC220223,33,MALE,1,119
 94 | CC220232,36,FEMALE,0,134
 95 | CC220234,35,MALE,1,133
 96 | CC220284,34,FEMALE,0,149
 97 | CC220323,33,FEMALE,0,77
 98 | CC220335,34,MALE,1,112
 99 | CC220352,29,FEMALE,0,75
100 | CC220372,37,MALE,1,117
101 | CC220394,33,FEMALE,0,119
102 | CC220419,31,MALE,1,117
103 | CC220506,35,FEMALE,0,136
104 | CC220511,32,FEMALE,0,116
105 | CC220518,30,MALE,1,89
106 | CC220519,29,FEMALE,0,148
107 | CC220526,28,FEMALE,0,93
108 | CC220535,32,MALE,1,88
109 | CC220567,37,MALE,1,134
110 | CC220610,32,FEMALE,0,140
111 | CC220635,36,FEMALE,0,111
112 | CC220697,35,MALE,1,146
113 | CC220713,33,FEMALE,0,101
114 | CC220806,35,FEMALE,0,98
115 | CC220828,33,FEMALE,0,150
116 | CC220843,37,MALE,1,139
117 | CC220901,35,FEMALE,0,98
118 | CC220920,34,FEMALE,0,112
119 | CC220974,37,MALE,1,78
120 | CC220999,37,MALE,1,100
121 | CC221002,37,FEMALE,0,101
122 | CC221031,38,MALE,1,102
123 | CC221033,28,FEMALE,0,144
124 | CC221038,29,MALE,1,77
125 | CC221040,36,MALE,1,142
126 | CC221054,31,MALE,1,122
127 | CC221107,35,MALE,1,94
128 | CC221209,29,FEMALE,0,94
129 | CC221220,37,FEMALE,0,124
130 | CC221244,32,FEMALE,0,79
131 | CC221324,36,MALE,1,140
132 | CC221336,34,MALE,1,126
133 | CC221352,37,FEMALE,0,134
134 | CC221373,29,MALE,1,87
135 | CC221487,35,MALE,1,136
136 | CC221511,36,FEMALE,0,143
137 | CC221527,34,FEMALE,0,134
138 | CC221565,32,FEMALE,0,71
139 | CC221580,31,FEMALE,0,84
140 | CC221585,37,MALE,1,114
141 | CC221595,33,MALE,1,124
142 | CC221648,30,FEMALE,0,143
143 | CC221733,36,MALE,1,136
144 | CC221737,34,FEMALE,0,149
145 | CC221740,35,FEMALE,0,131
146 | CC221755,32,MALE,1,87
147 | CC221775,32,FEMALE,0,126
148 | CC221828,32,FEMALE,0,109
149 | CC221886,33,MALE,1,92
150 | CC221935,37,MALE,1,110
151 | CC221954,32,FEMALE,0,133
152 | CC221977,37,MALE,1,81
153 | CC221980,34,MALE,1,141
154 | CC222120,37,MALE,1,106
155 | CC222125,34,MALE,1,110
156 | CC222185,32,FEMALE,0,102
157 | CC222258,35,FEMALE,0,102
158 | CC222264,37,MALE,1,90
159 | CC222304,38,MALE,1,148
160 | CC222326,35,MALE,1,79
161 | CC222367,37,MALE,1,149
162 | CC222496,38,MALE,1,122
163 | CC222555,29,MALE,1,133
164 | CC222652,30,MALE,1,115
165 | CC222797,31,FEMALE,0,119
166 | CC222956,32,FEMALE,0,128
167 | CC223085,38,MALE,1,144
168 | CC223115,36,MALE,1,101
169 | CC223286,36,MALE,1,137
170 | CC310008,45,MALE,1,122
171 | CC310051,42,FEMALE,0,85
172 | CC310052,42,FEMALE,0,89
173 | CC310086,47,FEMALE,0,146
174 | CC310127,47,FEMALE,0,72
175 | CC310129,40,MALE,1,112
176 | CC310135,42,FEMALE,0,100
177 | CC310142,48,MALE,1,137
178 | CC310160,41,MALE,1,136
179 | CC310203,39,FEMALE,0,84
180 | CC310214,46,FEMALE,0,110
181 | CC310224,47,FEMALE,0,79
182 | CC310252,47,MALE,1,124
183 | CC310256,44,MALE,1,119
184 | CC310263,44,MALE,1,88
185 | CC310331,46,MALE,1,118
186 | CC310361,43,FEMALE,0,146
187 | CC310385,48,MALE,1,149
188 | CC310391,41,MALE,1,143
189 | CC310397,41,FEMALE,0,136
190 | CC310400,43,MALE,1,122
191 | CC310402,46,FEMALE,0,71
192 | CC310407,39,FEMALE,0,148
193 | CC310410,41,FEMALE,0,142
194 | CC310414,40,MALE,1,135
195 | CC310450,46,FEMALE,0,105
196 | CC310463,45,FEMALE,0,97
197 | CC310473,46,MALE,1,98
198 | CC312058,47,MALE,1,150
199 | CC312149,45,MALE,1,129
200 | CC312222,48,FEMALE,0,94
201 | CC320002,46,MALE,1,148
202 | CC320022,40,MALE,1,140
203 | CC320036,45,FEMALE,0,131
204 | CC320059,48,MALE,1,92
205 | CC320077,46,FEMALE,0,130
206 | CC320088,41,MALE,1,148
207 | CC320089,47,FEMALE,0,112
208 | CC320107,47,MALE,1,87
209 | CC320109,39,MALE,1,146
210 | CC320116,42,MALE,1,127
211 | CC320160,48,FEMALE,0,133
212 | CC320199,45,MALE,1,70
213 | CC320202,45,MALE,1,141
214 | CC320206,47,MALE,1,97
215 | CC320218,47,FEMALE,0,114
216 | CC320267,38,MALE,1,116
217 | CC320269,42,FEMALE,0,88
218 | CC320297,40,MALE,1,135
219 | CC320321,40,FEMALE,0,96
220 | CC320325,44,FEMALE,0,106
221 | CC320336,44,FEMALE,0,100
222 | CC320342,40,FEMALE,0,81
223 | CC320359,47,MALE,1,99
224 | CC320361,41,FEMALE,0,79
225 | CC320379,48,MALE,1,117
226 | CC320400,48,FEMALE,0,109
227 | CC320417,39,MALE,1,146
228 | CC320428,45,MALE,1,146
229 | CC320429,48,MALE,1,119
230 | CC320445,47,FEMALE,0,117
231 | CC320448,42,MALE,1,128
232 | CC320461,42,MALE,1,72
233 | CC320478,40,MALE,1,99
234 | CC320500,46,MALE,1,133
235 | CC320553,48,FEMALE,0,108
236 | CC320568,44,FEMALE,0,76
237 | CC320574,40,MALE,1,88
238 | CC320575,39,FEMALE,0,95
239 | CC320576,45,FEMALE,0,96
240 | CC320608,45,MALE,1,96
241 | CC320616,39,MALE,1,113
242 | CC320621,46,FEMALE,0,143
243 | CC320636,45,MALE,1,84
244 | CC320651,42,MALE,1,139
245 | CC320661,43,FEMALE,0,99
246 | CC320680,41,MALE,1,73
247 | CC320686,40,FEMALE,0,88
248 | CC320687,41,FEMALE,0,122
249 | CC320698,45,FEMALE,0,150
250 | CC320759,44,FEMALE,0,141
251 | CC320776,47,MALE,1,78
252 | CC320814,43,FEMALE,0,121
253 | CC320818,43,MALE,1,81
254 | CC320850,47,FEMALE,0,133
255 | CC320861,47,FEMALE,0,108
256 | CC320870,47,FEMALE,0,107
257 | CC320888,47,FEMALE,0,100
258 | CC320893,47,FEMALE,0,131
259 | CC320904,42,MALE,1,109
260 | CC321000,40,MALE,1,86
261 | CC321025,40,MALE,1,82
262 | CC321053,40,FEMALE,0,78
263 | CC321069,41,FEMALE,0,149
264 | CC321073,43,FEMALE,0,116
265 | CC321087,40,FEMALE,0,137
266 | CC321107,44,MALE,1,113
267 | CC321137,41,FEMALE,0,133
268 | CC321140,48,FEMALE,0,124
269 | CC321154,48,MALE,1,133
270 | CC321174,39,FEMALE,0,146
271 | CC321201,47,FEMALE,0,77
272 | CC321203,40,MALE,1,127
273 | CC321281,46,FEMALE,0,110
274 | CC321291,44,FEMALE,0,135
275 | CC321331,46,MALE,1,89
276 | CC321368,45,MALE,1,148
277 | CC321428,42,FEMALE,0,90
278 | CC321431,48,FEMALE,0,103
279 | CC321464,45,MALE,1,112
280 | CC321504,42,MALE,1,91
281 | CC321506,39,FEMALE,0,72
282 | CC321529,39,FEMALE,0,150
283 | CC321544,44,MALE,1,149
284 | CC321557,44,FEMALE,0,145
285 | CC321585,44,FEMALE,0,148
286 | CC321594,43,MALE,1,82
287 | CC321595,44,FEMALE,0,121
288 | CC321880,46,FEMALE,0,88
289 | CC321899,49,FEMALE,0,89
290 | CC321976,42,FEMALE,0,140
291 | CC322186,47,MALE,1,126
292 | CC410015,51,FEMALE,0,103
293 | CC410032,55,MALE,1,139
294 | CC410040,55,MALE,1,148
295 | CC410084,57,FEMALE,0,130
296 | CC410086,57,MALE,1,137
297 | CC410091,57,FEMALE,0,144
298 | CC410094,54,FEMALE,0,74
299 | CC410097,49,FEMALE,0,113
300 | CC410101,56,MALE,1,126
301 | CC410113,55,MALE,1,109
302 | CC410119,57,MALE,1,116
303 | CC410121,52,FEMALE,0,70
304 | CC410129,58,MALE,1,149
305 | CC410169,49,FEMALE,0,99
306 | CC410173,55,MALE,1,128
307 | CC410177,57,MALE,1,102
308 | CC410179,55,MALE,1,123
309 | CC410182,53,FEMALE,0,126
310 | CC410220,52,FEMALE,0,124
311 | CC410222,55,FEMALE,0,150
312 | CC410226,56,MALE,1,77
313 | CC410243,56,FEMALE,0,135
314 | CC410248,54,MALE,1,92
315 | CC410251,53,FEMALE,0,110
316 | CC410284,50,FEMALE,0,101
317 | CC410286,49,FEMALE,0,74
318 | CC410287,58,FEMALE,0,139
319 | CC410289,58,FEMALE,0,76
320 | CC410297,58,FEMALE,0,77
321 | CC410323,51,FEMALE,0,94
322 | CC410325,54,FEMALE,0,88
323 | CC410354,48,MALE,1,146
324 | CC410387,57,MALE,1,78
325 | CC410390,56,MALE,1,80
326 | CC410432,52,MALE,1,110
327 | CC410447,58,FEMALE,0,119
328 | CC412004,54,MALE,1,96
329 | CC412021,54,FEMALE,0,120
330 | CC420004,49,FEMALE,0,79
331 | CC420060,51,MALE,1,147
332 | CC420061,57,MALE,1,102
333 | CC420071,52,FEMALE,0,139
334 | CC420075,52,MALE,1,128
335 | CC420089,48,FEMALE,0,94
336 | CC420091,52,MALE,1,70
337 | CC420094,56,MALE,1,130
338 | CC420100,56,MALE,1,124
339 | CC420137,56,MALE,1,77
340 | CC420143,53,FEMALE,0,122
341 | CC420148,56,FEMALE,0,73
342 | CC420149,52,MALE,1,104
343 | CC420157,58,MALE,1,94
344 | CC420162,52,FEMALE,0,89
345 | CC420167,51,FEMALE,0,112
346 | CC420173,49,FEMALE,0,96
347 | CC420180,56,FEMALE,0,129
348 | CC420182,52,FEMALE,0,118
349 | CC420197,55,FEMALE,0,87
350 | CC420198,58,MALE,1,98
351 | CC420202,51,MALE,1,147
352 | CC420204,53,FEMALE,0,71
353 | CC420217,50,MALE,1,102
354 | CC420222,55,MALE,1,76
355 | CC420226,50,MALE,1,101
356 | CC420229,52,MALE,1,93
357 | CC420231,54,MALE,1,72
358 | CC420236,53,MALE,1,85
359 | CC420241,55,MALE,1,98
360 | CC420244,49,MALE,1,94
361 | CC420259,51,FEMALE,0,90
362 | CC420260,50,FEMALE,0,134
363 | CC420261,54,FEMALE,0,74
364 | CC420286,56,MALE,1,75
365 | CC420322,49,MALE,1,129
366 | CC420324,52,FEMALE,0,94
367 | CC420348,57,FEMALE,0,133
368 | CC420356,54,MALE,1,102
369 | CC420364,54,MALE,1,76
370 | CC420383,48,FEMALE,0,120
371 | CC420392,51,FEMALE,0,125
372 | CC420396,53,MALE,1,122
373 | CC420402,49,MALE,1,149
374 | CC420412,52,MALE,1,144
375 | CC420433,51,MALE,1,75
376 | CC420435,53,FEMALE,0,107
377 | CC420454,54,MALE,1,145
378 | CC420462,55,FEMALE,0,103
379 | CC420464,55,FEMALE,0,130
380 | CC420493,51,FEMALE,0,118
381 | CC420566,50,FEMALE,0,114
382 | CC420582,54,MALE,1,135
383 | CC420587,49,FEMALE,0,140
384 | CC420589,52,MALE,1,146
385 | CC420623,51,MALE,1,85
386 | CC420720,50,FEMALE,0,136
387 | CC420729,56,FEMALE,0,119
388 | CC420776,49,FEMALE,0,115
389 | CC420888,50,FEMALE,0,138
390 | CC510015,58,FEMALE,0,91
391 | CC510017,63,FEMALE,0,85
392 | CC510039,60,FEMALE,0,133
393 | CC510043,58,MALE,1,73
394 | CC510050,59,FEMALE,0,70
395 | CC510062,64,MALE,1,71
396 | CC510076,62,MALE,1,73
397 | CC510086,65,MALE,1,128
398 | CC510115,62,FEMALE,0,130
399 | CC510161,58,MALE,1,101
400 | CC510163,64,FEMALE,0,93
401 | CC510179,62,FEMALE,0,77
402 | CC510208,66,FEMALE,0,80
403 | CC510220,66,MALE,1,77
404 | CC510226,67,MALE,1,96
405 | CC510237,66,MALE,1,93
406 | CC510242,60,MALE,1,104
407 | CC510243,60,MALE,1,111
408 | CC510255,62,MALE,1,97
409 | CC510256,63,MALE,1,130
410 | CC510258,60,MALE,1,146
411 | CC510259,60,FEMALE,0,120
412 | CC510284,60,FEMALE,0,129
413 | CC510304,66,FEMALE,0,116
414 | CC510321,64,FEMALE,0,110
415 | CC510323,64,FEMALE,0,109
416 | CC510329,64,MALE,1,107
417 | CC510342,63,FEMALE,0,102
418 | CC510354,62,FEMALE,0,111
419 | CC510355,65,FEMALE,0,123
420 | CC510392,67,MALE,1,120
421 | CC510393,68,MALE,1,107
422 | CC510395,68,MALE,1,92
423 | CC510415,64,FEMALE,0,71
424 | CC510433,61,FEMALE,0,142
425 | CC510434,65,FEMALE,0,134
426 | CC510438,62,MALE,1,87
427 | CC510473,65,MALE,1,148
428 | CC510474,66,MALE,1,76
429 | CC510480,68,MALE,1,103
430 | CC510483,60,FEMALE,0,92
431 | CC510486,63,FEMALE,0,134
432 | CC510511,68,FEMALE,0,142
433 | CC510534,66,MALE,1,100
434 | CC510548,62,MALE,1,85
435 | CC510551,61,MALE,1,147
436 | CC510568,60,MALE,1,115
437 | CC510572,63,MALE,1,78
438 | CC510609,59,MALE,1,101
439 | CC510629,59,MALE,1,107
440 | CC510639,62,MALE,1,137
441 | CC510648,61,FEMALE,0,73
442 | CC512003,62,MALE,1,135
443 | CC520002,64,FEMALE,0,134
444 | CC520011,60,FEMALE,0,82
445 | CC520013,67,MALE,1,117
446 | CC520042,58,MALE,1,118
447 | CC520053,64,MALE,1,145
448 | CC520055,61,FEMALE,0,88
449 | CC520065,60,MALE,1,93
450 | CC520078,63,MALE,1,126
451 | CC520083,65,FEMALE,0,137
452 | CC520097,62,FEMALE,0,96
453 | CC520114,64,FEMALE,0,111
454 | CC520122,67,MALE,1,91
455 | CC520127,66,MALE,1,122
456 | CC520134,67,FEMALE,0,146
457 | CC520136,65,FEMALE,0,83
458 | CC520147,61,FEMALE,0,73
459 | CC520162,68,MALE,1,90
460 | CC520168,59,MALE,1,87
461 | CC520175,61,MALE,1,96
462 | CC520197,59,FEMALE,0,129
463 | CC520200,67,FEMALE,0,123
464 | CC520209,66,FEMALE,0,78
465 | CC520211,63,FEMALE,0,140
466 | CC520215,63,FEMALE,0,150
467 | CC520239,65,FEMALE,0,122
468 | CC520247,63,MALE,1,73
469 | CC520253,58,MALE,1,104
470 | CC520254,66,FEMALE,0,104
471 | CC520275,66,MALE,1,123
472 | CC520279,68,FEMALE,0,86
473 | CC520287,59,FEMALE,0,92
474 | CC520377,63,FEMALE,0,93
475 | CC520390,65,FEMALE,0,97
476 | CC520391,64,FEMALE,0,84
477 | CC520395,61,FEMALE,0,106
478 | CC520398,59,FEMALE,0,142
479 | CC520424,63,MALE,1,75
480 | CC520436,63,FEMALE,0,149
481 | CC520477,66,MALE,1,115
482 | CC520480,60,MALE,1,83
483 | CC520503,66,MALE,1,74
484 | CC520517,65,MALE,1,79
485 | CC520552,64,MALE,1,98
486 | CC520560,65,MALE,1,89
487 | CC520562,66,MALE,1,93
488 | CC520584,59,FEMALE,0,137
489 | CC520585,68,FEMALE,0,78
490 | CC520597,64,MALE,1,104
491 | CC520607,64,FEMALE,0,126
492 | CC520624,60,FEMALE,0,132
493 | CC520673,67,MALE,1,129
494 | CC520745,63,FEMALE,0,88
495 | CC520775,61,FEMALE,0,137
496 | CC520868,67,MALE,1,107
497 | CC520980,68,MALE,1,70
498 | CC521040,63,FEMALE,0,124
499 | CC610022,68,FEMALE,0,109
500 | CC610028,68,MALE,1,105
501 | CC610039,70,MALE,1,124
502 | CC610040,72,MALE,1,109
503 | CC610046,72,MALE,1,81
504 | CC610050,71,FEMALE,0,145
505 | CC610051,68,MALE,1,142
506 | CC610052,77,MALE,1,89
507 | CC610058,73,MALE,1,98
508 | CC610061,76,FEMALE,0,78
509 | CC610071,69,FEMALE,0,150
510 | CC610076,77,FEMALE,0,120
511 | CC610095,71,FEMALE,0,83
512 | CC610096,73,FEMALE,0,146
513 | CC610099,69,FEMALE,0,114
514 | CC610101,68,MALE,1,136
515 | CC610120,70,MALE,1,94
516 | CC610146,76,FEMALE,0,111
517 | CC610178,72,MALE,1,105
518 | CC610210,75,MALE,1,114
519 | CC610212,77,FEMALE,0,111
520 | CC610227,76,FEMALE,0,128
521 | CC610285,71,MALE,1,132
522 | CC610288,69,MALE,1,73
523 | CC610292,72,FEMALE,0,117
524 | CC610308,69,MALE,1,105
525 | CC610344,71,FEMALE,0,115
526 | CC610372,70,MALE,1,145
527 | CC610392,75,FEMALE,0,117
528 | CC610405,72,FEMALE,0,150
529 | CC610462,76,FEMALE,0,108
530 | CC610469,73,FEMALE,0,122
531 | CC610496,70,MALE,1,124
532 | CC610508,77,FEMALE,0,110
533 | CC610568,76,MALE,1,104
534 | CC610575,71,FEMALE,0,113
535 | CC610576,72,FEMALE,0,104
536 | CC610594,69,FEMALE,0,115
537 | CC610614,74,FEMALE,0,109
538 | CC610625,70,FEMALE,0,73
539 | CC610631,77,FEMALE,0,78
540 | CC610653,71,MALE,1,125
541 | CC610658,78,MALE,1,122
542 | CC610671,70,FEMALE,0,77
543 | CC610697,73,FEMALE,0,92
544 | CC620005,74,FEMALE,0,143
545 | CC620026,70,FEMALE,0,134
546 | CC620044,73,FEMALE,0,83
547 | CC620073,69,MALE,1,85
548 | CC620085,70,FEMALE,0,92
549 | CC620090,71,FEMALE,0,94
550 | CC620106,71,FEMALE,0,131
551 | CC620114,73,MALE,1,143
552 | CC620118,71,FEMALE,0,145
553 | CC620121,68,MALE,1,117
554 | CC620129,75,MALE,1,124
555 | CC620152,73,MALE,1,92
556 | CC620164,72,FEMALE,0,107
557 | CC620193,76,MALE,1,140
558 | CC620200,76,FEMALE,0,114
559 | CC620259,68,FEMALE,0,103
560 | CC620262,69,FEMALE,0,103
561 | CC620264,76,FEMALE,0,143
562 | CC620279,77,MALE,1,97
563 | CC620284,75,FEMALE,0,121
564 | CC620314,74,FEMALE,0,85
565 | CC620354,78,FEMALE,0,93
566 | CC620359,68,MALE,1,84
567 | CC620405,76,FEMALE,0,146
568 | CC620406,68,MALE,1,142
569 | CC620413,76,MALE,1,92
570 | CC620429,69,MALE,1,82
571 | CC620436,78,MALE,1,86
572 | CC620442,71,FEMALE,0,115
573 | CC620444,77,FEMALE,0,135
574 | CC620451,75,MALE,1,117
575 | CC620454,75,FEMALE,0,144
576 | CC620466,75,MALE,1,94
577 | CC620479,69,MALE,1,73
578 | CC620490,74,FEMALE,0,144
579 | CC620496,70,FEMALE,0,135
580 | CC620499,71,MALE,1,132
581 | CC620515,75,FEMALE,0,122
582 | CC620518,78,MALE,1,113
583 | CC620526,73,FEMALE,0,84
584 | CC620527,69,MALE,1,136
585 | CC620549,73,FEMALE,0,147
586 | CC620557,74,FEMALE,0,148
587 | CC620560,72,MALE,1,139
588 | CC620567,74,MALE,1,85
589 | CC620572,78,FEMALE,0,84
590 | CC620592,74,FEMALE,0,128
591 | CC620610,76,MALE,1,120
592 | CC620619,71,MALE,1,97
593 | CC620659,71,FEMALE,0,143
594 | CC620685,77,FEMALE,0,76
595 | CC620720,78,FEMALE,0,121
596 | CC620785,69,MALE,1,82
597 | CC620793,71,MALE,1,92
598 | CC620821,70,MALE,1,90
599 | CC620885,78,MALE,1,140
600 | CC620919,78,MALE,1,87
601 | CC620935,71,MALE,1,70
602 | CC621011,76,MALE,1,137
603 | CC621080,70,MALE,1,98
604 | CC621118,76,MALE,1,112
605 | CC621128,75,MALE,1,115
606 | CC621184,69,MALE,1,83
607 | CC621199,72,MALE,1,85
608 | CC621248,72,FEMALE,0,148
609 | CC621284,79,MALE,1,142
610 | CC621642,73,MALE,1,102
611 | CC710037,78,FEMALE,0,136
612 | CC710088,83,FEMALE,0,127
613 | CC710099,85,FEMALE,0,97
614 | CC710131,85,MALE,1,90
615 | CC710154,83,MALE,1,113
616 | CC710176,78,FEMALE,0,81
617 | CC710214,79,MALE,1,143
618 | CC710223,79,FEMALE,0,147
619 | CC710313,81,MALE,1,99
620 | CC710342,83,FEMALE,0,131
621 | CC710350,81,MALE,1,99
622 | CC710382,83,MALE,1,121
623 | CC710416,82,FEMALE,0,74
624 | CC710429,80,MALE,1,143
625 | CC710446,85,MALE,1,87
626 | CC710462,82,FEMALE,0,140
627 | CC710486,79,FEMALE,0,127
628 | CC710494,80,MALE,1,128
629 | CC710501,80,MALE,1,87
630 | CC710518,83,FEMALE,0,103
631 | CC710548,83,MALE,1,145
632 | CC710551,85,MALE,1,135
633 | CC710566,83,MALE,1,118
634 | CC710591,85,FEMALE,0,111
635 | CC710664,82,FEMALE,0,142
636 | CC710679,84,MALE,1,132
637 | CC710858,79,MALE,1,116
638 | CC710982,79,MALE,1,111
639 | CC711027,80,MALE,1,143
640 | CC711035,88,FEMALE,0,123
641 | CC711128,80,MALE,1,143
642 | CC711141,85,MALE,1,141
643 | CC711158,80,MALE,1,112
644 | CC711244,85,FEMALE,0,85
645 | CC711245,85,MALE,1,116
646 | CC712027,87,MALE,1,74
647 | CC712085,81,MALE,1,113
648 | CC720023,82,FEMALE,0,81
649 | CC720071,82,MALE,1,120
650 | CC720103,80,MALE,1,115
651 | CC720119,79,MALE,1,142
652 | CC720180,78,MALE,1,109
653 | CC720188,78,MALE,1,108
654 | CC720238,80,FEMALE,0,140
655 | CC720290,84,MALE,1,146
656 | CC720304,80,FEMALE,0,112
657 | CC720329,80,MALE,1,141
658 | CC720330,80,MALE,1,108
659 | CC720358,83,FEMALE,0,130
660 | CC720359,81,MALE,1,102
661 | CC720400,86,FEMALE,0,104
662 | CC720407,82,MALE,1,140
663 | CC720497,80,FEMALE,0,93
664 | CC720511,79,MALE,1,118
665 | CC720516,84,FEMALE,0,136
666 | CC720622,81,FEMALE,0,116
667 | CC720646,81,MALE,1,126
668 | CC720655,79,FEMALE,0,148
669 | CC720670,79,FEMALE,0,150
670 | CC720685,81,MALE,1,102
671 | CC720723,84,FEMALE,0,124
672 | CC720774,87,MALE,1,87
673 | CC720941,79,MALE,1,120
674 | CC720986,83,MALE,1,149
675 | CC721052,79,MALE,1,72
676 | CC721107,79,FEMALE,0,76
677 | CC721114,79,FEMALE,0,128
678 | CC721224,87,FEMALE,0,74
679 | CC721291,80,FEMALE,0,96
680 | CC721292,79,MALE,1,134
681 | CC721374,86,MALE,1,132
682 | CC721377,80,MALE,1,130
683 | CC721392,80,MALE,1,137
684 | CC721418,79,FEMALE,0,122
685 | CC721434,84,MALE,1,148
686 | CC721449,80,MALE,1,83
687 | CC721504,82,MALE,1,92
688 | CC721519,79,FEMALE,0,127
689 | CC721585,79,FEMALE,0,107
690 | CC721618,81,FEMALE,0,128
691 | CC721648,80,FEMALE,0,143
692 | CC721704,82,FEMALE,0,77
693 | CC721707,80,FEMALE,0,94
694 | CC721729,81,MALE,1,112
695 | CC721888,78,FEMALE,0,115
696 | CC721891,83,MALE,1,104
697 | CC721894,80,FEMALE,0,88
698 | CC721957,83,MALE,1,105
699 | CC722077,82,FEMALE,0,134
700 | CC722216,86,FEMALE,0,73
701 | CC722421,79,FEMALE,0,128
702 | CC722522,84,FEMALE,0,101
703 | CC722536,79,FEMALE,0,150
704 | CC722542,79,MALE,1,116
705 | CC722651,79,FEMALE,0,128
706 | CC722891,84,FEMALE,0,129
707 | CC723197,80,FEMALE,0,96
708 | CC723395,86,FEMALE,0,145


--------------------------------------------------------------------------------
/2020_materials/data/cpc_camcan_demographics_nordan.csv:
--------------------------------------------------------------------------------
 1 | paricipants,age,sex_name,sex,IQ_random
 2 | NORDAN,65,MALE,1,74
 3 | DEM_PATIENT1,66,MALE,1,74
 4 | DEM_PATIENT2,65,MALE,1,74
 5 | DEM_PATIENT3,67,MALE,1,74
 6 | DEM_PATIENT4,65,MALE,1,77
 7 | DEM_PATIENT5,68,MALE,1,78
 8 | DEM_PATIENT6,63,MALE,1,72
 9 | DEM_PATIENT7,65,MALE,1,72
10 | DEM_PATIENT8,70,MALE,1,76
11 | DEM_PATIENT9,62,MALE,1,72
12 | DEM_PATIENT10,61,MALE,1,80


--------------------------------------------------------------------------------
/2020_materials/data/cpc_camcan_features_nordan.csv:
--------------------------------------------------------------------------------
 1 | participants,left_Hippocampal_tail,left_subiculum,left_CA1,left_hippocampal-fissure,left_presubiculum,left_parasubiculum,left_molecular_layer_HP,left_GC-ML-DG,left_CA3,left_CA4,left_fimbria,left_HATA,left_Whole_hippocampus,right_Hippocampal_tail,right_subiculum,right_CA1,right_hippocampal-fissure,right_presubiculum,right_parasubiculum,right_molecular_layer_HP,right_GC-ML-DG,right_CA3,right_CA4,right_fimbria,right_HATA,right_Whole_hippocampus
 2 | NORDAN,610,330,260,120,280,42,413,240,80,175,46,48,2644,480,328,280,126,226,42,416,203,101,174,22,45,2443
 3 | DEM_PATIENT1,406,314,301,107,287,59,392,217,149,147,41,46,2466,311,312,347,103,229,39,312,214,138,222,29,43,2299
 4 | DEM_PATIENT2,413,312,304,110,278,53,394,220,133,169,42,56,2484,303,311,339,114,207,40,310,214,138,226,37,46,2285
 5 | DEM_PATIENT3,412,305,310,108,210,57,382,239,135,171,44,43,2416,306,314,347,104,217,32,314,215,137,220,29,49,2284
 6 | DEM_PATIENT4,410,307,315,101,218,54,412,202,123,184,53,52,2431,307,304,347,107,214,39,304,201,139,224,36,31,2253
 7 | DEM_PATIENT5,409,306,302,114,257,49,401,223,102,213,46,49,2471,303,308,342,115,217,41,314,206,136,217,28,52,2279
 8 | DEM_PATIENT6,409,311,307,108,310,44,395,217,127,110,51,50,2439,307,302,337,102,233,33,311,212,138,245,37,33,2290
 9 | DEM_PATIENT7,412,302,302,113,289,50,399,217,106,116,41,46,2393,303,305,344,104,227,44,315,215,135,207,27,55,2281
10 | DEM_PATIENT8,414,315,306,101,292,41,399,201,145,104,50,48,2416,306,301,348,107,227,49,312,206,140,229,24,51,2300
11 | DEM_PATIENT9,411,313,308,102,226,57,398,212,117,152,47,49,2392,306,311,348,102,220,33,313,205,138,249,30,41,2296
12 | DEM_PATIENT10,406,312,308,106,210,51,407,219,120,140,43,58,2380,302,310,333,101,226,47,307,212,136,223,21,43,2261


--------------------------------------------------------------------------------
/2020_materials/tasks/cpc_normative_modeling_instructions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "accelerator": "GPU",
  6 |     "colab": {
  7 |       "name": "cpc_normative_modeling_instructions.ipynb",
  8 |       "provenance": [],
  9 |       "collapsed_sections": [],
 10 |       "toc_visible": true,
 11 |       "include_colab_link": true
 12 |     },
 13 |     "kernelspec": {
 14 |       "display_name": "Python 3",
 15 |       "language": "python",
 16 |       "name": "python3"
 17 |     },
 18 |     "language_info": {
 19 |       "codemirror_mode": {
 20 |         "name": "ipython",
 21 |         "version": 3
 22 |       },
 23 |       "file_extension": ".py",
 24 |       "mimetype": "text/x-python",
 25 |       "name": "python",
 26 |       "nbconvert_exporter": "python",
 27 |       "pygments_lexer": "ipython3",
 28 |       "version": "3.7.4"
 29 |     }
 30 |   },
 31 |   "cells": [
 32 |     {
 33 |       "cell_type": "markdown",
 34 |       "metadata": {
 35 |         "id": "view-in-github",
 36 |         "colab_type": "text"
 37 |       },
 38 |       "source": [
 39 |         "<a href=\"https://colab.research.google.com/github/saigerutherford/CPC_2020/blob/master/tasks/cpc_normative_modeling_instructions.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 40 |       ]
 41 |     },
 42 |     {
 43 |       "cell_type": "markdown",
 44 |       "metadata": {
 45 |         "colab_type": "text",
 46 |         "id": "hC8rsih7PHa_"
 47 |       },
 48 |       "source": [
 49 |         "# **CPC TUTORIAL ON NORMATIVE MODELING**\n",
 50 |         "\n",
 51 |         "\n",
 52 |         "Created by \n",
 53 |         "\n",
 54 |         "Mariam Zabihi [@m_zabihi](https://twitter.com/m_zabihi)\n",
 55 |         "\n",
 56 |         "Saige Rutherford [@being_saige](https://twitter.com/being_saige)\n",
 57 |         "\n",
 58 |         "Thomas Wolfers [@ThomasWolfers](https://twitter.com/ThomasWolfers)\n",
 59 |         "_______________________________________________________________________________"
 60 |       ]
 61 |     },
 62 |     {
 63 |       "cell_type": "markdown",
 64 |       "metadata": {
 65 |         "colab_type": "text",
 66 |         "id": "irl08XE1AG9v"
 67 |       },
 68 |       "source": [
 69 |         "## **Background Story**\n",
 70 |         "\n",
 71 |         "Morten and Ingrid are concerned about the health of their father, Nordan. He recently turned 65 years. A few months ago he could not find his way home. Together, they visit a neurologist/psychiatrist to conduct a number of cognitive tests. However, those tests were inconclusive. While Nordan has a relatively low IQ it could not explain his trouble returning home.\n",
 72 |         "\n",
 73 |         "Recently, the family heard about a new screening technique called normative modeling with which one can place individuals in reference to a population norm on for instance measures such as brain volume. Nordan would like to undertake this procedure to better know what is going on and to potentially find targets for treatment. Therefore, the family booked an appointment with you, the normative modeling specialist. To find out what is going on you compare Nordan's hyppocampus to the norm and to a group of persons with Dementia disorders, who have a similar IQ, age as well as the same sex as Nordan.\n",
 74 |         "\n",
 75 |         "Do your best to get as far as you can. However, you do not need to feel bad if you cannot complete everything during the tutorial.\n"
 76 |       ]
 77 |     },
 78 |     {
 79 |       "cell_type": "markdown",
 80 |       "metadata": {
 81 |         "colab_type": "text",
 82 |         "id": "udo6yANOCpvp"
 83 |       },
 84 |       "source": [
 85 |         "## **Task 0:** Load data and install the pcntoolkit"
 86 |       ]
 87 |     },
 88 |     {
 89 |       "cell_type": "code",
 90 |       "metadata": {
 91 |         "colab_type": "code",
 92 |         "id": "yawDkTLoKYRu",
 93 |         "colab": {}
 94 |       },
 95 |       "source": [
 96 |         "#install normative modeling\n",
 97 |         "!pip install pcntoolkit"
 98 |       ],
 99 |       "execution_count": null,
100 |       "outputs": []
101 |     },
102 |     {
103 |       "cell_type": "markdown",
104 |       "metadata": {
105 |         "colab_type": "text",
106 |         "id": "EHDKe2ohCxP4"
107 |       },
108 |       "source": [
109 |         "**Option 1:** Connect your Google Drive account, and load data from Google Drive. Having Google Drive connected will allow you to save any files created back to your Drive folder. This step will require you to download the csv files from [Github](https://github.com/saigerutherford/CPC_2020/tree/master/data) to your computer, and then make a folder in your Google Drive account and upload the csv files to this folder. "
110 |       ]
111 |     },
112 |     {
113 |       "cell_type": "code",
114 |       "metadata": {
115 |         "colab_type": "code",
116 |         "id": "0SMVyxNZqmlv",
117 |         "colab": {}
118 |       },
119 |       "source": [
120 |         "from google.colab import drive\n",
121 |         "drive.mount('/content/drive')\n",
122 |         "\n",
123 |         "#change dir to data on your google drive\n",
124 |         "import os\n",
125 |         "os.chdir('drive/My Drive/name-of-folder-where-you-uploaded-csv-files-from-Github/')\n",
126 |         "\n",
127 |         "# code by T. Wolfers"
128 |       ],
129 |       "execution_count": null,
130 |       "outputs": []
131 |     },
132 |     {
133 |       "cell_type": "markdown",
134 |       "metadata": {
135 |         "colab_type": "text",
136 |         "id": "Bst55nPJDHKb"
137 |       },
138 |       "source": [
139 |         "**Option 2:** Import the files directly from Github, and skip adding them to Google Drive."
140 |       ]
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "metadata": {
145 |         "colab_type": "code",
146 |         "id": "zuuSkJwPDRrv",
147 |         "colab": {}
148 |       },
149 |       "source": [
150 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_demographics.csv\n",
151 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_demographics_nordan.csv\n",
152 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_features.csv\n",
153 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_features_nordan.csv\n",
154 |         "    \n",
155 |         "# code by S. Rutherford"
156 |       ],
157 |       "execution_count": null,
158 |       "outputs": []
159 |     },
160 |     {
161 |       "cell_type": "markdown",
162 |       "metadata": {
163 |         "colab_type": "text",
164 |         "id": "kvSiRjysuGkV"
165 |       },
166 |       "source": [
167 |         "## **TASK 1:** Format input data"
168 |       ]
169 |     },
170 |     {
171 |       "cell_type": "markdown",
172 |       "metadata": {
173 |         "colab_type": "text",
174 |         "id": "N2Bon1mJAVjJ"
175 |       },
176 |       "source": [
177 |         "You have four files. The features and demographics file for the normsample and two files of the same name for Nordan your test sample. As one of your coworkers has done the preporcessing and quality control there are more subjects in the demographics file than in the features file of the norm sample. Please select the overlap of participants between those two files. \n",
178 |         "\n",
179 |         "\n",
180 |         "*Question for your understanding:*\n",
181 |         "\n",
182 |         "1) Why do we have to select the overlap between participants in terms of featrues and demographics?"
183 |       ]
184 |     },
185 |     {
186 |       "cell_type": "code",
187 |       "metadata": {
188 |         "colab_type": "code",
189 |         "id": "_RSfxGWku6fU",
190 |         "colab": {}
191 |       },
192 |       "source": [
193 |         "import pandas as pd\n",
194 |         "\n",
195 |         "#CODE HERE"
196 |       ],
197 |       "execution_count": null,
198 |       "outputs": []
199 |     },
200 |     {
201 |       "cell_type": "markdown",
202 |       "metadata": {
203 |         "colab_type": "text",
204 |         "id": "fUufLg4lQWdn"
205 |       },
206 |       "source": [
207 |         "## **TASK 2:** Prepare the covariate_normsample and testresponse_normsample file. "
208 |       ]
209 |     },
210 |     {
211 |       "cell_type": "markdown",
212 |       "metadata": {
213 |         "colab_type": "text",
214 |         "id": "g1i1qp7AAh1Q"
215 |       },
216 |       "source": [
217 |         "As mentioned in the introductory presentation those files need a specific format and the entries need to be seperated by spaces. Use whatever method you know to prepare those files based on the data provided in TASK 1. Save those files in .txt format in your drive. Also get rid of the column names and participant IDs.\n",
218 |         "\n",
219 |         "Given that we only have limited time in this practical we have to make a selection for the features based on your prior knowledge. With the information in mind that Nordan does not remember his way home, which subfield of the hyppocampus is probably a good target for the investigations?\n",
220 |         "Select a maximum of four hyppocampal regions as features.\n",
221 |         "\n",
222 |         "NOTE: Normative modeling is a screening tool we just make this selection due to time constraints, in reality we build these models on millions of putative biomarkers that are not restricted to brain imaging.\n",
223 |         "\n",
224 |         "\n",
225 |         "*Qestions for your understanding:*\n",
226 |         "\n",
227 |         "2) What is the requirement for the features in terms of variable properties (e.g. dicotomous or continous)? 3) What is the requirement for the covariates in terms of these properties? 4) What are the requirements for both together? 5) How does this depent on the algorithm used?"
228 |       ]
229 |     },
230 |     {
231 |       "cell_type": "code",
232 |       "metadata": {
233 |         "colab_type": "code",
234 |         "id": "lzt6llxyRPyY",
235 |         "colab": {}
236 |       },
237 |       "source": [
238 |         "#CODE HERE"
239 |       ],
240 |       "execution_count": null,
241 |       "outputs": []
242 |     },
243 |     {
244 |       "cell_type": "markdown",
245 |       "metadata": {
246 |         "colab_type": "text",
247 |         "id": "irR4FAIvQ8ds"
248 |       },
249 |       "source": [
250 |         "## **TASK 3:** Estimate normative model\n"
251 |       ]
252 |     },
253 |     {
254 |       "cell_type": "markdown",
255 |       "metadata": {
256 |         "colab_type": "text",
257 |         "id": "XV61hQUoA1Kd"
258 |       },
259 |       "source": [
260 |         "Once you have prepared and saved all the necessary files. Look at the pcntoolkit for running normative modeling. Select an appropritate method set up the toolkit and run your analyses using 2-fold cross validation in the normsample. Change the output suffix from estimate to '_2fold'. \n",
261 |         "\n",
262 |         "HINT: You primarily need the estimate function. \n",
263 |         "\n",
264 |         "SUGGESTION: While this process is running you can go to the next TASK 4, you will have no doubt when it is correctly running.\n",
265 |         "\n",
266 |         "*Question for your understaning:*\n",
267 |         "\n",
268 |         "6) What does cvfolds mean and why do we use it? 7) What is the output of the estimate function and what does it mean?"
269 |       ]
270 |     },
271 |     {
272 |       "cell_type": "code",
273 |       "metadata": {
274 |         "colab_type": "code",
275 |         "id": "yRTusEg6SRNL",
276 |         "colab": {}
277 |       },
278 |       "source": [
279 |         "import pcntoolkit as pcn\n",
280 |         "\n",
281 |         "#CODE HERE"
282 |       ],
283 |       "execution_count": null,
284 |       "outputs": []
285 |     },
286 |     {
287 |       "cell_type": "markdown",
288 |       "metadata": {
289 |         "colab_type": "text",
290 |         "id": "Nonuk7d_SNM6"
291 |       },
292 |       "source": [
293 |         "## **TASK 4:** Estimate the forward model of the normative model\n"
294 |       ]
295 |     },
296 |     {
297 |       "cell_type": "markdown",
298 |       "metadata": {
299 |         "colab_type": "text",
300 |         "id": "fmn4TD_tBE70"
301 |       },
302 |       "source": [
303 |         "In order to visulize the normative trajectories you first need to run the forward model. To this end you need to set up an appropriate covariate_forwardmodel file that covers the age range appropriately for both sexes. Save this file as .txt . Then you can input the files you made in TASK 1 as well as the file you made now and run the forward model using the appropriate specifications.\n",
304 |         "\n",
305 |         "*Question for your understaning:*\n",
306 |         "\n",
307 |         "8) What is yhat and ys2? 9) Why does the output of the forward model does not inlcude the Z-scores?"
308 |       ]
309 |     },
310 |     {
311 |       "cell_type": "code",
312 |       "metadata": {
313 |         "colab_type": "code",
314 |         "id": "22U-knkWSPsZ",
315 |         "colab": {}
316 |       },
317 |       "source": [
318 |         "#CODE HERE"
319 |       ],
320 |       "execution_count": null,
321 |       "outputs": []
322 |     },
323 |     {
324 |       "cell_type": "markdown",
325 |       "metadata": {
326 |         "colab_type": "text",
327 |         "id": "wxeZlXshQ7eS"
328 |       },
329 |       "source": [
330 |         "## **TASK 5:** Visualize forward model"
331 |       ]
332 |     },
333 |     {
334 |       "cell_type": "markdown",
335 |       "metadata": {
336 |         "colab_type": "text",
337 |         "id": "BVodlChrBg25"
338 |       },
339 |       "source": [
340 |         "Visualize the forward model of the normative model similar to the figure below.\n",
341 |         "\n",
342 |         "![1-s2.0-S245190221830329X-gr2.jpg](data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAcQBxAAD/4QDoRXhpZgAATU0AKgAAAAgABwESAAMAAAABAAEAAAEaAAUAAAABAAAAYgEbAAUAAAABAAAAagEoAAMAAAABAAIAAAExAAIAAAAcAAAAcgEyAAIAAAAUAAAAjodpAAQAAAABAAAAogAAAAAAAABxAAAAAQAAAHEAAAABQWRvYmUgUGhvdG9zaG9wIENTNSBXaW5kb3dzADIwMTk6MDE6MTAgMjE6MDA6MDYAAAOQBAACAAAAFAAAAMygAgAEAAAAAQAAAMmgAwAEAAAAAQAAAHIAAAAAMjAxODoxMjoxMSAxNDozNToxNwD/4RGtaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wLwA8P3hwYWNrZXQgYmVnaW49Iu+7vyIgaWQ9Ilc1TTBNcENlaGlIenJlU3pOVGN6a2M5ZCI/PiA8eDp4bXBtZXRhIHhtbG5zOng9ImFkb2JlOm5zOm1ldGEvIiB4OnhtcHRrPSJYTVAgQ29yZSA1LjQuMCI+IDxyZGY6UkRGIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyI+IDxyZGY6RGVzY3JpcHRpb24gcmRmOmFib3V0PSIiIHhtbG5zOnhtcE1NPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvbW0vIiB4bWxuczpzdFJlZj0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL3NUeXBlL1Jlc291cmNlUmVmIyIgeG1sbnM6c3RFdnQ9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZUV2ZW50IyIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczpwaG90b3Nob3A9Imh0dHA6Ly9ucy5hZG9iZS5jb20vcGhvdG9zaG9wLzEuMC8iIHhtbG5zOmRjPSJodHRwOi8vcHVybC5vcmcvZGMvZWxlbWVudHMvMS4xLyIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDpFNTg4MUU5QkVDMTRFOTExOEI3RkU1RDU4RURGM0Q0RSIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDoyRUM1RUY5ODQ5RkRFODExQTNBM0EyODcyMEY2OTA2NCIgeG1wTU06T3JpZ2luYWxEb2N1bWVudElEPSJ4bXAuZGlkOjJFQzVFRjk4NDlGREU4MTFBM0EzQTI4NzIwRjY5MDY0IiB4bXA6TW9kaWZ5RGF0ZT0iMjAxOS0wMS0xMFQyMTowMDowNiswNTozMCIgeG1wOkNyZWF0b3JUb29sPSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHhtcDpNZXRhZGF0YURhdGU9IjIwMTktMDEtMTBUMjE6MDA6MDYrMDU6MzAiIHhtcDpDcmVhdGVEYXRlPSIyMDE4LTEyLTExVDE0OjM1OjE3KzAxOjAwIiBwaG90b3Nob3A6SUNDUHJvZmlsZT0iRUNJLVJHQi5pY2MiIHBob3Rvc2hvcDpDb2xvck1vZGU9IjMiIGRjOmZvcm1hdD0iaW1hZ2UvanBlZyI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjpvcmlnaW5hbERvY3VtZW50SUQ9InhtcC5kaWQ6MkVDNUVGOTg0OUZERTgxMUEzQTNBMjg3MjBGNjkwNjQiIHN0UmVmOmluc3RhbmNlSUQ9InhtcC5paWQ6RTQ4ODFFOUJFQzE0RTkxMThCN0ZFNUQ1OEVERjNENEUiIHN0UmVmOmRvY3VtZW50SUQ9InhtcC5kaWQ6MkVDNUVGOTg0OUZERTgxMUEzQTNBMjg3MjBGNjkwNjQiLz4gPHhtcE1NOkhpc3Rvcnk+IDxyZGY6U2VxPiA8cmRmOmxpIHN0RXZ0OnNvZnR3YXJlQWdlbnQ9IkFkb2JlIFBob3Rvc2hvcCBDUzYgKFdpbmRvd3MpIiBzdEV2dDp3aGVuPSIyMDE4LTEyLTExVDE0OjM1OjE3KzAxOjAwIiBzdEV2dDppbnN0YW5jZUlEPSJ4bXAuaWlkOjJFQzVFRjk4NDlGREU4MTFBM0EzQTI4NzIwRjY5MDY0IiBzdEV2dDphY3Rpb249ImNyZWF0ZWQiLz4gPHJkZjpsaSBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHN0RXZ0OmNoYW5nZWQ9Ii8iIHN0RXZ0OndoZW49IjIwMTktMDEtMTBUMjA6NTE6MDErMDU6MzAiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6N0QzQ0MyOERFQTE0RTkxMTlFRTJGRkMzQkZENTkzMUUiIHN0RXZ0OmFjdGlvbj0ic2F2ZWQiLz4gPHJkZjpsaSBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHN0RXZ0OmNoYW5nZWQ9Ii8iIHN0RXZ0OndoZW49IjIwMTktMDEtMTBUMjA6NTE6MDErMDU6MzAiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6N0UzQ0MyOERFQTE0RTkxMTlFRTJGRkMzQkZENTkzMUUiIHN0RXZ0OmFjdGlvbj0ic2F2ZWQiLz4gPHJkZjpsaSBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHN0RXZ0OmNoYW5nZWQ9Ii8iIHN0RXZ0OndoZW49IjIwMTktMDEtMTBUMjE6MDA6MDYrMDU6MzAiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6RTQ4ODFFOUJFQzE0RTkxMThCN0ZFNUQ1OEVERjNENEUiIHN0RXZ0OmFjdGlvbj0ic2F2ZWQiLz4gPHJkZjpsaSBzdEV2dDphY3Rpb249ImNvbnZlcnRlZCIgc3RFdnQ6cGFyYW1ldGVycz0iZnJvbSBpbWFnZS90aWZmIHRvIGltYWdlL2pwZWciLz4gPHJkZjpsaSBzdEV2dDphY3Rpb249ImRlcml2ZWQiIHN0RXZ0OnBhcmFtZXRlcnM9ImNvbnZlcnRlZCBmcm9tIGltYWdlL3RpZmYgdG8gaW1hZ2UvanBlZyIvPiA8cmRmOmxpIHN0RXZ0OnNvZnR3YXJlQWdlbnQ9IkFkb2JlIFBob3Rvc2hvcCBDUzUgV2luZG93cyIgc3RFdnQ6Y2hhbmdlZD0iLyIgc3RFdnQ6d2hlbj0iMjAxOS0wMS0xMFQyMTowMDowNiswNTozMCIgc3RFdnQ6aW5zdGFuY2VJRD0ieG1wLmlpZDpFNTg4MUU5QkVDMTRFOTExOEI3RkU1RDU4RURGM0Q0RSIgc3RFdnQ6YWN0aW9uPSJzYXZlZCIvPiA8L3JkZjpTZXE+IDwveG1wTU06SGlzdG9yeT4gPC9yZGY6RGVzY3JpcHRpb24+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgPD94cGFja2V0IGVuZD0idyI/PgD/7QBkUGhvdG9zaG9wIDMuMAA4QklNBAQAAAAAACwcAVoAAxslRxwCAAACAAIcAj4ACDIwMTgxMjExHAI/AAsxNDM1MTcrMDEwMDhCSU0EJQAAAAAAEG8VJXtBp2YwhKsNALMYCvf/4gIsSUNDX1BST0ZJTEUAAQEAAAIcQURCRQIQAABtbnRyUkdCIFhZWiAHzwACABYADAADAA9hY3NwQVBQTAAAAABub25lAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLUFEQkUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAApjcHJ0AAAA/AAAACRkZXNjAAABIAAAAGd3dHB0AAABiAAAABRia3B0AAABnAAAABRyVFJDAAABsAAAAA5nVFJDAAABwAAAAA5iVFJDAAAB0AAAAA5yWFlaAAAB4AAAABRnWFlaAAAB9AAAABRiWFlaAAACCAAAABR0ZXh0AAAAAChjKSAxOTk5IEFkb2JlIFN5c3RlbXMgSW5jLgBkZXNjAAAAAAAAAAxFQ0ktUkdCLmljYwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFhZWiAAAAAAAAD23AABAAAAANM6WFlaIAAAAAAAAAAAAAAAAAAAAABjdXJ2AAAAAAAAAAEBzQAAY3VydgAAAAAAAAABAc0AAGN1cnYAAAAAAAAAAQHNAABYWVogAAAAAAAApngAAFH+AAAAAFhZWiAAAAAAAAAtlAAAmiAAABFdWFlaIAAAAAAAACLJAAAT4gAAwdD/wAARCAByAMkDAREAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9sAQwADAgICAgIDAgICAwMDAwQGBAQEBAQIBgYFBgkICgoJCAkJCgwPDAoLDgsJCQ0RDQ4PEBAREAoMEhMSEBMPEBAQ/9sAQwEDAwMEAwQIBAQIEAsJCxAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQ/90ABAAa/9oADAMBAAIRAxEAPwD9KKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgD//0P0ooAKACgAoAOvSgA74/CgAoATI9aAI2ubdDhp4wfQsKAEN1bH/AJbKfpQAC7tu9wg+poAVbm3f7k8bfRwaAJeoyOlAB060AFABQAUAFABQAUAFABQAUAFABQAUAf/R/SigAoAKADvjHPpQBheNNFHinw7e+Hk1e4sJLxAont8tIuGDfdBGQcEEZHB6ik9iZarQs6Fp91pGjWWj/aJbn7HAlubm6O6SXaANxAP16mjoNKyL/kO/+tuHPsuEH6c/rTGRQpYXG4wtFP5b7GO4SFWHY5zg0CumWFVV4RAO2BxQMcBnj1oAzbLxFoWpand6NY6tbT3thj7TAkm54s/3h7dPai6Emm7GXceOtBi8c2/w/mgum1K5tjcq/k5iwBkjd1zheuMZIGeaTeocyvY6A2trnIgRT6qAp/Mc0xgLZBzHJKnuJCf50AL5U6/cuc/76A/yIoAN10vVY3+jFf8AGgBPPK/6y2lX3wGH6c0AKLq3Jx5qqTxtb5T+RoAl6jI70AJmgBaADmgDmvHsvjmPRUb4fQ2cuom4TeLkgAQ87sZ4z936DOMmkyZX6HRQed5KG4CibaDIE+7ux82PbNMrdD6ACgD/0v0ooABzxQBG06r8iZdv7q9PxNAFe7mS1tJr3UJ1htreNpZNucKijJJPXoD0x+NAm7K5R8JeKNA8X6QuteG7jz7RpGj3GMoQy9QQ3I6g/iKW4JqWxtUxh0PTn0oA5nwf4D0DwEupPpM1yV1K5+1Tm5m3heuAOOnJ5OW5GScUlZCUbECfFHwdJ4suPBg1Ei8toBM0hx5B6HYHBOW5HQY5PcUlLUSkr2NseI9GbBW73g9CIXZT+IWqHc8x8HSeC/AnxFvtH0jRNTxqsRuJdavHPlpn5/KBZRhcgck5LADoAahPUzi+WTZ6amteGpphKmraa0wHlhxPGXx6A5zV3RppuZPifTPGup61oN74V8R29lpltcF9ShZcm5jJXhTg54DDHy/ezkkYpMUtdjqc8c9fWmUNWRHd40dWaMgOoOSpPTPpQA6gAHPSgBCFPDDII/OgCP7Jb5ysYT/c+Q/oRmgBPJlX7ly3sHG4f0P60AGbpesccg9m2n8jmgA+0Kv+shkT/gOcfiKAHpNFLxHKrewPNAD6ACgAwfQ0Af/T/SV5FjALH73RR1NAEfzzErI/ljqUB+bB9T6f5zQBMiqgwi7evSgCO6ktobaaW8aNbeNGaYyY2hAPm3Z4xjOc0MH5nCaV8SfhzBa3WjeBPJvp7G2kuodO060ZRKRyQmFC5LYBOMjIJqbroRzx+yWvAfjDxd410qS+vPCn/CPyRzGIC9MjFhgHKoQhI98gfWhNsak2a+sj+ydHvNc1rVtRuILKBriSK1AiyFBJChMN0HdjVA9DA0Txd8NtS0nR9buYreyfWXaKzTUV3TPIrFSNzZ7/AMWccgUk11C8VqzV0Xxl4P1zTdT8S+FhHqH9nLJFMba32zPsUtsXIBIPYdDnr1waBeO6LXgXxb/wm/hy38Qrpd1p3ns6CG4HzfKcbgeMg+vH0prUcXc3x7cdse31oHYa8Uco2yRq+eMMuf50aBocr43k8P8AhLw7eeJD4TjvXtgp8q0gVZGy2MlgMgDue1JtImTsjEg1PxFe+ItBh0nwxq1vomq2Auri7e+l32rkE7CrsVXHyjBHORjoanUWrZcsfAdp4EvvEHjPS9cv5Jb5XurxLqJbhW25Y7UTY2euBn246U7W1FyqOpk2/jH4i+ONA0jxB8PbfThbyXpjvEvE8tzEp5ADFht9SrE8j0NF2F3LY1LT4h2XjQeI/DFhZ61p95pyvZTXSW3meVI25Q6bSWOCpI47Ci9xqV7oxvhZeeL/AA7qV34R8U6rb3ej2kZNjqN05iuLiQsDt2SMWwMtnIBG0DODSjfqKN07M9QhvbO4lkgt7uGWSLHmIkgZkyMjIHTIq7mhNQAUAHU0AMeGKT/WRhvTcOlADfs4H+qlkT2DZH5NkUAIBdKRkxyj3+U/4fyoArfb73/nwm/OgD//1P0ljhRCXGS54Lnkn+lAHN2HgLTtO8c6h48jv717vULdbeSB5MwooC9B1/hHBJHXFK1tSeVLUj8e+OJPB+m/aNN0K6127WaOKS1tclolbPzOQp2jOMA4ySKG7bDlLlWg3xLrOnaboP8Aa/jzVF0vTZwsTWse4kmQcxuwG9uM5ChRgHOaL9xN2Wpn+GfhJ4L0DxPb+NfC3nWyG18qG2hlzAysv3+cscqcgFiOhFJR1ugjBbo7ODU9NuL2bTbfULWS7tQDPAkwMkYYZBZRyAR3IqrjumYN54V1258eWfimPxVcxaVBbNBLpQX5JWIbk84zyCTjOVFKzE03K/Q43WPDeva58cdOfVNEkufDGl2fnWrtABbwy7Ccg9AxkC8Hngcbealp3Ia9/XY7qO78B+DFe1jv9D0RbiRpmjM8Vv5jnGWwSOTge4xVaI091GBqHx3+FlvBcHS/GGn6zdxI7Ja6Y5u3eQA4X9wr4yeOelF+wr9jh/CXxT8Wf8JDfeJ/HVn4os9LktwkWl2nhfUruOMjbiQSRwFccMc7j1wQMVMXrqQnJO7Op1b456eumi68M+CfGGqSuyYSbw7qFlCUJ+95s0IQ9eADkk57VTfYpy00NGD43eB/lXUl1/SpSuSmoeHdQgAP1aAKw9xRe473RzmoftCeHIPH+maJDr2iW2hTQM13d6jObSRJMHAUTbOny4ABzk9MUr6ibfMdWPjB4Fn8Tab4X03WoNTn1RN0c9hKlxbp1ADOrEAnaenpzT5lsNyWx2caRRr5cSKiDI2qAAPwHSnsUvI8w8Ya74j1H4paF4L8LXlzpQi/4mGoz/Z90N1GAPl3DqAoZecDLAZ4qXe+hm9ZaHY2XjXwzrfiXUvBdvcNNqWnRiS5heI7CMgEBjwcblz7mnctNN2KWk/DDwnouu6n4i06K8hu9WbdJ5V08aL3OzYVxk9ck+2BxRy2FypGjDte8n07S/FgluLTBmtZmjnMWem4DEgz7tSBGX4p8fL4Hn0228RLbTNqk5t7c2+9HZsgElCGUD5hnLj71O9tBOVnY3hrkMPF9Y39p6mSAuo9y8e5cfUjHegq5atNRsL8FrG9gnA4PlSBsexx0phdFigYUALtb+6fyoA//9X9Iby9trKMSXMgBc7UCgs7t6Ko5P4UBcqCLUtS5uHaxtu0KMPOYf7Tg/J/uqc/7XagWrLtta29lCtvaQrDGuQFUfn9T65/WiwWK2saHo/iGybTda0+G8tmYP5Uq7lBHQ/Uf1NFrjdnozg9U+K9t4N8cXPhzxi+jaJoMdojadNJdL59w2FwscP3m6t8qpxjnqKlPUhN3scbqFt4m1/xoPGXwV8K6vZPcsz6jqGtRtYWl4u5CEVJv3+0lfmYQ4PykdKVtbktNvQPiH4p+PFhrGm+GLPWNLttV1hS0Nto+meYEXkc3VyxViCMnEKgDBJINOTYSlJaGZqfw/j1f4teHfCWtavr2tGGzF5q66zezXNrK23cwjQFYgMZXIjUZ7k4ATvzBd8x6JqPwl+B+oeGJhp/h7w1pFgzAnUtMgtoGUhu8wX14O7PXFN2ZckmrnR634f1eLwQNA+H+qR6dd29vFBZ3Ep3AIuAckA8lRjODyc+9O1loHLp7pxVj8MfH2v+JdRn+Inidp9Mm0wWASxmMa3BKgFinRcMCxOBubHGMgyotkRhJ7s9J8MeHrDwnoNn4f01pmtrKMxo0z7nbJzyfxJ7D6dKvyNErIu/2hZfbjp326D7WF84wCUeaE7NtPOM9+lA7q5zlvofjJ/HOoahqWs2l14ZmtVjt9OaIFlkwuScjpkMc5OcgYGAalLUnladx+q/DD4b62HXVvAXh673kEtLpsRbPY525BHqD3qrD5UzzLV/AGn+Adf03TNE1Dx0sfiC82rJpWoGKKxThVDjDCXaG6v0VTz6xZJ6EWUWds3gr4maSfM8P/F2e828GDxDpFvdIQO2+3Fu47ckt+NWXY4zx/dfGrRLCTXIPDvhPSp45Ee/1fTdTQPdoAAkbrcxRkDOODI+eMGpehEubc66D42+HLeGObxXoniLw2kiiT7Tf6Y8lqQeQ32q3MsODkEEuOO9O5SlbcveDrb4X3uv6j418Fa3peoXurjbPNZaglwp5ydoViFOQMigEknc6y80vS9RaB9Q061uTbOJoPOhV/KccBl3AkHHTGPzFFirIs9ucc5z/WmBVu9L0y+IkvLCCZx913jBYfRuopWCxW/sTyebDVb+3x1Uy+cv0xIGIH0IpisLnxFb8EWN+OvG63b8vmUn3yv4UAZP/CXy/wDQla7/AN+j/hQM/9b9D9R03ULbSb+50N459be2f7PcXhyPMIOAccKucfKoxQxNaaGP4G8Wvc6VJpni7XNLfxFpKM+qpDIoWAbjgt/CMAgNg4BJ5qU+4ovuZr/F2LX5ZLH4WeG7vxg8bGJ7+GVbbSonHUNdvkSEdxCshHene4XvsVdX0HxzfaXda38SfiG+k6VaxNPPpnhWNoPkAyQ13IDO/AIzGITn86Qbak/whm+EV5Ddy/DXSbe3uk2G9eSFvtjs2SDLM5Lyng/MWP8ASmmmCkmaHh/xX4ok8S6/p3iWwtFsLacLpn2ItNNImTy6oSV42n5guCSKL6gpO50bXerXOGtdEWLHR7yZUPPcLGH/ACJU0DZyfxDuviFaWdnDoekwasmoT/Zb6O2hKNFAwwSWZjwem7GBjNJ36EzcrEU3wg8DaT4KvPDt9faiuiK7385kucbWVQS+QvZV6Ht70cugnCKVjlfD+gP4h8f+H/Efgq41CfwZaac1mZRqUqGJkV18oqzBx0Q4HGADmptd3QkryViXxhp3iv4dWGveIdI8V3GuStdRXL2NzfSKbG0JbJwJMuSSi54GB0PNN6bCd4rc6K/vPFviTwNp/iT4U6xN9rvGR9t84YCPlXA8wHBDD8lOM8U9Wrot3cbo0bX4bCDxWnj06sX11rRbeVpYg0X3QpwqFPTA7YA4zT5eoctnc6T/AIqOPqumXPuWkh/TD0ytSjrXiweGNLm1nxFpc9tZ2yhpZ4pY5ETJwBgkNySPur3FK9gvYpXvi7w/r/hG5vrLXbnTLW9tJDDqLW8sSxAqf3gcgDI9iOlK90Ju6F+GDWcfg+wtLPxePEflhx9tLYZxuY4IzuXHI5ORjmmmEdih8Y/BGu+PPCw0bQdRht5I5vPkimXAnCjhd2CVOce3rRNXWgpxclobXgCw1zS/Bul6b4khtIb61gEMkdsf3SKpIQDHH3MA44z0oW2o4rSzIdc+F/w38SzNceIPAXh/ULiTrNcadE0h99+NxPv1p2Ksjgdf0XwP4E1+08PaF4+8XeEru8ge5jW1v3vLKONQfvRXYljQfIfuqvA9MGk3YiVoml4au/idrGlpr3g/4meH/FFmztH5eraI9m7MvUGW3cBT7mE+uO9Fxp82xqjx18RdM/d+IPg5qNwy8GbQdVtbyI+4E7W8n4FPxNFx3AfG/wAC2/7vxB/bfh64GR5Gr6PdQHPoH2GNz6bXINFxOSRs+HviR4D8VOkGheLdLurp13m0FyguVH+1ETvX8RRdDUlLY6XB9DTGf//X+4pfFPxI8axm28GeEm8N2EwIOs+I0KyhD/FDYxsJSx9ZWix3DDilqJ6lPw7+zl8NtIKT65preIrtDu36p+8iLZBz5GRGcEA5ZSQehFCjYlQS1On8F+AtE+Hg1abT9QuWh1Cf7U4uZAEgHPCgAADBPJyeOTximlYajysg8afETSvDmhXmpvaG9toUXzdwGGV22AhScupJ6gbP9qpbsEpJEnhPwv4Zn02PXLCyjjh1iKK5eKC3FrG4K5UNGoycA9HLAHmmrdASW6OqgggtolhtoY4ol4VUTao+mOlMdh0kiRDMjqn+8cUDIzdIfuCST02ocfnQBzms3/jA+J7DTbPQLWfw/dQyC/mnPzg88HDbQOnBBzk8CkyWaui2On6dp0Vp4esdPtrEZMa2rjy+TyQVHPPU0LyGrLYx4fh7okXi3UPGhsxJf6nb/ZZ45Zt0DIQoI2Fe4UA549utK2txcqvc6K3iazgS3tbGCKGNQqRxHaqjjgKFAH/6/WqWg7EhmkA+a1k+oKn+ZoGH2qPo6yr2IMZH8qAIb2DStYtZdN1CK2u7e5XZJBIAyyDvkenSgN9xDo2kvpLaH9gg/s9oDbm3CYjMR4K4GABzS02DRqxnweC/DmnaKNE0rRrO2hhjZLcNCspic5O75s5IY56/jRZWJcVsjnfA/h3UfDNhDoPi3xybjW7iaWW3EV62WiGOFjk4bHJzs43Y6AUlpuJK27Ot+y65b82+rxTL6XVuCx/4FGVH/jpp6laiG91qAEXGipMD3s7lST9RJsx+Zphc5DxE+t33jnRb9WsbbQreKWO+XUbAiUl9wYLIUKgEbRhWA4Od3SpauS1eVzstCsdC0/TktPDlvZQWKklI7QJ5YJOSfl4zzyfami9FsaFMCO6t7e8t5bS7gSaCdCkkcgyrqeCCD2OcUMLLqYGo/Dj4f6tpkei6p4K0S8soRiOG4sInWP3UFSVPuKEkhJJbEH/Cqvh3/wBCfZfrQM//0P0oGc8sfXrnB/GgDkPGWp+KNS0eew+Gc9u+rRSokkz7TFCo5K5Py7/YA4yc4yDUu72Jk7rQ2tP0iZ4Le48Qype3qqrOAMQRy45MceMDkcMctz1FNaDXmZrfD3w7/wAJnP47m8+S9nthayRySgwFAAN23HXaMY6d8d6Vri5Ve5k6H4d17wJofiCfTdWvvE15dStd2sVyvyRnBwgyw3HnnGAcAADrRsKzjdnSeG7nW9W0Ozv/ABDZtp19NHuntlIyhycZOSRkAHAPGQO2aaKjtqZvhrwpr+j+Kdc1nU/E76hY6g4NnaMn/HsM9Mk+mBx1xk80JO4opp3bOrxj/CmUV9QsYNTsLnTbrd5N3C8Em1ip2spU4I5BwetAMyvBng/SvAugxeHtGluZLaJ2kDXD73ZmPJJAAH4DFJKwkrG7TGM82PzDD5i+YBuKZ5A9cUXAfnHPpQBy7w+Ph8QFlS5sP+ESFrtaIj9952OO2Qc477dvbNTrcnVyKWtfFnwLoXiK78L69qBtLi0gWaSWaA+ScgEKuASWwQcBcdcHijmXUTmk7MzT8W/Dt346sfBOg6Rd3/2oK015bHbFErIHVwB95cEEnIAHTPSi+ugufU6HXPB15q3iHR9cg8R3lomlMWa2UsRNnscOox67g2R0xTa1Kcbu4up+BtG1TxPp/i++s0n1LS0KW8gd0CjJPK5IbBJIPHP4UWG43dzoPPcf623kH+0MMP05pjAXMGeZdh9Gyp/I0Ac7B4906fx7P8P10+9W7t7Rbk3DR5hxgHAPXocZ6buKXNfQnmu7G1PomkXUhmm02ATf89VTbIPo4+b9aLDsZevadr9jo15P4Rvrh9SjiZrW3upRJFJIOgJky3I/2gPpQ9hO9tCLw1dePDoVnP4m0zTTqDJm4igmMZQ5IH95SSoGfmA5x0FJX6gnK2pqf25HCMX+n31pz/HCXX8WjLKB9SKY7jf+El8O/wDQbsv+/i/40xn/0f0G8QR67caFfXaWzSzxW8j22mwS7TK+07Q8nUknHyjA9d1J7CexyXwI8CX/AIV8Pz6zr63kWq6zIZZ4LjjylVm2/L2Yg5Pfn2pRT3IpppanpRmLkrbqJCOrZwoqjQQQbjvnbzD16YA/D/HP4UAS4GMDj6UAL+FAHK6dqHj1/GurWOo6JaJ4egt1awuUceZLJ8o2nnv8/UDGB1BpXZMea4/wBrvirxBpM954t8NHRLuO6eOODJO+MAYbnnqSPQ4yOKEEW3udPTKCgAoA5m38A6Rb+Orjx/HdXpvrm2Fs0Rl/dBQAMgf8BHHTqcUrWdyeVXudNTKOX8EeBU8GNqrLrl9qX9qXZuj9qOfKzngepOeT3wKSQkrDfFfwy8H+NdSstV8Q6cZ57NSo2OUEik52vj7wzyPqaTjcTimdDZaZpunbfsFhb25WNIQY4gpEa8KuQOgxgDOKpaDSSMCx8V6vc+O73wlJ4UvIdOtLZZotTb/VSMQpKj5QO5HBLZU5GDST1EndnUUyg/lQAm0HggE/SgDD8V6/4f8ABWjTeI9Y82C2hKIWgQ7yWbAXCnnnnmjRCbSL2l3kWq6Zaarpl2z215ClxD5kfVHGRwMEcHnmgFrqWd9wn3oFYeqv/Q4/nQMpa9JqUujXsOj3K2WoPA62s1xGSkcmDtJwCMA49aGJ6rQr+DbfxHZ+GrG38WajBf6rGjLPcQ42tySv1+XAPHXmktNwSsrM0/sOm/8APlD/AN8imPlR/9L9JXlEeF5Lnoq9TQAwxvL80547RqePxPWgCbgDGBhenoKADuR3HX2oAKAOX8eSeP4rOx/4V/BYy3DXSC7F0QFEOOSM9Rnr39KTZMr9DqPf26UygoAKACgAoAKACgAoAKACgBOep54AJxzQAtAHL+ONE8Z6ydL/AOEQ8Sx6T9muhLeb13edH6Dg5xzwcA55IwKTTJkm7WOoPXpjPUfn+FMohu7W1v7d7W9tYriCTho5UDqw91NANXJEjSNVijRURFCqqjAAHReOABxgCjYVhtwbgW8ptBGZ9jeUJM7d+OM47ZxSew/Q5X4ax/ERNLupPiNNbteyXLPbpDszHF6EoMHnOBycdTSV+pMOa2p1TW8LncEAY/xKdp/MVRRH9lh/uT/9/wA0Af/T/SaOJIx8o+bue5oAdQADrQBy/g7wHa+Dr/Wr+21a+vDrNz9pZLh8rEcn7vvyRn0CiklbUlRS1OoplBQAUAFABQAUAFABQAUAFABQBy/h7SfG1n4q1y+17xBb3mjXTA6baohDwj34GOODydx546UknclJp3Z1FMoKACgAoAKACgAoAKAOY1zx9p2geLNG8JXFhey3GtZ8qaKLMaYOPmOfUHOOg5NJslys7WOnx70yj//U/SigAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKAEKhiGKgsDwTyR680BYXBoA/9X9KKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoABQA7A9BQB//2Q==)\n",
343 |         "\n",
344 |         "HINT: First create a function that calculates the confidence intervals and then plot yhat, y2 of the forward model. Finally, plot the data of individual participants."
345 |       ]
346 |     },
347 |     {
348 |       "cell_type": "code",
349 |       "metadata": {
350 |         "colab_type": "code",
351 |         "id": "ii0H9GDwv-ha",
352 |         "colab": {}
353 |       },
354 |       "source": [
355 |         "import numpy as np\n",
356 |         "import matplotlib.pyplot as plt\n",
357 |         "\n",
358 |         "#CODE HERE"
359 |       ],
360 |       "execution_count": null,
361 |       "outputs": []
362 |     },
363 |     {
364 |       "cell_type": "markdown",
365 |       "metadata": {
366 |         "colab_type": "text",
367 |         "id": "yM4z1BtyWwiF"
368 |       },
369 |       "source": [
370 |         "## **TASK 6:** Apply the normative model to Nordan's data and the dementia patients."
371 |       ]
372 |     },
373 |     {
374 |       "cell_type": "code",
375 |       "metadata": {
376 |         "colab_type": "code",
377 |         "id": "eVTYxKjvWBvm",
378 |         "colab": {}
379 |       },
380 |       "source": [
381 |         "#CODE HERE"
382 |       ],
383 |       "execution_count": null,
384 |       "outputs": []
385 |     },
386 |     {
387 |       "cell_type": "markdown",
388 |       "metadata": {
389 |         "colab_type": "text",
390 |         "id": "LFnHCy0XVVwl"
391 |       },
392 |       "source": [
393 |         "## **TASK 7:** In which hyppocampal subfield(s) does Nordan deviate extremely? \n"
394 |       ]
395 |     },
396 |     {
397 |       "cell_type": "markdown",
398 |       "metadata": {
399 |         "colab_type": "text",
400 |         "id": "jUhmPAOZB0kp"
401 |       },
402 |       "source": [
403 |         "No coding necessary just create a presentation which includes recommendations to Nordan and his family. \n",
404 |         "Use i) |Z| > 3.6 ii) |Z| > 1.96 as definitions for extreme normative deviations."
405 |       ]
406 |     },
407 |     {
408 |       "cell_type": "markdown",
409 |       "metadata": {
410 |         "colab_type": "text",
411 |         "id": "AqQhxN9pEFGC"
412 |       },
413 |       "source": [
414 |         "## **TASK 8 (OPTIONAL):** Implement a function that calculates percentage change. "
415 |       ]
416 |     },
417 |     {
418 |       "cell_type": "markdown",
419 |       "metadata": {
420 |         "colab_type": "text",
421 |         "id": "weASKkZNBMW5"
422 |       },
423 |       "source": [
424 |         "Percentage change = $\\frac{x1 - x2}{|x2|}*100$"
425 |       ]
426 |     },
427 |     {
428 |       "cell_type": "code",
429 |       "metadata": {
430 |         "colab_type": "code",
431 |         "id": "0vIt9fd7EmJx",
432 |         "colab": {}
433 |       },
434 |       "source": [
435 |         "#CODE HERE"
436 |       ],
437 |       "execution_count": null,
438 |       "outputs": []
439 |     },
440 |     {
441 |       "cell_type": "markdown",
442 |       "metadata": {
443 |         "colab_type": "text",
444 |         "id": "1Mypo4xrT7ID"
445 |       },
446 |       "source": [
447 |         "## **TASK 9 (OPTIONAL):** Visualize percent change\n",
448 |         "\n",
449 |         "\n",
450 |         "\n"
451 |       ]
452 |     },
453 |     {
454 |       "cell_type": "markdown",
455 |       "metadata": {
456 |         "colab_type": "text",
457 |         "id": "1I1Kwv5iBUJj"
458 |       },
459 |       "source": [
460 |         "Plot the prercentage change in Yhat of the forward model in reference to age 20. Do that for both sexes seperately."
461 |       ]
462 |     },
463 |     {
464 |       "cell_type": "code",
465 |       "metadata": {
466 |         "colab_type": "code",
467 |         "id": "1DoJid7R1DBX",
468 |         "scrolled": true,
469 |         "colab": {}
470 |       },
471 |       "source": [
472 |         "#CODE HERE"
473 |       ],
474 |       "execution_count": null,
475 |       "outputs": []
476 |     }
477 |   ]
478 | }


--------------------------------------------------------------------------------
/2020_materials/tasks_key/key_cpc_machinelearning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "accelerator": "GPU",
  6 |     "colab": {
  7 |       "name": "key_cpc_machinelearning.ipynb",
  8 |       "provenance": [],
  9 |       "collapsed_sections": [],
 10 |       "toc_visible": true
 11 |     },
 12 |     "kernelspec": {
 13 |       "display_name": "Python 3",
 14 |       "language": "python",
 15 |       "name": "python3"
 16 |     },
 17 |     "language_info": {
 18 |       "codemirror_mode": {
 19 |         "name": "ipython",
 20 |         "version": 3
 21 |       },
 22 |       "file_extension": ".py",
 23 |       "mimetype": "text/x-python",
 24 |       "name": "python",
 25 |       "nbconvert_exporter": "python",
 26 |       "pygments_lexer": "ipython3",
 27 |       "version": "3.7.4"
 28 |     }
 29 |   },
 30 |   "cells": [
 31 |     {
 32 |       "cell_type": "markdown",
 33 |       "metadata": {
 34 |         "colab_type": "text",
 35 |         "id": "hC8rsih7PHa_"
 36 |       },
 37 |       "source": [
 38 |         "# **CPC TUTORIAL ON NORMATIVE MODELING**\n",
 39 |         "\n",
 40 |         "\n",
 41 |         "Created by \n",
 42 |         "\n",
 43 |         "Mariam Zabihi [@m_zabihi](https://twitter.com/m_zabihi)\n",
 44 |         "\n",
 45 |         "Saige Rutherford [@being_saige](https://twitter.com/being_saige)\n",
 46 |         "\n",
 47 |         "Thomas Wolfers [@ThomasWolfers](https://twitter.com/ThomasWolfers)\n",
 48 |         "_______________________________________________________________________________"
 49 |       ]
 50 |     },
 51 |     {
 52 |       "cell_type": "markdown",
 53 |       "metadata": {
 54 |         "colab_type": "text",
 55 |         "id": "irl08XE1AG9v"
 56 |       },
 57 |       "source": [
 58 |         "## **Background Story**\n",
 59 |         "\n",
 60 |         "Morten and Ingrid are concerned about the health of their father, Nordan. He recently turned 65 years. A few months ago he could not find his way home. Together, they visit a neurologist/psychiatrist to conduct a number of cognitive tests. However, those tests were inconclusive. While Nordan has a relatively low IQ it could not explain his trouble returning home.\n",
 61 |         "\n",
 62 |         "Recently, the family heard about a new screening technique called normative modeling with which one can place individuals in reference to a population norm on for instance measures such as brain volume. Nordan would like to undertake this procedure to better know what is going on and to potentially find targets for treatment. Therefore, the family booked an appointment with you, the normative modeling specialist. To find out what is going on you compare Nordan's hyppocampus to the norm and to a group of persons with Dementia disorders, who have a similar IQ, age as well as the same sex as Nordan.\n",
 63 |         "\n",
 64 |         "Do your best to get as far as you can. However, you do not need to feel bad if you cannot complete everything during the tutorial.\n"
 65 |       ]
 66 |     },
 67 |     {
 68 |       "cell_type": "markdown",
 69 |       "metadata": {
 70 |         "colab_type": "text",
 71 |         "id": "udo6yANOCpvp"
 72 |       },
 73 |       "source": [
 74 |         "## **Task 0:** Load data and install the pcntoolkit"
 75 |       ]
 76 |     },
 77 |     {
 78 |       "cell_type": "code",
 79 |       "metadata": {
 80 |         "colab_type": "code",
 81 |         "id": "yawDkTLoKYRu",
 82 |         "colab": {}
 83 |       },
 84 |       "source": [
 85 |         "#install normative modeling\n",
 86 |         "!pip install pcntoolkit"
 87 |       ],
 88 |       "execution_count": null,
 89 |       "outputs": []
 90 |     },
 91 |     {
 92 |       "cell_type": "markdown",
 93 |       "metadata": {
 94 |         "colab_type": "text",
 95 |         "id": "EHDKe2ohCxP4"
 96 |       },
 97 |       "source": [
 98 |         "**Option 1:** Connect your Google Drive account, and load data from Google Drive. Having Google Drive connected will allow you to save any files created back to your Drive folder. This step will require you to download the csv files from [Github](https://github.com/saigerutherford/CPC_2020/tree/master/data) to your computer, and then make a folder in your Google Drive account and upload the csv files to this folder. "
 99 |       ]
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "metadata": {
104 |         "colab_type": "code",
105 |         "id": "0SMVyxNZqmlv",
106 |         "colab": {}
107 |       },
108 |       "source": [
109 |         "from google.colab import drive\n",
110 |         "drive.mount('/content/drive')\n",
111 |         "\n",
112 |         "#change dir to data on your google drive\n",
113 |         "import os\n",
114 |         "os.chdir('drive/My Drive/name-of-folder-where-you-uploaded-csv-files-from-Github/') #Change this path to match the path to your data in Google Drive\n",
115 |         "\n",
116 |         "# code by T. Wolfers"
117 |       ],
118 |       "execution_count": null,
119 |       "outputs": []
120 |     },
121 |     {
122 |       "cell_type": "markdown",
123 |       "metadata": {
124 |         "colab_type": "text",
125 |         "id": "Bst55nPJDHKb"
126 |       },
127 |       "source": [
128 |         "**Option 2:** Import the files directly from Github, and skip adding them to Google Drive."
129 |       ]
130 |     },
131 |     {
132 |       "cell_type": "code",
133 |       "metadata": {
134 |         "colab_type": "code",
135 |         "id": "zuuSkJwPDRrv",
136 |         "colab": {}
137 |       },
138 |       "source": [
139 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_demographics.csv\n",
140 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_demographics_nordan.csv\n",
141 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_features.csv\n",
142 |         "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_2020/master/data/cpc_camcan_features_nordan.csv\n",
143 |         "    \n",
144 |         "# code by S. Rutherford"
145 |       ],
146 |       "execution_count": null,
147 |       "outputs": []
148 |     },
149 |     {
150 |       "cell_type": "markdown",
151 |       "metadata": {
152 |         "colab_type": "text",
153 |         "id": "kvSiRjysuGkV"
154 |       },
155 |       "source": [
156 |         "## **TASK 1:** Format input data"
157 |       ]
158 |     },
159 |     {
160 |       "cell_type": "markdown",
161 |       "metadata": {
162 |         "colab_type": "text",
163 |         "id": "N2Bon1mJAVjJ"
164 |       },
165 |       "source": [
166 |         "You have four files. The features and demographics file for the normsample and two files of the same name for Nordan your test sample. As one of your coworkers has done the preporcessing and quality control there are more subjects in the demographics file than in the features file of the norm sample. Please select the overlap of participants between those two files. \n",
167 |         "\n",
168 |         "\n",
169 |         "*Question for your understanding:*\n",
170 |         "\n",
171 |         "1) Why do we have to select the overlap between participants in terms of featrues and demographics?"
172 |       ]
173 |     },
174 |     {
175 |       "cell_type": "code",
176 |       "metadata": {
177 |         "colab_type": "code",
178 |         "id": "_RSfxGWku6fU",
179 |         "colab": {}
180 |       },
181 |       "source": [
182 |         "import pandas as pd\n",
183 |         "\n",
184 |         "# read in the files.\n",
185 |         "norm_demographics = pd.read_csv('cpc_camcan_demographics.csv',\n",
186 |         "                                sep= \",\",\n",
187 |         "                                index_col = 0)\n",
188 |         "norm_features = pd.read_csv('cpc_camcan_features.csv',\n",
189 |         "                            sep=\",\",\n",
190 |         "                            index_col = 0)\n",
191 |         "\n",
192 |         "# check columns through print [there are other better options]\n",
193 |         "print(norm_demographics)\n",
194 |         "print(norm_features)\n",
195 |         "\n",
196 |         "# find overlap in terms of participants between norm_sample_features and \n",
197 |         "# norm_sample_demographics\n",
198 |         "\n",
199 |         "norm_demographics_features = pd.concat([norm_demographics, norm_features],\n",
200 |         "                                       axis = 1,\n",
201 |         "                                       join = 'inner') # inner checks overlap\n",
202 |         "                                                       # outer combines\n",
203 |         "print(norm_demographics_features)\n",
204 |         "\n",
205 |         "# code by T. Wolfers"
206 |       ],
207 |       "execution_count": null,
208 |       "outputs": []
209 |     },
210 |     {
211 |       "cell_type": "markdown",
212 |       "metadata": {
213 |         "colab_type": "text",
214 |         "id": "fUufLg4lQWdn"
215 |       },
216 |       "source": [
217 |         "## **TASK 2:** Prepare the covariate_normsample and testresponse_normsample file. "
218 |       ]
219 |     },
220 |     {
221 |       "cell_type": "markdown",
222 |       "metadata": {
223 |         "colab_type": "text",
224 |         "id": "g1i1qp7AAh1Q"
225 |       },
226 |       "source": [
227 |         "As mentioned in the introductory presentation those files need a specific format and the entries need to be seperated by spaces. Use whatever method you know to prepare those files based on the data provided in TASK 1. Save those files in .txt format in your drive. Also get rid of the column names and participant IDs.\n",
228 |         "\n",
229 |         "Given that we only have limited time in this practical we have to make a selection for the features based on your prior knowledge. With the information in mind that Nordan does not remember his way home, which subfield of the hyppocampus is probably a good target for the investigations?\n",
230 |         "Select a maximum of four hyppocampal regions as features.\n",
231 |         "\n",
232 |         "NOTE: Normative modeling is a screening tool we just make this selection due to time constraints, in reality we build these models on millions of putative biomarkers that are not restricted to brain imaging.\n",
233 |         "\n",
234 |         "\n",
235 |         "*Qestions for your understanding:*\n",
236 |         "\n",
237 |         "2) What is the requirement for the features in terms of variable properties (e.g. dicotomous or continous)? 3) What is the requirement for the covariates in terms of these properties? 4) What are the requirements for both together? 5) How does this depent on the algorithm used?"
238 |       ]
239 |     },
240 |     {
241 |       "cell_type": "code",
242 |       "metadata": {
243 |         "colab_type": "code",
244 |         "id": "lzt6llxyRPyY",
245 |         "colab": {}
246 |       },
247 |       "source": [
248 |         "# perpare covariate_normsample for sex and age\n",
249 |         "covariate_normsample = norm_demographics_features[['sex',\n",
250 |         "                                                   'age']] \n",
251 |         "\n",
252 |         "covariate_normsample.to_csv('covariate_normsample.txt',\n",
253 |         "                            sep = ' ',\n",
254 |         "                            header = False, \n",
255 |         "                            index = False)\n",
256 |         "\n",
257 |         "# perpare features_normsample for relevant hyppocampal subfields\n",
258 |         "features_normsample = norm_demographics_features[['left_CA1', \n",
259 |         "                                                 'left_CA3',\n",
260 |         "                                                 'right_CA1',\n",
261 |         "                                                 'right_CA3']]\n",
262 |         "\n",
263 |         "features_normsample.to_csv('features_normsample.txt', \n",
264 |         "                           sep = ' ', \n",
265 |         "                           header = False, \n",
266 |         "                           index = False)\n",
267 |         "\n",
268 |         "# code by T. Wolfers"
269 |       ],
270 |       "execution_count": null,
271 |       "outputs": []
272 |     },
273 |     {
274 |       "cell_type": "markdown",
275 |       "metadata": {
276 |         "colab_type": "text",
277 |         "id": "irR4FAIvQ8ds"
278 |       },
279 |       "source": [
280 |         "## **TASK 3:** Estimate normative model\n"
281 |       ]
282 |     },
283 |     {
284 |       "cell_type": "markdown",
285 |       "metadata": {
286 |         "colab_type": "text",
287 |         "id": "XV61hQUoA1Kd"
288 |       },
289 |       "source": [
290 |         "Once you have prepared and saved all the necessary files. Look at the pcntoolkit for running normative modeling. Select an appropritate method set up the toolkit and run your analyses using 2-fold cross validation in the normsample. Change the output suffix from estimate to '_2fold'. \n",
291 |         "\n",
292 |         "HINT: You primarily need the estimate function. \n",
293 |         "\n",
294 |         "SUGGESTION: While this process is running you can go to the next TASK 4, you will have no doubt when it is correctly running.\n",
295 |         "\n",
296 |         "*Question for your understaning:*\n",
297 |         "\n",
298 |         "6) What does cvfolds mean and why do we use it? 7) What is the output of the estimate function and what does it mean?"
299 |       ]
300 |     },
301 |     {
302 |       "cell_type": "code",
303 |       "metadata": {
304 |         "colab_type": "code",
305 |         "id": "yRTusEg6SRNL",
306 |         "colab": {}
307 |       },
308 |       "source": [
309 |         "import pcntoolkit as pcn\n",
310 |         "\n",
311 |         "# run normative modeling using 2-fold cross-validation\n",
312 |         "\n",
313 |         "pcn.normative.estimate(covfile = 'covariate_normsample.txt', \n",
314 |         "                       respfile = 'features_normsample.txt',\n",
315 |         "                       cvfolds = 2,\n",
316 |         "                       alg = 'gpr',\n",
317 |         "                       outputsuffix = '_2fold')\n",
318 |         "\n",
319 |         "# code by T. Wolfers"
320 |       ],
321 |       "execution_count": null,
322 |       "outputs": []
323 |     },
324 |     {
325 |       "cell_type": "markdown",
326 |       "metadata": {
327 |         "colab_type": "text",
328 |         "id": "Nonuk7d_SNM6"
329 |       },
330 |       "source": [
331 |         "## **TASK 4:** Estimate the forward model of the normative model\n"
332 |       ]
333 |     },
334 |     {
335 |       "cell_type": "markdown",
336 |       "metadata": {
337 |         "colab_type": "text",
338 |         "id": "fmn4TD_tBE70"
339 |       },
340 |       "source": [
341 |         "In order to visulize the normative trajectories you first need to run the forward model. To this end you need to set up an appropriate covariate_forwardmodel file that covers the age range appropriately for both sexes. Save this file as .txt . Then you can input the files you made in TASK 1 as well as the file you made now and run the forward model using the appropriate specifications.\n",
342 |         "\n",
343 |         "*Question for your understaning:*\n",
344 |         "\n",
345 |         "8) What is yhat and ys2? 9) Why does the output of the forward model does not inlcude the Z-scores?"
346 |       ]
347 |     },
348 |     {
349 |       "cell_type": "code",
350 |       "metadata": {
351 |         "colab_type": "code",
352 |         "id": "22U-knkWSPsZ",
353 |         "colab": {}
354 |       },
355 |       "source": [
356 |         "# create covariate_forwardmodel.txt file\n",
357 |         "covariate_forwardmodel = {'sex': [0, 0, 0, 0, 0, 0, 0,\n",
358 |         "                                  1, 1, 1, 1, 1, 1, 1],\n",
359 |         "                          'age': [20, 30, 40, 50, 60, 70, 80,\n",
360 |         "                                  20, 30, 40, 50, 60, 70, 80]}\n",
361 |         "covariate_forwardmodel = pd.DataFrame(data=covariate_forwardmodel)\n",
362 |         "\n",
363 |         "covariate_forwardmodel.to_csv('covariate_forwardmodel.txt', \n",
364 |         "                           sep = ' ', \n",
365 |         "                           header = False, \n",
366 |         "                           index = False)\n",
367 |         "\n",
368 |         "# estimate forward model\n",
369 |         "pcn.normative.estimate(covfile = 'covariate_normsample.txt', \n",
370 |         "                       respfile = 'features_normsample.txt',\n",
371 |         "                       testcov = 'covariate_forwardmodel.txt',\n",
372 |         "                       cvfolds = None,\n",
373 |         "                       alg = 'gpr',\n",
374 |         "                       outputsuffix = '_forward')\n",
375 |         "\n",
376 |         "# code by T. Wolfers"
377 |       ],
378 |       "execution_count": null,
379 |       "outputs": []
380 |     },
381 |     {
382 |       "cell_type": "markdown",
383 |       "metadata": {
384 |         "colab_type": "text",
385 |         "id": "wxeZlXshQ7eS"
386 |       },
387 |       "source": [
388 |         "## **TASK 5:** Visualize forward model"
389 |       ]
390 |     },
391 |     {
392 |       "cell_type": "markdown",
393 |       "metadata": {
394 |         "colab_type": "text",
395 |         "id": "BVodlChrBg25"
396 |       },
397 |       "source": [
398 |         "Visualize the forward model of the normative model similar to the figure below.\n",
399 |         "\n",
400 |         "![1-s2.0-S245190221830329X-gr2.jpg](data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAcQBxAAD/4QDoRXhpZgAATU0AKgAAAAgABwESAAMAAAABAAEAAAEaAAUAAAABAAAAYgEbAAUAAAABAAAAagEoAAMAAAABAAIAAAExAAIAAAAcAAAAcgEyAAIAAAAUAAAAjodpAAQAAAABAAAAogAAAAAAAABxAAAAAQAAAHEAAAABQWRvYmUgUGhvdG9zaG9wIENTNSBXaW5kb3dzADIwMTk6MDE6MTAgMjE6MDA6MDYAAAOQBAACAAAAFAAAAMygAgAEAAAAAQAAAMmgAwAEAAAAAQAAAHIAAAAAMjAxODoxMjoxMSAxNDozNToxNwD/4RGtaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wLwA8P3hwYWNrZXQgYmVnaW49Iu+7vyIgaWQ9Ilc1TTBNcENlaGlIenJlU3pOVGN6a2M5ZCI/PiA8eDp4bXBtZXRhIHhtbG5zOng9ImFkb2JlOm5zOm1ldGEvIiB4OnhtcHRrPSJYTVAgQ29yZSA1LjQuMCI+IDxyZGY6UkRGIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyI+IDxyZGY6RGVzY3JpcHRpb24gcmRmOmFib3V0PSIiIHhtbG5zOnhtcE1NPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvbW0vIiB4bWxuczpzdFJlZj0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL3NUeXBlL1Jlc291cmNlUmVmIyIgeG1sbnM6c3RFdnQ9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZUV2ZW50IyIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczpwaG90b3Nob3A9Imh0dHA6Ly9ucy5hZG9iZS5jb20vcGhvdG9zaG9wLzEuMC8iIHhtbG5zOmRjPSJodHRwOi8vcHVybC5vcmcvZGMvZWxlbWVudHMvMS4xLyIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDpFNTg4MUU5QkVDMTRFOTExOEI3RkU1RDU4RURGM0Q0RSIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDoyRUM1RUY5ODQ5RkRFODExQTNBM0EyODcyMEY2OTA2NCIgeG1wTU06T3JpZ2luYWxEb2N1bWVudElEPSJ4bXAuZGlkOjJFQzVFRjk4NDlGREU4MTFBM0EzQTI4NzIwRjY5MDY0IiB4bXA6TW9kaWZ5RGF0ZT0iMjAxOS0wMS0xMFQyMTowMDowNiswNTozMCIgeG1wOkNyZWF0b3JUb29sPSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHhtcDpNZXRhZGF0YURhdGU9IjIwMTktMDEtMTBUMjE6MDA6MDYrMDU6MzAiIHhtcDpDcmVhdGVEYXRlPSIyMDE4LTEyLTExVDE0OjM1OjE3KzAxOjAwIiBwaG90b3Nob3A6SUNDUHJvZmlsZT0iRUNJLVJHQi5pY2MiIHBob3Rvc2hvcDpDb2xvck1vZGU9IjMiIGRjOmZvcm1hdD0iaW1hZ2UvanBlZyI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjpvcmlnaW5hbERvY3VtZW50SUQ9InhtcC5kaWQ6MkVDNUVGOTg0OUZERTgxMUEzQTNBMjg3MjBGNjkwNjQiIHN0UmVmOmluc3RhbmNlSUQ9InhtcC5paWQ6RTQ4ODFFOUJFQzE0RTkxMThCN0ZFNUQ1OEVERjNENEUiIHN0UmVmOmRvY3VtZW50SUQ9InhtcC5kaWQ6MkVDNUVGOTg0OUZERTgxMUEzQTNBMjg3MjBGNjkwNjQiLz4gPHhtcE1NOkhpc3Rvcnk+IDxyZGY6U2VxPiA8cmRmOmxpIHN0RXZ0OnNvZnR3YXJlQWdlbnQ9IkFkb2JlIFBob3Rvc2hvcCBDUzYgKFdpbmRvd3MpIiBzdEV2dDp3aGVuPSIyMDE4LTEyLTExVDE0OjM1OjE3KzAxOjAwIiBzdEV2dDppbnN0YW5jZUlEPSJ4bXAuaWlkOjJFQzVFRjk4NDlGREU4MTFBM0EzQTI4NzIwRjY5MDY0IiBzdEV2dDphY3Rpb249ImNyZWF0ZWQiLz4gPHJkZjpsaSBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHN0RXZ0OmNoYW5nZWQ9Ii8iIHN0RXZ0OndoZW49IjIwMTktMDEtMTBUMjA6NTE6MDErMDU6MzAiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6N0QzQ0MyOERFQTE0RTkxMTlFRTJGRkMzQkZENTkzMUUiIHN0RXZ0OmFjdGlvbj0ic2F2ZWQiLz4gPHJkZjpsaSBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHN0RXZ0OmNoYW5nZWQ9Ii8iIHN0RXZ0OndoZW49IjIwMTktMDEtMTBUMjA6NTE6MDErMDU6MzAiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6N0UzQ0MyOERFQTE0RTkxMTlFRTJGRkMzQkZENTkzMUUiIHN0RXZ0OmFjdGlvbj0ic2F2ZWQiLz4gPHJkZjpsaSBzdEV2dDpzb2Z0d2FyZUFnZW50PSJBZG9iZSBQaG90b3Nob3AgQ1M1IFdpbmRvd3MiIHN0RXZ0OmNoYW5nZWQ9Ii8iIHN0RXZ0OndoZW49IjIwMTktMDEtMTBUMjE6MDA6MDYrMDU6MzAiIHN0RXZ0Omluc3RhbmNlSUQ9InhtcC5paWQ6RTQ4ODFFOUJFQzE0RTkxMThCN0ZFNUQ1OEVERjNENEUiIHN0RXZ0OmFjdGlvbj0ic2F2ZWQiLz4gPHJkZjpsaSBzdEV2dDphY3Rpb249ImNvbnZlcnRlZCIgc3RFdnQ6cGFyYW1ldGVycz0iZnJvbSBpbWFnZS90aWZmIHRvIGltYWdlL2pwZWciLz4gPHJkZjpsaSBzdEV2dDphY3Rpb249ImRlcml2ZWQiIHN0RXZ0OnBhcmFtZXRlcnM9ImNvbnZlcnRlZCBmcm9tIGltYWdlL3RpZmYgdG8gaW1hZ2UvanBlZyIvPiA8cmRmOmxpIHN0RXZ0OnNvZnR3YXJlQWdlbnQ9IkFkb2JlIFBob3Rvc2hvcCBDUzUgV2luZG93cyIgc3RFdnQ6Y2hhbmdlZD0iLyIgc3RFdnQ6d2hlbj0iMjAxOS0wMS0xMFQyMTowMDowNiswNTozMCIgc3RFdnQ6aW5zdGFuY2VJRD0ieG1wLmlpZDpFNTg4MUU5QkVDMTRFOTExOEI3RkU1RDU4RURGM0Q0RSIgc3RFdnQ6YWN0aW9uPSJzYXZlZCIvPiA8L3JkZjpTZXE+IDwveG1wTU06SGlzdG9yeT4gPC9yZGY6RGVzY3JpcHRpb24+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgPD94cGFja2V0IGVuZD0idyI/PgD/7QBkUGhvdG9zaG9wIDMuMAA4QklNBAQAAAAAACwcAVoAAxslRxwCAAACAAIcAj4ACDIwMTgxMjExHAI/AAsxNDM1MTcrMDEwMDhCSU0EJQAAAAAAEG8VJXtBp2YwhKsNALMYCvf/4gIsSUNDX1BST0ZJTEUAAQEAAAIcQURCRQIQAABtbnRyUkdCIFhZWiAHzwACABYADAADAA9hY3NwQVBQTAAAAABub25lAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLUFEQkUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAApjcHJ0AAAA/AAAACRkZXNjAAABIAAAAGd3dHB0AAABiAAAABRia3B0AAABnAAAABRyVFJDAAABsAAAAA5nVFJDAAABwAAAAA5iVFJDAAAB0AAAAA5yWFlaAAAB4AAAABRnWFlaAAAB9AAAABRiWFlaAAACCAAAABR0ZXh0AAAAAChjKSAxOTk5IEFkb2JlIFN5c3RlbXMgSW5jLgBkZXNjAAAAAAAAAAxFQ0ktUkdCLmljYwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFhZWiAAAAAAAAD23AABAAAAANM6WFlaIAAAAAAAAAAAAAAAAAAAAABjdXJ2AAAAAAAAAAEBzQAAY3VydgAAAAAAAAABAc0AAGN1cnYAAAAAAAAAAQHNAABYWVogAAAAAAAApngAAFH+AAAAAFhZWiAAAAAAAAAtlAAAmiAAABFdWFlaIAAAAAAAACLJAAAT4gAAwdD/wAARCAByAMkDAREAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9sAQwADAgICAgIDAgICAwMDAwQGBAQEBAQIBgYFBgkICgoJCAkJCgwPDAoLDgsJCQ0RDQ4PEBAREAoMEhMSEBMPEBAQ/9sAQwEDAwMEAwQIBAQIEAsJCxAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQ/90ABAAa/9oADAMBAAIRAxEAPwD9KKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgD//0P0ooAKACgAoAOvSgA74/CgAoATI9aAI2ubdDhp4wfQsKAEN1bH/AJbKfpQAC7tu9wg+poAVbm3f7k8bfRwaAJeoyOlAB060AFABQAUAFABQAUAFABQAUAFABQAUAf/R/SigAoAKADvjHPpQBheNNFHinw7e+Hk1e4sJLxAont8tIuGDfdBGQcEEZHB6ik9iZarQs6Fp91pGjWWj/aJbn7HAlubm6O6SXaANxAP16mjoNKyL/kO/+tuHPsuEH6c/rTGRQpYXG4wtFP5b7GO4SFWHY5zg0CumWFVV4RAO2BxQMcBnj1oAzbLxFoWpand6NY6tbT3thj7TAkm54s/3h7dPai6Emm7GXceOtBi8c2/w/mgum1K5tjcq/k5iwBkjd1zheuMZIGeaTeocyvY6A2trnIgRT6qAp/Mc0xgLZBzHJKnuJCf50AL5U6/cuc/76A/yIoAN10vVY3+jFf8AGgBPPK/6y2lX3wGH6c0AKLq3Jx5qqTxtb5T+RoAl6jI70AJmgBaADmgDmvHsvjmPRUb4fQ2cuom4TeLkgAQ87sZ4z936DOMmkyZX6HRQed5KG4CibaDIE+7ux82PbNMrdD6ACgD/0v0ooABzxQBG06r8iZdv7q9PxNAFe7mS1tJr3UJ1htreNpZNucKijJJPXoD0x+NAm7K5R8JeKNA8X6QuteG7jz7RpGj3GMoQy9QQ3I6g/iKW4JqWxtUxh0PTn0oA5nwf4D0DwEupPpM1yV1K5+1Tm5m3heuAOOnJ5OW5GScUlZCUbECfFHwdJ4suPBg1Ei8toBM0hx5B6HYHBOW5HQY5PcUlLUSkr2NseI9GbBW73g9CIXZT+IWqHc8x8HSeC/AnxFvtH0jRNTxqsRuJdavHPlpn5/KBZRhcgck5LADoAahPUzi+WTZ6amteGpphKmraa0wHlhxPGXx6A5zV3RppuZPifTPGup61oN74V8R29lpltcF9ShZcm5jJXhTg54DDHy/ezkkYpMUtdjqc8c9fWmUNWRHd40dWaMgOoOSpPTPpQA6gAHPSgBCFPDDII/OgCP7Jb5ysYT/c+Q/oRmgBPJlX7ly3sHG4f0P60AGbpesccg9m2n8jmgA+0Kv+shkT/gOcfiKAHpNFLxHKrewPNAD6ACgAwfQ0Af/T/SV5FjALH73RR1NAEfzzErI/ljqUB+bB9T6f5zQBMiqgwi7evSgCO6ktobaaW8aNbeNGaYyY2hAPm3Z4xjOc0MH5nCaV8SfhzBa3WjeBPJvp7G2kuodO060ZRKRyQmFC5LYBOMjIJqbroRzx+yWvAfjDxd410qS+vPCn/CPyRzGIC9MjFhgHKoQhI98gfWhNsak2a+sj+ydHvNc1rVtRuILKBriSK1AiyFBJChMN0HdjVA9DA0Txd8NtS0nR9buYreyfWXaKzTUV3TPIrFSNzZ7/AMWccgUk11C8VqzV0Xxl4P1zTdT8S+FhHqH9nLJFMba32zPsUtsXIBIPYdDnr1waBeO6LXgXxb/wm/hy38Qrpd1p3ns6CG4HzfKcbgeMg+vH0prUcXc3x7cdse31oHYa8Uco2yRq+eMMuf50aBocr43k8P8AhLw7eeJD4TjvXtgp8q0gVZGy2MlgMgDue1JtImTsjEg1PxFe+ItBh0nwxq1vomq2Auri7e+l32rkE7CrsVXHyjBHORjoanUWrZcsfAdp4EvvEHjPS9cv5Jb5XurxLqJbhW25Y7UTY2euBn246U7W1FyqOpk2/jH4i+ONA0jxB8PbfThbyXpjvEvE8tzEp5ADFht9SrE8j0NF2F3LY1LT4h2XjQeI/DFhZ61p95pyvZTXSW3meVI25Q6bSWOCpI47Ci9xqV7oxvhZeeL/AA7qV34R8U6rb3ej2kZNjqN05iuLiQsDt2SMWwMtnIBG0DODSjfqKN07M9QhvbO4lkgt7uGWSLHmIkgZkyMjIHTIq7mhNQAUAHU0AMeGKT/WRhvTcOlADfs4H+qlkT2DZH5NkUAIBdKRkxyj3+U/4fyoArfb73/nwm/OgD//1P0ljhRCXGS54Lnkn+lAHN2HgLTtO8c6h48jv717vULdbeSB5MwooC9B1/hHBJHXFK1tSeVLUj8e+OJPB+m/aNN0K6127WaOKS1tclolbPzOQp2jOMA4ySKG7bDlLlWg3xLrOnaboP8Aa/jzVF0vTZwsTWse4kmQcxuwG9uM5ChRgHOaL9xN2Wpn+GfhJ4L0DxPb+NfC3nWyG18qG2hlzAysv3+cscqcgFiOhFJR1ugjBbo7ODU9NuL2bTbfULWS7tQDPAkwMkYYZBZRyAR3IqrjumYN54V1258eWfimPxVcxaVBbNBLpQX5JWIbk84zyCTjOVFKzE03K/Q43WPDeva58cdOfVNEkufDGl2fnWrtABbwy7Ccg9AxkC8Hngcbealp3Ia9/XY7qO78B+DFe1jv9D0RbiRpmjM8Vv5jnGWwSOTge4xVaI091GBqHx3+FlvBcHS/GGn6zdxI7Ja6Y5u3eQA4X9wr4yeOelF+wr9jh/CXxT8Wf8JDfeJ/HVn4os9LktwkWl2nhfUruOMjbiQSRwFccMc7j1wQMVMXrqQnJO7Op1b456eumi68M+CfGGqSuyYSbw7qFlCUJ+95s0IQ9eADkk57VTfYpy00NGD43eB/lXUl1/SpSuSmoeHdQgAP1aAKw9xRe473RzmoftCeHIPH+maJDr2iW2hTQM13d6jObSRJMHAUTbOny4ABzk9MUr6ibfMdWPjB4Fn8Tab4X03WoNTn1RN0c9hKlxbp1ADOrEAnaenpzT5lsNyWx2caRRr5cSKiDI2qAAPwHSnsUvI8w8Ya74j1H4paF4L8LXlzpQi/4mGoz/Z90N1GAPl3DqAoZecDLAZ4qXe+hm9ZaHY2XjXwzrfiXUvBdvcNNqWnRiS5heI7CMgEBjwcblz7mnctNN2KWk/DDwnouu6n4i06K8hu9WbdJ5V08aL3OzYVxk9ck+2BxRy2FypGjDte8n07S/FgluLTBmtZmjnMWem4DEgz7tSBGX4p8fL4Hn0228RLbTNqk5t7c2+9HZsgElCGUD5hnLj71O9tBOVnY3hrkMPF9Y39p6mSAuo9y8e5cfUjHegq5atNRsL8FrG9gnA4PlSBsexx0phdFigYUALtb+6fyoA//9X9Iby9trKMSXMgBc7UCgs7t6Ko5P4UBcqCLUtS5uHaxtu0KMPOYf7Tg/J/uqc/7XagWrLtta29lCtvaQrDGuQFUfn9T65/WiwWK2saHo/iGybTda0+G8tmYP5Uq7lBHQ/Uf1NFrjdnozg9U+K9t4N8cXPhzxi+jaJoMdojadNJdL59w2FwscP3m6t8qpxjnqKlPUhN3scbqFt4m1/xoPGXwV8K6vZPcsz6jqGtRtYWl4u5CEVJv3+0lfmYQ4PykdKVtbktNvQPiH4p+PFhrGm+GLPWNLttV1hS0Nto+meYEXkc3VyxViCMnEKgDBJINOTYSlJaGZqfw/j1f4teHfCWtavr2tGGzF5q66zezXNrK23cwjQFYgMZXIjUZ7k4ATvzBd8x6JqPwl+B+oeGJhp/h7w1pFgzAnUtMgtoGUhu8wX14O7PXFN2ZckmrnR634f1eLwQNA+H+qR6dd29vFBZ3Ep3AIuAckA8lRjODyc+9O1loHLp7pxVj8MfH2v+JdRn+Inidp9Mm0wWASxmMa3BKgFinRcMCxOBubHGMgyotkRhJ7s9J8MeHrDwnoNn4f01pmtrKMxo0z7nbJzyfxJ7D6dKvyNErIu/2hZfbjp326D7WF84wCUeaE7NtPOM9+lA7q5zlvofjJ/HOoahqWs2l14ZmtVjt9OaIFlkwuScjpkMc5OcgYGAalLUnladx+q/DD4b62HXVvAXh673kEtLpsRbPY525BHqD3qrD5UzzLV/AGn+Adf03TNE1Dx0sfiC82rJpWoGKKxThVDjDCXaG6v0VTz6xZJ6EWUWds3gr4maSfM8P/F2e828GDxDpFvdIQO2+3Fu47ckt+NWXY4zx/dfGrRLCTXIPDvhPSp45Ee/1fTdTQPdoAAkbrcxRkDOODI+eMGpehEubc66D42+HLeGObxXoniLw2kiiT7Tf6Y8lqQeQ32q3MsODkEEuOO9O5SlbcveDrb4X3uv6j418Fa3peoXurjbPNZaglwp5ydoViFOQMigEknc6y80vS9RaB9Q061uTbOJoPOhV/KccBl3AkHHTGPzFFirIs9ucc5z/WmBVu9L0y+IkvLCCZx913jBYfRuopWCxW/sTyebDVb+3x1Uy+cv0xIGIH0IpisLnxFb8EWN+OvG63b8vmUn3yv4UAZP/CXy/wDQla7/AN+j/hQM/9b9D9R03ULbSb+50N459be2f7PcXhyPMIOAccKucfKoxQxNaaGP4G8Wvc6VJpni7XNLfxFpKM+qpDIoWAbjgt/CMAgNg4BJ5qU+4ovuZr/F2LX5ZLH4WeG7vxg8bGJ7+GVbbSonHUNdvkSEdxCshHene4XvsVdX0HxzfaXda38SfiG+k6VaxNPPpnhWNoPkAyQ13IDO/AIzGITn86Qbak/whm+EV5Ddy/DXSbe3uk2G9eSFvtjs2SDLM5Lyng/MWP8ASmmmCkmaHh/xX4ok8S6/p3iWwtFsLacLpn2ItNNImTy6oSV42n5guCSKL6gpO50bXerXOGtdEWLHR7yZUPPcLGH/ACJU0DZyfxDuviFaWdnDoekwasmoT/Zb6O2hKNFAwwSWZjwem7GBjNJ36EzcrEU3wg8DaT4KvPDt9faiuiK7385kucbWVQS+QvZV6Ht70cugnCKVjlfD+gP4h8f+H/Efgq41CfwZaac1mZRqUqGJkV18oqzBx0Q4HGADmptd3QkryViXxhp3iv4dWGveIdI8V3GuStdRXL2NzfSKbG0JbJwJMuSSi54GB0PNN6bCd4rc6K/vPFviTwNp/iT4U6xN9rvGR9t84YCPlXA8wHBDD8lOM8U9Wrot3cbo0bX4bCDxWnj06sX11rRbeVpYg0X3QpwqFPTA7YA4zT5eoctnc6T/AIqOPqumXPuWkh/TD0ytSjrXiweGNLm1nxFpc9tZ2yhpZ4pY5ETJwBgkNySPur3FK9gvYpXvi7w/r/hG5vrLXbnTLW9tJDDqLW8sSxAqf3gcgDI9iOlK90Ju6F+GDWcfg+wtLPxePEflhx9tLYZxuY4IzuXHI5ORjmmmEdih8Y/BGu+PPCw0bQdRht5I5vPkimXAnCjhd2CVOce3rRNXWgpxclobXgCw1zS/Bul6b4khtIb61gEMkdsf3SKpIQDHH3MA44z0oW2o4rSzIdc+F/w38SzNceIPAXh/ULiTrNcadE0h99+NxPv1p2Ksjgdf0XwP4E1+08PaF4+8XeEru8ge5jW1v3vLKONQfvRXYljQfIfuqvA9MGk3YiVoml4au/idrGlpr3g/4meH/FFmztH5eraI9m7MvUGW3cBT7mE+uO9Fxp82xqjx18RdM/d+IPg5qNwy8GbQdVtbyI+4E7W8n4FPxNFx3AfG/wAC2/7vxB/bfh64GR5Gr6PdQHPoH2GNz6bXINFxOSRs+HviR4D8VOkGheLdLurp13m0FyguVH+1ETvX8RRdDUlLY6XB9DTGf//X+4pfFPxI8axm28GeEm8N2EwIOs+I0KyhD/FDYxsJSx9ZWix3DDilqJ6lPw7+zl8NtIKT65preIrtDu36p+8iLZBz5GRGcEA5ZSQehFCjYlQS1On8F+AtE+Hg1abT9QuWh1Cf7U4uZAEgHPCgAADBPJyeOTximlYajysg8afETSvDmhXmpvaG9toUXzdwGGV22AhScupJ6gbP9qpbsEpJEnhPwv4Zn02PXLCyjjh1iKK5eKC3FrG4K5UNGoycA9HLAHmmrdASW6OqgggtolhtoY4ol4VUTao+mOlMdh0kiRDMjqn+8cUDIzdIfuCST02ocfnQBzms3/jA+J7DTbPQLWfw/dQyC/mnPzg88HDbQOnBBzk8CkyWaui2On6dp0Vp4esdPtrEZMa2rjy+TyQVHPPU0LyGrLYx4fh7okXi3UPGhsxJf6nb/ZZ45Zt0DIQoI2Fe4UA549utK2txcqvc6K3iazgS3tbGCKGNQqRxHaqjjgKFAH/6/WqWg7EhmkA+a1k+oKn+ZoGH2qPo6yr2IMZH8qAIb2DStYtZdN1CK2u7e5XZJBIAyyDvkenSgN9xDo2kvpLaH9gg/s9oDbm3CYjMR4K4GABzS02DRqxnweC/DmnaKNE0rRrO2hhjZLcNCspic5O75s5IY56/jRZWJcVsjnfA/h3UfDNhDoPi3xybjW7iaWW3EV62WiGOFjk4bHJzs43Y6AUlpuJK27Ot+y65b82+rxTL6XVuCx/4FGVH/jpp6laiG91qAEXGipMD3s7lST9RJsx+Zphc5DxE+t33jnRb9WsbbQreKWO+XUbAiUl9wYLIUKgEbRhWA4Od3SpauS1eVzstCsdC0/TktPDlvZQWKklI7QJ5YJOSfl4zzyfami9FsaFMCO6t7e8t5bS7gSaCdCkkcgyrqeCCD2OcUMLLqYGo/Dj4f6tpkei6p4K0S8soRiOG4sInWP3UFSVPuKEkhJJbEH/Cqvh3/wBCfZfrQM//0P0oGc8sfXrnB/GgDkPGWp+KNS0eew+Gc9u+rRSokkz7TFCo5K5Py7/YA4yc4yDUu72Jk7rQ2tP0iZ4Le48Qype3qqrOAMQRy45MceMDkcMctz1FNaDXmZrfD3w7/wAJnP47m8+S9nthayRySgwFAAN23HXaMY6d8d6Vri5Ve5k6H4d17wJofiCfTdWvvE15dStd2sVyvyRnBwgyw3HnnGAcAADrRsKzjdnSeG7nW9W0Ozv/ABDZtp19NHuntlIyhycZOSRkAHAPGQO2aaKjtqZvhrwpr+j+Kdc1nU/E76hY6g4NnaMn/HsM9Mk+mBx1xk80JO4opp3bOrxj/CmUV9QsYNTsLnTbrd5N3C8Em1ip2spU4I5BwetAMyvBng/SvAugxeHtGluZLaJ2kDXD73ZmPJJAAH4DFJKwkrG7TGM82PzDD5i+YBuKZ5A9cUXAfnHPpQBy7w+Ph8QFlS5sP+ESFrtaIj9952OO2Qc477dvbNTrcnVyKWtfFnwLoXiK78L69qBtLi0gWaSWaA+ScgEKuASWwQcBcdcHijmXUTmk7MzT8W/Dt346sfBOg6Rd3/2oK015bHbFErIHVwB95cEEnIAHTPSi+ugufU6HXPB15q3iHR9cg8R3lomlMWa2UsRNnscOox67g2R0xTa1Kcbu4up+BtG1TxPp/i++s0n1LS0KW8gd0CjJPK5IbBJIPHP4UWG43dzoPPcf623kH+0MMP05pjAXMGeZdh9Gyp/I0Ac7B4906fx7P8P10+9W7t7Rbk3DR5hxgHAPXocZ6buKXNfQnmu7G1PomkXUhmm02ATf89VTbIPo4+b9aLDsZevadr9jo15P4Rvrh9SjiZrW3upRJFJIOgJky3I/2gPpQ9hO9tCLw1dePDoVnP4m0zTTqDJm4igmMZQ5IH95SSoGfmA5x0FJX6gnK2pqf25HCMX+n31pz/HCXX8WjLKB9SKY7jf+El8O/wDQbsv+/i/40xn/0f0G8QR67caFfXaWzSzxW8j22mwS7TK+07Q8nUknHyjA9d1J7CexyXwI8CX/AIV8Pz6zr63kWq6zIZZ4LjjylVm2/L2Yg5Pfn2pRT3IpppanpRmLkrbqJCOrZwoqjQQQbjvnbzD16YA/D/HP4UAS4GMDj6UAL+FAHK6dqHj1/GurWOo6JaJ4egt1awuUceZLJ8o2nnv8/UDGB1BpXZMea4/wBrvirxBpM954t8NHRLuO6eOODJO+MAYbnnqSPQ4yOKEEW3udPTKCgAoA5m38A6Rb+Orjx/HdXpvrm2Fs0Rl/dBQAMgf8BHHTqcUrWdyeVXudNTKOX8EeBU8GNqrLrl9qX9qXZuj9qOfKzngepOeT3wKSQkrDfFfwy8H+NdSstV8Q6cZ57NSo2OUEik52vj7wzyPqaTjcTimdDZaZpunbfsFhb25WNIQY4gpEa8KuQOgxgDOKpaDSSMCx8V6vc+O73wlJ4UvIdOtLZZotTb/VSMQpKj5QO5HBLZU5GDST1EndnUUyg/lQAm0HggE/SgDD8V6/4f8ABWjTeI9Y82C2hKIWgQ7yWbAXCnnnnmjRCbSL2l3kWq6Zaarpl2z215ClxD5kfVHGRwMEcHnmgFrqWd9wn3oFYeqv/Q4/nQMpa9JqUujXsOj3K2WoPA62s1xGSkcmDtJwCMA49aGJ6rQr+DbfxHZ+GrG38WajBf6rGjLPcQ42tySv1+XAPHXmktNwSsrM0/sOm/8APlD/AN8imPlR/9L9JXlEeF5Lnoq9TQAwxvL80547RqePxPWgCbgDGBhenoKADuR3HX2oAKAOX8eSeP4rOx/4V/BYy3DXSC7F0QFEOOSM9Rnr39KTZMr9DqPf26UygoAKACgAoAKACgAoAKACgBOep54AJxzQAtAHL+ONE8Z6ydL/AOEQ8Sx6T9muhLeb13edH6Dg5xzwcA55IwKTTJkm7WOoPXpjPUfn+FMohu7W1v7d7W9tYriCTho5UDqw91NANXJEjSNVijRURFCqqjAAHReOABxgCjYVhtwbgW8ptBGZ9jeUJM7d+OM47ZxSew/Q5X4ax/ERNLupPiNNbteyXLPbpDszHF6EoMHnOBycdTSV+pMOa2p1TW8LncEAY/xKdp/MVRRH9lh/uT/9/wA0Af/T/SaOJIx8o+bue5oAdQADrQBy/g7wHa+Dr/Wr+21a+vDrNz9pZLh8rEcn7vvyRn0CiklbUlRS1OoplBQAUAFABQAUAFABQAUAFABQBy/h7SfG1n4q1y+17xBb3mjXTA6baohDwj34GOODydx546UknclJp3Z1FMoKACgAoAKACgAoAKAOY1zx9p2geLNG8JXFhey3GtZ8qaKLMaYOPmOfUHOOg5NJslys7WOnx70yj//U/SigAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKAEKhiGKgsDwTyR680BYXBoA/9X9KKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoAKACgAoABQA7A9BQB//2Q==)\n",
401 |         "\n",
402 |         "HINT: First create a function that calculates the confidence intervals and then plot yhat, y2 of the forward model. Finally, plot the data of individual participants."
403 |       ]
404 |     },
405 |     {
406 |       "cell_type": "code",
407 |       "metadata": {
408 |         "colab_type": "code",
409 |         "id": "ii0H9GDwv-ha",
410 |         "colab": {}
411 |       },
412 |       "source": [
413 |         "import numpy as np\n",
414 |         "import matplotlib.pyplot as plt\n",
415 |         "\n",
416 |         "# confidence interval calculation at x_forward\n",
417 |         "def confidence_interval(s2,x,z):\n",
418 |         "  CI=np.zeros((len(x_forward),4))\n",
419 |         "  for i,xdot in enumerate(x_forward):\n",
420 |         "    ci_inx=np.isin(x,xdot)\n",
421 |         "    S2=s2[ci_inx]\n",
422 |         "    S_hat=np.mean(S2,axis=0)\n",
423 |         "    n=S2.shape[0]\n",
424 |         "    CI[i,:]=z*np.power(S_hat/n,.5)\n",
425 |         "  return CI \n",
426 |         "\n",
427 |         "\n",
428 |         "feature_names=['left_CA1','left_CA3','right_CA1','right_CA3']\n",
429 |         "sex_covariates=[ 'Female','Male']\n",
430 |         "# Creating plots for Female and male \n",
431 |         "for i,sex in enumerate(sex_covariates):\n",
432 |         "#forward model data\n",
433 |         "    forward_yhat = pd.read_csv('yhat_forward.txt', sep = ' ', header=None)\n",
434 |         "    yhat_forward=forward_yhat.values\n",
435 |         "    yhat_forward=yhat_forward[7*i:7*(i+1)]\n",
436 |         "    x_forward=[20, 30, 40, 50, 60, 70, 80]\n",
437 |         "\n",
438 |         "# Find the index of the data exclusively for one sex. Female:0, Male: 1   \n",
439 |         "    inx=np.where(covariate_normsample.sex==i)[0]\n",
440 |         "    x=covariate_normsample.values[inx,1]\n",
441 |         "# actual data\n",
442 |         "    y = pd.read_csv('features_normsample.txt', sep = ' ', header=None)\n",
443 |         "    y=y.values[inx]\n",
444 |         "# confidence Interval yhat+ z *(std/n^.5)-->.95 % CI:z=1.96, 99% CI:z=2.58 \n",
445 |         "    s2= pd.read_csv('ys2_2fold.txt', sep = ' ', header=None)\n",
446 |         "    s2=s2.values[inx]\n",
447 |         "\n",
448 |         "    CI_95=confidence_interval(s2,x,1.96)\n",
449 |         "    CI_99=confidence_interval(s2,x,2.58)\n",
450 |         "\n",
451 |         "# Creat a trejactroy for each point     \n",
452 |         "    for j,name in enumerate(feature_names):\n",
453 |         "         fig=plt.figure()\n",
454 |         "         ax=fig.add_subplot(111)\n",
455 |         "         ax.plot(x_forward,yhat_forward[:,j], linewidth=4, label='Normative trejactory')\n",
456 |         "\n",
457 |         "\n",
458 |         "         ax.plot(x_forward,CI_95[:,j]+yhat_forward[:,j], linewidth=2,linestyle='--',c='g', label='95% confidence interval')  \n",
459 |         "         ax.plot(x_forward,-CI_95[:,j]+yhat_forward[:,j], linewidth=2,linestyle='--',c='g') \n",
460 |         "\n",
461 |         "         ax.plot(x_forward,CI_99[:,j]+yhat_forward[:,j], linewidth=1,linestyle='--',c='k', label='99% confidence interval')  \n",
462 |         "         ax.plot(x_forward,-CI_99[:,j]+yhat_forward[:,j], linewidth=1,linestyle='--',c='k') \n",
463 |         "\n",
464 |         "         ax.scatter(x,y[:,j],c='r', label=name)\n",
465 |         "         plt.legend(loc='upper left')\n",
466 |         "         plt.title('Normative trejectory of' +name+' in '+sex+' cohort')\n",
467 |         "         plt.show()\n",
468 |         "         plt.close()\n",
469 |         "        \n",
470 |         "# code by M. Zabihi"
471 |       ],
472 |       "execution_count": null,
473 |       "outputs": []
474 |     },
475 |     {
476 |       "cell_type": "markdown",
477 |       "metadata": {
478 |         "colab_type": "text",
479 |         "id": "yM4z1BtyWwiF"
480 |       },
481 |       "source": [
482 |         "## **TASK 6:** Apply the normative model to Nordan's data and the dementia patients."
483 |       ]
484 |     },
485 |     {
486 |       "cell_type": "code",
487 |       "metadata": {
488 |         "colab_type": "code",
489 |         "id": "eVTYxKjvWBvm",
490 |         "colab": {}
491 |       },
492 |       "source": [
493 |         "# read in Nordan's as well as the patient's demographics and features\n",
494 |         "demographics_nordan = pd.read_csv('cpc_camcan_demographics_nordan.csv',\n",
495 |         "                                       sep= \",\",\n",
496 |         "                                       index_col = 0)\n",
497 |         "features_nordan = pd.read_csv('cpc_camcan_features_nordan.csv',\n",
498 |         "                            sep=\",\",\n",
499 |         "                            index_col = 0)\n",
500 |         "\n",
501 |         "# create a covariate file for Nordan's as well as the patient's demograhpics\n",
502 |         "covariate_nordan = demographics_nordan[['sex',\n",
503 |         "                                        'age']] \n",
504 |         "covariate_nordan.to_csv('covariate_nordan.txt',\n",
505 |         "                        sep = ' ',\n",
506 |         "                        header = False, \n",
507 |         "                        index = False)\n",
508 |         "\n",
509 |         "# create the corresponding feature file\n",
510 |         "features_nordan = features_nordan[['left_CA1', \n",
511 |         "                                  'left_CA3',\n",
512 |         "                                  'right_CA1',\n",
513 |         "                                  'right_CA3']]\n",
514 |         "\n",
515 |         "features_nordan.to_csv('features_nordan.txt', \n",
516 |         "                        sep = ' ', \n",
517 |         "                        header = False, \n",
518 |         "                        index = False)\n",
519 |         "\n",
520 |         "# apply normative modeling\n",
521 |         "pcn.normative.estimate(covfile = 'covariate_normsample.txt', \n",
522 |         "                       respfile = 'features_normsample.txt',\n",
523 |         "                       testcov = 'covariate_nordan.txt',\n",
524 |         "                       testresp = 'features_nordan.txt',\n",
525 |         "                       cvfolds = None,\n",
526 |         "                       alg = 'gpr',\n",
527 |         "                       outputsuffix = '_nordan')\n",
528 |         "\n",
529 |         "# code by T. Wolfers"
530 |       ],
531 |       "execution_count": null,
532 |       "outputs": []
533 |     },
534 |     {
535 |       "cell_type": "markdown",
536 |       "metadata": {
537 |         "colab_type": "text",
538 |         "id": "LFnHCy0XVVwl"
539 |       },
540 |       "source": [
541 |         "## **TASK 7:** In which hyppocampal subfield(s) does Nordan deviate extremely? \n"
542 |       ]
543 |     },
544 |     {
545 |       "cell_type": "markdown",
546 |       "metadata": {
547 |         "colab_type": "text",
548 |         "id": "jUhmPAOZB0kp"
549 |       },
550 |       "source": [
551 |         "No coding necessary just create a presentation which includes recommendations to Nordan and his family. \n",
552 |         "Use i) |Z| > 3.6 ii) |Z| > 1.96 as definitions for extreme normative deviations."
553 |       ]
554 |     },
555 |     {
556 |       "cell_type": "markdown",
557 |       "metadata": {
558 |         "colab_type": "text",
559 |         "id": "AqQhxN9pEFGC"
560 |       },
561 |       "source": [
562 |         "## **TASK 8 (OPTIONAL):** Implement a function that calculates percentage change. "
563 |       ]
564 |     },
565 |     {
566 |       "cell_type": "markdown",
567 |       "metadata": {
568 |         "colab_type": "text",
569 |         "id": "weASKkZNBMW5"
570 |       },
571 |       "source": [
572 |         "Percentage change = $\\frac{x1 - x2}{|x2|}*100$"
573 |       ]
574 |     },
575 |     {
576 |       "cell_type": "code",
577 |       "metadata": {
578 |         "colab_type": "code",
579 |         "id": "0vIt9fd7EmJx",
580 |         "colab": {}
581 |       },
582 |       "source": [
583 |         "# function that calculates percentage change\n",
584 |         "def calculate_percentage_change(x1, x2):\n",
585 |         "  percentage_change = ((x1 - x2) / abs(x2)) * 100\n",
586 |         "  return percentage_change\n",
587 |         "\n",
588 |         "# code by T. Wolfers"
589 |       ],
590 |       "execution_count": null,
591 |       "outputs": []
592 |     },
593 |     {
594 |       "cell_type": "markdown",
595 |       "metadata": {
596 |         "colab_type": "text",
597 |         "id": "1Mypo4xrT7ID"
598 |       },
599 |       "source": [
600 |         "## **TASK 9 (OPTIONAL):** Visualize percent change\n",
601 |         "\n",
602 |         "\n",
603 |         "\n"
604 |       ]
605 |     },
606 |     {
607 |       "cell_type": "markdown",
608 |       "metadata": {
609 |         "colab_type": "text",
610 |         "id": "1I1Kwv5iBUJj"
611 |       },
612 |       "source": [
613 |         "Plot the prercentage change in Yhat of the forward model in reference to age 20. Do that for both sexes seperately."
614 |       ]
615 |     },
616 |     {
617 |       "cell_type": "code",
618 |       "metadata": {
619 |         "colab_type": "code",
620 |         "id": "1DoJid7R1DBX",
621 |         "scrolled": true,
622 |         "colab": {}
623 |       },
624 |       "source": [
625 |         "import matplotlib.pyplot as plt\n",
626 |         "\n",
627 |         "forward_yhat = pd.read_csv('yhat_forward.txt', sep = ' ', header=None)\n",
628 |         "\n",
629 |         "# You can indicate here which hypocampal subfield you like to visualize\n",
630 |         "hyppocampal_subfield = 0\n",
631 |         "\n",
632 |         "percentage_change_female = []\n",
633 |         "percentage_change_male = []\n",
634 |         "count = 0\n",
635 |         "lengths = len(forward_yhat[hyppocampal_subfield])\n",
636 |         "for entry in forward_yhat[hyppocampal_subfield]:\n",
637 |         "  if count > 0 and count < 7:\n",
638 |         "    loop_percentage_change_female = calculate_percentage_change(entry, \n",
639 |         "                                                                forward_yhat.iloc[0,\n",
640 |         "                                                                                  hyppocampal_subfield])\n",
641 |         "    percentage_change_female.append(loop_percentage_change_female)\n",
642 |         "  elif count > 7: \n",
643 |         "    loop_percentage_change_male = calculate_percentage_change(entry,\n",
644 |         "                                                              forward_yhat.iloc[9,\n",
645 |         "                                                                                hyppocampal_subfield])\n",
646 |         "    percentage_change_male.append(loop_percentage_change_male)\n",
647 |         "  count = count + 1 \n",
648 |         "\n",
649 |         "names = ['30 compared to 20 years', \n",
650 |         "         '40 compared to 20 years', \n",
651 |         "         '50 compared to 20 years', \n",
652 |         "         '60 compared to 20 years', \n",
653 |         "         '70 compared to 20 years',\n",
654 |         "         '80 compared to 20 years']\n",
655 |         "\n",
656 |         "# females\n",
657 |         "plt.subplot(121)\n",
658 |         "plt.bar(names, percentage_change_female)\n",
659 |         "plt.xticks(rotation=90)\n",
660 |         "plt.ylim(-20, 2)\n",
661 |         "\n",
662 |         "# males\n",
663 |         "plt.subplot(122)\n",
664 |         "plt.bar(names, percentage_change_male)\n",
665 |         "plt.xticks(rotation=90)\n",
666 |         "plt.ylim(-20, 2)\n",
667 |         "\n",
668 |         "# code by T. Wolfers"
669 |       ],
670 |       "execution_count": null,
671 |       "outputs": []
672 |     }
673 |   ]
674 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning with Normative Modeling Tutorial 
 2 | # Computational Psychiatry Course 2022
 3 | This repository contains written instructions, links to code, and data used for the (virtual) Machine Learning/Normative Modeling Practical at the [Computational Psychiatry Course](https://www.translationalneuromodeling.org/cpcourse/) on September 17th, 2022.
 4 | 
 5 | This repository is a group effort by [Saige Rutherford](https://twitter.com/being_saige) and [Thomas Wolfers](https://twitter.com/ThomasWolfers).
 6 | 
 7 | We will be running all of our code in Google Colab python notebooks. These are essentially Jupyter notebooks run in the :cloud: *cloud* :cloud:. 
 8 | Running our code using Colab will save us from dealing with python library installation and virtual environment setup. 
 9 | It also ensures that we are all working on the same operating system which makes troubleshooting much easier (since there are only 2 instructors and lots of students)! 
10 | 
11 | If you have never used Google Colab before, you can check out an introduction notebook with lots of helpful links here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/notebooks/intro.ipynb)
12 | 
13 | We will also be using the Pandas library for a lot of our code. There is a great intro to Pandas Colab notebook here: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/notebooks/mlcc/intro_to_pandas.ipynb)
14 | 
15 | Other helpful pandas:panda_face:/plotting:bar_chart: links (not required to do during the practial, just added for those who might need extra python help):
16 | 1. [Pandas cheatsheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf)
17 | 2. [Pandas Selecting/Indexing API](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html)
18 | 
19 | ### :warning: Setup instructions for Google Colab :warning:
20 | You can open the python notebook that we will use in this practical directly from this Github account (the links to the notebook are at the bottom of this Read Me file). Before you open the notebook, make sure you are logged into a Google account. All of the code has been tested using Google Chrome web browser. When you are ready to begin, you will click on the **template** Google Colab button below. This will launch a new browser tab with the Google Colab notebook. 
21 | 
22 | Once you are in the Colab notebook tab, in the top right corner you will see a `Connect` (or `Reconnect`) button. Click on this, and a dropdown menu will appear as shown below. Click on `Connect to hosted runtime` this will allow you to run the notebook using Google’s cloud resources, which are likely much faster than your computer. If you would prefer to use your own computer’s resources (this is not recommended and instructors will not be able to help you troubleshoot if you are not running the notebook in the cloud), select `Connect to local runtime`. 
23 | 
24 | :warning: Note: sometimes if the notebook is left running for a long time without any activity (i.e. your computer goes to sleep), you will be disconnected from the runtime. In that case, you will need to click on this same button. It will appear as `Reconnect` instead of `Connect`. You will also need to  re-run all code blocks. 
25 | 
26 | ![](presentation/Runtime1.png)
27 | 
28 | :arrow_right: If you are using the Google cloud hosted option: in the upper left corner, you will see a button called `Runtime`. Click on `Runtime`, and another dropdown panel will appear (as shown below). Click on `Change runtime type`.
29 | 
30 | ![](presentation/Runtime2.png)
31 | 
32 | :arrow_right: This box will open, and you can click the  `GPU` option, then click `save`. 
33 | 
34 | ![](presentation/GPU.png)
35 | 
36 | :arrow_right: In the same menu you used to change the runtime, there are several other optional things you can explore that may make your interacting with the notebook easier. Under ‘Tools’ there is a ‘Settings’ tab, which you can use to change the theme to light or dark mode using the ‘Site’ sub-tab. Then under the ‘Miscellaneous’ sub-tab, you can select Corgi or Kitty mode, and this will make cute animals walk across the top of your screen. There is no practical utility to this whatsoever, and it is for the sole purpose that cute animals spark joy. 
37 | 
38 | :arrow_right: Also under the ‘Tools’ tab, there is an option to look at Keyboard shortcuts. You don’t need to change any of these, but you can review some of them if you want to learn about speeding up your coding practice. 
39 | 
40 | ![](presentation/keyboard_pref.png)
41 | 
42 | :arrow_right: In the Colab python notebook, there are 2 types of cells: text cells & ```code cells```. The text cells have plain text in them, that the notebook will not interpret as code. These are the cells that contain the background story & task instructions. The ```code``` cells have a :arrow_forward: play button on the left side. These are the cells that the notebook will run as code. To run a ```code cell```, you can either click on the play button :arrow_forward: on the left side or use ‘Shift + Enter’ (your cursor must be inside the code cell). 
43 |  
44 | ### Now you are ready to begin coding :brain:	:computer:! 
45 | ### Good luck :four_leaf_clover: and remember to have fun :smiley:! 
46 | 
47 | Before clicking on the colab button below, make sure you are logged into a google account and using Chrome or Firefox internet browser (hopefully a current version)
48 | 
49 | **Task 1: Fitting normative models from scratch** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saigerutherford/CPC_ML_tutorial/blob/master/tasks/1_fit_normative_models.ipynb)
50 | 
51 | **Task 2: Applying pre-trained normative models** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saigerutherford/CPC_ML_tutorial/blob/master/tasks/2_apply_normative_models.ipynb)
52 | 
53 | **Task 3: Interpreting and visualizing the outputs of normative models** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saigerutherford/CPC_ML_tutorial/blob/master/tasks/3_Visualizations.ipynb)
54 | 
55 | **Task 4: Using the outputs (Z-scores) as features in predictive model** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saigerutherford/CPC_ML_tutorial/blob/master/tasks/4_post_hoc_analysis.ipynb)
56 | 
57 | 


--------------------------------------------------------------------------------
/data/nilearn_order.csv:
--------------------------------------------------------------------------------
 1 | ROI
 2 | G_and_S_frontomargin
 3 | G_and_S_occipital_inf
 4 | G_and_S_paracentral
 5 | G_and_S_subcentral
 6 | G_and_S_transv_frontopol
 7 | G_and_S_cingul-Ant
 8 | G_and_S_cingul-Mid-Ant
 9 | G_and_S_cingul-Mid-Post
10 | G_cingul-Post-dorsal
11 | G_cingul-Post-ventral
12 | G_cuneus
13 | G_front_inf-Opercular
14 | G_front_inf-Orbital
15 | G_front_inf-Triangul
16 | G_front_middle
17 | G_front_sup
18 | G_Ins_lg_and_S_cent_ins
19 | G_insular_short
20 | G_occipital_middle
21 | G_occipital_sup
22 | G_oc-temp_lat-fusifor
23 | G_oc-temp_med-Lingual
24 | G_oc-temp_med-Parahip
25 | G_orbital
26 | G_pariet_inf-Angular
27 | G_pariet_inf-Supramar
28 | G_parietal_sup
29 | G_postcentral
30 | G_precentral
31 | G_precuneus
32 | G_rectus
33 | G_subcallosal
34 | G_temp_sup-G_T_transv
35 | G_temp_sup-Lateral
36 | G_temp_sup-Plan_polar
37 | G_temp_sup-Plan_tempo
38 | G_temporal_inf
39 | G_temporal_middle
40 | Lat_Fis-ant-Horizont
41 | Lat_Fis-ant-Vertical
42 | Lat_Fis-post
43 | Medial_wall
44 | Pole_occipital
45 | Pole_temporal
46 | S_calcarine
47 | S_central
48 | S_cingul-Marginalis
49 | S_circular_insula_ant
50 | S_circular_insula_inf
51 | S_circular_insula_sup
52 | S_collat_transv_ant
53 | S_collat_transv_post
54 | S_front_inf
55 | S_front_middle
56 | S_front_sup
57 | S_interm_prim-Jensen
58 | S_intrapariet_and_P_trans
59 | S_oc_middle_and_Lunatus
60 | S_oc_sup_and_transversal
61 | S_occipital_ant
62 | S_oc-temp_lat
63 | S_oc-temp_med_and_Lingual
64 | S_orbital_lateral
65 | S_orbital_med-olfact
66 | S_orbital-H_Shaped
67 | S_parieto_occipital
68 | S_pericallosal
69 | S_postcentral
70 | S_precentral-inf-part
71 | S_precentral-sup-part
72 | S_suborbital
73 | S_subparietal
74 | S_temporal_inf
75 | S_temporal_sup
76 | S_temporal_transverse
77 | 


--------------------------------------------------------------------------------
/data/phenotypes_lh.txt:
--------------------------------------------------------------------------------
 1 | lh_G&S_frontomargin_thickness
 2 | lh_G&S_occipital_inf_thickness
 3 | lh_G&S_paracentral_thickness
 4 | lh_G&S_subcentral_thickness
 5 | lh_G&S_transv_frontopol_thickness
 6 | lh_G&S_cingul-Ant_thickness
 7 | lh_G&S_cingul-Mid-Ant_thickness
 8 | lh_G&S_cingul-Mid-Post_thickness
 9 | lh_G_cingul-Post-dorsal_thickness
10 | lh_G_cingul-Post-ventral_thickness
11 | lh_G_cuneus_thickness
12 | lh_G_front_inf-Opercular_thickness
13 | lh_G_front_inf-Orbital_thickness
14 | lh_G_front_inf-Triangul_thickness
15 | lh_G_front_middle_thickness
16 | lh_G_front_sup_thickness
17 | lh_G_Ins_lg&S_cent_ins_thickness
18 | lh_G_insular_short_thickness
19 | lh_G_occipital_middle_thickness
20 | lh_G_occipital_sup_thickness
21 | lh_G_oc-temp_lat-fusifor_thickness
22 | lh_G_oc-temp_med-Lingual_thickness
23 | lh_G_oc-temp_med-Parahip_thickness
24 | lh_G_orbital_thickness
25 | lh_G_pariet_inf-Angular_thickness
26 | lh_G_pariet_inf-Supramar_thickness
27 | lh_G_parietal_sup_thickness
28 | lh_G_postcentral_thickness
29 | lh_G_precentral_thickness
30 | lh_G_precuneus_thickness
31 | lh_G_rectus_thickness
32 | lh_G_subcallosal_thickness
33 | lh_G_temp_sup-G_T_transv_thickness
34 | lh_G_temp_sup-Lateral_thickness
35 | lh_G_temp_sup-Plan_polar_thickness
36 | lh_G_temp_sup-Plan_tempo_thickness
37 | lh_G_temporal_inf_thickness
38 | lh_G_temporal_middle_thickness
39 | lh_Lat_Fis-ant-Horizont_thickness
40 | lh_Lat_Fis-ant-Vertical_thickness
41 | lh_Lat_Fis-post_thickness
42 | lh_Pole_occipital_thickness
43 | lh_Pole_temporal_thickness
44 | lh_S_calcarine_thickness
45 | lh_S_central_thickness
46 | lh_S_cingul-Marginalis_thickness
47 | lh_S_circular_insula_ant_thickness
48 | lh_S_circular_insula_inf_thickness
49 | lh_S_circular_insula_sup_thickness
50 | lh_S_collat_transv_ant_thickness
51 | lh_S_collat_transv_post_thickness
52 | lh_S_front_inf_thickness
53 | lh_S_front_middle_thickness
54 | lh_S_front_sup_thickness
55 | lh_S_interm_prim-Jensen_thickness
56 | lh_S_intrapariet&P_trans_thickness
57 | lh_S_oc_middle&Lunatus_thickness
58 | lh_S_oc_sup&transversal_thickness
59 | lh_S_occipital_ant_thickness
60 | lh_S_oc-temp_lat_thickness
61 | lh_S_oc-temp_med&Lingual_thickness
62 | lh_S_orbital_lateral_thickness
63 | lh_S_orbital_med-olfact_thickness
64 | lh_S_orbital-H_Shaped_thickness
65 | lh_S_parieto_occipital_thickness
66 | lh_S_pericallosal_thickness
67 | lh_S_postcentral_thickness
68 | lh_S_precentral-inf-part_thickness
69 | lh_S_precentral-sup-part_thickness
70 | lh_S_suborbital_thickness
71 | lh_S_subparietal_thickness
72 | lh_S_temporal_inf_thickness
73 | lh_S_temporal_sup_thickness
74 | lh_S_temporal_transverse_thickness
75 | lh_MeanThickness_thickness


--------------------------------------------------------------------------------
/data/phenotypes_rh.txt:
--------------------------------------------------------------------------------
 1 | rh_G&S_frontomargin_thickness
 2 | rh_G&S_occipital_inf_thickness
 3 | rh_G&S_paracentral_thickness
 4 | rh_G&S_subcentral_thickness
 5 | rh_G&S_transv_frontopol_thickness
 6 | rh_G&S_cingul-Ant_thickness
 7 | rh_G&S_cingul-Mid-Ant_thickness
 8 | rh_G&S_cingul-Mid-Post_thickness
 9 | rh_G_cingul-Post-dorsal_thickness
10 | rh_G_cingul-Post-ventral_thickness
11 | rh_G_cuneus_thickness
12 | rh_G_front_inf-Opercular_thickness
13 | rh_G_front_inf-Orbital_thickness
14 | rh_G_front_inf-Triangul_thickness
15 | rh_G_front_middle_thickness
16 | rh_G_front_sup_thickness
17 | rh_G_Ins_lg&S_cent_ins_thickness
18 | rh_G_insular_short_thickness
19 | rh_G_occipital_middle_thickness
20 | rh_G_occipital_sup_thickness
21 | rh_G_oc-temp_lat-fusifor_thickness
22 | rh_G_oc-temp_med-Lingual_thickness
23 | rh_G_oc-temp_med-Parahip_thickness
24 | rh_G_orbital_thickness
25 | rh_G_pariet_inf-Angular_thickness
26 | rh_G_pariet_inf-Supramar_thickness
27 | rh_G_parietal_sup_thickness
28 | rh_G_postcentral_thickness
29 | rh_G_precentral_thickness
30 | rh_G_precuneus_thickness
31 | rh_G_rectus_thickness
32 | rh_G_subcallosal_thickness
33 | rh_G_temp_sup-G_T_transv_thickness
34 | rh_G_temp_sup-Lateral_thickness
35 | rh_G_temp_sup-Plan_polar_thickness
36 | rh_G_temp_sup-Plan_tempo_thickness
37 | rh_G_temporal_inf_thickness
38 | rh_G_temporal_middle_thickness
39 | rh_Lat_Fis-ant-Horizont_thickness
40 | rh_Lat_Fis-ant-Vertical_thickness
41 | rh_Lat_Fis-post_thickness
42 | rh_Pole_occipital_thickness
43 | rh_Pole_temporal_thickness
44 | rh_S_calcarine_thickness
45 | rh_S_central_thickness
46 | rh_S_cingul-Marginalis_thickness
47 | rh_S_circular_insula_ant_thickness
48 | rh_S_circular_insula_inf_thickness
49 | rh_S_circular_insula_sup_thickness
50 | rh_S_collat_transv_ant_thickness
51 | rh_S_collat_transv_post_thickness
52 | rh_S_front_inf_thickness
53 | rh_S_front_middle_thickness
54 | rh_S_front_sup_thickness
55 | rh_S_interm_prim-Jensen_thickness
56 | rh_S_intrapariet&P_trans_thickness
57 | rh_S_oc_middle&Lunatus_thickness
58 | rh_S_oc_sup&transversal_thickness
59 | rh_S_occipital_ant_thickness
60 | rh_S_oc-temp_lat_thickness
61 | rh_S_oc-temp_med&Lingual_thickness
62 | rh_S_orbital_lateral_thickness
63 | rh_S_orbital_med-olfact_thickness
64 | rh_S_orbital-H_Shaped_thickness
65 | rh_S_parieto_occipital_thickness
66 | rh_S_pericallosal_thickness
67 | rh_S_postcentral_thickness
68 | rh_S_precentral-inf-part_thickness
69 | rh_S_precentral-sup-part_thickness
70 | rh_S_suborbital_thickness
71 | rh_S_subparietal_thickness
72 | rh_S_temporal_inf_thickness
73 | rh_S_temporal_sup_thickness
74 | rh_S_temporal_transverse_thickness
75 | rh_MeanThickness_thickness


--------------------------------------------------------------------------------
/data/phenotypes_sc.txt:
--------------------------------------------------------------------------------
 1 | Left-Lateral-Ventricle
 2 | Left-Inf-Lat-Vent
 3 | Left-Cerebellum-White-Matter
 4 | Left-Cerebellum-Cortex
 5 | Left-Thalamus-Proper
 6 | Left-Caudate
 7 | Left-Putamen
 8 | Left-Pallidum
 9 | 3rd-Ventricle
10 | 4th-Ventricle
11 | Brain-Stem
12 | Left-Hippocampus
13 | Left-Amygdala
14 | CSF
15 | Left-Accumbens-area
16 | Left-VentralDC
17 | Left-vessel
18 | Left-choroid-plexus
19 | Right-Lateral-Ventricle
20 | Right-Inf-Lat-Vent
21 | Right-Cerebellum-White-Matter
22 | Right-Cerebellum-Cortex
23 | Right-Thalamus-Proper
24 | Right-Caudate
25 | Right-Putamen
26 | Right-Pallidum
27 | Right-Hippocampus
28 | Right-Amygdala
29 | Right-Accumbens-area
30 | Right-VentralDC
31 | Right-vessel
32 | Right-choroid-plexus
33 | SubCortGrayVol
34 | TotalGrayVol
35 | SupraTentorialVol
36 | SupraTentorialVolNotVent
37 | EstimatedTotalIntraCranialVol
38 | 


--------------------------------------------------------------------------------
/data/sz_ct.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/data/sz_ct.npy


--------------------------------------------------------------------------------
/data/sz_labels.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/data/sz_labels.npy


--------------------------------------------------------------------------------
/data/sz_z.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/data/sz_z.npy


--------------------------------------------------------------------------------
/data/task1_phenotypes.txt:
--------------------------------------------------------------------------------
 1 | lh_bankssts_thickness	
 2 | lh_caudalanteriorcingulate_thickness	
 3 | lh_caudalmiddlefrontal_thickness	
 4 | lh_cuneus_thickness	
 5 | lh_entorhinal_thickness	
 6 | lh_fusiform_thickness	
 7 | lh_inferiorparietal_thickness	
 8 | lh_inferiortemporal_thickness	
 9 | lh_isthmuscingulate_thickness	
10 | lh_lateraloccipital_thickness	
11 | lh_lateralorbitofrontal_thickness	
12 | lh_lingual_thickness	
13 | lh_medialorbitofrontal_thickness	
14 | lh_middletemporal_thickness	
15 | lh_parahippocampal_thickness	
16 | lh_paracentral_thickness	
17 | lh_parsopercularis_thickness	
18 | lh_parsorbitalis_thickness	
19 | lh_parstriangularis_thickness	
20 | lh_pericalcarine_thickness	
21 | lh_postcentral_thickness	
22 | lh_posteriorcingulate_thickness	
23 | lh_precentral_thickness	
24 | lh_precuneus_thickness	
25 | lh_rostralanteriorcingulate_thickness	
26 | lh_rostralmiddlefrontal_thickness	
27 | lh_superiorfrontal_thickness	
28 | lh_superiorparietal_thickness	
29 | lh_superiortemporal_thickness	
30 | lh_supramarginal_thickness	
31 | lh_frontalpole_thickness	
32 | lh_temporalpole_thickness	
33 | lh_transversetemporal_thickness	
34 | lh_insula_thickness	
35 | lh_MeanThickness_thickness	
36 | rh_bankssts_thickness	
37 | rh_caudalanteriorcingulate_thickness	
38 | rh_caudalmiddlefrontal_thickness	
39 | rh_cuneus_thickness	
40 | rh_entorhinal_thickness	
41 | rh_fusiform_thickness	
42 | rh_inferiorparietal_thickness	
43 | rh_inferiortemporal_thickness	
44 | rh_isthmuscingulate_thickness	
45 | rh_lateraloccipital_thickness	
46 | rh_lateralorbitofrontal_thickness	
47 | rh_lingual_thickness	
48 | rh_medialorbitofrontal_thickness	
49 | rh_middletemporal_thickness	
50 | rh_parahippocampal_thickness	
51 | rh_paracentral_thickness	
52 | rh_parsopercularis_thickness	
53 | rh_parsorbitalis_thickness	
54 | rh_parstriangularis_thickness	
55 | rh_pericalcarine_thickness	
56 | rh_postcentral_thickness	
57 | rh_posteriorcingulate_thickness	
58 | rh_precentral_thickness	
59 | rh_precuneus_thickness	
60 | rh_rostralanteriorcingulate_thickness	
61 | rh_rostralmiddlefrontal_thickness	
62 | rh_superiorfrontal_thickness	
63 | rh_superiorparietal_thickness	
64 | rh_superiortemporal_thickness	
65 | rh_supramarginal_thickness	
66 | rh_frontalpole_thickness	
67 | rh_temporalpole_thickness	
68 | rh_transversetemporal_thickness	
69 | rh_insula_thickness	
70 | rh_MeanThickness_thickness	


--------------------------------------------------------------------------------
/nm_utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import glob
  3 | import re
  4 | import numpy as np
  5 | import pandas as pd
  6 | import shutil
  7 | import pickle
  8 | import subprocess
  9 | 
 10 | import pcntoolkit.dataio.fileio as fileio
 11 | 
 12 | #################################### FUNCTIONS ################################
 13 | def calibration_descriptives(x):
 14 |   n = np.shape(x)[0]
 15 |   m1 = np.mean(x)
 16 |   m2 = sum((x-m1)**2)
 17 |   m3 = sum((x-m1)**3)
 18 |   m4 = sum((x-m1)**4)
 19 |   s1 = np.std(x)
 20 |   skew = n*m3/(n-1)/(n-2)/s1**3
 21 |   sdskew = np.sqrt( 6*n*(n-1) / ((n-2)*(n+1)*(n+3)) )
 22 |   kurtosis = (n*(n+1)*m4 - 3*m2**2*(n-1)) / ((n-1)*(n-2)*(n-3)*s1**4)
 23 |   sdkurtosis = np.sqrt( 4*(n**2-1) * sdskew**2 / ((n-3)*(n+5)) )
 24 |   semean = np.sqrt(np.var(x)/n)
 25 |   sesd = s1/np.sqrt(2*(n-1))
 26 |   cd = [skew, sdskew, kurtosis, sdkurtosis, semean, sesd]
 27 |   return cd
 28 | 
 29 | 
 30 |     
 31 | def save_output(src_dir, dst_dir, savemodel=True):
 32 |   
 33 |     # move everything else to the destination dir
 34 |     files = []
 35 |     files.extend(glob.glob(os.path.join(src_dir,'Z*')))
 36 |     files.extend(glob.glob(os.path.join(src_dir,'yhat*')))
 37 |     files.extend(glob.glob(os.path.join(src_dir,'ys2*')))
 38 |     files.extend(glob.glob(os.path.join(src_dir,'Rho*')))
 39 |     files.extend(glob.glob(os.path.join(src_dir,'pRho*')))
 40 |     files.extend(glob.glob(os.path.join(src_dir,'RMSE*')))
 41 |     files.extend(glob.glob(os.path.join(src_dir,'SMSE*')))
 42 |     files.extend(glob.glob(os.path.join(src_dir,'MSLL*')))
 43 |     files.extend(glob.glob(os.path.join(src_dir,'EXPV*')))
 44 |     
 45 |     if savemodel:
 46 |         model_files = glob.glob(os.path.join(src_dir,'Models/*'))
 47 |         dst_model_dir = os.path.join(dst_dir, 'Models')
 48 |         os.makedirs(dst_model_dir, exist_ok=True)
 49 |         for f in model_files:
 50 |             fdir, fnam = os.path.split(f)
 51 |             shutil.move(f, os.path.join(dst_model_dir,fnam))
 52 |         os.rmdir(os.path.join(src_dir,'Models'))
 53 |     else:
 54 |         # remove the model directory to save space
 55 |         shutil.rmtree(os.path.join(src_dir,'Models'))
 56 |     
 57 |     for f in files:
 58 |         fdir, fnam = os.path.split(f)
 59 |         shutil.move(f, os.path.join(dst_dir,fnam))
 60 |     return
 61 | 
 62 | def predict_on_new_sites(blr, hyp, X, y, Xs=None, 
 63 |                          ys=None, 
 64 |                          var_groups_test=None):
 65 |     """ Function to transfer the model to a new site"""
 66 |     # Get predictions from old model on new data X
 67 |     ys_ref, s2_ref = blr.predict(hyp, None, None, X)
 68 | 
 69 |     # Subtract the predictions from true data to get the residuals
 70 |     if blr.warp is None:
 71 |         residuals = ys_ref-y
 72 |     else:
 73 |         # Calculate the residuals in warped space
 74 |         y_ref_ws = blr.warp.f(y, hyp[1:blr.warp.get_n_params()+1])
 75 |         residuals = ys_ref - y_ref_ws 
 76 |   
 77 |     residuals_mu = np.mean(residuals)
 78 |     residuals_sd = np.std(residuals)
 79 | 
 80 |     # Adjust the mean with the mean of the residuals
 81 |     #blr.m = blr.m-np.ones((len(blr.m)))*residuals_mu 
 82 |     #ys,s2 = blr.predict(hyp, None, None, Xs)
 83 |     if ys is None:
 84 |         if Xs is None:
 85 |             raise(ValueError, 'Either ys or Xs must be specified')
 86 |         else:
 87 |             ys, s2 = blr.predict(hyp, None, None, Xs)
 88 |             ys = ys - residuals_mu 
 89 |     else:
 90 |         if blr.warp is not None:
 91 |             y_ws = blr.warp.f(y, hyp[1:blr.warp.get_n_params()+1])
 92 |             ys = y_ws - residuals_mu 
 93 |         else:
 94 |             ys = ys - residuals_mu    
 95 |         
 96 |     # Set the deviation to the devations of the residuals
 97 |     s2 = np.ones(len(s2))*residuals_sd**2
 98 |         
 99 |     return ys, s2
100 |         
101 | 
102 | def test_func(x, epsilon, b):
103 |         return np.sinh(b * np.arcsinh(x) + epsilon * b)
104 |     
105 | def remove_bad_subjects(df, qc):#qc_file):
106 |     
107 |     """
108 |     Removes low-quality subjects from multi-site data based on Euler characteristic 
109 |     measure.
110 |     
111 |     * Inputs:
112 |         - df: the data in a pandas' dataframe format.
113 |         - qc: pandas dataframe containing the euler charcteristics.
114 |     
115 |     * Outputs:
116 |         - df: the updated data after removing bad subjects.
117 |         - removed_subjects: the list of removed subjects.
118 |     """
119 |     
120 |     n = df.shape[0]
121 |     
122 |     euler_nums = qc['avg_en'].to_numpy(dtype=np.float32)
123 |     # convert to numeric site indices
124 |     #sites = df['site'].to_numpy(dtype=np.int)
125 |     site_ids = pd.Series(df['site'], copy=True)
126 |     for i,s in enumerate(site_ids.unique()):
127 |         site_ids.loc[site_ids == s] = i
128 |     sites = site_ids.to_numpy(dtype=np.int)
129 |     subjects = qc.index
130 |     for site in np.unique(sites):
131 |         euler_nums[sites==site] = np.sqrt(-(euler_nums[sites==site])) - np.nanmedian(np.sqrt(-(euler_nums[sites==site])))
132 |     
133 |     good_subjects = list(subjects[np.bitwise_or(euler_nums<=5, np.isnan(euler_nums))])
134 |     removed_subjects = list(subjects[euler_nums>5])
135 |     
136 |     good_subjects = list(set(good_subjects))
137 |     
138 |     dfout = df.loc[good_subjects]
139 |     
140 |     print(len(removed_subjects), 'subjects are removed!') 
141 |     
142 |     return dfout, removed_subjects
143 | 
144 | def retrieve_eulernum(freesurfer_dir, subjects=None):
145 |     """ Get the Euler Characteristic from a set of subjects
146 |         :param freesurfer_dir: Freesurfer SUBJECTS_DIR
147 |         :param subjects: a list of subjects to process
148 |     """
149 |     
150 |     if subjects is None:
151 |         subjects = [temp for temp in os.listdir(freesurfer_dir) 
152 |                     if os.path.isdir(os.path.join(freesurfer_dir ,temp))]
153 |         
154 |     df = pd.DataFrame(index=subjects, columns=['lh_en','rh_en','avg_en'])
155 |     missing_subjects = []
156 |     
157 |     for s, sub in enumerate(subjects):
158 |         sub_dir = os.path.join(freesurfer_dir, sub)
159 |         log_file = os.path.join(sub_dir, 'scripts', 'recon-all.log')
160 |         
161 |         if os.path.exists(sub_dir):
162 |             if os.path.exists(log_file):    
163 |                 with open(log_file) as f:
164 |                     for line in f:
165 |                         # find the part that refers to the EC
166 |                         if re.search('orig.nofix lheno', line):
167 |                             eno_line = line
168 |                 f.close()
169 |                 eno_l = eno_line.split()[3][0:-1] # remove the trailing comma
170 |                 eno_r = eno_line.split()[6]
171 |                 euler = (float(eno_l) + float(eno_r)) / 2
172 |                 
173 |                 df.at[sub, 'lh_en'] = eno_l
174 |                 df.at[sub, 'rh_en'] = eno_r
175 |                 df.at[sub, 'avg_en'] = euler
176 |                 
177 |                 print('%d: Subject %s is successfully processed. EN = %f' 
178 |                       %(s, sub, df.at[sub, 'avg_en']))
179 |             else:
180 |                 print('%d: Subject %s is missing log file, running QC ...' %(s, sub))
181 |                 try:
182 |                     bashCommand = 'mris_euler_number '+ freesurfer_dir + sub +'/surf/lh.orig.nofix>' + 'temp_l.txt 2>&1'
183 |                     res = subprocess.run(bashCommand, stdout=subprocess.PIPE, shell=True)
184 |                     file = open('temp_l.txt', mode = 'r', encoding = 'utf-8-sig')
185 |                     lines = file.readlines()
186 |                     file.close()
187 |                     words = []
188 |                     for line in lines:
189 |                         line = line.strip()
190 |                         words.append([item.strip() for item in line.split(' ')])
191 |                     eno_l = np.float32(words[0][12])
192 |                     
193 |                     bashCommand = 'mris_euler_number '+ freesurfer_dir + sub +'/surf/rh.orig.nofix>' + 'temp_r.txt 2>&1'
194 |                     res = subprocess.run(bashCommand, stdout=subprocess.PIPE, shell=True)
195 |                     file = open('temp_r.txt', mode = 'r', encoding = 'utf-8-sig')
196 |                     lines = file.readlines()
197 |                     file.close()
198 |                     words = []
199 |                     for line in lines:
200 |                         line = line.strip()
201 |                         words.append([item.strip() for item in line.split(' ')])
202 |                     eno_r = np.float32(words[0][12])
203 |                     
204 |                     df.at[sub, 'lh_en'] = eno_l
205 |                     df.at[sub, 'rh_en'] = eno_r
206 |                     df.at[sub, 'avg_en'] = (eno_r + eno_l) / 2
207 |                 
208 |                     print('%d: Subject %s is successfully processed. EN = %f' 
209 |                           %(s, sub, df.at[sub, 'avg_en']))
210 |                     
211 |                 except:
212 |                     missing_subjects.append(sub)
213 |                     print('%d: QC is failed for subject %s.' %(s, sub))
214 |                 
215 |         else:
216 |             missing_subjects.append(sub)
217 |             print('%d: Subject %s is missing.' %(s, sub))
218 |         df = df.dropna()
219 |              
220 |     return df, missing_subjects
221 | 
222 | def load_2d(filename):
223 |     """ this simple function loads a data type supported by PCNtoolkit and
224 |         ensures that the output is a 2d numpy array
225 |     """
226 |     
227 |     x = fileio.load(filename)
228 |     if len(x.shape) == 1:
229 |         x = x[:, np.newaxis]
230 |     
231 |     return x


--------------------------------------------------------------------------------
/presentation/GPU.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/GPU.png


--------------------------------------------------------------------------------
/presentation/How_nm_compressed2020.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/How_nm_compressed2020.pdf


--------------------------------------------------------------------------------
/presentation/Normative_Modeling_a_Framework_for_Clinical_Machinelearning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/Normative_Modeling_a_Framework_for_Clinical_Machinelearning.pdf


--------------------------------------------------------------------------------
/presentation/Runtime1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/Runtime1.png


--------------------------------------------------------------------------------
/presentation/Runtime2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/Runtime2.png


--------------------------------------------------------------------------------
/presentation/keyboard_pref.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/keyboard_pref.png


--------------------------------------------------------------------------------
/presentation/settings1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/settings1.png


--------------------------------------------------------------------------------
/presentation/settings2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/6e77e6e87b2780063dae57e87f0923dd54cf1845/presentation/settings2.png


--------------------------------------------------------------------------------
/tasks/1_fit_normative_models.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "4b64f505-ad16-437a-94de-2646f35ae55f",
  6 |    "metadata": {
  7 |     "id": "4b64f505-ad16-437a-94de-2646f35ae55f"
  8 |    },
  9 |    "source": [
 10 |     "## Estimating lifespan normative models\n",
 11 |     "\n",
 12 |     "This notebook provides a complete walkthrough for an analysis of normative modelling using your own dataset. Training and testing data is provided for this tutorial. However, the idea is that you could subsitute our provided training and testing datasets for you own dataset (as long as it matches the same format!)\n",
 13 |     "\n",
 14 |     "First, if necessary, we install PCNtoolkit (note: this tutorial requires at least version 0.20)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "id": "84ec2ca6-c0a2-4abf-8f05-29edc9e0fa24",
 21 |    "metadata": {
 22 |     "id": "84ec2ca6-c0a2-4abf-8f05-29edc9e0fa24"
 23 |    },
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "# Make sure to click the restart runtime button at the \n",
 27 |     "# bottom of this code blocks' output (after you run the cell)\n",
 28 |     "! pip install pcntoolkit==0.20"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "id": "909c3b45-ad46-4e6d-8732-dc5ac68488c6",
 34 |    "metadata": {
 35 |     "id": "909c3b45-ad46-4e6d-8732-dc5ac68488c6"
 36 |    },
 37 |    "source": [
 38 |     "Then we import the required libraries"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "id": "DGQhP2LbElmI",
 45 |    "metadata": {
 46 |     "id": "DGQhP2LbElmI"
 47 |    },
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "! git clone https://github.com/saigerutherford/CPC_ML_tutorial.git"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 14,
 56 |    "id": "d451c106-08e2-4f5b-baf9-da240768e68b",
 57 |    "metadata": {
 58 |     "id": "d451c106-08e2-4f5b-baf9-da240768e68b"
 59 |    },
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "# we need to be in the CPC_ML_tutorial folder when we import the libraries in the code block below,\n",
 63 |     "# because there is a function called nm_utils that is in this folder that we need to import\n",
 64 |     "import os\n",
 65 |     "os.chdir('/content/CPC_ML_tutorial/')"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 15,
 71 |    "id": "83c494d3-6ebd-4cde-aff0-8fc9344374dd",
 72 |    "metadata": {
 73 |     "id": "83c494d3-6ebd-4cde-aff0-8fc9344374dd"
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "import numpy as np\n",
 78 |     "import pandas as pd\n",
 79 |     "import pickle\n",
 80 |     "from matplotlib import pyplot as plt\n",
 81 |     "import seaborn as sns\n",
 82 |     "\n",
 83 |     "from pcntoolkit.normative import estimate, predict, evaluate\n",
 84 |     "from pcntoolkit.util.utils import compute_MSLL, create_design_matrix\n",
 85 |     "from nm_utils import calibration_descriptives, remove_bad_subjects, load_2d"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "id": "9822cc19-48e9-428b-8c5e-e059fd2d23f7",
 91 |    "metadata": {
 92 |     "id": "9822cc19-48e9-428b-8c5e-e059fd2d23f7"
 93 |    },
 94 |    "source": [
 95 |     "Now, we configure the locations in which the data are stored. \n",
 96 |     "\n",
 97 |     "**Notes:** \n",
 98 |     "- The data are assumed to be in CSV format and will be loaded as pandas dataframes\n",
 99 |     "- Generally the raw data will be in a different location to the analysis\n",
100 |     "- The data can have arbitrary columns but some are required by the script, i.e. 'age', 'sex' and 'site', plus the phenotypes you wish to estimate (see below)"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 16,
106 |    "id": "7da01c88-7033-498b-a811-79ad58e8c17a",
107 |    "metadata": {
108 |     "id": "7da01c88-7033-498b-a811-79ad58e8c17a"
109 |    },
110 |    "outputs": [],
111 |    "source": [
112 |     "# where the raw data are stored\n",
113 |     "data_dir = '/content/CPC_ML_tutorial/data/'\n",
114 |     "\n",
115 |     "# where the analysis takes place\n",
116 |     "root_dir = '/content/CPC_ML_tutorial/'\n",
117 |     "out_dir = os.path.join(root_dir,'models','test')\n",
118 |     "\n",
119 |     "# create the output directory if it does not already exist\n",
120 |     "os.makedirs(out_dir, exist_ok=True)"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "id": "01141f19-a960-4823-baad-8604975304c3",
126 |    "metadata": {
127 |     "id": "01141f19-a960-4823-baad-8604975304c3"
128 |    },
129 |    "source": [
130 |     "Now we load the data. \n",
131 |     "\n",
132 |     "We will load one pandas dataframe for the training set and one dataframe for the test set. We also configure a list of site ids."
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 17,
138 |    "id": "850fee6b-421f-41d9-8fd6-7e1dafbf0e9f",
139 |    "metadata": {
140 |     "id": "850fee6b-421f-41d9-8fd6-7e1dafbf0e9f"
141 |    },
142 |    "outputs": [],
143 |    "source": [
144 |     "df_tr = pd.read_csv(os.path.join(data_dir,'train_data.csv'), index_col=0) \n",
145 |     "df_te = pd.read_csv(os.path.join(data_dir,'test_data.csv'), index_col=0)\n",
146 |     "\n",
147 |     "# extract a list of unique site ids from the training set\n",
148 |     "site_ids =  sorted(set(df_tr['site'].to_list()))"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "id": "29f9593a-d3c9-4d08-a877-8794203c0001",
154 |    "metadata": {
155 |     "id": "29f9593a-d3c9-4d08-a877-8794203c0001"
156 |    },
157 |    "source": [
158 |     "### Configure which models to fit\n",
159 |     "\n",
160 |     "Next, we load the image derived phenotypes (IDPs) which we will process in this analysis. This is effectively just a list of columns in your dataframe. Here we estimate normative models for the left hemisphere, right hemisphere and cortical structures."
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "id": "7438ef7e-9340-4f13-8d57-816918923705",
167 |    "metadata": {
168 |     "id": "7438ef7e-9340-4f13-8d57-816918923705"
169 |    },
170 |    "outputs": [],
171 |    "source": [
172 |     "# we choose here to process all idps. Uncomment lines 2-7 (and comment line 11) to run models for the whole brain, but we suggest just starting with several ROIs\n",
173 |     "#os.chdir(root_dir)\n",
174 |     "#!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/master/data/task1_phenotypes.txt\n",
175 |     "#with open(os.path.join(root_dir,'task1_phenotypes.txt')) as f:\n",
176 |     "#  idp_ids = f.read().splitlines()\n",
177 |     "#for idx, ele in enumerate(idp_ids):\n",
178 |     "#        idp_ids[idx] = ele.replace('\\t', '')\n",
179 |     "\n",
180 |     "# we could also just specify a list of IDPs. Use this line to run just 2 models (not the whole brain)...this is a good place to start. If you have time,\n",
181 |     "# you can uncomment the above line and run the whole brain models. Be sure to comment out this line if you uncomment the above line. \n",
182 |     "idp_ids = ['lh_MeanThickness_thickness', 'rh_MeanThickness_thickness']"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "markdown",
187 |    "id": "5d791db6-8fe5-450c-88eb-84a390b8753a",
188 |    "metadata": {
189 |     "id": "5d791db6-8fe5-450c-88eb-84a390b8753a"
190 |    },
191 |    "source": [
192 |     "### Configure model parameters\n",
193 |     "\n",
194 |     "Now, we configure some parameters for the regression model we use to fit the normative model. Here we will use a 'warped' Bayesian linear regression model. To model non-Gaussianity, we select a sin arcsinh warp and to model non-linearity, we stick with the default value for the basis expansion (a cubic b-spline basis set with 5 knot points). Since we are sticking with the default value, we do not need to specify any parameters for this, but we do need to specify the limits. We choose to pad the input by a few years either side of the input range. We will also set a couple of options that control the estimation of the model\n",
195 |     "\n",
196 |     "For further details about the likelihood warping approach, see [Fraza et al 2021](https://www.biorxiv.org/content/10.1101/2021.04.05.438429v1)."
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": null,
202 |    "id": "0IYl-eg2xGWE",
203 |    "metadata": {
204 |     "id": "0IYl-eg2xGWE"
205 |    },
206 |    "outputs": [],
207 |    "source": [
208 |     "# check the min & max age of the dataset, use this info to update the xmin & xmax variables in the code block below. \n",
209 |     "df_tr['age'].describe()"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 22,
215 |    "id": "e44e257c-676e-49d8-89ec-657e506c3b74",
216 |    "metadata": {
217 |     "id": "e44e257c-676e-49d8-89ec-657e506c3b74"
218 |    },
219 |    "outputs": [],
220 |    "source": [
221 |     "# which data columns do we wish to use as covariates? \n",
222 |     "# You could add additional covariates from your own dataset here that you wish to use as predictors.\n",
223 |     "# However, for this tutorial today we will keep it simple and just use age & sex. \n",
224 |     "# Maybe discuss with your partner ideas you have for other covariates you would like to include.\n",
225 |     "cols_cov = ['age','sex']\n",
226 |     "\n",
227 |     "# which warping function to use? We can set this to None in order to fit a vanilla Gaussian noise model\n",
228 |     "warp =  'WarpSinArcsinh'\n",
229 |     "\n",
230 |     "# limits for cubic B-spline basis \n",
231 |     "# check the min & max ages of the dataframes, add 5 to the max \n",
232 |     "# and subtract 5 from the min and adjust these variables accordingly\n",
233 |     "xmin = 13# set this variable\n",
234 |     "xmax = 92# set this variable\n",
235 |     "\n",
236 |     "# Do we want to force the model to be refit every time? \n",
237 |     "# When training normative model from scratch like we are doing in this notebook (not re-using a pre-trained model), \n",
238 |     "# this variable should be = True\n",
239 |     "force_refit = True \n",
240 |     "\n",
241 |     "# Absolute Z treshold above which a sample is considered to be an outlier (without fitting any model)\n",
242 |     "outlier_thresh = 7"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "markdown",
247 |    "id": "896842d7-8913-4137-9d86-4757c42bcf1b",
248 |    "metadata": {
249 |     "id": "896842d7-8913-4137-9d86-4757c42bcf1b"
250 |    },
251 |    "source": [
252 |     "### Fit the models\n",
253 |     "\n",
254 |     "Now we fit the models. This involves looping over the IDPs we have selected. We will use a module from PCNtoolkit to set up the design matrices, containing the covariates, fixed effects for site and nonlinear basis expansion. "
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": null,
260 |    "id": "a4e9b50c-574b-4e2c-a511-cc444db4393e",
261 |    "metadata": {
262 |     "id": "a4e9b50c-574b-4e2c-a511-cc444db4393e"
263 |    },
264 |    "outputs": [],
265 |    "source": [
266 |     "for idp_num, idp in enumerate(idp_ids): \n",
267 |     "    print('Running IDP', idp_num, idp, ':')\n",
268 |     "   \n",
269 |     "    # set output dir \n",
270 |     "    idp_dir = os.path.join(out_dir, idp)\n",
271 |     "    os.makedirs(os.path.join(idp_dir), exist_ok=True)\n",
272 |     "    os.chdir(idp_dir)\n",
273 |     "    \n",
274 |     "    # extract the response variables for training and test set\n",
275 |     "    y_tr = df_tr[idp].to_numpy() \n",
276 |     "    y_te = df_te[idp].to_numpy()\n",
277 |     "    \n",
278 |     "    # remove gross outliers and implausible values\n",
279 |     "    yz_tr = (y_tr - np.mean(y_tr)) / np.std(y_tr)\n",
280 |     "    yz_te = (y_te - np.mean(y_te)) / np.std(y_te)\n",
281 |     "    nz_tr = np.bitwise_and(np.abs(yz_tr) < outlier_thresh, y_tr > 0)\n",
282 |     "    nz_te = np.bitwise_and(np.abs(yz_te) < outlier_thresh, y_te > 0)\n",
283 |     "    y_tr = y_tr[nz_tr]\n",
284 |     "    y_te = y_te[nz_te]\n",
285 |     "    \n",
286 |     "    # write out the response variables for training and test\n",
287 |     "    resp_file_tr = os.path.join(idp_dir, 'resp_tr.txt')\n",
288 |     "    resp_file_te = os.path.join(idp_dir, 'resp_te.txt') \n",
289 |     "    np.savetxt(resp_file_tr, y_tr)\n",
290 |     "    np.savetxt(resp_file_te, y_te)\n",
291 |     "        \n",
292 |     "    # configure the design matrix\n",
293 |     "    X_tr = create_design_matrix(df_tr[cols_cov].loc[nz_tr], \n",
294 |     "                                site_ids = df_tr['site'].loc[nz_tr],\n",
295 |     "                                basis = 'bspline', \n",
296 |     "                                xmin = xmin, \n",
297 |     "                                xmax = xmax)\n",
298 |     "    X_te = create_design_matrix(df_te[cols_cov].loc[nz_te], \n",
299 |     "                                site_ids = df_te['site'].loc[nz_te], \n",
300 |     "                                all_sites=site_ids,\n",
301 |     "                                basis = 'bspline', \n",
302 |     "                                xmin = xmin, \n",
303 |     "                                xmax = xmax)\n",
304 |     "\n",
305 |     "    # configure and save the covariates\n",
306 |     "    cov_file_tr = os.path.join(idp_dir, 'cov_bspline_tr.txt')\n",
307 |     "    cov_file_te = os.path.join(idp_dir, 'cov_bspline_te.txt')\n",
308 |     "    np.savetxt(cov_file_tr, X_tr)\n",
309 |     "    np.savetxt(cov_file_te, X_te)\n",
310 |     "\n",
311 |     "    if not force_refit and os.path.exists(os.path.join(idp_dir, 'Models', 'NM_0_0_estimate.pkl')):\n",
312 |     "        print('Making predictions using a pre-existing model...')\n",
313 |     "        suffix = 'predict'\n",
314 |     "        \n",
315 |     "        # Make prdictsion with test data\n",
316 |     "        predict(cov_file_te, \n",
317 |     "                alg='blr', \n",
318 |     "                respfile=resp_file_te, \n",
319 |     "                model_path=os.path.join(idp_dir,'Models'),\n",
320 |     "                outputsuffix=suffix)\n",
321 |     "    else:\n",
322 |     "        print('Estimating the normative model...')\n",
323 |     "        estimate(cov_file_tr, resp_file_tr, testresp=resp_file_te, \n",
324 |     "                 testcov=cov_file_te, alg='blr', optimizer = 'l-bfgs-b', \n",
325 |     "                 savemodel=True, warp=warp, warp_reparam=True)\n",
326 |     "        suffix = 'estimate'"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "id": "925f77cf-c873-4047-91ac-50b9571704fd",
332 |    "metadata": {
333 |     "id": "925f77cf-c873-4047-91ac-50b9571704fd"
334 |    },
335 |    "source": [
336 |     "### Compute error metrics\n",
337 |     "\n",
338 |     "In this section we compute the following error metrics for all IDPs (all evaluated on the test set):\n",
339 |     "\n",
340 |     "- Negative log likelihood (NLL)\n",
341 |     "- Explained variance (EV)\n",
342 |     "- Mean standardized log loss (MSLL)\n",
343 |     "- Bayesian information Criteria (BIC)\n",
344 |     "- Skew and Kurtosis of the Z-distribution"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "code",
349 |    "execution_count": null,
350 |    "id": "2e9d7500-4f46-4ee1-9756-81758ae5b1d1",
351 |    "metadata": {
352 |     "id": "2e9d7500-4f46-4ee1-9756-81758ae5b1d1"
353 |    },
354 |    "outputs": [],
355 |    "source": [
356 |     "# initialise dataframe we will use to store quantitative metrics \n",
357 |     "blr_metrics = pd.DataFrame(columns = ['eid', 'NLL', 'EV', 'MSLL', 'BIC','Skew','Kurtosis'])\n",
358 |     "\n",
359 |     "for idp_num, idp in enumerate(idp_ids): \n",
360 |     "    idp_dir = os.path.join(out_dir, idp)\n",
361 |     "    \n",
362 |     "    # load the predictions and true data. We use a custom function that ensures 2d arrays\n",
363 |     "    # equivalent to: y = np.loadtxt(filename); y = y[:, np.newaxis]\n",
364 |     "    yhat_te = load_2d(os.path.join(idp_dir, 'yhat_' + suffix + '.txt'))\n",
365 |     "    s2_te = load_2d(os.path.join(idp_dir, 'ys2_' + suffix + '.txt'))\n",
366 |     "    y_te = load_2d(os.path.join(idp_dir, 'resp_te.txt'))\n",
367 |     "    \n",
368 |     "    with open(os.path.join(idp_dir,'Models', 'NM_0_0_estimate.pkl'), 'rb') as handle:\n",
369 |     "        nm = pickle.load(handle) \n",
370 |     "    \n",
371 |     "    # compute error metrics\n",
372 |     "    if warp is None:\n",
373 |     "        metrics = evaluate(y_te, yhat_te)  \n",
374 |     "        \n",
375 |     "        # compute MSLL manually as a sanity check\n",
376 |     "        y_tr_mean = np.array( [[np.mean(y_tr)]] )\n",
377 |     "        y_tr_var = np.array( [[np.var(y_tr)]] )\n",
378 |     "        MSLL = compute_MSLL(y_te, yhat_te, s2_te, y_tr_mean, y_tr_var)         \n",
379 |     "    else:\n",
380 |     "        warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1] \n",
381 |     "        W = nm.blr.warp\n",
382 |     "        \n",
383 |     "        # warp predictions\n",
384 |     "        med_te = W.warp_predictions(np.squeeze(yhat_te), np.squeeze(s2_te), warp_param)[0]\n",
385 |     "        med_te = med_te[:, np.newaxis]\n",
386 |     "       \n",
387 |     "        # evaluation metrics\n",
388 |     "        metrics = evaluate(y_te, med_te)\n",
389 |     "        \n",
390 |     "        # compute MSLL manually\n",
391 |     "        y_te_w = W.f(y_te, warp_param)\n",
392 |     "        y_tr_w = W.f(y_tr, warp_param)\n",
393 |     "        y_tr_mean = np.array( [[np.mean(y_tr_w)]] )\n",
394 |     "        y_tr_var = np.array( [[np.var(y_tr_w)]] )\n",
395 |     "        MSLL = compute_MSLL(y_te_w, yhat_te, s2_te, y_tr_mean, y_tr_var)     \n",
396 |     "    \n",
397 |     "    Z = np.loadtxt(os.path.join(idp_dir, 'Z_' + suffix + '.txt'))\n",
398 |     "    [skew, sdskew, kurtosis, sdkurtosis, semean, sesd] = calibration_descriptives(Z)\n",
399 |     "    \n",
400 |     "    BIC = len(nm.blr.hyp) * np.log(y_tr.shape[0]) + 2 * nm.neg_log_lik\n",
401 |     "    \n",
402 |     "    blr_metrics.loc[len(blr_metrics)] = [idp, nm.neg_log_lik, metrics['EXPV'][0], \n",
403 |     "                                         MSLL[0], BIC, skew, kurtosis]\n",
404 |     "    \n",
405 |     "display(blr_metrics)\n",
406 |     "\n",
407 |     "blr_metrics.to_csv(os.path.join(out_dir,'blr_metrics.csv'))"
408 |    ]
409 |   },
410 |   {
411 |    "cell_type": "code",
412 |    "execution_count": null,
413 |    "id": "NCpzbIwGxVWj",
414 |    "metadata": {
415 |     "id": "NCpzbIwGxVWj"
416 |    },
417 |    "outputs": [],
418 |    "source": [
419 |     "blr_metrics['EV'].describe()"
420 |    ]
421 |   },
422 |   {
423 |    "cell_type": "code",
424 |    "execution_count": null,
425 |    "id": "Et7L-t9RJl75",
426 |    "metadata": {
427 |     "id": "Et7L-t9RJl75"
428 |    },
429 |    "outputs": [],
430 |    "source": [
431 |     "blr_metrics['MSLL'].describe()"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": null,
437 |    "id": "s25LW4QuJqfW",
438 |    "metadata": {
439 |     "id": "s25LW4QuJqfW"
440 |    },
441 |    "outputs": [],
442 |    "source": [
443 |     "blr_metrics['EV'].hist()"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": null,
449 |    "id": "mBhUMsojJu5J",
450 |    "metadata": {
451 |     "id": "mBhUMsojJu5J"
452 |    },
453 |    "outputs": [],
454 |    "source": [
455 |     "blr_metrics['MSLL'].hist()"
456 |    ]
457 |   }
458 |  ],
459 |  "metadata": {
460 |   "colab": {
461 |    "name": "1_fit_normative_models.ipynb",
462 |    "provenance": [],
463 |    "toc_visible": true
464 |   },
465 |   "kernelspec": {
466 |    "display_name": "Python 3 (ipykernel)",
467 |    "language": "python",
468 |    "name": "python3"
469 |   },
470 |   "language_info": {
471 |    "codemirror_mode": {
472 |     "name": "ipython",
473 |     "version": 3
474 |    },
475 |    "file_extension": ".py",
476 |    "mimetype": "text/x-python",
477 |    "name": "python",
478 |    "nbconvert_exporter": "python",
479 |    "pygments_lexer": "ipython3",
480 |    "version": "3.9.7"
481 |   }
482 |  },
483 |  "nbformat": 4,
484 |  "nbformat_minor": 5
485 | }
486 | 


--------------------------------------------------------------------------------
/tasks/2_apply_normative_models.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "2d8fb4c8-4360-4fdc-b0a2-e1c2e22bd8f9",
  6 |    "metadata": {
  7 |     "id": "2d8fb4c8-4360-4fdc-b0a2-e1c2e22bd8f9"
  8 |    },
  9 |    "source": [
 10 |     "## Using lifespan models to make predictions on new data\n",
 11 |     "\n",
 12 |     "This notebook shows how to apply the coefficients from [pre-estimated normative models](https://www.biorxiv.org/content/10.1101/2021.08.08.455487v2) to new data. This can be done in two different ways: (i) using a new set of data derived from the same sites used to estimate the model and (ii) on a completely different set of sites. In the latter case, we also need to estimate the site effect, which requires some calibration/adaptation data. As an illustrative example, we use a dataset derived from the [1000 functional connectomes project](https://www.nitrc.org/forum/forum.php?thread_id=2907&forum_id=1383) and adapt the learned model to make predictions on these data. \n",
 13 |     "\n",
 14 |     "First, if necessary, we install PCNtoolkit (note: this tutorial requires at least version 0.20)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "id": "8d05182a-5346-49d2-bfbf-fd3769ecc061",
 21 |    "metadata": {
 22 |     "colab": {
 23 |      "base_uri": "https://localhost:8080/",
 24 |      "height": 1000
 25 |     },
 26 |     "id": "8d05182a-5346-49d2-bfbf-fd3769ecc061",
 27 |     "outputId": "22c20334-2291-4553-8e95-9477882ce5c5"
 28 |    },
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "! pip install pcntoolkit==0.20"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "id": "5V6JFzpdJ43R",
 38 |    "metadata": {
 39 |     "colab": {
 40 |      "base_uri": "https://localhost:8080/"
 41 |     },
 42 |     "id": "5V6JFzpdJ43R",
 43 |     "outputId": "385682e4-f053-4cc6-d6b4-7e018eede435"
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "! git clone https://github.com/predictive-clinical-neuroscience/braincharts.git"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "id": "_5ZET1btKF6J",
 54 |    "metadata": {
 55 |     "id": "_5ZET1btKF6J"
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "# we need to be in the scripts folder when we import the libraries in the code block below,\n",
 60 |     "# because there is a function called nm_utils that is in the scripts folder that we need to import\n",
 61 |     "import os\n",
 62 |     "os.chdir('/content/braincharts/scripts/')"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "id": "b2227bc7-e798-470a-99bc-33561ce4511b",
 68 |    "metadata": {
 69 |     "id": "b2227bc7-e798-470a-99bc-33561ce4511b"
 70 |    },
 71 |    "source": [
 72 |     "Now we import the required libraries"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "id": "ff661cf2-7d80-46bb-bcfb-1650a93eed3d",
 79 |    "metadata": {
 80 |     "id": "ff661cf2-7d80-46bb-bcfb-1650a93eed3d"
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "import numpy as np\n",
 85 |     "import pandas as pd\n",
 86 |     "import pickle\n",
 87 |     "from matplotlib import pyplot as plt\n",
 88 |     "import seaborn as sns\n",
 89 |     "\n",
 90 |     "from pcntoolkit.normative import estimate, predict, evaluate\n",
 91 |     "from pcntoolkit.util.utils import compute_MSLL, create_design_matrix\n",
 92 |     "from nm_utils import remove_bad_subjects, load_2d"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "id": "TFxsGN-KgfE0",
 98 |    "metadata": {
 99 |     "id": "TFxsGN-KgfE0"
100 |    },
101 |    "source": [
102 |     "We need to unzip the models. "
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "id": "0OvpUTaIgekS",
109 |    "metadata": {
110 |     "id": "0OvpUTaIgekS"
111 |    },
112 |    "outputs": [],
113 |    "source": [
114 |     "os.chdir('/content/braincharts/models/')"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": null,
120 |    "id": "WBP9CEVcgsjT",
121 |    "metadata": {
122 |     "colab": {
123 |      "base_uri": "https://localhost:8080/"
124 |     },
125 |     "id": "WBP9CEVcgsjT",
126 |     "outputId": "da6f94f5-fff3-4ebb-aee1-45ddd0af0210"
127 |    },
128 |    "outputs": [],
129 |    "source": [
130 |     "ls"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": null,
136 |    "id": "is47bTl_guD4",
137 |    "metadata": {
138 |     "id": "is47bTl_guD4"
139 |    },
140 |    "outputs": [],
141 |    "source": [
142 |     "# we will use the biggest sample as our training set (approx. N=57000 subjects from 82 sites)\n",
143 |     "# for more info on the other pretrained models available in this repository, \n",
144 |     "# please refer to the accompanying preprint https://www.biorxiv.org/content/10.1101/2021.08.08.455487v2\n",
145 |     "! unzip lifespan_57K_82sites.zip"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "id": "802b1da6-04cc-4310-af81-f50d38c3e653",
151 |    "metadata": {
152 |     "id": "802b1da6-04cc-4310-af81-f50d38c3e653"
153 |    },
154 |    "source": [
155 |     "Next, we configure some basic variables, like where we want the analysis to be done and which model we want to use.\n",
156 |     "\n",
157 |     "**Note:** We maintain a list of site ids for each dataset, which describe the site names in the training and test data (`site_ids_tr` and `site_ids_te`), plus also the adaptation data . The training site ids are provided as a text file in the distribution and the test ids are extracted automatically from the pandas dataframe (see below). If you use additional data from the sites (e.g. later waves from ABCD), it may be necessary to adjust the site names to match the names in the training set. See the accompanying [paper](https://www.biorxiv.org/content/10.1101/2021.08.08.455487v2) for more details."
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "id": "26b35c64-41fd-4ecd-bf6e-3e7b34a67279",
164 |    "metadata": {
165 |     "id": "26b35c64-41fd-4ecd-bf6e-3e7b34a67279"
166 |    },
167 |    "outputs": [],
168 |    "source": [
169 |     "# which model do we wish to use?\n",
170 |     "model_name = 'lifespan_57K_82sites'\n",
171 |     "site_names = 'site_ids_ct_82sites.txt'\n",
172 |     "\n",
173 |     "# where the analysis takes place\n",
174 |     "root_dir = '/content/braincharts'\n",
175 |     "out_dir = os.path.join(root_dir, 'models', model_name)\n",
176 |     "\n",
177 |     "# load a set of site ids from this model. This must match the training data\n",
178 |     "with open(os.path.join(root_dir,'docs', site_names)) as f:\n",
179 |     "    site_ids_tr = f.read().splitlines()"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "id": "8dbaebd7-4f86-47d8-82a5-1776eb96690f",
185 |    "metadata": {
186 |     "id": "8dbaebd7-4f86-47d8-82a5-1776eb96690f"
187 |    },
188 |    "source": [
189 |     "### Download test dataset\n",
190 |     "\n",
191 |     "As mentioned above, to demonstrate this tool we will use a test dataset derived from the FCON 1000 dataset. We provide a prepackaged training/test split of these data in the required format (also after removing sites with only a few data points), [here](https://github.com/predictive-clinical-neuroscience/PCNtoolkit-demo/tree/main/data). you can get these data by running the following commmands:"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "id": "60f72165-9b2f-4248-ba72-1a1f9683d280",
198 |    "metadata": {
199 |     "colab": {
200 |      "base_uri": "https://localhost:8080/"
201 |     },
202 |     "id": "60f72165-9b2f-4248-ba72-1a1f9683d280",
203 |     "outputId": "7f665ae9-4bac-4b95-e733-d063624d24ea"
204 |    },
205 |    "outputs": [],
206 |    "source": [
207 |     "os.chdir(root_dir)\n",
208 |     "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/master/data/fcon1000_tr.csv\n",
209 |     "!wget -nc https://raw.githubusercontent.com/saigerutherford/CPC_ML_tutorial/master/data/fcon1000_te.csv"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "id": "3aab54a5-2579-48d8-a81b-bbd34cea1213",
215 |    "metadata": {
216 |     "id": "3aab54a5-2579-48d8-a81b-bbd34cea1213"
217 |    },
218 |    "source": [
219 |     "### Load test data\n",
220 |     "\n",
221 |     "Now we load the test data and remove some subjects that may have poor scan quality. This asssesment is based on the Freesurfer Euler characteristic as described in the papers below. \n",
222 |     "\n",
223 |     "**Note:** For the purposes of this tutorial, we make predictions for all sites in the FCON 1000 dataset, but two of them were also included in the training data (named 'Baltimore' and 'NewYork_a'). In this case, this will only slightly bias the accuracy, but in order to replicate the results in the paper, it would be necessary to additionally remove these sites from the test dataframe.\n",
224 |     "\n",
225 |     "**References**\n",
226 |     "- [Kia et al 2021](https://www.biorxiv.org/content/10.1101/2021.05.28.446120v1.abstract)\n",
227 |     "- [Rosen et al 2018](https://www.sciencedirect.com/science/article/abs/pii/S1053811917310832?via%3Dihub)"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": null,
233 |    "id": "262d429a-160b-4ba3-9ba4-9acc195bc644",
234 |    "metadata": {
235 |     "colab": {
236 |      "base_uri": "https://localhost:8080/"
237 |     },
238 |     "id": "262d429a-160b-4ba3-9ba4-9acc195bc644",
239 |     "outputId": "e38c0a03-3f44-463b-e385-ec01eafb660a"
240 |    },
241 |    "outputs": [],
242 |    "source": [
243 |     "test_data = os.path.join(root_dir, 'fcon1000_te.csv')\n",
244 |     "\n",
245 |     "df_te = pd.read_csv(test_data, index_col=0)\n",
246 |     "\n",
247 |     "# remove some bad subjects\n",
248 |     "df_te, bad_sub = remove_bad_subjects(df_te, df_te)\n",
249 |     "\n",
250 |     "# extract a list of unique site ids from the test set\n",
251 |     "site_ids_te =  sorted(set(df_te['site'].to_list()))"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "id": "c636509a-8b12-43f1-811c-08cb22640be2",
257 |    "metadata": {
258 |     "id": "c636509a-8b12-43f1-811c-08cb22640be2"
259 |    },
260 |    "source": [
261 |     "### Load adaptation data\n",
262 |     "\n",
263 |     "If the data you wish to make predictions for is not derived from the same scanning sites as those in the trainig set, it is necessary to learn the site effect so that we can account for it in the predictions. In order to do this in an unbiased way, we use a separate dataset, which we refer to as 'adaptation' data. This must contain data for all the same sites as in the test dataset and we assume these are coded in the same way, based on a the 'sitenum' column in the dataframe. "
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": null,
269 |    "id": "53551023-aff6-4934-ad2d-d77bc63c562d",
270 |    "metadata": {
271 |     "colab": {
272 |      "base_uri": "https://localhost:8080/"
273 |     },
274 |     "id": "53551023-aff6-4934-ad2d-d77bc63c562d",
275 |     "outputId": "b59cc4e3-3646-47b7-eff8-0abb60dce75e"
276 |    },
277 |    "outputs": [],
278 |    "source": [
279 |     "adaptation_data = os.path.join(root_dir, 'fcon1000_tr.csv')\n",
280 |     "\n",
281 |     "df_ad = pd.read_csv(adaptation_data, index_col=0)\n",
282 |     "\n",
283 |     "# remove some bad subjects\n",
284 |     "df_ad, bad_sub = remove_bad_subjects(df_ad, df_ad)\n",
285 |     "\n",
286 |     "# extract a list of unique site ids from the test set\n",
287 |     "site_ids_ad =  sorted(set(df_ad['site'].to_list()))\n",
288 |     "\n",
289 |     "if not all(elem in site_ids_ad for elem in site_ids_te):\n",
290 |     "    print('Warning: some of the testing sites are not in the adaptation data')"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "id": "4f73e30e-c693-44b8-98c6-52b71b577ea8",
296 |    "metadata": {
297 |     "id": "4f73e30e-c693-44b8-98c6-52b71b577ea8"
298 |    },
299 |    "source": [
300 |     "### Configure which models to fit\n",
301 |     "\n",
302 |     "Now, we configure which imaging derived phenotypes (IDPs) we would like to process. This is just a list of column names in the dataframe we have loaded above. \n",
303 |     "\n",
304 |     "We could load the whole set i.e., all phenotypes for which we have models for (188 brain regions)."
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": null,
310 |    "id": "b48e104c-cbac-4ae2-8377-cd3ff80162fd",
311 |    "metadata": {
312 |     "id": "b48e104c-cbac-4ae2-8377-cd3ff80162fd"
313 |    },
314 |    "outputs": [],
315 |    "source": [
316 |     "# load the list of idps for left and right hemispheres, plus subcortical regions\n",
317 |     "with open(os.path.join(root_dir,'docs','phenotypes_ct_lh.txt')) as f:\n",
318 |     "    idp_ids_lh = f.read().splitlines()\n",
319 |     "with open(os.path.join(root_dir,'docs','phenotypes_ct_rh.txt')) as f:\n",
320 |     "    idp_ids_rh = f.read().splitlines()\n",
321 |     "with open(os.path.join(root_dir,'docs','phenotypes_sc.txt')) as f:\n",
322 |     "    idp_ids_sc = f.read().splitlines()\n",
323 |     "\n",
324 |     "# we choose here to process all idps\n",
325 |     "idp_ids = idp_ids_lh + idp_ids_rh + idp_ids_sc"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "markdown",
330 |    "id": "280731ad-47d8-43e2-8cb5-4eccfd9f3f81",
331 |    "metadata": {
332 |     "id": "280731ad-47d8-43e2-8cb5-4eccfd9f3f81"
333 |    },
334 |    "source": [
335 |     "... or alternatively, we could just specify a list of the brain regions we are interested in. "
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "code",
340 |    "execution_count": null,
341 |    "id": "8b74d75f-77a5-474a-9c9b-29aab1ce53a2",
342 |    "metadata": {
343 |     "id": "8b74d75f-77a5-474a-9c9b-29aab1ce53a2"
344 |    },
345 |    "outputs": [],
346 |    "source": [
347 |     "idp_ids = [ 'Left-Thalamus-Proper', 'Left-Lateral-Ventricle', 'rh_MeanThickness_thickness']"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "id": "56ee1f7f-8684-4f1c-b142-a68176407029",
353 |    "metadata": {
354 |     "id": "56ee1f7f-8684-4f1c-b142-a68176407029"
355 |    },
356 |    "source": [
357 |     "### Configure covariates \n",
358 |     "\n",
359 |     "Now, we configure some parameters to fit the model. First, we choose which columns of the pandas dataframe contain the covariates (age and sex). The site parameters are configured automatically later on by the `configure_design_matrix()` function, when we loop through the IDPs in the list\n",
360 |     "\n",
361 |     "The supplied coefficients are derived from a 'warped' Bayesian linear regression model, which uses a nonlinear warping function to model non-Gaussianity (`sinarcsinh`) plus a non-linear basis expansion (a cubic b-spline basis set with 5 knot points, which is the default value in the PCNtoolkit package). Since we are sticking with the default value, we do not need to specify any parameters for this, but we do need to specify the limits. We choose to pad the input by a few years either side of the input range. We will also set a couple of options that control the estimation of the model\n",
362 |     "\n",
363 |     "For further details about the likelihood warping approach, see the accompanying paper and [Fraza et al 2021](https://www.biorxiv.org/content/10.1101/2021.04.05.438429v1)."
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "code",
368 |    "execution_count": null,
369 |    "id": "62312b8e-4972-4238-abf9-87d9bb33cc10",
370 |    "metadata": {
371 |     "id": "62312b8e-4972-4238-abf9-87d9bb33cc10"
372 |    },
373 |    "outputs": [],
374 |    "source": [
375 |     "# which data columns do we wish to use as covariates? \n",
376 |     "cols_cov = ['age','sex']\n",
377 |     "\n",
378 |     "# limits for cubic B-spline basis \n",
379 |     "xmin = -5 \n",
380 |     "xmax = 110\n",
381 |     "\n",
382 |     "# Absolute Z treshold above which a sample is considered to be an outlier (without fitting any model)\n",
383 |     "outlier_thresh = 7"
384 |    ]
385 |   },
386 |   {
387 |    "cell_type": "markdown",
388 |    "id": "42bc1072-e9ed-4f2a-9fdd-cbd626a61542",
389 |    "metadata": {
390 |     "id": "42bc1072-e9ed-4f2a-9fdd-cbd626a61542"
391 |    },
392 |    "source": [
393 |     "### Make predictions\n",
394 |     "\n",
395 |     "This will make predictions for each IDP separately. This is done by extracting a column from the dataframe (i.e. specifying the IDP as the response variable) and saving it as a numpy array. Then, we configure the covariates, which is a numpy data array having the number of rows equal to the number of datapoints in the test set. The columns are specified as follows: \n",
396 |     "\n",
397 |     "- A global intercept (column of ones)\n",
398 |     "- The covariate columns (here age and sex, coded as 0=female/1=male)\n",
399 |     "- Dummy coded columns for the sites in the training set (one column per site)\n",
400 |     "- Columns for the basis expansion (seven columns for the default parameterisation)\n",
401 |     "\n",
402 |     "Once these are saved as numpy arrays in ascii format (as here) or (alternatively) in pickle format, these are passed as inputs to the `predict()` method in the PCNtoolkit normative modelling framework. These are written in the same format to the location specified by `idp_dir`. At the end of this step, we have a set of predictions and Z-statistics for the test dataset that we can take forward to further analysis.\n",
403 |     "\n",
404 |     "Note that when we need to make predictions on new data, the procedure is more involved, since we need to prepare, process and store covariates, response variables and site ids for the adaptation data. "
405 |    ]
406 |   },
407 |   {
408 |    "cell_type": "code",
409 |    "execution_count": null,
410 |    "id": "07b7471b-c334-464f-8273-b409b7acaac2",
411 |    "metadata": {
412 |     "colab": {
413 |      "base_uri": "https://localhost:8080/"
414 |     },
415 |     "id": "07b7471b-c334-464f-8273-b409b7acaac2",
416 |     "outputId": "b5345b37-8335-47c6-c962-47d17a41c384"
417 |    },
418 |    "outputs": [],
419 |    "source": [
420 |     "for idp_num, idp in enumerate(idp_ids): \n",
421 |     "    print('Running IDP', idp_num, idp, ':')\n",
422 |     "    idp_dir = os.path.join(out_dir, idp)\n",
423 |     "    os.chdir(idp_dir)\n",
424 |     "    \n",
425 |     "    # extract and save the response variables for the test set\n",
426 |     "    y_te = df_te[idp].to_numpy()\n",
427 |     "    \n",
428 |     "    # save the variables\n",
429 |     "    resp_file_te = os.path.join(idp_dir, 'resp_te.txt') \n",
430 |     "    np.savetxt(resp_file_te, y_te)\n",
431 |     "        \n",
432 |     "    # configure and save the design matrix\n",
433 |     "    cov_file_te = os.path.join(idp_dir, 'cov_bspline_te.txt')\n",
434 |     "    X_te = create_design_matrix(df_te[cols_cov], \n",
435 |     "                                site_ids = df_te['site'],\n",
436 |     "                                all_sites = site_ids_tr,\n",
437 |     "                                basis = 'bspline', \n",
438 |     "                                xmin = xmin, \n",
439 |     "                                xmax = xmax)\n",
440 |     "    np.savetxt(cov_file_te, X_te)\n",
441 |     "    \n",
442 |     "    # check whether all sites in the test set are represented in the training set\n",
443 |     "    if all(elem in site_ids_tr for elem in site_ids_te):\n",
444 |     "        print('All sites are present in the training data')\n",
445 |     "        \n",
446 |     "        # just make predictions\n",
447 |     "        yhat_te, s2_te, Z = predict(cov_file_te, \n",
448 |     "                                    alg='blr', \n",
449 |     "                                    respfile=resp_file_te, \n",
450 |     "                                    model_path=os.path.join(idp_dir,'Models'))\n",
451 |     "    else:\n",
452 |     "        print('Some sites missing from the training data. Adapting model')\n",
453 |     "        \n",
454 |     "        # save the covariates for the adaptation data\n",
455 |     "        X_ad = create_design_matrix(df_ad[cols_cov], \n",
456 |     "                                    site_ids = df_ad['site'],\n",
457 |     "                                    all_sites = site_ids_tr,\n",
458 |     "                                    basis = 'bspline', \n",
459 |     "                                    xmin = xmin, \n",
460 |     "                                    xmax = xmax)\n",
461 |     "        cov_file_ad = os.path.join(idp_dir, 'cov_bspline_ad.txt')          \n",
462 |     "        np.savetxt(cov_file_ad, X_ad)\n",
463 |     "        \n",
464 |     "        # save the responses for the adaptation data\n",
465 |     "        resp_file_ad = os.path.join(idp_dir, 'resp_ad.txt') \n",
466 |     "        y_ad = df_ad[idp].to_numpy()\n",
467 |     "        np.savetxt(resp_file_ad, y_ad)\n",
468 |     "       \n",
469 |     "        # save the site ids for the adaptation data\n",
470 |     "        sitenum_file_ad = os.path.join(idp_dir, 'sitenum_ad.txt') \n",
471 |     "        site_num_ad = df_ad['sitenum'].to_numpy(dtype=int)\n",
472 |     "        np.savetxt(sitenum_file_ad, site_num_ad)\n",
473 |     "        \n",
474 |     "        # save the site ids for the test data \n",
475 |     "        sitenum_file_te = os.path.join(idp_dir, 'sitenum_te.txt')\n",
476 |     "        site_num_te = df_te['sitenum'].to_numpy(dtype=int)\n",
477 |     "        np.savetxt(sitenum_file_te, site_num_te)\n",
478 |     "         \n",
479 |     "        yhat_te, s2_te, Z = predict(cov_file_te, \n",
480 |     "                                    alg = 'blr', \n",
481 |     "                                    respfile = resp_file_te, \n",
482 |     "                                    model_path = os.path.join(idp_dir,'Models'),\n",
483 |     "                                    adaptrespfile = resp_file_ad,\n",
484 |     "                                    adaptcovfile = cov_file_ad,\n",
485 |     "                                    adaptvargroupfile = sitenum_file_ad,\n",
486 |     "                                    testvargroupfile = sitenum_file_te)"
487 |    ]
488 |   },
489 |   {
490 |    "cell_type": "markdown",
491 |    "id": "75210821-ccb8-4bd2-82f3-641708811b21",
492 |    "metadata": {
493 |     "id": "75210821-ccb8-4bd2-82f3-641708811b21"
494 |    },
495 |    "source": [
496 |     "### Preparing dummy data for plotting\n",
497 |     "\n",
498 |     "Now, we plot the centiles of variation estimated by the normative model. \n",
499 |     "\n",
500 |     "We do this by making use of a set of dummy covariates that span the whole range of the input space (for age) for a fixed value of the other covariates (e.g. sex) so that we can make predictions for these dummy data points, then plot them. We configure these dummy predictions using the same procedure as we used for the real data. We can use the same dummy data for all the IDPs we wish to plot"
501 |    ]
502 |   },
503 |   {
504 |    "cell_type": "code",
505 |    "execution_count": null,
506 |    "id": "2d0743d8-28ca-4a14-8ef0-99bf40434b5b",
507 |    "metadata": {
508 |     "colab": {
509 |      "base_uri": "https://localhost:8080/"
510 |     },
511 |     "id": "2d0743d8-28ca-4a14-8ef0-99bf40434b5b",
512 |     "outputId": "7d4c8f2e-ca79-46e1-e5a8-0733503fde94"
513 |    },
514 |    "outputs": [],
515 |    "source": [
516 |     "# which sex do we want to plot? \n",
517 |     "sex = 1 # 1 = male 0 = female\n",
518 |     "if sex == 1: \n",
519 |     "    clr = 'blue';\n",
520 |     "else:\n",
521 |     "    clr = 'red'\n",
522 |     "\n",
523 |     "# create dummy data for visualisation\n",
524 |     "print('configuring dummy data ...')\n",
525 |     "xx = np.arange(xmin, xmax, 0.5)\n",
526 |     "X0_dummy = np.zeros((len(xx), 2))\n",
527 |     "X0_dummy[:,0] = xx\n",
528 |     "X0_dummy[:,1] = sex\n",
529 |     "\n",
530 |     "# create the design matrix\n",
531 |     "X_dummy = create_design_matrix(X0_dummy, xmin=xmin, xmax=xmax, site_ids=None, all_sites=site_ids_tr)\n",
532 |     "\n",
533 |     "# save the dummy covariates\n",
534 |     "cov_file_dummy = os.path.join(out_dir,'cov_bspline_dummy_mean.txt')\n",
535 |     "np.savetxt(cov_file_dummy, X_dummy)"
536 |    ]
537 |   },
538 |   {
539 |    "cell_type": "markdown",
540 |    "id": "126323a3-2270-4796-97c4-94629730ddf7",
541 |    "metadata": {
542 |     "id": "126323a3-2270-4796-97c4-94629730ddf7"
543 |    },
544 |    "source": [
545 |     "### Plotting the normative models\n",
546 |     "\n",
547 |     "Now we loop through the IDPs, plotting each one separately. The outputs of this step are a set of quantitative regression metrics for each IDP and a set of centile curves which we plot the test data against. \n",
548 |     "\n",
549 |     "This part of the code is relatively complex because we need to keep track of many quantities for the plotting. We also need to remember whether the data need to be warped or not. By default in PCNtoolkit, predictions in the form of `yhat, s2` are always in the warped (Gaussian) space. If we want predictions in the input (non-Gaussian) space, then we need to warp them with the inverse of the estimated warping function. This can be done using the function `nm.blr.warp.warp_predictions()`. \n",
550 |     "\n",
551 |     "**Note:** it is necessary to update the intercept for each of the sites. For purposes of visualisation, here we do this by adjusting the median of the data to match the dummy predictions, but note that all the quantitative metrics are estimated using the predictions that are adjusted properly using a learned offset (or adjusted using a hold-out adaptation set, as above). Note also that for the calibration data we require at least two data points of the same sex in each site to be able to estimate the variance. Of course, in a real example, you would want many more than just two since we need to get a reliable estimate of the variance for each site. "
552 |    ]
553 |   },
554 |   {
555 |    "cell_type": "code",
556 |    "execution_count": null,
557 |    "id": "cdd68cc6-212b-4149-b86a-24e842078e1a",
558 |    "metadata": {
559 |     "id": "cdd68cc6-212b-4149-b86a-24e842078e1a"
560 |    },
561 |    "outputs": [],
562 |    "source": [
563 |     "sns.set(style='whitegrid')\n",
564 |     "\n",
565 |     "for idp_num, idp in enumerate(idp_ids): \n",
566 |     "    print('Running IDP', idp_num, idp, ':')\n",
567 |     "    idp_dir = os.path.join(out_dir, idp)\n",
568 |     "    os.chdir(idp_dir)\n",
569 |     "    \n",
570 |     "    # load the true data points\n",
571 |     "    yhat_te = load_2d(os.path.join(idp_dir, 'yhat_predict.txt'))\n",
572 |     "    s2_te = load_2d(os.path.join(idp_dir, 'ys2_predict.txt'))\n",
573 |     "    y_te = load_2d(os.path.join(idp_dir, 'resp_te.txt'))\n",
574 |     "            \n",
575 |     "    # set up the covariates for the dummy data\n",
576 |     "    print('Making predictions with dummy covariates (for visualisation)')\n",
577 |     "    yhat, s2 = predict(cov_file_dummy, \n",
578 |     "                       alg = 'blr', \n",
579 |     "                       respfile = None, \n",
580 |     "                       model_path = os.path.join(idp_dir,'Models'), \n",
581 |     "                       outputsuffix = '_dummy')\n",
582 |     "    \n",
583 |     "    # load the normative model\n",
584 |     "    with open(os.path.join(idp_dir,'Models', 'NM_0_0_estimate.pkl'), 'rb') as handle:\n",
585 |     "        nm = pickle.load(handle) \n",
586 |     "    \n",
587 |     "    # get the warp and warp parameters\n",
588 |     "    W = nm.blr.warp\n",
589 |     "    warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1] \n",
590 |     "        \n",
591 |     "    # first, we warp predictions for the true data and compute evaluation metrics\n",
592 |     "    med_te = W.warp_predictions(np.squeeze(yhat_te), np.squeeze(s2_te), warp_param)[0]\n",
593 |     "    med_te = med_te[:, np.newaxis]\n",
594 |     "    print('metrics:', evaluate(y_te, med_te))\n",
595 |     "    \n",
596 |     "    # then, we warp dummy predictions to create the plots\n",
597 |     "    med, pr_int = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param)\n",
598 |     "    \n",
599 |     "    # extract the different variance components to visualise\n",
600 |     "    beta, junk1, junk2 = nm.blr._parse_hyps(nm.blr.hyp, X_dummy)\n",
601 |     "    s2n = 1/beta # variation (aleatoric uncertainty)\n",
602 |     "    s2s = s2-s2n # modelling uncertainty (epistemic uncertainty)\n",
603 |     "    \n",
604 |     "    # plot the data points\n",
605 |     "    y_te_rescaled_all = np.zeros_like(y_te)\n",
606 |     "    for sid, site in enumerate(site_ids_te):\n",
607 |     "        # plot the true test data points \n",
608 |     "        if all(elem in site_ids_tr for elem in site_ids_te):\n",
609 |     "            # all data in the test set are present in the training set\n",
610 |     "            \n",
611 |     "            # first, we select the data points belonging to this particular site\n",
612 |     "            idx = np.where(np.bitwise_and(X_te[:,2] == sex, X_te[:,sid+len(cols_cov)+1] !=0))[0]\n",
613 |     "            if len(idx) == 0:\n",
614 |     "                print('No data for site', sid, site, 'skipping...')\n",
615 |     "                continue\n",
616 |     "            \n",
617 |     "            # then directly adjust the data\n",
618 |     "            idx_dummy = np.bitwise_and(X_dummy[:,1] > X_te[idx,1].min(), X_dummy[:,1] < X_te[idx,1].max())\n",
619 |     "            y_te_rescaled = y_te[idx] - np.median(y_te[idx]) + np.median(med[idx_dummy])\n",
620 |     "        else:\n",
621 |     "            # we need to adjust the data based on the adaptation dataset \n",
622 |     "            \n",
623 |     "            # first, select the data point belonging to this particular site\n",
624 |     "            idx = np.where(np.bitwise_and(X_te[:,2] == sex, (df_te['site'] == site).to_numpy()))[0]\n",
625 |     "            \n",
626 |     "            # load the adaptation data\n",
627 |     "            y_ad = load_2d(os.path.join(idp_dir, 'resp_ad.txt'))\n",
628 |     "            X_ad = load_2d(os.path.join(idp_dir, 'cov_bspline_ad.txt'))\n",
629 |     "            idx_a = np.where(np.bitwise_and(X_ad[:,2] == sex, (df_ad['site'] == site).to_numpy()))[0]\n",
630 |     "            if len(idx) < 2 or len(idx_a) < 2:\n",
631 |     "                print('Insufficent data for site', sid, site, 'skipping...')\n",
632 |     "                continue\n",
633 |     "            \n",
634 |     "            # adjust and rescale the data\n",
635 |     "            y_te_rescaled, s2_rescaled = nm.blr.predict_and_adjust(nm.blr.hyp, \n",
636 |     "                                                                   X_ad[idx_a,:], \n",
637 |     "                                                                   np.squeeze(y_ad[idx_a]), \n",
638 |     "                                                                   Xs=None, \n",
639 |     "                                                                   ys=np.squeeze(y_te[idx]))\n",
640 |     "        # plot the (adjusted) data points\n",
641 |     "        plt.scatter(X_te[idx,1], y_te_rescaled, s=4, color=clr, alpha = 0.1)\n",
642 |     "       \n",
643 |     "    # plot the median of the dummy data\n",
644 |     "    plt.plot(xx, med, clr)\n",
645 |     "    \n",
646 |     "    # fill the gaps in between the centiles\n",
647 |     "    junk, pr_int25 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.25,0.75])\n",
648 |     "    junk, pr_int95 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.05,0.95])\n",
649 |     "    junk, pr_int99 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.01,0.99])\n",
650 |     "    plt.fill_between(xx, pr_int25[:,0], pr_int25[:,1], alpha = 0.1,color=clr)\n",
651 |     "    plt.fill_between(xx, pr_int95[:,0], pr_int95[:,1], alpha = 0.1,color=clr)\n",
652 |     "    plt.fill_between(xx, pr_int99[:,0], pr_int99[:,1], alpha = 0.1,color=clr)\n",
653 |     "            \n",
654 |     "    # make the width of each centile proportional to the epistemic uncertainty\n",
655 |     "    junk, pr_int25l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.25,0.75])\n",
656 |     "    junk, pr_int95l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.05,0.95])\n",
657 |     "    junk, pr_int99l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.01,0.99])\n",
658 |     "    junk, pr_int25u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.25,0.75])\n",
659 |     "    junk, pr_int95u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.05,0.95])\n",
660 |     "    junk, pr_int99u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.01,0.99])    \n",
661 |     "    plt.fill_between(xx, pr_int25l[:,0], pr_int25u[:,0], alpha = 0.3,color=clr)\n",
662 |     "    plt.fill_between(xx, pr_int95l[:,0], pr_int95u[:,0], alpha = 0.3,color=clr)\n",
663 |     "    plt.fill_between(xx, pr_int99l[:,0], pr_int99u[:,0], alpha = 0.3,color=clr)\n",
664 |     "    plt.fill_between(xx, pr_int25l[:,1], pr_int25u[:,1], alpha = 0.3,color=clr)\n",
665 |     "    plt.fill_between(xx, pr_int95l[:,1], pr_int95u[:,1], alpha = 0.3,color=clr)\n",
666 |     "    plt.fill_between(xx, pr_int99l[:,1], pr_int99u[:,1], alpha = 0.3,color=clr)\n",
667 |     "\n",
668 |     "    # plot actual centile lines\n",
669 |     "    plt.plot(xx, pr_int25[:,0],color=clr, linewidth=0.5)\n",
670 |     "    plt.plot(xx, pr_int25[:,1],color=clr, linewidth=0.5)\n",
671 |     "    plt.plot(xx, pr_int95[:,0],color=clr, linewidth=0.5)\n",
672 |     "    plt.plot(xx, pr_int95[:,1],color=clr, linewidth=0.5)\n",
673 |     "    plt.plot(xx, pr_int99[:,0],color=clr, linewidth=0.5)\n",
674 |     "    plt.plot(xx, pr_int99[:,1],color=clr, linewidth=0.5)\n",
675 |     "    \n",
676 |     "    plt.xlabel('Age')\n",
677 |     "    plt.ylabel(idp) \n",
678 |     "    plt.title(idp)\n",
679 |     "    plt.xlim((0,90))\n",
680 |     "    plt.savefig(os.path.join(idp_dir, 'centiles_' + str(sex)),  bbox_inches='tight')\n",
681 |     "    plt.show()\n",
682 |     "\n",
683 |     "os.chdir(out_dir)"
684 |    ]
685 |   },
686 |   {
687 |    "cell_type": "code",
688 |    "execution_count": null,
689 |    "id": "OMUyOWOLmU1b",
690 |    "metadata": {
691 |     "colab": {
692 |      "base_uri": "https://localhost:8080/"
693 |     },
694 |     "id": "OMUyOWOLmU1b",
695 |     "outputId": "b0111629-5919-40a4-cde7-dbf5eaf9f692"
696 |    },
697 |    "outputs": [],
698 |    "source": [
699 |     "# explore an example output folder of a single model (one ROI)\n",
700 |     "# think about what each of these output files represents. \n",
701 |     "# Hint: look at the variable names and comments in the code block above\n",
702 |     "! ls rh_MeanThickness_thickness/"
703 |    ]
704 |   },
705 |   {
706 |    "cell_type": "code",
707 |    "execution_count": null,
708 |    "id": "TJIFVhQ5zKBw",
709 |    "metadata": {
710 |     "colab": {
711 |      "base_uri": "https://localhost:8080/"
712 |     },
713 |     "id": "TJIFVhQ5zKBw",
714 |     "outputId": "d9f6e492-7ec1-40af-bcb4-1ea94eaed09e"
715 |    },
716 |    "outputs": [],
717 |    "source": [
718 |     "# check that the number of deviation scores matches the number of subjects in the test set\n",
719 |     "# there should be one deviation score per subject (one line per subject), so we can\n",
720 |     "# verify by counting the line numbers in the Z_predict.txt file\n",
721 |     "! cat rh_MeanThickness_thickness/Z_predict.txt | wc"
722 |    ]
723 |   },
724 |   {
725 |    "cell_type": "markdown",
726 |    "id": "hZEs7Ej4-qGi",
727 |    "metadata": {
728 |     "id": "hZEs7Ej4-qGi"
729 |    },
730 |    "source": [
731 |     "The deviation scores are output as a text file in separate folders. We want to summarize the deviation scores across all models estimates so we can organize them into a single file, and merge the deviation scores into the original data file. "
732 |    ]
733 |   },
734 |   {
735 |    "cell_type": "code",
736 |    "execution_count": null,
737 |    "id": "L-OauNfc5Jrx",
738 |    "metadata": {
739 |     "id": "L-OauNfc5Jrx"
740 |    },
741 |    "outputs": [],
742 |    "source": [
743 |     "! mkdir deviation_scores"
744 |    ]
745 |   },
746 |   {
747 |    "cell_type": "code",
748 |    "execution_count": null,
749 |    "id": "ZEgnixDd5KgK",
750 |    "metadata": {
751 |     "id": "ZEgnixDd5KgK"
752 |    },
753 |    "outputs": [],
754 |    "source": [
755 |     "! for i in *; do if [[ -e ${i}/Z_predict.txt ]]; then cp ${i}/Z_predict.txt deviation_scores/${i}_Z_predict.txt; fi; done"
756 |    ]
757 |   },
758 |   {
759 |    "cell_type": "code",
760 |    "execution_count": null,
761 |    "id": "10gP5z-t7-ZC",
762 |    "metadata": {
763 |     "id": "10gP5z-t7-ZC"
764 |    },
765 |    "outputs": [],
766 |    "source": [
767 |     "z_dir = '/content/braincharts/models/lifespan_57K_82sites/deviation_scores/'\n",
768 |     "filelist = [name for name in os.listdir(z_dir)]"
769 |    ]
770 |   },
771 |   {
772 |    "cell_type": "code",
773 |    "execution_count": null,
774 |    "id": "Q2GAFv5F8TFa",
775 |    "metadata": {
776 |     "id": "Q2GAFv5F8TFa"
777 |    },
778 |    "outputs": [],
779 |    "source": [
780 |     "os.chdir(z_dir)\n",
781 |     "Z_df = pd.concat([pd.read_csv(item, names=[item[:-4]]) for item in filelist], axis=1)"
782 |    ]
783 |   },
784 |   {
785 |    "cell_type": "code",
786 |    "execution_count": null,
787 |    "id": "FHcx4vsj8eMf",
788 |    "metadata": {
789 |     "id": "FHcx4vsj8eMf"
790 |    },
791 |    "outputs": [],
792 |    "source": [
793 |     "df_te.reset_index(inplace=True)"
794 |    ]
795 |   },
796 |   {
797 |    "cell_type": "code",
798 |    "execution_count": null,
799 |    "id": "9werTREu8c0P",
800 |    "metadata": {
801 |     "id": "9werTREu8c0P"
802 |    },
803 |    "outputs": [],
804 |    "source": [
805 |     "Z_df['sub_id'] = df_te['sub_id']"
806 |    ]
807 |   },
808 |   {
809 |    "cell_type": "code",
810 |    "execution_count": null,
811 |    "id": "WgAJ86wy9U5A",
812 |    "metadata": {
813 |     "id": "WgAJ86wy9U5A"
814 |    },
815 |    "outputs": [],
816 |    "source": [
817 |     "df_te_Z = pd.merge(df_te, Z_df, on='sub_id', how='inner')"
818 |    ]
819 |   },
820 |   {
821 |    "cell_type": "code",
822 |    "execution_count": null,
823 |    "id": "bn6I12zh9t1g",
824 |    "metadata": {
825 |     "id": "bn6I12zh9t1g"
826 |    },
827 |    "outputs": [],
828 |    "source": [
829 |     "df_te_Z.to_csv('fcon1000_te_Z.csv', index=False)"
830 |    ]
831 |   }
832 |  ],
833 |  "metadata": {
834 |   "colab": {
835 |    "name": "apply_normative_models.ipynb",
836 |    "provenance": []
837 |   },
838 |   "kernelspec": {
839 |    "display_name": "Python 3 (ipykernel)",
840 |    "language": "python",
841 |    "name": "python3"
842 |   },
843 |   "language_info": {
844 |    "codemirror_mode": {
845 |     "name": "ipython",
846 |     "version": 3
847 |    },
848 |    "file_extension": ".py",
849 |    "mimetype": "text/x-python",
850 |    "name": "python",
851 |    "nbconvert_exporter": "python",
852 |    "pygments_lexer": "ipython3",
853 |    "version": "3.9.7"
854 |   }
855 |  },
856 |  "nbformat": 4,
857 |  "nbformat_minor": 5
858 | }
859 | 


--------------------------------------------------------------------------------
/tasks/3_Visualizations.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "id": "HWR8M_FM0kCa"
  7 |    },
  8 |    "source": [
  9 |     "# Brain Space Visualization of Deviation Scores"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {
 15 |     "id": "nEkEuf7H0kCb"
 16 |    },
 17 |    "source": [
 18 |     "## Count the number of extreme (positive & negative) deviations at each brain region and visualize the count for each hemisphere."
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {
 25 |     "id": "SdVyEOWVJNyy"
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "! git clone https://github.com/saigerutherford/CPC_ML_tutorial.git"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "id": "6c0O3oKQ0kCW"
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "import os\n",
 41 |     "import pandas as pd\n",
 42 |     "import numpy as np\n",
 43 |     "import matplotlib.pyplot as plt\n",
 44 |     "import seaborn as sns"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {
 51 |     "id": "XwQOtrKmKd-T"
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "os.chdir('/content/CPC_ML_tutorial')"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {
 62 |     "id": "bruuGS8Z0kCb"
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "Z_df = pd.read_csv('data/Z_long_format.csv')"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {
 73 |     "id": "CtDtz47p0kCn"
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "# Change this threshold to view more or less extreme deviations.\n",
 78 |     "# Discuss with your partner what you think is an appropriate threshold and adjust the below variables accordingly.\n",
 79 |     "Z_positive = Z_df.query('value > 2')\n",
 80 |     "Z_negative = Z_df.query('value < -2')"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "metadata": {
 87 |     "id": "OCrM8a-c0kCn"
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "positive_left_z = Z_positive.query('hemi == \"left\"')\n",
 92 |     "positive_right_z = Z_positive.query('hemi == \"right\"')\n",
 93 |     "positive_sc_z = Z_positive.query('hemi == \"subcortical\"')\n",
 94 |     "negative_left_z = Z_negative.query('hemi == \"left\"')\n",
 95 |     "negative_right_z = Z_negative.query('hemi == \"right\"')\n",
 96 |     "negative_sc_z = Z_negative.query('hemi == \"subcortical\"')"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {
103 |     "id": "2oeEd6Ay0kCo"
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "positive_left_z2 = positive_left_z['ROI_name'].value_counts().rename_axis('ROI').reset_index(name='counts')\n",
108 |     "positive_right_z2 = positive_right_z['ROI_name'].value_counts().rename_axis('ROI').reset_index(name='counts')\n",
109 |     "positive_sc_z2 = positive_sc_z['ROI_name'].value_counts().rename_axis('ROI').reset_index(name='counts')\n",
110 |     "negative_left_z2 = negative_left_z['ROI_name'].value_counts().rename_axis('ROI').reset_index(name='counts')\n",
111 |     "negative_right_z2 = negative_right_z['ROI_name'].value_counts().rename_axis('ROI').reset_index(name='counts')\n",
112 |     "negative_sc_z2 = negative_sc_z['ROI_name'].value_counts().rename_axis('ROI').reset_index(name='counts')"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {
119 |     "id": "NlTVUuR6TyXq"
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "positive_left_z2.describe()"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": null,
129 |    "metadata": {
130 |     "id": "ugzcZHLDT8ve"
131 |    },
132 |    "outputs": [],
133 |    "source": [
134 |     "positive_right_z2.describe()"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "metadata": {
141 |     "id": "ugzcZHLDT8ve"
142 |    },
143 |    "outputs": [],
144 |    "source": [
145 |     "positive_sc_z2.describe()"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "metadata": {
152 |     "id": "NlTVUuR6TyXq"
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "negative_left_z2.describe()"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": null,
162 |    "metadata": {
163 |     "id": "ugzcZHLDT8ve"
164 |    },
165 |    "outputs": [],
166 |    "source": [
167 |     "negative_right_z2.describe()"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": null,
173 |    "metadata": {
174 |     "id": "ugzcZHLDT8ve"
175 |    },
176 |    "outputs": [],
177 |    "source": [
178 |     "negative_sc_z2.describe()"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "id": "BikyDuO_K_I3"
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "! pip install nilearn"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {
196 |     "id": "mzYmi_cK0kCo"
197 |    },
198 |    "outputs": [],
199 |    "source": [
200 |     "from nilearn import plotting\n",
201 |     "import nibabel as nib\n",
202 |     "from nilearn import datasets"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": null,
208 |    "metadata": {
209 |     "id": "jxRXeYHd0kCp"
210 |    },
211 |    "outputs": [],
212 |    "source": [
213 |     "destrieux_atlas = datasets.fetch_atlas_surf_destrieux()\n",
214 |     "fsaverage = datasets.fetch_surf_fsaverage()"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "metadata": {
221 |     "id": "18hyJU3Z0kCp"
222 |    },
223 |    "outputs": [],
224 |    "source": [
225 |     "# The parcellation is already loaded into memory\n",
226 |     "parcellation_l = destrieux_atlas['map_left']\n",
227 |     "parcellation_r = destrieux_atlas['map_right']"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": null,
233 |    "metadata": {
234 |     "id": "9ewObTkj0kCp"
235 |    },
236 |    "outputs": [],
237 |    "source": [
238 |     "nl = pd.read_csv('data/nilearn_order.csv')"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {
245 |     "id": "RoAgRhiO0kCq"
246 |    },
247 |    "outputs": [],
248 |    "source": [
249 |     "atlas_r = destrieux_atlas['map_right']\n",
250 |     "atlas_l = destrieux_atlas['map_left']"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": null,
256 |    "metadata": {
257 |     "id": "gvN_Slut0kCq"
258 |    },
259 |    "outputs": [],
260 |    "source": [
261 |     "nl_ROI = nl['ROI'].to_list()"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "metadata": {
267 |     "id": "T8wirzP50kCq"
268 |    },
269 |    "source": [
270 |     "# Extreme positive deviation viz"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": null,
276 |    "metadata": {
277 |     "id": "sVur0mfY0kCq"
278 |    },
279 |    "outputs": [],
280 |    "source": [
281 |     "nl_positive_left = pd.merge(nl, positive_left_z2, on='ROI', how='left')\n",
282 |     "nl_positive_right = pd.merge(nl, positive_right_z2, on='ROI', how='left')"
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "code",
287 |    "execution_count": null,
288 |    "metadata": {
289 |     "id": "V1b4dXlB0kCq"
290 |    },
291 |    "outputs": [],
292 |    "source": [
293 |     "nl_positive_left['counts'] = nl_positive_right['counts'].fillna(0)\n",
294 |     "nl_positive_right['counts'] = nl_positive_right['counts'].fillna(0)"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": null,
300 |    "metadata": {
301 |     "id": "6TIc-8JE0kCr"
302 |    },
303 |    "outputs": [],
304 |    "source": [
305 |     "nl_positive_left = nl_positive_left['counts'].to_numpy()\n",
306 |     "nl_positive_right = nl_positive_right['counts'].to_numpy()"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": null,
312 |    "metadata": {
313 |     "id": "dpFgV9610kCr"
314 |    },
315 |    "outputs": [],
316 |    "source": [
317 |     "a_list = list(range(1, 76))\n",
318 |     "parcellation_positive_l = atlas_l\n",
319 |     "for i, j in enumerate(a_list):\n",
320 |     "    parcellation_positive_l = np.where(parcellation_positive_l == j, nl_positive_left[i], parcellation_positive_l)"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "code",
325 |    "execution_count": null,
326 |    "metadata": {
327 |     "id": "LEb86JQP0kCr"
328 |    },
329 |    "outputs": [],
330 |    "source": [
331 |     "a_list = list(range(1, 76))\n",
332 |     "parcellation_positive_r = atlas_r\n",
333 |     "for i, j in enumerate(a_list):\n",
334 |     "    parcellation_positive_r = np.where(parcellation_positive_r == j, nl_positive_right[i], parcellation_positive_r)"
335 |    ]
336 |   },
337 |   {
338 |    "cell_type": "code",
339 |    "execution_count": null,
340 |    "metadata": {
341 |     "id": "Uvo8bs0J0kCr"
342 |    },
343 |    "outputs": [],
344 |    "source": [
345 |     "# you can click around in 3D space on this visualization. Scroll in/out, move the brain around, etc. Have fun with it :) \n",
346 |     "view = plotting.view_surf(fsaverage.infl_right, parcellation_positive_r, threshold=None, symmetric_cmap=False, cmap='plasma', bg_map=fsaverage.sulc_right)\n",
347 |     "\n",
348 |     "view"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "code",
353 |    "execution_count": null,
354 |    "metadata": {
355 |     "id": "knItaGcv0kCr"
356 |    },
357 |    "outputs": [],
358 |    "source": [
359 |     "view = plotting.view_surf(fsaverage.infl_left, parcellation_positive_l, threshold=None, symmetric_cmap=False, cmap='plasma', bg_map=fsaverage.sulc_left)\n",
360 |     "\n",
361 |     "view"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "metadata": {
367 |     "id": "k-ASdN3T0kCr"
368 |    },
369 |    "source": [
370 |     "# Extreme negative deviation viz"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "code",
375 |    "execution_count": null,
376 |    "metadata": {
377 |     "id": "8jEf15fg0kCr"
378 |    },
379 |    "outputs": [],
380 |    "source": [
381 |     "nl_negative_left = pd.merge(nl, negative_left_z2, on='ROI', how='left')\n",
382 |     "nl_negative_right = pd.merge(nl, negative_right_z2, on='ROI', how='left')"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": null,
388 |    "metadata": {
389 |     "id": "TcPzp2ZC0kCr"
390 |    },
391 |    "outputs": [],
392 |    "source": [
393 |     "nl_negative_left['counts'] = nl_negative_left['counts'].fillna(0)\n",
394 |     "nl_negative_right['counts'] = nl_negative_right['counts'].fillna(0)"
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": null,
400 |    "metadata": {
401 |     "id": "KIoAvRlN0kCs"
402 |    },
403 |    "outputs": [],
404 |    "source": [
405 |     "nl_negative_left = nl_negative_left['counts'].to_numpy()\n",
406 |     "nl_negative_right = nl_negative_right['counts'].to_numpy()"
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "code",
411 |    "execution_count": null,
412 |    "metadata": {
413 |     "id": "Ksv8QuLW0kCs"
414 |    },
415 |    "outputs": [],
416 |    "source": [
417 |     "a_list = list(range(1, 76))\n",
418 |     "parcellation_negative_l = atlas_l\n",
419 |     "for i, j in enumerate(a_list):\n",
420 |     "    parcellation_negative_l = np.where(parcellation_negative_l == j, nl_negative_left[i], parcellation_negative_l)"
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "code",
425 |    "execution_count": null,
426 |    "metadata": {
427 |     "id": "QXmjD4jQ0kCs"
428 |    },
429 |    "outputs": [],
430 |    "source": [
431 |     "a_list = list(range(1, 76))\n",
432 |     "parcellation_negative_r = atlas_r\n",
433 |     "for i, j in enumerate(a_list):\n",
434 |     "    parcellation_negative_r = np.where(parcellation_negative_r == j, nl_negative_right[i], parcellation_negative_r)"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "code",
439 |    "execution_count": null,
440 |    "metadata": {
441 |     "id": "suomHT4C0kCs"
442 |    },
443 |    "outputs": [],
444 |    "source": [
445 |     "view = plotting.view_surf(fsaverage.infl_right, parcellation_negative_r, threshold=None, symmetric_cmap=False, cmap='plasma', bg_map=fsaverage.sulc_right)\n",
446 |     "\n",
447 |     "view"
448 |    ]
449 |   },
450 |   {
451 |    "cell_type": "code",
452 |    "execution_count": null,
453 |    "metadata": {
454 |     "id": "FbbhY4L80kCs"
455 |    },
456 |    "outputs": [],
457 |    "source": [
458 |     "view = plotting.view_surf(fsaverage.infl_left, parcellation_negative_l, threshold=None, symmetric_cmap=False, cmap='plasma', bg_map=fsaverage.sulc_left)\n",
459 |     "\n",
460 |     "view"
461 |    ]
462 |   }
463 |  ],
464 |  "metadata": {
465 |   "colab": {
466 |    "name": "3_Visualizations.ipynb",
467 |    "provenance": [],
468 |    "toc_visible": true
469 |   },
470 |   "kernelspec": {
471 |    "display_name": "Python 3",
472 |    "language": "python",
473 |    "name": "python3"
474 |   },
475 |   "language_info": {
476 |    "codemirror_mode": {
477 |     "name": "ipython",
478 |     "version": 3
479 |    },
480 |    "file_extension": ".py",
481 |    "mimetype": "text/x-python",
482 |    "name": "python",
483 |    "nbconvert_exporter": "python",
484 |    "pygments_lexer": "ipython3",
485 |    "version": "3.8.8"
486 |   }
487 |  },
488 |  "nbformat": 4,
489 |  "nbformat_minor": 4
490 | }
491 | 


--------------------------------------------------------------------------------
/tasks/4_post_hoc_analysis.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "id": "hnjOHEPnSgqZ"
  7 |    },
  8 |    "source": [
  9 |     "# SVM classification SZ vs. HC. 5-fold cross validation"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {
 15 |     "id": "T1dgnArWand-"
 16 |    },
 17 |    "source": [
 18 |     "Classify schizophrenia group from controls using cortical thickness deviation scores (z-scores) and then the true cortical thickness data to see which type of data better separates the groups."
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {
 25 |     "id": "dl-cWCkhU5OH"
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "! git clone https://github.com/saigerutherford/CPC_ML_tutorial.git"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "id": "oer08RX7Sgqc"
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "import pandas as pd\n",
 41 |     "import numpy as np\n",
 42 |     "import os\n",
 43 |     "import matplotlib.pyplot as plt\n",
 44 |     "os.chdir('/content/CPC_ML_tutorial/')"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {
 51 |     "id": "kBA6wv5_Sgqd"
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "Z_df = pd.read_csv('data/fcon1000_te_Z.csv')"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {
 62 |     "id": "_AtT_a9QSgqe"
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "from sklearn import svm\n",
 67 |     "from sklearn.metrics import auc\n",
 68 |     "from sklearn.metrics import plot_roc_curve\n",
 69 |     "from sklearn.model_selection import StratifiedKFold"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "metadata": {
 76 |     "id": "0m3frZSqWHFt"
 77 |    },
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "Z_df.dropna(subset=['group'], inplace=True)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "metadata": {
 87 |     "id": "reWNrhN6Wge0"
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "Z_df['group'] = Z_df['group'].replace(\"SZ\",0)"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "id": "LuddguUsW_UI"
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "Z_df['group'] = Z_df['group'].replace(\"Control\",1)"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {
109 |     "id": "wBuQvJKqVz0p"
110 |    },
111 |    "outputs": [],
112 |    "source": [
113 |     "deviations = Z_df.loc[:, Z_df.columns.str.contains('Z_predict')]"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "id": "QZvu0iXlZg7P"
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "cortical_thickness = Z_df.loc[:, Z_df.columns.str.endswith('_thickness')]"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "metadata": {
131 |     "id": "HECqM4rZVcD9"
132 |    },
133 |    "outputs": [],
134 |    "source": [
135 |     "# Data IO and generation\n",
136 |     "X1 = deviations\n",
137 |     "X2 = cortical_thickness\n",
138 |     "y = Z_df['group']\n",
139 |     "n_samples, n_features = X1.shape\n",
140 |     "random_state = np.random.RandomState(0)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": null,
146 |    "metadata": {
147 |     "id": "iho4wkAESgqf"
148 |    },
149 |    "outputs": [],
150 |    "source": [
151 |     "X1 = X1.to_numpy()"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {
158 |     "id": "zi7v5e8vZ0Ms"
159 |    },
160 |    "outputs": [],
161 |    "source": [
162 |     "X2 = X2.to_numpy()"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "metadata": {
169 |     "id": "xcA4w73TSgqf"
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "y = y.astype(int)"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": null,
179 |    "metadata": {
180 |     "id": "mKcM-dA3ZG_u"
181 |    },
182 |    "outputs": [],
183 |    "source": [
184 |     "y = y.to_numpy()"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "metadata": {
191 |     "id": "NNRcb-pvSgqf"
192 |    },
193 |    "outputs": [],
194 |    "source": [
195 |     "# #############################################################################\n",
196 |     "# Classification and ROC analysis\n",
197 |     "\n",
198 |     "# Run classifier with cross-validation and plot ROC curves\n",
199 |     "cv = StratifiedKFold(n_splits=5)\n",
200 |     "classifier = svm.SVC(kernel='linear', probability=True,\n",
201 |     "                     random_state=random_state)\n",
202 |     "\n",
203 |     "tprs = []\n",
204 |     "aucs = []\n",
205 |     "mean_fpr = np.linspace(0, 1, 100)\n",
206 |     "\n",
207 |     "fig, ax = plt.subplots(figsize=(15,15))\n",
208 |     "parameters = {'axes.labelsize': 20,\n",
209 |     "          'axes.titlesize': 25, 'xtick.labelsize':16,'ytick.labelsize':16,'legend.fontsize':14,'legend.title_fontsize':16}\n",
210 |     "plt.rcParams.update(parameters)\n",
211 |     "\n",
212 |     "for i, (train, test) in enumerate(cv.split(X1, y)):\n",
213 |     "    classifier.fit(X1[train], y[train])\n",
214 |     "    viz = plot_roc_curve(classifier, X1[test], y[test],\n",
215 |     "                         name='ROC fold {}'.format(i),\n",
216 |     "                         alpha=0.3, lw=1, ax=ax)\n",
217 |     "    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n",
218 |     "    interp_tpr[0] = 0.0\n",
219 |     "    tprs.append(interp_tpr)\n",
220 |     "    aucs.append(viz.roc_auc)\n",
221 |     "\n",
222 |     "ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',\n",
223 |     "        label='Chance', alpha=.8)\n",
224 |     "\n",
225 |     "mean_tpr = np.mean(tprs, axis=0)\n",
226 |     "mean_tpr[-1] = 1.0\n",
227 |     "mean_auc = auc(mean_fpr, mean_tpr)\n",
228 |     "std_auc = np.std(aucs)\n",
229 |     "ax.plot(mean_fpr, mean_tpr, color='b',\n",
230 |     "        label=r'Mean ROC (AUC = %0.2f $\\pm$ %0.2f)' % (mean_auc, std_auc),\n",
231 |     "        lw=2, alpha=.8)\n",
232 |     "\n",
233 |     "std_tpr = np.std(tprs, axis=0)\n",
234 |     "tprs_upper = np.minimum(mean_tpr + std_tpr, 1)\n",
235 |     "tprs_lower = np.maximum(mean_tpr - std_tpr, 0)\n",
236 |     "ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,\n",
237 |     "                label=r'$\\pm$ 1 std. dev.')\n",
238 |     "\n",
239 |     "ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05])\n",
240 |     "ax.set_title('Receiver operating characteristic SZ vs. HC (deviations)', fontweight=\"bold\", size=20)\n",
241 |     "ax.legend(loc=\"lower right\")\n",
242 |     "plt.show()"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": null,
248 |    "metadata": {
249 |     "id": "WYPilmZOaNgs"
250 |    },
251 |    "outputs": [],
252 |    "source": [
253 |     "# #############################################################################\n",
254 |     "# Classification and ROC analysis\n",
255 |     "\n",
256 |     "# Run classifier with cross-validation and plot ROC curves\n",
257 |     "cv = StratifiedKFold(n_splits=5)\n",
258 |     "classifier = svm.SVC(kernel='linear', probability=True,\n",
259 |     "                     random_state=random_state)\n",
260 |     "\n",
261 |     "tprs = []\n",
262 |     "aucs = []\n",
263 |     "mean_fpr = np.linspace(0, 1, 100)\n",
264 |     "\n",
265 |     "fig, ax = plt.subplots(figsize=(15,15))\n",
266 |     "parameters = {'axes.labelsize': 20,\n",
267 |     "          'axes.titlesize': 25, 'xtick.labelsize':16,'ytick.labelsize':16,'legend.fontsize':14,'legend.title_fontsize':16}\n",
268 |     "plt.rcParams.update(parameters)\n",
269 |     "\n",
270 |     "for i, (train, test) in enumerate(cv.split(X2, y)):\n",
271 |     "    classifier.fit(X2[train], y[train])\n",
272 |     "    viz = plot_roc_curve(classifier, X2[test], y[test],\n",
273 |     "                         name='ROC fold {}'.format(i),\n",
274 |     "                         alpha=0.3, lw=1, ax=ax)\n",
275 |     "    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n",
276 |     "    interp_tpr[0] = 0.0\n",
277 |     "    tprs.append(interp_tpr)\n",
278 |     "    aucs.append(viz.roc_auc)\n",
279 |     "\n",
280 |     "ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',\n",
281 |     "        label='Chance', alpha=.8)\n",
282 |     "\n",
283 |     "mean_tpr = np.mean(tprs, axis=0)\n",
284 |     "mean_tpr[-1] = 1.0\n",
285 |     "mean_auc = auc(mean_fpr, mean_tpr)\n",
286 |     "std_auc = np.std(aucs)\n",
287 |     "ax.plot(mean_fpr, mean_tpr, color='b',\n",
288 |     "        label=r'Mean ROC (AUC = %0.2f $\\pm$ %0.2f)' % (mean_auc, std_auc),\n",
289 |     "        lw=2, alpha=.8)\n",
290 |     "\n",
291 |     "std_tpr = np.std(tprs, axis=0)\n",
292 |     "tprs_upper = np.minimum(mean_tpr + std_tpr, 1)\n",
293 |     "tprs_lower = np.maximum(mean_tpr - std_tpr, 0)\n",
294 |     "ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,\n",
295 |     "                label=r'$\\pm$ 1 std. dev.')\n",
296 |     "\n",
297 |     "ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05])\n",
298 |     "ax.set_title('Receiver operating characteristic SZ vs. HC (cortical thickness)', fontweight=\"bold\", size=20)\n",
299 |     "ax.legend(loc=\"lower right\")\n",
300 |     "plt.show()"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "markdown",
305 |    "metadata": {
306 |     "id": "Y9iIgxR1YMzq"
307 |    },
308 |    "source": [
309 |     "Which brain feature leads to a better classification between SZ & HC? "
310 |    ]
311 |   }
312 |  ],
313 |  "metadata": {
314 |   "colab": {
315 |    "name": "4_post_hoc_analysis.ipynb",
316 |    "provenance": []
317 |   },
318 |   "kernelspec": {
319 |    "display_name": "Python 3 (ipykernel)",
320 |    "language": "python",
321 |    "name": "python3"
322 |   },
323 |   "language_info": {
324 |    "codemirror_mode": {
325 |     "name": "ipython",
326 |     "version": 3
327 |    },
328 |    "file_extension": ".py",
329 |    "mimetype": "text/x-python",
330 |    "name": "python",
331 |    "nbconvert_exporter": "python",
332 |    "pygments_lexer": "ipython3",
333 |    "version": "3.9.7"
334 |   }
335 |  },
336 |  "nbformat": 4,
337 |  "nbformat_minor": 4
338 | }
339 | 


--------------------------------------------------------------------------------