├── README.md
├── SConstruct
├── __init__.py
├── examples
│   ├── __init__.py
│   ├── graphnet_example.py
│   ├── simple_data.py
│   └── synthetic_data.py
├── graphnet_sa
│   ├── __init__.py
│   ├── afnifunctions.py
│   ├── baseclasses.py
│   ├── crossvalidation.py
│   ├── datamanager.py
│   ├── directorytools.py
│   ├── example_runner.py
│   └── graphnet_hook.py
├── gui
│   └── __init__.py
├── neuroparser.py
├── optimization
│   ├── SConscript
│   ├── __init__.py
│   ├── cwpath
│   │   ├── SConscript
│   │   ├── __init__.py
│   │   ├── cwpath.pyx
│   │   ├── graphnet.pyx
│   │   ├── libRblas.dylib
│   │   ├── mask.py
│   │   ├── regression.pyx
│   │   ├── strategy.py
│   │   └── tests
│   │       ├── Rclone.py
│   │       ├── __init__.py
│   │       ├── libRblas.dylib
│   │       ├── makedata.py
│   │       ├── profile.py
│   │       ├── sandbox.py
│   │       ├── test.R
│   │       ├── testR.py
│   │       └── test_graphnet.py
│   └── graphs
│       ├── __init__.py
│       ├── __init__.pyc
│       ├── graph_laplacian.py
│       └── graph_laplacian.pyc
├── scons
│   ├── __init__.py
│   └── cython.py
├── setup.py
├── site_scons
│   └── site_tools
│       ├── cython.py
│       └── pyext.py
└── todo.txt

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Neuroparser
2 | ===========
3 | 
4 | Setup.py Installation
5 | ---------------------
6 | The usual
7 | 
8 |     python setup.py install
9 | 
10 | should do the trick if you have the dependencies installed.
11 | 
12 | SCons Installation
13 | ------------------
14 | 
15 | If you have scons installed, simply type
16 | 
17 |     scons
18 | 
19 | in the top directory (neuroparser).
20 | 
21 | Warning!
22 | --------
23 | 
24 | This code is currently in an early state of development and will undergo many changes in the coming months. Use it at your own risk! It is a good idea to pull the latest code frequently and rebuild it, for example by using
25 | 
26 |     scons -c
27 | 
28 | to clear away old files, and then
29 | 
30 |     scons
31 | 
32 | to rebuild them.
33 | 
34 | Usage
35 | -----
36 | 
37 | Some basic examples of how the code can be called are in:
38 | 
39 |     /neuroparser/examples/graphnet_example.py
40 | 
41 | and some more use cases can be found in:
42 | 
43 |     /neuroparser/optimization/cwpath/tests/profile.py
44 |     /neuroparser/optimization/cwpath/tests/test_graphnet.py
45 | 
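A minimal invocation, following graphnet_example.py (a sketch: the data
matrix X, response Y, and adjacency list Adj are assumed to be already
built, and the penalty values are illustrative rather than recommended):

    from optimization.cwpath import cwpath, graphnet

    l = cwpath.CoordWise((X, Y, Adj), graphnet.GraphNet)
    l.problem.assign_penalty(l1=50., l2=5., l3=30.)
    l.fit()
    coefficients = l.problem.coefficients
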
46 | Please report bugs and feature requests on GitHub (preferred), or email logang@stanford.edu.
47 | 
48 | Logan Grosenick
49 | Kiefer Katovich
50 | Brad Klingenberg
51 | Jonathan Taylor
52 | 
53 | 
--------------------------------------------------------------------------------
/SConstruct:
--------------------------------------------------------------------------------
1 | import numpy, os
2 | 
3 | # Get env and make Python extensions look up compiler flags, include paths etc. (from distutils)
4 | env = Environment(ENV=os.environ, tools = ["default"], toolpath = '.', PYEXT_USE_DISTUTILS=True)
5 | 
6 | # Set tools and numpy path
7 | env.Tool("pyext")
8 | env.Tool("cython")
9 | env.Append(PYEXTINCPATH=[numpy.get_include()])
10 | 
11 | # Override location of Cython
12 | #env.Replace(CYTHON="python /Library/Frameworks/EPD64.framework/Versions/7.0/lib/python2.7/site-packages/cython.py")
13 | 
14 | # Specify extensions to be compiled
15 | #env.PythonExtension('optimization.cwpath.cwpath', ['./optimization/cwpath/cwpath.pyx'])
16 | #env.PythonExtension('optimization.cwpath.graphnet', ['./optimization/cwpath/graphnet.pyx'])
17 | #env.PythonExtension('optimization.cwpath.regression', ['./optimization/cwpath/regression.pyx'])
18 | #env.PythonExtension('./optimization/cwpath/lasso', ['./optimization/cwpath/lasso.pyx'])
19 | 
20 | Export('env')
21 | SConscript('optimization/cwpath/SConscript')
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/examples/__init__.py
--------------------------------------------------------------------------------
/examples/graphnet_example.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from synthetic_data import gen_correlated_dataset, classification_dataset
3 | from optimization.cwpath import cwpath, graphnet
4 | from optimization.cwpath.mask import prepare_adj
5 | import scipy.optimize
6 | 
7 | #--------------------------------------------------------------------------
8 | # A simple use case on fake data
9 | 
10 | n = 25
11 | #Generate toy data: 3x3x3x3 data on n trials
12 | X_orig = np.random.normal(0,1,n*3**4).reshape((n,3,3,3,3))
13 | 
14 | #Reshape X to 2 dimensions
15 | p = np.prod(X_orig.shape[1:])
16 | X = np.zeros((n,p))
17 | for i in range(n):
18 |     X[i,:] = X_orig[i].reshape((1,p))
19 | 
20 | beta = np.append(5*np.ones(10),np.zeros(71))
21 | Y = np.dot(X,beta)
22 | 
23 | #Prepare adjacency matrix from "mask" of X (usually we'd get it from a mask file, see mask.py)
24 | Adj = prepare_adj(1+0.*X_orig[0],0,1,1,1)
25 | 
26 | l1 = 50.
27 | l2 = 5.
28 | l3 = 30.
29 | delta = 100.
30 | 
31 | #Run optimization
32 | l = cwpath.CoordWise((X, Y, Adj), graphnet.GraphNet, initial_coefs=np.random.normal(0,1,81))
33 | l.problem.assign_penalty(l1=l1,l2=l2,l3=l3)#,delta=delta)
34 | l.fit()
35 | 
36 | beta = np.round(1000*l.problem.coefficients)/1000
37 | print beta
38 | 
39 | #--------------------------------------------------------------------------
40 | # Test the Naive GraphNet classifier
41 | 
42 | def test_NaiveGraphNet_classification( n, p, num_obs_class1 = 100, num_obs_class2 = 100 ):
43 |     """
44 |     Test the naive graphnet on a binary classification problem.
45 |     """
46 |     # classification data
47 |     class_data = classification_dataset( n, p, num_obs_class1, num_obs_class2, SNR = 1, corr1 = 0.9, corr2 = None )
48 |     X = class_data[1]
49 |     y = class_data[0]
50 | 
51 |     #Prepare adjacency matrix from "mask" of X (usually we'd get it from a mask file, see mask.py)
52 |     Adj = prepare_adj(1+0.*X,0,1,1,0)
53 | 
54 |     l1 = 50.
55 |     l2 = 5.
56 |     l3 = 30.
57 |     delta = 100.
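    # (A note on the parameters above: judging from their use in
    #  optimization/cwpath, l1 weights the sparsity (lasso) term, l2 a
    #  ridge term, l3 the smoothing term over the graph defined by Adj,
    #  and delta is the Huber parameter used by the robust variants.
    #  The particular values here are illustrative only.)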
58 | 
59 |     #Run optimization
60 |     l = cwpath.CoordWise((X, y, Adj), graphnet.GraphNet ) #, initial_coefs=np.random.normal(0,1,81))
61 |     l.problem.assign_penalty(l1=l1,l2=l2,l3=l3)#,delta=delta)
62 |     l.fit()
63 | 
64 |     beta = np.round(1000*l.problem.coefficients)/1000
65 |     print beta
66 | 
67 | if __name__ == "__main__":
68 |     test_NaiveGraphNet_classification(100,100)
69 | 
--------------------------------------------------------------------------------
/examples/simple_data.py:
--------------------------------------------------------------------------------
1 | # import libraries
2 | import numpy as np
3 | 
4 | # simulation parameters
5 | n = 200 # number of image pixels
6 | 
--------------------------------------------------------------------------------
/examples/synthetic_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | chol = np.linalg.cholesky
3 | 
4 | def gen_correlated_matrix( n, p, corr1 = 0.8, corr2 = None ):
5 |     """
6 |     Generate a correlated random normal matrix using the Cholesky decompositions
7 |     of the partial autocorrelation matrices 'sigma1' (p \times p) and 'sigma2' (n \times n).
8 |     """
9 |     if corr2 is None:
10 |         corr2 = corr1
11 | 
12 |     # generate partial autocorrelation matrices
13 |     ys, xs = np.mgrid[:p, :p]
14 |     sigma1 = corr1 ** abs(ys - xs)
15 |     ys, xs = np.mgrid[:n, :n]
16 |     sigma2 = corr2 ** abs(ys - xs)
17 | 
18 |     # get cholesky factorizations
19 |     C1 = np.matrix(chol(sigma1))
20 |     C2 = np.matrix(chol(sigma2))
21 | 
22 |     # generate data with (independently) correlated rows and columns
23 |     # (should probably add a spectral version)
24 |     X = np.random.randn(n,p)
25 |     return C2*X*C1
26 | 
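# A small sanity check of the construction above (a sketch, assuming only
# numpy): pre- and post-multiplying iid noise by the Cholesky factors ties
# nearby rows together (strength governed by corr2) and nearby columns
# together (strength governed by corr1).
def _demo_row_column_correlation( n = 200, p = 50, corr1 = 0.8 ):
    draws = np.array( [np.asarray(gen_correlated_matrix( n, p, corr1 = corr1 ))
                       for _ in xrange(200)] )
    # empirical correlation between two horizontally adjacent entries,
    # estimated across independent draws -- should be well above zero
    return np.corrcoef( draws[:, 0, 0], draws[:, 0, 1] )[0, 1]
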
49 | """ 50 | # class 1 signal 51 | signal1 = np.zeros( (n,p) ) 52 | signal1[10:15, 10:15] = 1.*SNR 53 | signal1[30:40, 30:40] = -1.*SNR 54 | signal1[60:80, 60:80] = 1.*SNR 55 | 56 | # class 2 signal 57 | signal2 = np.zeros( (n,p) ) 58 | signal2[30:45, 30:45] = 1.*SNR 59 | signal2[30:50, 60:80] = 1.*SNR 60 | 61 | # make data 62 | x = gen_correlated_dataset( num_obs_class1, n, p, corr1 = corr1, corr2 = corr2, signal=signal1 ) 63 | x.extend( gen_correlated_dataset( num_obs_class2, n, p, corr1 = corr1, corr2 = corr2, signal=signal2 ) ) 64 | class_labels = np.hstack( ( np.zeros( num_obs_class1 ), np.ones( num_obs_class2 ) ) ).T 65 | 66 | # generate data matrix 67 | for i in xrange( len(x) ): 68 | if i == 0: 69 | X = x[0].flatten() 70 | else: 71 | X = np.vstack( (X, x[i].flatten()) ) 72 | 73 | return (class_labels, X) 74 | 75 | if __name__ == "__main__": 76 | import pylab as pl 77 | # image parameters 78 | n = 100 79 | p = 100 80 | # random iid matrix for comparison 81 | y = np.random.randn(n,p) 82 | # generate data 83 | signal = np.zeros( (n,p) ) 84 | signal[10:15, 10:15] = 1. 85 | signal[30:40, 30:40] = -1. 86 | signal[60:80, 60:80] = 1. 87 | x = gen_correlated_dataset( 100, n, p, signal=signal) 88 | # plot data 89 | pl.subplot(211) 90 | pl.imshow(y,interpolation=None) 91 | pl.subplot(212) 92 | pl.imshow(x[0],interpolation=None) 93 | pl.show() 94 | 95 | # classification data 96 | num_obs_class1 = 100 97 | num_obs_class2 = 100 98 | class_data = classification_dataset( n, p, num_obs_class1, num_obs_class2, SNR = 1, corr1 = 0.9, corr2 = None ) 99 | 1/0 100 | -------------------------------------------------------------------------------- /graphnet_sa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/graphnet_sa/__init__.py -------------------------------------------------------------------------------- /graphnet_sa/afnifunctions.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys 3 | import glob 4 | import subprocess 5 | import shutil 6 | from baseclasses import Process 7 | 8 | 9 | 10 | 11 | 12 | class AfniFunction(object): 13 | 14 | 15 | def __init__(self): 16 | super(AfniFunction, self).__init__() 17 | 18 | 19 | def _clean_remove(self, files): 20 | print files 21 | for file in files: 22 | print file 23 | try: 24 | os.remove(file) 25 | except: 26 | pass 27 | 28 | def _check_afni_suffix(self, path, suffix='orig'): 29 | if suffix == 'orig': 30 | if not path.endswith('+orig.') and not path.endswith('+orig'): 31 | path = path+'+orig.' 32 | elif suffix == 'tlrc': 33 | if not path.endswith('+tlrc.') and not path.endswith('+tlrc'): 34 | path = path+'+tlrc.' 
38 |     def _clean(self, glob_prefix, clean_type=None, path=None):
39 |         if path:
40 |             prefix = os.path.join(path, glob_prefix)
41 |         else:
42 |             prefix = glob_prefix
43 | 
44 |         if clean_type is None:
45 |             self._clean_remove(glob.glob(prefix))
46 | 
47 |         elif clean_type == 'orig':
48 |             self._clean_remove(glob.glob(prefix+'+orig*'))
49 | 
50 |         elif clean_type == 'tlrc':
51 |             print 'cleaning tlrc'
52 |             print prefix
53 |             print glob.glob(prefix+'+tlrc')
54 |             self._clean_remove(glob.glob(prefix+'+tlrc*'))
55 | 
56 |         elif clean_type == 'afni':
57 |             self._clean_remove(glob.glob(prefix+'+orig*'))
58 |             self._clean_remove(glob.glob(prefix+'+tlrc*'))
59 | 
60 | 
61 | 
62 | 
63 | class Copy3d(AfniFunction):
64 | 
65 |     def __init__(self):
66 |         super(Copy3d, self).__init__()
67 | 
68 | 
69 |     def __call__(self, input_path, output_path_prefix):
70 | 
71 |         self._clean(output_path_prefix, clean_type='orig')
72 |         cmd = ['3dcopy', input_path, output_path_prefix]
73 |         subprocess.call(cmd)
74 | 
75 | 
76 |     def write(self, scriptwriter, input, output_prefix):
77 | 
78 |         section = {'command':['3dcopy ${anatomical_nifti} ${anatomical_name}'],
79 |                    'variables':{'anatomical_nifti':input,
80 |                                 'anatomical_name':output_prefix},
81 |                    'clean':[['anatomical_name','orig']],
82 |                    'header':'Convert Anatomical'}
83 | 
84 |         scriptwriter.add_section(section)
85 | 
86 | 
87 | class TcatBuffer(AfniFunction):
88 | 
89 |     def __init__(self):
90 |         super(TcatBuffer, self).__init__()
91 | 
92 | 
93 |     def __call__(self, functional_path, output_path_prefix,
94 |                  abs_leadin, abs_leadout):
95 | 
96 |         self._clean(output_path_prefix, clean_type='orig')
97 |         cut_dset = functional_path+'['+str(abs_leadin)+'..'+str(abs_leadout)+']'
98 |         cmd = ['3dTcat', '-prefix', output_path_prefix, cut_dset]
99 |         subprocess.call(cmd)
100 | 
101 | 
102 |     def write(self, scriptwriter, functional, epi_prefix, leadin,
103 |               leadout, cmd_only=False):
104 | 
105 |         clean = {'afni':{'epi_prefix':epi_prefix}}
106 |         header = 'Cut off lead-in and lead-out:'
107 | 
108 |         write_cmd = ['3dTcat -prefix ${epi_prefix} \'${functional_nifti}[${leadin}..${leadout}]\'']
109 |         write_vars = {'functional_nifti':functional,
110 |                       'leadin':leadin, 'leadout':leadout, 'epi_prefix':epi_prefix}
111 | 
112 | 
113 |         if not cmd_only:
114 |             section = {'header':header, 'command':write_cmd,
115 |                        'variables':write_vars, 'clean':[['epi_prefix','orig']]}
116 |         else:
117 |             section = {'command':write_cmd, 'variables':write_vars}
118 | 
119 |         scriptwriter.add_section(section)
120 | 
121 | 
122 | 
123 | class Refit(AfniFunction):
124 | 
125 |     def __init__(self):
126 |         super(Refit, self).__init__()
127 | 
128 | 
129 |     def __call__(self, input_path, tr_length):
130 | 
131 |         input_path = self._check_afni_suffix(input_path)
132 |         cmd = ['3drefit', '-TR', str(tr_length), input_path]
133 |         subprocess.call(cmd)
134 | 
135 | 
136 |     def write(self, scriptwriter, input, tr_length, cmd_only=False):
137 |         header = 'Refit to ensure correct TR length:'
138 |         write_cmd = ['3drefit -TR ${tr_length} ${epi_prefix}+orig.']
139 |         write_vars = {'epi_prefix':input, 'tr_length':tr_length}
140 | 
141 |         if not cmd_only:
142 |             section = {'header':header, 'command':write_cmd,
143 |                        'variables':write_vars}
144 |         else:
145 |             section = {'command':write_cmd, 'variables':write_vars}
146 | 
147 |         scriptwriter.add_section(section)
148 | 
149 | 
150 | 
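# For reference, a call like Refit()('epi', 2.0) shells out to:
#   3drefit -TR 2.0 epi+orig.
# (the +orig. suffix is appended by _check_afni_suffix above).
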
151 | class RefittoParent(AfniFunction):
152 | 
153 |     def __init__(self):
154 |         super(RefittoParent, self).__init__()
155 | 
156 | 
157 |     def __call__(self, apar_path, dpar_path):
158 | 
159 |         dpar_path = self._check_afni_suffix(dpar_path)
160 |         cmd = ['3drefit', '-apar', apar_path, dpar_path]
161 |         subprocess.call(cmd)
162 | 
163 | 
164 |     def write(self, scriptwriter, apar, dpar):
165 |         header = 'Refit dataset to anatomical parent:'
166 |         write_cmd = ['3drefit -apar ${refit_apar} ${refit_dpar}+orig.']
167 |         write_vars = {'refit_apar':apar, 'refit_dpar':dpar}
168 | 
169 |         section = {'header':header, 'command':write_cmd, 'variables':write_vars}
170 | 
171 |         scriptwriter.add_section(section)
172 | 
173 | 
174 | 
175 | 
176 | class Tshift(AfniFunction):
177 | 
178 |     def __init__(self):
179 |         super(Tshift, self).__init__()
180 | 
181 | 
182 |     def __call__(self, input_path, output_path_prefix, tshift_slice, tpattern):
183 | 
184 |         self._clean(output_path_prefix, clean_type='orig')
185 |         cmd = ['3dTshift', '-slice', str(tshift_slice), '-tpattern',
186 |                tpattern, '-prefix', output_path_prefix, input_path]
187 |         subprocess.call(cmd)
188 | 
189 | 
190 |     def write(self, scriptwriter, input, output_prefix, tshift_slice, tpattern,
191 |               cmd_only=False):
192 | 
193 |         header = 'Time slice correction:'
194 |         write_cmd = ['3dTshift -slice ${tshift_slice} -tpattern ${tpattern} -prefix ${epits_prefix} ${epi_prefix}+orig.']
195 |         write_vars = {'epi_prefix':input, 'tshift_slice':tshift_slice,
196 |                       'tpattern':tpattern, 'epits_prefix':output_prefix}
197 | 
198 |         if not cmd_only:
199 |             section = {'header':header, 'command':write_cmd, 'variables':write_vars,
200 |                        'clean':[['epits_prefix', 'orig']]}
201 |         else:
202 |             section = {'command':write_cmd, 'variables':write_vars}
203 | 
204 |         scriptwriter.add_section(section)
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | class TcatDatasets(AfniFunction):
212 | 
213 |     def __init__(self):
214 |         super(TcatDatasets, self).__init__()
215 | 
216 | 
217 |     def __call__(self, input_paths, output_path_prefix, cleanup=True):
218 | 
219 |         self._clean(output_path_prefix, clean_type='orig')
220 |         cmd = ['3dTcat', '-prefix', output_path_prefix]+input_paths
221 |         subprocess.call(cmd)
222 | 
223 |         if cleanup:
224 |             for ip in input_paths:
225 |                 self._clean(ip, clean_type='orig')
226 | 
227 | 
228 |     def write(self, scriptwriter, inputs, output_prefix):
229 | 
230 |         header = 'Concatenate epis into functional dataset:'
231 |         write_cmd = ['3dTcat -prefix ${functional_name} ${epi_names}']
232 |         if type(inputs) not in (list, tuple):
233 |             inputs = [inputs]
234 | 
235 |         inputs = [x[:-6] if x.endswith('+orig.') else x[:-5]
236 |                   if x.endswith('+orig') else x for x in inputs]
237 |         inputs = ' '.join([x+'+orig' for x in inputs])
238 |         write_vars = {'epi_names':inputs, 'functional_name':output_prefix}
239 | 
240 |         section = {'header':header, 'command':write_cmd, 'variables':write_vars,
241 |                    'clean':[['functional_name','orig']]}
242 | 
243 |         scriptwriter.add_section(section)
244 | 
245 | 
246 | 
247 | class Volreg(AfniFunction):
248 | 
249 |     def __init__(self):
250 |         super(Volreg, self).__init__()
251 | 
252 | 
253 |     def __call__(self, input_path, output_path_prefix, motionfile, volreg_base):
254 | 
255 |         self._clean(output_path_prefix, clean_type='orig')
256 |         self._clean(motionfile)
257 |         cmd = ['3dvolreg','-Fourier','-twopass','-prefix', output_path_prefix,
258 |                '-base', str(volreg_base), '-dfile', motionfile, input_path]
259 |         subprocess.call(cmd)
260 | 
261 | 
262 |     def write(self, scriptwriter, input, output, motionfile, volreg_base):
263 | 
264 |         header = 'Motion correction:'
265 |         write_cmd = ['3dvolreg -Fourier -twopass -prefix ${output} -base ${volreg_base} -dfile ${motionfile_name} ${input}+orig']
266 |         write_vars =
{'input':input, 267 | 'output':output, 'volreg_base':volreg_base, 268 | 'motionfile_name':motionfile} 269 | 270 | section = {'header':header, 'variables':write_vars, 'command':write_cmd, 271 | 'clean':[['output','orig'],['3dmotion.1D',None]]} 272 | 273 | scriptwriter.add_section(section) 274 | 275 | 276 | 277 | 278 | 279 | class Smooth(AfniFunction): 280 | 281 | def __init__(self): 282 | super(Smooth, self).__init__() 283 | 284 | 285 | def __call__(self, input_path, output_path_prefix, blur_kernel): 286 | 287 | self._clean(output_path_prefix, clean_type='orig') 288 | cmd = ['3dmerge', '-prefix', output_path_prefix, '-1blur_fwhm', 289 | str(blur_kernel), '-doall', input_path] 290 | subprocess.call(cmd) 291 | 292 | 293 | def write(self, scriptwriter, input, output, blur_kernel): 294 | 295 | header = 'Blur dataset:' 296 | write_cmd = ['3dmerge -prefix ${output} -1blur_fwhm ${blur_kernel} -doall ${input}+orig'] 297 | write_vars = {'blur_kernel':blur_kernel, 'input':input, 'output':output} 298 | 299 | section = {'header':header, 'variables':write_vars, 'command':write_cmd, 300 | 'clean':[['output','orig']]} 301 | 302 | scriptwriter.add_section(section) 303 | 304 | 305 | 306 | 307 | class NormalizePSC(AfniFunction): 308 | 309 | def __init__(self): 310 | super(NormalizePSC, self).__init__() 311 | 312 | 313 | def __call__(self, input_path, ave_path_prefix, output_path_prefix, trs, expression): 314 | 315 | self._clean(output_path_prefix, clean_type='orig') 316 | self._clean(ave_path_prefix, clean_type='orig') 317 | 318 | ave_cmd = ['3dTstat', '-prefix', ave_path_prefix, 319 | input_path+'[0..'+str(trs)+']'] 320 | refit_cmd = ['3drefit', '-abuc', ave_path_prefix+'+orig'] 321 | calc_cmd = ['3dcalc', '-datum', 'float', '-a', 322 | input_path+'[0..'+str(trs)+']', '-b', 323 | ave_path_prefix+'+orig', '-expr', expression, '-prefix', 324 | output_path_prefix] 325 | 326 | subprocess.call(ave_cmd) 327 | subprocess.call(refit_cmd) 328 | subprocess.call(calc_cmd) 329 | 330 | 331 | 332 | def write(self, scriptwriter, input, output, average, trs, expression): 333 | 334 | header = 'Normalize dataset:' 335 | 336 | tstat_cmd = ['3dTstat -prefix ${average} \'${input}+orig[0..${trs}]]\''] 337 | tstat_vars = {'average':average, 'input':input, 'trs':trs} 338 | 339 | average_d = {'header':header, 'command':tstat_cmd, 'variables':tstat_vars, 340 | 'clean':[['average','orig']]} 341 | 342 | scriptwriter.add_section(average_d) 343 | 344 | refit_cmd = ['3drefit -abuc ${average}+orig'] 345 | refit_vars = {'average':average} 346 | 347 | refit = {'command':refit_cmd, 'variables':refit_vars} 348 | 349 | scriptwriter.add_section(refit) 350 | 351 | calc_cmd = ['3dcalc -datum float -a \'${input}+orig[0..${trs}]\' -b ${average} -expr \"${expression}\" -prefix ${output}'] 352 | calc_vars = {'input':input, 'output':output, 353 | 'average':average, 'trs':trs, 354 | 'expression':expression} 355 | 356 | norm = {'command':calc_cmd, 'variables':calc_vars, 'clean':[['output','orig']]} 357 | 358 | scriptwriter.add_section(norm) 359 | 360 | 361 | 362 | 363 | class HighpassFilter(AfniFunction): 364 | 365 | def __init__(self): 366 | super(HighpassFilter, self).__init__() 367 | 368 | 369 | def __call__(self, input_path, output_path_prefix, highpass_value): 370 | 371 | self._clean(output_path_prefix, clean_type='orig') 372 | cmd = ['3dFourier', '-prefix', output_path_prefix, '-highpass', 373 | str(highpass_value), input_path] 374 | subprocess.call(cmd) 375 | 376 | 377 | def write(self, scriptwriter, input, output, highpass_value): 378 | 379 | 
header = 'Fourier highpass filter:' 380 | write_cmd = ['3dFourier -prefix ${output} -highpass ${highpass_value} ${input}+orig'] 381 | write_vars = {'input':input, 'output':output, 382 | 'highpass_value':highpass_value} 383 | 384 | section = {'header':header, 'command':write_cmd, 'variables':write_vars, 385 | 'clean':[['output','orig']]} 386 | 387 | scriptwriter.add_section(section) 388 | 389 | 390 | 391 | class TalairachWarp(AfniFunction): 392 | 393 | def __init__(self): 394 | super(TalairachWarp, self).__init__() 395 | 396 | 397 | def __call__(self, functional_path, anatomical_path, template_path): 398 | 399 | import os 400 | curdir = os.getcwd() 401 | os.chdir(os.path.split(anatomical_path)[0]) 402 | anatomical_path = os.path.split(anatomical_path)[1] 403 | print anatomical_path 404 | self._clean(anatomical_path[:-5], clean_type='tlrc') 405 | cmd = ['@auto_tlrc', '-warp_orig_vol', '-suffix', 'NONE', 406 | '-base', template_path, '-input', anatomical_path] 407 | refit_cmd = ['3drefit', '-apar', anatomical_path[:-5]+'+tlrc', 408 | functional_path] 409 | 410 | subprocess.call(cmd) 411 | subprocess.call(refit_cmd) 412 | os.chdir(curdir) 413 | 414 | 415 | def write(self, scriptwriter, functional, anatomical, template_path): 416 | 417 | header = 'Warp to talairach space:' 418 | tlrc_cmd = ['@auto_tlrc -warp_orig_vol -suffix NONE -base ${talairach_template} -input ${anatomical}+orig'] 419 | tlrc_vars = {'anatomical':anatomical, 420 | 'talairach_template':template_path} 421 | 422 | tlrc_section = {'header':header, 'variables':tlrc_vars, 'command':tlrc_cmd, 423 | 'clean':[['anatomical','tlrc']]} 424 | 425 | scriptwriter.add_section(tlrc_section) 426 | 427 | refit_cmd = ['3drefit -apar ${anatomical}+orig ${functional}+orig'] 428 | refit_vars = {'anatomical':anatomical, 429 | 'functional':functional} 430 | 431 | refit_section = {'command':refit_cmd, 'variables':refit_vars} 432 | scriptwriter.add_section(refit_section) 433 | 434 | 435 | class Adwarp(AfniFunction): 436 | 437 | def __init__(self): 438 | super(Adwarp, self).__init__() 439 | 440 | 441 | def __call__(self, apar_path, dpar_path, output_path_prefix, dxyz=1., 442 | adwarp_type='tlrc', force=False): 443 | 444 | self._clean(output_path_prefix, clean_type=adwarp_type) 445 | if not force: 446 | cmd = ['adwarp', '-apar', apar_path, '-dpar', dpar_path, 447 | '-dxyz', str(dxyz), '-prefix', output_path_prefix] 448 | else: 449 | cmd = ['adwarp', '-apar', apar_path, '-dpar', dpar_path, 450 | '-dxyz', str(dxyz), '-force', '-prefix', output_path_prefix] 451 | subprocess.call(cmd) 452 | 453 | 454 | def write(self, scriptwriter, apar, dpar, output_prefix, dxyz=1., force=False): 455 | 456 | header = 'Adwarp dataset:' 457 | if not force: 458 | adwarp_cmd = ['adwarp -apar ${apar_dset} -dpar ${dpar_dset} -dxyz ${adwarp_dxyz} -prefix ${adwarp_output}'] 459 | else: 460 | adwarp_cmd = ['adwarp -force -apar ${adwarp_apar} -dpar ${adwarp_dpar} -dxyz ${adwarp_dxyz} -prefix ${adwarp_output}'] 461 | adwarp_vars = {'adwarp_apar':apar, 'adwarp_dpar': dpar, 462 | 'adwarp_dxyz':str(dxyz), 'adwarp_output':output_prefix} 463 | 464 | 465 | section = {'header':header, 'command':adwarp_cmd, 'variables':adwarp_vars, 466 | 'clean':[['adwarp_output', 'tlrc']]} 467 | 468 | scriptwriter.add_section(section) 469 | 470 | 471 | 472 | class Automask(AfniFunction): 473 | 474 | def __init__(self): 475 | super(Automask, self).__init__() 476 | 477 | 478 | def __call__(self, mask_dset_path, output_path_prefix, clfrac=.3, 479 | dset_type='tlrc'): 480 | 481 | self._clean(output_path_prefix, 
clean_type=dset_type) 482 | cmd = ['3dAutomask','-prefix',output_path_prefix, '-clfrac', 483 | str(clfrac), mask_dset_path] 484 | subprocess.call(cmd) 485 | 486 | 487 | 488 | '''BROKEN (scriptwriter needs to be updated)''' 489 | class AfnitoNifti(AfniFunction): 490 | 491 | def __init__(self): 492 | super(AfnitoNifti, self).__init__() 493 | 494 | 495 | def __call__(self, input_path, output_path_prefix): 496 | 497 | self._clean(output_path_prefix+'.nii') 498 | cmd = ['3dAFNItoNIFTI', '-prefix', output_path_prefix, input_path] 499 | subprocess.call(cmd) 500 | 501 | 502 | def write(self, scriptwriter, input, output_prefix): 503 | 504 | header = 'Convert Afni file to Nifti:' 505 | clean = {'standard':{'nifti_output':output_prefix+'.nii'}} 506 | a2n_cmd = ['3dAFNItoNIFTI -prefix ${nifti_output} ${afni_input}'] 507 | a2n_vars = {'nifti_output':output_prefix, 'afni_input':input} 508 | 509 | section = {'header':header, 'command':a2n_cmd, 'variables':a2n_vars, 510 | 'clean':[['nifti_output','nii']]} 511 | 512 | scriptwriter.add_section(section) 513 | 514 | 515 | 516 | class MaskAve(AfniFunction): 517 | 518 | def __init__(self): 519 | super(MaskAve, self).__init__() 520 | 521 | 522 | def __call__(self, mask_path, dataset_path, mask_area_strs=['l','r','b'], 523 | mask_area_codes=[[1,1],[2,2],[1,2]], tmp_tc_dir='raw_tc'): 524 | 525 | subject_path = os.path.split(dataset_path)[0] 526 | subject_name = os.path.split(subject_path)[1] 527 | 528 | mask_name = os.path.split(mask_path)[1].split('+')[0] 529 | 530 | print 'maskave', subject_name, mask_name 531 | 532 | tmpdir = os.path.join(subject_path, tmp_tc_dir) 533 | try: 534 | os.makedirs(tmpdir) 535 | except: 536 | pass 537 | 538 | for area, codes in zip(mask_area_strs, mask_area_codes): 539 | 540 | raw_tc = '_'.join([subject_name, area, mask_name, 'raw.tc']) 541 | raw_tc = os.path.join(tmpdir, raw_tc) 542 | 543 | self._clean(raw_tc) 544 | 545 | cmd = ['3dmaskave', '-mask', mask_path, '-quiet', '-mrange', 546 | str(codes[0]), str(codes[1]), dataset_path] 547 | 548 | fcontent = subprocess.Popen(cmd, stdout=subprocess.PIPE) 549 | fcontent.wait() 550 | fcontent = fcontent.communicate()[0] 551 | 552 | fid = open(raw_tc, 'w') 553 | fid.write(fcontent) 554 | fid.close() 555 | 556 | 557 | 558 | class Info(AfniFunction): 559 | 560 | def __init__(self): 561 | super(Info, self).__init__() 562 | 563 | 564 | def __call__(self, dataset_path, output_filepath=None): 565 | if output_filepath: 566 | fid = open(output_filepath, 'w') 567 | 568 | cmd = ['3dinfo', dataset_path] 569 | 570 | fcontent = subprocess.Popen(cmd, stdout=subprocess.PIPE) 571 | fcontent.wait() 572 | fcontent = fcontent.communicate()[0] 573 | 574 | if output_filepath: 575 | fid.write(fcontent) 576 | fid.close() 577 | 578 | return fcontent 579 | 580 | 581 | 582 | class FractionizeMask(AfniFunction): 583 | 584 | def __init__(self): 585 | super(FractionizeMask, self).__init__() 586 | 587 | 588 | def __call__(self, mask_path, dataset_path, anat_path, subject_mask_suffix='r'): 589 | 590 | subject_path = os.path.split(dataset_path)[0] 591 | print mask_path 592 | mask_name = os.path.split(mask_path)[1].split('+')[0] 593 | print mask_name 594 | 595 | subject_mask = os.path.join(subject_path, mask_name+subject_mask_suffix+'+orig') 596 | self._clean(subject_mask+'*') 597 | 598 | cmd = ['3dfractionize', '-template', dataset_path, '-input', mask_path, 599 | '-warp', anat_path, '-clip', '0.1', '-preserve', '-prefix', 600 | subject_mask] 601 | 602 | subprocess.call(cmd) 603 | return subject_mask 604 | 605 | 606 | 607 
| class MaskDump(AfniFunction):
608 | 
609 |     def __init__(self):
610 |         super(MaskDump, self).__init__()
611 |         self.fractionize = FractionizeMask()
612 |         self.maskave = MaskAve()
613 | 
614 | 
615 |     def run(self, dataset_path, anat_path, mask_paths, mask_area_strs=['l','r','b'],
616 |             mask_area_codes=[[1,1],[2,2],[1,2]]):
617 | 
618 |         subject_path = os.path.split(dataset_path)[0]
619 | 
620 |         #if clean_type(mask_paths) in (list, tuple):
621 |         #    for mask in mask_paths:
622 |         #        subject_mask = self.fractionize.run(mask, dataset_path, anat_path)
623 |         #        self.maskave.run(subject_mask, dataset_path, mask_area_strs=mask_area_strs,
624 |         #                         mask_area_codes=mask_area_codes)
625 | 
626 | 
627 |         for maskp in mask_paths:
628 |             subject_mask = self.fractionize(maskp, dataset_path, anat_path)
629 |             self.maskave(subject_mask, dataset_path, mask_area_strs=mask_area_strs,
630 |                          mask_area_codes=mask_area_codes)
631 | 
632 | 
633 |     def run_over_subjects(self, subject_dirs=None, functional_name=None, anatomical_name=None,
634 |                           mask_names=None, mask_dir=None, mask_area_strs=['l','r','b'],
635 |                           mask_area_codes=[[1,1],[2,2],[1,2]]):
636 | 
637 |         self.mask_names = mask_names
638 |         self.functional_name = functional_name
639 |         self.anatomical_name = anatomical_name
640 |         self.mask_dir = mask_dir
641 |         self.subject_dirs = subject_dirs
642 | 
643 |         for i, mask in enumerate(self.mask_names):
644 |             if not mask.endswith('+tlrc'):
645 |                 self.mask_names[i] = mask+'+tlrc'
646 | 
647 |         mask_paths = [os.path.join(self.mask_dir, name) for name in self.mask_names]
648 | 
649 |         for dir in self.subject_dirs:
650 |             dataset_path = os.path.join(dir, self.functional_name)
651 |             anat_path = os.path.join(dir, self.anatomical_name)
652 | 
653 |             self.run(dataset_path, anat_path, mask_paths, mask_area_strs=mask_area_strs,
654 |                      mask_area_codes=mask_area_codes)
655 | 
656 | 
657 | 
658 | class RegAna(AfniFunction):
659 | 
660 |     def __init__(self):
661 |         super(RegAna, self).__init__()
662 | 
663 | 
664 |     def __call__(self, subject_dirs, dataset_name, output_path, Xrows, modelX=[], null=[0],
665 |                  rmsmin=0):
666 | 
667 |         dataset_paths = [os.path.join(sdir, dataset_name) for sdir in subject_dirs]
668 | 
669 |         self._clean(output_path)
670 | 
671 |         if not modelX:
672 |             modelX = [x+1 for x in range(len(Xrows[0]))]
673 | 
674 |         cols = len(Xrows[0])
675 |         rows = len(Xrows)
676 | 
677 |         cmd = ['3dRegAna', '-rows', str(rows), '-cols', str(cols)]
678 | 
679 |         for dset, sX in zip(dataset_paths, Xrows):
680 |             subrow = ['-xydata']+[str(x) for x in sX]+[dset]
681 |             cmd.extend(subrow)
682 | 
683 |         cmd.extend(['-rmsmin', str(rmsmin)])
684 |         cmd.extend(['-model']+[str(x) for x in modelX]+[':']+[str(x) for x in null])
685 |         cmd.extend(['-bucket','0',output_path])
686 | 
687 |         subprocess.call(cmd)
688 | 
689 | 
690 |     def write(self, scriptwriter, subjects, dataset_name, dataset_ind, output_name, Xrows,
691 |               modelX=[], null=[0], rmsmin=0):
692 | 
693 |         header = '3dRegAna auto-script:'
694 | 
695 |         if not dataset_name.endswith('+tlrc') and not dataset_name.endswith('+tlrc.'):
696 |             dataset_name = dataset_name+'+tlrc'
697 | 
698 |         dataset_with_ind = dataset_name+'['+str(dataset_ind)+']'
699 | 
700 |         dataset_paths = [os.path.join('..', sub, '${regana_dataset}') for sub in subjects]
701 | 
702 |         if not modelX:
703 |             modelX = [x+1 for x in range(len(Xrows[0]))]
704 | 
705 |         cols = len(Xrows[0])
706 |         rows = len(Xrows)
707 | 
708 |         cmd = ['3dRegAna -rows '+str(rows)+' -cols '+str(cols)+' \\']
709 |         subcmd = []
710 |         for dset, sX in zip(dataset_paths, Xrows):
711 |             subrow = '-xydata '+' '.join([str(x) for x in sX])+' '+dset+' \\'
712 |             subcmd.append(subrow)
713 | 
714 | 
715 |         subcmd.append('-rmsmin '+str(rmsmin))
716 |         subcmd.append('-model '+' '.join([str(x) for x in modelX])+' : '+' '.join([str(x) for x in null]))
717 |         subcmd.append('-bucket 0 '+output_name)
718 | 
719 |         cmd.append(subcmd)
720 | 
721 |         regana_vars = {'regana_dataset':dataset_with_ind}
722 | 
723 |         section = {'header':header, 'command':cmd, 'variables':regana_vars}
724 | 
725 |         scriptwriter.add_section(section)
726 | 
727 | 
728 | 
729 | 
730 | 
731 | 
732 | class AfniWrapper(object):
733 | 
734 |     def __init__(self):
735 |         super(AfniWrapper, self).__init__()
736 | 
737 |         self.maskave = MaskAve()
738 |         self.fractionize = FractionizeMask()
739 |         self.maskdump = MaskDump()
740 |         self.talairachwarp = TalairachWarp()
741 |         self.highpassfilter = HighpassFilter()
742 |         self.normalize = NormalizePSC()
743 |         self.smooth = Smooth()
744 |         self.volreg = Volreg()
745 |         self.tcatdsets = TcatDatasets()
746 |         self.tcatbuffer = TcatBuffer()
747 |         self.tshift = Tshift()
748 |         self.refit = Refit()
749 |         self.refit_apar = RefittoParent()
750 |         self.copy3d = Copy3d()
751 |         self.adwarp = Adwarp()
752 |         self.automask = Automask()
753 |         self.afnitonifti = AfnitoNifti()
754 |         self.info = Info()
755 |         self.regana = RegAna()
756 | 
757 | 
758 | 
759 | 
760 | 
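# Typical usage of the wrapper (a sketch -- paths and values here are
# hypothetical):
#
#   afni = AfniWrapper()
#   afni.copy3d('anat.nii', 'anat')                       # 3dcopy
#   afni.refit('epi', 2.0)                                # 3drefit -TR
#   afni.volreg('epi+orig.', 'epi_mc', '3dmotion.1D', 3)  # 3dvolreg
#
# Each attribute is a callable object wrapping one AFNI command-line
# program; see the individual classes above for the exact flags used.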
--------------------------------------------------------------------------------
/graphnet_sa/baseclasses.py:
--------------------------------------------------------------------------------
1 | 
2 | import sys, os
3 | 
4 | class Process(object):
5 | 
6 |     def __init__(self, variable_dict=None):
7 |         super(Process, self).__init__()
8 |         if variable_dict:
9 |             self._instantiate(variable_dict)
10 | 
11 | 
12 |     def _instantiate(self, variable_dict):
13 |         for key, value in variable_dict.items():
14 |             setattr(self, key, value)
15 | 
16 | 
17 |     def _apply_defaults(self, defaults):
18 |         # fill in any attribute that is missing or explicitly set to
19 |         # None from the defaults dict
20 |         for key, value in defaults.items():
21 |             if getattr(self, key, None) is None:
22 |                 setattr(self, key, value)
23 | 
24 | 
25 | 
26 |     def _assign_variables(self, variable_dict):
27 |         for key, value in variable_dict.items():
28 |             if value is not None:
29 |                 setattr(self, key, value)
30 | 
31 | 
32 |     def _check_variables(self, variable_dict):
33 |         proceed = True
34 |         for key in variable_dict.keys():
35 |             if getattr(self, key, None) is None:
36 |                 print 'ERROR: %s is set to None.' % key
37 |                 proceed = False
38 |         return proceed
39 | 
40 | 
41 | 
--------------------------------------------------------------------------------
/graphnet_sa/crossvalidation.py:
--------------------------------------------------------------------------------
1 | 
2 | import numpy as np
3 | import random
4 | import itertools
5 | import functools
6 | from pprint import pprint
7 | from baseclasses import Process
8 | 
9 | 
10 | 
11 | 
12 | class CVObject(Process):
13 | 
14 | 
15 |     def __init__(self, variable_dict=None, data_obj=None):
16 |         super(CVObject, self).__init__(variable_dict=variable_dict)
17 |         self.crossvalidator = Crossvalidation()
18 |         if data_obj:
19 |             self.data = data_obj
20 |             self.subject_indices = getattr(self.data, 'subject_indices', None)
21 |             self.indices_dict = self.subject_indices
22 |         else:
23 |             self.data = None
24 |         self.crossvalidation_ready = False
25 | 
26 | 
27 |     def set_folds(self, folds):
28 |         self.crossvalidator.folds = folds
29 |         self.folds = folds
30 | 
31 | 
32 |     def replace_Y_vals(self, Y, original_val, new_val):
33 |         replace = lambda val: new_val if (val==original_val) else val
34 |         return np.array([replace(v) for v in Y])
35 | 
36 | 
37 |     def replace_Y_negative_ones(self):
38 |         self.Y = self.replace_Y_vals(self.Y, -1., 0.)
39 | 
40 | 
41 |     def prepare_folds(self, indices_dict=None, folds=None, leave_mod_in=False):
42 |         # indices dict must be a python dictionary with keys corresponding to
43 |         # some kind of grouping (typically keys for subjects/brains).
44 |         # the values for those keys in the dict are the indices of the X and Y
45 |         # matrices in the data object "attached" to these subjects.
46 |         # this allows for flexible and decently clear upsampling, downsampling,
47 |         # and crossvalidation across folds of these "keys"
48 | 
49 |         # if no indices dict specified, try and get it from self.data's
50 |         # subject_indices, assuming it has been made.
51 |         if not indices_dict:
52 |             if not self.data:
53 |                 print 'Unable to find indices_dict, quitting crossvalidation preparation'
54 |                 return False
55 |             else:
56 |                 if not getattr(self.data, 'subject_indices', None):
57 |                     print 'Unable to find indices_dict, quitting crossvalidation preparation'
58 |                     return False
59 |                 else:
60 |                     indices_dict = self.data.subject_indices
61 | 
62 |         # set folds:
63 |         if folds:
64 |             self.set_folds(folds)
65 |         else:
66 |             self.folds = None
67 | 
68 |         # have the crossvalidator object make training and testing dicts:
69 |         self.crossvalidator.create_crossvalidation_folds(indices_dict=indices_dict,
70 |                                                          folds=self.folds,
71 |                                                          leave_mod_in=leave_mod_in)
72 | 
73 |         # reassign variables of CVObject from the crossvalidator:
74 |         self.folds = self.crossvalidator.folds
75 |         self.train_dict = self.crossvalidator.train_dict
76 |         self.test_dict = self.crossvalidator.test_dict
77 | 
78 |         #if (getattr(self, 'X', None) is not None) and (getattr(self, 'Y', None) is not None):
79 |         print 'assigning to trainX, trainY, testX, testY...'
80 |         #self.cv_group_XY(self.X, self.Y)
81 |         self.cv_group_XY()
82 |         self.crossvalidation_ready = True
83 |         #else:
84 |         #    print 'X and Y matrices, unset..
run cv_group_XY(X, Y) when ready to get cv groups' 85 | 86 | 87 | return True 88 | 89 | 90 | def subselect(self, data, indices): 91 | return np.array([data[i].tolist() for i in indices]) 92 | 93 | 94 | def subselect_from_memmap(self, indices, X_memmap_path=None, X_memmap_shape=None, 95 | verbose=True): 96 | 97 | if X_memmap_path: 98 | self.data.X_memmap_path = X_memmap_path 99 | if X_memmap_shape: 100 | self.data.X_memmap_shape = X_memmap_shape 101 | 102 | #bm = raw_input('Before memmap') 103 | X_memmap = np.memmap(self.data.X_memmap_path, dtype='float64', mode='r', shape=self.data.X_memmap_shape) 104 | 105 | if verbose: 106 | print X_memmap.shape, np.sum(X_memmap) 107 | 108 | #bf = raw_input('Before subset') 109 | subset = np.array([X_memmap[i] for i in indices]) 110 | 111 | if verbose: 112 | print subset.shape, np.sum(subset) 113 | #print 'original X:' 114 | #origX_subset = np.array([self.X[i].tolist() for i in indices]) 115 | #print np.sum(origX_subset) 116 | 117 | #bd = raw_input('Before delete') 118 | del X_memmap 119 | #ad = raw_input('After Delete') 120 | 121 | return subset 122 | 123 | 124 | def cv_group_XY(self): 125 | if getattr(self, 'train_dict', None) and getattr(self, 'test_dict', None): 126 | fold_inds = self.train_dict.keys() 127 | assert fold_inds == self.test_dict.keys() 128 | 129 | #self.trainX = [self.subselect(X, self.train_dict[tg]) for tg in fold_inds] 130 | #self.trainY = [self.subselect(Y, self.train_dict[tg]) for tg in fold_inds] 131 | #self.testX = [self.subselect(X, self.test_dict[tg]) for tg in fold_inds] 132 | #self.testY = [self.subselect(Y, self.test_dict[tg]) for tg in fold_inds] 133 | 134 | self.trainX = [self.train_dict[tg] for tg in fold_inds] 135 | self.trainY = [self.train_dict[tg] for tg in fold_inds] 136 | self.testX = [self.test_dict[tg] for tg in fold_inds] 137 | self.testY = [self.test_dict[tg] for tg in fold_inds] 138 | 139 | self.subjects_in_folds = [self.crossvalidator.cv_sets[i] for i in fold_inds] 140 | 141 | self.crossvalidation_ready = True 142 | print 'completed groupings into trainX/Y, testX/Y' 143 | else: 144 | print 'Could not make train/test X Y matrices' 145 | 146 | 147 | 148 | def statsfunction_over_folds(self, statsfunction, Xgroups, Ygroups, **kwargs): 149 | # the statsfunction ported in must contain ONLY 2 NON-KEYWORD ARGUMENTS: 150 | # X data and Y data. The rest of the arguments MUST BE KEYWORDED. 151 | # you can pass the keyword arguments to this function that you would 152 | # have passed to the statsfunction originally. Note that the keyword 153 | # arguments (obviously) have to have the same name as they did in 154 | # statsfunction since they will be passed along to statsfunction soon 155 | # enough. 
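        # For example, with a hypothetical stats function
        #     def acc(X, Y, penalty=1.0): ...
        # one would call:
        #     self.statsfunction_over_folds(acc, self.trainX, self.trainY,
        #                                   penalty=10.)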
156 | results = [] 157 | for X, Y in zip(Xgroups, Ygroups): 158 | statspartial = functools.partial(statsfunction, X, Y, **kwargs) 159 | results.append([statspartial()]) 160 | return results 161 | 162 | 163 | 164 | 165 | 166 | def traintest_crossvalidator(self, trainfunction, testfunction, trainXgroups, 167 | trainYgroups, testXgroups, testYgroups, train_kwargs_dict={}, 168 | test_kwargs_dict={}, X=None, Y=None, use_memmap=False, verbose=True): 169 | 170 | if not use_memmap: 171 | if X: 172 | fullX = X 173 | elif hasattr(self, 'X'): 174 | fullX = self.X 175 | else: 176 | print 'no X (either specified or in class)' 177 | return False 178 | 179 | if Y: 180 | fullY = Y 181 | elif hasattr(self, 'Y'): 182 | fullY = self.Y 183 | else: 184 | print 'no Y (either specified or in class)' 185 | return False 186 | 187 | #print fullY 188 | 189 | trainresults = [] 190 | testresults = [] 191 | 192 | 193 | for trainX, trainY, testX, testY in zip(trainXgroups, trainYgroups, 194 | testXgroups, testYgroups): 195 | if verbose: 196 | print 'Crossvalidating next group.' 197 | 198 | # assert independence of indices: 199 | for train_index in trainX: 200 | assert train_index not in testX 201 | for dependent_index in trainY: 202 | assert dependent_index not in testY 203 | 204 | if not use_memmap: 205 | subX = self.subselect(fullX, trainX) 206 | else: 207 | subX = self.subselect_from_memmap(trainX) 208 | #nothing = raw_input('subX loaded.') 209 | 210 | #print fullY, trainY 211 | subY = self.subselect(fullY, trainY) 212 | #print subY 213 | #nothing = raw_input('subY loaded.') 214 | 215 | trainpartial = functools.partial(trainfunction, subX, subY, **train_kwargs_dict) 216 | trainresult = trainpartial() 217 | 218 | if not use_memmap: 219 | subX = self.subselect(fullX, testX) 220 | else: 221 | subX = self.subselect_from_memmap(testX) 222 | 223 | subY = self.subselect(fullY, testY) 224 | 225 | testpartial = functools.partial(testfunction, subX, subY, trainresult, **test_kwargs_dict) 226 | testresult = testpartial() 227 | 228 | if verbose: 229 | print 'this groups\' test result:' 230 | pprint(testresult) 231 | 232 | trainresults.append(trainresult) 233 | testresults.append(testresult) 234 | 235 | #if verbose: 236 | # print sum(testresults)/len(testresults) 237 | 238 | return trainresults, testresults 239 | 240 | 241 | 242 | 243 | class Crossvalidation(object): 244 | 245 | def __init__(self, indices_dict=None, folds=None): 246 | super(Crossvalidation, self).__init__() 247 | self.indices_dict = indices_dict 248 | self.folds = folds 249 | 250 | 251 | def chunker(self, indices, chunksize): 252 | # chunker splits indices into equal sized groups, returns dict with IDs: 253 | groups = [indices[i:i+chunksize] for i in range(0, len(indices), chunksize)] 254 | 255 | # assert that each group is the same size: 256 | for g in groups: 257 | assert len(g) == chunksize 258 | 259 | # assert that indices are not repeated in other groups: 260 | # for each group: 261 | for i, g in enumerate(groups): 262 | # for each group not the same number as g: 263 | for j, o in enumerate(groups): 264 | if j != i: 265 | # for each item in the other group: 266 | for o_x in o: 267 | # assert the item is not in the original group: 268 | assert o_x not in g 269 | 270 | cv_sets = {} 271 | for i, group in enumerate(groups): 272 | cv_sets[i] = group 273 | return cv_sets 274 | 275 | 276 | def excise_remainder(self, indices, folds): 277 | modulus = len(indices) % folds 278 | random.shuffle(indices) 279 | return indices[modulus:], indices[0:modulus] 280 | 281 | 282 
| def generate_sets(self, cv_sets, perms, mod_keys, include_mod): 283 | 284 | train_dict = {} 285 | test_dict = {} 286 | 287 | self.test_subject_byfold = [] 288 | 289 | for p, groups in enumerate(perms): 290 | 291 | train_dict[p] = [] 292 | test_dict[p] = [] 293 | 294 | training_keys = [] 295 | testing_keys = [] 296 | 297 | for gkey in groups: 298 | training_keys.extend(cv_sets[gkey]) 299 | 300 | for tr_key in training_keys: 301 | train_dict[p].extend(self.indices_dict[tr_key]) 302 | 303 | for cv_key in cv_sets.keys(): 304 | if cv_key not in groups: 305 | testing_subjects = cv_sets[cv_key] 306 | if include_mod: 307 | testing_subjects.extend(mod_keys) 308 | 309 | for te_key in testing_subjects: 310 | test_dict[p].extend(self.indices_dict[te_key]) 311 | 312 | # assert that there are no repeated indices in the training and 313 | # testing sets for this fold: 314 | for test_index in test_dict[p]: 315 | assert test_index not in train_dict[p] 316 | 317 | self.test_subject_byfold.append(testing_subjects) 318 | 319 | return train_dict, test_dict 320 | 321 | 322 | 323 | def create_crossvalidation_folds(self, indices_dict=None, folds=None, leave_mod_in=False, 324 | verbose=True): 325 | 326 | self.folds = folds or getattr(self,'folds',None) 327 | self.indices_dict = indices_dict or getattr(self,'indices_dict',None) 328 | 329 | self.train_dict = {} 330 | self.test_dict = {} 331 | 332 | if self.indices_dict is None: 333 | print 'No indices dictionary provided, exiting...' 334 | return False 335 | 336 | index_keys = self.indices_dict.keys() 337 | if verbose: 338 | print index_keys 339 | 340 | if len(index_keys) == 1: 341 | print 'Cannot do crossvalidation with just 1 subject!' 342 | return False 343 | 344 | if self.folds is None: 345 | print 'Folds unset, defaulting to leave one out crossvalidation...' 
346 |             self.folds = len(index_keys)
347 | 
348 |         divisible_keys, remainder_keys = self.excise_remainder(index_keys, self.folds)
349 | 
350 |         for rk in remainder_keys:
351 |             assert rk not in divisible_keys
352 | 
353 |         # cv_sets is a dict with group IDs and indices:
354 |         self.cv_sets = self.chunker(divisible_keys, len(divisible_keys)/self.folds)
355 | 
356 |         # ensure that the number of sets is equal to the number of folds:
357 |         assert len(self.cv_sets) == self.folds
358 | 
359 |         # find the permutations of the group IDs, leaving one out:
360 |         set_permutations = itertools.combinations(self.cv_sets.keys(), len(self.cv_sets.keys())-1)
361 | 
362 | 
363 |         self.train_dict, self.test_dict = self.generate_sets(self.cv_sets, set_permutations,
364 |                                                              remainder_keys, leave_mod_in)
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
371 | 
372 | 
373 | 
374 | 
375 | 
--------------------------------------------------------------------------------
/graphnet_sa/directorytools.py:
--------------------------------------------------------------------------------
1 | 
2 | import re, os, glob
3 | import shutil
4 | 
5 | 
6 | 
7 | def dirs(topdir=os.getcwd(), prefixes=[], exclude=[], regexp=None, initial_glob='*'):
8 | 
9 |     files = [f for f in glob.glob(os.path.join(topdir,initial_glob)) if os.path.isdir(f)]
10 |     if regexp:
11 |         files = [f for f in files if re.search(regexp, os.path.split(f)[1])]
12 |     files = [f for f in files if not any([os.path.split(f)[1].startswith(ex) for ex in exclude])]
13 |     if prefixes:
14 |         files = [f for f in files if any([os.path.split(f)[1].startswith(pr) for pr in prefixes])]
15 | 
16 |     return sorted(files)
17 | 
18 | 
19 | 
20 | def subject_dirs(topdir=os.getcwd(), prefixes=[], exclude=[], initial_glob='*'):
21 |     return dirs(topdir=topdir, prefixes=prefixes, exclude=exclude,
22 |                 initial_glob=initial_glob, regexp=r'[a-zA-Z]\d\d\d\d\d\d')
23 | 
24 | 
25 | 
26 | 
27 | def subjects(max_length=None, topdir=os.getcwd(), prefixes=[], exclude=[],
28 |              initial_glob='*'):
29 |     subjdirs = subject_dirs(topdir=topdir, prefixes=prefixes, exclude=exclude,
30 |                             initial_glob=initial_glob)
31 |     if not max_length:
32 |         return [os.path.split(x)[1] for x in subjdirs]
33 |     else:
34 |         return [os.path.split(x)[1][0:min(max_length,len(os.path.split(x)[1]))] for x in subjdirs]
35 | 
36 | 
37 | 
38 | def consprint(input_list, python=True, bash=True):
39 | 
40 |     if python:
41 |         print '[\''+'\',\''.join([str(x) for x in input_list])+'\']'
42 | 
43 |     if bash:
44 |         print '( '+' '.join([str(x) for x in input_list])+' )'
45 | 
46 | 
47 | 
48 | 
49 | def glob_remove(file_prefix, suffix='*'):
50 |     candidates = glob.glob(file_prefix+suffix)
51 |     for c in candidates:
52 |         try:
53 |             os.remove(c)
54 |         except:
55 |             pass
56 | 
57 | 
58 | 
59 | class DirectoryCleaner(object):
60 | 
61 |     def __init__(self, prefixes=[], exclude=[], topdir=None):
62 |         super(DirectoryCleaner, self).__init__()
63 |         self.topdir = topdir or os.getcwd()
64 |         if prefixes:
65 |             self.dirs = dirs(topdir=self.topdir, prefixes=prefixes,
66 |                              exclude=exclude)
67 |         else:
68 |             self.dirs = subject_dirs(topdir=self.topdir, exclude=exclude)
69 |         self.types, self.files = [], []
70 | 
71 |     def walk_directories(self, function):
72 |         for dir in self.dirs:
73 |             os.chdir(dir)
74 |             self.files = glob.glob('./*')
75 |             function()
76 |             os.chdir('..')
77 |             self.files = []
78 | 
79 |     def action_flag(self, action):
80 |         if action == 'remove':
81 |             for file in self.files:
82 |                 for suffix in self.types:
83 |                     if file.endswith(suffix): os.remove(file)
84 |         elif action == 'move':
85 |             for flag in self.types:
86 |                 if not flag.endswith('HEAD') and not flag.endswith('BRIK'):
87 |                     dir_name = 'old_'+flag.strip('.')
88 |                     if not os.path.exists(dir_name):
89 |                         os.makedirs(dir_name)
90 |                     for file in self.files:
91 |                         if file.endswith(flag): shutil.move(file, dir_name)
92 |                 else:
93 |                     dir_name = 'old_afni'
94 |                     if not os.path.exists(dir_name):
95 |                         os.makedirs(dir_name)
96 |                     for file in self.files:
97 |                         if file.endswith(flag): shutil.move(file, dir_name)
98 | 
99 |     def remove(self, *args):
100 |         print os.getcwd()
101 |         if args:
102 |             self.types = args
103 |         print self.types
104 |         if not self.files:
105 |             self.walk_directories(self.remove)
106 |         else:
107 |             self.action_flag('remove')
108 | 
109 |     def move(self, *args):
110 |         print os.getcwd()
111 |         if args:
112 |             self.types = args
113 |         print self.types
114 |         if not self.files:
115 |             self.walk_directories(self.move)
116 |         else:
117 |             self.action_flag('move')
--------------------------------------------------------------------------------
/graphnet_sa/example_runner.py:
--------------------------------------------------------------------------------
1 | 
2 | import os, sys, glob
3 | from directorytools import subjects as get_subjects
4 | from directorytools import subject_dirs as get_subject_dirs
5 | from graphnet_hook import GraphnetInterface, Gridsearch
6 | from datamanager import BrainData
7 | 
8 | 
9 | if __name__ == "__main__":
10 | 
11 |     '''
12 |     Examples of using the graphnet interface with nifti data.
13 |     '''
14 | 
15 |     #-----------------------------------------------------------#
16 |     # Load in subject data:
17 |     #-----------------------------------------------------------#
18 | 
19 |     thisdir = os.getcwd()
20 |     subject_top_dir = os.path.split(thisdir)[0]
21 | 
22 |     subject_folder_names = ['subject1','subject2','subject3','subject4','subject5']
23 | 
24 |     subject_directories = get_subject_dirs(topdir=subject_top_dir,
25 |                                            prefixes=subject_folder_names)
26 | 
27 | 
28 |     mask = os.path.join(subject_top_dir, 'mask.nii')
29 | 
30 |     functional_name = 'functional_warped.nii'
31 |     trial_demarcation_vector_name = 'trial_markers.1D'
32 | 
33 |     lag = 2
34 |     selected_trial_trs = [1,2,3,4,5]
35 | 
36 |     datamanager = BrainData()
37 | 
38 |     datamanager.make_masks(mask, len(selected_trial_trs))
39 | 
40 |     datamanager.create_design(subject_directories,
41 |                               functional_name,
42 |                               trial_demarcation_vector_name,
43 |                               selected_trial_trs,
44 |                               lag=lag)
45 | 
46 |     datamanager.create_XY_matrices(downsample_type='subject',
47 |                                    with_replacement=True,
48 |                                    replacement_ceiling=36,
49 |                                    Ybinary=[1.,-1])
50 | 
51 |     datamanager.delete_subject_design()
52 | 
53 |     graphnet = GraphnetInterface(data_obj=datamanager)
54 | 
55 |     #-----------------------------------------------------------#
56 |     # Basic usage and dumping coefficients:
57 |     #-----------------------------------------------------------#
58 | 
59 |     graphnet.train_graphnet(datamanager.X, datamanager.Y,
60 |                             trial_mask=datamanager.trial_mask,
61 |                             l1=10., l2=100., l3=1000., delta=0.8, adaptive=True)
62 | 
63 |     coefs = graphnet.coefficients[0].copy()
64 | 
65 |     unmasked_coefs = datamanager.unmask_Xcoefs(coefs, len(selected_trial_trs),
66 |                                                slice_off_back=datamanager.X.shape[0])
67 | 
68 |     datamanager.save_unmasked_coefs(unmasked_coefs, 'graphnet_coef_map')
69 | 
70 | 
71 |     #-----------------------------------------------------------#
72 |     # Crossvalidation:
73 |     #-----------------------------------------------------------#
74 | 
75 | 
76 |     train_keyword_args = {'trial_mask':datamanager.trial_mask,
77 |                           'l1':10., 'l2':100., 'l3':1000., 'delta':0.8,
78 |                           'adaptive':True}
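    # (folds=5 below splits the subject keys into five groups;
    #  leave_mod_in=True keeps the "remainder" subjects -- those that do
    #  not divide evenly into the folds -- in every test set instead of
    #  dropping them. See Crossvalidation.create_crossvalidation_folds.)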
79 | 80 | graphnet.setup_crossvalidation(folds=5, leave_mod_in=True) 81 | 82 | graphnet.crossvalidate(train_keyword_args) 83 | 84 | 85 | #-----------------------------------------------------------# 86 | # Gridsearching: 87 | #-----------------------------------------------------------# 88 | 89 | gridsearch = Gridsearch() 90 | gridsearch.folds = 5 91 | gridsearch.initial_l1_min = 10. 92 | gridsearch.initial_l1_max = 60. 93 | gridsearch.l1_stepsizes = [5.,3.,1.] 94 | gridsearch.deltas = [.3,.5,.7] 95 | 96 | gridsearch.zoom_gridsearch(graphnet, 97 | name='graphnet_gridsearch', 98 | adaptive=True) 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /graphnet_sa/graphnet_hook.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys, time 3 | import numpy as np 4 | import scipy.optimize 5 | from nose.tools import * 6 | import h5py 7 | #import matplotlib 8 | import pylab as pl 9 | pl.ion() 10 | from nipy.io.api import load_image 11 | from nipy.core.api import Image 12 | 13 | 14 | ''' TODO: THESE IMPORTS NEED TO BE FIXED: 15 | 16 | from optimization.cwpath import cwpath, strategy 17 | from optimization.cwpath.cwpath import inner1d 18 | from optimization.graphs.graph_laplacian import construct_adjacency_list 19 | 20 | path_to_graphnetC_packages = os.path.abspath('./graphnetc/.') 21 | sys.path.append(path_to_graphnetC_packages) 22 | import graphnet 23 | ''' 24 | 25 | from datamanager import NiftiTools, simple_normalize 26 | from crossvalidation import CVObject 27 | 28 | 29 | 30 | 31 | 32 | def adj_from_nii(maskfile,num_time_points,numt=0,numx=1,numy=1,numz=1,regions=None): 33 | """ 34 | Construct adjacency array from .nii mask file 35 | 36 | INPUT: 37 | 38 | maskfile: Path to mask file (.nii) 39 | 40 | Other parameters are passed directly to prepare_adj (see that function for docs) 41 | 42 | OUTPUT: 43 | 44 | adj: An array containing adjacency information 45 | """ 46 | mask = load_image(maskfile)._data 47 | newmask = np.zeros(np.append(num_time_points,mask.shape)) 48 | for i in range(num_time_points): 49 | newmask[i] = mask 50 | adj = prepare_adj(newmask,numt,numx,numy,numz,regions) 51 | adj = convert_to_array(adj) 52 | return adj 53 | 54 | 55 | 56 | 57 | def prepare_adj(mask,numt=0,numx=1,numy=1,numz=1,regions=None, gm_mask=None, verbose=True): 58 | """ 59 | Return adjacency list, where the voxels are considered 60 | neighbors if they fall in a ball of radius numt, numx, numy, and numz 61 | for time, x position, y position, and z position respectively. 62 | 63 | INPUT: 64 | 65 | X: a 5-dimensional ndarray. The first index is trial, the second index is time, 66 | the third index is x position, the fourth index is y position and the fifth 67 | position is z position. 68 | 69 | mask: a binary 4-dimensional ndarray, the same size as X[0,:,:,:,:] where 70 | 1 indicates that the voxel-timepoint is included and 0 indicates that it is 71 | excluded. NOTE: Usually the mask is thought of as a 3-dimensional ndarray, since 72 | it is uniform across time. 73 | 74 | regions: a multivalued array the same size as the mask that indicates different 75 | regions in the spatial structure. No adjacency edges will be made across region 76 | boundaries. 
77 | 78 | numt: an integer, the radius of the "neighborhood ball" in the t direction 79 | numx: an integer, the radius of the "neighborhood ball" in the x direction 80 | numy: an integer, the radius of the "neighborhood ball" in the y direction 81 | numz: an integer, the radius of the "neighborhood ball" in the z direction 82 | 83 | OUTPUT: 84 | 85 | newX: The matrix X reshaped as a 2-dimensional array for analysis 86 | adj: The adjacency list associated with newX 87 | 88 | """ 89 | 90 | #Create map going from X to predictor vector indices. The entries of 91 | # this array are -1 if the voxel is not included in the mask, and the 92 | # index in the new predictor corresponding to the voxel if the voxel 93 | # is included in the mask. 94 | 95 | if regions == None: 96 | regions = np.zeros(mask.shape) 97 | regions.shape = mask.shape 98 | reg_values = np.unique(regions) 99 | 100 | vmap = np.cumsum(mask).reshape(mask.shape) 101 | mask = np.bool_(mask.copy()) 102 | vmap[~mask] = -1 103 | vmap -= 1 # now vmap's values run from 0 to mask.sum()-1 104 | 105 | 106 | if gm_mask is not None: 107 | gm = True 108 | else: 109 | gm = False 110 | 111 | if verbose: 112 | if gm: 113 | print 'Constructing adjacency matrix with greymatter mask' 114 | 115 | # Create adjacency list 116 | 117 | adj = [] 118 | #gm_adj = [] 119 | 120 | nt,nx,ny,nz = mask.shape 121 | 122 | for t in range(nt): 123 | for i in range(nx): 124 | for j in range(ny): 125 | for k in range(nz): 126 | if mask[t,i,j,k]: 127 | local_map = vmap[max((t-numt),0):(t+numt+1), 128 | max((i-numx),0):(i+numx+1), 129 | max((j-numy),0):(j+numy+1), 130 | max((k-numz),0):(k+numz+1)] 131 | 132 | if gm: 133 | local_gm = gm_mask[max((t-numt),0):(t+numt+1), 134 | max((i-numx),0):(i+numx+1), 135 | max((j-numy),0):(j+numy+1), 136 | max((k-numz),0):(k+numz+1)] 137 | 138 | local_reg = regions[max((t-numt),0):(t+numt+1), 139 | max((i-numx),0):(i+numx+1), 140 | max((j-numy),0):(j+numy+1), 141 | max((k-numz),0):(k+numz+1)] 142 | 143 | region = regions[t,i,j,k] 144 | ind = (local_map>-1)*(local_reg == region) 145 | ind = np.bool_(ind) 146 | adjrow = np.array(local_map[ind], dtype=int) 147 | 148 | if gm: 149 | gmrow = np.array(local_gm[ind], dtype=float) 150 | adj.append([[adjr, gmr] for adjr, gmr in zip(adjrow, gmrow)]) 151 | 152 | else: 153 | adj.append([[adjr, 1.] 
for adjr in adjrow])
154 | #else:
155 | #gmrow = np.ones(len(adjrow), dtype=float)
156 |
157 |
158 |
159 | #adj.append(np.array(local_map[ind],dtype=int))
160 | #if gm:
161 | # gm_adj.append(np.array(vgm_mask[ind], dtype=float))
162 | #adj.append(local_map[ind])
163 |
164 |
165 | #accum = []
166 | #for i, a in enumerate(adj):
167 | # for [ax, g] in a:
168 | # accum.append(g)
169 |
170 | #print np.unique(g), np.unique(vgm_mask)
171 | #print np.sum(g), np.sum(vgm_mask)
172 | #stop
173 |
174 | for i, a in enumerate(adj):
175 | for j, [ax, g] in enumerate(a):
176 | if ax == i:
177 | a[j] = [-1, g]
178 | adj[i] = a
179 |
180 | #a[np.equal(a,i)] = -1
181 | #adj[i] = a.tolist()
182 |
183 | #if gm:
184 | # for i, g in enumerate(gm_adj):
185 | # gm_adj[i] = g.tolist()
186 | #return convert_to_array(adj)
187 |
188 | #if gm:
189 | # return adj, gm_adj
190 | #else:
191 | # return adj
192 |
193 |
194 | return adj
195 |
196 |
197 |
198 |
199 | def convert_to_array(adj):
200 | num_ind = np.max([len(a) for a in adj])
201 | adjarray = -np.ones((len(adj),num_ind))
202 | for i in range(len(adj)):
203 | for j in range(len(adj[i])):
204 | adjarray[i,j] = adj[i][j]
205 | return adjarray
206 |
207 |
208 | def test_prep(nt=0,nx=1,ny=1,nz=1):
209 | """
210 | Let's make this into a proper test...... what should newa, adj be in this case?
211 | """
212 | a = np.array(range(1,1+2*3*4*4*4)).reshape((2,3,4,4,4))
213 | mask = a[0]*0
214 | mask[:,0,0,0] = 1
215 | mask[:,1,1,:] = 1
216 | # print mask[0]
217 | # print a[0,0]
218 | adj = prepare_adj(mask,nt,nx,ny,nz)
219 | # print newa[0,0], adj[0], newa[0,adj[0]]
220 |
221 |
222 | class GraphnetInterface(CVObject):
223 |
224 |
225 | def __init__(self, data_obj=None, variable_dict=None, folds=None):
226 | super(GraphnetInterface, self).__init__(variable_dict=variable_dict, data_obj=data_obj)
227 | self.set_folds(folds)
228 | self.niftitools = NiftiTools()
229 |
230 | self.X = getattr(self.data, 'X', None)
231 | self.Y = getattr(self.data, 'Y', None)
232 | self.trial_mask = getattr(self.data, 'trial_mask', None)
233 | if self.Y is not None:
234 | self.replace_Y_negative_ones()
235 | self.indices_dict = getattr(self.data, 'subject_trial_indices', None)
236 |
237 |
238 | def huber(self, r, delta):
239 | r = np.fabs(r)
240 | t = np.greater(r, delta)
241 | return (1-t)*r**2 + t*(2*delta*r - delta**2)
242 |
243 |
244 | def huber_svm(self, r, delta):
245 | t1 = np.greater(r, delta)
246 | t2 = np.greater(r, 0)
247 | return t1*(r - delta/2) + (1-t1)*t2*(r**2/(2*delta))
248 |
249 |
250 | def huber_svm_error(self, beta, Y, Xp2, delta):
251 | r = 1 - Y*np.dot(Xp2, beta)
252 | return self.huber(r, delta)
253 |
254 |
255 | def get_lambda_max(self, X, y):
256 | """
257 | Find the value of lambda at which all coefficients are set to zero
258 | by finding the minimum value such that 0 is in the subdifferential
259 | and the coefficients are all zero.
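For the squared-error loss this is lambda_max = max_j | x_j^T y |, which is what the code below computes.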
260 | """ 261 | subgrads = np.fabs(inner1d(X.T, y)) 262 | return np.max(subgrads) 263 | 264 | ''' 265 | def adj_array_as_list(self, adj): 266 | v = [] 267 | for a in adj: 268 | v.append(a[np.greater(a, -1)]) 269 | return v 270 | 271 | 272 | def gen_adj(self, p): 273 | print 'generating adjacency matrix' 274 | Afull = np.zeros((p, p), dtype=int) 275 | A = -np.ones((p, p), dtype=int) 276 | counts = np.zeros(p) 277 | for i in range(p): 278 | for j in range(p): 279 | if np.random.uniform(0, 1) < 0.3: 280 | if i != j: 281 | if Afull[i,j] == 0: 282 | Afull[i,j] = -1 283 | Afull[j,i] = -1 284 | Afull[i,i] += 1 285 | Afull[j,j] += 1 286 | A[i, counts[i]] = j 287 | A[j, counts[j]] = i 288 | counts[i] += 1 289 | counts[j] += 1 290 | return self.adj_array_as_list(A), Afull 291 | ''' 292 | 293 | def regression_type_selector(self, l1, l2, l3, delta, svmdelta): 294 | print l1, l2, l3, delta, svmdelta 295 | l1b = all(l1) 296 | if (l1b != False) and (l2 != None) and (l3 != None) and (delta != None) and (svmdelta != None): 297 | return 'HuberSVMGraphNet' 298 | elif (l1b != False) and (l2 != None) and (l3 != None) and (delta != None): 299 | return 'RobustGraphNet' 300 | elif (l1b != False) and (l2 != None) and (l3 != None): 301 | return 'NaiveGraphNet' 302 | elif (l1b != False) and (l2 != None): 303 | return 'NaiveENet' 304 | elif (l1b != False): 305 | return 'Lasso' 306 | else: 307 | return None 308 | 309 | 310 | def setup_crossvalidation(self, folds=None, subject_indices=None, leave_mod_in=False): 311 | if subject_indices: 312 | self.subject_indices = subject_indices 313 | if getattr(self, 'subject_indices', None): 314 | self.prepare_folds(folds=folds, indices_dict=self.subject_indices, leave_mod_in=leave_mod_in) 315 | else: 316 | print 'no subject indices set, cant setup cv folds' 317 | 318 | 319 | def crossvalidate(self, train_kwargs_dict, use_memmap=False): 320 | 321 | trainresults, testresults = self.traintest_crossvalidator(self.train_graphnet, 322 | self.test_graphnet, 323 | self.trainX, self.trainY, 324 | self.testX, self.testY, 325 | train_kwargs_dict=train_kwargs_dict, 326 | use_memmap=use_memmap) 327 | 328 | self.accuracies = testresults 329 | self.average_accuracies = [] 330 | for i in range(len(self.accuracies[0])): 331 | accs = [] 332 | for j in range(len(self.accuracies)): 333 | accs.append(self.accuracies[j][i]) 334 | self.average_accuracies.append(sum(accs)/len(accs)) 335 | 336 | #self.average_accuracies = [sum(x)/len(x) for x in self.accuracies] 337 | print 'Average accuracies: ', self.average_accuracies 338 | 339 | # trainresults: list of coefficients for each l1 by fold 340 | # AS OF NOW JUST TAKING THE COEFS FOR ONE OF THE FOLDS: 341 | sub_tresults = trainresults[0] 342 | self.non_zero_coefs = [len([x for x in tr if x != 0.]) for tr in sub_tresults] 343 | 344 | return self.accuracies, self.average_accuracies, self.non_zero_coefs 345 | 346 | 347 | 348 | def test_graphnet(self, X, Y, coefs): 349 | 350 | X = simple_normalize(X) 351 | accuracies = [] 352 | 353 | for i, coefset in enumerate(coefs): 354 | 355 | correct = [] 356 | print 'Checking accuracy for test group' 357 | 358 | if self.problemkey == 'RobustGraphNet': 359 | coefset = coefset[:-self.trainX_shape[0]] 360 | 361 | for trial, outcome in zip(X, Y): 362 | predict = trial*coefset 363 | #print np.sum(predict) 364 | Ypredsign = np.sign(np.sum(predict)) 365 | if Ypredsign < 0.: 366 | Ypredsign = 0. 367 | else: 368 | Ypredsign = 1. 
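#(note) the sign of the summed prediction is mapped onto {0., 1.} so that it
#can be compared directly to the 0/1 outcome labels (negative labels are
#converted by replace_Y_negative_ones in __init__)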
369 | #print Ypredsign, outcome, (Ypredsign == outcome) 370 | correct.append(Ypredsign == outcome) 371 | 372 | fold_accuracy = np.sum(correct) * 1. / len(correct) 373 | 374 | print 'coef number:', i 375 | print 'fold accuracy: ', fold_accuracy 376 | accuracies.append(fold_accuracy) 377 | 378 | 379 | return accuracies 380 | 381 | 382 | def train_graphnet(self, X, Y, trial_mask=None, G=None, l1=None, l2=None, l3=None, delta=None, 383 | svmdelta=None, initial=None, adaptive=False, svm=False, 384 | scipy_compare=False, tol=1e-5, greymatter_mask=None, initial_l1weights=None, 385 | use_adj_time=True): 386 | 387 | if not type(l1) in [list, tuple]: 388 | l1 = [l1] 389 | 390 | X = simple_normalize(X) 391 | 392 | tic = time.clock() 393 | 394 | #problemkey = self.regression_type_selector(*[bool(x) for x in [l1, l2, l3, delta, svmdelta]]) 395 | 396 | problemkey = self.regression_type_selector(l1, l2, l3, delta, svmdelta) 397 | 398 | self.problemkey = problemkey 399 | self.trainX_shape = X.shape 400 | 401 | if problemkey in ('HuberSVMGraphNet', 'RobustGraphNet', 'NaiveGraphNet'): 402 | if G is None: 403 | #nx = 60 404 | #ny = 60 405 | #A, Afull = construct_adjacency_list(nx, ny, 1, return_full=True) 406 | #A, Afull = self.gen_adj(X.shape[1]) 407 | #if greymatter_mask is not None: 408 | # A, GMA = prepare_adj(trial_mask, numt=1, gm_mask=greymatter_mask) 409 | #else: 410 | # A = prepare_adj(trial_mask, numt=1) 411 | # GMA = None 412 | if use_adj_time: 413 | A = prepare_adj(trial_mask, numt=1, gm_mask=greymatter_mask) 414 | else: 415 | A = prepare_adj(trial_mask, numt=0, gm_mask=greymatter_mask) 416 | 417 | else: 418 | A = G.copy() 419 | 420 | if initial_l1weights is not None: 421 | newl1 = l1 422 | else: 423 | newl1 = None 424 | 425 | if problemkey is 'RobustGraphNet': 426 | problemtype = graphnet.RobustGraphNet 427 | print 'Robust GraphNet with penalties (l1, l2, l3, delta): ', l1, l2, l3, delta 428 | l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)#, gma=GMA) 429 | l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta, l1weights=initial_l1weights, 430 | newl1=newl1) 431 | 432 | elif problemkey is 'HuberSVMGraphNet': 433 | problemtype = graphnet.GraphSVM 434 | print 'HuberSVM GraphNet with penalties (l1, l2, l3, delta): ', l1, l2, l3, delta 435 | Y = 2*np.round(np.random.uniform(0, 1, len(Y)))-1 436 | l = cwpath.CoordWise((X, Y, A), problemtype)#, gma=GMA) 437 | l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta, l1weights=initial_l1weights, 438 | newl1=newl1) 439 | 440 | elif problemkey is 'NaiveGraphNet': 441 | problemtype = graphnet.NaiveGraphNet 442 | print 'Testing GraphNet with penalties (l1, l2, l3): ', l1, l2, l3 443 | l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)#, gma=GMA) 444 | l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, l1weights=initial_l1weights, 445 | newl1=newl1) 446 | 447 | elif problemkey is 'NaiveENet': 448 | problemtype = graphnet.NaiveENet 449 | print 'Testing ENET with penalties (l1, l2): ', l1, l2 450 | l = cwpath.CoordWise((X, Y), problemtype, initial_coefs=initial) 451 | l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l1weights=initial_l1weights, 452 | newl1=newl1) 453 | 454 | elif problemkey is 'Lasso': 455 | problemtype = graphnet.Lasso 456 | print 'Testing LASSO with penalty (l1): ', l1 457 | l = cwpath.CoordWise((X, Y), problemtype, initial_coefs=initial) 458 | l.problem.assign_penalty(path_key='l1', l1=l1, l1weights=initial_l1weights, newl1=newl1) 459 | 460 | else: 461 | 
print 'Incorrect parameters set (no problem key).' 462 | return False 463 | 464 | # Solve the problem: 465 | print 'Solving the problem...' 466 | 467 | coefficients, residuals = l.fit(tol=tol, initial=initial) 468 | 469 | self.coefficients = coefficients 470 | self.residuals = residuals 471 | 472 | print '\t---> Fitting problem with coordinate decesnt took: ', time.clock()-tic, 'seconds.' 473 | 474 | if adaptive: 475 | tic = time.clock() 476 | safety = 1e-5 477 | l1weights = 1./(self.coefficients[-1]+safety) 478 | l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial) 479 | l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta, l1weights=l1weights, newl1=l1) 480 | adaptive_coefficients, adaptive_residuals = l.fit(tol=tol, initial=initial) 481 | print '\t---> Fitting Adaptive GraphNet problem with coordinate descent took: ', time.clock()-tic, 'seconds.' 482 | 483 | self.firstpass_coefficients = self.coefficients 484 | self.firstpass_residuals = self.residuals 485 | self.coefficients = adaptive_coefficients 486 | self.residuals = adaptive_residuals 487 | 488 | ''' 489 | if scipy_compare: 490 | 491 | l1 = l1[-1] 492 | beta = self.coefficients[-1] 493 | 494 | print '\t---> Fitting with scipy for comparison...' 495 | 496 | tic = time.clock() 497 | 498 | if problemkey is 'RobustGraphNet': 499 | def f(beta): 500 | huber_sum = self.huber(Y - np.dot(X, beta), delta).sum()/2 501 | beta_l1 = l1*np.dot(np.fabs(beta), l1weights) 502 | beta_l2 = l2*np.linalg.norm(beta)**2/2 503 | beta_l3 = l3*np.dot(beta, np.dot(Afull, beta))/2 504 | return huber_sum + beta_l1 + beta_l2 + beta_l3 505 | 506 | elif problemkey is 'HuberSVMGraphNet': 507 | Xp2 = np.hstack([np.ones(X.shape[0])[:,np.newaxis], X]) 508 | def f(beta): 509 | ind = range(1, len(beta)) 510 | huber_err_sum = self.huber_svm_error(beta, Y, Xp2, delta).sum() 511 | beta_l1 = np.fabs(beta[ind]).sum()*l1 512 | beta_l2 = l2*(np.linalg.norm(beta[ind])**2/2) 513 | beta_l3 = l3*(np.dot(beta[ind], np.dot(Afull, beta[ind])))/2 514 | return huber_error_sum + beta_l1 + beta_l2 + beta_l3 515 | 516 | elif problemkey is 'NaiveGraphNet': 517 | def f(beta): 518 | beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2 519 | beta_l1 = l1*np.fabs(beta).sum() 520 | beta_l2 = l2*np.linalg.norm(beta)**2/2 521 | beta_l3 = l3*np.dot(beta, np.dot(Afull, beta))/2 522 | return beta_XY + beta_l1 + beta_l2 + beta_l3 523 | 524 | elif problemkey is 'NaiveENet': 525 | def f(beta): 526 | beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2 527 | beta_l1 = l1*np.fabs(beta).sum() 528 | beta_l2 = np.linalg.norm(beta)**2/2 529 | 530 | elif problemkey is 'Lasso': 531 | def f(beta): 532 | beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2 533 | beta_l1 = l1*np.fabs(beta).sum() 534 | 535 | if problemkey is 'HuberSVMGraphNet': 536 | v = scipy.optimize.fmin_powell(f, np.zeros(Xp2.shape[1]), ftol=1.0e-14, xtol=1.0e-14, maxfun=100000) 537 | else: 538 | v = scipy.optimize.fmin_powell(f, np.zeros(X.shape[1]), ftol=1.0e-10, xtol=1.0e-10, maxfun=100000) 539 | 540 | v = np.asarray(v) 541 | 542 | print '\t---> Fitting GraphNet with scipy took: ', time.clock()-tic, 'seconds.' 543 | 544 | assert_true(np.fabs(f(v) - f(beta)) / np.fabs(f(v) + f(beta)) < tol) 545 | if np.linalg.norm(v) > 1e-8: 546 | assert_true(np.linalg.norm(v - beta) / np.linalg.norm(v) < tol) 547 | else: 548 | assert_true(np.linalg.norm(beta) < 1e-8) 549 | 550 | print '\t---> Coordinate-wise and Scipy optimization agree.' 
551 | ''' 552 | 553 | 554 | return self.coefficients 555 | 556 | 557 | 558 | 559 | class Gridsearch(object): 560 | 561 | def __init__(self, savedir=os.getcwd()): 562 | super(Gridsearch, self).__init__() 563 | self.verbose = True 564 | self.savedir = savedir 565 | 566 | #self.l1_range = [] 567 | #self.l2_range = [] 568 | #self.l3_range = [] 569 | 570 | self.folds = 5 571 | 572 | self.searches = [] 573 | 574 | st = time.localtime() 575 | timestr = str(st.tm_mon)+'_'+str(st.tm_mday)+'_'+str(st.tm_hour)+'_'+str(st.tm_min) 576 | 577 | self.logfile_name = 'fgrid_'+timestr+'.json' 578 | 579 | self.records = {} 580 | 581 | 582 | def generate_l1_values(self, l1_lower, l1_upper, granularity, round_to_int=True, 583 | inclusive_max=True): 584 | 585 | distance = float(l1_upper)-float(l1_lower) 586 | 587 | step = distance*granularity 588 | if round_to_int: 589 | step = round(step) 590 | 591 | if inclusive_max: 592 | l1_values = [l1_lower+(x*step) for x in range(int(round(1.*granularity))+1)] 593 | else: 594 | l1_values = [l1_lower+(x*step) for x in range(int(round(1.*granularity)))] 595 | 596 | if self.verbose: 597 | print 'l1_range:', l1_lower, l1_upper 598 | print 'distance:', distance 599 | print 'granularity:', granularity 600 | print 'step size:', step 601 | print 'l1 values:', l1_values 602 | 603 | return l1_values, step 604 | 605 | 606 | def simple_generate_l1_range(self, l1min, l1max, stepsize, no_negative=True): 607 | 608 | l1min, l1max, stepsize = float(l1min), float(l1max), float(stepsize) 609 | 610 | l1_range = [l1min] 611 | while l1_range[-1]+stepsize < l1max: 612 | l1_range.append(l1_range[-1]+stepsize) 613 | l1_range.append(l1max) 614 | 615 | if no_negative: 616 | l1_range = [x for x in l1_range if x > 0.] 617 | 618 | return l1_range 619 | 620 | 621 | 622 | def log_progress(self): 623 | 624 | jsonpath = os.path.join(self.savedir, self.logfile_name) 625 | jfid = open(jsonpath,'w') 626 | simplejson.dump(self.records, jfid) 627 | jfid.close() 628 | 629 | 630 | def run_naive_gnet(self, csearch, l1_list=None, use_memmap=False, greymatter_mask=None, 631 | adaptive=False, test_run=False): 632 | 633 | cparams = csearch['parameters'] 634 | 635 | print cparams 636 | 637 | if l1_list: 638 | print 'l1s:',l1_list 639 | train_kwargs = {'trial_mask':self.gnet.trial_mask, 'l1':l1_list, 640 | 'l2':cparams['l2'], 'l3':cparams['l3'], 'greymatter_mask':greymatter_mask, 641 | 'adaptive':adaptive, 'delta':cparams['delta']} 642 | else: 643 | train_kwargs = {'trial_mask':self.gnet.trial_mask, 'l1':cparams['l1'], 644 | 'l2':cparams['l2'], 'l3':cparams['l3'],'greymatter_mask':greymatter_mask, 645 | 'adaptive':adaptive, 'delta':cparams['delta']} 646 | 647 | 648 | self.gnet.setup_crossvalidation(subject_indices=self.gnet.subject_indices, folds=self.folds) 649 | 650 | if not test_run: 651 | accuracies, average_accuracies, nz_coefs = self.gnet.crossvalidate(train_kwargs, use_memmap=use_memmap) 652 | 653 | else: 654 | accuracies = [[random.random() for x in range(len(l1_list))] for x in range(5)] 655 | average_accuracies = [] 656 | for i in range(len(accuracies[0])): 657 | accs = [] 658 | for j in range(len(accuracies)): 659 | accs.append(accuracies[j][i]) 660 | average_accuracies.append(sum(accs)/len(accs)) 661 | nz_coefs = [random.randint(0,1000) for x in range(len(l1_list))] 662 | 663 | 664 | 665 | self.accuracies = accuracies 666 | self.average_accuracies = average_accuracies 667 | self.non_zero_coefs = nz_coefs 668 | 669 | if l1_list: 670 | self.csearches = [] 671 | for ind, l1 in enumerate(l1_list): 672 | nsearch 
= {} 673 | nsearch['parameters'] = {'l1':l1, 'l2':cparams['l2'], 'l3':cparams['l3'], 'delta':cparams['delta']} 674 | nsearch['parameters']['l1'] = l1 675 | group_accuracies = [] 676 | for i in range(len(self.accuracies)): 677 | group_accuracies.append(self.accuracies[i][ind]) 678 | nsearch['accuracies'] = group_accuracies 679 | nsearch['average_accuracy'] = average_accuracies[ind] 680 | nsearch['non_zero_coefs'] = nz_coefs[ind] 681 | nsearch['search_iter'] = csearch['search_iter'] + ind 682 | 683 | #pprint(nsearch) 684 | 685 | self.csearches.append(nsearch) 686 | return self.csearches 687 | else: 688 | csearch['accuracies'] = accuracies[0] 689 | csearch['average_accuracy'] = average_accuracies[0] 690 | csearch['non_zero_coefs'] = nz_coefs[0] 691 | return csearch 692 | 693 | 694 | 695 | 696 | def _multi_l1_pass(self, l1_range, l2, l3, delta=None, reverse_range=True, 697 | use_memmap=False, adaptive=False, greymatter_mask=None, 698 | verbose=True, test_run=False): 699 | 700 | cur_l1_range = l1_range[:] 701 | if reverse_range: 702 | cur_l1_range.reverse() 703 | 704 | cur_params = {'l1':[], 'l2':l2, 'l3':l3, 'delta':delta} 705 | csearch = {} 706 | csearch['search_iter'] = self.search_count 707 | csearch['parameters'] = cur_params 708 | 709 | if self.verbose: 710 | print '\nPREFORMING NEXT MULTI-SEARCH GRAPHNET\n' 711 | print 'l1 range:', cur_l1_range 712 | print 'l2', l2 713 | print 'l3', l3 714 | print 'delta', delta 715 | 716 | 717 | csearches = self.run_naive_gnet(csearch, l1_list=cur_l1_range, 718 | use_memmap=use_memmap, greymatter_mask=greymatter_mask, 719 | adaptive=adaptive, test_run=test_run) 720 | 721 | for cs in csearches: 722 | self.searches.append(cs) 723 | self.search_count += 1 724 | self.records['current_iter'] = self.search_count 725 | self.records['searches'] = self.searches 726 | 727 | for srec in self.searches: 728 | cacc = srec['average_accuracy'] 729 | if cacc > self.best_acc: 730 | self.best_acc = cacc 731 | self.best_parameters = srec['parameters'] 732 | print 'new best parameters:', self.best_parameters 733 | 734 | self.records['best_acc'] = self.best_acc 735 | self.records['best_parameters'] = self.best_parameters 736 | 737 | self.log_progress() 738 | 739 | 740 | def _zoom_determine_l1minmax(self): 741 | best_l1 = self.best_parameters['l1'] 742 | half_dist = float(self.current_l1_distance)/2. 
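# (note) the next, narrower l1 window is centered on the best l1 found so
# far; e.g. (hypothetical numbers) best_l1 = 30 and current_l1_distance = 30
# give the window [15, 45], clamped below at l1_hard_min by the check below.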
743 | temp_min = best_l1 - half_dist 744 | temp_max = best_l1 + half_dist 745 | 746 | if min(temp_min, self.l1_hard_min) == self.l1_hard_min: 747 | return self.l1_hard_min, self.l1_hard_min+self.current_l1_distance 748 | else: 749 | return temp_min, temp_max 750 | 751 | 752 | def _zoom_cut_priorl1s(self, l1_list, prior_parameters): 753 | 754 | for parameters in prior_parameters: 755 | prior_l1 = parameters[0] 756 | l1_list = [x for x in l1_list if x != prior_l1] 757 | 758 | return l1_list 759 | 760 | 761 | def zoom_gridsearch(self, gnet, name='zoom_gsearch', adaptive=False, use_memmap=False, 762 | greymatter_mask=None, test_run=False, verbose=True): 763 | 764 | self.gnet = gnet 765 | self.records['title'] = name 766 | if name: 767 | st = time.localtime() 768 | timestr = str(st.tm_mon)+'_'+str(st.tm_mday)+'_'+str(st.tm_hour)+'_'+str(st.tm_min) 769 | self.logfile_name = name+'_'+timestr+'.json' 770 | 771 | defaults = {'initial_l1_min':5., 772 | 'initial_l1_max':65., 773 | 'l1_stepsizes':[6.,3.,1.], 774 | 'l1_hard_min':5., 775 | 'l1_shrink_coef':.5, 776 | 'l2_range':[1.,10.,100.,1000.,10000.], 777 | 'l3_range':[1.,10.,100.,1000.,10000.], 778 | 'deltas':[]} 779 | 780 | 781 | for gs_var, var_val in defaults.items(): 782 | if getattr(self, gs_var, None) in [None, [], {}, 0., 0, False]: 783 | setattr(self, gs_var, var_val) 784 | self.records[gs_var] = getattr(self, gs_var, None) 785 | 786 | if greymatter_mask is not None: 787 | self.greymatter_mask = greymatter_mask 788 | 789 | 790 | 791 | self.records['adaptive'] = adaptive 792 | self.records['use_memmap'] = use_memmap 793 | self.records['greymatter_mask'] = hasattr(self, 'greymatter_mask') 794 | 795 | self.records['folds'] = self.folds 796 | self.records['current_iter'] = 0 797 | self.records['searches'] = self.searches 798 | 799 | self.search_count = 0 800 | self.best_acc = 0. 801 | self.best_parameters = {} 802 | self.parameter_tracker = [] 803 | self.initial_l1_distance = self.initial_l1_max - self.initial_l1_min 804 | self.records['l1_distances'] = [] 805 | self.records['l1_ranges'] = [] 806 | 807 | self.log_progress() 808 | 809 | 810 | # l1 step zooms: 811 | for zoom_n, l1_step in enumerate(self.l1_stepsizes): 812 | 813 | if zoom_n == 0: 814 | self.current_l1_distance = self.initial_l1_distance 815 | self.current_l1_min = self.initial_l1_min 816 | self.current_l1_max = self.initial_l1_max 817 | else: 818 | self.current_l1_distance = self.l1_shrink_coef*self.current_l1_distance 819 | self.current_l1_min, self.current_l1_max = self._zoom_determine_l1minmax() 820 | 821 | self.records['l1_distances'].append(self.current_l1_distance) 822 | 823 | 824 | if verbose: 825 | print 'Preforming zoom pass...' 
826 | print 'zoom distance', self.current_l1_distance 827 | 828 | 829 | self.l1_range = self.simple_generate_l1_range(self.current_l1_min, 830 | self.current_l1_max, 831 | l1_step) 832 | 833 | sparse_l1_range = self._zoom_cut_priorl1s(self.l1_range, self.parameter_tracker) 834 | 835 | self.records['l1_ranges'].append(sparse_l1_range) 836 | 837 | for l3 in self.l3_range: 838 | for l2 in self.l2_range: 839 | if not self.deltas: 840 | self._multi_l1_pass(sparse_l1_range, l2, l3, test_run=test_run, delta=None, adaptive=adaptive, 841 | greymatter_mask=greymatter_mask) 842 | for l1 in sparse_l1_range: 843 | self.parameter_tracker.append([l1,l2,l3]) 844 | else: 845 | for delta in self.deltas: 846 | self._multi_l1_pass(sparse_l1_range, l2, l3, delta=delta, test_run=test_run, adaptive=adaptive, 847 | greymatter_mask=greymatter_mask) 848 | 849 | 850 | 851 | 852 | 853 | 854 | def standard_gridsearch(self, gnet, reverse_range=True, name='standard_gsearch', adaptive=False, 855 | use_memmap=False, greymatter_mask=None): 856 | 857 | self.gnet = gnet 858 | self.records['title'] = name 859 | 860 | if name: 861 | st = time.localtime() 862 | timestr = str(st.tm_mon)+'_'+str(st.tm_mday)+'_'+str(st.tm_hour)+'_'+str(st.tm_min) 863 | self.logfile_name = name+'_'+timestr+'.json' 864 | 865 | self.records['l1_range'] = self.l1_range 866 | self.records['l2_range'] = self.l2_range 867 | self.records['l3_range'] = self.l3_range 868 | 869 | self.records['folds'] = self.folds 870 | self.records['current_iter'] = 0 871 | self.records['searches'] = self.searches 872 | 873 | 874 | search_count = 0 875 | l1min = self.l1_range[0] 876 | l1max = self.l1_range[-1] 877 | best_acc = 0. 878 | best_l1 = -1 879 | best_l2 = -1 880 | best_l3 = -1 881 | cur_distance = l1max-l1min 882 | 883 | self.log_progress() 884 | 885 | for l3 in self.l3_range: 886 | for l2 in self.l2_range: 887 | cur_l1_range = self.l1_range[:] 888 | 889 | if reverse_range: 890 | cur_l1_range.reverse() 891 | 892 | cur_params = {'l1':[], 'l2':l2, 'l3':l3} 893 | csearch = {} 894 | csearch['search_iter'] = search_count 895 | self.records['current_iter'] = search_count 896 | csearch['parameters'] = cur_params 897 | 898 | if self.verbose: 899 | print '\nPREFORMING NEXT MULTI-SEARCH GRAPHNET\n' 900 | print 'l1 range:', cur_l1_range 901 | print 'l2', l2 902 | print 'l3', l3 903 | 904 | csearches = self.run_naive_gnet(csearch, l1_list=cur_l1_range, 905 | use_memmap=use_memmap, greymatter_mask=greymatter_mask, 906 | adaptive=adaptive) 907 | 908 | for cs in csearches: 909 | self.searches.append(cs) 910 | search_count += 1 911 | self.records['current_iter'] = search_count 912 | self.records['searches'] = self.searches 913 | 914 | for srec in self.searches: 915 | cacc = srec['average_accuracy'] 916 | if cacc > best_acc: 917 | best_acc = cacc 918 | best_parameters = srec['parameters'] 919 | 920 | self.records['best_acc'] = best_acc 921 | self.records['best_parameters'] = best_parameters 922 | 923 | self.log_progress() 924 | 925 | 926 | 927 | 928 | 929 | 930 | 931 | 932 | 933 | 934 | 935 | 936 | 937 | 938 | 939 | 940 | -------------------------------------------------------------------------------- /gui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/gui/__init__.py -------------------------------------------------------------------------------- /neuroparser.py: -------------------------------------------------------------------------------- 
1 | import sys, time 2 | import numpy as np 3 | import pyqtgraph 4 | 5 | #------------------------------------------------------------------------------- 6 | 7 | 8 | -------------------------------------------------------------------------------- /optimization/SConscript: -------------------------------------------------------------------------------- 1 | import commands 2 | 3 | # ----------------- 4 | # LOCAL ENVIRONMENT 5 | # ---------------- 6 | Import('env') 7 | optimization_env = env.Clone() 8 | 9 | #optimization_env.PythonExtension('cwpath/graphnet', ['./graphnet.pyx']) 10 | 11 | #env.PythonExtension('./optimization/cwpath/lasso', ['./optimization/cwpath/lasso.pyx']) 12 | #env.PythonExtension('./optimization/cwpath/regression', ['./optimization/cwpath/regression.pyx']) 13 | -------------------------------------------------------------------------------- /optimization/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /optimization/cwpath/SConscript: -------------------------------------------------------------------------------- 1 | import numpy, os 2 | import commands 3 | 4 | # ----------------- 5 | # LOCAL ENVIRONMENT 6 | # ---------------- 7 | Import('env') 8 | cwpath_env = env.Clone() 9 | 10 | cwpath_env.PythonExtension('cwpath', ['./cwpath.pyx']) 11 | cwpath_env.PythonExtension('graphnet', ['./graphnet.pyx']) 12 | cwpath_env.PythonExtension('regression', ['./regression.pyx']) 13 | 14 | 15 | -------------------------------------------------------------------------------- /optimization/cwpath/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /optimization/cwpath/cwpath.pyx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | # filename: cwpath.pyx 4 | # cython: profile=True 5 | 6 | """ 7 | ------------------------------------------------------------------------------------------------------------------------------ 8 | 9 | This optimization module implements large scale 'Pathwise coordinate optimization' 10 | techniques for graph-constrained sparse linear models using active set methods, warm starts, 11 | SAFE/STRONG rules for variable screening, and infimal convolution, as described in 12 | 13 | Friedman J., Hastie, T., Hofling, H., and Tibshirani, R. Pathwise coordinate optimization. Annals of Applied Statistics, 2007. 14 | [other active set refs] 15 | [SAFE] 16 | [STRONG] 17 | [Rockafeller] 18 | [our paper] 19 | 20 | ------------------------------------------------------------------------------------------------------------------------------ 21 | 22 | The optimization problems solved by the CoordWise class are determined 23 | by the following callables: 24 | 25 | CoordWise.update(active, penalty, nonzero, *problem_args): 26 | 27 | CoordWise.initialize(data): 28 | return initial value of *problem_args based on problem data 29 | 30 | CoordWise.stop(olddata, newdata): 31 | convergence check comparing current candidate to old candidate. 32 | 33 | CoordWise.output(*problem_args): 34 | return the "intersting part" of the arguments, i.e. the parts to be stored once the algorithm has found a solution. 35 | 36 | CoordWise.copy(*problem_args): 37 | return a copy of output(*problem_args) to compare to a new solution. 
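A minimal usage sketch (hypothetical data shapes and penalty values):

    l = CoordWise((X, Y, Adj), graphnet.GraphNet)
    l.problem.assign_penalty(l1=50., l2=5., l3=30.)
    coefficients, residuals = l.fit()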
38 |
39 | ------------------------------------------------------------------------------------------------------------------------------
40 | """
41 |
42 | import numpy as np
43 | cimport numpy as np
44 | import time, copy
45 |
46 | ## Compile-time datatypes
47 | DTYPE_float = np.float
48 | ctypedef np.float_t DTYPE_float_t
49 |
50 | DTYPE_int = np.int
51 | ctypedef np.int_t DTYPE_int_t
52 |
53 | # should we replace this with cython?
54 | from numpy.core.umath_tests import inner1d
55 |
56 | # ----------------------------------------------------------------------------------------------------------------------------
57 |
58 | class CoordWise(object):
59 | """
60 | Solve a (nonsmooth) convex regression minimization problem using
61 | pathwise coordinate descent.
62 |
63 | Inputs:
64 | -------
65 | -- data: a tuple (X,Y,G), where X are the independent regression variables ("inputs", "features"),
66 | and Y the dependent variable ("output", "target"). G is an optional sparse graph (such as a graph Laplacian)
67 | used in the regularized regression, and represented as a list of lists of indices of adjacent voxels.
68 | -- problemtype: a problem object, such as graphnet.GraphNet, that takes data and an optional initial set of coefficients.
69 | -- strategy: a Strategy object governing how the active set is updated between iterations (see strategy.py). Defaults to the problem's default strategy.
70 | -- penalty: A single set of penalty values for the penalized regression problem, or a list of penalties if the
71 | problem is to be solved over a path of penalty parameters (recommended).
72 | -- initial_coefs: an optional vector of initial coefficients. Defaults to None.
73 | -- debug: a flag specifying whether debugging information should be printed to screen. Defaults to True.
74 | """
75 |
76 | def __init__(self, data, problemtype, strategy=None, penalty=None, initial_coefs=None, debug=True):
77 | self.data = data
78 | self.problem = problemtype(data,initial_coefs)
79 | self.strategy = strategy or self.problem.default_strategy()
80 | self.debug = debug
81 | # self.initial_coefs = initial_coefs
82 | # self.screen_type = "STRONG"
83 | self.screen_type = "all"
84 | self.KKT_checking_ON = False
85 |
86 | # ---- Main update and fitting methods ---- #
87 |
88 | def update(self, active=None, permute=False):
89 | """
90 | Go through each coordinate in the "active" set,
91 | updating coefficients beta and residual r.
92 |
93 | Returns:
94 | --------
95 | nonzero: coordinates not set to zero in the update
96 | """
97 | if active is None:
98 | active = self.active
99 |
100 | nonzero = []
101 | if self.debug:
102 | print "Active set:", active.astype(np.int)
103 |
104 | # Update the active set, permuting the coordinate cycle order if permute is True.
105 | # Return the nonzero coefficients.
106 | self.problem.update(active.astype(np.int), nonzero, permute=permute)
107 | return np.asarray(nonzero)
108 |
109 | def fit(self, penalty=None, active=None, initial=None, tol=1e-6, repeat_update=1, refit=False, debug=False):
110 | """
111 | Fit a pathwise coordinate optimization model with initial estimates
112 | and a guess at the active set.
113 |
114 | It applies an optional initial strategy, if supplied (see strategy.py), and returns a (coefficients, residuals) pair of lists, one entry per penalty in the path.
115 | """
116 | if debug:
117 | print "Coefficients descriptive statistics:"
118 | print "\t--> Standard deviation:", np.std(self.problem.coefficients)
119 | print "\t--> Range: [",np.min(self.problem.coefficients),np.max(self.problem.coefficients)," ]"
120 | print "\t--> Mean:", np.mean(self.problem.coefficients)
121 | print "\t--> Median:", np.median(self.problem.coefficients)
122 | print "\t--> L1 norm:", np.sum(np.fabs(self.problem.coefficients))
123 |
124 | # keep copy of penalty around
125 | # penalty = self.problem.penalty.copy()
126 |
127 | if penalty is not None:
128 | self.problem.penalty = penalty
129 |
130 | # else:
131 | # raise ValueError("No penalty supplied.")
132 |
133 | # Check if there is a list of penalties to be used as a path.
134 | # If there is not, make the given value a list with one element,
135 | # otherwise, use the given list.
136 | if type(self.problem.penalty)!=type([]):
137 | penalties = [self.problem.penalty]
138 | else:
139 | penalties = self.problem.penalty
140 |
141 | # initialize conv, the worst difference ratio between
142 | # sequential estimates of nonzero coefficients
143 | self.conv = 10.
144 |
145 | # declare path length
146 | cdef long path_length = len(penalties)
147 |
148 | # ---- main loop over penalties ---- #
149 | # main fitting loop for a particular set of penalties
150 | # and eligible set
151 | # Does:
152 | # (1) Set penalties
153 | # (2) Run "repeat_update" updates of active set
154 | # (3) Check for convergence of active set.
155 | # (4) If KKT_checking_ON is True:
156 | # If active set converged, check coordinate-wise for violations of KKT conditions in the eligible set.
157 | # Add any violations to active set and repeat (2)-(3) until KKT conditions are not violated.
158 | # (5) If KKT_checking_ON is True:
159 | # Check coordinate-wise for violations of KKT conditions in the entire set of variables.
160 | # If KKT conditions satisfied, proceed to next fit in path.
161 | # Otherwise, add violations to active set and repeat (2)-(4).
162 |
163 | # initialize
164 | coefficients = []
165 | residuals = []
166 | cdef long i
167 | cdef float old_tol = tol
168 | cdef float alpha = 1.0
169 |
170 | # loop over penalty path
171 | for i in range(path_length):
172 |
173 | # inner loop initialization
174 | self.fitit = 0
175 | self.updateit = 0
176 | tol = old_tol
177 |
178 | # set penalties
179 | self.problem.penalty = penalties[i]
180 | self.lam_max = self.get_lambda_max()
181 |
182 | # set starting active set as eligible set
183 | if i == 0:
184 | if self.KKT_checking_ON:
185 | self.eligible = []
186 | while len(self.eligible) < 1:
187 | self.eligible = self._get_eligible_set( alpha*self.lam_max, penalties[i]['l1'], "STRONG", self.problem.penalty )
188 | alpha *= 0.99
189 | print alpha
190 | self.active = self.eligible.copy()
191 | else:
192 | if initial is None:
193 | print "\t---> Initial pass through all coefficients..."
194 | self.active = self.update( np.array(range(self.problem.X.shape[1])) )
195 | print "\t---> Done with initial pass."
196 | else:
197 | print "\t---> Setting active set to last solution in path."
198 | self.active = initial 199 | else: 200 | print "\t---> Path index", i 201 | if self.KKT_checking_ON: 202 | self.eligible = self._get_eligible_set(penalties[i-1], penalties[i]['l1'], self.screen_type, self.problem.penalty) 203 | self.active = self.eligible.copy() 204 | 205 | # if debugging print problem size 206 | if self.debug: 207 | print "Problem size:", self.problem.X.shape 208 | 209 | while True: 210 | # keep count of fit iterations 211 | self.fitit += 1 212 | 213 | # store the last problem solution 214 | old = self.problem.copy() 215 | 216 | # repeat update on just the current active variables 217 | for rep in xrange(repeat_update): 218 | self.active = self.update(active=self.active) 219 | 220 | # Check for convergence of active set, and if finished set 'finished' to True 221 | # If return_worst is True, return largest difference ratio between an active variable 222 | # on this and the previous iteration. 223 | finished, worst = self.problem.stop(old,tol=tol,return_worst=True) 224 | if finished: 225 | print "\t---> Active set of size", len(self.active), "converged." 226 | 227 | # If active set has converged, check KKT conditions for eligible set for SAFE/STRONG. 228 | # If there are no violations on the eligible set, run KKT on all variables. If there 229 | # is a violation in either case, add the violating variables to the active set and continue 230 | # updates (set finished=False). 231 | if finished and self.KKT_checking_ON: 232 | if self.screen_type == "STRONG" or self.screen_type == "SAFE": 233 | KKT_violations, KKT_violvals, eligible = self.check_KKT("eligible") 234 | if len(KKT_violations) > 1: 235 | print "KKT violations:", KKT_violations 236 | print "KKT violation values:", KKT_violvals 237 | self.active = np.unique( np.append( self.active, KKT_violations )) 238 | print "New active set:", self.active 239 | finished = False 240 | else: 241 | print "No KKT violations on eligible set", eligible 242 | KKT_violations, KKT_violvals, eligible = self.check_KKT("all") 243 | if len(KKT_violations) > 1: 244 | print "KKT violations:", KKT_violations 245 | print "KKT violation values:", KKT_violvals 246 | self.active = np.unique( np.append( self.active, KKT_violations )) 247 | print "New active set:", self.active 248 | finished = False 249 | else: 250 | print "No KKT violations on full variable set." 251 | elif finished and not self.KKT_checking_ON: 252 | self.current_active = self.active.copy() 253 | self.active = np.array( range(self.problem.X.shape[1]) ) 254 | self.active = self.update(self.active) 255 | if len( np.setdiff1d(self.current_active, self.active) ) > 0: 256 | print "Added ", np.setdiff1d(self.current_active, self.active), " to active set." 257 | finished = False 258 | 259 | if self.debug: 260 | print '\tFit iteration: %d, Number active: %d, Max relative change: %g' % (self.fitit, self.active.shape[0], worst) 261 | # print '\tFit iteration: %d, Greedy Iteration: %d, Number active: %d, Max relative change: %g' % (self.fitit, self.greedyit, self.active.shape[0], worst) 262 | 263 | # Store worst difference ratio between nonzero coefficients 264 | self.conv = worst 265 | 266 | # if finished is true or if the maximum number of fit iterations has been reached, break. 
267 | if finished or self.fitit > self.strategy.max_fitit: 268 | break 269 | 270 | # collect results 271 | coefficients.append(self.current[0]) 272 | residuals.append(self.current[1]) 273 | 274 | self.problem.penalty = penalty 275 | return coefficients, residuals 276 | 277 | def check_KKT(self, KKT_type="all", conv_eps=0.1): 278 | if KKT_type == "all": 279 | eligible = np.setdiff1d( np.array(range( self.problem.X.shape[1] )), self.active) 280 | # eligible = [i for i in np.array(range( self.problem.X.shape[1] )) if i not in self.active] 281 | print "KKT type: all" 282 | elif KKT_type == "eligible": 283 | eligible = np.setdiff1d( self.eligible, self.active) 284 | # eligible = [i for i in self.eligible if i not in self.active] 285 | print "KKT type: eligible." 286 | if len(eligible) > 0: 287 | # check KKT on eligible set 288 | beta_hat, r = self.current 289 | n = self.problem.X.shape[0] 290 | p = self.problem.X.shape[1] 291 | X = self.problem.X[:,eligible] 292 | y = self.data[1] 293 | G = [self.data[2][i] for i in eligible] 294 | l1 = self.problem.penalty['l1']; l2 = self.problem.penalty['l2']; l3 = self.problem.penalty['l3'] 295 | resid_prod = inner1d(X.T,r)/n 296 | pen_prod = l3*np.array([ len(G[i])*beta_hat[eligible[i]] - 0.5*np.sum(beta_hat[G[i]]) for i in xrange(len(G))]) #/p 297 | subgrad = l1*np.sign(beta_hat[eligible]) 298 | print "subgrad, resid_prod, pen_prod", subgrad, resid_prod, pen_prod 299 | print "Resid prod shape",resid_prod.shape, pen_prod.shape, subgrad.shape 300 | KKT = subgrad + resid_prod + pen_prod 301 | idx = np.where( np.fabs(KKT) > conv_eps )[0] 302 | return idx, KKT[idx], eligible 303 | else: 304 | return [],[],[] 305 | 306 | def get_lambda_max(self): 307 | """ 308 | Find the value of lambda at which all coefficients are set to zero 309 | by finding the minimum value such that 0 is in the subdifferential 310 | and the coefficients are all zero. 
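Equivalently, lambda_max = max_j | x_j^T y |; any l1 penalty at or above this value yields the all-zero solution.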
311 | """ 312 | subgrads = np.fabs( inner1d(self.problem.X.T, self.data[1]) ) 313 | return np.max( subgrads ) 314 | 315 | # ---- Methods for getting results and coefficients ---- # 316 | 317 | def _getcurrent(self): 318 | return self.problem.output() 319 | current = property(_getcurrent) 320 | 321 | def _get_eligible_set(self, lam_max, lam, type="STRONG", penalties=None): 322 | if type != "all": 323 | if self.fitit == 0: 324 | resids = self.data[1] 325 | else: 326 | _, resids = self.current 327 | if type == "STRONG": 328 | if lam_max == None or lam == None: 329 | raise ValueError("Lambda parameters not given.") 330 | eligible = self.strategy.STRONG(lam_max,lam,resids,self.problem.X) 331 | elif type == "SAFE": 332 | if lam_max == None or lam == None: 333 | raise ValueError("Lambda parameters not given.") 334 | eligible = self.strategy.SAFE(lam_max,lam,resids,self.problem.X) 335 | elif type == "all": 336 | eligible = self.strategy.all() 337 | else: 338 | raise ValueError("Strategy type does not exist") 339 | return eligible 340 | 341 | def _getresults(self): 342 | """ 343 | Function to strip extra coefficients as necessary 344 | """ 345 | if hasattr(self.problem,'get_coefficients'): 346 | return self.problem.coefficients,self.current[1] 347 | else: 348 | return self.current 349 | results = property(_getresults) 350 | 351 | def _get_num_coefs(self): 352 | """ 353 | Function to strip extra coefficients as necessary 354 | """ 355 | if hasattr(self.problem,'num_coefs'): 356 | return self.problem.num_coefs() 357 | else: 358 | return None 359 | num_coefs = property(_get_num_coefs) 360 | 361 | # ---- Methods for choosing sets of variables to search over ---- # 362 | 363 | def greedy(self, active=None, tol=None, DTYPE_int_t min_iter=5): 364 | """ 365 | Greedy algorithm: update active set at each iteration 366 | using a given strategy 367 | and continue until convergence. 
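Returns the current (coefficients, residuals) pair once converged.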
368 | """ 369 | self.greedyit = 0 370 | if active is not None: 371 | self.active = active 372 | 373 | while True: 374 | self.greedyit += 1 375 | old = self.problem.copy() 376 | nactive = self.update() 377 | if self.greedyit >= min_iter: 378 | if self.problem.stop(old,tol=tol) or self.greedyit > self.strategy.max_greedyit: 379 | break 380 | self.active = self.strategy(self.greedyit, self.active, nactive) 381 | 382 | return self.current 383 | 384 | def append(self, value): 385 | self.results.append(value) 386 | 387 | # ---------------------------------------------------------------------------------------------------------------------------- 388 | # EOF 389 | -------------------------------------------------------------------------------- /optimization/cwpath/libRblas.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/optimization/cwpath/libRblas.dylib -------------------------------------------------------------------------------- /optimization/cwpath/mask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nipy.io.api import load_image 3 | from nipy.core.api import Image 4 | 5 | def adj_from_nii(maskfile,num_time_points,numt=0,numx=1,numy=1,numz=1,regions=None): 6 | """ 7 | Construct adjacency array from .nii mask file 8 | 9 | INPUT: 10 | 11 | maskfile: Path to mask file (.nii) 12 | 13 | Other parameters are passed directly to prepare_adj (see that function for docs) 14 | 15 | OUTPUT: 16 | 17 | adj: An array containing adjacency information 18 | """ 19 | mask = load_image(maskfile)._data 20 | newmask = np.zeros(np.append(num_time_points,mask.shape)) 21 | for i in range(num_time_points): 22 | newmask[i] = mask 23 | adj = prepare_adj(newmask,numt,numx,numy,numz,regions) 24 | adj = convert_to_array(adj) 25 | return adj 26 | 27 | def prepare_adj(mask,numt=0,numx=1,numy=1,numz=1,regions=None): 28 | """ 29 | Return adjacency list, where the voxels are considered 30 | neighbors if they fall in a ball of radius numt, numx, numy, and numz 31 | for time, x position, y position, and z position respectively. 32 | 33 | INPUT: 34 | 35 | X: a 5-dimensional ndarray. The first index is trial, the second index is time, 36 | the third index is x position, the fourth index is y position and the fifth 37 | position is z position. 38 | 39 | mask: a binary 4-dimensional ndarray, the same size as X[0,:,:,:,:] where 40 | 1 indicates that the voxel-timepoint is included and 0 indicates that it is 41 | excluded. NOTE: Usually the mask is thought of as a 3-dimensional ndarray, since 42 | it is uniform across time. 43 | 44 | regions: a multivalued array the same size as the mask that indicates different 45 | regions in the spatial structure. No adjacency edges will be made across region 46 | boundaries. 47 | 48 | numt: an integer, the radius of the "neighborhood ball" in the t direction 49 | numx: an integer, the radius of the "neighborhood ball" in the x direction 50 | numy: an integer, the radius of the "neighborhood ball" in the y direction 51 | numz: an integer, the radius of the "neighborhood ball" in the z direction 52 | 53 | OUTPUT: 54 | 55 | newX: The matrix X reshaped as a 2-dimensional array for analysis 56 | adj: The adjacency list associated with newX 57 | 58 | """ 59 | 60 | #Create map going from X to predictor vector indices. 
The entries of 61 | # this array are -1 if the voxel is not included in the mask, and the 62 | # index in the new predictor corresponding to the voxel if the voxel 63 | # is included in the mask. 64 | 65 | if regions == None: 66 | regions = np.zeros(mask.shape) 67 | regions.shape = mask.shape 68 | reg_values = np.unique(regions) 69 | 70 | vmap = np.cumsum(mask).reshape(mask.shape) 71 | mask = np.bool_(mask.copy()) 72 | vmap[~mask] = -1 73 | vmap -= 1 # now vmap's values run from 0 to mask.sum()-1 74 | 75 | # Create adjacency list 76 | 77 | adj = [] 78 | 79 | nt,nx,ny,nz = mask.shape 80 | 81 | for t in range(nt): 82 | for i in range(nx): 83 | for j in range(ny): 84 | for k in range(nz): 85 | if mask[t,i,j,k]: 86 | local_map = vmap[max((t-numt),0):(t+numt+1), 87 | max((i-numx),0):(i+numx+1), 88 | max((j-numy),0):(j+numy+1), 89 | max((k-numz),0):(k+numz+1)] 90 | local_reg = regions[max((t-numt),0):(t+numt+1), 91 | max((i-numx),0):(i+numx+1), 92 | max((j-numy),0):(j+numy+1), 93 | max((k-numz),0):(k+numz+1)] 94 | region = regions[t,i,j,k] 95 | ind = (local_map>-1)*(local_reg == region) 96 | ind = np.bool_(ind) 97 | adj.append(np.array(local_map[ind],dtype=int)) 98 | for i, a in enumerate(adj): 99 | a[np.equal(a,i)] = -1 100 | #return convert_to_array(adj) 101 | return adj 102 | 103 | def convert_to_array(adj): 104 | num_ind = np.max([len(a) for a in adj]) 105 | adjarray = -np.ones((len(adj),num_ind)) 106 | for i in range(len(adj)): 107 | for j in range(len(adj[i])): 108 | adjarray[i,j] = adj[i][j] 109 | return adjarray 110 | 111 | def test_prep(nt=0,nx=1,ny=1,nz=1): 112 | """ 113 | Let's make this into a proper test...... what should newa, adj be in this case? 114 | """ 115 | a = np.array(range(1,1+2*3*4*4*4)).reshape((2,3,4,4,4)) 116 | mask = a[0]*0 117 | mask[:,0,0,0] = 1 118 | mask[:,1,1,:] = 1 119 | # print mask[0] 120 | # print a[0,0] 121 | newa, adj = prepare_adj(a,mask,nt,nx,ny,nz) 122 | # print newa[0,0], adj[0], newa[0,adj[0]] 123 | -------------------------------------------------------------------------------- /optimization/cwpath/regression.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import time 4 | 5 | ## Local imports 6 | 7 | ## Compile-time datatypes 8 | DTYPE_float = np.float 9 | ctypedef np.float_t DTYPE_float_t 10 | 11 | DTYPE_int = np.int 12 | ctypedef np.int_t DTYPE_int_t 13 | 14 | """ 15 | Functions specific to regression problems. 16 | stop: convergence criterion based on residuals. 
17 | """ 18 | 19 | import numpy as np 20 | import strategy 21 | 22 | class Regression(object): 23 | 24 | npath = 0 25 | 26 | def __init__(self, data, initial_coefs=None): 27 | 28 | self.X, self.Y = [np.asarray(x) for x in data] 29 | self.r = self.Y.copy() 30 | self.initial_coefs = initial_coefs 31 | self.beta = np.zeros(self.X.shape[1]) 32 | 33 | self.initialize() 34 | 35 | def coefficientCheck(self, 36 | np.ndarray[DTYPE_float_t, ndim=1] bold, 37 | np.ndarray[DTYPE_float_t, ndim=1] bnew, 38 | DTYPE_float_t tol): 39 | 40 | #Check if all coefficients have relative errors < tol 41 | 42 | cdef long N = len(bold) 43 | cdef long i,j 44 | 45 | for i in range(N): 46 | if bold[i] == 0.: 47 | if bnew[i] != 0.: 48 | return False 49 | if np.fabs(np.fabs(bold[i]-bnew[i])/bold[i]) > tol: 50 | return False 51 | return True 52 | 53 | def coefficientCheckVal(self, 54 | np.ndarray[DTYPE_float_t, ndim=1] bold, 55 | np.ndarray[DTYPE_float_t, ndim=1] bnew, 56 | DTYPE_float_t tol): 57 | 58 | #Check if all coefficients have relative errors < tol 59 | 60 | cdef long N = len(bold) 61 | cdef long i,j 62 | cdef DTYPE_float_t max_so_far = 0. 63 | cdef DTYPE_float_t max_active = 0. 64 | cdef DTYPE_float_t ratio = 0. 65 | 66 | for i in range(N): 67 | if bold[i] ==0.: 68 | if bnew[i] !=0.: 69 | max_so_far = 10. 70 | else: 71 | ratio = np.fabs(np.fabs(bold[i]-bnew[i])/bold[i]) 72 | if ratio > max_active: 73 | max_active = ratio 74 | 75 | if max_active > max_so_far: 76 | max_so_far = max_active 77 | 78 | return max_so_far < tol, max_active 79 | 80 | 81 | def initialize(self): 82 | """ 83 | Abstract method for initialization of regression problems. 84 | """ 85 | pass 86 | 87 | def stop(self, 88 | previous, 89 | DTYPE_float_t tol=1e-4, 90 | DTYPE_int_t return_worst = False): 91 | """ 92 | Convergence check: check whether 93 | residuals have not significantly changed or 94 | they are small enough. 95 | 96 | Both old and current are expected to be (beta, r) tuples, i.e. 97 | regression coefficent and residual tuples. 98 | 99 | """ 100 | 101 | 102 | cdef np.ndarray[DTYPE_float_t, ndim=1] bold, bcurrent 103 | bold, _ = previous 104 | bcurrent, _ = self.output() 105 | 106 | if return_worst: 107 | status, worst = self.coefficientCheckVal(bold, bcurrent, tol) 108 | if status: 109 | return True, worst 110 | return False, worst 111 | else: 112 | status = self.coefficientCheck(bold, bcurrent, tol) 113 | if status: 114 | return True 115 | return False 116 | 117 | 118 | def output(self): 119 | """ 120 | Return the 'interesting' part of the problem arguments. 121 | 122 | In the regression case, this is the tuple (beta, r). 123 | """ 124 | return self.coefficients, self.r 125 | 126 | def final_stop(self, previous, tol=1.0e-5): 127 | """ 128 | Need a better way to check this? 129 | """ 130 | if self.npath > 500: 131 | return True 132 | self.npath += 1 133 | ## Y = self.Y 134 | ## beta, r = [np.asarray(x) for x in previous] 135 | ## R2 = (r**2).sum() / (Y**2).sum() 136 | ## if R2 < tol: 137 | ## return True 138 | 139 | ## R2 = ((r - self.r)**2).sum() / (self.r**2).sum() 140 | ## if R2 < tol: 141 | ## return True 142 | 143 | return False 144 | 145 | def copy(self): 146 | """ 147 | Copy relevant output. 148 | """ 149 | 150 | cdef np.ndarray[DTYPE_float_t, ndim=1] coefs, r 151 | coefs, r = self.output() 152 | return (coefs.copy(), r.copy()) 153 | 154 | def update(self, active, nonzero): 155 | """ 156 | Update coefficients in active set, returning nonzero coefficients. 157 | 158 | Abstract method for update step. 
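Concrete problem classes (for example, the GraphNet variants defined in graphnet.pyx) are expected to supply the actual coordinate-wise update.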
159 | """ 160 | raise NotImplementedError 161 | 162 | def initial_strategy(self): 163 | """ 164 | Initial strategy in the pathwise search. 165 | """ 166 | return strategy.NStepBurnin(self.total_coefs, nstep=8, burnin=2) 167 | 168 | def default_strategy(self): 169 | """ 170 | Default strategy. 171 | """ 172 | return strategy.Strategy(self.total_coefs) 173 | 174 | def default_active(self): 175 | """ 176 | Default active set. 177 | """ 178 | return np.arange(self.total_coefs) 179 | 180 | def default_penalty(self): 181 | """ 182 | Abstract method for default penalty. 183 | """ 184 | raise NotImplementedError 185 | 186 | def assign_penalty(self, path_key=None, **params): 187 | """ 188 | Abstract method for assigning penalty parameters. 189 | """ 190 | if path_key is None: 191 | path_length = 1 192 | else: 193 | path_length = len(params[path_key]) 194 | penalty_list = [] 195 | for i in range(path_length): 196 | # penalty = self.penalty.copy() 197 | penalty = dict() 198 | for key in params: 199 | if key==path_key: 200 | penalty[key] = params[key][i] 201 | else: 202 | penalty[key] = params[key] 203 | penalty_list.append(penalty) 204 | if path_length == 1: 205 | penalty_list = penalty_list[0] 206 | self.penalty = penalty_list 207 | 208 | 209 | -------------------------------------------------------------------------------- /optimization/cwpath/strategy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Strategies for changing active set based on current iteration, 3 | the current active set and a candidate set of non-zero coefficients. 4 | """ 5 | import numpy as np 6 | import scipy.linalg as la 7 | 8 | # should we replace this with cython? 9 | from numpy.core.umath_tests import inner1d 10 | 11 | class Strategy: 12 | 13 | """ 14 | Strategy for choosing active set for p 15 | variables, given current active set and a 16 | candidate active set consisting of all coordinates 17 | that were updated in coordinate wise algorithm. 18 | 19 | Information on the current iteration the algorithm 20 | is on may also be used. 21 | """ 22 | 23 | max_greedyit = np.inf 24 | max_fitit = 1e4 25 | 26 | def __init__(self, p): 27 | """ 28 | Default strategy is to include all variables in 29 | the active set. 30 | """ 31 | self.p = p 32 | 33 | def __call__(self, iteration, current, candidate): 34 | return self.all() 35 | 36 | def all(self): 37 | """ 38 | All variables are active. 39 | """ 40 | return np.arange(self.p) 41 | 42 | def SAFE(self, lam_max, lam, y, X): 43 | """ 44 | Screen variables using the SAFE rule. 45 | """ 46 | resid_prod = np.fabs( inner1d(X.T,resid) ) 47 | idx = resid_prod >= lam - la.norm(X[:,i])*la.norm(y)*((lam_max-lam)/lam_max) 48 | return np.where(idx)[0] 49 | 50 | def STRONG(self, lam_max, lam, resid, X): 51 | """ 52 | Screen variables using the STRONG rule. 53 | """ 54 | resid_prod = np.fabs( inner1d(X.T,resid) ) 55 | idx = resid_prod >= 2*lam_max - lam 56 | return np.where(idx)[0] 57 | 58 | class NStep(Strategy): 59 | 60 | __doc__ = Strategy.__doc__ 61 | 62 | def __init__(self, p, nstep=5): 63 | """ 64 | Update the active set active set with the candidate 65 | if iteration % nstep == 0. 
66 | """ 67 | 68 | self.p = p 69 | self.nstep = nstep 70 | 71 | def __call__(self, iteration, current, candidate): 72 | if iteration % self.nstep == 0: 73 | current = np.asarray(candidate) 74 | return np.asarray(current) 75 | 76 | class NStepBurnin(Strategy): 77 | 78 | __doc__ = Strategy.__doc__ 79 | 80 | def __init__(self, p, nstep=5, burnin=1): 81 | """ 82 | Update the active set with the candidate 83 | if it % self.nstep == 0, unless it==self.burnin, in which 84 | case also return the candidate. 85 | 86 | Implicitly assumes that the 87 | initial active set is "large", and one update is 88 | enough to get a very good idea of the active set. 89 | 90 | Further iterations, can still drop variables from the active set 91 | after every nstep iterations. 92 | 93 | """ 94 | self.p = p 95 | self.nstep = nstep 96 | self.burnin = burnin 97 | if burnin >= nstep: 98 | raise ValueError, 'expecting burnin < nstep' 99 | 100 | def __call__(self, it, current, candidate): 101 | if it % self.nstep == 0 or it == self.burnin: 102 | current = np.asarray(candidate) 103 | return np.asarray(current) 104 | -------------------------------------------------------------------------------- /optimization/cwpath/tests/Rclone.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def lassoR(X, Y, l1, tol=1.0e-10): 4 | """ 5 | "Literal" translation of R script into python. 6 | """ 7 | 8 | it = 0 9 | Y -= Y.mean() 10 | X = np.asarray(X) 11 | n, p = X.shape 12 | beta = np.zeros(p) 13 | S = np.dot(X.T, Y) 14 | r = Y 15 | 16 | err = np.inf 17 | 18 | vals = [] 19 | C = np.zeros(X.shape) 20 | while err > tol: 21 | it += 1 22 | vals.append(beta.copy()) 23 | for j in range(p): 24 | r = Y - np.dot(X, beta) 25 | S = (X[:,j] * (r + X[:,j] * beta[j])).sum() 26 | beta[j] = np.sign(S) * pospart(np.fabs(S) - l1 / np.sqrt(n)) / n 27 | if it > 1: 28 | err = np.fabs(beta - vals[-1]).sum() / p 29 | return np.array(vals[-1]) 30 | -------------------------------------------------------------------------------- /optimization/cwpath/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/optimization/cwpath/tests/__init__.py -------------------------------------------------------------------------------- /optimization/cwpath/tests/libRblas.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/optimization/cwpath/tests/libRblas.dylib -------------------------------------------------------------------------------- /optimization/cwpath/tests/makedata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.signal as signal 3 | import pylab as pl 4 | 5 | def basic_data(n,p,SNR=1.): 6 | # input signal and noisy signal 7 | Xsig = np.random.randn(n,p) 8 | Xsig -= np.mean(Xsig, axis=0) 9 | Xsig /= np.std(Xsig, axis=0) 10 | 11 | X = Xsig + np.random.randn(n,p)/SNR 12 | X -= np.mean(X, axis=0) 13 | X /= np.std(X, axis=0) 14 | 15 | # coefficients 16 | coefs = np.zeros((p,)) 17 | coefs[0:5] = 5. 18 | 19 | # output 20 | y = np.dot(Xsig, coefs) 21 | y -= np.mean(y) 22 | y /= np.std(y) 23 | 24 | return X,y 25 | 26 | def spatial_data(n=1000, im_side=100, SNR=10., outlier_frac=0.05, thresh=0.1): 27 | # input signal image and noise image, p must have a square root. 
28 | pos_im = smoothed_point_process_2D( 0.0009, im_side, im_side, 20 ) 29 | neg_im = smoothed_point_process_2D( 0.0009, im_side, im_side, 20 ) 30 | im = pos_im - neg_im 31 | im[np.fabs(im) < thresh*np.max(np.fabs(im))] = 0 32 | side = pos_im.shape[0] 33 | p = side**2 34 | y = [] 35 | for i in xrange(n): 36 | # y.append(np.random.uniform()) 37 | coin = np.random.binomial(1,0.5) 38 | outlier_coin = np.random.binomial(1,outlier_frac) 39 | y.append(float(coin)) 40 | 41 | if i==0: 42 | Xsig = (y[i]*im.flatten('F') + np.zeros(im.shape).flatten('F')).reshape((1,p)) 43 | Xnoise = np.random.normal(0.0,1.0,size=p).reshape((1,p)) 44 | outlier_sign = 1 45 | if outlier_coin: 46 | print "outlier!" 47 | Xnoise += np.random.laplace(0.0,10.0,size=p).reshape((1,p)) 48 | else: 49 | sig = (y[i]*im.flatten('F') + np.zeros(im.shape).flatten('F')).reshape((1,p)) 50 | Xsig = np.vstack((Xsig, sig)) 51 | noise = np.random.normal(0.0,1.0,size=p).reshape((1,p)) 52 | if outlier_coin: 53 | print "outlier!" 54 | outlier_sign *= -1 55 | noise += np.random.laplace(0.0,10.0,size=p).reshape((1,p)) 56 | Xnoise = np.vstack((Xnoise, noise)) 57 | 58 | # standardize 59 | X = Xsig + Xnoise/SNR 60 | X -= np.mean(X, axis=0) 61 | X /= np.std(X, axis=0) 62 | 63 | # output 64 | y = np.array(y) 65 | y -= np.mean(y) 66 | y /= np.std(y) 67 | 68 | return X,y,im,outlier_frac 69 | 70 | def gauss_kern( size, sizey = None ): 71 | """ Returns a normalized 2D gauss kernel array for convolutions """ 72 | size = int(size) 73 | if not sizey: 74 | sizey = size 75 | else: 76 | sizey = int(sizey) 77 | x, y = np.mgrid[-size:size+1, -sizey:sizey+1] 78 | g = np.exp(-(x**2/float(size)+y**2/float(sizey))) 79 | 80 | return g / g.sum() 81 | 82 | def gauss_blur(im, n, ny = None): 83 | """ 84 | Blurs the image by convolving with a gaussian kernel of typical 85 | size n. The optional keyword argument ny allows for a different 86 | size in the y direction. 87 | """ 88 | g = gauss_kern( n, sizey = ny) 89 | improc = signal.convolve( im, g, mode='valid') 90 | 91 | return(improc) 92 | 93 | def point_process_2D( eta, x, y ): 94 | """ 95 | Creates a random Poisson process in 2D with intensity parameter eta. 96 | """ 97 | return np.random.poisson( eta, size = (x,y) ) 98 | 99 | def smoothed_point_process_2D(eta=None, x=None, y=None, blur_width=None): 100 | """ 101 | Creates a smoothed random Poisson process in 2D with intensity parameter eta. 
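    Unspecified arguments fall back to eta=0.001 on a 100x100 grid; the raw
    Poisson field is then smoothed with a Gaussian kernel of width blur_width
    (see gauss_blur above).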
102 | """ 103 | if eta is not None: 104 | eta = eta 105 | else: 106 | eta = 0.001 107 | if x is not None: 108 | x = x 109 | else: 110 | x = 100 111 | if y is not None: 112 | y = y 113 | else: 114 | y = 100 115 | im = point_process_2D( eta, x, y ) 116 | return gauss_blur( im, blur_width ) 117 | 118 | if __name__ == '__main__': 119 | spatial_test = True 120 | basic_test = False 121 | 122 | if spatial_test: 123 | # make 1000 points for training, 1000 for validation, and 1000 for test 124 | X,Y,sig_im,outlier_frac = spatial_data(n=3000) 125 | np.savez("Data",X=X,Y=Y,sig_im=sig_im, outlier_frac=outlier_frac) 126 | pl.imsave("sig.png",sig_im) 127 | 128 | if basic_test: 129 | X,y = basic_data(50,100,SNR=100) 130 | np.save('X',X) 131 | np.save('Y',y) 132 | -------------------------------------------------------------------------------- /optimization/cwpath/tests/profile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | # filename: profile.py 4 | # cython: profile=True 5 | 6 | # import profiling stuff 7 | import pstats, cProfile 8 | 9 | # cython functions are not yet seen as packages. import path. 10 | import sys, os, time 11 | path_to_cython_packages = os.path.abspath('../.') 12 | sys.path.append(path_to_cython_packages) 13 | 14 | # scons still building packges funny, get path to graphs 15 | path_to_graphs = os.path.abspath('../../graphs/.') 16 | sys.path.append(path_to_graphs) 17 | 18 | # import major libraries 19 | import numpy as np 20 | import time 21 | 22 | # local imports 23 | # import testR 24 | import cwpath, graphnet, strategy 25 | from graph_laplacian import construct_adjacency_list 26 | from test_graphnet import train_all, test_graphnet, get_lambda_max 27 | 28 | #------------------------------------------------------------------------------------------------------------- 29 | 30 | def profile_test_graphnet(): 31 | # get training data and constants 32 | Data = np.load("Data.npz") 33 | X = Data['X'][0:1000,:] 34 | Y = Data['Y'][0:1000] 35 | G = None 36 | lam_max = get_lambda_max(X,Y) 37 | cwpathtol = 1e-6 38 | 39 | # penalty grid values 40 | l1vec = np.linspace(0.95*lam_max, 0.0001*lam_max, num=100).tolist() 41 | results, problemkey = test_graphnet(X, Y, G, l1vec, 1e4, 1e6, 0.1, -999.0,initial=None,tol=cwpathtol,scipy_compare=False) 42 | 43 | def profile_cwpath_robust_graphnet(): 44 | # get training data and constants 45 | Data = np.load("Data.npz") 46 | X = Data['X'][0:1000,:] 47 | print "Data matrix size:",X.shape 48 | Y = Data['Y'][0:1000] 49 | nx = np.sqrt(X.shape[1]) 50 | ny = np.sqrt(X.shape[1]) 51 | A = construct_adjacency_list(nx,ny,1) 52 | lam_max = get_lambda_max(X,Y) 53 | tol = 1e-6 54 | initial=None 55 | 56 | # choose penalty grid 57 | l1 = np.linspace(4*lam_max, 0.2*lam_max, num=100) 58 | l2 = 100. 59 | l3 = 1000. 
60 | delta = 1.0 61 | 62 | # setup problem 63 | problemtype = graphnet.RobustGraphNet 64 | problemkey = 'RobustGraphNet' 65 | print "Robust GraphNet with penalties (l1, l2, l3, delta)", l1, l2, l3, delta 66 | l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial) #,initial_coefs=np.array([14.]*10)) 67 | l.problem.assign_penalty(path_key='l1',l1=l1,l2=l2,l3=l3,delta=delta) 68 | coefficients, residuals = l.fit(tol=tol, initial=initial) 69 | 70 | 71 | 72 | if __name__ == "__main__": 73 | 74 | # cProfile.runctx("train_all()", globals(), locals(), "Profile.prof") 75 | cProfile.runctx("profile_cwpath_robust_graphnet()", globals(), locals(), "Profile.prof") 76 | 77 | s = pstats.Stats("Profile.prof") 78 | s.strip_dirs().sort_stats("time").print_stats() 79 | -------------------------------------------------------------------------------- /optimization/cwpath/tests/sandbox.py: -------------------------------------------------------------------------------- 1 | import rpy 2 | import numpy as np 3 | 4 | import cwpath, lasso 5 | import pylab 6 | reload(cwpath) 7 | 8 | rpy.r("library(lars)") 9 | rpy.r("data(diabetes)") 10 | X = rpy.r("diabetes$x") 11 | Y = rpy.r("diabetes$y") 12 | 13 | c = cwpath.CoordWise((X,Y), lasso.Lasso) 14 | p = c.problem.penalty 15 | p.value = 1.e+03 16 | 17 | c.fit(1.0) 18 | p.value = 1.0e+03 19 | print c.current 20 | 21 | c.path() 22 | 23 | def plot_path(c): 24 | b = np.asarray([a[1][0] for a in c.results]) 25 | l1 = np.sum(np.fabs(b), axis=1) 26 | for i in range(10): 27 | pylab.scatter(l1, b[:,i]) 28 | pylab.show() 29 | print b.shape 30 | return b 31 | 32 | b = plot_path(c) 33 | 34 | 35 | -------------------------------------------------------------------------------- /optimization/cwpath/tests/test.R: -------------------------------------------------------------------------------- 1 | ######################## 2 | ## Libraries ## 3 | ######################## 4 | 5 | library(MASS) 6 | library(lars) 7 | library(elasticnet) 8 | 9 | ######################## 10 | ## Functions ## 11 | ######################## 12 | 13 | "pospart" <- function(a) { 14 | if(a > 0) {return(a)} else {return(0)} 15 | } 16 | 17 | ######################## 18 | ## Data ## 19 | ######################## 20 | 21 | data(diabetes) 22 | X <- diabetes$x 23 | X <- as.matrix(X) 24 | X <- apply(X,2,scale) 25 | Y <- diabetes$y 26 | Y <- scale(matrix(Y), center=T, scale=F) 27 | 28 | ######################## 29 | ## Parameters ## 30 | ######################## 31 | 32 | cwenet <- function(X, Y, tol, l1, l2) { 33 | n <- dim(X)[1] 34 | p <- dim(X)[2] 35 | 36 | # Regularization and Convergence Params # 37 | 38 | ######################## 39 | ## LASSO ## 40 | ######################## 41 | 42 | y <- matrix(Y) 43 | y <- scale(y, center=T, scale=F) 44 | X <- as.matrix(X) 45 | X <- apply(X,2,scale) 46 | 47 | # Initialize Betas # 48 | b <- b_old <- numeric(p) 49 | 50 | # Coordinate-wise Fit # 51 | i <- 0 52 | del = 1 53 | while(abs(del) > tol) { 54 | i <- i+1 55 | b_old <- rbind(b_old, b) 56 | for(j in 1:p) { 57 | rj <- y - X[,-j]%*%b[-j] 58 | S <- t(X[,j])%*%rj 59 | b[j] <- (1/n)*(sign(S)*pospart(abs(S) - l1)) 60 | } 61 | del <- abs(sum(b-b_old[i,]))/length(b) 62 | } 63 | return(b) 64 | } 65 | 66 | l <- lars(X, Y, type='lasso') 67 | lc <- predict(l, s=88, mode='norm', type='coef') 68 | print(sum(abs(lc$coef))) 69 | -------------------------------------------------------------------------------- /optimization/cwpath/tests/testR.py: -------------------------------------------------------------------------------- 1 | # 
cython functions are not yet seen as packages. import path. 2 | import sys, os 3 | path_to_cython_packages = os.path.abspath('../.') 4 | print path_to_cython_packages 5 | sys.path.append(path_to_cython_packages) 6 | 7 | # R imports 8 | #import rpy2.rpy_classic as rpy 9 | #rpy.set_default_mode(NO_CONVERSION) 10 | import rpy2 11 | from rpy2 import robjects as rpy 12 | 13 | # major python libraries 14 | import numpy as np 15 | 16 | # local imports 17 | import cwpath, graphnet #, lasso 18 | 19 | class AlternativeConvergence(graphnet.Lasso): 20 | """ 21 | Use same convergence criterion as in R script 22 | """ 23 | tol = 1.0e-10 24 | 25 | def Rfit(self, penalty=None): 26 | penalty = penalty or self.penalty 27 | rpy.r.assign('X', self.X.ravel()) 28 | rpy.r("X = matrix(X, %d, %d, byrow=T)" % self.X.shape) 29 | rpy.r.assign('Y', self.Y) 30 | rpy.r.assign('tol', self.tol) 31 | rpy.r.assign('p', penalty) 32 | return rpy.r("cwenet(X, Y, tol, p, 0)") 33 | 34 | def stop(self, previous, tol=None): 35 | """ 36 | Uses l1 convergence criteria for coefficients 37 | """ 38 | 39 | bold, rold = previous 40 | bcurrent, rcurrent = self.output() 41 | err = np.fabs(bold - bcurrent).sum() / bold.shape[0] 42 | 43 | if err < self.tol: 44 | return True 45 | 46 | return False 47 | 48 | # Preamble for R 49 | 50 | data = {} 51 | def setup(): 52 | rpy.r(''' 53 | 54 | ######################## 55 | ## Libraries ## 56 | ######################## 57 | 58 | library(MASS) 59 | library(lars) 60 | library(elasticnet) 61 | 62 | ######################## 63 | ## Functions ## 64 | ######################## 65 | 66 | "pospart" <- function(a) { 67 | if(a > 0) {return(a)} else {return(0)} 68 | } 69 | 70 | ######################## 71 | ## Data ## 72 | ######################## 73 | 74 | data(diabetes) 75 | X <- diabetes$x 76 | X <- as.matrix(X) 77 | X <- apply(X,2,scale) 78 | Y <- diabetes$y 79 | Y <- scale(matrix(Y), center=T, scale=F) 80 | 81 | ######################## 82 | ## Parameters ## 83 | ######################## 84 | 85 | cwenet <- function(X, Y, tol, l1, l2) { 86 | n <- dim(X)[1] 87 | p <- dim(X)[2] 88 | 89 | # Regularization and Convergence Params # 90 | 91 | ######################## 92 | ## LASSO ## 93 | ######################## 94 | 95 | y <- matrix(Y) 96 | y <- scale(y, center=T, scale=F) 97 | X <- as.matrix(X) 98 | X <- apply(X,2,scale) 99 | 100 | # Initialize Betas # 101 | b <- b_old <- numeric(p) 102 | 103 | # Coordinate-wise Fit # 104 | i <- 0 105 | del = 1 106 | while(abs(del) > tol) { 107 | i <- i+1 108 | b_old <- rbind(b_old, b) 109 | for(j in 1:p) { 110 | rj <- y - X[,-j]%*%b[-j] 111 | S <- t(X[,j])%*%rj 112 | b[j] <- (1/(n-1))*(sign(S)*pospart(abs(S) - l1)) 113 | } 114 | del <- abs(sum(b-b_old[i,]))/length(b) 115 | } 116 | return(b) 117 | } 118 | ''') 119 | 120 | data['Y'] = np.asarray(rpy.r("Y")) 121 | data['Y'] = data['Y'].reshape((data['Y'].shape[0],)) 122 | data['X'] = np.asarray(rpy.r("X")) 123 | 124 | def test_Renet(): 125 | raise ValueError('write a test using ENet!') 126 | 127 | def test_R(): 128 | X = data['X'] 129 | Y = data['Y'] 130 | l = cwpath.CoordWise((X, Y), AlternativeConvergence) 131 | p = l.problem.penalty 132 | l1 = 1000 133 | l.problem.penalty = l1 / np.sqrt(X.shape[0]) 134 | 135 | l.fit(l.problem.penalty) 136 | print l.current[0], l.problem.Rfit(l.problem.penalty) 137 | assert np.allclose(l.current[0], l.problem.Rfit(l.problem.penalty)) 138 | 139 | def test_final(): 140 | X = data['X'] 141 | Y = data['Y'] 142 | l = cwpath.CoordWise((X, Y), AlternativeConvergence) 143 | l.problem.penalty = 0 144 | 
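    # With the penalty set to zero the coordinate-wise fit should coincide
    # with ordinary least squares, which the pinv comparison below checks.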
l.tol = 1.0e-14
145 |     l.fit()
146 |     b = np.dot(np.linalg.pinv(l.problem.X), l.problem.Y)
147 |     assert(np.allclose(l.problem.beta, b))
148 |     assert(np.allclose(l.problem.Rfit(l.problem.penalty), b))
--------------------------------------------------------------------------------
/optimization/cwpath/tests/test_graphnet.py:
--------------------------------------------------------------------------------
1 | # cython functions are not yet seen as packages. import path.
2 | import sys, os, time
3 | path_to_cython_packages = os.path.abspath('../.')
4 | sys.path.append(path_to_cython_packages)
5 | 
6 | # scons still building packages funny, get path to graphs
7 | path_to_graphs = os.path.abspath('../../graphs/.')
8 | sys.path.append(path_to_graphs)
9 | 
10 | # import major libraries
11 | import numpy as np
12 | import scipy.optimize
13 | from nose.tools import *
14 | import time
15 | import h5py  # needed below: train_all/validate_all write and read HDF5 grids
16 | 
17 | # for plotting
18 | import matplotlib
19 | matplotlib.use('agg')
20 | import pylab as pl
21 | pl.ion()
22 | 
23 | # local imports
24 | # import testR
25 | import cwpath, graphnet, strategy
26 | from graph_laplacian import construct_adjacency_list
27 | 
28 | # functions
29 | from numpy.core.umath_tests import inner1d
30 | from multiprocessing import Pool
31 | import pp
32 | 
33 | # setup some R stuff (disabled while the testR import above is commented out)
34 | def setup():
35 |     pass  # testR.setup()
36 | 
37 | #------------------------------------------------------------------------------------------------------#
38 | # Run all GraphNet tests
39 | 
40 | def train_all(num_l1_steps=100,test_imap=False,test_pp=True):
41 |     # get training data and constants
42 |     Data = np.load("Data.npz")
43 |     X = Data['X'][0:1000,:]
44 |     Y = Data['Y'][0:1000]
45 |     G = [None]
46 |     lam_max = get_lambda_max(X,Y)
47 |     cwpathtol = 1e-6
48 | 
49 |     # penalty grid values
50 |     l1vec = np.linspace(0.95*lam_max, 0.0001*lam_max, num=num_l1_steps).tolist()
51 | 
52 |     # test grid
53 |     l2vec = [0.0, 1e6]
54 |     l3vec = [1e6, 1e12] # [0.0, 100, 1e6]
55 |     deltavec = [-999.0, 0.1] #, 0.2, 0.3, 0.5, 1.0, 1e10] #, 0.1, 0.5, 1., 1e10]
56 |     svmdeltavec = [-999.0] #, 1]
57 | 
58 |     # big grid
59 |     # l2vec = [0.0, 1.0, 10, 100, 1000, 1e4, 1e6, 1e8]
60 |     # l3vec = [0.0, 1.0, 10, 100, 1000, 1e4, 1e6, 1e8]
61 |     # deltavec = [-999.0, 0.25, 0.5, 1.0, 10, 100]
62 |     # svmdeltavec = [-999.0, 0.25, 0.5, 1.0, 10, 100]
63 | 
64 |     # construct parameter grid
65 |     penalties = []
66 |     for l2 in l2vec:
67 |         for l3 in l3vec:
68 |             for delta in deltavec:
69 |                 for svmdelta in svmdeltavec:
70 |                     penalties.append((l2,l3,delta,svmdelta))
71 | 
72 |     # construct problems
73 |     problems = ( [ ( X, Y, G,
74 |                      penalties[t],
75 |                      num_l1_steps,
76 |                      lam_max,
77 |                      t ) for t in range( len(penalties) ) ] )
78 |     # test imap
79 |     if test_imap:
80 |         in_tuple = (X,Y,G,(100,1e10,-999.0,-999.0),500,lam_max,80)
81 |         out_tuple = _graphnet_imap( in_tuple )
82 |         results = out_tuple[3]
83 |         coefs = results[0][499]
84 |         pl.imsave('imtest.png',coefs.reshape((60,60),order='F'))
85 |         #1/0
86 | 
87 |     # run problems in parallel with imap
88 |     pool = Pool()
89 |     results = pool.imap( _graphnet_imap, problems )
90 | 
91 |     # write results to h5 file
92 |     outfile = h5py.File("Grid_output.h5",'w')
93 |     for r in results:
94 |         if r[0] not in outfile.keys():
95 |             outfile.create_group(r[0]) # group by problem type
96 |         if str(r[1]) not in outfile[r[0]].keys():
97 |             outfile[r[0]].create_group(str(r[1])) # group by penalty tuple
98 |         outfile[r[0]][str(r[1])]['params'] = np.array(r[1])
99 |         outfile[r[0]][str(r[1])]['l1vec'] = np.array(r[2])
100 |         outfile[r[0]][str(r[1])]['coefficients'] = np.array(r[3][0])
101 |         outfile[r[0]][str(r[1])]['residuals'] = np.array(r[3][1])
102 |         print "Mean and median residuals:", np.mean(r[3][1]), np.median(r[3][1])
103 |     # TODO: save parameter grid to hdf5 file
104 |     outfile.close()
105 | 
106 |     if test_pp:
107 |         import pp
108 |         job_server = pp.Server()
109 |         jobs = [(in_tuple, job_server.submit(_graphnet_pp,in_tuple, (test_graphnet, get_lambda_max,construct_adjacency_list), ("numpy as np","time","graphnet","cwpath"))) for in_tuple in problems]
110 |         print "Running jobs!"
111 | 
112 |         for job in jobs:
113 |             print job[1]()
114 | 
115 |     print "\n\n Congratulations - nothing exploded!"
116 | 
117 | def validate_all(h5file):
118 |     # get validation data
119 |     Data = np.load("Data.npz")
120 |     X = Data['X'][1000:2000,:]
121 |     Y = Data['Y'][1000:2000]
122 |     y = Y.copy()
123 |     y[Y>0] = 1.0
124 |     y[Y<=0] = -1.0
125 |     y.shape = (1000,1)
126 | 
127 |     # # big grid
128 |     # l1_len = 100
129 |     # l2vec = [0.0, 1.0, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8]
130 |     # l3vec = [0.0, 1.0, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8]
131 |     # deltavec = [0.0, 0.1, 0.25, 0.5, 0.75, 1.0, 2, 10, 100]
132 |     # svmdeltavec = [0.0, 0.1, 0.25, 0.5, 0.75, 1.0, 2, 10, 100]
133 | 
134 |     # test grid (l1_len must match num_l1_steps used in train_all)
135 |     l1_len = 100
136 |     l2vec = [0.0, 1e6]
137 |     l3vec = [0.0, 1e6]
138 |     deltavec = [0.0, 0.01, 0.04, 1]
139 |     svmdeltavec = [0.0, 0.4, 1]
140 |     rate_arr_dims = (l1_len, len(l2vec), len(l3vec), len(deltavec), len(svmdeltavec))
141 | 
142 |     fits = h5py.File(h5file)
143 |     for model in fits.keys():
144 |         current_model = fits[model]
145 |         rate_arr = np.zeros(rate_arr_dims)
146 |         for params in current_model.keys():
147 |             print model, params
148 |             current_fit = current_model[params]
149 |             coefs = np.array(current_fit['coefficients']).T
150 |             params = np.array(current_fit['params'])
151 |             if model == 'HuberSVMGraphNet':
152 |                 coefs = coefs[1::,:]
153 |             preds = np.dot(X,coefs)
154 |             preds[preds>0] = 1.0
155 |             preds[preds<=0] = -1.0
156 |             errs = y-preds != 0.0
157 |             err_rates = np.sum(errs,axis=0)/1000.0
158 |             #1/0
159 |             fits[model+'/'+str(tuple(params.tolist()))+'/'+'err_rates'] = err_rates
160 |             print '\t---> Best rate:', 1.-np.min(err_rates)
161 |             rate_arr[:,np.where(params[0]==l2vec)[0][0],np.where(params[1]==l3vec)[0][0],np.where(params[2]==deltavec)[0][0],np.where(params[3]==svmdeltavec)[0][0]] = err_rates
162 |         fits[model+'/'+'err_rate_array'] = rate_arr
163 |     fits.close()
164 |     print "Error rates on validation data have been added to file."
165 | 
166 | #------------------------------------------------------------------------------------------------------#
167 | # Wrapper for running GraphNet problems using multiprocessing
168 | 
169 | def _graphnet_imap( in_tuple ):
170 |     """
171 |     Run a graphnet model for a particular tuple (X,Y,G,(l2,l3,delta,svmdelta),num_l1,lam_max)
172 |     for a grid of l1 parameters.
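    The in_tuple fields are, in order: X, Y, a one-element list holding the
    graph G (or None), the (l2, l3, delta, svmdelta) penalty tuple, the
    number of l1 grid points, lam_max, and a task index (see the `problems`
    list built in train_all).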
173 | """ 174 | X = in_tuple[0] 175 | Y = in_tuple[1] 176 | G = in_tuple[2][0] 177 | pen = in_tuple[3] 178 | num_l1 = in_tuple[4] 179 | lam_max = in_tuple[5] 180 | 181 | l2 = in_tuple[3][0] 182 | l3 = in_tuple[3][1] 183 | delta = in_tuple[3][2] 184 | svmdelta = in_tuple[3][3] 185 | 186 | lam_max = get_lambda_max(X,Y) 187 | l1vec = np.linspace(0.95*lam_max, 0.2*lam_max, num=num_l1) 188 | cwpathtol = 1e-6 189 | 190 | results, problemkey = test_graphnet(X,Y,G,l1vec,l2,l3,delta,svmdelta,initial=None,tol=cwpathtol,scipy_compare=False) 191 | return (problemkey, pen, l1vec, results) 192 | 193 | #------------------------------------------------------------------------------------------------------# 194 | # Wrapper for running GraphNet problems using parallel python 195 | 196 | def _graphnet_pp( X,Y,G,pen,num_l1,lam_max,problemkey ): 197 | """ 198 | Run a graphnet model for a particular tuple (X,Y,G,(l2,l3,delta,svmdelta),num_l1,lam_max) 199 | for a grid of l1 parameters. 200 | """ 201 | G = G[0] 202 | l2 = pen[0] 203 | l3 = pen[1] 204 | delta = pen[2] 205 | svmdelta = pen[3] 206 | l1vec = np.linspace(0.95*lam_max, 0.2*lam_max, num=num_l1) 207 | cwpathtol = 1e-6 208 | 209 | return problemkey, test_graphnet(X,Y,G,l1vec,l2,l3,delta,svmdelta,initial=None,tol=cwpathtol,scipy_compare=False) 210 | # return results, problemkey #= test_graphnet(X,Y,G,l1vec,l2,l3,delta,svmdelta,initial=None,tol=cwpathtol,scipy_compare=False) 211 | # return (problemkey, pen, l1vec, results) 212 | 213 | #------------------------------------------------------------------------------------------------------# 214 | # Main Graphnet problem testing function 215 | 216 | def test_graphnet(X,Y,G=None,l1=500.,l2=-999.0,l3=-999.0,delta=-999.0,svmdelta=-999.0,initial=None,adaptive=False,svm=False,scipy_compare=True,tol=1e-5): 217 | tic = time.clock() 218 | # Cases set based on parameters and robust/adaptive/svm flags 219 | if l2 != -999.0 or l3 != -999.0 or delta != -999.0 or svmdelta != -999.0: 220 | if l3 != -999.0 or delta != -999.0 or svmdelta != -999.0: 221 | if G is None: 222 | nx = 60 223 | ny = 60 224 | A, Afull = construct_adjacency_list(nx,ny,1,return_full=True) 225 | # A, Afull = gen_adj(X.shape[1]) 226 | else: 227 | A = G.copy() 228 | if delta != -999.0: 229 | if svmdelta != -999.0: 230 | print "-------------------------------------------HUBER SVM---------------------------------------------------" 231 | problemtype = "HuberSVMGraphNet" 232 | problemkey = "HuberSVMGraphNet" 233 | print "HuberSVM GraphNet with penalties (l1,l2,l3,delta):", l1, l2, l3, delta 234 | Y = 2*np.round(np.random.uniform(0,1,len(Y)))-1 235 | l = cwpath.CoordWise((X, Y, A), graphnet.GraphSVM) #, initial_coefs=10.*np.array(range(11)*1)) 236 | l.problem.assign_penalty(path_key='l1',l1=l1,l2=l2,l3=l3,delta=delta) 237 | else: 238 | print "----------------------------------------ROBUST GRAPHNET------------------------------------------------" 239 | problemtype = graphnet.RobustGraphNet 240 | problemkey = 'RobustGraphNet' 241 | print "Robust GraphNet with penalties (l1, l2, l3, delta)", l1, l2, l3, delta 242 | l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs = initial) #,initial_coefs=np.array([14.]*10)) 243 | l.problem.assign_penalty(path_key='l1',l1=l1,l2=l2,l3=l3,delta=delta) 244 | else: 245 | print "-------------------------------------------GRAPHNET---------------------------------------------------" 246 | problemtype = graphnet.NaiveGraphNet 247 | problemkey = 'NaiveGraphNet' 248 | print "Testing GraphNet with penalties (l1,l2,l3):", l1,l2,l3 
249 |                 l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs = initial)
250 |                 l.problem.assign_penalty(path_key='l1',l1=l1, l2=l2, l3=l3)
251 |         else:
252 |             print "-------------------------------------------ELASTIC NET---------------------------------------------------"
253 |             problemtype = graphnet.NaiveENet
254 |             problemkey = 'NaiveENet'
255 |             print "Testing ENET with penalties (l1,l2):", l1, l2
256 |             l = cwpath.CoordWise((X, Y), problemtype, initial_coefs = initial) #, initial_coefs = np.array([4.]*10))
257 |             l.problem.assign_penalty(path_key='l1',l1=l1, l2=l2)
258 |     else:
259 |         print "-------------------------------------------LASSO---------------------------------------------------"
260 |         problemtype = graphnet.Lasso
261 |         problemkey = 'Lasso'
262 |         print "Testing LASSO with penalty:", l1
263 |         l = cwpath.CoordWise((X, Y), problemtype, initial_coefs = initial) #, initial_coefs= np.array([7.]*10))
264 |         l.problem.assign_penalty(path_key='l1',l1=l1)
265 | 
266 |     # fit and get results
267 |     coefficients, residuals = l.fit(tol=tol, initial=initial)
268 |     print "\t---> Fitting GraphNet problem with coordinate descent took:", time.clock()-tic, "seconds."
269 | 
270 |     if adaptive:
271 |         betas = coefficients
272 |         tic = time.clock()
273 |         eps = 1e-5
274 |         l1weights = 1./(np.fabs(betas[-1]) + eps) # adaptive weights from the last fit on the l1 path
275 |         l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs = initial)
276 |         l.problem.assign_penalty(l1=l1,l2=l2,l3=l3,delta=delta,l1weights=l1weights,newl1=l1)
277 |         adaptive_coefficients, adaptive_residuals = l.fit(tol=tol, initial=initial)
278 |         print "\t---> Fitting Adaptive GraphNet problem with coordinate descent took:", time.clock()-tic, "seconds."
279 | 
280 |     # if the compare-to-scipy flag is set,
281 |     # compare the above result with the same problem
282 |     # solved using a built-in scipy solver (fmin_powell).
283 |     if scipy_compare:
284 |         print "\t---> Fitting with scipy for comparison..."
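        # Each closed-form objective below mirrors the corresponding penalized
        # loss; fmin_powell is slow but derivative-free, so it gives an
        # independent check on the coordinate-wise solution at the final l1.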
285 | tic = time.clock() 286 | l1 = l1[-1] # choose only last l1 value 287 | beta = coefficients[-1] # coordinate-wise coefficients 288 | if l2 != -999.0 or l3 != -999.0 or delta != -999.0 or svmdelta != -999.0: 289 | if l3 != -999.0 or delta != -999.0 or svmdelta != -999.0: 290 | if delta != -999.0: 291 | if adaptive: 292 | if svmdelta != -999.0: 293 | # HuberSVM Graphnet 294 | Xp2 = np.hstack([np.ones(X.shape[0])[:,np.newaxis],X]) 295 | def f(beta): 296 | ind = range(1,len(beta)) 297 | return huber_svm_error(beta,Y,Xp2,delta).sum() + np.fabs(beta[ind]).sum()*l1 + l2 * np.linalg.norm(beta[ind])**2/2 + l3 * np.dot(beta[ind], np.dot(Afull, beta[ind]))/2 298 | else: 299 | # Robust Adaptive Graphnet 300 | def f(beta): 301 | return huber(Y - np.dot(X, beta),delta).sum()/2 + l1*np.dot(np.fabs(beta),l1weights) + l2*np.linalg.norm(beta)**2/2 + l3*np.dot(beta, np.dot(Afull, beta))/2 302 | else: 303 | # Robust Graphnet 304 | def f(beta): 305 | try: 306 | return huber(Y - np.dot(X, beta.T),delta).sum()/2 + np.fabs(beta).sum()*l1 + l2 * np.linalg.norm(beta)**2/2 + l3 * np.dot(beta, np.dot(Afull, beta.T))/2 307 | except: 308 | return huber(Y - np.dot(X, beta),delta).sum()/2 + np.fabs(beta).sum()*l1 + l2 * np.linalg.norm(beta)**2/2 + l3 * np.dot(beta, np.dot(Afull, beta).T)/2 309 | else: 310 | # Graphnet 311 | def f(beta): 312 | return np.linalg.norm(Y - np.dot(X, beta))**2/2 + np.fabs(beta).sum()*l1 + l2 * np.linalg.norm(beta)**2/2 + l3 * np.dot(beta, np.dot(Afull, beta))/2 313 | else: 314 | # Elastic Net 315 | def f(beta): 316 | return np.linalg.norm(Y - np.dot(X, beta))**2/2 + np.fabs(beta).sum()*l1 + l2 * np.linalg.norm(beta)**2/2 317 | else: 318 | # Lasso 319 | def f(beta): 320 | return np.linalg.norm(Y - np.dot(X, beta))**2/2 + np.fabs(beta).sum()*l1 321 | # optimize 322 | if problemkey == 'HuberSVMGraphNet': 323 | v = scipy.optimize.fmin_powell(f, np.zeros(Xp2.shape[1]), ftol=1.0e-14, xtol=1.0e-14, maxfun=100000) 324 | else: 325 | v = scipy.optimize.fmin_powell(f, np.zeros(X.shape[1]), ftol=1.0e-10, xtol=1.0e-10,maxfun=100000) 326 | v = np.asarray(v) 327 | print "\t---> Fitting GraphNet with scipy took:", time.clock()-tic, "seconds." 328 | 329 | # print np.round(100*v)/100,'\n', np.round(100*beta)/100 330 | assert_true(np.fabs(f(v) - f(beta)) / np.fabs(f(v) + f(beta)) < tol) 331 | if np.linalg.norm(v) > 1e-8: 332 | assert_true(np.linalg.norm(v - beta) / np.linalg.norm(v) < tol) 333 | else: 334 | assert_true(np.linalg.norm(beta) < 1e-8) 335 | 336 | print "\t---> Coordinate-wise and Scipy optimization agree!" 
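    # Callers receive the full path of solutions: a (coefficients, residuals)
    # pair indexed by the l1 grid, plus the key naming the problem solved.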
337 | 338 | return (coefficients, residuals), problemkey 339 | 340 | #------------------------------------------------------------------------------------------------------# 341 | # Adjacency matrix functions 342 | 343 | def adj_array_as_list(adj): 344 | # Now create the adjacency list 345 | v = [] 346 | for a in adj: 347 | v.append(a[np.greater(a, -1)]) 348 | return v 349 | 350 | def gen_adj(p): 351 | Afull = np.zeros((p,p),dtype=int) 352 | A = - np.ones((p,p),dtype=int) 353 | counts = np.zeros(p) 354 | for i in range(p): 355 | for j in range(p): 356 | if np.random.uniform(0,1) < 0.3: 357 | if i != j: 358 | if Afull[i,j] == 0: 359 | Afull[i,j] = -1 360 | Afull[j,i] = -1 361 | Afull[i,i] += 1 362 | Afull[j,j] += 1 363 | A[i,counts[i]] = j 364 | A[j,counts[j]] = i 365 | counts[i] += 1 366 | counts[j] += 1 367 | return adj_array_as_list(A), Afull 368 | 369 | #------------------------------------------------------------------------------------------------------# 370 | # For finding starting lambda 371 | def get_lambda_max(X,y): 372 | """ 373 | Find the value of lambda at which all coefficients are set to zero 374 | by finding the minimum value such that 0 is in the subdifferential 375 | and the coefficients are all zero. 376 | """ 377 | subgrads = np.fabs( inner1d(X.T, y)) 378 | return np.max( subgrads ) 379 | 380 | #------------------------------------------------------------------------------------------------------# 381 | # Some loss functions for tests 382 | 383 | def huber(r,delta): 384 | r = np.fabs(r) 385 | t = np.greater(r, delta) 386 | return (1-t)*r**2 + t*(2*delta*r - delta**2) 387 | 388 | def huber_svm(r,delta): 389 | t1 = np.greater(r, delta) 390 | t2 = np.greater(r,0) 391 | return t1*(r - delta/2) + (1-t1)*t2*(r**2/(2*delta)) 392 | 393 | def huber_svm_error(beta,Y,Xp2,delta): 394 | r = 1-Y*np.dot(Xp2,beta) 395 | return huber(r,delta) 396 | 397 | #-------------------------------------------------------------------------------------------------------------------# 398 | # plotting functions 399 | 400 | def plot_coefficient_images(h5file, output_dir, data_file='Data.npz', x=None, y=None,problemtype="RobustGraphNet"): 401 | """ 402 | Iterate through hdf5 file of fits, plotting the coefficients as images and slices of images. 
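    The HDF5 layout is assumed to match what train_all writes: one group per
    problem type, a subgroup per penalty tuple, each holding 'coefficients'
    and 'l1vec' datasets.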
403 | """ 404 | # get ground truth 405 | Data = np.load(data_file) 406 | true_im = Data['sig_im'] 407 | 408 | # get fit results 409 | f = h5py.File(h5file,'r') 410 | results = f[problemtype] 411 | 412 | # make appropriate directories for saving images 413 | if not os.path.isdir(output_dir): 414 | os.makedirs(output_dir) 415 | for k in results.keys(): 416 | local_dir = output_dir + k 417 | if not os.path.isdir(local_dir): 418 | os.makedirs(local_dir) 419 | os.makedirs(local_dir + "/slice_plots/") 420 | # get coefficients and l1 values 421 | solution = results[k+'/coefficients'].value 422 | l1_path= results[k+'/l1vec'].value 423 | if x is None and y is None: 424 | x = np.sqrt(solution.shape[1]) 425 | y = x # image is square 426 | # make plots 427 | for i in xrange(solution.shape[0]): 428 | im = solution[i,:].reshape((x,y),order='F') 429 | pl.imsave(local_dir + "/l1=" + str(l1_path[i]) + ".png", im) 430 | print "\t---> Saved coefficient image", i 431 | plot_image_slice(im, true_im, x_slice=45, out_path=local_dir+"/slice_plots/l1="+str(l1_path[i])+".png") 432 | print "\t---> Saved coefficient image slice", i 433 | 434 | def plot_image_slice(im, true_im, x_slice, out_path): 435 | im_slice = im[x_slice,:] 436 | true_im_slice = true_im[x_slice,:] 437 | pl.clf() 438 | pl.plot(im_slice) 439 | pl.plot(true_im_slice,'r--') 440 | pl.savefig(out_path) 441 | 442 | #-------------------------------------------------------------------------------------------------------------------# 443 | 444 | if __name__ == "__main__": 445 | pass 446 | 447 | #EOF 448 | -------------------------------------------------------------------------------- /optimization/graphs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /optimization/graphs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/optimization/graphs/__init__.pyc -------------------------------------------------------------------------------- /optimization/graphs/graph_laplacian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def construct_adjacency_list(nx,ny,nz,return_full=False): 4 | """ 5 | Generate and return a list of lists for which the ith list contains the indices of the 6 | adjacent voxels. 
7 | """ 8 | nvoxels = nx*ny*nz 9 | 10 | from scipy.sparse import coo_matrix 11 | 12 | y_coords = np.reshape(np.tile(np.arange(ny, dtype=np.int32), (nx*nz, 1)), (nvoxels), order='f') 13 | x_coords = np.reshape(np.tile(np.reshape(np.tile(np.arange(nx, dtype=np.int32), (nz, 1)), 14 | (nz*nx), order='f'), (ny, 1)), (nx*ny*nz)) 15 | z_coords = np.tile(np.arange(nz, dtype=np.int32), (nx*ny)) 16 | 17 | F = coo_matrix((nvoxels, nvoxels), dtype=np.float32) 18 | diag_coords = y_coords*nx*nz + x_coords*nz + z_coords 19 | 20 | # Form the z+1 difference entries 21 | valid_idxs = np.nonzero(z_coords+1 < nz) 22 | diff_coords = y_coords*nx*nz + x_coords*nz + (z_coords+1) 23 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 24 | shape = (nvoxels, nvoxels), dtype=np.float32) 25 | 26 | # Form the z-1 difference entries 27 | valid_idxs = np.nonzero(z_coords-1 >= 0) 28 | diff_coords = y_coords*nx*nz + x_coords*nz + (z_coords-1) 29 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 30 | shape = (nvoxels, nvoxels), dtype=np.float32) 31 | 32 | # Form the x+1 difference entries 33 | valid_idxs = np.nonzero(x_coords+1 < nx) 34 | diff_coords = y_coords*nx*nz + (x_coords+1)*nz + z_coords 35 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 36 | shape = (nvoxels, nvoxels), dtype=np.float32) 37 | 38 | # Form the x-1 difference entries 39 | valid_idxs = np.nonzero(x_coords-1 >= 0) 40 | diff_coords = y_coords*nx*nz + (x_coords-1)*nz + z_coords 41 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 42 | shape = (nvoxels, nvoxels), dtype=np.float32) 43 | 44 | # Form the y+1 difference entries 45 | valid_idxs = np.nonzero(y_coords+1 < ny) 46 | diff_coords = (y_coords+1)*nx*nz + x_coords*nz + z_coords 47 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 48 | shape = (nvoxels, nvoxels), dtype=np.float32) 49 | 50 | # Form the y-1 difference entries 51 | valid_idxs = np.nonzero(y_coords-1 >= 0) 52 | diff_coords = (y_coords-1)*nx*nz + x_coords*nz + z_coords 53 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 54 | shape = (nvoxels, nvoxels), dtype=np.float32) 55 | 56 | A = F.tolil().rows.tolist() 57 | if return_full: 58 | return A, F.todense() 59 | else: 60 | return A 61 | 62 | def construct_laplacian_3D(vol): 63 | 64 | # Smoothing regularization. Compute the laplacian of the volume. 65 | lapvol = np.copy(vol) 66 | lapvol[0:ny-1,:,:] -= 1/6.0 * vol[1:ny ,:,:] 67 | lapvol[1:ny ,:,:] -= 1/6.0 * vol[0:ny-1,:,:] 68 | lapvol[:,0:nx-1,:] -= 1/6.0 * vol[:,1:nx ,:] 69 | lapvol[:,1:nx ,:] -= 1/6.0 * vol[:,0:nx-1,:] 70 | lapvol[:,:,0:nz-1] -= 1/6.0 * vol[:,:,1:nz ] 71 | lapvol[:,:,1:nz ] -= 1/6.0 * vol[:,:,0:nz-1] 72 | 73 | # Zero out laplacian around the edges. 74 | lapvol[0,:,:] = 0.0; 75 | lapvol[:,0,:] = 0.0; 76 | lapvol[:,:,0] = 0.0; 77 | lapvol[ny-1,:,:] = 0.0; 78 | lapvol[:,nx-1,:] = 0.0; 79 | lapvol[:,:,nz-1] = 0.0; 80 | 81 | def sparse_Laplacian_matrix(nx, ny, nz): 82 | ''' 83 | Builds a sparse, square matrix of local first differences, 84 | equivalent to the graph Laplacian (Degree - Adjacency) matrix. 
85 | This is defined as: 86 | 87 | n for i==j where n is the number of voxels adjacent to x_i (node degree) 88 | F_ij = -1 for i \neq j but adjacent to j 89 | 0 otherwise 90 | ''' 91 | nvoxels = nx*ny*nz 92 | 93 | from scipy.sparse import coo_matrix 94 | 95 | y_coords = np.reshape(np.tile(np.arange(ny, dtype=np.int32), (nx*nz, 1)), (nvoxels), order='f') 96 | x_coords = np.reshape(np.tile(np.reshape(np.tile(np.arange(nx, dtype=np.int32), (nz, 1)), 97 | (nz*nx), order='f'), (ny, 1)), (nx*ny*nz)) 98 | z_coords = np.tile(np.arange(nz, dtype=np.int32), (nx*ny)) 99 | 100 | # Form the diagonal entries of F (should be equal to number of neighbors) 101 | diag_coords = y_coords*nx*nz + x_coords*nz + z_coords 102 | F = coo_matrix((np.ones(nvoxels)*6, (diag_coords, diag_coords)), 103 | shape = (nvoxels, nvoxels), dtype=np.float32) 104 | 105 | # Form the z+1 difference entries 106 | valid_idxs = np.nonzero(z_coords+1 < nz) 107 | diff_coords = y_coords*nx*nz + x_coords*nz + (z_coords+1) 108 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 109 | shape = (nvoxels, nvoxels), dtype=np.float32) 110 | 111 | # Form the z-1 difference entries 112 | valid_idxs = np.nonzero(z_coords-1 >= 0) 113 | diff_coords = y_coords*nx*nz + x_coords*nz + (z_coords-1) 114 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 115 | shape = (nvoxels, nvoxels), dtype=np.float32) 116 | 117 | # Form the x+1 difference entries 118 | valid_idxs = np.nonzero(x_coords+1 < nx) 119 | diff_coords = y_coords*nx*nz + (x_coords+1)*nz + z_coords 120 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 121 | shape = (nvoxels, nvoxels), dtype=np.float32) 122 | 123 | # Form the x-1 difference entries 124 | valid_idxs = np.nonzero(x_coords-1 >= 0) 125 | diff_coords = y_coords*nx*nz + (x_coords-1)*nz + z_coords 126 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 127 | shape = (nvoxels, nvoxels), dtype=np.float32) 128 | 129 | # Form the y+1 difference entries 130 | valid_idxs = np.nonzero(y_coords+1 < ny) 131 | diff_coords = (y_coords+1)*nx*nz + x_coords*nz + z_coords 132 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 133 | shape = (nvoxels, nvoxels), dtype=np.float32) 134 | 135 | # Form the y-1 difference entries 136 | valid_idxs = np.nonzero(y_coords-1 >= 0) 137 | diff_coords = (y_coords-1)*nx*nz + x_coords*nz + z_coords 138 | F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diff_coords[valid_idxs])), 139 | shape = (nvoxels, nvoxels), dtype=np.float32) 140 | 141 | # Fix edge coeffs so that the entire matrix, when multiplied by a vector of only ones, equals zero. 
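    # Interior voxels carry degree 6 on the diagonal; each grid face a voxel
    # lies on removes one neighbor, so each boundary correction below subtracts
    # 1 from the diagonal to keep the row sums at zero.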
142 | 
143 |     # Fix z = 0 coeffs
144 |     valid_idxs = np.nonzero(z_coords == 0)
145 |     F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diag_coords[valid_idxs])),
146 |                        shape = (nvoxels, nvoxels), dtype=np.float32)
147 | 
148 |     # Fix z = nz coeffs
149 |     valid_idxs = np.nonzero(z_coords == nz-1)
150 |     F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diag_coords[valid_idxs])),
151 |                        shape = (nvoxels, nvoxels), dtype=np.float32)
152 | 
153 |     # Fix y = 0 coeffs
154 |     valid_idxs = np.nonzero(y_coords == 0)
155 |     F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diag_coords[valid_idxs])),
156 |                        shape = (nvoxels, nvoxels), dtype=np.float32)
157 | 
158 |     # Fix y = ny coeffs
159 |     valid_idxs = np.nonzero(y_coords == ny-1)
160 |     F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diag_coords[valid_idxs])),
161 |                        shape = (nvoxels, nvoxels), dtype=np.float32)
162 | 
163 |     # Fix x = 0 coeffs
164 |     valid_idxs = np.nonzero(x_coords == 0)
165 |     F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diag_coords[valid_idxs])),
166 |                        shape = (nvoxels, nvoxels), dtype=np.float32)
167 | 
168 |     # Fix x = nx coeffs
169 |     valid_idxs = np.nonzero(x_coords == nx-1)
170 |     F = F + coo_matrix((np.ones(len(valid_idxs[0]))*-1.0, (diag_coords[valid_idxs], diag_coords[valid_idxs])),
171 |                        shape = (nvoxels, nvoxels), dtype=np.float32)
172 |     # Sanity tests: the Laplacian should annihilate the constant vector
173 |     try:
174 |         ones = np.ones((F.shape[0],))
175 |         Finner1 = (F*ones).reshape(ny,nx,nz)
176 |         assert(np.sum(Finner1) == 0.0)
177 |     except:
178 |         print "First differences matrix has non-zero inner product with vector of ones!!!"
179 |     try:
180 |         ones = np.ones((F.shape[0],))
181 |         Finner1 = (F.T*ones).reshape(ny,nx,nz)
182 |         assert(np.sum(Finner1) == 0.0)
183 |     except:
184 |         print "First differences matrix transpose has non-zero inner product with vector of ones!!!"
185 | 
186 |     return F
187 | 
188 | if __name__ == '__main__':
189 |     L = construct_adjacency_list(10,10,1)
190 | 
--------------------------------------------------------------------------------
/optimization/graphs/graph_laplacian.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/optimization/graphs/graph_laplacian.pyc
--------------------------------------------------------------------------------
/scons/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/logang/neuroparser/835fc7b5b6e2d2ce47fd286a498fdecb0144d4d2/scons/__init__.py
--------------------------------------------------------------------------------
/scons/cython.py:
--------------------------------------------------------------------------------
1 | 
2 | """ Builders for Cython.
3 | 
4 | This module will add the following builders to the environment:
5 |  * Cython : compile .pyx files to .c files (e.g. env.Cython("foo.pyx"))
6 |  * CythonModule : compile .pyx files to a shared library, loadable from
7 |    Python (e.g. env.CythonModule("my_module", ["foo.pyx", "bar.pyx"]))
8 | """
9 | 
10 | import Cython.Compiler.Main
11 | from SCons.Builder import Builder
12 | 
13 | def module_builder(env, module_name, source):
14 |     """ Pseudo-builder for a cython module.
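    Compiles the .pyx source to C with env.Cython, then links it into a
    loadable module via env.PythonModule. (Note: the pyext tool in
    site_scons defines PythonExtension/PythonObject rather than
    PythonModule, so this pseudo-builder may assume an additional alias.)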
15 | """ 16 | c_source = env.Cython(source) 17 | env.PythonModule(module_name, c_source) 18 | 19 | def exists(env): 20 | return env.Detect("cython") 21 | 22 | def generate(env): 23 | env["BUILDERS"]["Cython"] = Builder(action="cython $SOURCE", 24 | suffix = ".c", src_suffix = ".pyx") 25 | env.AddMethod(module_builder, "CythonModule") 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Imports 4 | import os, sys, string, numpy 5 | from setuptools import setup, find_packages 6 | 7 | from distutils.extension import Extension 8 | from Cython.Distutils import build_ext 9 | from Cython.Compiler import Main 10 | 11 | #-----------------------------------------------------------------------------# 12 | # Functions 13 | 14 | def read(fname): 15 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 16 | 17 | #-----------------------------------------------------------------------------# 18 | # setup call 19 | 20 | #ext_modules = [Extension("optimization.cwpath.lasso", ["./optimization/cwpath/lasso.pyx"],include_dirs=[numpy.get_include()])] 21 | ext_modules = [Extension("optimization.cwpath.graphnet", ["./optimization/cwpath/graphnet.pyx"],include_dirs=[numpy.get_include()])] 22 | ext_modules += [Extension("cwpath.graphnet", ["./optimization/cwpath/graphnet.pyx"],include_dirs=[numpy.get_include()])] 23 | ext_modules += [Extension("optimization.cwpath.regression", ["./optimization/cwpath/regression.pyx"],include_dirs=[numpy.get_include()])] 24 | ext_modules += [Extension("optimization.cwpath.cwpath", ["./optimization/cwpath/cwpath.pyx"],include_dirs=[numpy.get_include()])] 25 | 26 | setup( 27 | name = "Neuroparser", 28 | version = "0.1", 29 | packages = find_packages(), 30 | py_modules = ['optimization','gui','examples','optimization.cwpath','optimization.cwpath.graphnet'], 31 | cmdclass = {'build_ext' : build_ext }, 32 | ext_modules = ext_modules, 33 | # Project uses Numpy, Scipy, Matplotlib, h5py, multiprocessing 34 | install_requires = ['numpy>=1.3', 'scipy>=0.7', 'matplotlib>=0.99', 'h5py>=1.3', 'multiprocessing>=0.7'], 35 | 36 | # metadata for upload to PyPI 37 | author = "Logan Grosenick, Brad Klingenberg, Jonathan Taylor", 38 | author_email = "logang@gmail.com", 39 | description = "Neuroparser is a package for applying supervised and unsupervised statistical learning methods to large neuroimaging data.", 40 | long_description=read('README'), 41 | license = "PSF", 42 | keywords = ["fmri", "sparse", "structured", "multivariate", "calcium imaging", "neuronal dynamics"], 43 | url = "https://github.com/logang/neuroparser", 44 | 45 | classifiers=[ 46 | "Development Status :: Alpha", 47 | ], 48 | ) 49 | 50 | 51 | 52 | # import os, sys 53 | # import string 54 | 55 | # from Cython.Compiler import Main 56 | # from distutils.extension import Extension 57 | # from Cython.Distutils import build_ext 58 | 59 | # def cython_extension(srcfile): 60 | # options = Main.CompilationOptions(include_path=[os.path.join(os.path.abspath(os.path.dirname(__file__)), 'include')]) 61 | # Main.compile(srcfile, options=options) 62 | 63 | # def configuration(parent_package='',top_path=None): 64 | # from numpy.distutils.misc_util import Configuration 65 | # config = Configuration(None,parent_package,top_path) 66 | # config.add_subpackage('optimization/cwpath') 67 | # return config 68 | 69 | # if __name__ == '__main__': 70 | 71 | # # ext_modules = 
[Extension("optimization.cwpath.lasso", ["./optimization/cwpath/lasso.pyx"])] 72 | # ext_modules = [Extension("optimization.cwpath.graphnet", ["./optimization/cwpath/graphnet.pyx"])] 73 | # ext_modules += [Extension("optimization.cwpath.regression", ["./optimization/cwpath/regression.pyx"])] 74 | # ext_modules += [Extension("optimization.cwpath.cwpath", ["./optimization/cwpath/cwpath.pyx"])] 75 | 76 | # #cython_extension("optimization/cwpath/lasso.pyx") 77 | # # cython_extension("optimization/cwpath/graphnet.pyx") 78 | # # cython_extension("optimization/cwpath/regression.pyx") 79 | # # cython_extension("optimization/cwpath/cwpath.pyx") 80 | 81 | # from numpy.distutils.core import setup 82 | 83 | # c = configuration(top_path='', 84 | # ext_modules=ext_modules, 85 | # cmdclass = {'build_ext': build_ext} 86 | # ).todict() 87 | # setup(**c) 88 | -------------------------------------------------------------------------------- /site_scons/site_tools/cython.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tool to run Cython files (.pyx) into .c and .cpp. 3 | 4 | TODO: 5 | - Add support for dynamically selecting in-process Cython 6 | through CYTHONINPROCESS variable. 7 | - Have a CYTHONCPP option which turns on C++ in flags and 8 | changes output extension at the same time 9 | 10 | VARIABLES: 11 | - CYTHON - The path to the "cython" command line tool. 12 | - CYTHONFLAGS - Flags to pass to the "cython" command line tool. 13 | 14 | AUTHORS: 15 | - David Cournapeau 16 | - Dag Sverre Seljebotn 17 | 18 | """ 19 | import SCons 20 | from SCons.Builder import Builder 21 | from SCons.Action import Action 22 | 23 | #def cython_action(target, source, env): 24 | # print target, source, env 25 | # from Cython.Compiler.Main import compile as cython_compile 26 | # res = cython_compile(str(source[0])) 27 | 28 | cythonAction = Action("$CYTHONCOM") 29 | 30 | def create_builder(env): 31 | try: 32 | cython = env['BUILDERS']['Cython'] 33 | except KeyError: 34 | cython = SCons.Builder.Builder( 35 | action = cythonAction, 36 | emitter = {}, 37 | suffix = cython_suffix_emitter, 38 | single_source = 1) 39 | env['BUILDERS']['Cython'] = cython 40 | 41 | return cython 42 | 43 | def cython_suffix_emitter(env, source): 44 | return "$CYTHONCFILESUFFIX" 45 | 46 | def generate(env): 47 | env["CYTHON"] = "cython" 48 | env["CYTHONCOM"] = "$CYTHON $CYTHONFLAGS -o $TARGET $SOURCE" 49 | env["CYTHONCFILESUFFIX"] = ".c" 50 | 51 | c_file, cxx_file = SCons.Tool.createCFileBuilders(env) 52 | 53 | c_file.suffix['.pyx'] = cython_suffix_emitter 54 | c_file.add_action('.pyx', cythonAction) 55 | 56 | c_file.suffix['.py'] = cython_suffix_emitter 57 | c_file.add_action('.py', cythonAction) 58 | 59 | create_builder(env) 60 | 61 | def exists(env): 62 | try: 63 | # import Cython 64 | return True 65 | except ImportError: 66 | return False 67 | -------------------------------------------------------------------------------- /site_scons/site_tools/pyext.py: -------------------------------------------------------------------------------- 1 | """SCons.Tool.pyext 2 | 3 | Tool-specific initialization for python extensions builder. 
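Adds the PythonObject and PythonExtension builders to an SCons environment
(see createPythonObjectBuilder and createPythonExtensionBuilder below).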
4 | 5 | AUTHORS: 6 | - David Cournapeau 7 | - Dag Sverre Seljebotn 8 | 9 | """ 10 | 11 | # 12 | # __COPYRIGHT__ 13 | # 14 | # Permission is hereby granted, free of charge, to any person obtaining 15 | # a copy of this software and associated documentation files (the 16 | # "Software"), to deal in the Software without restriction, including 17 | # without limitation the rights to use, copy, modify, merge, publish, 18 | # distribute, sublicense, and/or sell copies of the Software, and to 19 | # permit persons to whom the Software is furnished to do so, subject to 20 | # the following conditions: 21 | # 22 | # The above copyright notice and this permission notice shall be included 23 | # in all copies or substantial portions of the Software. 24 | # 25 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 26 | # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 27 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 29 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 30 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 31 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 32 | # 33 | 34 | # modified 3/1/2013 by L. Grosenick. 35 | 36 | __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__" 37 | 38 | import sys 39 | 40 | import SCons 41 | from SCons.Tool import SourceFileScanner, ProgramScanner 42 | 43 | # Create common python builders 44 | 45 | def createPythonObjectBuilder(env): 46 | """This is a utility function that creates the PythonObject Builder in an 47 | Environment if it is not there already. 48 | 49 | If it is already there, we return the existing one. 50 | """ 51 | 52 | try: 53 | pyobj = env['BUILDERS']['PythonObject'] 54 | except KeyError: 55 | pyobj = SCons.Builder.Builder(action = {}, 56 | emitter = {}, 57 | prefix = '$PYEXTOBJPREFIX', 58 | suffix = '$PYEXTOBJSUFFIX', 59 | src_builder = ['CFile', 'CXXFile'], 60 | source_scanner = SourceFileScanner, 61 | single_source = 1) 62 | env['BUILDERS']['PythonObject'] = pyobj 63 | 64 | return pyobj 65 | 66 | def createPythonExtensionBuilder(env): 67 | """This is a utility function that creates the PythonExtension Builder in 68 | an Environment if it is not there already. 69 | 70 | If it is already there, we return the existing one. 
71 | """ 72 | 73 | try: 74 | pyext = env['BUILDERS']['PythonExtension'] 75 | except KeyError: 76 | import SCons.Action 77 | import SCons.Defaults 78 | action = SCons.Action.Action("$PYEXTLINKCOM", "$PYEXTLINKCOMSTR") 79 | action_list = [ SCons.Defaults.SharedCheck, 80 | action] 81 | pyext = SCons.Builder.Builder(action = action_list, 82 | emitter = "$SHLIBEMITTER", 83 | prefix = '$PYEXTPREFIX', 84 | suffix = '$PYEXTSUFFIX', 85 | target_scanner = ProgramScanner, 86 | src_suffix = '$PYEXTOBJSUFFIX', 87 | src_builder = 'PythonObject') 88 | env['BUILDERS']['PythonExtension'] = pyext 89 | 90 | return pyext 91 | 92 | def pyext_coms(platform): 93 | """Return PYEXTCCCOM, PYEXTCXXCOM and PYEXTLINKCOM for the given 94 | platform.""" 95 | if platform == 'win32': 96 | pyext_cccom = "$PYEXTCC /Fo$TARGET /c $PYEXTCCSHARED "\ 97 | "$PYEXTCFLAGS $PYEXTCCFLAGS $_CCCOMCOM "\ 98 | "$_PYEXTCPPINCFLAGS $SOURCES" 99 | pyext_cxxcom = "$PYEXTCXX /Fo$TARGET /c $PYEXTCSHARED "\ 100 | "$PYEXTCXXFLAGS $PYEXTCCFLAGS $_CCCOMCOM "\ 101 | "$_PYEXTCPPINCFLAGS $SOURCES" 102 | pyext_linkcom = '${TEMPFILE("$PYEXTLINK $PYEXTLINKFLAGS '\ 103 | '/OUT:$TARGET.windows $( $_LIBDIRFLAGS $) '\ 104 | '$_LIBFLAGS $_PYEXTRUNTIME $SOURCES.windows")}' 105 | else: 106 | pyext_cccom = "$PYEXTCC -o $TARGET -c $PYEXTCCSHARED "\ 107 | "$PYEXTCFLAGS $PYEXTCCFLAGS $_CCCOMCOM "\ 108 | "$_PYEXTCPPINCFLAGS $SOURCES" 109 | pyext_cxxcom = "$PYEXTCXX -o $TARGET -c $PYEXTCSHARED "\ 110 | "$PYEXTCXXFLAGS $PYEXTCCFLAGS $_CCCOMCOM "\ 111 | "$_PYEXTCPPINCFLAGS $SOURCES" 112 | pyext_linkcom = "$PYEXTLINK -o $TARGET $PYEXTLINKFLAGS "\ 113 | "$SOURCES $_LIBDIRFLAGS $_LIBFLAGS $_PYEXTRUNTIME" 114 | 115 | if platform == 'darwin': 116 | pyext_linkcom += ' $_FRAMEWORKPATH $_FRAMEWORKS $FRAMEWORKSFLAGS' 117 | 118 | return pyext_cccom, pyext_cxxcom, pyext_linkcom 119 | 120 | def set_basic_vars(env): 121 | # Set construction variables which are independant on whether we are using 122 | # distutils or not. 123 | env['PYEXTCPPPATH'] = SCons.Util.CLVar('$PYEXTINCPATH') 124 | 125 | env['_PYEXTCPPINCFLAGS'] = '$( ${_concat(INCPREFIX, PYEXTCPPPATH, '\ 126 | 'INCSUFFIX, __env__, RDirs, TARGET, SOURCE)} $)' 127 | env['PYEXTOBJSUFFIX'] = '$SHOBJSUFFIX' 128 | env['PYEXTOBJPREFIX'] = '$SHOBJPREFIX' 129 | 130 | env['PYEXTRUNTIME'] = SCons.Util.CLVar("") 131 | # XXX: this should be handled with different flags 132 | env['_PYEXTRUNTIME'] = '$( ${_concat(LIBLINKPREFIX, PYEXTRUNTIME, '\ 133 | 'LIBLINKSUFFIX, __env__)} $)' 134 | # XXX: This won't work in all cases (using mingw, for example). To make 135 | # this work, we need to know whether PYEXTCC accepts /c and /Fo or -c -o. 136 | # This is difficult with the current way tools work in scons. 
137 | pycc, pycxx, pylink = pyext_coms(sys.platform) 138 | 139 | env['PYEXTLINKFLAGSEND'] = SCons.Util.CLVar('$LINKFLAGSEND') 140 | 141 | env['PYEXTCCCOM'] = pycc 142 | env['PYEXTCXXCOM'] = pycxx 143 | env['PYEXTLINKCOM'] = pylink 144 | 145 | def _set_configuration_nodistutils(env): 146 | # Set env variables to sensible values when not using distutils 147 | def_cfg = {'PYEXTCC' : '$SHCC', 148 | 'PYEXTCFLAGS' : '$SHCFLAGS', 149 | 'PYEXTCCFLAGS' : '$SHCCFLAGS', 150 | 'PYEXTCXX' : '$SHCXX', 151 | 'PYEXTCXXFLAGS' : '$SHCXXFLAGS', 152 | 'PYEXTLINK' : '$LDMODULE', 153 | 'PYEXTSUFFIX' : '$LDMODULESUFFIX', 154 | 'PYEXTPREFIX' : ''} 155 | 156 | if sys.platform == 'darwin': 157 | def_cfg['PYEXTSUFFIX'] = '.so' 158 | 159 | for k, v in def_cfg.items(): 160 | ifnotset(env, k, v) 161 | 162 | ifnotset(env, 'PYEXT_ALLOW_UNDEFINED', 163 | SCons.Util.CLVar('$ALLOW_UNDEFINED')) 164 | ifnotset(env, 'PYEXTLINKFLAGS', SCons.Util.CLVar('$LDMODULEFLAGS')) 165 | 166 | env.AppendUnique(PYEXTLINKFLAGS = env['PYEXT_ALLOW_UNDEFINED']) 167 | 168 | def ifnotset(env, name, value): 169 | if not env.has_key(name): 170 | env[name] = value 171 | 172 | def set_configuration(env, use_distutils): 173 | """Set construction variables which are platform dependants. 174 | 175 | If use_distutils == True, use distutils configuration. Otherwise, use 176 | 'sensible' default. 177 | 178 | Any variable already defined is untouched.""" 179 | 180 | # We define commands as strings so that we can either execute them using 181 | # eval (same python for scons and distutils) or by executing them through 182 | # the shell. 183 | dist_cfg = {'PYEXTCC': ("sysconfig.get_config_var('CC')", False), 184 | 'PYEXTCFLAGS': ("sysconfig.get_config_var('CFLAGS')", True), 185 | 'PYEXTCCSHARED': ("sysconfig.get_config_var('CCSHARED')", False), 186 | 'PYEXTLINKFLAGS': ("sysconfig.get_config_var('LDFLAGS')", True), 187 | 'PYEXTLINK': ("sysconfig.get_config_var('LDSHARED')", False), 188 | 'PYEXTINCPATH': ("sysconfig.get_python_inc()", False), 189 | 'PYEXTSUFFIX': ("sysconfig.get_config_var('SO')", False)} 190 | 191 | from distutils import sysconfig 192 | 193 | # We set the python path even when not using distutils, because we rarely 194 | # want to change this, even if not using distutils 195 | ifnotset(env, 'PYEXTINCPATH', sysconfig.get_python_inc()) 196 | 197 | try: 198 | if use_distutils: 199 | for k, (v, should_split) in dist_cfg.items(): 200 | val = eval(v) 201 | try: 202 | if should_split: 203 | val = val.split() 204 | except: 205 | print "Value ", val, "could not be split." 206 | ifnotset(env, k, val) 207 | else: 208 | _set_configuration_nodistutils(env) 209 | except: 210 | print "Error while trying to use Distutils! Continuing without using Distutils." 211 | _set_configuration_nodistutils(env) 212 | 213 | def generate(env): 214 | """Add Builders and construction variables for python extensions to an 215 | Environment.""" 216 | 217 | if not env.has_key('PYEXT_USE_DISTUTILS'): 218 | env['PYEXT_USE_DISTUTILS'] = False 219 | 220 | # This sets all constructions variables used for pyext builders. 
221 | set_basic_vars(env) 222 | 223 | set_configuration(env, env['PYEXT_USE_DISTUTILS']) 224 | 225 | # Create the PythonObject builder 226 | pyobj = createPythonObjectBuilder(env) 227 | action = SCons.Action.Action("$PYEXTCCCOM", "$PYEXTCCCOMSTR") 228 | pyobj.add_emitter('.c', SCons.Defaults.SharedObjectEmitter) 229 | pyobj.add_action('.c', action) 230 | 231 | action = SCons.Action.Action("$PYEXTCXXCOM", "$PYEXTCXXCOMSTR") 232 | pyobj.add_emitter('$CXXFILESUFFIX', SCons.Defaults.SharedObjectEmitter) 233 | pyobj.add_action('$CXXFILESUFFIX', action) 234 | 235 | # Create the PythonExtension builder 236 | createPythonExtensionBuilder(env) 237 | 238 | def exists(env): 239 | try: 240 | # This is not quite right: if someone defines all variables by himself, 241 | # it would work without distutils 242 | from distutils import sysconfig 243 | return True 244 | except ImportError: 245 | return False 246 | -------------------------------------------------------------------------------- /todo.txt: -------------------------------------------------------------------------------- 1 | # List of to do items for Neuroparser development 2 | 3 | # -------------- GUI -------------- # 4 | 5 | Include panels for: 6 | -- Loading, viewing, and saving data 7 | -- Running/Managing analysis 8 | -- Viewing coefficients 9 | -- Viewing parameters 10 | -- Benchmarking/Synthetic data 11 | 12 | # -------------- IO --------------- # 13 | 14 | Loading files: 15 | -- load NIFTI files 16 | -- load NPZ files 17 | -- load behavioral variables 18 | 19 | # -------------- DISPLAY -------------- # 20 | 21 | Coefficient View: 22 | -- integrate real-time viewing of coefficients with sliders 23 | -- can we use real-time Chaco code easily within PyQt4, or should it be rewritten? 24 | 25 | # -------------- DOCS -------------- # 26 | 27 | Improve README: 28 | -- thorough install instructions cross-platform 29 | -- dependencies and their install instructions 30 | 31 | Better example code! 32 | --------------------------------------------------------------------------------