├── README.md
├── test.py
├── predict.py
├── run.py
├── plotting.py
├── BinnedFisher.py
└── Fisher.py

/README.md:
--------------------------------------------------------------------------------
1 | # FisherDisc
2 | Fisher Discriminant, including a Kernel Fisher Discriminant and a binned FLD. Implementation for large-p datasets, based on Zhang et al., 'Regularized Discriminant Analysis, Ridge Regression and Beyond', Journal of Machine Learning Research 11 (2010) 2199-2228.
3 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from Fisher import Fisher
4 | from BinnedFisher import BinnedFisher
5 | 
6 | 
7 | #make random two classes
8 | class0 = np.random.normal(1,1, (50,5) )
9 | class1 = np.random.normal(-2,0.5, (50,5) )
10 | 
11 | X = np.vstack( (class0, class1) )
12 | y = np.array( [0 for i in range(50)] + [1 for i in range(50)] )
13 | 
14 | 
15 | f = Fisher()
16 | 
17 | f.fit(X, y, tol = 0.1)
18 | 
19 | print f.transform(X)
20 | 
21 | 
22 | #after fit, can update tolerance
23 | f.update_tol( tol = 0.01 )
24 | 
25 | print f.transform(X)
26 | 
27 | 
28 | 
29 | #add additional variable to binning, for BinnedFisher
30 | v = np.array( [[0.25 for i in range(25)]+[0.75 for i in range(25)]+[0.25 for i in range(25)]+[0.75 for i in range(25)]] )
31 | 
32 | 
33 | X = np.hstack( (v.T, X) )
34 | 
35 | bf = BinnedFisher( bins = [0.0,0.5,1.0] )
36 | 
37 | bf.fit(X,y, tol=[0.01, 0.01])
38 | 
39 | print bf.transform(X)
40 | 
41 | 
42 | 
43 | 
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import cPickle
4 | 
5 | import numpy as np
6 | from scipy import linalg
7 | 
8 | import matplotlib.pyplot as plt
9 | import matplotlib.colors as colors
10 | 
11 | import plotting
12 | 
13 | from BinnedFisher import BinnedFisher
14 | 
15 | def predict( normImage = True, saveFile = False, makePlot = True):
16 | 
17 |     bfish = cPickle.load( file('trained_'+('norm_' if normImage else '')+ 'DR_fisher.pkl', 'r') )
18 | 
19 |     testfile = file('data/alldata_'+('norm_' if normImage else '')+ 'TEST.pkl', 'r')
20 |     data = cPickle.load( testfile )
21 |     spec = cPickle.load( testfile )
22 |     testfile.close()
23 | 
24 |     drbin = (data[:,1]>=0.5)*(data[:,1]<0.75)
25 |     data = data[drbin,:]
26 |     spec = spec[drbin,:]
27 | 
28 |     X = data[:,1:]
29 |     y = data[:,0]
30 |     dr = data[:,1]
31 |     tau21 = spec[:,2]
32 | 
33 |     bfish.update_tol(tol=[1.0e-3, 0.75e-6, 0.1e-3]) # normed
34 |     #bfish.update_tol(tol=[2.5e0, 2.5e-1, 0.3e0]) # non-normed
35 | 
36 |     t = bfish.transform(X, return_ll=False)
37 | 
38 |     print t
39 | 
40 | 
41 | 
42 | 
43 |     if saveFile:
44 |         out_arr = np.hstack( (np.array([y]).T, spec, np.array([dr]).T, np.array([t]).T) )
45 |         print "out shape=", out_arr.shape
46 |         np.savetxt('TEST_predict/TEST_'+('norm_' if normImage else '')+ 'DR_Fisher.txt', out_arr, delimiter=',')
47 | 
48 |     #sys.exit(0)
49 | 
50 | 
51 | 
52 | 
53 |     if makePlot:
54 |         s, bns = np.histogram(t[y==1], normed=True)
55 |         b, bns = np.histogram(t[y==0], bins=bns, normed=True)
56 | 
57 |         x_cen = [ 0.5*(bns[i]+bns[i+1]) for i in range(len(bns)-1)]
58 | 
59 |         plt.figure()
60 |         plt.plot(x_cen, s, color='g', linewidth=3)
61 |         plt.plot(x_cen, b, color='b', linewidth=3)
62 |         #plt.show()
63 | 
64 |         Sigs = [ t[y==1], tau21[y==1] ]
65 |         Bkgs = [ t[y==0], tau21[y==0] ]
66 |         Labs = ["Fisher","Tau21"]
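        # cut direction for each ROC curve below: 'g' counts events above the cut as passing
        # (used for the Fisher output), 'l' counts events below it (used for tau21); see plotting.ROC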
cut_type=['g','l'] 68 | 69 | plotting.ROC(Sigs, Bkgs, Labs, cut_type=cut_type) 70 | 71 | for ifish in range(len(bfish.comp)): 72 | fish = bfish.fish[ifish].w_[0][::-1] 73 | 74 | fig = plt.figure(figsize=(7,5)) 75 | ax = fig.add_subplot(111) 76 | elem = fish.reshape(25,25) 77 | vmin = np.min(elem) 78 | vmax = np.max(elem) 79 | 80 | elem /= np.max( [ abs(vmin), abs(vmax)] ) 81 | vmin = np.min(elem) 82 | vmax = np.max(elem) 83 | 84 | cm_bi = colors.LinearSegmentedColormap.from_list('bi', 85 | [(0,'red'), (abs(vmin)/(vmax-vmin), 'white'),(1,'blue')]) 86 | ret = ax.imshow(elem, 87 | cmap=cm_bi, 88 | interpolation='nearest', 89 | origin='lower') #extent=[low, high, low, high], 90 | ax.set_title("Fisher "+str(ifish), size='xx-large') 91 | 92 | plt.show() 93 | 94 | 95 | if __name__=="__main__": 96 | predict() 97 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import cPickle 4 | 5 | import numpy as np 6 | from scipy import linalg 7 | 8 | import matplotlib.pyplot as plt 9 | import matplotlib.colors as colors 10 | 11 | from BinnedFisher import BinnedFisher 12 | 13 | usePartialData=True 14 | 15 | print "### Loading Data ###" 16 | ## if not usePartialData: 17 | ## sig = np.loadtxt('data/signal.txt', delimiter=',') 18 | ## bkg = np.loadtxt('data/qcd.txt', delimiter=',') 19 | 20 | ## alldata = np.concatenate( (sig, bkg), axis=0) 21 | ## #np.random.shuffle(alldata) 22 | 23 | ## #alldata = alldata[0::10, :] 24 | ## outfile = file('alldata.pkl', 'wb') 25 | ## cPickle.dump(alldata, outfile, protocol=cPickle.HIGHEST_PROTOCOL) 26 | ## outfile.close() 27 | ## sys.exit(0) 28 | 29 | normImage = False 30 | 31 | makePlot = False 32 | 33 | alldata = cPickle.load( file('data/alldata_'+('norm_' if normImage else '')+ 'TRAIN.pkl', 'r') ) 34 | 35 | 36 | print "### Building Model ###" 37 | 38 | X = alldata[:,1:] 39 | y = alldata[:,0] 40 | 41 | bfish = BinnedFisher( bins=[0.25, 0.5, 0.75, float('inf')] ) 42 | 43 | #bfish.fit(X, y, tol=[4.0e-3, 1.0e-3, 0.5e-3]) #old 44 | if normImage: 45 | #bfish.fit(X, y, tol=[2.0e-3, 0.6e-3, 0.2e-3]) #good for normed, 10k per bin per label 46 | bfish.fit(X, y, tol=[1.0e-3, 0.75e-4, 0.1e-3]) #good for normed, 10k per bin per label 47 | 48 | else: 49 | #bfish.fit(X, y, tol=[9.5e0, 11e0, 3.0e0]) #good-ish for non-normed, 10k per bin per label 50 | bfish.fit(X, y, tol=[2.5e0, 2.5e-1, 0.3e0]) #good-ish for non-normed, 10k per bin per label 51 | 52 | outfile = file('trained_'+('norm_' if normImage else '')+ 'DR_fisher.pkl', 'wb') 53 | cPickle.dump(bfish, outfile, protocol=cPickle.HIGHEST_PROTOCOL) 54 | outfile.close() 55 | 56 | 57 | 58 | 59 | if makePlot: 60 | for ifish in range(len(bfish.comp)): 61 | fish = bfish.comp[ifish][::-1] 62 | 63 | print 'fish', ifish,'singular values:' 64 | print bfish.fish[ifish].singular_vals 65 | 66 | fig = plt.figure(figsize=(7,5)) 67 | ax = fig.add_subplot(111) 68 | elem = fish.reshape(25,25) 69 | vmin = np.min(elem) 70 | vmax = np.max(elem) 71 | 72 | elem /= np.max( [ abs(vmin), abs(vmax)] ) 73 | vmin = np.min(elem) 74 | vmax = np.max(elem) 75 | 76 | cm_bi = colors.LinearSegmentedColormap.from_list('bi', 77 | [(0,'red'), (abs(vmin)/(vmax-vmin), 'white'),(1,'blue')]) 78 | ret = ax.imshow(elem, 79 | cmap=cm_bi, 80 | interpolation='nearest', 81 | origin='lower') #extent=[low, high, low, high], 82 | ax.set_title("Fisher "+str(ifish), size='xx-large') 83 | 84 | plt.show() 85 | 86 | 87 | 88 | 89 | 90 | 91 | 
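## Illustrative sketch (not part of the original script): the pickle written above can be
## reloaded later and applied to new data with the same column layout (column 0 of X being
## the binning variable), which is what predict.py does in full:
##   bfish = cPickle.load( file('trained_'+('norm_' if normImage else '')+'DR_fisher.pkl', 'r') )
##   t = bfish.transform(X_new)   # X_new is a placeholder name for a test array shaped like X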
## t = bfish.transform(X) 92 | 93 | ## s, bns = np.histogram(t[y==1], normed=True) 94 | ## b, bns = np.histogram(t[y==0], bins=bns, normed=True) 95 | 96 | ## x_cen = [ 0.5*(bns[i]+bns[i+1]) for i in range(len(bns)-1)] 97 | 98 | ## plt.figure() 99 | ## plt.plot(x_cen, s, color='g', linewidth=3) 100 | ## plt.plot(x_cen, b, color='b', linewidth=3) 101 | ## plt.show() 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /plotting.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | import numpy as np 4 | import scipy as sc 5 | 6 | 7 | def ROC( signal, background, label, cut_start=None, cut_end=None, cut_type=None): 8 | 9 | ## s = np.array( signal ) 10 | ## b = np.array( background ) 11 | ## l = np.array( label ) 12 | 13 | ## if len(s.shape)==1: 14 | ## s = np.array( [signal] ) 15 | ## if len(b.shape)==1: 16 | ## b = np.array( [background] ) 17 | ## if len(l.shape)==0: 18 | ## l = np.array( [label] ) 19 | 20 | fig = plt.figure() 21 | 22 | if cut_type==None: 23 | cut_type=['g' for ic in range(len(signal)) ] 24 | 25 | for ivar in range(len(signal)): 26 | s_sort = np.sort( signal[ivar] ) 27 | b_sort = np.sort( background[ivar] ) 28 | 29 | #c_start=(0.0 if cut_start==None else cut_start) 30 | #c_end= (1.0 if cut_end==None else cut_end) 31 | 32 | c_start=np.min( (s_sort[0], b_sort[0]) ) 33 | c_end= np.max( (s_sort[len(s_sort)-1], b_sort[len(b_sort)-1]) ) 34 | 35 | if c_start==-float('inf'): 36 | c_start = -2*c_end 37 | 38 | print label[ivar], "min(", s_sort[0], b_sort[0], ")=", c_start 39 | print label[ivar], "max(", s_sort[-1], b_sort[-1], ")=", c_end 40 | 41 | s_eff=[] 42 | b_rej=[] 43 | 44 | n_points = 1000 45 | c_delta = (1.0*c_end - 1.0*c_start) / (1.0*n_points) 46 | for i in range(1000): 47 | cut = c_start + i*1.0*c_delta 48 | if cut_type[ivar]=='g': 49 | s_eff.append( 1.0*np.count_nonzero( s_sort > cut ) / (1.0*len(s_sort)) ) 50 | b_count = np.count_nonzero( b_sort > cut ) 51 | elif cut_type[ivar]=='l': 52 | s_eff.append( 1.0*np.count_nonzero( s_sort < cut ) / (1.0*len(s_sort)) ) 53 | b_count = np.count_nonzero( b_sort < cut ) 54 | b_rej.append( (1.0*len(b_sort)) / (1.0 if b_count==0 else (1.0*b_count)) ) 55 | 56 | #print s_eff 57 | plt.plot(s_eff,b_rej) 58 | 59 | plt.legend(label, loc='lower left', prop={'size':6}) 60 | plt.yscale('log') 61 | #plt.show() 62 | 63 | 64 | return 65 | 66 | 67 | def Eff_vs_Var( disc, var, label, bins, cuts= None, eff_target=0.7 ): 68 | 69 | fig = plt.figure() 70 | 71 | 72 | bin_error=[] 73 | bin_center=[] 74 | for ibin in range(len(bins)-1): 75 | ierror = (bins[ibin+1] - bins[ibin])/2.0 76 | bin_error.append( ierror ) 77 | bin_center.append( bins[ibin] + ierror ) 78 | 79 | for isamp in range(len(disc)): 80 | 81 | idisc = np.array(disc[isamp]) 82 | ivar = np.array(var[isamp]) 83 | 84 | if cuts == None: 85 | cut_val = Get_Cut_Value(idisc, eff_target) 86 | #cut_val = np.sort(idisc)[ int((1.0-eff_target)*len(idisc)) ] 87 | else: 88 | cut_val = cuts[isamp] 89 | 90 | #sort_indices = np.argsort(disc[isamp]) 91 | 92 | eff = [] 93 | yerr = [] 94 | for ibin in range(len(bins)-1): 95 | idisc_ibin = idisc[ (ivar>=bins[ibin]) * (ivar cut_val ) 98 | 99 | eff.append( (1.0*n_pass) / (1.0*n_tot) ) 100 | yerr.append( (1.0/(1.0*n_tot)) * np.sqrt( n_pass * (1.0 - (1.0*n_pass) / (1.0*n_tot))) ) 101 | 102 | print bin_center[ibin], n_pass, n_tot, eff[ibin], yerr[ibin] 103 | 104 | plt.errorbar( bin_center, eff, xerr = bin_error, yerr = yerr) 105 | 106 | 
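    # the yerr values filled above are the binomial standard error on the per-bin
    # efficiency, i.e. sqrt( eff * (1 - eff) / n_tot )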
plt.legend(label, loc='best', prop={'size':6}) 107 | 108 | return 109 | 110 | 111 | def Get_Cut_Value(disc, eff_target): 112 | return np.sort(disc)[ int((1.0-eff_target)*len(disc)) ] 113 | -------------------------------------------------------------------------------- /BinnedFisher.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | import numpy as np 5 | from scipy import linalg 6 | 7 | from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin 8 | 9 | 10 | from Fisher import Fisher 11 | 12 | __all__ = ['BinnedFisher'] 13 | 14 | 15 | ##################################################################################################################### 16 | #NOTE TO SELF: 17 | # np.inner(A,B) sums over last indices, i.e. = A[i,j]*B[k,j] 18 | # so if you want to do A*B, you should do np.inner(A, B.T) 19 | # Also, np.inner is faster than np.dot 20 | ##################################################################################################################### 21 | 22 | 23 | class BinnedFisher(BaseEstimator, ClassifierMixin, TransformerMixin): 24 | 25 | 26 | def __init__(self, norm_covariance = True, n_components=None, priors=None, bins = [-float('inf'), float('inf')] ): 27 | 28 | self.nbins = len(bins)-1 29 | self.bins = np.sort(bins) 30 | self.bin_trained = [False for i in range(self.nbins)] 31 | 32 | self.fish = [ Fisher(norm_covariance, n_components, priors) for i in range(self.nbins) ] 33 | 34 | 35 | def fit(self, X, y, tol=[1.0e-4], store_covariance=False, do_smooth_reg=False, cov_class=None, cov_power=1, entries_per_ll_bin = 10): 36 | X = np.asarray(X) 37 | y = np.asarray(y) 38 | 39 | if len(tol)==1: 40 | tol = [tol for i in range(self.nbins)] 41 | elif len(tol) != self.nbins: 42 | print "tol must have length 1 or nbins. exiting" 43 | sys.exit(2) 44 | 45 | self.tol = tol 46 | self.do_smooth_reg = do_smooth_reg 47 | self.cov_class = cov_class 48 | self.cov_power = cov_power 49 | self.entries_per_ll_bin = entries_per_ll_bin 50 | self.comp = [] 51 | self.ll_sig = [] 52 | self.ll_bkg = [] 53 | 54 | self.ll_bin_edges = [] 55 | 56 | 57 | for i in range(self.nbins): 58 | print "Starting fit for bin", i 59 | ts = time.time() 60 | 61 | low, high = self.bins[i], self.bins[i+1] 62 | 63 | the_entries = (X[:,0] >=low) * (X[:,0] =low) * (X[:,0] (self.nbins-1): 156 | print ("bin number must be between 0 and %d", self.nbins-1) 157 | sys.exit(2) 158 | 159 | if not override and not self.bin_trained[bin_number]: 160 | print ("bin %d not trained! Can't transform before running fit!", i) 161 | sys.exit(2) 162 | 163 | Xi = np.asarray(Xi) 164 | 165 | out = self.fish[bin_number].transform(Xi) 166 | 167 | return out 168 | 169 | 170 | def _eval_ll_bin(self, Ti, bin_number): 171 | ''' 172 | only works on transformed data 173 | ''' 174 | 175 | if bin_number < 0 or bin_number > (self.nbins-1): 176 | print ("bin number must be between 0 and %d", self.nbins-1) 177 | sys.exit(2) 178 | 179 | if not self.bin_trained[bin_number]: 180 | print ("bin %d not trained! 
Can't transform before running fit!" % bin_number)
181 |             sys.exit(2)
182 | 
183 |         Ti = np.asarray(Ti)
184 | 
185 |         # anything not found, gets value of 1
186 |         llout = np.ones( Ti.shape[0] )
187 | 
188 |         for i in range(len(self.ll_bin_edges[bin_number]) - 1):
189 |             the_entries = (Ti >= self.ll_bin_edges[bin_number][i]) * (Ti < self.ll_bin_edges[bin_number][i+1])
190 |             llout[ the_entries ] = np.repeat( self.ll_sig[bin_number][i] / self.ll_bkg[bin_number][i], np.count_nonzero(the_entries))
191 | 
192 |         return llout
193 | 
194 | 
195 | 
196 | 
--------------------------------------------------------------------------------
/Fisher.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements Fisher Discriminant Analysis.
3 | """
4 | __author__ = 'Michael Kagan mkagan@cern.ch'
5 | #
6 | #  Code based on sklearn LDA code written by: Matthieu Perrot
7 | #                                             Mathieu Blondel
8 | #
9 | #  using algorithms as described in:
10 | #  Zhang et al., 'Regularized Discriminant Analysis, Ridge Regression and Beyond', Journal of Machine Learning Research 11 (2010) 2199-2228
11 | #
12 | 
13 | import warnings
14 | import sys
15 | import time
16 | 
17 | import numpy as np
18 | from scipy import linalg
19 | 
20 | from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
21 | from sklearn.utils.extmath import logsumexp
22 | from sklearn.utils.validation import check_X_y
23 | from sklearn.preprocessing import KernelCenterer
24 | from sklearn.metrics.pairwise import pairwise_kernels
25 | 
26 | __all__ = ['Fisher', 'KernelFisher']
27 | 
28 | 
29 | #####################################################################################################################
30 | #NOTE TO SELF:
31 | #  np.inner(A,B) sums over last indices, i.e. = A[i,j]*B[k,j]
32 | #  so if you want to do A*B, you should do np.inner(A, B.T)
33 | #  Also, np.inner is faster than np.dot
34 | #####################################################################################################################
35 | 
36 | 
37 | class Fisher(BaseEstimator, ClassifierMixin, TransformerMixin):
38 |     """
39 |     Fisher Discriminant Analysis (LDA)
40 | 
41 |     A classifier with a linear decision boundary, generated by
42 |     maximizing the Fisher criterion: the between-class variance of
43 |     the projected data divided by its within-class variance, so that
44 |     classes are well separated along the fitted direction.
45 | 
46 |     The fitted model can also be used to reduce the dimensionality
47 |     of the input, by projecting it to the most discriminative
48 |     directions.
49 | 50 | Parameters 51 | ---------- 52 | 53 | norm_covariance : boolean 54 | if true, the covariance of each class will be divided by (n_points_in_class - 1) 55 | 56 | n_components: int 57 | Number of components (< n_classes - 1) for dimensionality reduction 58 | 59 | priors : array, optional, shape = [n_classes] 60 | Priors on classes 61 | 62 | Attributes 63 | ---------- 64 | `means_` : array-like, shape = [n_components_found_, [n_classes, n_features] ] 65 | Class means, for each component found 66 | `w_` : array-like, shape = [n_components_found_, n_features ] 67 | decision vector, for each component found 68 | `priors_` : array-like, shape = [n_classes] 69 | Class priors (sum to 1) 70 | `covs_` : array, shape = [n_components_found_, [ [n_features, n_features], [n_features, n_features] ] one cov for class=0 and one for class=1 71 | Covariance matrix (shared by all classes) 72 | `n_components_found_` : int 73 | number of fisher components found, which is <= n_components 74 | 75 | Examples (put fisher.py in working directory) 76 | -------- 77 | >>> import numpy as np 78 | >>> from fisher import Fisher 79 | >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 80 | >>> y = np.array([0, 0, 0, 1, 1, 1]) 81 | >>> fd = Fisher() 82 | >>> fd.fit(X, y) 83 | Fisher(n_components=1, norm_covariance=True, priors=None) 84 | >>> print(fd.transform([[-0.8, -1]])) 85 | [[-1.]] 86 | 87 | 88 | """ 89 | 90 | def __init__(self, norm_covariance = True, n_components=None, priors=None): 91 | self.norm_covariance = norm_covariance 92 | self.n_components = 1 if n_components==None else n_components 93 | self.priors = np.asarray(priors) if priors is not None else None 94 | self.basic_fit = False 95 | 96 | if self.priors is not None: 97 | if (self.priors < 0).any(): 98 | raise ValueError('priors must be non-negative') 99 | if self.priors.sum() != 1: 100 | print 'warning: the priors do not sum to 1. Renormalizing' 101 | self.priors = self.priors / self.priors.sum() 102 | 103 | 104 | def fit(self, X, y, store_covariance=False, tol=1.0e-4, 105 | do_smooth_reg=False, cov_class=None, cov_power=1): 106 | """ 107 | Fit the Fisher Discriminant model according to the given training data and parameters. 108 | 109 | Parameters 110 | ---------- 111 | X : array-like, shape = [n_samples, n_features] 112 | Training vector, where n_samples in the number of samples and 113 | n_features is the number of features. 114 | y : array, shape = [n_samples] 115 | Target values (integers) 116 | store_covariance : boolean 117 | If True the covariance matrix of each class and each iteration is computed 118 | and stored in `self.covs_` attribute. has dimensions [n_iterations][2] where 2 is for nclasses = 2 119 | tol: float 120 | used for regularization, either for svd series truncation or smoothing. 121 | do_smooth_reg: boolean 122 | If False, truncate SVD matrix inversion for singular values less then tol. 
123 | If True, apply smooth regularization (filter factor) on inversion, such that 1/s_i --> s_i/(s_i^2 + tol^2), where s_i is singular value 124 | """ 125 | X, y = check_X_y(X, y) #does not accept sparse arrays 126 | self.classes_, y = np.unique( (y>0), return_inverse=True) 127 | n_samples, n_features = X.shape 128 | n_classes = len(self.classes_) 129 | if n_classes < 2: 130 | raise ValueError('y has less than 2 classes') 131 | if self.priors is None: 132 | self.priors_ = np.bincount(y) / float(n_samples) 133 | else: 134 | self.priors_ = self.priors 135 | 136 | self.n_features=n_features 137 | self.means_ = [] 138 | self.covs_ = [] 139 | 140 | wvecs = [] 141 | 142 | # Group means n_classes*n_features matrix 143 | 144 | means = [] 145 | nevt = np.zeros(n_classes) 146 | Xc = [] 147 | Xg = [] 148 | covs = [] 149 | cov = None 150 | 151 | for ind in xrange(n_classes): 152 | Xg = X[y == ind, :] 153 | meang = Xg.mean(0) 154 | means.append(meang) 155 | nevt[ind] = Xg.shape[0] 156 | 157 | # centered group data 158 | if cov_class is None or cov_class == ind: 159 | Xgc = Xg - meang 160 | covg = np.zeros((n_features, n_features)) 161 | covg += np.dot(Xgc.T, Xgc) 162 | covs.append(covg) 163 | 164 | 165 | # check rank of Sb = m * m.T 166 | # if rank = 0, we are in null space of Sb, and can not calculate fisher component 167 | m = means[0] - means[1] 168 | if linalg.norm(m) ==0: 169 | print "WARNING: Inter-class matrix is zero, i.e. classes have same mean!" 170 | print " Fisher can not discriminate in this case --> Exiting" 171 | sys.exit(2) 172 | 173 | Sb = np.outer( m, m ) 174 | #svdvalsSb = linalg.svdvals( Sb ) 175 | #rank = np.sum( svdvalsSb > tol ) 176 | #print "rank Sb = ",rank 177 | 178 | self.means_.append( np.asarray(means) ) 179 | 180 | #covs_array = [ np.asarray(covs[0]) , np.asarray(covs[1]) ] 181 | covs_array = [np.asarray(cc) for cc in covs] 182 | if self.norm_covariance: 183 | for ii in range(len(covs_array)): 184 | covs_array[ii] /= ( (nevt[ii]-1.0) if nevt[ii] > 1 else 1 ) 185 | # covs_array[0] /= ( (nevt[0]-1.0) if nevt[0] > 1 else 1 ) 186 | # covs_array[1] /= ( (nevt[1]-1.0) if nevt[1] > 1 else 1 ) 187 | 188 | if store_covariance: 189 | self.covs_.append( covs_array ) 190 | 191 | #if norm_covariance: 192 | # nevt[0] = nevt[0] if nevt[0] > 1 else 2 193 | # nevt[1] = nevt[1] if nevt[1] > 1 else 2 194 | # self.covs_.append( [ np.asarray(covs[0]) / (nevt[0]-1.0), np.asarray(covs[1]) / (nevt[1]-1.0) ] ) 195 | #else: 196 | # self.covs_.append( [ np.asarray(covs[0]), np.asarray(covs[1]) ] ) 197 | 198 | #Sw = covs_array[0] + covs_array[1] 199 | Sw = sum(covs_array) 200 | 201 | #---------------------------- 202 | # for 2 class system, need to solve for w in 203 | # Sb * w = lambda * Sw * w 204 | # where lambda is eigenvalue of this generalized eigenvalue problem 205 | # however, Sb * w = m mT * w = m * constant 206 | # implies we only need to solve m = Sw * w 207 | # (overall constant wet later with ||w||=1 ) 208 | # solution: Sw = U*S*Vh using svd ==> S.inv*U.T*m = Vh *w ==> w = Sum_i^rank(S) vh_i * (U.T * m)_i / S_i 209 | # where vh_i is a vector 210 | #---------------------------- 211 | # step 1) svd of Sw 212 | # step 2) calculate sum for all non singular components 213 | U, S, V = linalg.svd(Sw) 214 | 215 | rank = np.sum(S > tol) 216 | #print "rank Sw = ", rank 217 | 218 | S = np.power(S, cov_power) 219 | 220 | UTm = np.inner(U.T, m) 221 | w = np.zeros(n_features) 222 | for i in range(len(S)): 223 | if do_smooth_reg==True: 224 | w += V[i,:] * UTm[i] * ( S[i] / (S[i]*S[i]+ 
tol**(2*cov_power)) ) 225 | #w += V[i,:] * UTm[i] * ( S[i] / (S[i]*S[i] + tol*tol) ) 226 | else: 227 | if S[i] < tol: 228 | continue 229 | w += V[i,:] * UTm[i] / S[i] 230 | 231 | if linalg.norm(w) != 0: 232 | w /= linalg.norm(w) 233 | else: 234 | print "WARNING: Fisher discriminant line has norm=0 --> no discriminating curved found! Exiting" 235 | sys.exit(2) 236 | 237 | #check if signal (1) projection smaller than bkg (0), if so, add minus sign 238 | if(np.inner(means[1],w) < np.inner(means[0],w)): 239 | w *= (-1.0) 240 | 241 | wvecs.append( w ) 242 | 243 | 244 | self.w_ = np.asarray(wvecs) 245 | self.n_components_found_ = len(self.w_) 246 | self.S = S 247 | self.U = U 248 | self.V = V 249 | self.m = m 250 | self.cov_power = cov_power 251 | self.basic_fit = True 252 | 253 | return self 254 | 255 | 256 | def update_tol(self, tol, do_smooth_reg=False): 257 | if self.basic_fit == False: 258 | print "Must have done basic Fisher.fit(...) to use this function. NOT UPDATING" 259 | return self 260 | 261 | UTm = np.inner(self.U.T, self.m) 262 | w = np.zeros(self.n_features) 263 | for i in range(len(self.S)): 264 | if do_smooth_reg==True: 265 | w += self.V[i,:] * UTm[i] * ( self.S[i] / (self.S[i]*self.S[i]+ tol**(2*self.cov_power)) ) 266 | #w += V[i,:] * UTm[i] * ( S[i] / (S[i]*S[i] + tol*tol) ) 267 | else: 268 | if self.S[i] < tol: 269 | continue 270 | w += self.V[i,:] * UTm[i] / self.S[i] 271 | 272 | if linalg.norm(w) != 0: 273 | w /= linalg.norm(w) 274 | else: 275 | print "WARNING: Fisher discriminant line has norm=0 --> no discriminating curved found! Exiting" 276 | sys.exit(2) 277 | 278 | #check if signal (1) projection smaller than bkg (0), if so, add minus sign 279 | if(np.inner(self.means_[0][1],w) < np.inner(self.means_[0][0],w)): 280 | w *= (-1.0) 281 | 282 | wvecs = [] 283 | wvecs.append( w ) 284 | 285 | self.w_ = np.asarray(wvecs) 286 | self.n_components_found_ = len(self.w_) 287 | 288 | return self 289 | 290 | 291 | def fit_multiclass(self, X, y, use_total_scatter=False, solution_norm="N", sigma_sqrd=1e-8, tol=1.0e-3, print_timing=False): 292 | """ 293 | Fit the Fisher Discriminant model according to the given training data and parameters. 294 | Based on (but depending on options not exactly the same as) "Algorithm 4" in 295 | Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression and Beyond' Journal of Machine Learning Research 11 (2010) 2199-2228 296 | NOTE: setting norm_covariance=False and use_total_scatter=True, and solution_norm = 'A' or 'B' will give the algorithm from paper 297 | 298 | Parameters 299 | ---------- 300 | X : array-like, shape = [n_samples, n_features] 301 | Training vector, where n_samples in the number of samples and 302 | n_features is the number of features. 303 | y : array, shape = [n_samples] 304 | Target values (integers) 305 | use_total_scatter : boolean 306 | If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T instead of Sw 307 | If False, use Sw = Sum_{c=1... n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T 308 | where norm_c = 1/N_samples_class_c if norm_covariance=True, else norm_c = 1 309 | solution_norm: boolean 310 | 3 kinds of norms, "A", "B", or "N", were "N" means normalize to 1. "A" and "B" (see paper reference) have normalizations 311 | that may be important when consitering n_classes > 2 312 | sigma_sqrd: float 313 | smooth regularization parameter, which is size of singular value where smoothing becomes important. 
314 | NOTE: is fraction in case norm_covariance=False, as a priori the scale of the singular values is not known in this case 315 | tol: float 316 | used for truncated SVD of Sw. Essentially a form of regularization. Tol for SVD(R) is 1e-6, fixed right now 317 | print_timing: boolean 318 | print time for several matrix operations in the algorithm 319 | """ 320 | X, y = X, y = check_X_y(X, y) #does not accept sparse arrays 321 | self.classes_, y = np.unique( y, return_inverse=True) 322 | n_samples, n_features = X.shape 323 | n_classes = len(self.classes_) 324 | n_samples_perclass = np.bincount(y) 325 | if n_classes < 2: 326 | raise ValueError('y has less than 2 classes') 327 | if self.priors is None: 328 | self.priors_ = np.bincount(y) / float(n_samples) 329 | else: 330 | self.priors_ = self.priors 331 | 332 | if not any( np.array(["A","B","N"])==solution_norm ): 333 | print 'WARNING: solution_norm must be one of ["A","B","N"]! Exiting' 334 | sys.exit(2) 335 | 336 | ts = time.time() 337 | 338 | self.means_ = [] 339 | for ind in xrange(n_classes): 340 | Xg = X[y == ind, :] 341 | meang = Xg.mean(0) 342 | self.means_.append(np.asarray(meang)) 343 | if print_timing: print 'fit_multiclass: means took', time.time() - ts 344 | 345 | ts = time.time() 346 | PI_diag = np.diag( 1.0*n_samples_perclass ) # shape(PI_diag) = n_classes x n_classes 347 | PI_inv = np.diag( 1.0 / (1.0*n_samples_perclass) ) # shape(PI_inv) = n_classes x n_classes 348 | PI_sqrt_inv = np.sqrt( PI_inv ) # shape(PI_sqrt_inv) = n_classes x n_classes 349 | #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples)) 350 | E=np.zeros( (n_samples,n_classes) ) 351 | E[[range(n_samples),y]]=1 352 | if print_timing: print 'fit_multiclass: matrices took', time.time() - ts 353 | 354 | 355 | ts = time.time() 356 | #note: computation of this is fast, can always do it inline, if memory consumption gets large 357 | Xt_H = X.T - (1.0/(1.0*n_samples))*np.repeat( np.array([X.T.sum(1)]).T, n_samples, axis=1) # shape(Xt_H) = n_features x n_samples 358 | if print_timing: print 'fit_multiclass: Xt_H took', time.time() - ts 359 | 360 | ts = time.time() 361 | ##################################################################################################################### 362 | #Sb = X.T * H * E * PI_inv * E.T * H * X = (X.T * H * E * PI_sqrt_inv) * (X.T * H * E * PI_sqrt_inv).T 363 | #if norm_covariance: Sb = X.T * H * E * PI_inv * PI_inv * E.T * H * X = (X.T * H * E * PI_inv) * (X.T * H * E * PI_inv).T 364 | #This norm actually doesn't matter in 2-class, I think it jsut becomes an overall scaling, which gets normalized away 365 | #I expect id doesn't matter for multiclass either... but not sure 366 | #to be clear, multi-class fisher does not norm! 
but then its harder to set the regularization factor for Sw 367 | ##################################################################################################################### 368 | 369 | Xt_H_E_PIsi = None # shape(Xt_H_E_PIsi) = n_features x n_classes 370 | if self.norm_covariance: 371 | Xt_H_E_PIsi = np.dot(Xt_H, np.dot(E, PI_inv) ) 372 | else: 373 | Xt_H_E_PIsi = np.dot(Xt_H, np.dot(E, PI_sqrt_inv) ) 374 | if print_timing: print 'fit_multiclass: Xt_H_E_PIsi took', time.time() - ts 375 | 376 | 377 | #St_reg = ( np.dot(X.T np.dot(H, X)) - (sigma*sigma)*np.identity(n_features)) 378 | 379 | ts = time.time() 380 | ##################################################################################################################### 381 | #Sw = X.T * [ 1 - E*PI_inv*E.T ] * X = X.T * X - M.T * PI * M 382 | # if norm_covariance: Sw = X.T * [ P - E*PI_inv*PI_inv*E.T ] * X = X.T *P * X - M.T * M 383 | ##################################################################################################################### 384 | M = np.asarray(self.means_) # shape(M) = n_classes x n_features 385 | #P = np.diag( np.dot(E, 1.0/(1.0*n_samples_perclass)) ) 386 | P_vec = np.array([np.dot(E, 1.0/(1.0*n_samples_perclass))]).T # shape(P_vec) = n_samples x 1 387 | Sw=None # shape(Sw) = n_features x n_features 388 | if not use_total_scatter: 389 | if self.norm_covariance: 390 | #Sw = np.inner( np.inner(X.T, P), X.T) - np.dot( M.T, M) 391 | Sw = np.inner( (P_vec*X).T, X.T) - np.dot( M.T, M) 392 | else: 393 | Sw = np.inner(X.T, X.T) - np.dot( M.T, np.dot(PI_diag, M)) 394 | 395 | if print_timing: print 'fit_multiclass: Sw took', time.time() - ts 396 | 397 | ##################################################################################################################### 398 | #assume (I think true) for condensed svd, where we only take vectors for non-zero singular values 399 | #that if M is symmetric, then Uc=Vc where condensed_svd(M) = Uc * Sc * Vc.T 400 | #this is because the singular values of a symmetric matrix are the abosolute values of the non-zero eigenvalues 401 | #so assuming the singular vectors of the non-zero singular values are the same as eigen vectors 402 | #and since condensed svd only keeps singular vectors for non-zero singular values, should have Uc==Vc 403 | ##################################################################################################################### 404 | 405 | 406 | ts = time.time() 407 | Uc, Sc, Utc, Sc_norm = None, None, None, None 408 | if use_total_scatter: 409 | St_norm = (1.0/(1.0*n_samples)) if self.norm_covariance else 1.0 410 | Uc, Sc, Utc, Sc_norm = self.condensed_svd( St_norm * np.inner(Xt_H, X.T), tol, store_singular_vals=True ) 411 | else: 412 | Uc, Sc, Utc, Sc_norm = self.condensed_svd( Sw, tol, store_singular_vals=True ) 413 | if print_timing: print 'fit_multiclass: Uc, Sc, Utc took', time.time() - ts 414 | 415 | ts = time.time() 416 | #scale up sigma to appropriate range of singular values 417 | reg_factor = sigma_sqrd * Sc_norm 418 | St_reg_inv = np.dot( Uc, np.dot(np.diag(1.0/(Sc + reg_factor)), Utc) ) # shape(St_reg_inv) = n_features x n_features 419 | if print_timing: print 'fit_multiclass: St_reg_inv took', time.time() - ts 420 | 421 | ts = time.time() 422 | G = np.dot(St_reg_inv, Xt_H_E_PIsi) # shape(G) = n_features x n_classes 423 | if print_timing: print 'fit_multiclass: G took', time.time() - ts 424 | 425 | ts = time.time() 426 | R = np.dot( Xt_H_E_PIsi.T, G) # shape(R) = n_classes x n_classes 427 | if print_timing: print 
'fit_multiclass: R took', time.time() - ts 428 | 429 | ts = time.time() 430 | Vr, Lr, Vtr, Lr_norm = self.condensed_svd( R, tol=1e-6 ) # shape(Vr) = n_classes x rank_R 431 | if print_timing: print 'fit_multiclass: Vr, Lr, Vtr took', time.time() - ts 432 | 433 | ts = time.time() 434 | W = np.dot( G, Vr) # shape(W) = n_features x rank_R 435 | if print_timing: print 'fit_multiclass: B took', time.time() - ts 436 | 437 | if solution_norm=="A": 438 | W = np.dot(W, np.diag(1.0 / np.sqrt(Lr)) ) 439 | 440 | elif solution_norm=="N": 441 | for i in range( W.shape[1] ): 442 | if linalg.norm(W[:,i]) != 0: 443 | W[:,i] /= linalg.norm(W[:,i]) 444 | else: 445 | print "WARNING: Fisher discriminant line has norm=0 --> no discriminating curved found! Exiting" 446 | sys.exit(2) 447 | 448 | 449 | self.w_ = W.T #transpose here just because want to store the matrix where rows have length n_features, i.e. are discriminants 450 | 451 | return self 452 | 453 | def condensed_svd(self, M, tol=1e-3, store_singular_vals=False): 454 | U, S, Vt = linalg.svd(M, full_matrices=False) 455 | 456 | if store_singular_vals: 457 | self.singular_vals = S 458 | 459 | #want tolerance on fraction of variance in singular value 460 | #when not norm_covariance, need to normalize singular values 461 | S_norm = 1.0 if self.norm_covariance else np.sum(S) 462 | 463 | rank = np.sum( (S/S_norm) > tol ) 464 | 465 | return U[:,:rank], S[:rank], Vt[:rank,:], S_norm 466 | 467 | 468 | @property 469 | def classes(self): 470 | warnings.warn("Fisher.classes is deprecated and will be removed in 0.14. " 471 | "Use .classes_ instead.", DeprecationWarning, 472 | stacklevel=2) 473 | return self.classes_ 474 | 475 | def _decision_function(self, X): 476 | X = np.asarray(X) 477 | # center and scale data 478 | #X = np.dot(X - self.xbar_, self.scaling) 479 | #return np.dot(X, self.coef_.T) + self.intercept_ 480 | return np.inner( X, self.w_ ) 481 | 482 | def decision_function(self, X): 483 | """ 484 | This function return the decision function values related to each 485 | class on an array of test vectors X. 486 | 487 | Parameters 488 | ---------- 489 | X : array-like, shape = [n_samples, n_features] 490 | 491 | Returns 492 | ------- 493 | C : array, shape = [n_samples, n_components_found_] 494 | Decision function values related to each class, per sample 495 | n_components_found_ is the number of components requested and found 496 | even if n_components_found_=1, a 2D array is found, 497 | but can be promoted to 1D array with dimension [n_samples] with decision_function(X)[:,0] 498 | """ 499 | dec_func = self._decision_function(X) 500 | #if len(self.w_) == 1: 501 | # return dec_func[:, 0] 502 | return dec_func 503 | 504 | def transform(self, X): 505 | """ 506 | Project the data so as to maximize class separation (large separation 507 | between projected class means and small variance within each class). 
508 | 509 | Parameters 510 | ---------- 511 | X : array-like, shape = [n_samples, n_features] 512 | 513 | Returns 514 | ------- 515 | X_new : array, shape = [n_samples, n_components_found_] 516 | """ 517 | X = np.asarray(X) 518 | # center and scale data 519 | #X = np.dot(X - self.xbar_, self.scaling) 520 | #n_comp = X.shape[1] if self.n_components is None else self.n_components 521 | #return np.dot(X, self.coef_[:n_comp].T) 522 | dec_func = self._decision_function(X) 523 | return dec_func 524 | 525 | def fit_transform(self, X, y, store_covariance=False, tol=1.0e-4): 526 | """ 527 | Fit the Fisher Discriminant model according to the given training data and parameters. 528 | The project the data onto up to n_components so as to maximize class separation (large separation 529 | between projected class means and small variance within each class). 530 | NOTE this function is not clever, it simply runs fit(X,y [, store_covariance, tol]).transform(X) 531 | 532 | Parameters 533 | ---------- 534 | X : array-like, shape = [n_samples, n_features] 535 | y : array, shape = [n_samples] 536 | Target values (integers) 537 | store_covariance : boolean 538 | If True the covariance matrix of each class and each iteration is computed 539 | and stored in `self.covs_` attribute. has dimensions [n_iterations][2] where 2 is for nclasses = 2 540 | 541 | Returns 542 | ------- 543 | X_new : array, shape = [n_samples, n_components_found_] 544 | """ 545 | return self.fit(X, y, store_covariance, tol).transform(X) 546 | 547 | 548 | 549 | ######################################################################## 550 | ######################################################################## 551 | ######################################################################## 552 | ######################################################################## 553 | 554 | 555 | 556 | class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin): 557 | """ 558 | Kernalized Fisher Discriminant Analysis (KDA) 559 | 560 | A classifier with a non-linear decision boundary, generated 561 | by fitting class conditional densities to the data 562 | fisher criteria of maximizing between class variance 563 | while minimizing within class variance. 564 | 565 | The fisher criteria is used in a non-linear space, by transforming 566 | the data, X, of dimension D onto a D-dimensional manifold of 567 | a D' dimensional space (where D' is possible infinite) using a funtion f(X). 568 | The key to solving the problem in the non-linear space is to write 569 | the solution to fisher only in terms of inner products of 570 | the vectors X*Y. Then the kernel trick can be employed, such that 571 | the standard inner product is promoted to a general inner product. 572 | That is, K(X,Y) = X*Y --> K(X,Y) = f(X)*f(Y), which is allowed for 573 | valid Kernels. In this case, the function f() does not need to be 574 | known, but only the kernel K(X,Y). 575 | 576 | The fitted model can also be used to reduce the dimensionality 577 | of the input, by projecting it to the most discriminative 578 | directions. 579 | 580 | Parameters 581 | ---------- 582 | 583 | use_total_scatter : boolean 584 | If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T instead of Sw 585 | If False, use Sw = Sum_{c=1... 
n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T 586 | where norm_c = 1/N_samples_class_c if norm_covariance=True, else norm_c = 1 587 | 588 | sigma_sqrd: float 589 | smooth regularization parameter, which is size of singular value where smoothing becomes important. 590 | NOTE: is fraction in case norm_covariance=False, as a priori the scale of the singular values is not known in this case 591 | 592 | tol: float 593 | used for truncated SVD of St. Essentially a form of regularization. Tol for SVD(R) is 1e-6, fixed right now 594 | 595 | kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" 596 | Kernel used for generalized inner product. 597 | Default: "linear" 598 | 599 | degree : int, optional 600 | Degree for poly 601 | Default: 3. 602 | 603 | gamma : float, optional 604 | Kernel coefficient for rbf, sigmoid and poly kernels. 605 | Default: 1/n_features. 606 | 607 | coef0 : float, optional 608 | Independent term in poly and sigmoid kernels. 609 | 610 | norm_covariance : boolean 611 | if true, the covariance of each class will be divided by (n_points_in_class - 1) 612 | NOTE: not currently used 613 | 614 | priors : array, optional, shape = [n_classes] 615 | Priors on classes 616 | 617 | print_timing: boolean 618 | print time for several matrix operations in the algorithm 619 | 620 | Attributes 621 | ---------- 622 | `means_` : array-like, shape = [n_components_found_, [n_classes, n_features] ] 623 | Class means, for each component found 624 | `priors_` : array-like, shape = [n_classes] 625 | Class priors (sum to 1) 626 | 627 | `n_components_found_` : int 628 | number of fisher components found, which is <= n_components 629 | 630 | Examples (put fisher.py in working directory) 631 | -------- 632 | >>> import numpy as np 633 | >>> from fisher import KernelFisher 634 | >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 635 | >>> y = np.array([0, 0, 0, 1, 1, 1]) 636 | >>> fd = KernelFisher() 637 | >>> fd.fit(X, y) 638 | KernelFisher(coef0=1, degree=3, gamma=None, kernel='linear', 639 | norm_covariance=False, print_timing=False, priors=None, 640 | sigma_sqrd=1e-08, tol=0.001, use_total_scatter=True) 641 | >>> print(fd.transform([[-0.8, -1]])) 642 | [[-7.62102356]]] 643 | 644 | """ 645 | 646 | def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3, 647 | kernel="linear", gamma=None, degree=3, coef0=1, 648 | norm_covariance = False, priors=None, print_timing=False): 649 | 650 | self.use_total_scatter = use_total_scatter 651 | self.sigma_sqrd = sigma_sqrd 652 | self.tol = tol 653 | self.kernel = kernel.lower() 654 | self.gamma = gamma 655 | self.degree = degree 656 | self.coef0 = coef0 657 | self._centerer = KernelCenterer() 658 | 659 | self.norm_covariance = norm_covariance 660 | self.print_timing = print_timing 661 | 662 | 663 | self.priors = np.asarray(priors) if priors is not None else None 664 | 665 | if self.priors is not None: 666 | if (self.priors < 0).any(): 667 | raise ValueError('priors must be non-negative') 668 | if self.priors.sum() != 1: 669 | print 'warning: the priors do not sum to 1. 
Renormalizing' 670 | self.priors = self.priors / self.priors.sum() 671 | 672 | 673 | @property 674 | def _pairwise(self): 675 | return self.kernel == "precomputed" 676 | 677 | def _get_kernel(self, X, Y=None): 678 | params = {"gamma": self.gamma, 679 | "degree": self.degree, 680 | "coef0": self.coef0} 681 | try: 682 | return pairwise_kernels(X, Y, metric=self.kernel, 683 | filter_params=True, **params) 684 | except AttributeError: 685 | raise ValueError("%s is not a valid kernel. Valid kernels are: " 686 | "rbf, poly, sigmoid, linear and precomputed." 687 | % self.kernel) 688 | 689 | 690 | def fit(self, X, y): 691 | """ 692 | Fit the Kernelized Fisher Discriminant model according to the given training data and parameters. 693 | Based on "Algorithm 5" in 694 | Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression and Beyond' Journal of Machine Learning Research 11 (2010) 2199-2228 695 | NOTE: setting norm_covariance=False and use_total_scatter=True, and solution_norm = 'A' or 'B' will give the algorithm from paper 696 | 697 | Parameters 698 | ---------- 699 | X : array-like, shape = [n_samples, n_features] 700 | Training vector, where n_samples in the number of samples and 701 | n_features is the number of features. 702 | 703 | y : array, shape = [n_samples] 704 | Target values (integers) 705 | 706 | """ 707 | X, y = check_X_y(X, y) #does not accept sparse arrays 708 | self.classes_, y = unique( y, return_inverse=True) 709 | n_samples, n_features = X.shape 710 | n_classes = len(self.classes_) 711 | n_samples_perclass = np.bincount(y) 712 | if n_classes < 2: 713 | raise ValueError('y has less than 2 classes') 714 | if self.priors is None: 715 | self.priors_ = np.bincount(y) / float(n_samples) 716 | else: 717 | self.priors_ = self.priors 718 | 719 | ts = time.time() 720 | 721 | self.means_ = [] 722 | for ind in xrange(n_classes): 723 | Xg = X[y == ind, :] 724 | meang = Xg.mean(0) 725 | self.means_.append(np.asarray(meang)) 726 | if self.print_timing: print 'KernelFisher.fit: means took', time.time() - ts 727 | 728 | 729 | ts = time.time() 730 | PI_diag = np.diag( 1.0*n_samples_perclass ) # shape(PI_diag) = n_classes x n_classes 731 | PI_inv = np.diag( 1.0 / (1.0*n_samples_perclass) ) # shape(PI_inv) = n_classes x n_classes 732 | PI_sqrt_inv = np.sqrt( PI_inv ) # shape(PI_sqrt_inv) = n_classes x n_classes 733 | #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples)) 734 | E=np.zeros( (n_samples,n_classes) ) # shape(E) = n_samples x n_classes 735 | E[[range(n_samples),y]]=1 736 | E_PIsi = np.dot(E, PI_sqrt_inv) 737 | One_minus_E_Pi_Et = np.identity(n_samples) - np.inner( E, np.inner(PI_diag, E).T ) # shape(One_minus_E_Pi_Et) = n_samples x n_samples 738 | if self.print_timing: print 'KernelFisher.fit: matrices took', time.time() - ts 739 | 740 | 741 | ##################################################################################################################### 742 | #C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T) = (K - 1xK_mean.T) * (I - 1/n 1x1.T) 743 | # = K - K_meanx1.T - 1xK_mean.T + K_allmean 1x1 744 | # --> which is the same as what self._centerer.fit_transform(C) performs 745 | # 746 | # if use_total_scatter=False, 747 | # then using Sw which is (1-E*Pi*E.T)K(1-E*Pi*E.T) 748 | ##################################################################################################################### 749 | ts = time.time() 750 | C = self._get_kernel(X) 751 | K_mean = np.sum(C, axis=1) / (1.0*C.shape[1]) 752 | 753 | if self.use_total_scatter: 754 | C = 
self._centerer.fit_transform(C) 755 | else: 756 | C = np.inner( One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T) 757 | if self.print_timing: print 'KernelFisher.fit: Kernel Calculation took', time.time() - ts 758 | 759 | 760 | ts = time.time() 761 | Uc, Sc, Utc, Sc_norm = self.condensed_svd( C, self.tol, store_singular_vals=True ) 762 | if self.print_timing: print 'KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts 763 | 764 | 765 | ts = time.time() 766 | #scale up sigma to appropriate range of singular values 767 | reg_factor = self.sigma_sqrd * Sc_norm 768 | St_reg_inv = np.inner( Uc, np.inner(np.diag(1.0/(Sc + reg_factor)), Utc.T).T ) 769 | if self.print_timing: print 'KernelFisher.fit: St_reg_inv took', time.time() - ts 770 | 771 | ts = time.time() 772 | R = np.inner(E_PIsi.T, np.inner(C, np.inner( St_reg_inv, E_PIsi.T ).T ).T ) 773 | if self.print_timing: print 'KernelFisher.fit: R took', time.time() - ts 774 | 775 | 776 | ts = time.time() 777 | Vr, Lr, Vtr, Lr_norm = self.condensed_svd( R, tol=1e-6 ) 778 | if self.print_timing: print 'KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts 779 | 780 | 781 | ts = time.time() 782 | ##################################################################################################################### 783 | #This capital Z is Upsilon.T * H from equation (22) 784 | ##################################################################################################################### 785 | #Z = np.inner( np.diag(1.0 / np.sqrt(Lr)), np.inner(Vtr, np.inner(E_PIsi.T, np.inner(C, St_reg_inv.T ).T ).T ).T ) 786 | Z = np.inner( np.inner( np.inner( np.inner( np.diag(1.0 / np.sqrt(Lr)), Vtr.T), E_PIsi), C.T), St_reg_inv) 787 | 788 | Z = (Z.T - (Z.sum(axis=1) / (1.0*Z.shape[1])) ).T 789 | if self.print_timing: print 'KernelFisher.fit: Z took', time.time() - ts 790 | 791 | self.Z = Z 792 | self.n_components_found_ = Z.shape[0] 793 | 794 | ##################################################################################################################### 795 | #This K_mean is (1/n) K*1_n from equation (22) 796 | ##################################################################################################################### 797 | self.K_mean = K_mean 798 | 799 | #print Z.shape, K_mean.shape, self.n_components_found_ 800 | 801 | self.X_fit_ = X 802 | return self 803 | 804 | def condensed_svd(self, M, tol=1e-3, store_singular_vals=False): 805 | U, S, Vt = linalg.svd(M, full_matrices=False) 806 | if store_singular_vals: 807 | self.singular_vals = S 808 | 809 | #want tolerance on fraction of variance in singular value 810 | #when not norm_covariance, need to normalize singular values 811 | S_norm = np.sum(S) 812 | 813 | rank = np.sum( (S/S_norm) > tol ) 814 | 815 | return U[:,:rank], S[:rank], Vt[:rank,:], S_norm 816 | 817 | 818 | @property 819 | def classes(self): 820 | warnings.warn("KernelFisher.classes is deprecated and will be removed in 0.14. " 821 | "Use .classes_ instead.", DeprecationWarning, 822 | stacklevel=2) 823 | return self.classes_ 824 | 825 | def _decision_function(self, X): 826 | #X = np.asarray(X) 827 | return self.transform(X) 828 | 829 | def decision_function(self, X): 830 | """ 831 | This function return the decision function values related to each 832 | class on an array of test vectors X. 
833 | 
834 |         Parameters
835 |         ----------
836 |         X : array-like, shape = [n_samples, n_features]
837 | 
838 |         Returns
839 |         -------
840 |         X_new : array, shape = [n_samples, n_components_found_]
841 |             Decision function values related to each class, per sample
842 |             n_components_found_ is the number of components requested and found
843 |             NOTE: currently identical to self.transform(X)
844 |         """
845 |         return self._decision_function(X)
846 | 
847 |     def transform(self, X):
848 |         """
849 |         Project the data so as to maximize class separation (large separation
850 |         between projected class means and small variance within each class).
851 | 
852 |         Parameters
853 |         ----------
854 |         X : array-like, shape = [n_samples, n_features]
855 | 
856 |         Returns
857 |         -------
858 |         X_new : array, shape = [n_samples, n_components_found_]
859 |         """
860 | 
861 |         #X = np.asarray(X)
862 |         #ts = time.time()
863 |         k = self._get_kernel(X, self.X_fit_)
864 |         #if self.print_timing: print 'KernelFisher.transform: k took', time.time() - ts
865 | 
866 |         #ts = time.time()
867 |         z = np.inner(self.Z, (k-self.K_mean) ).T
868 |         #if self.print_timing: print 'KernelFisher.transform: z took', time.time() - ts
869 | 
870 |         return z
871 | 
872 | 
873 | 
874 |     def fit_transform(self, X, y, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3):
875 |         """
876 |         Fit the Kernelized Fisher Discriminant model according to the given training data and parameters.
877 |         Then project the data onto up to n_components_found_ directions so as to maximize class separation
878 |         (large separation between projected class means and small variance within each class).
879 |         NOTE this function is not clever: it stores the parameters below and simply runs fit(X, y).transform(X)
880 | 
881 |         Parameters
882 |         ----------
883 |         X : array-like, shape = [n_samples, n_features]
884 |         y : array, shape = [n_samples]
885 |             Target values (integers)
886 |         use_total_scatter : boolean
887 |             If True use the total scatter matrix St instead of the within-class scatter Sw (see the class docstring)
888 |         sigma_sqrd : float
889 |             smooth regularization parameter (see the class docstring)
890 |         tol : float
891 |             tolerance used for the truncated SVD of St (see the class docstring)
892 | 
893 |         Returns
894 |         -------
895 |         X_new : array, shape = [n_samples, n_components_found_]
896 |         """
897 |         self.use_total_scatter = use_total_scatter
898 |         self.sigma_sqrd = sigma_sqrd
899 |         self.tol = tol
900 |         return self.fit(X, y).transform(X)
901 | 
--------------------------------------------------------------------------------
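A minimal, self-contained sketch of the two-class recipe used in Fisher.fit and Fisher.update_tol, for reference: the generalized eigenvalue problem Sb*w = lambda*Sw*w is reduced to the linear system Sw*w = m (with m the difference of the class means) and solved through an SVD of Sw, either truncating small singular values or applying the smooth filter factor s_i/(s_i^2 + tol^2). The code below reproduces that recipe on toy data with plain NumPy/SciPy; it is not part of the package, and all names, shapes and tolerance values in it are illustrative.

# Illustrative sketch only -- mirrors the two-class solution described in Fisher.fit's comments.
import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X0 = rng.normal( 1.0, 1.0, size=(200, 5))    # class 0 (background-like)
X1 = rng.normal(-1.0, 0.5, size=(200, 5))    # class 1 (signal-like)

mean0, mean1 = X0.mean(axis=0), X1.mean(axis=0)
m = mean0 - mean1                            # between-class direction; Sb = outer(m, m)

# within-class scatter, normalized per class as with Fisher(norm_covariance=True)
Sw  = np.dot((X0 - mean0).T, X0 - mean0) / (X0.shape[0] - 1.0)
Sw += np.dot((X1 - mean1).T, X1 - mean1) / (X1.shape[0] - 1.0)

U, S, V = linalg.svd(Sw)                     # Sw = U * diag(S) * V
UTm = np.inner(U.T, m)                       # (U.T * m)_i, per the np.inner note in Fisher.py

def solve_w(tol, smooth):
    # w = sum_i V[i,:] * (U.T*m)_i * f(s_i), with f = 1/s_i (truncated) or s_i/(s_i^2 + tol^2)
    w = np.zeros(Sw.shape[0])
    for i in range(len(S)):
        if smooth:
            w += V[i, :] * UTm[i] * S[i] / (S[i] * S[i] + tol * tol)
        elif S[i] >= tol:
            w += V[i, :] * UTm[i] / S[i]
    w /= linalg.norm(w)                      # overall scale fixed by ||w|| = 1
    if np.inner(mean1, w) < np.inner(mean0, w):
        w *= -1.0                            # orient so that class 1 projects higher
    return w

w_trunc  = solve_w(tol=1e-4, smooth=False)
w_smooth = solve_w(tol=1e-4, smooth=True)

print("max |w_trunc - w_smooth| = %.3g" % np.abs(w_trunc - w_smooth).max())
print("mean projection, class1 - class0 = %.3f"
      % (np.inner(X1, w_trunc).mean() - np.inner(X0, w_trunc).mean()))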