├── README.md
├── test.py
├── predict.py
├── run.py
├── plotting.py
├── BinnedFisher.py
└── Fisher.py

/README.md:
--------------------------------------------------------------------------------
1 | # FisherDisc
2 | Fisher Discriminant, including a Kernel Fisher Discriminant and a binned FLD. Implementation for large-p datasets, based on Zhang et al., 'Regularized Discriminant Analysis, Ridge Regression and Beyond', Journal of Machine Learning Research 11 (2010) 2199-2228.
3 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from Fisher import Fisher
4 | from BinnedFisher import BinnedFisher
5 | 
6 | 
7 | #make random two classes
8 | class0 = np.random.normal(1,1, (50,5) )
9 | class1 = np.random.normal(-2,0.5, (50,5) )
10 | 
11 | X = np.vstack( (class0, class1) )
12 | y = np.array( [0 for i in range(50)] + [1 for i in range(50)] )
13 | 
14 | 
15 | f = Fisher()
16 | 
17 | f.fit(X, y, tol = 0.1)
18 | 
19 | print f.transform(X)
20 | 
21 | 
22 | #after fit, can update tolerance
23 | f.update_tol( tol = 0.01 )
24 | 
25 | print f.transform(X)
26 | 
27 | 
28 | 
29 | #add additional variable to binning, for BinnedFisher
30 | v = np.array( [[0.25 for i in range(25)]+[0.75 for i in range(25)]+[0.25 for i in range(25)]+[0.75 for i in range(25)]] )
31 | 
32 | 
33 | X = np.hstack( (v.T, X) )
34 | 
35 | bf = BinnedFisher( bins = [0.0,0.5,1.0] )
36 | 
37 | bf.fit(X,y, tol=[0.01, 0.01])
38 | 
39 | print bf.transform(X)
40 | 
41 | 
42 | 
43 | 
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import cPickle
4 | 
5 | import numpy as np
6 | from scipy import linalg
7 | 
8 | import matplotlib.pyplot as plt
9 | import matplotlib.colors as colors
10 | 
11 | import plotting
12 | 
13 | from BinnedFisher import BinnedFisher
14 | 
15 | def predict( normImage = True, saveFile = False, makePlot = True):
16 | 
17 |     bfish = cPickle.load( file('trained_'+('norm_' if normImage else '')+ 'DR_fisher.pkl', 'r') )
18 | 
19 |     testfile = file('data/alldata_'+('norm_' if normImage else '')+ 'TEST.pkl', 'r')
20 |     data = cPickle.load( testfile )
21 |     spec = cPickle.load( testfile )
22 |     testfile.close()
23 | 
24 |     drbin = (data[:,1]>=0.5)*(data[:,1]<0.75)
25 |     data = data[drbin,:]
26 |     spec = spec[drbin,:]
27 | 
28 |     X = data[:,1:]
29 |     y = data[:,0]
30 |     dr = data[:,1]
31 |     tau21 = spec[:,2]
32 | 
33 |     bfish.update_tol(tol=[1.0e-3, 0.75e-6, 0.1e-3]) # normed
34 |     #bfish.update_tol(tol=[2.5e0, 2.5e-1, 0.3e0]) # non-normed
35 | 
36 |     t = bfish.transform(X, return_ll=False)
37 | 
38 |     print t
39 | 
40 | 
41 | 
42 | 
43 |     if saveFile:
44 |         out_arr = np.hstack( (np.array([y]).T, spec, np.array([dr]).T, np.array([t]).T) )
45 |         print "out shape=", out_arr.shape
46 |         np.savetxt('TEST_predict/TEST_'+('norm_' if normImage else '')+ 'DR_Fisher.txt', out_arr, delimiter=',')
47 | 
48 |     #sys.exit(0)
49 | 
50 | 
51 | 
52 | 
53 |     if makePlot:
54 |         s, bns = np.histogram(t[y==1], normed=True)
55 |         b, bns = np.histogram(t[y==0], bins=bns, normed=True)
56 | 
57 |         x_cen = [ 0.5*(bns[i]+bns[i+1]) for i in range(len(bns)-1)]
58 | 
59 |         plt.figure()
60 |         plt.plot(x_cen, s, color='g', linewidth=3)
61 |         plt.plot(x_cen, b, color='b', linewidth=3)
62 |         #plt.show()
63 | 
64 |         Sigs = [ t[y==1], tau21[y==1] ]
65 |         Bkgs = [ t[y==0], tau21[y==0] ]
66 |         Labs = ["Fisher","Tau21"]
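        # cut direction for each ROC curve below: 'g' counts events above the cut as passing
        # (used for the Fisher output), 'l' counts events below it (used for tau21); see plotting.ROC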
cut_type=['g','l'] 68 | 69 | plotting.ROC(Sigs, Bkgs, Labs, cut_type=cut_type) 70 | 71 | for ifish in range(len(bfish.comp)): 72 | fish = bfish.fish[ifish].w_[0][::-1] 73 | 74 | fig = plt.figure(figsize=(7,5)) 75 | ax = fig.add_subplot(111) 76 | elem = fish.reshape(25,25) 77 | vmin = np.min(elem) 78 | vmax = np.max(elem) 79 | 80 | elem /= np.max( [ abs(vmin), abs(vmax)] ) 81 | vmin = np.min(elem) 82 | vmax = np.max(elem) 83 | 84 | cm_bi = colors.LinearSegmentedColormap.from_list('bi', 85 | [(0,'red'), (abs(vmin)/(vmax-vmin), 'white'),(1,'blue')]) 86 | ret = ax.imshow(elem, 87 | cmap=cm_bi, 88 | interpolation='nearest', 89 | origin='lower') #extent=[low, high, low, high], 90 | ax.set_title("Fisher "+str(ifish), size='xx-large') 91 | 92 | plt.show() 93 | 94 | 95 | if __name__=="__main__": 96 | predict() 97 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import cPickle 4 | 5 | import numpy as np 6 | from scipy import linalg 7 | 8 | import matplotlib.pyplot as plt 9 | import matplotlib.colors as colors 10 | 11 | from BinnedFisher import BinnedFisher 12 | 13 | usePartialData=True 14 | 15 | print "### Loading Data ###" 16 | ## if not usePartialData: 17 | ## sig = np.loadtxt('data/signal.txt', delimiter=',') 18 | ## bkg = np.loadtxt('data/qcd.txt', delimiter=',') 19 | 20 | ## alldata = np.concatenate( (sig, bkg), axis=0) 21 | ## #np.random.shuffle(alldata) 22 | 23 | ## #alldata = alldata[0::10, :] 24 | ## outfile = file('alldata.pkl', 'wb') 25 | ## cPickle.dump(alldata, outfile, protocol=cPickle.HIGHEST_PROTOCOL) 26 | ## outfile.close() 27 | ## sys.exit(0) 28 | 29 | normImage = False 30 | 31 | makePlot = False 32 | 33 | alldata = cPickle.load( file('data/alldata_'+('norm_' if normImage else '')+ 'TRAIN.pkl', 'r') ) 34 | 35 | 36 | print "### Building Model ###" 37 | 38 | X = alldata[:,1:] 39 | y = alldata[:,0] 40 | 41 | bfish = BinnedFisher( bins=[0.25, 0.5, 0.75, float('inf')] ) 42 | 43 | #bfish.fit(X, y, tol=[4.0e-3, 1.0e-3, 0.5e-3]) #old 44 | if normImage: 45 | #bfish.fit(X, y, tol=[2.0e-3, 0.6e-3, 0.2e-3]) #good for normed, 10k per bin per label 46 | bfish.fit(X, y, tol=[1.0e-3, 0.75e-4, 0.1e-3]) #good for normed, 10k per bin per label 47 | 48 | else: 49 | #bfish.fit(X, y, tol=[9.5e0, 11e0, 3.0e0]) #good-ish for non-normed, 10k per bin per label 50 | bfish.fit(X, y, tol=[2.5e0, 2.5e-1, 0.3e0]) #good-ish for non-normed, 10k per bin per label 51 | 52 | outfile = file('trained_'+('norm_' if normImage else '')+ 'DR_fisher.pkl', 'wb') 53 | cPickle.dump(bfish, outfile, protocol=cPickle.HIGHEST_PROTOCOL) 54 | outfile.close() 55 | 56 | 57 | 58 | 59 | if makePlot: 60 | for ifish in range(len(bfish.comp)): 61 | fish = bfish.comp[ifish][::-1] 62 | 63 | print 'fish', ifish,'singular values:' 64 | print bfish.fish[ifish].singular_vals 65 | 66 | fig = plt.figure(figsize=(7,5)) 67 | ax = fig.add_subplot(111) 68 | elem = fish.reshape(25,25) 69 | vmin = np.min(elem) 70 | vmax = np.max(elem) 71 | 72 | elem /= np.max( [ abs(vmin), abs(vmax)] ) 73 | vmin = np.min(elem) 74 | vmax = np.max(elem) 75 | 76 | cm_bi = colors.LinearSegmentedColormap.from_list('bi', 77 | [(0,'red'), (abs(vmin)/(vmax-vmin), 'white'),(1,'blue')]) 78 | ret = ax.imshow(elem, 79 | cmap=cm_bi, 80 | interpolation='nearest', 81 | origin='lower') #extent=[low, high, low, high], 82 | ax.set_title("Fisher "+str(ifish), size='xx-large') 83 | 84 | plt.show() 85 | 86 | 87 | 88 | 89 | 90 | 91 | 
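## Illustrative sketch (not part of the original script): the pickle written above can be
## reloaded later and applied to new data with the same column layout (column 0 of X being
## the binning variable), which is what predict.py does in full:
##   bfish = cPickle.load( file('trained_'+('norm_' if normImage else '')+'DR_fisher.pkl', 'r') )
##   t = bfish.transform(X_new)   # X_new is a placeholder name for a test array shaped like X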
## t = bfish.transform(X) 92 | 93 | ## s, bns = np.histogram(t[y==1], normed=True) 94 | ## b, bns = np.histogram(t[y==0], bins=bns, normed=True) 95 | 96 | ## x_cen = [ 0.5*(bns[i]+bns[i+1]) for i in range(len(bns)-1)] 97 | 98 | ## plt.figure() 99 | ## plt.plot(x_cen, s, color='g', linewidth=3) 100 | ## plt.plot(x_cen, b, color='b', linewidth=3) 101 | ## plt.show() 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /plotting.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | import numpy as np 4 | import scipy as sc 5 | 6 | 7 | def ROC( signal, background, label, cut_start=None, cut_end=None, cut_type=None): 8 | 9 | ## s = np.array( signal ) 10 | ## b = np.array( background ) 11 | ## l = np.array( label ) 12 | 13 | ## if len(s.shape)==1: 14 | ## s = np.array( [signal] ) 15 | ## if len(b.shape)==1: 16 | ## b = np.array( [background] ) 17 | ## if len(l.shape)==0: 18 | ## l = np.array( [label] ) 19 | 20 | fig = plt.figure() 21 | 22 | if cut_type==None: 23 | cut_type=['g' for ic in range(len(signal)) ] 24 | 25 | for ivar in range(len(signal)): 26 | s_sort = np.sort( signal[ivar] ) 27 | b_sort = np.sort( background[ivar] ) 28 | 29 | #c_start=(0.0 if cut_start==None else cut_start) 30 | #c_end= (1.0 if cut_end==None else cut_end) 31 | 32 | c_start=np.min( (s_sort[0], b_sort[0]) ) 33 | c_end= np.max( (s_sort[len(s_sort)-1], b_sort[len(b_sort)-1]) ) 34 | 35 | if c_start==-float('inf'): 36 | c_start = -2*c_end 37 | 38 | print label[ivar], "min(", s_sort[0], b_sort[0], ")=", c_start 39 | print label[ivar], "max(", s_sort[-1], b_sort[-1], ")=", c_end 40 | 41 | s_eff=[] 42 | b_rej=[] 43 | 44 | n_points = 1000 45 | c_delta = (1.0*c_end - 1.0*c_start) / (1.0*n_points) 46 | for i in range(1000): 47 | cut = c_start + i*1.0*c_delta 48 | if cut_type[ivar]=='g': 49 | s_eff.append( 1.0*np.count_nonzero( s_sort > cut ) / (1.0*len(s_sort)) ) 50 | b_count = np.count_nonzero( b_sort > cut ) 51 | elif cut_type[ivar]=='l': 52 | s_eff.append( 1.0*np.count_nonzero( s_sort < cut ) / (1.0*len(s_sort)) ) 53 | b_count = np.count_nonzero( b_sort < cut ) 54 | b_rej.append( (1.0*len(b_sort)) / (1.0 if b_count==0 else (1.0*b_count)) ) 55 | 56 | #print s_eff 57 | plt.plot(s_eff,b_rej) 58 | 59 | plt.legend(label, loc='lower left', prop={'size':6}) 60 | plt.yscale('log') 61 | #plt.show() 62 | 63 | 64 | return 65 | 66 | 67 | def Eff_vs_Var( disc, var, label, bins, cuts= None, eff_target=0.7 ): 68 | 69 | fig = plt.figure() 70 | 71 | 72 | bin_error=[] 73 | bin_center=[] 74 | for ibin in range(len(bins)-1): 75 | ierror = (bins[ibin+1] - bins[ibin])/2.0 76 | bin_error.append( ierror ) 77 | bin_center.append( bins[ibin] + ierror ) 78 | 79 | for isamp in range(len(disc)): 80 | 81 | idisc = np.array(disc[isamp]) 82 | ivar = np.array(var[isamp]) 83 | 84 | if cuts == None: 85 | cut_val = Get_Cut_Value(idisc, eff_target) 86 | #cut_val = np.sort(idisc)[ int((1.0-eff_target)*len(idisc)) ] 87 | else: 88 | cut_val = cuts[isamp] 89 | 90 | #sort_indices = np.argsort(disc[isamp]) 91 | 92 | eff = [] 93 | yerr = [] 94 | for ibin in range(len(bins)-1): 95 | idisc_ibin = idisc[ (ivar>=bins[ibin]) * (ivar cut_val ) 98 | 99 | eff.append( (1.0*n_pass) / (1.0*n_tot) ) 100 | yerr.append( (1.0/(1.0*n_tot)) * np.sqrt( n_pass * (1.0 - (1.0*n_pass) / (1.0*n_tot))) ) 101 | 102 | print bin_center[ibin], n_pass, n_tot, eff[ibin], yerr[ibin] 103 | 104 | plt.errorbar( bin_center, eff, xerr = bin_error, yerr = yerr) 105 | 106 | 
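    # the yerr values filled above are the binomial standard error on the per-bin
    # efficiency, i.e. sqrt( eff * (1 - eff) / n_tot )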
plt.legend(label, loc='best', prop={'size':6}) 107 | 108 | return 109 | 110 | 111 | def Get_Cut_Value(disc, eff_target): 112 | return np.sort(disc)[ int((1.0-eff_target)*len(disc)) ] 113 | -------------------------------------------------------------------------------- /BinnedFisher.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | import numpy as np 5 | from scipy import linalg 6 | 7 | from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin 8 | 9 | 10 | from Fisher import Fisher 11 | 12 | __all__ = ['BinnedFisher'] 13 | 14 | 15 | ##################################################################################################################### 16 | #NOTE TO SELF: 17 | # np.inner(A,B) sums over last indices, i.e. = A[i,j]*B[k,j] 18 | # so if you want to do A*B, you should do np.inner(A, B.T) 19 | # Also, np.inner is faster than np.dot 20 | ##################################################################################################################### 21 | 22 | 23 | class BinnedFisher(BaseEstimator, ClassifierMixin, TransformerMixin): 24 | 25 | 26 | def __init__(self, norm_covariance = True, n_components=None, priors=None, bins = [-float('inf'), float('inf')] ): 27 | 28 | self.nbins = len(bins)-1 29 | self.bins = np.sort(bins) 30 | self.bin_trained = [False for i in range(self.nbins)] 31 | 32 | self.fish = [ Fisher(norm_covariance, n_components, priors) for i in range(self.nbins) ] 33 | 34 | 35 | def fit(self, X, y, tol=[1.0e-4], store_covariance=False, do_smooth_reg=False, cov_class=None, cov_power=1, entries_per_ll_bin = 10): 36 | X = np.asarray(X) 37 | y = np.asarray(y) 38 | 39 | if len(tol)==1: 40 | tol = [tol for i in range(self.nbins)] 41 | elif len(tol) != self.nbins: 42 | print "tol must have length 1 or nbins. exiting" 43 | sys.exit(2) 44 | 45 | self.tol = tol 46 | self.do_smooth_reg = do_smooth_reg 47 | self.cov_class = cov_class 48 | self.cov_power = cov_power 49 | self.entries_per_ll_bin = entries_per_ll_bin 50 | self.comp = [] 51 | self.ll_sig = [] 52 | self.ll_bkg = [] 53 | 54 | self.ll_bin_edges = [] 55 | 56 | 57 | for i in range(self.nbins): 58 | print "Starting fit for bin", i 59 | ts = time.time() 60 | 61 | low, high = self.bins[i], self.bins[i+1] 62 | 63 | the_entries = (X[:,0] >=low) * (X[:,0] =low) * (X[:,0] (self.nbins-1): 156 | print ("bin number must be between 0 and %d", self.nbins-1) 157 | sys.exit(2) 158 | 159 | if not override and not self.bin_trained[bin_number]: 160 | print ("bin %d not trained! Can't transform before running fit!", i) 161 | sys.exit(2) 162 | 163 | Xi = np.asarray(Xi) 164 | 165 | out = self.fish[bin_number].transform(Xi) 166 | 167 | return out 168 | 169 | 170 | def _eval_ll_bin(self, Ti, bin_number): 171 | ''' 172 | only works on transformed data 173 | ''' 174 | 175 | if bin_number < 0 or bin_number > (self.nbins-1): 176 | print ("bin number must be between 0 and %d", self.nbins-1) 177 | sys.exit(2) 178 | 179 | if not self.bin_trained[bin_number]: 180 | print ("bin %d not trained! 
Can't transform before running fit!" % bin_number)
181 |             sys.exit(2)
182 | 
183 |         Ti = np.asarray(Ti)
184 | 
185 |         # anything not found, gets value of 1
186 |         llout = np.ones( Ti.shape[0] )
187 | 
188 |         for i in range(len(self.ll_bin_edges[bin_number]) - 1):
189 |             the_entries = (Ti >= self.ll_bin_edges[bin_number][i]) * (Ti < self.ll_bin_edges[bin_number][i+1])
190 |             llout[ the_entries ] = np.repeat( self.ll_sig[bin_number][i] / self.ll_bkg[bin_number][i], np.count_nonzero(the_entries))
191 | 
192 |         return llout
193 | 
194 | 
195 | 
196 | 
--------------------------------------------------------------------------------
/Fisher.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements Fisher Discriminant Analysis.
3 | """
4 | __author__ = 'Michael Kagan mkagan@cern.ch'
5 | #
6 | #  Code based on sklearn LDA code written by: Matthieu Perrot
7 | #                                             Mathieu Blondel
8 | #
9 | #  using algorithms as described in:
10 | #  Zhang et al., 'Regularized Discriminant Analysis, Ridge Regression and Beyond', Journal of Machine Learning Research 11 (2010) 2199-2228
11 | #
12 | 
13 | import warnings
14 | import sys
15 | import time
16 | 
17 | import numpy as np
18 | from scipy import linalg
19 | 
20 | from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
21 | from sklearn.utils.extmath import logsumexp
22 | from sklearn.utils.validation import check_X_y
23 | from sklearn.preprocessing import KernelCenterer
24 | from sklearn.metrics.pairwise import pairwise_kernels
25 | 
26 | __all__ = ['Fisher', 'KernelFisher']
27 | 
28 | 
29 | #####################################################################################################################
30 | #NOTE TO SELF:
31 | #  np.inner(A,B) sums over last indices, i.e. = A[i,j]*B[k,j]
32 | #  so if you want to do A*B, you should do np.inner(A, B.T)
33 | #  Also, np.inner is faster than np.dot
34 | #####################################################################################################################
35 | 
36 | 
37 | class Fisher(BaseEstimator, ClassifierMixin, TransformerMixin):
38 |     """
39 |     Fisher Discriminant Analysis (LDA)
40 | 
41 |     A classifier with a linear decision boundary, generated by
42 |     maximizing the Fisher criterion: the between-class variance of
43 |     the projected data divided by its within-class variance, so that
44 |     classes are well separated along the fitted direction.
45 | 
46 |     The fitted model can also be used to reduce the dimensionality
47 |     of the input, by projecting it to the most discriminative
48 |     directions.
49 | 50 | Parameters 51 | ---------- 52 | 53 | norm_covariance : boolean 54 | if true, the covariance of each class will be divided by (n_points_in_class - 1) 55 | 56 | n_components: int 57 | Number of components (< n_classes - 1) for dimensionality reduction 58 | 59 | priors : array, optional, shape = [n_classes] 60 | Priors on classes 61 | 62 | Attributes 63 | ---------- 64 | `means_` : array-like, shape = [n_components_found_, [n_classes, n_features] ] 65 | Class means, for each component found 66 | `w_` : array-like, shape = [n_components_found_, n_features ] 67 | decision vector, for each component found 68 | `priors_` : array-like, shape = [n_classes] 69 | Class priors (sum to 1) 70 | `covs_` : array, shape = [n_components_found_, [ [n_features, n_features], [n_features, n_features] ] one cov for class=0 and one for class=1 71 | Covariance matrix (shared by all classes) 72 | `n_components_found_` : int 73 | number of fisher components found, which is <= n_components 74 | 75 | Examples (put fisher.py in working directory) 76 | -------- 77 | >>> import numpy as np 78 | >>> from fisher import Fisher 79 | >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 80 | >>> y = np.array([0, 0, 0, 1, 1, 1]) 81 | >>> fd = Fisher() 82 | >>> fd.fit(X, y) 83 | Fisher(n_components=1, norm_covariance=True, priors=None) 84 | >>> print(fd.transform([[-0.8, -1]])) 85 | [[-1.]] 86 | 87 | 88 | """ 89 | 90 | def __init__(self, norm_covariance = True, n_components=None, priors=None): 91 | self.norm_covariance = norm_covariance 92 | self.n_components = 1 if n_components==None else n_components 93 | self.priors = np.asarray(priors) if priors is not None else None 94 | self.basic_fit = False 95 | 96 | if self.priors is not None: 97 | if (self.priors < 0).any(): 98 | raise ValueError('priors must be non-negative') 99 | if self.priors.sum() != 1: 100 | print 'warning: the priors do not sum to 1. Renormalizing' 101 | self.priors = self.priors / self.priors.sum() 102 | 103 | 104 | def fit(self, X, y, store_covariance=False, tol=1.0e-4, 105 | do_smooth_reg=False, cov_class=None, cov_power=1): 106 | """ 107 | Fit the Fisher Discriminant model according to the given training data and parameters. 108 | 109 | Parameters 110 | ---------- 111 | X : array-like, shape = [n_samples, n_features] 112 | Training vector, where n_samples in the number of samples and 113 | n_features is the number of features. 114 | y : array, shape = [n_samples] 115 | Target values (integers) 116 | store_covariance : boolean 117 | If True the covariance matrix of each class and each iteration is computed 118 | and stored in `self.covs_` attribute. has dimensions [n_iterations][2] where 2 is for nclasses = 2 119 | tol: float 120 | used for regularization, either for svd series truncation or smoothing. 121 | do_smooth_reg: boolean 122 | If False, truncate SVD matrix inversion for singular values less then tol. 
123 | If True, apply smooth regularization (filter factor) on inversion, such that 1/s_i --> s_i/(s_i^2 + tol^2), where s_i is singular value 124 | """ 125 | X, y = check_X_y(X, y) #does not accept sparse arrays 126 | self.classes_, y = np.unique( (y>0), return_inverse=True) 127 | n_samples, n_features = X.shape 128 | n_classes = len(self.classes_) 129 | if n_classes < 2: 130 | raise ValueError('y has less than 2 classes') 131 | if self.priors is None: 132 | self.priors_ = np.bincount(y) / float(n_samples) 133 | else: 134 | self.priors_ = self.priors 135 | 136 | self.n_features=n_features 137 | self.means_ = [] 138 | self.covs_ = [] 139 | 140 | wvecs = [] 141 | 142 | # Group means n_classes*n_features matrix 143 | 144 | means = [] 145 | nevt = np.zeros(n_classes) 146 | Xc = [] 147 | Xg = [] 148 | covs = [] 149 | cov = None 150 | 151 | for ind in xrange(n_classes): 152 | Xg = X[y == ind, :] 153 | meang = Xg.mean(0) 154 | means.append(meang) 155 | nevt[ind] = Xg.shape[0] 156 | 157 | # centered group data 158 | if cov_class is None or cov_class == ind: 159 | Xgc = Xg - meang 160 | covg = np.zeros((n_features, n_features)) 161 | covg += np.dot(Xgc.T, Xgc) 162 | covs.append(covg) 163 | 164 | 165 | # check rank of Sb = m * m.T 166 | # if rank = 0, we are in null space of Sb, and can not calculate fisher component 167 | m = means[0] - means[1] 168 | if linalg.norm(m) ==0: 169 | print "WARNING: Inter-class matrix is zero, i.e. classes have same mean!" 170 | print " Fisher can not discriminate in this case --> Exiting" 171 | sys.exit(2) 172 | 173 | Sb = np.outer( m, m ) 174 | #svdvalsSb = linalg.svdvals( Sb ) 175 | #rank = np.sum( svdvalsSb > tol ) 176 | #print "rank Sb = ",rank 177 | 178 | self.means_.append( np.asarray(means) ) 179 | 180 | #covs_array = [ np.asarray(covs[0]) , np.asarray(covs[1]) ] 181 | covs_array = [np.asarray(cc) for cc in covs] 182 | if self.norm_covariance: 183 | for ii in range(len(covs_array)): 184 | covs_array[ii] /= ( (nevt[ii]-1.0) if nevt[ii] > 1 else 1 ) 185 | # covs_array[0] /= ( (nevt[0]-1.0) if nevt[0] > 1 else 1 ) 186 | # covs_array[1] /= ( (nevt[1]-1.0) if nevt[1] > 1 else 1 ) 187 | 188 | if store_covariance: 189 | self.covs_.append( covs_array ) 190 | 191 | #if norm_covariance: 192 | # nevt[0] = nevt[0] if nevt[0] > 1 else 2 193 | # nevt[1] = nevt[1] if nevt[1] > 1 else 2 194 | # self.covs_.append( [ np.asarray(covs[0]) / (nevt[0]-1.0), np.asarray(covs[1]) / (nevt[1]-1.0) ] ) 195 | #else: 196 | # self.covs_.append( [ np.asarray(covs[0]), np.asarray(covs[1]) ] ) 197 | 198 | #Sw = covs_array[0] + covs_array[1] 199 | Sw = sum(covs_array) 200 | 201 | #---------------------------- 202 | # for 2 class system, need to solve for w in 203 | # Sb * w = lambda * Sw * w 204 | # where lambda is eigenvalue of this generalized eigenvalue problem 205 | # however, Sb * w = m mT * w = m * constant 206 | # implies we only need to solve m = Sw * w 207 | # (overall constant wet later with ||w||=1 ) 208 | # solution: Sw = U*S*Vh using svd ==> S.inv*U.T*m = Vh *w ==> w = Sum_i^rank(S) vh_i * (U.T * m)_i / S_i 209 | # where vh_i is a vector 210 | #---------------------------- 211 | # step 1) svd of Sw 212 | # step 2) calculate sum for all non singular components 213 | U, S, V = linalg.svd(Sw) 214 | 215 | rank = np.sum(S > tol) 216 | #print "rank Sw = ", rank 217 | 218 | S = np.power(S, cov_power) 219 | 220 | UTm = np.inner(U.T, m) 221 | w = np.zeros(n_features) 222 | for i in range(len(S)): 223 | if do_smooth_reg==True: 224 | w += V[i,:] * UTm[i] * ( S[i] / (S[i]*S[i]+ 
tol**(2*cov_power)) ) 225 | #w += V[i,:] * UTm[i] * ( S[i] / (S[i]*S[i] + tol*tol) ) 226 | else: 227 | if S[i] < tol: 228 | continue 229 | w += V[i,:] * UTm[i] / S[i] 230 | 231 | if linalg.norm(w) != 0: 232 | w /= linalg.norm(w) 233 | else: 234 | print "WARNING: Fisher discriminant line has norm=0 --> no discriminating curved found! Exiting" 235 | sys.exit(2) 236 | 237 | #check if signal (1) projection smaller than bkg (0), if so, add minus sign 238 | if(np.inner(means[1],w) < np.inner(means[0],w)): 239 | w *= (-1.0) 240 | 241 | wvecs.append( w ) 242 | 243 | 244 | self.w_ = np.asarray(wvecs) 245 | self.n_components_found_ = len(self.w_) 246 | self.S = S 247 | self.U = U 248 | self.V = V 249 | self.m = m 250 | self.cov_power = cov_power 251 | self.basic_fit = True 252 | 253 | return self 254 | 255 | 256 | def update_tol(self, tol, do_smooth_reg=False): 257 | if self.basic_fit == False: 258 | print "Must have done basic Fisher.fit(...) to use this function. NOT UPDATING" 259 | return self 260 | 261 | UTm = np.inner(self.U.T, self.m) 262 | w = np.zeros(self.n_features) 263 | for i in range(len(self.S)): 264 | if do_smooth_reg==True: 265 | w += self.V[i,:] * UTm[i] * ( self.S[i] / (self.S[i]*self.S[i]+ tol**(2*self.cov_power)) ) 266 | #w += V[i,:] * UTm[i] * ( S[i] / (S[i]*S[i] + tol*tol) ) 267 | else: 268 | if self.S[i] < tol: 269 | continue 270 | w += self.V[i,:] * UTm[i] / self.S[i] 271 | 272 | if linalg.norm(w) != 0: 273 | w /= linalg.norm(w) 274 | else: 275 | print "WARNING: Fisher discriminant line has norm=0 --> no discriminating curved found! Exiting" 276 | sys.exit(2) 277 | 278 | #check if signal (1) projection smaller than bkg (0), if so, add minus sign 279 | if(np.inner(self.means_[0][1],w) < np.inner(self.means_[0][0],w)): 280 | w *= (-1.0) 281 | 282 | wvecs = [] 283 | wvecs.append( w ) 284 | 285 | self.w_ = np.asarray(wvecs) 286 | self.n_components_found_ = len(self.w_) 287 | 288 | return self 289 | 290 | 291 | def fit_multiclass(self, X, y, use_total_scatter=False, solution_norm="N", sigma_sqrd=1e-8, tol=1.0e-3, print_timing=False): 292 | """ 293 | Fit the Fisher Discriminant model according to the given training data and parameters. 294 | Based on (but depending on options not exactly the same as) "Algorithm 4" in 295 | Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression and Beyond' Journal of Machine Learning Research 11 (2010) 2199-2228 296 | NOTE: setting norm_covariance=False and use_total_scatter=True, and solution_norm = 'A' or 'B' will give the algorithm from paper 297 | 298 | Parameters 299 | ---------- 300 | X : array-like, shape = [n_samples, n_features] 301 | Training vector, where n_samples in the number of samples and 302 | n_features is the number of features. 303 | y : array, shape = [n_samples] 304 | Target values (integers) 305 | use_total_scatter : boolean 306 | If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T instead of Sw 307 | If False, use Sw = Sum_{c=1... n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T 308 | where norm_c = 1/N_samples_class_c if norm_covariance=True, else norm_c = 1 309 | solution_norm: boolean 310 | 3 kinds of norms, "A", "B", or "N", were "N" means normalize to 1. "A" and "B" (see paper reference) have normalizations 311 | that may be important when consitering n_classes > 2 312 | sigma_sqrd: float 313 | smooth regularization parameter, which is size of singular value where smoothing becomes important. 
314 | NOTE: is fraction in case norm_covariance=False, as a priori the scale of the singular values is not known in this case 315 | tol: float 316 | used for truncated SVD of Sw. Essentially a form of regularization. Tol for SVD(R) is 1e-6, fixed right now 317 | print_timing: boolean 318 | print time for several matrix operations in the algorithm 319 | """ 320 | X, y = X, y = check_X_y(X, y) #does not accept sparse arrays 321 | self.classes_, y = np.unique( y, return_inverse=True) 322 | n_samples, n_features = X.shape 323 | n_classes = len(self.classes_) 324 | n_samples_perclass = np.bincount(y) 325 | if n_classes < 2: 326 | raise ValueError('y has less than 2 classes') 327 | if self.priors is None: 328 | self.priors_ = np.bincount(y) / float(n_samples) 329 | else: 330 | self.priors_ = self.priors 331 | 332 | if not any( np.array(["A","B","N"])==solution_norm ): 333 | print 'WARNING: solution_norm must be one of ["A","B","N"]! Exiting' 334 | sys.exit(2) 335 | 336 | ts = time.time() 337 | 338 | self.means_ = [] 339 | for ind in xrange(n_classes): 340 | Xg = X[y == ind, :] 341 | meang = Xg.mean(0) 342 | self.means_.append(np.asarray(meang)) 343 | if print_timing: print 'fit_multiclass: means took', time.time() - ts 344 | 345 | ts = time.time() 346 | PI_diag = np.diag( 1.0*n_samples_perclass ) # shape(PI_diag) = n_classes x n_classes 347 | PI_inv = np.diag( 1.0 / (1.0*n_samples_perclass) ) # shape(PI_inv) = n_classes x n_classes 348 | PI_sqrt_inv = np.sqrt( PI_inv ) # shape(PI_sqrt_inv) = n_classes x n_classes 349 | #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples)) 350 | E=np.zeros( (n_samples,n_classes) ) 351 | E[[range(n_samples),y]]=1 352 | if print_timing: print 'fit_multiclass: matrices took', time.time() - ts 353 | 354 | 355 | ts = time.time() 356 | #note: computation of this is fast, can always do it inline, if memory consumption gets large 357 | Xt_H = X.T - (1.0/(1.0*n_samples))*np.repeat( np.array([X.T.sum(1)]).T, n_samples, axis=1) # shape(Xt_H) = n_features x n_samples 358 | if print_timing: print 'fit_multiclass: Xt_H took', time.time() - ts 359 | 360 | ts = time.time() 361 | ##################################################################################################################### 362 | #Sb = X.T * H * E * PI_inv * E.T * H * X = (X.T * H * E * PI_sqrt_inv) * (X.T * H * E * PI_sqrt_inv).T 363 | #if norm_covariance: Sb = X.T * H * E * PI_inv * PI_inv * E.T * H * X = (X.T * H * E * PI_inv) * (X.T * H * E * PI_inv).T 364 | #This norm actually doesn't matter in 2-class, I think it jsut becomes an overall scaling, which gets normalized away 365 | #I expect id doesn't matter for multiclass either... but not sure 366 | #to be clear, multi-class fisher does not norm! 
but then its harder to set the regularization factor for Sw 367 | ##################################################################################################################### 368 | 369 | Xt_H_E_PIsi = None # shape(Xt_H_E_PIsi) = n_features x n_classes 370 | if self.norm_covariance: 371 | Xt_H_E_PIsi = np.dot(Xt_H, np.dot(E, PI_inv) ) 372 | else: 373 | Xt_H_E_PIsi = np.dot(Xt_H, np.dot(E, PI_sqrt_inv) ) 374 | if print_timing: print 'fit_multiclass: Xt_H_E_PIsi took', time.time() - ts 375 | 376 | 377 | #St_reg = ( np.dot(X.T np.dot(H, X)) - (sigma*sigma)*np.identity(n_features)) 378 | 379 | ts = time.time() 380 | ##################################################################################################################### 381 | #Sw = X.T * [ 1 - E*PI_inv*E.T ] * X = X.T * X - M.T * PI * M 382 | # if norm_covariance: Sw = X.T * [ P - E*PI_inv*PI_inv*E.T ] * X = X.T *P * X - M.T * M 383 | ##################################################################################################################### 384 | M = np.asarray(self.means_) # shape(M) = n_classes x n_features 385 | #P = np.diag( np.dot(E, 1.0/(1.0*n_samples_perclass)) ) 386 | P_vec = np.array([np.dot(E, 1.0/(1.0*n_samples_perclass))]).T # shape(P_vec) = n_samples x 1 387 | Sw=None # shape(Sw) = n_features x n_features 388 | if not use_total_scatter: 389 | if self.norm_covariance: 390 | #Sw = np.inner( np.inner(X.T, P), X.T) - np.dot( M.T, M) 391 | Sw = np.inner( (P_vec*X).T, X.T) - np.dot( M.T, M) 392 | else: 393 | Sw = np.inner(X.T, X.T) - np.dot( M.T, np.dot(PI_diag, M)) 394 | 395 | if print_timing: print 'fit_multiclass: Sw took', time.time() - ts 396 | 397 | ##################################################################################################################### 398 | #assume (I think true) for condensed svd, where we only take vectors for non-zero singular values 399 | #that if M is symmetric, then Uc=Vc where condensed_svd(M) = Uc * Sc * Vc.T 400 | #this is because the singular values of a symmetric matrix are the abosolute values of the non-zero eigenvalues 401 | #so assuming the singular vectors of the non-zero singular values are the same as eigen vectors 402 | #and since condensed svd only keeps singular vectors for non-zero singular values, should have Uc==Vc 403 | ##################################################################################################################### 404 | 405 | 406 | ts = time.time() 407 | Uc, Sc, Utc, Sc_norm = None, None, None, None 408 | if use_total_scatter: 409 | St_norm = (1.0/(1.0*n_samples)) if self.norm_covariance else 1.0 410 | Uc, Sc, Utc, Sc_norm = self.condensed_svd( St_norm * np.inner(Xt_H, X.T), tol, store_singular_vals=True ) 411 | else: 412 | Uc, Sc, Utc, Sc_norm = self.condensed_svd( Sw, tol, store_singular_vals=True ) 413 | if print_timing: print 'fit_multiclass: Uc, Sc, Utc took', time.time() - ts 414 | 415 | ts = time.time() 416 | #scale up sigma to appropriate range of singular values 417 | reg_factor = sigma_sqrd * Sc_norm 418 | St_reg_inv = np.dot( Uc, np.dot(np.diag(1.0/(Sc + reg_factor)), Utc) ) # shape(St_reg_inv) = n_features x n_features 419 | if print_timing: print 'fit_multiclass: St_reg_inv took', time.time() - ts 420 | 421 | ts = time.time() 422 | G = np.dot(St_reg_inv, Xt_H_E_PIsi) # shape(G) = n_features x n_classes 423 | if print_timing: print 'fit_multiclass: G took', time.time() - ts 424 | 425 | ts = time.time() 426 | R = np.dot( Xt_H_E_PIsi.T, G) # shape(R) = n_classes x n_classes 427 | if print_timing: print 
'fit_multiclass: R took', time.time() - ts 428 | 429 | ts = time.time() 430 | Vr, Lr, Vtr, Lr_norm = self.condensed_svd( R, tol=1e-6 ) # shape(Vr) = n_classes x rank_R 431 | if print_timing: print 'fit_multiclass: Vr, Lr, Vtr took', time.time() - ts 432 | 433 | ts = time.time() 434 | W = np.dot( G, Vr) # shape(W) = n_features x rank_R 435 | if print_timing: print 'fit_multiclass: B took', time.time() - ts 436 | 437 | if solution_norm=="A": 438 | W = np.dot(W, np.diag(1.0 / np.sqrt(Lr)) ) 439 | 440 | elif solution_norm=="N": 441 | for i in range( W.shape[1] ): 442 | if linalg.norm(W[:,i]) != 0: 443 | W[:,i] /= linalg.norm(W[:,i]) 444 | else: 445 | print "WARNING: Fisher discriminant line has norm=0 --> no discriminating curved found! Exiting" 446 | sys.exit(2) 447 | 448 | 449 | self.w_ = W.T #transpose here just because want to store the matrix where rows have length n_features, i.e. are discriminants 450 | 451 | return self 452 | 453 | def condensed_svd(self, M, tol=1e-3, store_singular_vals=False): 454 | U, S, Vt = linalg.svd(M, full_matrices=False) 455 | 456 | if store_singular_vals: 457 | self.singular_vals = S 458 | 459 | #want tolerance on fraction of variance in singular value 460 | #when not norm_covariance, need to normalize singular values 461 | S_norm = 1.0 if self.norm_covariance else np.sum(S) 462 | 463 | rank = np.sum( (S/S_norm) > tol ) 464 | 465 | return U[:,:rank], S[:rank], Vt[:rank,:], S_norm 466 | 467 | 468 | @property 469 | def classes(self): 470 | warnings.warn("Fisher.classes is deprecated and will be removed in 0.14. " 471 | "Use .classes_ instead.", DeprecationWarning, 472 | stacklevel=2) 473 | return self.classes_ 474 | 475 | def _decision_function(self, X): 476 | X = np.asarray(X) 477 | # center and scale data 478 | #X = np.dot(X - self.xbar_, self.scaling) 479 | #return np.dot(X, self.coef_.T) + self.intercept_ 480 | return np.inner( X, self.w_ ) 481 | 482 | def decision_function(self, X): 483 | """ 484 | This function return the decision function values related to each 485 | class on an array of test vectors X. 486 | 487 | Parameters 488 | ---------- 489 | X : array-like, shape = [n_samples, n_features] 490 | 491 | Returns 492 | ------- 493 | C : array, shape = [n_samples, n_components_found_] 494 | Decision function values related to each class, per sample 495 | n_components_found_ is the number of components requested and found 496 | even if n_components_found_=1, a 2D array is found, 497 | but can be promoted to 1D array with dimension [n_samples] with decision_function(X)[:,0] 498 | """ 499 | dec_func = self._decision_function(X) 500 | #if len(self.w_) == 1: 501 | # return dec_func[:, 0] 502 | return dec_func 503 | 504 | def transform(self, X): 505 | """ 506 | Project the data so as to maximize class separation (large separation 507 | between projected class means and small variance within each class). 
508 | 509 | Parameters 510 | ---------- 511 | X : array-like, shape = [n_samples, n_features] 512 | 513 | Returns 514 | ------- 515 | X_new : array, shape = [n_samples, n_components_found_] 516 | """ 517 | X = np.asarray(X) 518 | # center and scale data 519 | #X = np.dot(X - self.xbar_, self.scaling) 520 | #n_comp = X.shape[1] if self.n_components is None else self.n_components 521 | #return np.dot(X, self.coef_[:n_comp].T) 522 | dec_func = self._decision_function(X) 523 | return dec_func 524 | 525 | def fit_transform(self, X, y, store_covariance=False, tol=1.0e-4): 526 | """ 527 | Fit the Fisher Discriminant model according to the given training data and parameters. 528 | The project the data onto up to n_components so as to maximize class separation (large separation 529 | between projected class means and small variance within each class). 530 | NOTE this function is not clever, it simply runs fit(X,y [, store_covariance, tol]).transform(X) 531 | 532 | Parameters 533 | ---------- 534 | X : array-like, shape = [n_samples, n_features] 535 | y : array, shape = [n_samples] 536 | Target values (integers) 537 | store_covariance : boolean 538 | If True the covariance matrix of each class and each iteration is computed 539 | and stored in `self.covs_` attribute. has dimensions [n_iterations][2] where 2 is for nclasses = 2 540 | 541 | Returns 542 | ------- 543 | X_new : array, shape = [n_samples, n_components_found_] 544 | """ 545 | return self.fit(X, y, store_covariance, tol).transform(X) 546 | 547 | 548 | 549 | ######################################################################## 550 | ######################################################################## 551 | ######################################################################## 552 | ######################################################################## 553 | 554 | 555 | 556 | class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin): 557 | """ 558 | Kernalized Fisher Discriminant Analysis (KDA) 559 | 560 | A classifier with a non-linear decision boundary, generated 561 | by fitting class conditional densities to the data 562 | fisher criteria of maximizing between class variance 563 | while minimizing within class variance. 564 | 565 | The fisher criteria is used in a non-linear space, by transforming 566 | the data, X, of dimension D onto a D-dimensional manifold of 567 | a D' dimensional space (where D' is possible infinite) using a funtion f(X). 568 | The key to solving the problem in the non-linear space is to write 569 | the solution to fisher only in terms of inner products of 570 | the vectors X*Y. Then the kernel trick can be employed, such that 571 | the standard inner product is promoted to a general inner product. 572 | That is, K(X,Y) = X*Y --> K(X,Y) = f(X)*f(Y), which is allowed for 573 | valid Kernels. In this case, the function f() does not need to be 574 | known, but only the kernel K(X,Y). 575 | 576 | The fitted model can also be used to reduce the dimensionality 577 | of the input, by projecting it to the most discriminative 578 | directions. 579 | 580 | Parameters 581 | ---------- 582 | 583 | use_total_scatter : boolean 584 | If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T instead of Sw 585 | If False, use Sw = Sum_{c=1... 
n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T 586 | where norm_c = 1/N_samples_class_c if norm_covariance=True, else norm_c = 1 587 | 588 | sigma_sqrd: float 589 | smooth regularization parameter, which is size of singular value where smoothing becomes important. 590 | NOTE: is fraction in case norm_covariance=False, as a priori the scale of the singular values is not known in this case 591 | 592 | tol: float 593 | used for truncated SVD of St. Essentially a form of regularization. Tol for SVD(R) is 1e-6, fixed right now 594 | 595 | kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" 596 | Kernel used for generalized inner product. 597 | Default: "linear" 598 | 599 | degree : int, optional 600 | Degree for poly 601 | Default: 3. 602 | 603 | gamma : float, optional 604 | Kernel coefficient for rbf, sigmoid and poly kernels. 605 | Default: 1/n_features. 606 | 607 | coef0 : float, optional 608 | Independent term in poly and sigmoid kernels. 609 | 610 | norm_covariance : boolean 611 | if true, the covariance of each class will be divided by (n_points_in_class - 1) 612 | NOTE: not currently used 613 | 614 | priors : array, optional, shape = [n_classes] 615 | Priors on classes 616 | 617 | print_timing: boolean 618 | print time for several matrix operations in the algorithm 619 | 620 | Attributes 621 | ---------- 622 | `means_` : array-like, shape = [n_components_found_, [n_classes, n_features] ] 623 | Class means, for each component found 624 | `priors_` : array-like, shape = [n_classes] 625 | Class priors (sum to 1) 626 | 627 | `n_components_found_` : int 628 | number of fisher components found, which is <= n_components 629 | 630 | Examples (put fisher.py in working directory) 631 | -------- 632 | >>> import numpy as np 633 | >>> from fisher import KernelFisher 634 | >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 635 | >>> y = np.array([0, 0, 0, 1, 1, 1]) 636 | >>> fd = KernelFisher() 637 | >>> fd.fit(X, y) 638 | KernelFisher(coef0=1, degree=3, gamma=None, kernel='linear', 639 | norm_covariance=False, print_timing=False, priors=None, 640 | sigma_sqrd=1e-08, tol=0.001, use_total_scatter=True) 641 | >>> print(fd.transform([[-0.8, -1]])) 642 | [[-7.62102356]]] 643 | 644 | """ 645 | 646 | def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3, 647 | kernel="linear", gamma=None, degree=3, coef0=1, 648 | norm_covariance = False, priors=None, print_timing=False): 649 | 650 | self.use_total_scatter = use_total_scatter 651 | self.sigma_sqrd = sigma_sqrd 652 | self.tol = tol 653 | self.kernel = kernel.lower() 654 | self.gamma = gamma 655 | self.degree = degree 656 | self.coef0 = coef0 657 | self._centerer = KernelCenterer() 658 | 659 | self.norm_covariance = norm_covariance 660 | self.print_timing = print_timing 661 | 662 | 663 | self.priors = np.asarray(priors) if priors is not None else None 664 | 665 | if self.priors is not None: 666 | if (self.priors < 0).any(): 667 | raise ValueError('priors must be non-negative') 668 | if self.priors.sum() != 1: 669 | print 'warning: the priors do not sum to 1. 
Renormalizing' 670 | self.priors = self.priors / self.priors.sum() 671 | 672 | 673 | @property 674 | def _pairwise(self): 675 | return self.kernel == "precomputed" 676 | 677 | def _get_kernel(self, X, Y=None): 678 | params = {"gamma": self.gamma, 679 | "degree": self.degree, 680 | "coef0": self.coef0} 681 | try: 682 | return pairwise_kernels(X, Y, metric=self.kernel, 683 | filter_params=True, **params) 684 | except AttributeError: 685 | raise ValueError("%s is not a valid kernel. Valid kernels are: " 686 | "rbf, poly, sigmoid, linear and precomputed." 687 | % self.kernel) 688 | 689 | 690 | def fit(self, X, y): 691 | """ 692 | Fit the Kernelized Fisher Discriminant model according to the given training data and parameters. 693 | Based on "Algorithm 5" in 694 | Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression and Beyond' Journal of Machine Learning Research 11 (2010) 2199-2228 695 | NOTE: setting norm_covariance=False and use_total_scatter=True, and solution_norm = 'A' or 'B' will give the algorithm from paper 696 | 697 | Parameters 698 | ---------- 699 | X : array-like, shape = [n_samples, n_features] 700 | Training vector, where n_samples in the number of samples and 701 | n_features is the number of features. 702 | 703 | y : array, shape = [n_samples] 704 | Target values (integers) 705 | 706 | """ 707 | X, y = check_X_y(X, y) #does not accept sparse arrays 708 | self.classes_, y = unique( y, return_inverse=True) 709 | n_samples, n_features = X.shape 710 | n_classes = len(self.classes_) 711 | n_samples_perclass = np.bincount(y) 712 | if n_classes < 2: 713 | raise ValueError('y has less than 2 classes') 714 | if self.priors is None: 715 | self.priors_ = np.bincount(y) / float(n_samples) 716 | else: 717 | self.priors_ = self.priors 718 | 719 | ts = time.time() 720 | 721 | self.means_ = [] 722 | for ind in xrange(n_classes): 723 | Xg = X[y == ind, :] 724 | meang = Xg.mean(0) 725 | self.means_.append(np.asarray(meang)) 726 | if self.print_timing: print 'KernelFisher.fit: means took', time.time() - ts 727 | 728 | 729 | ts = time.time() 730 | PI_diag = np.diag( 1.0*n_samples_perclass ) # shape(PI_diag) = n_classes x n_classes 731 | PI_inv = np.diag( 1.0 / (1.0*n_samples_perclass) ) # shape(PI_inv) = n_classes x n_classes 732 | PI_sqrt_inv = np.sqrt( PI_inv ) # shape(PI_sqrt_inv) = n_classes x n_classes 733 | #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples)) 734 | E=np.zeros( (n_samples,n_classes) ) # shape(E) = n_samples x n_classes 735 | E[[range(n_samples),y]]=1 736 | E_PIsi = np.dot(E, PI_sqrt_inv) 737 | One_minus_E_Pi_Et = np.identity(n_samples) - np.inner( E, np.inner(PI_diag, E).T ) # shape(One_minus_E_Pi_Et) = n_samples x n_samples 738 | if self.print_timing: print 'KernelFisher.fit: matrices took', time.time() - ts 739 | 740 | 741 | ##################################################################################################################### 742 | #C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T) = (K - 1xK_mean.T) * (I - 1/n 1x1.T) 743 | # = K - K_meanx1.T - 1xK_mean.T + K_allmean 1x1 744 | # --> which is the same as what self._centerer.fit_transform(C) performs 745 | # 746 | # if use_total_scatter=False, 747 | # then using Sw which is (1-E*Pi*E.T)K(1-E*Pi*E.T) 748 | ##################################################################################################################### 749 | ts = time.time() 750 | C = self._get_kernel(X) 751 | K_mean = np.sum(C, axis=1) / (1.0*C.shape[1]) 752 | 753 | if self.use_total_scatter: 754 | C = 
self._centerer.fit_transform(C) 755 | else: 756 | C = np.inner( One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T) 757 | if self.print_timing: print 'KernelFisher.fit: Kernel Calculation took', time.time() - ts 758 | 759 | 760 | ts = time.time() 761 | Uc, Sc, Utc, Sc_norm = self.condensed_svd( C, self.tol, store_singular_vals=True ) 762 | if self.print_timing: print 'KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts 763 | 764 | 765 | ts = time.time() 766 | #scale up sigma to appropriate range of singular values 767 | reg_factor = self.sigma_sqrd * Sc_norm 768 | St_reg_inv = np.inner( Uc, np.inner(np.diag(1.0/(Sc + reg_factor)), Utc.T).T ) 769 | if self.print_timing: print 'KernelFisher.fit: St_reg_inv took', time.time() - ts 770 | 771 | ts = time.time() 772 | R = np.inner(E_PIsi.T, np.inner(C, np.inner( St_reg_inv, E_PIsi.T ).T ).T ) 773 | if self.print_timing: print 'KernelFisher.fit: R took', time.time() - ts 774 | 775 | 776 | ts = time.time() 777 | Vr, Lr, Vtr, Lr_norm = self.condensed_svd( R, tol=1e-6 ) 778 | if self.print_timing: print 'KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts 779 | 780 | 781 | ts = time.time() 782 | ##################################################################################################################### 783 | #This capital Z is Upsilon.T * H from equation (22) 784 | ##################################################################################################################### 785 | #Z = np.inner( np.diag(1.0 / np.sqrt(Lr)), np.inner(Vtr, np.inner(E_PIsi.T, np.inner(C, St_reg_inv.T ).T ).T ).T ) 786 | Z = np.inner( np.inner( np.inner( np.inner( np.diag(1.0 / np.sqrt(Lr)), Vtr.T), E_PIsi), C.T), St_reg_inv) 787 | 788 | Z = (Z.T - (Z.sum(axis=1) / (1.0*Z.shape[1])) ).T 789 | if self.print_timing: print 'KernelFisher.fit: Z took', time.time() - ts 790 | 791 | self.Z = Z 792 | self.n_components_found_ = Z.shape[0] 793 | 794 | ##################################################################################################################### 795 | #This K_mean is (1/n) K*1_n from equation (22) 796 | ##################################################################################################################### 797 | self.K_mean = K_mean 798 | 799 | #print Z.shape, K_mean.shape, self.n_components_found_ 800 | 801 | self.X_fit_ = X 802 | return self 803 | 804 | def condensed_svd(self, M, tol=1e-3, store_singular_vals=False): 805 | U, S, Vt = linalg.svd(M, full_matrices=False) 806 | if store_singular_vals: 807 | self.singular_vals = S 808 | 809 | #want tolerance on fraction of variance in singular value 810 | #when not norm_covariance, need to normalize singular values 811 | S_norm = np.sum(S) 812 | 813 | rank = np.sum( (S/S_norm) > tol ) 814 | 815 | return U[:,:rank], S[:rank], Vt[:rank,:], S_norm 816 | 817 | 818 | @property 819 | def classes(self): 820 | warnings.warn("KernelFisher.classes is deprecated and will be removed in 0.14. " 821 | "Use .classes_ instead.", DeprecationWarning, 822 | stacklevel=2) 823 | return self.classes_ 824 | 825 | def _decision_function(self, X): 826 | #X = np.asarray(X) 827 | return self.transform(X) 828 | 829 | def decision_function(self, X): 830 | """ 831 | This function return the decision function values related to each 832 | class on an array of test vectors X. 
833 | 
834 |         Parameters
835 |         ----------
836 |         X : array-like, shape = [n_samples, n_features]
837 | 
838 |         Returns
839 |         -------
840 |         X_new : array, shape = [n_samples, n_components_found_]
841 |             Decision function values related to each class, per sample
842 |             n_components_found_ is the number of components requested and found
843 |             NOTE: currently identical to self.transform(X)
844 |         """
845 |         return self._decision_function(X)
846 | 
847 |     def transform(self, X):
848 |         """
849 |         Project the data so as to maximize class separation (large separation
850 |         between projected class means and small variance within each class).
851 | 
852 |         Parameters
853 |         ----------
854 |         X : array-like, shape = [n_samples, n_features]
855 | 
856 |         Returns
857 |         -------
858 |         X_new : array, shape = [n_samples, n_components_found_]
859 |         """
860 | 
861 |         #X = np.asarray(X)
862 |         #ts = time.time()
863 |         k = self._get_kernel(X, self.X_fit_)
864 |         #if self.print_timing: print 'KernelFisher.transform: k took', time.time() - ts
865 | 
866 |         #ts = time.time()
867 |         z = np.inner(self.Z, (k-self.K_mean) ).T
868 |         #if self.print_timing: print 'KernelFisher.transform: z took', time.time() - ts
869 | 
870 |         return z
871 | 
872 | 
873 | 
874 |     def fit_transform(self, X, y, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3):
875 |         """
876 |         Fit the Kernelized Fisher Discriminant model according to the given training data and parameters.
877 |         Then project the data onto up to n_components_found_ directions so as to maximize class separation
878 |         (large separation between projected class means and small variance within each class).
879 |         NOTE this function is not clever: it stores the parameters below and simply runs fit(X, y).transform(X)
880 | 
881 |         Parameters
882 |         ----------
883 |         X : array-like, shape = [n_samples, n_features]
884 |         y : array, shape = [n_samples]
885 |             Target values (integers)
886 |         use_total_scatter : boolean
887 |             If True use the total scatter matrix St instead of the within-class scatter Sw (see the class docstring)
888 |         sigma_sqrd : float
889 |             smooth regularization parameter (see the class docstring)
890 |         tol : float
891 |             tolerance used for the truncated SVD of St (see the class docstring)
892 | 
893 |         Returns
894 |         -------
895 |         X_new : array, shape = [n_samples, n_components_found_]
896 |         """
897 |         self.use_total_scatter = use_total_scatter
898 |         self.sigma_sqrd = sigma_sqrd
899 |         self.tol = tol
900 |         return self.fit(X, y).transform(X)
901 | 
--------------------------------------------------------------------------------
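A minimal, self-contained sketch of the two-class recipe used in Fisher.fit and Fisher.update_tol, for reference: the generalized eigenvalue problem Sb*w = lambda*Sw*w is reduced to the linear system Sw*w = m (with m the difference of the class means) and solved through an SVD of Sw, either truncating small singular values or applying the smooth filter factor s_i/(s_i^2 + tol^2). The code below reproduces that recipe on toy data with plain NumPy/SciPy; it is not part of the package, and all names, shapes and tolerance values in it are illustrative.

# Illustrative sketch only -- mirrors the two-class solution described in Fisher.fit's comments.
import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X0 = rng.normal( 1.0, 1.0, size=(200, 5))    # class 0 (background-like)
X1 = rng.normal(-1.0, 0.5, size=(200, 5))    # class 1 (signal-like)

mean0, mean1 = X0.mean(axis=0), X1.mean(axis=0)
m = mean0 - mean1                            # between-class direction; Sb = outer(m, m)

# within-class scatter, normalized per class as with Fisher(norm_covariance=True)
Sw  = np.dot((X0 - mean0).T, X0 - mean0) / (X0.shape[0] - 1.0)
Sw += np.dot((X1 - mean1).T, X1 - mean1) / (X1.shape[0] - 1.0)

U, S, V = linalg.svd(Sw)                     # Sw = U * diag(S) * V
UTm = np.inner(U.T, m)                       # (U.T * m)_i, per the np.inner note in Fisher.py

def solve_w(tol, smooth):
    # w = sum_i V[i,:] * (U.T*m)_i * f(s_i), with f = 1/s_i (truncated) or s_i/(s_i^2 + tol^2)
    w = np.zeros(Sw.shape[0])
    for i in range(len(S)):
        if smooth:
            w += V[i, :] * UTm[i] * S[i] / (S[i] * S[i] + tol * tol)
        elif S[i] >= tol:
            w += V[i, :] * UTm[i] / S[i]
    w /= linalg.norm(w)                      # overall scale fixed by ||w|| = 1
    if np.inner(mean1, w) < np.inner(mean0, w):
        w *= -1.0                            # orient so that class 1 projects higher
    return w

w_trunc  = solve_w(tol=1e-4, smooth=False)
w_smooth = solve_w(tol=1e-4, smooth=True)

print("max |w_trunc - w_smooth| = %.3g" % np.abs(w_trunc - w_smooth).max())
print("mean projection, class1 - class0 = %.3f"
      % (np.inner(X1, w_trunc).mean() - np.inner(X0, w_trunc).mean()))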