├── SupervisedPCA.pyc ├── supervised_pca.pyc ├── README.md ├── note.py ├── notepad.py ├── Untitled.py ├── test_supervised_pca.py ├── supervised_pca_vs_lda_qda.py ├── superpca_vs_elasticnet.py └── supervised_pca.py /SupervisedPCA.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stylianos-kampakis/supervisedPCA-Python/HEAD/SupervisedPCA.pyc -------------------------------------------------------------------------------- /supervised_pca.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stylianos-kampakis/supervisedPCA-Python/HEAD/supervised_pca.pyc -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # supervisedPCA-Python 2 | 3 | Original code was meant as an addition for scikit-learn 0.16, but it 4 | can also be used independently. 5 | -------------------------------------------------------------------------------- /note.py: -------------------------------------------------------------------------------- 1 | from SupervisedPCA import SupervisedPCARegressor 2 | from SupervisedPCA import SupervisedPCAClassifier 3 | from sklearn.utils.testing import assert_almost_equal 4 | from sklearn.utils.testing import assert_array_equal 5 | from sklearn import datasets 6 | import numpy as np 7 | 8 | diabetes=datasets.load_iris() 9 | X = diabetes.data 10 | Y = diabetes.target 11 | 12 | spca = SupervisedPCAClassifier() 13 | spca.fit(X, Y,threshold=1.7) 14 | print(spca._model.coef_) -------------------------------------------------------------------------------- /notepad.py: -------------------------------------------------------------------------------- 1 | print(__doc__) 2 | 3 | 4 | # Code source: Jaques Grobler 5 | # License: BSD 3 clause 6 | 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from sklearn import datasets, linear_model 11 | 12 | # Load the diabetes dataset 13 | diabetes = datasets.load_diabetes() 14 | 15 | 16 | # Use only one feature 17 | diabetes_X = diabetes.data[:, np.newaxis] 18 | diabetes_X_temp = diabetes_X[:, :, 2] 19 | 20 | # Split the data into training/testing sets 21 | diabetes_X_train = diabetes_X_temp[:-20] 22 | diabetes_X_test = diabetes_X_temp[-20:] 23 | 24 | # Split the targets into training/testing sets 25 | diabetes_y_train = diabetes.target[:-20] 26 | diabetes_y_test = diabetes.target[-20:] 27 | 28 | # Create linear regression object 29 | regr = linear_model.LinearRegression() 30 | 31 | # Train the model using the training sets 32 | regr.fit(diabetes_X_train, diabetes_y_train) 33 | 34 | # The coefficients 35 | print('Coefficients: \n', regr.coef_) 36 | # The mean square error 37 | print("Residual sum of squares: %.2f" 38 | % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2)) 39 | # Explained variance score: 1 is perfect prediction 40 | print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_y_test)) 41 | 42 | # Plot outputs 43 | plt.scatter(diabetes_X_test, diabetes_y_test, color='black') 44 | plt.plot(diabetes_X_test, regr.predict(diabetes_X_test), color='blue', 45 | linewidth=3) 46 | 47 | plt.xticks(()) 48 | plt.yticks(()) 49 | 50 | plt.show() -------------------------------------------------------------------------------- /Untitled.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def 
concordance_correlation_coefficient(y_true, y_pred, 4 | sample_weight=None, 5 | multioutput='uniform_average'): 6 | """Concordance correlation coefficient. 7 | 8 | The concordance correlation coefficient is a measure of inter-rater agreement. 9 | It measures the deviation of the relationship between predicted and true values 10 | from the 45 degree angle. 11 | 12 | Read more: https://en.wikipedia.org/wiki/Concordance_correlation_coefficient 13 | Original paper: Lawrence, I., and Kuei Lin. "A concordance correlation coefficient to evaluate reproducibility." Biometrics (1989): 255-268. 14 | 15 | Parameters 16 | ---------- 17 | y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) 18 | Ground truth (correct) target values. 19 | 20 | y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) 21 | Estimated target values. 22 | 23 | Returns 24 | ------- 25 | loss : A float in the range [-1,1]. A value of 1 indicates perfect agreement 26 | between the true and the predicted values. 27 | 28 | Examples 29 | -------- 30 | >>> from sklearn.metrics import concordance_correlation_coefficient 31 | >>> y_true = [3, -0.5, 2, 7] 32 | >>> y_pred = [2.5, 0.0, 2, 8] 33 | >>> concordance_correlation_coefficient(y_true, y_pred) 34 | 0.97678916827853024 35 | 36 | 37 | """ 38 | cor=np.corrcoef(y_true,y_pred)[0][1] 39 | 40 | mean_true=np.mean(y_true) 41 | mean_pred=np.mean(y_pred) 42 | 43 | var_true=np.var(y_true) 44 | var_pred=np.var(y_pred) 45 | 46 | sd_true=np.std(y_true) 47 | sd_pred=np.std(y_pred) 48 | 49 | numerator=2*cor*sd_true*sd_pred 50 | 51 | denominator=var_true+var_pred+(mean_true-mean_pred)**2 52 | 53 | return numerator/denominator 54 | 55 | n_samples=1000 56 | y_true = np.arange(n_samples) 57 | y_pred = y_true + 500 58 | c=concordance_correlation_coefficient(y_true,y_pred) -------------------------------------------------------------------------------- /test_supervised_pca.py: -------------------------------------------------------------------------------- 1 | # Author: Alexandre Gramfort 2 | # Fabian Pedregosa 3 | # 4 | # License: BSD 3 clause 5 | 6 | 7 | from SupervisedPCA import SupervisedPCARegressor 8 | from SupervisedPCA import SupervisedPCAClassifier 9 | from sklearn.utils.testing import assert_almost_equal 10 | from sklearn.utils.testing import assert_array_equal 11 | from sklearn import datasets 12 | import numpy as np 13 | 14 | def test_supervisedpcaRegressor_fit(): 15 | # Test LinearRegression on a simple dataset. 
16 | # a simple dataset 17 | diabetes=datasets.load_diabetes() 18 | X = diabetes.data 19 | Y = diabetes.target 20 | 21 | spca = SupervisedPCARegressor() 22 | spca.fit(X, Y,threshold=300,n_components=2) 23 | 24 | assert_array_equal(spca._leavouts,[1,5]) 25 | assert_almost_equal(spca._model.coef_[0], [-537.7584256]) 26 | 27 | def test_supervisedpcaRegressor_predict(): 28 | diabetes=datasets.load_diabetes() 29 | X = diabetes.data 30 | Y = diabetes.target 31 | 32 | spca = SupervisedPCARegressor() 33 | spca.fit(X, Y) 34 | 35 | predictions = spca.predict(X) 36 | mae=np.mean(abs(predictions-Y)) 37 | assert_almost_equal(mae,51.570682097) 38 | 39 | def test_supervisedpcaClassifier(): 40 | iris=datasets.load_iris() 41 | X = iris.data 42 | Y = iris.target 43 | 44 | spca = SupervisedPCAClassifier() 45 | spca.fit(X, Y,threshold=1,n_components=2) 46 | 47 | assert_array_equal(spca._leavouts,[0,1]) 48 | assert_almost_equal(spca._model.coef_[0][0], -2.43973048) 49 | 50 | def test_supervisedpcaClassifier_predict(): 51 | iris=datasets.load_iris() 52 | X = iris.data 53 | Y = iris.target 54 | 55 | spca = SupervisedPCAClassifier() 56 | spca.fit(X, Y) 57 | 58 | predictions = spca.predict(X) 59 | error=np.mean(sum(abs(predictions-Y))/float(len(predictions))) 60 | assert_almost_equal(error,0.08666666) 61 | 62 | 63 | 64 | test_supervisedpcaRegressor_fit() 65 | test_supervisedpcaRegressor_predict() 66 | test_supervisedpcaClassifier() 67 | test_supervisedpcaClassifier_predict() -------------------------------------------------------------------------------- /supervised_pca_vs_lda_qda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | ===================== 6 | Supervised PCA against LDA and QDA 7 | ===================== 8 | 9 | A comparison of supervised PCA against LDA and QDA. 
10 | """
11 | print(__doc__)
12 | 
13 | 
14 | # Code source: Gaël Varoquaux
15 | #              Andreas Müller
16 | # Modified for documentation by Jaques Grobler
17 | # License: BSD 3 clause
18 | 
19 | import numpy as np
20 | import matplotlib.pyplot as plt
21 | from sklearn.cross_validation import train_test_split
22 | from sklearn.preprocessing import StandardScaler
23 | from sklearn.datasets import make_classification
24 | from sklearn.lda import LDA
25 | from sklearn.qda import QDA
26 | from supervised_pca import SupervisedPCAClassifier
27 | 
28 | total_range=100
29 | performances={}
30 | 
31 | names = ["LDA", "QDA","SuperPCA thres=0","SuperPCA thres=0.3","SuperPCA thres=0.7"]
32 | ncomponents={names[2]:[],names[3]:[],names[4]:[]}
33 | 
34 | classifiers = [
35 |     LDA(),
36 |     QDA(),
37 |     SupervisedPCAClassifier(threshold=0),
38 |     SupervisedPCAClassifier(threshold=0.3),
39 |     SupervisedPCAClassifier(threshold=0.7)
40 |     ]
41 | 
42 | for name in names:
43 |     performances[name]=[]
44 | 
45 | # iterate over classifiers
46 | 
47 | 
48 | for i in range(1,total_range):
49 |     X, y = make_classification(n_features=i*10, n_redundant=i*5, n_informative=i,
50 |                                random_state=1, n_clusters_per_class=1)
51 | 
52 | 
53 |     X = StandardScaler().fit_transform(X)
54 |     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
55 | 
56 | 
57 |     for name, clf in zip(names, classifiers):
58 | 
59 |         clf.fit(X_train, y_train)
60 |         pred=clf.predict(X_test)
61 |         score = sum(y_test==pred)/(1.0*len(pred))
62 | 
63 |         performances[name].append(score)
64 |         try:
65 |             ncomponents[name].append(clf.get_n_components())
66 |         except:
67 |             pass
68 | 
69 | 
70 | x=[k*10 for k in range(1,total_range)]
71 | 
72 | plt.figure()
73 | plt.subplot(311)
74 | plt.title("Score against number of features")
75 | for name in names:
76 |     plt.plot(x,performances[name])
77 | 
78 | plt.legend(labels=names,loc="best")
79 | 
80 | plt.subplot(312)
81 | 
82 | plt.title("Score boxplots for each classifier")
83 | 
84 | dummy=[]
85 | for name in names:
86 |     dummy.append(performances[name])
87 | plt.boxplot(dummy,labels=names)
88 | 
89 | plt.subplot(313)
90 | 
91 | plt.title("Number of components against features")
92 | plotcomponentsSPCA0=plt.plot(x,ncomponents[names[2]])
93 | plotComponentsSPCA01=plt.plot(x,ncomponents[names[3]])
94 | plotComponentsSPCA07=plt.plot(x,ncomponents[names[4]])
95 | plt.legend([names[2],names[3],names[4]],loc="best")
96 | 
97 | 
98 | 
--------------------------------------------------------------------------------
/superpca_vs_elasticnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | =====================
6 | Supervised PCA against elastic net
7 | =====================
8 | 
9 | A comparison of supervised PCA against elastic net.
10 | 
11 | The data are artificially created and the variables can be described by a
12 | lower dimensional space.
13 | 
14 | Supervised PCA shows better results until about 100 features. The performance of all models
15 | is similar after that point. A supervisedPCA model with a threshold=0.7 works with a smaller
16 | number of components, while having similar performance to the rest of the models. This can be particularly
17 | useful in situations where interpretability is important.
18 | """ 19 | print(__doc__) 20 | 21 | 22 | # Code source: Gaël Varoquaux 23 | # Andreas Müller 24 | # Modified for documentation by Jaques Grobler 25 | # License: BSD 3 clause 26 | 27 | import numpy as np 28 | import matplotlib.pyplot as plt 29 | from sklearn.cross_validation import train_test_split 30 | from sklearn.preprocessing import StandardScaler 31 | from sklearn.datasets import make_regression 32 | from supervised_pca import SupervisedPCARegressor 33 | from sklearn.linear_model import ElasticNet 34 | 35 | total_range=50 36 | performances={} 37 | 38 | names = ["ElasticNet","SuperPCA thres=0","SuperPCA thres=0.1","SuperPCA thres=0.7"] 39 | ncomponents={names[1]:[],names[2]:[],names[3]:[]} 40 | 41 | 42 | 43 | for name in names: 44 | performances[name]=[] 45 | 46 | # iterate over classifiers 47 | 48 | 49 | for i in range(1,total_range): 50 | print(i) 51 | 52 | classifiers = [ 53 | ElasticNet(), 54 | SupervisedPCARegressor(threshold=0), 55 | SupervisedPCARegressor(threshold=0.1), 56 | SupervisedPCARegressor(threshold=0.7) 57 | ] 58 | 59 | X, y = make_regression(n_features=i*5, n_informative=i*4, 60 | random_state=1,effective_rank=i) 61 | 62 | 63 | X = StandardScaler().fit_transform(X) 64 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4) 65 | 66 | 67 | for name, clf in zip(names, classifiers): 68 | 69 | clf.fit(X_train, y_train) 70 | pred=clf.predict(X_test) 71 | score = np.mean(abs(y_test-pred)) 72 | 73 | performances[name].append(score) 74 | try: 75 | ncomponents[name].append(clf.get_n_components()) 76 | except: 77 | pass 78 | 79 | 80 | x=[k*5 for k in range(1,total_range)] 81 | 82 | plt.figure() 83 | plt.subplot(311) 84 | plt.title("MAE against number of features") 85 | for name in names: 86 | plt.plot(x,performances[name]) 87 | 88 | plt.legend(labels=names,loc="best") 89 | 90 | plt.subplot(312) 91 | 92 | plt.title("MAE boxplots for each classifier") 93 | 94 | dummy=[] 95 | for name in names: 96 | dummy.append(performances[name]) 97 | plt.boxplot(dummy,labels=names) 98 | 99 | plt.subplot(313) 100 | 101 | plt.title("Number of components against features") 102 | plotcomponentsSPCA0=plt.plot(x,ncomponents[names[1]]) 103 | plotComponentsSPCA01=plt.plot(x,ncomponents[names[2]]) 104 | plotComponentsSPCA07=plt.plot(x,ncomponents[names[3]]) 105 | plt.legend([names[1],names[2],names[3]],loc="best") 106 | 107 | 108 | -------------------------------------------------------------------------------- /supervised_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LinearRegression 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.decomposition import PCA 5 | from sklearn.base import RegressorMixin 6 | from sklearn.base import ClassifierMixin 7 | 8 | 9 | class BaseSupervisedPCA(object): 10 | """ 11 | Supervised PCA algorithm proposed by Bair et al. (2006). 12 | 13 | 14 | Parameters 15 | ---------- 16 | 17 | fit_intercept : boolean, optional 18 | whether to calculate the intercept for this model. If set 19 | to false, no intercept will be used in calculations 20 | (e.g. data is expected to be already centered). 21 | 22 | model : The supervised learning model that will be used to conduct supervised PCA. 23 | 24 | Attributes 25 | ---------- 26 | 27 | 28 | References 29 | ---------- 30 | Bair, Eric, et al. "Prediction by supervised principal components." Journal of the American Statistical Association 101.473 (2006). 
31 | 
32 | 
33 |     """
34 | 
35 |     def __init__(self, fit_intercept=True, model=None,threshold=0,n_components=-1):
36 |         self.fit_intercept = fit_intercept
37 |         self._model=model
38 | 
39 |         self._pca=None
40 |         self._leavouts=None
41 |         self._threshold=threshold
42 |         self._n_components=n_components
43 | 
44 |     def fit(self,X,y):
45 |         """
46 |         Fit the supervised PCA model.
47 | 
48 |         Parameters
49 |         ----------
50 |         X : numpy array or sparse matrix of shape [n_samples,n_features]
51 |             Training data
52 |         y : numpy array of shape [n_samples, n_targets]
53 |             Target values
54 |         The coefficient threshold and the number of components to keep are
55 |         taken from the constructor arguments threshold and n_components.
56 | 
57 |         Returns
58 |         -------
59 |         self : returns an instance of self.
60 |         """
61 | 
62 |         #these are the columns that will be removed
63 |         self._leavouts=[]
64 | 
65 |         dummy_X=X[:,np.newaxis]
66 | 
67 |         #test the absolute value of the coefficient for each variable. If it
68 |         #is below the threshold, then add it to the leavouts
69 |         for i in range(0,dummy_X.shape[2]):
70 |             current_X=dummy_X[:,:,i]
71 |             self._model.fit(current_X, y)
72 |             #the all([]) syntax is there in order to support both linear and logistic
73 |             #regression. Logistic regression coefficients for multiclass problems
74 |             #come in multi-dimensional arrays.
75 |             if(all([abs(self._model.coef_[0])<self._threshold])):
76 |                 self._leavouts.append(i)
77 | 
78 |         #delete the variables whose coefficients fell below the threshold,
79 |         #so that they do not take part in the PCA step. numpy's delete()
80 |         #removes the leavout indices along the third axis of dummy_X,
81 |         #which holds the individual variables.
82 | 
83 |         if(len(self._leavouts)>0):
84 |             dummy_X=np.delete(dummy_X,self._leavouts,2)
85 | 
86 |         #conduct PCA for the designated number of components.
87 |         #If no number was designated (or an illegal value<=0) then use the max number of components
88 |         if(self._n_components>0):
89 |             self._pca = PCA(n_components=self._n_components)
90 |         else:
91 |             self._pca = PCA(n_components=dummy_X.shape[2])
92 |         dummy_X=self._pca.fit_transform(dummy_X[:,0,:])
93 | 
94 |         self._model=self._model.fit(dummy_X,y)
95 | 
96 |         return self
97 | 
98 |     def predict(self,X):
99 |         """Predict using the supervised PCA model
100 |         Parameters
101 |         ----------
102 |         X : {array-like, sparse matrix}, shape = (n_samples, n_features)
103 |             Samples.
104 |         Returns
105 |         -------
106 |         C : array, shape = (n_samples,)
107 |             Returns predicted values.
108 |         """
109 |         #remove the leavouts, transform the data and predict with the fitted model
110 |         transformed_X=self.get_transformed_data(X)
111 |         return self._model.predict(transformed_X)
112 | 
113 |     def get_transformed_data(self,X):
114 |         """Calculates the components on a new matrix.
115 |         Parameters
116 |         ----------
117 |         X : numpy array or sparse matrix of shape [n_samples,n_features]
118 | 
119 |         Returns
120 |         -------
121 |         transformed_X: Returns a transformed numpy array or sparse matrix. The
122 |         leavouts have been removed and the remaining variables are transformed into
123 |         components using the weights of the PCA object.
124 | 
125 |         Notes
126 |         -------
127 |         The algorithm should have first been executed on a dataset.
128 | 
129 |         """
130 |         transformed_X=np.delete(X,self._leavouts,1)
131 |         transformed_X=self._pca.transform(transformed_X)
132 |         return transformed_X
133 | 
134 |     def get_n_components(self):
135 |         return self._pca.n_components_
136 | 
137 | 
138 |     #I am implementing a function here to get the components in order to avoid
139 |     #the user having to access the pca object. Another option would be to
140 |     #copy the components from the pca to a variable located at 'self'. However,
141 |     #this might be too redundant.
142 |     def get_components(self):
143 |         """Returns the components formerly calculated on a training dataset.
144 | 
145 |         Returns
146 |         -------
147 |         components: A numpy matrix with the loadings of the PCA components.
148 | 
149 |         Notes
150 |         -------
151 |         The algorithm should have first been executed on a dataset.
152 | 
153 |         """
154 |         return self._pca.components_
155 | 
156 |     #same principle as in the get_components function
157 |     def get_coefs(self):
158 |         return self._model.coef_
159 | 
160 |     def score(self,X,y):
161 |         return self._model.score(X,y)
162 | 
163 | 
164 | 
165 | class SupervisedPCARegressor(BaseSupervisedPCA,RegressorMixin):
166 |     """
167 |     Implementation of supervisedPCA for regression. The underlying model
168 |     is a linear regression model.
169 | 
170 |     Parameters
171 |     ----------
172 |     normalize : boolean, optional, default False
173 |         If True, the regressors X will be normalized before regression.
174 |     copy_X : boolean, optional, default True
175 |         If True, X will be copied; else, it may be overwritten.
176 |     n_jobs : int, optional, default 1
177 |         The number of jobs to use for the computation.
178 |         If -1 all CPUs are used. This will only provide speedup for
179 |         n_targets > 1 and sufficiently large problems.
180 |     Attributes
181 |     ----------
182 |     coef_ : array, shape (n_features, ) or (n_targets, n_features)
183 |         Estimated coefficients for the linear regression problem.
184 |         If multiple targets are passed during the fit (y 2D), this
185 |         is a 2D array of shape (n_targets, n_features), while if only
186 |         one target is passed, this is a 1D array of length n_features.
187 |     intercept_ : array
188 |         Independent term in the linear model.
189 | 
190 |     """
191 |     def __init__(self,fit_intercept=True, normalize=False, copy_X=True,n_jobs=1,threshold=0,n_components=-1):
192 |         model=LinearRegression(fit_intercept=fit_intercept,copy_X=copy_X,normalize=normalize,n_jobs=n_jobs)
193 |         super(SupervisedPCARegressor,self).__init__(fit_intercept=fit_intercept,model=model,threshold=threshold,n_components=n_components)
194 | 
195 | 
196 | class SupervisedPCAClassifier(BaseSupervisedPCA,ClassifierMixin):
197 |     """Implementation of supervisedPCA for classification. The underlying model
198 |     is a logistic regression model.
199 | 
200 |     Parameters
201 |     ----------
202 |     penalty : str, 'l1' or 'l2'
203 |         Used to specify the norm used in the penalization. The newton-cg and
204 |         lbfgs solvers support only l2 penalties.
205 |     dual : bool
206 |         Dual or primal formulation. Dual formulation is only implemented for
207 |         l2 penalty with liblinear solver. Prefer dual=False when
208 |         n_samples > n_features.
209 |     C : float, optional (default=1.0)
210 |         Inverse of regularization strength; must be a positive float.
211 |         Like in support vector machines, smaller values specify stronger
212 |         regularization.
213 |     fit_intercept : bool, default: True
214 |         Specifies if a constant (a.k.a. bias or intercept) should be
215 |         added to the decision function.
216 |     intercept_scaling : float, default: 1
217 |         Useful only if solver is liblinear.
218 |         when self.fit_intercept is True, instance vector x becomes
219 |         [x, self.intercept_scaling],
220 |         i.e. a "synthetic" feature with constant value equal to
221 |         intercept_scaling is appended to the instance vector.
222 |         The intercept becomes intercept_scaling * synthetic feature weight
223 |         Note! the synthetic feature weight is subject to l1/l2 regularization
224 |         as all other features.
225 |         To lessen the effect of regularization on synthetic feature weight
226 |         (and therefore on the intercept) intercept_scaling has to be increased.
227 |     class_weight : dict or 'balanced', optional
228 |         Weights associated with classes in the form ``{class_label: weight}``.
229 |         If not given, all classes are supposed to have weight one.
230 |         The "balanced" mode uses the values of y to automatically adjust
231 |         weights inversely proportional to class frequencies in the input data
232 |         as ``n_samples / (n_classes * np.bincount(y))``
233 |     max_iter : int
234 |         Useful only for the newton-cg and lbfgs solvers. Maximum number of
235 |         iterations taken for the solvers to converge.
236 |     random_state : int seed, RandomState instance, or None (default)
237 |         The seed of the pseudo random number generator to use when
238 |         shuffling the data.
239 |     solver : {'newton-cg', 'lbfgs', 'liblinear'}
240 |         Algorithm to use in the optimization problem.
241 |     tol : float, optional
242 |         Tolerance for stopping criteria.
243 |     multi_class : str, {'ovr', 'multinomial'}
244 |         Multiclass option can be either 'ovr' or 'multinomial'. If the option
245 |         chosen is 'ovr', then a binary problem is fit for each label. Else
246 |         the loss minimised is the multinomial loss fit across
247 |         the entire probability distribution. Works only for the 'lbfgs'
248 |         solver.
249 |     verbose : int
250 |         For the liblinear and lbfgs solvers set verbose to any positive
251 |         number for verbosity.
252 | 
253 |     """
254 |     def __init__(self,fit_intercept=True, normalize=False, copy_X=True,penalty='l2', dual=False, tol=1e-4, C=1.0,
255 |                  intercept_scaling=1, class_weight=None,
256 |                  random_state=None, solver='liblinear', max_iter=100,
257 |                  multi_class='ovr', verbose=0,threshold=0,n_components=-1):
258 |         model=LogisticRegression(penalty=penalty,dual=dual,tol=tol,C=C,fit_intercept=fit_intercept,intercept_scaling=intercept_scaling,class_weight=class_weight,random_state=random_state,solver=solver,max_iter=max_iter,multi_class=multi_class,verbose=verbose)
259 |         super(SupervisedPCAClassifier,self).__init__(fit_intercept=fit_intercept,model=model,threshold=threshold,n_components=n_components)
260 | 
261 |     def predict_proba(self,X):
262 |         return self._model.predict_proba(X)
--------------------------------------------------------------------------------
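
Usage sketch (not a file in the repository): a minimal example of how the two
estimators defined in supervised_pca.py might be called, assuming the
constructor-based API shown above (threshold and n_components are passed to
__init__, not to fit). The threshold values below are the ones used in
test_supervised_pca.py and are illustrative only.

    import numpy as np
    from sklearn import datasets
    from supervised_pca import SupervisedPCARegressor, SupervisedPCAClassifier

    # regression on the diabetes data: drop every feature whose univariate
    # coefficient is below 300 in absolute value, then keep two components
    diabetes = datasets.load_diabetes()
    reg = SupervisedPCARegressor(threshold=300, n_components=2)
    reg.fit(diabetes.data, diabetes.target)
    preds = reg.predict(diabetes.data)
    print(reg.get_n_components())                          # number of kept components
    print(np.mean(np.abs(preds - diabetes.target)))        # in-sample MAE

    # classification on the iris data: same idea with logistic regression
    iris = datasets.load_iris()
    clf = SupervisedPCAClassifier(threshold=1, n_components=2)
    clf.fit(iris.data, iris.target)
    print(clf.get_components())                            # PCA loadings of the kept variables
    print(np.mean(clf.predict(iris.data) != iris.target))  # in-sample error rate

The errors are computed from predict() rather than score(), because score() in
BaseSupervisedPCA forwards the raw X to the underlying model, which was fitted
on the transformed components.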
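
A small worked check of the concordance correlation coefficient defined in
Untitled.py (again not part of the repository, only an illustration): it
reproduces the value from the function's docstring example and contrasts the
CCC with the plain Pearson correlation, which ignores the location shift that
the CCC penalises.

    import numpy as np
    from Untitled import concordance_correlation_coefficient

    y_true = np.array([3.0, -0.5, 2.0, 7.0])
    y_pred = np.array([2.5, 0.0, 2.0, 8.0])

    # 2*cov / (var_true + var_pred + (mean_true - mean_pred)**2), ~0.9768
    print(concordance_correlation_coefficient(y_true, y_pred))

    # the Pearson correlation of the same pair is higher, because it is
    # insensitive to the systematic offset between y_true and y_pred
    print(np.corrcoef(y_true, y_pred)[0][1])

    # a perfectly correlated but shifted prediction keeps Pearson r at 1.0,
    # while the CCC collapses, as in the y_pred = y_true + 500 example at
    # the bottom of Untitled.py
    print(concordance_correlation_coefficient(y_true, y_true + 500))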