├── LICENSE.txt ├── README.md ├── pystacknet ├── __init__.py ├── metrics.py ├── notebook │ ├── StackNetRegressor_Example.ipynb │ ├── test.csv │ └── train.csv ├── pystacknet.py └── test │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── test_pystacknet.cpython-36-PYTEST.pyc │ ├── test_amazon.py │ └── test_pystacknet.py └── setup.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Marios Michailidis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | `pystacknet` is a light python version of [StackNet](https://github.com/kaz-Anova/StackNet) which was originally made in Java. 4 | 5 | It supports many of the original features, with some new elements. 6 | 7 | 8 | ## Installation 9 | 10 | ``` 11 | git clone https://github.com/h2oai/pystacknet 12 | cd pystacknet 13 | python setup.py install 14 | ``` 15 | 16 | ## New features 17 | 18 | `pystacknet`'s main object is a 2-dimensional list of sklearn type of models. This list defines the StackNet structure. This is the equivalent of [parameters](https://github.com/kaz-Anova/StackNet#parameters-file) in the Java version. A representative example could be: 19 | 20 | ```python 21 | from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier 22 | from sklearn.linear_model import LogisticRegression 23 | 24 | models=[ 25 | ######## First level ######## 26 | [RandomForestClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 27 | ExtraTreesClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 28 | GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 29 | LogisticRegression(random_state=1) 30 | ], 31 | ######## Second level ######## 32 | [RandomForestClassifier (n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)] 33 | ] 34 | ``` 35 | 36 | `pystacknet` is not as strict as in the `Java` version and can allow `Regressors`, `Classifiers` or even `Transformers` at any level of StackNet. 
In other words, the following would work just fine:
37 | 
38 | ```python
39 | from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, ExtraTreesClassifier, ExtraTreesRegressor, GradientBoostingClassifier, GradientBoostingRegressor
40 | from sklearn.linear_model import LogisticRegression, Ridge
41 | from sklearn.decomposition import PCA
42 | models=[
43 | 
44 | [RandomForestClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
45 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
46 | GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
47 | LogisticRegression(random_state=1),
48 | PCA(n_components=4,random_state=1)
49 | ],
50 | 
51 | [RandomForestClassifier (n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
52 | 
53 | 
54 | ]
55 | ```
56 | 
57 | **Note** that not all transformers are meaningful in this context, so use them at your own risk.
58 | 
59 | 
60 | ## Parameters
61 | 
62 | A typical usage for classification could be:
63 | 
64 | ```python
65 | from pystacknet.pystacknet import StackNetClassifier
66 | 
67 | model=StackNetClassifier(models, metric="auc", folds=4,
68 |                          restacking=False, use_retraining=True, use_proba=True,
69 |                          random_state=12345, n_jobs=1, verbose=1)
70 | 
71 | model.fit(x,y)
72 | preds=model.predict_proba(x_test)
73 | 
74 | 
75 | ```
76 | Where:
77 | 
78 | 
79 | Parameter | Explanation
80 | --- | ---
81 | models | List of models. This should be a 2-dimensional list. The first dimension defines the stacking level and each entry is a model.
82 | metric | Can be "auc", "logloss", "accuracy", "f1", "matthews" or your own custom metric, as long as it implements the signature `metric(y_true, y_pred, sample_weight=None)`.
83 | folds | Either an integer defining the number of folds used in `StackNet` or an iterable yielding train/test splits.
84 | restacking | `True` for [restacking](https://github.com/kaz-Anova/StackNet#restacking-mode), else `False`.
85 | use_proba | When evaluating the metric, probabilities are used instead of class predictions if `use_proba==True`.
86 | use_retraining | If `True`, one model per entry is refitted on the whole training data in order to score the test data. Otherwise the average of all fold models is used (however, this takes more memory and there is no guarantee that it will work better).
87 | random_state | Integer seed for randomised procedures.
88 | n_jobs | Number of models to run in parallel. This is independent of any extra threads allocated by the selected algorithms, e.g. it is possible to run 4 models in parallel where one is a random forest that itself runs on 10 threads (if selected).
90 | verbose | Integer; values higher than zero enable printing to the console.
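
The custom-metric and custom-folds options can also be combined. The sketch below is illustrative only (the `models` list is the one defined above; `x`, `y` and `x_test` are hypothetical numpy arrays):

```python
import numpy as np
from sklearn.model_selection import StratifiedKFold
from pystacknet.pystacknet import StackNetClassifier

# any callable with the signature (y_true, y_pred, sample_weight=None) works as a metric
def error_rate(y_true, y_pred, sample_weight=None):
    return float(np.mean(y_true.ravel() != y_pred.ravel()))

# folds may be an iterable of (train_index, test_index) pairs;
# materialising it with list() keeps the splits reusable across stacking levels
splits = list(StratifiedKFold(n_splits=4, shuffle=True, random_state=1).split(x, y))

model = StackNetClassifier(models, metric=error_rate, folds=splits,
                           restacking=False, use_retraining=True, use_proba=False,
                           random_state=12345, n_jobs=1, verbose=1)
model.fit(x, y)
preds = model.predict_proba(x_test)
```

Passing pre-computed splits is useful when every level should see exactly the same folds, e.g. stratified or grouped splits that plain `KFold` cannot produce.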
91 | 
--------------------------------------------------------------------------------
/pystacknet/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.0.2'
--------------------------------------------------------------------------------
/pystacknet/metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 31 18:33:58 2018
4 | 
5 | @author: Marios Michailidis
6 | 
7 | Metrics and methods to check the metrics used within StackNet
8 | 
9 | """
10 | 
11 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error # regression metrics
12 | from sklearn.metrics import roc_auc_score, log_loss, accuracy_score, f1_score, matthews_corrcoef # classification metrics
13 | import numpy as np
14 | 
15 | valid_regression_metrics=["rmse","mae","rmsle","r2","mape","smape"]
16 | valid_classification_metrics=["auc","logloss","accuracy","f1","matthews"]
17 | 
18 | ############ classification metrics ############
19 | 
20 | def auc(y_true, y_pred, sample_weight=None):
21 |     return roc_auc_score(y_true, y_pred, sample_weight=sample_weight)
22 | 
23 | def logloss(y_true, y_pred, sample_weight=None):
24 |     return log_loss(y_true, y_pred, sample_weight=sample_weight)
25 | 
26 | def accuracy(y_true, y_pred, sample_weight=None):
27 |     return accuracy_score(y_true, y_pred, sample_weight=sample_weight)
28 | 
29 | def f1(y_true, y_pred, sample_weight=None):
30 |     return f1_score(y_true, y_pred, sample_weight=sample_weight)
31 | 
32 | def matthews(y_true, y_pred, sample_weight=None):
33 |     return matthews_corrcoef(y_true, y_pred, sample_weight=sample_weight)
34 | 
35 | ############ regression metrics ############
36 | 
37 | def rmse(y_true, y_pred, sample_weight=None):
38 |     return np.sqrt(mean_squared_error(y_true, y_pred, sample_weight=sample_weight))
39 | 
40 | def mae(y_true, y_pred, sample_weight=None):
41 |     return mean_absolute_error(y_true, y_pred, sample_weight=sample_weight)
42 | 
43 | def rmsle(y_true, y_pred, sample_weight=None):
44 |     return np.sqrt(mean_squared_log_error(y_true, y_pred, sample_weight=sample_weight))
45 | 
46 | def r2(y_true, y_pred, sample_weight=None):
47 |     return r2_score(y_true, y_pred, sample_weight=sample_weight)
48 | 
49 | 
50 | def mape(y_true, y_pred, sample_weight=None):
51 |     y_true = y_true.ravel()
52 |     y_pred = y_pred.ravel()
53 |     if sample_weight is not None:
54 |         sample_weight = sample_weight.ravel()
55 |     eps = 1E-15
56 |     ape = np.abs((y_true - y_pred) / (y_true + eps)) * 100
57 |     ape[y_true == 0] = 0
58 |     return np.average(ape, weights=sample_weight)
59 | 
60 | 
61 | def smape(y_true, y_pred, sample_weight=None):
62 | 
63 |     y_true = y_true.ravel()
64 |     y_pred = y_pred.ravel()
65 |     if sample_weight is not None:
66 |         sample_weight = sample_weight.ravel()
67 |     eps = 1E-15
68 |     sape = (np.abs(y_true - y_pred) / (0.5 * (np.abs(y_true) + np.abs(y_pred)) + eps)) * 100
69 |     sape[(y_true == 0) & (y_pred == 0)] = 0
70 |     return np.average(sape, weights=sample_weight)
71 | 
72 | 
73 | """
74 | metric: string or callable that returns a metric given (y_true, y_pred, sample_weight=None)
75 | Currently supported metrics are "rmse","mae","rmsle","r2","mape","smape"
76 | """
77 | 
78 | 
79 | def check_regression_metric(metric):
80 | 
81 |     if type(metric) is type(None):
82 |         raise Exception ("metric cannot be None")
83 |     if isinstance(metric, str) :
84 |         if metric not in valid_regression_metrics:
85 |             raise Exception ("The regression metric has to be one of %s " % (", ".join([str(k) for k in valid_regression_metrics])))
86 |         if metric=="rmse":
87 |             return rmse,metric
88 |         elif metric=="mae":
89 |             return mae,metric
90 |         elif metric=="rmsle":
91 |             return rmsle,metric
92 |         elif metric=="r2":
93 |             return r2,metric
94 |         elif metric=="mape":
95 |             return mape,metric
96 |         elif metric=="smape":
97 |             return smape,metric
98 |         else :
99 |             raise Exception ("The metric %s is not recognised " % (metric) )
100 |     else : # a custom metric is given
101 |         try:
102 |             y_true_temp=[[1],[2],[3]]
103 |             y_pred_temp=[[2],[1],[3]]
104 |             y_true_temp=np.array(y_true_temp)
105 |             y_pred_temp=np.array(y_pred_temp)
106 |             sample_weight_temp=[1,0.5,1]
107 |             metric(y_true_temp,y_pred_temp, sample_weight=sample_weight_temp )
108 |             return metric,"custom"
109 | 
110 |         except Exception:
111 |             raise Exception ("The custom metric has to implement metric(y_true, y_pred, sample_weight=None)" )
112 | 
113 | 
114 | """
115 | metric: string or callable that returns a metric given (y_true, y_pred, sample_weight=None)
116 | Currently supported metrics are "auc","logloss","accuracy","f1","matthews"
117 | """
118 | 
119 | 
120 | def check_classification_metric(metric):
121 | 
122 |     if type(metric) is type(None):
123 |         raise Exception ("metric cannot be None")
124 |     if isinstance(metric, str) :
125 |         if metric not in valid_classification_metrics:
126 |             raise Exception ("The classification metric has to be one of %s " % (", ".join([str(k) for k in valid_classification_metrics])))
127 |         if metric=="auc":
128 |             return auc,metric
129 |         elif metric=="logloss":
130 |             return logloss,metric
131 |         elif metric=="accuracy":
132 |             return accuracy,metric
135 |         elif metric=="f1":
136 |             return f1,metric
137 |         elif metric=="matthews":
138 |             return matthews,metric
139 |         else :
140 |             raise Exception ("The metric %s is not recognised " % (metric) )
141 |     else : # a custom metric is given
142 |         try:
143 |             y_true_temp=[[1],[0],[1]]
144 |             y_pred_temp=[[0.4],[1],[0.2]]
145 |             y_true_temp=np.array(y_true_temp)
146 |             y_pred_temp=np.array(y_pred_temp)
147 |             sample_weight_temp=[1,0.5,1]
148 |             metric(y_true_temp,y_pred_temp, sample_weight=sample_weight_temp )
149 |             return metric,"custom"
150 | 
151 |         except Exception:
152 |             raise Exception ("The custom metric has to implement metric(y_true, y_pred, sample_weight=None)" )
153 | 
154 | 
155 | 
156 | 
157 | 
--------------------------------------------------------------------------------
/pystacknet/pystacknet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Aug 30 23:56:58 2018
4 | 
5 | @author: mimar
6 | 
7 | 
8 | This module implements StackNet [https://github.com/kaz-Anova/StackNet], allowing for both regression and classification.
9 | 10 | 11 | """ 12 | 13 | import numpy as np 14 | import pandas as pd 15 | from scipy.sparse import csr_matrix,hstack,vstack ,csc_matrix 16 | from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin 17 | from sklearn.base import clone 18 | from pystacknet.metrics import check_regression_metric, check_classification_metric 19 | from sklearn.model_selection import KFold 20 | from sklearn.utils import check_X_y,check_array,check_consistent_length, column_or_1d 21 | import inspect 22 | from sklearn.externals.joblib import delayed,Parallel 23 | import operator 24 | import time 25 | from sklearn.preprocessing import LabelEncoder 26 | 27 | proba_metrics=["auc","logloss"] 28 | non_proba_metrics=["accuracy","f1","matthews"] 29 | 30 | #(estimator, safe=True) 31 | 32 | 33 | 34 | ####### methods for paralellism ############ 35 | 36 | def _parallel_build_estimators(estimator, X, y, sample_weight, index): 37 | """Private function used to build a batch of estimators within a job.""" 38 | # Retrieve settings 39 | n_samples, n_features = X.shape 40 | 41 | 42 | if not type(sample_weight) is type (None): 43 | if "sample_weight" in inspect.getfullargspec(estimator.fit).args: 44 | estimator.fit(X, y, sample_weight=sample_weight) 45 | else : 46 | estimator.fit(X) 47 | else: 48 | estimator.fit(X, y) 49 | 50 | return estimator,index 51 | 52 | 53 | def _parallel_predict_proba(estimator, X, index): 54 | 55 | """Private function used to compute (proba-)predictions within a job.""" 56 | 57 | if hasattr(estimator, 'predict_proba') : 58 | predictions = estimator.predict_proba(X) 59 | elif hasattr(estimator, 'predict') : 60 | predictions = estimator.predict(X) 61 | elif hasattr(estimator, 'transform') : 62 | predictions = estimator.transform(X) 63 | else : 64 | raise Exception ("Each model/algorithm needs to implement at least one of ('predict()','predict_proba()' or 'transform()' ") 65 | 66 | if hasattr(estimator, 'predict_proba') and len(predictions.shape)==2 and predictions.shape[1]==2: 67 | predictions=predictions[:,1] 68 | elif len(predictions.shape)==2 and predictions.shape[1]==1: 69 | predictions=predictions[:,0] 70 | 71 | 72 | return predictions,index 73 | 74 | def _parallel_predict_proba_scoring(estimators, X, index): 75 | preds=None 76 | """Private function used to compute (proba-)predictions within a job.""" 77 | for estimator in estimators: 78 | if hasattr(estimator, 'predict_proba') : 79 | predictions = estimator.predict_proba(X) 80 | elif hasattr(estimator, 'predict') : 81 | predictions = estimator.predict(X) 82 | elif hasattr(estimator, 'transform') : 83 | predictions = estimator.transform(X) 84 | else : 85 | raise Exception ("Each model/algorithm needs to implement at least one of ('predict()','predict_proba()' or 'transform()' ") 86 | 87 | if hasattr(estimator, 'predict_proba') and len(predictions.shape)==2 and predictions.shape[1]==2: 88 | predictions=predictions[:,1] 89 | elif len(predictions.shape)==2 and predictions.shape[1]==1: 90 | predictions=predictions[:,0] 91 | 92 | if type(preds) is type(None): 93 | preds=predictions 94 | else : 95 | if predictions.shape!=preds.shape: 96 | 97 | raise Exception (" predictions' shape not equal among estimators within the batch as %d!=%d " % (predictions.shape[1],preds.shape[1])) 98 | 99 | preds+=predictions 100 | preds/=float(len(estimators)) 101 | 102 | return preds,index 103 | 104 | 105 | def _parallel_predict(estimator, X, index): 106 | 107 | """Private function used to compute (proba-)predictions within a job.""" 108 | 109 | if 
hasattr(estimator, 'predict') :
110 |         predictions = estimator.predict(X)
111 |     elif hasattr(estimator, 'transform') :
112 |         predictions = estimator.transform(X)
113 |     else :
114 |         raise Exception ("Each model/algorithm needs to implement at least one of ('predict()' or 'transform()') ")
115 | 
116 | 
117 |     return predictions,index
118 | 
119 | 
120 | def predict_from_broba(probas):
121 |     preds=np.zeros(probas.shape[0])
122 | 
123 |     if len(probas.shape)==1:
124 |         preds[probas>=0.5]=1.
125 |     else :
126 |         preds=np.argmax(probas, axis=1)
127 |     return preds
128 | 
129 | 
130 | 
131 | 
132 | ########################### Classifier #########################
133 | 
134 | """
135 | 
136 | models: List of models. This should be a 2-dimensional list. The first dimension defines the stacking level and each entry is a model.
137 | metric: Can be "auc","logloss","accuracy","f1","matthews" or your own custom metric as long as it implements metric(y_true, y_pred, sample_weight=None)
138 | folds: This can be either an integer to define the number of folds used in StackNet or an iterable yielding train/test splits.
139 | restacking: True for restacking (https://github.com/kaz-Anova/StackNet#restacking-mode) else False
140 | use_proba : When evaluating the metric, it will use probabilities instead of class predictions if use_proba==True
141 | use_retraining : If True it fits one model on the whole training data in order to score the test data. Otherwise it takes the average of all models used in the folds (however this takes more memory and there is no guarantee that it will work better)
142 | random_state : Integer for randomised procedures
143 | n_jobs : Number of models to run in parallel. This is independent of any extra threads allocated from the selected algorithms, e.g. it is possible to run 4 models in parallel where one is a randomforest that runs on 10 threads (if selected).
144 | verbose : Integer value higher than zero to allow printing at the console.
145 | 
146 | """
147 | 
148 | 
149 | class StackNetClassifier(BaseEstimator, ClassifierMixin):
150 | 
151 |     def __init__(self, models, metric="logloss", folds=3, restacking=False, use_retraining=True, use_proba=True, random_state=12345, n_jobs=1, verbose=0):
152 | 
153 |         #check models
154 |         if type(models) is type(None):
155 |             raise Exception("Models cannot be None. It needs to be a list of sklearn type of models ")
156 |         if not isinstance(models, list):
157 |             raise Exception("Models has to be a list of sklearn type of models ")
158 |         for l in range (len(models)):
159 |             if not isinstance(models[l], list):
160 |                 raise Exception("Each element in the models' list has to be a list. In other words a 2-dimensional list is expected. 
") 161 | for m in range (len(models[l])): 162 | if not hasattr(models[l][m], 'fit') : 163 | raise Exception("Each model/algorithm needs to implement a 'fit() method ") 164 | 165 | if not hasattr(models[l][m], 'predict_proba') and not hasattr(models[l][m], 'predict') and not hasattr(models[l][m], 'transform') : 166 | raise Exception("Each model/algorithm needs to implement at least one of ('predict()','predict_proba()' or 'transform()' ") 167 | self.models= models 168 | 169 | #check metrics 170 | self.metric,self.metric_name=check_classification_metric(metric) 171 | 172 | #check kfold 173 | if not isinstance(folds, int): 174 | try: 175 | object_iterator = iter(folds) 176 | except TypeError as te: 177 | raise Exception( 'folds is not int nor iterable') 178 | else: 179 | if folds <2: 180 | raise Exception( 'folds must be 2 or more') 181 | 182 | self.folds=folds 183 | #check use_proba 184 | if use_proba not in [True, False]: 185 | raise Exception("use_proba has to be True or False") 186 | 187 | if self.metric_name in non_proba_metrics and use_proba==True: 188 | self.use_proba=False 189 | else : 190 | self.use_proba=use_proba 191 | 192 | self.layer_legths=[] 193 | 194 | #check restacking 195 | 196 | if restacking not in [True, False]: 197 | raise Exception("restacking has to be True (to include previous inputs/layers to current layers in stacking) or False") 198 | self.restacking= restacking 199 | 200 | #check retraining 201 | 202 | if use_retraining not in [True, False]: 203 | raise Exception("use_retraining has to be True or False") 204 | 205 | self.use_retraining= use_retraining 206 | 207 | #check random state 208 | if not isinstance(random_state, int): 209 | raise Exception("random_state has to be int") 210 | self.random_state= random_state 211 | 212 | #check verbose 213 | if not isinstance(verbose, int): 214 | raise Exception("Cerbose has to be int") 215 | 216 | #check verbose 217 | self.n_jobs= n_jobs 218 | if self.n_jobs<=0: 219 | self.n_jobs=-1 220 | 221 | if not isinstance(n_jobs, int): 222 | raise Exception("n_jobs has to be int") 223 | 224 | self.verbose= verbose 225 | 226 | 227 | 228 | self.n_classes_=None 229 | self.classes_ = None 230 | self.n_features_=None 231 | self.estimators_=None 232 | self._n_samples=None 233 | self._sparse=None 234 | self._label_encoder=None 235 | self._level_dims=None 236 | 237 | 238 | def fit (self, X, y, sample_weight=None): 239 | 240 | start_time = time.time() 241 | 242 | 243 | # Convert data (X is required to be 2d and indexable) 244 | X, y = check_X_y( 245 | X, y, ['csr', 'csc'], dtype=None, force_all_finite=False, 246 | multi_output=True 247 | ) 248 | 249 | if isinstance(X, list): 250 | X=np.array(X) 251 | 252 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 253 | self._sparse=True 254 | else : 255 | self._sparse=False 256 | if len(X.shape)==1: 257 | X=X.reshape((X.shape[0],1)) 258 | 259 | 260 | if type(sample_weight) is not type(None): 261 | sample_weight = check_array(sample_weight, ensure_2d=False) 262 | check_consistent_length(y, sample_weight) 263 | 264 | # Remap output 265 | self._n_samples, self.n_features_ = X.shape 266 | self._validate_y(y) 267 | 268 | self._label_encoder=LabelEncoder() 269 | y=self._label_encoder.fit_transform(y) 270 | 271 | 272 | classes = np.unique(y) 273 | #print (classes) 274 | if len(classes)<=1: 275 | raise Exception ("Number of classes must be at least 2, here only %d was given " %(len(classes))) 276 | 277 | self.classes_=classes 278 | self.n_classes_=len(self.classes_) 279 | 280 | if 
isinstance(self.folds, int) : 281 | indices=KFold( n_splits=self.folds,shuffle=True, random_state=self.random_state).split(y) 282 | 283 | else : 284 | indices=self.folds 285 | 286 | self._level_dims =[] 287 | 288 | previous_input=None #holds previous data for restackng 289 | current_input=X 290 | 291 | self.estimators_=[] 292 | ##start the level training 293 | for level in range (len(self.models)): 294 | start_level_time = time.time() 295 | 296 | if self.verbose>0: 297 | print ("====================== Start of Level %d ======================" % (level)) 298 | 299 | if not type(previous_input) is type(None) and self.restacking: 300 | if self._sparse: 301 | 302 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 303 | else : 304 | 305 | current_input=np.column_stack((previous_input,current_input) ) 306 | 307 | if self.verbose>0: 308 | print ("Input Dimensionality %d at Level %d " % (current_input.shape[1], level)) 309 | 310 | this_level_models=self.models[level] 311 | 312 | if self.verbose>0: 313 | print ("%d models included in Level %d " % (len(this_level_models), level)) 314 | 315 | 316 | train_oof=None 317 | metrics=[0.0 for k in range(len(this_level_models))] 318 | 319 | 320 | indices=[t for t in indices] 321 | 322 | iter_count=len(indices) 323 | #print ("iter_count",iter_count) 324 | 325 | i=0 326 | #print (i) 327 | #print (indices) 328 | for train_index, test_index in indices: 329 | 330 | #print ( i, i, i) 331 | metrics_i=[0.0 for k in range(len(this_level_models))] 332 | 333 | X_train, X_cv = current_input[train_index], current_input[test_index] 334 | y_train, y_cv = y[train_index], y[test_index] 335 | w_train,w_cv=None,None 336 | if not type(sample_weight) is type (None): 337 | w_train, w_cv = sample_weight[train_index], sample_weight[test_index] 338 | 339 | 340 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 341 | delayed(_parallel_build_estimators)( 342 | clone(this_level_models[d]), 343 | X_train, 344 | y_train, 345 | w_train, d) 346 | for d in range(len(this_level_models))) 347 | 348 | # Reduce 349 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 350 | 351 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 352 | 353 | if self.use_retraining==False: 354 | fitted_estimators=[t[0] for t in this_level_estimators_] 355 | if i==0: 356 | self.estimators_.append([fitted_estimators]) #add level 357 | else : 358 | self.estimators_[level].append(fitted_estimators) 359 | 360 | #parallel predict 361 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 362 | delayed(_parallel_predict_proba)( 363 | this_level_estimators_[d][0], 364 | X_cv,d) 365 | for d in range(len(this_level_models))) 366 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 367 | 368 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 369 | predictions_=[t[0] for t in this_level_predictions_] 370 | 371 | for d in range (len(this_level_models)): 372 | this_model=this_level_models[d] 373 | if self.use_proba: 374 | if hasattr(this_model, 'predict_proba') : 375 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 376 | metrics[d]+=metrics_i[d] 377 | if self.verbose>0: 378 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 379 | elif self.n_classes_==2 and hasattr(this_model, 'predict'): 380 | 
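# binary task whose model lacks predict_proba: _parallel_predict_proba fell back to
# predict(), so its single output column is scored directly against y_cv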
metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 381 | metrics[d]+=metrics_i[d] 382 | if self.verbose>0: 383 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 384 | 385 | 386 | else : 387 | if hasattr(this_model, 'predict_proba') : 388 | preds_transformed=predict_from_broba(predictions_[d]) 389 | metrics_i[d]=self.metric(y_cv,preds_transformed, sample_weight=w_cv) # 390 | metrics[d]+=metrics_i[d] 391 | if self.verbose>0: 392 | print ("Level %d, fold %d/%d , model %d , %s===%f " % (level, i+1, iter_count, d, self.metric_name, metrics_i[d])) 393 | elif self.n_classes_==2 and hasattr(this_model, 'predict'): 394 | preds_transformed=predict_from_broba(predictions_[d]) 395 | metrics_i[d]=self.metric(y_cv,preds_transformed, sample_weight=w_cv) # 396 | metrics[d]+=metrics_i[d] 397 | if self.verbose>0: 398 | print ("Level %d, fold %d/%d , model %d , %s===%f " % (level, i+1, iter_count, d, self.metric_name, metrics_i[d])) 399 | 400 | 401 | #concatenate predictions 402 | preds_concat_=np.column_stack( predictions_) 403 | #print ("preds_concat_.shape", preds_concat_.shape) 404 | if type(train_oof) is type(None): 405 | train_oof=np.zeros ( (current_input.shape[0], preds_concat_.shape[1])) 406 | self._level_dims.append(preds_concat_.shape[1]) 407 | 408 | 409 | if self._level_dims[level]!=preds_concat_.shape[1]: 410 | raise Exception ("Output dimensionality among folds is not consistent as %d!=%d " % ( self._level_dims[level],preds_concat_.shape[1])) 411 | train_oof[test_index] = preds_concat_ 412 | if self.verbose>0: 413 | print ("=========== end of fold %i in level %d ===========" %(i+1,level)) 414 | i+=1 415 | 416 | metrics=np.array(metrics) 417 | metrics/=float(iter_count) 418 | 419 | if self.verbose>0: 420 | for d in range(len(this_level_models)): 421 | this_model=this_level_models[d] 422 | if hasattr(this_model, 'predict_proba') : 423 | print ("Level %d, model %d , %s===%f " % (level, d, self.metric_name, metrics[d])) 424 | 425 | 426 | #done cv 427 | 428 | if self.use_retraining: 429 | 430 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 431 | delayed(_parallel_build_estimators)( 432 | clone(this_level_models[d]), 433 | current_input, 434 | y, 435 | sample_weight, d) 436 | for d in range(len(this_level_models))) 437 | 438 | 439 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 440 | 441 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 442 | 443 | fitted_estimators=[t[0] for t in this_level_estimators_] 444 | 445 | self.estimators_.append([fitted_estimators]) #add level 446 | 447 | 448 | previous_input=current_input 449 | current_input=train_oof 450 | if self.verbose>0: 451 | print ("Output dimensionality of level %d is %d " % ( level,current_input.shape[1] )) 452 | 453 | 454 | 455 | end_of_level_time=time.time() 456 | if self.verbose>0: 457 | print ("====================== End of Level %d ======================" % (level)) 458 | print (" level %d lasted %f seconds " % (level,end_of_level_time-start_level_time )) 459 | 460 | end_of_fit_time=time.time() 461 | if self.verbose>0: 462 | 463 | print ("====================== End of fit ======================") 464 | print (" fit() lasted %f seconds " % (end_of_fit_time-start_time )) 465 | 466 | 467 | # fit method that returns all out of fold predictions/outputs for all levels 468 | #each ith entry is a stack of oof predictions for the ith level 469 | 470 | def fit_oof (self, X, y, 
sample_weight=None): 471 | 472 | start_time = time.time() 473 | 474 | 475 | # Convert data (X is required to be 2d and indexable) 476 | X, y = check_X_y( 477 | X, y, ['csr', 'csc'], dtype=None, force_all_finite=False, 478 | multi_output=True 479 | ) 480 | 481 | if isinstance(X, list): 482 | X=np.array(X) 483 | 484 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 485 | self._sparse=True 486 | else : 487 | self._sparse=False 488 | if len(X.shape)==1: 489 | X=X.reshape((X.shape[0],1)) 490 | 491 | 492 | if type(sample_weight) is not type(None): 493 | sample_weight = check_array(sample_weight, ensure_2d=False) 494 | check_consistent_length(y, sample_weight) 495 | 496 | # Remap output 497 | self._n_samples, self.n_features_ = X.shape 498 | self._validate_y(y) 499 | 500 | self._label_encoder=LabelEncoder() 501 | y=self._label_encoder.fit_transform(y) 502 | 503 | out_puts=[] 504 | 505 | classes = np.unique(y) 506 | #print (classes) 507 | if len(classes)<=1: 508 | raise Exception ("Number of classes must be at least 2, here only %d was given " %(len(classes))) 509 | 510 | self.classes_=classes 511 | self.n_classes_=len(self.classes_) 512 | 513 | if isinstance(self.folds, int) : 514 | indices=KFold( n_splits=self.folds,shuffle=True, random_state=self.random_state).split(y) 515 | 516 | else : 517 | indices=self.folds 518 | 519 | self._level_dims =[] 520 | 521 | previous_input=None #holds previous data for restackng 522 | current_input=X 523 | 524 | self.estimators_=[] 525 | ##start the level training 526 | for level in range (len(self.models)): 527 | start_level_time = time.time() 528 | 529 | if self.verbose>0: 530 | print ("====================== Start of Level %d ======================" % (level)) 531 | 532 | if not type(previous_input) is type(None) and self.restacking: 533 | if self._sparse: 534 | 535 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 536 | else : 537 | 538 | current_input=np.column_stack((previous_input,current_input) ) 539 | 540 | if self.verbose>0: 541 | print ("Input Dimensionality %d at Level %d " % (current_input.shape[1], level)) 542 | 543 | this_level_models=self.models[level] 544 | 545 | if self.verbose>0: 546 | print ("%d models included in Level %d " % (len(this_level_models), level)) 547 | 548 | 549 | train_oof=None 550 | metrics=[0.0 for k in range(len(this_level_models))] 551 | 552 | 553 | indices=[t for t in indices] 554 | 555 | iter_count=len(indices) 556 | #print ("iter_count",iter_count) 557 | 558 | i=0 559 | #print (i) 560 | #print (indices) 561 | for train_index, test_index in indices: 562 | 563 | #print ( i, i, i) 564 | metrics_i=[0.0 for k in range(len(this_level_models))] 565 | 566 | X_train, X_cv = current_input[train_index], current_input[test_index] 567 | y_train, y_cv = y[train_index], y[test_index] 568 | w_train,w_cv=None,None 569 | if not type(sample_weight) is type (None): 570 | w_train, w_cv = sample_weight[train_index], sample_weight[test_index] 571 | 572 | 573 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 574 | delayed(_parallel_build_estimators)( 575 | clone(this_level_models[d]), 576 | X_train, 577 | y_train, 578 | w_train, d) 579 | for d in range(len(this_level_models))) 580 | 581 | # Reduce 582 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 583 | 584 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 585 | 586 | if self.use_retraining==False: 587 | fitted_estimators=[t[0] for t in 
this_level_estimators_] 588 | if i==0: 589 | self.estimators_.append([fitted_estimators]) #add level 590 | else : 591 | self.estimators_[level].append(fitted_estimators) 592 | 593 | #parallel predict 594 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 595 | delayed(_parallel_predict_proba)( 596 | this_level_estimators_[d][0], 597 | X_cv,d) 598 | for d in range(len(this_level_models))) 599 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 600 | 601 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 602 | predictions_=[t[0] for t in this_level_predictions_] 603 | 604 | for d in range (len(this_level_models)): 605 | this_model=this_level_models[d] 606 | if self.use_proba: 607 | if hasattr(this_model, 'predict_proba') : 608 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 609 | metrics[d]+=metrics_i[d] 610 | if self.verbose>0: 611 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 612 | elif self.n_classes_==2 and hasattr(this_model, 'predict'): 613 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 614 | metrics[d]+=metrics_i[d] 615 | if self.verbose>0: 616 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 617 | 618 | 619 | else : 620 | if hasattr(this_model, 'predict_proba') : 621 | preds_transformed=predict_from_broba(predictions_[d]) 622 | metrics_i[d]=self.metric(y_cv,preds_transformed, sample_weight=w_cv) # 623 | metrics[d]+=metrics_i[d] 624 | if self.verbose>0: 625 | print ("Level %d, fold %d/%d , model %d , %s===%f " % (level, i+1, iter_count, d, self.metric_name, metrics_i[d])) 626 | elif self.n_classes_==2 and hasattr(this_model, 'predict'): 627 | preds_transformed=predict_from_broba(predictions_[d]) 628 | metrics_i[d]=self.metric(y_cv,preds_transformed, sample_weight=w_cv) # 629 | metrics[d]+=metrics_i[d] 630 | if self.verbose>0: 631 | print ("Level %d, fold %d/%d , model %d , %s===%f " % (level, i+1, iter_count, d, self.metric_name, metrics_i[d])) 632 | 633 | 634 | #concatenate predictions 635 | preds_concat_=np.column_stack( predictions_) 636 | 637 | 638 | 639 | #print ("preds_concat_.shape", preds_concat_.shape) 640 | if type(train_oof) is type(None): 641 | train_oof=np.zeros ( (current_input.shape[0], preds_concat_.shape[1])) 642 | self._level_dims.append(preds_concat_.shape[1]) 643 | 644 | 645 | if self._level_dims[level]!=preds_concat_.shape[1]: 646 | raise Exception ("Output dimensionality among folds is not consistent as %d!=%d " % ( self._level_dims[level],preds_concat_.shape[1])) 647 | train_oof[test_index] = preds_concat_ 648 | if self.verbose>0: 649 | print ("=========== end of fold %i in level %d ===========" %(i+1,level)) 650 | i+=1 651 | 652 | metrics=np.array(metrics) 653 | metrics/=float(iter_count) 654 | 655 | if self.verbose>0: 656 | for d in range(len(this_level_models)): 657 | this_model=this_level_models[d] 658 | if hasattr(this_model, 'predict_proba') : 659 | print ("Level %d, model %d , %s===%f " % (level, d, self.metric_name, metrics[d])) 660 | 661 | 662 | #done cv 663 | 664 | if self.use_retraining: 665 | 666 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 667 | delayed(_parallel_build_estimators)( 668 | clone(this_level_models[d]), 669 | current_input, 670 | y, 671 | sample_weight, d) 672 | for d in range(len(this_level_models))) 673 | 674 | 675 | this_level_estimators_ = [ [t[0],t[1]] 
for t in all_results] 676 | 677 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 678 | 679 | fitted_estimators=[t[0] for t in this_level_estimators_] 680 | 681 | self.estimators_.append([fitted_estimators]) #add level 682 | 683 | out_puts.append(train_oof) 684 | 685 | previous_input=current_input 686 | current_input=train_oof 687 | if self.verbose>0: 688 | print ("Output dimensionality of level %d is %d " % ( level,current_input.shape[1] )) 689 | 690 | 691 | 692 | end_of_level_time=time.time() 693 | if self.verbose>0: 694 | print ("====================== End of Level %d ======================" % (level)) 695 | print (" level %d lasted %f seconds " % (level,end_of_level_time-start_level_time )) 696 | 697 | end_of_fit_time=time.time() 698 | if self.verbose>0: 699 | 700 | print ("====================== End of fit ======================") 701 | print (" fit() lasted %f seconds " % (end_of_fit_time-start_time )) 702 | 703 | return out_puts 704 | 705 | 706 | def predict_proba (self, X): 707 | 708 | if type(self.n_classes_) is type(None) or self.n_classes_==1: 709 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 710 | if type(self.classes_) is type(None) or len(self.classes_)==1: 711 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 712 | if type(self.n_features_) is type(None) : 713 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 714 | if type(self.estimators_) is type(None) : 715 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 716 | if type(self._n_samples) is type(None) : 717 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 718 | if type(self._sparse) is type(None) : 719 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 720 | if type(self._label_encoder) is type(None) : 721 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 722 | if type(self._level_dims) is type(None) : 723 | raise Exception ("fit() must run successfuly to be able to execute the current method. 
") 724 | 725 | if isinstance(X, list): 726 | X=np.array(X) 727 | 728 | predict_sparse=None 729 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 730 | predict_sparse=True 731 | else : 732 | predict_sparse=False 733 | if len(X.shape)==1: 734 | X=X.reshape((X.shape[0],1)) 735 | 736 | if X.shape[1]!=self.n_features_: 737 | raise Exception("Input dimensionality of %d is not the same as the trained one with %d " % ( X.shape[1], self.n_features_)) 738 | 739 | 740 | # Remap output 741 | predict_sparse_samples, predict_sparse_n_features_ = X.shape 742 | 743 | previous_input=None #holds previous data for restackng 744 | current_input=X 745 | 746 | ##start the level training 747 | 748 | for level in range (len(self.estimators_)): 749 | #start_level_time = time.time() 750 | 751 | if self.verbose>0: 752 | print ("====================== Start of Level %d ======================" % (level)) 753 | 754 | if not type(previous_input) is type(None) and self.restacking: 755 | if predict_sparse: 756 | 757 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 758 | else : 759 | 760 | current_input=np.column_stack((previous_input,current_input) ) 761 | 762 | this_level_estimators=self.estimators_[level] 763 | 764 | if self.verbose>0: 765 | print ("%d estimators included in Level %d " % (len(this_level_estimators), level)) 766 | 767 | 768 | 769 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_estimators[0])), verbose=0)( 770 | delayed(_parallel_predict_proba_scoring)( 771 | [this_level_estimators[s][d] for s in range (len(this_level_estimators))], 772 | current_input,d) 773 | for d in range(len(this_level_estimators[0]))) 774 | 775 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 776 | 777 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 778 | predictions_=[t[0] for t in this_level_predictions_] 779 | 780 | 781 | #concatenate predictions 782 | test_pred=np.column_stack( predictions_) 783 | if test_pred.shape[1]!= self._level_dims[level]: 784 | raise Exception ("Output dimensionality for level %d with %d is not the same as the one during training with %d " %(level,test_pred.shape[1], self._level_dims[level] )) 785 | 786 | previous_input=current_input 787 | current_input=test_pred 788 | 789 | if len(test_pred.shape)==2 and test_pred.shape[1]==1 : 790 | pr=np.zeros( (test_pred.shape[0],2)) 791 | pr[:,1]=test_pred[:,0] 792 | pr[:,0]=1-test_pred[:,0] 793 | test_pred=pr 794 | elif len(test_pred.shape)==1: 795 | pr=np.zeros( (test_pred.shape[0],2)) 796 | pr[:,1]=test_pred 797 | pr[:,0]=1-test_pred 798 | test_pred=pr 799 | return test_pred 800 | 801 | #predicts output up to the specified level 802 | 803 | def predict_up_to(self, X, lev=None): 804 | 805 | if type(self.n_classes_) is type(None) or self.n_classes_==1: 806 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 807 | if type(self.classes_) is type(None) or len(self.classes_)==1: 808 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 809 | if type(self.n_features_) is type(None) : 810 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 811 | if type(self.estimators_) is type(None) : 812 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 813 | if type(self._n_samples) is type(None) : 814 | raise Exception ("fit() must run successfuly to be able to execute the current method. 
") 815 | if type(self._sparse) is type(None) : 816 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 817 | if type(self._label_encoder) is type(None) : 818 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 819 | if type(self._level_dims) is type(None) : 820 | raise Exception ("fit() must run successfuly to be able to execute the current method. ") 821 | 822 | if isinstance(X, list): 823 | X=np.array(X) 824 | 825 | predict_sparse=None 826 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 827 | predict_sparse=True 828 | else : 829 | predict_sparse=False 830 | if len(X.shape)==1: 831 | X=X.reshape((X.shape[0],1)) 832 | 833 | if X.shape[1]!=self.n_features_: 834 | raise Exception("Input dimensionality of %d is not the same as the trained one with %d " % ( X.shape[1], self.n_features_)) 835 | 836 | 837 | # Remap output 838 | predict_sparse_samples, predict_sparse_n_features_ = X.shape 839 | 840 | previous_input=None #holds previous data for restackng 841 | current_input=X 842 | 843 | if type(lev) is type(None): 844 | lev=len(self.estimators_) 845 | 846 | if not isinstance(lev, int): 847 | raise Exception("lev has to be int") 848 | 849 | out_puts=[] 850 | lev=min(lev,len(self.estimators_) ) 851 | 852 | ##start the level training 853 | 854 | for level in range (lev): 855 | #start_level_time = time.time() 856 | 857 | if self.verbose>0: 858 | print ("====================== Start of Level %d ======================" % (level)) 859 | 860 | if not type(previous_input) is type(None) and self.restacking: 861 | if predict_sparse: 862 | 863 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 864 | else : 865 | 866 | current_input=np.column_stack((previous_input,current_input) ) 867 | 868 | this_level_estimators=self.estimators_[level] 869 | 870 | if self.verbose>0: 871 | print ("%d estimators included in Level %d " % (len(this_level_estimators), level)) 872 | 873 | 874 | 875 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_estimators[0])), verbose=0)( 876 | delayed(_parallel_predict_proba_scoring)( 877 | [this_level_estimators[s][d] for s in range (len(this_level_estimators))], 878 | current_input,d) 879 | for d in range(len(this_level_estimators[0]))) 880 | 881 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 882 | 883 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 884 | predictions_=[t[0] for t in this_level_predictions_] 885 | 886 | 887 | #concatenate predictions 888 | test_pred=np.column_stack( predictions_) 889 | if test_pred.shape[1]!= self._level_dims[level]: 890 | raise Exception ("Output dimensionality for level %d with %d is not the same as the one during training with %d " %(level,test_pred.shape[1], self._level_dims[level] )) 891 | 892 | out_puts.append(test_pred) 893 | 894 | previous_input=current_input 895 | current_input=test_pred 896 | 897 | 898 | return out_puts 899 | 900 | 901 | 902 | 903 | def _validate_y(self, y): 904 | if len(y.shape) == 1 or y.shape[1] == 1: 905 | return column_or_1d(y, warn=True) 906 | else: 907 | return y 908 | 909 | 910 | 911 | 912 | 913 | 914 | ########################### Regression ######################### 915 | 916 | 917 | 918 | """ 919 | 920 | models: List of models. This should be a 2-dimensional list . The first level hould defice the stacking level and each entry is the model. 
921 | metric: Can be "rmse","mae","rmsle","r2","mape","smape" or your own custom metric as long as it implements metric(y_true, y_pred, sample_weight=None)
922 | folds: This can be either an integer to define the number of folds used in StackNet or an iterable yielding train/test splits.
923 | restacking: True for restacking (https://github.com/kaz-Anova/StackNet#restacking-mode) else False
924 | use_retraining : If True it fits one model on the whole training data in order to score the test data. Otherwise it takes the average of all models used in the folds (however this takes more memory and there is no guarantee that it will work better)
925 | random_state : Integer for randomised procedures
926 | n_jobs : Number of models to run in parallel. This is independent of any extra threads allocated from the selected algorithms, e.g. it is possible to run 4 models in parallel where one is a randomforest that runs on 10 threads (if selected).
927 | verbose : Integer value higher than zero to allow printing at the console.
928 | 
929 | """
930 | 
931 | 
932 | class StackNetRegressor(BaseEstimator, RegressorMixin):
933 | 
934 |     def __init__(self, models, metric="rmse", folds=3, restacking=False, use_retraining=True, random_state=12345, n_jobs=1, verbose=0):
935 | 
936 |         #check models
937 |         if type(models) is type(None):
938 |             raise Exception("Models cannot be None. It needs to be a list of sklearn type of models ")
939 |         if not isinstance(models, list):
940 |             raise Exception("Models has to be a list of sklearn type of models ")
941 |         for l in range (len(models)):
942 |             if not isinstance(models[l], list):
943 |                 raise Exception("Each element in the models' list has to be a list. In other words a 2-dimensional list is expected. ")
944 |             for m in range (len(models[l])):
945 |                 if not hasattr(models[l][m], 'fit') :
946 |                     raise Exception("Each model/algorithm needs to implement a 'fit()' method ")
947 | 
948 |                 if not hasattr(models[l][m], 'predict_proba') and not hasattr(models[l][m], 'predict') and not hasattr(models[l][m], 'transform') :
949 |                     raise Exception("Each model/algorithm needs to implement at least one of ('predict()', 'predict_proba()' or 'transform()') ")
950 |         self.models= models
951 | 
952 |         #check metrics
953 |         self.metric,self.metric_name=check_regression_metric(metric)
954 | 
955 |         #check kfold
956 |         if not isinstance(folds, int):
957 |             try:
958 |                 object_iterator = iter(folds)
959 |             except TypeError as te:
960 |                 raise Exception( 'folds is neither an int nor an iterable')
961 |         else:
962 |             if folds <2:
963 |                 raise Exception( 'folds must be 2 or more')
964 | 
965 |         self.folds=folds
966 | 
967 |         self.layer_legths=[]
968 | 
969 |         #check restacking
970 | 
971 |         if restacking not in [True, False]:
972 |             raise Exception("restacking has to be True (to include previous inputs/layers to current layers in stacking) or False")
973 |         self.restacking= restacking
974 | 
975 |         #check retraining
976 | 
977 |         if use_retraining not in [True, False]:
978 |             raise Exception("use_retraining has to be True or False")
979 | 
980 |         self.use_retraining= use_retraining
981 | 
982 |         #check random state
983 |         if not isinstance(random_state, int):
984 |             raise Exception("random_state has to be int")
985 |         self.random_state= random_state
986 | 
987 |         #check verbose
988 |         if not isinstance(verbose, int):
989 |             raise Exception("verbose has to be int")
990 | 
991 |         #check n_jobs
992 |         self.n_jobs= n_jobs
993 |         if self.n_jobs<=0:
994 |             self.n_jobs=-1
995 | 
996 |         if not isinstance(n_jobs, int):
997 |             raise Exception("n_jobs has to be int")
998 | 
999 | 
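# verbose > 0 prints per-fold metric values, level dimensionalities and timings during fit()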
self.verbose= verbose 1000 | 1001 | self.n_features_=None 1002 | self.estimators_=None 1003 | self._n_samples=None 1004 | self._sparse=None 1005 | self._level_dims=None 1006 | 1007 | 1008 | def fit (self, X, y, sample_weight=None): 1009 | start_time = time.time() 1010 | 1011 | 1012 | # Convert data (X is required to be 2d and indexable) 1013 | X, y = check_X_y( 1014 | X, y, ['csr', 'csc'], dtype=None, force_all_finite=False, 1015 | multi_output=True 1016 | ) 1017 | 1018 | if isinstance(X, list): 1019 | X=np.array(X) 1020 | 1021 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 1022 | self._sparse=True 1023 | else : 1024 | self._sparse=False 1025 | if len(X.shape)==1: 1026 | X=X.reshape((X.shape[0],1)) 1027 | 1028 | 1029 | if type(sample_weight) is not type(None): 1030 | sample_weight = check_array(sample_weight, ensure_2d=False) 1031 | check_consistent_length(y, sample_weight) 1032 | 1033 | # Remap output 1034 | self._n_samples, self.n_features_ = X.shape 1035 | self._validate_y(y) 1036 | 1037 | 1038 | 1039 | if isinstance(self.folds, int) : 1040 | indices=KFold( n_splits=self.folds,shuffle=True, random_state=self.random_state).split(y) 1041 | 1042 | else : 1043 | indices=self.folds 1044 | 1045 | self._level_dims =[] 1046 | 1047 | previous_input=None #holds previous data for restackng 1048 | current_input=X 1049 | 1050 | self.estimators_=[] 1051 | ##start the level training 1052 | for level in range (len(self.models)): 1053 | start_level_time = time.time() 1054 | 1055 | if self.verbose>0: 1056 | print ("====================== Start of Level %d ======================" % (level)) 1057 | 1058 | if not type(previous_input) is type(None) and self.restacking: 1059 | if self._sparse: 1060 | 1061 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 1062 | else : 1063 | 1064 | current_input=np.column_stack((previous_input,current_input) ) 1065 | 1066 | if self.verbose>0: 1067 | print ("Input Dimensionality %d at Level %d " % (current_input.shape[1], level)) 1068 | 1069 | this_level_models=self.models[level] 1070 | 1071 | if self.verbose>0: 1072 | print ("%d models included in Level %d " % (len(this_level_models), level)) 1073 | 1074 | 1075 | train_oof=None 1076 | metrics=[0.0 for k in range(len(this_level_models))] 1077 | 1078 | 1079 | indices=[t for t in indices] 1080 | 1081 | iter_count=len(indices) 1082 | #print ("iter_count",iter_count) 1083 | 1084 | i=0 1085 | #print (i) 1086 | #print (indices) 1087 | for train_index, test_index in indices: 1088 | 1089 | #print ( i, i, i) 1090 | metrics_i=[0.0 for k in range(len(this_level_models))] 1091 | 1092 | X_train, X_cv = current_input[train_index], current_input[test_index] 1093 | y_train, y_cv = y[train_index], y[test_index] 1094 | w_train,w_cv=None,None 1095 | if not type(sample_weight) is type (None): 1096 | w_train, w_cv = sample_weight[train_index], sample_weight[test_index] 1097 | 1098 | 1099 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 1100 | delayed(_parallel_build_estimators)( 1101 | clone(this_level_models[d]), 1102 | X_train, 1103 | y_train, 1104 | w_train, d) 1105 | for d in range(len(this_level_models))) 1106 | 1107 | # Reduce 1108 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 1109 | 1110 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 1111 | 1112 | if self.use_retraining==False: 1113 | fitted_estimators=[t[0] for t in this_level_estimators_] 1114 | if i==0: 1115 | 
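# first fold of this level: start a new list of per-fold estimator batches
# (with use_retraining=False every fold's fitted models are kept and averaged at predict time)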
self.estimators_.append([fitted_estimators]) #add level 1116 | else : 1117 | self.estimators_[level].append(fitted_estimators) 1118 | 1119 | #parallel predict 1120 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 1121 | delayed(_parallel_predict_proba)( 1122 | this_level_estimators_[d][0], 1123 | X_cv,d) 1124 | for d in range(len(this_level_models))) 1125 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 1126 | 1127 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 1128 | predictions_=[t[0] for t in this_level_predictions_] 1129 | 1130 | for d in range (len(this_level_models)): 1131 | this_model=this_level_models[d] 1132 | if hasattr(this_model, 'predict') : 1133 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 1134 | metrics[d]+=metrics_i[d] 1135 | if self.verbose>0: 1136 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 1137 | elif predictions_[d].shape==y_cv.shape : 1138 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 1139 | metrics[d]+=metrics_i[d] 1140 | if self.verbose>0: 1141 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 1142 | 1143 | 1144 | #concatenate predictions 1145 | preds_concat_=np.column_stack( predictions_) 1146 | #print ("preds_concat_.shape", preds_concat_.shape) 1147 | if type(train_oof) is type(None): 1148 | train_oof=np.zeros ( (current_input.shape[0], preds_concat_.shape[1])) 1149 | self._level_dims.append(preds_concat_.shape[1]) 1150 | 1151 | 1152 | if self._level_dims[level]!=preds_concat_.shape[1]: 1153 | raise Exception ("Output dimensionality among folds is not consistent as %d!=%d " % ( self._level_dims[level],preds_concat_.shape[1])) 1154 | train_oof[test_index] = preds_concat_ 1155 | if self.verbose>0: 1156 | print ("=========== end of fold %i in level %d ===========" %(i+1,level)) 1157 | i+=1 1158 | 1159 | metrics=np.array(metrics) 1160 | metrics/=float(iter_count) 1161 | 1162 | if self.verbose>0: 1163 | for d in range(len(this_level_models)): 1164 | this_model=this_level_models[d] 1165 | if hasattr(this_model, 'predict_proba') : 1166 | print ("Level %d, model %d , %s===%f " % (level, d, self.metric_name, metrics[d])) 1167 | 1168 | 1169 | #done cv 1170 | 1171 | if self.use_retraining: 1172 | 1173 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 1174 | delayed(_parallel_build_estimators)( 1175 | clone(this_level_models[d]), 1176 | current_input, 1177 | y, 1178 | sample_weight, d) 1179 | for d in range(len(this_level_models))) 1180 | 1181 | 1182 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 1183 | 1184 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 1185 | 1186 | fitted_estimators=[t[0] for t in this_level_estimators_] 1187 | 1188 | self.estimators_.append([fitted_estimators]) #add level 1189 | 1190 | 1191 | previous_input=current_input 1192 | current_input=train_oof 1193 | if self.verbose>0: 1194 | print ("Output dimensionality of level %d is %d " % ( level,current_input.shape[1] )) 1195 | 1196 | 1197 | 1198 | end_of_level_time=time.time() 1199 | if self.verbose>0: 1200 | print ("====================== End of Level %d ======================" % (level)) 1201 | print (" level %d lasted %f seconds " % (level,end_of_level_time-start_level_time )) 1202 | 1203 | end_of_fit_time=time.time() 1204 | if self.verbose>0: 1205 | 
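# end-of-fit summary: report total wall-clock time across all levels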
1206 | print ("====================== End of fit ======================") 1207 | print (" fit() lasted %f seconds " % (end_of_fit_time-start_time )) 1208 | 1209 | 1210 | # fit method that returns all out of fold predictions/outputs for all levels 1211 | #each ith entry is a stack of oof predictions for the ith level 1212 | 1213 | def fit_oof (self, X, y, sample_weight=None): 1214 | 1215 | start_time = time.time() 1216 | 1217 | 1218 | # Convert data (X is required to be 2d and indexable) 1219 | X, y = check_X_y( 1220 | X, y, ['csr', 'csc'], dtype=None, force_all_finite=False, 1221 | multi_output=True 1222 | ) 1223 | 1224 | if isinstance(X, list): 1225 | X=np.array(X) 1226 | 1227 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 1228 | self._sparse=True 1229 | else : 1230 | self._sparse=False 1231 | if len(X.shape)==1: 1232 | X=X.reshape((X.shape[0],1)) 1233 | 1234 | 1235 | if type(sample_weight) is not type(None): 1236 | sample_weight = check_array(sample_weight, ensure_2d=False) 1237 | check_consistent_length(y, sample_weight) 1238 | 1239 | # Remap output 1240 | self._n_samples, self.n_features_ = X.shape 1241 | self._validate_y(y) 1242 | 1243 | 1244 | out_puts=[] 1245 | 1246 | if isinstance(self.folds, int) : 1247 | indices=KFold( n_splits=self.folds,shuffle=True, random_state=self.random_state).split(y) 1248 | 1249 | else : 1250 | indices=self.folds 1251 | 1252 | self._level_dims =[] 1253 | 1254 | previous_input=None #holds previous data for restackng 1255 | current_input=X 1256 | 1257 | self.estimators_=[] 1258 | ##start the level training 1259 | for level in range (len(self.models)): 1260 | start_level_time = time.time() 1261 | 1262 | if self.verbose>0: 1263 | print ("====================== Start of Level %d ======================" % (level)) 1264 | 1265 | if not type(previous_input) is type(None) and self.restacking: 1266 | if self._sparse: 1267 | 1268 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 1269 | else : 1270 | 1271 | current_input=np.column_stack((previous_input,current_input) ) 1272 | 1273 | if self.verbose>0: 1274 | print ("Input Dimensionality %d at Level %d " % (current_input.shape[1], level)) 1275 | 1276 | this_level_models=self.models[level] 1277 | 1278 | if self.verbose>0: 1279 | print ("%d models included in Level %d " % (len(this_level_models), level)) 1280 | 1281 | 1282 | train_oof=None 1283 | metrics=[0.0 for k in range(len(this_level_models))] 1284 | 1285 | 1286 | indices=[t for t in indices] 1287 | 1288 | iter_count=len(indices) 1289 | #print ("iter_count",iter_count) 1290 | 1291 | i=0 1292 | #print (i) 1293 | #print (indices) 1294 | for train_index, test_index in indices: 1295 | 1296 | #print ( i, i, i) 1297 | metrics_i=[0.0 for k in range(len(this_level_models))] 1298 | 1299 | X_train, X_cv = current_input[train_index], current_input[test_index] 1300 | y_train, y_cv = y[train_index], y[test_index] 1301 | w_train,w_cv=None,None 1302 | if not type(sample_weight) is type (None): 1303 | w_train, w_cv = sample_weight[train_index], sample_weight[test_index] 1304 | 1305 | 1306 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 1307 | delayed(_parallel_build_estimators)( 1308 | clone(this_level_models[d]), 1309 | X_train, 1310 | y_train, 1311 | w_train, d) 1312 | for d in range(len(this_level_models))) 1313 | 1314 | # Reduce 1315 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 1316 | 1317 | this_level_estimators_=sorted(this_level_estimators_, 
key=operator.itemgetter(1), reverse=False) 1318 | 1319 | if self.use_retraining==False: 1320 | fitted_estimators=[t[0] for t in this_level_estimators_] 1321 | if i==0: 1322 | self.estimators_.append([fitted_estimators]) #add level 1323 | else : 1324 | self.estimators_[level].append(fitted_estimators) 1325 | 1326 | #parallel predict 1327 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 1328 | delayed(_parallel_predict_proba)( 1329 | this_level_estimators_[d][0], 1330 | X_cv,d) 1331 | for d in range(len(this_level_models))) 1332 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 1333 | 1334 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 1335 | predictions_=[t[0] for t in this_level_predictions_] 1336 | 1337 | for d in range (len(this_level_models)): 1338 | this_model=this_level_models[d] 1339 | if hasattr(this_model, 'predict') : 1340 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 1341 | metrics[d]+=metrics_i[d] 1342 | if self.verbose>0: 1343 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 1344 | elif predictions_[d].shape==y_cv.shape : 1345 | metrics_i[d]=self.metric(y_cv,predictions_[d], sample_weight=w_cv) 1346 | metrics[d]+=metrics_i[d] 1347 | if self.verbose>0: 1348 | print ("Fold %d/%d , model %d , %s===%f " % (i+1, iter_count, d, self.metric_name, metrics_i[d])) 1349 | 1350 | 1351 | #concatenate predictions 1352 | preds_concat_=np.column_stack( predictions_) 1353 | #print ("preds_concat_.shape", preds_concat_.shape) 1354 | if type(train_oof) is type(None): 1355 | train_oof=np.zeros ( (current_input.shape[0], preds_concat_.shape[1])) 1356 | self._level_dims.append(preds_concat_.shape[1]) 1357 | 1358 | 1359 | if self._level_dims[level]!=preds_concat_.shape[1]: 1360 | raise Exception ("Output dimensionality among folds is not consistent as %d!=%d " % ( self._level_dims[level],preds_concat_.shape[1])) 1361 | train_oof[test_index] = preds_concat_ 1362 | if self.verbose>0: 1363 | print ("=========== end of fold %i in level %d ===========" %(i+1,level)) 1364 | i+=1 1365 | 1366 | metrics=np.array(metrics) 1367 | metrics/=float(iter_count) 1368 | 1369 | if self.verbose>0: 1370 | for d in range(len(this_level_models)): 1371 | this_model=this_level_models[d] 1372 | if hasattr(this_model, 'predict_proba') : 1373 | print ("Level %d, model %d , %s===%f " % (level, d, self.metric_name, metrics[d])) 1374 | 1375 | 1376 | #done cv 1377 | 1378 | if self.use_retraining: 1379 | 1380 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_models)), verbose=0)( 1381 | delayed(_parallel_build_estimators)( 1382 | clone(this_level_models[d]), 1383 | current_input, 1384 | y, 1385 | sample_weight, d) 1386 | for d in range(len(this_level_models))) 1387 | 1388 | 1389 | this_level_estimators_ = [ [t[0],t[1]] for t in all_results] 1390 | 1391 | this_level_estimators_=sorted(this_level_estimators_, key=operator.itemgetter(1), reverse=False) 1392 | 1393 | fitted_estimators=[t[0] for t in this_level_estimators_] 1394 | 1395 | self.estimators_.append([fitted_estimators]) #add level 1396 | 1397 | out_puts.append(train_oof) 1398 | 1399 | previous_input=current_input 1400 | current_input=train_oof 1401 | if self.verbose>0: 1402 | print ("Output dimensionality of level %d is %d " % ( level,current_input.shape[1] )) 1403 | 1404 | 1405 | 1406 | end_of_level_time=time.time() 1407 | if self.verbose>0: 1408 | print ("====================== End of 
Level %d ======================" % (level)) 1409 | print (" level %d lasted %f seconds " % (level,end_of_level_time-start_level_time )) 1410 | 1411 | end_of_fit_time=time.time() 1412 | if self.verbose>0: 1413 | 1414 | print ("====================== End of fit ======================") 1415 | print (" fit() lasted %f seconds " % (end_of_fit_time-start_time )) 1416 | 1417 | return out_puts 1418 | 1419 | 1420 | def predict (self, X): 1421 | 1422 | 1423 | if type(self.n_features_) is type(None) : 1424 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1425 | if type(self.estimators_) is type(None) : 1426 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1427 | if type(self._n_samples) is type(None) : 1428 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1429 | if type(self._sparse) is type(None) : 1430 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1431 | if type(self._level_dims) is type(None) : 1432 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1433 | 1434 | if isinstance(X, list): 1435 | X=np.array(X) 1436 | 1437 | predict_sparse=None 1438 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 1439 | predict_sparse=True 1440 | else : 1441 | predict_sparse=False 1442 | if len(X.shape)==1: 1443 | X=X.reshape((X.shape[0],1)) 1444 | 1445 | if X.shape[1]!=self.n_features_: 1446 | raise Exception("Input dimensionality of %d is not the same as the trained one with %d " % ( X.shape[1], self.n_features_)) 1447 | 1448 | 1449 | # Remap output 1450 | predict_sparse_samples, predict_sparse_n_features_ = X.shape 1451 | 1452 | previous_input=None #holds previous data for restacking 1453 | current_input=X 1454 | 1455 | ##start the level scoring 1456 | 1457 | for level in range (len(self.estimators_)): 1458 | #start_level_time = time.time() 1459 | 1460 | if self.verbose>0: 1461 | print ("====================== Start of Level %d ======================" % (level)) 1462 | 1463 | if not type(previous_input) is type(None) and self.restacking: 1464 | if predict_sparse: 1465 | 1466 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 1467 | else : 1468 | 1469 | current_input=np.column_stack((previous_input,current_input) ) 1470 | 1471 | this_level_estimators=self.estimators_[level] 1472 | 1473 | if self.verbose>0: 1474 | print ("%d estimators included in Level %d " % (len(this_level_estimators), level)) 1475 | 1476 | 1477 | 1478 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_estimators[0])), verbose=0)( 1479 | delayed(_parallel_predict_proba_scoring)( 1480 | [this_level_estimators[s][d] for s in range (len(this_level_estimators))], 1481 | current_input,d) 1482 | for d in range(len(this_level_estimators[0]))) 1483 | 1484 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 1485 | 1486 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 1487 | predictions_=[t[0] for t in this_level_predictions_] 1488 | 1489 | 1490 | #concatenate predictions 1491 | test_pred=np.column_stack( predictions_) 1492 | if test_pred.shape[1]!= self._level_dims[level]: 1493 | raise Exception ("Output dimensionality for level %d with %d is not the same as the one during training with %d " %(level,test_pred.shape[1], self._level_dims[level] )) 1494 | 1495 | previous_input=current_input 1496 |
current_input=test_pred 1497 | 1498 | 1499 | return test_pred 1500 | 1501 | #predicts output up to the specified level 1502 | 1503 | def predict_up_to(self, X, lev=None): 1504 | 1505 | if type(self.n_features_) is type(None) : 1506 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1507 | if type(self.estimators_) is type(None) : 1508 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1509 | if type(self._n_samples) is type(None) : 1510 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1511 | if type(self._sparse) is type(None) : 1512 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1513 | if type(self._level_dims) is type(None) : 1514 | raise Exception ("fit() must run successfully to be able to execute the current method. ") 1515 | 1516 | if isinstance(X, list): 1517 | X=np.array(X) 1518 | 1519 | predict_sparse=None 1520 | if isinstance(X, csr_matrix) or isinstance(X, csc_matrix): 1521 | predict_sparse=True 1522 | else : 1523 | predict_sparse=False 1524 | if len(X.shape)==1: 1525 | X=X.reshape((X.shape[0],1)) 1526 | 1527 | if X.shape[1]!=self.n_features_: 1528 | raise Exception("Input dimensionality of %d is not the same as the trained one with %d " % ( X.shape[1], self.n_features_)) 1529 | 1530 | 1531 | # Remap output 1532 | predict_sparse_samples, predict_sparse_n_features_ = X.shape 1533 | 1534 | previous_input=None #holds previous data for restacking 1535 | current_input=X 1536 | 1537 | if type(lev) is type(None): 1538 | lev=len(self.estimators_) 1539 | 1540 | if not isinstance(lev, int): 1541 | raise Exception("lev has to be int") 1542 | 1543 | lev=min(lev,len(self.estimators_) ) 1544 | out_puts=[] 1545 | ##start the level scoring 1546 | for level in range (lev): 1547 | #start_level_time = time.time() 1548 | 1549 | if self.verbose>0: 1550 | print ("====================== Start of Level %d ======================" % (level)) 1551 | 1552 | if not type(previous_input) is type(None) and self.restacking: 1553 | if predict_sparse: 1554 | 1555 | current_input=csr_matrix(hstack( [csr_matrix(previous_input), csr_matrix(current_input)] )) 1556 | else : 1557 | 1558 | current_input=np.column_stack((previous_input,current_input) ) 1559 | 1560 | this_level_estimators=self.estimators_[level] 1561 | 1562 | if self.verbose>0: 1563 | print ("%d estimators included in Level %d " % (len(this_level_estimators), level)) 1564 | 1565 | 1566 | 1567 | all_results = Parallel(n_jobs=min(self.n_jobs,len(this_level_estimators[0])), verbose=0)( 1568 | delayed(_parallel_predict_proba_scoring)( 1569 | [this_level_estimators[s][d] for s in range (len(this_level_estimators))], 1570 | current_input,d) 1571 | for d in range(len(this_level_estimators[0]))) 1572 | 1573 | this_level_predictions_ = [ [t[0],t[1]] for t in all_results] 1574 | 1575 | this_level_predictions_=sorted(this_level_predictions_, key=operator.itemgetter(1), reverse=False) 1576 | predictions_=[t[0] for t in this_level_predictions_] 1577 | 1578 | 1579 | #concatenate predictions 1580 | test_pred=np.column_stack( predictions_) 1581 | if self.verbose>0: print (test_pred.shape) 1582 | if test_pred.shape[1]!= self._level_dims[level]: 1583 | raise Exception ("Output dimensionality for level %d with %d is not the same as the one during training with %d " %(level,test_pred.shape[1], self._level_dims[level] )) 1584 | 1585 | out_puts.append(test_pred) 1586 | previous_input=current_input 1587 | current_input=test_pred
1588 | 1589 | 1590 | return out_puts 1591 | 1592 | 1593 | 1594 | 1595 | def _validate_y(self, y): 1596 | if len(y.shape) == 1 or y.shape[1] == 1: 1597 | return column_or_1d(y, warn=True) 1598 | else: 1599 | return y 1600 | 1601 | -------------------------------------------------------------------------------- /pystacknet/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/pystacknet/af571e0b5517470563859a15eafb87b594e766eb/pystacknet/test/__init__.py -------------------------------------------------------------------------------- /pystacknet/test/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/pystacknet/af571e0b5517470563859a15eafb87b594e766eb/pystacknet/test/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /pystacknet/test/__pycache__/test_pystacknet.cpython-36-PYTEST.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/h2oai/pystacknet/af571e0b5517470563859a15eafb87b594e766eb/pystacknet/test/__pycache__/test_pystacknet.cpython-36-PYTEST.pyc -------------------------------------------------------------------------------- /pystacknet/test/test_amazon.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 2 23:06:17 2018 4 | 5 | @author: mimar 6 | 7 | uses the dataset from https://www.kaggle.com/c/amazon-employee-access-challenge/data 8 | 9 | """ 10 | import numpy as np 11 | from pystacknet.pystacknet import StackNetClassifier 12 | from sklearn.ensemble import RandomForestClassifier 13 | from sklearn.linear_model import LogisticRegression, Ridge 14 | from sklearn import preprocessing 15 | from lightgbm import LGBMClassifier 16 | from xgboost import XGBClassifier 17 | 18 | def load_data(pat, filename, use_labels=True): 19 | """ 20 | Load data from CSV files and return them as numpy arrays 21 | The use_labels parameter indicates whether one should 22 | read the first column (containing class labels). If false, 23 | return all 0s. 
24 | """ 25 | 26 | # load column 1 to 8 (ignore last one) 27 | data = np.loadtxt(open(pat+ filename), delimiter=',', 28 | usecols=range(1, 8), skiprows=1) 29 | if use_labels: 30 | labels = np.loadtxt(open(pat + filename), delimiter=',', 31 | usecols=[0], skiprows=1) 32 | else: 33 | labels = np.zeros(data.shape[0]) 34 | return labels, data 35 | 36 | def save_results(predictions, filename): 37 | """Given a vector of predictions, save results in CSV format.""" 38 | with open(filename, 'w') as f: 39 | f.write("id,ACTION\n") 40 | for i, pred in enumerate(predictions): 41 | f.write("%d,%f\n" % (i + 1, pred)) 42 | 43 | 44 | 45 | def test_pystacknet(): 46 | 47 | 48 | path="" 49 | 50 | 51 | 52 | y, X = load_data(path, 'train.csv') 53 | y_test, X_test = load_data(path, 'test.csv', use_labels=False) 54 | 55 | # === one-hot encoding === # 56 | # we want to encode the category IDs encountered both in 57 | # the training and the test set, so we fit the encoder on both 58 | encoder = preprocessing.OneHotEncoder() 59 | encoder.fit(np.vstack((X, X_test))) 60 | X = encoder.transform(X) # Returns a sparse matrix (see numpy.sparse) 61 | X_test = encoder.transform(X_test) 62 | 63 | 64 | 65 | ##################################################################################### 66 | ############################### CLASSIFICATION ##################################### 67 | ##################################################################################### 68 | 69 | 70 | models=[ 71 | 72 | [LogisticRegression(C=1, random_state=1), 73 | LogisticRegression(C=3, random_state=1), 74 | Ridge(alpha=0.1, random_state=1), 75 | LogisticRegression(penalty="l1", C=1, random_state=1), 76 | XGBClassifier(max_depth=5,learning_rate=0.1, n_estimators=300, objective="binary:logistic", n_jobs=1, booster="gbtree", random_state=1, colsample_bytree=0.4 ), 77 | XGBClassifier(max_depth=5,learning_rate=0.3, reg_lambda=0.1, n_estimators=300, objective="binary:logistic", n_jobs=1, booster="gblinear", random_state=1, colsample_bytree=0.4 ), 78 | XGBClassifier(max_depth=5,learning_rate=0.1, n_estimators=300, objective="rank:pairwise", n_jobs=1, booster="gbtree", random_state=1, colsample_bytree=0.4 ), 79 | LGBMClassifier(boosting_type='gbdt', num_leaves=40, max_depth=-1, learning_rate=0.01, n_estimators=1000, subsample_for_bin=1000, objective="xentropy", min_split_gain=0.0, min_child_weight=0.01, min_child_samples=10, subsample=0.9, subsample_freq=1, colsample_bytree=0.5, reg_alpha=0.0, reg_lambda=0.0, random_state=1, n_jobs=1) 80 | ], 81 | 82 | [RandomForestClassifier (n_estimators=300, criterion="entropy", max_depth=6, max_features=0.5, random_state=1)] 83 | 84 | 85 | ] 86 | 87 | 88 | ################## proba metric ############################### 89 | 90 | model=StackNetClassifier(models, metric="auc", folds=4, restacking=False, 91 | use_retraining=True, use_proba=True, random_state=12345, 92 | n_jobs=1, verbose=1) 93 | 94 | model.fit(X,y ) 95 | preds=model.predict_proba(X_test)[:,1] 96 | 97 | save_results(preds,path+ "pystacknet_pred.csv") 98 | #print ("auc test 2 , auc %f " % (roc_auc_score(y_test,preds))) 99 | 100 | 101 | 102 | if __name__ == '__main__': 103 | test_pystacknet() -------------------------------------------------------------------------------- /pystacknet/test/test_pystacknet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 2 23:06:17 2018 4 | 5 | @author: mimar 6 | """ 7 | import numpy as np 8 | from pystacknet.pystacknet 
import StackNetClassifier,StackNetRegressor 9 | from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, ExtraTreesClassifier, ExtraTreesRegressor, GradientBoostingClassifier,GradientBoostingRegressor 10 | from sklearn.linear_model import LogisticRegression, Ridge 11 | from sklearn.metrics import roc_auc_score, log_loss 12 | from scipy.sparse import csr_matrix 13 | from sklearn.model_selection import StratifiedKFold # sklearn.cross_validation was removed in scikit-learn 0.20 14 | from sklearn.decomposition import PCA 15 | from pystacknet.metrics import rmse,mae 16 | 17 | 18 | X=[ [20000,2,2,1,24,-2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0], 19 | [120000,2,2,2,26,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0,1000,1000,1000,0,2000], 20 | [90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000], 21 | [50000,2,2,1,37,1,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000], 22 | [50000,1,2,1,57,2,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000,36681,10000,9000,689,679], 23 | [50000,1,1,2,37,3,0,0,0,0,0,64400,57069,57608,19394,19619,20024,2500,1815,657,1000,1000,800], 24 | [500000,1,1,2,29,4,0,0,0,0,0,367965,412023,445007,542653,483003,473944,55000,40000,38000,20239,13750,13770], 25 | [100000,2,2,2,23,5,-1,-1,0,0,-1,11876,380,601,221,-159,567,380,601,0,581,1687,1542], 26 | [140000,2,3,1,28,6,0,2,0,0,0,11285,14096,12108,12211,11793,3719,3329,0,432,1000,1000,1000], 27 | [20000,1,3,2,35,7,-2,-2,-2,-1,-1,0,0,0,0,13007,13912,0,0,0,13007,1122,0], 28 | [200000,2,3,2,34,8,0,2,0,0,-1,11073,9787,5535,2513,1828,3731,2306,12,50,300,3738,66], 29 | [260000,2,1,2,51,-1,-1,-1,-1,-1,2,12261,21670,9966,8517,22287,13668,21818,9966,8583,22301,0,3640], 30 | [630000,2,2,2,41,-1,0,-1,-1,-1,-1,12137,6500,6500,6500,6500,2870,1000,6500,6500,6500,2870,0], 31 | [70000,1,2,2,30,1,2,2,0,0,2,65802,67369,65701,66782,36137,36894,3200,0,3000,3000,1500,0], 32 | [250000,1,1,2,29,0,0,0,0,0,0,70887,67060,63561,59696,56875,55512,3000,3000,3000,3000,3000,3000], 33 | [50000,2,3,3,23,1,2,0,0,0,0,50614,29173,28116,28771,29531,30211,0,1500,1100,1200,1300,1100], 34 | [20000,1,1,2,24,0,0,2,2,2,2,15376,18010,17428,18338,17905,19104,3200,0,1500,0,1650,0], 35 | [320000,1,1,1,49,0,0,0,-1,-1,-1,253286,246536,194663,70074,5856,195599,10358,10000,75940,20000,195599,50000], 36 | [360000,2,1,1,49,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 37 | [180000,2,1,2,29,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 38 | [130000,2,3,2,39,0,0,0,0,0,-1,38358,27688,24489,20616,11802,930,3000,1537,1000,2000,930,33764], 39 | [120000,2,2,1,39,-1,-1,-1,-1,-1,-1,316,316,316,0,632,316,316,316,0,632,316,0], 40 | [70000,2,2,2,26,2,0,0,2,2,2,41087,42445,45020,44006,46905,46012,2007,3582,0,3601,0,1820], 41 | [450000,2,1,1,40,-2,-2,-2,-2,-2,-2,5512,19420,1473,560,0,0,19428,1473,560,0,0,1128], 42 | [90000,1,1,2,23,0,0,0,-1,0,0,4744,7070,0,5398,6360,8292,5757,0,5398,1200,2045,2000], 43 | [50000,1,3,2,23,0,0,0,0,0,0,47620,41810,36023,28967,29829,30046,1973,1426,1001,1432,1062,997], 44 | [60000,1,1,2,27,1,-2,-1,-1,-1,-1,-109,-425,259,-57,127,-189,0,1000,0,500,0,1000], 45 | [50000,2,3,2,30,0,0,0,0,0,0,22541,16138,17163,17878,18931,19617,1300,1300,1000,1500,1000,1012], 46 | [50000,2,3,1,47,-1,-1,-1,-1,-1,-1,650,3415,3416,2040,30430,257,3415,3421,2044,30430,257,0], 47 | [50000,1,1,2,26,0,0,0,0,0,0,15329,16575,17496,17907,18375,11400,1500,1500,1000,1000,1600,0], 48 | [230000,2,1,2,27,-1,-1,-1,-1,-1,-1,16646,17265,13266,15339,14307,36923,17270,13281,15339,14307,37292,0], 49 |
[50000,1,2,2,33,2,0,0,0,0,0,30518,29618,22102,22734,23217,23680,1718,1500,1000,1000,1000,716], 50 | [100000,1,1,2,32,0,0,0,0,0,0,93036,84071,82880,80958,78703,75589,3023,3511,3302,3204,3200,2504], 51 | [500000,2,2,1,54,-2,-2,-2,-2,-2,-2,10929,4152,22722,7521,71439,8981,4152,22827,7521,71439,981,51582], 52 | [500000,1,1,1,58,-2,-2,-2,-2,-2,-2,13709,5006,31130,3180,0,5293,5006,31178,3180,0,5293,768], 53 | [160000,1,1,2,30,-1,-1,-2,-2,-2,-1,30265,-131,-527,-923,-1488,-1884,131,396,396,565,792,0], 54 | [280000,1,2,1,40,0,0,0,0,0,0,186503,181328,180422,170410,173901,177413,8026,8060,6300,6400,6400,6737], 55 | [60000,2,2,2,22,0,0,0,0,0,-1,15054,9806,11068,6026,-28335,18660,1500,1518,2043,0,47671,617], 56 | [50000,1,1,2,25,1,-1,-1,-2,-2,-2,0,780,0,0,0,0,780,0,0,0,0,0], 57 | [280000,1,1,2,31,-1,-1,2,-1,0,-1,498,9075,4641,9976,17976,9477,9075,0,9976,8000,9525,781], 58 | [360000,1,1,2,33,0,0,0,0,0,0,218668,221296,206895,628699,195969,179224,10000,7000,6000,188840,28000,4000], 59 | [70000,2,1,2,25,0,0,0,0,0,0,67521,66999,63949,63699,64718,65970,3000,4500,4042,2500,2800,2500], 60 | [10000,1,2,2,22,0,0,0,0,0,0,1877,3184,6003,3576,3670,4451,1500,2927,1000,300,1000,500], 61 | [140000,2,2,1,37,0,0,0,0,0,0,59504,61544,62925,64280,67079,69802,3000,3000,3000,4000,4000,3000], 62 | [40000,2,1,2,30,0,0,0,2,0,0,18927,21295,25921,25209,26636,29197,3000,5000,0,2000,3000,0], 63 | [210000,1,1,2,29,-2,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 64 | [20000,2,1,2,22,0,0,2,-1,0,0,14028,16484,15800,16341,16675,0,3000,0,16741,334,0,0], 65 | [150000,2,5,2,46,0,0,-1,0,0,-2,4463,3034,1170,1170,0,0,1013,1170,0,0,0,0], 66 | [380000,1,2,2,32,-1,-1,-1,-1,-1,-1,22401,21540,15134,32018,11849,11873,21540,15138,24677,11851,11875,8251], 67 | [20000,1,1,2,24,0,0,0,0,0,0,17447,18479,19476,19865,20480,20063,1318,1315,704,928,912,1069], 68 | [70000,1,3,2,42,1,2,2,2,2,0,37042,36171,38355,39423,38659,39362,0,3100,2000,0,1500,1500], 69 | [100000,2,3,3,43,0,0,0,0,0,0,61559,51163,43824,39619,35762,33258,2000,1606,1500,2000,1500,1000], 70 | [310000,2,2,1,49,-2,-2,-2,-2,-2,-2,13465,7867,7600,11185,3544,464,7875,7600,11185,3544,464,0], 71 | [180000,2,1,2,25,1,2,0,0,0,0,41402,41742,42758,43510,44420,45319,1300,2010,1762,1762,1790,1622], 72 | [150000,2,1,2,29,2,0,0,0,0,0,46224,34993,31434,26518,21042,16540,1600,1718,1049,1500,2000,5000], 73 | [500000,2,1,1,45,-2,-2,-2,-2,-2,-2,1905,3640,162,0,151,2530,3640,162,0,151,2530,0], 74 | [180000,2,3,1,34,0,0,0,-1,-1,-1,16386,15793,8441,7142,-679,8321,8500,1500,7500,679,9000,2000], 75 | [180000,2,2,1,34,0,0,0,0,0,0,175886,173440,172308,168608,132202,129918,8083,7296,5253,4814,4816,3800], 76 | [200000,2,1,2,34,-1,3,2,2,2,2,1587,1098,782,1166,700,1414,0,0,700,0,1200,0], 77 | [400000,2,2,1,29,0,0,0,0,0,0,400134,398857,404205,360199,356656,364089,17000,15029,30000,12000,12000,23000], 78 | [500000,2,3,1,28,0,0,0,0,0,0,22848,23638,18878,14937,13827,15571,1516,1300,1000,1000,2000,2000], 79 | [70000,1,2,1,39,0,0,0,0,0,-1,70800,72060,69938,16518,14096,830,4025,2095,1000,2000,3000,0], 80 | [50000,1,1,2,29,2,2,2,2,2,2,24987,24300,26591,25865,27667,28264,0,2700,0,2225,1200,0], 81 | [50000,2,2,1,46,0,0,0,-2,-2,-2,28718,29166,0,0,0,0,1000,0,0,0,0,0], 82 | [130000,2,2,1,51,-1,-1,-2,-2,-1,-1,99,0,0,0,2353,0,0,0,0,2353,0,0], 83 | [200000,1,1,1,57,-2,-2,-2,-1,2,2,152519,148751,144076,8174,8198,7918,0,0,8222,300,0,1000], 84 | [10000,1,2,1,56,2,2,2,0,0,0,2097,4193,3978,4062,4196,4326,2300,0,150,200,200,160], 85 | [210000,2,1,2,30,2,-1,-1,-1,-1,-1,300,300,1159,2280,300,4250,300,1159,2280,300,4250,909], 86 | 
[130000,2,3,2,29,1,-2,-2,-1,2,-1,-190,-9850,-9850,10311,10161,7319,0,0,20161,0,7319,13899], 87 | [20000,1,5,2,22,2,0,0,0,0,0,18565,17204,17285,18085,11205,5982,0,1200,1000,500,1000,0], 88 | [80000,1,1,2,31,-1,-1,-1,-1,-1,-1,780,0,390,390,390,390,0,390,390,390,390,390], 89 | [320000,1,2,2,29,2,2,2,2,2,2,58267,59246,60184,58622,62307,63526,2500,2500,0,4800,2400,1600], 90 | [200000,2,2,1,32,-1,-1,-1,-1,2,-1,9076,5787,-684,5247,3848,3151,5818,15,9102,17,3165,1395], 91 | [290000,2,1,2,37,1,-2,-1,-1,-1,-1,0,0,3155,0,2359,0,0,3155,0,2359,0,0], 92 | [340000,1,1,2,32,-1,-1,-1,-1,-1,-1,3048,5550,23337,4291,80153,25820,5713,23453,4314,80552,25949,2016], 93 | [20000,1,2,2,24,0,0,2,0,0,0,14619,17216,16642,16976,17332,18543,2850,0,610,630,1500,0], 94 | [50000,1,3,2,25,-1,0,0,0,0,0,42838,37225,36087,9636,9590,10030,1759,1779,320,500,1000,1000], 95 | [300000,2,1,1,45,-1,-1,-1,-1,-1,-1,291,291,291,291,291,291,291,291,291,291,291,291], 96 | [30000,2,2,2,22,0,0,0,0,0,0,28387,29612,30326,28004,26446,6411,1686,1400,560,3000,1765,0], 97 | [240000,2,2,2,44,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 98 | [470000,2,3,3,33,0,0,0,0,0,0,165254,157784,162702,69923,29271,29889,6400,7566,3000,960,1000,3000], 99 | [360000,2,1,2,26,0,0,0,0,0,-1,23411,27796,30400,33100,180000,196,4796,3400,3100,146900,196,2963], 100 | [60000,1,3,2,30,0,0,0,0,0,0,26324,27471,28108,21993,19899,19771,1576,1213,648,768,1140,0], 101 | [400000,2,2,1,44,0,0,2,0,0,0,131595,139060,126819,104430,104990,94058,10700,3,3050,3000,3200,2800], 102 | [50000,2,3,2,49,0,0,0,0,0,0,48909,47863,21489,20414,19342,19482,1676,1302,700,699,849,826], 103 | [160000,1,2,2,33,0,0,0,0,0,0,130028,107808,71934,118418,118407,120418,4400,3547,80000,4500,4800,4500], 104 | [360000,2,1,1,45,-1,-1,2,0,-1,-1,390,1170,780,390,390,390,1170,0,0,390,390,390], 105 | [160000,2,2,2,32,0,0,0,0,0,-1,3826,4751,6604,8604,7072,766,1147,2000,2000,0,766,2303], 106 | [130000,2,1,1,35,0,0,0,-1,-1,-1,81313,117866,17740,1330,7095,1190,40000,5000,1330,7095,1190,2090], 107 | [20000,1,3,2,44,2,2,0,0,0,2,8583,8303,9651,10488,12314,11970,0,1651,1000,2000,0,1500], 108 | [200000,1,1,1,53,2,2,2,2,2,2,138180,140774,142460,144098,147124,149531,6300,5500,5500,5500,5000,5000], 109 | [280000,2,1,2,39,-1,-1,-1,0,0,-2,7524,0,3968,3868,0,0,0,3968,0,0,0,0], 110 | [100000,2,1,2,27,-2,-2,-2,-2,-2,-2,-2000,5555,0,0,0,0,7555,0,0,0,0,0], 111 | [160000,2,2,1,37,-1,-1,-1,-1,-1,-2,880,1602,840,840,0,0,1602,840,840,0,0,7736], 112 | [60000,2,2,2,23,0,0,0,0,0,0,45648,46850,47214,19595,19209,19323,1937,1301,682,690,816,835], 113 | [90000,1,2,2,35,0,0,0,0,0,0,83725,85996,87653,35565,30942,30835,3621,3597,1179,1112,1104,1143], 114 | [360000,1,1,1,43,-1,-1,-1,-1,-1,0,3967,8322,3394,6451,26370,9956,8339,3394,12902,27000,0,68978], 115 | [150000,1,1,2,27,0,0,0,0,0,0,86009,86108,89006,89775,87725,40788,4031,10006,3266,4040,1698,800], 116 | [50000,2,3,1,22,0,0,0,0,0,0,18722,18160,16997,13150,8866,7899,1411,1194,379,281,321,197], 117 | [20000,1,2,1,38,0,0,0,0,0,-1,17973,19367,19559,18240,17928,150,1699,1460,626,1750,150,0], 118 | [140000,1,1,2,32,-2,-2,-2,-2,-2,-2,672,10212,850,415,100,1430,10212,850,415,100,1430,0], 119 | [380000,2,1,2,30,-2,-2,-1,0,0,0,-81,-303,32475,32891,33564,34056,223,33178,1171,1197,1250,5000], 120 | [480000,1,1,1,63,0,0,0,2,2,0,422069,431342,479432,487066,471145,469961,16078,55693,17000,0,18000,24200], 121 | [50000,2,3,2,22,0,0,0,0,0,0,44698,42254,38347,32496,23477,24094,1767,1362,1002,840,995,904], 122 | [60000,2,2,2,26,2,2,2,2,2,0,56685,55208,59175,60218,55447,55305,0,5000,2511,6,3000,3000], 123 | 
[70000,2,2,2,24,-1,-1,-2,-2,-2,-1,5580,0,0,0,0,26529,0,0,0,0,26529,2000], 124 | [80000,2,2,1,36,-1,-1,-1,-1,-1,-1,6108,2861,3277,3319,1150,1150,2861,3279,3319,1150,1150,1035], 125 | [350000,1,1,2,52,-1,-1,-1,-1,-1,-1,713,2272,722,867,1150,5263,2272,722,867,1150,5263,5011], 126 | [130000,1,2,2,38,0,0,0,-1,-1,-1,171438,178382,39940,120483,44127,126568,10908,0,133657,4566,133841,4796], 127 | [360000,1,2,1,35,1,-2,-2,-2,-2,-2,-103,-103,-103,-103,-103,-103,0,0,0,0,0,0], 128 | [330000,2,1,1,31,0,0,2,0,0,0,105879,108431,105594,105896,106491,107289,9260,0,3593,4100,15794,0], 129 | [50000,1,3,1,47,0,0,2,0,0,0,13244,14722,15181,15928,16671,17393,2000,1000,1000,1000,1000,1000], 130 | [280000,1,2,1,41,2,2,2,2,2,3,135673,138532,134813,144401,152174,149415,6500,0,14254,14850,0,5000], 131 | [100000,2,1,2,24,0,0,0,0,0,0,52128,52692,54477,56076,60100,59713,2000,2677,3076,5080,3000,2033], 132 | [50000,1,2,2,41,0,0,0,0,0,0,19015,19294,20259,20274,20311,19957,1340,1305,700,718,724,684], 133 | [30000,1,1,2,24,-1,2,0,0,3,2,18199,17618,18631,21319,20692,21201,0,1312,3000,0,1000,1000], 134 | [240000,1,1,2,28,-1,-1,-1,-1,-1,-1,326,326,326,5676,476,326,326,326,5676,476,326,526], 135 | [80000,1,2,2,26,2,0,0,0,0,0,14029,15493,16630,17055,17629,18186,2000,1700,1000,1000,1000,1000], 136 | [400000,1,2,1,34,-1,-1,-1,-1,-1,-1,19660,9666,11867,7839,14837,7959,9677,11867,7839,14837,7959,5712], 137 | [240000,2,2,2,38,0,0,0,0,-1,-1,50254,51445,53015,52479,1307,1203,2000,3000,3000,1307,1203,563], 138 | [50000,1,3,2,37,2,2,2,3,2,2,46004,45976,48953,48851,49318,51143,1000,4035,1000,1400,2800,0], 139 | [450000,1,1,1,40,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 140 | [110000,2,1,1,48,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 141 | [310000,2,2,1,35,2,0,0,0,0,0,304991,311243,306314,258610,246491,198889,13019,11128,8407,8599,6833,5987], 142 | [20000,1,1,2,27,0,0,0,0,0,0,19115,18962,19298,19378,19717,15630,1404,1130,600,861,313,0], 143 | [20000,1,2,2,23,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 144 | [200000,1,3,2,52,0,0,0,0,0,0,110151,99530,98951,100914,103146,104993,3568,3585,3602,3848,3669,3784], 145 | [180000,1,1,1,36,0,0,0,0,0,0,163736,116422,99278,95766,97753,95927,4655,2690,2067,2142,2217,1000], 146 | [50000,1,2,1,51,0,0,0,0,0,0,3347,3899,4503,5347,6375,7077,1000,1066,1300,1500,1200,134], 147 | [60000,1,3,1,55,3,2,2,0,0,0,60521,61450,57244,28853,29510,26547,2504,7,1200,1200,1100,1500], 148 | [30000,2,1,2,23,1,-2,-2,-2,-1,-1,4000,5645,3508,-27,13744,5906,5645,3508,27,13771,5911,3024], 149 | [240000,1,1,2,41,1,-1,-1,0,0,-1,95,2622,3301,3164,360,1737,2622,3301,0,360,1737,924], 150 | [420000,1,2,1,34,0,0,0,0,0,0,253454,247743,229049,220951,210606,188108,9744,9553,7603,7830,7253,11326], 151 | [330000,1,3,1,46,0,0,0,0,0,0,227389,228719,229644,227587,227775,228203,8210,8095,8025,8175,8391,8200], 152 | [30000,2,2,2,22,0,0,0,0,0,0,28452,26145,26712,25350,17603,-780,2000,1400,0,500,0,1560], 153 | [240000,1,2,1,34,0,0,0,2,2,2,10674,12035,13681,13269,14158,13891,1500,1800,0,1000,0,327], 154 | [150000,1,1,2,27,0,0,0,0,0,0,17444,19342,22000,24614,27200,30229,2500,3000,3000,3000,3500,5000], 155 | [210000,2,2,1,33,0,0,0,0,0,0,7166,7997,8792,9189,4404,5708,1500,1500,1000,500,2000,546], 156 | [50000,2,3,1,51,-1,-1,-1,-1,-2,-2,752,300,5880,0,0,0,300,5880,0,0,0,0], 157 | [50000,1,1,2,24,0,0,0,0,0,0,50801,50143,49586,19430,19375,18995,2360,1700,1000,900,870,2130], 158 | [240000,1,1,2,47,1,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0,0,0,0,0], 159 | [180000,1,2,2,28,-1,-1,-1,-1,-1,-1,1832,0,832,332,416,416,0,416,332,500,3500,832], 160 | 
[50000,1,2,2,23,1,2,2,2,0,0,10131,10833,20583,19996,19879,18065,1000,10000,400,700,800,600], 161 | [170000,1,2,2,29,-2,-2,-2,-2,-2,-2,12159,10000,10000,10000,9983,15846,10000,10000,10000,9983,15863,10000], 162 | [20000,1,1,2,29,-1,-1,-1,-1,0,-1,1199,15586,344,2340,6702,339,15586,344,2340,4702,339,330], 163 | [50000,1,1,2,28,0,0,0,0,0,3,4999,5913,7315,9195,10624,10138,1000,1500,2000,1583,1100,0], 164 | [170000,2,2,2,27,0,0,0,0,0,0,19269,20313,20852,17560,17918,9100,1661,1200,351,358,182,0], 165 | [200000,1,1,2,34,1,2,0,0,0,0,197236,176192,93069,135668,132233,59875,8000,5000,55500,5000,5000,8500], 166 | [80000,2,2,1,23,1,2,3,2,0,0,9168,10522,10205,9898,10123,12034,1650,0,0,379,2091,1], 167 | [260000,2,1,1,60,1,-2,-1,-1,-1,-1,-1100,-1100,21400,0,969,869,0,22500,0,969,1000,0], 168 | [140000,1,2,1,32,0,0,0,0,2,0,86627,78142,68336,64648,58319,55251,3455,3110,5000,0,2100,2602], 169 | [80000,1,1,2,25,0,0,0,0,0,0,42444,55744,43476,41087,41951,31826,30000,3000,6000,8000,2000,14000], 170 | [350000,1,1,2,41,1,-1,-1,-1,-1,-2,208,2906,1000,630,0,0,2906,1000,630,0,0,0], 171 | [280000,2,2,1,56,0,0,0,0,0,0,208775,182350,132257,101783,177145,169311,8042,6700,5137,100000,7000,6321], 172 | [30000,2,3,2,26,0,0,0,0,0,0,9014,10406,11427,11935,13084,14206,1700,1500,1000,1500,1500,1500], 173 | [140000,1,1,1,34,0,0,0,0,0,0,23944,28049,32073,43129,47086,48699,5000,5000,11885,5000,3000,5504], 174 | [200000,2,1,2,37,0,0,0,0,0,0,105420,102870,89643,90938,92505,94031,4000,3250,3250,3500,3560,5000], 175 | [200000,2,3,2,30,0,0,0,0,0,0,196031,196143,189524,167163,146975,122324,7300,7108,7680,6200,5000,4500], 176 | [210000,1,3,1,45,2,3,4,4,5,6,115785,122904,129847,137277,145533,154105,10478,10478,11078,11078,11678,10478], 177 | [50000,1,3,1,57,3,2,0,0,0,0,12854,12362,13447,13427,13711,14083,0,1600,500,500,600,600], 178 | [30000,1,1,2,41,2,2,2,2,2,0,24357,27453,26718,28168,27579,28321,3500,0,2200,0,1200,1250], 179 | [50000,1,2,2,27,2,-1,-1,-1,-1,2,390,390,780,216,1080,540,390,780,216,864,0,390], 180 | [290000,1,3,1,47,-1,-1,-1,-1,0,-1,1234,396,396,792,396,423,396,396,792,0,423,369], 181 | [250000,2,1,1,34,0,0,2,0,0,0,141223,156858,151841,152803,155997,160220,17994,0,5469,5656,6811,3920], 182 | [60000,2,2,1,46,0,0,0,0,0,0,21148,23803,24908,26034,26655,27756,3000,1500,1500,1000,1500,1500], 183 | [110000,2,1,2,27,0,0,0,0,0,0,101640,104795,104855,74737,76058,77254,5500,3900,3000,2900,3000,2800], 184 | [370000,1,1,2,50,-2,-2,-2,-2,-2,-2,6093,15130,8204,15398,4792,13453,15383,8204,15413,4792,13453,4699], 185 | [100000,1,2,1,27,-1,2,2,0,0,0,102349,96847,58824,29336,22979,-246,3166,0,1330,1398,12,50000], 186 | [90000,2,2,1,35,0,0,0,0,0,2,72112,73854,75526,77317,85852,88290,3500,3500,3652,10000,4000,0], 187 | [50000,2,2,2,22,0,0,0,0,0,0,28040,29092,29366,27737,28318,28806,1510,1442,982,1017,1277,567], 188 | [270000,1,2,2,37,0,0,0,0,0,0,37695,33397,30534,27598,26344,24641,5000,2000,3000,4000,3000,2000], 189 | [300000,2,1,2,30,-1,-1,-1,-1,-1,-1,688,3280,0,4340,2672,800,3288,0,4340,2672,800,746], 190 | [50000,2,2,2,22,-1,0,0,0,0,0,8567,15273,11650,7457,3115,7725,15000,1000,149,0,5000,10000], 191 | [50000,2,1,2,24,1,-2,-2,-2,-2,-2,-709,-709,-709,-2898,-3272,-3272,0,0,0,0,0,0], 192 | [360000,1,1,2,29,1,-2,-1,-1,-2,-2,0,0,77,0,0,0,0,77,0,0,0,0], 193 | [130000,1,3,1,56,1,2,2,2,2,3,64617,65978,67282,68557,72796,71345,3000,3000,3000,5500,0,0], 194 | [80000,1,1,2,30,-2,-1,0,0,0,0,6187,100,600,1438,1919,5380,504,500,1000,500,3500,0], 195 | [50000,1,2,2,30,1,2,0,0,0,2,48860,47801,48363,30221,22877,22361,0,1500,1000,2000,0,2000], 196 | 
[20000,2,2,2,22,0,0,0,0,0,0,16001,12622,13221,13130,14034,14906,1212,1201,500,1500,1500,1000], 197 | [80000,2,2,1,29,0,0,2,0,0,0,77883,81811,80250,61467,10662,11486,5800,1000,600,400,1000,0], 198 | [240000,1,1,2,37,-1,-1,2,0,0,-1,12212,26578,25331,26605,26279,1256,15000,0,2000,0,1256,65935], 199 | [80000,2,3,2,35,0,-1,0,0,0,0,49608,12412,14873,17364,17770,17460,12500,6500,3000,2000,3000,2000], 200 | [500000,2,1,1,47,0,0,0,0,0,0,56422,110616,110340,122967,108834,70064,70010,30357,30000,20000,52183,20000], 201 | [60000,2,2,1,24,0,0,0,0,0,0,58024,57891,48839,18971,19323,19395,2500,1600,3000,1000,737,2000], 202 | [20000,1,2,2,25,0,0,0,0,0,-1,10642,11677,13070,12280,1615,1620,1200,1593,601,135,1824,0], 203 | [100000,2,2,1,38,1,2,0,0,2,0,14483,13961,15323,16268,15868,16448,0,1600,1500,0,1000,1500], 204 | [360000,2,1,2,32,1,-1,-1,-1,-1,-1,2616,57077,5287,68445,13881,16240,57087,5295,68454,13889,16250,38313], 205 | [200000,2,3,2,47,2,2,2,2,2,2,199436,202947,193936,196186,200162,189915,8214,7000,6800,7134,0,6836], 206 | [130000,2,2,1,34,1,-1,0,0,0,0,0,5396,10270,13576,13864,14636,5396,5000,3500,501,1000,2000], 207 | [20000,2,2,2,31,1,5,4,4,3,2,21703,21087,21461,20835,20219,20487,0,1000,0,0,760,0], 208 | [310000,1,1,2,32,0,0,0,0,0,0,59901,62147,62102,65875,60387,43328,10020,6031,10057,5028,5060,4223], 209 | [60000,2,1,2,27,2,0,0,0,2,0,19625,20347,21669,23005,22499,22873,1342,1664,2000,0,900,846], 210 | [180000,2,1,2,29,-1,-1,-1,-2,-1,0,11386,199,0,0,17227,17042,199,0,0,17227,341,5114], 211 | [180000,2,1,2,24,-1,-1,2,0,0,-2,14670,22087,21282,10200,0,0,37867,0,200,0,0,0], 212 | [50000,1,2,1,36,0,0,0,0,-1,-1,47790,18114,18250,-14,72,658,2000,1000,2000,500,1000,20011], 213 | [50000,2,1,2,24,1,2,2,2,2,2,36166,37188,37680,38462,39228,40035,1900,1400,1700,1532,1600,0], 214 | [150000,2,2,1,34,-2,-2,-2,-2,-2,-2,0,0,0,116,0,1500,0,0,116,0,1500,0], 215 | [20000,2,1,2,22,0,0,0,0,-1,0,18553,19446,19065,8332,18868,19247,1500,1032,541,20000,693,1000], 216 | [500000,2,1,1,34,-2,-2,-2,-1,-1,-1,412,138,2299,1251,1206,1151,138,2299,1251,1206,1151,15816], 217 | [30000,2,3,2,22,1,2,2,0,0,0,29010,29256,28122,29836,1630,0,1000,85,1714,104,0,0], 218 | [180000,2,1,1,38,-2,-2,-2,-2,-2,-2,750,0,0,0,0,0,0,0,0,0,0,0] 219 | ] 220 | 221 | y=[1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0, 222 | 0,0,0,0,1,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, 223 | 0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, 224 | 0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0] 225 | 226 | y2d=[1,1,0,2,0,2,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,1,1,1,0,2,1,0,0,0,0,1,0,0,2,0,2,0,1,0,0,0,2,0,0,1,1,1,0,0,1,0,0,2,0,0, 227 | 0,0,0,0,1,0,1,1,0,1,1,0,0,2,0,1,0,2,2,0,2,0,1,1,0,0,1,0,0,0,1,0,2,2,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,2,0,0,0,0,2,0, 228 | 0,0,0,1,0,1,0,0,1,1,0,1,0,2,0,1,1,0,0,2,0,0,0,0,0,0,1,0,1,0,2,0,0,0,1,0,2,0,0,2,0,0,2,0,0,0,1,0,0,0,0,0,0,2,0,0, 229 | 0,0,0,2,0,1,0,1,0,0,0,2,0,0,0,1,0,1,0,1,0,2,0,1,0,0,0,1,0,2,0,1,0] 230 | 231 | 232 | weight=[1. if d%2==0 else 2. for d in range (len(y))] 233 | 234 | x_train=X[:100] 235 | y_train=y[:100] 236 | w_train=weight[:100] 237 | x_test=X[100:] 238 | y_test=y[100:] 239 | w_test=weight[100:] 240 | 241 | def gini(y_true, y_pred, sample_weight=None): 242 | return (roc_auc_score(y_true, y_pred, sample_weight=sample_weight)*2 ) -1. 
243 | 244 | 245 | 246 | def R(y_true, y_pred, sample_weight=None): 247 | 248 | sx=0.0 249 | sy=0.0 250 | sx_2=0.0 251 | sy_2=0.0 252 | sxy=0.0 253 | n=0.0 254 | if sample_weight is None: 255 | for i in range(0,len(y_pred)): 256 | sx=sx+y_pred[i] 257 | sy=sy+y_true[i] 258 | sx_2=sx_2+(y_pred[i]*y_pred[i]) 259 | sy_2=sy_2+(y_true[i]*y_true[i]) 260 | sxy=sxy+(y_pred[i]*y_true[i]) 261 | n+=1.0 262 | else : 263 | for i in range(0,len(y_pred)): 264 | sx=sx+y_pred[i]*sample_weight[i] 265 | sy=sy+y_true[i]*sample_weight[i] 266 | sx_2=sx_2+(y_pred[i]*y_pred[i])*sample_weight[i] 267 | sy_2=sy_2+(y_true[i]*y_true[i])*sample_weight[i] 268 | sxy=sxy+(y_pred[i]*y_true[i])*sample_weight[i] 269 | n+=sample_weight[i] 270 | cor=(n*sxy - sx*sy)/(np.sqrt(np.abs(n*sx_2-(sx*sx))*np.abs(n*sy_2-(sy*sy)))) 271 | return cor 272 | 273 | def test_pystacknet(): 274 | 275 | Xn=np.array(x_train) 276 | yn=np.array(y_train) 277 | print (Xn.shape, yn.shape) 278 | 279 | 280 | ##################################################################################### 281 | ############################### CLASSIFICATION ##################################### 282 | ##################################################################################### 283 | 284 | 285 | models=[ 286 | 287 | [RandomForestClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 288 | ExtraTreesClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 289 | GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 290 | LogisticRegression(random_state=1) 291 | ], 292 | 293 | [RandomForestClassifier (n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)] 294 | 295 | 296 | ] 297 | 298 | ################## no proba metric ############################### 299 | model=StackNetClassifier(models, metric="accuracy", folds=4, restacking=False, 300 | use_retraining=True, use_proba=True, random_state=12345, 301 | n_jobs=1, verbose=1) 302 | 303 | model.fit(x_train,y_train ) 304 | preds=model.predict_proba(x_test)[:,1] 305 | print ("accuracy test 1 , auc %f " % (roc_auc_score(y_test,preds))) 306 | 307 | ################## proba metric ############################### 308 | 309 | model=StackNetClassifier(models, metric="auc", folds=4, restacking=False, 310 | use_retraining=True, use_proba=True, random_state=12345, 311 | n_jobs=1, verbose=1) 312 | 313 | model.fit(x_train,y_train ) 314 | preds=model.predict_proba(x_test)[:,1] 315 | print ("auc test 2 , auc %f " % (roc_auc_score(y_test,preds))) 316 | 317 | ################## custom metric ############################### 318 | 319 | model=StackNetClassifier(models, metric=gini, folds=4, restacking=False, 320 | use_retraining=True, use_proba=True, random_state=12345, 321 | n_jobs=1, verbose=1) 322 | 323 | model.fit(x_train,y_train ) 324 | preds=model.predict_proba(x_test)[:,1] 325 | print ("custom metric gini test 3 , auc %f " % (gini(y_test,preds))) 326 | 327 | ################## numpy input ############################### 328 | 329 | model=StackNetClassifier(models, metric="auc", folds=4, restacking=False, 330 | use_retraining=True, use_proba=True, random_state=12345, 331 | n_jobs=1, verbose=1) 332 | 333 | model.fit(Xn,yn ) 334 | preds=model.predict_proba(np.array(x_test))[:,1] 335 | print ("numpy auc test 4 , auc %f " % (roc_auc_score(y_test,preds))) 336 | 337 | ################## csr_matrix input ############################### 338 | 339 |
model=StackNetClassifier(models, metric="auc", folds=4, restacking=False, 340 | use_retraining=True, use_proba=True, random_state=12345, 341 | n_jobs=1, verbose=1) 342 | 343 | model.fit(csr_matrix( Xn) ,yn ) 344 | preds=model.predict_proba(csr_matrix(x_test))[:,1] 345 | print ("csr auc test 5 , auc %f " % (roc_auc_score(y_test,preds))) 346 | 347 | ################## restacking ############################### 348 | 349 | model=StackNetClassifier(models, metric="auc", folds=4, restacking=True, 350 | use_retraining=True, use_proba=True, random_state=12345, 351 | n_jobs=1, verbose=1) 352 | 353 | model.fit(csr_matrix( Xn) ,yn ) 354 | preds=model.predict_proba(csr_matrix(x_test))[:,1] 355 | print ("restacking auc test 6 , auc %f " % (roc_auc_score(y_test,preds))) 356 | 357 | ################## without retraining ############################### 358 | 359 | model=StackNetClassifier(models, metric="auc", folds=4, restacking=True, 360 | use_retraining=False, use_proba=True, random_state=12345, 361 | n_jobs=1, verbose=1) 362 | 363 | model.fit(csr_matrix( Xn) ,yn ) 364 | preds=model.predict_proba(csr_matrix(x_test))[:,1] 365 | print ("no retraining auc test 7 , auc %f " % (roc_auc_score(y_test,preds))) 366 | 367 | ################## custom k folder object ############################### 368 | 369 | 370 | k=StratifiedKFold(n_splits=4, shuffle=True, random_state=1251).split(Xn, yn) 371 | 372 | model=StackNetClassifier(models, metric="auc", folds=k, restacking=True, 373 | use_retraining=False, use_proba=True, random_state=12345, 374 | n_jobs=1, verbose=1) 375 | 376 | model.fit(csr_matrix( Xn) ,yn ) 377 | preds=model.predict_proba(csr_matrix(x_test))[:,1] 378 | print ("custom kfold auc test 8 , auc %f " % (roc_auc_score(y_test,preds))) 379 | 380 | 381 | 382 | ################## regressor in base level ############################### 383 | models_reg=[ 384 | 385 | [RandomForestClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 386 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 387 | GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 388 | LogisticRegression(random_state=1) 389 | ], 390 | 391 | [RandomForestClassifier (n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)] 392 | 393 | 394 | ] 395 | 396 | model=StackNetClassifier(models_reg, metric="auc", folds=4, restacking=False, 397 | use_retraining=True, use_proba=True, random_state=12345, 398 | n_jobs=1, verbose=1) 399 | 400 | model.fit(x_train,y_train ) 401 | preds=model.predict_proba(x_test)[:,1] 402 | print ("with regressor test 9 , auc %f " % (roc_auc_score(y_test,preds))) 403 | 404 | 405 | ################## transformer in base level ############################### 406 | models_pca=[ 407 | 408 | [RandomForestClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 409 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 410 | GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 411 | LogisticRegression(random_state=1), 412 | PCA(n_components=4,random_state=1) 413 | ], 414 | 415 | [RandomForestClassifier (n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)] 416 | 417 | 418 | ] 419 | 420 | model=StackNetClassifier(models_pca, metric="auc", folds=4, restacking=False, 421 | use_retraining=True, use_proba=True,
random_state=12345, 422 | n_jobs=1, verbose=1) 423 | 424 | model.fit(x_train,y_train ) 425 | preds=model.predict_proba(x_test)[:,1] 426 | print ("with PCA test 10 , auc %f " % (roc_auc_score(y_test,preds))) 427 | 428 | 429 | ################## multiclass metric ############################### 430 | 431 | model=StackNetClassifier(models, metric="logloss", folds=4, restacking=False, 432 | use_retraining=True, use_proba=True, random_state=12345, 433 | n_jobs=1, verbose=1) 434 | 435 | model.fit(x_train,y2d[:100] ) 436 | preds=model.predict_proba(x_test) 437 | print ("logloss test 11 , logloss %f " % (log_loss(y2d[100:],preds))) 438 | 439 | 440 | 441 | ################## 3 levels ############################### 442 | 443 | models3=[ 444 | 445 | [RandomForestClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 446 | ExtraTreesClassifier (n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1), 447 | GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 448 | LogisticRegression(random_state=1) 449 | ], 450 | 451 | [GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 452 | LogisticRegression(random_state=1) 453 | ], 454 | 455 | [RandomForestClassifier (n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)] 456 | 457 | 458 | ] 459 | 460 | 461 | model=StackNetClassifier(models3, metric="logloss", folds=4, restacking=False, 462 | use_retraining=True, use_proba=True, random_state=12345, 463 | n_jobs=1, verbose=1) 464 | 465 | model.fit(x_train,y2d[:100] ) 466 | preds=model.predict_proba(x_test) 467 | print ("3 levels test 12 , logloss %f " % (log_loss(y2d[100:],preds))) 468 | 469 | 470 | ################## with sample_weight ############################### 471 | 472 | model=StackNetClassifier(models, metric="auc", folds=4, restacking=False, 473 | use_retraining=True, use_proba=True, random_state=12345, 474 | n_jobs=1, verbose=1) 475 | 476 | model.fit(x_train,y_train , sample_weight=w_train) 477 | preds=model.predict_proba(x_test)[:,1] 478 | print ("auc weighted test 13 , auc %f " % (roc_auc_score(y_test,preds, sample_weight=w_test))) 479 | 480 | 481 | ##################################################################################### 482 | ############################### REGRESSION ######################################### 483 | ##################################################################################### 484 | 485 | 486 | 487 | models=[ 488 | 489 | [RandomForestRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 490 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 491 | GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 492 | Ridge(random_state=1) 493 | ], 494 | 495 | [RandomForestRegressor (n_estimators=200, max_depth=5, max_features=0.5, random_state=1)] 496 | 497 | 498 | ] 499 | 500 | ################## rmse metric ############################### 501 | model=StackNetRegressor(models, metric="rmse", folds=4, restacking=False, 502 | use_retraining=True, random_state=12345, 503 | n_jobs=1, verbose=1) 504 | 505 | model.fit(x_train,y_train ) 506 | preds=model.predict(x_test) 507 | print ("rmse test 1 , %f " % (rmse(y_test,preds))) 508 | 509 | ################## mae metric ############################### 510 | 511 | model=StackNetRegressor(models,
metric="mae", folds=4, restacking=False, 512 | use_retraining=True, random_state=12345, 513 | n_jobs=1, verbose=1) 514 | 515 | model.fit(x_train,y_train ) 516 | preds=model.predict(x_test) 517 | print ("mae test 2 , %f " % (mae(y_test,preds))) 518 | 519 | ################## custom metric ############################### 520 | 521 | model=StackNetRegressor(models, metric=R, folds=4, restacking=False, 522 | use_retraining=True, random_state=12345, 523 | n_jobs=1, verbose=1) 524 | 525 | model.fit(x_train,y_train ) 526 | preds=model.predict(x_test) 527 | print ("custom metric R test 3 %f " % (R(y_test,preds))) 528 | 529 | ################## numpy input ############################### 530 | 531 | model=StackNetRegressor(models, metric="rmse", folds=4, restacking=False, 532 | use_retraining=True, random_state=12345, 533 | n_jobs=1, verbose=1) 534 | 535 | model.fit(Xn,yn ) 536 | preds=model.predict(x_test) 537 | print ("numpy rmse test 4 %f " % (rmse(y_test,preds))) 538 | 539 | ################## csr_matrix input ############################### 540 | 541 | model=StackNetRegressor(models, metric="rmse", folds=4, restacking=False, 542 | use_retraining=True, random_state=12345, 543 | n_jobs=1, verbose=1) 544 | 545 | model.fit(csr_matrix( Xn) ,yn ) 546 | preds=model.predict(x_test) 547 | print ("csr test 5 , rmse %f " % (rmse(y_test,preds))) 548 | 549 | ################## restacking ############################### 550 | 551 | model=StackNetRegressor(models, metric="rmse", folds=4, restacking=True, 552 | use_retraining=True, random_state=12345, 553 | n_jobs=1, verbose=1) 554 | 555 | model.fit(csr_matrix( Xn) ,yn ) 556 | preds=model.predict(x_test) 557 | print ("restacking rmse test 6 , rmse %f " % (rmse(y_test,preds))) 558 | 559 | ################## without retraining ############################### 560 | 561 | model=StackNetRegressor(models, metric="rmse", folds=4, restacking=True, 562 | use_retraining=False, random_state=12345, 563 | n_jobs=1, verbose=1) 564 | 565 | model.fit(csr_matrix( Xn) ,yn ) 566 | preds=model.predict(x_test) 567 | print ("no retraining rmse test 7, rmse %f " % (rmse(y_test,preds))) 568 | 569 | ################## custom k folder object ############################### 570 | 571 | 572 | k=StratifiedKFold(yn, n_folds=4, shuffle=True, random_state=1251) 573 | 574 | model=StackNetRegressor(models, metric="rmse", folds=k, restacking=True, 575 | use_retraining=False,random_state=12345, 576 | n_jobs=1, verbose=1) 577 | 578 | model.fit(csr_matrix( Xn) ,yn ) 579 | preds=model.predict(x_test) 580 | print ("custom kfold rmse test 8, %f " % (rmse(y_test,preds))) 581 | 582 | 583 | 584 | ################## classifier in base level ############################### 585 | models_class=[ 586 | 587 | [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 588 | ExtraTreesClassifier (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 589 | GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 590 | Ridge(random_state=1) 591 | ], 592 | 593 | [RandomForestRegressor (n_estimators=200, max_depth=5, max_features=0.5, random_state=1)] 594 | 595 | 596 | ] 597 | 598 | model=StackNetRegressor(models_class, metric="rmse", folds=4, restacking=False, 599 | use_retraining=True, random_state=12345, 600 | n_jobs=1, verbose=1) 601 | 602 | model.fit(x_train,y_train ) 603 | preds=model.predict(x_test) 604 | print ("with regressor test 9, rmse %f " % (rmse(y_test,preds))) 605 | 606 | 607 | ################## 
transformer in base level ############################### 608 | models_pca=[ 609 | 610 | [RandomForestRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 611 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 612 | GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 613 | Ridge(random_state=1), 614 | PCA(n_components=4,random_state=1) 615 | ], 616 | 617 | [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)] 618 | 619 | 620 | ] 621 | 622 | model=StackNetRegressor(models_pca, metric="rmse", folds=4, restacking=False, 623 | use_retraining=True, random_state=12345, 624 | n_jobs=1, verbose=1) 625 | 626 | model.fit(x_train,y_train ) 627 | preds=model.predict(x_test) 628 | print ("with PCA test 10 , rmse %f " % (rmse(y_test,preds))) 629 | 630 | 631 | ################## 2d target ############################### 632 | models2=[ 633 | 634 | [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 635 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 636 | #GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 637 | Ridge(random_state=1) 638 | ], 639 | 640 | 641 | [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)] 642 | 643 | 644 | ] 645 | 646 | 647 | model=StackNetRegressor(models2, metric="rmse", folds=4, restacking=False, 648 | use_retraining=True, random_state=12345, 649 | n_jobs=1, verbose=1) 650 | 651 | model.fit(x_train,np.column_stack((y_train,y2d[:100] ))) 652 | preds=model.predict(x_test) 653 | print ("rmse test 11 , rmse %f " % (rmse(np.column_stack((y_test,y2d[100:])),preds))) 654 | 655 | 656 | 657 | ################## 3 levels ############################### 658 | 659 | models3=[ 660 | 661 | [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 662 | ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 663 | #GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1), 664 | Ridge(random_state=1) 665 | ], 666 | 667 | [ExtraTreesRegressor (n_estimators=100, max_depth=5, max_features=0.5, random_state=1), 668 | Ridge(random_state=1) 669 | ], 670 | 671 | [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)] 672 | 673 | 674 | ] 675 | 676 | 677 | model=StackNetRegressor(models3, metric="rmse", folds=4, restacking=False, 678 | use_retraining=True, random_state=12345, 679 | n_jobs=1, verbose=1) 680 | 681 | model.fit(x_train,y2d[:100] ) 682 | preds=model.predict(x_test) 683 | print ("3 levels test 12 , rmse %f " % (rmse(y2d[100:],preds))) 684 | 685 | 686 | ################## with sample_weight ############################### 687 | model=StackNetRegressor(models, metric="rmse", folds=4, restacking=False, 688 | use_retraining=True, random_state=12345, 689 | n_jobs=1, verbose=1) 690 | 691 | model.fit(x_train,y_train,sample_weight=w_train ) 692 | preds=model.predict(x_test) 693 | print ("rmse weighted test 13 , %f " % (rmse(y_test,preds, sample_weight=w_test))) 694 | 695 | 696 | if __name__ == '__main__': 697 | test_pystacknet() -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup
2 | 3 | try: 4 | from pypandoc import convert 5 | read_md = lambda f: convert(f, 'rst') 6 | except ImportError: 7 | print("warning: pypandoc module not found, could not convert Markdown to RST") 8 | read_md = lambda f: open(f, 'r').read() 9 | 10 | setup( 11 | name='pystacknet', 12 | version='0.0.1', 13 | 14 | author='Marios Michailidis', 15 | author_email='kazanovassoftware@gmail.com', 16 | 17 | packages=['pystacknet', 18 | 'pystacknet.test'], 19 | 20 | url='https://github.com/h2oai/pystacknet', 21 | license='LICENSE.txt', 22 | 23 | description='StackNet framework for python', 24 | long_description=read_md('README.md'), 25 | 26 | install_requires=[ 27 | 'numpy >= 1.14.0', 28 | 'scipy >= 1.1.0', 29 | 'scikit-learn >= 0.19.1' 30 | ], 31 | 32 | classifiers=[ 33 | 34 | 35 | 'Programming Language :: Python :: 3', 36 | 'Programming Language :: Python :: 3.2', 37 | 'Programming Language :: Python :: 3.3', 38 | 'Programming Language :: Python :: 3.4', 39 | 'Programming Language :: Python :: 3.5', 40 | 'Programming Language :: Python :: 3.6' 41 | ], 42 | ) --------------------------------------------------------------------------------
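For completeness, here is a minimal, hypothetical sketch of how the `fit_oof` and `predict_up_to` methods defined in `pystacknet/pystacknet.py` above can be exercised. The toy data, the shapes, and the model list are illustrative assumptions rather than part of the repository:

```python
# Hypothetical usage sketch for fit_oof / predict_up_to (illustrative only).
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from pystacknet.pystacknet import StackNetClassifier

rng = np.random.RandomState(0)
X = rng.rand(200, 10)                      # toy features (assumption)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)  # toy binary target (assumption)

models = [
    [RandomForestClassifier(n_estimators=50, random_state=1),
     LogisticRegression(random_state=1)],
    [LogisticRegression(random_state=1)],
]

model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=12345, n_jobs=1, verbose=1)

# fit_oof trains the net like fit(), but returns a list with one array per
# level holding that level's out-of-fold predictions.
oof_per_level = model.fit_oof(X, y)
print([o.shape for o in oof_per_level])

# predict_up_to scores new data and returns the outputs of every level up to
# (and including) level lev.
outputs = model.predict_up_to(rng.rand(50, 10), lev=2)
print([o.shape for o in outputs])
```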