├── README.md ├── pylrpredictor ├── __init__.py ├── curvefunctions.py ├── curvemodels.py ├── ensemblecurvemodel.py ├── mcmcmodelplotter.py ├── modelfactory.py ├── terminationcriterion.py └── tests │ ├── __init__.py │ ├── test_curvemodels.py │ └── test_terminationcriterion.py └── setup.py /README.md: -------------------------------------------------------------------------------- 1 | # pylearningcurvepredictor 2 | predicting learning curves in python 3 | 4 | 5 | 6 | ## Paper 7 | Tobias Domhan, Jost Tobias Springenberg, Frank Hutter. Speeding up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves. IJCAI, 2015. 8 | 9 | ## Installation 10 | 11 | ## Usage 12 | ### Standalone 13 | 14 | ### caffe 15 | 16 | 17 | ## License 18 | TBD 19 | -------------------------------------------------------------------------------- /pylrpredictor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tdomhan/pylearningcurvepredictor/44348022037e27939f8dcc327f6252c09dd741c2/pylrpredictor/__init__.py -------------------------------------------------------------------------------- /pylrpredictor/curvefunctions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | #all the models that we considered at some point 4 | all_models = {} 5 | model_defaults = {} 6 | display_name_mapping = {} 7 | 8 | 9 | def pow3(x, c, a, alpha): 10 | return c - a * x**(-alpha) 11 | all_models["pow3"] = pow3 12 | model_defaults["pow3"] = {"c": 0.84, "a": 0.52, "alpha": 0.01} 13 | display_name_mapping["pow3"] = "pow$_3$" 14 | 15 | def linear(x, a, b): 16 | return a*x + b 17 | #models["linear"] = linear 18 | all_models["linear"] = linear 19 | 20 | """ 21 | Source: curve expert 22 | """ 23 | def log_power(x, a, b, c): 24 | #logistic power 25 | return a/(1.+(x/np.exp(b))**c) 26 | all_models["log_power"] = log_power 27 | model_defaults["log_power"] = {"a": 0.77, "c": -0.51, "b": 2.98} 28 | display_name_mapping["log_power"] = "log power" 29 | 30 | def weibull(x, alpha, beta, kappa, delta): 31 | """ 32 | Weibull modell 33 | 34 | http://www.pisces-conservation.com/growthhelp/index.html?morgan_mercer_floden.htm 35 | 36 | alpha: upper asymptote 37 | beta: lower asymptote 38 | k: growth rate 39 | delta: controls the x-ordinate for the point of inflection 40 | """ 41 | return alpha - (alpha - beta) * np.exp(-(kappa * x)**delta) 42 | all_models["weibull"] = weibull 43 | model_defaults["weibull"] = {"alpha": .7, "beta": 0.1, "kappa": 0.01, "delta": 1} 44 | display_name_mapping["weibull"] = "Weibull" 45 | 46 | 47 | def mmf(x, alpha, beta, kappa, delta): 48 | """ 49 | Morgan-Mercer-Flodin 50 | 51 | description: 52 | Nonlinear Regression page 342 53 | http://bit.ly/1jodG17 54 | http://www.pisces-conservation.com/growthhelp/index.html?morgan_mercer_floden.htm 55 | 56 | alpha: upper asymptote 57 | kappa: growth rate 58 | beta: initial value 59 | delta: controls the point of inflection 60 | """ 61 | return alpha - (alpha - beta) / (1. 
+ (kappa * x)**delta) 62 | all_models["mmf"] = mmf 63 | model_defaults["mmf"] = {"alpha": .7, "kappa": 0.01, "beta": 0.1, "delta": 5} 64 | display_name_mapping["mmf"] = "MMF" 65 | 66 | def janoschek(x, a, beta, k, delta): 67 | """ 68 | http://www.pisces-conservation.com/growthhelp/janoschek.htm 69 | """ 70 | return a - (a - beta) * np.exp(-k*x**delta) 71 | all_models["janoschek"] = janoschek 72 | model_defaults["janoschek"] = {"a": 0.73, "beta": 0.07, "k": 0.355, "delta": 0.46} 73 | display_name_mapping["janoschek"] = "Janoschek" 74 | 75 | def ilog2(x, c, a): 76 | x = 1 + x 77 | assert(np.all(x>1)) 78 | return c - a / np.log(x) 79 | all_models["ilog2"] = ilog2 80 | model_defaults["ilog2"] = {"a": 0.43, "c": 0.78} 81 | display_name_mapping["ilog2"] = "ilog$_2$" 82 | 83 | 84 | def dr_hill_zero_background(x, theta, eta, kappa): 85 | return (theta* x**eta) / (kappa**eta + x**eta) 86 | all_models["dr_hill_zero_background"] = dr_hill_zero_background 87 | model_defaults["dr_hill_zero_background"] = {"theta": 0.772320, "eta": 0.586449, "kappa": 2.460843} 88 | display_name_mapping["dr_hill_zero_background"] = "Hill$_3$" 89 | 90 | 91 | def logx_linear(x, a, b): 92 | x = np.log(x) 93 | return a*x + b 94 | all_models["logx_linear"] = logx_linear 95 | model_defaults["logx_linear"] = {"a": 0.378106, "b": 0.046506} 96 | display_name_mapping["logx_linear"] = "log x linear" 97 | 98 | 99 | def vap(x, a, b, c): 100 | """ Vapor pressure model """ 101 | return np.exp(a+b/x+c*np.log(x)) 102 | all_models["vap"] = vap 103 | model_defaults["vap"] = {"a": -0.622028, "c": 0.042322, "b": -0.470050} 104 | display_name_mapping["vap"] = "vapor pressure" 105 | 106 | 107 | 108 | def loglog_linear(x, a, b): 109 | x = np.log(x) 110 | return np.log(a*x + b) 111 | all_models["loglog_linear"] = loglog_linear 112 | display_name_mapping["loglog_linear"] = "log log linear" 113 | 114 | 115 | #Models that we chose not to use in the ensembles/model combinations: 116 | 117 | #source: http://aclweb.org/anthology//P/P12/P12-1003.pdf 118 | def exp3(x, c, a, b): 119 | return c - np.exp(-a*x+b) 120 | all_models["exp3"] = exp3 121 | model_defaults["exp3"] = {"c": 0.7, "a": 0.01, "b": -1} 122 | display_name_mapping["exp3"] = "exp$_3$" 123 | 124 | 125 | def exp4(x, c, a, b, alpha): 126 | return c - np.exp(-a*(x**alpha)+b) 127 | all_models["exp4"] = exp4 128 | model_defaults["exp4"] = {"c": 0.7, "a": 0.8, "b":-0.8, "alpha": 0.3} 129 | display_name_mapping["exp4"] = "exp$_4$" 130 | 131 | 132 | #not bounded! 
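#(Illustrative note, not in the original source: log(a*x + b) has no upper
#asymptote; e.g. with a=1, b=1 it reaches log(11) ~ 2.40 at x=10 and
#log(1001) ~ 6.91 at x=1000, growing without bound as x increases, which is
#presumably why logy_linear below is commented out.)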
133 | #def logy_linear(x, a, b): 134 | # return np.log(a*x + b) 135 | #all_models["logy_linear"] = logy_linear 136 | 137 | def pow2(x, a, alpha): 138 | return a * x**(-alpha) 139 | all_models["pow2"] = pow2 140 | model_defaults["pow2"] = {"a": 0.1, "alpha": -0.3} 141 | display_name_mapping["pow2"] = "pow$_2$" 142 | 143 | def pow4(x, c, a, b, alpha): 144 | return c - (a*x+b)**-alpha 145 | all_models["pow4"] = pow4 146 | model_defaults["pow4"] = {"alpha": 0.1, "a": 200, "b": 0., "c": 0.8} 147 | display_name_mapping["pow4"] = "pow$_4$" 148 | 149 | 150 | 151 | def sat_growth(x, a, b): 152 | return a * x / (b + x) 153 | all_models["sat_growth"] = sat_growth 154 | model_defaults["sat_growth"] = {"a": 0.7, "b": 20} 155 | display_name_mapping["sat_growth"] = "saturated growth rate" 156 | 157 | 158 | def dr_hill(x, alpha, theta, eta, kappa): 159 | return alpha + (theta*(x**eta)) / (kappa**eta + x**eta) 160 | all_models["dr_hill"] = dr_hill 161 | model_defaults["dr_hill"] = {"alpha": 0.1, "theta": 0.772320, "eta": 0.586449, "kappa": 2.460843} 162 | display_name_mapping["dr_hill"] = "Hill$_4$" 163 | 164 | 165 | 166 | def gompertz(x, a, b, c): 167 | """ 168 | Gompertz growth function. 169 | 170 | sigmoidal family 171 | a is the upper asymptote, since 172 | b, c are negative numbers 173 | b sets the displacement along the x axis (translates the graph to the left or right) 174 | c sets the growth rate (y scaling) 175 | 176 | e.g. used to model the growth of tumors 177 | 178 | http://en.wikipedia.org/wiki/Gompertz_function 179 | """ 180 | return a*np.exp(-b*np.exp(-c*x)) 181 | #return a + b * np.exp(np.exp(-k*(x-i))) 182 | all_models["gompertz"] = gompertz 183 | model_defaults["gompertz"] = {"a": 0.8, "b": 1000, "c": 0.05} 184 | display_name_mapping["gompertz"] = "Gompertz" 185 | 186 | 187 | def logistic_curve(x, a, k, b): 188 | """ 189 | a: asymptote 190 | k: 191 | b: inflection point 192 | http://www.pisces-conservation.com/growthhelp/logistic_curve.htm 193 | """ 194 | return a / (1. + np.exp(-k*(x-b))) 195 | all_models["logistic_curve"] = logistic_curve 196 | model_defaults["logistic_curve"] = {"a": 0.8, "k": 0.01, "b": 1.} 197 | display_name_mapping["logistic_curve"] = "logistic curve" 198 | 199 | 200 | 201 | def bertalanffy(x, a, k): 202 | """ 203 | a: asymptote 204 | k: growth rate 205 | http://www.pisces-conservation.com/growthhelp/von_bertalanffy.htm 206 | """ 207 | return a * (1. 
- np.exp(-k*x)) 208 | all_models["bertalanffy"] = bertalanffy 209 | model_defaults["bertalanffy"] = {"a": 0.8, "k": 0.01} 210 | display_name_mapping["bertalanffy"] = "Bertalanffy" 211 | 212 | 213 | 214 | curve_combination_models_old = ["vap", "ilog2", "weibull", "pow3", "pow4", "loglog_linear", 215 | "mmf", "janoschek", "dr_hill_zero_background", "log_power", 216 | "exp4"] 217 | 218 | 219 | curve_combination_models = ["weibull", "pow4", "mmf", "pow3", "loglog_linear", 220 | "janoschek", "dr_hill_zero_background", "log_power", "exp4"] 221 | 222 | curve_ensemble_models = ["vap", "ilog2", "weibull", "pow3", "pow4", "loglog_linear", 223 | "mmf", "janoschek", "dr_hill_zero_background", "log_power", 224 | "exp4"] 225 | -------------------------------------------------------------------------------- /pylrpredictor/curvemodels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import emcee 3 | import inspect 4 | import traceback 5 | from scipy.stats import norm, kde 6 | from scipy.optimize import curve_fit, leastsq, fmin_bfgs, fmin_l_bfgs_b, nnls 7 | import lmfit 8 | import logging 9 | from scipy.misc import logsumexp 10 | from curvefunctions import all_models 11 | 12 | def recency_weights(num): 13 | if num == 1: 14 | return np.ones(1) 15 | else: 16 | recency_weights = [10**(1./num)] * num 17 | recency_weights = recency_weights**(np.arange(0, num)) 18 | return recency_weights 19 | 20 | def masked_mean_x_greater_than(posterior_distribution, y): 21 | """ 22 | P(E[f(x)] > E[y] | Data) 23 | """ 24 | predictions = np.ma.masked_invalid(posterior_distribution) 25 | return np.sum(predictions > y) / float(np.sum(~predictions.mask)) 26 | 27 | 28 | class CurveModel(object): 29 | 30 | def __init__(self, 31 | function, 32 | function_der=None, 33 | min_vals={}, 34 | max_vals={}, 35 | default_vals={}): 36 | """ 37 | function: the function to be fit 38 | function_der: derivative of that function 39 | """ 40 | self.function = function 41 | if function_der != None: 42 | raise NotImplementedError("function derivate is not implemented yet...sorry!") 43 | self.function_der = function_der 44 | assert isinstance(min_vals, dict) 45 | self.min_vals = min_vals.copy() 46 | assert isinstance(max_vals, dict) 47 | self.max_vals = max_vals.copy() 48 | function_args = inspect.getargspec(function).args 49 | assert "x" in function_args, "The function needs 'x' as a parameter." 50 | for default_param_name in default_vals.keys(): 51 | if default_param_name == "sigma": 52 | continue 53 | msg = "function %s doesn't take default param %s" % (function.__name__, default_param_name) 54 | assert default_param_name in function_args, msg 55 | self.function_params = [param for param in function_args if param != 'x'] 56 | #set default values: 57 | self.default_vals = default_vals.copy() 58 | for param_name in self.function_params: 59 | if param_name not in default_vals: 60 | print "setting function parameter %s to default of 1.0 for function %s" % (param_name, 61 | self.function.__name__) 62 | self.default_vals[param_name] = 1.0 63 | self.all_param_names = [param for param in self.function_params] 64 | self.all_param_names.append("sigma") 65 | self.name = self.function.__name__ 66 | self.ndim = len(self.all_param_names) 67 | 68 | #uniform noise prior over interval: 69 | if "sigma" not in self.min_vals: 70 | self.min_vals["sigma"] = 0. 
71 | if "sigma" not in self.max_vals: 72 | self.max_vals["sigma"] = 1.0 73 | if "sigma" not in self.default_vals: 74 | self.default_vals["sigma"] = 0.05 75 | 76 | def default_function_param_array(self): 77 | return np.asarray([self.default_vals[param_name] for param_name in self.function_params]) 78 | 79 | def are_params_in_bounds(self, theta): 80 | """ 81 | Are the parameters in their respective bounds? 82 | """ 83 | in_bounds = True 84 | 85 | for param_name, param_value in zip(self.all_param_names, theta): 86 | if param_name in self.min_vals: 87 | if param_value < self.min_vals[param_name]: 88 | in_bounds = False 89 | if param_name in self.max_vals: 90 | if param_value > self.max_vals[param_name]: 91 | in_bounds = False 92 | return in_bounds 93 | 94 | def split_theta(self, theta): 95 | """Split theta into the function parameters (dict) and sigma. """ 96 | params = {} 97 | sigma = None 98 | for param_name, param_value in zip(self.all_param_names, theta): 99 | if param_name in self.function_params: 100 | params[param_name] = param_value 101 | elif param_name == "sigma": 102 | sigma = param_value 103 | return params, sigma 104 | 105 | def split_theta_to_array(self, theta): 106 | """Split theta into the function parameters (array) and sigma. """ 107 | params = theta[:-1] 108 | sigma = theta[-1] 109 | return params, sigma 110 | 111 | def fit(self, x, y): 112 | raise NotImplementedError() 113 | 114 | def predict(self, x): 115 | raise NotImplementedError() 116 | 117 | def predict_given_theta(self, x, theta): 118 | """ 119 | Make predictions given a single theta 120 | """ 121 | params, sigma = self.split_theta(theta) 122 | predictive_mu = self.function(x, **params) 123 | return predictive_mu, sigma 124 | 125 | def likelihood(self, x, y): 126 | """ 127 | for each y_i in y: 128 | p(y_i|x, model) 129 | """ 130 | params, sigma = self.split_theta(self.ml_params) 131 | return norm.pdf(y-self.function(x, **params), loc=0, scale=sigma) 132 | 133 | 134 | class MLCurveModel(CurveModel): 135 | """ 136 | ML fit of a curve. 137 | """ 138 | 139 | def __init__(self, recency_weighting=True, **kwargs): 140 | super(MLCurveModel, self).__init__(**kwargs) 141 | 142 | #Maximum Likelihood values of the parameters 143 | self.ml_params = None 144 | self.recency_weighting = recency_weighting 145 | 146 | def fit(self, x, y, weights=None, start_from_default=True): 147 | """ 148 | weights: None or weight for each sample. 149 | """ 150 | return self.fit_ml(x, y, weights, start_from_default) 151 | 152 | def predict(self, x): 153 | #assert len(x.shape) == 1 154 | params, sigma = self.split_theta_to_array(self.ml_params) 155 | return self.function(x, *params) 156 | #return np.asarray([self.function(x_pred, **params) for x_pred in x]) 157 | 158 | def fit_ml(self, x, y, weights, start_from_default): 159 | """ 160 | non-linear least-squares fit of the data. 161 | 162 | First tries Levenberg-Marquardt and falls back 163 | to BFGS in case that fails. 164 | 165 | Start from default values or from previous ml_params? 166 | """ 167 | successful = self.fit_leastsq(x, y, weights, start_from_default) 168 | if not successful: 169 | successful = self.fit_bfgs(x, y, weights, start_from_default) 170 | if not successful: 171 | return False 172 | return successful 173 | 174 | def ml_sigma(self, x, y, popt, weights): 175 | """ 176 | Given the ML parameters (popt) get the ML estimate of sigma. 
177 | """ 178 | if weights is None: 179 | if self.recency_weighting: 180 | variance = np.average((y-self.function(x, *popt))**2, 181 | weights=recency_weights(len(y))) 182 | sigma = np.sqrt(variance) 183 | else: 184 | sigma = (y-self.function(x, *popt)).std() 185 | else: 186 | if self.recency_weighting: 187 | variance = np.average((y-self.function(x, *popt))**2, 188 | weights=recency_weights(len(y)) * weights) 189 | sigma = np.sqrt(variance) 190 | else: 191 | variance = np.average((y-self.function(x, *popt))**2, 192 | weights=weights) 193 | sigma = np.sqrt(variance) 194 | return sigma 195 | 196 | def fit_leastsq(self, x, y, weights, start_from_default): 197 | try: 198 | if weights is None: 199 | if self.recency_weighting: 200 | residuals = lambda p: np.sqrt(recency_weights(len(y))) * (self.function(x, *p) - y) 201 | else: 202 | residuals = lambda p: self.function(x, *p) - y 203 | else: 204 | #the return value of this function will be squared, hence 205 | #we need to take the sqrt of the weights here 206 | if self.recency_weighting: 207 | residuals = lambda p: np.sqrt(recency_weights(len(y))*weights) * (self.function(x, *p) - y) 208 | else: 209 | residuals = lambda p: np.sqrt(weights) * (self.function(x, *p) - y) 210 | 211 | 212 | if start_from_default: 213 | initial_params = self.default_function_param_array() 214 | else: 215 | initial_params, _ = self.split_theta_to_array(self.ml_params) 216 | 217 | popt, cov_popt, info, msg, status = leastsq(residuals, 218 | x0=initial_params, 219 | full_output=True) 220 | #Dfun=, 221 | #col_deriv=True) 222 | 223 | if np.any(np.isnan(info["fjac"])): 224 | return False 225 | 226 | leastsq_success_statuses = [1,2,3,4] 227 | if status in leastsq_success_statuses: 228 | if any(np.isnan(popt)): 229 | return False 230 | #within bounds? 
231 | if not self.are_params_in_bounds(popt): 232 | return False 233 | 234 | sigma = self.ml_sigma(x, y, popt, weights) 235 | self.ml_params = np.append(popt, [sigma]) 236 | 237 | logging.info("leastsq successful for model %s" % self.function.__name__) 238 | 239 | return True 240 | else: 241 | logging.warn("leastsq NOT successful for model %s, msg: %s" % (self.function.__name__, msg)) 242 | logging.warn("best parameters found: " + str(popt)) 243 | return False 244 | except Exception as e: 245 | print e 246 | tb = traceback.format_exc() 247 | print tb 248 | return False 249 | 250 | def fit_bfgs(self, x, y, weights, start_from_default): 251 | try: 252 | def objective(params): 253 | if weights is None: 254 | if self.recency_weighting: 255 | return np.sum(recency_weights(len(y))*(self.function(x, *params) - y)**2) 256 | else: 257 | return np.sum((self.function(x, *params) - y)**2) 258 | else: 259 | if self.recency_weighting: 260 | return np.sum(weights * recency_weights(len(y)) * (self.function(x, *params) - y)**2) 261 | else: 262 | return np.sum(weights * (self.function(x, *params) - y)**2) 263 | bounds = [] 264 | for param_name in self.function_params: 265 | if param_name in self.min_vals and param_name in self.max_vals: 266 | bounds.append((self.min_vals[param_name], self.max_vals[param_name])) 267 | elif param_name in self.min_vals: 268 | bounds.append((self.min_vals[param_name], None)) 269 | elif param_name in self.max_vals: 270 | bounds.append((None, self.max_vals[param_name])) 271 | else: 272 | bounds.append((None, None)) 273 | 274 | if start_from_default: 275 | initial_params = self.default_function_param_array() 276 | else: 277 | initial_params, _ = self.split_theta_to_array(self.ml_params) 278 | 279 | popt, fval, info= fmin_l_bfgs_b(objective, 280 | x0=initial_params, 281 | bounds=bounds, 282 | approx_grad=True) 283 | if info["warnflag"] != 0: 284 | logging.warn("BFGS not converged! (warnflag %d) for model %s" % (info["warnflag"], self.name)) 285 | logging.warn(info) 286 | return False 287 | 288 | if popt is None: 289 | return False 290 | if any(np.isnan(popt)): 291 | logging.info("bfgs NOT successful for model %s, parameter NaN" % self.name) 292 | return False 293 | sigma = self.ml_sigma(x, y, popt, weights) 294 | self.ml_params = np.append(popt, [sigma]) 295 | logging.info("bfgs successful for model %s" % self.name) 296 | return True 297 | except: 298 | return False 299 | 300 | def aic(self, x, y): 301 | """ 302 | Akaike information criterion 303 | http://en.wikipedia.org/wiki/Akaike_information_criterion 304 | """ 305 | params, sigma = self.split_theta_to_array(self.ml_params) 306 | y_model = self.function(x, *params) 307 | log_likelihood = norm.logpdf(y-y_model, loc=0, scale=sigma).sum() 308 | return 2 * len(self.function_params) - 2 * log_likelihood 309 | 310 | 311 | 312 | class MCMCCurveModel(CurveModel): 313 | """ 314 | MLE curve fitting + MCMC sampling with uniform priors for parameter uncertainty. 
315 | 316 | Model: y ~ f(x) + eps with eps ~ N(0, sigma^2) 317 | """ 318 | def __init__(self, 319 | function, 320 | function_der=None, 321 | min_vals={}, 322 | max_vals={}, 323 | default_vals={}, 324 | burn_in=300, 325 | nwalkers=100, 326 | nsamples=800, 327 | nthreads=1, 328 | recency_weighting=False): 329 | """ 330 | function: the function to be fit 331 | function_der: derivative of that function 332 | """ 333 | super(MCMCCurveModel, self).__init__( 334 | function=function, 335 | function_der=function_der, 336 | min_vals=min_vals, 337 | max_vals=max_vals, 338 | default_vals=default_vals) 339 | self.ml_curve_model = MLCurveModel( 340 | function=function, 341 | function_der=function_der, 342 | min_vals=self.min_vals, 343 | max_vals=self.max_vals, 344 | default_vals=self.default_vals, 345 | recency_weighting=recency_weighting) 346 | 347 | #TODO: have two burn-ins, one for when the ML fitting is successful and one for when not! 348 | self.burn_in = burn_in 349 | self.nwalkers = nwalkers 350 | self.nsamples = nsamples 351 | self.nthreads = nthreads 352 | self.recency_weighting = recency_weighting 353 | 354 | def fit(self, x, y): 355 | try: 356 | if self.ml_curve_model.fit(x, y): 357 | logging.info("ML fit: " + str(self.ml_curve_model.ml_params)) 358 | self.fit_mcmc(x, y) 359 | return True 360 | else: 361 | return False 362 | except Exception as e: 363 | print e 364 | tb = traceback.format_exc() 365 | print tb 366 | return False 367 | 368 | #priors 369 | def ln_prior(self, theta): 370 | """ 371 | log-prior is (up to a constant) 372 | """ 373 | if self.are_params_in_bounds(theta): 374 | return 0.0 375 | else: 376 | return -np.inf 377 | 378 | #likelihood 379 | def ln_likelihood(self, theta, x, y): 380 | """ 381 | y = y_true + y_noise 382 | with y_noise ~ N(0, sigma^2) 383 | """ 384 | params, sigma = self.split_theta(theta) 385 | y_model = self.function(x, **params) 386 | if self.recency_weighting: 387 | weight = recency_weights(len(y)) 388 | ln_likelihood = (weight*norm.logpdf(y-y_model, loc=0, scale=sigma)).sum() 389 | else: 390 | ln_likelihood = norm.logpdf(y-y_model, loc=0, scale=sigma).sum() 391 | if np.isnan(ln_likelihood): 392 | return -np.inf 393 | else: 394 | return ln_likelihood 395 | 396 | def ln_prob(self, theta, x, y): 397 | """ 398 | posterior probability 399 | """ 400 | lp = self.ln_prior(theta) 401 | if not np.isfinite(lp): 402 | return -np.inf 403 | return lp + self.ln_likelihood(theta, x, y) 404 | 405 | def fit_mcmc(self, x, y): 406 | #initialize in an area around the starting position 407 | #pos = [start + 1e-4*np.random.randn(self.ndim) for i in range(self.nwalkers)] 408 | assert self.ml_curve_model.ml_params is not None 409 | pos = [self.ml_curve_model.ml_params + 1e-6*np.random.randn(self.ndim) for i in range(self.nwalkers)] 410 | if self.nthreads <= 1: 411 | sampler = emcee.EnsembleSampler(self.nwalkers, 412 | self.ndim, 413 | self.ln_prob, 414 | args=(x, y)) 415 | else: 416 | sampler = emcee.EnsembleSampler( 417 | self.nwalkers, 418 | self.ndim, 419 | model_ln_prob, 420 | args=(self, x, y), 421 | threads=self.nthreads) 422 | sampler.run_mcmc(pos, self.nsamples) 423 | self.mcmc_chain = sampler.chain 424 | 425 | def get_burned_in_samples(self): 426 | samples = self.mcmc_chain[:, self.burn_in:, :].reshape((-1, self.ndim)) 427 | return samples 428 | 429 | def predictive_distribution(self, x, thin=1): 430 | assert isinstance(x, float) or isinstance(x, int) 431 | samples = self.get_burned_in_samples() 432 | predictions = [] 433 | for theta in samples[::thin]: 434 | params, sigma 
= self.split_theta(theta) 435 | predictions.append(self.function(x, **params)) 436 | return np.asarray(predictions) 437 | 438 | def predictive_ln_prob_distribution(self, x, y, thin=1): 439 | """ 440 | posterior log p(y|x,D) for each sample 441 | """ 442 | #assert isinstance(x, float) or isinstance(x, int) 443 | samples = self.get_burned_in_samples() 444 | ln_probs = [] 445 | for theta in samples[::thin]: 446 | ln_prob = self.ln_likelihood(theta, x, y) 447 | ln_probs.append(ln_prob) 448 | return np.asarray(ln_probs) 449 | 450 | def posterior_ln_prob(self, x, y, thin=10): 451 | """ 452 | posterior log p(y|x,D) 453 | 454 | 1/S sum p(y|D,theta_s) 455 | equivalent to: 456 | logsumexp(log p(y|D,theta_s)) - log(S) 457 | """ 458 | assert not np.isscalar(x) 459 | assert not np.isscalar(y) 460 | x = np.asarray(x) 461 | y = np.asarray(y) 462 | ln_probs = self.predictive_ln_prob_distribution(x, y) 463 | #print ln_probs 464 | #print np.max(ln_probs) 465 | #print np.min(ln_probs) 466 | #print np.mean(ln_probs) 467 | #print "logsumexp(ln_probs)", logsumexp(ln_probs) 468 | #print "np.log(len(ln_probs)) ", np.log(len(ln_probs)) 469 | #print logsumexp(ln_probs) - np.log(len(ln_probs)) 470 | return logsumexp(ln_probs) - np.log(len(ln_probs)) 471 | 472 | def predict(self, x): 473 | """ 474 | E[f(x)] 475 | """ 476 | predictions = self.predictive_distribution(x) 477 | return np.ma.masked_invalid(predictions).mean() 478 | 479 | def predictive_density(self, x_pos, x_density): 480 | density = kde.gaussian_kde(self.predictive_distribution(x_pos)) 481 | return density(x_density) 482 | 483 | def prob_x_greater_than(self, x, y, theta): 484 | """ 485 | P(f(x) > y | Data, theta) 486 | """ 487 | params, sigma = self.split_theta(theta) 488 | mu = self.function(x, **params) 489 | cdf = norm.cdf(y, loc=mu, scale=sigma) 490 | return 1. - cdf 491 | 492 | def posterior_mean_prob_x_greater_than(self, x, y, thin=1): 493 | """ 494 | P(E[f(x)] > E[y] | Data) 495 | 496 | thin: only use every thin'th sample 497 | 498 | Posterior probability that the expected valuef(x) is greater than 499 | the expected value of y. 500 | """ 501 | posterior_distribution = self.predictive_distribution(x, thin) 502 | return masked_mean_x_greater_than(posterior_distribution, y) 503 | 504 | 505 | def posterior_prob_x_greater_than(self, x, y, thin=1): 506 | """ 507 | P(f(x) > y | Data) 508 | 509 | Posterior probability that f(x) is greater than y. 510 | """ 511 | assert isinstance(x, float) or isinstance(x, int) 512 | assert isinstance(y, float) or isinstance(y, int) 513 | probs = [] 514 | samples = self.get_burned_in_samples() 515 | for theta in samples[::thin]: 516 | probs.append(self.prob_x_greater_than(x, y, theta)) 517 | 518 | return np.ma.masked_invalid(probs).mean() 519 | 520 | def posterior_log_likelihoods(self, x, y): 521 | #DEPRECATED! 522 | samples = self.get_burned_in_samples() 523 | log_likelihoods = [] 524 | for theta in samples: 525 | params, sigma = self.split_theta(theta) 526 | log_likelihood = self.ln_likelihood(theta, x, y) 527 | #TODO: rather add a -np.inf? 528 | if not np.isnan(log_likelihood) and np.isfinite(log_likelihood): 529 | log_likelihoods.append(log_likelihood) 530 | return log_likelihoods 531 | 532 | def mean_posterior_log_likelihood(self, x, y): 533 | #DEPRECATED! 534 | return np.ma.masked_invalid(self.posterior_log_likelihoods(x, y)).mean() 535 | 536 | def median_posterior_log_likelihood(self, x, y): 537 | #DEPRECATED! 
538 | masked_x = np.ma.masked_invalid(self.posterior_log_likelihoods(x, y)) 539 | return np.ma.extras.median(masked_x) 540 | 541 | def max_posterior_log_likelihood(self, x, y): 542 | #DEPRECATED! 543 | return np.ma.masked_invalid(self.posterior_log_likelihoods(x, y)).max() 544 | 545 | def posterior_log_likelihood(self, x, y): 546 | #DEPRECATED! 547 | return self.median_posterior_log_likelihood(x, y) 548 | 549 | def predictive_std(self, x, thin=1): 550 | """ 551 | sqrt(Var[f(x)]) 552 | """ 553 | predictions = self.predictive_distribution(x, thin) 554 | return np.ma.masked_invalid(predictions).std() 555 | 556 | def dic(self, x, y): 557 | """ Deviance Information Criterion. """ 558 | samples = self.get_burned_in_samples() 559 | deviances = [] 560 | for theta in samples: 561 | params, sigma = self.split_theta(theta) 562 | deviance = -2 * self.ln_likelihood(theta, x, y) 563 | deviances.append(deviance) 564 | mean_theta = samples.mean(axis=0) 565 | theta_mean_deviance = -2 * self.ln_likelihood(mean_theta, x, y) 566 | DIC = 2 * np.mean(deviances) - theta_mean_deviance 567 | return DIC 568 | 569 | 570 | class LinearCurveModel(CurveModel): 571 | """ 572 | Fits a function f(x) = a * x + b using OLS. 573 | """ 574 | 575 | def __init__(self, *arg, **kwargs): 576 | if "default_vals" in kwargs: 577 | logging.warn("default values not needed for the linear model.") 578 | kwargs["default_vals"] = {"a": 0, "b": 0} 579 | kwargs["min_vals"] = {"a": 0} 580 | super(LinearCurveModel, self).__init__( 581 | function=all_models["linear"], 582 | *arg, 583 | **kwargs) 584 | 585 | def fit(self, x, y, weights=None, start_from_default=True): 586 | return self.fit_ml(x, y, weights) 587 | 588 | def fit_ml(self, x, y, weights): 589 | """ 590 | Ordinary Least Squares fit. 591 | 592 | TODO: use the weights! 593 | """ 594 | #TODO: check if the results agree with the minimum/maximum values! 595 | X = np.asarray([np.ones(len(x)), x]).T 596 | bh = np.dot(np.linalg.inv(np.dot(X.T,X)),np.dot(X.T,y)) 597 | a = bh[1] 598 | b = bh[0] 599 | sigma = (y-self.function(x, a, b)).std() 600 | self.ml_params = np.asarray([a, b, sigma]) 601 | return True 602 | 603 | def predict(self, x): 604 | a = self.ml_params[0] 605 | b = self.ml_params[1] 606 | return a * x + b 607 | 608 | 609 | class LinearMCMCCurveModel(MCMCCurveModel): 610 | def __init__(self, **kwargs): 611 | ml_curve_model = LinearCurveModel() 612 | super(LinearMCMCCurveModel, self).__init__( 613 | function=ml_curve_model.function, 614 | min_vals=ml_curve_model.min_vals, 615 | max_vals=ml_curve_model.max_vals, 616 | default_vals=ml_curve_model.default_vals, 617 | **kwargs) 618 | self.ml_curve_model = ml_curve_model 619 | 620 | 621 | def model_ln_prob(theta, model, x, y): 622 | return model.ln_prob(theta, x, y) 623 | 624 | 625 | class MCMCCurveModelCombination(object): 626 | 627 | def __init__(self, 628 | ml_curve_models, 629 | xlim, 630 | burn_in=500, 631 | nwalkers=100, 632 | nsamples=2500, 633 | normalize_weights=True, 634 | monotonicity_constraint=True, 635 | soft_monotonicity_constraint=False, 636 | initial_model_weight_ml_estimate=False, 637 | normalized_weights_initialization="constant", 638 | strictly_positive_weights=True, 639 | sanity_check_prior=True, 640 | nthreads=1, 641 | recency_weighting=True): 642 | """ 643 | xlim: the point on the x axis we eventually want to make predictions for. 
644 | """ 645 | self.ml_curve_models = ml_curve_models 646 | self.xlim = xlim 647 | self.burn_in = burn_in 648 | self.nwalkers = nwalkers 649 | self.nsamples = nsamples 650 | self.normalize_weights = normalize_weights 651 | assert not (monotonicity_constraint and soft_monotonicity_constraint), "choose either the monotonicity_constraint or the soft_monotonicity_constraint, but not both" 652 | self.monotonicity_constraint = monotonicity_constraint 653 | self.soft_monotonicity_constraint = soft_monotonicity_constraint 654 | self.initial_model_weight_ml_estimate = initial_model_weight_ml_estimate 655 | self.normalized_weights_initialization = normalized_weights_initialization 656 | self.strictly_positive_weights = strictly_positive_weights 657 | self.sanity_check_prior = sanity_check_prior 658 | self.nthreads = nthreads 659 | self.recency_weighting = recency_weighting 660 | #the constant used for initializing the parameters in a ball around the ML parameters 661 | self.rand_init_ball = 1e-6 662 | self.name = "model combination"# (%s)" % ", ".join([model.name for model in self.ml_curve_models]) 663 | 664 | def fit(self, x, y, model_weights=None): 665 | if self.fit_ml_individual(x, y, model_weights): 666 | #run MCMC: 667 | self.fit_mcmc(x, y) 668 | return True 669 | else: 670 | print "fit_ml_individual failed" 671 | return False 672 | 673 | def y_lim_sanity_check(self, ylim): 674 | # just make sure that the prediction is not below 0 nor insanely big 675 | # HOWEVER: there might be cases where some models might predict value larger than 1.0 676 | # and this is alright, because in those cases we don't necessarily want to stop a run. 677 | if not np.isfinite(ylim) or ylim < 0. or ylim > 100.0: 678 | return False 679 | else: 680 | return True 681 | 682 | def fit_ml_individual(self, x, y, model_weights): 683 | """ 684 | Do a ML fit for each model individually and then another ML fit for the combination of models. 685 | """ 686 | self.fit_models = [] 687 | for model in self.ml_curve_models: 688 | if model.fit(x, y): 689 | ylim = model.predict(self.xlim) 690 | if not self.y_lim_sanity_check(ylim): 691 | print "ML fit of model %s is out of bound range [0.0, 100.] at xlim." % (model.function.__name__) 692 | continue 693 | params, sigma = model.split_theta_to_array(model.ml_params) 694 | if not np.isfinite(self.ln_model_prior(model, params)): 695 | print "ML fit of model %s is not supported by prior." % model.function.__name__ 696 | continue 697 | self.fit_models.append(model) 698 | 699 | if len(self.fit_models) == 0: 700 | return False 701 | 702 | if model_weights is None: 703 | if self.normalize_weights: 704 | if self.normalized_weights_initialization == "constant": 705 | #initialize with a constant value 706 | #we will sample in this unnormalized space and then later normalize 707 | model_weights = [10. 
for model in self.fit_models] 708 | else:# self.normalized_weights_initialization == "normalized" 709 | model_weights = [1./len(self.fit_models) for model in self.fit_models] 710 | else: 711 | if self.initial_model_weight_ml_estimate: 712 | model_weights = self.get_ml_model_weights(x, y) 713 | print model_weights 714 | non_zero_fit_models = [] 715 | non_zero_weights = [] 716 | for w, model in zip(model_weights, self.fit_models): 717 | if w > 1e-4: 718 | non_zero_fit_models.append(model) 719 | non_zero_weights.append(w) 720 | self.fit_models = non_zero_fit_models 721 | model_weights = non_zero_weights 722 | else: 723 | model_weights = [1./len(self.fit_models) for model in self.fit_models] 724 | 725 | #build joint ml estimated parameter vector 726 | model_params = [] 727 | all_model_params = [] 728 | for model in self.fit_models: 729 | params, sigma = model.split_theta_to_array(model.ml_params) 730 | model_params.append(params) 731 | all_model_params.extend(params) 732 | 733 | y_predicted = self.predict_given_params(x, model_params, model_weights) 734 | sigma = (y - y_predicted).std() 735 | 736 | self.ml_params = self.join_theta(all_model_params, sigma, model_weights) 737 | self.ndim = len(self.ml_params) 738 | if self.nwalkers < 2*self.ndim: 739 | self.nwalkers = 2*self.ndim 740 | print "warning: increasing number of walkers to 2*ndim=%d" % (self.nwalkers) 741 | return True 742 | 743 | 744 | def get_ml_model_weights(self, x, y_target): 745 | """ 746 | Get the ML estimate of the model weights. 747 | """ 748 | 749 | """ 750 | Take all the models that have been fit using ML. 751 | For each model we get a prediction of y: y_i 752 | 753 | Now how can we combine those to reduce the squared error: 754 | 755 | argmin_w (y_target - w_1 * y_1 - w_2 * y_2 - w_3 * y_3 ...) 756 | 757 | Deriving and setting to zero we get a linear system of equations that we need to solve. 758 | 759 | 760 | Resource on QP: 761 | http://stats.stackexchange.com/questions/21565/how-do-i-fit-a-constrained-regression-in-r-so-that-coefficients-total-1 762 | http://maggotroot.blogspot.de/2013/11/constrained-linear-least-squares-in.html 763 | """ 764 | num_models = len(self.fit_models) 765 | y_predicted = [] 766 | b = [] 767 | for model in self.fit_models: 768 | y_model = model.predict(x) 769 | y_predicted.append(y_model) 770 | b.append(y_model.dot(y_target)) 771 | a = np.zeros((num_models, num_models)) 772 | for i in range(num_models): 773 | for j in range(num_models): 774 | a[i, j] = y_predicted[i].dot(y_predicted[j]) 775 | #if i == j: 776 | # a[i, j] -= 0.1 #constraint the weights! 777 | a_rank = np.linalg.matrix_rank(a) 778 | if a_rank != num_models: 779 | print "Rank %d not sufficcient for solving the linear system. %d needed at least." 
% (a_rank, num_models) 780 | try: 781 | print np.linalg.lstsq(a, b)[0] 782 | print np.linalg.solve(a, b) 783 | print nnls(a, b)[0] 784 | ##return np.linalg.solve(a, b) 785 | weights = nnls(a, b)[0] 786 | #weights = [w if w > 1e-4 else 1e-4 for w in weights] 787 | return weights 788 | #except LinAlgError as e: 789 | except: 790 | return [1./len(self.fit_models) for model in self.fit_models] 791 | 792 | 793 | #priors 794 | def ln_prior(self, theta): 795 | ln = 0 796 | model_params, sigma, model_weights = self.split_theta(theta) 797 | for model, params in zip(self.fit_models, model_params): 798 | ln += self.ln_model_prior(model, params) 799 | #if self.normalize_weights: 800 | #when we normalize we expect all weights to be positive 801 | #we expect all weights to be positive 802 | if self.strictly_positive_weights and np.any(model_weights < 0): 803 | return -np.inf 804 | return ln 805 | 806 | 807 | def ln_model_prior(self, model, params): 808 | if not model.are_params_in_bounds(params): 809 | return -np.inf 810 | if self.monotonicity_constraint: 811 | #check for monotonicity(this obviously this is a hack, but it works for now): 812 | x_mon = np.linspace(2, self.xlim, 100) 813 | y_mon = model.function(x_mon, *params) 814 | if np.any(np.diff(y_mon) < 0): 815 | return -np.inf 816 | elif self.soft_monotonicity_constraint: 817 | #soft monotonicity: defined as the last value being bigger than the first one 818 | x_mon = np.asarray([2, self.xlim]) 819 | y_mon = model.function(x_mon, *params) 820 | if y_mon[0] > y_mon[-1]: 821 | return -np.inf 822 | ylim = model.function(self.xlim, *params) 823 | #sanity check for ylim 824 | if self.sanity_check_prior and not self.y_lim_sanity_check(ylim): 825 | return -np.inf 826 | else: 827 | return 0.0 828 | 829 | #likelihood 830 | def ln_likelihood(self, theta, x, y): 831 | y_model, sigma = self.predict_given_theta(x, theta) 832 | 833 | if self.recency_weighting: 834 | weight = recency_weights(len(y)) 835 | ln_likelihood = (weight*norm.logpdf(y-y_model, loc=0, scale=sigma)).sum() 836 | else: 837 | ln_likelihood = norm.logpdf(y-y_model, loc=0, scale=sigma).sum() 838 | 839 | if np.isnan(ln_likelihood): 840 | return -np.inf 841 | else: 842 | return ln_likelihood 843 | 844 | def ln_prob(self, theta, x, y): 845 | """ 846 | posterior probability 847 | """ 848 | lp = self.ln_prior(theta) 849 | if not np.isfinite(lp): 850 | return -np.inf 851 | return lp + self.ln_likelihood(theta, x, y) 852 | 853 | def split_theta(self, theta): 854 | """ 855 | theta is structured as follows: 856 | for each model i 857 | for each model parameter j 858 | theta = (theta_ij, sigma, w_i) 859 | """ 860 | num_models = len(self.fit_models) 861 | 862 | model_weights = theta[-len(self.fit_models):] 863 | 864 | all_model_params = [] 865 | for model in self.fit_models: 866 | num_model_params = len(model.function_params) 867 | model_params = theta[:num_model_params] 868 | all_model_params.append(model_params) 869 | 870 | theta = theta[num_model_params:] 871 | sigma = theta[0] 872 | model_weights = theta[1:] 873 | assert len(model_weights) == len(self.fit_models) 874 | return all_model_params, sigma, model_weights 875 | 876 | 877 | def join_theta(self, model_params, sigma, model_weights): 878 | #assert len(model_params) == len(model_weights) 879 | theta = [] 880 | theta.extend(model_params) 881 | theta.append(sigma) 882 | theta.extend(model_weights) 883 | return theta 884 | 885 | def fit_mcmc(self, x, y): 886 | #initialize in an area around the starting position 887 | 888 | assert self.ml_params is 
not None 889 | pos = [self.ml_params + self.rand_init_ball*np.random.randn(self.ndim) for i in range(self.nwalkers)] 890 | 891 | if self.nthreads <= 1: 892 | sampler = emcee.EnsembleSampler(self.nwalkers, 893 | self.ndim, 894 | self.ln_prob, 895 | args=(x, y)) 896 | else: 897 | sampler = emcee.EnsembleSampler( 898 | self.nwalkers, 899 | self.ndim, 900 | model_ln_prob, 901 | args=(self, x, y), 902 | threads=self.nthreads) 903 | sampler.run_mcmc(pos, self.nsamples) 904 | self.mcmc_chain = sampler.chain 905 | 906 | if self.normalize_weights: 907 | self.normalize_chain_model_weights() 908 | 909 | def normalize_chain_model_weights(self): 910 | """ 911 | In the chain we sample w_1,... w_i however we are interested in the model 912 | probabilities p_1,... p_i 913 | """ 914 | model_weights_chain = self.mcmc_chain[:,:,-len(self.fit_models):] 915 | model_probabilities_chain = model_weights_chain / model_weights_chain.sum(axis=2)[:,:,np.newaxis] 916 | #replace in chain 917 | self.mcmc_chain[:,:,-len(self.fit_models):] = model_probabilities_chain 918 | 919 | def get_burned_in_samples(self): 920 | samples = self.mcmc_chain[:, self.burn_in:, :].reshape((-1, self.ndim)) 921 | return samples 922 | 923 | def print_probs(self): 924 | burned_in_chain = self.get_burned_in_samples() 925 | model_probabilities = burned_in_chain[:,-len(self.fit_models):] 926 | print model_probabilities.mean(axis=0) 927 | 928 | def predict_given_theta(self, x, theta): 929 | """ 930 | returns y_predicted, sigma 931 | """ 932 | model_params, sigma, model_weights = self.split_theta(theta) 933 | 934 | y_predicted = self.predict_given_params(x, model_params, model_weights) 935 | 936 | return y_predicted, sigma 937 | 938 | def predict_given_params(self, x, model_params, model_weights): 939 | """ 940 | returns y_predicted 941 | """ 942 | if self.normalize_weights: 943 | model_weight_sum = np.sum(model_weights) 944 | model_ws = [weight/model_weight_sum for weight in model_weights] 945 | else: 946 | model_ws = model_weights 947 | 948 | y_model = [] 949 | for model, model_w, params in zip(self.fit_models, model_ws, model_params): 950 | y_model.append(model_w*model.function(x, *params)) 951 | y_predicted = reduce(lambda a, b: a+b, y_model) 952 | return y_predicted 953 | 954 | def prob_x_greater_than(self, x, y, theta): 955 | """ 956 | P(f(x) > y | Data, theta) 957 | """ 958 | model_params, sigma, model_weights = self.split_theta(theta) 959 | 960 | y_predicted = self.predict_given_params(x, model_params, model_weights) 961 | 962 | cdf = norm.cdf(y, loc=y_predicted, scale=sigma) 963 | return 1. - cdf 964 | 965 | def posterior_prob_x_greater_than(self, x, y, thin=1): 966 | """ 967 | P(f(x) > y | Data) 968 | 969 | thin: only use every thin'th sample 970 | 971 | Posterior probability that f(x) is greater than y. 972 | """ 973 | assert isinstance(x, float) or isinstance(x, int) 974 | assert isinstance(y, float) or isinstance(y, int) 975 | probs = [] 976 | samples = self.get_burned_in_samples() 977 | for theta in samples[::thin]: 978 | probs.append(self.prob_x_greater_than(x, y, theta)) 979 | return np.ma.masked_invalid(probs).mean() 980 | 981 | 982 | def posterior_mean_prob_x_greater_than(self, x, y, thin=1): 983 | """ 984 | P(E[f(x)] > E[y] | Data) 985 | 986 | thin: only use every thin'th sample 987 | 988 | Posterior probability that the expected valuef(x) is greater than 989 | the expected value of y. 
990 | """ 991 | posterior_distribution = self.predictive_distribution(x, thin) 992 | return masked_mean_x_greater_than(posterior_distribution, y) 993 | 994 | 995 | def predictive_distribution(self, x, thin=1): 996 | assert isinstance(x, float) or isinstance(x, int) 997 | samples = self.get_burned_in_samples() 998 | predictions = [] 999 | for theta in samples[::thin]: 1000 | model_params, sigma, model_weights = self.split_theta(theta) 1001 | y_predicted = self.predict_given_params(x, model_params, model_weights) 1002 | predictions.append(y_predicted) 1003 | return np.asarray(predictions) 1004 | 1005 | def predictive_ln_prob_distribution(self, x, y, thin=1): 1006 | """ 1007 | posterior log p(y|x,D) for each sample 1008 | """ 1009 | #assert isinstance(x, float) or isinstance(x, int) 1010 | samples = self.get_burned_in_samples() 1011 | ln_probs = [] 1012 | for theta in samples[::thin]: 1013 | ln_prob = self.ln_likelihood(theta, x, y) 1014 | ln_probs.append(ln_prob) 1015 | return np.asarray(ln_probs) 1016 | 1017 | def posterior_ln_prob(self, x, y, thin=10): 1018 | """ 1019 | posterior log p(y|x,D) 1020 | 1021 | 1/S sum p(y|D,theta_s) 1022 | equivalent to: 1023 | logsumexp(log p(y|D,theta_s)) - log(S) 1024 | """ 1025 | assert not np.isscalar(x) 1026 | assert not np.isscalar(y) 1027 | x = np.asarray(x) 1028 | y = np.asarray(y) 1029 | ln_probs = self.predictive_ln_prob_distribution(x, y) 1030 | return logsumexp(ln_probs) - np.log(len(ln_probs)) 1031 | 1032 | def predict(self, x, thin=1): 1033 | """ 1034 | E[f(x)] 1035 | """ 1036 | predictions = self.predictive_distribution(x, thin) 1037 | return np.ma.masked_invalid(predictions).mean() 1038 | 1039 | def predictive_std(self, x, thin=1): 1040 | """ 1041 | sqrt(Var[f(x)]) 1042 | """ 1043 | predictions = self.predictive_distribution(x, thin) 1044 | return np.ma.masked_invalid(predictions).std() 1045 | 1046 | def serialize(self, fname): 1047 | import pickle 1048 | pickle.dump(self, open(fname, "wb")) 1049 | 1050 | 1051 | class MlCurveMixtureModel(object): 1052 | """ 1053 | Maximum Likelihood fit of a convex combination of curve models 1054 | using the Expectation Maxization algorithm. 
1055 | 1056 | http://www.slideshare.net/butest/lecture-18-gaussian-mixture-models-and-expectation-maximization 1057 | http://melodi.ee.washington.edu/people/bilmes/mypapers/em.pdf 1058 | http://www.igi.tugraz.at/lehre/MLA/WS07/chapter9.pdf 1059 | 1060 | With Dirichlet prior: 1061 | ftp://tlp.limsi.fr/public/map93.pdf 1062 | 1063 | Finite Mixture Model with Dirichlet Distribution 1064 | http://blog.datumbox.com/finite-mixture-model-based-on-dirichlet-distribution/ 1065 | 1066 | Variational Bayesian Gaussian Mixture Model (VBGMM) 1067 | http://kittipatkampa.wordpress.com/2010/10/14/variational-bayesian-gaussian-mixture-model-vbgmm/ 1068 | """ 1069 | def __init__(self, ml_curve_models): 1070 | self.ml_curve_models = ml_curve_models 1071 | 1072 | def fit(self, x, y, num_iter=1): 1073 | fit_models = [] 1074 | for model in self.ml_curve_models: 1075 | if model.fit(x, y, start_from_default=True): 1076 | fit_models.append(model) 1077 | model_weights = [1./len(fit_models) for m in fit_models] 1078 | if len(fit_models) == 0: 1079 | return False 1080 | try: 1081 | for i in range(0, num_iter): 1082 | #E-step: 1083 | responsibilities = [] 1084 | for model_weight, model in zip(model_weights, fit_models): 1085 | #responsibilities.append(0.000001 + model_weight * model.likelihood(x, y)) 1086 | #responsibilities.append(0.0001 + model_weight * model.likelihood(x, y)) 1087 | responsibilities.append(0.000001 + model_weight * model.likelihood(x, y)) 1088 | responsibilities = np.asarray(responsibilities) 1089 | #normalize: 1090 | responsibilities = responsibilities / responsibilities.sum(axis=0) 1091 | 1092 | #M-step: 1093 | previous_fit_model_weights = responsibilities.mean(axis=1) 1094 | new_fit_models = [] 1095 | model_weights = [] 1096 | for model_idx, model in enumerate(fit_models): 1097 | if (previous_fit_model_weights[model_idx] > 0.000001 1098 | and model.fit(x, y, responsibilities[model_idx, :], start_from_default=False)): 1099 | new_fit_models.append(model) 1100 | #model_weights.append(previous_fit_model_weights[model_idx]) 1101 | model_weights.append(0.01 + previous_fit_model_weights[model_idx]) 1102 | model_weights = np.asarray(model_weights) 1103 | #renormalize (in case a model couldn't be fit anymore) 1104 | model_weights = model_weights / model_weights.sum() 1105 | fit_models = new_fit_models 1106 | for model_weight, model in zip(model_weights, fit_models): 1107 | logging.debug("%s %f" % (model.function.__name__, model_weight)) 1108 | 1109 | #print model_weights 1110 | self.model_weights = model_weights 1111 | self.fit_models = fit_models 1112 | return True 1113 | except: 1114 | return False 1115 | 1116 | def predict(self, x): 1117 | y_predicted = None 1118 | for model_weight, model in zip(self.model_weights, self.fit_models): 1119 | if y_predicted is None: 1120 | y_predicted = model_weight * model.predict(x) 1121 | else: 1122 | y_predicted += model_weight * model.predict(x) 1123 | return y_predicted 1124 | 1125 | 1126 | class MCMCCurveMixtureModel(object): 1127 | 1128 | def __init__(self, 1129 | ml_curve_models, 1130 | xlim, 1131 | burn_in=600, 1132 | nwalkers=80, 1133 | nsamples=5000, 1134 | monotonicity_constraint=True, 1135 | soft_monotonicity_constraint=False, 1136 | nthreads=1, 1137 | recency_weighting=True): 1138 | """ 1139 | xlim: the point on the x axis we eventually want to make predictions for. 
1140 | """ 1141 | self.ml_curve_models = ml_curve_models 1142 | self.ml_curve_mixture_model = MlCurveMixtureModel(ml_curve_models) 1143 | self.xlim = xlim 1144 | self.burn_in = burn_in 1145 | self.nwalkers = nwalkers 1146 | self.nsamples = nsamples 1147 | assert not (monotonicity_constraint and soft_monotonicity_constraint), "choose either the monotonicity_constraint or the soft_monotonicity_constraint, but not both" 1148 | self.monotonicity_constraint = monotonicity_constraint 1149 | self.soft_monotonicity_constraint = soft_monotonicity_constraint 1150 | self.nthreads = nthreads 1151 | self.recency_weighting = recency_weighting 1152 | #the constant used for initializing the parameters in a ball around the ML parameters 1153 | self.rand_init_ball = 1e-6 1154 | 1155 | def fit(self, x, y): 1156 | if self.fit_ml_individual(x, y): 1157 | #run MCMC: 1158 | self.fit_mcmc(x, y) 1159 | return True 1160 | else: 1161 | print "fit_ml_individual failed" 1162 | return False 1163 | 1164 | def fit_ml_individual(self, x, y): 1165 | """ 1166 | Do a ML fit for each model individually and then another ML fit for the combination of models. 1167 | """ 1168 | if self.ml_curve_mixture_model.fit(x, y): 1169 | model_weights = self.ml_curve_mixture_model.model_weights 1170 | self.fit_models = self.ml_curve_mixture_model.fit_models 1171 | else: 1172 | self.fit_models = [] 1173 | for model in self.ml_curve_models: 1174 | if model.fit(x, y): 1175 | if np.isfinite(self.ln_model_prior(model, model.ml_params)): 1176 | self.fit_models.append(model) 1177 | else: 1178 | print "ML fit of model %s is not supported by prior." % model.function.__name__ 1179 | model_weights = [10. for model in self.fit_models] 1180 | if len(self.fit_models) == 0: 1181 | return False 1182 | 1183 | #build joint ml estimated parameter vector 1184 | all_model_params = [] 1185 | for model in self.fit_models: 1186 | all_model_params.extend(model.ml_params) 1187 | print "model weights: ", model_weights 1188 | self.ml_params = self.join_theta(all_model_params, model_weights) 1189 | self.ndim = len(self.ml_params) 1190 | return True 1191 | 1192 | #priors 1193 | def ln_prior(self, theta): 1194 | ln = 0 1195 | model_thetas, model_weights = self.split_theta(theta) 1196 | for model, theta in zip(self.fit_models, model_thetas): 1197 | ln += self.ln_model_prior(model, theta) 1198 | #if self.normalize_weights: 1199 | #when we normalize we expect all weights to be positive 1200 | #we expect all weights to be positive 1201 | if np.any(model_weights < 0): 1202 | return -np.inf 1203 | return ln 1204 | 1205 | def ln_model_prior(self, model, theta): 1206 | if not model.are_params_in_bounds(theta): 1207 | return -np.inf 1208 | if self.monotonicity_constraint: 1209 | #check for monotonicity(this obviously this is a hack, but it works for now): 1210 | x_mon = np.linspace(2, self.xlim, 100) 1211 | params, sigma = model.split_theta_to_array(theta) 1212 | y_mon = model.function(x_mon, *params) 1213 | if np.any(np.diff(y_mon) < 0): 1214 | return -np.inf 1215 | elif self.soft_monotonicity_constraint: 1216 | #soft monotonicity: defined as the last value being bigger than the first one 1217 | x_mon = np.asarray([2, self.xlim]) 1218 | y_mon = model.function(x_mon, *params) 1219 | if y_mon[0] > y_mon[-1]: 1220 | return -np.inf 1221 | return 0.0 1222 | 1223 | #likelihood 1224 | def ln_likelihood(self, theta, x, y): 1225 | """ 1226 | """ 1227 | sample_weights = None 1228 | if self.recency_weighting: 1229 | sample_weights = [10**(1./len(y))] * len(y) 1230 | sample_weights = 
sample_weights**(np.arange(0, len(y))) 1231 | 1232 | model_thetas, model_weights = self.split_theta(theta) 1233 | #normalize the weights 1234 | model_weight_sum = np.sum(model_weights) 1235 | model_weights = [weight/model_weight_sum for weight in model_weights] 1236 | 1237 | ln_likelihoods = [] 1238 | for model, model_theta, model_weight in zip(self.fit_models, model_thetas, model_weights): 1239 | ln_likelihood = np.log(model_weight) 1240 | params, sigma = model.split_theta_to_array(model_theta) 1241 | y_model = model.function(x, *params) 1242 | if sample_weights is None: 1243 | ln_likelihood += norm.logpdf(y-y_model, loc=0, scale=sigma).sum() 1244 | else: 1245 | ln_likelihood += (sample_weights*norm.logpdf(y-y_model, loc=0, scale=sigma)).sum() 1246 | 1247 | ln_likelihoods.append(ln_likelihood) 1248 | 1249 | if np.any(np.isnan(ln_likelihoods)): 1250 | return -np.inf 1251 | else: 1252 | return logsumexp(ln_likelihoods) 1253 | 1254 | def ln_prob(self, theta, x, y): 1255 | """ 1256 | posterior probability 1257 | """ 1258 | lp = self.ln_prior(theta) 1259 | if not np.isfinite(lp): 1260 | return -np.inf 1261 | return lp + self.ln_likelihood(theta, x, y) 1262 | 1263 | def split_theta(self, theta): 1264 | """ 1265 | theta is structured as follows: 1266 | for each model i 1267 | for each model parameter j 1268 | theta = (theta_ij, sigma, w_i) 1269 | """ 1270 | num_models = len(self.fit_models) 1271 | 1272 | model_weights = theta[-len(self.fit_models):] 1273 | 1274 | all_model_params = [] 1275 | for model in self.fit_models: 1276 | num_model_params = len(model.all_param_names) 1277 | model_params = theta[:num_model_params] 1278 | all_model_params.append(model_params) 1279 | 1280 | theta = theta[num_model_params:] 1281 | model_weights = theta 1282 | assert len(model_weights) == len(self.fit_models) 1283 | return all_model_params, model_weights 1284 | 1285 | 1286 | def join_theta(self, model_params, model_weights): 1287 | #assert len(model_params) == len(model_weights) 1288 | theta = [] 1289 | theta.extend(model_params) 1290 | theta.extend(model_weights) 1291 | return theta 1292 | 1293 | def fit_mcmc(self, x, y): 1294 | #initialize in an area around the starting position 1295 | 1296 | assert self.ml_params is not None 1297 | pos = [self.ml_params + self.rand_init_ball*np.random.randn(self.ndim) for i in range(self.nwalkers)] 1298 | 1299 | if self.nthreads <= 1: 1300 | sampler = emcee.EnsembleSampler(self.nwalkers, 1301 | self.ndim, 1302 | self.ln_prob, 1303 | args=(x, y)) 1304 | else: 1305 | sampler = emcee.EnsembleSampler( 1306 | self.nwalkers, 1307 | self.ndim, 1308 | model_ln_prob, 1309 | args=(self, x, y), 1310 | threads=self.nthreads) 1311 | sampler.run_mcmc(pos, self.nsamples) 1312 | self.mcmc_chain = sampler.chain 1313 | 1314 | #we normalize the weights in the chain model, so that the trace plot make more sense 1315 | self.normalize_chain_model_weights() 1316 | 1317 | def normalize_chain_model_weights(self): 1318 | """ 1319 | In the chain we sample w_1,... w_i however we are interested in the model 1320 | probabilities p_1,... 
p_i 1321 | """ 1322 | model_weights_chain = self.mcmc_chain[:,:,-len(self.fit_models):] 1323 | model_probabilities_chain = model_weights_chain / model_weights_chain.sum(axis=2)[:,:,np.newaxis] 1324 | #replace in chain 1325 | self.mcmc_chain[:,:,-len(self.fit_models):] = model_probabilities_chain 1326 | 1327 | def get_burned_in_samples(self): 1328 | samples = self.mcmc_chain[:, self.burn_in:, :].reshape((-1, self.ndim)) 1329 | return samples 1330 | 1331 | def print_probs(self): 1332 | burned_in_chain = self.get_burned_in_samples() 1333 | model_probabilities = burned_in_chain[:,-len(self.fit_models):] 1334 | print model_probabilities.mean(axis=0) 1335 | 1336 | def predict_given_theta(self, x, theta): 1337 | """ 1338 | returns y_predicted, sigma 1339 | """ 1340 | model_params, model_weights = self.split_theta(theta) 1341 | 1342 | y_predicted = self.predict_given_params(x, model_params, model_weights) 1343 | 1344 | return y_predicted 1345 | 1346 | def predict_given_params(self, x, model_thetas, model_weights): 1347 | """ 1348 | returns y_predicted 1349 | """ 1350 | #normalize the weights 1351 | model_weight_sum = np.sum(model_weights) 1352 | model_ws = [weight/model_weight_sum for weight in model_weights] 1353 | 1354 | y_model = [] 1355 | for model, model_w, theta in zip(self.fit_models, model_ws, model_thetas): 1356 | params, sigma = model.split_theta_to_array(theta) 1357 | y_model.append(model_w*model.function(x, *params)) 1358 | y_predicted = reduce(lambda a, b: a+b, y_model) 1359 | return y_predicted 1360 | 1361 | def prob_x_greater_than(self, x, y, theta): 1362 | """ 1363 | P(f(x) > y | Data, theta) 1364 | """ 1365 | model_params, model_weights = self.split_theta(theta) 1366 | 1367 | y_predicted = self.predict_given_params(x, model_params, model_weights) 1368 | 1369 | cdf = norm.cdf(y, loc=y_predicted, scale=sigma) 1370 | return 1. - cdf 1371 | 1372 | def posterior_prob_x_greater_than(self, x, y, thin=1): 1373 | """ 1374 | P(f(x) > y | Data) 1375 | 1376 | thin: only use every thin'th sample 1377 | 1378 | Posterior probability that f(x) is greater than y. 
1379 | """ 1380 | assert isinstance(x, float) or isinstance(x, int) 1381 | assert isinstance(y, float) or isinstance(y, int) 1382 | probs = [] 1383 | samples = self.get_burned_in_samples() 1384 | for theta in samples[::thin]: 1385 | probs.append(self.prob_x_greater_than(x, y, theta)) 1386 | 1387 | return np.ma.masked_invalid(probs).mean() 1388 | 1389 | def predictive_distribution(self, x, thin=1): 1390 | assert isinstance(x, float) or isinstance(x, int) 1391 | samples = self.get_burned_in_samples() 1392 | predictions = [] 1393 | for theta in samples[::thin]: 1394 | model_params, model_weights = self.split_theta(theta) 1395 | y_predicted = self.predict_given_params(x, model_params, model_weights) 1396 | predictions.append(y_predicted) 1397 | return np.asarray(predictions) 1398 | 1399 | def predict(self, x, thin=1): 1400 | """ 1401 | E[f(x)] 1402 | """ 1403 | predictions = self.predictive_distribution(x, thin) 1404 | return np.ma.masked_invalid(predictions).mean() 1405 | 1406 | def predictive_std(self, x, thin=1): 1407 | """ 1408 | sqrt(Var[f(x)]) 1409 | """ 1410 | predictions = self.predictive_distribution(x, thin) 1411 | return np.ma.masked_invalid(predictions).std() 1412 | 1413 | def serialize(self, fname): 1414 | import pickle 1415 | pickle.dump(self, open(fname, "wb")) 1416 | -------------------------------------------------------------------------------- /pylrpredictor/ensemblecurvemodel.py: -------------------------------------------------------------------------------- 1 | from scipy.misc import logsumexp 2 | from functools import partial 3 | import numpy as np 4 | import time 5 | import logging 6 | 7 | def fit_model(model, x_train, y_train): 8 | success = model.fit(x_train, y_train) 9 | return (success, model) 10 | 11 | def model_log_likelihood(model, x_test, y_test): 12 | return model.posterior_log_likelihood(x_test, y_test) 13 | 14 | def model_posterior_prob_x_greater_than(model, x, y): 15 | return model.posterior_prob_x_greater_than(x, y) 16 | 17 | def train_test_split(x, y, train_fraction): 18 | #split into train/test 19 | if train_fraction > 0.99: 20 | x_train = x 21 | y_train = y 22 | x_test = x 23 | y_test = y 24 | else: 25 | num_train = int(train_fraction * len(x)) 26 | x_train = x[:num_train] 27 | y_train = y[:num_train] 28 | x_test = x[num_train:] 29 | y_test = y[num_train:] 30 | 31 | return x_train, y_train, x_test, y_test 32 | 33 | 34 | class Ensemble(object): 35 | """ 36 | """ 37 | 38 | def __init__(self, models, map=map): 39 | """ 40 | models: ensemble models 41 | map: map function, if multiprocessing is desired 42 | """ 43 | self.all_models = models 44 | self._map = map 45 | self.fit_models = [] 46 | 47 | 48 | class CurveModelEnsemble(Ensemble): 49 | """ 50 | 51 | """ 52 | 53 | def __init__(self, models, map=map): 54 | super(CurveModelEnsemble, self).__init__(models, map) 55 | 56 | def fit(self, x, y, train_fraction=0.8): 57 | assert len(x) == len(y) 58 | 59 | model_log_likelihoods = [] 60 | self.fit_models = [] 61 | 62 | x_train, y_train, x_test, y_test = train_test_split(x, y, train_fraction) 63 | 64 | fit_result = self._map( 65 | partial(fit_model, x_train=x_train, y_train=y_train), 66 | self.all_models) 67 | for success, model in fit_result: 68 | if success: 69 | self.fit_models.append(model) 70 | 71 | if len(self.fit_models) == 0: 72 | logging.warn("EnsembleCurveModel couldn't fit any models!!!") 73 | return False 74 | 75 | model_log_likelihoods = self._map( 76 | partial(model_log_likelihood, x_test=x_test, y_test=y_test), 77 | self.fit_models) 78 | 79 |
normalizing_constant = logsumexp(model_log_likelihoods) 80 | 81 | self.model_probabilities = [np.exp(log_lik - normalizing_constant) for log_lik in model_log_likelihoods] 82 | return True 83 | 84 | def posterior_prob_x_greater_than(self, x, y): 85 | """ 86 | The probability under the models that a value y is exceeded at position x. 87 | IMPORTANT: if all models fail, by definition the probability is 1.0 and NOT 0.0 88 | """ 89 | if len(self.fit_models) == 0: 90 | return 1.0 91 | 92 | models_prob_x_greater_than = model_log_likelihoods = self._map( 93 | partial(model_posterior_prob_x_greater_than, x=x, y=y), 94 | self.fit_models) 95 | 96 | overall_prob = 0 97 | for prob_x_greater_than, model_prob in zip(models_prob_x_greater_than, self.model_probabilities): 98 | overall_prob += model_prob * prob_x_greater_than 99 | return overall_prob 100 | 101 | def posterior_log_likelihood(self, x, y): 102 | log_liks = [] 103 | for model, model_prob in zip(self.fit_models, self.model_probabilities): 104 | log_lik = model.posterior_log_likelihood(x, y) 105 | log_liks.append(np.log(model_prob) + log_lik) 106 | return logsumexp(log_liks) 107 | 108 | def predict(self, x): 109 | if np.isscalar(x): 110 | y = 0 111 | else: 112 | y = np.zeros(x.shape) 113 | for model, model_prob in zip(self.fit_models, self.model_probabilities): 114 | y += model_prob * model.predict(x) 115 | return y 116 | 117 | def __str__(self): 118 | ret = [] 119 | model_names = [model.function.__name__ for model in self.fit_models] 120 | for model_prob, model_name in zip(self.model_probabilities, model_names): 121 | ret.append("%s: %f\n" % (model_name, model_prob)) 122 | return "".join(ret) 123 | 124 | def serialize(self, fname): 125 | import pickle 126 | map_tmp = self._map 127 | self._map = None 128 | pickle.dump(self, open(fname, "wb")) 129 | self._map = map_tmp 130 | 131 | 132 | 133 | class CurveEnsemble(Ensemble): 134 | """ 135 | 1. MCMC fitting 136 | 2. Now take each theta as a model 137 | 3. Make predictions as an weighted average of those models 138 | The weight is the (normalized) likelihood of some held out validation data. 139 | """ 140 | 141 | def __init__(self, models, map=map): 142 | super(CurveEnsemble, self).__init__(models, map) 143 | 144 | def fit(self, x, y, train_fraction=0.8): 145 | assert len(x) == len(y) 146 | 147 | model_log_likelihoods = [] 148 | self.fit_models = [] 149 | 150 | x_train, y_train, x_test, y_test = train_test_split(x, y, train_fraction) 151 | 152 | fit_result = self._map( 153 | partial(fit_model, x_train=x_train, y_train=y_train), 154 | self.all_models) 155 | for success, model in fit_result: 156 | if success: 157 | self.fit_models.append(model) 158 | 159 | if len(self.fit_models) == 0: 160 | logging.warn("EnsembleCurveModel couldn't fit any models!!!") 161 | return 162 | 163 | #Now we interpret each theta as a separate model 164 | #TODO: parallelize! 
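#Weighting scheme used below: every burned-in theta of every fitted model
#gets a weight proportional to exp(ln_likelihood(theta, x_test, y_test)),
#normalized jointly over all (model, theta) pairs via logsumexp so that the
#weights of all pairs sum to one; model_probabilities then sums these
#weights per model family.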
165 | all_log_likelihoods = [] 166 | for model in self.fit_models: 167 | model_log_likelihoods = [] 168 | thetas = model.get_burned_in_samples() 169 | for theta_idx in range(thetas.shape[0]): 170 | theta = thetas[theta_idx,:] 171 | log_likelihood = model.ln_likelihood(theta, x_test, y_test) 172 | model_log_likelihoods.append(log_likelihood) 173 | all_log_likelihoods.append(model_log_likelihoods) 174 | 175 | self.model_theta_probabilities = [np.exp(model_log_likelihoods - logsumexp(model_log_likelihoods)) for model_log_likelihoods in all_log_likelihoods] 176 | 177 | normalizing_constant = logsumexp(all_log_likelihoods) 178 | 179 | normalize = lambda log_lik: np.exp(log_lik - normalizing_constant) 180 | 181 | self.all_model_probabilities = [[normalize(log_lik) for log_lik in model_log_likelihoods] for model_log_likelihoods in all_log_likelihoods] 182 | 183 | #sum up on a per model family basis: 184 | self.model_probabilities = [sum(model_probabilities) for model_probabilities in self.all_model_probabilities] 185 | 186 | 187 | def posterior_prob_x_greater_than(self, x, y): 188 | """ 189 | The probability under the models that a value y is exceeded at position x. 190 | IMPORTANT: if all models fail, by definition the probability is 1.0 and NOT 0.0 191 | """ 192 | if len(self.fit_models) == 0: 193 | return 1.0 194 | 195 | overall_prob = 0 196 | for model, theta_model_probabilities in zip(self.fit_models, self.all_model_probabilities): 197 | thetas = model.get_burned_in_samples() 198 | for theta_idx, theta_model_probability in zip(range(thetas.shape[0]), theta_model_probabilities): 199 | theta = thetas[theta_idx, :] 200 | overall_prob += theta_model_probability * model.prob_x_greater_than(x, y, theta) 201 | return overall_prob 202 | 203 | 204 | def predict(self, x): 205 | if np.isscalar(x): 206 | y = 0 207 | else: 208 | y = np.zeros(x.shape) 209 | #TOOD: implement!! 
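#A possible (hypothetical) implementation sketch, mirroring
#posterior_prob_x_greater_than above: weight each burned-in theta's mean
#prediction by its probability from fit(), e.g.
#  for model, theta_probs in zip(self.fit_models, self.all_model_probabilities):
#      thetas = model.get_burned_in_samples()
#      for theta, theta_prob in zip(thetas, theta_probs):
#          y += theta_prob * model.predict_given_theta(x, theta)[0]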
210 | return y 211 | 212 | def __str__(self): 213 | ret = [] 214 | model_names = [model.function.__name__ for model in self.fit_models] 215 | for model_prob, model_name in zip(self.model_probabilities, model_names): 216 | ret.append("%s: %f\n" % (model_name, model_prob)) 217 | return "".join(ret) 218 | -------------------------------------------------------------------------------- /pylrpredictor/mcmcmodelplotter.py: -------------------------------------------------------------------------------- 1 | from pylab import * 2 | import triangle 3 | import random 4 | 5 | 6 | def greek_label_mapping(oldlabels): 7 | labels = [] 8 | for param_name in oldlabels: 9 | if param_name in ["alpha", "beta", "delta", "sigma"]:#"kappa", 10 | labels.append("$\%s$" % param_name) 11 | else: 12 | labels.append("$%s$" % param_name) 13 | return labels 14 | 15 | class MCMCCurveModelPlotter(object): 16 | def __init__(self, model): 17 | self.model = model 18 | 19 | def trace_plot(self,rasterized=False): 20 | num_plots = len(self.model.all_param_names) 21 | 22 | fig, axes = subplots(num_plots, 1, sharex=True, figsize=(8, 9)) 23 | 24 | for idx, param_name in enumerate(self.model.all_param_names): 25 | 26 | axes[idx].plot(self.model.mcmc_chain[:, :, idx].T, color="k", 27 | alpha=0.4, rasterized=rasterized) 28 | axes[idx].yaxis.set_major_locator(MaxNLocator(5)) 29 | #axes[0].axhline(m_true, color="#888888", lw=2) 30 | axes[idx].set_ylabel("$%s$" % param_name) 31 | 32 | if idx == num_plots-1: 33 | axes[idx].set_xlabel("step number") 34 | 35 | tight_layout(h_pad=0.0) 36 | 37 | 38 | def triangle_plot(self, labels=None, truths=None): 39 | samples = self.model.get_burned_in_samples() 40 | if labels is None: 41 | labels = greek_label_mapping(self.model.all_param_names) 42 | fig = triangle.corner(samples, 43 | labels=labels, 44 | truths=truths) 45 | 46 | def posterior_sample_plot(self, x, y=None, vline=None, 47 | xaxislabel="$x$", yaxislabel="$y$", alpha=0.3, 48 | label="", color="k", x_axis_scale=0.1, nsamples=50, 49 | plot_ml_estimate=False, ml_estimate_color="#4682b4", 50 | rasterized=False): 51 | samples = self.model.get_burned_in_samples() 52 | 53 | x_plot = x_axis_scale*np.asarray(x) 54 | 55 | if y is not None: 56 | plot(x_plot, y, color="r", lw=2, alpha=0.8, rasterized=rasterized) 57 | 58 | # Plot some samples onto the data. 
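# Each theta drawn at random from the burned-in chain gives one candidate curve;
# predict_given_theta returns its mean prediction and noise level sigma, which
# are drawn as a faint line plus a lightly shaded +/- sigma band.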
59 | for idx, theta in enumerate(samples[np.random.randint(len(samples), size=nsamples)]): 60 | #for idx, theta in enumerate(samples): 61 | #print theta 62 | predictive_mu, sigma = self.model.predict_given_theta(x, theta) 63 | 64 | if idx == 0: 65 | plot(x_plot, predictive_mu, color=color, alpha=alpha, label=label, rasterized=rasterized) 66 | else: 67 | plot(x_plot, predictive_mu, color=color, alpha=alpha, rasterized=rasterized) 68 | fill_between(x_plot, predictive_mu-sigma, predictive_mu+sigma, color=color, alpha=0.01, rasterized=rasterized) 69 | 70 | if plot_ml_estimate: 71 | ml_theta = self.model.ml_curve_model.ml_params 72 | predictive_mu, sigma = self.model.predict_given_theta(x, ml_theta) 73 | plot(x_plot, predictive_mu, alpha=1.0, color=ml_estimate_color, lw=3, rasterized=rasterized) 74 | fill_between(x_plot, predictive_mu-sigma, predictive_mu+sigma, color=ml_estimate_color, alpha=0.3, rasterized=rasterized) 75 | 76 | 77 | if vline is not None: 78 | axvline(x_axis_scale*vline, color="k") 79 | ylim(0, 1) 80 | xlabel(xaxislabel) 81 | ylabel(yaxislabel) 82 | tight_layout() 83 | 84 | 85 | 86 | def predictive_density_plot(self, x): 87 | x_lin = linspace(0., 1., 100) 88 | plot(x_lin, self.model.predictive_density(x, x_lin)); 89 | 90 | 91 | 92 | class MCMCCurveModelCombinationPlotter(object): 93 | def __init__(self, model): 94 | self.model = model 95 | self.colors = ['r', 'g', 'b', 'y', 'cyan', 'magenta', 'Chocolate', 'Crimson', 'DeepSkyBlue', 'Khaki'] 96 | 97 | def trace_plot(self, figsize=None): 98 | num_plots = self.model.ndim 99 | 100 | if figsize is None: 101 | figsize = (8, num_plots) 102 | fig, axes = subplots(num_plots, 1, sharex=True, figsize=figsize) 103 | 104 | labels = ["$%s$" % (param_name) for model in self.model.fit_models for param_name in model.function_params] 105 | titles = ["%s" % (model.name) for model in self.model.fit_models for param_name in model.function_params] 106 | labels.append("$sigma$") 107 | titles.append("") 108 | labels.extend(["$w%d$" % idx for idx in range(len(self.model.fit_models))]) 109 | titles.extend(["%s weight" % model.name for model in self.model.fit_models]) 110 | 111 | for idx, label, title in zip(range(self.model.ndim), labels, titles): 112 | axes[idx].set_title(title) 113 | axes[idx].set_ylabel(label) 114 | axes[idx].plot(self.model.mcmc_chain[:, :, idx].T, color="k", alpha=0.4) 115 | axes[idx].yaxis.set_major_locator(MaxNLocator(5)) 116 | #axes[0].axhline(m_true, color="#888888", lw=2) 117 | 118 | if idx == num_plots-1: 119 | axes[idx].set_xlabel("step number") 120 | 121 | tight_layout(h_pad=0.0) 122 | 123 | 124 | def triangle_plot(self, labels=None): 125 | samples = self.model.get_burned_in_samples() 126 | if labels is None: 127 | labels = ["$%s$" % (param_name) for model in self.model.fit_models for param_name in model.function_params] 128 | labels.append("$sigma$") 129 | labels.extend(["$w%d$" % idx for idx in range(len(self.model.fit_models))]) 130 | labels = greek_label_mapping(labels) 131 | fig = triangle.corner(samples, 132 | labels=labels) 133 | 134 | def weights_triangle_plot(self, labels=None, thin=1): 135 | samples = self.model.get_burned_in_samples() 136 | if labels is None: 137 | labels = ["w%d" % idx for idx in range(len(self.model.fit_models))] 138 | #labels = greek_label_mapping(labels) 139 | print labels 140 | fig = triangle.corner( 141 | samples[::thin,-len(self.model.fit_models):])#, 142 | #labels=labels) 143 | 144 | def weights_triangle_plot(self, labels=None, thin=1): 145 | samples = self.model.get_burned_in_samples() 
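#Corner plot restricted to the per-model weight columns of the chain
#(the last len(fit_models) dimensions), thinned by 'thin'.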
146 | if labels is None: 147 | labels = ["w%d" % idx for idx in range(len(self.model.fit_models))] 148 | #labels = greek_label_mapping(labels) 149 | print labels 150 | fig = triangle.corner( 151 | samples[::thin,-len(self.model.fit_models):])#, 152 | #labels=labels) 153 | 154 | def posterior_plot(self, *args, **kwargs): 155 | self.posterior_sample_plot(*args, **kwargs) 156 | 157 | def posterior_sample_plot(self, x, y=None, vline=None, alpha=0.1, label="", 158 | xaxislabel="$x$", yaxislabel="$y$", color="k", x_axis_scale=0.1, 159 | x_lim=None, plot_individual=False, y_plot_lw=2, 160 | rasterized=False): 161 | samples = self.model.get_burned_in_samples() 162 | 163 | if x_lim is None: 164 | x_predict = x 165 | else: 166 | x_predict = np.arange(1, x_lim, 1) 167 | 168 | x = x_axis_scale*x 169 | x_plot = x_axis_scale*np.asarray(x_predict) 170 | 171 | # Plot some samples onto the data. 172 | for idx, theta in enumerate(samples[np.random.randint(len(samples), size=100)]): 173 | predictive_mu, sigma = self.model.predict_given_theta(x_predict, theta) 174 | 175 | if idx == 0: 176 | plot(x_plot, predictive_mu, color=color, alpha=alpha, label=label, rasterized=rasterized) 177 | else: 178 | plot(x_plot, predictive_mu, color=color, alpha=alpha, rasterized=rasterized) 179 | 180 | fill_between(x_plot, predictive_mu-2*sigma, predictive_mu+2*sigma, color=color, 181 | rasterized=rasterized, alpha=0.01) 182 | if not plot_individual: 183 | continue 184 | #plot the contributions of the individual models: 185 | model_params, sigma, model_weights = self.model.split_theta(theta) 186 | if self.model.normalize_weights: 187 | model_weight_sum = np.sum(model_weights) 188 | model_probs = [weight/model_weight_sum for weight in model_weights] 189 | else: 190 | model_probs = model_weights 191 | for fit_model, model_color, model_prob, params in zip(self.model.fit_models, self.colors, model_probs, model_params): 192 | #if fit_model.function.__name__ != "ilog2": 193 | # continue 194 | try: 195 | sub_model_predictive_mu = fit_model.function(x, *params) 196 | #plot(x_plot, model_prob * sub_model_predictive_mu, color=model_color, alpha=alpha) 197 | plot(x_plot, sub_model_predictive_mu, color=model_color, alpha=alpha) 198 | except: 199 | print "error with model: ", fit_model.function.__name__ 200 | 201 | if y is not None: 202 | plot(x, y, color="r", lw=y_plot_lw, alpha=0.8, label="data") 203 | if vline is not None: 204 | axvline(x_axis_scale*vline, color="k") 205 | ylim(0, 1) 206 | xlabel(xaxislabel) 207 | ylabel(yaxislabel) 208 | tight_layout() 209 | 210 | def ml_single_models_plot(self, x, y, vline=None, x_axis_scale=0.1): 211 | lin_comb = None 212 | x_plot = x_axis_scale*np.asarray(x) 213 | for m in self.model.fit_models: 214 | plot(x_plot, m.predict(x), alpha=0.7, label=m.function.__name__, lw=2) 215 | model_weight = 1. 
/ len(self.model.fit_models) 216 | if lin_comb is None: 217 | lin_comb = model_weight * m.predict(x) 218 | else: 219 | lin_comb += model_weight * m.predict(x) 220 | plot(x_plot, m.predict(x), alpha=0.7, label="linear combination", lw=2) 221 | 222 | plot(x_plot, y, color="r", lw=2, alpha=0.8) 223 | if vline is not None: 224 | axvline(x_axis_scale*vline, color="k") 225 | ylim(0, 1) 226 | legend(loc=4) 227 | xlabel("$x$") 228 | ylabel("$y$") 229 | tight_layout() 230 | 231 | 232 | def predictive_density_plot(self, x): 233 | x_lin = linspace(0., 1., 100) 234 | plot(x_lin, self.model.predictive_density(x, x_lin)); 235 | 236 | 237 | class MCMCCurveMixtureModelPlotter(object): 238 | def __init__(self, model): 239 | self.model = model 240 | self.colors = ['r', 'g', 'b', 'y', 'cyan', 'magenta', 'Chocolate', 'Crimson', 'DeepSkyBlue', 'Khaki'] 241 | 242 | def trace_plot(self, figsize=None): 243 | num_plots = self.model.ndim 244 | 245 | if figsize is None: 246 | figsize = (8, num_plots) 247 | fig, axes = subplots(num_plots, 1, sharex=True, figsize=figsize) 248 | 249 | labels = ["$%s$" % (param_name) for model in self.model.fit_models for param_name in model.all_param_names] 250 | titles = ["%s" % (model.name) for model in self.model.fit_models for param_name in model.all_param_names] 251 | labels.extend(["$w%d$" % idx for idx in range(len(self.model.fit_models))]) 252 | titles.extend(["%s weight" % model.name for model in self.model.fit_models]) 253 | 254 | for idx, label, title in zip(range(self.model.ndim), labels, titles): 255 | axes[idx].set_title(title) 256 | axes[idx].set_ylabel(label) 257 | axes[idx].plot(self.model.mcmc_chain[:, :, idx].T, color="k", alpha=0.4) 258 | axes[idx].yaxis.set_major_locator(MaxNLocator(5)) 259 | #axes[0].axhline(m_true, color="#888888", lw=2) 260 | 261 | if idx == num_plots-1: 262 | axes[idx].set_xlabel("step number") 263 | 264 | tight_layout(h_pad=0.0) 265 | 266 | 267 | def triangle_plot(self): 268 | samples = self.model.get_burned_in_samples() 269 | 270 | labels = ["$%s$" % param_name for model in self.model.fit_models for param_name in model.all_param_names] 271 | labels.extend(["$%s$" for model in self.model.fit_models]) 272 | fig = triangle.corner(samples, labels=labels) 273 | 274 | 275 | def posterior_plot(self, *args, **kwargs): 276 | self.posterior_sample_plot(*args, **kwargs) 277 | 278 | def posterior_sample_plot(self, x, y, vline=None, alpha=0.8, label="", color="k", x_axis_scale=0.1): 279 | samples = self.model.get_burned_in_samples() 280 | 281 | x_plot = x_axis_scale*np.asarray(x) 282 | 283 | # Plot some samples onto the data. 284 | for idx, theta in enumerate(samples[np.random.randint(len(samples), size=100)]): 285 | predictive_mu = self.model.predict_given_theta(x, theta) 286 | 287 | if idx == 0: 288 | plot(x_plot, predictive_mu, color=color, alpha=alpha, label=label) 289 | else: 290 | plot(x_plot, predictive_mu, color=color, alpha=alpha) 291 | 292 | plot(x_plot, y, color="r", lw=2, alpha=0.8) 293 | if vline is not None: 294 | axvline(x_axis_scale*vline, color="k") 295 | ylim(0, 1) 296 | xlabel("$x$") 297 | ylabel("$y$") 298 | tight_layout() 299 | 300 | 301 | class EnsemblePlotter(object): 302 | 303 | def __init__(self, 304 | ensemble_curve_model, 305 | colors=['r', 'g', 'b', 'y', 'cyan', 'magenta', 'Chocolate', 'Crimson', 'DeepSkyBlue', 'Khaki']): 306 | self.ensemble_curve_model = ensemble_curve_model 307 | self.colors = colors 308 | assert len(colors) >= len(ensemble_curve_model.all_models), "Not enough colors for plot all fit models. 
Supply more colors!" 309 | #TODO: always use the same color for the same model! 310 | self.model_colors = {model.name: model_color for model, model_color in zip(ensemble_curve_model.all_models, colors)} 311 | 312 | def posterior_plot(self, x, y, vline=None, num_curves=100, x_label="epochs", y_label="accuracy", x_axis_scale=0.1): 313 | """ 314 | x, y: data the posterior will be plotted upon. 315 | num_curves: the number of curves to plot 316 | x_axis_scale: scale the values on the xaxis (only for plotting but not passed to the function) 317 | """ 318 | 319 | x_plot = x_axis_scale*np.asarray(x) 320 | 321 | for i in range(num_curves): 322 | predictive_mu, sigma, color = self.get_random_curve(x) 323 | 324 | plot(x_plot, predictive_mu, color=color, alpha=0.1) 325 | 326 | fill_between(x_plot, predictive_mu-2*sigma, predictive_mu+2*sigma, color=color, alpha=0.01) 327 | 328 | 329 | plot(x_plot, y, color="k", lw=2, alpha=0.8) 330 | if vline is not None: 331 | axvline(x_axis_scale*vline, color="k") 332 | ylim(0, 1) 333 | xlabel(x_label) 334 | ylabel(y_label) 335 | tight_layout() 336 | 337 | def get_random_curve(self, x): 338 | raise NotImplementedError("get_random_curve needs to be overriden") 339 | 340 | 341 | class CurveModelEnsemblePlotter(EnsemblePlotter): 342 | 343 | def __init__(self, *args, **kwargs): 344 | super(CurveModelEnsemblePlotter, self).__init__(*args, **kwargs) 345 | 346 | def get_random_curve(self, x): 347 | """ 348 | Sample a single curve under the given ensemble model. 349 | """ 350 | #sample model: 351 | model_idx = np.random.multinomial(1, self.ensemble_curve_model.model_probabilities).argmax() 352 | model = self.ensemble_curve_model.fit_models[model_idx] 353 | if model.name in self.model_colors: 354 | model_color = self.model_colors[model.name] 355 | else: 356 | print "No color defined for model %s" % model.name 357 | #sample curve: 358 | model_samples = model.get_burned_in_samples() 359 | theta_idx = np.random.randint(0, model_samples.shape[0], 1)[0] 360 | theta = model_samples[theta_idx, :] 361 | 362 | params, sigma = model.split_theta(theta) 363 | predictive_mu = model.function(x, **params) 364 | return predictive_mu, sigma, model_color 365 | 366 | 367 | class CurveEnsemblePlotter(EnsemblePlotter): 368 | def __init__(self, *args, **kwargs): 369 | super(CurveEnsemblePlotter, self).__init__(*args, **kwargs) 370 | 371 | 372 | def get_random_curve(self, x): 373 | #sample model: 374 | model_idx = np.random.multinomial(1, self.ensemble_curve_model.model_probabilities).argmax() 375 | model = self.ensemble_curve_model.fit_models[model_idx] 376 | model_theta_probabilities = self.ensemble_curve_model.model_theta_probabilities[model_idx] 377 | 378 | if model.name in self.model_colors: 379 | model_color = self.model_colors[model.name] 380 | else: 381 | print "No color defined for model %s" % model.name 382 | 383 | model_samples = model.get_burned_in_samples() 384 | 385 | theta_idx = np.random.multinomial(1, model_theta_probabilities).argmax() 386 | theta = model_samples[theta_idx, :] 387 | params, sigma = model.split_theta(theta) 388 | predictive_mu = model.function(x, **params) 389 | return predictive_mu, sigma, model_color 390 | 391 | -------------------------------------------------------------------------------- /pylrpredictor/modelfactory.py: -------------------------------------------------------------------------------- 1 | from pylrpredictor.curvefunctions import all_models, curve_combination_models, curve_ensemble_models, model_defaults 2 | from pylrpredictor.curvemodels 
import MCMCCurveModel, LinearMCMCCurveModel 3 | from pylrpredictor.ensemblecurvemodel import CurveEnsemble, CurveModelEnsemble 4 | from pylrpredictor.mcmcmodelplotter import MCMCCurveModelPlotter, CurveModelEnsemblePlotter, CurveEnsemblePlotter 5 | from pylrpredictor.mcmcmodelplotter import MCMCCurveModelCombinationPlotter 6 | from pylrpredictor.curvemodels import MLCurveModel, LinearCurveModel 7 | from pylrpredictor.curvemodels import MCMCCurveModelCombination 8 | 9 | 10 | def setup_model_combination(xlim, 11 | models=curve_combination_models, 12 | recency_weighting=False, 13 | normalize_weights=True, 14 | monotonicity_constraint=False, 15 | soft_monotonicity_constraint=True, 16 | nthreads=1): 17 | 18 | curve_models = [] 19 | for model_name in models: 20 | if model_name == "linear": 21 | m = LinearCurveModel() 22 | else: 23 | if model_name in model_defaults: 24 | m = MLCurveModel(function=all_models[model_name], 25 | default_vals=model_defaults[model_name], 26 | recency_weighting=recency_weighting) 27 | else: 28 | m = MLCurveModel(function=all_models[model_name], 29 | recency_weighting=recency_weighting) 30 | curve_models.append(m) 31 | 32 | model_combination = MCMCCurveModelCombination(curve_models, 33 | xlim=xlim, 34 | recency_weighting=recency_weighting, 35 | normalize_weights=normalize_weights, 36 | monotonicity_constraint=monotonicity_constraint, 37 | soft_monotonicity_constraint=soft_monotonicity_constraint, 38 | nthreads=nthreads) 39 | return model_combination 40 | 41 | 42 | pool = None 43 | def setup_curve_model(ensemble_type="curve_model", recency_weighting=False, pool_size=16): 44 | """ 45 | type: either curve_model or curve 46 | """ 47 | if pool_size > 1: 48 | pool = Pool(pool_size) 49 | map_fun = pool.map 50 | else: 51 | map_fun = map 52 | 53 | ensemble_models = [] 54 | for model_name in curve_ensemble_models: 55 | if model_name == "linear": 56 | m = LinearCurveModel() 57 | else: 58 | if model_name in model_defaults: 59 | m = MLCurveModel(function=all_models[model_name], 60 | default_vals=model_defaults[model_name], 61 | recency_weighting=recency_weighting) 62 | else: 63 | m = MLCurveModel(function=all_models[model_name], 64 | recency_weighting=recency_weighting) 65 | ensemble_models.append(m) 66 | 67 | if ensemble_type == "curve_model": 68 | ensemble_curve_model = CurveModelEnsemble(ensemble_models, map=map_fun) 69 | elif ensemble_type == "curve": 70 | ensemble_curve_model = CurveEnsemble(ensemble_models, map=map_fun) 71 | else: 72 | assert False, "unkown ensemble type" 73 | 74 | return ensemble_curve_model 75 | 76 | 77 | def create_model(model_type, xlim, nthreads=1, recency_weighting=False): 78 | """ 79 | type: either curve_combination, curve_model or curve 80 | curve_combination: Bayesian Model curve_combination 81 | curve_model: Bayesian Model Averaging 82 | xlim: the target point that we want to predict eventually 83 | nthreads: 1 for no parallelization 84 | """ 85 | if model_type == "curve_combination": 86 | curve_model = setup_model_combination( 87 | xlim=xlim, 88 | recency_weighting=recency_weighting, 89 | nthreads=nthreads) 90 | elif model_type == "curve_model" or model_type == "curve": 91 | curve_model = setup_curve_model( 92 | ensemble_type=model_type, 93 | recency_weighting=recency_weighting, 94 | pool_size=nthreads) 95 | return curve_model 96 | 97 | 98 | def create_plotter(model_type, model): 99 | if model_type == "curve_combination": 100 | return MCMCCurveModelCombinationPlotter(model) 101 | elif model_type == "curve_model" or model_type == "curve": 102 | return 
CurveModelEnsemblePlotter(ensemble_curve_model) 103 | -------------------------------------------------------------------------------- /pylrpredictor/terminationcriterion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Input: 3 | learning_curve.txt a file that contains the current learning curve 4 | ybest.txt the best result achieved so far 5 | xlim.txt the x position we are targetting 6 | 7 | Output: 8 | y_predict.txt prediction of performance at xlim (in case the exit code is 1) 9 | 10 | 11 | Reads learning_curve.txt and make a prediction whether to cancel the current run, 12 | based on the likelihood that we will exceed ybest at xlim. The result will 13 | be set in the return code. 14 | 15 | Returns: 16 | exit code 0 means: continue running the algorithm. 17 | exit code 1 means: save to cancel the run. 18 | """ 19 | #TODO: remove num_cut from xlim! 20 | import os 21 | import sys 22 | import argparse 23 | 24 | from caffe.proto import caffe_pb2 25 | import google 26 | from google.protobuf import text_format 27 | 28 | import numpy as np 29 | 30 | from pylrpredictor.modelfactory import create_model, setup_model_combination 31 | 32 | IMPROVEMENT_PROB_THRESHOLD = 0.05 33 | PREDICTIVE_STD_THRESHOLD = 0.005 34 | 35 | PREDICTION_THINNING = 10 36 | NTHREADS = 4 37 | 38 | 39 | def cut_beginning(y, threshold=0.05, look_ahead=5): 40 | """ 41 | we start at a point where we are bigger than the initial value for look_ahead steps 42 | """ 43 | if len(y) < look_ahead: 44 | return y 45 | num_cut = 0 46 | for idx in range(len(y)-look_ahead): 47 | start_here = True 48 | for idx_ahead in range(idx, idx+look_ahead): 49 | if not (y[idx_ahead] - y[0] > threshold): 50 | start_here = False 51 | if start_here: 52 | num_cut = idx 53 | break 54 | return y[num_cut:] 55 | 56 | 57 | def get_xlim(): 58 | assert os.path.exists("caffenet_solver.prototxt") 59 | solver = caffe_pb2.SolverParameter() 60 | solver_txt = open("caffenet_solver.prototxt").read() 61 | try: 62 | google.protobuf.text_format.Merge(solver_txt, solver) 63 | except Exception as e: 64 | #this may happen if fields are added. However everything else should be parse 65 | #hence, it's ok to be ignored 66 | print "error parsing solver: ", str(e) 67 | assert solver.max_iter > 0 68 | assert solver.test_interval > 0 69 | return solver.max_iter / float(solver.test_interval) 70 | 71 | 72 | class TerminationCriterion(object): 73 | def __init__(self, nthreads, prob_x_greater_type): 74 | open("helloworld", "w").write("test") 75 | self.prob_x_greater_type = prob_x_greater_type 76 | print "prob_x_greater_type: %s" % prob_x_greater_type 77 | #just make sure there is no y_predict file from previous runs: 78 | if os.path.exists("y_predict.txt"): 79 | os.remove("y_predict.txt") 80 | models = ["vap", "ilog2", "weibull", "pow3", "pow4", "loglog_linear", 81 | "mmf", "janoschek", "dr_hill_zero_background", "log_power", 82 | "exp4"] 83 | xlim = get_xlim() 84 | print "xlim: %d" % (xlim) 85 | self.xlim = xlim 86 | model = setup_model_combination(#create_model( 87 | #"curve_combination", 88 | models=models, 89 | xlim=xlim, 90 | recency_weighting=True, 91 | nthreads=nthreads) 92 | self.model = model 93 | 94 | def run(self): 95 | pass 96 | 97 | def predict(self): 98 | """ 99 | predict f(x), returns 1 if not successful 100 | """ 101 | #we're are most likely not going to improve, stop! 
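#Exit code 1 (cancel) is only returned if the extrapolated accuracy passes the
#sanity check below: accuracies live in [0, 1], so a prediction outside that
#range signals a failed fit and we keep running instead.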
102 | #let's made a prediction of the accuracy that will most likely be reached, that will be returned to the optimizer 103 | y_predict = self.model.predict(self.xlim, thin=PREDICTION_THINNING) 104 | #let's do a sanity check: 105 | if y_predict >= 0. and y_predict <= 1.0: 106 | with open("y_predict.txt", "w") as y_predict_file: 107 | y_predict_file.write(str(y_predict)) 108 | print "probably only going to reach %f, stopping..." % y_predict 109 | return 1 110 | else: 111 | #we did not pass the sanity check.. let's not report this to the optimizer 112 | #and pretend nothing happened 113 | print "didn't pass sanity check with predicted value %f" % y_predict 114 | return 0 115 | 116 | 117 | class ConservativeTerminationCriterion(TerminationCriterion): 118 | """ 119 | Will evaluate p(y > y_best) and stop if the result doesn't look promising. 120 | In any other case we will continue running. 121 | """ 122 | def __init__(self, nthreads, prob_x_greater_type, predictive_std_threshold=None): 123 | super(ConservativeTerminationCriterion, self).__init__(nthreads, prob_x_greater_type) 124 | self.predictive_std_threshold = predictive_std_threshold 125 | 126 | def run(self): 127 | if not os.path.exists("ybest.txt"): 128 | #no ybest yet... we can't do much 129 | print "not ybest yet...exiting" 130 | return 0 131 | ybest = float(open("ybest.txt").read()) 132 | assert os.path.exists("learning_curve.txt"), "no learning_curve.txt ... nothing to do" 133 | 134 | y = np.loadtxt("learning_curve.txt") 135 | 136 | y_curr_best = np.max(y) 137 | 138 | if y_curr_best > ybest: 139 | #we already exceeded ybest ... let the other criterions decide when to stop 140 | print "Already better than ybest... not evaluating f(y)>f(y_best)" 141 | return 0 142 | 143 | #TODO subtract num_cut from xlim! 144 | y = cut_beginning(y) 145 | x = np.asarray(range(1, len(y)+1)) 146 | 147 | if not self.model.fit(x, y): 148 | #failed fitting... not cancelling 149 | print "failed fitting the model" 150 | return 0 151 | 152 | if self.prob_x_greater_type == "posterior_prob_x_greater_than": 153 | prob_gt_ybest_xlast = self.model.posterior_prob_x_greater_than(self.xlim, 154 | ybest, thin=PREDICTION_THINNING) 155 | else: 156 | prob_gt_ybest_xlast = self.model.posterior_mean_prob_x_greater_than(self.xlim, 157 | ybest, thin=PREDICTION_THINNING) 158 | 159 | print "p(y>y_best) = %f" % prob_gt_ybest_xlast 160 | 161 | if prob_gt_ybest_xlast < IMPROVEMENT_PROB_THRESHOLD: 162 | if self.predictive_std_threshold is None: 163 | return self.predict() 164 | else: 165 | print "predictive_std_threshold set, checking the predictive_std first" 166 | predictive_std = self.model.predictive_std(self.xlim, thin=PREDICTION_THINNING) 167 | print "predictive_std: %f" % predictive_std 168 | 169 | if predictive_std < self.predictive_std_threshold: 170 | print "predicting..." 171 | return self.predict() 172 | else: 173 | print "continue evaluating" 174 | #we are gonna wait before we become more certain about the outcome! 175 | return 0 176 | 177 | else: 178 | print "continue evaluating" 179 | #we are probably still going to improve 180 | return 0 181 | 182 | 183 | class OptimisticTerminationCriterion(TerminationCriterion): 184 | """ 185 | Similar to the ConservativeTerminationCriterion will evaluate p(y > y_best) 186 | and stop if the result doesn't look promising. 187 | However additionally, if the model is confident in the prediction we will still 188 | stop to save time, at the risk of making a wrong prediction that we take. 
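Unlike the conservative criterion, a ybest value is not required: a sufficiently low predictive standard deviation alone is enough to stop and report the prediction.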
189 | """ 190 | def __init__(self, nthreads, 191 | prob_x_greater_type, 192 | predictive_std_threshold=PREDICTIVE_STD_THRESHOLD): 193 | if predictive_std_threshold is None: 194 | predictive_std_threshold = PREDICTIVE_STD_THRESHOLD 195 | assert predictive_std_threshold > 0 196 | self.predictive_std_threshold = predictive_std_threshold 197 | super(OptimisticTerminationCriterion, self).__init__(nthreads, prob_x_greater_type) 198 | 199 | 200 | def run(self): 201 | if not os.path.exists("learning_curve.txt"): 202 | print "no learning_curve.txt ... nothing to do" 203 | return 0 204 | 205 | if os.path.exists("ybest.txt"): 206 | ybest = float(open("ybest.txt").read()) 207 | else: 208 | ybest = None 209 | y = np.loadtxt("learning_curve.txt") 210 | 211 | y_curr_best = np.max(y) 212 | 213 | #TODO subtract num_cut from xlim! 214 | y = cut_beginning(y) 215 | x = np.asarray(range(1, len(y)+1)) 216 | 217 | if not self.model.fit(x, y): 218 | #failed fitting... not cancelling 219 | return 0 220 | 221 | predictive_std = self.model.predictive_std(self.xlim, thin=PREDICTION_THINNING) 222 | print "predictive_std: %f" % predictive_std 223 | 224 | if predictive_std < self.predictive_std_threshold: 225 | #the model is pretty sure about the prediction: stop! 226 | print "predictive_std low, predicting and stopping..." 227 | return self.predict() 228 | elif ybest is not None: 229 | print "predictive_std high, let's check the probably to get higher than the current ybest" 230 | #we're still checking if maybe all the probability is below ybest 231 | if self.prob_x_greater_type == "posterior_prob_x_greater_than": 232 | prob_gt_ybest_xlast = self.model.posterior_prob_x_greater_than(self.xlim, 233 | ybest, thin=PREDICTION_THINNING) 234 | else: 235 | prob_gt_ybest_xlast = self.model.posterior_mean_prob_x_greater_than(self.xlim, 236 | ybest, thin=PREDICTION_THINNING) 237 | 238 | print "p(y>y_best) = %f" % prob_gt_ybest_xlast 239 | 240 | if prob_gt_ybest_xlast < IMPROVEMENT_PROB_THRESHOLD: 241 | return self.predict() 242 | else: 243 | print "continue evaluating" 244 | #we are probably still going to improve 245 | return 0 246 | else: 247 | print "neither the predictive_std is low nor is there a ybest ... 
continue" 248 | return 0 249 | 250 | 251 | 252 | 253 | def main(mode="conservative", 254 | prob_x_greater_type="posterior_prob_x_greater_than", 255 | nthreads=NTHREADS, 256 | predictive_std_threshold=None): 257 | ret = 0 258 | try: 259 | open("termination_criterion_running_pid", "w").write(str(os.getpid())) 260 | 261 | assert prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"], ("prob_x_greater_type unkown %s" % prob_x_greater_type) 262 | 263 | #ret = run_prediction(nthreads) 264 | #return ret 265 | if mode == "conservative": 266 | term_crit = ConservativeTerminationCriterion(nthreads, 267 | prob_x_greater_type, 268 | predictive_std_threshold=predictive_std_threshold) 269 | ret = term_crit.run() 270 | elif mode == "optimistic": 271 | term_crit = OptimisticTerminationCriterion(nthreads, 272 | prob_x_greater_type, 273 | predictive_std_threshold=predictive_std_threshold) 274 | ret = term_crit.run() 275 | else: 276 | print "The mode can either be conservative or optimistic" 277 | ret = 0 278 | except Exception as e: 279 | import traceback 280 | with open("term_crit_error.txt", "a") as error_log: 281 | error_log.write(str(traceback.format_exc())) 282 | error_log.write(str(e)) 283 | finally: 284 | if os.path.exists("termination_criterion_running"): 285 | os.remove("termination_criterion_running") 286 | if os.path.exists("termination_criterion_running_pid"): 287 | os.remove("termination_criterion_running_pid") 288 | return ret 289 | 290 | if __name__ == "__main__": 291 | parser = argparse.ArgumentParser(description='Termination criterion.') 292 | parser.add_argument('--nthreads', type=int, default=1, help='number of threads to launch') 293 | parser.add_argument('--mode', type=str, default="conservative", help='either conservative or optimistic') 294 | parser.add_argument('--prob-x-greater-type', type=str, default="posterior_prob_x_greater_than", help='either posterior_mean_prob_x_greater_than or posterior_prob_x_greater_than') 295 | parser.add_argument('--predictive-std-threshold', type=float, 296 | default=None, help='threshold for making optimistic guesses about the learning curve.') 297 | 298 | args = parser.parse_args() 299 | 300 | ret = main(mode=args.mode, prob_x_greater_type=args.prob_x_greater_type, 301 | nthreads=args.nthreads, predictive_std_threshold=args.predictive_std_threshold) 302 | 303 | print "exiting with status: %d" % ret 304 | sys.exit(ret) 305 | 306 | -------------------------------------------------------------------------------- /pylrpredictor/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tdomhan/pylearningcurvepredictor/44348022037e27939f8dcc327f6252c09dd741c2/pylrpredictor/tests/__init__.py -------------------------------------------------------------------------------- /pylrpredictor/tests/test_curvemodels.py: -------------------------------------------------------------------------------- 1 | from pylrpredictor.curvemodels import CurveModel, MLCurveModel, LinearCurveModel 2 | from pylrpredictor.curvemodels import masked_mean_x_greater_than 3 | import unittest 4 | import numpy as np 5 | import random 6 | 7 | from pylrpredictor.curvefunctions import all_models, model_defaults 8 | 9 | class CurveModelTest(unittest.TestCase): 10 | 11 | def test_interface(self): 12 | model = CurveModel(function=lambda x: x) 13 | x = np.arange(0, 10) 14 | y = np.arange(0, 10) 15 | self.assertRaises(NotImplementedError, model.fit, x, y) 16 | 
#self.assertRaises(NotImplementedError, model.posterior_log_likelihood, x, y) 17 | 18 | def test_ml_fit(self): 19 | for model_name in all_models.keys(): 20 | if model_name == "linear": 21 | m = LinearCurveModel() 22 | else: 23 | if model_name in model_defaults: 24 | m = MLCurveMovel(function=all_models[model_name], 25 | default_vals=model_defaults[model_name], 26 | recency_weighting=True) 27 | else: 28 | m = MLCurveMovel(function=all_models[model_name], recency_weighting=True) 29 | 30 | #generate some data for the model 31 | x = np.arange(1, 1000) 32 | params = m.default_function_param_array() 33 | params = params + np.random.rand(params.shape[0]) 34 | y = m.function(x, *params) 35 | std = 0.01 36 | y += std*np.random.randn(y.shape[0]) 37 | self.assertTrue(m.fit(x, y)) 38 | print "original params vs fit params:" 39 | print params 40 | print m.ml_params 41 | 42 | def test_masked_mean_x_greater_than(self): 43 | self.assertAlmostEqual(0.5, masked_mean_x_greater_than([0.1, 0.9], 0.5)) 44 | 45 | self.assertAlmostEqual(0.5, masked_mean_x_greater_than([0.1, 0.9, np.nan], 0.5)) 46 | 47 | self.assertAlmostEqual(2./3., masked_mean_x_greater_than([0.1, 0.9, 0.8, np.nan], 0.5)) 48 | 49 | self.assertAlmostEqual(0., masked_mean_x_greater_than([0.1, 0.1, 0.15, np.nan], 0.5)) 50 | 51 | self.assertAlmostEqual(1., masked_mean_x_greater_than([0.8, 0.9, np.nan], 0.5)) 52 | 53 | 54 | #def test_ml_fit(self): 55 | # model1 = random.choice(all_models.keys()) 56 | # model1 = random.choice(all_models.keys()) 57 | 58 | if __name__ == "__main__": 59 | unittest.main() -------------------------------------------------------------------------------- /pylrpredictor/tests/test_terminationcriterion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import os 4 | import argparse 5 | 6 | from subprocess import Popen, PIPE 7 | 8 | from caffe.proto import caffe_pb2 9 | 10 | from pylrpredictor.curvefunctions import all_models, model_defaults 11 | from pylrpredictor.terminationcriterion import main 12 | 13 | 14 | def write_xlim(xlim, test_interval=2): 15 | solver = caffe_pb2.SolverParameter() 16 | 17 | solver.max_iter = xlim * test_interval 18 | solver.test_interval = test_interval 19 | 20 | open("caffenet_solver.prototxt", "w").write(str(solver)) 21 | 22 | 23 | def run_program(cmds): 24 | process = Popen(cmds, stdout=PIPE) 25 | (output, err) = process.communicate() 26 | exit_code = process.wait() 27 | return exit_code 28 | 29 | 30 | real_abort_learning_curve = [ 0.1126, 0.3304, 0.3844, 0.3984, 0.4128, 0.4366, 0.4536, 31 | 0.4664, 0.4742, 0.4858, 0.495 , 0.5038, 0.5136, 0.5198, 32 | 0.522 , 0.5298, 0.539 , 0.5422, 0.5452, 0.5508, 0.5556, 33 | 0.5586, 0.5624, 0.5668, 0.5722, 0.5752, 0.5774, 0.579 , 34 | 0.5822, 0.5848, 0.5852, 0.5878, 0.5904, 0.594 , 0.5974, 35 | 0.5992, 0.5988, 0.6006, 0.6022, 0.6042, 0.6054, 0.6058, 36 | 0.6072, 0.6106, 0.6152, 0.6112, 0.6142, 0.6152, 0.6148, 37 | 0.6172, 0.6168, 0.6194, 0.6198, 0.6208, 0.6206, 0.626 , 38 | 0.627 , 0.627 , 0.626 , 0.6272, 0.6268, 0.6256, 0.6314, 39 | 0.6318, 0.6318, 0.6368, 0.6346, 0.6354, 0.6376, 0.6356, 40 | 0.637 , 0.6394, 0.6426, 0.6432, 0.6418, 0.6428, 0.6448, 41 | 0.6436, 0.6456, 0.6454, 0.649 , 0.648 , 0.6494, 0.6492, 42 | 0.6504, 0.6492, 0.651 , 0.6502, 0.653 , 0.653 , 0.6518, 43 | 0.6564, 0.656 , 0.6552, 0.6542, 0.656 , 0.655 , 0.6564, 44 | 0.657 , 0.6572, 0.6612, 0.6624, 0.6636, 0.6604, 0.6604, 45 | 0.662 , 0.6604, 0.66 , 0.6576, 0.6614, 0.6644, 0.6658, 46 | 0.6676, 0.6688, 0.6676, 
0.6686, 0.6678, 0.6652, 0.666 , 47 | 0.67 , 0.6674, 0.6708, 0.6714, 0.6708, 0.6724, 0.671 , 48 | 0.6702, 0.6716, 0.6716, 0.6736, 0.6692, 0.6742, 0.6766, 49 | 0.6768, 0.6772, 0.676 , 0.6772, 0.6772, 0.6788, 0.678 , 50 | 0.6806, 0.6784, 0.682 , 0.6822, 0.6822, 0.6816, 0.6834, 51 | 0.6822, 0.6828, 0.683 , 0.6858, 0.6838, 0.6826, 0.6886, 52 | 0.6882, 0.6866, 0.6882, 0.6914, 0.6894, 0.6876, 0.685 , 53 | 0.6902, 0.6876, 0.6936, 0.694 , 0.6948, 0.6922, 0.6936, 54 | 0.695 , 0.691 , 0.6886, 0.6896, 0.6942, 0.6918, 0.6962, 55 | 0.698 , 0.699 , 0.6964, 0.6994, 0.698 , 0.6952, 0.6932, 56 | 0.6958, 0.6958, 0.698 , 0.7024, 0.7028, 0.6992, 0.7006, 57 | 0.7038, 0.7016, 0.6986, 0.6994, 0.7012, 0.7 , 0.7046, 58 | 0.704 , 0.703 , 0.7038, 0.701 , 0.7046, 0.7036, 0.7026, 59 | 0.7 , 0.705 , 0.7034, 0.7084, 0.7084, 0.7068, 0.7078, 60 | 0.7098, 0.7078, 0.7076, 0.705 , 0.705 , 0.7074, 0.7084, 61 | 0.711 , 0.7054, 0.7102, 0.7118, 0.7104, 0.7088, 0.7088, 62 | 0.7104, 0.7112, 0.7094, 0.714 , 0.7136, 0.7138, 0.716 , 63 | 0.7146, 0.713 , 0.711 , 0.7108, 0.7124, 0.714 , 0.712 , 64 | 0.7166, 0.7152, 0.713 , 0.7178, 0.716 , 0.7122, 0.715 , 65 | 0.7154, 0.7128, 0.7156, 0.7162, 0.7178, 0.7176, 0.7202, 66 | 0.7212, 0.7164, 0.7164, 0.718 , 0.7172, 0.7188, 0.718 , 67 | 0.7204, 0.719 , 0.721 , 0.7222, 0.7216, 0.7198, 0.719 , 68 | 0.7214, 0.7196, 0.7206, 0.7216, 0.7236, 0.723 , 0.724 , 69 | 0.7234, 0.7236, 0.7238, 0.7208, 0.7202, 0.7198, 0.7226, 70 | 0.7228, 0.7236, 0.7262, 0.7244, 0.7218, 0.7204, 0.7238, 71 | 0.7232, 0.724 , 0.7244, 0.727 , 0.7266, 0.7278, 0.7262, 72 | 0.7274, 0.7246, 0.724 , 0.725 , 0.7254, 0.7236, 0.726 , 73 | 0.7244, 0.7272, 0.7294, 0.7274, 0.7284, 0.7254, 0.725 , 74 | 0.7242, 0.7278, 0.7272, 0.726 , 0.7274, 0.7272, 0.73 , 75 | 0.7302, 0.7286, 0.7238, 0.7294, 0.7286, 0.7264, 0.73 , 76 | 0.7274, 0.7326, 0.7286, 0.7304, 0.7322, 0.7274, 0.7258, 77 | 0.7296, 0.7268, 0.7262, 0.7282, 0.7294, 0.7336, 0.7338, 78 | 0.7328, 0.7316, 0.7286, 0.7322, 0.7318, 0.732 , 0.7302, 79 | 0.732 , 0.734 , 0.7314, 0.7356, 0.7352, 0.7302, 0.7284, 80 | 0.732 , 0.732 , 0.7298, 0.733 , 0.735 , 0.7342, 0.7312, 81 | 0.7346, 0.7358, 0.7318, 0.732 , 0.733 , 0.735 , 0.7318, 82 | 0.735 , 0.7334, 0.7348, 0.7366, 0.7356, 0.734 , 0.7336, 83 | 0.7334, 0.7324, 0.734 , 0.7344, 0.7348, 0.736 , 0.7346, 84 | 0.7342, 0.7374, 0.7362, 0.732 , 0.7324, 0.7368, 0.7346, 85 | 0.7334, 0.7356, 0.7374, 0.7372, 0.7354, 0.7364, 0.7338, 86 | 0.735 , 0.733 , 0.7354, 0.7326, 0.7364, 0.7372, 0.7372, 87 | 0.7364, 0.7356, 0.7384, 0.7344, 0.734 , 0.7326, 0.7378, 88 | 0.7348, 0.7376, 0.7374, 0.737 , 0.7394, 0.739 , 0.7372, 89 | 0.7366, 0.7378, 0.7366, 0.736 , 0.7356, 0.7346, 0.7388, 90 | 0.7348, 0.7378, 0.7388, 0.7378, 0.7356, 0.7354, 0.738 , 91 | 0.7376, 0.7396, 0.7402, 0.741 , 0.7366, 0.7382, 0.7422, 92 | 0.7414, 0.7364, 0.736 , 0.739 , 0.7358, 0.738 , 0.7396, 93 | 0.74 , 0.74 , 0.7432, 0.7416, 0.7384, 0.7404, 0.7378, 94 | 0.737 , 0.7384, 0.741 , 0.7448, 0.7408, 0.741 , 0.7458, 95 | 0.7412, 0.7384, 0.7408, 0.74 , 0.737 , 0.7404, 0.7416, 96 | 0.7414, 0.7396, 0.7408, 0.7446, 0.7432, 0.7416, 0.7376, 97 | 0.7402, 0.7364, 0.7404, 0.7418, 0.7408, 0.7422, 0.7426, 98 | 0.7408, 0.741 , 0.7426, 0.7368, 0.7392, 0.739 , 0.7412, 99 | 0.742 , 0.737 , 0.7426, 0.746 , 0.7394, 0.7392, 0.743 , 100 | 0.742 , 0.7372, 0.7404, 0.741 , 0.7436, 0.74 , 0.7398, 101 | 0.7472, 0.742 , 0.744 , 0.742 , 0.7452, 0.7382, 0.7406, 102 | 0.7414, 0.7406, 0.7398, 0.7452, 0.7436, 0.7414, 0.7444, 103 | 0.7412, 0.7436, 0.741 , 0.74 , 0.7438, 0.7414, 0.745 , 104 | 0.7462, 0.7446, 0.741 , 0.7438, 0.7428, 0.7422, 
0.7412, 105 | 0.741 , 0.7452, 0.7428, 0.7462, 0.7464, 0.7454, 0.7436, 106 | 0.741 , 0.745 , 0.7388, 0.7422, 0.746 , 0.7426, 0.7428, 107 | 0.7466, 0.7464, 0.7452, 0.744 , 0.7456, 0.742 , 0.7394, 108 | 0.741 , 0.7448, 0.7456, 0.742 , 0.7458, 0.7444, 0.7446, 109 | 0.745 , 0.743 , 0.743 , 0.7432, 0.7432, 0.742 , 0.7452, 110 | 0.7468, 0.745 , 0.7452, 0.7438, 0.742 , 0.7436, 0.7444, 111 | 0.7428, 0.7452, 0.7452, 0.7462, 0.747 , 0.7492, 0.7454, 112 | 0.7454, 0.7454, 0.7462, 0.742 , 0.7446, 0.7466, 0.7476, 113 | 0.7474, 0.747 , 0.7454, 0.7412, 0.747 , 0.7462, 0.7474, 114 | 0.7452, 0.7454, 0.7474, 0.7474, 0.7478, 0.7466, 0.7464, 115 | 0.7456] 116 | real_abort_ybest = .80766 117 | real_abort_xlim = 2850 118 | 119 | 120 | class Terminationcriterion(unittest.TestCase): 121 | 122 | def test_conservative_predict_cancel(self): 123 | """ 124 | The termination criterion expects the learning_curve in a file 125 | called learning_curve.txt as well as the current best value in 126 | ybest.txt. We create both files and see if the termination criterion 127 | correctly predicts to cancel or continue running under various artificial 128 | ybest. 129 | """ 130 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 131 | np.random.seed(13) 132 | #generate some data: 133 | for model_name in ["pow3", "log_power"]: 134 | function = all_models[model_name] 135 | params = model_defaults[model_name] 136 | xlim = 500 137 | x = np.arange(1, xlim, 1) 138 | y = function(x, **params) 139 | noise = 0.0005 * np.random.randn(len(y)) 140 | y_noisy = y + noise 141 | y_final = y_noisy[-1] 142 | num_train = 200 143 | np.savetxt("learning_curve.txt", y_noisy[:200]) 144 | write_xlim(xlim) 145 | 146 | print "Actual ybest: %f" % y_noisy[-1] 147 | 148 | #we set ybest to be higher than the final value of this curve 149 | #hence we DO want the evaluation to stop! 150 | open("ybest.txt", "w").write(str(y_final + 0.05)) 151 | open("termination_criterion_running", "w").write("running") 152 | 153 | ret = main(mode="conservative", 154 | prob_x_greater_type=prob_x_greater_type, 155 | nthreads=4) 156 | self.assertEqual(ret, 1) 157 | 158 | self.assertTrue(os.path.exists("y_predict.txt")) 159 | y_predict = float(open("y_predict.txt").read()) 160 | abserr = np.abs(y_predict - y_noisy[-1]) 161 | print "abs error %f" % abserr 162 | self.assertTrue(abserr < 0.03) 163 | 164 | #we set ybest to be lower than the final value of this curve 165 | #hence we DON'T want the evaluation to stop! 166 | open("ybest.txt", "w").write(str(y_final - 0.05)) 167 | open("termination_criterion_running", "w").write("running") 168 | 169 | ret = main(mode="conservative", 170 | prob_x_greater_type=prob_x_greater_type, 171 | nthreads=4) 172 | self.assertEqual(ret, 0) 173 | self.assertFalse(os.path.exists("y_predict.txt")) 174 | self.assertFalse(os.path.exists("termination_criterion_running")) 175 | self.assertFalse(os.path.exists("termination_criterion_running_pid")) 176 | self.cleanup() 177 | 178 | def test_conservative_real_example(self): 179 | """ 180 | The termination criterion expects the learning_curve in a file 181 | called learning_curve.txt as well as the current best value in 182 | ybest.txt. We create both files and see if the termination criterion 183 | correctly predicts to cancel or continue running under various artificial 184 | ybest. 
185 | """ 186 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 187 | np.savetxt("learning_curve.txt", real_abort_learning_curve) 188 | write_xlim(real_abort_xlim) 189 | 190 | open("ybest.txt", "w").write(str(real_abort_ybest)) 191 | open("termination_criterion_running", "w").write("running") 192 | 193 | ret = main(mode="conservative", 194 | prob_x_greater_type=prob_x_greater_type, 195 | nthreads=4) 196 | #ybest is higher than what the curve will ever reach 197 | #hence we expect to cancel the run: 198 | self.assertEqual(ret, 1) 199 | 200 | self.assertTrue(os.path.exists("y_predict.txt")) 201 | self.assertFalse(os.path.exists("termination_criterion_running")) 202 | self.assertFalse(os.path.exists("termination_criterion_running_pid")) 203 | self.cleanup() 204 | 205 | def test_conservative_command_line_args(self): 206 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 207 | np.random.seed(13) 208 | #generate some data: 209 | for model_name in ["pow3", "log_power"]: 210 | function = all_models[model_name] 211 | params = model_defaults[model_name] 212 | xlim = 500 213 | x = np.arange(1, xlim, 1) 214 | y = function(x, **params) 215 | noise = 0.0005 * np.random.randn(len(y)) 216 | y_noisy = y + noise 217 | y_final = y_noisy[-1] 218 | num_train = 200 219 | np.savetxt("learning_curve.txt", y_noisy[:200]) 220 | write_xlim(xlim) 221 | 222 | print "Actual ybest: %f" % y_noisy[-1] 223 | 224 | #we set ybest to be higher than the final value of this curve 225 | #hence we DO want the evaluation to stop! 226 | open("ybest.txt", "w").write(str(y_final + 0.05)) 227 | open("termination_criterion_running", "w").write("running") 228 | 229 | ret = run_program(["python", "-m", "pylrpredictor.terminationcriterion", 230 | "--nthreads", "5", 231 | "--mode", "conservative", 232 | "--prob-x-greater-type", prob_x_greater_type]) 233 | self.assertEqual(ret, 1) 234 | 235 | self.assertTrue(os.path.exists("y_predict.txt")) 236 | y_predict = float(open("y_predict.txt").read()) 237 | 238 | #we set ybest to be lower than the final value of this curve 239 | #hence we DON'T want the evaluation to stop! 240 | open("ybest.txt", "w").write(str(y_final - 0.05)) 241 | open("termination_criterion_running", "w").write("running") 242 | 243 | ret = run_program(["python", "-m", "pylrpredictor.terminationcriterion", 244 | "--nthreads", "5", 245 | "--mode", "conservative", 246 | "--prob-x-greater-type", prob_x_greater_type]) 247 | self.assertEqual(ret, 0) 248 | self.assertFalse(os.path.exists("y_predict.txt")) 249 | self.assertFalse(os.path.exists("termination_criterion_running")) 250 | self.assertFalse(os.path.exists("termination_criterion_running_pid")) 251 | 252 | self.cleanup() 253 | 254 | 255 | def test_conservative_real_example_command_line_args(self): 256 | """ 257 | The termination criterion expects the learning_curve in a file 258 | called learning_curve.txt as well as the current best value in 259 | ybest.txt. We create both files and see if the termination criterion 260 | correctly predicts to cancel or continue running under various artificial 261 | ybest. 
262 | """ 263 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 264 | np.savetxt("learning_curve.txt", real_abort_learning_curve) 265 | write_xlim(real_abort_xlim) 266 | 267 | open("ybest.txt", "w").write(str(real_abort_ybest)) 268 | open("termination_criterion_running", "w").write("running") 269 | 270 | ret = run_program(["python", "-m", "pylrpredictor.terminationcriterion", 271 | "--nthreads", "5", 272 | "--mode", "conservative", 273 | "--prob-x-greater-type", prob_x_greater_type, 274 | #just check that it accepts the value for predictive-std-threshold, it's set too high to have a real influenec 275 | "--predictive-std-threshold", "10."]) 276 | #ybest is higher than what the curve will ever reach 277 | #hence we expect to cancel the run: 278 | self.assertEqual(ret, 1) 279 | 280 | self.assertTrue(os.path.exists("y_predict.txt")) 281 | self.assertFalse(os.path.exists("termination_criterion_running")) 282 | self.assertFalse(os.path.exists("termination_criterion_running_pid")) 283 | self.cleanup() 284 | 285 | 286 | def test_optimistic_real_example_command_line_args(self): 287 | """ 288 | The termination criterion expects the learning_curve in a file 289 | called learning_curve.txt as well as the current best value in 290 | ybest.txt. We create both files and see if the termination criterion 291 | correctly predicts to cancel or continue running under various artificial 292 | ybest. 293 | """ 294 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 295 | np.savetxt("learning_curve.txt", real_abort_learning_curve) 296 | write_xlim(real_abort_xlim) 297 | 298 | open("ybest.txt", "w").write(str(real_abort_ybest)) 299 | open("termination_criterion_running", "w").write("running") 300 | 301 | ret = run_program(["python", "-m", "pylrpredictor.terminationcriterion", 302 | "--nthreads", "5", 303 | "--mode", "optimistic", 304 | "--predictive-std-threshold", str(0.05)]) 305 | #ybest is higher than what the curve will ever reach 306 | #hence we expect to cancel the run: 307 | self.assertEqual(ret, 1) 308 | 309 | ret = run_program(["python", "-m", "pylrpredictor.terminationcriterion", 310 | "--nthreads", "5", 311 | "--mode", "optimistic", 312 | "--predictive-std-threshold", str(0.01)]) 313 | #ybest is higher than what the curve will ever reach 314 | #hence we expect to cancel the run: 315 | self.assertEqual(ret, 1) 316 | 317 | ret = run_program(["python", "-m", "pylrpredictor.terminationcriterion", 318 | "--nthreads", "5", 319 | "--mode", "optimistic"]) 320 | #ybest is higher than what the curve will ever reach 321 | #hence we expect to cancel the run: 322 | self.assertEqual(ret, 1) 323 | 324 | self.assertTrue(os.path.exists("y_predict.txt")) 325 | self.assertFalse(os.path.exists("termination_criterion_running")) 326 | self.assertFalse(os.path.exists("termination_criterion_running_pid")) 327 | self.cleanup() 328 | 329 | 330 | 331 | def test_conservative_predictive_std_predict_cancel(self): 332 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 333 | np.random.seed(13) 334 | #generate some data: 335 | 336 | model_name = "pow3" 337 | function = all_models[model_name] 338 | 339 | params = {'a': 0.52, 'alpha': 0.2, 'c': 0.84} 340 | xlim = 500 341 | x = np.arange(1, xlim, 1) 342 | y = function(x, **params) 343 | noise = 0.01 * np.random.randn(len(y)) 344 | y_noisy = y + noise 345 | y_final = y_noisy[-1] 346 | num_train = 30 347 | np.savetxt("learning_curve.txt", 
y_noisy[:num_train]) 348 | write_xlim(xlim) 349 | 350 | #first check: 351 | #if there's no ybest and the predictive_std is high 352 | #then we want the evaluation to continue 353 | if os.path.exists("ybest.txt"): 354 | os.remove("ybest.txt") 355 | ret = main(mode="conservative", 356 | prob_x_greater_type=prob_x_greater_type, 357 | predictive_std_threshold=0.00001, 358 | nthreads=4) 359 | self.assertEqual(ret, 0) 360 | 361 | print "Actual ybest: %f" % y_noisy[-1] 362 | 363 | #we set ybest to be higher than the final value of this curve 364 | #BUT because the predictive std is still high we don't want to stop 365 | open("ybest.txt", "w").write(str(y_final + 0.05)) 366 | open("termination_criterion_running", "w").write("running") 367 | 368 | ret = main(mode="conservative", 369 | prob_x_greater_type=prob_x_greater_type, 370 | predictive_std_threshold=0.00001, 371 | nthreads=4) 372 | self.assertEqual(ret, 0) 373 | 374 | self.assertFalse(os.path.exists("y_predict.txt")) 375 | 376 | self.cleanup() 377 | 378 | 379 | def test_optimistic_predict_cancel(self): 380 | """ 381 | Optimisitic mode 382 | 383 | The termination criterion expects the learning_curve in a file 384 | called learning_curve.txt as well as the current best value in 385 | ybest.txt. We create both files and see if the termination criterion 386 | correctly predicts to cancel or continue running under various artificial 387 | ybest. 388 | """ 389 | for prob_x_greater_type in ["posterior_mean_prob_x_greater_than", "posterior_prob_x_greater_than"]: 390 | np.random.seed(13) 391 | #generate some data: 392 | 393 | model_name = "pow3" 394 | function = all_models[model_name] 395 | 396 | params = {'a': 0.52, 'alpha': 0.2, 'c': 0.84} 397 | xlim = 500 398 | x = np.arange(1, xlim, 1) 399 | y = function(x, **params) 400 | noise = 0.01 * np.random.randn(len(y)) 401 | y_noisy = y + noise 402 | y_final = y_noisy[-1] 403 | num_train = 30 404 | np.savetxt("learning_curve.txt", y_noisy[:num_train]) 405 | write_xlim(xlim) 406 | 407 | #first check: 408 | #if there's no ybest and the predictive_std is high 409 | #then we want the evaluation to continue 410 | if os.path.exists("ybest.txt"): 411 | os.remove("ybest.txt") 412 | ret = main(mode="optimistic", 413 | prob_x_greater_type=prob_x_greater_type, 414 | nthreads=4) 415 | self.assertEqual(ret, 0) 416 | 417 | print "Actual ybest: %f" % y_noisy[-1] 418 | 419 | #we set ybest to be higher than the final value of this curve 420 | #hence we DO want the evaluation to stop! 421 | open("ybest.txt", "w").write(str(y_final + 0.05)) 422 | open("termination_criterion_running", "w").write("running") 423 | 424 | ret = main(mode="optimistic", 425 | prob_x_greater_type=prob_x_greater_type, 426 | nthreads=4) 427 | self.assertEqual(ret, 1) 428 | 429 | self.assertTrue(os.path.exists("y_predict.txt")) 430 | y_predict = float(open("y_predict.txt").read()) 431 | abserr = np.abs(y_predict-y_noisy[-1]) 432 | self.assertTrue(abserr < 0.05) 433 | print "abs error %f" % abserr 434 | 435 | #we set ybest to be lower than the final value of this curve 436 | #hence we DON'T want the evaluation to stop! 
437 |             #we assume here that, because the model was set up like this,
438 |             #the predictive_std is above the threshold (it should actually be around 0.019)
439 |             open("ybest.txt", "w").write(str(y_final - 0.05))
440 |             open("termination_criterion_running", "w").write("running")
441 | 
442 |             ret = main(mode="optimistic", nthreads=4)
443 |             self.assertEqual(ret, 0)
444 |             self.assertFalse(os.path.exists("y_predict.txt"))
445 |             self.assertFalse(os.path.exists("termination_criterion_running"))
446 |             self.assertFalse(os.path.exists("termination_criterion_running_pid"))
447 | 
448 |             num_train = 300
449 |             np.savetxt("learning_curve.txt", y_noisy[:num_train])
450 |             #we set ybest to be lower than the final value of this curve
451 |             #HOWEVER we expect the predictive std to be around .0027
452 |             #so the run should be cancelled nevertheless
453 |             open("ybest.txt", "w").write(str(y_final - 0.05))
454 |             open("termination_criterion_running", "w").write("running")
455 | 
456 |             ret = main(mode="optimistic",
457 |                 prob_x_greater_type=prob_x_greater_type,
458 |                 nthreads=4)
459 |             self.assertEqual(ret, 1)
460 |             self.assertTrue(os.path.exists("y_predict.txt"))
461 |             y_predict = float(open("y_predict.txt").read())
462 |             abserr = np.abs(y_predict-y_noisy[-1])
463 |             self.assertTrue(abserr < 0.05)
464 |             print "abs error %f" % abserr
465 | 
466 |             self.assertFalse(os.path.exists("termination_criterion_running"))
467 |             self.assertFalse(os.path.exists("termination_criterion_running_pid"))
468 | 
469 |             self.cleanup()
470 | 
471 | 
472 |     def test_error_logging(self):
473 |         """
474 |         Test that, in case of an error, the error is logged to term_crit_error.txt.
475 |         """
476 |         open("ybest.txt", "w").write(str(0.5))
477 |         #e.g. run main without the learning_curve.txt file present
478 |         if os.path.exists("learning_curve.txt"):
479 |             os.remove("learning_curve.txt")
480 |         ret = main()
481 |         self.assertTrue(os.path.exists("term_crit_error.txt"))
482 | 
483 |         os.remove("ybest.txt")
484 | 
485 | 
486 |     def cleanup(self):
487 |         if os.path.exists("learning_curve.txt"):
488 |             os.remove("learning_curve.txt")
489 |         if os.path.exists("ybest.txt"):
490 |             os.remove("ybest.txt")
491 |         if os.path.exists("termination_criterion_running"):
492 |             os.remove("termination_criterion_running")
493 |         if os.path.exists("term_crit_error.txt"):
494 |             os.remove("term_crit_error.txt")
495 | 
496 | 
497 |     def test_predict_no_cancel(self):
498 |         pass
499 | 
500 | 
501 | 
502 | if __name__ == "__main__":
503 |     unittest.main()
504 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup, find_packages
3 | 
4 | # Utility function to read the README file.
5 | # Used for the long_description. It's nice, because now 1) we have a top level
6 | # README file and 2) it's easier to type in the README file than to put a raw
7 | # string in below ...
8 | def read(fname):
9 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
10 | 
11 | setup(
12 |     name = "pylrpredictor",
13 |     version = "0.1",
14 |     author = "Tobias Domhan",
15 |     author_email = "tdomhan@gmail.com",
16 |     install_requires = ['numpy', 'docutils>=0.3', 'setuptools', 'matplotlib'],
17 |     description = ("Predicting learning curves in python"),
18 |     license = "BSD",
19 |     keywords = "python learning curves, prediction",
20 |     url = "http://packages.python.org/an_example_pypi_project",
21 |     packages=find_packages(),#['pylrpredictor'],
22 |     long_description="",
23 | )
24 | 
--------------------------------------------------------------------------------
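
The tests above also show how the termination criterion is driven as a standalone program: the learning curve so far goes into learning_curve.txt (one value per line), the best value reached by any other run goes into ybest.txt, and the module is invoked with python -m pylrpredictor.terminationcriterion. Exit code 1 means "cancel the run" and the extrapolated final value is written to y_predict.txt; exit code 0 means "keep running". A minimal sketch of that flow, assuming the package is installed; the curve values below are made up, and the evaluation horizon (xlim) must additionally be written out, as the tests do through their write_xlim helper.

    import subprocess
    import numpy as np

    # made-up partial learning curve; in practice these are the observed
    # validation values of the run so far
    np.savetxt("learning_curve.txt", [0.50, 0.61, 0.67, 0.70, 0.72])
    # best final value achieved by any other run so far
    open("ybest.txt", "w").write("0.80")

    ret = subprocess.call(["python", "-m", "pylrpredictor.terminationcriterion",
                           "--nthreads", "4",
                           "--mode", "conservative",
                           "--prob-x-greater-type", "posterior_prob_x_greater_than",
                           "--predictive-std-threshold", "0.05"])

    if ret == 1:
        # the run is predicted not to beat ybest: cancel it and use the
        # extrapolated final value written to y_predict.txt
        y_predict = float(open("y_predict.txt").read())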