├── .gitignore ├── README.md ├── acquisition_functions.py ├── bo ├── __init__.py ├── acq │ ├── __init__.py │ ├── acqmap.py │ ├── acqopt.py │ └── acquisition.py ├── bo │ ├── __init__.py │ └── probo.py ├── dom │ ├── __init__.py │ ├── list.py │ └── real.py ├── ds │ ├── __init__.py │ └── makept.py ├── fn │ ├── __init__.py │ └── functionhandler.py ├── pp │ ├── __init__.py │ ├── gp │ │ ├── __init__.py │ │ └── gp_utils.py │ ├── pp_core.py │ ├── pp_gp_george.py │ ├── pp_gp_my_distmat.py │ ├── pp_gp_stan.py │ ├── pp_gp_stan_distmat.py │ └── stan │ │ ├── __init__.py │ │ ├── compile_stan.py │ │ ├── gp_distmat.py │ │ ├── gp_distmat_fixedsig.py │ │ ├── gp_hier2.py │ │ ├── gp_hier2_matern.py │ │ └── gp_hier3.py └── util │ ├── __init__.py │ ├── datatransform.py │ └── print_utils.py ├── darts ├── __init__.py ├── arch.py ├── local_search_runner.py └── run_experiments.sh ├── data.py ├── img ├── local_search_fig.png ├── ls_101_titled.png ├── ls_baselines_101.png ├── ls_cifar10.png ├── ls_cifar100.png ├── ls_cifar10_titled.png ├── ls_imagenet.png ├── real_synth_data.png ├── structured.png ├── uniform_preimages.png └── unstructured.png ├── meta_neural_net.py ├── meta_neuralnet.ipynb ├── metann_runner.py ├── nas_algorithms.py ├── nas_bench ├── __init__.py └── cell.py ├── nas_bench_201 ├── __init__.py └── cell.py ├── notebooks ├── random_walk_autocorrelation.ipynb └── simulate_real_data.ipynb ├── params.py ├── run_experiments_parallel.sh ├── run_experiments_sequential.py └── train_arch_runner.py /.gitignore: -------------------------------------------------------------------------------- 1 | notebooks/*.pdf 2 | notebooks/*.pkl 3 | notebooks/*.npy 4 | notebooks/*.npz 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | .dmypy.json 131 | dmypy.json 132 | 133 | # Pyre type checker 134 | .pyre/ 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Local Search for NAS 2 | 3 | **Note: this repository has been combined with other NAS projects into [naszilla/naszilla](https://github.com/naszilla/naszilla), and this repo is deprecated and not maintained. Please use [naszilla/naszilla](https://github.com/naszilla/naszilla), which has more functionality.** 4 | 5 | [Local Search is State of the Art for Neural Architecture Search Benchmarks](https://arxiv.org/abs/2005.02960)\ 6 | Colin White, Sam Nolen, and Yash Savani.\ 7 | _arXiv:2005.02960_. 8 | 9 | We study the simplest versions of local search, showing that local search achieves state-of-the-art results on NASBench-101 (size 10^6) and NASBench-201 (size 10^4). We also show that local search fails on the DARTS search space (size 10^18). This suggests that existing NAS benchmarks may be too small to adequately evaluate NAS algorithms. See our paper for a theoretical study which characterizes the performance of local search on graph optimization problems, backed by simulation results. 10 | 11 |

12 | <img src="img/structured.png" alt="structured">
13 | 

14 | In the left figure, each point is an architecture from NAS-Bench-201 trained on CIFAR10, and each edge denotes an application of the local search (LS) function. We plot the trees of the nine architectures with the lowest test losses. The right figure is similar, but the architectures are assigned validation losses at random. We see that we are much more likely to converge to an architecture with low loss on structured data (CIFAR10) than on unstructured (random) data.
15 | 
16 | ## Requirements
17 | This repo is our fork of [naszilla/bananas](https://github.com/naszilla/bananas/). The requirements are as follows.
18 | - jupyter
19 | - tensorflow == 1.14.0
20 | - nasbench (follow the installation instructions [here](https://github.com/google-research/nasbench))
21 | - nas-bench-201 (follow the installation instructions [here](https://github.com/D-X-Y/NAS-Bench-201))
22 | - pytorch == 1.2.0, torchvision == 0.4.0 (used for experiments on the DARTS search space)
23 | - pybnn (used only for the DNGO baseline algorithm. Installation instructions [here](https://github.com/automl/pybnn))
24 | 
25 | If you run experiments on DARTS, you will need the naszilla fork of the darts repo:
26 | - Download the repo: https://github.com/naszilla/darts
27 | 
28 | ## Run an experiment on nas-bench-101 or nas-bench-201
29 | 
30 | To run an experiment on nas-bench-101, run
31 | ```
32 | python run_experiments_sequential.py
33 | ```
34 | To run on nas-bench-201, add the flag `--search_space nasbench_201_cifar10` to the above command, replacing `cifar10` with `cifar100` or `imagenet` to use those datasets.
35 | 
36 | ## Run an experiment on DARTS
37 | To run an experiment on DARTS, run
38 | ```
39 | bash darts/run_experiments.sh
40 | ```
41 | 
42 | 
43 | 

44 | <img src="img/ls_cifar10.png" alt="ls_cifar10">
45 | <img src="img/ls_cifar100.png" alt="ls_cifar100">
46 | <img src="img/ls_imagenet.png" alt="ls_imagenet">
47 | <img src="img/ls_baselines_101.png" alt="ls_baselines_101">
48 | <img src="img/real_synth_data.png" alt="real_synth_data">
49 | <img src="img/uniform_preimages.png" alt="uniform_preimages">
50 | 

51 | 
52 | ## Citation
53 | Please cite [our paper](https://arxiv.org/abs/2005.02960) if you use code from this repo:
54 | ```
55 | @article{white2020local,
56 |   title={Local Search is State of the Art for Neural Architecture Search Benchmarks},
57 |   author={White, Colin and Nolen, Sam and Savani, Yash},
58 |   journal={arXiv preprint arXiv:2005.02960},
59 |   year={2020}
60 | }
61 | ```
62 | 
--------------------------------------------------------------------------------
/acquisition_functions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | from scipy.stats import norm
4 | 
5 | # Different acquisition functions that can be used by BANANAS.
6 | # ytrain (the objective values observed so far) is required for the 'ei' and 'pi' types.
7 | def acq_fn(predictions, ytrain=None, explore_type='its'):
8 |     predictions = np.array(predictions)
9 | 
10 |     # Upper confidence bound (UCB) acquisition function
11 |     if explore_type == 'ucb':
12 |         explore_factor = 0.5
13 |         mean = np.mean(predictions, axis=0)
14 |         std = np.sqrt(np.var(predictions, axis=0))
15 |         ucb = mean - explore_factor * std
16 |         sorted_indices = np.argsort(ucb)
17 | 
18 |     # Expected improvement (EI) acquisition function
19 |     elif explore_type == 'ei':
20 |         ei_calibration_factor = 5.
21 |         mean = list(np.mean(predictions, axis=0))
22 |         std = list(np.sqrt(np.var(predictions, axis=0)) /
23 |                    ei_calibration_factor)
24 | 
25 |         min_y = ytrain.min()
26 |         gam = [(min_y - mean[i]) / std[i] for i in range(len(mean))]
27 |         ei = [-1 * std[i] * (gam[i] * norm.cdf(gam[i]) + norm.pdf(gam[i]))
28 |               for i in range(len(mean))]
29 |         sorted_indices = np.argsort(ei)
30 | 
31 |     # Probability of improvement (PI) acquisition function
32 |     elif explore_type == 'pi':
33 |         mean = list(np.mean(predictions, axis=0))
34 |         std = list(np.sqrt(np.var(predictions, axis=0)))
35 |         min_y = ytrain.min()
36 |         pi = [-1 * norm.cdf(min_y, loc=mean[i], scale=std[i]) for i in range(len(mean))]
37 |         sorted_indices = np.argsort(pi)
38 | 
39 |     # Thompson sampling (TS) acquisition function
40 |     elif explore_type == 'ts':
41 |         rand_ind = np.random.randint(predictions.shape[0])
42 |         ts = predictions[rand_ind,:]
43 |         sorted_indices = np.argsort(ts)
44 | 
45 |     # Top exploitation
46 |     elif explore_type == 'percentile':
47 |         min_prediction = np.min(predictions, axis=0)
48 |         sorted_indices = np.argsort(min_prediction)
49 | 
50 |     # Top mean
51 |     elif explore_type == 'mean':
52 |         mean = np.mean(predictions, axis=0)
53 |         sorted_indices = np.argsort(mean)
54 | 
55 |     elif explore_type == 'confidence':
56 |         confidence_factor = 2
57 |         mean = np.mean(predictions, axis=0)
58 |         std = np.sqrt(np.var(predictions, axis=0))
59 |         conf = mean + confidence_factor * std
60 |         sorted_indices = np.argsort(conf)
61 | 
62 |     # Independent Thompson sampling (ITS) acquisition function
63 |     elif explore_type == 'its':
64 |         mean = np.mean(predictions, axis=0)
65 |         std = np.sqrt(np.var(predictions, axis=0))
66 |         samples = np.random.normal(mean, std)
67 |         sorted_indices = np.argsort(samples)
68 | 
69 |     else:
70 |         print('Invalid exploration type in meta neuralnet search', explore_type)
71 |         sys.exit()
72 | 
73 |     return sorted_indices
--------------------------------------------------------------------------------
/bo/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for running Bayesian Optimization (BO) in NASzilla.
3 | """ 4 | -------------------------------------------------------------------------------- /bo/acq/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for acquisition strategies. 3 | """ 4 | -------------------------------------------------------------------------------- /bo/acq/acqmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes to manage acqmap (acquisition maps from xin to acquisition value). 3 | """ 4 | 5 | from argparse import Namespace 6 | import numpy as np 7 | import copy 8 | from bo.acq.acquisition import Acquisitioner 9 | from bo.util.datatransform import DataTransformer 10 | #from bo.pp.pp_gp_george import GeorgeGpPP 11 | #from bo.pp.pp_gp_stan import StanGpPP 12 | from bo.pp.pp_gp_my_distmat import MyGpDistmatPP 13 | 14 | class AcqMapper(object): 15 | """ Class to manage acqmap (acquisition map). """ 16 | 17 | def __init__(self, data, amp, print_flag=True): 18 | """ Constructor 19 | Parameters: 20 | amp - Namespace of acqmap params 21 | print_flag - True or False 22 | """ 23 | self.data = data 24 | self.set_am_params(amp) 25 | #self.setup_acqmap() 26 | if print_flag: self.print_str() 27 | 28 | def set_am_params(self, amp): 29 | """ Set the acqmap params. 30 | Inputs: 31 | amp - Namespace of acqmap parameters """ 32 | self.amp = amp 33 | 34 | def get_acqmap(self, xin_is_list=True): 35 | """ Return acqmap. 36 | Inputs: xin_is_list True if input to acqmap is a list of xin """ 37 | # Potentially do acqmap setup here. Could include inference, 38 | # cachining/computing quantities, instantiating objects used in acqmap 39 | # definition. This becomes important when we do sequential opt of acqmaps. 40 | return self.acqmap_list if xin_is_list else self.acqmap_single 41 | 42 | def acqmap_list(self, xin_list): 43 | """ Acqmap defined on a list of xin. """ 44 | 45 | def get_trans_data(): 46 | """ Returns transformed data. """ 47 | dt = DataTransformer(self.data.y, False) 48 | return Namespace(X=self.data.X, y=dt.transform_data(self.data.y)) 49 | 50 | def apply_acq_to_pmlist(pmlist, acq_str, trans_data): 51 | """ Apply acquisition to pmlist. 
""" 52 | acqp = Namespace(acq_str=acq_str, pmout_str='sample') 53 | acq = Acquisitioner(trans_data, acqp, False) 54 | acqfn = acq.acq_method 55 | return [acqfn(p) for p in pmlist] 56 | 57 | def georgegp_acqmap(acq_str): 58 | """ Acqmaps for GeorgeGpPP """ 59 | trans_data = get_trans_data() 60 | pp = GeorgeGpPP(trans_data, self.amp.modelp, False) 61 | pmlist = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \ 62 | else pp.sample_pp_post_pred(self.amp.nppred, xin_list) 63 | return apply_acq_to_pmlist(pmlist, acq_str, trans_data) 64 | 65 | def stangp_acqmap(acq_str): 66 | """ Acqmaps for StanGpPP """ 67 | trans_data = get_trans_data() 68 | pp = StanGpPP(trans_data, self.amp.modelp, False) 69 | pp.infer_post_and_update_samples(print_result=True) 70 | pmlist, _ = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \ 71 | else pp.sample_pp_post_pred(self.amp.nppred, xin_list, full_cov=True, \ 72 | nloop=np.min([50,self.amp.nppred])) 73 | return apply_acq_to_pmlist(pmlist, acq_str, trans_data) 74 | 75 | def mygpdistmat_acqmap(acq_str): 76 | """ Acqmaps for MyGpDistmatPP """ 77 | trans_data = get_trans_data() 78 | pp = MyGpDistmatPP(trans_data, self.amp.modelp, False) 79 | pp.infer_post_and_update_samples(print_result=True) 80 | pmlist, _ = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \ 81 | else pp.sample_pp_post_pred(self.amp.nppred, xin_list, full_cov=True) 82 | return apply_acq_to_pmlist(pmlist, acq_str, trans_data) 83 | 84 | # Mapping of am_str to acqmap 85 | if self.amp.am_str=='georgegp_ei': 86 | return georgegp_acqmap('ei') 87 | elif self.amp.am_str=='georgegp_pi': 88 | return georgegp_acqmap('pi') 89 | elif self.amp.am_str=='georgegp_ucb': 90 | return georgegp_acqmap('ucb') 91 | elif self.amp.am_str=='georgegp_ts': 92 | return georgegp_acqmap('ts') 93 | elif self.amp.am_str=='stangp_ei': 94 | return stangp_acqmap('ei') 95 | elif self.amp.am_str=='stangp_pi': 96 | return stangp_acqmap('pi') 97 | elif self.amp.am_str=='stangp_ucb': 98 | return stangp_acqmap('ucb') 99 | elif self.amp.am_str=='stangp_ts': 100 | return stangp_acqmap('ts') 101 | elif self.amp.am_str=='mygpdistmat_ei': 102 | return mygpdistmat_acqmap('ei') 103 | elif self.amp.am_str=='mygpdistmat_pi': 104 | return mygpdistmat_acqmap('pi') 105 | elif self.amp.am_str=='mygpdistmat_ucb': 106 | return mygpdistmat_acqmap('ucb') 107 | elif self.amp.am_str=='mygpdistmat_ts': 108 | return mygpdistmat_acqmap('ts') 109 | elif self.amp.am_str=='null': 110 | return [0. for xin in xin_list] 111 | 112 | def acqmap_single(self, xin): 113 | """ Acqmap defined on a single xin. Returns acqmap(xin) value, not list. """ 114 | return self.acqmap_list([xin])[0] 115 | 116 | def print_str(self): 117 | """ Print a description string """ 118 | print('*AcqMapper with amp='+str(self.amp) 119 | +'.\n-----') 120 | -------------------------------------------------------------------------------- /bo/acq/acqopt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes to perform acquisition function optimization. 
3 | """ 4 | 5 | from argparse import Namespace 6 | import numpy as np 7 | 8 | class AcqOptimizer(object): 9 | """ Class to perform acquisition function optimization """ 10 | 11 | def __init__(self, optp=None, print_flag=True): 12 | """ Constructor 13 | Inputs: 14 | optp - Namespace of opt parameters 15 | print_flag - True or False 16 | """ 17 | self.set_opt_params(optp) 18 | if print_flag: self.print_str() 19 | 20 | def set_opt_params(self, optp): 21 | """ Set the optimizer params. 22 | Inputs: 23 | acqp - Namespace of acquisition parameters """ 24 | if optp is None: 25 | optp = Namespace(opt_str='rand', max_iter=1000) 26 | self.optp = optp 27 | 28 | def optimize(self, dom, am): 29 | """ Optimize acqfn(probmap(x)) over x in domain """ 30 | if self.optp.opt_str=='rand': 31 | return self.optimize_rand(dom, am) 32 | 33 | def optimize_rand(self, dom, am): 34 | """ Optimize acqmap(x) over domain via random search """ 35 | xin_list = dom.unif_rand_sample(self.optp.max_iter) 36 | amlist = am.acqmap_list(xin_list) 37 | return xin_list[np.argmin(amlist)] 38 | 39 | # Utilities 40 | def print_str(self): 41 | """ print a description string """ 42 | print('*AcqOptimizer with optp='+str(self.optp) 43 | +'.\n-----') 44 | -------------------------------------------------------------------------------- /bo/acq/acquisition.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes to manage acquisition functions. 3 | """ 4 | 5 | from argparse import Namespace 6 | import numpy as np 7 | from scipy.stats import norm 8 | 9 | class Acquisitioner(object): 10 | """ Class to manage acquisition functions """ 11 | 12 | def __init__(self, data, acqp=None, print_flag=True): 13 | """ Constructor 14 | Parameters: 15 | acqp - Namespace of acquisition parameters 16 | print_flag - True or False 17 | """ 18 | self.data = data 19 | self.set_acq_params(acqp) 20 | self.set_acq_method() 21 | if print_flag: self.print_str() 22 | 23 | def set_acq_params(self, acqp): 24 | """ Set the acquisition params. 25 | Parameters: 26 | acqp - Namespace of acquisition parameters """ 27 | if acqp is None: 28 | acqp = Namespace(acq_str='ei', pmout_str='sample') 29 | self.acqp = acqp 30 | 31 | def set_acq_method(self): 32 | """ Set the acquisition method """ 33 | if self.acqp.acq_str=='ei': self.acq_method = self.ei 34 | if self.acqp.acq_str=='pi': self.acq_method = self.pi 35 | if self.acqp.acq_str=='ts': self.acq_method = self.ts 36 | if self.acqp.acq_str=='ucb': self.acq_method = self.ucb 37 | if self.acqp.acq_str=='rand': self.acq_method = self.rand 38 | if self.acqp.acq_str=='null': self.acq_method = self.null 39 | #if self.acqp.acqStr=='map': return self.map 40 | 41 | def ei(self, pmout): 42 | """ Expected improvement (EI) """ 43 | if self.acqp.pmout_str=='sample': 44 | return self.bbacq_ei(pmout) 45 | 46 | def pi(self, pmout): 47 | """ Probability of improvement (PI) """ 48 | if self.acqp.pmout_str=='sample': 49 | return self.bbacq_pi(pmout) 50 | 51 | def ucb(self, pmout): 52 | """ Upper (lower) confidence bound (UCB) """ 53 | if self.acqp.pmout_str=='sample': 54 | return self.bbacq_ucb(pmout) 55 | 56 | def ts(self, pmout): 57 | """ Thompson sampling (TS) """ 58 | if self.acqp.pmout_str=='sample': 59 | return self.bbacq_ts(pmout) 60 | 61 | def rand(self, pmout): 62 | """ Uniform random sampling """ 63 | return np.random.random() 64 | 65 | def null(self, pmout): 66 | """ Return constant 0. """ 67 | return 0. 
68 | 69 | # Black Box Acquisition Functions 70 | def bbacq_ei(self, pmout_samp, normal=False): 71 | """ Black box acquisition: BB-EI 72 | Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1)) 73 | Returns: EI acq value """ 74 | youts = np.array(pmout_samp).flatten() 75 | nsamp = youts.shape[0] 76 | if normal: 77 | mu = np.mean(youts) 78 | sig = np.std(youts) 79 | gam = (self.data.y.min() - mu) / sig 80 | eiVal = -1*sig*(gam*norm.cdf(gam) + norm.pdf(gam)) 81 | else: 82 | diffs = self.data.y.min() - youts 83 | ind_below_min = np.argwhere(diffs>0) 84 | eiVal = -1*np.sum(diffs[ind_below_min])/float(nsamp) if \ 85 | len(ind_below_min)>0 else 0 86 | return eiVal 87 | 88 | def bbacq_pi(self, pmout_samp, normal=False): 89 | """ Black box acquisition: BB-PI 90 | Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1)) 91 | Returns: PI acq value """ 92 | youts = np.array(pmout_samp).flatten() 93 | nsamp = youts.shape[0] 94 | if normal: 95 | mu = np.mean(youts) 96 | sig = np.sqrt(np.var(youts)) 97 | piVal = -1*norm.cdf(self.data.y.min(),loc=mu,scale=sig) 98 | else: 99 | piVal = -1*len(np.argwhere(youts=self.domp.min_max[i][0] and 36 | pt[i]<=self.domp.min_max[i][1] for i in range(self.ndimx)] 37 | ret=False if False in bool_list else True 38 | return ret 39 | 40 | def unif_rand_sample(self, n=1): 41 | """ Draws a sample uniformly at random from domain """ 42 | li = [np.random.uniform(mm[0], mm[1], n) for mm in self.domp.min_max] 43 | return np.array(li).T 44 | 45 | def print_str(self): 46 | """ Print a description string """ 47 | print('*RealDomain with domp = ' + str(self.domp) + '.') 48 | print('-----') 49 | -------------------------------------------------------------------------------- /bo/ds/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for makept (serializing and subprocesses) strategy. 3 | """ 4 | -------------------------------------------------------------------------------- /bo/ds/makept.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make a point in a domain, and serialize it. 3 | """ 4 | 5 | import sys 6 | import os 7 | sys.path.append(os.path.expanduser('./')) 8 | from argparse import Namespace, ArgumentParser 9 | import pickle 10 | import time 11 | import numpy as np 12 | from bo.dom.real import RealDomain 13 | from bo.dom.list import ListDomain 14 | from bo.acq.acqmap import AcqMapper 15 | from bo.acq.acqopt import AcqOptimizer 16 | 17 | def main(args, search_space, printinfo=False): 18 | starttime = time.time() 19 | 20 | # Load config and data 21 | makerp = pickle.load(open(args.configpkl, 'rb')) 22 | data = pickle.load(open(args.datapkl, 'rb')) 23 | 24 | if hasattr(args, 'mode') and args.mode == 'single_process': 25 | makerp.domp.mode = args.mode 26 | makerp.domp.iteridx = args.iteridx 27 | makerp.amp.modelp.mode = args.mode 28 | else: 29 | np.random.seed(args.seed) 30 | # Instantiate Domain, AcqMapper, AcqOptimizer 31 | dom = get_domain(makerp.domp, search_space) 32 | am = AcqMapper(data, makerp.amp, False) 33 | ao = AcqOptimizer(makerp.optp, False) 34 | # Optimize over domain to get nextpt 35 | nextpt = ao.optimize(dom, am) 36 | # Serialize nextpt 37 | with open(args.nextptpkl, 'wb') as f: 38 | pickle.dump(nextpt, f) 39 | # Print 40 | itertime = time.time()-starttime 41 | if printinfo: print_info(nextpt, itertime, args.nextptpkl) 42 | 43 | def get_domain(domp, search_space): 44 | """ Return Domain object. 
""" 45 | if not hasattr(domp, 'dom_str'): 46 | domp.dom_str = 'real' 47 | if domp.dom_str=='real': 48 | return RealDomain(domp, False) 49 | elif domp.dom_str=='list': 50 | return ListDomain(search_space, domp, False) 51 | 52 | def print_info(nextpt, itertime, nextptpkl): 53 | print('*Found nextpt = ' + str(nextpt) + '.') 54 | print('*Saved nextpt as ' + nextptpkl + '.') 55 | print('*Timing: makept took ' + str(itertime) + ' seconds.') 56 | print('-----') 57 | 58 | if __name__ == "__main__": 59 | parser = ArgumentParser(description='Args for a single instance of acquisition optimization.') 60 | parser.add_argument('--seed', dest='seed', type=int, default=1111) 61 | parser.add_argument('--configpkl', dest='configpkl', type=str, default='config.pkl') 62 | parser.add_argument('--datapkl', dest='datapkl', type=str, default='data.pkl') 63 | parser.add_argument('--nextptpkl', dest='nextptpkl', type=str, default='nextpt.pkl') 64 | args = parser.parse_args() 65 | main(args, printinfo=False) 66 | -------------------------------------------------------------------------------- /bo/fn/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for synthetic functions to query (perform experiment on). 3 | """ 4 | -------------------------------------------------------------------------------- /bo/fn/functionhandler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes to handle functions. 3 | """ 4 | 5 | from argparse import Namespace 6 | import numpy as np 7 | 8 | def get_fh(fn, data=None, fhp=None, print_flag=True): 9 | """ Returns a function handler object """ 10 | if fhp is None: 11 | fhp=Namespace(fhstr='basic', namestr='noname') 12 | # Return FH object 13 | if fhp.fhstr=='basic': 14 | return BasicFH(fn, data, fhp, print_flag) 15 | elif fhp.fhstr=='extrainfo': 16 | return ExtraInfoFH(fn, data, fhp, print_flag) 17 | elif fhp.fhstr=='nannn': 18 | return NanNNFH(fn, data, fhp, print_flag) 19 | elif fhp.fhstr=='replacenannn': 20 | return ReplaceNanNNFH(fn, data, fhp, print_flag) 21 | elif fhp.fhstr=='object': 22 | return ObjectFH(fn, data, fhp, print_flag) 23 | 24 | 25 | class BasicFH(object): 26 | """ Class to handle basic functions, which map from an array xin to a real 27 | value yout. """ 28 | 29 | def __init__(self, fn, data=None, fhp=None, print_flag=True): 30 | """ Constructor. 31 | Inputs: 32 | pmp - Namespace of probmap params 33 | print_flag - True or False 34 | """ 35 | self.fn = fn 36 | self.data = data 37 | self.fhp = fhp 38 | if print_flag: self.print_str() 39 | 40 | def call_fn_and_add_data(self, xin): 41 | """ Call self.fn(xin), and update self.data """ 42 | yout = self.fn(xin) 43 | print('new datapoint score', yout) 44 | self.add_data_single(xin, yout) 45 | 46 | def add_data_single(self, xin, yout): 47 | """ Update self.data with a single xin yout pair. 48 | Inputs: 49 | xin: np.array size=(1, -1) 50 | yout: np.array size=(1, 1) """ 51 | xin = np.array(xin).reshape(1, -1) 52 | yout = np.array(yout).reshape(1, 1) 53 | newdata = Namespace(X=xin, y=yout) 54 | self.add_data(newdata) 55 | 56 | def add_data(self, newdata): 57 | """ Update self.data with newdata Namespace. 58 | Inputs: 59 | newdata: Namespace with fields X and y """ 60 | if self.data is None: 61 | self.data = newdata 62 | else: 63 | self.data.X = np.concatenate((self.data.X, newdata.X), 0) 64 | self.data.y = np.concatenate((self.data.y, newdata.y), 0) 65 | 66 | def print_str(self): 67 | """ Print a description string. 
""" 68 | print('*BasicFH with fhp='+str(self.fhp) 69 | +'.\n-----') 70 | 71 | 72 | class ExtraInfoFH(BasicFH): 73 | """ Class to handle functions that map from an array xin to a real 74 | value yout, but also return extra info """ 75 | 76 | def __init__(self, fn, data=None, fhp=None, print_flag=True): 77 | """ Constructor. 78 | Inputs: 79 | pmp - Namespace of probmap params 80 | print_flag - True or False 81 | """ 82 | super(ExtraInfoFH, self).__init__(fn, data, fhp, False) 83 | self.extrainfo = [] 84 | if print_flag: self.print_str() 85 | 86 | def call_fn_and_add_data(self, xin): 87 | """ Call self.fn(xin), and update self.data """ 88 | yout, exinf = self.fn(xin) 89 | self.add_data_single(xin, yout) 90 | self.extrainfo.append(exinf) 91 | 92 | def print_str(self): 93 | """ Print a description string. """ 94 | print('*ExtraInfoFH with fhp='+str(self.fhp) 95 | +'.\n-----') 96 | 97 | 98 | class NanNNFH(BasicFH): 99 | """ Class to handle NN functions that map from an array xin to either 100 | a real value yout or np.NaN, but also return extra info """ 101 | 102 | def __init__(self, fn, data=None, fhp=None, print_flag=True): 103 | """ Constructor. 104 | Inputs: 105 | pmp - Namespace of probmap params 106 | print_flag - True or False 107 | """ 108 | super(NanNNFH, self).__init__(fn, data, fhp, False) 109 | self.extrainfo = [] 110 | if print_flag: self.print_str() 111 | 112 | def call_fn_and_add_data(self, xin): 113 | """ Call self.fn(xin), and update self.data """ 114 | timethresh = 60. 115 | yout, walltime = self.fn(xin) 116 | if walltime > timethresh: 117 | self.add_data_single_nan(xin) 118 | else: 119 | self.add_data_single(xin, yout) 120 | self.possibly_init_xnan() 121 | exinf = Namespace(xin=xin, yout=yout, walltime=walltime) 122 | self.extrainfo.append(exinf) 123 | 124 | def add_data_single_nan(self, xin): 125 | """ Update self.data.X_nan with a single xin. 126 | Inputs: 127 | xin: np.array size=(1, -1) """ 128 | xin = xin.reshape(1,-1) 129 | newdata = Namespace(X = np.ones((0, xin.shape[1])), 130 | y = np.ones((0, 1)), 131 | X_nan = xin) 132 | self.add_data_nan(newdata) 133 | 134 | def add_data_nan(self, newdata): 135 | """ Update self.data with newdata Namespace. 136 | Inputs: 137 | newdata: Namespace with fields X, y, X_nan """ 138 | if self.data is None: 139 | self.data = newdata 140 | else: 141 | self.data.X_nan = np.concatenate((self.data.X_nan, newdata.X_nan), 0) 142 | 143 | def possibly_init_xnan(self): 144 | """ If self.data doesn't have X_nan, then create it. """ 145 | if not hasattr(self.data, 'X_nan'): 146 | self.data.X_nan = np.ones((0, self.data.X.shape[1])) 147 | 148 | def print_str(self): 149 | """ Print a description string. """ 150 | print('*NanNNFH with fhp='+str(self.fhp) 151 | +'.\n-----') 152 | 153 | 154 | class ReplaceNanNNFH(BasicFH): 155 | """ Class to handle NN functions that map from an array xin to either 156 | a real value yout or np.NaN. If np.NaN, we replace it with a large 157 | positive value. We also return extra info """ 158 | 159 | def __init__(self, fn, data=None, fhp=None, print_flag=True): 160 | """ Constructor. 161 | Inputs: 162 | pmp - Namespace of probmap params 163 | print_flag - True or False 164 | """ 165 | super(ReplaceNanNNFH, self).__init__(fn, data, fhp, False) 166 | self.extrainfo = [] 167 | if print_flag: self.print_str() 168 | 169 | def call_fn_and_add_data(self, xin): 170 | """ Call self.fn(xin), and update self.data """ 171 | timethresh = 60. 172 | replace_nan_val = 5. 
173 | yout, walltime = self.fn(xin) 174 | if walltime > timethresh: 175 | yout = replace_nan_val 176 | self.add_data_single(xin, yout) 177 | exinf = Namespace(xin=xin, yout=yout, walltime=walltime) 178 | self.extrainfo.append(exinf) 179 | 180 | def print_str(self): 181 | """ Print a description string. """ 182 | print('*ReplaceNanNNFH with fhp='+str(self.fhp) 183 | +'.\n-----') 184 | 185 | 186 | class ObjectFH(object): 187 | """ Class to handle basic functions, which map from some object xin to a real 188 | value yout. """ 189 | 190 | def __init__(self, fn, data=None, fhp=None, print_flag=True): 191 | """ Constructor. 192 | Inputs: 193 | pmp - Namespace of probmap params 194 | print_flag - True or False 195 | """ 196 | self.fn = fn 197 | self.data = data 198 | self.fhp = fhp 199 | if print_flag: self.print_str() 200 | 201 | def call_fn_and_add_data(self, xin): 202 | """ Call self.fn(xin), and update self.data """ 203 | yout = self.fn(xin) 204 | self.add_data_single(xin, yout) 205 | 206 | def add_data_single(self, xin, yout): 207 | """ Update self.data with a single xin yout pair. """ 208 | newdata = Namespace(X=[xin], y=np.array(yout).reshape(1, 1)) 209 | self.add_data(newdata) 210 | 211 | def add_data(self, newdata): 212 | """ Update self.data with newdata Namespace. 213 | Inputs: 214 | newdata: Namespace with fields X and y """ 215 | if self.data is None: 216 | self.data = newdata 217 | else: 218 | self.data.X.extend(newdata.X) 219 | self.data.y = np.concatenate((self.data.y, newdata.y), 0) 220 | 221 | def print_str(self): 222 | """ Print a description string. """ 223 | print('*ObjectFH with fhp='+str(self.fhp) 224 | +'.\n-----') 225 | -------------------------------------------------------------------------------- /bo/pp/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for defining and running probabilistic programs. 3 | """ 4 | -------------------------------------------------------------------------------- /bo/pp/gp/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for Gaussian process (GP) utilities and functions. 3 | """ 4 | -------------------------------------------------------------------------------- /bo/pp/gp/gp_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for Gaussian process (GP) inference 3 | """ 4 | 5 | import numpy as np 6 | from scipy.linalg import solve_triangular 7 | from scipy.spatial.distance import cdist 8 | #import GPy as gpy 9 | 10 | 11 | def kern_gibbscontext(xmatcon1, xmatcon2, xmatact1, xmatact2, theta, alpha, 12 | lscon, whichlsfn=1): 13 | """ Gibbs kernel (ls_fn of context only) """ 14 | actdim = xmatact1.shape[1] 15 | lsarr1 = ls_fn(xmatcon1, theta, whichlsfn).flatten() 16 | lsarr2 = ls_fn(xmatcon2, theta, whichlsfn).flatten() 17 | sum_sq_ls = np.add.outer(lsarr1, lsarr2) 18 | inexp = -1. * np.divide(cdist(xmatact1, xmatact2, 'sqeuclidean'), sum_sq_ls) 19 | prod_ls = np.outer(lsarr1, lsarr2) 20 | #coef = np.power(np.divide(2*prod_ls, sum_sq_ls), actdim/2.) # Correct 21 | coef = 1. 
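  # The normalizing coefficient (commented out above) is replaced by 1 here,
  # so the Gibbs factor computed below is left unnormalized.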
22 | kern_gibbscontext_only_ns = np.multiply(coef, np.exp(inexp)) 23 | kern_expquad_ns = kern_exp_quad_noscale(xmatcon1, xmatcon2, lscon) 24 | return alpha**2 * np.multiply(kern_gibbscontext_only_ns, kern_expquad_ns) 25 | 26 | def kern_gibbs1d(xmat1, xmat2, theta, alpha): 27 | """ Gibbs kernel in 1d """ 28 | lsarr1 = ls_fn(xmat1, theta).flatten() 29 | lsarr2 = ls_fn(xmat2, theta).flatten() 30 | sum_sq_ls = np.add.outer(lsarr1, lsarr2) 31 | prod_ls = np.outer(lsarr1, lsarr2) #TODO product of this for each dim 32 | coef = np.sqrt(np.divide(2*prod_ls, sum_sq_ls)) 33 | inexp = cdist(xmat1, xmat2, 'sqeuclidean') / sum_sq_ls #TODO sum of this for each dim 34 | return alpha**2 * coef * np.exp(-1 * inexp) 35 | 36 | def ls_fn(xmat, theta, whichlsfn=1): 37 | theta = np.array(theta).reshape(-1,1) 38 | if theta.shape[0]==2: 39 | if whichlsfn==1 or whichlsfn==2: 40 | return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]))) # softplus transform 41 | elif whichlsfn==3: 42 | return np.exp(theta[0][0] + np.matmul(xmat,theta[1])) # exp transform 43 | elif theta.shape[0]==3: 44 | if whichlsfn==1: 45 | return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]) + 46 | np.matmul(np.power(xmat,2),theta[2]))) # softplus transform 47 | elif whichlsfn==2: 48 | return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]) + 49 | np.matmul(np.abs(xmat),theta[2]))) # softplus on abs transform 50 | elif whichlsfn==3: 51 | return np.exp(theta[0][0] + np.matmul(xmat,theta[1]) + 52 | np.matmul(np.power(xmat,2),theta[2])) # exp transform 53 | else: 54 | print('ERROR: theta parameter is incorrect.') 55 | 56 | def kern_matern32(xmat1, xmat2, ls, alpha): 57 | """ Matern 3/2 kernel, currently using GPy """ 58 | kern = gpy.kern.Matern32(input_dim=xmat1.shape[1], variance=alpha**2, 59 | lengthscale=ls) 60 | return kern.K(xmat1,xmat2) 61 | 62 | def kern_exp_quad(xmat1, xmat2, ls, alpha): 63 | """ Exponentiated quadratic kernel function aka squared exponential kernel 64 | aka RBF kernel """ 65 | return alpha**2 * kern_exp_quad_noscale(xmat1, xmat2, ls) 66 | 67 | def kern_exp_quad_noscale(xmat1, xmat2, ls): 68 | """ Exponentiated quadratic kernel function aka squared exponential kernel 69 | aka RBF kernel, without scale parameter. """ 70 | sq_norm = (-1/(2 * ls**2)) * cdist(xmat1, xmat2, 'sqeuclidean') 71 | return np.exp(sq_norm) 72 | 73 | def squared_euc_distmat(xmat1, xmat2, coef=1.): 74 | """ Distance matrix of squared euclidean distance (multiplied by coef) 75 | between points in xmat1 and xmat2. 
""" 76 | return coef * cdist(xmat1, xmat2, 'sqeuclidean') 77 | 78 | def kern_distmat(xmat1, xmat2, ls, alpha, distfn): 79 | """ Kernel for a given distmat, via passed-in distfn (which is assumed to be 80 | fn of xmat1 and xmat2 only) """ 81 | distmat = distfn(xmat1, xmat2) 82 | sq_norm = -distmat / ls**2 83 | return alpha**2 * np.exp(sq_norm) 84 | 85 | def get_cholesky_decomp(k11_nonoise, sigma, psd_str): 86 | """ Returns cholesky decomposition """ 87 | if psd_str == 'try_first': 88 | k11 = k11_nonoise + sigma**2 * np.eye(k11_nonoise.shape[0]) 89 | try: 90 | return stable_cholesky(k11, False) 91 | except np.linalg.linalg.LinAlgError: 92 | return get_cholesky_decomp(k11_nonoise, sigma, 'project_first') 93 | elif psd_str == 'project_first': 94 | k11_nonoise = project_symmetric_to_psd_cone(k11_nonoise) 95 | return get_cholesky_decomp(k11_nonoise, sigma, 'is_psd') 96 | elif psd_str == 'is_psd': 97 | k11 = k11_nonoise + sigma**2 * np.eye(k11_nonoise.shape[0]) 98 | return stable_cholesky(k11) 99 | 100 | def stable_cholesky(mmat, make_psd=True): 101 | """ Returns a 'stable' cholesky decomposition of mmat """ 102 | if mmat.size == 0: 103 | return mmat 104 | try: 105 | lmat = np.linalg.cholesky(mmat) 106 | except np.linalg.linalg.LinAlgError as e: 107 | if not make_psd: 108 | raise e 109 | diag_noise_power = -11 110 | max_mmat = np.diag(mmat).max() 111 | diag_noise = np.diag(mmat).max() * 1e-11 112 | break_loop = False 113 | while not break_loop: 114 | try: 115 | lmat = np.linalg.cholesky(mmat + ((10**diag_noise_power) * max_mmat) * 116 | np.eye(mmat.shape[0])) 117 | break_loop = True 118 | except np.linalg.linalg.LinAlgError: 119 | if diag_noise_power > -9: 120 | print('stable_cholesky failed with diag_noise_power=%d.'%(diag_noise_power)) 121 | diag_noise_power += 1 122 | if diag_noise_power >= 5: 123 | print('***** stable_cholesky failed: added diag noise = %e'%(diag_noise)) 124 | return lmat 125 | 126 | def project_symmetric_to_psd_cone(mmat, is_symmetric=True, epsilon=0): 127 | """ Project symmetric matrix mmat to the PSD cone """ 128 | if is_symmetric: 129 | try: 130 | eigvals, eigvecs = np.linalg.eigh(mmat) 131 | except np.linalg.LinAlgError: 132 | print('LinAlgError encountered with np.eigh. Defaulting to eig.') 133 | eigvals, eigvecs = np.linalg.eig(mmat) 134 | eigvals = np.real(eigvals) 135 | eigvecs = np.real(eigvecs) 136 | else: 137 | eigvals, eigvecs = np.linalg.eig(mmat) 138 | clipped_eigvals = np.clip(eigvals, epsilon, np.inf) 139 | return (eigvecs * clipped_eigvals).dot(eigvecs.T) 140 | 141 | def solve_lower_triangular(amat, b): 142 | """ Solves amat*x=b when amat is lower triangular """ 143 | return solve_triangular_base(amat, b, lower=True) 144 | 145 | def solve_upper_triangular(amat, b): 146 | """ Solves amat*x=b when amat is upper triangular """ 147 | return solve_triangular_base(amat, b, lower=False) 148 | 149 | def solve_triangular_base(amat, b, lower): 150 | """ Solves amat*x=b when amat is a triangular matrix. 
""" 151 | if amat.size == 0 and b.shape[0] == 0: 152 | return np.zeros((b.shape)) 153 | else: 154 | return solve_triangular(amat, b, lower=lower) 155 | 156 | def sample_mvn(mu, covmat, nsamp): 157 | """ Sample from multivariate normal distribution with mean mu and covariance 158 | matrix covmat """ 159 | mu = mu.reshape(-1,) 160 | ndim = len(mu) 161 | lmat = stable_cholesky(covmat) 162 | umat = np.random.normal(size=(ndim, nsamp)) 163 | return lmat.dot(umat).T + mu 164 | -------------------------------------------------------------------------------- /bo/pp/pp_core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base classes for probabilistic programs. 3 | """ 4 | 5 | import pickle 6 | 7 | class DiscPP(object): 8 | """ Parent class for discriminative probabilistic programs """ 9 | 10 | def __init__(self): 11 | """ Constructor """ 12 | self.sample_list = [] 13 | if not hasattr(self,'data'): 14 | raise NotImplementedError('Implement var data in a child class') 15 | #if not hasattr(self,'ndimx'): 16 | #raise NotImplementedError('Implement var ndimx in a child class') 17 | #if not hasattr(self,'ndataInit'): 18 | #raise NotImplementedError('Implement var ndataInit in a child class') 19 | 20 | def infer_post_and_update_samples(self,nsamp): 21 | """ Run an inference algorithm (given self.data), draw samples from the 22 | posterior, and store in self.sample_list. """ 23 | raise NotImplementedError('Implement method in a child class') 24 | 25 | def sample_pp_post_pred(self,nsamp,input_list): 26 | """ Sample nsamp times from PP posterior predictive, for each x-input in 27 | input_list """ 28 | raise NotImplementedError('Implement method in a child class') 29 | 30 | def sample_pp_pred(self,nsamp,input_list,lv_list=None): 31 | """ Sample nsamp times from PP predictive for parameter lv, for each 32 | x-input in input_list. If lv is None, draw it uniformly at random 33 | from self.sample_list. 
""" 34 | raise NotImplementedError('Implement method in a child class') 35 | 36 | def add_new_data(self,newData): 37 | """ Add data (newData) to self.data """ 38 | raise NotImplementedError('Implement method in a child class') 39 | 40 | def get_namespace_to_save(self): 41 | """ Return namespace containing object info (to save to file) """ 42 | raise NotImplementedError('Implement method in a child class') 43 | 44 | def save_namespace_to_file(self,fileStr,printFlag): 45 | """ Saves results from get_namespace_to_save in fileStr """ 46 | ppNamespaceToSave = self.get_namespace_to_save() 47 | ff = open(fileStr,'wb') 48 | pickle.dump(ppNamespaceToSave,ff) 49 | ff.close() 50 | if printFlag: 51 | print('*Saved DiscPP Namespace in pickle file: ' +fileStr+'\n-----') 52 | -------------------------------------------------------------------------------- /bo/pp/pp_gp_george.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for hierarchical GP models with George PP 3 | """ 4 | 5 | from argparse import Namespace 6 | import numpy as np 7 | import scipy.optimize as spo 8 | import george 9 | import emcee 10 | from bo.pp.pp_core import DiscPP 11 | 12 | class GeorgeGpPP(DiscPP): 13 | """ Hierarchical GPs implemented with George """ 14 | 15 | def __init__(self,data=None,modelp=None,printFlag=True): 16 | """ Constructor """ 17 | self.set_data(data) 18 | self.set_model_params(modelp) 19 | self.ndimx = self.modelp.ndimx 20 | self.set_kernel() 21 | self.set_model() 22 | super(GeorgeGpPP,self).__init__() 23 | if printFlag: 24 | self.print_str() 25 | 26 | def set_data(self,data): 27 | if data is None: 28 | pass #TODO: handle case where there's no data 29 | self.data = data 30 | 31 | def set_model_params(self,modelp): 32 | if modelp is None: 33 | modelp = Namespace(ndimx=1, noiseVar=1e-3, kernLs=1.5, kernStr='mat', 34 | fitType='mle') 35 | self.modelp = modelp 36 | 37 | def set_kernel(self): 38 | """ Set kernel for GP """ 39 | if self.modelp.kernStr=='mat': 40 | self.kernel = self.data.y.var() * \ 41 | george.kernels.Matern52Kernel(self.modelp.kernLs, ndim=self.ndimx) 42 | if self.modelp.kernStr=='rbf': # NOTE: periodically produces errors 43 | self.kernel = self.data.y.var() * \ 44 | george.kernels.ExpSquaredKernel(self.modelp.kernLs, ndim=self.ndimx) 45 | 46 | def set_model(self): 47 | """ Set GP regression model """ 48 | self.model = self.get_model() 49 | self.model.compute(self.data.X) 50 | self.fit_hyperparams(printOut=False) 51 | 52 | def get_model(self): 53 | """ Returns GPRegression model """ 54 | return george.GP(kernel=self.kernel,fit_mean=True) 55 | 56 | def fit_hyperparams(self,printOut=False): 57 | if self.modelp.fitType=='mle': 58 | spo.minimize(self.neg_log_like, self.model.get_parameter_vector(), 59 | jac=True) 60 | elif self.modelp.fitType=='bayes': 61 | self.nburnin = 200 62 | nsamp = 200 63 | nwalkers = 36 64 | gpdim = len(self.model) 65 | self.sampler = emcee.EnsembleSampler(nwalkers, gpdim, self.log_post) 66 | p0 = self.model.get_parameter_vector() + 1e-4*np.random.randn(nwalkers, 67 | gpdim) 68 | print 'Running burn-in.' 69 | p0, _, _ = self.sampler.run_mcmc(p0, self.nburnin) 70 | print 'Running main chain.' 
71 | self.sampler.run_mcmc(p0, nsamp) 72 | if printOut: 73 | print 'Final GP hyperparam (in opt or MCMC chain):' 74 | print self.model.get_parameter_dict() 75 | 76 | def infer_post_and_update_samples(self): 77 | """ Update self.sample_list """ 78 | self.sample_list = [None] #TODO: need to not-break ts fn in maker_bayesopt.py 79 | 80 | def sample_pp_post_pred(self,nsamp,input_list): 81 | """ Sample from posterior predictive of PP. 82 | Inputs: 83 | input_list - list of np arrays size=(-1,) 84 | Returns: 85 | list (len input_list) of np arrays (size=(nsamp,1)).""" 86 | inputArray = np.array(input_list) 87 | if self.modelp.fitType=='mle': 88 | inputArray = np.array(input_list) 89 | ppredArray = self.model.sample_conditional(self.data.y.flatten(), 90 | inputArray, nsamp).T 91 | elif self.modelp.fitType=='bayes': 92 | ppredArray = np.zeros(shape=[len(input_list),nsamp]) 93 | for s in range(nsamp): 94 | walkidx = np.random.randint(self.sampler.chain.shape[0]) 95 | sampidx = np.random.randint(self.nburnin, self.sampler.chain.shape[1]) 96 | hparamSamp = self.sampler.chain[walkidx, sampidx] 97 | print 'hparamSamp = ' + str(hparamSamp) # TODO: remove print statement 98 | self.model.set_parameter_vector(hparamSamp) 99 | ppredArray[:,s] = self.model.sample_conditional(self.data.y.flatten(), 100 | inputArray, 1).flatten() 101 | return list(ppredArray) # each element is row in ppredArray matrix 102 | 103 | def sample_pp_pred(self,nsamp,input_list,lv=None): 104 | """ Sample from predictive of PP for parameter lv. 105 | Returns: list (len input_list) of np arrays (size (nsamp,1)).""" 106 | if self.modelp.fitType=='bayes': 107 | print('*WARNING: fitType=bayes not implemented for sample_pp_pred. \ 108 | Reverting to fitType=mle') 109 | # TODO: Equivalent algo for fitType=='bayes': 110 | # - draw posterior sample path over all xin in input_list 111 | # - draw pred samples around sample path pt, based on noise model 112 | inputArray = np.array(input_list) 113 | samplePath = self.model.sample_conditional(self.data.y.flatten(), 114 | inputArray).reshape(-1,) 115 | return [np.random.normal(s,np.sqrt(self.modelp.noiseVar),nsamp).reshape(-1,) 116 | for s in samplePath] 117 | 118 | def neg_log_like(self,hparams): 119 | """ Compute and return the negative log likelihood for model 120 | hyperparameters hparams, as well as its gradient. """ 121 | self.model.set_parameter_vector(hparams) 122 | g = self.model.grad_log_likelihood(self.data.y.flatten(), quiet=True) 123 | return -self.model.log_likelihood(self.data.y.flatten(), quiet=True), -g 124 | 125 | def log_post(self,hparams): 126 | """ Compute and return the log posterior density (up to constant of 127 | proportionality) for the model hyperparameters hparams. """ 128 | # Uniform prior between -100 and 100, for each hyperparam 129 | if np.any((-100 > hparams[1:]) + (hparams[1:] > 100)): 130 | return -np.inf 131 | self.model.set_parameter_vector(hparams) 132 | return self.model.log_likelihood(self.data.y.flatten(), quiet=True) 133 | 134 | # Utilities 135 | def print_str(self): 136 | """ Print a description string """ 137 | print '*GeorgeGpPP with modelp='+str(self.modelp)+'.' 138 | print '-----' 139 | -------------------------------------------------------------------------------- /bo/pp/pp_gp_my_distmat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for GP models without any PP backend, using a given distance matrix. 
3 | """ 4 | 5 | from argparse import Namespace 6 | import time 7 | import copy 8 | import numpy as np 9 | from scipy.spatial.distance import cdist 10 | from bo.pp.pp_core import DiscPP 11 | from bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \ 12 | get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \ 13 | sample_mvn, squared_euc_distmat, kern_distmat 14 | from bo.util.print_utils import suppress_stdout_stderr 15 | 16 | 17 | class MyGpDistmatPP(DiscPP): 18 | """ GPs using a kernel specified by a given distance matrix, without any PP 19 | backend """ 20 | 21 | def __init__(self, data=None, modelp=None, printFlag=True): 22 | """ Constructor """ 23 | self.set_model_params(modelp) 24 | self.set_data(data) 25 | self.set_model() 26 | super(MyGpDistmatPP,self).__init__() 27 | if printFlag: 28 | self.print_str() 29 | 30 | def set_model_params(self, modelp): 31 | """ Set self.modelp """ 32 | if modelp is None: 33 | pass #TODO 34 | self.modelp = modelp 35 | 36 | def set_data(self, data): 37 | """ Set self.data """ 38 | if data is None: 39 | pass #TODO 40 | self.data_init = copy.deepcopy(data) 41 | self.data = copy.deepcopy(self.data_init) 42 | 43 | def set_model(self): 44 | """ Set GP regression model """ 45 | self.model = self.get_model() 46 | 47 | def get_model(self): 48 | """ Returns model object """ 49 | return None 50 | 51 | def infer_post_and_update_samples(self, print_result=False): 52 | """ Update self.sample_list """ 53 | self.sample_list = [Namespace(ls=self.modelp.kernp.ls, 54 | alpha=self.modelp.kernp.alpha, 55 | sigma=self.modelp.kernp.sigma)] 56 | if print_result: self.print_inference_result() 57 | 58 | def get_distmat(self, xmat1, xmat2): 59 | """ Get distance matrix """ 60 | #return squared_euc_distmat(xmat1, xmat2, .5) 61 | 62 | from data import Data 63 | self.distmat = Data.generate_distance_matrix 64 | #print('distmat') 65 | #print(self.distmat(xmat1, xmat2, self.modelp.distance)) 66 | return self.distmat(xmat1, xmat2, self.modelp.distance) 67 | 68 | def print_inference_result(self): 69 | """ Print results of stan inference """ 70 | print('*ls pt est = '+str(self.sample_list[0].ls)+'.') 71 | print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.') 72 | print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.') 73 | print('-----') 74 | 75 | def sample_pp_post_pred(self, nsamp, input_list, full_cov=False): 76 | """ Sample from posterior predictive of PP. 77 | Inputs: 78 | input_list - list of np arrays size=(-1,) 79 | Returns: 80 | list (len input_list) of np arrays (size=(nsamp,1)).""" 81 | samp = self.sample_list[0] 82 | postmu, postcov = self.gp_post(self.data.X, self.data.y, input_list, 83 | samp.ls, samp.alpha, samp.sigma, full_cov) 84 | if full_cov: 85 | ppred_list = list(sample_mvn(postmu, postcov, nsamp)) 86 | else: 87 | ppred_list = list(np.random.normal(postmu.reshape(-1,), 88 | postcov.reshape(-1,), 89 | size=(nsamp, len(input_list)))) 90 | return list(np.stack(ppred_list).T), ppred_list 91 | 92 | def sample_pp_pred(self, nsamp, input_list, lv=None): 93 | """ Sample from predictive of PP for parameter lv. 
94 | Returns: list (len input_list) of np arrays (size (nsamp,1)).""" 95 | if lv is None: 96 | lv = self.sample_list[0] 97 | postmu, postcov = self.gp_post(self.data.X, self.data.y, input_list, lv.ls, 98 | lv.alpha, lv.sigma) 99 | pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times 100 | return list(np.stack(pred_list).T), pred_list 101 | 102 | def gp_post(self, x_train_list, y_train_arr, x_pred_list, ls, alpha, sigma, 103 | full_cov=True): 104 | """ Compute parameters of GP posterior """ 105 | kernel = lambda a, b, c, d: kern_distmat(a, b, c, d, self.get_distmat) 106 | k11_nonoise = kernel(x_train_list, x_train_list, ls, alpha) 107 | lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first') 108 | smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, 109 | y_train_arr)) 110 | k21 = kernel(x_pred_list, x_train_list, ls, alpha) 111 | mu2 = k21.dot(smat) 112 | k22 = kernel(x_pred_list, x_pred_list, ls, alpha) 113 | vmat = solve_lower_triangular(lmat, k21.T) 114 | k2 = k22 - vmat.T.dot(vmat) 115 | if full_cov is False: 116 | k2 = np.sqrt(np.diag(k2)) 117 | return mu2, k2 118 | 119 | # Utilities 120 | def print_str(self): 121 | """ Print a description string """ 122 | print('*MyGpDistmatPP with modelp='+str(self.modelp)+'.') 123 | print('-----') 124 | -------------------------------------------------------------------------------- /bo/pp/pp_gp_stan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for GP models with Stan 3 | """ 4 | 5 | from argparse import Namespace 6 | import time 7 | import numpy as np 8 | import copy 9 | from bo.pp.pp_core import DiscPP 10 | import bo.pp.stan.gp_hier2 as gpstan2 11 | import bo.pp.stan.gp_hier3 as gpstan3 12 | import bo.pp.stan.gp_hier2_matern as gpstan2_matern 13 | from bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \ 14 | get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \ 15 | sample_mvn 16 | from bo.util.print_utils import suppress_stdout_stderr 17 | 18 | class StanGpPP(DiscPP): 19 | """ Hierarchical GPs implemented with Stan """ 20 | 21 | def __init__(self, data=None, modelp=None, printFlag=True): 22 | """ Constructor """ 23 | self.set_model_params(modelp) 24 | self.set_data(data) 25 | self.ndimx = self.modelp.ndimx 26 | self.set_model() 27 | super(StanGpPP,self).__init__() 28 | if printFlag: 29 | self.print_str() 30 | 31 | def set_model_params(self,modelp): 32 | if modelp is None: 33 | modelp = Namespace(ndimx=1, model_str='optfixedsig', 34 | gp_mean_transf_str='constant') 35 | if modelp.model_str=='optfixedsig': 36 | modelp.kernp = Namespace(u1=.1, u2=5., n1=10., n2=10., sigma=1e-5) 37 | modelp.infp = Namespace(niter=1000) 38 | elif modelp.model_str=='opt' or modelp.model_str=='optmatern32': 39 | modelp.kernp = Namespace(ig1=1., ig2=5., n1=10., n2=20., n3=.01, 40 | n4=.01) 41 | modelp.infp = Namespace(niter=1000) 42 | elif modelp.model_str=='samp' or modelp.model_str=='sampmatern32': 43 | modelp.kernp = Namespace(ig1=1., ig2=5., n1=10., n2=20., n3=.01, 44 | n4=.01) 45 | modelp.infp = Namespace(niter=1500, nwarmup=500) 46 | self.modelp = modelp 47 | 48 | def set_data(self, data): 49 | """ Set self.data """ 50 | if data is None: 51 | pass #TODO: handle case where there's no data 52 | self.data_init = copy.deepcopy(data) 53 | self.data = self.get_transformed_data(self.data_init, 54 | self.modelp.gp_mean_transf_str) 55 | 56 | def get_transformed_data(self, data, transf_str='linear'): 57 | """ Transform data, for 
non-zero-mean GP """ 58 | newdata = Namespace(X=data.X) 59 | if transf_str=='linear': 60 | mmat,_,_,_ = np.linalg.lstsq(np.concatenate([data.X, 61 | np.ones((data.X.shape[0],1))],1), data.y.flatten(), rcond=None) 62 | self.gp_mean_vec = lambda x: np.matmul(np.concatenate([x, 63 | np.ones((x.shape[0],1))],1), mmat) 64 | newdata.y = data.y - self.gp_mean_vec(data.X).reshape(-1,1) 65 | if transf_str=='constant': 66 | yconstant = data.y.mean() 67 | #yconstant = 0. 68 | self.gp_mean_vec = lambda x: np.array([yconstant for xcomp in x]) 69 | newdata.y = data.y - self.gp_mean_vec(data.X).reshape(-1,1) 70 | return newdata 71 | 72 | def set_model(self): 73 | """ Set GP regression model """ 74 | self.model = self.get_model() 75 | 76 | def get_model(self): 77 | """ Returns GPRegression model """ 78 | if self.modelp.model_str=='optfixedsig': 79 | return gpstan3.get_model(print_status=False) 80 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp': 81 | return gpstan2.get_model(print_status=False) 82 | elif self.modelp.model_str=='optmatern32' or \ 83 | self.modelp.model_str=='sampmatern32': 84 | return gpstan2_matern.get_model(print_status=False) 85 | 86 | def infer_post_and_update_samples(self, seed=5000012, print_result=False): 87 | """ Update self.sample_list """ 88 | data_dict = self.get_stan_data_dict() 89 | with suppress_stdout_stderr(): 90 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \ 91 | or self.modelp.model_str=='optmatern32': 92 | stanout = self.model.optimizing(data_dict, iter=self.modelp.infp.niter, 93 | #seed=seed, as_vector=True, algorithm='Newton') 94 | seed=seed, as_vector=True, algorithm='LBFGS') 95 | elif self.modelp.model_str=='samp' or self.modelp.model_str=='sampmatern32': 96 | stanout = self.model.sampling(data_dict, iter=self.modelp.infp.niter + 97 | self.modelp.infp.nwarmup, warmup=self.modelp.infp.nwarmup, chains=1, 98 | seed=seed, refresh=1000) 99 | print('-----') 100 | self.sample_list = self.get_sample_list_from_stan_out(stanout) 101 | if print_result: self.print_inference_result() 102 | 103 | def get_stan_data_dict(self): 104 | """ Return data dict for stan sampling method """ 105 | if self.modelp.model_str=='optfixedsig': 106 | return {'u1':self.modelp.kernp.u1, 'u2':self.modelp.kernp.u2, 107 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2, 108 | 'sigma':self.modelp.kernp.sigma, 'D':self.ndimx, 109 | 'N':len(self.data.X), 'x':self.data.X, 'y':self.data.y.flatten()} 110 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp': 111 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2, 112 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2, 113 | 'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4, 114 | 'D':self.ndimx, 'N':len(self.data.X), 'x':self.data.X, 115 | 'y':self.data.y.flatten()} 116 | elif self.modelp.model_str=='optmatern32' or \ 117 | self.modelp.model_str=='sampmatern32': 118 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2, 119 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2, 120 | 'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4, 121 | 'D':self.ndimx, 'N':len(self.data.X), 'x':self.data.X, 122 | 'y':self.data.y.flatten(), 'covid':2} 123 | 124 | def get_sample_list_from_stan_out(self, stanout): 125 | """ Convert stan output to sample_list """ 126 | if self.modelp.model_str=='optfixedsig': 127 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'], 128 | sigma=self.modelp.kernp.sigma)] 129 | elif self.modelp.model_str=='opt' or 
self.modelp.model_str=='optmatern32': 130 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'], 131 | sigma=stanout['sigma'])] 132 | elif self.modelp.model_str=='samp' or \ 133 | self.modelp.model_str=='sampmatern32': 134 | sdict = stanout.extract(['rho','alpha','sigma']) 135 | return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i], 136 | sigma=sdict['sigma'][i]) for i in range(sdict['rho'].shape[0])] 137 | 138 | def print_inference_result(self): 139 | """ Print results of stan inference """ 140 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \ 141 | self.modelp.model_str=='optmatern32': 142 | print('*ls pt est = '+str(self.sample_list[0].ls)+'.') 143 | print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.') 144 | print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.') 145 | elif self.modelp.model_str=='samp' or \ 146 | self.modelp.model_str=='sampmatern32': 147 | ls_arr = np.array([ns.ls for ns in self.sample_list]) 148 | alpha_arr = np.array([ns.alpha for ns in self.sample_list]) 149 | sigma_arr = np.array([ns.sigma for ns in self.sample_list]) 150 | print('*ls mean = '+str(ls_arr.mean())+'.') 151 | print('*ls std = '+str(ls_arr.std())+'.') 152 | print('*alpha mean = '+str(alpha_arr.mean())+'.') 153 | print('*alpha std = '+str(alpha_arr.std())+'.') 154 | print('*sigma mean = '+str(sigma_arr.mean())+'.') 155 | print('*sigma std = '+str(sigma_arr.std())+'.') 156 | print('-----') 157 | 158 | def sample_pp_post_pred(self, nsamp, input_list, full_cov=False, nloop=None): 159 | """ Sample from posterior predictive of PP. 160 | Inputs: 161 | input_list - list of np arrays size=(-1,) 162 | Returns: 163 | list (len input_list) of np arrays (size=(nsamp,1)).""" 164 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \ 165 | self.modelp.model_str=='optmatern32': 166 | nloop = 1 167 | sampids = [0] 168 | elif self.modelp.model_str=='samp' or \ 169 | self.modelp.model_str=='sampmatern32': 170 | if nloop is None: nloop=nsamp 171 | nsamp = int(nsamp/nloop) 172 | sampids = np.random.randint(len(self.sample_list), size=(nloop,)) 173 | ppred_list = [] 174 | for i in range(nloop): 175 | samp = self.sample_list[sampids[i]] 176 | postmu, postcov = self.gp_post(self.data.X, self.data.y, 177 | np.stack(input_list), samp.ls, samp.alpha, samp.sigma, full_cov) 178 | if full_cov: 179 | ppred_list.extend(list(sample_mvn(postmu, postcov, nsamp))) 180 | else: 181 | ppred_list.extend(list(np.random.normal(postmu.reshape(-1,), 182 | postcov.reshape(-1,), size=(nsamp, len(input_list))))) 183 | return self.get_reverse_transform(list(np.stack(ppred_list).T), ppred_list, 184 | input_list) 185 | 186 | def sample_pp_pred(self, nsamp, input_list, lv=None): 187 | """ Sample from predictive of PP for parameter lv. 
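    Here lv is a Namespace of GP hyperparameters (ls, alpha, sigma): the point
    estimate for the 'opt*' models, or one randomly chosen posterior sample for
    the 'samp*' models. A single multivariate-normal draw from the GP posterior
    at input_list is returned (see the TODO below about drawing nsamp times).
    Minimal usage sketch, assuming `gp` is an already-fitted 1-d StanGpPP
    (variable names are illustrative only):
        xs = [np.array([0.1]), np.array([0.5])]
        preds, pred_samples = gp.sample_pp_pred(1, xs)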
188 | Returns: list (len input_list) of np arrays (size (nsamp,1)).""" 189 | x_pred = np.stack(input_list) 190 | if lv is None: 191 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \ 192 | or self.modelp.model_str=='optmatern32': 193 | lv = self.sample_list[0] 194 | elif self.modelp.model_str=='samp' or \ 195 | self.modelp.model_str=='sampmatern32': 196 | lv = self.sample_list[np.random.randint(len(self.sample_list))] 197 | postmu, postcov = self.gp_post(self.data.X, self.data.y, x_pred, lv.ls, 198 | lv.alpha, lv.sigma) 199 | pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times 200 | return self.get_reverse_transform(list(np.stack(pred_list).T), pred_list, 201 | input_list) 202 | 203 | def get_reverse_transform(self, pp1, pp2, input_list): 204 | """ Apply reverse of data transform to ppred or pred """ 205 | pp1 = [pp1[i] + self.gp_mean_vec(input_list[i].reshape(1,-1)) for i in 206 | range(len(input_list))] 207 | pp2 = [psamp + self.gp_mean_vec(np.array(input_list)) for psamp in pp2] 208 | return pp1, pp2 209 | 210 | def gp_post(self, x_train, y_train, x_pred, ls, alpha, sigma, full_cov=True): 211 | """ Compute parameters of GP posterior """ 212 | if self.modelp.model_str=='optmatern32' or \ 213 | self.modelp.model_str=='sampmatern32': 214 | kernel = kern_matern32 215 | else: 216 | kernel = kern_exp_quad 217 | k11_nonoise = kernel(x_train, x_train, ls, alpha) 218 | lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first') 219 | smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y_train)) 220 | k21 = kernel(x_pred, x_train, ls, alpha) 221 | mu2 = k21.dot(smat) 222 | k22 = kernel(x_pred, x_pred, ls, alpha) 223 | vmat = solve_lower_triangular(lmat, k21.T) 224 | k2 = k22 - vmat.T.dot(vmat) 225 | if full_cov is False: 226 | k2 = np.sqrt(np.diag(k2)) 227 | return mu2, k2 228 | 229 | # Utilities 230 | def print_str(self): 231 | """ Print a description string """ 232 | print('*StanGpPP with modelp='+str(self.modelp)+'.') 233 | print('-----') 234 | -------------------------------------------------------------------------------- /bo/pp/pp_gp_stan_distmat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for GP models with Stan, using a given distance matrix. 
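The kernel is evaluated on a user-supplied distance matrix rather than on raw
feature vectors, roughly k(x, x') = alpha^2 * exp(-d(x, x') / rho^2) as in the
Stan code in bo/pp/stan/gp_distmat.py, with d(x, x') given by get_distmat (here
0.5 times the squared Euclidean distance). This lets the same GP code run on
objects for which only pairwise distances are defined. A minimal construction
sketch for the 'fixedparam' variant; the modelp fields below mirror the ones
this module reads and are illustrative, not a documented API:

    from argparse import Namespace
    import numpy as np
    from bo.pp.pp_gp_stan_distmat import StanGpDistmatPP
    X, y = np.random.rand(5, 1), np.random.rand(5, 1)
    modelp = Namespace(ndimx=1, model_str='fixedparam',
                       kernp=Namespace(ls=1., alpha=1., sigma=1e-2))
    gp = StanGpDistmatPP(data=Namespace(X=X, y=y), modelp=modelp)
    gp.infer_post_and_update_samples()   # 'fixedparam' just stores kernp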
3 | """ 4 | 5 | from argparse import Namespace 6 | import time 7 | import copy 8 | import numpy as np 9 | from scipy.spatial.distance import cdist 10 | from bo.pp.pp_core import DiscPP 11 | import bo.pp.stan.gp_distmat as gpstan 12 | import bo.pp.stan.gp_distmat_fixedsig as gpstan_fixedsig 13 | from bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \ 14 | get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \ 15 | sample_mvn, squared_euc_distmat, kern_distmat 16 | from bo.util.print_utils import suppress_stdout_stderr 17 | 18 | class StanGpDistmatPP(DiscPP): 19 | """ Hierarchical GPs using a given distance matrix, implemented with Stan """ 20 | 21 | def __init__(self, data=None, modelp=None, printFlag=True): 22 | """ Constructor """ 23 | self.set_model_params(modelp) 24 | self.set_data(data) 25 | self.ndimx = self.modelp.ndimx 26 | self.set_model() 27 | super(StanGpDistmatPP,self).__init__() 28 | if printFlag: 29 | self.print_str() 30 | 31 | def set_model_params(self, modelp): 32 | """ Set self.modelp """ 33 | if modelp is None: 34 | pass #TODO 35 | self.modelp = modelp 36 | 37 | def set_data(self, data): 38 | """ Set self.data """ 39 | if data is None: 40 | pass #TODO 41 | self.data_init = copy.deepcopy(data) 42 | self.data = copy.deepcopy(self.data_init) 43 | 44 | def set_model(self): 45 | """ Set GP regression model """ 46 | self.model = self.get_model() 47 | 48 | def get_model(self): 49 | """ Returns GPRegression model """ 50 | if self.modelp.model_str=='optfixedsig' or \ 51 | self.modelp.model_str=='sampfixedsig': 52 | return gpstan_fixedsig.get_model(print_status=True) 53 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp': 54 | return gpstan.get_model(print_status=True) 55 | elif self.modelp.model_str=='fixedparam': 56 | return None 57 | 58 | def infer_post_and_update_samples(self, seed=543210, print_result=False): 59 | """ Update self.sample_list """ 60 | data_dict = self.get_stan_data_dict() 61 | with suppress_stdout_stderr(): 62 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt': 63 | stanout = self.model.optimizing(data_dict, iter=self.modelp.infp.niter, 64 | #seed=seed, as_vector=True, algorithm='Newton') 65 | seed=seed, as_vector=True, algorithm='LBFGS') 66 | elif self.modelp.model_str=='samp' or self.modelp.model_str=='sampfixedsig': 67 | stanout = self.model.sampling(data_dict, iter=self.modelp.infp.niter + 68 | self.modelp.infp.nwarmup, warmup=self.modelp.infp.nwarmup, chains=1, 69 | seed=seed, refresh=1000) 70 | elif self.modelp.model_str=='fixedparam': 71 | stanout = None 72 | print('-----') 73 | self.sample_list = self.get_sample_list_from_stan_out(stanout) 74 | if print_result: self.print_inference_result() 75 | 76 | def get_stan_data_dict(self): 77 | """ Return data dict for stan sampling method """ 78 | if self.modelp.model_str=='optfixedsig' or \ 79 | self.modelp.model_str=='sampfixedsig': 80 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2, 81 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2, 82 | 'sigma':self.modelp.kernp.sigma, 'D':self.ndimx, 83 | 'N':len(self.data.X), 'y':self.data.y.flatten(), 84 | 'distmat':self.get_distmat(self.data.X, self.data.X)} 85 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp': 86 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2, 87 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2, 88 | 'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4, 89 | 'D':self.ndimx, 'N':len(self.data.X), 'y':self.data.y.flatten(), 
90 | 'distmat':self.get_distmat(self.data.X, self.data.X)} 91 | 92 | def get_distmat(self, xmat1, xmat2): 93 | """ Get distance matrix """ 94 | # For now, will compute squared euc distance * .5, on self.data.X 95 | return squared_euc_distmat(xmat1, xmat2, .5) 96 | 97 | def get_sample_list_from_stan_out(self, stanout): 98 | """ Convert stan output to sample_list """ 99 | if self.modelp.model_str=='optfixedsig': 100 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'], 101 | sigma=self.modelp.kernp.sigma)] 102 | elif self.modelp.model_str=='opt': 103 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'], 104 | sigma=stanout['sigma'])] 105 | elif self.modelp.model_str=='sampfixedsig': 106 | sdict = stanout.extract(['rho','alpha']) 107 | return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i], 108 | sigma=self.modelp.kernp.sigma) for i in range(sdict['rho'].shape[0])] 109 | elif self.modelp.model_str=='samp': 110 | sdict = stanout.extract(['rho','alpha','sigma']) 111 | return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i], 112 | sigma=sdict['sigma'][i]) for i in range(sdict['rho'].shape[0])] 113 | elif self.modelp.model_str=='fixedparam': 114 | return [Namespace(ls=self.modelp.kernp.ls, alpha=self.modelp.kernp.alpha, 115 | sigma=self.modelp.kernp.sigma)] 116 | 117 | def print_inference_result(self): 118 | """ Print results of stan inference """ 119 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \ 120 | self.modelp.model_str=='fixedparam': 121 | print('*ls pt est = '+str(self.sample_list[0].ls)+'.') 122 | print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.') 123 | print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.') 124 | elif self.modelp.model_str=='samp' or \ 125 | self.modelp.model_str=='sampfixedsig': 126 | ls_arr = np.array([ns.ls for ns in self.sample_list]) 127 | alpha_arr = np.array([ns.alpha for ns in self.sample_list]) 128 | sigma_arr = np.array([ns.sigma for ns in self.sample_list]) 129 | print('*ls mean = '+str(ls_arr.mean())+'.') 130 | print('*ls std = '+str(ls_arr.std())+'.') 131 | print('*alpha mean = '+str(alpha_arr.mean())+'.') 132 | print('*alpha std = '+str(alpha_arr.std())+'.') 133 | print('*sigma mean = '+str(sigma_arr.mean())+'.') 134 | print('*sigma std = '+str(sigma_arr.std())+'.') 135 | print('-----') 136 | 137 | def sample_pp_post_pred(self, nsamp, input_list, full_cov=False, nloop=None): 138 | """ Sample from posterior predictive of PP. 
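    Hyperparameters are taken either from the single point estimate (the
    'opt*' and 'fixedparam' models) or from nloop randomly chosen posterior
    samples (the 'samp*' models, where roughly nsamp/nloop draws are made per
    chosen sample).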
139 | Inputs: 140 | input_list - list of np arrays size=(-1,) 141 | Returns: 142 | list (len input_list) of np arrays (size=(nsamp,1)).""" 143 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \ 144 | self.modelp.model_str=='fixedparam': 145 | nloop = 1 146 | sampids = [0] 147 | elif self.modelp.model_str=='samp' or \ 148 | self.modelp.model_str=='sampfixedsig': 149 | if nloop is None: nloop=nsamp 150 | nsamp = int(nsamp/nloop) 151 | sampids = np.random.randint(len(self.sample_list), size=(nloop,)) 152 | ppred_list = [] 153 | for i in range(nloop): 154 | samp = self.sample_list[sampids[i]] 155 | postmu, postcov = self.gp_post(self.data.X, self.data.y, 156 | np.stack(input_list), samp.ls, samp.alpha, samp.sigma, full_cov) 157 | if full_cov: 158 | ppred_list.extend(list(sample_mvn(postmu, postcov, nsamp))) 159 | else: 160 | ppred_list.extend(list(np.random.normal(postmu.reshape(-1,), 161 | postcov.reshape(-1,), size=(nsamp, len(input_list))))) 162 | return list(np.stack(ppred_list).T), ppred_list 163 | 164 | def sample_pp_pred(self, nsamp, input_list, lv=None): 165 | """ Sample from predictive of PP for parameter lv. 166 | Returns: list (len input_list) of np arrays (size (nsamp,1)).""" 167 | x_pred = np.stack(input_list) 168 | if lv is None: 169 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \ 170 | or self.modelp.model_str=='fixedparam': 171 | lv = self.sample_list[0] 172 | elif self.modelp.model_str=='samp' or \ 173 | self.modelp.model_str=='sampfixedsig': 174 | lv = self.sample_list[np.random.randint(len(self.sample_list))] 175 | postmu, postcov = self.gp_post(self.data.X, self.data.y, x_pred, lv.ls, 176 | lv.alpha, lv.sigma) 177 | pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times 178 | return list(np.stack(pred_list).T), pred_list 179 | 180 | def gp_post(self, x_train, y_train, x_pred, ls, alpha, sigma, full_cov=True): 181 | """ Compute parameters of GP posterior """ 182 | kernel = lambda a, b, c, d: kern_distmat(a, b, c, d, self.get_distmat) 183 | k11_nonoise = kernel(x_train, x_train, ls, alpha) 184 | lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first') 185 | smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y_train)) 186 | k21 = kernel(x_pred, x_train, ls, alpha) 187 | mu2 = k21.dot(smat) 188 | k22 = kernel(x_pred, x_pred, ls, alpha) 189 | vmat = solve_lower_triangular(lmat, k21.T) 190 | k2 = k22 - vmat.T.dot(vmat) 191 | if full_cov is False: 192 | k2 = np.sqrt(np.diag(k2)) 193 | return mu2, k2 194 | 195 | # Utilities 196 | def print_str(self): 197 | """ Print a description string """ 198 | print('*StanGpDistmatPP with modelp='+str(self.modelp)+'.') 199 | print('-----') 200 | -------------------------------------------------------------------------------- /bo/pp/stan/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for defining and compiling models in Stan. 
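The gp_* modules here each expose get_model(recompile=False, print_status=True):
with recompile=True the model is built with pystan.StanModel and pickled under
bo/pp/stan/hide_model/, and with the defaults that pickle is loaded. A minimal
sketch of the round trip (import path as used by bo/pp/pp_gp_stan_distmat.py):

    import bo.pp.stan.gp_distmat as gpstan
    model = gpstan.get_model(recompile=True)   # compile once and cache
    model = gpstan.get_model()                 # later runs load the cache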
3 | """ 4 | -------------------------------------------------------------------------------- /bo/pp/stan/compile_stan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to compile stan models 3 | """ 4 | 5 | #import pp_new.stan.gp_hier2 as gpstan 6 | #import pp_new.stan.gp_hier3 as gpstan 7 | #import pp_new.stan.gp_hier2_matern as gpstan 8 | import pp_new.stan.gp_distmat as gpstan 9 | #import pp_new.stan.gp_distmat_fixedsig as gpstan 10 | 11 | 12 | # Recompile model and return it 13 | model = gpstan.get_model(recompile=True) 14 | -------------------------------------------------------------------------------- /bo/pp/stan/gp_distmat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to define and compile PPs in Stan, for model: 3 | hierarchical GP (prior on rho, alpha, sigma) using a given distance matrix. 4 | """ 5 | 6 | import time 7 | import pickle 8 | import pystan 9 | 10 | def get_model(recompile=False, print_status=True): 11 | model_file_str = 'bo/pp/stan/hide_model/gp_distmat.pkl' 12 | 13 | if recompile: 14 | starttime = time.time() 15 | model = pystan.StanModel(model_code=get_model_code()) 16 | buildtime = time.time()-starttime 17 | with open(model_file_str,'wb') as f: 18 | pickle.dump(model, f) 19 | if print_status: 20 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----') 21 | print('*Model saved in file ' + model_file_str + '.\n-----') 22 | else: 23 | model = pickle.load(open(model_file_str,'rb')) 24 | if print_status: 25 | print('*Model loaded from file ' + model_file_str + '.\n-----') 26 | return model 27 | 28 | 29 | def get_model_code(): 30 | """ Parse modelp and return stan model code """ 31 | return """ 32 | data { 33 | int N; 34 | matrix[N, N] distmat; 35 | vector[N] y; 36 | real ig1; 37 | real ig2; 38 | real n1; 39 | real n2; 40 | real n3; 41 | real n4; 42 | } 43 | 44 | parameters { 45 | real rho; 46 | real alpha; 47 | real sigma; 48 | } 49 | 50 | model { 51 | matrix[N, N] cov = square(alpha) * exp(-distmat / square(rho)) 52 | + diag_matrix(rep_vector(square(sigma), N)); 53 | matrix[N, N] L_cov = cholesky_decompose(cov); 54 | rho ~ inv_gamma(ig1, ig2); 55 | alpha ~ normal(n1, n2); 56 | sigma ~ normal(n3, n4); 57 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov); 58 | } 59 | """ 60 | 61 | if __name__ == '__main__': 62 | get_model() 63 | -------------------------------------------------------------------------------- /bo/pp/stan/gp_distmat_fixedsig.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to define and compile PPs in Stan, for model: 3 | hierarchical GP (prior on rho, alpha) and fixed sigma, using a given 4 | distance matrix. 
5 | """ 6 | 7 | import time 8 | import pickle 9 | import pystan 10 | 11 | def get_model(recompile=False, print_status=True): 12 | model_file_str = 'bo/pp/stan/hide_model/gp_distmat_fixedsig.pkl' 13 | 14 | if recompile: 15 | starttime = time.time() 16 | model = pystan.StanModel(model_code=get_model_code()) 17 | buildtime = time.time()-starttime 18 | with open(model_file_str,'wb') as f: 19 | pickle.dump(model, f) 20 | if print_status: 21 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----') 22 | print('*Model saved in file ' + model_file_str + '.\n-----') 23 | else: 24 | model = pickle.load(open(model_file_str,'rb')) 25 | if print_status: 26 | print('*Model loaded from file ' + model_file_str + '.\n-----') 27 | return model 28 | 29 | 30 | def get_model_code(): 31 | """ Parse modelp and return stan model code """ 32 | return """ 33 | data { 34 | int N; 35 | matrix[N, N] distmat; 36 | vector[N] y; 37 | real ig1; 38 | real ig2; 39 | real n1; 40 | real n2; 41 | real sigma; 42 | } 43 | 44 | parameters { 45 | real rho; 46 | real alpha; 47 | } 48 | 49 | model { 50 | matrix[N, N] cov = square(alpha) * exp(-distmat / square(rho)) 51 | + diag_matrix(rep_vector(square(sigma), N)); 52 | matrix[N, N] L_cov = cholesky_decompose(cov); 53 | rho ~ inv_gamma(ig1, ig2); 54 | alpha ~ normal(n1, n2); 55 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov); 56 | } 57 | """ 58 | 59 | if __name__ == '__main__': 60 | get_model() 61 | -------------------------------------------------------------------------------- /bo/pp/stan/gp_hier2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to define and compile PPs in Stan, for model: 3 | hierarchical GP (prior on rho, alpha, sigma) 4 | """ 5 | 6 | import time 7 | import pickle 8 | import pystan 9 | 10 | def get_model(recompile=False, print_status=True): 11 | model_file_str = 'bo/pp/stan/hide_model/gp_hier2.pkl' 12 | 13 | if recompile: 14 | starttime = time.time() 15 | model = pystan.StanModel(model_code=get_model_code()) 16 | buildtime = time.time()-starttime 17 | with open(model_file_str,'wb') as f: 18 | pickle.dump(model, f) 19 | if print_status: 20 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----') 21 | print('*Model saved in file ' + model_file_str + '.\n-----') 22 | else: 23 | model = pickle.load(open(model_file_str,'rb')) 24 | if print_status: 25 | print('*Model loaded from file ' + model_file_str + '.\n-----') 26 | return model 27 | 28 | 29 | def get_model_code(): 30 | """ Parse modelp and return stan model code """ 31 | return """ 32 | data { 33 | int D; 34 | int N; 35 | vector[D] x[N]; 36 | vector[N] y; 37 | real ig1; 38 | real ig2; 39 | real n1; 40 | real n2; 41 | real n3; 42 | real n4; 43 | } 44 | 45 | parameters { 46 | real rho; 47 | real alpha; 48 | real sigma; 49 | } 50 | 51 | model { 52 | matrix[N, N] cov = cov_exp_quad(x, alpha, rho) 53 | + diag_matrix(rep_vector(square(sigma), N)); 54 | matrix[N, N] L_cov = cholesky_decompose(cov); 55 | rho ~ inv_gamma(ig1, ig2); 56 | alpha ~ normal(n1, n2); 57 | sigma ~ normal(n3, n4); 58 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov); 59 | } 60 | """ 61 | 62 | if __name__ == '__main__': 63 | get_model() 64 | -------------------------------------------------------------------------------- /bo/pp/stan/gp_hier2_matern.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to define and compile PPs in Stan, for model: hierarchical GP (prior 3 | on rho, alpha, sigma), 
with matern kernel 4 | """ 5 | 6 | import time 7 | import pickle 8 | import pystan 9 | 10 | def get_model(recompile=False, print_status=True): 11 | model_file_str = 'bo/pp/stan/hide_model/gp_hier2_matern.pkl' 12 | 13 | if recompile: 14 | starttime = time.time() 15 | model = pystan.StanModel(model_code=get_model_code()) 16 | buildtime = time.time()-starttime 17 | with open(model_file_str,'wb') as f: 18 | pickle.dump(model, f) 19 | if print_status: 20 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----') 21 | print('*Model saved in file ' + model_file_str + '.\n-----') 22 | else: 23 | model = pickle.load(open(model_file_str,'rb')) 24 | if print_status: 25 | print('*Model loaded from file ' + model_file_str + '.\n-----') 26 | return model 27 | 28 | 29 | def get_model_code(): 30 | """ Parse modelp and return stan model code """ 31 | return """ 32 | functions { 33 | matrix distance_matrix_single(int N, vector[] x) { 34 | matrix[N, N] distmat; 35 | for(i in 1:(N-1)) { 36 | for(j in (i+1):N) { 37 | distmat[i, j] = distance(x[i], x[j]); 38 | } 39 | } 40 | return distmat; 41 | } 42 | 43 | matrix matern_covariance(int N, matrix dist, real ls, real alpha_sq, int COVFN) { 44 | matrix[N,N] S; 45 | real dist_ls; 46 | real sqrt3; 47 | real sqrt5; 48 | sqrt3=sqrt(3.0); 49 | sqrt5=sqrt(5.0); 50 | 51 | // exponential == Matern nu=1/2 , (p=0; nu=p+1/2) 52 | if (COVFN==1) { 53 | for(i in 1:(N-1)) { 54 | for(j in (i+1):N) { 55 | dist_ls = fabs(dist[i,j])/ls; 56 | S[i,j] = alpha_sq * exp(- dist_ls ); 57 | } 58 | } 59 | } 60 | 61 | // Matern nu= 3/2 covariance 62 | else if (COVFN==2) { 63 | for(i in 1:(N-1)) { 64 | for(j in (i+1):N) { 65 | dist_ls = fabs(dist[i,j])/ls; 66 | S[i,j] = alpha_sq * (1 + sqrt3 * dist_ls) * exp(-sqrt3 * dist_ls); 67 | } 68 | } 69 | } 70 | 71 | // Matern nu=5/2 covariance 72 | else if (COVFN==3) { 73 | for(i in 1:(N-1)) { 74 | for(j in (i+1):N) { 75 | dist_ls = fabs(dist[i,j])/ls; 76 | S[i,j] = alpha_sq * (1 + sqrt5 *dist_ls + 5* pow(dist_ls,2)/3) * exp(-sqrt5 *dist_ls); 77 | } 78 | } 79 | } 80 | 81 | // Matern as nu->Inf become Gaussian (aka squared exponential cov) 82 | else if (COVFN==4) { 83 | for(i in 1:(N-1)) { 84 | for(j in (i+1):N) { 85 | dist_ls = fabs(dist[i,j])/ls; 86 | S[i,j] = alpha_sq * exp( -pow(dist_ls,2)/2 ) ; 87 | } 88 | } 89 | } 90 | 91 | // fill upper triangle 92 | for(i in 1:(N-1)) { 93 | for(j in (i+1):N) { 94 | S[j,i] = S[i,j]; 95 | } 96 | } 97 | 98 | // create diagonal: nugget(nonspatial) + spatial variance + eps ensures positive definiteness 99 | for(i in 1:N) { 100 | S[i,i] = alpha_sq; 101 | } 102 | 103 | return S; 104 | } 105 | } 106 | 107 | data { 108 | int D; 109 | int N; 110 | vector[D] x[N]; 111 | vector[N] y; 112 | real ig1; 113 | real ig2; 114 | real n1; 115 | real n2; 116 | real n3; 117 | real n4; 118 | int covid; 119 | } 120 | 121 | parameters { 122 | real rho; 123 | real alpha; 124 | real sigma; 125 | } 126 | 127 | model { 128 | matrix[N, N] distmat = distance_matrix_single(N, x); 129 | matrix[N, N] cov = matern_covariance(N, distmat, rho, square(alpha), covid); 130 | matrix[N, N] L_cov = cholesky_decompose(cov); 131 | rho ~ inv_gamma(ig1, ig2); 132 | alpha ~ normal(n1, n2); 133 | sigma ~ normal(n3, n4); 134 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov); 135 | } 136 | """ 137 | 138 | if __name__ == '__main__': 139 | get_model() 140 | -------------------------------------------------------------------------------- /bo/pp/stan/gp_hier3.py: -------------------------------------------------------------------------------- 1 | """ 
2 | Functions to define and compile PPs in Stan, for model: 3 | hierarchical GP with uniform prior on rho, normal prior on alpha, 4 | and fixed sigma 5 | """ 6 | 7 | import time 8 | import pickle 9 | import pystan 10 | 11 | def get_model(recompile=False, print_status=True): 12 | model_file_str = 'bo/pp/stan/hide_model/gp_hier3.pkl' 13 | 14 | if recompile: 15 | starttime = time.time() 16 | model = pystan.StanModel(model_code=get_model_code()) 17 | buildtime = time.time()-starttime 18 | with open(model_file_str,'wb') as f: 19 | pickle.dump(model, f) 20 | if print_status: 21 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----') 22 | print('*Model saved in file ' + model_file_str + '.\n-----') 23 | else: 24 | model = pickle.load(open(model_file_str,'rb')) 25 | if print_status: 26 | print('*Model loaded from file ' + model_file_str + '.\n-----') 27 | return model 28 | 29 | 30 | def get_model_code(): 31 | """ Parse modelp and return stan model code """ 32 | return """ 33 | data { 34 | int D; 35 | int N; 36 | vector[D] x[N]; 37 | vector[N] y; 38 | real u1; 39 | real u2; 40 | real n1; 41 | real n2; 42 | real sigma; 43 | } 44 | 45 | parameters { 46 | real rho; 47 | real alpha; 48 | } 49 | 50 | model { 51 | matrix[N, N] cov = cov_exp_quad(x, alpha, rho) 52 | + diag_matrix(rep_vector(square(sigma), N)); 53 | matrix[N, N] L_cov = cholesky_decompose(cov); 54 | rho ~ uniform(u1, u2); 55 | alpha ~ normal(n1, n2); 56 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov); 57 | } 58 | """ 59 | 60 | if __name__ == '__main__': 61 | get_model() 62 | -------------------------------------------------------------------------------- /bo/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Miscellaneous utilities. 3 | """ 4 | -------------------------------------------------------------------------------- /bo/util/datatransform.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for transforming data. 
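The transformer wraps sklearn's StandardScaler: transform_data maps each column
of datamat to zero mean and unit variance, and inv_transform_data undoes it.
A minimal sketch (the array is illustrative only):

    import numpy as np
    from bo.util.datatransform import DataTransformer
    dt = DataTransformer(np.array([[1., 10.], [2., 20.], [3., 30.]]),
                         printflag=False)
    z = dt.transform_data()           # standardized copy of datamat
    back = dt.inv_transform_data(z)   # recovers the original values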
3 | """ 4 | 5 | from argparse import Namespace 6 | import numpy as np 7 | from sklearn.preprocessing import StandardScaler 8 | #import sklearn.preprocessing as sklp 9 | 10 | class DataTransformer(object): 11 | """ Class for transforming data """ 12 | 13 | def __init__(self, datamat, printflag=True): 14 | """ Constructor 15 | Parameters: 16 | datamat - numpy array (n x d) of data to be transformed 17 | """ 18 | self.datamat = datamat 19 | self.set_transformers() 20 | if printflag: 21 | self.print_str() 22 | 23 | def set_transformers(self): 24 | """ Set transformers using self.datamat """ 25 | self.ss = StandardScaler() 26 | self.ss.fit(self.datamat) 27 | 28 | def transform_data(self, datamat=None): 29 | """ Return transformed datamat (default self.datamat) """ 30 | if datamat is None: 31 | datamat = self.datamat 32 | return self.ss.transform(datamat) 33 | 34 | def inv_transform_data(self, datamat): 35 | """ Return inverse transform of datamat """ 36 | return self.ss.inverse_transform(datamat) 37 | 38 | def print_str(self): 39 | """ Print a description string """ 40 | print('*DataTransformer with self.datamat.shape = ' + 41 | str(self.datamat.shape) + '.') 42 | print('-----') 43 | -------------------------------------------------------------------------------- /bo/util/print_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for printing and output 3 | """ 4 | 5 | import os 6 | 7 | class suppress_stdout_stderr(object): 8 | ''' A context manager for doing a "deep suppression" of stdout and stderr in 9 | Python, i.e. will suppress all print, even if the print originates in a 10 | compiled C/Fortran sub-function. 11 | This will not suppress raised exceptions, since exceptions are printed 12 | to stderr just before a script exits, and after the context manager has 13 | exited (at least, I think that is why it lets exceptions through). ''' 14 | def __init__(self): 15 | # Open a pair of null files 16 | self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)] 17 | # Save the actual stdout (1) and stderr (2) file descriptors. 18 | self.save_fds = [os.dup(1), os.dup(2)] 19 | 20 | def __enter__(self): 21 | # Assign the null pointers to stdout and stderr. 
22 | os.dup2(self.null_fds[0], 1) 23 | os.dup2(self.null_fds[1], 2) 24 | 25 | def __exit__(self, *_): 26 | # Re-assign the real stdout/stderr back to (1) and (2) 27 | os.dup2(self.save_fds[0], 1) 28 | os.dup2(self.save_fds[1], 2) 29 | # Close the null files 30 | for fd in self.null_fds + self.save_fds: 31 | os.close(fd) 32 | -------------------------------------------------------------------------------- /darts/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /darts/arch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import os 4 | import copy 5 | import random 6 | 7 | sys.path.append(os.path.expanduser('~/darts/cnn')) 8 | from train_class import Train 9 | 10 | OPS = ['none', 11 | 'max_pool_3x3', 12 | 'avg_pool_3x3', 13 | 'skip_connect', 14 | 'sep_conv_3x3', 15 | 'sep_conv_5x5', 16 | 'dil_conv_3x3', 17 | 'dil_conv_5x5' 18 | ] 19 | NUM_VERTICES = 4 20 | INPUT_1 = 'c_k-2' 21 | INPUT_2 = 'c_k-1' 22 | 23 | 24 | class Arch: 25 | 26 | def __init__(self, arch): 27 | self.arch = arch 28 | 29 | def serialize(self): 30 | return self.arch 31 | 32 | def query(self, epochs=50): 33 | trainer = Train() 34 | val_losses, test_losses = trainer.main(self.arch, epochs=epochs) 35 | val_loss = 100 - np.mean(val_losses) 36 | test_loss = 100 - test_losses[-1] 37 | return val_loss, test_loss 38 | 39 | @classmethod 40 | def random_arch(cls): 41 | # output a uniformly random architecture spec 42 | # from the DARTS repository 43 | # https://github.com/quark0/darts 44 | 45 | normal = [] 46 | reduction = [] 47 | for i in range(NUM_VERTICES): 48 | ops = np.random.choice(range(len(OPS)), NUM_VERTICES) 49 | 50 | #input nodes for conv 51 | nodes_in_normal = np.random.choice(range(i+2), 2, replace=False) 52 | #input nodes for reduce 53 | nodes_in_reduce = np.random.choice(range(i+2), 2, replace=False) 54 | 55 | normal.extend([(nodes_in_normal[0], ops[0]), (nodes_in_normal[1], ops[1])]) 56 | reduction.extend([(nodes_in_reduce[0], ops[2]), (nodes_in_reduce[1], ops[3])]) 57 | 58 | return (normal, reduction) 59 | 60 | def get_arch_list(self): 61 | # convert tuple to list so that it is mutable 62 | arch_list = [] 63 | for cell in self.arch: 64 | arch_list.append([]) 65 | for pair in cell: 66 | arch_list[-1].append([]) 67 | for num in pair: 68 | arch_list[-1][-1].append(num) 69 | return arch_list 70 | 71 | def mutate(self, edits): 72 | """ mutate a single arch """ 73 | # first convert tuple to array so that it is mutable 74 | mutation = self.get_arch_list() 75 | 76 | #make mutations 77 | for _ in range(edits): 78 | cell = np.random.choice(2) 79 | pair = np.random.choice(len(OPS)) 80 | num = np.random.choice(2) 81 | if num == 1: 82 | mutation[cell][pair][num] = np.random.choice(len(OPS)) 83 | else: 84 | inputs = pair // 2 + 2 85 | choice = np.random.choice(inputs) 86 | if pair % 2 == 0 and mutation[cell][pair+1][num] != choice: 87 | mutation[cell][pair][num] = choice 88 | elif pair % 2 != 0 and mutation[cell][pair-1][num] != choice: 89 | mutation[cell][pair][num] = choice 90 | 91 | return mutation 92 | 93 | def get_paths(self): 94 | """ return all paths from input to output """ 95 | 96 | path_builder = [[[], [], [], []], [[], [], [], []]] 97 | paths = [[], []] 98 | 99 | for i, cell in enumerate(self.arch): 100 | for j in range(len(OPS)): 101 | if cell[j][0] == 0: 102 | path = [INPUT_1, OPS[cell[j][1]]] 103 | 
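                    # An edge reading from cell input c_k-2 (index 0) or
                    # c_k-1 (index 1) starts a new path; an edge reading from
                    # an intermediate node (the else branch below) extends
                    # every path that already reaches that node.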
path_builder[i][j//2].append(path) 104 | paths[i].append(path) 105 | elif cell[j][0] == 1: 106 | path = [INPUT_2, OPS[cell[j][1]]] 107 | path_builder[i][j//2].append(path) 108 | paths[i].append(path) 109 | else: 110 | for path in path_builder[i][cell[j][0] - 2]: 111 | path = [*path, OPS[cell[j][1]]] 112 | path_builder[i][j//2].append(path) 113 | paths[i].append(path) 114 | 115 | # check if there are paths of length >=5 116 | contains_long_path = [False, False] 117 | if max([len(path) for path in paths[0]]) >= 5: 118 | contains_long_path[0] = True 119 | if max([len(path) for path in paths[1]]) >= 5: 120 | contains_long_path[1] = True 121 | 122 | return paths, contains_long_path 123 | 124 | def get_path_indices(self, long_paths=True): 125 | """ 126 | compute the index of each path 127 | There are 4 * (8^0 + ... + 8^4) paths total 128 | If long_paths = False, we give a single boolean to all paths of 129 | size 4, so there are only 4 * (1 + 8^0 + ... + 8^3) paths 130 | """ 131 | paths, contains_long_path = self.get_paths() 132 | normal_paths, reduce_paths = paths 133 | num_ops = len(OPS) 134 | """ 135 | Compute the max number of paths per input per cell. 136 | Since there are two cells and two inputs per cell, 137 | total paths = 4 * max_paths 138 | """ 139 | if not long_paths: 140 | max_paths = 1 + sum([num_ops ** i for i in range(NUM_VERTICES)]) 141 | else: 142 | max_paths = sum([num_ops ** i for i in range(NUM_VERTICES + 1)]) 143 | path_indices = [] 144 | 145 | # set the base index based on the cell and the input 146 | for i, paths in enumerate((normal_paths, reduce_paths)): 147 | for path in paths: 148 | index = i * 2 * max_paths 149 | if path[0] == INPUT_2: 150 | index += max_paths 151 | 152 | # recursively compute the index of the path 153 | for j in range(NUM_VERTICES + 1): 154 | if j == len(path) - 1: 155 | path_indices.append(index) 156 | break 157 | elif j == (NUM_VERTICES - 1) and not long_paths: 158 | path_indices.append(2 * (i + 1) * max_paths - 1) 159 | break 160 | else: 161 | index += num_ops ** j * (OPS.index(path[j + 1]) + 1) 162 | 163 | return (tuple(path_indices), contains_long_path) 164 | 165 | def encode_paths(self, long_paths=True): 166 | # output one-hot encoding of paths 167 | path_indices, _ = self.get_path_indices(long_paths=long_paths) 168 | num_ops = len(OPS) 169 | 170 | if not long_paths: 171 | max_paths = 1 + sum([num_ops ** i for i in range(NUM_VERTICES)]) 172 | else: 173 | max_paths = sum([num_ops ** i for i in range(NUM_VERTICES + 1)]) 174 | 175 | path_encoding = np.zeros(4 * max_paths) 176 | for index in path_indices: 177 | path_encoding[index] = 1 178 | return path_encoding 179 | 180 | def path_distance(self, other): 181 | # compute the distance between two architectures 182 | # by comparing their path encodings 183 | return np.sum(np.array(self.encode_paths() != np.array(other.encode_paths()))) 184 | 185 | def get_neighborhood(self, shuffle=True): 186 | op_nbhd = [] 187 | edge_nbhd = [] 188 | 189 | for i, cell in enumerate(self.arch): 190 | for j, pair in enumerate(cell): 191 | 192 | # mutate the op 193 | available = [op for op in range(len(OPS)) if op != pair[1]] 194 | for op in available: 195 | new_arch = self.get_arch_list() 196 | new_arch[i][j][1] = op 197 | op_nbhd.append({'spec': new_arch}) 198 | 199 | # mutate the edge 200 | other = j + 1 - 2 * (j % 2) 201 | available = [edge for edge in range(j//2+2) \ 202 | if edge not in [cell[other][0], pair[0]]] 203 | 204 | for edge in available: 205 | new_arch = self.get_arch_list() 206 | new_arch[i][j][0] = 
edge 207 | edge_nbhd.append({'spec': new_arch}) 208 | 209 | if shuffle: 210 | random.shuffle(edge_nbhd) 211 | random.shuffle(op_nbhd) 212 | 213 | # 112 in edge nbhd, 24 in op nbhd 214 | # alternate one edge nbr per 4 op nbrs 215 | nbrs = [] 216 | op_idx = 0 217 | for i in range(len(edge_nbhd)): 218 | nbrs.append(edge_nbhd[i]) 219 | for j in range(4): 220 | nbrs.append(op_nbhd[op_idx]) 221 | op_idx += 1 222 | nbrs = [*nbrs, *op_nbhd[op_idx:]] 223 | 224 | return nbrs 225 | 226 | 227 | -------------------------------------------------------------------------------- /darts/local_search_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import logging 4 | import sys 5 | import os 6 | import pickle 7 | import numpy as np 8 | import copy 9 | 10 | sys.path.append(os.path.expanduser('~/local_search')) 11 | os.environ['search_space'] = 'darts' 12 | from data import Data 13 | 14 | 15 | """ 16 | local_search_runner is used in run_experiments.sh 17 | """ 18 | 19 | def compute_next_arches(search_space, data, 20 | query=0, 21 | filepath='trained_spec', 22 | loss='val_loss_avg', 23 | k=1): 24 | new_dicts = [] 25 | best = -1 26 | best_val = 100 27 | last_chosen = -1 28 | for i, arch_dict in enumerate(data): 29 | if 'chosen' in arch_dict: 30 | last_chosen = i 31 | if arch_dict[loss] < best_val: 32 | best = i 33 | best_val = arch_dict[loss] 34 | 35 | new_chosen = -1 36 | if last_chosen == -1: 37 | # if we just finished the random initialization 38 | new_chosen = best 39 | 40 | if data[-1][loss] < data[last_chosen][loss]: 41 | # if the last architecture did better than its parent 42 | new_chosen = len(data) - 1 43 | 44 | print('last chosen', last_chosen, 'new chosen', new_chosen) 45 | if new_chosen >= 0: 46 | # get its neighbors and return them 47 | print('new chosen arch:', new_chosen, data[new_chosen][loss]) 48 | neighbors = search_space.get_nbhd(data[new_chosen]['spec']) 49 | neighbors = [nbr['spec'] for nbr in neighbors] 50 | dict_with_nbrs = copy.deepcopy(data[new_chosen]) 51 | dict_with_nbrs['neighbors'] = neighbors 52 | dict_with_nbrs['chosen'] = 1 53 | if 'parent' not in dict_with_nbrs: 54 | dict_with_nbrs['parent'] = last_chosen 55 | 56 | filename = '{}_{}.pkl'.format(filepath, dict_with_nbrs['index']) 57 | dict_with_nbrs['filepath'] = filename 58 | with open(filename, 'wb') as f: 59 | pickle.dump(dict_with_nbrs, f) 60 | 61 | for i in range(k): 62 | new_dicts.append({'spec':neighbors[i], 'parent':new_chosen}) 63 | return new_dicts 64 | 65 | # try more neighbors from the last chosen architecture 66 | neighbors = data[last_chosen]['neighbors'] 67 | if len(neighbors) <= len(data) - (last_chosen + 1): 68 | print('reached local minimum:', last_chosen, data[last_chosen][loss]) 69 | else: 70 | nbr_index = len(data) - (last_chosen + 1) 71 | for i in range(nbr_index, min(len(neighbors) - 1, nbr_index + k)): 72 | new_dicts.append({'spec':neighbors[i], 'parent':last_chosen}) 73 | return new_dicts 74 | 75 | def run_local_search(args): 76 | 77 | save_dir = '{}/'.format(args.experiment_name) 78 | if not os.path.exists(save_dir): 79 | os.mkdir(save_dir) 80 | 81 | query = args.query 82 | num_init = args.num_init 83 | k = args.k 84 | trained_prefix = args.trained_filename 85 | untrained_prefix = args.untrained_filename 86 | 87 | search_space = Data('darts') 88 | 89 | # if it's the first iteration, choose k arches at random to train 90 | if query == 0: 91 | print('about to generate {} random'.format(num_init)) 92 | data = 
search_space.generate_random_dataset(num=num_init, train=False) 93 | next_arches = [{'spec':d['spec']} for d in data] 94 | 95 | elif query < num_init: 96 | # if we're still training the initial arches, continue 97 | return 98 | 99 | else: 100 | # get the data from prior iterations from pickle files 101 | data = [] 102 | for i in range(query): 103 | 104 | filepath = '{}{}_{}.pkl'.format(save_dir, trained_prefix, i) 105 | with open(filepath, 'rb') as f: 106 | arch = pickle.load(f) 107 | data.append(arch) 108 | 109 | print('Iteration {}'.format(query)) 110 | print('Arches so far') 111 | for d in data: 112 | print(d['spec']) 113 | print('val_loss', d['val_loss_avg']) 114 | if 'chosen' in d and 'parent' in d: 115 | print('chosen', 'parent', d['parent']) 116 | elif 'chosen' in d: 117 | print('chosen') 118 | 119 | # run the meta neural net to output the next arches 120 | filepath = save_dir + trained_prefix 121 | next_arches = compute_next_arches(search_space, data, 122 | query=query, 123 | filepath=filepath, 124 | k=k) 125 | 126 | print('next arch(es)') 127 | print([arch['spec'] for arch in next_arches]) 128 | 129 | # output the new arches to pickle files 130 | num_to_train = k if query != 0 else num_init 131 | for i in range(num_to_train): 132 | index = query + i 133 | filepath = '{}{}_{}.pkl'.format(save_dir, untrained_prefix, index) 134 | next_arches[i]['index'] = index 135 | next_arches[i]['filepath'] = filepath 136 | with open(filepath, 'wb') as f: 137 | pickle.dump(next_arches[i], f) 138 | 139 | 140 | def main(args): 141 | 142 | #set up save dir 143 | save_dir = './' 144 | 145 | #set up logging 146 | log_format = '%(asctime)s %(message)s' 147 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 148 | format=log_format, datefmt='%m/%d %I:%M:%S %p') 149 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) 150 | fh.setFormatter(logging.Formatter(log_format)) 151 | logging.getLogger().addHandler(fh) 152 | logging.info(args) 153 | 154 | run_local_search(args) 155 | 156 | if __name__ == "__main__": 157 | parser = argparse.ArgumentParser(description='Args for meta neural net') 158 | parser.add_argument('--experiment_name', type=str, default='ls_darts_test', help='Folder for input/output files') 159 | parser.add_argument('--trained_filename', type=str, default='trained_spec', help='name of input files') 160 | parser.add_argument('--untrained_filename', type=str, default='untrained_spec', help='name of output files') 161 | parser.add_argument('--query', type=int, default=0, help='What query is the algorithm on') 162 | parser.add_argument('--num_init', type=int, default=20, help='Number of initial random architectures') 163 | parser.add_argument('--k', type=int, default=1, help='Number of architectures per iteration') 164 | 165 | args = parser.parse_args() 166 | main(args) -------------------------------------------------------------------------------- /darts/run_experiments.sh: -------------------------------------------------------------------------------- 1 | 2 | param_str=test 3 | 4 | # set parameters based on the param string 5 | if [ $param_str = test ]; then 6 | num_init=2 7 | start_iteration=0 8 | end_iteration=10 9 | epochs=1 10 | experiment_name=ls_darts_test 11 | fi 12 | if [ $param_str = twentyfive ]; then 13 | num_init=10 14 | start_iteration=0 15 | end_iteration=200 16 | epochs=25 17 | experiment_name=ls_darts_twentyfive 18 | fi 19 | if [ $param_str = fifty ]; then 20 | num_init=10 21 | start_iteration=0 22 | end_iteration=100 23 | epochs=50 24 | 
experiment_name=ls_darts_fifty 25 | fi 26 | 27 | for query in $(seq $start_iteration $end_iteration) 28 | do 29 | 30 | echo about to run local search round $query 31 | python darts/local_search_runner.py --experiment_name $experiment_name \ 32 | --query $query --num_init $num_init 33 | 34 | untrained_filepath=$experiment_name/untrained_spec\_$query.pkl 35 | trained_filepath=$experiment_name/trained_spec\_$query.pkl 36 | 37 | echo about to train architecture $query 38 | 39 | python train_arch_runner.py --untrained_filepath $untrained_filepath \ 40 | --trained_filepath $trained_filepath --epochs $epochs >> training.out 41 | 42 | echo finished iteration $query 43 | done 44 | 45 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import sys 4 | import os 5 | 6 | if 'search_space' not in os.environ or os.environ['search_space'] == 'nasbench': 7 | from nasbench import api 8 | from nas_bench.cell import Cell 9 | 10 | elif os.environ['search_space'] == 'darts': 11 | from darts.arch import Arch 12 | 13 | elif os.environ['search_space'][:12] == 'nasbench_201': 14 | from nas_201_api import NASBench201API as API 15 | from nas_bench_201.cell import Cell 16 | 17 | else: 18 | print('Invalid search space environ in data.py') 19 | sys.exit() 20 | 21 | 22 | class Data: 23 | 24 | def __init__(self, 25 | search_space, 26 | dataset='cifar10', 27 | nasbench_folder='./', 28 | loaded_nasbench=None): 29 | self.search_space = search_space 30 | self.dataset = dataset 31 | 32 | if loaded_nasbench: 33 | self.nasbench = loaded_nasbench 34 | elif search_space == 'nasbench': 35 | self.nasbench = api.NASBench(nasbench_folder + 'nasbench_only108.tfrecord') 36 | elif search_space == 'nasbench_201': 37 | self.nasbench = API(os.path.expanduser('~/nas-bench-201/NAS-Bench-201-v1_0-e61699.pth')) 38 | elif search_space != 'darts': 39 | print(search_space, 'is not a valid search space') 40 | sys.exit() 41 | 42 | def get_type(self): 43 | return self.search_space 44 | 45 | def query_arch(self, 46 | arch=None, 47 | train=True, 48 | encoding_type='path', 49 | cutoff=-1, 50 | deterministic=True, 51 | epochs=0): 52 | 53 | arch_dict = {} 54 | arch_dict['epochs'] = epochs 55 | if self.search_space in ['nasbench', 'nasbench_201']: 56 | if arch is None: 57 | arch = Cell.random_cell(self.nasbench) 58 | 59 | arch_dict['spec'] = arch 60 | 61 | if encoding_type == 'adj': 62 | encoding = Cell(**arch).encode_standard() 63 | elif encoding_type == 'path': 64 | encoding = Cell(**arch).encode_paths() 65 | elif encoding_type == 'trunc_path': 66 | encoding = Cell(**arch).encode_paths()[:cutoff] 67 | else: 68 | print('invalid encoding type') 69 | 70 | arch_dict['encoding'] = encoding 71 | 72 | if train: 73 | arch_dict['val_loss'] = Cell(**arch).get_val_loss(self.nasbench, 74 | deterministic=deterministic, 75 | dataset=self.dataset) 76 | arch_dict['test_loss'] = Cell(**arch).get_test_loss(self.nasbench, 77 | dataset=self.dataset) 78 | arch_dict['num_params'] = Cell(**arch).get_num_params(self.nasbench) 79 | arch_dict['val_per_param'] = (arch_dict['val_loss'] - 4.8) * (arch_dict['num_params'] ** 0.5) / 100 80 | 81 | else: 82 | if arch is None: 83 | arch = Arch.random_arch() 84 | 85 | arch_dict['spec'] = arch 86 | 87 | if encoding_type == 'path': 88 | encoding = Arch(arch).encode_paths() 89 | elif encoding_type == 'trunc_path': 90 | encoding = Arch(arch).encode_paths()[:cutoff] 91 | else: 92 
| encoding = arch 93 | 94 | arch_dict['encoding'] = encoding 95 | 96 | if train: 97 | if epochs == 0: 98 | epochs = 50 99 | arch_dict['val_loss'], arch_dict['test_loss'] = Arch(arch).query(epochs=epochs) 100 | 101 | return arch_dict 102 | 103 | def mutate_arch(self, 104 | arch, 105 | mutation_rate=1.0): 106 | if self.search_space in ['nasbench', 'nasbench_201']: 107 | return Cell(**arch).mutate(self.nasbench, 108 | mutation_rate=mutation_rate) 109 | else: 110 | return Arch(arch).mutate(int(mutation_rate)) 111 | 112 | def get_hash(self, arch): 113 | # return the path indices of the architecture, used as a hash 114 | if self.search_space == 'nasbench': 115 | return Cell(**arch).get_path_indices() 116 | elif self.search_space == 'darts': 117 | return Arch(arch).get_path_indices()[0] 118 | else: 119 | return Cell(**arch).get_string() 120 | 121 | def generate_random_dataset(self, 122 | num=10, 123 | train=True, 124 | encoding_type='path', 125 | cutoff=-1, 126 | random='standard', 127 | allow_isomorphisms=False, 128 | deterministic_loss=True, 129 | patience_factor=5): 130 | """ 131 | create a dataset of randomly sampled architectues 132 | test for isomorphisms using a hash map of path indices 133 | use patience_factor to avoid infinite loops 134 | """ 135 | data = [] 136 | dic = {} 137 | tries_left = num * patience_factor 138 | while len(data) < num: 139 | tries_left -= 1 140 | if tries_left <= 0: 141 | break 142 | arch_dict = self.query_arch(train=train, 143 | encoding_type=encoding_type, 144 | cutoff=cutoff, 145 | deterministic=deterministic_loss) 146 | 147 | h = self.get_hash(arch_dict['spec']) 148 | if allow_isomorphisms or h not in dic: 149 | dic[h] = 1 150 | data.append(arch_dict) 151 | 152 | return data 153 | 154 | def get_candidates(self, 155 | data, 156 | num=100, 157 | acq_opt_type='mutation', 158 | encoding_type='path', 159 | cutoff=-1, 160 | loss='val_loss', 161 | patience_factor=5, 162 | deterministic_loss=True, 163 | num_arches_to_mutate=1, 164 | max_mutation_rate=1, 165 | allow_isomorphisms=False): 166 | """ 167 | Creates a set of candidate architectures with mutated and/or random architectures 168 | """ 169 | 170 | candidates = [] 171 | # set up hash map 172 | dic = {} 173 | for d in data: 174 | arch = d['spec'] 175 | h = self.get_hash(arch) 176 | dic[h] = 1 177 | 178 | if acq_opt_type in ['mutation', 'mutation_random']: 179 | # mutate architectures with the lowest loss 180 | best_arches = [arch['spec'] for arch in sorted(data, key=lambda i:i[loss])[:num_arches_to_mutate * patience_factor]] 181 | 182 | # stop when candidates is size num 183 | # use patience_factor instead of a while loop to avoid long or infinite runtime 184 | for arch in best_arches: 185 | if len(candidates) >= num: 186 | break 187 | for i in range(num // num_arches_to_mutate // max_mutation_rate): 188 | for rate in range(1, max_mutation_rate + 1): 189 | mutated = self.mutate_arch(arch, mutation_rate=rate) 190 | arch_dict = self.query_arch(mutated, 191 | train=False, 192 | encoding_type=encoding_type, 193 | cutoff=cutoff) 194 | h = self.get_hash(mutated) 195 | 196 | if allow_isomorphisms or h not in dic: 197 | dic[h] = 1 198 | candidates.append(arch_dict) 199 | 200 | if acq_opt_type in ['random', 'mutation_random']: 201 | # add randomly sampled architectures to the set of candidates 202 | for _ in range(num * patience_factor): 203 | if len(candidates) >= 2 * num: 204 | break 205 | 206 | arch_dict = self.query_arch(train=False, 207 | encoding_type=encoding_type, 208 | cutoff=cutoff) 209 | h = 
self.get_hash(arch_dict['spec']) 210 | 211 | if allow_isomorphisms or h not in dic: 212 | dic[h] = 1 213 | candidates.append(arch_dict) 214 | 215 | return candidates 216 | 217 | def remove_duplicates(self, candidates, data): 218 | # input: two sets of architectues: candidates and data 219 | # output: candidates with arches from data removed 220 | 221 | dic = {} 222 | for d in data: 223 | dic[self.get_hash(d['spec'])] = 1 224 | unduplicated = [] 225 | for candidate in candidates: 226 | if self.get_hash(candidate['spec']) not in dic: 227 | dic[self.get_hash(candidate['spec'])] = 1 228 | unduplicated.append(candidate) 229 | return unduplicated 230 | 231 | def encode_data(self, dicts): 232 | """ 233 | method used by metann_runner.py (for Arch) 234 | input: list of arch dictionary objects 235 | output: xtrain (encoded architectures), ytrain (val loss) 236 | """ 237 | data = [] 238 | 239 | for dic in dicts: 240 | arch = dic['spec'] 241 | encoding = Arch(arch).encode_paths() 242 | data.append((arch, encoding, dic['val_loss_avg'], None)) 243 | 244 | return data 245 | 246 | def get_arch_list(self, 247 | aux_file_path, 248 | iteridx=0, 249 | num_top_arches=5, 250 | max_edits=20, 251 | num_repeats=5, 252 | verbose=1): 253 | # Method used for gp_bayesopt 254 | 255 | if self.search_space == 'darts': 256 | print('get_arch_list only supported for nasbench and nasbench_201') 257 | sys.exit() 258 | 259 | # load the list of architectures chosen by bayesopt so far 260 | base_arch_list = pickle.load(open(aux_file_path, 'rb')) 261 | top_arches = [archtuple[0] for archtuple in base_arch_list[:num_top_arches]] 262 | if verbose: 263 | top_5_loss = [archtuple[1][0] for archtuple in base_arch_list[:min(5, len(base_arch_list))]] 264 | print('top 5 val losses {}'.format(top_5_loss)) 265 | 266 | # perturb the best k architectures 267 | dic = {} 268 | for archtuple in base_arch_list: 269 | path_indices = Cell(**archtuple[0]).get_path_indices() 270 | dic[path_indices] = 1 271 | 272 | new_arch_list = [] 273 | for arch in top_arches: 274 | for edits in range(1, max_edits): 275 | for _ in range(num_repeats): 276 | perturbation = Cell(**arch).perturb(self.nasbench, edits) 277 | path_indices = Cell(**perturbation).get_path_indices() 278 | if path_indices not in dic: 279 | dic[path_indices] = 1 280 | new_arch_list.append(perturbation) 281 | 282 | # make sure new_arch_list is not empty 283 | while len(new_arch_list) == 0: 284 | for _ in range(100): 285 | arch = Cell.random_cell(self.nasbench) 286 | path_indices = Cell(**arch).get_path_indices() 287 | if path_indices not in dic: 288 | dic[path_indices] = 1 289 | new_arch_list.append(arch) 290 | 291 | return new_arch_list 292 | 293 | @classmethod 294 | def generate_distance_matrix(cls, arches_1, arches_2, distance): 295 | # Method used for gp_bayesopt for nasbench 296 | matrix = np.zeros([len(arches_1), len(arches_2)]) 297 | for i, arch_1 in enumerate(arches_1): 298 | for j, arch_2 in enumerate(arches_2): 299 | if distance == 'edit_distance': 300 | matrix[i][j] = Cell(**arch_1).edit_distance(Cell(**arch_2)) 301 | elif distance == 'path_distance': 302 | matrix[i][j] = Cell(**arch_1).path_distance(Cell(**arch_2)) 303 | elif distance == 'trunc_path_distance': 304 | matrix[i][j] = Cell(**arch_1).path_distance(Cell(**arch_2)) 305 | elif distance == 'nasbot_distance': 306 | matrix[i][j] = Cell(**arch_1).nasbot_distance(Cell(**arch_2)) 307 | else: 308 | print('{} is an invalid distance'.format(distance)) 309 | sys.exit() 310 | return matrix 311 | 312 | def get_nbhd(self, arch): 313 | 
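        # Return the list of neighbor architectures used by local search,
        # delegating to the search-space-specific get_neighborhood method.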
if self.search_space in ['nasbench', 'nasbench_201']: 314 | return Cell(**arch).get_neighborhood(self.nasbench) 315 | else: 316 | return Arch(arch).get_neighborhood() 317 | -------------------------------------------------------------------------------- /img/local_search_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/local_search_fig.png -------------------------------------------------------------------------------- /img/ls_101_titled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_101_titled.png -------------------------------------------------------------------------------- /img/ls_baselines_101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_baselines_101.png -------------------------------------------------------------------------------- /img/ls_cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_cifar10.png -------------------------------------------------------------------------------- /img/ls_cifar100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_cifar100.png -------------------------------------------------------------------------------- /img/ls_cifar10_titled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_cifar10_titled.png -------------------------------------------------------------------------------- /img/ls_imagenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_imagenet.png -------------------------------------------------------------------------------- /img/real_synth_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/real_synth_data.png -------------------------------------------------------------------------------- /img/structured.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/structured.png -------------------------------------------------------------------------------- /img/uniform_preimages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/uniform_preimages.png -------------------------------------------------------------------------------- /img/unstructured.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/unstructured.png -------------------------------------------------------------------------------- /meta_neural_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import os 4 | import random 5 | import sys 6 | 7 | import numpy as np 8 | from matplotlib import pyplot as plt 9 | from tensorflow import keras 10 | import tensorflow as tf 11 | from tensorflow.keras import backend as K 12 | from tensorflow.keras.models import Sequential 13 | from tensorflow.keras.optimizers import Adam 14 | 15 | def mle_loss(y_true, y_pred): 16 | # Minimum likelihood estimate loss function 17 | mean = tf.slice(y_pred, [0, 0], [-1, 1]) 18 | var = tf.slice(y_pred, [0, 1], [-1, 1]) 19 | return 0.5 * tf.log(2*np.pi*var) + tf.square(y_true - mean) / (2*var) 20 | 21 | 22 | def mape_loss(y_true, y_pred): 23 | # Minimum absolute percentage error loss function 24 | lower_bound = 4.5 25 | fraction = tf.math.divide(tf.subtract(y_pred, lower_bound), \ 26 | tf.subtract(y_true, lower_bound)) 27 | return tf.abs(tf.subtract(fraction, 1)) 28 | 29 | 30 | class MetaNeuralnet: 31 | 32 | def get_dense_model(self, 33 | input_dims, 34 | num_layers, 35 | layer_width, 36 | loss, 37 | regularization): 38 | input_layer = keras.layers.Input(input_dims) 39 | model = keras.models.Sequential() 40 | 41 | for _ in range(num_layers): 42 | model.add(keras.layers.Dense(layer_width, activation='relu')) 43 | 44 | model = model(input_layer) 45 | if loss == 'mle': 46 | mean = keras.layers.Dense(1)(model) 47 | var = keras.layers.Dense(1)(model) 48 | var = keras.layers.Activation(tf.math.softplus)(var) 49 | output = keras.layers.concatenate([mean, var]) 50 | else: 51 | if regularization == 0: 52 | output = keras.layers.Dense(1)(model) 53 | else: 54 | reg = keras.regularizers.l1(regularization) 55 | output = keras.layers.Dense(1, kernel_regularizer=reg)(model) 56 | 57 | dense_net = keras.models.Model(inputs=input_layer, outputs=output) 58 | return dense_net 59 | 60 | def fit(self, xtrain, ytrain, 61 | num_layers=10, 62 | layer_width=20, 63 | loss='mae', 64 | epochs=200, 65 | batch_size=32, 66 | lr=.01, 67 | verbose=0, 68 | regularization=0, 69 | **kwargs): 70 | 71 | if loss == 'mle': 72 | loss_fn = mle_loss 73 | elif loss == 'mape': 74 | loss_fn = mape_loss 75 | else: 76 | loss_fn = 'mae' 77 | 78 | self.model = self.get_dense_model((xtrain.shape[1],), 79 | loss=loss_fn, 80 | num_layers=num_layers, 81 | layer_width=layer_width, 82 | regularization=regularization) 83 | optimizer = keras.optimizers.Adam(lr=lr, beta_1=.9, beta_2=.99) 84 | 85 | self.model.compile(optimizer=optimizer, loss=loss_fn) 86 | #print(self.model.summary()) 87 | self.model.fit(xtrain, ytrain, 88 | batch_size=batch_size, 89 | epochs=epochs, 90 | verbose=verbose) 91 | 92 | train_pred = np.squeeze(self.model.predict(xtrain)) 93 | train_error = np.mean(abs(train_pred-ytrain)) 94 | return train_error 95 | 96 | def predict(self, xtest): 97 | return self.model.predict(xtest) 98 | -------------------------------------------------------------------------------- /meta_neuralnet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train a Meta Neural Network on NASBench\n", 8 | "## Predict the accuracy of neural networks to within one percent!" 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "%load_ext autoreload\n", 18 | "%autoreload 2\n", 19 | "%matplotlib inline" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import numpy as np\n", 29 | "from matplotlib import pyplot as plt\n", 30 | "from nasbench import api\n", 31 | "\n", 32 | "from data import Data\n", 33 | "from meta_neural_net import MetaNeuralnet" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# define a function to plot the meta neural networks\n", 43 | "\n", 44 | "def plot_meta_neuralnet(ytrain, train_pred, ytest, test_pred, max_disp=500, title=None):\n", 45 | " \n", 46 | " plt.scatter(ytrain[:max_disp], train_pred[:max_disp], label='training data', alpha=0.7, s=64)\n", 47 | " plt.scatter(ytest[:max_disp], test_pred[:max_disp], label = 'test data', alpha=0.7, marker='^')\n", 48 | "\n", 49 | " # axis limits\n", 50 | " plt.xlim((5, 15))\n", 51 | " plt.ylim((5, 15))\n", 52 | " ax_lim = np.array([np.min([plt.xlim()[0], plt.ylim()[0]]),\n", 53 | " np.max([plt.xlim()[1], plt.ylim()[1]])])\n", 54 | " plt.xlim(ax_lim)\n", 55 | " plt.ylim(ax_lim)\n", 56 | " \n", 57 | " # 45-degree line\n", 58 | " plt.plot(ax_lim, ax_lim, 'k:') \n", 59 | " \n", 60 | " plt.gca().set_aspect('equal', adjustable='box')\n", 61 | " plt.title(title)\n", 62 | " plt.legend(loc='best')\n", 63 | " plt.xlabel('true percent error')\n", 64 | " plt.ylabel('predicted percent error')\n", 65 | " plt.show()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "scrolled": false 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "# load the NASBench dataset\n", 77 | "# takes about 1 minute to load the nasbench dataset\n", 78 | "search_space = Data('nasbench')\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# method which runs a meta neural network experiment\n", 88 | "def meta_neuralnet_experiment(params, \n", 89 | " ns=[100, 500], \n", 90 | " num_ensemble=3, \n", 91 | " test_size=500,\n", 92 | " cutoff=40,\n", 93 | " plot=True):\n", 94 | " \n", 95 | " for n in ns:\n", 96 | " for encoding_type in ['adj', 'path']:\n", 97 | "\n", 98 | " train_data = search_space.generate_random_dataset(num=n, \n", 99 | " encoding_type=encoding_type,\n", 100 | " cutoff=cutoff)\n", 101 | " \n", 102 | " test_data = search_space.generate_random_dataset(num=test_size, \n", 103 | " encoding_type=encoding_type,\n", 104 | " cutoff=cutoff)\n", 105 | " \n", 106 | " print(len(test_data))\n", 107 | " test_data = search_space.remove_duplicates(test_data, train_data)\n", 108 | " print(len(test_data))\n", 109 | " \n", 110 | " xtrain = np.array([d['encoding'] for d in train_data])\n", 111 | " ytrain = np.array([d['val_loss'] for d in train_data])\n", 112 | "\n", 113 | " xtest = np.array([d['encoding'] for d in test_data])\n", 114 | " ytest = np.array([d['val_loss'] for d in test_data])\n", 115 | "\n", 116 | " train_errors = []\n", 117 | " test_errors = []\n", 118 | " meta_neuralnet = MetaNeuralnet()\n", 119 | " for _ in range(num_ensemble): \n", 120 | " meta_neuralnet.fit(xtrain, ytrain, **params)\n", 121 | " train_pred = np.squeeze(meta_neuralnet.predict(xtrain))\n", 122 | " train_error = np.mean(abs(train_pred-ytrain))\n", 123 | 
" train_errors.append(train_error)\n", 124 | " test_pred = np.squeeze(meta_neuralnet.predict(xtest)) \n", 125 | " test_error = np.mean(abs(test_pred-ytest))\n", 126 | " test_errors.append(test_error)\n", 127 | "\n", 128 | " train_error = np.round(np.mean(train_errors, axis=0), 3)\n", 129 | " test_error = np.round(np.mean(test_errors, axis=0), 3)\n", 130 | " print('Meta neuralnet training size: {}, encoding type: {}'.format(n, encoding_type))\n", 131 | " print('Train error: {}, test error: {}'.format(train_error, test_error))\n", 132 | "\n", 133 | " if plot:\n", 134 | " if encoding_type == 'path':\n", 135 | " title = 'Path encoding, training set size {}'.format(n)\n", 136 | " else:\n", 137 | " title = 'Adjacency list encoding, training set size {}'.format(n) \n", 138 | "\n", 139 | " plot_meta_neuralnet(ytrain, train_pred, ytest, test_pred, title=title)\n", 140 | " plt.show() \n", 141 | " print('correlation', np.corrcoef(ytest, test_pred)[1,0])" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "meta_neuralnet_params = {'loss':'mae', 'num_layers':10, 'layer_width':20, 'epochs':200, \\\n", 151 | " 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}\n" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "scrolled": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "meta_neuralnet_experiment(meta_neuralnet_params)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 3", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.7.7" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | -------------------------------------------------------------------------------- /metann_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import logging 4 | import sys 5 | import os 6 | import pickle 7 | import numpy as np 8 | 9 | from acquisition_functions import acq_fn 10 | from data import Data 11 | from meta_neural_net import MetaNeuralnet 12 | 13 | 14 | """ 15 | meta neural net runner is used in run_experiments_parallel 16 | 17 | - loads data by opening k*i pickle files from previous iterations 18 | - trains a meta neural network and predicts accuracy of all candidates 19 | - outputs k pickle files of the architecture to be trained next 20 | """ 21 | 22 | def run_meta_neuralnet(search_space, dicts, 23 | k=10, 24 | verbose=1, 25 | num_ensemble=5, 26 | epochs=10000, 27 | lr=0.00001, 28 | loss='scaled', 29 | explore_type='its', 30 | explore_factor=0.5): 31 | 32 | # data: list of arch dictionary objects 33 | # trains a meta neural network 34 | # returns list of k arch dictionary objects - the k best predicted 35 | 36 | results = [] 37 | meta_neuralnet = MetaNeuralnet() 38 | data = search_space.encode_data(dicts) 39 | xtrain = np.array([d[1] for d in data]) 40 | ytrain = np.array([d[2] for d in data]) 41 | 42 | candidates = search_space.get_candidates(data, 43 | 
acq_opt_type='mutation_random', 44 | encode_paths=True, 45 | allow_isomorphisms=True, 46 | deterministic_loss=None) 47 | 48 | xcandidates = np.array([c[1] for c in candidates]) 49 | candidates_specs = [c[0] for c in candidates] 50 | predictions = [] 51 | 52 | # train an ensemble of neural networks 53 | train_error = 0 54 | for _ in range(num_ensemble): 55 | meta_neuralnet = MetaNeuralnet() 56 | train_error += meta_neuralnet.fit(xtrain, ytrain, 57 | loss=loss, 58 | epochs=epochs, 59 | lr=lr) 60 | predictions.append(np.squeeze(meta_neuralnet.predict(xcandidates))) 61 | train_error /= num_ensemble 62 | if verbose: 63 | print('Meta neural net train error: {}'.format(train_error)) 64 | 65 | sorted_indices = acq_fn(predictions, explore_type) 66 | 67 | top_k_candidates = [candidates_specs[i] for i in sorted_indices[:k]] 68 | candidates_dict = [] 69 | for candidate in top_k_candidates: 70 | d = {} 71 | d['spec'] = candidate 72 | candidates_dict.append(d) 73 | 74 | return candidates_dict 75 | 76 | 77 | def run(args): 78 | 79 | save_dir = '{}/'.format(args.experiment_name) 80 | if not os.path.exists(save_dir): 81 | os.mkdir(save_dir) 82 | 83 | query = args.query 84 | k = args.k 85 | trained_prefix = args.trained_filename 86 | untrained_prefix = args.untrained_filename 87 | threshold = args.threshold 88 | 89 | search_space = Data('darts') 90 | 91 | # if it's the first iteration, choose k arches at random to train 92 | if query == 0: 93 | print('about to generate {} random'.format(k)) 94 | data = search_space.generate_random_dataset(num=k, train=False) 95 | arches = [d['spec'] for d in data] 96 | 97 | next_arches = [] 98 | for arch in arches: 99 | d = {} 100 | d['spec'] = arch 101 | next_arches.append(d) 102 | 103 | else: 104 | # get the data from prior iterations from pickle files 105 | data = [] 106 | for i in range(query): 107 | 108 | filepath = '{}{}_{}.pkl'.format(save_dir, trained_prefix, i) 109 | with open(filepath, 'rb') as f: 110 | arch = pickle.load(f) 111 | data.append(arch) 112 | 113 | print('Iteration {}'.format(query)) 114 | print('Data from last round') 115 | print(data) 116 | 117 | # run the meta neural net to output the next arches 118 | next_arches = run_meta_neuralnet(search_space, data, k=k) 119 | 120 | print('next batch') 121 | print(next_arches) 122 | 123 | # output the new arches to pickle files 124 | for i in range(k): 125 | index = query + i 126 | filepath = '{}{}_{}.pkl'.format(save_dir, untrained_prefix, index) 127 | next_arches[i]['index'] = index 128 | next_arches[i]['filepath'] = filepath 129 | with open(filepath, 'wb') as f: 130 | pickle.dump(next_arches[i], f) 131 | 132 | 133 | def main(args): 134 | 135 | #set up save dir 136 | save_dir = './' 137 | 138 | #set up logging 139 | log_format = '%(asctime)s %(message)s' 140 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 141 | format=log_format, datefmt='%m/%d %I:%M:%S %p') 142 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) 143 | fh.setFormatter(logging.Formatter(log_format)) 144 | logging.getLogger().addHandler(fh) 145 | logging.info(args) 146 | 147 | run(args) 148 | 149 | if __name__ == "__main__": 150 | parser = argparse.ArgumentParser(description='Args for meta neural net') 151 | parser.add_argument('--experiment_name', type=str, default='darts_test', help='Folder for input/output files') 152 | parser.add_argument('--params', type=str, default='test', help='Which set of params to use') 153 | parser.add_argument('--query', type=int, default=0, help='Which query is Neural BayesOpt on') 154 | 
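# --query i*k is what run_experiments_parallel.sh passes at iteration i; query == 0 selects the
# random-initialization branch in run() above. Illustrative first-iteration invocation (flags
# mirror the shell driver's defaults):
#   python3 metann_runner.py --experiment_name bananas --k 10 --query 0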
parser.add_argument('--trained_filename', type=str, default='trained_spec', help='name of input files') 155 | parser.add_argument('--untrained_filename', type=str, default='untrained_spec', help='name of output files') 156 | parser.add_argument('--k', type=int, default=10, help='number of arches to train per iteration') 157 | parser.add_argument('--threshold', type=int, default=20, help='throw out arches with val loss above threshold') 158 | 159 | args = parser.parse_args() 160 | main(args) -------------------------------------------------------------------------------- /nas_algorithms.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import os 3 | import pickle 4 | import sys 5 | import copy 6 | import numpy as np 7 | import tensorflow as tf 8 | from argparse import Namespace 9 | 10 | from data import Data 11 | 12 | 13 | def run_nas_algorithm(algo_params, search_space, mp): 14 | 15 | # run nas algorithm 16 | ps = copy.deepcopy(algo_params) 17 | algo_name = ps.pop('algo_name') 18 | 19 | if algo_name == 'random': 20 | data = random_search(search_space, **ps) 21 | elif algo_name == 'evolution': 22 | data = evolution_search(search_space, **ps) 23 | elif algo_name == 'bananas': 24 | data = bananas(search_space, mp, **ps) 25 | elif algo_name == 'gp_bayesopt': 26 | data = gp_bayesopt_search(search_space, **ps) 27 | elif algo_name == 'dngo': 28 | data = dngo_search(search_space, **ps) 29 | elif algo_name == 'local_search': 30 | data = local_search(search_space, **ps) 31 | else: 32 | print('invalid algorithm name') 33 | sys.exit() 34 | 35 | k = 10 36 | if 'k' in ps: 37 | k = ps['k'] 38 | total_queries = 150 39 | if 'total_queries' in ps: 40 | total_queries = ps['total_queries'] 41 | loss = 'val_loss' 42 | if 'loss' in ps: 43 | loss = ps['loss'] 44 | 45 | return compute_best_test_losses(data, k, total_queries, loss), data 46 | 47 | 48 | def compute_best_test_losses(data, k, total_queries, loss): 49 | """ 50 | Given full data from a completed nas algorithm, 51 | output the test error of the arch with the best val error 52 | after every multiple of k 53 | """ 54 | results = [] 55 | for query in range(k, total_queries + k, k): 56 | best_arch = sorted(data[:query], key=lambda i:i[loss])[0] 57 | test_error = best_arch['test_loss'] 58 | results.append((query, test_error)) 59 | 60 | return results 61 | 62 | 63 | def local_search(search_space, 64 | num_init=10, 65 | k=10, 66 | loss='val_loss', 67 | query_full_nbhd=True, 68 | stop_at_minimum=True, 69 | total_queries=500, 70 | deterministic=True, 71 | verbose=1): 72 | """ 73 | local search 74 | """ 75 | query_dict = {} 76 | iter_dict = {} 77 | data = [] 78 | query = 0 79 | 80 | while True: 81 | # loop over full runs of local search until we hit total_queries 82 | 83 | arch_dicts = [] 84 | while len(arch_dicts) < num_init: 85 | arch_dict = search_space.query_arch(deterministic=deterministic) 86 | 87 | if search_space.get_hash(arch_dict['spec']) not in query_dict: 88 | query_dict[search_space.get_hash(arch_dict['spec'])] = 1 89 | data.append(arch_dict) 90 | arch_dicts.append(arch_dict) 91 | query += 1 92 | if query >= total_queries: 93 | return data 94 | 95 | sorted_arches = sorted([(arch, arch[loss]) for arch in arch_dicts], key=lambda i:i[1]) 96 | arch_dict = sorted_arches[0][0] 97 | 98 | while True: 99 | # loop over iterations of local search until we hit a local minimum 100 | if verbose: 101 | print('starting iteration, query', query) 102 | iter_dict[search_space.get_hash(arch_dict['spec'])] = 1 
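# query the full neighborhood of the incumbent architecture.
# get_nbhd dispatches on the search space (see data.py): for nasbench / nasbench_201 it calls
# Cell.get_neighborhood, which returns every cell differing by a single op or a single edge;
# for darts it calls Arch.get_neighborhood.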
103 | nbhd = search_space.get_nbhd(arch_dict['spec']) 104 | improvement = False 105 | nbhd_dicts = [] 106 | for nbr in nbhd: 107 | if search_space.get_hash(nbr) not in query_dict: 108 | query_dict[search_space.get_hash(nbr)] = 1 109 | nbr_dict = search_space.query_arch(nbr, deterministic=deterministic) 110 | data.append(nbr_dict) 111 | nbhd_dicts.append(nbr_dict) 112 | query += 1 113 | if query >= total_queries: 114 | return data 115 | if nbr_dict[loss] < arch_dict[loss]: 116 | improvement = True 117 | if not query_full_nbhd: 118 | arch_dict = nbr_dict 119 | break 120 | 121 | if not stop_at_minimum: 122 | sorted_data = sorted([(arch, arch[loss]) for arch in data], key=lambda i:i[1]) 123 | index = 0 124 | while search_space.get_hash(sorted_data[index][0]['spec']) in iter_dict: 125 | index += 1 126 | 127 | arch_dict = sorted_data[index][0] 128 | 129 | elif not improvement: 130 | break 131 | 132 | else: 133 | sorted_nbhd = sorted([(nbr, nbr[loss]) for nbr in nbhd_dicts], key=lambda i:i[1]) 134 | arch_dict = sorted_nbhd[0][0] 135 | 136 | if verbose: 137 | top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))] 138 | print('local_search, query {}, top 5 losses {}'.format(query, top_5_loss)) 139 | 140 | 141 | def random_search(search_space, 142 | total_queries=150, 143 | loss='val_loss', 144 | deterministic=True, 145 | verbose=1): 146 | """ 147 | random search 148 | """ 149 | data = search_space.generate_random_dataset(num=total_queries, 150 | encoding_type='adj', 151 | deterministic_loss=deterministic) 152 | 153 | if verbose: 154 | top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))] 155 | print('random, query {}, top 5 losses {}'.format(total_queries, top_5_loss)) 156 | return data 157 | 158 | 159 | def evolution_search(search_space, 160 | total_queries=150, 161 | num_init=10, 162 | k=10, 163 | loss='val_loss', 164 | population_size=30, 165 | tournament_size=10, 166 | mutation_rate=1.0, 167 | deterministic=True, 168 | regularize=True, 169 | verbose=1): 170 | """ 171 | regularized evolution 172 | """ 173 | data = search_space.generate_random_dataset(num=num_init, 174 | deterministic_loss=deterministic) 175 | 176 | losses = [d[loss] for d in data] 177 | query = num_init 178 | population = [i for i in range(min(num_init, population_size))] 179 | 180 | while query <= total_queries: 181 | 182 | # evolve the population by mutating the best architecture 183 | # from a random subset of the population 184 | sample = np.random.choice(population, tournament_size) 185 | best_index = sorted([(i, losses[i]) for i in sample], key=lambda i:i[1])[0][0] 186 | mutated = search_space.mutate_arch(data[best_index]['spec'], 187 | mutation_rate=mutation_rate) 188 | arch_dict = search_space.query_arch(mutated, deterministic=deterministic) 189 | data.append(arch_dict) 190 | losses.append(arch_dict[loss]) 191 | population.append(len(data) - 1) 192 | 193 | # kill the oldest (or worst) from the population 194 | if len(population) >= population_size: 195 | if regularize: 196 | oldest_index = sorted([i for i in population])[0] 197 | population.remove(oldest_index) 198 | else: 199 | worst_index = sorted([(i, losses[i]) for i in population], key=lambda i:i[1])[-1][0] 200 | population.remove(worst_index) 201 | 202 | if verbose and (query % k == 0): 203 | top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))] 204 | print('evolution, query {}, top 5 losses {}'.format(query, top_5_loss)) 205 | 206 | query += 1 207 | 208 | return data 209 | 210 | 211 | def bananas(search_space, 212 | metann_params, 
213 | num_init=10, 214 | k=10, 215 | loss='val_loss', 216 | total_queries=150, 217 | num_ensemble=5, 218 | acq_opt_type='mutation', 219 | num_arches_to_mutate=1, 220 | explore_type='its', 221 | encoding_type='trunc_path', 222 | cutoff=40, 223 | deterministic=True, 224 | verbose=1): 225 | """ 226 | Bayesian optimization with a neural network model 227 | """ 228 | from acquisition_functions import acq_fn 229 | from meta_neural_net import MetaNeuralnet 230 | 231 | data = search_space.generate_random_dataset(num=num_init, 232 | encoding_type=encoding_type, 233 | cutoff=cutoff, 234 | deterministic_loss=deterministic) 235 | 236 | query = num_init + k 237 | 238 | while query <= total_queries: 239 | 240 | xtrain = np.array([d['encoding'] for d in data]) 241 | ytrain = np.array([d[loss] for d in data]) 242 | 243 | if (query == num_init + k) and verbose: 244 | print('bananas xtrain shape', xtrain.shape) 245 | print('bananas ytrain shape', ytrain.shape) 246 | 247 | # get a set of candidate architectures 248 | candidates = search_space.get_candidates(data, 249 | acq_opt_type=acq_opt_type, 250 | encoding_type=encoding_type, 251 | cutoff=cutoff, 252 | num_arches_to_mutate=num_arches_to_mutate, 253 | loss=loss, 254 | deterministic_loss=deterministic) 255 | 256 | xcandidates = np.array([c['encoding'] for c in candidates]) 257 | candidate_predictions = [] 258 | 259 | # train an ensemble of neural networks 260 | train_error = 0 261 | for _ in range(num_ensemble): 262 | meta_neuralnet = MetaNeuralnet() 263 | train_error += meta_neuralnet.fit(xtrain, ytrain, **metann_params) 264 | 265 | # predict the validation loss of the candidate architectures 266 | candidate_predictions.append(np.squeeze(meta_neuralnet.predict(xcandidates))) 267 | 268 | # clear the tensorflow graph 269 | tf.reset_default_graph() 270 | 271 | tf.keras.backend.clear_session() 272 | 273 | train_error /= num_ensemble 274 | if verbose: 275 | print('query {}, Meta neural net train error: {}'.format(query, train_error)) 276 | 277 | # compute the acquisition function for all the candidate architectures 278 | candidate_indices = acq_fn(candidate_predictions, explore_type) 279 | 280 | # add the k arches with the minimum acquisition function values 281 | for i in candidate_indices[:k]: 282 | 283 | arch_dict = search_space.query_arch(candidates[i]['spec'], 284 | encoding_type=encoding_type, 285 | cutoff=cutoff, 286 | deterministic=deterministic) 287 | data.append(arch_dict) 288 | 289 | if verbose: 290 | top_5_loss = sorted([(d[loss], d['epochs']) for d in data], key=lambda d: d[0])[:min(5, len(data))] 291 | print('bananas, query {}, top 5 losses (loss, test, epoch): {}'.format(query, top_5_loss)) 292 | recent_10_loss = [(d[loss], d['epochs']) for d in data[-10:]] 293 | print('bananas, query {}, most recent 10 (loss, test, epoch): {}'.format(query, recent_10_loss)) 294 | 295 | query += k 296 | 297 | return data 298 | 299 | 300 | def gp_bayesopt_search(search_space, 301 | num_init=10, 302 | k=10, 303 | total_queries=150, 304 | distance='edit_distance', 305 | deterministic=True, 306 | tmpdir='./temp', 307 | max_iter=200, 308 | mode='single_process', 309 | nppred=1000): 310 | """ 311 | Bayesian optimization with a GP prior 312 | """ 313 | from bo.bo.probo import ProBO 314 | 315 | # set up the path for auxiliary pickle files 316 | if not os.path.exists(tmpdir): 317 | os.mkdir(tmpdir) 318 | aux_file_path = os.path.join(tmpdir, 'aux.pkl') 319 | 320 | num_iterations = total_queries - num_init 321 | 322 | # black-box function that bayesopt will optimize 323 | 
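# (it maps an architecture spec to its validation loss via search_space.query_arch)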
def fn(arch): 324 | return search_space.query_arch(arch, deterministic=deterministic)['val_loss'] 325 | 326 | # set all the parameters for the various BayesOpt classes 327 | fhp = Namespace(fhstr='object', namestr='train') 328 | domp = Namespace(dom_str='list', set_domain_list_auto=True, 329 | aux_file_path=aux_file_path, 330 | distance=distance) 331 | modelp = Namespace(kernp=Namespace(ls=3., alpha=1.5, sigma=1e-5), 332 | infp=Namespace(niter=num_iterations, nwarmup=500), 333 | distance=distance, search_space=search_space.get_type()) 334 | amp = Namespace(am_str='mygpdistmat_ucb', nppred=nppred, modelp=modelp) 335 | optp = Namespace(opt_str='rand', max_iter=max_iter) 336 | makerp = Namespace(domp=domp, amp=amp, optp=optp) 337 | probop = Namespace(niter=num_iterations, fhp=fhp, 338 | makerp=makerp, tmpdir=tmpdir, mode=mode) 339 | data = Namespace() 340 | 341 | # Set up initial data 342 | init_data = search_space.generate_random_dataset(num=num_init, 343 | deterministic_loss=deterministic) 344 | data.X = [d['spec'] for d in init_data] 345 | data.y = np.array([[d['val_loss']] for d in init_data]) 346 | 347 | # initialize aux file 348 | pairs = [(data.X[i], data.y[i]) for i in range(len(data.y))] 349 | pairs.sort(key=lambda x: x[1]) 350 | with open(aux_file_path, 'wb') as f: 351 | pickle.dump(pairs, f) 352 | 353 | # run Bayesian Optimization 354 | bo = ProBO(fn, search_space, aux_file_path, data, probop, True) 355 | bo.run_bo() 356 | 357 | # get the validation and test loss for all architectures chosen by BayesOpt 358 | results = [] 359 | for arch in data.X: 360 | archtuple = search_space.query_arch(arch) 361 | results.append(archtuple) 362 | 363 | return results 364 | 365 | 366 | def dngo_search(search_space, 367 | num_init=10, 368 | k=10, 369 | loss='val_loss', 370 | total_queries=150, 371 | encoding_type='path', 372 | cutoff=40, 373 | acq_opt_type='mutation', 374 | explore_type='ucb', 375 | deterministic=True, 376 | verbose=True): 377 | 378 | import torch 379 | from pybnn import DNGO 380 | from pybnn.util.normalization import zero_mean_unit_var_normalization, zero_mean_unit_var_denormalization 381 | from acquisition_functions import acq_fn 382 | 383 | def fn(arch): 384 | return search_space.query_arch(arch, deterministic=deterministic)[loss] 385 | 386 | # set up initial data 387 | data = search_space.generate_random_dataset(num=num_init, 388 | encoding_type=encoding_type, 389 | cutoff=cutoff, 390 | deterministic_loss=deterministic) 391 | 392 | query = num_init + k 393 | 394 | while query <= total_queries: 395 | 396 | # set up data 397 | x = np.array([d['encoding'] for d in data]) 398 | y = np.array([d[loss] for d in data]) 399 | 400 | # get a set of candidate architectures 401 | candidates = search_space.get_candidates(data, 402 | acq_opt_type=acq_opt_type, 403 | encoding_type=encoding_type, 404 | cutoff=cutoff, 405 | deterministic_loss=deterministic) 406 | 407 | xcandidates = np.array([d['encoding'] for d in candidates]) 408 | 409 | # train the model 410 | model = DNGO(do_mcmc=False) 411 | model.train(x, y, do_optimize=True) 412 | 413 | predictions = model.predict(xcandidates) 414 | candidate_indices = acq_fn(np.array(predictions), explore_type) 415 | 416 | # add the k arches with the minimum acquisition function values 417 | for i in candidate_indices[:k]: 418 | arch_dict = search_space.query_arch(candidates[i]['spec'], 419 | encoding_type=encoding_type, 420 | cutoff=cutoff, 421 | deterministic=deterministic) 422 | data.append(arch_dict) 423 | 424 | if verbose: 425 | top_5_loss = 
sorted([(d[loss], d['epochs']) for d in data], key=lambda d: d[0])[:min(5, len(data))] 426 | print('dngo, query {}, top 5 val losses (val, test, epoch): {}'.format(query, top_5_loss)) 427 | recent_10_loss = [(d[loss], d['epochs']) for d in data[-10:]] 428 | print('dngo, query {}, most recent 10 (val, test, epoch): {}'.format(query, recent_10_loss)) 429 | 430 | query += k 431 | 432 | return data 433 | -------------------------------------------------------------------------------- /nas_bench/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /nas_bench/cell.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | import itertools 4 | import random 5 | import sys 6 | import os 7 | import pickle 8 | 9 | from nasbench import api 10 | 11 | 12 | INPUT = 'input' 13 | OUTPUT = 'output' 14 | CONV3X3 = 'conv3x3-bn-relu' 15 | CONV1X1 = 'conv1x1-bn-relu' 16 | MAXPOOL3X3 = 'maxpool3x3' 17 | OPS = [CONV3X3, CONV1X1, MAXPOOL3X3] 18 | 19 | NUM_VERTICES = 7 20 | OP_SPOTS = NUM_VERTICES - 2 21 | MAX_EDGES = 9 22 | 23 | 24 | class Cell: 25 | 26 | def __init__(self, matrix, ops): 27 | 28 | self.matrix = matrix 29 | self.ops = ops 30 | 31 | def serialize(self): 32 | return { 33 | 'matrix': self.matrix, 34 | 'ops': self.ops 35 | } 36 | 37 | def modelspec(self): 38 | return api.ModelSpec(matrix=self.matrix, ops=self.ops) 39 | 40 | @classmethod 41 | def random_cell(cls, nasbench): 42 | """ 43 | From the NASBench repository 44 | 45 | one-hot adjacency matrix 46 | draw [0,1] for each slot in the adjacency matrix 47 | """ 48 | while True: 49 | matrix = np.random.choice( 50 | [0, 1], size=(NUM_VERTICES, NUM_VERTICES)) 51 | matrix = np.triu(matrix, 1) 52 | ops = np.random.choice(OPS, size=NUM_VERTICES).tolist() 53 | ops[0] = INPUT 54 | ops[-1] = OUTPUT 55 | spec = api.ModelSpec(matrix=matrix, ops=ops) 56 | if nasbench.is_valid(spec): 57 | return { 58 | 'matrix': matrix, 59 | 'ops': ops 60 | } 61 | 62 | def get_val_loss(self, nasbench, deterministic=1, patience=50, epochs=None, dataset=None): 63 | if not deterministic: 64 | # output one of the three validation accuracies at random 65 | if epochs: 66 | return (100*(1 - nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['validation_accuracy'])) 67 | else: 68 | return (100*(1 - nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['validation_accuracy'])) 69 | else: 70 | # query the api until we see all three accuracies, then average them 71 | # a few architectures only have two accuracies, so we use patience to avoid an infinite loop 72 | accs = [] 73 | while len(accs) < 3 and patience > 0: 74 | patience -= 1 75 | if epochs: 76 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['validation_accuracy'] 77 | else: 78 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['validation_accuracy'] 79 | if acc not in accs: 80 | accs.append(acc) 81 | return round(100*(1-np.mean(accs)), 4) 82 | 83 | 84 | def get_test_loss(self, nasbench, patience=50, epochs=None, dataset=None): 85 | """ 86 | query the api until we see all three accuracies, then average them 87 | a few architectures only have two accuracies, so we use patience to avoid an infinite loop 88 | """ 89 | accs = [] 90 | while len(accs) < 3 and patience > 0: 91 | patience -= 1 92 | if epochs: 93 | acc = 
nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['test_accuracy'] 94 | else: 95 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['test_accuracy'] 96 | if acc not in accs: 97 | accs.append(acc) 98 | return round(100*(1-np.mean(accs)), 4) 99 | 100 | def get_num_params(self, nasbench): 101 | return nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['trainable_parameters'] 102 | 103 | def perturb(self, nasbench, edits=1): 104 | """ 105 | create new perturbed cell 106 | inspird by https://github.com/google-research/nasbench 107 | """ 108 | new_matrix = copy.deepcopy(self.matrix) 109 | new_ops = copy.deepcopy(self.ops) 110 | for _ in range(edits): 111 | while True: 112 | if np.random.random() < 0.5: 113 | for src in range(0, NUM_VERTICES - 1): 114 | for dst in range(src+1, NUM_VERTICES): 115 | new_matrix[src][dst] = 1 - new_matrix[src][dst] 116 | else: 117 | for ind in range(1, NUM_VERTICES - 1): 118 | available = [op for op in OPS if op != new_ops[ind]] 119 | new_ops[ind] = np.random.choice(available) 120 | 121 | new_spec = api.ModelSpec(new_matrix, new_ops) 122 | if nasbench.is_valid(new_spec): 123 | break 124 | return { 125 | 'matrix': new_matrix, 126 | 'ops': new_ops 127 | } 128 | 129 | def mutate(self, 130 | nasbench, 131 | mutation_rate=1.0, 132 | patience=5000): 133 | """ 134 | A stochastic approach to perturbing the cell 135 | inspird by https://github.com/google-research/nasbench 136 | """ 137 | p = 0 138 | while p < patience: 139 | p += 1 140 | new_matrix = copy.deepcopy(self.matrix) 141 | new_ops = copy.deepcopy(self.ops) 142 | 143 | edge_mutation_prob = mutation_rate / (NUM_VERTICES * (NUM_VERTICES - 1) / 2) 144 | # flip each edge w.p. so expected flips is 1. same for ops 145 | for src in range(0, NUM_VERTICES - 1): 146 | for dst in range(src + 1, NUM_VERTICES): 147 | if random.random() < edge_mutation_prob: 148 | new_matrix[src, dst] = 1 - new_matrix[src, dst] 149 | 150 | op_mutation_prob = mutation_rate / OP_SPOTS 151 | for ind in range(1, OP_SPOTS + 1): 152 | if random.random() < op_mutation_prob: 153 | available = [o for o in OPS if o != new_ops[ind]] 154 | new_ops[ind] = random.choice(available) 155 | 156 | new_spec = api.ModelSpec(new_matrix, new_ops) 157 | if nasbench.is_valid(new_spec): 158 | return { 159 | 'matrix': new_matrix, 160 | 'ops': new_ops 161 | } 162 | return self.mutate(nasbench, mutation_rate+1) 163 | 164 | def encode_standard(self): 165 | """ 166 | compute the "standard" encoding, 167 | i.e. 
adjacency matrix + op list encoding 168 | """ 169 | encoding_length = (NUM_VERTICES ** 2 - NUM_VERTICES) // 2 + OP_SPOTS 170 | encoding = np.zeros((encoding_length)) 171 | dic = {CONV1X1: 0., CONV3X3: 0.5, MAXPOOL3X3: 1.0} 172 | n = 0 173 | for i in range(NUM_VERTICES - 1): 174 | for j in range(i+1, NUM_VERTICES): 175 | encoding[n] = self.matrix[i][j] 176 | n += 1 177 | for i in range(1, NUM_VERTICES - 1): 178 | encoding[-i] = dic[self.ops[i]] 179 | return tuple(encoding) 180 | 181 | def get_paths(self): 182 | """ 183 | return all paths from input to output 184 | """ 185 | paths = [] 186 | for j in range(0, NUM_VERTICES): 187 | paths.append([[]]) if self.matrix[0][j] else paths.append([]) 188 | 189 | # create paths sequentially 190 | for i in range(1, NUM_VERTICES - 1): 191 | for j in range(1, NUM_VERTICES): 192 | if self.matrix[i][j]: 193 | for path in paths[i]: 194 | paths[j].append([*path, self.ops[i]]) 195 | return paths[-1] 196 | 197 | def get_path_indices(self): 198 | """ 199 | compute the index of each path 200 | There are 3^0 + ... + 3^5 paths total. 201 | (Paths can be length 0 to 5, and for each path, for each node, there 202 | are three choices for the operation.) 203 | """ 204 | paths = self.get_paths() 205 | mapping = {CONV3X3: 0, CONV1X1: 1, MAXPOOL3X3: 2} 206 | path_indices = [] 207 | 208 | for path in paths: 209 | index = 0 210 | for i in range(NUM_VERTICES - 1): 211 | if i == len(path): 212 | path_indices.append(index) 213 | break 214 | else: 215 | index += len(OPS) ** i * (mapping[path[i]] + 1) 216 | 217 | path_indices.sort() 218 | return tuple(path_indices) 219 | 220 | def encode_paths(self): 221 | """ output one-hot encoding of paths """ 222 | num_paths = sum([len(OPS) ** i for i in range(OP_SPOTS + 1)]) 223 | path_indices = self.get_path_indices() 224 | encoding = np.zeros(num_paths) 225 | for index in path_indices: 226 | encoding[index] = 1 227 | return encoding 228 | 229 | def path_distance(self, other): 230 | """ 231 | compute the distance between two architectures 232 | by comparing their path encodings 233 | """ 234 | return np.sum(np.array(self.encode_paths() != np.array(other.encode_paths()))) 235 | 236 | def trunc_path_distance(self, other, cutoff=40): 237 | """ 238 | compute the distance between two architectures 239 | by comparing their path encodings 240 | """ 241 | encoding = self.encode_paths()[:cutoff] 242 | other_encoding = other.encode_paths()[:cutoff] 243 | return np.sum(np.array(encoding) != np.array(other_encoding)) 244 | 245 | def edit_distance(self, other): 246 | """ 247 | compute the distance between two architectures 248 | by comparing their adjacency matrices and op lists 249 | """ 250 | graph_dist = np.sum(np.array(self.matrix) != np.array(other.matrix)) 251 | ops_dist = np.sum(np.array(self.ops) != np.array(other.ops)) 252 | return (graph_dist + ops_dist) 253 | 254 | def nasbot_distance(self, other): 255 | # distance based on optimal transport between row sums, column sums, and ops 256 | 257 | row_sums = sorted(np.array(self.matrix).sum(axis=0)) 258 | col_sums = sorted(np.array(self.matrix).sum(axis=1)) 259 | 260 | other_row_sums = sorted(np.array(other.matrix).sum(axis=0)) 261 | other_col_sums = sorted(np.array(other.matrix).sum(axis=1)) 262 | 263 | row_dist = np.sum(np.abs(np.subtract(row_sums, other_row_sums))) 264 | col_dist = np.sum(np.abs(np.subtract(col_sums, other_col_sums))) 265 | 266 | counts = [self.ops.count(op) for op in OPS] 267 | other_counts = [other.ops.count(op) for op in OPS] 268 | 269 | ops_dist = 
np.sum(np.abs(np.subtract(counts, other_counts))) 270 | 271 | return (row_dist + col_dist + ops_dist) 272 | 273 | def get_utilized(self): 274 | # return the sets of utilized edges and nodes 275 | # first, compute all paths 276 | n = np.shape(self.matrix)[0] 277 | sub_paths = [] 278 | for j in range(0, n): 279 | sub_paths.append([[(0, j)]]) if self.matrix[0][j] else sub_paths.append([]) 280 | 281 | # create paths sequentially 282 | for i in range(1, n - 1): 283 | for j in range(1, n): 284 | if self.matrix[i][j]: 285 | for sub_path in sub_paths[i]: 286 | sub_paths[j].append([*sub_path, (i, j)]) 287 | paths = sub_paths[-1] 288 | 289 | utilized_edges = [] 290 | for path in paths: 291 | for edge in path: 292 | if edge not in utilized_edges: 293 | utilized_edges.append(edge) 294 | 295 | utilized_nodes = [] 296 | for i in range(NUM_VERTICES): 297 | for edge in utilized_edges: 298 | if i in edge and i not in utilized_nodes: 299 | utilized_nodes.append(i) 300 | 301 | return utilized_edges, utilized_nodes 302 | 303 | def is_valid_vertex(self, vertex): 304 | edges, nodes = self.get_utilized() 305 | return (vertex in nodes) 306 | 307 | def is_valid_edge(self, edge): 308 | edges, nodes = self.get_utilized() 309 | return (edge in edges) 310 | 311 | def get_neighborhood(self, nasbench, shuffle=True): 312 | nbhd = [] 313 | # add the cells that differ by one op 314 | for vertex in range(1, OP_SPOTS + 1): 315 | if self.is_valid_vertex(vertex): 316 | available = [op for op in OPS if op != self.ops[vertex]] 317 | for op in available: 318 | new_matrix = copy.deepcopy(self.matrix) 319 | new_ops = copy.deepcopy(self.ops) 320 | new_ops[vertex] = op 321 | new_arch = {'matrix':new_matrix, 'ops':new_ops} 322 | nbhd.append(new_arch) 323 | 324 | # add the cells that differ by one edge 325 | for src in range(0, NUM_VERTICES - 1): 326 | for dst in range(src+1, NUM_VERTICES): 327 | new_matrix = copy.deepcopy(self.matrix) 328 | new_ops = copy.deepcopy(self.ops) 329 | new_matrix[src][dst] = 1 - new_matrix[src][dst] 330 | new_arch = {'matrix':new_matrix, 'ops':new_ops} 331 | 332 | if self.matrix[src][dst] and self.is_valid_edge((src, dst)): 333 | spec = api.ModelSpec(matrix=new_matrix, ops=new_ops) 334 | if nasbench.is_valid(spec): 335 | nbhd.append(new_arch) 336 | 337 | if not self.matrix[src][dst] and Cell(**new_arch).is_valid_edge((src, dst)): 338 | spec = api.ModelSpec(matrix=new_matrix, ops=new_ops) 339 | if nasbench.is_valid(spec): 340 | nbhd.append(new_arch) 341 | 342 | if shuffle: 343 | random.shuffle(nbhd) 344 | return nbhd 345 | 346 | -------------------------------------------------------------------------------- /nas_bench_201/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /nas_bench_201/cell.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | import itertools 4 | import random 5 | import sys 6 | import os 7 | import pickle 8 | 9 | 10 | OPS = ['avg_pool_3x3', 'nor_conv_1x1', 'nor_conv_3x3', 'none', 'skip_connect'] 11 | NUM_OPS = len(OPS) 12 | OP_SPOTS = 6 13 | LONGEST_PATH_LENGTH = 3 14 | 15 | class Cell: 16 | 17 | def __init__(self, string): 18 | self.string = string 19 | 20 | def get_string(self): 21 | return self.string 22 | 23 | def serialize(self): 24 | return { 25 | 'string':self.string 26 | } 27 | 28 | @classmethod 29 | def random_cell(cls, nasbench, max_nodes=4): 30 | """ 31 | From the 
AutoDL-Projects repository 32 | """ 33 | ops = [] 34 | for i in range(OP_SPOTS): 35 | op = random.choice(OPS) 36 | ops.append(op) 37 | return {'string':cls.get_string_from_ops(ops)} 38 | 39 | 40 | def get_runtime(self, nasbench, dataset='cifar100'): 41 | return nasbench.query_by_index(index, dataset).get_eval('x-valid')['time'] 42 | 43 | def get_val_loss(self, nasbench, deterministic=1, dataset='cifar100'): 44 | index = nasbench.query_index_by_arch(self.string) 45 | if dataset == 'cifar10': 46 | results = nasbench.query_by_index(index, 'cifar10-valid') 47 | else: 48 | results = nasbench.query_by_index(index, dataset) 49 | 50 | accs = [] 51 | for key in results.keys(): 52 | accs.append(results[key].get_eval('x-valid')['accuracy']) 53 | 54 | if deterministic: 55 | return round(100-np.mean(accs), 10) 56 | else: 57 | return round(100-np.random.choice(accs), 10) 58 | 59 | def get_test_loss(self, nasbench, dataset='cifar100', deterministic=1): 60 | index = nasbench.query_index_by_arch(self.string) 61 | results = nasbench.query_by_index(index, dataset) 62 | 63 | accs = [] 64 | for key in results.keys(): 65 | accs.append(results[key].get_eval('ori-test')['accuracy']) 66 | 67 | if deterministic: 68 | return round(100-np.mean(accs), 4) 69 | else: 70 | return round(100-np.random.choice(accs), 4) 71 | 72 | def get_op_list(self): 73 | # given a string, get the list of operations 74 | tokens = self.string.split('|') 75 | ops = [t.split('~')[0] for i,t in enumerate(tokens) if i not in [0,2,5,9]] 76 | return ops 77 | 78 | def get_num(self): 79 | # compute the unique number of the architecture, in [0, 15624] 80 | ops = self.get_op_list() 81 | index = 0 82 | for i, op in enumerate(ops): 83 | index += OPS.index(op) * NUM_OPS ** i 84 | return index 85 | 86 | @classmethod 87 | def get_string_from_ops(cls, ops): 88 | # given a list of operations, get the string 89 | strings = ['|'] 90 | nodes = [0, 0, 1, 0, 1, 2] 91 | for i, op in enumerate(ops): 92 | strings.append(op+'~{}|'.format(nodes[i])) 93 | if i < len(nodes) - 1 and nodes[i+1] == 0: 94 | strings.append('+|') 95 | return ''.join(strings) 96 | 97 | def perturb(self, nasbench, 98 | mutation_rate=1): 99 | # more deterministic version of mutate 100 | ops = self.get_op_list() 101 | new_ops = [] 102 | num = np.random.choice(len(ops)) 103 | for i, op in enumerate(ops): 104 | if i == num: 105 | available = [o for o in OPS if o != op] 106 | new_ops.append(np.random.choice(available)) 107 | else: 108 | new_ops.append(op) 109 | return {'string':self.get_string_from_ops(new_ops)} 110 | 111 | def mutate(self, 112 | nasbench, 113 | mutation_rate=1.0, 114 | patience=5000): 115 | 116 | p = 0 117 | ops = self.get_op_list() 118 | new_ops = [] 119 | # keeping mutation_prob consistent with nasbench_101 120 | mutation_prob = mutation_rate / (OP_SPOTS - 2) 121 | 122 | for i, op in enumerate(ops): 123 | if random.random() < mutation_prob: 124 | available = [o for o in OPS if o != op] 125 | new_ops.append(random.choice(available)) 126 | else: 127 | new_ops.append(op) 128 | 129 | return {'string':self.get_string_from_ops(new_ops)} 130 | 131 | def encode_standard(self): 132 | """ 133 | compute the standard encoding 134 | """ 135 | ops = self.get_op_list() 136 | encoding = [] 137 | for op in ops: 138 | encoding.append(OPS.index(op)) 139 | 140 | return encoding 141 | 142 | def get_num_params(self, nasbench): 143 | # todo update to the newer nasbench-201 dataset 144 | return 100 145 | 146 | def get_paths(self): 147 | """ 148 | return all paths from input to output 149 | """ 150 | 
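# the 4-node NAS-Bench-201 cell has exactly four input->output paths
# (0->3, 0->1->3, 0->2->3, 0->1->2->3); each blueprint below lists the
# positions in the flat op list of the edges traversed along one such path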
path_blueprints = [[3], [0,4], [1,5], [0,2,5]] 151 | ops = self.get_op_list() 152 | paths = [] 153 | for blueprint in path_blueprints: 154 | paths.append([ops[node] for node in blueprint]) 155 | 156 | return paths 157 | 158 | def get_path_indices(self): 159 | """ 160 | compute the index of each path 161 | """ 162 | paths = self.get_paths() 163 | path_indices = [] 164 | 165 | for i, path in enumerate(paths): 166 | if i == 0: 167 | index = 0 168 | elif i in [1, 2]: 169 | index = NUM_OPS 170 | else: 171 | index = NUM_OPS + NUM_OPS ** 2 172 | for j, op in enumerate(path): 173 | index += OPS.index(op) * NUM_OPS ** j 174 | path_indices.append(index) 175 | 176 | return tuple(path_indices) 177 | 178 | def encode_paths(self): 179 | """ output one-hot encoding of paths """ 180 | num_paths = sum([NUM_OPS ** i for i in range(1, LONGEST_PATH_LENGTH + 1)]) 181 | path_indices = self.get_path_indices() 182 | encoding = np.zeros(num_paths) 183 | for index in path_indices: 184 | encoding[index] = 1 185 | return encoding 186 | 187 | def path_distance(self, other): 188 | """ 189 | compute the distance between two architectures 190 | by comparing their path encodings 191 | """ 192 | return np.sum(np.array(self.encode_paths() != np.array(other.encode_paths()))) 193 | 194 | def trunc_path_distance(self, other, cutoff=30): 195 | """ 196 | compute the distance between two architectures 197 | by comparing their truncated path encodings 198 | """ 199 | paths = np.array(self.encode_paths()[cutoff]) 200 | other_paths = np.array(other.encode_paths()[cutoff]) 201 | return np.sum(paths != other_paths) 202 | 203 | def edit_distance(self, other): 204 | 205 | ops = self.get_op_list() 206 | other_ops = other.get_op_list() 207 | return np.sum([1 for i in range(len(ops)) if ops[i] != other_ops[i]]) 208 | 209 | def nasbot_distance(self, other): 210 | # distance based on optimal transport between row sums, column sums, and ops 211 | 212 | ops = self.get_op_list() 213 | other_ops = other.get_op_list() 214 | 215 | counts = [ops.count(op) for op in OPS] 216 | other_counts = [other_ops.count(op) for op in OPS] 217 | ops_dist = np.sum(np.abs(np.subtract(counts, other_counts))) 218 | 219 | return ops_dist + self.edit_distance(other) 220 | 221 | def get_neighborhood(self, nasbench, shuffle=True): 222 | nbhd = [] 223 | ops = self.get_op_list() 224 | for i in range(len(ops)): 225 | available = [op for op in OPS if op != ops[i]] 226 | for op in available: 227 | new_ops = ops.copy() 228 | new_ops[i] = op 229 | new_arch = {'string':self.get_string_from_ops(new_ops)} 230 | nbhd.append(new_arch) 231 | 232 | if shuffle: 233 | random.shuffle(nbhd) 234 | return nbhd 235 | -------------------------------------------------------------------------------- /notebooks/random_walk_autocorrelation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2\n", 11 | "%matplotlib inline" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import sys\n", 22 | "import os\n", 23 | "import collections\n", 24 | "import itertools\n", 25 | "import pickle\n", 26 | "from scipy.integrate import quad\n", 27 | "from matplotlib import pyplot as plt\n", 28 | "from scipy.special import logit, expit\n", 29 | "from scipy.stats import norm" 30 | 
] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# Random Walk Autocorrelation (RWA)\n", 37 | " - this notebook contains code to compute the random walk autocorrelation on NAS-Bench-201 datasets, and on arbitrary probability density functions" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Compute the RWA from PDFs" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# first, define a few PDFs\n", 54 | "\n", 55 | "def sample(v, std=.35, dist='normal'):\n", 56 | " # sample a random point from the nbhd of v\n", 57 | " if dist == 'uniform':\n", 58 | " return np.random.rand()\n", 59 | " elif dist == 'lipschitz':\n", 60 | " return np.random.uniform(max(0, v-std), min(1, v+std))\n", 61 | " elif dist == 'normal':\n", 62 | " # rejection sampling\n", 63 | " u = np.random.rand()\n", 64 | " y = np.random.rand() * pdf(v, v, dist='normal', std=std)\n", 65 | " if y < pdf(u, v, dist='normal', std=std):\n", 66 | " return u\n", 67 | " else:\n", 68 | " return sample(v, std=std, dist='lipschitz')\n", 69 | " \n", 70 | "def pdf(u, v, dist='normal', std=.35):\n", 71 | " # return the value of the pdf of nbhd(v) at u\n", 72 | " if dist == 'uniform':\n", 73 | " # uniform distribution on [0,1]\n", 74 | " return 1\n", 75 | " elif dist == 'lipschitz':\n", 76 | " # uniform on [v-std, v+std]\n", 77 | " if v - std <= u and u <= v + std:\n", 78 | " return 1/(min(1, v+std)-max(0, v-std))\n", 79 | " else: \n", 80 | " return 0\n", 81 | " elif dist == 'normal':\n", 82 | " # normal dist with mean=v, std=std, scaled to be in [0,1]\n", 83 | " return norm.pdf(u, v, std) * (norm.cdf(1, v, std) - norm.cdf(0, v, std)) ** -1" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "def sample_constrained(cell, std, low=0, high=1, dist='normal'):\n", 93 | " for _ in range(200):\n", 94 | " sampled = sample(cell, std=std, dist=dist)\n", 95 | " if sampled > low and sampled < high:\n", 96 | " return sampled\n", 97 | " return cell\n", 98 | "\n", 99 | "def rwa_from_pdf(trials=100000,\n", 100 | " size=36,\n", 101 | " std=.35,\n", 102 | " low=0,\n", 103 | " high=1):\n", 104 | " # compute RWA for a synthetic dataset based on a PDF\n", 105 | " cell = .25\n", 106 | " window = collections.deque([cell])\n", 107 | " for _ in range(size - 1):\n", 108 | " cell = sample_constrained(cell, std=std, low=low, high=high, dist='normal')\n", 109 | " window.append(cell)\n", 110 | " \n", 111 | " autocorrs = np.zeros((size, trials, 2))\n", 112 | " for t in range(trials):\n", 113 | " if t % (trials/10) == 0:\n", 114 | " print('trial', t)\n", 115 | " #pass\n", 116 | " cell = sample_constrained(cell, std=std, low=low, high=high, dist='normal')\n", 117 | " window.append(cell)\n", 118 | " window.popleft()\n", 119 | " autocorrs[:, t, 0] = np.array([window[-1]] * size)\n", 120 | " autocorrs[:, t, 1] = np.array(window)\n", 121 | " \n", 122 | " corr = []\n", 123 | " for i in range(size):\n", 124 | " corr.append(np.corrcoef(autocorrs[i, :, 0], autocorrs[i, :, 1])[1,0])\n", 125 | " xs = [np.power(size - i - 1, 1/2) for i in range(size)]\n", 126 | " return xs, corr\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### compute RWA on the NASBench-201 datasets" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | 
"metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "sys.path.append(os.path.expanduser('~/naszilla/bananas'))\n", 143 | "sys.path.append(os.path.expanduser('~/AutoDL-Projects/lib/'))\n", 144 | "\n", 145 | "from nas_bench_201.cell import Cell\n", 146 | "from nas_201_api import NASBench201API as API" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "def pert(cell, nasbench, low=0, high=100):\n", 156 | " for i in range(200):\n", 157 | " perturbed = Cell(**cell).perturb(nasbench)\n", 158 | " if Cell(**perturbed).get_val_loss(nasbench, dataset=dataset) > low and \\\n", 159 | " Cell(**perturbed).get_val_loss(nasbench, dataset=dataset) < high:\n", 160 | " return perturbed\n", 161 | " print('failed')\n", 162 | " return Cell(**cell).perturb(nasbench)\n", 163 | "\n", 164 | "def random_walk(nasbench,\n", 165 | " trials=10000,\n", 166 | " size=36,\n", 167 | " dataset='cifar10',\n", 168 | " save=False,\n", 169 | " low=0,\n", 170 | " high=100):\n", 171 | " \n", 172 | " # if low, high are proportions, compute the losses\n", 173 | " if high < 1:\n", 174 | " losses, _ = pickle.load(open('{}_losses.pkl'.format(dataset), 'rb'))\n", 175 | " losses.sort()\n", 176 | " limits = [losses[0], losses[-1]]\n", 177 | " low, high = [losses[int(low*15625)], losses[int(high*15625)]]\n", 178 | " print('limits', limits)\n", 179 | " print('scaled limits', low, high)\n", 180 | " \n", 181 | " # compute rwa for a dataset in nasbench-201\n", 182 | " cell = Cell.random_cell(nasbench)\n", 183 | " while Cell(**cell).get_val_loss(nasbench, dataset=dataset) < low or \\\n", 184 | " Cell(**cell).get_val_loss(nasbench, dataset=dataset) > high:\n", 185 | " cell = Cell.random_cell(nasbench)\n", 186 | "\n", 187 | " window = collections.deque([cell])\n", 188 | " for _ in range(size - 1):\n", 189 | " cell = pert(cell, nasbench, low=low, high=high)\n", 190 | " window.append(Cell(**cell).get_val_loss(nasbench, dataset=dataset))\n", 191 | " \n", 192 | " autocorrs = np.zeros((size, trials, 2))\n", 193 | " for t in range(trials):\n", 194 | " if t % (trials/10) == 0:\n", 195 | " print('trial', t)\n", 196 | "\n", 197 | " cell = pert(cell, nasbench, low=low, high=high)\n", 198 | " window.append(Cell(**cell).get_val_loss(nasbench, dataset=dataset))\n", 199 | " window.popleft()\n", 200 | " autocorrs[:, t, 0] = np.array([window[-1]] * size)\n", 201 | " autocorrs[:, t, 1] = np.array(window)\n", 202 | " \n", 203 | " corr = []\n", 204 | " for i in range(size):\n", 205 | " corr.append(np.corrcoef(autocorrs[i, :, 0], autocorrs[i, :, 1])[1,0])\n", 206 | " xs = [np.power(size - i - 1, 1/2) for i in range(size)]\n", 207 | " return xs, corr\n", 208 | " " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "# generate synthetic data\n", 218 | "rwa_normals = {}\n", 219 | "for std in [.3, .35, .4]:\n", 220 | " print('starting', std)\n", 221 | " xs, corr = rwa_from_pdf(std=std, trials=10000)\n", 222 | " rwa_normals[std] = corr \n", 223 | " plt.plot(data['xs'], corr, label='normal pdf, std={}'.format(std))\n" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "scrolled": false 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "# download the nas-bench-201 dataset, and then load it with this command\n", 235 | "nasbench = API(os.path.expanduser('~/path/to/NAS-Bench-201-v1_0-e61699.pth'))" 236 | ] 237 
| }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "# compute RWA on the nas-bench-201 datasets\n", 245 | "datasets = ['ImageNet16-120', 'cifar100', 'cifar10']\n", 246 | "corrs = {}\n", 247 | "for dataset in datasets:\n", 248 | " _, corr = random_walk(nasbench, dataset=dataset, save=False, trials=10000, low=.1, high=.9)\n", 249 | " corrs[dataset] = corr\n", 250 | " print('finished', dataset)\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [] 259 | } 260 | ], 261 | "metadata": { 262 | "kernelspec": { 263 | "display_name": "Python 3", 264 | "language": "python", 265 | "name": "python3" 266 | }, 267 | "language_info": { 268 | "codemirror_mode": { 269 | "name": "ipython", 270 | "version": 3 271 | }, 272 | "file_extension": ".py", 273 | "mimetype": "text/x-python", 274 | "name": "python", 275 | "nbconvert_exporter": "python", 276 | "pygments_lexer": "ipython3", 277 | "version": "3.7.7" 278 | } 279 | }, 280 | "nbformat": 4, 281 | "nbformat_minor": 4 282 | } 283 | -------------------------------------------------------------------------------- /params.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def algo_params(param_str): 5 | """ 6 | Return params list based on param_str. 7 | These are the parameters used to produce the figures in the paper 8 | For AlphaX and Reinforcement Learning, we used the corresponding github repos: 9 | https://github.com/linnanwang/AlphaX-NASBench101 10 | https://github.com/automl/nas_benchmarks 11 | """ 12 | params = [] 13 | 14 | if param_str == 'local_search': 15 | params.append({'algo_name':'local_search', 'total_queries':300}) 16 | 17 | elif param_str == 'ls_cont_at_min': 18 | params.append({'algo_name':'local_search', 'total_queries':300, 'stop_at_minimum':False}) 19 | 20 | elif param_str == 'ls_query_part': 21 | params.append({'algo_name':'local_search', 'total_queries':300, 'query_full_nbhd':False}) 22 | 23 | elif param_str == 'test': 24 | params.append({'algo_name':'random', 'total_queries':30}) 25 | params.append({'algo_name':'evolution', 'total_queries':30}) 26 | params.append({'algo_name':'bananas', 'total_queries':30}) 27 | params.append({'algo_name':'gp_bayesopt', 'total_queries':30}) 28 | params.append({'algo_name':'dngo', 'total_queries':30}) 29 | 30 | elif param_str == 'test_simple': 31 | params.append({'algo_name':'random', 'total_queries':30}) 32 | params.append({'algo_name':'evolution', 'total_queries':30}) 33 | 34 | elif param_str == 'main_experiments': 35 | params.append({'algo_name':'random', 'total_queries':300}) 36 | params.append({'algo_name':'evolution', 'total_queries':300}) 37 | params.append({'algo_name':'bananas', 'total_queries':300}) 38 | params.append({'algo_name':'gp_bayesopt', 'total_queries':300}) 39 | params.append({'algo_name':'dngo', 'total_queries':300}) 40 | params.append({'algo_name':'local_search', 'total_queries':300, 'stop_at_minimum':False}) 41 | params.append({'algo_name':'local_search', 'total_queries':300, 'query_full_nbhd':False}) 42 | 43 | elif param_str == 'bananas': 44 | params.append({'algo_name':'bananas', 'total_queries':150}) 45 | 46 | else: 47 | print('invalid algorithm params: {}'.format(param_str)) 48 | sys.exit() 49 | 50 | print('\n* Running experiment: ' + param_str) 51 | return params 52 | 53 | 54 | def meta_neuralnet_params(param_str): 55 | 56 | if param_str == 
'nasbench': 57 | params = {'search_space':'nasbench', 'dataset':'cifar10', 'loss':'mae', 'num_layers':10, 'layer_width':20, \ 58 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0} 59 | 60 | elif param_str == 'darts': 61 | params = {'search_space':'darts', 'dataset':'cifar10', 'loss':'mape', 'num_layers':10, 'layer_width':20, \ 62 | 'epochs':10000, 'batch_size':32, 'lr':.00001, 'regularization':0, 'verbose':0} 63 | 64 | elif param_str == 'nasbench_201_cifar10': 65 | params = {'search_space':'nasbench_201', 'dataset':'cifar10', 'loss':'mae', 'num_layers':10, 'layer_width':20, \ 66 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0} 67 | 68 | elif param_str == 'nasbench_201_cifar100': 69 | params = {'search_space':'nasbench_201', 'dataset':'cifar100', 'loss':'mae', 'num_layers':10, 'layer_width':20, \ 70 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0} 71 | 72 | elif param_str == 'nasbench_201_imagenet': 73 | params = {'search_space':'nasbench_201', 'dataset':'ImageNet16-120', 'loss':'mae', 'num_layers':10, 'layer_width':20, \ 74 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0} 75 | 76 | else: 77 | print('invalid meta neural net params: {}'.format(param_str)) 78 | sys.exit() 79 | 80 | return params 81 | -------------------------------------------------------------------------------- /run_experiments_parallel.sh: -------------------------------------------------------------------------------- 1 | 2 | param_str=fifty_epochs 3 | experiment_name=bananas 4 | 5 | # set all instance names and zones 6 | instances=(bananas-t4-1-vm bananas-t4-2-vm bananas-t4-3-vm bananas-t4-4-vm \ 7 | bananas-t4-5-vm bananas-t4-6-vm bananas-t4-7-vm bananas-t4-8-vm \ 8 | bananas-t4-9-vm bananas-t4-10-vm) 9 | 10 | zones=(us-west1-b us-west1-b us-west1-b us-west1-b us-west1-b us-west1-b \ 11 | us-west1-b us-west1-b us-west1-b us-west1-b) 12 | 13 | # set parameters based on the param string 14 | if [ $param_str = test ]; then 15 | start_iteration=0 16 | end_iteration=1 17 | k=10 18 | untrained_filename=untrained_spec 19 | trained_filename=trained_spec 20 | epochs=1 21 | fi 22 | if [ $param_str = fifty_epochs ]; then 23 | start_iteration=0 24 | end_iteration=9 25 | k=10 26 | untrained_filename=untrained_spec 27 | trained_filename=trained_spec 28 | epochs=50 29 | fi 30 | 31 | # start bananas 32 | for i in $(seq $start_iteration $end_iteration) 33 | do 34 | let start=$i*$k 35 | let end=($i+1)*$k-1 36 | 37 | # train the neural net 38 | # input: all pickle files with index from 0 to i*k-1 39 | # output: k pickle files for the architectures to train next (indices i*k to (i+1)*k-1) 40 | echo about to run meta neural network in iteration $i 41 | python3 metann_runner.py --experiment_name $experiment_name --params $nas_params --k $k \ 42 | --untrained_filename $untrained_filename --trained_filename $trained_filename --query $start 43 | echo outputted architectures to train in iteration $i 44 | 45 | # train the k architectures 46 | let max_j=$k-1 47 | for j in $(seq 0 $max_j ) 48 | do 49 | let query=$i*$k+$j 50 | instance=${instances[$j]} 51 | zone=${zones[$j]} 52 | untrained_filepath=$experiment_name/$untrained_filename\_$query.pkl 53 | trained_filepath=$experiment_name/$trained_filename\_$query.pkl 54 | 55 | echo about to copy file $untrained_filepath to instance $instance 56 | gcloud compute scp $untrained_filepath $instance:~/naszilla/$experiment_name/ --zone $zone 57 | 58 | echo about to ssh into instance $instance 59 | gcloud 
compute ssh $instance --zone $zone --command="cd naszilla; \ 60 | python3 train_arch_runner.py --untrained_filepath $untrained_filepath \ 61 | --trained_filepath $trained_filepath --epochs $epochs" & 62 | done 63 | wait 64 | echo all architectures trained in iteration $i 65 | 66 | # copy results of trained architectures to the master CPU 67 | let max_j=$k-1 68 | for j in $(seq 0 $max_j ) 69 | do 70 | let query=$i*$k+$j 71 | instance=${instances[$j]} 72 | zone=${zones[$j]} 73 | trained_filepath=$experiment_name/$trained_filename\_$query.pkl 74 | gcloud compute scp $instance:~/naszilla/$trained_filepath $experiment_name --zone $zone 75 | done 76 | echo finished iteration $i 77 | done 78 | 79 | -------------------------------------------------------------------------------- /run_experiments_sequential.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import logging 4 | import sys 5 | import os 6 | import pickle 7 | import numpy as np 8 | import copy 9 | 10 | from params import * 11 | 12 | 13 | def run_experiments(args, save_dir): 14 | 15 | os.environ['search_space'] = args.search_space 16 | 17 | from nas_algorithms import run_nas_algorithm 18 | from data import Data 19 | 20 | trials = args.trials 21 | out_file = args.output_filename 22 | save_specs = args.save_specs 23 | metann_params = meta_neuralnet_params(args.search_space) 24 | algorithm_params = algo_params(args.algo_params) 25 | num_algos = len(algorithm_params) 26 | logging.info(algorithm_params) 27 | 28 | # set up search space 29 | mp = copy.deepcopy(metann_params) 30 | ss = mp.pop('search_space') 31 | dataset = mp.pop('dataset') 32 | search_space = Data(ss, dataset=dataset) 33 | 34 | for i in range(trials): 35 | results = [] 36 | walltimes = [] 37 | run_data = [] 38 | 39 | for j in range(num_algos): 40 | # run NAS algorithm 41 | print('\n* Running algorithm: {}'.format(algorithm_params[j])) 42 | starttime = time.time() 43 | algo_result, run_datum = run_nas_algorithm(algorithm_params[j], search_space, mp) 44 | algo_result = np.round(algo_result, 5) 45 | 46 | # remove unnecessary dict entries that take up space 47 | for d in run_datum: 48 | if not save_specs: 49 | d.pop('spec') 50 | for key in ['encoding', 'adjacency', 'path', 'dist_to_min']: 51 | if key in d: 52 | d.pop(key) 53 | 54 | # add walltime, results, run_data 55 | walltimes.append(time.time()-starttime) 56 | results.append(algo_result) 57 | run_data.append(run_datum) 58 | 59 | # print and pickle results 60 | filename = os.path.join(save_dir, '{}_{}.pkl'.format(out_file, i)) 61 | print('\n* Trial summary: (params, results, walltimes)') 62 | print(algorithm_params) 63 | print(metann_params) 64 | print(results) 65 | print(walltimes) 66 | print('\n* Saving to file {}'.format(filename)) 67 | with open(filename, 'wb') as f: 68 | pickle.dump([algorithm_params, metann_params, results, walltimes, run_data], f) 69 | f.close() 70 | 71 | def main(args): 72 | 73 | # make save directory 74 | save_dir = args.save_dir 75 | if not os.path.exists(save_dir): 76 | os.mkdir(save_dir) 77 | 78 | algo_params = args.algo_params 79 | save_path = save_dir + '/' + algo_params + '/' 80 | if not os.path.exists(save_path): 81 | os.mkdir(save_path) 82 | 83 | # set up logging 84 | log_format = '%(asctime)s %(message)s' 85 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 86 | format=log_format, datefmt='%m/%d %I:%M:%S %p') 87 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) 88 | 
fh.setFormatter(logging.Formatter(log_format)) 89 | logging.getLogger().addHandler(fh) 90 | logging.info(args) 91 | 92 | run_experiments(args, save_path) 93 | 94 | 95 | if __name__ == "__main__": 96 | parser = argparse.ArgumentParser(description='Args for BANANAS experiments') 97 | parser.add_argument('--trials', type=int, default=500, help='Number of trials') 98 | parser.add_argument('--search_space', type=str, default='nasbench', \ 99 | help='nasbench or darts') 100 | parser.add_argument('--algo_params', type=str, default='main_experiments', help='which parameters to use') 101 | parser.add_argument('--output_filename', type=str, default='round', help='name of output files') 102 | parser.add_argument('--save_dir', type=str, default='results_output', help='name of save directory') 103 | parser.add_argument('--save_specs', type=bool, default=False, help='save the architecture specs') 104 | 105 | args = parser.parse_args() 106 | main(args) 107 | -------------------------------------------------------------------------------- /train_arch_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import logging 4 | import sys 5 | import os 6 | import pickle 7 | 8 | sys.path.append(os.path.expanduser('~/darts/cnn')) 9 | from train_class import Train 10 | 11 | """ 12 | train arch runner is used in run_experiments_parallel 13 | 14 | - loads data by opening a pickle file containing an architecture spec 15 | - trains that architecture for e epochs 16 | - outputs a new pickle file with the architecture spec and its validation loss 17 | """ 18 | 19 | def run(args): 20 | 21 | untrained_filepath = os.path.expanduser(args.untrained_filepath) 22 | trained_filepath = os.path.expanduser(args.trained_filepath) 23 | epochs = args.epochs 24 | gpu = args.gpu 25 | train_portion = args.train_portion 26 | seed = args.seed 27 | save = args.save 28 | 29 | # load the arch spec that will be trained 30 | dic = pickle.load(open(untrained_filepath, 'rb')) 31 | arch = dic['spec'] 32 | print('loaded arch', arch) 33 | 34 | # train the arch 35 | trainer = Train() 36 | val_accs, test_accs = trainer.main(arch, 37 | epochs=epochs, 38 | gpu=gpu, 39 | train_portion=train_portion, 40 | seed=seed, 41 | save=save) 42 | 43 | val_sum = 0 44 | for epoch, val_acc in val_accs: 45 | key = 'val_loss_' + str(epoch) 46 | dic[key] = 100 - val_acc 47 | val_sum += dic[key] 48 | for epoch, test_acc in test_accs: 49 | key = 'test_loss_' + str(epoch) 50 | dic[key] = 100 - test_acc 51 | 52 | val_loss_avg = val_sum / len(val_accs) 53 | 54 | dic['val_loss_avg'] = val_loss_avg 55 | dic['val_loss'] = 100 - val_accs[-1][-1] 56 | dic['test_loss'] = 100 - test_accs[-1][-1] 57 | dic['filepath'] = args.trained_filepath 58 | 59 | print('arch {}'.format(arch)) 60 | print('val loss: {}'.format(dic['val_loss'])) 61 | print('test loss: {}'.format(dic['test_loss'])) 62 | print('val loss avg: {}'.format(dic['val_loss_avg'])) 63 | 64 | with open(trained_filepath, 'wb') as f: 65 | pickle.dump(dic, f) 66 | 67 | def main(args): 68 | 69 | #set up save dir 70 | save_dir = './' 71 | 72 | #set up logging 73 | log_format = '%(asctime)s %(message)s' 74 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 75 | format=log_format, datefmt='%m/%d %I:%M:%S %p') 76 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) 77 | fh.setFormatter(logging.Formatter(log_format)) 78 | logging.getLogger().addHandler(fh) 79 | logging.info(args) 80 | 81 | run(args) 82 | 83 | if __name__ == "__main__": 84 
| parser = argparse.ArgumentParser(description='Args for training a darts arch') 85 | parser.add_argument('--untrained_filepath', type=str, default='darts_test/untrained_spec_0.pkl', help='path to the input pickle file (untrained arch spec)') 86 | parser.add_argument('--trained_filepath', type=str, default='darts_test/trained_spec_0.pkl', help='path to the output pickle file (trained arch spec)') 87 | parser.add_argument('--epochs', type=int, default=50, help='number of training epochs') 88 | parser.add_argument('--gpu', type=int, default=0, help='which gpu to use') 89 | parser.add_argument('--train_portion', type=float, default=0.7, help='portion of training data used for training') 90 | parser.add_argument('--seed', type=int, default=0, help='random seed to use') 91 | parser.add_argument('--save', type=str, default='EXP', help='directory to save to') 92 | 93 | args = parser.parse_args() 94 | main(args) 95 | --------------------------------------------------------------------------------
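For reference, `run_experiments_parallel.sh`, `metann_runner.py`, and `train_arch_runner.py` communicate only through pickle files: `metann_runner.py` writes `untrained_spec_<query>.pkl`, a dict holding an architecture under the key `'spec'`, and `train_arch_runner.py` trains that spec and writes `trained_spec_<query>.pkl` with the resulting losses added. The sketch below illustrates that handshake only; the `'PLACEHOLDER_ARCH_SPEC'` string and the loss values are made up for illustration, and the real spec encoding is produced by the darts code rather than reproduced here.

```python
import pickle

# Sketch of the pickle handshake in run_experiments_parallel.sh.
# The spec string and loss values below are placeholders, not real outputs.

# 1) master CPU: metann_runner.py writes untrained_spec_<query>.pkl
with open('untrained_spec_0.pkl', 'wb') as f:
    pickle.dump({'spec': 'PLACEHOLDER_ARCH_SPEC'}, f)

# 2) GPU worker: train_arch_runner.py loads the spec, trains it, and writes
#    trained_spec_<query>.pkl with the losses added (faked here, no training).
with open('untrained_spec_0.pkl', 'rb') as f:
    dic = pickle.load(f)
dic.update({'val_loss': 5.3, 'test_loss': 5.9, 'val_loss_avg': 7.1,
            'filepath': 'trained_spec_0.pkl'})
with open('trained_spec_0.pkl', 'wb') as f:
    pickle.dump(dic, f)

# 3) master CPU: the trained pickle is copied back with gcloud compute scp and
#    read (together with all earlier ones) by the next metann_runner.py call.
with open('trained_spec_0.pkl', 'rb') as f:
    result = pickle.load(f)
print(result['spec'], result['val_loss'])
```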