├── .gitignore
├── README.md
├── acquisition_functions.py
├── bo
│   ├── __init__.py
│   ├── acq
│   │   ├── __init__.py
│   │   ├── acqmap.py
│   │   ├── acqopt.py
│   │   └── acquisition.py
│   ├── bo
│   │   ├── __init__.py
│   │   └── probo.py
│   ├── dom
│   │   ├── __init__.py
│   │   ├── list.py
│   │   └── real.py
│   ├── ds
│   │   ├── __init__.py
│   │   └── makept.py
│   ├── fn
│   │   ├── __init__.py
│   │   └── functionhandler.py
│   ├── pp
│   │   ├── __init__.py
│   │   ├── gp
│   │   │   ├── __init__.py
│   │   │   └── gp_utils.py
│   │   ├── pp_core.py
│   │   ├── pp_gp_george.py
│   │   ├── pp_gp_my_distmat.py
│   │   ├── pp_gp_stan.py
│   │   ├── pp_gp_stan_distmat.py
│   │   └── stan
│   │       ├── __init__.py
│   │       ├── compile_stan.py
│   │       ├── gp_distmat.py
│   │       ├── gp_distmat_fixedsig.py
│   │       ├── gp_hier2.py
│   │       ├── gp_hier2_matern.py
│   │       └── gp_hier3.py
│   └── util
│       ├── __init__.py
│       ├── datatransform.py
│       └── print_utils.py
├── darts
│   ├── __init__.py
│   ├── arch.py
│   ├── local_search_runner.py
│   └── run_experiments.sh
├── data.py
├── img
│   ├── local_search_fig.png
│   ├── ls_101_titled.png
│   ├── ls_baselines_101.png
│   ├── ls_cifar10.png
│   ├── ls_cifar100.png
│   ├── ls_cifar10_titled.png
│   ├── ls_imagenet.png
│   ├── real_synth_data.png
│   ├── structured.png
│   ├── uniform_preimages.png
│   └── unstructured.png
├── meta_neural_net.py
├── meta_neuralnet.ipynb
├── metann_runner.py
├── nas_algorithms.py
├── nas_bench
│   ├── __init__.py
│   └── cell.py
├── nas_bench_201
│   ├── __init__.py
│   └── cell.py
├── notebooks
│   ├── random_walk_autocorrelation.ipynb
│   └── simulate_real_data.ipynb
├── params.py
├── run_experiments_parallel.sh
├── run_experiments_sequential.py
└── train_arch_runner.py
/.gitignore:
--------------------------------------------------------------------------------
1 | notebooks/*.pdf
2 | notebooks/*.pkl
3 | notebooks/*.npy
4 | notebooks/*.npz
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | pip-wheel-metadata/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .nox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | *.py,cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 | db.sqlite3-journal
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | .python-version
91 |
92 | # pipenv
93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
96 | # install all needed dependencies.
97 | #Pipfile.lock
98 |
99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100 | __pypackages__/
101 |
102 | # Celery stuff
103 | celerybeat-schedule
104 | celerybeat.pid
105 |
106 | # SageMath parsed files
107 | *.sage.py
108 |
109 | # Environments
110 | .env
111 | .venv
112 | env/
113 | venv/
114 | ENV/
115 | env.bak/
116 | venv.bak/
117 |
118 | # Spyder project settings
119 | .spyderproject
120 | .spyproject
121 |
122 | # Rope project settings
123 | .ropeproject
124 |
125 | # mkdocs documentation
126 | /site
127 |
128 | # mypy
129 | .mypy_cache/
130 | .dmypy.json
131 | dmypy.json
132 |
133 | # Pyre type checker
134 | .pyre/
135 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Local Search for NAS
2 |
3 | **Note: this repository has been combined with other NAS projects into [naszilla/naszilla](https://github.com/naszilla/naszilla), and this repo is deprecated and not maintained. Please use [naszilla/naszilla](https://github.com/naszilla/naszilla), which has more functionality.**
4 |
5 | [Local Search is State of the Art for Neural Architecture Search Benchmarks](https://arxiv.org/abs/2005.02960)\
6 | Colin White, Sam Nolen, and Yash Savani.\
7 | _arXiv:2005.02960_.
8 |
9 | We study the simplest versions of local search, showing that local search achieves state-of-the-art results on NASBench-101 (size 10^6) and NASBench-201 (size 10^4). We also show that local search fails on the DARTS search space (size 10^18). This suggests that existing NAS benchmarks may be too small to adequately evaluate NAS algorithms. See our paper for a theoretical study which characterizes the performance of local search on graph optimization problems, backed by simulation results.
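For intuition, the sketch below shows the simplest local-search variant studied in the paper (hill-climbing on validation loss). The `random_arch`, `get_neighbors`, and `query_val_loss` names are illustrative placeholders, not the API used in this repo.
```
def local_search(search_space, query_val_loss, num_init=10):
    # start from the best of a few random architectures, then repeatedly
    # move to the best neighbor until no neighbor improves the validation loss
    best = min((search_space.random_arch() for _ in range(num_init)), key=query_val_loss)
    while True:
        neighbors = search_space.get_neighbors(best)
        if not neighbors:
            return best
        candidate = min(neighbors, key=query_val_loss)
        if query_val_loss(candidate) >= query_val_loss(best):
            return best
        best = candidate
```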
10 |
11 |
12 |
13 |
14 | In the left figure, each point is an architecture from NAS-Bench-201 trained on CIFAR10, and each edge denotes the LS function. We plotted the trees of the nine architectures with the lowest test losses. The right figure is similar, but the architectures are assigned validation losses at random. We see that we are much more likely to converge to an architecture with low loss on structured data (CIFAR10) rather than unstructured (random) data.
15 |
16 | ## Requirements
17 | This repo is our fork of [naszilla/bananas](https://github.com/naszilla/bananas/). The requirements are as follows.
18 | - jupyter
19 | - tensorflow == 1.14.0
20 | - nasbench (follow the installation instructions [here](https://github.com/google-research/nasbench))
21 | - nas-bench-201 (follow the installation instructions [here](https://github.com/D-X-Y/NAS-Bench-201))
22 | - pytorch == 1.2.0, torchvision == 0.4.0 (used for experiments on the DARTS search space)
23 | - pybnn (used only for the DNGO baseline algorithm. Installation instructions [here](https://github.com/automl/pybnn))
24 |
25 | If you run experiments on DARTS, you will need the naszilla fork of the darts repo:
26 | - Download the repo: https://github.com/naszilla/darts
27 |
28 | ## Run an experiment on nas-bench-101 or nas-bench-201
29 |
30 | To run an experiment on nas-bench-101, run
31 | ```
32 | python run_experiments_sequential.py
33 | ```
34 | To run on nas-bench-201, add the flag `--search_space nasbench_201_cifar10` to the above command (with cifar10, cifar100, or imagenet as the dataset).
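For example, to run on NAS-Bench-201 with CIFAR-100 (assuming the cifar100 and imagenet flag values mirror the cifar10 one):
```
python run_experiments_sequential.py --search_space nasbench_201_cifar100
```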
35 |
36 | ## Run an experiment on DARTS
37 | To run an experiment on DARTS, run
38 | ```
39 | bash darts/run_experiments.sh
40 | ```
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 | ## Citation
53 | Please cite [our paper](https://arxiv.org/abs/2005.02960) if you use code from this repo:
54 | ```
55 | @article{white2020local,
56 | title={Local Search is State of the Art for Neural Architecture Search Benchmarks},
57 | author={White, Colin and Nolen, Sam and Savani, Yash},
58 | journal={arXiv preprint arXiv:2005.02960},
59 | year={2020}
60 | }
61 | ```
62 |
--------------------------------------------------------------------------------
/acquisition_functions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | from scipy.stats import norm
4 | # Different acquisition functions that can be used by BANANAS
5 | def acq_fn(predictions, explore_type='its', ytrain=None):
6 | predictions = np.array(predictions)
7 |
8 | # Upper confidence bound (UCB) acquisition function
9 | if explore_type == 'ucb':
10 | explore_factor = 0.5
11 | mean = np.mean(predictions, axis=0)
12 | std = np.sqrt(np.var(predictions, axis=0))
13 | ucb = mean - explore_factor * std
14 | sorted_indices = np.argsort(ucb)
15 |
16 | # Expected improvement (EI) acquisition function
17 | elif explore_type == 'ei':
18 | ei_calibration_factor = 5.
19 | mean = list(np.mean(predictions, axis=0))
20 | std = list(np.sqrt(np.var(predictions, axis=0)) /
21 | ei_calibration_factor)
22 |
23 | min_y = ytrain.min()
24 | gam = [(min_y - mean[i]) / std[i] for i in range(len(mean))]
25 | ei = [-1 * std[i] * (gam[i] * norm.cdf(gam[i]) + norm.pdf(gam[i]))
26 | for i in range(len(mean))]
27 | sorted_indices = np.argsort(ei)
28 |
29 | # Probability of improvement (PI) acquisition function
30 | elif explore_type == 'pi':
31 | mean = list(np.mean(predictions, axis=0))
32 | std = list(np.sqrt(np.var(predictions, axis=0)))
33 | min_y = ytrain.min()
34 | pi = [-1 * norm.cdf(min_y, loc=mean[i], scale=std[i]) for i in range(len(mean))]
35 | sorted_indices = np.argsort(pi)
36 |
37 | # Thompson sampling (TS) acquisition function
38 | elif explore_type == 'ts':
39 | rand_ind = np.random.randint(predictions.shape[0])
40 | ts = predictions[rand_ind,:]
41 | sorted_indices = np.argsort(ts)
42 |
43 | # Top exploitation
44 | elif explore_type == 'percentile':
45 | min_prediction = np.min(predictions, axis=0)
46 | sorted_indices = np.argsort(min_prediction)
47 |
48 | # Top mean
49 | elif explore_type == 'mean':
50 | mean = np.mean(predictions, axis=0)
51 | sorted_indices = np.argsort(mean)
52 |
53 | elif explore_type == 'confidence':
54 | confidence_factor = 2
55 | mean = np.mean(predictions, axis=0)
56 | std = np.sqrt(np.var(predictions, axis=0))
57 | conf = mean + confidence_factor * std
58 | sorted_indices = np.argsort(conf)
59 |
60 | # Independent Thompson sampling (ITS) acquisition function
61 | elif explore_type == 'its':
62 | mean = np.mean(predictions, axis=0)
63 | std = np.sqrt(np.var(predictions, axis=0))
64 | samples = np.random.normal(mean, std)
65 | sorted_indices = np.argsort(samples)
66 |
67 | else:
68 | print('Invalid exploration type in meta neuralnet search', explore_type)
69 | sys.exit()
70 |
71 | return sorted_indices
--------------------------------------------------------------------------------
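A minimal usage sketch of `acq_fn` above, with made-up ensemble predictions (the `ytrain` argument is only needed by the 'ei' and 'pi' options):
```
import numpy as np
from acquisition_functions import acq_fn

# 5 ensemble members' predicted validation losses for 4 candidate architectures
predictions = np.random.rand(5, 4)
ytrain = np.array([0.31, 0.28, 0.35])   # losses of architectures trained so far
ranked = acq_fn(predictions, explore_type='its', ytrain=ytrain)
print(ranked)   # candidate indices sorted from most to least promising
```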
/bo/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for running Bayesian Optimization (BO) in NASzilla.
3 | """
4 |
--------------------------------------------------------------------------------
/bo/acq/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for acquisition strategies.
3 | """
4 |
--------------------------------------------------------------------------------
/bo/acq/acqmap.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes to manage acqmap (acquisition maps from xin to acquisition value).
3 | """
4 |
5 | from argparse import Namespace
6 | import numpy as np
7 | import copy
8 | from bo.acq.acquisition import Acquisitioner
9 | from bo.util.datatransform import DataTransformer
10 | #from bo.pp.pp_gp_george import GeorgeGpPP
11 | #from bo.pp.pp_gp_stan import StanGpPP
12 | from bo.pp.pp_gp_my_distmat import MyGpDistmatPP
13 |
14 | class AcqMapper(object):
15 | """ Class to manage acqmap (acquisition map). """
16 |
17 | def __init__(self, data, amp, print_flag=True):
18 | """ Constructor
19 | Parameters:
20 | amp - Namespace of acqmap params
21 | print_flag - True or False
22 | """
23 | self.data = data
24 | self.set_am_params(amp)
25 | #self.setup_acqmap()
26 | if print_flag: self.print_str()
27 |
28 | def set_am_params(self, amp):
29 | """ Set the acqmap params.
30 | Inputs:
31 | amp - Namespace of acqmap parameters """
32 | self.amp = amp
33 |
34 | def get_acqmap(self, xin_is_list=True):
35 | """ Return acqmap.
36 | Inputs: xin_is_list True if input to acqmap is a list of xin """
37 | # Potentially do acqmap setup here. Could include inference,
38 | # caching/computing quantities, instantiating objects used in acqmap
39 | # definition. This becomes important when we do sequential opt of acqmaps.
40 | return self.acqmap_list if xin_is_list else self.acqmap_single
41 |
42 | def acqmap_list(self, xin_list):
43 | """ Acqmap defined on a list of xin. """
44 |
45 | def get_trans_data():
46 | """ Returns transformed data. """
47 | dt = DataTransformer(self.data.y, False)
48 | return Namespace(X=self.data.X, y=dt.transform_data(self.data.y))
49 |
50 | def apply_acq_to_pmlist(pmlist, acq_str, trans_data):
51 | """ Apply acquisition to pmlist. """
52 | acqp = Namespace(acq_str=acq_str, pmout_str='sample')
53 | acq = Acquisitioner(trans_data, acqp, False)
54 | acqfn = acq.acq_method
55 | return [acqfn(p) for p in pmlist]
56 |
57 | def georgegp_acqmap(acq_str):
58 | """ Acqmaps for GeorgeGpPP """
59 | trans_data = get_trans_data()
60 | pp = GeorgeGpPP(trans_data, self.amp.modelp, False)
61 | pmlist = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \
62 | else pp.sample_pp_post_pred(self.amp.nppred, xin_list)
63 | return apply_acq_to_pmlist(pmlist, acq_str, trans_data)
64 |
65 | def stangp_acqmap(acq_str):
66 | """ Acqmaps for StanGpPP """
67 | trans_data = get_trans_data()
68 | pp = StanGpPP(trans_data, self.amp.modelp, False)
69 | pp.infer_post_and_update_samples(print_result=True)
70 | pmlist, _ = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \
71 | else pp.sample_pp_post_pred(self.amp.nppred, xin_list, full_cov=True, \
72 | nloop=np.min([50,self.amp.nppred]))
73 | return apply_acq_to_pmlist(pmlist, acq_str, trans_data)
74 |
75 | def mygpdistmat_acqmap(acq_str):
76 | """ Acqmaps for MyGpDistmatPP """
77 | trans_data = get_trans_data()
78 | pp = MyGpDistmatPP(trans_data, self.amp.modelp, False)
79 | pp.infer_post_and_update_samples(print_result=True)
80 | pmlist, _ = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \
81 | else pp.sample_pp_post_pred(self.amp.nppred, xin_list, full_cov=True)
82 | return apply_acq_to_pmlist(pmlist, acq_str, trans_data)
83 |
84 | # Mapping of am_str to acqmap
85 | if self.amp.am_str=='georgegp_ei':
86 | return georgegp_acqmap('ei')
87 | elif self.amp.am_str=='georgegp_pi':
88 | return georgegp_acqmap('pi')
89 | elif self.amp.am_str=='georgegp_ucb':
90 | return georgegp_acqmap('ucb')
91 | elif self.amp.am_str=='georgegp_ts':
92 | return georgegp_acqmap('ts')
93 | elif self.amp.am_str=='stangp_ei':
94 | return stangp_acqmap('ei')
95 | elif self.amp.am_str=='stangp_pi':
96 | return stangp_acqmap('pi')
97 | elif self.amp.am_str=='stangp_ucb':
98 | return stangp_acqmap('ucb')
99 | elif self.amp.am_str=='stangp_ts':
100 | return stangp_acqmap('ts')
101 | elif self.amp.am_str=='mygpdistmat_ei':
102 | return mygpdistmat_acqmap('ei')
103 | elif self.amp.am_str=='mygpdistmat_pi':
104 | return mygpdistmat_acqmap('pi')
105 | elif self.amp.am_str=='mygpdistmat_ucb':
106 | return mygpdistmat_acqmap('ucb')
107 | elif self.amp.am_str=='mygpdistmat_ts':
108 | return mygpdistmat_acqmap('ts')
109 | elif self.amp.am_str=='null':
110 | return [0. for xin in xin_list]
111 |
112 | def acqmap_single(self, xin):
113 | """ Acqmap defined on a single xin. Returns acqmap(xin) value, not list. """
114 | return self.acqmap_list([xin])[0]
115 |
116 | def print_str(self):
117 | """ Print a description string """
118 | print('*AcqMapper with amp='+str(self.amp)
119 | +'.\n-----')
120 |
--------------------------------------------------------------------------------
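A small sketch of the `AcqMapper` interface above; the 'null' acqmap is used because it needs no GP model (the other `am_str` options additionally require `modelp` and `nppred` fields in the params Namespace):
```
from argparse import Namespace
import numpy as np
from bo.acq.acqmap import AcqMapper

data = Namespace(X=np.random.rand(5, 2), y=np.random.rand(5, 1))
am = AcqMapper(data, Namespace(am_str='null'), print_flag=False)
acqmap = am.get_acqmap()                    # returns the list-valued acqmap
print(acqmap(list(np.random.rand(3, 2))))   # [0.0, 0.0, 0.0]
```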
/bo/acq/acqopt.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes to perform acquisition function optimization.
3 | """
4 |
5 | from argparse import Namespace
6 | import numpy as np
7 |
8 | class AcqOptimizer(object):
9 | """ Class to perform acquisition function optimization """
10 |
11 | def __init__(self, optp=None, print_flag=True):
12 | """ Constructor
13 | Inputs:
14 | optp - Namespace of opt parameters
15 | print_flag - True or False
16 | """
17 | self.set_opt_params(optp)
18 | if print_flag: self.print_str()
19 |
20 | def set_opt_params(self, optp):
21 | """ Set the optimizer params.
22 | Inputs:
23 | optp - Namespace of optimizer parameters """
24 | if optp is None:
25 | optp = Namespace(opt_str='rand', max_iter=1000)
26 | self.optp = optp
27 |
28 | def optimize(self, dom, am):
29 | """ Optimize acqfn(probmap(x)) over x in domain """
30 | if self.optp.opt_str=='rand':
31 | return self.optimize_rand(dom, am)
32 |
33 | def optimize_rand(self, dom, am):
34 | """ Optimize acqmap(x) over domain via random search """
35 | xin_list = dom.unif_rand_sample(self.optp.max_iter)
36 | amlist = am.acqmap_list(xin_list)
37 | return xin_list[np.argmin(amlist)]
38 |
39 | # Utilities
40 | def print_str(self):
41 | """ print a description string """
42 | print('*AcqOptimizer with optp='+str(self.optp)
43 | +'.\n-----')
44 |
--------------------------------------------------------------------------------
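A minimal sketch of how `AcqOptimizer` is used; `ToyDomain` and `ToyAcqMap` are hypothetical stand-ins that only provide the two methods `optimize_rand` relies on (`unif_rand_sample` and `acqmap_list`):
```
from argparse import Namespace
import numpy as np
from bo.acq.acqopt import AcqOptimizer

class ToyDomain:
    def unif_rand_sample(self, n=1):
        return list(np.random.uniform(-1, 1, size=(n, 2)))

class ToyAcqMap:
    def acqmap_list(self, xin_list):
        return [float(np.sum(x**2)) for x in xin_list]   # pretend acquisition values

ao = AcqOptimizer(Namespace(opt_str='rand', max_iter=500), print_flag=False)
print(ao.optimize(ToyDomain(), ToyAcqMap()))   # sampled point with smallest acq value
```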
/bo/acq/acquisition.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes to manage acquisition functions.
3 | """
4 |
5 | from argparse import Namespace
6 | import numpy as np
7 | from scipy.stats import norm
8 |
9 | class Acquisitioner(object):
10 | """ Class to manage acquisition functions """
11 |
12 | def __init__(self, data, acqp=None, print_flag=True):
13 | """ Constructor
14 | Parameters:
15 | acqp - Namespace of acquisition parameters
16 | print_flag - True or False
17 | """
18 | self.data = data
19 | self.set_acq_params(acqp)
20 | self.set_acq_method()
21 | if print_flag: self.print_str()
22 |
23 | def set_acq_params(self, acqp):
24 | """ Set the acquisition params.
25 | Parameters:
26 | acqp - Namespace of acquisition parameters """
27 | if acqp is None:
28 | acqp = Namespace(acq_str='ei', pmout_str='sample')
29 | self.acqp = acqp
30 |
31 | def set_acq_method(self):
32 | """ Set the acquisition method """
33 | if self.acqp.acq_str=='ei': self.acq_method = self.ei
34 | if self.acqp.acq_str=='pi': self.acq_method = self.pi
35 | if self.acqp.acq_str=='ts': self.acq_method = self.ts
36 | if self.acqp.acq_str=='ucb': self.acq_method = self.ucb
37 | if self.acqp.acq_str=='rand': self.acq_method = self.rand
38 | if self.acqp.acq_str=='null': self.acq_method = self.null
39 | #if self.acqp.acqStr=='map': return self.map
40 |
41 | def ei(self, pmout):
42 | """ Expected improvement (EI) """
43 | if self.acqp.pmout_str=='sample':
44 | return self.bbacq_ei(pmout)
45 |
46 | def pi(self, pmout):
47 | """ Probability of improvement (PI) """
48 | if self.acqp.pmout_str=='sample':
49 | return self.bbacq_pi(pmout)
50 |
51 | def ucb(self, pmout):
52 | """ Upper (lower) confidence bound (UCB) """
53 | if self.acqp.pmout_str=='sample':
54 | return self.bbacq_ucb(pmout)
55 |
56 | def ts(self, pmout):
57 | """ Thompson sampling (TS) """
58 | if self.acqp.pmout_str=='sample':
59 | return self.bbacq_ts(pmout)
60 |
61 | def rand(self, pmout):
62 | """ Uniform random sampling """
63 | return np.random.random()
64 |
65 | def null(self, pmout):
66 | """ Return constant 0. """
67 | return 0.
68 |
69 | # Black Box Acquisition Functions
70 | def bbacq_ei(self, pmout_samp, normal=False):
71 | """ Black box acquisition: BB-EI
72 | Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1))
73 | Returns: EI acq value """
74 | youts = np.array(pmout_samp).flatten()
75 | nsamp = youts.shape[0]
76 | if normal:
77 | mu = np.mean(youts)
78 | sig = np.std(youts)
79 | gam = (self.data.y.min() - mu) / sig
80 | eiVal = -1*sig*(gam*norm.cdf(gam) + norm.pdf(gam))
81 | else:
82 | diffs = self.data.y.min() - youts
83 | ind_below_min = np.argwhere(diffs>0)
84 | eiVal = -1*np.sum(diffs[ind_below_min])/float(nsamp) if \
85 | len(ind_below_min)>0 else 0
86 | return eiVal
87 |
88 | def bbacq_pi(self, pmout_samp, normal=False):
89 | """ Black box acquisition: BB-PI
90 | Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1))
91 | Returns: PI acq value """
92 | youts = np.array(pmout_samp).flatten()
93 | nsamp = youts.shape[0]
94 | if normal:
95 | mu = np.mean(youts)
96 | sig = np.sqrt(np.var(youts))
97 | piVal = -1*norm.cdf(self.data.y.min(),loc=mu,scale=sig)
98 | else:
99 | piVal = -1*len(np.argwhere(youts<self.data.y.min()))/float(nsamp)
100 | return piVal
--------------------------------------------------------------------------------
/bo/dom/real.py:
--------------------------------------------------------------------------------
35 | bool_list = [pt[i]>=self.domp.min_max[i][0] and
36 | pt[i]<=self.domp.min_max[i][1] for i in range(self.ndimx)]
37 | ret=False if False in bool_list else True
38 | return ret
39 |
40 | def unif_rand_sample(self, n=1):
41 | """ Draws a sample uniformly at random from domain """
42 | li = [np.random.uniform(mm[0], mm[1], n) for mm in self.domp.min_max]
43 | return np.array(li).T
44 |
45 | def print_str(self):
46 | """ Print a description string """
47 | print('*RealDomain with domp = ' + str(self.domp) + '.')
48 | print('-----')
49 |
--------------------------------------------------------------------------------
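An illustrative check of the black-box EI acquisition defined in `bo/acq/acquisition.py` above, using made-up posterior-predictive samples:
```
from argparse import Namespace
import numpy as np
from bo.acq.acquisition import Acquisitioner

# y holds (toy) losses of architectures evaluated so far; lower is better
data = Namespace(X=np.random.rand(10, 3), y=np.random.rand(10, 1))
acq = Acquisitioner(data, Namespace(acq_str='ei', pmout_str='sample'), print_flag=False)

pmout_samp = np.random.normal(0.4, 0.05, size=(200, 1))   # samples at one candidate
print(acq.acq_method(pmout_samp))   # more negative = larger expected improvement
```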
/bo/ds/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for the makept strategy: making the next point and serializing it (typically run as a subprocess).
3 | """
4 |
--------------------------------------------------------------------------------
/bo/ds/makept.py:
--------------------------------------------------------------------------------
1 | """
2 | Make a point in a domain, and serialize it.
3 | """
4 |
5 | import sys
6 | import os
7 | sys.path.append(os.path.expanduser('./'))
8 | from argparse import Namespace, ArgumentParser
9 | import pickle
10 | import time
11 | import numpy as np
12 | from bo.dom.real import RealDomain
13 | from bo.dom.list import ListDomain
14 | from bo.acq.acqmap import AcqMapper
15 | from bo.acq.acqopt import AcqOptimizer
16 |
17 | def main(args, search_space, printinfo=False):
18 | starttime = time.time()
19 |
20 | # Load config and data
21 | makerp = pickle.load(open(args.configpkl, 'rb'))
22 | data = pickle.load(open(args.datapkl, 'rb'))
23 |
24 | if hasattr(args, 'mode') and args.mode == 'single_process':
25 | makerp.domp.mode = args.mode
26 | makerp.domp.iteridx = args.iteridx
27 | makerp.amp.modelp.mode = args.mode
28 | else:
29 | np.random.seed(args.seed)
30 | # Instantiate Domain, AcqMapper, AcqOptimizer
31 | dom = get_domain(makerp.domp, search_space)
32 | am = AcqMapper(data, makerp.amp, False)
33 | ao = AcqOptimizer(makerp.optp, False)
34 | # Optimize over domain to get nextpt
35 | nextpt = ao.optimize(dom, am)
36 | # Serialize nextpt
37 | with open(args.nextptpkl, 'wb') as f:
38 | pickle.dump(nextpt, f)
39 | # Print
40 | itertime = time.time()-starttime
41 | if printinfo: print_info(nextpt, itertime, args.nextptpkl)
42 |
43 | def get_domain(domp, search_space):
44 | """ Return Domain object. """
45 | if not hasattr(domp, 'dom_str'):
46 | domp.dom_str = 'real'
47 | if domp.dom_str=='real':
48 | return RealDomain(domp, False)
49 | elif domp.dom_str=='list':
50 | return ListDomain(search_space, domp, False)
51 |
52 | def print_info(nextpt, itertime, nextptpkl):
53 | print('*Found nextpt = ' + str(nextpt) + '.')
54 | print('*Saved nextpt as ' + nextptpkl + '.')
55 | print('*Timing: makept took ' + str(itertime) + ' seconds.')
56 | print('-----')
57 |
58 | if __name__ == "__main__":
59 | parser = ArgumentParser(description='Args for a single instance of acquisition optimization.')
60 | parser.add_argument('--seed', dest='seed', type=int, default=1111)
61 | parser.add_argument('--configpkl', dest='configpkl', type=str, default='config.pkl')
62 | parser.add_argument('--datapkl', dest='datapkl', type=str, default='data.pkl')
63 | parser.add_argument('--nextptpkl', dest='nextptpkl', type=str, default='nextpt.pkl')
64 | args = parser.parse_args()
65 | main(args, search_space=None, printinfo=False)
66 |
--------------------------------------------------------------------------------
/bo/fn/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for synthetic functions to query (perform experiment on).
3 | """
4 |
--------------------------------------------------------------------------------
/bo/fn/functionhandler.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes to handle functions.
3 | """
4 |
5 | from argparse import Namespace
6 | import numpy as np
7 |
8 | def get_fh(fn, data=None, fhp=None, print_flag=True):
9 | """ Returns a function handler object """
10 | if fhp is None:
11 | fhp=Namespace(fhstr='basic', namestr='noname')
12 | # Return FH object
13 | if fhp.fhstr=='basic':
14 | return BasicFH(fn, data, fhp, print_flag)
15 | elif fhp.fhstr=='extrainfo':
16 | return ExtraInfoFH(fn, data, fhp, print_flag)
17 | elif fhp.fhstr=='nannn':
18 | return NanNNFH(fn, data, fhp, print_flag)
19 | elif fhp.fhstr=='replacenannn':
20 | return ReplaceNanNNFH(fn, data, fhp, print_flag)
21 | elif fhp.fhstr=='object':
22 | return ObjectFH(fn, data, fhp, print_flag)
23 |
24 |
25 | class BasicFH(object):
26 | """ Class to handle basic functions, which map from an array xin to a real
27 | value yout. """
28 |
29 | def __init__(self, fn, data=None, fhp=None, print_flag=True):
30 | """ Constructor.
31 | Inputs:
32 | fhp - Namespace of function handler params
33 | print_flag - True or False
34 | """
35 | self.fn = fn
36 | self.data = data
37 | self.fhp = fhp
38 | if print_flag: self.print_str()
39 |
40 | def call_fn_and_add_data(self, xin):
41 | """ Call self.fn(xin), and update self.data """
42 | yout = self.fn(xin)
43 | print('new datapoint score', yout)
44 | self.add_data_single(xin, yout)
45 |
46 | def add_data_single(self, xin, yout):
47 | """ Update self.data with a single xin yout pair.
48 | Inputs:
49 | xin: np.array size=(1, -1)
50 | yout: np.array size=(1, 1) """
51 | xin = np.array(xin).reshape(1, -1)
52 | yout = np.array(yout).reshape(1, 1)
53 | newdata = Namespace(X=xin, y=yout)
54 | self.add_data(newdata)
55 |
56 | def add_data(self, newdata):
57 | """ Update self.data with newdata Namespace.
58 | Inputs:
59 | newdata: Namespace with fields X and y """
60 | if self.data is None:
61 | self.data = newdata
62 | else:
63 | self.data.X = np.concatenate((self.data.X, newdata.X), 0)
64 | self.data.y = np.concatenate((self.data.y, newdata.y), 0)
65 |
66 | def print_str(self):
67 | """ Print a description string. """
68 | print('*BasicFH with fhp='+str(self.fhp)
69 | +'.\n-----')
70 |
71 |
72 | class ExtraInfoFH(BasicFH):
73 | """ Class to handle functions that map from an array xin to a real
74 | value yout, but also return extra info """
75 |
76 | def __init__(self, fn, data=None, fhp=None, print_flag=True):
77 | """ Constructor.
78 | Inputs:
79 | fhp - Namespace of function handler params
80 | print_flag - True or False
81 | """
82 | super(ExtraInfoFH, self).__init__(fn, data, fhp, False)
83 | self.extrainfo = []
84 | if print_flag: self.print_str()
85 |
86 | def call_fn_and_add_data(self, xin):
87 | """ Call self.fn(xin), and update self.data """
88 | yout, exinf = self.fn(xin)
89 | self.add_data_single(xin, yout)
90 | self.extrainfo.append(exinf)
91 |
92 | def print_str(self):
93 | """ Print a description string. """
94 | print('*ExtraInfoFH with fhp='+str(self.fhp)
95 | +'.\n-----')
96 |
97 |
98 | class NanNNFH(BasicFH):
99 | """ Class to handle NN functions that map from an array xin to either
100 | a real value yout or np.NaN, but also return extra info """
101 |
102 | def __init__(self, fn, data=None, fhp=None, print_flag=True):
103 | """ Constructor.
104 | Inputs:
105 | fhp - Namespace of function handler params
106 | print_flag - True or False
107 | """
108 | super(NanNNFH, self).__init__(fn, data, fhp, False)
109 | self.extrainfo = []
110 | if print_flag: self.print_str()
111 |
112 | def call_fn_and_add_data(self, xin):
113 | """ Call self.fn(xin), and update self.data """
114 | timethresh = 60.
115 | yout, walltime = self.fn(xin)
116 | if walltime > timethresh:
117 | self.add_data_single_nan(xin)
118 | else:
119 | self.add_data_single(xin, yout)
120 | self.possibly_init_xnan()
121 | exinf = Namespace(xin=xin, yout=yout, walltime=walltime)
122 | self.extrainfo.append(exinf)
123 |
124 | def add_data_single_nan(self, xin):
125 | """ Update self.data.X_nan with a single xin.
126 | Inputs:
127 | xin: np.array size=(1, -1) """
128 | xin = xin.reshape(1,-1)
129 | newdata = Namespace(X = np.ones((0, xin.shape[1])),
130 | y = np.ones((0, 1)),
131 | X_nan = xin)
132 | self.add_data_nan(newdata)
133 |
134 | def add_data_nan(self, newdata):
135 | """ Update self.data with newdata Namespace.
136 | Inputs:
137 | newdata: Namespace with fields X, y, X_nan """
138 | if self.data is None:
139 | self.data = newdata
140 | else:
141 | self.data.X_nan = np.concatenate((self.data.X_nan, newdata.X_nan), 0)
142 |
143 | def possibly_init_xnan(self):
144 | """ If self.data doesn't have X_nan, then create it. """
145 | if not hasattr(self.data, 'X_nan'):
146 | self.data.X_nan = np.ones((0, self.data.X.shape[1]))
147 |
148 | def print_str(self):
149 | """ Print a description string. """
150 | print('*NanNNFH with fhp='+str(self.fhp)
151 | +'.\n-----')
152 |
153 |
154 | class ReplaceNanNNFH(BasicFH):
155 | """ Class to handle NN functions that map from an array xin to either
156 | a real value yout or np.NaN. If np.NaN, we replace it with a large
157 | positive value. We also return extra info """
158 |
159 | def __init__(self, fn, data=None, fhp=None, print_flag=True):
160 | """ Constructor.
161 | Inputs:
162 | fhp - Namespace of function handler params
163 | print_flag - True or False
164 | """
165 | super(ReplaceNanNNFH, self).__init__(fn, data, fhp, False)
166 | self.extrainfo = []
167 | if print_flag: self.print_str()
168 |
169 | def call_fn_and_add_data(self, xin):
170 | """ Call self.fn(xin), and update self.data """
171 | timethresh = 60.
172 | replace_nan_val = 5.
173 | yout, walltime = self.fn(xin)
174 | if walltime > timethresh:
175 | yout = replace_nan_val
176 | self.add_data_single(xin, yout)
177 | exinf = Namespace(xin=xin, yout=yout, walltime=walltime)
178 | self.extrainfo.append(exinf)
179 |
180 | def print_str(self):
181 | """ Print a description string. """
182 | print('*ReplaceNanNNFH with fhp='+str(self.fhp)
183 | +'.\n-----')
184 |
185 |
186 | class ObjectFH(object):
187 | """ Class to handle basic functions, which map from some object xin to a real
188 | value yout. """
189 |
190 | def __init__(self, fn, data=None, fhp=None, print_flag=True):
191 | """ Constructor.
192 | Inputs:
193 | fhp - Namespace of function handler params
194 | print_flag - True or False
195 | """
196 | self.fn = fn
197 | self.data = data
198 | self.fhp = fhp
199 | if print_flag: self.print_str()
200 |
201 | def call_fn_and_add_data(self, xin):
202 | """ Call self.fn(xin), and update self.data """
203 | yout = self.fn(xin)
204 | self.add_data_single(xin, yout)
205 |
206 | def add_data_single(self, xin, yout):
207 | """ Update self.data with a single xin yout pair. """
208 | newdata = Namespace(X=[xin], y=np.array(yout).reshape(1, 1))
209 | self.add_data(newdata)
210 |
211 | def add_data(self, newdata):
212 | """ Update self.data with newdata Namespace.
213 | Inputs:
214 | newdata: Namespace with fields X and y """
215 | if self.data is None:
216 | self.data = newdata
217 | else:
218 | self.data.X.extend(newdata.X)
219 | self.data.y = np.concatenate((self.data.y, newdata.y), 0)
220 |
221 | def print_str(self):
222 | """ Print a description string. """
223 | print('*ObjectFH with fhp='+str(self.fhp)
224 | +'.\n-----')
225 |
--------------------------------------------------------------------------------
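A minimal sketch of the function-handler interface above, wrapping a toy objective with `get_fh` and letting it accumulate (X, y) data:
```
from argparse import Namespace
import numpy as np
from bo.fn.functionhandler import get_fh

fh = get_fh(lambda x: float(np.sum(np.square(x))),
            fhp=Namespace(fhstr='basic', namestr='toy'), print_flag=False)
fh.call_fn_and_add_data(np.array([0.5, -0.2]))
fh.call_fn_and_add_data(np.array([1.0, 1.0]))
print(fh.data.X.shape, fh.data.y.shape)   # (2, 2) (2, 1)
```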
/bo/pp/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for defining and running probabilistic programs.
3 | """
4 |
--------------------------------------------------------------------------------
/bo/pp/gp/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for Gaussian process (GP) utilities and functions.
3 | """
4 |
--------------------------------------------------------------------------------
/bo/pp/gp/gp_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities for Gaussian process (GP) inference
3 | """
4 |
5 | import numpy as np
6 | from scipy.linalg import solve_triangular
7 | from scipy.spatial.distance import cdist
8 | #import GPy as gpy
9 |
10 |
11 | def kern_gibbscontext(xmatcon1, xmatcon2, xmatact1, xmatact2, theta, alpha,
12 | lscon, whichlsfn=1):
13 | """ Gibbs kernel (ls_fn of context only) """
14 | actdim = xmatact1.shape[1]
15 | lsarr1 = ls_fn(xmatcon1, theta, whichlsfn).flatten()
16 | lsarr2 = ls_fn(xmatcon2, theta, whichlsfn).flatten()
17 | sum_sq_ls = np.add.outer(lsarr1, lsarr2)
18 | inexp = -1. * np.divide(cdist(xmatact1, xmatact2, 'sqeuclidean'), sum_sq_ls)
19 | prod_ls = np.outer(lsarr1, lsarr2)
20 | #coef = np.power(np.divide(2*prod_ls, sum_sq_ls), actdim/2.) # Correct
21 | coef = 1.
22 | kern_gibbscontext_only_ns = np.multiply(coef, np.exp(inexp))
23 | kern_expquad_ns = kern_exp_quad_noscale(xmatcon1, xmatcon2, lscon)
24 | return alpha**2 * np.multiply(kern_gibbscontext_only_ns, kern_expquad_ns)
25 |
26 | def kern_gibbs1d(xmat1, xmat2, theta, alpha):
27 | """ Gibbs kernel in 1d """
28 | lsarr1 = ls_fn(xmat1, theta).flatten()
29 | lsarr2 = ls_fn(xmat2, theta).flatten()
30 | sum_sq_ls = np.add.outer(lsarr1, lsarr2)
31 | prod_ls = np.outer(lsarr1, lsarr2) #TODO product of this for each dim
32 | coef = np.sqrt(np.divide(2*prod_ls, sum_sq_ls))
33 | inexp = cdist(xmat1, xmat2, 'sqeuclidean') / sum_sq_ls #TODO sum of this for each dim
34 | return alpha**2 * coef * np.exp(-1 * inexp)
35 |
36 | def ls_fn(xmat, theta, whichlsfn=1):
37 | theta = np.array(theta).reshape(-1,1)
38 | if theta.shape[0]==2:
39 | if whichlsfn==1 or whichlsfn==2:
40 | return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]))) # softplus transform
41 | elif whichlsfn==3:
42 | return np.exp(theta[0][0] + np.matmul(xmat,theta[1])) # exp transform
43 | elif theta.shape[0]==3:
44 | if whichlsfn==1:
45 | return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]) +
46 | np.matmul(np.power(xmat,2),theta[2]))) # softplus transform
47 | elif whichlsfn==2:
48 | return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]) +
49 | np.matmul(np.abs(xmat),theta[2]))) # softplus on abs transform
50 | elif whichlsfn==3:
51 | return np.exp(theta[0][0] + np.matmul(xmat,theta[1]) +
52 | np.matmul(np.power(xmat,2),theta[2])) # exp transform
53 | else:
54 | print('ERROR: theta parameter is incorrect.')
55 |
56 | def kern_matern32(xmat1, xmat2, ls, alpha):
57 | """ Matern 3/2 kernel, currently using GPy """
58 | kern = gpy.kern.Matern32(input_dim=xmat1.shape[1], variance=alpha**2,
59 | lengthscale=ls)
60 | return kern.K(xmat1,xmat2)
61 |
62 | def kern_exp_quad(xmat1, xmat2, ls, alpha):
63 | """ Exponentiated quadratic kernel function aka squared exponential kernel
64 | aka RBF kernel """
65 | return alpha**2 * kern_exp_quad_noscale(xmat1, xmat2, ls)
66 |
67 | def kern_exp_quad_noscale(xmat1, xmat2, ls):
68 | """ Exponentiated quadratic kernel function aka squared exponential kernel
69 | aka RBF kernel, without scale parameter. """
70 | sq_norm = (-1/(2 * ls**2)) * cdist(xmat1, xmat2, 'sqeuclidean')
71 | return np.exp(sq_norm)
72 |
73 | def squared_euc_distmat(xmat1, xmat2, coef=1.):
74 | """ Distance matrix of squared euclidean distance (multiplied by coef)
75 | between points in xmat1 and xmat2. """
76 | return coef * cdist(xmat1, xmat2, 'sqeuclidean')
77 |
78 | def kern_distmat(xmat1, xmat2, ls, alpha, distfn):
79 | """ Kernel for a given distmat, via passed-in distfn (which is assumed to be
80 | fn of xmat1 and xmat2 only) """
81 | distmat = distfn(xmat1, xmat2)
82 | sq_norm = -distmat / ls**2
83 | return alpha**2 * np.exp(sq_norm)
84 |
85 | def get_cholesky_decomp(k11_nonoise, sigma, psd_str):
86 | """ Returns cholesky decomposition """
87 | if psd_str == 'try_first':
88 | k11 = k11_nonoise + sigma**2 * np.eye(k11_nonoise.shape[0])
89 | try:
90 | return stable_cholesky(k11, False)
91 | except np.linalg.linalg.LinAlgError:
92 | return get_cholesky_decomp(k11_nonoise, sigma, 'project_first')
93 | elif psd_str == 'project_first':
94 | k11_nonoise = project_symmetric_to_psd_cone(k11_nonoise)
95 | return get_cholesky_decomp(k11_nonoise, sigma, 'is_psd')
96 | elif psd_str == 'is_psd':
97 | k11 = k11_nonoise + sigma**2 * np.eye(k11_nonoise.shape[0])
98 | return stable_cholesky(k11)
99 |
100 | def stable_cholesky(mmat, make_psd=True):
101 | """ Returns a 'stable' cholesky decomposition of mmat """
102 | if mmat.size == 0:
103 | return mmat
104 | try:
105 | lmat = np.linalg.cholesky(mmat)
106 | except np.linalg.linalg.LinAlgError as e:
107 | if not make_psd:
108 | raise e
109 | diag_noise_power = -11
110 | max_mmat = np.diag(mmat).max()
111 | diag_noise = np.diag(mmat).max() * 1e-11
112 | break_loop = False
113 | while not break_loop:
114 | try:
115 | lmat = np.linalg.cholesky(mmat + ((10**diag_noise_power) * max_mmat) *
116 | np.eye(mmat.shape[0]))
117 | break_loop = True
118 | except np.linalg.linalg.LinAlgError:
119 | if diag_noise_power > -9:
120 | print('stable_cholesky failed with diag_noise_power=%d.'%(diag_noise_power))
121 | diag_noise_power += 1
122 | if diag_noise_power >= 5:
123 | print('***** stable_cholesky failed: added diag noise = %e'%(diag_noise))
124 | return lmat
125 |
126 | def project_symmetric_to_psd_cone(mmat, is_symmetric=True, epsilon=0):
127 | """ Project symmetric matrix mmat to the PSD cone """
128 | if is_symmetric:
129 | try:
130 | eigvals, eigvecs = np.linalg.eigh(mmat)
131 | except np.linalg.LinAlgError:
132 | print('LinAlgError encountered with np.eigh. Defaulting to eig.')
133 | eigvals, eigvecs = np.linalg.eig(mmat)
134 | eigvals = np.real(eigvals)
135 | eigvecs = np.real(eigvecs)
136 | else:
137 | eigvals, eigvecs = np.linalg.eig(mmat)
138 | clipped_eigvals = np.clip(eigvals, epsilon, np.inf)
139 | return (eigvecs * clipped_eigvals).dot(eigvecs.T)
140 |
141 | def solve_lower_triangular(amat, b):
142 | """ Solves amat*x=b when amat is lower triangular """
143 | return solve_triangular_base(amat, b, lower=True)
144 |
145 | def solve_upper_triangular(amat, b):
146 | """ Solves amat*x=b when amat is upper triangular """
147 | return solve_triangular_base(amat, b, lower=False)
148 |
149 | def solve_triangular_base(amat, b, lower):
150 | """ Solves amat*x=b when amat is a triangular matrix. """
151 | if amat.size == 0 and b.shape[0] == 0:
152 | return np.zeros((b.shape))
153 | else:
154 | return solve_triangular(amat, b, lower=lower)
155 |
156 | def sample_mvn(mu, covmat, nsamp):
157 | """ Sample from multivariate normal distribution with mean mu and covariance
158 | matrix covmat """
159 | mu = mu.reshape(-1,)
160 | ndim = len(mu)
161 | lmat = stable_cholesky(covmat)
162 | umat = np.random.normal(size=(ndim, nsamp))
163 | return lmat.dot(umat).T + mu
164 |
--------------------------------------------------------------------------------
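A quick illustration of the kernel and sampling utilities above, drawing a few GP prior sample paths at random inputs:
```
import numpy as np
from bo.pp.gp.gp_utils import kern_exp_quad, sample_mvn

X = np.random.uniform(0, 1, size=(5, 2))
K = kern_exp_quad(X, X, ls=0.3, alpha=1.0)     # 5x5 squared-exponential kernel matrix
samples = sample_mvn(np.zeros(5), K, nsamp=3)  # shape (3, 5)
print(samples.shape)
```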
/bo/pp/pp_core.py:
--------------------------------------------------------------------------------
1 | """
2 | Base classes for probabilistic programs.
3 | """
4 |
5 | import pickle
6 |
7 | class DiscPP(object):
8 | """ Parent class for discriminative probabilistic programs """
9 |
10 | def __init__(self):
11 | """ Constructor """
12 | self.sample_list = []
13 | if not hasattr(self,'data'):
14 | raise NotImplementedError('Implement var data in a child class')
15 | #if not hasattr(self,'ndimx'):
16 | #raise NotImplementedError('Implement var ndimx in a child class')
17 | #if not hasattr(self,'ndataInit'):
18 | #raise NotImplementedError('Implement var ndataInit in a child class')
19 |
20 | def infer_post_and_update_samples(self,nsamp):
21 | """ Run an inference algorithm (given self.data), draw samples from the
22 | posterior, and store in self.sample_list. """
23 | raise NotImplementedError('Implement method in a child class')
24 |
25 | def sample_pp_post_pred(self,nsamp,input_list):
26 | """ Sample nsamp times from PP posterior predictive, for each x-input in
27 | input_list """
28 | raise NotImplementedError('Implement method in a child class')
29 |
30 | def sample_pp_pred(self,nsamp,input_list,lv_list=None):
31 | """ Sample nsamp times from PP predictive for parameter lv, for each
32 | x-input in input_list. If lv is None, draw it uniformly at random
33 | from self.sample_list. """
34 | raise NotImplementedError('Implement method in a child class')
35 |
36 | def add_new_data(self,newData):
37 | """ Add data (newData) to self.data """
38 | raise NotImplementedError('Implement method in a child class')
39 |
40 | def get_namespace_to_save(self):
41 | """ Return namespace containing object info (to save to file) """
42 | raise NotImplementedError('Implement method in a child class')
43 |
44 | def save_namespace_to_file(self,fileStr,printFlag):
45 | """ Saves results from get_namespace_to_save in fileStr """
46 | ppNamespaceToSave = self.get_namespace_to_save()
47 | ff = open(fileStr,'wb')
48 | pickle.dump(ppNamespaceToSave,ff)
49 | ff.close()
50 | if printFlag:
51 | print('*Saved DiscPP Namespace in pickle file: ' +fileStr+'\n-----')
52 |
--------------------------------------------------------------------------------
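A minimal illustrative subclass of `DiscPP`, just to show the contract (set `self.data` before calling the parent constructor, then implement the inference and sampling hooks); the "model" here is simply noise around the data mean:
```
from argparse import Namespace
import numpy as np
from bo.pp.pp_core import DiscPP

class ConstantPP(DiscPP):
    def __init__(self, data):
        self.data = data                     # must exist before DiscPP.__init__
        super(ConstantPP, self).__init__()

    def infer_post_and_update_samples(self, nsamp=1):
        self.sample_list = [self.data.y.mean()] * nsamp

    def sample_pp_post_pred(self, nsamp, input_list):
        mu = self.data.y.mean()
        return [np.random.normal(mu, 0.1, size=(nsamp, 1)) for _ in input_list]

pp = ConstantPP(Namespace(X=np.random.rand(4, 2), y=np.random.rand(4, 1)))
pp.infer_post_and_update_samples()
preds = pp.sample_pp_post_pred(5, [np.zeros(2), np.ones(2)])
print(len(preds), preds[0].shape)   # 2 (5, 1)
```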
/bo/pp/pp_gp_george.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes for hierarchical GP models with George PP
3 | """
4 |
5 | from argparse import Namespace
6 | import numpy as np
7 | import scipy.optimize as spo
8 | import george
9 | import emcee
10 | from bo.pp.pp_core import DiscPP
11 |
12 | class GeorgeGpPP(DiscPP):
13 | """ Hierarchical GPs implemented with George """
14 |
15 | def __init__(self,data=None,modelp=None,printFlag=True):
16 | """ Constructor """
17 | self.set_data(data)
18 | self.set_model_params(modelp)
19 | self.ndimx = self.modelp.ndimx
20 | self.set_kernel()
21 | self.set_model()
22 | super(GeorgeGpPP,self).__init__()
23 | if printFlag:
24 | self.print_str()
25 |
26 | def set_data(self,data):
27 | if data is None:
28 | pass #TODO: handle case where there's no data
29 | self.data = data
30 |
31 | def set_model_params(self,modelp):
32 | if modelp is None:
33 | modelp = Namespace(ndimx=1, noiseVar=1e-3, kernLs=1.5, kernStr='mat',
34 | fitType='mle')
35 | self.modelp = modelp
36 |
37 | def set_kernel(self):
38 | """ Set kernel for GP """
39 | if self.modelp.kernStr=='mat':
40 | self.kernel = self.data.y.var() * \
41 | george.kernels.Matern52Kernel(self.modelp.kernLs, ndim=self.ndimx)
42 | if self.modelp.kernStr=='rbf': # NOTE: periodically produces errors
43 | self.kernel = self.data.y.var() * \
44 | george.kernels.ExpSquaredKernel(self.modelp.kernLs, ndim=self.ndimx)
45 |
46 | def set_model(self):
47 | """ Set GP regression model """
48 | self.model = self.get_model()
49 | self.model.compute(self.data.X)
50 | self.fit_hyperparams(printOut=False)
51 |
52 | def get_model(self):
53 | """ Returns GPRegression model """
54 | return george.GP(kernel=self.kernel,fit_mean=True)
55 |
56 | def fit_hyperparams(self,printOut=False):
57 | if self.modelp.fitType=='mle':
58 | spo.minimize(self.neg_log_like, self.model.get_parameter_vector(),
59 | jac=True)
60 | elif self.modelp.fitType=='bayes':
61 | self.nburnin = 200
62 | nsamp = 200
63 | nwalkers = 36
64 | gpdim = len(self.model)
65 | self.sampler = emcee.EnsembleSampler(nwalkers, gpdim, self.log_post)
66 | p0 = self.model.get_parameter_vector() + 1e-4*np.random.randn(nwalkers,
67 | gpdim)
68 | print('Running burn-in.')
69 | p0, _, _ = self.sampler.run_mcmc(p0, self.nburnin)
70 | print('Running main chain.')
71 | self.sampler.run_mcmc(p0, nsamp)
72 | if printOut:
73 | print('Final GP hyperparam (in opt or MCMC chain):')
74 | print(self.model.get_parameter_dict())
75 |
76 | def infer_post_and_update_samples(self):
77 | """ Update self.sample_list """
78 | self.sample_list = [None] #TODO: need to not-break ts fn in maker_bayesopt.py
79 |
80 | def sample_pp_post_pred(self,nsamp,input_list):
81 | """ Sample from posterior predictive of PP.
82 | Inputs:
83 | input_list - list of np arrays size=(-1,)
84 | Returns:
85 | list (len input_list) of np arrays (size=(nsamp,1))."""
86 | inputArray = np.array(input_list)
87 | if self.modelp.fitType=='mle':
88 | inputArray = np.array(input_list)
89 | ppredArray = self.model.sample_conditional(self.data.y.flatten(),
90 | inputArray, nsamp).T
91 | elif self.modelp.fitType=='bayes':
92 | ppredArray = np.zeros(shape=[len(input_list),nsamp])
93 | for s in range(nsamp):
94 | walkidx = np.random.randint(self.sampler.chain.shape[0])
95 | sampidx = np.random.randint(self.nburnin, self.sampler.chain.shape[1])
96 | hparamSamp = self.sampler.chain[walkidx, sampidx]
97 | print('hparamSamp = ' + str(hparamSamp)) # TODO: remove print statement
98 | self.model.set_parameter_vector(hparamSamp)
99 | ppredArray[:,s] = self.model.sample_conditional(self.data.y.flatten(),
100 | inputArray, 1).flatten()
101 | return list(ppredArray) # each element is row in ppredArray matrix
102 |
103 | def sample_pp_pred(self,nsamp,input_list,lv=None):
104 | """ Sample from predictive of PP for parameter lv.
105 | Returns: list (len input_list) of np arrays (size (nsamp,1))."""
106 | if self.modelp.fitType=='bayes':
107 | print('*WARNING: fitType=bayes not implemented for sample_pp_pred. \
108 | Reverting to fitType=mle')
109 | # TODO: Equivalent algo for fitType=='bayes':
110 | # - draw posterior sample path over all xin in input_list
111 | # - draw pred samples around sample path pt, based on noise model
112 | inputArray = np.array(input_list)
113 | samplePath = self.model.sample_conditional(self.data.y.flatten(),
114 | inputArray).reshape(-1,)
115 | return [np.random.normal(s,np.sqrt(self.modelp.noiseVar),nsamp).reshape(-1,)
116 | for s in samplePath]
117 |
118 | def neg_log_like(self,hparams):
119 | """ Compute and return the negative log likelihood for model
120 | hyperparameters hparams, as well as its gradient. """
121 | self.model.set_parameter_vector(hparams)
122 | g = self.model.grad_log_likelihood(self.data.y.flatten(), quiet=True)
123 | return -self.model.log_likelihood(self.data.y.flatten(), quiet=True), -g
124 |
125 | def log_post(self,hparams):
126 | """ Compute and return the log posterior density (up to constant of
127 | proportionality) for the model hyperparameters hparams. """
128 | # Uniform prior between -100 and 100, for each hyperparam
129 | if np.any((-100 > hparams[1:]) + (hparams[1:] > 100)):
130 | return -np.inf
131 | self.model.set_parameter_vector(hparams)
132 | return self.model.log_likelihood(self.data.y.flatten(), quiet=True)
133 |
134 | # Utilities
135 | def print_str(self):
136 | """ Print a description string """
137 | print('*GeorgeGpPP with modelp='+str(self.modelp)+'.')
138 | print('-----')
139 |
--------------------------------------------------------------------------------
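An illustrative usage sketch of `GeorgeGpPP` above with its default settings (1-d, MLE-fit Matern kernel); it requires the `george` package, and the data below are toy values:
```
from argparse import Namespace
import numpy as np
from bo.pp.pp_gp_george import GeorgeGpPP

X = np.random.uniform(-2, 2, size=(15, 1))
y = np.sin(X) + 1e-2 * np.random.randn(15, 1)
pp = GeorgeGpPP(data=Namespace(X=X, y=y), printFlag=False)

xtest = list(np.linspace(-2, 2, 5).reshape(-1, 1))
samples = pp.sample_pp_post_pred(nsamp=10, input_list=xtest)
print(len(samples), samples[0].shape)   # 5 test inputs, 10 posterior-predictive samples each
```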
/bo/pp/pp_gp_my_distmat.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes for GP models without any PP backend, using a given distance matrix.
3 | """
4 |
5 | from argparse import Namespace
6 | import time
7 | import copy
8 | import numpy as np
9 | from scipy.spatial.distance import cdist
10 | from bo.pp.pp_core import DiscPP
11 | from bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \
12 | get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \
13 | sample_mvn, squared_euc_distmat, kern_distmat
14 | from bo.util.print_utils import suppress_stdout_stderr
15 |
16 |
17 | class MyGpDistmatPP(DiscPP):
18 | """ GPs using a kernel specified by a given distance matrix, without any PP
19 | backend """
20 |
21 | def __init__(self, data=None, modelp=None, printFlag=True):
22 | """ Constructor """
23 | self.set_model_params(modelp)
24 | self.set_data(data)
25 | self.set_model()
26 | super(MyGpDistmatPP,self).__init__()
27 | if printFlag:
28 | self.print_str()
29 |
30 | def set_model_params(self, modelp):
31 | """ Set self.modelp """
32 | if modelp is None:
33 | pass #TODO
34 | self.modelp = modelp
35 |
36 | def set_data(self, data):
37 | """ Set self.data """
38 | if data is None:
39 | pass #TODO
40 | self.data_init = copy.deepcopy(data)
41 | self.data = copy.deepcopy(self.data_init)
42 |
43 | def set_model(self):
44 | """ Set GP regression model """
45 | self.model = self.get_model()
46 |
47 | def get_model(self):
48 | """ Returns model object """
49 | return None
50 |
51 | def infer_post_and_update_samples(self, print_result=False):
52 | """ Update self.sample_list """
53 | self.sample_list = [Namespace(ls=self.modelp.kernp.ls,
54 | alpha=self.modelp.kernp.alpha,
55 | sigma=self.modelp.kernp.sigma)]
56 | if print_result: self.print_inference_result()
57 |
58 | def get_distmat(self, xmat1, xmat2):
59 | """ Get distance matrix """
60 | #return squared_euc_distmat(xmat1, xmat2, .5)
61 |
62 | from data import Data
63 | self.distmat = Data.generate_distance_matrix
64 | #print('distmat')
65 | #print(self.distmat(xmat1, xmat2, self.modelp.distance))
66 | return self.distmat(xmat1, xmat2, self.modelp.distance)
67 |
68 | def print_inference_result(self):
69 | """ Print results of stan inference """
70 | print('*ls pt est = '+str(self.sample_list[0].ls)+'.')
71 | print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.')
72 | print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.')
73 | print('-----')
74 |
75 | def sample_pp_post_pred(self, nsamp, input_list, full_cov=False):
76 | """ Sample from posterior predictive of PP.
77 | Inputs:
78 | input_list - list of np arrays size=(-1,)
79 | Returns:
80 | list (len input_list) of np arrays (size=(nsamp,1))."""
81 | samp = self.sample_list[0]
82 | postmu, postcov = self.gp_post(self.data.X, self.data.y, input_list,
83 | samp.ls, samp.alpha, samp.sigma, full_cov)
84 | if full_cov:
85 | ppred_list = list(sample_mvn(postmu, postcov, nsamp))
86 | else:
87 | ppred_list = list(np.random.normal(postmu.reshape(-1,),
88 | postcov.reshape(-1,),
89 | size=(nsamp, len(input_list))))
90 | return list(np.stack(ppred_list).T), ppred_list
91 |
92 | def sample_pp_pred(self, nsamp, input_list, lv=None):
93 | """ Sample from predictive of PP for parameter lv.
94 | Returns: list (len input_list) of np arrays (size (nsamp,1))."""
95 | if lv is None:
96 | lv = self.sample_list[0]
97 | postmu, postcov = self.gp_post(self.data.X, self.data.y, input_list, lv.ls,
98 | lv.alpha, lv.sigma)
99 | pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times
100 | return list(np.stack(pred_list).T), pred_list
101 |
102 | def gp_post(self, x_train_list, y_train_arr, x_pred_list, ls, alpha, sigma,
103 | full_cov=True):
104 | """ Compute parameters of GP posterior """
105 | kernel = lambda a, b, c, d: kern_distmat(a, b, c, d, self.get_distmat)
106 | k11_nonoise = kernel(x_train_list, x_train_list, ls, alpha)
107 | lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first')
108 | smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat,
109 | y_train_arr))
110 | k21 = kernel(x_pred_list, x_train_list, ls, alpha)
111 | mu2 = k21.dot(smat)
112 | k22 = kernel(x_pred_list, x_pred_list, ls, alpha)
113 | vmat = solve_lower_triangular(lmat, k21.T)
114 | k2 = k22 - vmat.T.dot(vmat)
115 | if full_cov is False:
116 | k2 = np.sqrt(np.diag(k2))
117 | return mu2, k2
118 |
119 | # Utilities
120 | def print_str(self):
121 | """ Print a description string """
122 | print('*MyGpDistmatPP with modelp='+str(self.modelp)+'.')
123 | print('-----')
124 |
--------------------------------------------------------------------------------
/bo/pp/pp_gp_stan.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes for GP models with Stan
3 | """
4 |
5 | from argparse import Namespace
6 | import time
7 | import numpy as np
8 | import copy
9 | from bo.pp.pp_core import DiscPP
10 | import bo.pp.stan.gp_hier2 as gpstan2
11 | import bo.pp.stan.gp_hier3 as gpstan3
12 | import bo.pp.stan.gp_hier2_matern as gpstan2_matern
13 | from bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \
14 | get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \
15 | sample_mvn
16 | from bo.util.print_utils import suppress_stdout_stderr
17 |
18 | class StanGpPP(DiscPP):
19 | """ Hierarchical GPs implemented with Stan """
20 |
21 | def __init__(self, data=None, modelp=None, printFlag=True):
22 | """ Constructor """
23 | self.set_model_params(modelp)
24 | self.set_data(data)
25 | self.ndimx = self.modelp.ndimx
26 | self.set_model()
27 | super(StanGpPP,self).__init__()
28 | if printFlag:
29 | self.print_str()
30 |
31 | def set_model_params(self,modelp):
32 | if modelp is None:
33 | modelp = Namespace(ndimx=1, model_str='optfixedsig',
34 | gp_mean_transf_str='constant')
35 | if modelp.model_str=='optfixedsig':
36 | modelp.kernp = Namespace(u1=.1, u2=5., n1=10., n2=10., sigma=1e-5)
37 | modelp.infp = Namespace(niter=1000)
38 | elif modelp.model_str=='opt' or modelp.model_str=='optmatern32':
39 | modelp.kernp = Namespace(ig1=1., ig2=5., n1=10., n2=20., n3=.01,
40 | n4=.01)
41 | modelp.infp = Namespace(niter=1000)
42 | elif modelp.model_str=='samp' or modelp.model_str=='sampmatern32':
43 | modelp.kernp = Namespace(ig1=1., ig2=5., n1=10., n2=20., n3=.01,
44 | n4=.01)
45 | modelp.infp = Namespace(niter=1500, nwarmup=500)
46 | self.modelp = modelp
47 |
48 | def set_data(self, data):
49 | """ Set self.data """
50 | if data is None:
51 | pass #TODO: handle case where there's no data
52 | self.data_init = copy.deepcopy(data)
53 | self.data = self.get_transformed_data(self.data_init,
54 | self.modelp.gp_mean_transf_str)
55 |
56 | def get_transformed_data(self, data, transf_str='linear'):
57 | """ Transform data, for non-zero-mean GP """
58 | newdata = Namespace(X=data.X)
59 | if transf_str=='linear':
60 | mmat,_,_,_ = np.linalg.lstsq(np.concatenate([data.X,
61 | np.ones((data.X.shape[0],1))],1), data.y.flatten(), rcond=None)
62 | self.gp_mean_vec = lambda x: np.matmul(np.concatenate([x,
63 | np.ones((x.shape[0],1))],1), mmat)
64 | newdata.y = data.y - self.gp_mean_vec(data.X).reshape(-1,1)
65 | if transf_str=='constant':
66 | yconstant = data.y.mean()
67 | #yconstant = 0.
68 | self.gp_mean_vec = lambda x: np.array([yconstant for xcomp in x])
69 | newdata.y = data.y - self.gp_mean_vec(data.X).reshape(-1,1)
70 | return newdata
71 |
72 | def set_model(self):
73 | """ Set GP regression model """
74 | self.model = self.get_model()
75 |
76 | def get_model(self):
77 | """ Returns GPRegression model """
78 | if self.modelp.model_str=='optfixedsig':
79 | return gpstan3.get_model(print_status=False)
80 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':
81 | return gpstan2.get_model(print_status=False)
82 | elif self.modelp.model_str=='optmatern32' or \
83 | self.modelp.model_str=='sampmatern32':
84 | return gpstan2_matern.get_model(print_status=False)
85 |
86 | def infer_post_and_update_samples(self, seed=5000012, print_result=False):
87 | """ Update self.sample_list """
88 | data_dict = self.get_stan_data_dict()
89 | with suppress_stdout_stderr():
90 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \
91 | or self.modelp.model_str=='optmatern32':
92 | stanout = self.model.optimizing(data_dict, iter=self.modelp.infp.niter,
93 | #seed=seed, as_vector=True, algorithm='Newton')
94 | seed=seed, as_vector=True, algorithm='LBFGS')
95 | elif self.modelp.model_str=='samp' or self.modelp.model_str=='sampmatern32':
96 | stanout = self.model.sampling(data_dict, iter=self.modelp.infp.niter +
97 | self.modelp.infp.nwarmup, warmup=self.modelp.infp.nwarmup, chains=1,
98 | seed=seed, refresh=1000)
99 | print('-----')
100 | self.sample_list = self.get_sample_list_from_stan_out(stanout)
101 | if print_result: self.print_inference_result()
102 |
103 | def get_stan_data_dict(self):
104 | """ Return data dict for stan sampling method """
105 | if self.modelp.model_str=='optfixedsig':
106 | return {'u1':self.modelp.kernp.u1, 'u2':self.modelp.kernp.u2,
107 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,
108 | 'sigma':self.modelp.kernp.sigma, 'D':self.ndimx,
109 | 'N':len(self.data.X), 'x':self.data.X, 'y':self.data.y.flatten()}
110 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':
111 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,
112 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,
113 | 'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4,
114 | 'D':self.ndimx, 'N':len(self.data.X), 'x':self.data.X,
115 | 'y':self.data.y.flatten()}
116 | elif self.modelp.model_str=='optmatern32' or \
117 | self.modelp.model_str=='sampmatern32':
118 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,
119 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,
120 | 'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4,
121 | 'D':self.ndimx, 'N':len(self.data.X), 'x':self.data.X,
122 | 'y':self.data.y.flatten(), 'covid':2}
123 |
124 | def get_sample_list_from_stan_out(self, stanout):
125 | """ Convert stan output to sample_list """
126 | if self.modelp.model_str=='optfixedsig':
127 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],
128 | sigma=self.modelp.kernp.sigma)]
129 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='optmatern32':
130 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],
131 | sigma=stanout['sigma'])]
132 | elif self.modelp.model_str=='samp' or \
133 | self.modelp.model_str=='sampmatern32':
134 | sdict = stanout.extract(['rho','alpha','sigma'])
135 | return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i],
136 | sigma=sdict['sigma'][i]) for i in range(sdict['rho'].shape[0])]
137 |
138 | def print_inference_result(self):
139 | """ Print results of stan inference """
140 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \
141 | self.modelp.model_str=='optmatern32':
142 | print('*ls pt est = '+str(self.sample_list[0].ls)+'.')
143 | print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.')
144 | print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.')
145 | elif self.modelp.model_str=='samp' or \
146 | self.modelp.model_str=='sampmatern32':
147 | ls_arr = np.array([ns.ls for ns in self.sample_list])
148 | alpha_arr = np.array([ns.alpha for ns in self.sample_list])
149 | sigma_arr = np.array([ns.sigma for ns in self.sample_list])
150 | print('*ls mean = '+str(ls_arr.mean())+'.')
151 | print('*ls std = '+str(ls_arr.std())+'.')
152 | print('*alpha mean = '+str(alpha_arr.mean())+'.')
153 | print('*alpha std = '+str(alpha_arr.std())+'.')
154 | print('*sigma mean = '+str(sigma_arr.mean())+'.')
155 | print('*sigma std = '+str(sigma_arr.std())+'.')
156 | print('-----')
157 |
158 | def sample_pp_post_pred(self, nsamp, input_list, full_cov=False, nloop=None):
159 | """ Sample from posterior predictive of PP.
160 | Inputs:
161 | input_list - list of np arrays size=(-1,)
162 | Returns:
163 | list (len input_list) of np arrays (size=(nsamp,1))."""
164 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \
165 | self.modelp.model_str=='optmatern32':
166 | nloop = 1
167 | sampids = [0]
168 | elif self.modelp.model_str=='samp' or \
169 | self.modelp.model_str=='sampmatern32':
170 | if nloop is None: nloop=nsamp
171 | nsamp = int(nsamp/nloop)
172 | sampids = np.random.randint(len(self.sample_list), size=(nloop,))
173 | ppred_list = []
174 | for i in range(nloop):
175 | samp = self.sample_list[sampids[i]]
176 | postmu, postcov = self.gp_post(self.data.X, self.data.y,
177 | np.stack(input_list), samp.ls, samp.alpha, samp.sigma, full_cov)
178 | if full_cov:
179 | ppred_list.extend(list(sample_mvn(postmu, postcov, nsamp)))
180 | else:
181 | ppred_list.extend(list(np.random.normal(postmu.reshape(-1,),
182 | postcov.reshape(-1,), size=(nsamp, len(input_list)))))
183 | return self.get_reverse_transform(list(np.stack(ppred_list).T), ppred_list,
184 | input_list)
185 |
186 | def sample_pp_pred(self, nsamp, input_list, lv=None):
187 | """ Sample from predictive of PP for parameter lv.
188 | Returns: list (len input_list) of np arrays (size (nsamp,1))."""
189 | x_pred = np.stack(input_list)
190 | if lv is None:
191 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \
192 | or self.modelp.model_str=='optmatern32':
193 | lv = self.sample_list[0]
194 | elif self.modelp.model_str=='samp' or \
195 | self.modelp.model_str=='sampmatern32':
196 | lv = self.sample_list[np.random.randint(len(self.sample_list))]
197 | postmu, postcov = self.gp_post(self.data.X, self.data.y, x_pred, lv.ls,
198 | lv.alpha, lv.sigma)
199 | pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times
200 | return self.get_reverse_transform(list(np.stack(pred_list).T), pred_list,
201 | input_list)
202 |
203 | def get_reverse_transform(self, pp1, pp2, input_list):
204 | """ Apply reverse of data transform to ppred or pred """
205 | pp1 = [pp1[i] + self.gp_mean_vec(input_list[i].reshape(1,-1)) for i in
206 | range(len(input_list))]
207 | pp2 = [psamp + self.gp_mean_vec(np.array(input_list)) for psamp in pp2]
208 | return pp1, pp2
209 |
210 | def gp_post(self, x_train, y_train, x_pred, ls, alpha, sigma, full_cov=True):
211 | """ Compute parameters of GP posterior """
212 | if self.modelp.model_str=='optmatern32' or \
213 | self.modelp.model_str=='sampmatern32':
214 | kernel = kern_matern32
215 | else:
216 | kernel = kern_exp_quad
217 | k11_nonoise = kernel(x_train, x_train, ls, alpha)
218 | lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first')
219 | smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y_train))
220 | k21 = kernel(x_pred, x_train, ls, alpha)
221 | mu2 = k21.dot(smat)
222 | k22 = kernel(x_pred, x_pred, ls, alpha)
223 | vmat = solve_lower_triangular(lmat, k21.T)
224 | k2 = k22 - vmat.T.dot(vmat)
225 | if full_cov is False:
226 | k2 = np.sqrt(np.diag(k2))
227 | return mu2, k2
228 |
229 | # Utilities
230 | def print_str(self):
231 | """ Print a description string """
232 | print('*StanGpPP with modelp='+str(self.modelp)+'.')
233 | print('-----')
234 |
--------------------------------------------------------------------------------
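
The gp_post method above computes the standard Gaussian-process posterior via a Cholesky factorization of the noisy train-train covariance. For reference, here is a minimal numpy sketch of that same computation, written with a locally defined squared-exponential kernel instead of the helpers in bo/pp/gp/gp_utils.py; all names below are local to the sketch.

import numpy as np
from scipy.spatial.distance import cdist
from scipy.linalg import solve_triangular

def sqexp_kern(x1, x2, ls, alpha):
    # alpha^2 * exp(-||x - x'||^2 / (2 * ls^2))
    return alpha**2 * np.exp(-cdist(x1, x2, 'sqeuclidean') / (2 * ls**2))

def gp_posterior(x_train, y_train, x_pred, ls, alpha, sigma):
    # Cholesky of the noisy train-train covariance
    k11 = sqexp_kern(x_train, x_train, ls, alpha) + sigma**2 * np.eye(len(x_train))
    lmat = np.linalg.cholesky(k11)
    # smat = K11^{-1} y via two triangular solves, as in gp_post
    smat = solve_triangular(lmat.T, solve_triangular(lmat, y_train, lower=True), lower=False)
    k21 = sqexp_kern(x_pred, x_train, ls, alpha)
    mu2 = k21.dot(smat)
    # posterior covariance: K22 - K21 K11^{-1} K12
    vmat = solve_triangular(lmat, k21.T, lower=True)
    k2 = sqexp_kern(x_pred, x_pred, ls, alpha) - vmat.T.dot(vmat)
    return mu2, k2

# tiny 1-d example
x_train = np.linspace(0, 1, 5).reshape(-1, 1)
y_train = np.sin(2 * np.pi * x_train).flatten()
mu, cov = gp_posterior(x_train, y_train, np.array([[0.25], [0.75]]),
                       ls=0.3, alpha=1.0, sigma=0.01)
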
/bo/pp/pp_gp_stan_distmat.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes for GP models with Stan, using a given distance matrix.
3 | """
4 |
5 | from argparse import Namespace
6 | import time
7 | import copy
8 | import numpy as np
9 | from scipy.spatial.distance import cdist
10 | from bo.pp.pp_core import DiscPP
11 | import bo.pp.stan.gp_distmat as gpstan
12 | import bo.pp.stan.gp_distmat_fixedsig as gpstan_fixedsig
13 | from bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \
14 | get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \
15 | sample_mvn, squared_euc_distmat, kern_distmat
16 | from bo.util.print_utils import suppress_stdout_stderr
17 |
18 | class StanGpDistmatPP(DiscPP):
19 | """ Hierarchical GPs using a given distance matrix, implemented with Stan """
20 |
21 | def __init__(self, data=None, modelp=None, printFlag=True):
22 | """ Constructor """
23 | self.set_model_params(modelp)
24 | self.set_data(data)
25 | self.ndimx = self.modelp.ndimx
26 | self.set_model()
27 | super(StanGpDistmatPP,self).__init__()
28 | if printFlag:
29 | self.print_str()
30 |
31 | def set_model_params(self, modelp):
32 | """ Set self.modelp """
33 | if modelp is None:
34 | pass #TODO
35 | self.modelp = modelp
36 |
37 | def set_data(self, data):
38 | """ Set self.data """
39 | if data is None:
40 | pass #TODO
41 | self.data_init = copy.deepcopy(data)
42 | self.data = copy.deepcopy(self.data_init)
43 |
44 | def set_model(self):
45 | """ Set GP regression model """
46 | self.model = self.get_model()
47 |
48 | def get_model(self):
49 | """ Returns GPRegression model """
50 | if self.modelp.model_str=='optfixedsig' or \
51 | self.modelp.model_str=='sampfixedsig':
52 | return gpstan_fixedsig.get_model(print_status=True)
53 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':
54 | return gpstan.get_model(print_status=True)
55 | elif self.modelp.model_str=='fixedparam':
56 | return None
57 |
58 | def infer_post_and_update_samples(self, seed=543210, print_result=False):
59 | """ Update self.sample_list """
60 | data_dict = self.get_stan_data_dict()
61 | with suppress_stdout_stderr():
62 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt':
63 | stanout = self.model.optimizing(data_dict, iter=self.modelp.infp.niter,
64 | #seed=seed, as_vector=True, algorithm='Newton')
65 | seed=seed, as_vector=True, algorithm='LBFGS')
66 | elif self.modelp.model_str=='samp' or self.modelp.model_str=='sampfixedsig':
67 | stanout = self.model.sampling(data_dict, iter=self.modelp.infp.niter +
68 | self.modelp.infp.nwarmup, warmup=self.modelp.infp.nwarmup, chains=1,
69 | seed=seed, refresh=1000)
70 | elif self.modelp.model_str=='fixedparam':
71 | stanout = None
72 | print('-----')
73 | self.sample_list = self.get_sample_list_from_stan_out(stanout)
74 | if print_result: self.print_inference_result()
75 |
76 | def get_stan_data_dict(self):
77 | """ Return data dict for stan sampling method """
78 | if self.modelp.model_str=='optfixedsig' or \
79 | self.modelp.model_str=='sampfixedsig':
80 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,
81 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,
82 | 'sigma':self.modelp.kernp.sigma, 'D':self.ndimx,
83 | 'N':len(self.data.X), 'y':self.data.y.flatten(),
84 | 'distmat':self.get_distmat(self.data.X, self.data.X)}
85 | elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':
86 | return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,
87 | 'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,
88 | 'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4,
89 | 'D':self.ndimx, 'N':len(self.data.X), 'y':self.data.y.flatten(),
90 | 'distmat':self.get_distmat(self.data.X, self.data.X)}
91 |
92 | def get_distmat(self, xmat1, xmat2):
93 | """ Get distance matrix """
 94 |     # For now, compute 0.5 * squared Euclidean distance between xmat1 and xmat2
95 | return squared_euc_distmat(xmat1, xmat2, .5)
96 |
97 | def get_sample_list_from_stan_out(self, stanout):
98 | """ Convert stan output to sample_list """
99 | if self.modelp.model_str=='optfixedsig':
100 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],
101 | sigma=self.modelp.kernp.sigma)]
102 | elif self.modelp.model_str=='opt':
103 | return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],
104 | sigma=stanout['sigma'])]
105 | elif self.modelp.model_str=='sampfixedsig':
106 | sdict = stanout.extract(['rho','alpha'])
107 | return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i],
108 | sigma=self.modelp.kernp.sigma) for i in range(sdict['rho'].shape[0])]
109 | elif self.modelp.model_str=='samp':
110 | sdict = stanout.extract(['rho','alpha','sigma'])
111 | return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i],
112 | sigma=sdict['sigma'][i]) for i in range(sdict['rho'].shape[0])]
113 | elif self.modelp.model_str=='fixedparam':
114 | return [Namespace(ls=self.modelp.kernp.ls, alpha=self.modelp.kernp.alpha,
115 | sigma=self.modelp.kernp.sigma)]
116 |
117 | def print_inference_result(self):
118 | """ Print results of stan inference """
119 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \
120 | self.modelp.model_str=='fixedparam':
121 | print('*ls pt est = '+str(self.sample_list[0].ls)+'.')
122 | print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.')
123 | print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.')
124 | elif self.modelp.model_str=='samp' or \
125 | self.modelp.model_str=='sampfixedsig':
126 | ls_arr = np.array([ns.ls for ns in self.sample_list])
127 | alpha_arr = np.array([ns.alpha for ns in self.sample_list])
128 | sigma_arr = np.array([ns.sigma for ns in self.sample_list])
129 | print('*ls mean = '+str(ls_arr.mean())+'.')
130 | print('*ls std = '+str(ls_arr.std())+'.')
131 | print('*alpha mean = '+str(alpha_arr.mean())+'.')
132 | print('*alpha std = '+str(alpha_arr.std())+'.')
133 | print('*sigma mean = '+str(sigma_arr.mean())+'.')
134 | print('*sigma std = '+str(sigma_arr.std())+'.')
135 | print('-----')
136 |
137 | def sample_pp_post_pred(self, nsamp, input_list, full_cov=False, nloop=None):
138 | """ Sample from posterior predictive of PP.
139 | Inputs:
140 | input_list - list of np arrays size=(-1,)
141 | Returns:
142 | list (len input_list) of np arrays (size=(nsamp,1))."""
143 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \
144 | self.modelp.model_str=='fixedparam':
145 | nloop = 1
146 | sampids = [0]
147 | elif self.modelp.model_str=='samp' or \
148 | self.modelp.model_str=='sampfixedsig':
149 | if nloop is None: nloop=nsamp
150 | nsamp = int(nsamp/nloop)
151 | sampids = np.random.randint(len(self.sample_list), size=(nloop,))
152 | ppred_list = []
153 | for i in range(nloop):
154 | samp = self.sample_list[sampids[i]]
155 | postmu, postcov = self.gp_post(self.data.X, self.data.y,
156 | np.stack(input_list), samp.ls, samp.alpha, samp.sigma, full_cov)
157 | if full_cov:
158 | ppred_list.extend(list(sample_mvn(postmu, postcov, nsamp)))
159 | else:
160 | ppred_list.extend(list(np.random.normal(postmu.reshape(-1,),
161 | postcov.reshape(-1,), size=(nsamp, len(input_list)))))
162 | return list(np.stack(ppred_list).T), ppred_list
163 |
164 | def sample_pp_pred(self, nsamp, input_list, lv=None):
165 | """ Sample from predictive of PP for parameter lv.
166 | Returns: list (len input_list) of np arrays (size (nsamp,1))."""
167 | x_pred = np.stack(input_list)
168 | if lv is None:
169 | if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \
170 | or self.modelp.model_str=='fixedparam':
171 | lv = self.sample_list[0]
172 | elif self.modelp.model_str=='samp' or \
173 | self.modelp.model_str=='sampfixedsig':
174 | lv = self.sample_list[np.random.randint(len(self.sample_list))]
175 | postmu, postcov = self.gp_post(self.data.X, self.data.y, x_pred, lv.ls,
176 | lv.alpha, lv.sigma)
177 | pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times
178 | return list(np.stack(pred_list).T), pred_list
179 |
180 | def gp_post(self, x_train, y_train, x_pred, ls, alpha, sigma, full_cov=True):
181 | """ Compute parameters of GP posterior """
182 | kernel = lambda a, b, c, d: kern_distmat(a, b, c, d, self.get_distmat)
183 | k11_nonoise = kernel(x_train, x_train, ls, alpha)
184 | lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first')
185 | smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y_train))
186 | k21 = kernel(x_pred, x_train, ls, alpha)
187 | mu2 = k21.dot(smat)
188 | k22 = kernel(x_pred, x_pred, ls, alpha)
189 | vmat = solve_lower_triangular(lmat, k21.T)
190 | k2 = k22 - vmat.T.dot(vmat)
191 | if full_cov is False:
192 | k2 = np.sqrt(np.diag(k2))
193 | return mu2, k2
194 |
195 | # Utilities
196 | def print_str(self):
197 | """ Print a description string """
198 | print('*StanGpDistmatPP with modelp='+str(self.modelp)+'.')
199 | print('-----')
200 |
--------------------------------------------------------------------------------
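
A usage sketch for the 'fixedparam' mode of StanGpDistmatPP, which bypasses Stan entirely and reads the kernel hyperparameters straight from modelp. The kernp/infp field names follow the attribute accesses in the class above; the numeric values are illustrative, and the sketch assumes the DiscPP base constructor takes no arguments and that the kernel utilities imported from bo/pp/gp/gp_utils.py behave as their names suggest.

import numpy as np
from argparse import Namespace
from bo.pp.pp_gp_stan_distmat import StanGpDistmatPP

# illustrative hyperparameters; ls/alpha/sigma are used directly in 'fixedparam' mode
modelp = Namespace(ndimx=2, model_str='fixedparam',
                   kernp=Namespace(ls=2.0, alpha=1.5, sigma=1e-2),
                   infp=Namespace(niter=1000, nwarmup=500))
data = Namespace(X=np.random.rand(10, 2), y=np.random.rand(10, 1))

pp = StanGpDistmatPP(data=data, modelp=modelp, printFlag=False)
pp.infer_post_and_update_samples()
# 50 posterior-predictive samples at 3 query points
samples, _ = pp.sample_pp_post_pred(50, list(np.random.rand(3, 2)))
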
/bo/pp/stan/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for defining and compiling models in Stan.
3 | """
4 |
--------------------------------------------------------------------------------
/bo/pp/stan/compile_stan.py:
--------------------------------------------------------------------------------
1 | """
2 | Script to compile stan models
3 | """
4 |
5 | #import pp_new.stan.gp_hier2 as gpstan
6 | #import pp_new.stan.gp_hier3 as gpstan
7 | #import pp_new.stan.gp_hier2_matern as gpstan
8 | import pp_new.stan.gp_distmat as gpstan
9 | #import pp_new.stan.gp_distmat_fixedsig as gpstan
10 |
11 |
12 | # Recompile model and return it
13 | model = gpstan.get_model(recompile=True)
14 |
--------------------------------------------------------------------------------
/bo/pp/stan/gp_distmat.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to define and compile PPs in Stan, for model:
3 | hierarchical GP (prior on rho, alpha, sigma) using a given distance matrix.
4 | """
5 |
6 | import time
7 | import pickle
8 | import pystan
9 |
10 | def get_model(recompile=False, print_status=True):
11 | model_file_str = 'bo/pp/stan/hide_model/gp_distmat.pkl'
12 |
13 | if recompile:
14 | starttime = time.time()
15 | model = pystan.StanModel(model_code=get_model_code())
16 | buildtime = time.time()-starttime
17 | with open(model_file_str,'wb') as f:
18 | pickle.dump(model, f)
19 | if print_status:
20 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----')
21 | print('*Model saved in file ' + model_file_str + '.\n-----')
22 | else:
23 | model = pickle.load(open(model_file_str,'rb'))
24 | if print_status:
25 | print('*Model loaded from file ' + model_file_str + '.\n-----')
26 | return model
27 |
28 |
29 | def get_model_code():
30 | """ Parse modelp and return stan model code """
31 | return """
32 | data {
33 | int N;
34 | matrix[N, N] distmat;
35 | vector[N] y;
36 | real ig1;
37 | real ig2;
38 | real n1;
39 | real n2;
40 | real n3;
41 | real n4;
42 | }
43 |
44 | parameters {
45 | real rho;
46 | real alpha;
47 | real sigma;
48 | }
49 |
50 | model {
51 | matrix[N, N] cov = square(alpha) * exp(-distmat / square(rho))
52 | + diag_matrix(rep_vector(square(sigma), N));
53 | matrix[N, N] L_cov = cholesky_decompose(cov);
54 | rho ~ inv_gamma(ig1, ig2);
55 | alpha ~ normal(n1, n2);
56 | sigma ~ normal(n3, n4);
57 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
58 | }
59 | """
60 |
61 | if __name__ == '__main__':
62 | get_model()
63 |
--------------------------------------------------------------------------------
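
Since StanGpDistmatPP.get_distmat above states that it supplies 0.5 * squared Euclidean distance, the covariance square(alpha) * exp(-distmat / square(rho)) in this model reduces to the usual squared-exponential kernel. A quick numpy check of that equivalence (names local to this sketch):

import numpy as np
from scipy.spatial.distance import cdist

x = np.random.rand(4, 3)
rho, alpha = 0.7, 1.3
distmat = 0.5 * cdist(x, x, 'sqeuclidean')
cov_distmat_form = alpha**2 * np.exp(-distmat / rho**2)
cov_sqexp_form = alpha**2 * np.exp(-cdist(x, x, 'sqeuclidean') / (2 * rho**2))
assert np.allclose(cov_distmat_form, cov_sqexp_form)
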
/bo/pp/stan/gp_distmat_fixedsig.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to define and compile PPs in Stan, for model:
3 | hierarchical GP (prior on rho, alpha) and fixed sigma, using a given
4 | distance matrix.
5 | """
6 |
7 | import time
8 | import pickle
9 | import pystan
10 |
11 | def get_model(recompile=False, print_status=True):
12 | model_file_str = 'bo/pp/stan/hide_model/gp_distmat_fixedsig.pkl'
13 |
14 | if recompile:
15 | starttime = time.time()
16 | model = pystan.StanModel(model_code=get_model_code())
17 | buildtime = time.time()-starttime
18 | with open(model_file_str,'wb') as f:
19 | pickle.dump(model, f)
20 | if print_status:
21 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----')
22 | print('*Model saved in file ' + model_file_str + '.\n-----')
23 | else:
24 | model = pickle.load(open(model_file_str,'rb'))
25 | if print_status:
26 | print('*Model loaded from file ' + model_file_str + '.\n-----')
27 | return model
28 |
29 |
30 | def get_model_code():
31 | """ Parse modelp and return stan model code """
32 | return """
33 | data {
34 | int N;
35 | matrix[N, N] distmat;
36 | vector[N] y;
37 | real ig1;
38 | real ig2;
39 | real n1;
40 | real n2;
41 | real sigma;
42 | }
43 |
44 | parameters {
45 | real rho;
46 | real alpha;
47 | }
48 |
49 | model {
50 | matrix[N, N] cov = square(alpha) * exp(-distmat / square(rho))
51 | + diag_matrix(rep_vector(square(sigma), N));
52 | matrix[N, N] L_cov = cholesky_decompose(cov);
53 | rho ~ inv_gamma(ig1, ig2);
54 | alpha ~ normal(n1, n2);
55 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
56 | }
57 | """
58 |
59 | if __name__ == '__main__':
60 | get_model()
61 |
--------------------------------------------------------------------------------
/bo/pp/stan/gp_hier2.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to define and compile PPs in Stan, for model:
3 | hierarchical GP (prior on rho, alpha, sigma)
4 | """
5 |
6 | import time
7 | import pickle
8 | import pystan
9 |
10 | def get_model(recompile=False, print_status=True):
11 | model_file_str = 'bo/pp/stan/hide_model/gp_hier2.pkl'
12 |
13 | if recompile:
14 | starttime = time.time()
15 | model = pystan.StanModel(model_code=get_model_code())
16 | buildtime = time.time()-starttime
17 | with open(model_file_str,'wb') as f:
18 | pickle.dump(model, f)
19 | if print_status:
20 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----')
21 | print('*Model saved in file ' + model_file_str + '.\n-----')
22 | else:
23 | model = pickle.load(open(model_file_str,'rb'))
24 | if print_status:
25 | print('*Model loaded from file ' + model_file_str + '.\n-----')
26 | return model
27 |
28 |
29 | def get_model_code():
30 | """ Parse modelp and return stan model code """
31 | return """
32 | data {
33 | int D;
34 | int N;
35 | vector[D] x[N];
36 | vector[N] y;
37 | real ig1;
38 | real ig2;
39 | real n1;
40 | real n2;
41 | real n3;
42 | real n4;
43 | }
44 |
45 | parameters {
46 | real rho;
47 | real alpha;
48 | real sigma;
49 | }
50 |
51 | model {
52 | matrix[N, N] cov = cov_exp_quad(x, alpha, rho)
53 | + diag_matrix(rep_vector(square(sigma), N));
54 | matrix[N, N] L_cov = cholesky_decompose(cov);
55 | rho ~ inv_gamma(ig1, ig2);
56 | alpha ~ normal(n1, n2);
57 | sigma ~ normal(n3, n4);
58 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
59 | }
60 | """
61 |
62 | if __name__ == '__main__':
63 | get_model()
64 |
--------------------------------------------------------------------------------
/bo/pp/stan/gp_hier2_matern.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to define and compile PPs in Stan, for model: hierarchical GP (prior
3 | on rho, alpha, sigma), with matern kernel
4 | """
5 |
6 | import time
7 | import pickle
8 | import pystan
9 |
10 | def get_model(recompile=False, print_status=True):
11 | model_file_str = 'bo/pp/stan/hide_model/gp_hier2_matern.pkl'
12 |
13 | if recompile:
14 | starttime = time.time()
15 | model = pystan.StanModel(model_code=get_model_code())
16 | buildtime = time.time()-starttime
17 | with open(model_file_str,'wb') as f:
18 | pickle.dump(model, f)
19 | if print_status:
20 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----')
21 | print('*Model saved in file ' + model_file_str + '.\n-----')
22 | else:
23 | model = pickle.load(open(model_file_str,'rb'))
24 | if print_status:
25 | print('*Model loaded from file ' + model_file_str + '.\n-----')
26 | return model
27 |
28 |
29 | def get_model_code():
30 | """ Parse modelp and return stan model code """
31 | return """
32 | functions {
33 | matrix distance_matrix_single(int N, vector[] x) {
34 | matrix[N, N] distmat;
35 | for(i in 1:(N-1)) {
36 | for(j in (i+1):N) {
37 | distmat[i, j] = distance(x[i], x[j]);
38 | }
39 | }
40 | return distmat;
41 | }
42 |
43 | matrix matern_covariance(int N, matrix dist, real ls, real alpha_sq, int COVFN) {
44 | matrix[N,N] S;
45 | real dist_ls;
46 | real sqrt3;
47 | real sqrt5;
48 | sqrt3=sqrt(3.0);
49 | sqrt5=sqrt(5.0);
50 |
51 | // exponential == Matern nu=1/2 , (p=0; nu=p+1/2)
52 | if (COVFN==1) {
53 | for(i in 1:(N-1)) {
54 | for(j in (i+1):N) {
55 | dist_ls = fabs(dist[i,j])/ls;
56 | S[i,j] = alpha_sq * exp(- dist_ls );
57 | }
58 | }
59 | }
60 |
61 | // Matern nu= 3/2 covariance
62 | else if (COVFN==2) {
63 | for(i in 1:(N-1)) {
64 | for(j in (i+1):N) {
65 | dist_ls = fabs(dist[i,j])/ls;
66 | S[i,j] = alpha_sq * (1 + sqrt3 * dist_ls) * exp(-sqrt3 * dist_ls);
67 | }
68 | }
69 | }
70 |
71 | // Matern nu=5/2 covariance
72 | else if (COVFN==3) {
73 | for(i in 1:(N-1)) {
74 | for(j in (i+1):N) {
75 | dist_ls = fabs(dist[i,j])/ls;
76 | S[i,j] = alpha_sq * (1 + sqrt5 *dist_ls + 5* pow(dist_ls,2)/3) * exp(-sqrt5 *dist_ls);
77 | }
78 | }
79 | }
80 |
81 |     // Matern as nu->Inf becomes Gaussian (aka squared exponential cov)
82 | else if (COVFN==4) {
83 | for(i in 1:(N-1)) {
84 | for(j in (i+1):N) {
85 | dist_ls = fabs(dist[i,j])/ls;
86 | S[i,j] = alpha_sq * exp( -pow(dist_ls,2)/2 ) ;
87 | }
88 | }
89 | }
90 |
91 | // fill upper triangle
92 | for(i in 1:(N-1)) {
93 | for(j in (i+1):N) {
94 | S[j,i] = S[i,j];
95 | }
96 | }
97 |
98 |     // fill the diagonal with the spatial variance alpha_sq (no nugget or jitter is added here)
99 | for(i in 1:N) {
100 | S[i,i] = alpha_sq;
101 | }
102 |
103 | return S;
104 | }
105 | }
106 |
107 | data {
108 | int D;
109 | int N;
110 | vector[D] x[N];
111 | vector[N] y;
112 | real ig1;
113 | real ig2;
114 | real n1;
115 | real n2;
116 | real n3;
117 | real n4;
118 | int covid;
119 | }
120 |
121 | parameters {
122 | real rho;
123 | real alpha;
124 | real sigma;
125 | }
126 |
127 | model {
128 | matrix[N, N] distmat = distance_matrix_single(N, x);
129 |   matrix[N, N] cov = matern_covariance(N, distmat, rho, square(alpha), covid) + diag_matrix(rep_vector(square(sigma), N));
130 | matrix[N, N] L_cov = cholesky_decompose(cov);
131 | rho ~ inv_gamma(ig1, ig2);
132 | alpha ~ normal(n1, n2);
133 | sigma ~ normal(n3, n4);
134 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
135 | }
136 | """
137 |
138 | if __name__ == '__main__':
139 | get_model()
140 |
--------------------------------------------------------------------------------
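
For reference, a minimal numpy version of the Matern 3/2 branch (COVFN==2) of matern_covariance above; the names are local to this sketch.

import numpy as np
from scipy.spatial.distance import cdist

def matern32(x1, x2, ls, alpha_sq):
    # alpha_sq * (1 + sqrt(3) d/ls) * exp(-sqrt(3) d/ls), with d the Euclidean distance
    d = cdist(x1, x2) / ls
    return alpha_sq * (1.0 + np.sqrt(3.0) * d) * np.exp(-np.sqrt(3.0) * d)

x = np.random.rand(5, 2)
cov = matern32(x, x, ls=0.5, alpha_sq=1.0)   # diagonal equals alpha_sq, as in the Stan code
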
/bo/pp/stan/gp_hier3.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to define and compile PPs in Stan, for model:
3 | hierarchical GP with uniform prior on rho, normal prior on alpha,
4 | and fixed sigma
5 | """
6 |
7 | import time
8 | import pickle
9 | import pystan
10 |
11 | def get_model(recompile=False, print_status=True):
12 | model_file_str = 'bo/pp/stan/hide_model/gp_hier3.pkl'
13 |
14 | if recompile:
15 | starttime = time.time()
16 | model = pystan.StanModel(model_code=get_model_code())
17 | buildtime = time.time()-starttime
18 | with open(model_file_str,'wb') as f:
19 | pickle.dump(model, f)
20 | if print_status:
21 | print('*Time taken to compile = '+ str(buildtime) +' seconds.\n-----')
22 | print('*Model saved in file ' + model_file_str + '.\n-----')
23 | else:
24 | model = pickle.load(open(model_file_str,'rb'))
25 | if print_status:
26 | print('*Model loaded from file ' + model_file_str + '.\n-----')
27 | return model
28 |
29 |
30 | def get_model_code():
31 | """ Parse modelp and return stan model code """
32 | return """
33 | data {
34 | int D;
35 | int N;
36 | vector[D] x[N];
37 | vector[N] y;
38 | real u1;
39 | real u2;
40 | real n1;
41 | real n2;
42 | real sigma;
43 | }
44 |
45 | parameters {
46 | real rho;
47 | real alpha;
48 | }
49 |
50 | model {
51 | matrix[N, N] cov = cov_exp_quad(x, alpha, rho)
52 | + diag_matrix(rep_vector(square(sigma), N));
53 | matrix[N, N] L_cov = cholesky_decompose(cov);
54 | rho ~ uniform(u1, u2);
55 | alpha ~ normal(n1, n2);
56 | y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);
57 | }
58 | """
59 |
60 | if __name__ == '__main__':
61 | get_model()
62 |
--------------------------------------------------------------------------------
/bo/util/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Miscellaneous utilities.
3 | """
4 |
--------------------------------------------------------------------------------
/bo/util/datatransform.py:
--------------------------------------------------------------------------------
1 | """
2 | Classes for transforming data.
3 | """
4 |
5 | from argparse import Namespace
6 | import numpy as np
7 | from sklearn.preprocessing import StandardScaler
8 | #import sklearn.preprocessing as sklp
9 |
10 | class DataTransformer(object):
11 | """ Class for transforming data """
12 |
13 | def __init__(self, datamat, printflag=True):
14 | """ Constructor
15 | Parameters:
16 | datamat - numpy array (n x d) of data to be transformed
17 | """
18 | self.datamat = datamat
19 | self.set_transformers()
20 | if printflag:
21 | self.print_str()
22 |
23 | def set_transformers(self):
24 | """ Set transformers using self.datamat """
25 | self.ss = StandardScaler()
26 | self.ss.fit(self.datamat)
27 |
28 | def transform_data(self, datamat=None):
29 | """ Return transformed datamat (default self.datamat) """
30 | if datamat is None:
31 | datamat = self.datamat
32 | return self.ss.transform(datamat)
33 |
34 | def inv_transform_data(self, datamat):
35 | """ Return inverse transform of datamat """
36 | return self.ss.inverse_transform(datamat)
37 |
38 | def print_str(self):
39 | """ Print a description string """
40 | print('*DataTransformer with self.datamat.shape = ' +
41 | str(self.datamat.shape) + '.')
42 | print('-----')
43 |
--------------------------------------------------------------------------------
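
A usage sketch for DataTransformer, which is a thin wrapper around sklearn's StandardScaler; it assumes the bo package is importable from the working directory. The synthetic matrix below is illustrative only.

import numpy as np
from bo.util.datatransform import DataTransformer

ymat = np.random.randn(20, 1) * 5.0 + 10.0
dt = DataTransformer(ymat, printflag=False)
y_std = dt.transform_data()            # zero-mean, unit-variance columns
y_back = dt.inv_transform_data(y_std)  # recovers the original values
assert np.allclose(y_back, ymat)
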
/bo/util/print_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities for printing and output
3 | """
4 |
5 | import os
6 |
7 | class suppress_stdout_stderr(object):
8 | ''' A context manager for doing a "deep suppression" of stdout and stderr in
9 | Python, i.e. will suppress all print, even if the print originates in a
10 | compiled C/Fortran sub-function.
11 | This will not suppress raised exceptions, since exceptions are printed
12 | to stderr just before a script exits, and after the context manager has
13 | exited (at least, I think that is why it lets exceptions through). '''
14 | def __init__(self):
15 | # Open a pair of null files
16 | self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
17 | # Save the actual stdout (1) and stderr (2) file descriptors.
18 | self.save_fds = [os.dup(1), os.dup(2)]
19 |
20 | def __enter__(self):
21 | # Assign the null pointers to stdout and stderr.
22 | os.dup2(self.null_fds[0], 1)
23 | os.dup2(self.null_fds[1], 2)
24 |
25 | def __exit__(self, *_):
26 | # Re-assign the real stdout/stderr back to (1) and (2)
27 | os.dup2(self.save_fds[0], 1)
28 | os.dup2(self.save_fds[1], 2)
29 | # Close the null files
30 | for fd in self.null_fds + self.save_fds:
31 | os.close(fd)
32 |
--------------------------------------------------------------------------------
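
A usage sketch for suppress_stdout_stderr: anything written to stdout or stderr inside the block is swallowed, including output from compiled extensions such as Stan, which is how it is used in the pp_gp_stan modules above.

from bo.util.print_utils import suppress_stdout_stderr

with suppress_stdout_stderr():
    print('this is not shown')
print('this is shown')
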
/darts/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/darts/arch.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | import os
4 | import copy
5 | import random
6 |
7 | sys.path.append(os.path.expanduser('~/darts/cnn'))
8 | from train_class import Train
9 |
10 | OPS = ['none',
11 | 'max_pool_3x3',
12 | 'avg_pool_3x3',
13 | 'skip_connect',
14 | 'sep_conv_3x3',
15 | 'sep_conv_5x5',
16 | 'dil_conv_3x3',
17 | 'dil_conv_5x5'
18 | ]
19 | NUM_VERTICES = 4
20 | INPUT_1 = 'c_k-2'
21 | INPUT_2 = 'c_k-1'
22 |
23 |
24 | class Arch:
25 |
26 | def __init__(self, arch):
27 | self.arch = arch
28 |
29 | def serialize(self):
30 | return self.arch
31 |
32 | def query(self, epochs=50):
33 | trainer = Train()
34 | val_losses, test_losses = trainer.main(self.arch, epochs=epochs)
35 | val_loss = 100 - np.mean(val_losses)
36 | test_loss = 100 - test_losses[-1]
37 | return val_loss, test_loss
38 |
39 | @classmethod
40 | def random_arch(cls):
41 | # output a uniformly random architecture spec
42 | # from the DARTS repository
43 | # https://github.com/quark0/darts
44 |
45 | normal = []
46 | reduction = []
47 | for i in range(NUM_VERTICES):
48 | ops = np.random.choice(range(len(OPS)), NUM_VERTICES)
49 |
50 | #input nodes for conv
51 | nodes_in_normal = np.random.choice(range(i+2), 2, replace=False)
52 | #input nodes for reduce
53 | nodes_in_reduce = np.random.choice(range(i+2), 2, replace=False)
54 |
55 | normal.extend([(nodes_in_normal[0], ops[0]), (nodes_in_normal[1], ops[1])])
56 | reduction.extend([(nodes_in_reduce[0], ops[2]), (nodes_in_reduce[1], ops[3])])
57 |
58 | return (normal, reduction)
59 |
60 | def get_arch_list(self):
61 | # convert tuple to list so that it is mutable
62 | arch_list = []
63 | for cell in self.arch:
64 | arch_list.append([])
65 | for pair in cell:
66 | arch_list[-1].append([])
67 | for num in pair:
68 | arch_list[-1][-1].append(num)
69 | return arch_list
70 |
71 | def mutate(self, edits):
72 | """ mutate a single arch """
73 | # first convert tuple to array so that it is mutable
74 | mutation = self.get_arch_list()
75 |
76 | #make mutations
77 | for _ in range(edits):
78 | cell = np.random.choice(2)
79 | pair = np.random.choice(len(OPS))
80 | num = np.random.choice(2)
81 | if num == 1:
82 | mutation[cell][pair][num] = np.random.choice(len(OPS))
83 | else:
84 | inputs = pair // 2 + 2
85 | choice = np.random.choice(inputs)
86 | if pair % 2 == 0 and mutation[cell][pair+1][num] != choice:
87 | mutation[cell][pair][num] = choice
88 | elif pair % 2 != 0 and mutation[cell][pair-1][num] != choice:
89 | mutation[cell][pair][num] = choice
90 |
91 | return mutation
92 |
93 | def get_paths(self):
94 | """ return all paths from input to output """
95 |
96 | path_builder = [[[], [], [], []], [[], [], [], []]]
97 | paths = [[], []]
98 |
99 | for i, cell in enumerate(self.arch):
100 | for j in range(len(OPS)):
101 | if cell[j][0] == 0:
102 | path = [INPUT_1, OPS[cell[j][1]]]
103 | path_builder[i][j//2].append(path)
104 | paths[i].append(path)
105 | elif cell[j][0] == 1:
106 | path = [INPUT_2, OPS[cell[j][1]]]
107 | path_builder[i][j//2].append(path)
108 | paths[i].append(path)
109 | else:
110 | for path in path_builder[i][cell[j][0] - 2]:
111 | path = [*path, OPS[cell[j][1]]]
112 | path_builder[i][j//2].append(path)
113 | paths[i].append(path)
114 |
115 | # check if there are paths of length >=5
116 | contains_long_path = [False, False]
117 | if max([len(path) for path in paths[0]]) >= 5:
118 | contains_long_path[0] = True
119 | if max([len(path) for path in paths[1]]) >= 5:
120 | contains_long_path[1] = True
121 |
122 | return paths, contains_long_path
123 |
124 | def get_path_indices(self, long_paths=True):
125 | """
126 | compute the index of each path
127 | There are 4 * (8^0 + ... + 8^4) paths total
128 | If long_paths = False, we give a single boolean to all paths of
129 | size 4, so there are only 4 * (1 + 8^0 + ... + 8^3) paths
130 | """
131 | paths, contains_long_path = self.get_paths()
132 | normal_paths, reduce_paths = paths
133 | num_ops = len(OPS)
134 | """
135 | Compute the max number of paths per input per cell.
136 | Since there are two cells and two inputs per cell,
137 | total paths = 4 * max_paths
138 | """
139 | if not long_paths:
140 | max_paths = 1 + sum([num_ops ** i for i in range(NUM_VERTICES)])
141 | else:
142 | max_paths = sum([num_ops ** i for i in range(NUM_VERTICES + 1)])
143 | path_indices = []
144 |
145 | # set the base index based on the cell and the input
146 | for i, paths in enumerate((normal_paths, reduce_paths)):
147 | for path in paths:
148 | index = i * 2 * max_paths
149 | if path[0] == INPUT_2:
150 | index += max_paths
151 |
152 | # recursively compute the index of the path
153 | for j in range(NUM_VERTICES + 1):
154 | if j == len(path) - 1:
155 | path_indices.append(index)
156 | break
157 | elif j == (NUM_VERTICES - 1) and not long_paths:
158 | path_indices.append(2 * (i + 1) * max_paths - 1)
159 | break
160 | else:
161 | index += num_ops ** j * (OPS.index(path[j + 1]) + 1)
162 |
163 | return (tuple(path_indices), contains_long_path)
164 |
165 | def encode_paths(self, long_paths=True):
166 | # output one-hot encoding of paths
167 | path_indices, _ = self.get_path_indices(long_paths=long_paths)
168 | num_ops = len(OPS)
169 |
170 | if not long_paths:
171 | max_paths = 1 + sum([num_ops ** i for i in range(NUM_VERTICES)])
172 | else:
173 | max_paths = sum([num_ops ** i for i in range(NUM_VERTICES + 1)])
174 |
175 | path_encoding = np.zeros(4 * max_paths)
176 | for index in path_indices:
177 | path_encoding[index] = 1
178 | return path_encoding
179 |
180 | def path_distance(self, other):
181 | # compute the distance between two architectures
182 | # by comparing their path encodings
183 | return np.sum(np.array(self.encode_paths() != np.array(other.encode_paths())))
184 |
185 | def get_neighborhood(self, shuffle=True):
186 | op_nbhd = []
187 | edge_nbhd = []
188 |
189 | for i, cell in enumerate(self.arch):
190 | for j, pair in enumerate(cell):
191 |
192 | # mutate the op
193 | available = [op for op in range(len(OPS)) if op != pair[1]]
194 | for op in available:
195 | new_arch = self.get_arch_list()
196 | new_arch[i][j][1] = op
197 | op_nbhd.append({'spec': new_arch})
198 |
199 | # mutate the edge
200 | other = j + 1 - 2 * (j % 2)
201 | available = [edge for edge in range(j//2+2) \
202 | if edge not in [cell[other][0], pair[0]]]
203 |
204 | for edge in available:
205 | new_arch = self.get_arch_list()
206 | new_arch[i][j][0] = edge
207 | edge_nbhd.append({'spec': new_arch})
208 |
209 | if shuffle:
210 | random.shuffle(edge_nbhd)
211 | random.shuffle(op_nbhd)
212 |
213 |         # 24 in edge nbhd, 112 in op nbhd
214 | # alternate one edge nbr per 4 op nbrs
215 | nbrs = []
216 | op_idx = 0
217 | for i in range(len(edge_nbhd)):
218 | nbrs.append(edge_nbhd[i])
219 | for j in range(4):
220 | nbrs.append(op_nbhd[op_idx])
221 | op_idx += 1
222 | nbrs = [*nbrs, *op_nbhd[op_idx:]]
223 |
224 | return nbrs
225 |
226 |
227 |
--------------------------------------------------------------------------------
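
A quick check of the path-encoding length produced by encode_paths above: with 8 operations and 4 vertices there are 8^0 + ... + 8^4 = 4681 possible paths per (cell, input) pair, and two cells with two inputs each give four such pairs.

num_ops, num_vertices = 8, 4            # len(OPS), NUM_VERTICES
max_paths = sum(num_ops ** i for i in range(num_vertices + 1))
print(max_paths)          # 4681
print(4 * max_paths)      # 18724, the length of the vector returned by encode_paths()
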
/darts/local_search_runner.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import logging
4 | import sys
5 | import os
6 | import pickle
7 | import numpy as np
8 | import copy
9 |
10 | sys.path.append(os.path.expanduser('~/local_search'))
11 | os.environ['search_space'] = 'darts'
12 | from data import Data
13 |
14 |
15 | """
16 | local_search_runner is used in run_experiments.sh
17 | """
18 |
19 | def compute_next_arches(search_space, data,
20 | query=0,
21 | filepath='trained_spec',
22 | loss='val_loss_avg',
23 | k=1):
24 | new_dicts = []
25 | best = -1
26 | best_val = 100
27 | last_chosen = -1
28 | for i, arch_dict in enumerate(data):
29 | if 'chosen' in arch_dict:
30 | last_chosen = i
31 | if arch_dict[loss] < best_val:
32 | best = i
33 | best_val = arch_dict[loss]
34 |
35 | new_chosen = -1
36 | if last_chosen == -1:
37 | # if we just finished the random initialization
38 | new_chosen = best
39 |
40 | if data[-1][loss] < data[last_chosen][loss]:
41 | # if the last architecture did better than its parent
42 | new_chosen = len(data) - 1
43 |
44 | print('last chosen', last_chosen, 'new chosen', new_chosen)
45 | if new_chosen >= 0:
46 | # get its neighbors and return them
47 | print('new chosen arch:', new_chosen, data[new_chosen][loss])
48 | neighbors = search_space.get_nbhd(data[new_chosen]['spec'])
49 | neighbors = [nbr['spec'] for nbr in neighbors]
50 | dict_with_nbrs = copy.deepcopy(data[new_chosen])
51 | dict_with_nbrs['neighbors'] = neighbors
52 | dict_with_nbrs['chosen'] = 1
53 | if 'parent' not in dict_with_nbrs:
54 | dict_with_nbrs['parent'] = last_chosen
55 |
56 | filename = '{}_{}.pkl'.format(filepath, dict_with_nbrs['index'])
57 | dict_with_nbrs['filepath'] = filename
58 | with open(filename, 'wb') as f:
59 | pickle.dump(dict_with_nbrs, f)
60 |
61 | for i in range(k):
62 | new_dicts.append({'spec':neighbors[i], 'parent':new_chosen})
63 | return new_dicts
64 |
65 | # try more neighbors from the last chosen architecture
66 | neighbors = data[last_chosen]['neighbors']
67 | if len(neighbors) <= len(data) - (last_chosen + 1):
68 | print('reached local minimum:', last_chosen, data[last_chosen][loss])
69 | else:
70 | nbr_index = len(data) - (last_chosen + 1)
71 | for i in range(nbr_index, min(len(neighbors) - 1, nbr_index + k)):
72 | new_dicts.append({'spec':neighbors[i], 'parent':last_chosen})
73 | return new_dicts
74 |
75 | def run_local_search(args):
76 |
77 | save_dir = '{}/'.format(args.experiment_name)
78 | if not os.path.exists(save_dir):
79 | os.mkdir(save_dir)
80 |
81 | query = args.query
82 | num_init = args.num_init
83 | k = args.k
84 | trained_prefix = args.trained_filename
85 | untrained_prefix = args.untrained_filename
86 |
87 | search_space = Data('darts')
88 |
89 |     # if it's the first iteration, choose num_init arches at random to train
90 | if query == 0:
91 | print('about to generate {} random'.format(num_init))
92 | data = search_space.generate_random_dataset(num=num_init, train=False)
93 | next_arches = [{'spec':d['spec']} for d in data]
94 |
95 | elif query < num_init:
96 | # if we're still training the initial arches, continue
97 | return
98 |
99 | else:
100 | # get the data from prior iterations from pickle files
101 | data = []
102 | for i in range(query):
103 |
104 | filepath = '{}{}_{}.pkl'.format(save_dir, trained_prefix, i)
105 | with open(filepath, 'rb') as f:
106 | arch = pickle.load(f)
107 | data.append(arch)
108 |
109 | print('Iteration {}'.format(query))
110 | print('Arches so far')
111 | for d in data:
112 | print(d['spec'])
113 | print('val_loss', d['val_loss_avg'])
114 | if 'chosen' in d and 'parent' in d:
115 | print('chosen', 'parent', d['parent'])
116 | elif 'chosen' in d:
117 | print('chosen')
118 |
119 |         # run local search to compute the next arches
120 | filepath = save_dir + trained_prefix
121 | next_arches = compute_next_arches(search_space, data,
122 | query=query,
123 | filepath=filepath,
124 | k=k)
125 |
126 | print('next arch(es)')
127 | print([arch['spec'] for arch in next_arches])
128 |
129 | # output the new arches to pickle files
130 | num_to_train = k if query != 0 else num_init
131 | for i in range(num_to_train):
132 | index = query + i
133 | filepath = '{}{}_{}.pkl'.format(save_dir, untrained_prefix, index)
134 | next_arches[i]['index'] = index
135 | next_arches[i]['filepath'] = filepath
136 | with open(filepath, 'wb') as f:
137 | pickle.dump(next_arches[i], f)
138 |
139 |
140 | def main(args):
141 |
142 | #set up save dir
143 | save_dir = './'
144 |
145 | #set up logging
146 | log_format = '%(asctime)s %(message)s'
147 | logging.basicConfig(stream=sys.stdout, level=logging.INFO,
148 | format=log_format, datefmt='%m/%d %I:%M:%S %p')
149 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
150 | fh.setFormatter(logging.Formatter(log_format))
151 | logging.getLogger().addHandler(fh)
152 | logging.info(args)
153 |
154 | run_local_search(args)
155 |
156 | if __name__ == "__main__":
157 |     parser = argparse.ArgumentParser(description='Args for local search runner')
158 | parser.add_argument('--experiment_name', type=str, default='ls_darts_test', help='Folder for input/output files')
159 | parser.add_argument('--trained_filename', type=str, default='trained_spec', help='name of input files')
160 | parser.add_argument('--untrained_filename', type=str, default='untrained_spec', help='name of output files')
161 | parser.add_argument('--query', type=int, default=0, help='What query is the algorithm on')
162 | parser.add_argument('--num_init', type=int, default=20, help='Number of initial random architectures')
163 | parser.add_argument('--k', type=int, default=1, help='Number of architectures per iteration')
164 |
165 | args = parser.parse_args()
166 | main(args)
--------------------------------------------------------------------------------
/darts/run_experiments.sh:
--------------------------------------------------------------------------------
1 |
2 | param_str=test
3 |
4 | # set parameters based on the param string
5 | if [ $param_str = test ]; then
6 | num_init=2
7 | start_iteration=0
8 | end_iteration=10
9 | epochs=1
10 | experiment_name=ls_darts_test
11 | fi
12 | if [ $param_str = twentyfive ]; then
13 | num_init=10
14 | start_iteration=0
15 | end_iteration=200
16 | epochs=25
17 | experiment_name=ls_darts_twentyfive
18 | fi
19 | if [ $param_str = fifty ]; then
20 | num_init=10
21 | start_iteration=0
22 | end_iteration=100
23 | epochs=50
24 | experiment_name=ls_darts_fifty
25 | fi
26 |
27 | for query in $(seq $start_iteration $end_iteration)
28 | do
29 |
30 | echo about to run local search round $query
31 | python darts/local_search_runner.py --experiment_name $experiment_name \
32 | --query $query --num_init $num_init
33 |
34 | untrained_filepath=$experiment_name/untrained_spec\_$query.pkl
35 | trained_filepath=$experiment_name/trained_spec\_$query.pkl
36 |
37 | echo about to train architecture $query
38 |
39 | python train_arch_runner.py --untrained_filepath $untrained_filepath \
40 | --trained_filepath $trained_filepath --epochs $epochs >> training.out
41 |
42 | echo finished iteration $query
43 | done
44 |
45 |
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pickle
3 | import sys
4 | import os
5 |
6 | if 'search_space' not in os.environ or os.environ['search_space'] == 'nasbench':
7 | from nasbench import api
8 | from nas_bench.cell import Cell
9 |
10 | elif os.environ['search_space'] == 'darts':
11 | from darts.arch import Arch
12 |
13 | elif os.environ['search_space'][:12] == 'nasbench_201':
14 | from nas_201_api import NASBench201API as API
15 | from nas_bench_201.cell import Cell
16 |
17 | else:
18 | print('Invalid search space environ in data.py')
19 | sys.exit()
20 |
21 |
22 | class Data:
23 |
24 | def __init__(self,
25 | search_space,
26 | dataset='cifar10',
27 | nasbench_folder='./',
28 | loaded_nasbench=None):
29 | self.search_space = search_space
30 | self.dataset = dataset
31 |
32 | if loaded_nasbench:
33 | self.nasbench = loaded_nasbench
34 | elif search_space == 'nasbench':
35 | self.nasbench = api.NASBench(nasbench_folder + 'nasbench_only108.tfrecord')
36 | elif search_space == 'nasbench_201':
37 | self.nasbench = API(os.path.expanduser('~/nas-bench-201/NAS-Bench-201-v1_0-e61699.pth'))
38 | elif search_space != 'darts':
39 | print(search_space, 'is not a valid search space')
40 | sys.exit()
41 |
42 | def get_type(self):
43 | return self.search_space
44 |
45 | def query_arch(self,
46 | arch=None,
47 | train=True,
48 | encoding_type='path',
49 | cutoff=-1,
50 | deterministic=True,
51 | epochs=0):
52 |
53 | arch_dict = {}
54 | arch_dict['epochs'] = epochs
55 | if self.search_space in ['nasbench', 'nasbench_201']:
56 | if arch is None:
57 | arch = Cell.random_cell(self.nasbench)
58 |
59 | arch_dict['spec'] = arch
60 |
61 | if encoding_type == 'adj':
62 | encoding = Cell(**arch).encode_standard()
63 | elif encoding_type == 'path':
64 | encoding = Cell(**arch).encode_paths()
65 | elif encoding_type == 'trunc_path':
66 | encoding = Cell(**arch).encode_paths()[:cutoff]
67 | else:
68 | print('invalid encoding type')
69 |
70 | arch_dict['encoding'] = encoding
71 |
72 | if train:
73 | arch_dict['val_loss'] = Cell(**arch).get_val_loss(self.nasbench,
74 | deterministic=deterministic,
75 | dataset=self.dataset)
76 | arch_dict['test_loss'] = Cell(**arch).get_test_loss(self.nasbench,
77 | dataset=self.dataset)
78 | arch_dict['num_params'] = Cell(**arch).get_num_params(self.nasbench)
79 | arch_dict['val_per_param'] = (arch_dict['val_loss'] - 4.8) * (arch_dict['num_params'] ** 0.5) / 100
80 |
81 | else:
82 | if arch is None:
83 | arch = Arch.random_arch()
84 |
85 | arch_dict['spec'] = arch
86 |
87 | if encoding_type == 'path':
88 | encoding = Arch(arch).encode_paths()
89 | elif encoding_type == 'trunc_path':
90 | encoding = Arch(arch).encode_paths()[:cutoff]
91 | else:
92 | encoding = arch
93 |
94 | arch_dict['encoding'] = encoding
95 |
96 | if train:
97 | if epochs == 0:
98 | epochs = 50
99 | arch_dict['val_loss'], arch_dict['test_loss'] = Arch(arch).query(epochs=epochs)
100 |
101 | return arch_dict
102 |
103 | def mutate_arch(self,
104 | arch,
105 | mutation_rate=1.0):
106 | if self.search_space in ['nasbench', 'nasbench_201']:
107 | return Cell(**arch).mutate(self.nasbench,
108 | mutation_rate=mutation_rate)
109 | else:
110 | return Arch(arch).mutate(int(mutation_rate))
111 |
112 | def get_hash(self, arch):
113 | # return the path indices of the architecture, used as a hash
114 | if self.search_space == 'nasbench':
115 | return Cell(**arch).get_path_indices()
116 | elif self.search_space == 'darts':
117 | return Arch(arch).get_path_indices()[0]
118 | else:
119 | return Cell(**arch).get_string()
120 |
121 | def generate_random_dataset(self,
122 | num=10,
123 | train=True,
124 | encoding_type='path',
125 | cutoff=-1,
126 | random='standard',
127 | allow_isomorphisms=False,
128 | deterministic_loss=True,
129 | patience_factor=5):
130 | """
131 |         create a dataset of randomly sampled architectures
132 | test for isomorphisms using a hash map of path indices
133 | use patience_factor to avoid infinite loops
134 | """
135 | data = []
136 | dic = {}
137 | tries_left = num * patience_factor
138 | while len(data) < num:
139 | tries_left -= 1
140 | if tries_left <= 0:
141 | break
142 | arch_dict = self.query_arch(train=train,
143 | encoding_type=encoding_type,
144 | cutoff=cutoff,
145 | deterministic=deterministic_loss)
146 |
147 | h = self.get_hash(arch_dict['spec'])
148 | if allow_isomorphisms or h not in dic:
149 | dic[h] = 1
150 | data.append(arch_dict)
151 |
152 | return data
153 |
154 | def get_candidates(self,
155 | data,
156 | num=100,
157 | acq_opt_type='mutation',
158 | encoding_type='path',
159 | cutoff=-1,
160 | loss='val_loss',
161 | patience_factor=5,
162 | deterministic_loss=True,
163 | num_arches_to_mutate=1,
164 | max_mutation_rate=1,
165 | allow_isomorphisms=False):
166 | """
167 | Creates a set of candidate architectures with mutated and/or random architectures
168 | """
169 |
170 | candidates = []
171 | # set up hash map
172 | dic = {}
173 | for d in data:
174 | arch = d['spec']
175 | h = self.get_hash(arch)
176 | dic[h] = 1
177 |
178 | if acq_opt_type in ['mutation', 'mutation_random']:
179 | # mutate architectures with the lowest loss
180 | best_arches = [arch['spec'] for arch in sorted(data, key=lambda i:i[loss])[:num_arches_to_mutate * patience_factor]]
181 |
182 | # stop when candidates is size num
183 | # use patience_factor instead of a while loop to avoid long or infinite runtime
184 | for arch in best_arches:
185 | if len(candidates) >= num:
186 | break
187 | for i in range(num // num_arches_to_mutate // max_mutation_rate):
188 | for rate in range(1, max_mutation_rate + 1):
189 | mutated = self.mutate_arch(arch, mutation_rate=rate)
190 | arch_dict = self.query_arch(mutated,
191 | train=False,
192 | encoding_type=encoding_type,
193 | cutoff=cutoff)
194 | h = self.get_hash(mutated)
195 |
196 | if allow_isomorphisms or h not in dic:
197 | dic[h] = 1
198 | candidates.append(arch_dict)
199 |
200 | if acq_opt_type in ['random', 'mutation_random']:
201 | # add randomly sampled architectures to the set of candidates
202 | for _ in range(num * patience_factor):
203 | if len(candidates) >= 2 * num:
204 | break
205 |
206 | arch_dict = self.query_arch(train=False,
207 | encoding_type=encoding_type,
208 | cutoff=cutoff)
209 | h = self.get_hash(arch_dict['spec'])
210 |
211 | if allow_isomorphisms or h not in dic:
212 | dic[h] = 1
213 | candidates.append(arch_dict)
214 |
215 | return candidates
216 |
217 | def remove_duplicates(self, candidates, data):
218 |         # input: two sets of architectures: candidates and data
219 | # output: candidates with arches from data removed
220 |
221 | dic = {}
222 | for d in data:
223 | dic[self.get_hash(d['spec'])] = 1
224 | unduplicated = []
225 | for candidate in candidates:
226 | if self.get_hash(candidate['spec']) not in dic:
227 | dic[self.get_hash(candidate['spec'])] = 1
228 | unduplicated.append(candidate)
229 | return unduplicated
230 |
231 | def encode_data(self, dicts):
232 | """
233 | method used by metann_runner.py (for Arch)
234 | input: list of arch dictionary objects
235 | output: xtrain (encoded architectures), ytrain (val loss)
236 | """
237 | data = []
238 |
239 | for dic in dicts:
240 | arch = dic['spec']
241 | encoding = Arch(arch).encode_paths()
242 | data.append((arch, encoding, dic['val_loss_avg'], None))
243 |
244 | return data
245 |
246 | def get_arch_list(self,
247 | aux_file_path,
248 | iteridx=0,
249 | num_top_arches=5,
250 | max_edits=20,
251 | num_repeats=5,
252 | verbose=1):
253 | # Method used for gp_bayesopt
254 |
255 | if self.search_space == 'darts':
256 | print('get_arch_list only supported for nasbench and nasbench_201')
257 | sys.exit()
258 |
259 | # load the list of architectures chosen by bayesopt so far
260 | base_arch_list = pickle.load(open(aux_file_path, 'rb'))
261 | top_arches = [archtuple[0] for archtuple in base_arch_list[:num_top_arches]]
262 | if verbose:
263 | top_5_loss = [archtuple[1][0] for archtuple in base_arch_list[:min(5, len(base_arch_list))]]
264 | print('top 5 val losses {}'.format(top_5_loss))
265 |
266 | # perturb the best k architectures
267 | dic = {}
268 | for archtuple in base_arch_list:
269 | path_indices = Cell(**archtuple[0]).get_path_indices()
270 | dic[path_indices] = 1
271 |
272 | new_arch_list = []
273 | for arch in top_arches:
274 | for edits in range(1, max_edits):
275 | for _ in range(num_repeats):
276 | perturbation = Cell(**arch).perturb(self.nasbench, edits)
277 | path_indices = Cell(**perturbation).get_path_indices()
278 | if path_indices not in dic:
279 | dic[path_indices] = 1
280 | new_arch_list.append(perturbation)
281 |
282 | # make sure new_arch_list is not empty
283 | while len(new_arch_list) == 0:
284 | for _ in range(100):
285 | arch = Cell.random_cell(self.nasbench)
286 | path_indices = Cell(**arch).get_path_indices()
287 | if path_indices not in dic:
288 | dic[path_indices] = 1
289 | new_arch_list.append(arch)
290 |
291 | return new_arch_list
292 |
293 | @classmethod
294 | def generate_distance_matrix(cls, arches_1, arches_2, distance):
295 | # Method used for gp_bayesopt for nasbench
296 | matrix = np.zeros([len(arches_1), len(arches_2)])
297 | for i, arch_1 in enumerate(arches_1):
298 | for j, arch_2 in enumerate(arches_2):
299 | if distance == 'edit_distance':
300 | matrix[i][j] = Cell(**arch_1).edit_distance(Cell(**arch_2))
301 | elif distance == 'path_distance':
302 | matrix[i][j] = Cell(**arch_1).path_distance(Cell(**arch_2))
303 | elif distance == 'trunc_path_distance':
304 | matrix[i][j] = Cell(**arch_1).path_distance(Cell(**arch_2))
305 | elif distance == 'nasbot_distance':
306 | matrix[i][j] = Cell(**arch_1).nasbot_distance(Cell(**arch_2))
307 | else:
308 | print('{} is an invalid distance'.format(distance))
309 | sys.exit()
310 | return matrix
311 |
312 | def get_nbhd(self, arch):
313 | if self.search_space in ['nasbench', 'nasbench_201']:
314 | return Cell(**arch).get_neighborhood(self.nasbench)
315 | else:
316 | return Arch(arch).get_neighborhood()
317 |
--------------------------------------------------------------------------------
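
A usage sketch for the Data wrapper on the NAS-Bench-101 search space. It assumes the nasbench package is installed and that nasbench_only108.tfrecord sits in the working directory; the search_space environment variable must be set before this module is imported, since the imports at the top of data.py depend on it.

import os
os.environ['search_space'] = 'nasbench'   # must be set before `from data import Data`
from data import Data

search_space = Data('nasbench')                                    # loads nasbench_only108.tfrecord from ./
init_data = search_space.generate_random_dataset(num=10)           # 10 random, trained cells
candidates = search_space.get_candidates(init_data, num=100)       # mutations of the best cells so far
candidates = search_space.remove_duplicates(candidates, init_data)
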
/img/local_search_fig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/local_search_fig.png
--------------------------------------------------------------------------------
/img/ls_101_titled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_101_titled.png
--------------------------------------------------------------------------------
/img/ls_baselines_101.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_baselines_101.png
--------------------------------------------------------------------------------
/img/ls_cifar10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_cifar10.png
--------------------------------------------------------------------------------
/img/ls_cifar100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_cifar100.png
--------------------------------------------------------------------------------
/img/ls_cifar10_titled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_cifar10_titled.png
--------------------------------------------------------------------------------
/img/ls_imagenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/ls_imagenet.png
--------------------------------------------------------------------------------
/img/real_synth_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/real_synth_data.png
--------------------------------------------------------------------------------
/img/structured.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/structured.png
--------------------------------------------------------------------------------
/img/uniform_preimages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/uniform_preimages.png
--------------------------------------------------------------------------------
/img/unstructured.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/realityengines/local_search/24d764704e3967bc97464656f4a7daf44a1c811c/img/unstructured.png
--------------------------------------------------------------------------------
/meta_neural_net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import os
4 | import random
5 | import sys
6 |
7 | import numpy as np
8 | from matplotlib import pyplot as plt
9 | from tensorflow import keras
10 | import tensorflow as tf
11 | from tensorflow.keras import backend as K
12 | from tensorflow.keras.models import Sequential
13 | from tensorflow.keras.optimizers import Adam
14 |
15 | def mle_loss(y_true, y_pred):
16 |     # Maximum likelihood estimate loss function (Gaussian negative log-likelihood)
17 | mean = tf.slice(y_pred, [0, 0], [-1, 1])
18 | var = tf.slice(y_pred, [0, 1], [-1, 1])
19 |     return 0.5 * tf.math.log(2*np.pi*var) + tf.square(y_true - mean) / (2*var)
20 |
21 |
22 | def mape_loss(y_true, y_pred):
23 |     # Mean absolute percentage error loss function (relative to a fixed lower bound)
24 | lower_bound = 4.5
25 | fraction = tf.math.divide(tf.subtract(y_pred, lower_bound), \
26 | tf.subtract(y_true, lower_bound))
27 | return tf.abs(tf.subtract(fraction, 1))
28 |
29 |
30 | class MetaNeuralnet:
31 |
32 | def get_dense_model(self,
33 | input_dims,
34 | num_layers,
35 | layer_width,
36 | loss,
37 | regularization):
38 | input_layer = keras.layers.Input(input_dims)
39 | model = keras.models.Sequential()
40 |
41 | for _ in range(num_layers):
42 | model.add(keras.layers.Dense(layer_width, activation='relu'))
43 |
44 | model = model(input_layer)
45 | if loss == 'mle':
46 | mean = keras.layers.Dense(1)(model)
47 | var = keras.layers.Dense(1)(model)
48 | var = keras.layers.Activation(tf.math.softplus)(var)
49 | output = keras.layers.concatenate([mean, var])
50 | else:
51 | if regularization == 0:
52 | output = keras.layers.Dense(1)(model)
53 | else:
54 | reg = keras.regularizers.l1(regularization)
55 | output = keras.layers.Dense(1, kernel_regularizer=reg)(model)
56 |
57 | dense_net = keras.models.Model(inputs=input_layer, outputs=output)
58 | return dense_net
59 |
60 | def fit(self, xtrain, ytrain,
61 | num_layers=10,
62 | layer_width=20,
63 | loss='mae',
64 | epochs=200,
65 | batch_size=32,
66 | lr=.01,
67 | verbose=0,
68 | regularization=0,
69 | **kwargs):
70 |
71 | if loss == 'mle':
72 | loss_fn = mle_loss
73 | elif loss == 'mape':
74 | loss_fn = mape_loss
75 | else:
76 | loss_fn = 'mae'
77 |
78 | self.model = self.get_dense_model((xtrain.shape[1],),
79 |                                           loss=loss,  # pass the loss name so get_dense_model builds the two-headed 'mle' output when requested
80 | num_layers=num_layers,
81 | layer_width=layer_width,
82 | regularization=regularization)
83 | optimizer = keras.optimizers.Adam(lr=lr, beta_1=.9, beta_2=.99)
84 |
85 | self.model.compile(optimizer=optimizer, loss=loss_fn)
86 | #print(self.model.summary())
87 | self.model.fit(xtrain, ytrain,
88 | batch_size=batch_size,
89 | epochs=epochs,
90 | verbose=verbose)
91 |
92 | train_pred = np.squeeze(self.model.predict(xtrain))
93 | train_error = np.mean(abs(train_pred-ytrain))
94 | return train_error
95 |
96 | def predict(self, xtest):
97 | return self.model.predict(xtest)
98 |
--------------------------------------------------------------------------------
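Note: mle_loss above is the Gaussian negative log-likelihood of the target under the predicted (mean, variance) pair, and mape_loss is an absolute percentage error measured relative to a fixed lower bound. A minimal numpy sketch of the same pointwise formulas (illustrative only, not part of the repository; the scalar values are made up):

import numpy as np

y_true, mean, var = 6.2, 6.0, 0.25
nll = 0.5 * np.log(2 * np.pi * var) + (y_true - mean) ** 2 / (2 * var)   # mle_loss for one point

lower_bound = 4.5
mape = abs((6.0 - lower_bound) / (y_true - lower_bound) - 1)             # mape_loss for one point
print(round(nll, 4), round(mape, 4))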
/meta_neuralnet.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Train a Meta Neural Network on NASBench\n",
8 | "## Predict the accuracy of neural networks to within one percent!"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "%load_ext autoreload\n",
18 | "%autoreload 2\n",
19 | "%matplotlib inline"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "import numpy as np\n",
29 | "from matplotlib import pyplot as plt\n",
30 | "from nasbench import api\n",
31 | "\n",
32 | "from data import Data\n",
33 | "from meta_neural_net import MetaNeuralnet"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "# define a function to plot the meta neural networks\n",
43 | "\n",
44 | "def plot_meta_neuralnet(ytrain, train_pred, ytest, test_pred, max_disp=500, title=None):\n",
45 | " \n",
46 | " plt.scatter(ytrain[:max_disp], train_pred[:max_disp], label='training data', alpha=0.7, s=64)\n",
47 | " plt.scatter(ytest[:max_disp], test_pred[:max_disp], label = 'test data', alpha=0.7, marker='^')\n",
48 | "\n",
49 | " # axis limits\n",
50 | " plt.xlim((5, 15))\n",
51 | " plt.ylim((5, 15))\n",
52 | " ax_lim = np.array([np.min([plt.xlim()[0], plt.ylim()[0]]),\n",
53 | " np.max([plt.xlim()[1], plt.ylim()[1]])])\n",
54 | " plt.xlim(ax_lim)\n",
55 | " plt.ylim(ax_lim)\n",
56 | " \n",
57 | " # 45-degree line\n",
58 | " plt.plot(ax_lim, ax_lim, 'k:') \n",
59 | " \n",
60 | " plt.gca().set_aspect('equal', adjustable='box')\n",
61 | " plt.title(title)\n",
62 | " plt.legend(loc='best')\n",
63 | " plt.xlabel('true percent error')\n",
64 | " plt.ylabel('predicted percent error')\n",
65 | " plt.show()"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "scrolled": false
73 | },
74 | "outputs": [],
75 | "source": [
76 | "# load the NASBench dataset\n",
77 | "# takes about 1 minute to load the nasbench dataset\n",
78 | "search_space = Data('nasbench')\n"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "# method which runs a meta neural network experiment\n",
88 | "def meta_neuralnet_experiment(params, \n",
89 | " ns=[100, 500], \n",
90 | " num_ensemble=3, \n",
91 | " test_size=500,\n",
92 | " cutoff=40,\n",
93 | " plot=True):\n",
94 | " \n",
95 | " for n in ns:\n",
96 | " for encoding_type in ['adj', 'path']:\n",
97 | "\n",
98 | " train_data = search_space.generate_random_dataset(num=n, \n",
99 | " encoding_type=encoding_type,\n",
100 | " cutoff=cutoff)\n",
101 | " \n",
102 | " test_data = search_space.generate_random_dataset(num=test_size, \n",
103 | " encoding_type=encoding_type,\n",
104 | " cutoff=cutoff)\n",
105 | " \n",
106 | " print(len(test_data))\n",
107 | " test_data = search_space.remove_duplicates(test_data, train_data)\n",
108 | " print(len(test_data))\n",
109 | " \n",
110 | " xtrain = np.array([d['encoding'] for d in train_data])\n",
111 | " ytrain = np.array([d['val_loss'] for d in train_data])\n",
112 | "\n",
113 | " xtest = np.array([d['encoding'] for d in test_data])\n",
114 | " ytest = np.array([d['val_loss'] for d in test_data])\n",
115 | "\n",
116 | " train_errors = []\n",
117 | " test_errors = []\n",
118 | " meta_neuralnet = MetaNeuralnet()\n",
119 | " for _ in range(num_ensemble): \n",
120 | " meta_neuralnet.fit(xtrain, ytrain, **params)\n",
121 | " train_pred = np.squeeze(meta_neuralnet.predict(xtrain))\n",
122 | " train_error = np.mean(abs(train_pred-ytrain))\n",
123 | " train_errors.append(train_error)\n",
124 | " test_pred = np.squeeze(meta_neuralnet.predict(xtest)) \n",
125 | " test_error = np.mean(abs(test_pred-ytest))\n",
126 | " test_errors.append(test_error)\n",
127 | "\n",
128 | " train_error = np.round(np.mean(train_errors, axis=0), 3)\n",
129 | " test_error = np.round(np.mean(test_errors, axis=0), 3)\n",
130 | " print('Meta neuralnet training size: {}, encoding type: {}'.format(n, encoding_type))\n",
131 | " print('Train error: {}, test error: {}'.format(train_error, test_error))\n",
132 | "\n",
133 | " if plot:\n",
134 | " if encoding_type == 'path':\n",
135 | " title = 'Path encoding, training set size {}'.format(n)\n",
136 | " else:\n",
137 | " title = 'Adjacency list encoding, training set size {}'.format(n) \n",
138 | "\n",
139 | " plot_meta_neuralnet(ytrain, train_pred, ytest, test_pred, title=title)\n",
140 | " plt.show() \n",
141 | " print('correlation', np.corrcoef(ytest, test_pred)[1,0])"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "meta_neuralnet_params = {'loss':'mae', 'num_layers':10, 'layer_width':20, 'epochs':200, \\\n",
151 | " 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}\n"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {
158 | "scrolled": false
159 | },
160 | "outputs": [],
161 | "source": [
162 | "meta_neuralnet_experiment(meta_neuralnet_params)"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": []
171 | }
172 | ],
173 | "metadata": {
174 | "kernelspec": {
175 | "display_name": "Python 3",
176 | "language": "python",
177 | "name": "python3"
178 | },
179 | "language_info": {
180 | "codemirror_mode": {
181 | "name": "ipython",
182 | "version": 3
183 | },
184 | "file_extension": ".py",
185 | "mimetype": "text/x-python",
186 | "name": "python",
187 | "nbconvert_exporter": "python",
188 | "pygments_lexer": "ipython3",
189 | "version": "3.7.7"
190 | }
191 | },
192 | "nbformat": 4,
193 | "nbformat_minor": 2
194 | }
195 |
--------------------------------------------------------------------------------
/metann_runner.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import logging
4 | import sys
5 | import os
6 | import pickle
7 | import numpy as np
8 |
9 | from acquisition_functions import acq_fn
10 | from data import Data
11 | from meta_neural_net import MetaNeuralnet
12 |
13 |
14 | """
15 | Meta neural net runner, used by run_experiments_parallel.sh
16 | 
17 | - loads data by opening the k*i pickle files from previous iterations
18 | - trains a meta neural network and predicts the accuracy of all candidate architectures
19 | - outputs k pickle files with the architectures to be trained next
20 | """
21 |
22 | def run_meta_neuralnet(search_space, dicts,
23 | k=10,
24 | verbose=1,
25 | num_ensemble=5,
26 | epochs=10000,
27 | lr=0.00001,
28 | loss='scaled',
29 | explore_type='its',
30 | explore_factor=0.5):
31 |
32 | # data: list of arch dictionary objects
33 | # trains a meta neural network
34 | # returns list of k arch dictionary objects - the k best predicted
35 |
36 | results = []
37 | meta_neuralnet = MetaNeuralnet()
38 | data = search_space.encode_data(dicts)
39 | xtrain = np.array([d[1] for d in data])
40 | ytrain = np.array([d[2] for d in data])
41 |
42 | candidates = search_space.get_candidates(data,
43 | acq_opt_type='mutation_random',
44 | encode_paths=True,
45 | allow_isomorphisms=True,
46 | deterministic_loss=None)
47 |
48 | xcandidates = np.array([c[1] for c in candidates])
49 | candidates_specs = [c[0] for c in candidates]
50 | predictions = []
51 |
52 | # train an ensemble of neural networks
53 | train_error = 0
54 | for _ in range(num_ensemble):
55 | meta_neuralnet = MetaNeuralnet()
56 | train_error += meta_neuralnet.fit(xtrain, ytrain,
57 | loss=loss,
58 | epochs=epochs,
59 | lr=lr)
60 | predictions.append(np.squeeze(meta_neuralnet.predict(xcandidates)))
61 | train_error /= num_ensemble
62 | if verbose:
63 | print('Meta neural net train error: {}'.format(train_error))
64 |
65 | sorted_indices = acq_fn(predictions, explore_type)
66 |
67 | top_k_candidates = [candidates_specs[i] for i in sorted_indices[:k]]
68 | candidates_dict = []
69 | for candidate in top_k_candidates:
70 | d = {}
71 | d['spec'] = candidate
72 | candidates_dict.append(d)
73 |
74 | return candidates_dict
75 |
76 |
77 | def run(args):
78 |
79 | save_dir = '{}/'.format(args.experiment_name)
80 | if not os.path.exists(save_dir):
81 | os.mkdir(save_dir)
82 |
83 | query = args.query
84 | k = args.k
85 | trained_prefix = args.trained_filename
86 | untrained_prefix = args.untrained_filename
87 | threshold = args.threshold
88 |
89 | search_space = Data('darts')
90 |
91 | # if it's the first iteration, choose k arches at random to train
92 | if query == 0:
93 | print('about to generate {} random'.format(k))
94 | data = search_space.generate_random_dataset(num=k, train=False)
95 | arches = [d['spec'] for d in data]
96 |
97 | next_arches = []
98 | for arch in arches:
99 | d = {}
100 | d['spec'] = arch
101 | next_arches.append(d)
102 |
103 | else:
104 | # get the data from prior iterations from pickle files
105 | data = []
106 | for i in range(query):
107 |
108 | filepath = '{}{}_{}.pkl'.format(save_dir, trained_prefix, i)
109 | with open(filepath, 'rb') as f:
110 | arch = pickle.load(f)
111 | data.append(arch)
112 |
113 | print('Iteration {}'.format(query))
114 | print('Data from last round')
115 | print(data)
116 |
117 | # run the meta neural net to output the next arches
118 | next_arches = run_meta_neuralnet(search_space, data, k=k)
119 |
120 | print('next batch')
121 | print(next_arches)
122 |
123 | # output the new arches to pickle files
124 | for i in range(k):
125 | index = query + i
126 | filepath = '{}{}_{}.pkl'.format(save_dir, untrained_prefix, index)
127 | next_arches[i]['index'] = index
128 | next_arches[i]['filepath'] = filepath
129 | with open(filepath, 'wb') as f:
130 | pickle.dump(next_arches[i], f)
131 |
132 |
133 | def main(args):
134 |
135 | #set up save dir
136 | save_dir = './'
137 |
138 | #set up logging
139 | log_format = '%(asctime)s %(message)s'
140 | logging.basicConfig(stream=sys.stdout, level=logging.INFO,
141 | format=log_format, datefmt='%m/%d %I:%M:%S %p')
142 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
143 | fh.setFormatter(logging.Formatter(log_format))
144 | logging.getLogger().addHandler(fh)
145 | logging.info(args)
146 |
147 | run(args)
148 |
149 | if __name__ == "__main__":
150 | parser = argparse.ArgumentParser(description='Args for meta neural net')
151 | parser.add_argument('--experiment_name', type=str, default='darts_test', help='Folder for input/output files')
152 | parser.add_argument('--params', type=str, default='test', help='Which set of params to use')
153 | parser.add_argument('--query', type=int, default=0, help='Which query is Neural BayesOpt on')
154 | parser.add_argument('--trained_filename', type=str, default='trained_spec', help='name of input files')
155 | parser.add_argument('--untrained_filename', type=str, default='untrained_spec', help='name of output files')
156 | parser.add_argument('--k', type=int, default=10, help='number of arches to train per iteration')
157 | parser.add_argument('--threshold', type=int, default=20, help='throw out arches with val loss above threshold')
158 |
159 | args = parser.parse_args()
160 | main(args)
--------------------------------------------------------------------------------
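Note: a sketch (not part of the repository) of the pickle-file naming convention implied by run() above, assuming the default argparse prefixes and k = 10. At iteration query (the number of architectures already trained), the runner reads the specs trained so far and writes the next k specs for the training runner to pick up:

save_dir = 'darts_test/'      # args.experiment_name
k, query = 10, 20             # e.g. the third round when k architectures are trained per round
inputs = ['{}trained_spec_{}.pkl'.format(save_dir, i) for i in range(query)]        # read this round
outputs = ['{}untrained_spec_{}.pkl'.format(save_dir, query + i) for i in range(k)] # written this round
print(inputs[-1], outputs[0])  # darts_test/trained_spec_19.pkl darts_test/untrained_spec_20.pkl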
/nas_algorithms.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import os
3 | import pickle
4 | import sys
5 | import copy
6 | import numpy as np
7 | import tensorflow as tf
8 | from argparse import Namespace
9 |
10 | from data import Data
11 |
12 |
13 | def run_nas_algorithm(algo_params, search_space, mp):
14 |
15 | # run nas algorithm
16 | ps = copy.deepcopy(algo_params)
17 | algo_name = ps.pop('algo_name')
18 |
19 | if algo_name == 'random':
20 | data = random_search(search_space, **ps)
21 | elif algo_name == 'evolution':
22 | data = evolution_search(search_space, **ps)
23 | elif algo_name == 'bananas':
24 | data = bananas(search_space, mp, **ps)
25 | elif algo_name == 'gp_bayesopt':
26 | data = gp_bayesopt_search(search_space, **ps)
27 | elif algo_name == 'dngo':
28 | data = dngo_search(search_space, **ps)
29 | elif algo_name == 'local_search':
30 | data = local_search(search_space, **ps)
31 | else:
32 |         print('invalid algorithm name: {}'.format(algo_name))
33 | sys.exit()
34 |
35 | k = 10
36 | if 'k' in ps:
37 | k = ps['k']
38 | total_queries = 150
39 | if 'total_queries' in ps:
40 | total_queries = ps['total_queries']
41 | loss = 'val_loss'
42 | if 'loss' in ps:
43 | loss = ps['loss']
44 |
45 | return compute_best_test_losses(data, k, total_queries, loss), data
46 |
47 |
48 | def compute_best_test_losses(data, k, total_queries, loss):
49 | """
50 | Given full data from a completed nas algorithm,
51 | output the test error of the arch with the best val error
52 | after every multiple of k
53 | """
54 | results = []
55 | for query in range(k, total_queries + k, k):
56 | best_arch = sorted(data[:query], key=lambda i:i[loss])[0]
57 | test_error = best_arch['test_loss']
58 | results.append((query, test_error))
59 |
60 | return results
61 |
62 |
63 | def local_search(search_space,
64 | num_init=10,
65 | k=10,
66 | loss='val_loss',
67 | query_full_nbhd=True,
68 | stop_at_minimum=True,
69 | total_queries=500,
70 | deterministic=True,
71 | verbose=1):
72 | """
73 | local search
74 | """
75 | query_dict = {}
76 | iter_dict = {}
77 | data = []
78 | query = 0
79 |
80 | while True:
81 | # loop over full runs of local search until we hit total_queries
82 |
83 | arch_dicts = []
84 | while len(arch_dicts) < num_init:
85 | arch_dict = search_space.query_arch(deterministic=deterministic)
86 |
87 | if search_space.get_hash(arch_dict['spec']) not in query_dict:
88 | query_dict[search_space.get_hash(arch_dict['spec'])] = 1
89 | data.append(arch_dict)
90 | arch_dicts.append(arch_dict)
91 | query += 1
92 | if query >= total_queries:
93 | return data
94 |
95 | sorted_arches = sorted([(arch, arch[loss]) for arch in arch_dicts], key=lambda i:i[1])
96 | arch_dict = sorted_arches[0][0]
97 |
98 | while True:
99 | # loop over iterations of local search until we hit a local minimum
100 | if verbose:
101 | print('starting iteration, query', query)
102 | iter_dict[search_space.get_hash(arch_dict['spec'])] = 1
103 | nbhd = search_space.get_nbhd(arch_dict['spec'])
104 | improvement = False
105 | nbhd_dicts = []
106 | for nbr in nbhd:
107 | if search_space.get_hash(nbr) not in query_dict:
108 | query_dict[search_space.get_hash(nbr)] = 1
109 | nbr_dict = search_space.query_arch(nbr, deterministic=deterministic)
110 | data.append(nbr_dict)
111 | nbhd_dicts.append(nbr_dict)
112 | query += 1
113 | if query >= total_queries:
114 | return data
115 | if nbr_dict[loss] < arch_dict[loss]:
116 | improvement = True
117 | if not query_full_nbhd:
118 | arch_dict = nbr_dict
119 | break
120 |
121 | if not stop_at_minimum:
122 | sorted_data = sorted([(arch, arch[loss]) for arch in data], key=lambda i:i[1])
123 | index = 0
124 | while search_space.get_hash(sorted_data[index][0]['spec']) in iter_dict:
125 | index += 1
126 |
127 | arch_dict = sorted_data[index][0]
128 |
129 | elif not improvement:
130 | break
131 |
132 | else:
133 | sorted_nbhd = sorted([(nbr, nbr[loss]) for nbr in nbhd_dicts], key=lambda i:i[1])
134 | arch_dict = sorted_nbhd[0][0]
135 |
136 | if verbose:
137 | top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))]
138 | print('local_search, query {}, top 5 losses {}'.format(query, top_5_loss))
139 |
140 |
141 | def random_search(search_space,
142 | total_queries=150,
143 | loss='val_loss',
144 | deterministic=True,
145 | verbose=1):
146 | """
147 | random search
148 | """
149 | data = search_space.generate_random_dataset(num=total_queries,
150 | encoding_type='adj',
151 | deterministic_loss=deterministic)
152 |
153 | if verbose:
154 | top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))]
155 | print('random, query {}, top 5 losses {}'.format(total_queries, top_5_loss))
156 | return data
157 |
158 |
159 | def evolution_search(search_space,
160 | total_queries=150,
161 | num_init=10,
162 | k=10,
163 | loss='val_loss',
164 | population_size=30,
165 | tournament_size=10,
166 | mutation_rate=1.0,
167 | deterministic=True,
168 | regularize=True,
169 | verbose=1):
170 | """
171 | regularized evolution
172 | """
173 | data = search_space.generate_random_dataset(num=num_init,
174 | deterministic_loss=deterministic)
175 |
176 | losses = [d[loss] for d in data]
177 | query = num_init
178 | population = [i for i in range(min(num_init, population_size))]
179 |
180 | while query <= total_queries:
181 |
182 | # evolve the population by mutating the best architecture
183 | # from a random subset of the population
184 | sample = np.random.choice(population, tournament_size)
185 | best_index = sorted([(i, losses[i]) for i in sample], key=lambda i:i[1])[0][0]
186 | mutated = search_space.mutate_arch(data[best_index]['spec'],
187 | mutation_rate=mutation_rate)
188 | arch_dict = search_space.query_arch(mutated, deterministic=deterministic)
189 | data.append(arch_dict)
190 | losses.append(arch_dict[loss])
191 | population.append(len(data) - 1)
192 |
193 | # kill the oldest (or worst) from the population
194 | if len(population) >= population_size:
195 | if regularize:
196 | oldest_index = sorted([i for i in population])[0]
197 | population.remove(oldest_index)
198 | else:
199 | worst_index = sorted([(i, losses[i]) for i in population], key=lambda i:i[1])[-1][0]
200 | population.remove(worst_index)
201 |
202 | if verbose and (query % k == 0):
203 | top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))]
204 | print('evolution, query {}, top 5 losses {}'.format(query, top_5_loss))
205 |
206 | query += 1
207 |
208 | return data
209 |
210 |
211 | def bananas(search_space,
212 | metann_params,
213 | num_init=10,
214 | k=10,
215 | loss='val_loss',
216 | total_queries=150,
217 | num_ensemble=5,
218 | acq_opt_type='mutation',
219 | num_arches_to_mutate=1,
220 | explore_type='its',
221 | encoding_type='trunc_path',
222 | cutoff=40,
223 | deterministic=True,
224 | verbose=1):
225 | """
226 | Bayesian optimization with a neural network model
227 | """
228 | from acquisition_functions import acq_fn
229 | from meta_neural_net import MetaNeuralnet
230 |
231 | data = search_space.generate_random_dataset(num=num_init,
232 | encoding_type=encoding_type,
233 | cutoff=cutoff,
234 | deterministic_loss=deterministic)
235 |
236 | query = num_init + k
237 |
238 | while query <= total_queries:
239 |
240 | xtrain = np.array([d['encoding'] for d in data])
241 | ytrain = np.array([d[loss] for d in data])
242 |
243 | if (query == num_init + k) and verbose:
244 | print('bananas xtrain shape', xtrain.shape)
245 | print('bananas ytrain shape', ytrain.shape)
246 |
247 | # get a set of candidate architectures
248 | candidates = search_space.get_candidates(data,
249 | acq_opt_type=acq_opt_type,
250 | encoding_type=encoding_type,
251 | cutoff=cutoff,
252 | num_arches_to_mutate=num_arches_to_mutate,
253 | loss=loss,
254 | deterministic_loss=deterministic)
255 |
256 | xcandidates = np.array([c['encoding'] for c in candidates])
257 | candidate_predictions = []
258 |
259 | # train an ensemble of neural networks
260 | train_error = 0
261 | for _ in range(num_ensemble):
262 | meta_neuralnet = MetaNeuralnet()
263 | train_error += meta_neuralnet.fit(xtrain, ytrain, **metann_params)
264 |
265 | # predict the validation loss of the candidate architectures
266 | candidate_predictions.append(np.squeeze(meta_neuralnet.predict(xcandidates)))
267 |
268 | # clear the tensorflow graph
269 | tf.reset_default_graph()
270 |
271 | tf.keras.backend.clear_session()
272 |
273 | train_error /= num_ensemble
274 | if verbose:
275 | print('query {}, Meta neural net train error: {}'.format(query, train_error))
276 |
277 | # compute the acquisition function for all the candidate architectures
278 | candidate_indices = acq_fn(candidate_predictions, explore_type)
279 |
280 | # add the k arches with the minimum acquisition function values
281 | for i in candidate_indices[:k]:
282 |
283 | arch_dict = search_space.query_arch(candidates[i]['spec'],
284 | encoding_type=encoding_type,
285 | cutoff=cutoff,
286 | deterministic=deterministic)
287 | data.append(arch_dict)
288 |
289 | if verbose:
290 | top_5_loss = sorted([(d[loss], d['epochs']) for d in data], key=lambda d: d[0])[:min(5, len(data))]
291 |             print('bananas, query {}, top 5 (loss, epochs): {}'.format(query, top_5_loss))
292 | recent_10_loss = [(d[loss], d['epochs']) for d in data[-10:]]
293 |             print('bananas, query {}, most recent 10 (loss, epochs): {}'.format(query, recent_10_loss))
294 |
295 | query += k
296 |
297 | return data
298 |
299 |
300 | def gp_bayesopt_search(search_space,
301 | num_init=10,
302 | k=10,
303 | total_queries=150,
304 | distance='edit_distance',
305 | deterministic=True,
306 | tmpdir='./temp',
307 | max_iter=200,
308 | mode='single_process',
309 | nppred=1000):
310 | """
311 | Bayesian optimization with a GP prior
312 | """
313 | from bo.bo.probo import ProBO
314 |
315 | # set up the path for auxiliary pickle files
316 | if not os.path.exists(tmpdir):
317 | os.mkdir(tmpdir)
318 | aux_file_path = os.path.join(tmpdir, 'aux.pkl')
319 |
320 | num_iterations = total_queries - num_init
321 |
322 | # black-box function that bayesopt will optimize
323 | def fn(arch):
324 | return search_space.query_arch(arch, deterministic=deterministic)['val_loss']
325 |
326 | # set all the parameters for the various BayesOpt classes
327 | fhp = Namespace(fhstr='object', namestr='train')
328 | domp = Namespace(dom_str='list', set_domain_list_auto=True,
329 | aux_file_path=aux_file_path,
330 | distance=distance)
331 | modelp = Namespace(kernp=Namespace(ls=3., alpha=1.5, sigma=1e-5),
332 | infp=Namespace(niter=num_iterations, nwarmup=500),
333 | distance=distance, search_space=search_space.get_type())
334 | amp = Namespace(am_str='mygpdistmat_ucb', nppred=nppred, modelp=modelp)
335 | optp = Namespace(opt_str='rand', max_iter=max_iter)
336 | makerp = Namespace(domp=domp, amp=amp, optp=optp)
337 | probop = Namespace(niter=num_iterations, fhp=fhp,
338 | makerp=makerp, tmpdir=tmpdir, mode=mode)
339 | data = Namespace()
340 |
341 | # Set up initial data
342 | init_data = search_space.generate_random_dataset(num=num_init,
343 | deterministic_loss=deterministic)
344 | data.X = [d['spec'] for d in init_data]
345 | data.y = np.array([[d['val_loss']] for d in init_data])
346 |
347 | # initialize aux file
348 | pairs = [(data.X[i], data.y[i]) for i in range(len(data.y))]
349 | pairs.sort(key=lambda x: x[1])
350 | with open(aux_file_path, 'wb') as f:
351 | pickle.dump(pairs, f)
352 |
353 | # run Bayesian Optimization
354 | bo = ProBO(fn, search_space, aux_file_path, data, probop, True)
355 | bo.run_bo()
356 |
357 | # get the validation and test loss for all architectures chosen by BayesOpt
358 | results = []
359 | for arch in data.X:
360 | archtuple = search_space.query_arch(arch)
361 | results.append(archtuple)
362 |
363 | return results
364 |
365 |
366 | def dngo_search(search_space,
367 | num_init=10,
368 | k=10,
369 | loss='val_loss',
370 | total_queries=150,
371 | encoding_type='path',
372 | cutoff=40,
373 | acq_opt_type='mutation',
374 | explore_type='ucb',
375 | deterministic=True,
376 | verbose=True):
377 |
378 | import torch
379 | from pybnn import DNGO
380 | from pybnn.util.normalization import zero_mean_unit_var_normalization, zero_mean_unit_var_denormalization
381 | from acquisition_functions import acq_fn
382 |
383 | def fn(arch):
384 | return search_space.query_arch(arch, deterministic=deterministic)[loss]
385 |
386 | # set up initial data
387 | data = search_space.generate_random_dataset(num=num_init,
388 | encoding_type=encoding_type,
389 | cutoff=cutoff,
390 | deterministic_loss=deterministic)
391 |
392 | query = num_init + k
393 |
394 | while query <= total_queries:
395 |
396 | # set up data
397 | x = np.array([d['encoding'] for d in data])
398 | y = np.array([d[loss] for d in data])
399 |
400 | # get a set of candidate architectures
401 | candidates = search_space.get_candidates(data,
402 | acq_opt_type=acq_opt_type,
403 | encoding_type=encoding_type,
404 | cutoff=cutoff,
405 | deterministic_loss=deterministic)
406 |
407 | xcandidates = np.array([d['encoding'] for d in candidates])
408 |
409 | # train the model
410 | model = DNGO(do_mcmc=False)
411 | model.train(x, y, do_optimize=True)
412 |
413 | predictions = model.predict(xcandidates)
414 | candidate_indices = acq_fn(np.array(predictions), explore_type)
415 |
416 | # add the k arches with the minimum acquisition function values
417 | for i in candidate_indices[:k]:
418 | arch_dict = search_space.query_arch(candidates[i]['spec'],
419 | encoding_type=encoding_type,
420 | cutoff=cutoff,
421 | deterministic=deterministic)
422 | data.append(arch_dict)
423 |
424 | if verbose:
425 | top_5_loss = sorted([(d[loss], d['epochs']) for d in data], key=lambda d: d[0])[:min(5, len(data))]
426 |             print('dngo, query {}, top 5 (val loss, epochs): {}'.format(query, top_5_loss))
427 | recent_10_loss = [(d[loss], d['epochs']) for d in data[-10:]]
428 |             print('dngo, query {}, most recent 10 (val loss, epochs): {}'.format(query, recent_10_loss))
429 |
430 | query += k
431 |
432 | return data
433 |
--------------------------------------------------------------------------------
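Note: a minimal sketch (not part of the repository) of how run_nas_algorithm above is typically driven, following the pattern suggested by params.py and run_experiments_sequential.py; the exact driver interface is an assumption:

from data import Data
from params import algo_params, meta_neuralnet_params
from nas_algorithms import run_nas_algorithm

search_space = Data('nasbench')              # load the NAS-Bench-101 search space
mp = meta_neuralnet_params('nasbench')       # meta neural net hyperparameters (only used by bananas)
for ps in algo_params('test_simple'):        # random search and evolution, 30 queries each
    results, data = run_nas_algorithm(ps, search_space, mp)
    print(results[-1])                       # (num queries, test error of the arch with best val error)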
/nas_bench/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/nas_bench/cell.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import copy
3 | import itertools
4 | import random
5 | import sys
6 | import os
7 | import pickle
8 |
9 | from nasbench import api
10 |
11 |
12 | INPUT = 'input'
13 | OUTPUT = 'output'
14 | CONV3X3 = 'conv3x3-bn-relu'
15 | CONV1X1 = 'conv1x1-bn-relu'
16 | MAXPOOL3X3 = 'maxpool3x3'
17 | OPS = [CONV3X3, CONV1X1, MAXPOOL3X3]
18 |
19 | NUM_VERTICES = 7
20 | OP_SPOTS = NUM_VERTICES - 2
21 | MAX_EDGES = 9
22 |
23 |
24 | class Cell:
25 |
26 | def __init__(self, matrix, ops):
27 |
28 | self.matrix = matrix
29 | self.ops = ops
30 |
31 | def serialize(self):
32 | return {
33 | 'matrix': self.matrix,
34 | 'ops': self.ops
35 | }
36 |
37 | def modelspec(self):
38 | return api.ModelSpec(matrix=self.matrix, ops=self.ops)
39 |
40 | @classmethod
41 | def random_cell(cls, nasbench):
42 | """
43 | From the NASBench repository
44 |
45 | one-hot adjacency matrix
46 | draw [0,1] for each slot in the adjacency matrix
47 | """
48 | while True:
49 | matrix = np.random.choice(
50 | [0, 1], size=(NUM_VERTICES, NUM_VERTICES))
51 | matrix = np.triu(matrix, 1)
52 | ops = np.random.choice(OPS, size=NUM_VERTICES).tolist()
53 | ops[0] = INPUT
54 | ops[-1] = OUTPUT
55 | spec = api.ModelSpec(matrix=matrix, ops=ops)
56 | if nasbench.is_valid(spec):
57 | return {
58 | 'matrix': matrix,
59 | 'ops': ops
60 | }
61 |
62 | def get_val_loss(self, nasbench, deterministic=1, patience=50, epochs=None, dataset=None):
63 | if not deterministic:
64 | # output one of the three validation accuracies at random
65 | if epochs:
66 | return (100*(1 - nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['validation_accuracy']))
67 | else:
68 | return (100*(1 - nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['validation_accuracy']))
69 | else:
70 | # query the api until we see all three accuracies, then average them
71 | # a few architectures only have two accuracies, so we use patience to avoid an infinite loop
72 | accs = []
73 | while len(accs) < 3 and patience > 0:
74 | patience -= 1
75 | if epochs:
76 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['validation_accuracy']
77 | else:
78 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['validation_accuracy']
79 | if acc not in accs:
80 | accs.append(acc)
81 | return round(100*(1-np.mean(accs)), 4)
82 |
83 |
84 | def get_test_loss(self, nasbench, patience=50, epochs=None, dataset=None):
85 | """
86 | query the api until we see all three accuracies, then average them
87 | a few architectures only have two accuracies, so we use patience to avoid an infinite loop
88 | """
89 | accs = []
90 | while len(accs) < 3 and patience > 0:
91 | patience -= 1
92 | if epochs:
93 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['test_accuracy']
94 | else:
95 | acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['test_accuracy']
96 | if acc not in accs:
97 | accs.append(acc)
98 | return round(100*(1-np.mean(accs)), 4)
99 |
100 | def get_num_params(self, nasbench):
101 | return nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['trainable_parameters']
102 |
103 | def perturb(self, nasbench, edits=1):
104 | """
105 | create new perturbed cell
106 |         inspired by https://github.com/google-research/nasbench
107 | """
108 | new_matrix = copy.deepcopy(self.matrix)
109 | new_ops = copy.deepcopy(self.ops)
110 | for _ in range(edits):
111 |             while True:
112 |                 # flip one randomly chosen edge, or change one randomly chosen op
113 |                 if np.random.random() < 0.5:
114 |                     src = np.random.randint(0, NUM_VERTICES - 1)
115 |                     dst = np.random.randint(src + 1, NUM_VERTICES)
116 |                     new_matrix[src][dst] = 1 - new_matrix[src][dst]
117 |                 else:
118 |                     ind = np.random.randint(1, NUM_VERTICES - 1)
119 |                     new_ops[ind] = np.random.choice([op for op in OPS if op != new_ops[ind]])
120 |
121 | new_spec = api.ModelSpec(new_matrix, new_ops)
122 | if nasbench.is_valid(new_spec):
123 | break
124 | return {
125 | 'matrix': new_matrix,
126 | 'ops': new_ops
127 | }
128 |
129 | def mutate(self,
130 | nasbench,
131 | mutation_rate=1.0,
132 | patience=5000):
133 | """
134 | A stochastic approach to perturbing the cell
135 |         inspired by https://github.com/google-research/nasbench
136 | """
137 | p = 0
138 | while p < patience:
139 | p += 1
140 | new_matrix = copy.deepcopy(self.matrix)
141 | new_ops = copy.deepcopy(self.ops)
142 |
143 | edge_mutation_prob = mutation_rate / (NUM_VERTICES * (NUM_VERTICES - 1) / 2)
144 |             # flip each edge with a small probability so the expected number of flips is mutation_rate; same for the ops
145 | for src in range(0, NUM_VERTICES - 1):
146 | for dst in range(src + 1, NUM_VERTICES):
147 | if random.random() < edge_mutation_prob:
148 | new_matrix[src, dst] = 1 - new_matrix[src, dst]
149 |
150 | op_mutation_prob = mutation_rate / OP_SPOTS
151 | for ind in range(1, OP_SPOTS + 1):
152 | if random.random() < op_mutation_prob:
153 | available = [o for o in OPS if o != new_ops[ind]]
154 | new_ops[ind] = random.choice(available)
155 |
156 | new_spec = api.ModelSpec(new_matrix, new_ops)
157 | if nasbench.is_valid(new_spec):
158 | return {
159 | 'matrix': new_matrix,
160 | 'ops': new_ops
161 | }
162 | return self.mutate(nasbench, mutation_rate+1)
163 |
164 | def encode_standard(self):
165 | """
166 | compute the "standard" encoding,
167 | i.e. adjacency matrix + op list encoding
168 | """
169 | encoding_length = (NUM_VERTICES ** 2 - NUM_VERTICES) // 2 + OP_SPOTS
170 | encoding = np.zeros((encoding_length))
171 | dic = {CONV1X1: 0., CONV3X3: 0.5, MAXPOOL3X3: 1.0}
172 | n = 0
173 | for i in range(NUM_VERTICES - 1):
174 | for j in range(i+1, NUM_VERTICES):
175 | encoding[n] = self.matrix[i][j]
176 | n += 1
177 | for i in range(1, NUM_VERTICES - 1):
178 | encoding[-i] = dic[self.ops[i]]
179 | return tuple(encoding)
180 |
181 | def get_paths(self):
182 | """
183 | return all paths from input to output
184 | """
185 | paths = []
186 | for j in range(0, NUM_VERTICES):
187 | paths.append([[]]) if self.matrix[0][j] else paths.append([])
188 |
189 | # create paths sequentially
190 | for i in range(1, NUM_VERTICES - 1):
191 | for j in range(1, NUM_VERTICES):
192 | if self.matrix[i][j]:
193 | for path in paths[i]:
194 | paths[j].append([*path, self.ops[i]])
195 | return paths[-1]
196 |
197 | def get_path_indices(self):
198 | """
199 | compute the index of each path
200 | There are 3^0 + ... + 3^5 paths total.
201 | (Paths can be length 0 to 5, and for each path, for each node, there
202 | are three choices for the operation.)
203 | """
204 | paths = self.get_paths()
205 | mapping = {CONV3X3: 0, CONV1X1: 1, MAXPOOL3X3: 2}
206 | path_indices = []
207 |
208 | for path in paths:
209 | index = 0
210 | for i in range(NUM_VERTICES - 1):
211 | if i == len(path):
212 | path_indices.append(index)
213 | break
214 | else:
215 | index += len(OPS) ** i * (mapping[path[i]] + 1)
216 |
217 | path_indices.sort()
218 | return tuple(path_indices)
219 |
220 | def encode_paths(self):
221 | """ output one-hot encoding of paths """
222 | num_paths = sum([len(OPS) ** i for i in range(OP_SPOTS + 1)])
223 | path_indices = self.get_path_indices()
224 | encoding = np.zeros(num_paths)
225 | for index in path_indices:
226 | encoding[index] = 1
227 | return encoding
228 |
229 | def path_distance(self, other):
230 | """
231 | compute the distance between two architectures
232 | by comparing their path encodings
233 | """
234 |         return np.sum(np.array(self.encode_paths()) != np.array(other.encode_paths()))
235 |
236 | def trunc_path_distance(self, other, cutoff=40):
237 | """
238 | compute the distance between two architectures
239 | by comparing their path encodings
240 | """
241 | encoding = self.encode_paths()[:cutoff]
242 | other_encoding = other.encode_paths()[:cutoff]
243 | return np.sum(np.array(encoding) != np.array(other_encoding))
244 |
245 | def edit_distance(self, other):
246 | """
247 | compute the distance between two architectures
248 | by comparing their adjacency matrices and op lists
249 | """
250 | graph_dist = np.sum(np.array(self.matrix) != np.array(other.matrix))
251 | ops_dist = np.sum(np.array(self.ops) != np.array(other.ops))
252 | return (graph_dist + ops_dist)
253 |
254 | def nasbot_distance(self, other):
255 | # distance based on optimal transport between row sums, column sums, and ops
256 |
257 | row_sums = sorted(np.array(self.matrix).sum(axis=0))
258 | col_sums = sorted(np.array(self.matrix).sum(axis=1))
259 |
260 | other_row_sums = sorted(np.array(other.matrix).sum(axis=0))
261 | other_col_sums = sorted(np.array(other.matrix).sum(axis=1))
262 |
263 | row_dist = np.sum(np.abs(np.subtract(row_sums, other_row_sums)))
264 | col_dist = np.sum(np.abs(np.subtract(col_sums, other_col_sums)))
265 |
266 | counts = [self.ops.count(op) for op in OPS]
267 | other_counts = [other.ops.count(op) for op in OPS]
268 |
269 | ops_dist = np.sum(np.abs(np.subtract(counts, other_counts)))
270 |
271 | return (row_dist + col_dist + ops_dist)
272 |
273 | def get_utilized(self):
274 | # return the sets of utilized edges and nodes
275 | # first, compute all paths
276 | n = np.shape(self.matrix)[0]
277 | sub_paths = []
278 | for j in range(0, n):
279 | sub_paths.append([[(0, j)]]) if self.matrix[0][j] else sub_paths.append([])
280 |
281 | # create paths sequentially
282 | for i in range(1, n - 1):
283 | for j in range(1, n):
284 | if self.matrix[i][j]:
285 | for sub_path in sub_paths[i]:
286 | sub_paths[j].append([*sub_path, (i, j)])
287 | paths = sub_paths[-1]
288 |
289 | utilized_edges = []
290 | for path in paths:
291 | for edge in path:
292 | if edge not in utilized_edges:
293 | utilized_edges.append(edge)
294 |
295 | utilized_nodes = []
296 | for i in range(NUM_VERTICES):
297 | for edge in utilized_edges:
298 | if i in edge and i not in utilized_nodes:
299 | utilized_nodes.append(i)
300 |
301 | return utilized_edges, utilized_nodes
302 |
303 | def is_valid_vertex(self, vertex):
304 | edges, nodes = self.get_utilized()
305 | return (vertex in nodes)
306 |
307 | def is_valid_edge(self, edge):
308 | edges, nodes = self.get_utilized()
309 | return (edge in edges)
310 |
311 | def get_neighborhood(self, nasbench, shuffle=True):
312 | nbhd = []
313 | # add the cells that differ by one op
314 | for vertex in range(1, OP_SPOTS + 1):
315 | if self.is_valid_vertex(vertex):
316 | available = [op for op in OPS if op != self.ops[vertex]]
317 | for op in available:
318 | new_matrix = copy.deepcopy(self.matrix)
319 | new_ops = copy.deepcopy(self.ops)
320 | new_ops[vertex] = op
321 | new_arch = {'matrix':new_matrix, 'ops':new_ops}
322 | nbhd.append(new_arch)
323 |
324 | # add the cells that differ by one edge
325 | for src in range(0, NUM_VERTICES - 1):
326 | for dst in range(src+1, NUM_VERTICES):
327 | new_matrix = copy.deepcopy(self.matrix)
328 | new_ops = copy.deepcopy(self.ops)
329 | new_matrix[src][dst] = 1 - new_matrix[src][dst]
330 | new_arch = {'matrix':new_matrix, 'ops':new_ops}
331 |
332 | if self.matrix[src][dst] and self.is_valid_edge((src, dst)):
333 | spec = api.ModelSpec(matrix=new_matrix, ops=new_ops)
334 | if nasbench.is_valid(spec):
335 | nbhd.append(new_arch)
336 |
337 | if not self.matrix[src][dst] and Cell(**new_arch).is_valid_edge((src, dst)):
338 | spec = api.ModelSpec(matrix=new_matrix, ops=new_ops)
339 | if nasbench.is_valid(spec):
340 | nbhd.append(new_arch)
341 |
342 | if shuffle:
343 | random.shuffle(nbhd)
344 | return nbhd
345 |
346 |
--------------------------------------------------------------------------------
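Note: a quick sanity check (not part of the repository) of the path-encoding size used by encode_paths and get_path_indices above. Paths have length 0 through OP_SPOTS = 5, and each node on a path is one of len(OPS) = 3 operations, so the encoding has 3^0 + 3^1 + ... + 3^5 entries:

num_ops, op_spots = 3, 5
num_paths = sum(num_ops ** i for i in range(op_spots + 1))
print(num_paths)  # 364 possible input-to-output paths, hence a 364-dimensional one-hot encoding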
/nas_bench_201/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/nas_bench_201/cell.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import copy
3 | import itertools
4 | import random
5 | import sys
6 | import os
7 | import pickle
8 |
9 |
10 | OPS = ['avg_pool_3x3', 'nor_conv_1x1', 'nor_conv_3x3', 'none', 'skip_connect']
11 | NUM_OPS = len(OPS)
12 | OP_SPOTS = 6
13 | LONGEST_PATH_LENGTH = 3
14 |
15 | class Cell:
16 |
17 | def __init__(self, string):
18 | self.string = string
19 |
20 | def get_string(self):
21 | return self.string
22 |
23 | def serialize(self):
24 | return {
25 | 'string':self.string
26 | }
27 |
28 | @classmethod
29 | def random_cell(cls, nasbench, max_nodes=4):
30 | """
31 | From the AutoDL-Projects repository
32 | """
33 | ops = []
34 | for i in range(OP_SPOTS):
35 | op = random.choice(OPS)
36 | ops.append(op)
37 | return {'string':cls.get_string_from_ops(ops)}
38 |
39 |
40 | def get_runtime(self, nasbench, dataset='cifar100'):
41 |         return nasbench.query_by_index(nasbench.query_index_by_arch(self.string), dataset).get_eval('x-valid')['time']
42 |
43 | def get_val_loss(self, nasbench, deterministic=1, dataset='cifar100'):
44 | index = nasbench.query_index_by_arch(self.string)
45 | if dataset == 'cifar10':
46 | results = nasbench.query_by_index(index, 'cifar10-valid')
47 | else:
48 | results = nasbench.query_by_index(index, dataset)
49 |
50 | accs = []
51 | for key in results.keys():
52 | accs.append(results[key].get_eval('x-valid')['accuracy'])
53 |
54 | if deterministic:
55 | return round(100-np.mean(accs), 10)
56 | else:
57 | return round(100-np.random.choice(accs), 10)
58 |
59 | def get_test_loss(self, nasbench, dataset='cifar100', deterministic=1):
60 | index = nasbench.query_index_by_arch(self.string)
61 | results = nasbench.query_by_index(index, dataset)
62 |
63 | accs = []
64 | for key in results.keys():
65 | accs.append(results[key].get_eval('ori-test')['accuracy'])
66 |
67 | if deterministic:
68 | return round(100-np.mean(accs), 4)
69 | else:
70 | return round(100-np.random.choice(accs), 4)
71 |
72 | def get_op_list(self):
73 | # given a string, get the list of operations
74 | tokens = self.string.split('|')
75 | ops = [t.split('~')[0] for i,t in enumerate(tokens) if i not in [0,2,5,9]]
76 | return ops
77 |
78 | def get_num(self):
79 | # compute the unique number of the architecture, in [0, 15624]
80 | ops = self.get_op_list()
81 | index = 0
82 | for i, op in enumerate(ops):
83 | index += OPS.index(op) * NUM_OPS ** i
84 | return index
85 |
86 | @classmethod
87 | def get_string_from_ops(cls, ops):
88 | # given a list of operations, get the string
89 | strings = ['|']
90 | nodes = [0, 0, 1, 0, 1, 2]
91 | for i, op in enumerate(ops):
92 | strings.append(op+'~{}|'.format(nodes[i]))
93 | if i < len(nodes) - 1 and nodes[i+1] == 0:
94 | strings.append('+|')
95 | return ''.join(strings)
96 |
97 | def perturb(self, nasbench,
98 | mutation_rate=1):
99 |         # a more deterministic version of mutate: change exactly one randomly chosen op
100 | ops = self.get_op_list()
101 | new_ops = []
102 | num = np.random.choice(len(ops))
103 | for i, op in enumerate(ops):
104 | if i == num:
105 | available = [o for o in OPS if o != op]
106 | new_ops.append(np.random.choice(available))
107 | else:
108 | new_ops.append(op)
109 | return {'string':self.get_string_from_ops(new_ops)}
110 |
111 | def mutate(self,
112 | nasbench,
113 | mutation_rate=1.0,
114 | patience=5000):
115 |
116 | p = 0
117 | ops = self.get_op_list()
118 | new_ops = []
119 | # keeping mutation_prob consistent with nasbench_101
120 | mutation_prob = mutation_rate / (OP_SPOTS - 2)
121 |
122 | for i, op in enumerate(ops):
123 | if random.random() < mutation_prob:
124 | available = [o for o in OPS if o != op]
125 | new_ops.append(random.choice(available))
126 | else:
127 | new_ops.append(op)
128 |
129 | return {'string':self.get_string_from_ops(new_ops)}
130 |
131 | def encode_standard(self):
132 | """
133 | compute the standard encoding
134 | """
135 | ops = self.get_op_list()
136 | encoding = []
137 | for op in ops:
138 | encoding.append(OPS.index(op))
139 |
140 | return encoding
141 |
142 | def get_num_params(self, nasbench):
143 | # todo update to the newer nasbench-201 dataset
144 | return 100
145 |
146 | def get_paths(self):
147 | """
148 | return all paths from input to output
149 | """
150 | path_blueprints = [[3], [0,4], [1,5], [0,2,5]]
151 | ops = self.get_op_list()
152 | paths = []
153 | for blueprint in path_blueprints:
154 | paths.append([ops[node] for node in blueprint])
155 |
156 | return paths
157 |
158 | def get_path_indices(self):
159 | """
160 | compute the index of each path
161 | """
162 | paths = self.get_paths()
163 | path_indices = []
164 |
165 | for i, path in enumerate(paths):
166 | if i == 0:
167 | index = 0
168 | elif i in [1, 2]:
169 | index = NUM_OPS
170 | else:
171 | index = NUM_OPS + NUM_OPS ** 2
172 | for j, op in enumerate(path):
173 | index += OPS.index(op) * NUM_OPS ** j
174 | path_indices.append(index)
175 |
176 | return tuple(path_indices)
177 |
178 | def encode_paths(self):
179 | """ output one-hot encoding of paths """
180 | num_paths = sum([NUM_OPS ** i for i in range(1, LONGEST_PATH_LENGTH + 1)])
181 | path_indices = self.get_path_indices()
182 | encoding = np.zeros(num_paths)
183 | for index in path_indices:
184 | encoding[index] = 1
185 | return encoding
186 |
187 | def path_distance(self, other):
188 | """
189 | compute the distance between two architectures
190 | by comparing their path encodings
191 | """
192 |         return np.sum(np.array(self.encode_paths()) != np.array(other.encode_paths()))
193 |
194 | def trunc_path_distance(self, other, cutoff=30):
195 | """
196 | compute the distance between two architectures
197 | by comparing their truncated path encodings
198 | """
199 |         paths = np.array(self.encode_paths()[:cutoff])
200 |         other_paths = np.array(other.encode_paths()[:cutoff])
201 | return np.sum(paths != other_paths)
202 |
203 | def edit_distance(self, other):
204 |
205 | ops = self.get_op_list()
206 | other_ops = other.get_op_list()
207 | return np.sum([1 for i in range(len(ops)) if ops[i] != other_ops[i]])
208 |
209 | def nasbot_distance(self, other):
210 | # distance based on optimal transport between row sums, column sums, and ops
211 |
212 | ops = self.get_op_list()
213 | other_ops = other.get_op_list()
214 |
215 | counts = [ops.count(op) for op in OPS]
216 | other_counts = [other_ops.count(op) for op in OPS]
217 | ops_dist = np.sum(np.abs(np.subtract(counts, other_counts)))
218 |
219 | return ops_dist + self.edit_distance(other)
220 |
221 | def get_neighborhood(self, nasbench, shuffle=True):
222 | nbhd = []
223 | ops = self.get_op_list()
224 | for i in range(len(ops)):
225 | available = [op for op in OPS if op != ops[i]]
226 | for op in available:
227 | new_ops = ops.copy()
228 | new_ops[i] = op
229 | new_arch = {'string':self.get_string_from_ops(new_ops)}
230 | nbhd.append(new_arch)
231 |
232 | if shuffle:
233 | random.shuffle(nbhd)
234 | return nbhd
235 |
--------------------------------------------------------------------------------
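Note: a quick sanity check (not part of the repository) of the sizes implied by the constants above. With 5 candidate operations and 6 operation spots there are 5^6 = 15625 cells (hence indices 0 through 15624 in get_num), and the path encoding built by encode_paths has 5 + 5^2 + 5^3 = 155 entries:

num_ops, op_spots, longest_path = 5, 6, 3
print(num_ops ** op_spots)                                        # 15625 distinct cells
print(sum(num_ops ** i for i in range(1, longest_path + 1)))      # 155-dimensional path encoding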
/notebooks/random_walk_autocorrelation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%load_ext autoreload\n",
10 | "%autoreload 2\n",
11 | "%matplotlib inline"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "import numpy as np\n",
21 | "import sys\n",
22 | "import os\n",
23 | "import collections\n",
24 | "import itertools\n",
25 | "import pickle\n",
26 | "from scipy.integrate import quad\n",
27 | "from matplotlib import pyplot as plt\n",
28 | "from scipy.special import logit, expit\n",
29 | "from scipy.stats import norm"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "# Random Walk Autocorrelation (RWA)\n",
37 | " - this notebook contains code to compute the random walk autocorrelation on NAS-Bench-201 datasets, and on arbitrary probability density functions"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "### Compute the RWA from PDFs"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "# first, define a few PDFs\n",
54 | "\n",
55 | "def sample(v, std=.35, dist='normal'):\n",
56 | " # sample a random point from the nbhd of v\n",
57 | " if dist == 'uniform':\n",
58 | " return np.random.rand()\n",
59 | " elif dist == 'lipschitz':\n",
60 | " return np.random.uniform(max(0, v-std), min(1, v+std))\n",
61 | " elif dist == 'normal':\n",
62 | " # rejection sampling\n",
63 | " u = np.random.rand()\n",
64 | " y = np.random.rand() * pdf(v, v, dist='normal', std=std)\n",
65 | " if y < pdf(u, v, dist='normal', std=std):\n",
66 | " return u\n",
67 | " else:\n",
68 | " return sample(v, std=std, dist='lipschitz')\n",
69 | " \n",
70 | "def pdf(u, v, dist='normal', std=.35):\n",
71 | " # return the value of the pdf of nbhd(v) at u\n",
72 | " if dist == 'uniform':\n",
73 | " # uniform distribution on [0,1]\n",
74 | " return 1\n",
75 | " elif dist == 'lipschitz':\n",
76 | " # uniform on [v-std, v+std]\n",
77 | " if v - std <= u and u <= v + std:\n",
78 | " return 1/(min(1, v+std)-max(0, v-std))\n",
79 | " else: \n",
80 | " return 0\n",
81 | " elif dist == 'normal':\n",
82 | " # normal dist with mean=v, std=std, scaled to be in [0,1]\n",
83 | " return norm.pdf(u, v, std) * (norm.cdf(1, v, std) - norm.cdf(0, v, std)) ** -1"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "def sample_constrained(cell, std, low=0, high=1, dist='normal'):\n",
93 | " for _ in range(200):\n",
94 | " sampled = sample(cell, std=std, dist=dist)\n",
95 | " if sampled > low and sampled < high:\n",
96 | " return sampled\n",
97 | " return cell\n",
98 | "\n",
99 | "def rwa_from_pdf(trials=100000,\n",
100 | " size=36,\n",
101 | " std=.35,\n",
102 | " low=0,\n",
103 | " high=1):\n",
104 | " # compute RWA for a synthetic dataset based on a PDF\n",
105 | " cell = .25\n",
106 | " window = collections.deque([cell])\n",
107 | " for _ in range(size - 1):\n",
108 | " cell = sample_constrained(cell, std=std, low=low, high=high, dist='normal')\n",
109 | " window.append(cell)\n",
110 | " \n",
111 | " autocorrs = np.zeros((size, trials, 2))\n",
112 | " for t in range(trials):\n",
113 | " if t % (trials/10) == 0:\n",
114 | " print('trial', t)\n",
115 | " #pass\n",
116 | " cell = sample_constrained(cell, std=std, low=low, high=high, dist='normal')\n",
117 | " window.append(cell)\n",
118 | " window.popleft()\n",
119 | " autocorrs[:, t, 0] = np.array([window[-1]] * size)\n",
120 | " autocorrs[:, t, 1] = np.array(window)\n",
121 | " \n",
122 | " corr = []\n",
123 | " for i in range(size):\n",
124 | " corr.append(np.corrcoef(autocorrs[i, :, 0], autocorrs[i, :, 1])[1,0])\n",
125 | " xs = [np.power(size - i - 1, 1/2) for i in range(size)]\n",
126 | " return xs, corr\n"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "### compute RWA on the NASBench-201 datasets"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "sys.path.append(os.path.expanduser('~/naszilla/bananas'))\n",
143 | "sys.path.append(os.path.expanduser('~/AutoDL-Projects/lib/'))\n",
144 | "\n",
145 | "from nas_bench_201.cell import Cell\n",
146 | "from nas_201_api import NASBench201API as API"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "def pert(cell, nasbench, dataset='cifar10', low=0, high=100):\n",
156 | " for i in range(200):\n",
157 | " perturbed = Cell(**cell).perturb(nasbench)\n",
158 | " if Cell(**perturbed).get_val_loss(nasbench, dataset=dataset) > low and \\\n",
159 | " Cell(**perturbed).get_val_loss(nasbench, dataset=dataset) < high:\n",
160 | " return perturbed\n",
161 | " print('failed')\n",
162 | " return Cell(**cell).perturb(nasbench)\n",
163 | "\n",
164 | "def random_walk(nasbench,\n",
165 | " trials=10000,\n",
166 | " size=36,\n",
167 | " dataset='cifar10',\n",
168 | " save=False,\n",
169 | " low=0,\n",
170 | " high=100):\n",
171 | " \n",
172 | " # if low, high are proportions, compute the losses\n",
173 | " if high < 1:\n",
174 | " losses, _ = pickle.load(open('{}_losses.pkl'.format(dataset), 'rb'))\n",
175 | " losses.sort()\n",
176 | " limits = [losses[0], losses[-1]]\n",
177 | " low, high = [losses[int(low*15625)], losses[int(high*15625)]]\n",
178 | " print('limits', limits)\n",
179 | " print('scaled limits', low, high)\n",
180 | " \n",
181 | " # compute rwa for a dataset in nasbench-201\n",
182 | " cell = Cell.random_cell(nasbench)\n",
183 | " while Cell(**cell).get_val_loss(nasbench, dataset=dataset) < low or \\\n",
184 | " Cell(**cell).get_val_loss(nasbench, dataset=dataset) > high:\n",
185 | " cell = Cell.random_cell(nasbench)\n",
186 | "\n",
187 | "    window = collections.deque([Cell(**cell).get_val_loss(nasbench, dataset=dataset)])\n",
188 | " for _ in range(size - 1):\n",
189 | "        cell = pert(cell, nasbench, dataset=dataset, low=low, high=high)\n",
190 | " window.append(Cell(**cell).get_val_loss(nasbench, dataset=dataset))\n",
191 | " \n",
192 | " autocorrs = np.zeros((size, trials, 2))\n",
193 | " for t in range(trials):\n",
194 | " if t % (trials/10) == 0:\n",
195 | " print('trial', t)\n",
196 | "\n",
197 | "        cell = pert(cell, nasbench, dataset=dataset, low=low, high=high)\n",
198 | " window.append(Cell(**cell).get_val_loss(nasbench, dataset=dataset))\n",
199 | " window.popleft()\n",
200 | " autocorrs[:, t, 0] = np.array([window[-1]] * size)\n",
201 | " autocorrs[:, t, 1] = np.array(window)\n",
202 | " \n",
203 | " corr = []\n",
204 | " for i in range(size):\n",
205 | " corr.append(np.corrcoef(autocorrs[i, :, 0], autocorrs[i, :, 1])[1,0])\n",
206 | " xs = [np.power(size - i - 1, 1/2) for i in range(size)]\n",
207 | " return xs, corr\n",
208 | " "
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {},
215 | "outputs": [],
216 | "source": [
217 | "# generate synthetic data\n",
218 | "rwa_normals = {}\n",
219 | "for std in [.3, .35, .4]:\n",
220 | " print('starting', std)\n",
221 | " xs, corr = rwa_from_pdf(std=std, trials=10000)\n",
222 | " rwa_normals[std] = corr \n",
223 | "    plt.plot(xs, corr, label='normal pdf, std={}'.format(std))\n"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {
230 | "scrolled": false
231 | },
232 | "outputs": [],
233 | "source": [
234 | "# download the nas-bench-201 dataset, and then load it with this command\n",
235 | "nasbench = API(os.path.expanduser('~/path/to/NAS-Bench-201-v1_0-e61699.pth'))"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "# compute RWA on the nas-bench-201 datasets\n",
245 | "datasets = ['ImageNet16-120', 'cifar100', 'cifar10']\n",
246 | "corrs = {}\n",
247 | "for dataset in datasets:\n",
248 | " _, corr = random_walk(nasbench, dataset=dataset, save=False, trials=10000, low=.1, high=.9)\n",
249 | " corrs[dataset] = corr\n",
250 | " print('finished', dataset)\n"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": []
259 | }
260 | ],
261 | "metadata": {
262 | "kernelspec": {
263 | "display_name": "Python 3",
264 | "language": "python",
265 | "name": "python3"
266 | },
267 | "language_info": {
268 | "codemirror_mode": {
269 | "name": "ipython",
270 | "version": 3
271 | },
272 | "file_extension": ".py",
273 | "mimetype": "text/x-python",
274 | "name": "python",
275 | "nbconvert_exporter": "python",
276 | "pygments_lexer": "ipython3",
277 | "version": "3.7.7"
278 | }
279 | },
280 | "nbformat": 4,
281 | "nbformat_minor": 4
282 | }
283 |
--------------------------------------------------------------------------------
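Note: a toy illustration (not part of the notebook) of random walk autocorrelation on a plain 1-D signal, to make the quantity computed above concrete: the correlation at lag s is the correlation between values that are s steps apart along the walk, and it decays as the lag grows:

import numpy as np

np.random.seed(0)
x, walk = 0.5, []
for _ in range(10000):
    x = np.clip(x + 0.1 * np.random.randn(), 0, 1)   # bounded random walk, like a walk over val losses
    walk.append(x)
walk = np.array(walk)

def autocorr(signal, lag):
    return np.corrcoef(signal[:-lag], signal[lag:])[0, 1]

print([round(autocorr(walk, lag), 3) for lag in (1, 5, 25)])  # correlation decays as the lag grows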
/params.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 |
4 | def algo_params(param_str):
5 | """
6 | Return params list based on param_str.
7 |     These are the parameters used to produce the figures in the paper.
8 |     For AlphaX and Reinforcement Learning, we used the corresponding GitHub repos:
9 | https://github.com/linnanwang/AlphaX-NASBench101
10 | https://github.com/automl/nas_benchmarks
11 | """
12 | params = []
13 |
14 | if param_str == 'local_search':
15 | params.append({'algo_name':'local_search', 'total_queries':300})
16 |
17 | elif param_str == 'ls_cont_at_min':
18 | params.append({'algo_name':'local_search', 'total_queries':300, 'stop_at_minimum':False})
19 |
20 | elif param_str == 'ls_query_part':
21 | params.append({'algo_name':'local_search', 'total_queries':300, 'query_full_nbhd':False})
22 |
23 | elif param_str == 'test':
24 | params.append({'algo_name':'random', 'total_queries':30})
25 | params.append({'algo_name':'evolution', 'total_queries':30})
26 | params.append({'algo_name':'bananas', 'total_queries':30})
27 | params.append({'algo_name':'gp_bayesopt', 'total_queries':30})
28 | params.append({'algo_name':'dngo', 'total_queries':30})
29 |
30 | elif param_str == 'test_simple':
31 | params.append({'algo_name':'random', 'total_queries':30})
32 | params.append({'algo_name':'evolution', 'total_queries':30})
33 |
34 | elif param_str == 'main_experiments':
35 | params.append({'algo_name':'random', 'total_queries':300})
36 | params.append({'algo_name':'evolution', 'total_queries':300})
37 | params.append({'algo_name':'bananas', 'total_queries':300})
38 | params.append({'algo_name':'gp_bayesopt', 'total_queries':300})
39 | params.append({'algo_name':'dngo', 'total_queries':300})
40 | params.append({'algo_name':'local_search', 'total_queries':300, 'stop_at_minimum':False})
41 | params.append({'algo_name':'local_search', 'total_queries':300, 'query_full_nbhd':False})
42 |
43 | elif param_str == 'bananas':
44 | params.append({'algo_name':'bananas', 'total_queries':150})
45 |
46 | else:
47 | print('invalid algorithm params: {}'.format(param_str))
48 | sys.exit()
49 |
50 | print('\n* Running experiment: ' + param_str)
51 | return params
52 |
53 |
54 | def meta_neuralnet_params(param_str):
55 |
56 | if param_str == 'nasbench':
57 | params = {'search_space':'nasbench', 'dataset':'cifar10', 'loss':'mae', 'num_layers':10, 'layer_width':20, \
58 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}
59 |
60 | elif param_str == 'darts':
61 | params = {'search_space':'darts', 'dataset':'cifar10', 'loss':'mape', 'num_layers':10, 'layer_width':20, \
62 | 'epochs':10000, 'batch_size':32, 'lr':.00001, 'regularization':0, 'verbose':0}
63 |
64 | elif param_str == 'nasbench_201_cifar10':
65 | params = {'search_space':'nasbench_201', 'dataset':'cifar10', 'loss':'mae', 'num_layers':10, 'layer_width':20, \
66 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}
67 |
68 | elif param_str == 'nasbench_201_cifar100':
69 | params = {'search_space':'nasbench_201', 'dataset':'cifar100', 'loss':'mae', 'num_layers':10, 'layer_width':20, \
70 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}
71 |
72 | elif param_str == 'nasbench_201_imagenet':
73 | params = {'search_space':'nasbench_201', 'dataset':'ImageNet16-120', 'loss':'mae', 'num_layers':10, 'layer_width':20, \
74 | 'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}
75 |
76 | else:
77 | print('invalid meta neural net params: {}'.format(param_str))
78 | sys.exit()
79 |
80 | return params
81 |
--------------------------------------------------------------------------------
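
A short usage sketch for the two helpers above (the call sites are hypothetical; the param strings and dict keys are the ones defined in this file):

from params import algo_params, meta_neuralnet_params

# a list of algorithm configs, e.g. [{'algo_name': 'local_search', 'total_queries': 300}]
algorithm_params = algo_params('local_search')

# search-space / meta neural net config for NAS-Bench-201 on CIFAR-10
mp = meta_neuralnet_params('nasbench_201_cifar10')
print(mp['search_space'], mp['dataset'], mp['loss'])
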
/run_experiments_parallel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | param_str=fifty_epochs
3 | experiment_name=bananas
4 |
5 | # set all instance names and zones
6 | instances=(bananas-t4-1-vm bananas-t4-2-vm bananas-t4-3-vm bananas-t4-4-vm \
7 | bananas-t4-5-vm bananas-t4-6-vm bananas-t4-7-vm bananas-t4-8-vm \
8 | bananas-t4-9-vm bananas-t4-10-vm)
9 |
10 | zones=(us-west1-b us-west1-b us-west1-b us-west1-b us-west1-b us-west1-b \
11 | us-west1-b us-west1-b us-west1-b us-west1-b)
12 |
13 | # set parameters based on the param string
14 | if [ $param_str = test ]; then
15 | start_iteration=0
16 | end_iteration=1
17 | k=10
18 | untrained_filename=untrained_spec
19 | trained_filename=trained_spec
20 | epochs=1
21 | fi
22 | if [ $param_str = fifty_epochs ]; then
23 | start_iteration=0
24 | end_iteration=9
25 | k=10
26 | untrained_filename=untrained_spec
27 | trained_filename=trained_spec
28 | epochs=50
29 | fi
30 |
31 | # start bananas
32 | for i in $(seq $start_iteration $end_iteration)
33 | do
34 | let start=$i*$k
35 | let end=($i+1)*$k-1
36 |
37 | # train the neural net
38 | # input: all pickle files with index from 0 to i*k-1
39 | # output: k pickle files for the architectures to train next (indices i*k to (i+1)*k-1)
40 | echo about to run meta neural network in iteration $i
41 | python3 metann_runner.py --experiment_name $experiment_name --params $nas_params --k $k \
42 | --untrained_filename $untrained_filename --trained_filename $trained_filename --query $start
43 | echo outputted architectures to train in iteration $i
44 |
45 | # train the k architectures
46 | let max_j=$k-1
47 | for j in $(seq 0 $max_j )
48 | do
49 | let query=$i*$k+$j
50 | instance=${instances[$j]}
51 | zone=${zones[$j]}
52 | untrained_filepath=$experiment_name/$untrained_filename\_$query.pkl
53 | trained_filepath=$experiment_name/$trained_filename\_$query.pkl
54 |
55 | echo about to copy file $untrained_filepath to instance $instance
56 | gcloud compute scp $untrained_filepath $instance:~/naszilla/$experiment_name/ --zone $zone
57 |
58 | echo about to ssh into instance $instance
59 | gcloud compute ssh $instance --zone $zone --command="cd naszilla; \
60 | python3 train_arch_runner.py --untrained_filepath $untrained_filepath \
61 | --trained_filepath $trained_filepath --epochs $epochs" &
62 | done
63 | wait
64 | echo all architectures trained in iteration $i
65 |
66 | # copy results of trained architectures to the master CPU
67 | let max_j=$k-1
68 | for j in $(seq 0 $max_j )
69 | do
70 | let query=$i*$k+$j
71 | instance=${instances[$j]}
72 | zone=${zones[$j]}
73 | trained_filepath=$experiment_name/$trained_filename\_$query.pkl
74 | gcloud compute scp $instance:~/naszilla/$trained_filepath $experiment_name --zone $zone
75 | done
76 | echo finished iteration $i
77 | done
78 |
79 |
--------------------------------------------------------------------------------
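
The loop above follows a simple file convention: in iteration i, the master writes k untrained spec pickles with query indices i*k through (i+1)*k - 1, ships them to the worker VMs, and collects trained specs back under the same indices. A small Python sketch of that index/filename convention (illustrative only; the values mirror the shell variables above):

import os

experiment_name = 'bananas'
k = 10  # architectures proposed and trained per iteration

def iteration_files(i, filename='untrained_spec'):
    """Pickle files handled in iteration i: query indices i*k .. (i+1)*k - 1."""
    return [os.path.join(experiment_name, '{}_{}.pkl'.format(filename, q))
            for q in range(i * k, (i + 1) * k)]

print(iteration_files(0))                    # bananas/untrained_spec_0.pkl ... _9.pkl
print(iteration_files(3, 'trained_spec'))    # bananas/trained_spec_30.pkl ... _39.pkl
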
/run_experiments_sequential.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import logging
4 | import sys
5 | import os
6 | import pickle
7 | import numpy as np
8 | import copy
9 |
10 | from params import *
11 |
12 |
13 | def run_experiments(args, save_dir):
14 |
15 | os.environ['search_space'] = args.search_space
16 |
17 | from nas_algorithms import run_nas_algorithm
18 | from data import Data
19 |
20 | trials = args.trials
21 | out_file = args.output_filename
22 | save_specs = args.save_specs
23 | metann_params = meta_neuralnet_params(args.search_space)
24 | algorithm_params = algo_params(args.algo_params)
25 | num_algos = len(algorithm_params)
26 | logging.info(algorithm_params)
27 |
28 | # set up search space
29 | mp = copy.deepcopy(metann_params)
30 | ss = mp.pop('search_space')
31 | dataset = mp.pop('dataset')
32 | search_space = Data(ss, dataset=dataset)
33 |
34 | for i in range(trials):
35 | results = []
36 | walltimes = []
37 | run_data = []
38 |
39 | for j in range(num_algos):
40 | # run NAS algorithm
41 | print('\n* Running algorithm: {}'.format(algorithm_params[j]))
42 | starttime = time.time()
43 | algo_result, run_datum = run_nas_algorithm(algorithm_params[j], search_space, mp)
44 | algo_result = np.round(algo_result, 5)
45 |
46 | # remove unnecessary dict entries that take up space
47 | for d in run_datum:
48 | if not save_specs:
49 | d.pop('spec')
50 | for key in ['encoding', 'adjacency', 'path', 'dist_to_min']:
51 | if key in d:
52 | d.pop(key)
53 |
54 | # add walltime, results, run_data
55 | walltimes.append(time.time()-starttime)
56 | results.append(algo_result)
57 | run_data.append(run_datum)
58 |
59 | # print and pickle results
60 | filename = os.path.join(save_dir, '{}_{}.pkl'.format(out_file, i))
61 | print('\n* Trial summary: (params, results, walltimes)')
62 | print(algorithm_params)
63 | print(metann_params)
64 | print(results)
65 | print(walltimes)
66 | print('\n* Saving to file {}'.format(filename))
67 | with open(filename, 'wb') as f:
68 | pickle.dump([algorithm_params, metann_params, results, walltimes, run_data], f)
69 | f.close()
70 |
71 | def main(args):
72 |
73 | # make save directory
74 | save_dir = args.save_dir
75 | if not os.path.exists(save_dir):
76 | os.mkdir(save_dir)
77 |
78 | algo_params = args.algo_params
79 | save_path = save_dir + '/' + algo_params + '/'
80 | if not os.path.exists(save_path):
81 | os.mkdir(save_path)
82 |
83 | # set up logging
84 | log_format = '%(asctime)s %(message)s'
85 | logging.basicConfig(stream=sys.stdout, level=logging.INFO,
86 | format=log_format, datefmt='%m/%d %I:%M:%S %p')
87 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
88 | fh.setFormatter(logging.Formatter(log_format))
89 | logging.getLogger().addHandler(fh)
90 | logging.info(args)
91 |
92 | run_experiments(args, save_path)
93 |
94 |
95 | if __name__ == "__main__":
96 | parser = argparse.ArgumentParser(description='Args for BANANAS experiments')
97 | parser.add_argument('--trials', type=int, default=500, help='Number of trials')
98 | parser.add_argument('--search_space', type=str, default='nasbench', \
99 |                         help='nasbench, darts, or nasbench_201_{cifar10, cifar100, imagenet}')
100 | parser.add_argument('--algo_params', type=str, default='main_experiments', help='which parameters to use')
101 | parser.add_argument('--output_filename', type=str, default='round', help='name of output files')
102 | parser.add_argument('--save_dir', type=str, default='results_output', help='name of save directory')
103 | parser.add_argument('--save_specs', type=bool, default=False, help='save the architecture specs')
104 |
105 | args = parser.parse_args()
106 | main(args)
107 |
--------------------------------------------------------------------------------
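
Each trial is pickled as [algorithm_params, metann_params, results, walltimes, run_data]. A minimal sketch for loading one of those result files back (the path is hypothetical, built from the default --save_dir, --algo_params, and --output_filename values above):

import pickle

# default layout written by run_experiments: <save_dir>/<algo_params>/<output_filename>_<trial>.pkl
path = 'results_output/main_experiments/round_0.pkl'

with open(path, 'rb') as f:
    algorithm_params, metann_params, results, walltimes, run_data = pickle.load(f)

# one entry per algorithm in the trial
for algo, wall in zip(algorithm_params, walltimes):
    print('{}: {:.1f} s'.format(algo['algo_name'], wall))
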
/train_arch_runner.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import logging
4 | import sys
5 | import os
6 | import pickle
7 |
8 | sys.path.append(os.path.expanduser('~/darts/cnn'))
9 | from train_class import Train
10 |
11 | """
12 | train arch runner is used in run_experiments_parallel
13 |
14 | - loads data by opening a pickle file containing an architecture spec
15 | - trains that architecture for e epochs
16 | - outputs a new pickle file with the architecture spec and its validation loss
17 | """
18 |
19 | def run(args):
20 |
21 | untrained_filepath = os.path.expanduser(args.untrained_filepath)
22 | trained_filepath = os.path.expanduser(args.trained_filepath)
23 | epochs = args.epochs
24 | gpu = args.gpu
25 | train_portion = args.train_portion
26 | seed = args.seed
27 | save = args.save
28 |
29 | # load the arch spec that will be trained
30 | dic = pickle.load(open(untrained_filepath, 'rb'))
31 | arch = dic['spec']
32 | print('loaded arch', arch)
33 |
34 | # train the arch
35 | trainer = Train()
36 | val_accs, test_accs = trainer.main(arch,
37 | epochs=epochs,
38 | gpu=gpu,
39 | train_portion=train_portion,
40 | seed=seed,
41 | save=save)
42 |
43 | val_sum = 0
44 | for epoch, val_acc in val_accs:
45 | key = 'val_loss_' + str(epoch)
46 | dic[key] = 100 - val_acc
47 | val_sum += dic[key]
48 | for epoch, test_acc in test_accs:
49 | key = 'test_loss_' + str(epoch)
50 | dic[key] = 100 - test_acc
51 |
52 | val_loss_avg = val_sum / len(val_accs)
53 |
54 | dic['val_loss_avg'] = val_loss_avg
55 | dic['val_loss'] = 100 - val_accs[-1][-1]
56 | dic['test_loss'] = 100 - test_accs[-1][-1]
57 | dic['filepath'] = args.trained_filepath
58 |
59 | print('arch {}'.format(arch))
60 | print('val loss: {}'.format(dic['val_loss']))
61 | print('test loss: {}'.format(dic['test_loss']))
62 | print('val loss avg: {}'.format(dic['val_loss_avg']))
63 |
64 | with open(trained_filepath, 'wb') as f:
65 | pickle.dump(dic, f)
66 |
67 | def main(args):
68 |
69 |     # set up save dir
70 | save_dir = './'
71 |
72 |     # set up logging
73 | log_format = '%(asctime)s %(message)s'
74 | logging.basicConfig(stream=sys.stdout, level=logging.INFO,
75 | format=log_format, datefmt='%m/%d %I:%M:%S %p')
76 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
77 | fh.setFormatter(logging.Formatter(log_format))
78 | logging.getLogger().addHandler(fh)
79 | logging.info(args)
80 |
81 | run(args)
82 |
83 | if __name__ == "__main__":
84 | parser = argparse.ArgumentParser(description='Args for training a darts arch')
85 | parser.add_argument('--untrained_filepath', type=str, default='darts_test/untrained_spec_0.pkl', help='name of input files')
86 | parser.add_argument('--trained_filepath', type=str, default='darts_test/trained_spec_0.pkl', help='name of output files')
87 | parser.add_argument('--epochs', type=int, default=50, help='number of training epochs')
88 | parser.add_argument('--gpu', type=int, default=0, help='which gpu to use')
89 | parser.add_argument('--train_portion', type=float, default=0.7, help='portion of training data used for training')
90 |     parser.add_argument('--seed', type=int, default=0, help='random seed to use')
91 | parser.add_argument('--save', type=str, default='EXP', help='directory to save to')
92 |
93 | args = parser.parse_args()
94 | main(args)
95 |
--------------------------------------------------------------------------------
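
The worker's input/output contract is a pickled dict: it reads 'spec' from the untrained file and writes the same dict back with val_loss, test_loss, val_loss_avg, and per-epoch loss keys added. A minimal sketch of producing an input file and reading the output (the placeholder spec and paths are illustrative; the real spec format is whatever the darts Train class expects):

import os
import pickle

# input protocol: a dict whose 'spec' entry is the architecture to train
os.makedirs('darts_test', exist_ok=True)
untrained = {'spec': None}  # placeholder; real runs store the darts arch encoding expected by Train.main
with open('darts_test/untrained_spec_0.pkl', 'wb') as f:
    pickle.dump(untrained, f)

# output protocol: the same dict with loss entries added by the worker
trained_path = 'darts_test/trained_spec_0.pkl'
if os.path.exists(trained_path):
    with open(trained_path, 'rb') as f:
        trained = pickle.load(f)
    print(trained['val_loss'], trained['test_loss'], trained['val_loss_avg'])
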