├── .gitignore ├── LICENSE ├── README.md ├── aav ├── models │ ├── constrained_maxent_parameters.npz │ └── h100_0.npy ├── randomized-staircase-results.npz └── test_and_calibration_aav_data.npz ├── assay.py ├── calibrate.py ├── environment.yml ├── fluorescence ├── blue_n192_lambda0_alpha0.1_gamma1.npz ├── blue_n192_lambda2_alpha0.1_gamma1.npz ├── blue_n192_lambda4_alpha0.1_gamma1.npz ├── blue_n192_lambda6_alpha0.1_gamma1.npz ├── blue_n384_lambda0_alpha0.1_gamma1.npz ├── blue_n384_lambda2_alpha0.1_gamma1.npz ├── blue_n384_lambda4_alpha0.1_gamma1.npz ├── blue_n384_lambda6_alpha0.1_gamma1.npz ├── blue_n96_lambda0_alpha0.1_gamma10.npz ├── blue_n96_lambda2_alpha0.1_gamma10.npz ├── blue_n96_lambda4_alpha0.1_gamma10.npz ├── blue_n96_lambda6_alpha0.1_gamma10.npz ├── blue_noise.npz ├── red_n192_lambda0_alpha0.1_gamma1.npz ├── red_n192_lambda2_alpha0.1_gamma1.npz ├── red_n192_lambda4_alpha0.1_gamma1.npz ├── red_n192_lambda6_alpha0.1_gamma1.npz ├── red_n384_lambda0_alpha0.1_gamma1.npz ├── red_n384_lambda2_alpha0.1_gamma1.npz ├── red_n384_lambda4_alpha0.1_gamma1.npz ├── red_n384_lambda6_alpha0.1_gamma1.npz ├── red_n96_lambda0_alpha0.1_gamma10.npz ├── red_n96_lambda2_alpha0.1_gamma10.npz ├── red_n96_lambda4_alpha0.1_gamma10.npz ├── red_n96_lambda6_alpha0.1_gamma10.npz ├── red_noise.npz ├── supp_data_3.xlsx └── supp_data_4.xlsx └── notebooks ├── aav-experiments.ipynb ├── aav-figures.ipynb ├── fluorescence-experiments.ipynb └── fluorescence-figures.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Clara Wong-Fannjiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Conformal prediction under feedback covariate shift for biomolecular design 2 | This repo contains the code accompanying the following paper: 3 | 4 | C. Fannjiang, S. Bates, A. Angelopoulos, J. Listgarten, M. I. Jordan, Conformal prediction under feedback covariate shift for biomolecular design. 2022. *Proceedings of the National Academy of Sciences*, 119(43), e2204569119. 5 | [link](https://www.pnas.org/doi/10.1073/pnas.2204569119) 6 | 7 | See `calibrate.py` for implementations of both the full and split conformal prediction algorithms we describe. `assay.py` contains utilities and classes for handling the fluorescence and AAV datasets, which are stored (along with relevant models and results) in `fluorescence/` and `aav/`, respectively. 8 | 9 | Notebooks for reproducing and plotting the results of the simulated protein design experiments are as follows: 10 | - `notebooks/fluorescence-experiments.ipynb` shows how we ran the fluorescent protein design experiments, which uses full conformal prediction under feedback covariate shift, algorithmically optimized for ridge regression (Alg. S2 in the [SI Appendix](https://www.pnas.org/doi/10.1073/pnas.2204569119#supplementary-materials)). 11 | - `notebooks/fluorescence-figures.ipynb` creates Figs. 3 and 4 in the main paper and Fig. S2 in the [SI Appendix](https://www.pnas.org/doi/10.1073/pnas.2204569119#supplementary-materials). 12 | - `notebooks/aav-experiments.ipynb` shows how we ran the AAV design experiments, which uses a randomized version of split conformal prediction under covariate shift (Alg. S1 in the [SI Appendix](https://www.pnas.org/doi/10.1073/pnas.2204569119#supplementary-materials)). 13 | - `notebooks/aav-figures.ipynb` creates Fig. 5 in the main paper. 14 | 15 | -------------------------------------------------------------------------------- /aav/models/constrained_maxent_parameters.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/models/constrained_maxent_parameters.npz -------------------------------------------------------------------------------- /aav/models/h100_0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/models/h100_0.npy -------------------------------------------------------------------------------- /aav/randomized-staircase-results.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/randomized-staircase-results.npz -------------------------------------------------------------------------------- /aav/test_and_calibration_aav_data.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/test_and_calibration_aav_data.npz -------------------------------------------------------------------------------- /assay.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import time 3 | from itertools import chain, combinations 4 | 5 | import numpy as np 6 | import scipy as sc 7 | import pandas as pd 8 | 9 | from sklearn.linear_model import LinearRegression 10 | from tensorflow.keras.utils import Sequence 11 | from calibrate import get_invcov_dot_xt 12 | 13 | # ===== utilities and classes for AAV experiments ===== 14 | 15 | # ----- utilities for converting between amino acids and nucleotides ----- 16 | 17 | AA2CODON = { 18 | 'l': ['tta', 'ttg', 'ctt', 'ctc', 'cta', 'ctg'], 19 | 's': ['tct', 'tcc', 'tca', 'tcg', 'agt', 'agc'], 20 | 'r': ['cgt', 'cgc', 'cga', 'cgg', 'aga', 'agg'], 21 | 'v': ['gtt', 'gtc', 'gta', 'gtg'], 22 | 'a': ['gct', 'gcc', 'gca', 'gcg'], 23 | 'p': ['cct', 'ccc', 'cca', 'ccg'], 24 | 't': ['act', 'acc', 'aca', 'acg'], 25 | 'g': ['ggt', 'ggc', 'gga', 'ggg'], 26 | '*': ['taa', 'tag', 'tga'], 27 | 'i': ['att', 'atc', 'ata'], 28 | 'y': ['tat', 'tac'], 29 | 'f': ['ttt', 'ttc'], 30 | 'c': ['tgt', 'tgc'], 31 | 'h': ['cat', 'cac'], 32 | 'q': ['caa', 'cag'], 33 | 'n': ['aat', 'aac'], 34 | 'k': ['aaa', 'aag'], 35 | 'd': ['gat', 'gac'], 36 | 'e': ['gaa', 'gag'], 37 | 'w': ['tgg'], 38 | 'm': ['atg'] 39 | } 40 | 41 | 42 | NUC_ORDERED = ['A', 'T', 'C', 'G'] 43 | NUC2IDX = {nuc: i for i, nuc in enumerate(NUC_ORDERED)} 44 | 45 | AA_ORDERED = [k.upper() for k in AA2CODON.keys()] 46 | AA2IDX = {aa: i for i, aa in enumerate(AA_ORDERED)} 47 | 48 | def pnuc2paa(pnuc_Lxk): 49 | """ 50 | Converts nucleotide probabilities to amino acid probabilities. 51 | """ 52 | L = pnuc_Lxk.shape[0] 53 | paadf_kxL = pd.DataFrame(0., index=AA_ORDERED, columns=range(int(L / 3))) 54 | for i in range(int(L / 3)): 55 | for aa in AA_ORDERED: 56 | codons = AA2CODON[aa.lower()] 57 | # for each codon corresponding to the AA, compute probability of generating that codon 58 | for cod in codons: 59 | p_cod = 1 60 | for j in range(3): # multiply probabilities of each of the 3 nucleotides in the codon 61 | nuc_idx = NUC2IDX[cod[j].upper()] 62 | p_cod *= pnuc_Lxk[i * 3 + j, nuc_idx] 63 | paadf_kxL[i].loc[aa] += p_cod 64 | return np.array(paadf_kxL).T 65 | 66 | def phinuc2paa(phinuc_Lxk): 67 | """ 68 | Converts unnormalized nucleotide probabilities to amino acid probabilities. 69 | """ 70 | # normalize probabilities of categorical distribution per site 71 | pnuc_Lxk = np.exp(phinuc_Lxk) / np.sum(np.exp(phinuc_Lxk), axis=1, keepdims=True) 72 | # convert nucleotide probabilities to amino acid probabilities 73 | paa_Lxk = np.array(pnuc2paa(pnuc_Lxk)) 74 | return paa_Lxk 75 | 76 | # NNK (training) distribution 77 | pnnknucpersite = np.array([[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25], [0, 0.5, 0, 0.5]]) 78 | pnnknuc_Lxk = np.tile(pnnknucpersite, [7, 1]) 79 | PNNKAA_LXK = np.array(pnuc2paa(pnnknuc_Lxk)) 80 | 81 | # ----- rejection sampling from test distribution ----- 82 | 83 | def get_loglikelihood(seq_n, p_Lxk: np.array): 84 | ohe_nxLxk = np.stack([one_hot_encode(seq, flatten=False) for seq in seq_n]) 85 | logp_1xLxk = np.log(p_Lxk)[None, :, :] 86 | logp_n = np.sum(ohe_nxLxk * logp_1xLxk, axis=(1, 2)) 87 | return logp_n 88 | 89 | def get_rejection_sampling_acceptance_probabilities(seq_n, phitestnuc_Lxk, logptrain_n): 90 | ptestaa_Lxk = phinuc2paa(phitestnuc_Lxk) 91 | ratio_Lxk = ptestaa_Lxk / PNNKAA_LXK 92 | maxp_l = np.max(ratio_Lxk, axis=1) 93 | M = np.prod(maxp_l) 94 | 95 | # compute test likelihoods of all data 96 | logptest_n = get_loglikelihood(seq_n, ptestaa_Lxk) 97 | paccept_n = np.exp(logptest_n - (np.log(M) + logptrain_n)) 98 | return paccept_n, logptest_n 99 | 100 | def rejection_sample_from_test_distribution(paccept_n): 101 | nonzero_samples_from_test = False 102 | while not nonzero_samples_from_test: 103 | accept_n = sc.stats.bernoulli.rvs(paccept_n) 104 | testsamp_idx = np.where(accept_n)[0] 105 | n_test = testsamp_idx.size 106 | if n_test: 107 | nonzero_samples_from_test = True 108 | return testsamp_idx 109 | 110 | # ----- class for sequence-fitness data generation ----- 111 | 112 | def one_hot_encode(seq, flatten: bool = True): 113 | l = len(seq) 114 | ohe_lxk = np.zeros((l, len(AA_ORDERED))) 115 | ones_idx = (range(l), [AA2IDX[seq[i]] for i in range(l)]) 116 | ohe_lxk[ones_idx] = 1 117 | return ohe_lxk.flatten() if flatten else ohe_lxk 118 | 119 | class DataGenerator(Sequence): 120 | def __init__(self, seq_n, fitness_nx2 = None, ids = None, batch_size: int = 1000, shuffle: bool = True): 121 | self.seq_n = seq_n 122 | # (estimates of) mean and variance log enrichment score (dummy values if using for prediction) 123 | self.fitness_nx2 = fitness_nx2 if fitness_nx2 is not None else np.zeros([len(seq_n), 2]) 124 | self.ids = ids if ids is not None else range(len(seq_n)) 125 | self.batch_size = batch_size 126 | self.shuffle = shuffle 127 | self.n_feat = len(self.seq_n[0]) * len(AA_ORDERED) 128 | self.on_epoch_end() 129 | 130 | def on_epoch_end(self): 131 | """ 132 | Update indices after each epoch. 133 | """ 134 | self.idx = np.arange(len(self.ids)) 135 | if self.shuffle: 136 | np.random.shuffle(self.idx) 137 | 138 | def __len__(self): 139 | return int(np.floor(len(self.ids) / self.batch_size)) 140 | 141 | def __getitem__(self, index): 142 | # generate indices of the batch 143 | idx = self.idx[index * self.batch_size : (index + 1) * self.batch_size] 144 | 145 | # find list of IDs 146 | ids = [self.ids[k] for k in idx] 147 | 148 | # fetch sequences and their (estimated) fitness mean and variance 149 | X_bxp = np.array([one_hot_encode(self.seq_n[idx], flatten=True) for idx in ids]) 150 | y_nx2 = self.fitness_nx2[ids] 151 | return [X_bxp, y_nx2[:, 0], y_nx2[:, 1]] 152 | 153 | # ===== utilities and classes for fluorescence experiments ===== 154 | 155 | # ----- utilities for Walsh-Hadamard transform ----- 156 | # adapted from David H. Brookes's code (https://github.com/dhbrookes/FitnessSparsity/blob/main/src/utils.py) for: 157 | # D. H. Brookes, A. Aghazadeh, J. Listgarten, 158 | # On the sparsity of fitness functions and implications for learning. PNAS, 119 (2022). 159 | 160 | def get_interactions(seq_len, order: int = None): 161 | """ 162 | Returns a list of tuples of epistatic interactions for a given sequence length, up to a specified order. 163 | For example, get_interactions(3) returns [(), (0,), (1,), (2,), (0, 1), (0, 2), (1, 2), (0, 1, 2)]. 164 | This sets of the order used for regression coefficients. 165 | """ 166 | if order is None: 167 | order = seq_len 168 | sites = list(range(seq_len)) 169 | combos = chain.from_iterable(combinations(sites, o) for o in range(order + 1)) 170 | return list(combos) 171 | 172 | def walsh_hadamard_from_seqs(signedseq_nxl: np.array, order: int = None, normalize: bool = False): 173 | """ 174 | Returns an n x array of (truncated) Walsh-Hadamard encodings of a given n x array of binary sequences. 175 | """ 176 | n, seq_len = signedseq_nxl.shape 177 | interactions = get_interactions(seq_len, order=order) 178 | X_nxp = np.zeros((n, len(interactions))) 179 | for i, idx in enumerate(interactions): 180 | if len(idx) == 0: 181 | X_nxp[:, i] = 1 182 | else: 183 | X_nxp[:, i] = np.prod(signedseq_nxl[:, idx], axis=1) 184 | if normalize: 185 | X_nxp /= np.sqrt(np.power(2, seq_len)) # for proper WH matrix 186 | return X_nxp 187 | 188 | # ----- sample training and designed data according to fluorescence experiments ----- 189 | 190 | def get_training_and_designed_data(data, n, gamma, lmbda, seed: int = None): 191 | """ 192 | Sample training data uniformly at random from combinatorially complete data set (Poelwijk et al. 2019), 193 | and sample one designed protein (w/ ground-truth label) according to design algorithm in Eq. 6 of main paper. 194 | 195 | :param data: assay.PoelwijkData object 196 | :param n: int, number of training points, {96, 192, 384} in main paper 197 | :param gamma: float, ridge regularization strength 198 | :param lmbda: float, inverse temperature of design algorithm in Eq. 6, {0, 2, 4, 6} in main paper 199 | :param seed: int, random seed 200 | :return: numpy arrays of training sequences, training labels, designed sequence, label, and prediction 201 | """ 202 | 203 | # get random training data 204 | rng = np.random.default_rng(seed) 205 | train_idx = rng.choice(data.n, n, replace=True) 206 | Xtrain_nxp, ytrain_n = data.X_nxp[train_idx], data.get_measurements(train_idx) # get noisy measurements 207 | 208 | # train ridge regression model 209 | A_pxn = get_invcov_dot_xt(Xtrain_nxp, gamma, use_lapack=True) 210 | beta_p = A_pxn.dot(ytrain_n) 211 | 212 | # construct test input distribution \tilde{p}_{X; Z_{1:n}} 213 | predall_n = data.X_nxp.dot(beta_p) 214 | punnorm_n = np.exp(lmbda * predall_n) 215 | Z = np.sum(punnorm_n) 216 | 217 | # draw test input (index of designed sequence) 218 | test_idx = rng.choice(data.n, 1, p=punnorm_n / Z if lmbda else None) 219 | Xtest_1xp = data.X_nxp[test_idx] 220 | 221 | # get noisy measurement and model prediction for designed sequence 222 | ytest_1 = data.get_measurements(test_idx) 223 | pred_1 = Xtest_1xp.dot(beta_p) 224 | return Xtrain_nxp, ytrain_n, Xtest_1xp, ytest_1, pred_1 225 | 226 | # ----- classes for handling combinatorially complete data sets ----- 227 | 228 | class Assay(ABC): 229 | def __init__(self): 230 | pass 231 | 232 | @abstractmethod 233 | def get_measurements(self, x_idx: np.array, seed: int = None): 234 | raise NotImplementedError 235 | 236 | class PoelwijkData(Assay): 237 | 238 | def __init__(self, fitness: str, order: int = 1, noise_estimate_order: int = 7, sig_level: float = 0.01, 239 | load_precomputed_noise: bool = True): 240 | if fitness not in ['red', 'blue']: 241 | raise ValueError('Unrecognized fitness name: {}'.format(fitness)) 242 | 243 | # ===== featurize sequences as higher-order interaction terms ===== 244 | 245 | df = self.read_poelwijk_supp3() 246 | self.Xsigned_nxp = self.strarr2signedarr(df.binary_genotype) # 1/-1 encoding of sequences 247 | self.X_nxp = walsh_hadamard_from_seqs(self.Xsigned_nxp, order=order) # featurize including intercept 248 | 249 | self.n, self.p = self.X_nxp.shape 250 | self.order = order 251 | print('Using {} order-{} features'.format(self.p, order)) 252 | 253 | if fitness == 'blue': 254 | self.y_n = np.array(df.brightness_blue) 255 | elif fitness == 'red': 256 | self.y_n = np.array(df.brightness_red) 257 | 258 | # ===== estimate per-sequence measurement noise SD ===== 259 | 260 | if load_precomputed_noise: 261 | d = np.load('../fluorescence/{}_noise.npz'.format(fitness)) 262 | self.se_n = d['se_n'] 263 | print("Loading estimated measurement noise SD computed using order {} and significance level {}".format( 264 | d['order_est_noise'], d['sig_level'])) 265 | 266 | else: 267 | t0 = time.time() 268 | 269 | # ===== compute Walsh-Hadamard transform, truncated to order noise_estimate_order ===== 270 | # best linear model of complete fitness landscape using terms of up to noise_estimate_order. 271 | # default value of noise_estimate_order = 7 taken from Poelwijk et al. (2019), who found 272 | # significant epistatic interactions of up to order 7 in the complete fitness landscape (see their Fig. 2e) 273 | 274 | # encode all 2^13 sequences with up to noise_estimate_order terms 275 | X_nxp = walsh_hadamard_from_seqs(self.Xsigned_nxp, order=noise_estimate_order) 276 | n_feat = X_nxp.shape[1] 277 | print('Estimating noise using {} interaction terms up to order {}'.format(n_feat, noise_estimate_order)) 278 | 279 | # fit linear model using all 2^13 fitness measurements 280 | ols = LinearRegression(fit_intercept=False) # featurization from walsh_hadamard_from_seqs has intercept 281 | ols.fit(X_nxp, self.y_n) 282 | 283 | # determine statistically significant coefficients 284 | # compute t-statistics 285 | pred_n = ols.predict(X_nxp) 286 | sigmasq_hat = np.sum(np.square(self.y_n - pred_n)) / (self.n - n_feat) # estimate of \sigma^2 287 | var_p = sigmasq_hat * (np.linalg.inv(np.dot(X_nxp.T, X_nxp)).diagonal()) 288 | ts_p = ols.coef_ / np.sqrt(var_p) 289 | 290 | # two-sided p-values 291 | pvals = np.array([2 * (1 - sc.stats.t.cdf(np.abs(t), (self.n - n_feat))) for t in ts_p]) 292 | self.coef_pvals = pvals 293 | 294 | # use Bonferroni-Sidak correction as in Poelwijk et al. (2019) (Fig. 2e, S6) 295 | threshold = 1 - np.power(1 - sig_level, 1 / n_feat) 296 | sigterm_idx = np.where(pvals < threshold)[0] 297 | print("{} terms below {} for significance level {}. {:.1f} s".format( 298 | sigterm_idx.size, threshold, sig_level, time.time() - t0)) 299 | 300 | # estimate per-sequence measurement noise SD by taking difference between measurements and 301 | # predictions made using the statistically significant coefficients 302 | pred_n = X_nxp[:, sigterm_idx].dot(ols.coef_[sigterm_idx]) 303 | self.se_n = np.abs(pred_n - self.y_n) 304 | np.savez('../fluorescence/{}_noise.npz'.format(fitness), 305 | se_n=self.se_n, noise_estimate_order=noise_estimate_order, pvals=pvals, threshold=threshold, 306 | sigterm_idx=sigterm_idx, n_feat=n_feat, sig_level=sig_level) 307 | 308 | def find(self, Xsigned_nxp): 309 | return np.array([np.where((self.Xsigned_nxp == X_p).all(axis=1))[0][0] for X_p in Xsigned_nxp]) 310 | 311 | def read_poelwijk_supp3(self): 312 | """ 313 | Parse Poelwijk et al. (2019) Supplementary Data 3 for raw data. 314 | 315 | :return: pandas dataframe 316 | """ 317 | df = pd.read_excel("../fluorescence/supp_data_3.xlsx", skiprows=2, header=None) 318 | df.columns = ["binary_genotype", "amino_acid_sequence", "counts_input", "counts_red", "counts_blue", 319 | "UNK1", "brightness_red", "brightness_blue", "UNK2", "brightness_combined"] 320 | df["binary_genotype"] = df["binary_genotype"].apply(lambda x: x[1:-1]) 321 | return df 322 | 323 | def strarr2signedarr(self, binstrarr): 324 | """ 325 | Convert array of strings of 0s and 1s to numpy array of -1s and 1s 326 | 327 | :param binstrarr: iterable containing strings of 0s and 1s 328 | :return: numpy array where each row corresponds to string 329 | """ 330 | return np.array([[2 * int(b) - 1 for b in binstr] for binstr in binstrarr]) 331 | 332 | def get_measurements(self, seqidx_n: np.array, seed: int = None): 333 | """ 334 | Given indices of sequences, return noisy measurements (using estimated measurement noise SD). 335 | 336 | :param seqidx_n: iterable of ints, indices of which sequences to get measurements for 337 | :param seed: int, random seed 338 | :return: numpy array of noisy measurements corresponding to provided sequence indices 339 | """ 340 | np.random.seed(seed) 341 | noisy_n = np.array([np.random.normal(loc=self.y_n[i], scale=self.se_n[i]) for i in seqidx_n]) 342 | # enforce non-negative measurement since enrichment scores are always non-negative 343 | return np.fmax(noisy_n, 0) 344 | 345 | -------------------------------------------------------------------------------- /calibrate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classes for full conformal prediction for exchangeable, standard, and feedback covariate shift data, 3 | both for black-box predictive models and computationally optimized for ridge regression, and 4 | functions for (random, exact-coverage) split conformal prediction under standard covariate shift. 5 | Throughout this file, variable name suffixes denote the shape of the numpy array, where 6 | n: number of training points, or generic number of data points 7 | n1: n + 1 8 | p: number of features 9 | y: number of candidate labels, |Y| 10 | u: number of sequences in domain, |X| 11 | m: number of held-out calibration points for split conformal methods 12 | """ 13 | 14 | import numpy as np 15 | import time 16 | import scipy as sc 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | # ===== utilities for split conformal ===== 21 | 22 | def get_split_coverage(lu_nx2, y_n): 23 | """ 24 | Computes empirical coverage of split conformal confidence interval 25 | :param lu_nx2: (n, 2) numpy array where first and second columns are lower and upper endpoints 26 | :param y_n: (n,) numpy array of true labels 27 | :return: float, empirical coverage 28 | """ 29 | cov = np.sum((y_n >= lu_nx2[:, 0]) & (y_n <= lu_nx2[:, 1])) / y_n.size 30 | return cov 31 | 32 | def get_randomized_staircase_coverage(C_n, y_n): 33 | """ 34 | Computes empirical coverage and lengths of randomized staircase confidence sets. 35 | 36 | :param C_n: length-n list of outputs of get_randomized_staircase_confidence_set (i.e., list of tuples) 37 | :param y_n: (n,) numpy array of true labels 38 | :return: (n,) binary array of coverage and (n,) numpy array of lengths 39 | """ 40 | def is_covered(confint_list, y): 41 | for confint_2 in confint_list: 42 | if y >= confint_2[0] and y <= confint_2[1]: 43 | return True 44 | return False 45 | def get_len_conf_set(confint_list): 46 | return np.sum([confint_2[1] - confint_2[0] for confint_2 in confint_list]) 47 | 48 | cov_n = np.array([is_covered(confset, y) for confset, y in zip(C_n, y_n)]) 49 | len_n = np.array([get_len_conf_set(confset) for confset in C_n]) 50 | return cov_n, len_n 51 | 52 | def get_randomized_staircase_confidence_set(scores_m, weights_m1, predtest, alpha: float = 0.1): 53 | """ 54 | Computes the "randomized staircase" confidence set in Alg. S1. 55 | 56 | :param scores_m: (m,) numpy array of calibration scores 57 | :param weights_m1: (m + 1) numpy array of calibration weights and single test weight 58 | :param predtest: float, prediction on test input 59 | :param alpha: miscoverage level 60 | :return: list of tuples (l, u), where l and u are floats denoting lower and upper 61 | endpoints of an interval. 62 | """ 63 | lb_is_set = False 64 | idx = np.argsort(scores_m) 65 | sortedscores_m1 = np.hstack([0, scores_m[idx]]) 66 | sortedweights_m1 = np.hstack([0, weights_m1[: -1][idx]]) 67 | C = [] 68 | 69 | # interval that is deterministically included in the confidence set 70 | # (color-coded green in Fig. S1) 71 | cdf_m1 = np.cumsum(sortedweights_m1) # CDF up to i-th sorted calibration score 72 | cdf_plus_test_weight_m1 = cdf_m1 + weights_m1[-1] 73 | deterministic_idx = np.where(cdf_plus_test_weight_m1 < 1 - alpha)[0] 74 | if deterministic_idx.size: 75 | i_det = np.max(deterministic_idx) 76 | C.append((predtest - sortedscores_m1[i_det + 1], predtest + sortedscores_m1[i_det + 1])) 77 | 78 | # intervals that are randomly included in the confidence set 79 | # (color-coded teal and blue in Fig. S1) 80 | for i in range(i_det + 1, sortedscores_m1.size - 1): 81 | assert(cdf_plus_test_weight_m1[i] >= 1 - alpha) 82 | if cdf_plus_test_weight_m1[i] >= 1 - alpha and cdf_m1[i] < 1 - alpha: 83 | if not lb_is_set: 84 | lb_is_set = True 85 | LF = cdf_m1[i] 86 | F = (cdf_plus_test_weight_m1[i] - (1 - alpha)) / (cdf_m1[i] + weights_m1[-1] - LF) 87 | if sc.stats.bernoulli.rvs(1 - F): 88 | C.append((predtest + sortedscores_m1[i], predtest + sortedscores_m1[i + 1])) 89 | C.append((predtest - sortedscores_m1[i + 1], predtest - sortedscores_m1[i])) 90 | 91 | # halfspaces that are randomly included in the confidence set 92 | # (color-coded purple in Fig. S1) 93 | if cdf_m1[-1] < 1 - alpha: # sum of all calibration weights 94 | if not lb_is_set: 95 | LF = cdf_m1[-1] 96 | F = alpha / (1 - LF) 97 | if sc.stats.bernoulli.rvs(1 - F): 98 | C.append((predtest + sortedscores_m1[-1], np.inf)) 99 | C.append((-np.inf, predtest - sortedscores_m1[-1])) 100 | return C 101 | 102 | 103 | 104 | # ========== full conformal utilities ========== 105 | 106 | def get_weighted_quantile(quantile, w_n1xy, scores_n1xy): 107 | """ 108 | Compute the quantile of weighted scores for each candidate label y 109 | 110 | :param quantile: float, quantile 111 | :param w_n1xy: (n + 1, |Y|) numpy array of weights (unnormalized) 112 | :param scores_n1xy: (n + 1, |Y|) numpy array of scores 113 | :return: (|Y|,) numpy array of quantiles 114 | """ 115 | if w_n1xy.ndim == 1: 116 | w_n1xy = w_n1xy[:, None] 117 | scores_n1xy = scores_n1xy[:, None] 118 | 119 | # normalize probabilities 120 | p_n1xy = w_n1xy / np.sum(w_n1xy, axis=0) 121 | 122 | # sort scores and their weights accordingly 123 | sorter_per_y_n1xy = np.argsort(scores_n1xy, axis=0) 124 | sortedscores_n1xy = np.take_along_axis(scores_n1xy, sorter_per_y_n1xy, axis=0) 125 | sortedp_n1xy = np.take_along_axis(p_n1xy, sorter_per_y_n1xy, axis=0) 126 | 127 | # locate quantiles of weighted scores per y 128 | cdf_n1xy = np.cumsum(sortedp_n1xy, axis=0) 129 | qidx_y = np.sum(cdf_n1xy < quantile, axis=0) # equivalent to [np.searchsorted(cdf_n1, q) for cdf_n1 in cdf_n1xy] 130 | q_y = sortedscores_n1xy[(qidx_y, range(qidx_y.size))] 131 | return q_y 132 | 133 | def is_covered(y, confset, y_increment): 134 | """ 135 | Return if confidence set covers true label 136 | 137 | :param y: true label 138 | :param confset: numpy array of values in confidence set 139 | :param y_increment: float, \Delta increment between candidate label values, 0.01 in main paper 140 | :return: bool 141 | """ 142 | return np.any(np.abs(y - confset) < (y_increment / 2)) 143 | 144 | 145 | 146 | # ========== utilities and classes for full conformal with ridge regression ========== 147 | 148 | def get_invcov_dot_xt(X_nxp, gamma, use_lapack: bool = True): 149 | """ 150 | Compute (X^TX + \gamma I)^{-1} X^T 151 | 152 | :param X_nxp: (n, p) numpy array encoding sequences 153 | :param gamma: float, ridge regularization strength 154 | :param use_lapack: bool, whether or not to use low-level LAPACK functions for inverting covariance (fastest) 155 | :return: (p, n) numpy array, (X^TX + \gamma I)^{-1} X^T 156 | """ 157 | reg_pxp = gamma * np.eye(X_nxp.shape[1]) 158 | reg_pxp[0, 0] = 0 # don't penalize intercept term 159 | cov_pxp = X_nxp.T.dot(X_nxp) + reg_pxp 160 | if use_lapack: 161 | # fastest way to invert PD matrices from 162 | # https://stackoverflow.com/questions/40703042/more-efficient-way-to-invert-a-matrix-knowing-it-is-symmetric-and-positive-semi 163 | zz, _ = sc.linalg.lapack.dpotrf(cov_pxp, False, False) 164 | invcovtri_pxp, info = sc.linalg.lapack.dpotri(zz) 165 | assert(info == 0) 166 | invcov_pxp = np.triu(invcovtri_pxp) + np.triu(invcovtri_pxp, k=1).T 167 | else: 168 | invcov_pxp = sc.linalg.pinvh(cov_pxp) 169 | return invcov_pxp.dot(X_nxp.T) 170 | 171 | 172 | class ConformalRidge(ABC): 173 | """ 174 | Abstract base class for full conformal with computations optimized for ridge regression. 175 | """ 176 | def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True): 177 | """ 178 | :param ptrain_fn: function that outputs likelihood of input under training input distribution, p_X 179 | :param ys: numpy array of candidate labels 180 | :param Xuniv_uxp: (u, p) numpy array encoding all sequences in domain (e.g., all 2^13 sequences 181 | in Poelwijk et al. 2019 data set), needed for computing normalizing constant 182 | :param gamma: float, ridge regularization strength 183 | :param use_lapack: bool, whether or not to use low-level LAPACK functions for inverting covariance (fastest) 184 | """ 185 | self.ptrain_fn = ptrain_fn 186 | self.Xuniv_uxp = Xuniv_uxp 187 | self.p = Xuniv_uxp.shape[1] 188 | self.ys = ys 189 | self.n_y = ys.size 190 | self.gamma = gamma 191 | self.use_lapack = use_lapack 192 | 193 | def get_normalizing_constant(self, beta_p, lmbda): 194 | predall_u = self.Xuniv_uxp.dot(beta_p) 195 | Z = np.sum(np.exp(lmbda * predall_u)) 196 | return Z 197 | 198 | def get_insample_scores(self, Xaug_n1xp, ytrain_n): 199 | """ 200 | Compute in-sample scores, i.e. residuals using model trained on all n + 1 data points (instead of LOO data) 201 | 202 | :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + candidate test point) 203 | :param ytrain_n: (n,) numpy array of true labels for the n training points 204 | :return: (n + 1, |Y|) numpy array of scores 205 | """ 206 | A = get_invcov_dot_xt(Xaug_n1xp, self.gamma, use_lapack=self.use_lapack) 207 | C = A[:, : -1].dot(ytrain_n) # p elements 208 | a_n1 = C.dot(Xaug_n1xp.T) 209 | b_n1 = A[:, -1].dot(Xaug_n1xp.T) 210 | 211 | # process in-sample scores for each candidate value y 212 | scoresis_n1xy = np.zeros([ytrain_n.size + 1, self.n_y]) 213 | by_n1xy = np.outer(b_n1, self.ys) 214 | muhatiy_n1xy = a_n1[:, None] + by_n1xy 215 | scoresis_n1xy[: -1] = np.abs(ytrain_n[:, None] - muhatiy_n1xy[: -1]) 216 | scoresis_n1xy[-1] = np.abs(self.ys - muhatiy_n1xy[-1]) 217 | return scoresis_n1xy 218 | 219 | def compute_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda, compute_lrs: bool = True): 220 | """ 221 | Compute LOO scores, i.e. residuals using model trained on n data points (training + candidate test points, 222 | but leave i-th training point out). 223 | 224 | :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + candidate test point) 225 | :param ytrain_n: (n,) numpy array of true labels for the n training points 226 | :param lmbda: float, inverse temperature of design algorithm in Eq. 6, {0, 2, 4, 6} in main paper 227 | :param compute_lrs: bool: whether or not to compute likelihood ratios (this part takes the longest, 228 | so set to False if only want to compute scores) 229 | :return: (n + 1, |Y|) numpy arrays of scores S_i(X_test, y) and weights w_i^y(X_test) in Eq. 3 in main paper 230 | """ 231 | # fit n + 1 LOO models and store linear parameterizations of \mu_{-i, y}(X_i) as function of y 232 | n = ytrain_n.size 233 | ab_nx2 = np.zeros([n, 2]) 234 | C_nxp = np.zeros([n, self.p]) 235 | An_nxp = np.zeros([n, self.p]) 236 | for i in range(n): 237 | # construct A_{-i} 238 | Xi_nxp = np.vstack([Xaug_n1xp[: i], Xaug_n1xp[i + 1 :]]) # n rows 239 | Ai = get_invcov_dot_xt(Xi_nxp, self.gamma, use_lapack=self.use_lapack) 240 | 241 | # compute linear parameterizations of \mu_{-i, y}(X_i) 242 | yi_ = np.hstack([ytrain_n[: i], ytrain_n[i + 1 :]]) # n - 1 elements 243 | Ci = Ai[:, : -1].dot(yi_) # p elements 244 | ai = Ci.dot(Xaug_n1xp[i]) # = Xtrain_nxp[i] 245 | bi = Ai[:, -1].dot(Xaug_n1xp[i]) 246 | 247 | # store 248 | ab_nx2[i] = ai, bi 249 | C_nxp[i] = Ci 250 | An_nxp[i] = Ai[:, -1] 251 | 252 | # LOO score for i = n + 1 253 | tmp = get_invcov_dot_xt(Xaug_n1xp[: -1], self.gamma, use_lapack=self.use_lapack) 254 | beta_p = tmp.dot(ytrain_n) 255 | alast = beta_p.dot(Xaug_n1xp[-1]) # prediction a_{n + 1}. Xaug_n1xp[-1] = Xtest_p 256 | 257 | # process LOO scores for each candidate value y 258 | scoresloo_n1xy = np.zeros([n + 1, self.n_y]) 259 | by_nxy = np.outer(ab_nx2[:, 1], self.ys) 260 | prediy_nxy = ab_nx2[:, 0][:, None] + by_nxy 261 | scoresloo_n1xy[: -1] = np.abs(ytrain_n[:, None] - prediy_nxy) 262 | scoresloo_n1xy[-1] = np.abs(self.ys - alast) 263 | 264 | # likelihood ratios for each candidate value y 265 | w_n1xy = None 266 | if compute_lrs: 267 | betaiy_nxpxy = C_nxp[:, :, None] + self.ys * An_nxp[:, :, None] 268 | # compute normalizing constant in Eq. 6 in main paper 269 | pred_nxyxu = np.tensordot(betaiy_nxpxy, self.Xuniv_uxp, axes=(1, 1)) 270 | normconst_nxy = np.sum(np.exp(lmbda * pred_nxyxu), axis=2) 271 | ptrain_n = self.ptrain_fn(Xaug_n1xp[: -1]) 272 | 273 | w_n1xy = np.zeros([n + 1, self.n_y]) 274 | wi_num_nxy = np.exp(lmbda * prediy_nxy) 275 | w_n1xy[: -1] = wi_num_nxy / (ptrain_n[:, None] * normconst_nxy) 276 | 277 | # for last i = n + 1, which is constant across candidate values of y 278 | Z = self.get_normalizing_constant(beta_p, lmbda) 279 | w_n1xy[-1] = np.exp(lmbda * alast) / (self.ptrain_fn(Xaug_n1xp[-1][None, :]) * Z) 280 | return scoresloo_n1xy, w_n1xy 281 | 282 | @abstractmethod 283 | def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda): 284 | pass 285 | 286 | def get_confidence_set(self, Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, alpha: float = 0.1, use_is_scores: bool = False): 287 | if (self.p != Xtrain_nxp.shape[1]): 288 | raise ValueError('Feature dimension {} differs from provided Xuniv_uxp {}'.format( 289 | Xtrain_nxp.shape[1], self.Xuniv_uxp.shape)) 290 | Xaug_n1xp = np.vstack([Xtrain_nxp, Xtest_1xp]) 291 | 292 | # ===== compute scores and weights ===== 293 | 294 | # compute in-sample scores 295 | scoresis_n1xy = self.get_insample_scores(Xaug_n1xp, ytrain_n) if use_is_scores else None 296 | 297 | # compute LOO scores and likelihood ratios 298 | scoresloo_n1xy, w_n1xy = self.get_loo_scores_and_lrs(Xaug_n1xp, ytrain_n, lmbda) 299 | 300 | # ===== construct confidence sets ===== 301 | 302 | # based on LOO score 303 | looq_y = get_weighted_quantile(1 - alpha, w_n1xy, scoresloo_n1xy) 304 | loo_cs = self.ys[scoresloo_n1xy[-1] <= looq_y] 305 | 306 | # based on in-sample score 307 | is_cs = None 308 | if use_is_scores: 309 | isq_y = get_weighted_quantile(1 - alpha, w_n1xy, scoresis_n1xy) 310 | is_cs = self.ys[scoresis_n1xy[-1] <= isq_y] 311 | return loo_cs, is_cs 312 | 313 | 314 | class ConformalRidgeExchangeable(ConformalRidge): 315 | """ 316 | Class for full conformal with ridge regression, assuming exchangeable data. 317 | """ 318 | def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True): 319 | super().__init__(ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack=use_lapack) 320 | 321 | def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda): 322 | scoresloo_n1xy, _ = self.compute_loo_scores_and_lrs(Xaug_n1xp, ytrain_n, lmbda, compute_lrs=False) 323 | # for exchangeble data, equal weights on all data points (no need to compute likelihood ratios in line above) 324 | w_n1xy = np.ones([Xaug_n1xp.shape[0], self.n_y]) 325 | return scoresloo_n1xy, w_n1xy 326 | 327 | 328 | class ConformalRidgeFeedbackCovariateShift(ConformalRidge): 329 | """ 330 | Class for full conformal with ridge regression under feedback covariate shift via Eq. 6 in main paper. 331 | """ 332 | def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True): 333 | super().__init__(ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack=use_lapack) 334 | 335 | def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda): 336 | scoresloo_n1xy, w_n1xy = self.compute_loo_scores_and_lrs(Xaug_n1xp, ytrain_n, lmbda, compute_lrs=True) 337 | return scoresloo_n1xy, w_n1xy 338 | 339 | 340 | class ConformalRidgeStandardCovariateShift(ConformalRidge): 341 | """ 342 | Class for full conformal with ridge regression under standard covariate shift. 343 | """ 344 | def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True): 345 | super().__init__(ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack=use_lapack) 346 | 347 | def get_lrs(self, Xaug_n1xp, ytrain_n, lmbda): 348 | # fit model to training data 349 | tmp = get_invcov_dot_xt(Xaug_n1xp[: -1], self.gamma, use_lapack=self.use_lapack) 350 | beta_p = tmp.dot(ytrain_n) 351 | 352 | # compute normalizing constant for test covariate distribution 353 | Z = self.get_normalizing_constant(beta_p, lmbda) 354 | 355 | # get likelihood ratios for n + 1 covariates 356 | pred_n1 = Xaug_n1xp.dot(beta_p) 357 | ptest_n1 = np.exp(lmbda * pred_n1) / Z 358 | w_n1 = ptest_n1 / self.ptrain_fn(Xaug_n1xp) 359 | return w_n1 360 | 361 | def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda): 362 | # LOO scores 363 | scoresloo_n1xy, _ = self.compute_loo_scores_and_lrs(Xaug_n1xp, ytrain_n, lmbda, compute_lrs=False) 364 | 365 | # compute likelihood ratios 366 | w_n1 = self.get_lrs(Xaug_n1xp, ytrain_n, lmbda) 367 | w_n1xy = w_n1[:, None] * np.ones([Xaug_n1xp.shape[0], self.n_y]) 368 | return scoresloo_n1xy, w_n1xy 369 | 370 | 371 | 372 | # ========== utilities and classes for full conformal with black-box model ========== 373 | 374 | def get_scores(model, Xaug_nxp, yaug_n, use_loo_score: bool = False): 375 | if use_loo_score: 376 | n1 = yaug_n.size # n + 1 377 | scores_n1 = np.zeros([n1]) 378 | 379 | for i in range(n1): 380 | Xtrain_nxp = np.vstack([Xaug_nxp[: i], Xaug_nxp[i + 1 :]]) 381 | ytrain_n = np.hstack([yaug_n[: i], yaug_n[i + 1 :]]) 382 | 383 | # train on LOO dataset 384 | model.fit(Xtrain_nxp, ytrain_n) 385 | pred_1 = model.predict(Xaug_nxp[i][None, :]) 386 | scores_n1[i] = np.abs(yaug_n[i] - pred_1[0]) 387 | 388 | else: # in-sample score 389 | model.fit(Xaug_nxp, yaug_n) 390 | pred_n1 = model.predict(Xaug_nxp) 391 | scores_n1 = np.abs(yaug_n - pred_n1) 392 | return scores_n1 393 | 394 | 395 | class Conformal(ABC): 396 | """ 397 | Abstract base class for full conformal with black-box predictive model. 398 | """ 399 | def __init__(self, model, ptrain_fn, ys, Xuniv_uxp): 400 | """ 401 | :param model: object with predict() method 402 | :param ptrain_fn: function that outputs likelihood of input under training input distribution, p_X 403 | :param ys: (|Y|,) numpy array of candidate labels 404 | :param Xuniv_uxp: (u, p) numpy array encoding all sequences in domain (e.g., all 2^13 sequences 405 | in Poelwijk et al. 2019 data set), needed for computing normalizing constant 406 | """ 407 | self.model = model 408 | self.ptrain_fn = ptrain_fn 409 | self.ys = ys 410 | self.Xuniv_uxp = Xuniv_uxp 411 | self.p = Xuniv_uxp.shape[1] 412 | self.n_y = ys.size 413 | 414 | @abstractmethod 415 | def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda): 416 | pass 417 | 418 | def get_confidence_set(self, Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, 419 | use_loo_score: bool = True, alpha: float = 0.1, print_every: int = 10, verbose: bool = True): 420 | if (self.p != Xtrain_nxp.shape[1]): 421 | raise ValueError('Feature dimension {} differs from provided Xuniv_uxp {}'.format( 422 | Xtrain_nxp.shape[1], self.Xuniv_uxp.shape)) 423 | 424 | np.set_printoptions(precision=3) 425 | cs, n = [], ytrain_n.size 426 | t0 = time.time() 427 | Xaug_n1xp = np.vstack([Xtrain_nxp, Xtest_1xp]) 428 | scores_n1xy = np.zeros([n + 1, self.n_y]) 429 | w_n1xy = np.zeros([n + 1, self.n_y]) 430 | 431 | for y_idx, y in enumerate(self.ys): 432 | 433 | # get scores 434 | yaug_n1 = np.hstack([ytrain_n, y]) 435 | scores_n1 = get_scores(self.model, Xaug_n1xp, yaug_n1, use_loo_score=use_loo_score) 436 | scores_n1xy[:, y_idx] = scores_n1 437 | 438 | # get likelihood ratios 439 | w_n1 = self.get_lrs(Xaug_n1xp, yaug_n1, lmbda) 440 | w_n1xy[:, y_idx] = w_n1 441 | 442 | # for each value of inverse temperature lambda, compute quantile of weighted scores 443 | q = get_weighted_quantile(1 - alpha, w_n1, scores_n1) 444 | 445 | # if y <= quantile, include in confidence set 446 | if scores_n1[-1] <= q: 447 | cs.append(y) 448 | 449 | # print progress 450 | if verbose and (y_idx + 1) % print_every == 0: 451 | print("Done with {} / {} y values ({:.1f} s)".format( 452 | y_idx + 1, self.ys.size, time.time() - t0)) 453 | return np.array(cs), scores_n1xy, w_n1xy 454 | 455 | 456 | class ConformalExchangeable(Conformal): 457 | """ 458 | Full conformal with black-box predictive model, assuming exchangeable data. 459 | """ 460 | def __init__(self, model, ptrain_fn, ys, Xuniv_uxp): 461 | super().__init__(model, ptrain_fn, ys, Xuniv_uxp) 462 | 463 | def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda): 464 | return np.ones([Xaug_n1xp.shape[0]]) 465 | 466 | 467 | class ConformalFeedbackCovariateShift(Conformal): 468 | """ 469 | Full conformal with black-box predictive model under feedback covariate shift via Eq. 6 in main paper. 470 | """ 471 | def __init__(self, model, ptrain_fn, ys, Xuniv_uxp): 472 | super().__init__(model, ptrain_fn, ys, Xuniv_uxp) 473 | 474 | def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda): 475 | # compute weights for each value of lambda, the inverse temperature 476 | w_n1 = np.zeros([yaug_n1.size]) 477 | for i in range(yaug_n1.size): 478 | 479 | # fit LOO model 480 | Xtr_nxp = np.vstack([Xaug_n1xp[: i], Xaug_n1xp[i + 1 :]]) 481 | ytr_n = np.hstack([yaug_n1[: i], yaug_n1[i + 1 :]]) 482 | self.model.fit(Xtr_nxp, ytr_n) 483 | 484 | # compute normalizing constant 485 | predall_n = self.model.predict(self.Xuniv_uxp) 486 | Z = np.sum(np.exp(lmbda * predall_n)) 487 | 488 | # compute likelihood ratios 489 | testpred = self.model.predict(Xaug_n1xp[i][None, :]) 490 | ptest = np.exp(lmbda * testpred) / Z 491 | w_n1[i] = ptest / self.ptrain_fn(Xaug_n1xp[i][None, :]) 492 | return w_n1 493 | 494 | 495 | class ConformalStandardCovariateShift(Conformal): 496 | """ 497 | Full conformal with black-box predictive model under standard covariate shift. 498 | """ 499 | def __init__(self, model, ptrain_fn, ys, Xuniv_uxp): 500 | super().__init__(model, ptrain_fn, ys, Xuniv_uxp) 501 | 502 | def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda): 503 | # get normalization constant for test covariate distribution 504 | self.model.fit(Xaug_n1xp[: -1], yaug_n1[: -1]) # Xtrain_nxp, ytrain_n 505 | predall_u = self.model.predict(self.Xuniv_uxp) 506 | Z = np.sum(np.exp(lmbda * predall_u)) 507 | 508 | # get likelihood ratios 509 | pred_n1 = self.model.predict(Xaug_n1xp) 510 | ptest_n1 = np.exp(lmbda * pred_n1) / Z 511 | w_n1 = ptest_n1 / self.ptrain_fn(Xaug_n1xp) 512 | return w_n1 513 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: conformal-design 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | dependencies: 7 | - pip=20.1.1 8 | - python=3.7.7 9 | - numpy=1.18.1 10 | - numpy-base=1.18.1 11 | - matplotlib=3.1.3 12 | - matplotlib-base=3.1.3 13 | - scipy=1.4.1 14 | - pytorch=1.4.0 15 | - torchvision=0.5.0 16 | - pandas=1.0.3 17 | - pillow=7.1.2 18 | - seaborn=0.10.1 19 | - tqdm=4.47.0 20 | - tensorflow-gpu==2.1 21 | - ipykernel 22 | - parse==1.19 23 | - scikit-learn==1.0.1 24 | - xlrd 25 | -------------------------------------------------------------------------------- /fluorescence/blue_n192_lambda0_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda0_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n192_lambda2_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda2_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n192_lambda4_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda4_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n192_lambda6_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda6_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n384_lambda0_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda0_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n384_lambda2_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda2_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n384_lambda4_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda4_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n384_lambda6_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda6_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/blue_n96_lambda0_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda0_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/blue_n96_lambda2_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda2_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/blue_n96_lambda4_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda4_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/blue_n96_lambda6_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda6_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/blue_noise.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_noise.npz -------------------------------------------------------------------------------- /fluorescence/red_n192_lambda0_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda0_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n192_lambda2_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda2_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n192_lambda4_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda4_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n192_lambda6_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda6_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n384_lambda0_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda0_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n384_lambda2_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda2_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n384_lambda4_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda4_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n384_lambda6_alpha0.1_gamma1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda6_alpha0.1_gamma1.npz -------------------------------------------------------------------------------- /fluorescence/red_n96_lambda0_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda0_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/red_n96_lambda2_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda2_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/red_n96_lambda4_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda4_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/red_n96_lambda6_alpha0.1_gamma10.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda6_alpha0.1_gamma10.npz -------------------------------------------------------------------------------- /fluorescence/red_noise.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_noise.npz -------------------------------------------------------------------------------- /fluorescence/supp_data_3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/supp_data_3.xlsx -------------------------------------------------------------------------------- /fluorescence/supp_data_4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/supp_data_4.xlsx -------------------------------------------------------------------------------- /notebooks/aav-experiments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook reproduces the AAV design experiments whose results are shown in Fig 5.\n", 8 | "\n", 9 | "Variable name suffixes in the following cells denote array dimensions, where\n", 10 | "\n", 11 | "n: number of calibration and test data points \n", 12 | "l: number of values of the inverse temperature, lambda \n", 13 | "L: length of sequence \n", 14 | "t: number of trials of sampling from test distribution, per lambda \n", 15 | "s: number of samples from test distribution \n", 16 | "m: number of calibration data points \n", 17 | "m1: m + 1 " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import os\n", 27 | "import sys\n", 28 | "import time\n", 29 | "from importlib import reload\n", 30 | "module_path = os.path.abspath(os.path.join('..'))\n", 31 | "if module_path not in sys.path:\n", 32 | " sys.path.append(module_path)\n", 33 | " \n", 34 | "import numpy as np\n", 35 | "import scipy as sc\n", 36 | "from tensorflow import keras\n", 37 | "\n", 38 | "import assay\n", 39 | "import calibrate as cal" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Load held-out data and parameters of test sequence distributions" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Loaded 1000000 held-out test and calibration data points.\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "# load held-out data (calibration and test data)\n", 64 | "d = np.load('../aav/test_and_calibration_aav_data.npz')\n", 65 | "seq_n = d['seq_n'] # list of strings\n", 66 | "y_n = d['y_n'] # true fitnesses\n", 67 | "n = y_n.size\n", 68 | "print('Loaded {} held-out test and calibration data points.'.format(n))" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 7, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# load parameters of test sequence distributions\n", 78 | "d = np.load('../aav/models/constrained_maxent_parameters.npz')\n", 79 | "\n", 80 | "# phitestnuc_lxLxk[i] is an (L, k) numpy array of unnormalized probabilities of a categorical distribution\n", 81 | "# over k = 4 nucleotides at each of L sequence positions,\n", 82 | "# corresponding to phi in Eq. 5 of Supp. Materials and Methods here:\n", 83 | "# https://www.biorxiv.org/content/10.1101/2021.11.02.467003v2.full\n", 84 | "phitestnuc_lxLxk = d['phitestnuc_lxLxk']\n", 85 | "\n", 86 | "# note that lambda in bioRxiv above corresponds to 1 / lambda for us\n", 87 | "lambda_l = (1 / d['temperature_l']).astype(int)\n", 88 | "meanpredfit_l = d['meanpredfit_l']" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Construct confidence sets for designed sequences" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Compute predictions and scores for all held-out data (calibration and test data)." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 8, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "WARNING:tensorflow:Output lambda_1 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to lambda_1.\n", 115 | "Mean predicted fitness for NNK (training) distribution: -0.47231370210647583\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "# load trained NN and predict for all held-out sequences\n", 121 | "datagen = assay.DataGenerator(seq_n)\n", 122 | "model = keras.models.load_model('../aav/models/h100_0.npy'.format(scale))\n", 123 | "pred_n = model.predict_generator(datagen).reshape(n)\n", 124 | "score_n = np.abs(pred_n - y_n) # score with residual\n", 125 | "print(\"Mean predicted fitness for NNK (training) distribution: {}\".format(np.mean(pred_n)))" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "Use rejection sampling to sample from test distribution, and construct split conformal confidence intervals for resulting designed sequences." 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "n_trial = 500\n", 142 | "alpha = 0.1\n", 143 | "n_cal = 10000\n", 144 | "save_results = True\n", 145 | "savefile = '../aav/split-results.npz'\n", 146 | "\n", 147 | "# compute training likelihoods of all sequences\n", 148 | "logptrain_n = assay.get_loglikelihood(seq_n, assay.PNNKAA_LXK)\n", 149 | "\n", 150 | "n_lambda = phitestnuc_lxLxk.shape[0]\n", 151 | "cov_lxt = np.zeros([n_lambda, n_trial])\n", 152 | "avglen_lxt = np.zeros([n_lambda, n_trial])\n", 153 | "fracinf_lxt = np.zeros([n_lambda, n_trial])\n", 154 | "len_lxt = {(l, t): None for l, t in zip(range(n_lambda), range(n_trial))}\n", 155 | "fit_lxt = {(l, t): None for l, t in zip(range(n_lambda), range(n_trial))}\n", 156 | "\n", 157 | "for l in range(n_lambda - 1, -1, -1):\n", 158 | " t0 = time.time()\n", 159 | " print(\"Test distribution with lambda = {:.1f}\".format(lambda_l[l]))\n", 160 | " \n", 161 | " # compute acceptance probabilities for all sequences (for rejection sampling from test distribution)\n", 162 | " paccept_n, logptest_n = assay.get_rejection_sampling_acceptance_probabilities(\n", 163 | " seq_n, phitestnuc_lxLxk[l], logptrain_n)\n", 164 | "\n", 165 | " # compute (unnormalized) weights for all data\n", 166 | " w_n = np.exp(logptest_n - logptrain_n)\n", 167 | "\n", 168 | " for t in range(n_trial):\n", 169 | " \n", 170 | " # partition held-out data into calibration data and test data\n", 171 | " # (i.e., samples from proposal distribution for rejection sampling from test distribution)\n", 172 | " shuffle_idx = np.random.permutation(n)\n", 173 | " cal_idx, test_idx = shuffle_idx[: n_cal], shuffle_idx[n_cal :]\n", 174 | " \n", 175 | " # sample from test distribution using rejection sampling\n", 176 | " testsamp_idx = assay.rejection_sample_from_test_distribution(paccept_n[test_idx])\n", 177 | " n_test = testsamp_idx.size\n", 178 | " if t == 0:\n", 179 | " print(\" On trial 0, sampled {} sequences from the test distribution.\".format(n_test))\n", 180 | "\n", 181 | " # fetch and normalize weights of calibration data\n", 182 | " p_sxm1 = np.hstack([np.tile(w_n[cal_idx], [n_test, 1]), w_n[test_idx[testsamp_idx]][:, None]])\n", 183 | " p_sxm1 /= np.sum(p_sxm1, axis=1, keepdims=True)\n", 184 | " \n", 185 | " # compute quantile of weighted calibration scores\n", 186 | " augscore_sxm1 = np.tile(np.hstack([score_n[cal_idx], [np.infty]]), (n_test, 1))\n", 187 | " q_sx1 = cal.get_weighted_quantile(1 - alpha, p_sxm1.T, augscore_sxm1.T)[:, None]\n", 188 | " \n", 189 | " # construct confidence intervals\n", 190 | " testpred_sx1 = pred_n[test_idx[testsamp_idx]][:, None]\n", 191 | " lu_sx2 = np.hstack([testpred_sx1 - q_sx1, testpred_sx1 + q_sx1])\n", 192 | " \n", 193 | " # record confidence interval lengths, true fitnesses, and empirical coverage\n", 194 | " noninf_idx = np.where(np.logical_and(~np.isinf(lu_sx2[:, 0]), ~np.isinf(lu_sx2[:, 1])))[0]\n", 195 | " avglen_lxt[l, t] = np.mean(2 * q_sx1[noninf_idx]) if noninf_idx.size else np.nan\n", 196 | " fracinf_lxt[l, t] = (n_test - noninf_idx.size) / n_test\n", 197 | " len_lxt[(l, t)] = 2 * q_sx1.flatten()\n", 198 | " fit_lxt[(l, t)] = y_n[test_idx[testsamp_idx]]\n", 199 | " cov_lxt[l, t] = cal.get_split_coverage(lu_sx2, fit_lxt[(l, t)])\n", 200 | " \n", 201 | " print(\" Empirical coverage: {:.4f}\\n Average non-inf length: {:.2f}\\n Fraction inf: {:.2f}\\n ({:.1f} s)\".format(\n", 202 | " np.mean(cov_lxt[l]), np.nanmean(avglen_lxt[l]), np.mean(fracinf_lxt[l]), time.time() - t0))\n", 203 | " \n", 204 | " # save results after each lambda\n", 205 | " if save_results:\n", 206 | " np.savez(savefile, cov_lxt=cov_lxt, avglen_lxt=avglen_lxt,\n", 207 | " fracinf_lxt=fracinf_lxt, len_lxt=len_lxt, fit_lxt=fit_lxt)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "Ditto, but with randomized staircase confidence sets to achieve exact coverage (Fig. 5)." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 14, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Test distribution with lambda = 7.0\n", 227 | " On trial 0, sampled 6 sequences from the test distribution.\n", 228 | " Average non-inf length 5.80\n", 229 | " Fraction inf 0.17\n", 230 | "109.6 s\n", 231 | "Test distribution with lambda = 6.0\n", 232 | " On trial 0, sampled 15 sequences from the test distribution.\n", 233 | " Average non-inf length 5.49\n", 234 | " Fraction inf 0.06\n", 235 | "150.0 s\n", 236 | "Test distribution with lambda = 5.0\n", 237 | " On trial 0, sampled 40 sequences from the test distribution.\n", 238 | " Average non-inf length 5.06\n", 239 | " Fraction inf 0.01\n", 240 | "228.0 s\n", 241 | "Test distribution with lambda = 4.0\n", 242 | " On trial 0, sampled 128 sequences from the test distribution.\n", 243 | " Average non-inf length 4.81\n", 244 | " Fraction inf 0.00\n", 245 | "338.8 s\n", 246 | "Test distribution with lambda = 3.0\n", 247 | " On trial 0, sampled 472 sequences from the test distribution.\n", 248 | " Average non-inf length 4.75\n", 249 | " Fraction inf 0.00\n", 250 | "771.5 s\n", 251 | "Test distribution with lambda = 2.0\n", 252 | " On trial 0, sampled 3233 sequences from the test distribution.\n", 253 | " Average non-inf length 4.69\n", 254 | " Fraction inf 0.00\n", 255 | "3919.5 s\n", 256 | "Test distribution with lambda = 1.0\n", 257 | " On trial 0, sampled 7130 sequences from the test distribution.\n", 258 | " Average non-inf length 4.79\n", 259 | " Fraction inf 0.00\n", 260 | "7445.0 s\n" 261 | ] 262 | } 263 | ], 264 | "source": [ 265 | "reload(cal)\n", 266 | "n_trial = 500\n", 267 | "alpha = 0.1\n", 268 | "n_cal = 10000\n", 269 | "save_results = True\n", 270 | "savefile = '../aav/randomized-staircase-results.npz'\n", 271 | "\n", 272 | "# compute training likelihoods of all sequences\n", 273 | "logptrain_n = assay.get_loglikelihood(seq_n, assay.PNNKAA_LXK)\n", 274 | "\n", 275 | "n_lambda = phitestnuc_lxLxk.shape[0]\n", 276 | "avglen_lxt = np.zeros([n_lambda, n_trial])\n", 277 | "fracinf_lxt = np.zeros([n_lambda, n_trial])\n", 278 | "len_lxt = {(l, t): None for l, t in zip(range(n_lambda), range(n_trial))}\n", 279 | "fit_lxt = {(l, t): None for l, t in zip(range(n_lambda), range(n_trial))}\n", 280 | "cov_lxt = {(l, t): None for l, t in zip(range(n_lambda), range(n_trial))}\n", 281 | "\n", 282 | "for l in range(n_lambda - 1, -1, -1):\n", 283 | " t0 = time.time()\n", 284 | " print(\"Test distribution with lambda = {:.1f}\".format(lambda_l[l]))\n", 285 | " \n", 286 | " # compute acceptance probabilities for all sequences (for rejection sampling from test distribution)\n", 287 | " paccept_n, logptest_n = assay.get_rejection_sampling_acceptance_probabilities(\n", 288 | " seq_n, phitestnuc_lxLxk[l], logptrain_n)\n", 289 | "\n", 290 | " # compute (unnormalized) weights for all data\n", 291 | " w_n = np.exp(logptest_n - logptrain_n)\n", 292 | "\n", 293 | " for t in range(n_trial):\n", 294 | " \n", 295 | " # partition held-out data into calibration data and test data\n", 296 | " # (i.e., samples from proposal distribution for rejection sampling from test distribution)\n", 297 | " shuffle_idx = np.random.permutation(n)\n", 298 | " cal_idx, test_idx = shuffle_idx[: n_cal], shuffle_idx[n_cal :]\n", 299 | " \n", 300 | " # sample from test distribution using rejection sampling\n", 301 | " testsamp_idx = assay.rejection_sample_from_test_distribution(paccept_n[test_idx])\n", 302 | " n_test = testsamp_idx.size\n", 303 | " if t == 0: # example of how many sequences are sampled from test distribution on a trial\n", 304 | " print(\" On trial 0, sampled {} sequences from the test distribution.\".format(n_test))\n", 305 | "\n", 306 | " # fetch and normalize weights of calibration data\n", 307 | " p_sxm1 = np.hstack([np.tile(w_n[cal_idx], [n_test, 1]), w_n[test_idx[testsamp_idx]][:, None]])\n", 308 | " p_sxm1 /= np.sum(p_sxm1, axis=1, keepdims=True)\n", 309 | " \n", 310 | " # construct randomized staircase confidence set\n", 311 | " testpred_s = pred_n[test_idx[testsamp_idx]]\n", 312 | " C_s = [cal.get_randomized_staircase_confidence_set(\n", 313 | " score_n[cal_idx], weights_m1, pred, alpha) for weights_m1, pred in zip(p_sxm1, testpred_s)]\n", 314 | " \n", 315 | " # record true fitnesses, empirical coverage, confidence set sizes\n", 316 | " fit_lxt[(l, t)] = y_n[test_idx[testsamp_idx]]\n", 317 | " cov_s, len_s = cal.get_randomized_staircase_coverage(C_s, fit_lxt[(l, t)])\n", 318 | " cov_lxt[(l, t)] = cov_s\n", 319 | " noninf_idx = np.where(~np.isinf(len_s))[0]\n", 320 | " avglen_lxt[l, t] = np.mean(len_s[noninf_idx]) if noninf_idx.size else np.nan\n", 321 | " fracinf_lxt[l, t] = (n_test - noninf_idx.size) / n_test\n", 322 | " len_lxt[(l, t)] = len_s\n", 323 | " \n", 324 | " cov = np.mean([np.mean(cov_lxt[(l, t)]) for t in range(n_trial)])\n", 325 | " print(\" Empirical coverage: {:.4f}\\n Average non-inf length: {:.2f}\\n Fraction inf: {:.2f}\\n ({:.1f} s)\".format(\n", 326 | " cov, np.nanmean(avglen_lxt[l]), np.mean(fracinf_lxt[l]), time.time() - t0))\n", 327 | " \n", 328 | " # save results after each lambda\n", 329 | " if save_results:\n", 330 | " np.savez(savefile, cov_lxt=cov_lxt, avglen_lxt=avglen_lxt,\n", 331 | " fracinf_lxt=fracinf_lxt, len_lxt=len_lxt, fit_lxt=fit_lxt)" 332 | ] 333 | } 334 | ], 335 | "metadata": { 336 | "kernelspec": { 337 | "display_name": "TensorFlow-GPU-2.1.0", 338 | "language": "python", 339 | "name": "tf-gpu" 340 | }, 341 | "language_info": { 342 | "codemirror_mode": { 343 | "name": "ipython", 344 | "version": 3 345 | }, 346 | "file_extension": ".py", 347 | "mimetype": "text/x-python", 348 | "name": "python", 349 | "nbconvert_exporter": "python", 350 | "pygments_lexer": "ipython3", 351 | "version": "3.7.7" 352 | } 353 | }, 354 | "nbformat": 4, 355 | "nbformat_minor": 4 356 | } 357 | -------------------------------------------------------------------------------- /notebooks/aav-figures.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook reproduces plotting Fig. 5." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import time \n", 17 | "import numpy as np\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import matplotlib.gridspec as gridspec\n", 21 | "plt.rcParams[\"font.size\"] = 8\n", 22 | "\n", 23 | "import seaborn as sns\n", 24 | "sns.set_style('whitegrid', {'grid.color': '0.9'})" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# load lambdas and mean predicted fitnesses of test sequence distributions\n", 34 | "d = np.load('../aav/models/constrained_maxent_parameters.npz')\n", 35 | "\n", 36 | "# note that lambda in bioRxiv above corresponds to 1 / lambda for us\n", 37 | "lambda_l = (1 / d['temperature_l']).astype(int)\n", 38 | "meanpredfit_l = d['meanpredfit_l']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# load coverage and sizes of randomized staircase confidence sets (constructed in aav-experiments.ipynb)\n", 48 | "fname = '../aav/randomized-staircase-results.npz'\n", 49 | "d = np.load(fname, allow_pickle='True')\n", 50 | "cov_lxt = d['cov_lxt'].item()\n", 51 | "avglen_lxt = d['avglen_lxt']\n", 52 | "fracinf_lxt = d['fracinf_lxt']\n", 53 | "len_lxt = d['len_lxt'].item()\n", 54 | "fit_lxt = d['fit_lxt'].item()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# compute mean empirical coverage and mean true fitness per lambda\n", 64 | "n_lambda, n_trial = lambda_l.size, 500 \n", 65 | "cov_l = np.zeros([n_lambda])\n", 66 | "truefit_l = np.zeros([n_lambda])\n", 67 | "for l in range(n_lambda):\n", 68 | " cov_l[l] = np.mean([np.mean(cov_lxt[(l, t)]) for t in range(n_trial)])\n", 69 | " truefit_l[l] = np.mean([np.mean(fit_lxt[(l, t)]) for t in range(n_trial)])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 67, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcMAAACUCAYAAADvekIlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOydd3hU1fa/3zMtkx4S0giBQAgkYKF3QaT4EwVF6YpY4AoXRREUy0UQketF9IId9Gq4gsAXLqggiiIgICBIlRICAdJ7b1Myc35/jDOmTksySeC8z8ND5pyzz14TNnuds/danyWIoigiISEhISFxEyNragMkJCQkJCSaGskZSkhISEjc9EjOUEJCQkLipkdyhhISEhISNz2SM5SQkJCQuOmRnKGEhISExE2P5AwlJCQkJG56JGfYiJSUlDBjxow6zx85coRVq1a50CKJGwHzuNq2bRsrV66scV4aVxLOYG2+Ki4uZubMmS62yLVIzrAR2bJlC/fdd1+d5wcMGMCvv/6KRqNxoVUSLR1pXEk0BtbGlbe3N6GhoZw8edLFVrkOyRk2Irt27WLYsGEkJSUxdepUxo0bx/jx44mLi7Nc07t3bw4cONCEVkq0NMzjCiApKYmHH36Yu+++m++++85yjTSuJBzFPK70ej1Lly5lzJgxjB07ll9++QWAYcOGsWvXria2svFQNLUBNyo6nY7CwkJ8fX1RqVTExsaiUqk4f/48K1eu5LPPPgOga9eunDlzhlGjRjWxxRItgcrjCuDixYt8/fXXaLVaJk6cyJ133omnp6c0riQcovK4Wr9+PeXl5Xz77beAaYkUTHPVRx991JRmNiqSM2wk8vPz8fLyAkCv1/OPf/yD+Ph4ZDIZRUVFluv8/f3JyclpKjMlWhiVxxXA4MGD8fT0xNPTk5iYGC5fvkz37t2lcSXhEJXH1dGjR5kxYwaCIADg4+MD3PhzlbRM2ki4ubmh1+sBiI2NpX379nz77bd89dVX6HQ6y3U6nQ43N7emMlOihVF5XAGWCav6Z2lcSThC5XElimKNcQWmMaVSqVxtmsuQnGEj4efnR3l5OaIoUlJSQlBQEIIgsH379irXJSYm0rFjxyayUqKlUXlcARw8eJDS0lLy8vK4ePEinTp1AqRxJeEYlcfVgAED2Lx5M6IoIoqiZSUrKSnphh5TkjNsRHr27MmFCxeYOnUqGzZsYNKkSRQUFFS55sSJEwwaNKiJLJRoiZjHFcBtt93G008/zZQpU5g3bx6enp6ANK4kHMc8riZNmoRKpWLMmDHcf//9nDp1CoDjx48zePDgJray8RCkeoaNx4kTJ9i9ezevvPJKrecLCgp4/vnn+fzzz11smURLRhpXEo2BrXH1xBNP8O9//9sSvHWjIQXQNCK9evXi+vXrdZ7PyMhg/vz5rjNI4oZAGlcSjYG1cVVcXMzkyZNvWEcI0puhRAskMzOTWbNmceXKFU6dOoVC8dczXXx8PIsXL0YURZYsWUJ0dHStxyQkJCQqI+0ZSrQ4/Pz8iI2NpXv37jXOrV69mnfffZfVq1ezevXqOo9JSEhIVEZaJpVocbi5udWZNlBYWEhoaChg0lqs65iEhIREZVq0Mzx9+nSVSdFgMCCXyx26hzNtbvR21dtotdpa38KaI0ajscbPtR2ri+rLrlB33pU1nGnTUto1VF8Gg6HFjKv6Un2uagycnSOagsa01dn5qkU7Qzc3N2JiYiyfc3JyaN26tUP3cKbNjd6uepuLFy863K+zODvRmpHJZDV+ru1YXajV6ipjCqRx1Vh9uXJcNTWV5yqzgLparW7QPpz9d2kKqtu6fPlyJkyYQFRUlNV2x44d48KFC/z8888sX76c8PDwGtc4O65atDOUaFgKikr4v50/k5aRTdvQIMbfexd+Pl62GzYgb731Fi+//LLT7X19fcnIyEAQBIu8VG3HJCSairlz5wKwdu3aJrak+RAWFsaxY8eIiooiISGBgwcPWs4NGjTI4iT79u1Lr169uHbtGt6+rVj71TdkZucRGhRQ7/nK5c5w+fLlnDt3jq5du/KPf/zDcvyll14iISEBtVrNxIkTGTNmjKtNu2kRRZG0zBzWbf2e/IIiQCQ1I5ut3+1lxpSxjdp39eUSexyhXq9n5syZxMXF8eSTTzJnzhxOnDjB7NmzeeaZZ5g3bx6iKLJ48WKAWo/d6GRmZpKXl2fzOoPBQHZ2tkP3dqaNuZ3BYCA4ONjhthI3LnFxcSiVSq5evWrX9Xv27OGuu+5i63d7SbieAoJAelZuvecrlzrD8+fPU15ezldffcXixYs5e/Yst912m+X8ypUrad++vStNuqkpKS3jwuXrnLt0lfzCYvIKChEEAZVCiYe7moxs25NpfXnyyScJDQ3l7rvvZtCgQSiVSpttlEolsbGxVY717dsXgOjoaDZu3FjlXG3HbnTy8vLo3LmzzX0ZvV5v1++8vm3AtDx47do1yRlKWCgtLWXnzp0sWLCApUuXAhAZGUlkZGSdbQ4dOsTSpUs5+P46jKKIALir3eo9X7nUGZ4+fZoBAwYAMHDgQM6cOWNxhoIgsHDhQvz8/Fi0aBFhYWGuNO2mwWAwcDUpjXOXrnI9OR3jn2mmXh7uBAa0QqPR4qZSotHpCA0MaHR7YmNjSU1N5ccff2TDhg34+/vzr3/9q9H7vRlobsEUzc0eiabH09OTBQsWAPDaa6/Z1eaNN94AICjAj4KiYgRBoFyrrfd85VJnWFRUZNnw9Pb25vLly5ZzZkf4+++/869//Yv33nvP5v0MBkOVkiIVFRUOlxhxpk1LbJdfWEz8tRSuJKah0ZqqZsgEgXZhQXSOaEtYSGtKyjT88MsxsvMKCPT3464B3V1SskUulyMIAkajsUpFBgmJhuLixYt88MEHlJSU8Pnnn7N27Vpmz57d1GZJ1IN7hg3kWvL/MBiMhAaa9gzrg0udoY+PjyXPq6SkxFInC0yJ1GCq0P3OO+/YdT+5XF4lIkmK+vuLgqIS/m/HHpJSM1Eo5Hi4qy1P5sGBAdwa3ZGYThF4uP8V0RYE/D2iXY2+nNkfspcnnngCf39/7r77bj766COp7JBEo/Dmm2/y4Ycf8vTTTyOXyzl69GiTOUMpHqJhULspae3vR0ArXx6bMLre93OpM+zevTubN29m9OjRHD58mAcffNByrqSkBC8vL65evVrFSd7sFBSVsPW7vaRlZBMaEsj9o+5ApVRQVq6lXKOlrFxT7W8t5RoNl64mUa7RASLoQF9hYNiAntwS3ZHg1v71Sl9oSD755BPkcjm5ubk18vsk6s/f/va3GsdGjhzJhAkT0Gg0PP3001XSTeqKcLx27Rpr1qxhxIgRDB06tNHsbSxEUcTX19cy7g0GQ5PZIjnDhkGrM60iuakc37+uDZuzz4EDBxgyZAiJiYnExsYyevRo+vTp41Rn3bp1Q6VSMXXqVKKjowkNDeXjjz9m9uzZLFiwgMJCUwDHkiVLnLr/jUZhcSmfbfyW7LwCEEUuJSTx70830drfz2bbco3WEgzj7m562xpxh3P/bo3J7t27Wb9+PWFhYaSkpDB16lQeeOCBpjZLohodOnRg3LhxFBcX27w2Li6Ow4cPk5GRQWhoKI8//rgLLLTOAw88wOzZs0lJSeHZZ5+t8iDuasxl3MyrYZWP5+TkOL1V4GyUb1PQELbqKyoY3D0KhUJeJbewoqLCqfvZdIaff/45Q4YM4ZNPPmHixIksW7aM//3vf051BlRJpwAsSxWffPKJ0/e8kSguLSP+ahLxV5NIy8wlIzvX8jQrCFBRYcDX2xN3tRvuajUe7m64q93wcFf/+bfp+Ne7fyEnrwClQoHeYHBJMIwzbNiwgQ0bNqBQKNDr9TzyyCM2nWFd6Tm//vorq1evxs3NjSVLlhAZGcn777/PTz/9hK+vL3fddVezmJhdibVcNrVazccff+xUZOhvv/3Ghg0bLJ8ffvhh+vXrB8DWrVtZuHAhK1eubFKnU5kJEyYwYsQIkpOTadu2Lf7+/k1my4svvgjU/LdJT08nIiICtVrt1MqNs1G+TUFD2Fqu0VJYXILazc2SX2gwGDh//rxT97PpDEtLS0lLS0Mul9OjRw88PDyc6kiibkrLyom/lsylhCRSM/56WlIq5LTy8Uar16NWKTEYjYQGtbYrl2bK/aMsy6ttQgLrvbncWAiCQHZ2NqGhoWRnZ9ucBKyl53z44YfExsZSUlLC8uXLWbVqFWDKYR04cGCjf5cbmezsbHbv3o1GoyEqKop+/fpZnF91AgIC2L59O6mpqRQUFDSLsj933303c+fO5d577wVg/vz5dscmuBJ3d/emNqHFYC64JKs0Z9QnYtmmM3zqqadYtWoVs2fPRqvVcvvttzvdmcRflJVruHI9hbiERFLTsy0pDgq5nA7t2tClYzgd2oVRVq5xyqn5+XgxY8rYZi/RtHjxYpYuXUpRURE+Pj42k+KtpecAeHh44OHhQVJSkuXYypUr8fHxYeHChTWk1iTsIzAw0BL6bmsZz7zaM3HixEa3y14CAwO5dOkSZ8+eZeHChWRlZTW1SVaxtddrVrExU9sKwPvvv8/06dOtxmDs2rWL0aOrBp+cPHmS0NBQi7i9vbz55ptMmTIFrVZb5f9ZbXbs27ePIUOG8M477/Dqq6/Wer9t27YRExNDTExMrXaa58yGin+w6QzDwsJYsWIFubm5fPnll4wbN65BOr7ZqBzdqVIpcVe7WQIX5HIZkW3b0KVjOyIjwlBVWj5QKVuGU3OWtm3b8vHHH1s+26oqYS09B0xRt4WFhRY1i2nTpvHMM89w/fp1XnnlFb766iur96+ergMtM2XHYDDYvffkzB5Vffa1XJGuUx1BEHj++ef56aefmD17NkVFRS63wdVcv36dHTt20LNnT9577z0mTpyIt7c358+fJyMjg4ULF3Lq1CkCAgLYtWsXrVu3plevXmRkZODu7s5bb73F8OHDOXnyJIsXL2b58uV06tSJ/fv3W/7PZmZmsnbtWiIiIgDTCkJxcTH79+/H29ubLl261GrHhQsX6N+/P1evXmXTpk0kJSUxdepU9uzZw2OPPcabb76Jh4cHaWlptGrVilOnTtGnTx8+/fRTfHx86N27N4d+PYyHpydJidd58YUX6r30bdMZvvXWW6xbt47Vq1fTt29fXnnlFTZv3lyvTm82KgwGPtv4LZnZeYiIaHQ6yrVa+twWQ5fIdkS2b4vaTdXUZjYJTz/9dBU1meqfq2MtPeeFF15g3rx5hIWF0bNnT+CvIAXzf1ZbVE/XgZaZspOdnY1CobD51OxKBRqdTlfl9+vKYA9zBOfIkSOJjIzkP//5j8v6dgZbe7326JpGREQwZswYioqK6NGjB8OGDWP//v14eHjUeIgcMGAAAwYM4Msvv6RNmzaAabl77Nix/PHHH+Tm5uLl5cWkSZM4ceKEpd2JEycYOXIkXbp04aOPPrIcv/XWW4mPj6e0tLRWO86cOWPpY/LkySxdurTKWDUajbRv356YmBhCQkIAOH78OHfeeSe9e/dmxYoVqNXu3DV8BFfi40hISKi3M7RZ3Fej0aDT6dDr9dx3330tZoO2OSCKIleup7Buyy4ysnNBAJVSia+3Fx5qNQ/ecyfdOne8aR0hmMqtWPtcne7du3P06FEADh8+XKVUS48ePfjyyy+ZNWsWHTt2BP5608zLy2vScHpXo1QqLdURmgtardbl88eFCxcACA8P58iRIxw5coTMzEzuu+8+l9pRmfHjxzN+/PhG76dDhw5s2rSJoqIiVCrTHBMfH49ara5Rysz84GTeh4Oq+28BAQEUFxezefNmSktLLcd79erFr7/+yp49e6rcz9xnfHx8rXaYyc3NZdOmTbi5uREYGMjly5fZsmULer2edu3asWvXLtLS0gDo06cP+/fvZ+3atYwYMQIRs2CHzGZpNnsQxMrfvhY2bNjA3r17mTt3LtHR0SxevJi33nqr3h03BBcvXmy2JZyy8wr45chJElMzASgsLkFAwF2tskR3OiIq29QlnBprr23VqlWkpKTQo0cPTp8+TZs2bZg3b57VNsuWLePChQtER0cze/Zstm7dyuzZs/n44485fPgwrVq14vXXX6dVq1a89tprxMfHI4oi8+fPt2iY1kVt37U5jSt72xUUFJCenm6znStrgBqNRsLCwixv6405rsxs376dcePG8cEHH9Q49/TTTzdq35Wx57vW9/fR2NGkv/zyCykpKYAperg+NISt+YXFaHU6/Hy8q7xQVNe8thebzhBMQQvZ2dkMHz6c9PT0ZqMb2hydYVm5hsO//8EfcQkYRRG1m4oBvW4hom0o277fXyUQxpFyIzeqMzTf/9q1a3Ts2JHo6OhG68deW24EZ9iY7RqynqGrApqys7MJDAykvLycffv20a9fPwICXJduVPm7ZmaaHpCrC5Y3d2fYkDSErbkFRej1evx9fVBVSrx31hna3DNcunQpHh4eHD16lJEjR7Jo0SI+//xzhzu60TEYDJy+cJkjJ86h1emRCQLdu0UxsNetuKtNSe83ciBMfTBHjElINBYLFixg3bp1rFq1Cn9/f7766ivWr1/fJLYsWrQIqH1fsLy83Ok8w5sN0fhnNKnsr99VfbZCbDrDhIQE1q1bx7Rp0+rd2Y2IKIpcS05j/5FT5Bea1Dki2oYwdEBPWrdq+vwqCQkJU/AOQH5+Pi+//DIHDhyocU1dYg7Lli0jLi4OrVbLSy+9RK9evQBTPMXw4cN5++23GySPNTQ0lNTU1HpF6raUyiANYWtxaRmiUSQ3y72KpKCzDxI2naGHh4cl8ufChQt4e3s71dGNSE5+Ib8cOcn1lAwAWvl6M7R/Dzq2ayM92UlINCP69u3LtGnTmDFjBlqtFrVaXeW8NTGHhQsXolQqSU1N5fXXX7e80W3evNlSgb0h8PPzqyHR5ggtadWpIWx9/4st6PQVzJn+UJU9w8rSbI5g0xm++eabfPrpp7i7u/Ptt99aCjDerJjzBa8npyNicoAe7moG9LyF7t2iWsyTWXNl2bJlNST7JJoHlUXjndn3bkqqB2VVT62wJuZg3tsqKyuz7GnrdDrOnj1reUuUcC1GUUSnN2mQqlwl1O3v78+cOXMsSarNLVzb1WzYvpvE1AxLKK/BYOSJSfdVKYUk4TySI2xeiKJIUUkZufmFbPt+P3kFhQBcT0nny20/8PdHH0Qus5mh1eyxJeYwZ84czp49y4oVKwCTOsrYsWM5e/asXfevLDZgnkMbWnzAWcGGpqC+tmp1eioqKlAq5OTl5jaITTad4aJFi0hLSyMoKMhy7J///GeDdN7SuHwtmevJ6aZ8QYUCH28vKgwGyRHWg6KiIjZt2kRycjLh4eFMmjTJppalI0LdmZmZvPDCC+h0OubOnXvDaZQ21NuaKIqUlJaRk19Ibn4RufmF5OQXkpdfaHkCrywaDwYSU9L5aN3/aBsaRPuwENq1DSHAz6dFbhFYE3MAk+5teno6zz77LH369OHQoUN88MEHdjvDymIDM2bMAGjwJc2baZm0sLgUhUKBl6d7jfs4K+Zg0xmmpKTwxRdfOHXzGwWjKHL05DmOnDiHQiEHAXy8PNAbKpptNYiWwty5c5k0aRIjR44kLi6OZ555hv/+9791Xu+oUPenn37Kc889R5cuXZg1a5ZDztARRyOKIkZRpKCwmP99v5/0zGxCgwMZP3oYrXy9bTqI6n09NHoYXp7uaLU6NDo9Op0OjVaPVqdDa/5bp+fX42cpKilFJggkJKby4br/0evWLshkMmSCgCATkMtkps8yAZlg+luj1fH72Yvk5hehdlPh6+2FoY7EZQ93NQGtfNDp9ZSVa1DK5Wh1ehQKOTp9BVeT0riaZEqM9vJwp11YMO3CQmgfFoyXZ/MQ9n/88cerzGPPP/887777ruWztVqrOp0OlUqFp6cn7u7u5Obmkp6ezpNPPklSUhL79++nW7dudguSDxkypOG+WAtn+fLlTJgwwebea3JyMl9//TUeHh5MmjSJ3LxCcvIKyMrJ57ON3zbIkr1d2qTr1q2jc+fOlmPmtfWbAZ1ezw/7f+PytWRkgsA9wwZw6WoS6c28GkRLQalUcs899wAmxQxb5cEcFeq+dOkSr776KoIg4OnpaSkiXReJiYn8+9//BuDC5WsUl5QT0akLJbf04K0PYjn04zeIAKKIiMkJdrutJzG39aS8rJStX8VaBIQBPlr1Nn0H3kFMt9spKS7k+2+2IAggIJgcpAADBg9D7dOarMwMjuz/AYAPV71tKVrab/Aw2nXoRHZmOvt/3FnF3jKNlp79hxAYEkZ2Rionjx5gx5+pPGbuHHUfgcGhJF27wm+H9gGg0eosdvYfOgpN6yA0RTmcPXEUtZsKtZsKtz///ufy5QQHB7Nt+zesen8DZeUaPNzVlkCxv82ZS0FxOTt27ODk8aNV+la7qXhuwUskpmXx6y97yUi+Rsd2bVCplDbFFRqCo0ePcvToURITE1m9ejVgWrKsLtRtrdbqc889R3FxMQaDgeeff57g4GDLOH3//ffp1auXQ5U5EhMTAWjfvn0DfcuWS1hYGMeOHSMqKoqEhAQOHjxoOTdo0CCLk/y///s//Pz8LPmJ3+07gr6iApVSSXp2Llu/2+uQiElt2HSGISEhFBcXV9Gju1mcYWFxCd/8eJDs3ALcVEruHT6QDuFtGNKve4takmjOyOVynnzySWJiYoiLi0OhUFgmrWeffbbG9Y4KdRsMBstbmZeXF0VFRVadodFotOzplJVrAJPTM4oiOq0efS2FQ/UGAxUVFRgMBoyiiFDlfiIGgwGdXo9Wp681NamwuASj0gujaHozEzHtRQPIZTLc1Sr8fb0QdT54ebgjk5ve9uQyGdl5haiUCrw81BQqFbi7qQhu3coiqyWKIt06tSMsvB1uYjmXz3mDCOnZeShkJofs7emOm0rJgP63k5tS9fdZodeTm5uLXC7HUKEjom0wRqMRmUyG0Wj6LkGtvImKaEtualdy065RWq6hrFxDebmWco2WQ8fOgCCg1ekpLS/nyvVkOrZrU+e/QUMSHh6OTCYjOTmZgQMHIooiCoWi1qoQddVaray5WZ1nnnnGYZvefPNNwLr+6M1AXFwcSqXS8n/VGhqNhiFDhpCRkcH+/fvJzs1HEAQUcjnubm5kZOfV2x67UiueeOIJy+ebRaQ7OT2LHT8dolyjpZWvNw/cPQR/v7pLoUg4R+WxZc/ykaNC3ZWje2vbC6pOhw4dLMu0n238lrTMHBRyGRUGI0Gt/Vn+8vfIZALCn8uNgiAgk8kQMOU3hbUJJT07F6VcbpHde3LyGJNDNRp5bf5TGI0iRqMRoygi/vn3V1//SFaOBxOnzcBgFAkNCmDm1Ptr2Lfg6RlVPldeXu3bpw8rliysc7norsF9efqpJyzfrbqdY8eOZezYup+uJ0yYwIQJE+p8EHzkkUd45JFHLJ8NBgNpWbmsWb8do1GkW/c+3DH0LjQ6Pf+Y+5jTIfCOEBYWRlhYGNHR0WzatInCwkKef/55Dhw4wLBhwxq9f4naKSsrY+fOnSxYsMCSoRAZGUlkZGSt1z/44INs2bKFiooKnnrqKU7EZ1BWrkGhkFOu1TbIdlWdYWBFRUUkJSWxe/dukpOTSU5O5vr16/zwww/17rS5c/biFbZ+t5dyjZaItqFMfWCU5Agbib59++Lt7Y3RaMRgMGAwGOjbt2+dGqKOCnV36dKFU6dOUVZWRmlpqdW3wuqMv/cu2gS3pqLCQJvg1kweOwIPdzVqNzfcVEqUCgUKudy0N/fn2+f4e+8iNDAAnU5PaGAA4++9y+Qw/3yKVSmVqN1UeLir8fJwx9vLE19vL6bcP4qwkEAMBiNtglsz4b7hdtlorlv592n3M2PKWLv3TWqzs6GRy+WEhwYR0TYUd3c3fLw8KNfpCAl0fZX5F198kejoaE6fPo1cLmfdunUut0HiLzw8PFiwYAGApU6mNWJiYnjllVd47bXXCA4Oxs/XG6VCgSjSYOO3zjfDY8eO8fPPP5OammpZJlAoFEyePLnenTZXDEYj+w+f5PQF01JR79uiuaNf9yqVlCUallmzZhEUFFQlWtnaMry1vZ3qQt1gitx78cUX0Wq1Di9pOVMg2dmiyq4uxuzK/sbfe5dTBaobEvMy22effQaAHZLMEs2UwuISyjVa2oYGMfvRBxtsfq7TGY4YMYIRI0aQnp7ucMXjlki5RsuOnw6RnJ6FXC5j5B196da5Q1ObdcOj0WgcFnKoa29n9uzZlp/NhISEWI1OlWh8XO3oayMiIoK1a9dSWFhIbGxsgyrHSLiW5DRT8FPbkMAGfVGp0xmalUDmz59vWQISRRFBENiwYUODGdAcyM4r4JvdBygsLsXTQ83YkXfQJlgKjnEFDz30EMuXLycqKuqvpUYX1HqTuLlYsmQJ+/btY8yYMbRr147HHnusyWwx5xlKOEdymqnqR3ibYBtXOkadztBcRWD+/Pk3pOSQOfAgMTUDg8GIn48XYSGBjB11B97NJDfqZmDjxo0MHToUhcJmLJeEhNOYA2Y6duzIF198gY+PD717924SW2zV1JSoG1EUSU7/882wTZCNqx2jzgCaDRs2cO3aNVasWGEJoDH/qS/Lly9n6tSpLFu2rMrx+Ph4pkyZwuTJk4mLi6t3P9bY+t1ektIyKddo0elNIfOTxgyXHKGL8fPz46mnnmLcuHGWPxISDY257Nwnn3zC/fff36QqWvHx8cTHxzdZ/y2ZwuISikvKULupaO3vvKh5bdT5OP7ss8/yxRdfVAmgMVOfgWRNQWT16tW8++67yGQylixZwscff+x0P7ZIy8xBq9UhCAJeHu6IItLbSRMgCAJz5sypskxaW36hhER9KC0tJS0tDblcTo8ePfDwaLqH3pUrVwJSnqEzmPcLw0ODGjywsc7Zf+jQoQwdOpT777+/QZdJrSmIFBYWWoJ1zLlkjYF579NgNOKmVCKTyZok3FsCpk+f3tQmSNwEPPXUU6xatYrZs2ej1Wq5/fbbm9okCSdI+XOJtKH3C8GOpPuG3i+0piBirKSNaKxDJ7EylZXgwX4l9CvXU114ttoAACAASURBVJHLZKiUCpRKOQF+3tw1oLtDKurOqq63hHauVL+PiYlh48aNFBYWMm/evCpyTHVRl1D3999/z3/+8x8EQeCpp55ixIgRvPTSSyQkJKBWq5k4cSJjxoxpzK8j0UwxR8ebMee4SbQcRFG0BM809H4h2OEMGxprCiKVqxXL7CgLU1kJHuxTQi8tK+fE+Su4uakYO+oOQgJ8nAr3djZMvCW0q97GWRV4e3jhhReYOnUqa9asQaFQsG7dOqvKINaW2detW8eXX36JIAjMmDHDMvmtXLlS0oGUkGjhFBSVUFxajrvajYBW9mvB2otNj1NcXMzatWt5++23MRgM7Nu3r14dWlMQ8fX1JSMjg8zMTIeUQuxFFEV+PvQ7Gq2OiLahUh5hM8CcDG2WTbOVDF3bMruZDh06UF5eTllZmWX8CILAwoULmTVrFqmpqY30LSQkJBoby1thI+wXgh1vhpWf3M0yRvXR9LOmIPLMM88wb948RFFk8eLFTvdRF/HXkrl8PQWVUsHIIX1aZN21Gw1Hk6GtLbOPHDmScePGYTQaLUFeCxcuxM/Pj99//51//etfvPfee1bvr9Vqa9XMdObt2Nk36pbQriH60mq1Tt2jIfjf//7HQw891CR9P/30003Sb0vnr/3Chl8iBTucYWPIGNWlIBIdHc3GjRvrff/aKNdo2fvr7wAM7d8DHy/PRulHwjEqJ0OHh4fbTIa2tsy+evVqvvvuOwBmzpzJ4MGD8fMzhV/37t2bd955x6Y9lVcqJFo+5mC56nTr1q0JrDFRueSYhH2Y9gsbL3gG7FgmvVFkjPYdPkFZuZbwNkHcEl27MrpE0zBs2DBmzJjB8OG2xamtLbOrVCrUajXu7u7o9Xrgr6jkq1ev2qxYIXHj8eijjwKwaNGiKsejo6ObwhwAzp49y9mzZ5us/5ZIQVExJWXleLi7EdBIRRNsvhk2JxkjZ0lITOXilUSUCjmjhvSVhLdbMNaW2adMmcKUKVMAmDRpEmCKGiwsLEQQBJYsWdKElks0BR4eHsyfP5/ffvuNF198EfjrbXHFihVVrq0rSnnZsmXExcWh1Wp56aWX6NWrF6+99hrx8fEIgsDixYsdcq4ffPABIOUZOoJFjzQ0qNG2t2w6w+PHj+Pl5WXJyzl+/Dh9+vRpFGMaA41Wy56DxwEY3Od2/Hy8m9giifpS1zL7gw8+yIMPPljl3CeffOIyuySaH2vWrCEzM5OVK1fy3HPP1bnNYy1KeeHChSiVSlJTU3n99ddZu3YtM2fOJDw8nOvXr/POO+/w/vvvu/Jr3XRY9EhDG2eJFOxwhr/99htgepoyPwm1JGf4y9HTlJSV0ya4Nd1v6dzU5khISLiY4OBgXn/9dY4ePUpRUZHleFhYmOVna2IgSqUSMBWkNb8BmoO4FAqFXWlgEs4jiiJJaY0bPAN2OMPqkU9///vfG82YhuZ6cjrnLl1FIZdz99B+0vKohMRNyowZMxg6dGiVupmVsRalDDBnzhzOnj1bY2n13XffZdq0aTb7rywQotFoABpc2MKVYhn1xRFbC4pKKCouwV2twliha7TvaNMZbt261fJzdnY2+fn5jWJIQ6PV6fnx4DEABva+VapULyFxE+Pj48NTTz1l9XxdUcoAH374Ienp6Tz77LOWN8jY2FgiIyPtqn5RWSBErVYDNHhtx6asF+kojtiakpWPQqEgsn1bAgMDbV7vbNqPTWdoToYWBIGoqChLdFZz5+Cx0xSXlBES6E+vW7s0tTkSLYQTJ07UWPaqKzzfGs60aSntHG1jMBgoLtVgMBqRy2R4e6qRy+UYjUaXlYeTyWRWBeG7d+/O5s2bGT16NIcPH66y96zT6VCpVHh6euLu7g7AoUOHOHXqFKtWrXLYFkkKzjFSKgXPNCZWnaEoiuzcuZP//Oc/jWpEQ5OUlsmZC1eQy2SMGtpPWtNvxqSlpfHJJ59QWlrKihUr2LZtGxMmTGgye2QyGT169KhyrCEk7W6kdo62WfT2WkrKygHTQ7Wnh5o3FvyNU6dO2WxbV4Tnxx9/zIYNG3jooYeYN28eAAUFBSxevJj8/HwGDBhgCawCbEbBW4tSfu655yguLsZgMPD8888D8MYbb+Dl5cWjjz5Khw4dWLp0qd2/j86dpdgFe6lcv7Cx8gvNWHWGgiAQERHBrl276Natm8WpmNfWmyM6fQU/HTAtj/br0Y3ABq55JdGwvPLKKyxatIjXX38duVzOzp07bTpDR0LgJaHupiUtM9viCAHkMoGyco1dba1FeE6YMIEePXpw5MgRy/UffPABc+fOJTKyZh5x79692b17N0lJSYSHh3P33XfXuKauKOXqJewAdu/ebdd3qI1jx0zzk1Tk1za5BUWUlWvw9FDTyrdxMwFsvjKVlZVx8OBBPvnkEz766KNaB0Zz4vDvZykoKiEwwI++3WOa2hwJGxgMhiqTl61qJZUnSL1eXyV5eeHChaxfv55Vq1axZs0ay/GVK1fy5ZdfSo7QhRhFkWOnL7Dpmz2WY3KZgMEo4uGutuse1nRoW7duXWOp9vLly6xZs4Zp06bVeOt84YUXSExMpGvXriQmJvLCCy84+9XqzWeffWZR9JKwTuWUisaWz7S5ZzhgwADGjh1r+fzTTz81qkH1ITMnn5PnLiETBO4e2s+y3ynRfOnfvz+vvfYaWVlZLFu2jEGDBlm93tEQeLNQt5+fH4sWLaoSTi/ROJSWa/hh31Gup6Sj0+nx8nCnXKNFFEU8PdTMnHq/XfexFeFZnVOnTrF9+3Z8fX155plnqkg7ZmZmWuT47rjjDh555BEnv52EK0lxQUqFGavO0GAwsGXLFsaMGYMoipbPI0eObHTDHKXCYODg8T8QRejbI4bg1lKx3pbAnDlziI+PZ8CAAXTo0MGmkoejIfD2CHVv3ryZzZs3AybZroyMjCrnRVGsccwWzrRpKe2stdHpKyguLaNzRCiR4UEYjSIKhRx9RQVtggL+fLo3tS8sLKwSqDJp0iSLchDYjvCsTkREhGWVoXqcQHBwMB9//DFdu3bl3LlzdkUlSjQtxir7hU3oDLdv3862bduIi4tj+vTpiKKISqViyJAhjW6UoxQUlbBm/Xaycwvw9FAT0ymiqU2SsJPHH3+cOXPmcM899wDwz3/+k5dffrnO6x0NgbdHqLvyJHzq1ClCQkKqnJcCaGy3MYoiv508z9GT5zCKImEhgQS08uHsxQTu6Hs7fbvfUqNdeno627Ztq7MfaxGetREREUFWVhZeXl4YDIYq51asWMFPP/3EpUuX6Nixo9U0C4nmQV5+IeUaLV6e7jWUw5YvX86ECRNsamXv2bOHo0eP0rZtW6ZPn251qbVOZzhu3DjGjRtXZdO6ubLxmx/JzitAEEAQZGz/4RdmTBlru6FEk1NSUsLWrVs5f/4806dPr7V8UmUcDYEvKSnBy8tLEupuRErKytm19zDJaVkIAvTr0ZWBvW7l6MnzKORy2oU5FwVoLcJzy5YtbNy4kYKCAoqKili8eDFz585l/vz5aDSaGmIhP/74o+WBSxRFfvjhB8tnieaJpUpFLfuFYWFhHDt2jKioKBISEjh48KDlXGhoKDExpngRd3d33N3dKS8vx2g0Wt06s7ln2NwdIUBObgFKhQI3lRIvT3cysvOa2iQJO1Gr1bz11lvExsaycOFCizpHXTgaAi8JdTcu11PS+X7fEcrKtXi4qxk9bADt24aw7/AJ2oYG8beH78dd7eb0/euK8JwwYUKNqONOnTrx5Zdf1nqfTZs2WZyfIAhVPruaV199tUn6bUkUFJXw3d7DFBQVIwgm4RQ/H1PB7ri4OJRKJVevXrV5n0GDBjFo0CB+/PFHjh07Zok3qA2bzrAlEBrcmvTsXJRyOeVaLaGBAU1tkoSd9O/fHzDlgR07dsyuaGVHQuAloe7GwWg0cvjEHxw7fQFRhHZtghl91wA8PdxJzcjm5Ll4/rh0lScnN48IXp1OR2FhIb6+vhQUFDRpYeH27ds3Wd8thf/b+TOFRSUIgkBxSRlbv9vLjCljKS0tZefOnSxYsMCS2xkZGVklIr3y6tJvv/3GmTNnSElJseSj1oVNZ1heXs6RI0eqCNw+8MADDn+5xmT8vXex9bu9pGVk0yYkkPH33tXUJknYICsri6CgIMaOHUtycjJgWt544403mtgyibooKCph63d7SUnPQhAE3NVuKBUKBva+hb49uiETBIyiiJ+vN8MH9TZFj9qZRtHYvPDCC/z9739HEAQEQbCUc2oKDhw4ANAs4y+aC0mpmSCASqnE0+Ov1T5PT0+Lgs9rr71m8z79+vWjX79+dvVp0xk++eSTVgVumwN+Pl7MmDK2RWnz3ex88803zJw5s9a3uX/+859NYJGELcyOsKxcg1EU0VcYeOqRBwivJJN1Li6Bg8fOcOeAnnTr3MFltqWkpNC2bds6z/fs2ZMNGza4zB5rrF+/HpCcYV2Ulpn290RRNKXluGi1z6YztCVwKyHhDDNnzgRMUWGCICCKIn/88Qddukg6ss0VkyPUIgJqlQo3lbKKIzRfo9HqUChck+O7evVqAC5dukRkZCTz5893Sb8SjcfRk+fx8/FCq9djMBoJDQxwyWqfTWdoS+BWQqI+PPbYY6xbt47Vq1eTl5dHRkaGVAG8GZKUmoFOX4FRNKJ2U+GmUhEaVPVp/Xz8VQb0uoVbYzrRNsQ1eXzmuejy5csUFha6pE+JxiO/sIizcVdQKhQ8MXkMrVv5uqxvm87QlsCthER9MMuvpaam8vbbbzNlypQmtkiiOklpmWzffQA/Hy8MBiM6vZ7QoKpP6/mFRfx04DiCIDBjyphGl84ys3r1avR6PefPn+eLL76o87qWEPsgAYeOn8VoFLmlS0eXOkKwwxn27duXpKQksrKyEEXRFTZJ3ESEhYXx+OOP88ADD1BRUSFJ6DUzktMy2f7DL1RUGLi9axQjh/QlLze3xt68Sqmka1QECAKeHu4us8/8Znjo0CGr17WE2IebnfSsXOKvJqOQyxnY+1aX92/TGS5dupTi4mJOnjxJjx490Gg09OnTx6nOSkpKWLBgAQUFBUyePLnGk9ndd99tGayLFy+mU6dOTvUj0XJ46623qKioQKFQYDQa+fjjj5vaJIk/SU7PsjjCbp07MHJIX2S1vPElpmRw8Nhphg3sRWiwawPYVq9eTWRkJN7e1isaNKfYByliuiaiKHLwt9MA9LilM96eHi63waYzvHTpEhs2bGDatGmsXLmyhrKDI2zZsoV7772X0aNH8+ijjzJ69GhUKpXlvL+/f51JsxI3LgqFaRjKZDKbk5qEa0j50xHq/3SEo4b2q9URApz4I47MnHxSM7IJc9FeoZmOHTui0+m4cuUKQ4cOrfO65hT7EBzcuHX5WiLXU9JJTs9C7aZqsmpDdlW6NxqN+Pj48PXXX5OYmOh0Z6dOnWLx4sXI5XKio6O5du1alejBwsJCHn74YSIjI3n11Vdxc3NeuUJCQsI5UjOy2fbDL+j1FXSNirDqCNOzcrln2ADOx1+jezfrOpGNwfHjx1EoFHTt2tXqdfbEPjhSJzM+Pp7FixcjiiJLliyxKTBfmR9//BGAUaNG2d3mRsYoihw8ZirP1bd7V9RNNO/bdIYrV67EaDSyZMkSdu7caakG4AzFxcV4eZkkdby8vKpsZgN89dVX+Pn58cknn7B582YeffRRq/czGAzk5ORYPldUVFT5bA/OtLnR2znblzNIgQ3Ni9SMbLZ9v9/iCO++s3+djlCj1bL9h/0o5HKmPDAKRRPs9z766KP88ssvdO/e3ep1tmIfrBUSXrhwIUqlktTUVF5//XXWrl3L6tWreffdd5HJZCxZssSh5f2tW7cCkjM0E3flOtm5BXh7edDjls5NZodNZ+ju7s7nn39OYWEh8+bN4+DBgxYR1LrIzs62aEOaad26Nd7e3pSUlODm5kZpaWmNJTFzhYGRI0cSGxtr03i5XF5lI1+qLtAw7aq3yc7Odrhfe5ECG5oPaZkmR6jTVxDTybojBCjXaPHx8kSlVOLlwqCZynz//fc8+eSTfP7551a3cGzFPjhaJ7OwsJDQ0FAASxUVCcepMBj49bipQPeg3rc2yQOVGZvO8IUXXmDq1KmsWbMGhULBunXrGDZsmNU2gYGBte79ffHFFxw5coR77rmHixcv0rFjR8s5nU4HgEql4uTJk5aadY5QVFTk8MRtMBicmuxv1Hb+/v4ujehsToENNzNpmdn8b5fZEbbn/w2z7gjzCor4/cxFHrh7CHK53GWpFNWJiIjAw8PD5nxhK/bB0TqZ5pSg6j/XReVVLLMYfUOvvrhyRae+mG09d+kaeQVFtPL1IqiVd5Pab9MZajQahgwZwmeffQZQr/SKCRMmMH/+fNavX8/EiRNRqVQcOHAAo9HILbfcwsyZM/Hw8MDHx4e3337b4fuXlZURExPj0GSu1+stT36OcCO2MxgMxMfHu7TwaXMKbLhZScvMsTjC6Mh2/D8bb4QAB387zZXEVORyOcMH93aRpTUx607ef//9Vq+zFfvgaJ3MysWDqxcSrqt/82qLWm3Sa21o6ciWJEeZk5ODl7cP568koVAoGHFH3wabd5xdybLpDCMiIli7di2FhYXExsbaLKZoDS8vL9asWVPlWGV9vu3btzt9bzNSnprzNMXvThJ1aFqycgvYe+S0xRHeM2yAzcldo9UxpH93FAo5/Xvd4iJLaychIcGuJXZbsQ+O1sn09fUlIyMDQRAscRBNhbnQbatWraxed+3aNdasWcOIESMYMWJEg/VryydcvnyZQ4cOcf36dZ599ln8/f0B+P3MRTRaHW1Dg+gQ3qbe9tQXm480S5YsISoqijFjxtCuXbsa5XNuVN5///0aAT7V2bVrV41jJ0+eJD093eH+3nzzTa5evVqjuG1tdvz8889oNBrefPPNOu+3bds2y71qs7O50LdvX0JCQpDJZJaKAhKNT0FRCR/EbuXzLd+TmpFN+7Bgm46woKiET7/6hvdit/H+F1vpeUuXJq9KYa0+XWXMsQ+xsbFMmzaNjIyMKucr18mUyWSWOpkAzz33HNOmTWPWrFk888wzADzzzDPMmzePZ599lrlz5zpk84oVK+oViFgdc6FbMD0cxMbGWv5UXu7t0KED48aNc3m/UVFRBAQEkJOTY1mlKi3TcOLcJQCG9Lvd8v9++fLlNZao62L//v3MmjWrwb6PXfUMQ0JCcHd3RxRFjhw5YvcAbEr+9re/1Tg2cuRIJkyYgEajsQxgo9GITCaroYd5/fp1duzYQc+ePXnvvfeYOHEi3t7enD9/noyMDJ5//nlOnTpFQEAAu3btonXr1vTq1YuMjAzc3d156623GD58OCdPnmTx4sUsX76cTp06sW/fPkuNvczMTNauXUtERARger0vLi5m//79eHt706VLF4sdt912Gx999BETJ07kwoUL9OnTh6tXr7Jp0yaSkpKYOnUqe/bs4bHHHuPNN9/Ew8ODtLQ0vLy8OHXqFH369OHTTz/Fx8eH3r17c/z4cVq1asWlS5eYN2+e5WnN1TSkqIOEfYiiSOz/fUdqxl/LSfmFxTbfCLfs/JnUDNOeTklZOTt//pWZU60vTzYX7Il9cKROZnR0NBs3bnTKFnOgYEPgSKHbhsTRfseOHYuPjw9paWl06dKFUxcuU1FhIKpDOKFBfy3tWqtgP2jQIMtb6IULF9BqtU7FltSFzTfDWbNmsXHjRn7//XdOnDjBiRMnGqzz5kxERARjxozB29ubHj16MGzYMEpKSvDw8Kjx5DJgwAAeffRRTp48aTkWEBDA2LFjUSqV5Obm4uXlxaRJk6osqZw4cYKRI0dy3333Vbnfrbfeik6no7S0tFY7KvcxefJkNBpNlTcqo9FI+/btGT58OCEhIYApH+vOO+/kb3/7G3v27AFg9OjRDBkyhISEhIb7xTnIpUuXePvtt2nTpg0rV65sMjtuFrJy8tn87R5SMrJAMFWfCGjlS2ZOvtV22bn5XLmeitFoxN/PB39fH5ttXMHq1atZtWoVq1atsozr2jDHPpi3AppSWnLHjh3s2LGj3vcxF7qdOnWqJYgnMjKSxx57zPKn8hJmdnY2u3fvZu/evaSmprqs3wMHDvDpp5+yd+9eFEo3Ply3lUPHz5GTV8BtMX8V5TU72GvXrtm04cCBA6Snp3Px4kUuXLjg9HepjF0BNOaKwi0Ja5UP1Gq15XxdASYdOnRg06ZNDB482KKSEx8fT2hoaI3oMYVCYSlDZKby/ltAQADFxcVs3ryZ0tJSy/FevXqxfv16S3FbM0VFRahUKuLj4y129O/fv4paD0Bubi6bNm3Czc2NwMBALl++zJYtW9Dr9bRr145du3YxYcIEAMub4cmTJxkxYoQlWVkQBLui4RqLhhR1kKibco2WX4+f5WzcFUQR1G4qZIIMD3c3q/XiKgwGLl9NJqh1K+RyGRqdDne1igqjwSU15mwxcOBAwOTc+vbtW+d1DRn7UF/MjnDMmDH1uk/1Qrc5OTmWAswZ2XmEBPoz/t678PMxPYAHBgbaVRDX0X5tMWTIEO644w6S0zJZt/V7CotLQABBENhz8LjdFewrY14eTU1NtSm4YC+CaOMRaceOHfzxxx9Vov3Gjx/fIJ3Xl4sXL1bJeTx37hy33OLYhr6rojt/+eUXUlJSMBgMNsUEGqI/Z9tdvHiRwMDAKlFp1X/PDUlWVhb+/v7k5+ezc+dO+vfvb7OvxlQKOXXqFD169KhyrCXnrxqNRs5eTODX38+i0eqQyQR6dOtM184RfPvjIdIysmkTElhl0jQjiiKbv91DamYO9w4fiNFo5NfjZ0nPzKmzjSM21va7dpQjR45gNBrZv38/r776qtVr9+3bR0JCAh07duSuuxq/Pl5lKv8fMm/hNHSpspycHLb+cIjElHTUbioEQaBNcGtmTBnboP04QoXBQNyVRE6eu0R2bgHpWTnIBBlubkp8vDzR6vT8Y+5jDdqns/OVzTfDjRs3MnToUIt+pIRzmHUT9Xp9E1vSvHBU1MGVSiEtneT0LPYfPkFWbgEA7cOCuXNgL0tpnBlTxtbqRA0GA7+dvoCXhztdO3egtFyDt6cHYSGBdI3q0KxC+BUKhd1Lni0x9sFRMrNzqTAYKC3XgCiSkJhGUmoG4W2CXRqcVlqu4cyFy5y5cIWyclNepYe7muDW/pRrtKiUCjQ6XbNYXTBj08P5+fm1qKRog8EgpVc4icFgcHmfjoo6SEohtikuLePA0VPEJSQB4OvtydD+PegU0dbmhKjRaknLzOHIiXOolApmTh1LTFQEymb6MFxRUQHAjBkzrF43a9YsgoKCqqRh3IjOMCwkkJT0bAwGA+VaLXJEtny3D38/H7p3jaJr5wjcqm23NBT5hcV89fWPlsAsPx8v5HI5QQF+9Ly1C10i21NSWs7W7/ZWWZFoLtgc4YIgtJikaA8PD+Lj4x1q46zzvFHbuTqq1FFRh8ZQCtm8eTObN28G4KWXXqqhgtFSNG8zMzP549J1zsQlUFFhQC6XcXt0JLdGd0Ahl5Obm1tnf6IocuZiAmfjrjJm+ACiO7alXZsgSoqLm/y7WcMc2v/f//6XwYMH8/DDD9d6XUuNfXCU8ffeZdkzDAsNJCoinCuJKeQVFLH38AkOHjtNTFQEt3eNIijAel6iPRSXlpGYkkFSagaHT/yBRquzxE9o9Xoeu38kbUODLL7Dz8erzhWJpsamM5w+fbor7GgQfHx8WuzeTnNq50pJJEcDGxpDKWTSpElMmjQJMO1jVf99Nec9w4KiErZ8t5ek1AwEBLw83ZHL5cREdWBo/x74enva7A+ZaanRiAwRgaIyLfeOuKPBbKyrXfXAMWcwP5gfOXKEvLy8Oq976KGHWL58ebOIfXjvvfca7d5mZ1OZOwf25GpiKqfPXyYpLZOzFxM4ezGBNsGt6d41iqiO4XZrgmp1epLTMklMNTnAvIK/HpY0Wh0KuRy1mxvuahX6CgPhbVpOuSq7Kt1LSDQWS5YsYd++fRZRB1uKNK5UCjFH5lkLMnE1+ooKCotKKPjzz08HjlFQXGJZ4hYEgScm30f7sBCr9zF/t6TUDCoMRjqEt2HquFFEd2pPuxY0gW3fvp1x48bZXPJsTrEPZjk2VyGXyYjqEE5Uh3By8ws5e/EK5+OvkZaZQ1pmDh5HT3Jrl0hujelU4+HJYDSSkZVLYkoGiakZZGTlYqy0eqNSKghvE0z7sBCOnDxHbn4h7mrrEcrNlaYfGRI3PY4ENlRWComOjrYohcyePZvnnnuO4uJiDAaDpWqKWSlEFEUWL17skF0btu/menI6crmMq0mpfL5pB9PG30NAK1+b2p2OUtnxhgQFMGxgLwxG41+Or7CYguISSkrLq7TLLShEEATkMhmeHu7IZDKrjrCkrByVUsnnm3aQnVuAj7cH+UUl5OQVoFQoWpQjBOxWVGlOsQ9btmwBsKQ9uZKAVr4MG9iLQX1uI+5KImcuXCYrt4DfTl/g2JkLhIUEkpGVS05+oaUaSWXnJxMEwoJb065tCO3DQggJCkD+54pLh3ZtLEu0oYEBzWo/0B5atDNMTEzk3//+t+WzRqNhzJgxNVRmKjNmzBjGjBlDQUEBL774IhqNpsqT2vjx4xk1ahSZmZksWrSoRvtHHnmEIUOGkJyczCuvvFLj/IwZM+jbty/x8fG1JpE//PDDDB06lLNnz/LBBx/UOL9gwQI6d+7MsWPHLPto5u+mVqt59dVXad++PQcOHGD9+vU12r/xxhsEBwfz448/snXr1hrfb8WKFfj5+dWZ+GtewtmyZQs//fQTAPPmzatxXUPhTGCDq5RCcvIKkckEDEYjBiOkZeXw363fo1IqCAkMIDQ4gNCg1oQEBTgkS2YURUrLyikoKvnT2RXzy9FTFJWUIRqNxF9LBYXs/gAAEi1JREFU5lpyOq39ayqVyGUyfLw98fPx/vMtVaSouBS1m4oKo5GQQNOeryiKiKJIamYOSSkZRHUI5/iZC8QlJDF25GBy8gupMBrQaHUE+vuh1elRKZvfdFBXGk1tKTPW0miaU+yD+f9VUzhDMyqlkttiOnFrdCTpWTmcPn+F+GtJnD5/GX1FBYIgUK7RUlauoXPHdrQPC6F92xDahgbhpqo9Vau2JdqWRPMb/RI3Fc05sCEowI9ryeUYjUYEBHx8PPH19qSwuJSktEyS0jIt1/r5eBEaFIBKqWTfkZNoNFrc1W6MHj4ItUppWdYsLC6hqLiUimqRu3kFRX86MNPEbTAYiYpoi6+PF618vfH18cLPxwtvL0/LW6koikRGhPHZxm/JzitE7aakz+1dOXb6AsfPXOSOvreTnVvA6QuXUamU+Hh74aZSotHqCA0KIDuvALWbqtmFuJuxlkZTW8qMtTSalhT74EpMuYiBtAkOZGh5D9764L8IgoBCIUelUGAURR6feC/Lly8nfMKEOh2hmd9//52TJ0+SkJDAyy+/3KDSc41Ni3aG7du3r5K4WnmDvrLKTG34+fmxdu3aOoMBgoODrbYPDw+3er5z5861njcHp9x2221W2/ft27fKfm11O4cMGVKl4kd1Ro0axahRo+r8fuY35NooKSlhwoQJlifX6uLhDUlzCmyogQBG0YjRKCKTmaTLCotLiYmKQCYI/HbqPD5enmh0Oi5dTSIpLZPikjJLRGxpuYYtO3/Gx9sThVxOcWkZ7mo3FHI5pWUaAgP88PfzITktE5lMhqHC5CBFUUQul3H5egq3d+2E0Siy9bt93N61E61b+fHzr79bft68Yw8yQcDLQ02ZRssP+48wYnAfNFoduflFRLYPQ6VSEhbSmqCAVgzqcxsyQSC8TXCzDXE3Yy2NpraUGWtpNFLsg2083dW0bxtCenYu7m6mfb82f+qG2qsZ2rt3b3r37s2HH35IcXGx5AwlJOylOQU2gOlJ2fzAkp2Tj5eHO6XlGtQqJUXFpXh7eVCh09HK1wu1m4rIdqH4+Xhx+OR5OrUP48S5eHR6PWqVSQGkXKulW6f2tPL14sS5y3TpGI6/nw9HT10gqn0bWvl6kZWTR7lGjlqlolxrSkgWBIGKigrKSssoVsjq/Fmvr8BdrUKpVKAyGCkuLSM4wIeH/t9gPN3VCIJATMcwAAoKCqp81wdGDqSiogKFQkGFTkNOjsau31FDpVYUFhZWCYCqHNUL1tNoakuZcbTgrkRNKqdmmPf9HBXl3rFjB+Hh4Q0qou0KmscMJHHT0pwCG8D0VmZ+k24TEkh6Vi4Bfj7oKwyEBAUwY/Jfb9OD+/W0/HxHf9PPZ+IS0Or0aHV6jKKIp4eaCWNMteNGDOlvuX5I/55V7vPZph1kZOXi6aFGX2EgNCigyv7LHf171frzpWuppGfnopTLwV0gNDCAtmH214ZryrQRX19ftm3bVuf11tJoakuZcbTgrkRNqu/7OaoZ+v333/P1119zxx13kJqaSlhYmEvsbghsapM2Z06fPo2bm1tTm3HDo9Vq6d69e6Pce86cOQDNIrABTJVEzBOpwWikpFRjES7w8lRbIufqwmAwUFyqwWA0IpfJ8PZU2yV64Exf9WnXHDAajfTq1avO8+fPn2fz5s0sXbqUJUuW8OCDD1qWSefMmcOiRYsQBMGyP1jbseaCNFe5DmfnqxbtDCVaPmYFkcpI+zsSZpYtW8aFCxeIjo5m9uzZbN26ldmzZxMXF8frr79uSZmJiYmp9ZiEhL1IzlBCQkJC4qanZaynSEhISEhINCKSM5SQkJCQuOmRnKGEhISExE2P5AwlJCQkJG56bghnmJmZybhx47j11lstxT7t4cyZM0yePJmpU6eyfPlyu9rEx8db2rz88st2V9k288UXXzBlyhS7r09JSWHgwIFMmzaNJ554wqG+vv76a6ZPn860adPIzMy0ef2BAweYNm0a06ZNY/DgwezZs8eh/m40nBlXzowpaDnjytExBdK4cgXLly9n6tSpLFu2rKlNsYmz83WjI94AaDQasaCgQHzkkUdEvV5vd7usrCxRo9GIoiiKzz//vBgXF2ezjU6ns/z80ksviWfOnLG7P61WK7744ovi5MmT7W6TnJwszp8/3+7rzWRkZIgvv/yyw+3MjB8/XiwpKXG6/Y2AM+PKmTElii1jXNV3TImiNK4ag3Pnzon/+Mc/RFEUxddee82hsdMUODtfNzY3xJuhm5sbvr6+DrcLDAy0JMIqFAq7kqOVSmWVn81aiPawZcsWHnjgAYft/O2335g6dSqxsbF2tzl48CBGo5Hp06fzxhtvWOrd2UNycjIBAQF4elovDHuj48y4cmZMQcsYV/UZUyCNq8aiNg3X5oyz83Vjc0M4w/oSFxdHfn4+nTp1suv6n3/+mfvuu4+8vDy7hWj1ej3Hjv3/9u4/Jur6D+D4E7wQCOdgo2vgr1bRgf1ALCB1wgQXm5szCEQMKg4LW/YDIVnZxlZkm7bM/jgVkThKCBPmtA0U0xBRWFBWxFkpNMF+UEbQUDiO+/7B7vPluAPvIEzg9fjrOD7v1/t9n3vt3q/P5+D9rr/h9kTD3XHHHVRWVqLX66mtrcVgMDjU7s8//8RoNFJYWIi7uzsnTpxwuM9jx46xcuVKp8YprDmbU3Dr59V4cgokryZKV1eXsnH1rFmz+Pvvv//jEU1O034y7Ozs5M033yQ3N9fhNlFRURw9ehS1Ws2pU6ccanP48OERd4kYjZubG56enqhUKiIjI60WKx6Nl5cXjzzyCADh4eFcvHjR4T5PnjzJihW33i4Gk8VYcgpu/bwaT06B5NVEGW0NV+G4aT0Z9vf3k5WVxauvvoqvr69Dbfr6+pTHXl5eDq832NLSQnFxMVqtlp9++omioiKH2g3diqaxsZF58+Y51C4kJIQLFy4Ag1swzZkzx6F2HR0d3HbbbXh7ezt0vLA2lpyCyZFXY80pkLyaSMHBwZw7dw6A2traCVtHeKqbErtWGI1GNmzYgMFgQKvVkpGRwUMPPXTDdhUVFXz77bfKjvQZGRksWrRo1DbV1dXKdyzz589n2bJlDo0xKytLebxu3TqSk5MdatfQ0MD777+Pm5sbISEhDr0ugMDAQNzd3UlOTsbb25unn37aoXYnTpwgKirKoWOnurHk1VhyCiZHXo01p0DyaiItXLgQNzc3kpKS0Gg0ymLmt6qxfl5PNFmbVAghxLQ3rW+TCiGEECCToRBCCCGToRBCCCGToRBCiGlPJkMhhBDTnkyGQgghKCsro6ysjI6ODnQ6nd1jmpubaWpqcireUE1NTaxZswadTkdubi4mk4m6ujouX7487vGP15T4P0MhhBAjGxgYwNXVsWsfX19fNm7caPd3zc3NmEwmFi5cOKZxnD59moyMDJYvX648V19fz+LFi5k7d+6YYv5bJt1k2NzczHfffUd8fPxN77utrY3Lly87vQ7krT6O/Px8Hn30UYKCgti/fz9nz54lLy8PGFxjs6amhrS0tH+lr1uV5JXk1USpq6ujsLAQs9lMZ2cn69at49ChQ8ycOZM9e/YAkJOTQ0tLC+7u7mzfvp3r16+TmZlJf38/9913Hzk5OdTV1fHhhx8qcfLz860WPU9OTub++++noaGBuLg41q5dS3Z2Np6enrS2tpKfn2/Tj4eHBy+99BJ9fX3Mnj2bZcuW0dbWxs6dO9mxYwdVVVXs3buXmTNnsmnTJkpLS/nrr784d+4cO3bscCiexc8//0xpaSleXl709PTw8ccfk5+fT3l5OcePH2fJkiUEBARQU1NDd3c3AHv37qW3t5fXXnuNP/74Ax8fH7Zv386pU6fYs2cPnp6eaLVaZs2axbZt2/Dw8GD16tU88cQTTr9Pk+42aWBg4L/2gTUwMODU8e3t7cqyRzeLvTE6O47RXufAwACNjY0EBQUBkJqaitFoVJbr0mg0fPXVV06fq8lG8kryaiKZzWZ0Oh0RERF88803FBYWolar+f777zl58iR+fn7o9XrWr19PSUkJ3t7eFBQUUFxczD///ENra6sSa/fu3URERHD27FmbfqKjoykuLqa8vFxZ4i8kJIT9+/fb7aeqqooHH3zQZmKFwfdQp9Oh1+spKiri4YcfJiEhgbS0NN59912n482fP5/HH3+c7OxsYmJiAHB1dVWey87OBsDHx4e8vDzUajUXLlzg4MGDrFixAr1eT2hoKJWVlVRWVrJz5070ej3Lly/niy++IDMzE71eT1xc3Jjeo0l3ZVhXV0dtbS1LliyxqZK2bdvGM888w913341er8fX15eYmBir6iU+Pp5Dhw4Bg8tXDa8o4uLibKody3YjpaWlNDY28vXXXyuV3vDY5eXlI1aA9fX1FBQU0N/fT19fH7t27WL27Nk2/RkMBgoKCpQxajQaqyqxu7vbahxlZWWYTCbi4+P54IMPCA0NBbCK8fnnn9t9TQaDgTvvvFM5v2azma6uLlpaWnjggQeAwSRubm4e862RyUDySvJqIgUEBACDu4X4+Pgoj7u6urh48SKfffYZNTU19Pf3ExwcTGdnJzk5OXR3d9Pe3s7vv/9uFUetVitXT0MFBQUxY8YM/Pz8uHr1KoByfu314+LiohQsw9+Hq1ev4u/vj7u7O4DNbVZn4zl7rtRqtXJ+mpqa+OSTT+jt7WXVqlVs3LgRnU6HyWQiPT2dpKQkdDodn376KcnJyWNakm7SXRkON7RKeuyxx6ioqAAG701HRETYVC+XLl3CaDQq7YZXFPaqHYuEhARWr15NYWEhgN3Yo1WAAL29vezbt4/ExERKS0tH7G/oGIdXiWFhYVbjGIklhslkGvE1tba24u/vr/x8+PBh+vv7uXTpkvLc3LlzrX6eDiSvRiZ5NT4uLi7KY7PZzF133cWaNWsoKiqiuLiYjIwMjh49SnR0NEVFRSxatAh7q2bae85gMGAymbhy5Yoy6VomMXv9zJkzR9m+y5JLFj4+Ply5coXe3l5g8EpRpVIp+1g6G28kQ2OOdH7S0tIoKiqitLSUpKQk/P39yc3NJSEhgYKCAqX4y8zMZNeuXQ71azOOMbW6RQyvkiIjI8nLyyMxMZHbb78dT09Pm+qlr6+PpUuXKjGGVxT2qp2R2IttuUdurwJ0dXUlMDAQGLxNdObMGVxcXOz2N7SqGl4lLliwwGocw5PHYrSK0J5r165x5MgRsrKy+PLLL61iDu1jqpO8GiR5dXNERUXx1ltvkZKSAsBTTz1FeHg4W7ZsoaqqyqlYFRUVvP3228TGxuLm5nbDfqKjo3nxxRfRarU2Wz+5urry3HPP8eSTT+Lh4cELL7xAcHAw2dnZ/Pjjj2zdutWpeCMJDQ3lvffe4/z58/j5+dn8fu3atWzdupUDBw5gNpvZvHkzx44d4/z58/T09LBlyxZKSko4fvw4PT09bNiwwalzZjGpJ8OhzGYzKpUKf39/9u3bR3R0NPD/6iU1NRWAM2fOUF9fr7SzVBS//fYbr7/+OomJiVbHG41G5ViVSmX1HceNYtv7MLFsgWMwGJg3b55NDKPRSGNjo1VbS5UYGxvL5s2bmTFjhlKtweCGnpa4P/zwA2FhYVb92+vDYsGCBTQ0NACQl5dHamoq99xzDwcPHlSOaWtrY9WqVTd4B6YmySvJq/EKCwtTzl1sbKzy/KZNm5THb7zxhk27I0eO2I01PM5QWVlZqFT//1h/5513lMcuLi52+9m9e7fNc5ZdV1auXGmzIfOBAwdGHbe9eBZDX7Nlu7HFixfz0UcfjXqsZTwWw3e5CAsLc2oXFXsm/W3S4WJiYigpKSEyMhIYrIba29tJSUkhJSWF69evWx1fUlLC+vXrSU9PJzY21ub46upq5diAgAAaGxt5+eWXHYptj0qlQqvVUlxcTEJCwqj9WYSHh1NQUMDzzz/PtWvXuPfee63GER4ezunTp0lPT7fb52h9aDQafvnlF3799VdaW1tZunQparWajo4O5ZiWlhblymO6kryyJXklphLZwukmsvyRxiuvvPJfD8XK0D+BH85gMFBdXc2zzz77H4xMOELySojxmzK3ScXYabXaEX+n0WjQaDQ3cTRiqpC8EpOJXBkKIYSY9qbcd4ZCCCGEs2QyFEIIMe3JZCiEEGLak8lQCCHEtCeToRBCiGlPJkMhhBDTnkyGQgghpj2ZDIUQQkx7/wOs4aWdsO2fDwAAAABJRU5ErkJggg==\n", 80 | "text/plain": [ 81 | "
" 82 | ] 83 | }, 84 | "metadata": {}, 85 | "output_type": "display_data" 86 | } 87 | ], 88 | "source": [ 89 | "# plot Fig. 5\n", 90 | "save_fig = True\n", 91 | "fname = '../figures/060822/fig5-aav.pdf'\n", 92 | "cmain = 'slategray'\n", 93 | "\n", 94 | "# single-column width: 3.42 inches or 8.7 cm\n", 95 | "# double-column width: 7 inches or 17.8 cm\n", 96 | "# maximum height: 8.85 inches or 22.5 cm\n", 97 | "# small: approx 9 cm x 6 cm (3.54 x 2.36 in)\n", 98 | "# medium: approx 11 cm x 11 cm (4.33 in)\n", 99 | "# large: approx 18 cm x 22 cm (7 x 8.66 in)\n", 100 | "fig = plt.figure(figsize=(7, 1.6))\n", 101 | "gs1 = gridspec.GridSpec(1, 3, figure=fig, wspace=0.65)\n", 102 | "gs2 = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gs1[1], hspace=0.2)\n", 103 | "ax1 = plt.subplot(gs1[0])\n", 104 | "ax21 = plt.subplot(gs2[0])\n", 105 | "ax22 = plt.subplot(gs2[1])\n", 106 | "ax3 = plt.subplot(gs1[2])\n", 107 | "\n", 108 | "# ===== true fitness =====\n", 109 | "\n", 110 | "nnkmeanfitness = -0.7716402509409155\n", 111 | "ax1.plot(lambda_l, truefit_l, '-ob', linewidth=2, markersize=4, alpha=0.8, c=cmain, label='__nolegend__');\n", 112 | "ax1.axhline(nnkmeanfitness, linestyle=\"--\", alpha=0.8,\n", 113 | " color='k', label=r'training distribution')\n", 114 | "ax1.set_xticks(lambda_l);\n", 115 | "ax1.set_xticklabels(lambda_l);\n", 116 | "ax1.set_ylabel('mean true fitness')\n", 117 | "ax1.set_xlabel(r'inverse temperature ($\\lambda$)')\n", 118 | "ax1.legend(fontsize=6, loc=(0.05, 0.1));\n", 119 | "ax1.set_title('(a)')\n", 120 | "\n", 121 | "# ===== coverage =====\n", 122 | "\n", 123 | "ax21.plot(lambda_l, cov_l, '-o', linewidth=2, markersize=4, alpha=0.8, c=cmain, label='__nolegend__');\n", 124 | "ax21.set_ylabel('emp. cov.');\n", 125 | "ax21.set_xticklabels([])\n", 126 | "ax21.set_ylim([0.85, 1]);\n", 127 | "ax21.set_yticks([0.85, 0.9, 0.95, 1])\n", 128 | "ax21.set_xticks(lambda_l);\n", 129 | "ax21.axhline(0.9, linestyle='--', alpha=0.8, c='k', label=r'$1 - \\alpha$');\n", 130 | "ax21.legend(fontsize=6)\n", 131 | "ax21.set_title('(b)')\n", 132 | "\n", 133 | "# ===== confidence set sizes =====\n", 134 | "fmax = 8.798749497001769\n", 135 | "fmin = -7.530085215864544\n", 136 | "frange = fmax - fmin\n", 137 | "\n", 138 | "navglen_lxt = avglen_lxt / frange # report average size as fraction of total range of fitness values\n", 139 | "navglen_l = np.nanmean(navglen_lxt, axis=1)\n", 140 | "ax22.plot(lambda_l, navglen_l, '-o', linewidth=2, markersize=4, alpha=0.8, c=cmain);\n", 141 | "ax22.set_ylabel('mean size');\n", 142 | "ax22.set_xticks(lambda_l);\n", 143 | "ax22.set_xticklabels(lambda_l);\n", 144 | "ax22.set_xlabel(r'inverse temperature ($\\lambda$)')\n", 145 | "\n", 146 | "# fraction of confidence sets with infinite size\n", 147 | "ax22inf = ax22.twinx()\n", 148 | "ax22inf.plot(lambda_l, np.mean(fracinf_lxt, axis=1), '--o', dashes=(1, 0.5), linewidth=2, markersize=4, alpha=0.8, c=cmain);\n", 149 | "ax22inf.set_ylabel(' frac. inf.', fontsize=5.5, rotation=270, labelpad=-5);\n", 150 | "ax22inf.set_yticks([0, 0.16]);\n", 151 | "\n", 152 | "\n", 153 | "# ===== trade-off with certainty =====\n", 154 | "\n", 155 | "nnkmeanpredfit = -0.4723137617111206 # computed in aav-experiments\n", 156 | "ax3.plot(meanpredfit_l, navglen_l, '-o', linewidth=2, markersize=4, alpha=0.8, c=cmain);\n", 157 | "ax3.set_xlabel('mean predicted fitness');\n", 158 | "ax3.set_ylabel('mean conf. set size', labelpad=-0.1);\n", 159 | "ax3.set_title('(c)');\n", 160 | "ax3.axvline(nnkmeanpredfit, linestyle=\"--\", alpha=0.8, color='k', label='training distribution')\n", 161 | "ax3.legend(fontsize=6, loc=(0.1, 0.78));\n", 162 | "\n", 163 | "xoff = [-0.15, 0.06, -0.18, 0.08, 0.08, -0.4, -0.4]\n", 164 | "yoff = [0.002, -0.003, 0.004, 0, 0, -0.001, -0.001]\n", 165 | "for l, lmbda in enumerate(lambda_l):\n", 166 | " ax3.annotate(r'$\\lambda = {}$'.format(lmbda),\n", 167 | " (meanpredfit_l[l] + xoff[l], navglen_l[l] + yoff[l]), fontsize=6)\n", 168 | "\n", 169 | "if save_fig:\n", 170 | " plt.savefig(fname, dpi=300, bbox_inches='tight')" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "kernelspec": { 176 | "display_name": "TensorFlow-GPU-2.1.0", 177 | "language": "python", 178 | "name": "tf-gpu" 179 | }, 180 | "language_info": { 181 | "codemirror_mode": { 182 | "name": "ipython", 183 | "version": 3 184 | }, 185 | "file_extension": ".py", 186 | "mimetype": "text/x-python", 187 | "name": "python", 188 | "nbconvert_exporter": "python", 189 | "pygments_lexer": "ipython3", 190 | "version": "3.7.7" 191 | } 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 4 195 | } 196 | -------------------------------------------------------------------------------- /notebooks/fluorescence-experiments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook has the script for reproducing the experimental results shown in Fig. 3, 4, S2." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import time\n", 19 | "from importlib import reload\n", 20 | "module_path = os.path.abspath(os.path.join('../'))\n", 21 | "if module_path not in sys.path:\n", 22 | " sys.path.append(module_path)\n", 23 | " \n", 24 | "import numpy as np\n", 25 | " \n", 26 | "import assay\n", 27 | "import calibrate as cal" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Using 92 order-2 features\n", 40 | "Loading estimated measurement noise SD computed using order 7 and significance level 0.01\n", 41 | "red, 384, 6. 100 trials. SCS, FCS coverage: 0.8800, 0.8600. 752.7 s\n", 42 | "red, 384, 6. 200 trials. SCS, FCS coverage: 0.9050, 0.8950. 1523.3 s\n", 43 | "red, 384, 6. 300 trials. SCS, FCS coverage: 0.9100, 0.9000. 2321.2 s\n", 44 | "red, 384, 6. 400 trials. SCS, FCS coverage: 0.9050, 0.8975. 3115.3 s\n", 45 | "red, 384, 6. 500 trials. SCS, FCS coverage: 0.9040, 0.8960. 3886.5 s\n", 46 | "red, 384, 6. 600 trials. SCS, FCS coverage: 0.9083, 0.8967. 4682.9 s\n", 47 | "red, 384, 6. 700 trials. SCS, FCS coverage: 0.9129, 0.9014. 5508.1 s\n", 48 | "red, 384, 6. 800 trials. SCS, FCS coverage: 0.9100, 0.9012. 6339.8 s\n", 49 | "red, 384, 6. 900 trials. SCS, FCS coverage: 0.9156, 0.9056. 7130.1 s\n", 50 | "red, 384, 6. 1000 trials. SCS, FCS coverage: 0.9170, 0.9060. 7918.1 s\n", 51 | "red, 384, 6. 1100 trials. SCS, FCS coverage: 0.9182, 0.9082. 8694.7 s\n", 52 | "red, 384, 6. 1200 trials. SCS, FCS coverage: 0.9183, 0.9092. 9461.5 s\n", 53 | "red, 384, 6. 1300 trials. SCS, FCS coverage: 0.9192, 0.9092. 10214.7 s\n", 54 | "red, 384, 6. 1400 trials. SCS, FCS coverage: 0.9207, 0.9107. 10968.0 s\n", 55 | "red, 384, 6. 1500 trials. SCS, FCS coverage: 0.9233, 0.9140. 11721.7 s\n", 56 | "red, 384, 6. 1600 trials. SCS, FCS coverage: 0.9237, 0.9144. 12484.3 s\n", 57 | "red, 384, 6. 1700 trials. SCS, FCS coverage: 0.9259, 0.9165. 13244.3 s\n", 58 | "red, 384, 6. 1800 trials. SCS, FCS coverage: 0.9244, 0.9150. 13986.6 s\n", 59 | "red, 384, 6. 1900 trials. SCS, FCS coverage: 0.9253, 0.9163. 14725.1 s\n", 60 | "red, 384, 6. 2000 trials. SCS, FCS coverage: 0.9270, 0.9160. 15472.0 s\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "reload(cal)\n", 66 | "reload(assay)\n", 67 | "\n", 68 | "alpha = 0.1 # miscoverage\n", 69 | "n_trains = [96, 192, 384] # number of training points\n", 70 | "ntrain2reg = {96: 10, 192: 1, 384: 1} # ridge regularization strength (gamma in code and paper)\n", 71 | "n_seed = 2000 # number of random trials\n", 72 | "lmbdas = [0, 2, 4, 6] # lambda, inverse temperature\n", 73 | "y_increment = 0.02 # grid spacing between candidate label values, \\Delta in paper\n", 74 | "ys = np.arange(0, 2.21, y_increment) # candidate label values, \\mathcal{Y} in paper\n", 75 | "order = 2 # complexity of features. 1 encodes the AA at each site,\n", 76 | " # 2 the AAs at each pair of sites,\n", 77 | " # 3 the AAs at each set of 3 sites, etc.\n", 78 | " \n", 79 | "# likelihood under training input distribution, p_X in paper (uniform distribution)\n", 80 | "ptrain_fn = lambda x: (1.0 / np.power(2, 13)) * np.ones([x.shape[0]])\n", 81 | "for fitness_str in ['red']:\n", 82 | " \n", 83 | " # featurize all sequences in combinatorially complete dataset\n", 84 | " data = assay.PoelwijkData(fitness_str, order=order)\n", 85 | " \n", 86 | " for t, n_train in enumerate(n_trains):\n", 87 | "\n", 88 | " reg = ntrain2reg[n_train]\n", 89 | " fcs = cal.ConformalRidgeFeedbackCovariateShift(ptrain_fn, ys, data.X_nxp, reg)\n", 90 | " scs = cal.ConformalRidgeStandardCovariateShift(ptrain_fn, ys, data.X_nxp, reg)\n", 91 | "\n", 92 | " for l, lmbda in enumerate(lmbdas):\n", 93 | "\n", 94 | " fset_s, sset_s = [], []\n", 95 | " fcov_s, scov_s = np.zeros([n_seed]), np.zeros([n_seed])\n", 96 | " ytest_s, predtest_s = np.zeros([n_seed]), np.zeros([n_seed])\n", 97 | " t0 = time.time()\n", 98 | "\n", 99 | " for seed in range(n_seed):\n", 100 | " \n", 101 | " # sample training and designed data\n", 102 | " Xtrain_nxp, ytrain_n, Xtest_1xp, ytest_1, pred_1 = assay.get_training_and_designed_data(\n", 103 | " data, n_train, reg, lmbda, seed=seed )\n", 104 | " ytest_s[seed] = ytest_1[0]\n", 105 | " predtest_s[seed] = pred_1[0]\n", 106 | "\n", 107 | " # construct confidence set under feedback covariate shift\n", 108 | " fset, _ = fcs.get_confidence_set(Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, alpha=alpha) \n", 109 | " fset_s.append(fset)\n", 110 | " fcov_s[seed] = cal.is_covered(ytest_s[seed], fset, y_increment)\n", 111 | "\n", 112 | " # construct confidence set under standard covariate shift\n", 113 | " sset, _ = scs.get_confidence_set(Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, alpha=alpha) \n", 114 | " sset_s.append(sset)\n", 115 | " scov_s[seed] = cal.is_covered(ytest_s[seed], sset, y_increment)\n", 116 | "\n", 117 | " if (seed + 1) % 100 == 0:\n", 118 | " print(\"{}, {}, {}. {} trials. SCS, FCS coverage: {:.4f}, {:.4f}. {:.1f} s\".format(\n", 119 | " fitness_str, n_train, lmbda, seed + 1,\n", 120 | " np.mean(scov_s[: seed + 1]), np.mean(fcov_s[: seed + 1]), time.time() - t0))\n", 121 | "\n", 122 | " np.savez('../fluorescence/{}_n{}_lambda{}_alpha{}_gamma{}.npz'.format(\n", 123 | " fitness_str, n_train, lmbda, alpha, reg),\n", 124 | " ytest_s=ytest_s, predtest_s=predtest_s,\n", 125 | " fset_s=fset_s, fcov_s=fcov_s, sset_s=sset_s, scov_s=scov_s, \n", 126 | " )" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "TensorFlow-GPU-2.1.0", 133 | "language": "python", 134 | "name": "tf-gpu" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.7.7" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 4 151 | } 152 | --------------------------------------------------------------------------------