├── .gitignore
├── LICENSE
├── README.md
├── aav
    ├── models
    │   ├── constrained_maxent_parameters.npz
    │   └── h100_0.npy
    ├── randomized-staircase-results.npz
    └── test_and_calibration_aav_data.npz
├── assay.py
├── calibrate.py
├── environment.yml
├── fluorescence
    ├── blue_n192_lambda0_alpha0.1_gamma1.npz
    ├── blue_n192_lambda2_alpha0.1_gamma1.npz
    ├── blue_n192_lambda4_alpha0.1_gamma1.npz
    ├── blue_n192_lambda6_alpha0.1_gamma1.npz
    ├── blue_n384_lambda0_alpha0.1_gamma1.npz
    ├── blue_n384_lambda2_alpha0.1_gamma1.npz
    ├── blue_n384_lambda4_alpha0.1_gamma1.npz
    ├── blue_n384_lambda6_alpha0.1_gamma1.npz
    ├── blue_n96_lambda0_alpha0.1_gamma10.npz
    ├── blue_n96_lambda2_alpha0.1_gamma10.npz
    ├── blue_n96_lambda4_alpha0.1_gamma10.npz
    ├── blue_n96_lambda6_alpha0.1_gamma10.npz
    ├── blue_noise.npz
    ├── red_n192_lambda0_alpha0.1_gamma1.npz
    ├── red_n192_lambda2_alpha0.1_gamma1.npz
    ├── red_n192_lambda4_alpha0.1_gamma1.npz
    ├── red_n192_lambda6_alpha0.1_gamma1.npz
    ├── red_n384_lambda0_alpha0.1_gamma1.npz
    ├── red_n384_lambda2_alpha0.1_gamma1.npz
    ├── red_n384_lambda4_alpha0.1_gamma1.npz
    ├── red_n384_lambda6_alpha0.1_gamma1.npz
    ├── red_n96_lambda0_alpha0.1_gamma10.npz
    ├── red_n96_lambda2_alpha0.1_gamma10.npz
    ├── red_n96_lambda4_alpha0.1_gamma10.npz
    ├── red_n96_lambda6_alpha0.1_gamma10.npz
    ├── red_noise.npz
    ├── supp_data_3.xlsx
    └── supp_data_4.xlsx
└── notebooks
    ├── aav-experiments.ipynb
    ├── aav-figures.ipynb
    ├── fluorescence-experiments.ipynb
    └── fluorescence-figures.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Clara Wong-Fannjiang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Conformal prediction under feedback covariate shift for biomolecular design
 2 | This repo contains the code accompanying the following paper:
 3 | 
 4 | C. Fannjiang, S. Bates, A. Angelopoulos, J. Listgarten, M. I. Jordan, Conformal prediction under feedback covariate shift for biomolecular design. 2022. *Proceedings of the National Academy of Sciences*, 119(43), e2204569119.
 5 | [link](https://www.pnas.org/doi/10.1073/pnas.2204569119)
 6 | 
 7 | See `calibrate.py` for implementations of both the full and split conformal prediction algorithms we describe. `assay.py` contains utilities and classes for handling the fluorescence and AAV datasets, which are stored (along with relevant models and results) in `fluorescence/` and `aav/`, respectively.
 8 | 
 9 | Notebooks for reproducing and plotting the results of the simulated protein design experiments are as follows:
 10 | - `notebooks/fluorescence-experiments.ipynb` shows how we ran the fluorescent protein design experiments, which use full conformal prediction under feedback covariate shift, algorithmically optimized for ridge regression (Alg. S2 in the [SI Appendix](https://www.pnas.org/doi/10.1073/pnas.2204569119#supplementary-materials)).
11 | - `notebooks/fluorescence-figures.ipynb` creates Figs. 3 and 4 in the main paper and Fig. S2 in the [SI Appendix](https://www.pnas.org/doi/10.1073/pnas.2204569119#supplementary-materials).
 12 | - `notebooks/aav-experiments.ipynb` shows how we ran the AAV design experiments, which use a randomized version of split conformal prediction under covariate shift (Alg. S1 in the [SI Appendix](https://www.pnas.org/doi/10.1073/pnas.2204569119#supplementary-materials)).
13 | - `notebooks/aav-figures.ipynb` creates Fig. 5 in the main paper.
14 | 
15 | 


--------------------------------------------------------------------------------
/aav/models/constrained_maxent_parameters.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/models/constrained_maxent_parameters.npz


--------------------------------------------------------------------------------
/aav/models/h100_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/models/h100_0.npy


--------------------------------------------------------------------------------
/aav/randomized-staircase-results.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/randomized-staircase-results.npz


--------------------------------------------------------------------------------
/aav/test_and_calibration_aav_data.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/aav/test_and_calibration_aav_data.npz


--------------------------------------------------------------------------------
/assay.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | import time
  3 | from itertools import chain, combinations
  4 | 
  5 | import numpy as np
  6 | import scipy as sc
  7 | import pandas as pd
  8 | 
  9 | from sklearn.linear_model import LinearRegression
 10 | from tensorflow.keras.utils import Sequence
 11 | from calibrate import get_invcov_dot_xt
 12 | 
 13 | # ===== utilities and classes for AAV experiments =====
 14 | 
 15 | # ----- utilities for converting between amino acids and nucleotides -----
 16 | 
# Genetic code lookup: lowercase one-letter amino acid code ('*' is the stop signal)
# mapped to the list of lowercase DNA codons that encode it.
# NOTE: the insertion order of the keys defines AA_ORDERED below, and therefore the column
# order of the amino acid probability matrices and one-hot encodings built from it.
AA2CODON = {
        'l': ['tta', 'ttg', 'ctt', 'ctc', 'cta', 'ctg'],
        's': ['tct', 'tcc', 'tca', 'tcg', 'agt', 'agc'],
        'r': ['cgt', 'cgc', 'cga', 'cgg', 'aga', 'agg'],
        'v': ['gtt', 'gtc', 'gta', 'gtg'],
        'a': ['gct', 'gcc', 'gca', 'gcg'],
        'p': ['cct', 'ccc', 'cca', 'ccg'],
        't': ['act', 'acc', 'aca', 'acg'],
        'g': ['ggt', 'ggc', 'gga', 'ggg'],
        '*': ['taa', 'tag', 'tga'],
        'i': ['att', 'atc', 'ata'],
        'y': ['tat', 'tac'],
        'f': ['ttt', 'ttc'],
        'c': ['tgt', 'tgc'],
        'h': ['cat', 'cac'],
        'q': ['caa', 'cag'],
        'n': ['aat', 'aac'],
        'k': ['aaa', 'aag'],
        'd': ['gat', 'gac'],
        'e': ['gaa', 'gag'],
        'w': ['tgg'],
        'm': ['atg']
    }


# nucleotide alphabet; a nucleotide's position here is its column index in nucleotide probability matrices
NUC_ORDERED = ['A', 'T', 'C', 'G']
NUC2IDX = {nuc: i for i, nuc in enumerate(NUC_ORDERED)}  # nucleotide -> column index

# uppercase amino acid alphabet (including '*'), in the key order of AA2CODON
AA_ORDERED = [k.upper() for k in AA2CODON.keys()]
AA2IDX = {aa: i for i, aa in enumerate(AA_ORDERED)}  # amino acid -> column index
 47 | 
 48 | def pnuc2paa(pnuc_Lxk):
 49 |     """
 50 |     Converts nucleotide probabilities to amino acid probabilities.
 51 |     """
 52 |     L = pnuc_Lxk.shape[0]
 53 |     paadf_kxL = pd.DataFrame(0., index=AA_ORDERED, columns=range(int(L / 3)))
 54 |     for i in range(int(L / 3)):
 55 |         for aa in AA_ORDERED:
 56 |             codons = AA2CODON[aa.lower()]
 57 |             # for each codon corresponding to the AA, compute probability of generating that codon
 58 |             for cod in codons:
 59 |                 p_cod = 1
 60 |                 for j in range(3):  # multiply probabilities of each of the 3 nucleotides in the codon
 61 |                     nuc_idx = NUC2IDX[cod[j].upper()]
 62 |                     p_cod *= pnuc_Lxk[i * 3 + j, nuc_idx]
 63 |                 paadf_kxL[i].loc[aa] += p_cod
 64 |     return np.array(paadf_kxL).T
 65 | 
 66 | def phinuc2paa(phinuc_Lxk):
 67 |     """
 68 |     Converts unnormalized nucleotide probabilities to amino acid probabilities.
 69 |     """
 70 |     # normalize probabilities of categorical distribution per site
 71 |     pnuc_Lxk = np.exp(phinuc_Lxk) / np.sum(np.exp(phinuc_Lxk), axis=1, keepdims=True)
 72 |     # convert nucleotide probabilities to amino acid probabilities
 73 |     paa_Lxk = np.array(pnuc2paa(pnuc_Lxk))
 74 |     return paa_Lxk
 75 | 
# NNK (training) distribution
# nucleotide probabilities for the three positions of one codon (columns ordered as in NUC_ORDERED):
# the first two positions are uniform over all four nucleotides, the third is T or G with equal probability
pnnknucpersite = np.array([[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25], [0, 0.5, 0, 0.5]])
# repeat the codon pattern 7 times, giving 21 nucleotide sites
pnnknuc_Lxk = np.tile(pnnknucpersite, [7, 1])
# amino acid probabilities per codon site implied by the NNK nucleotide distribution
PNNKAA_LXK = np.array(pnuc2paa(pnnknuc_Lxk))
 80 | 
 81 | # ----- rejection sampling from test distribution -----
 82 | 
 83 | def get_loglikelihood(seq_n, p_Lxk: np.array):
 84 |     ohe_nxLxk = np.stack([one_hot_encode(seq, flatten=False) for seq in seq_n])
 85 |     logp_1xLxk = np.log(p_Lxk)[None, :, :]
 86 |     logp_n = np.sum(ohe_nxLxk * logp_1xLxk, axis=(1, 2))
 87 |     return logp_n
 88 | 
 89 | def get_rejection_sampling_acceptance_probabilities(seq_n, phitestnuc_Lxk, logptrain_n):
 90 |     ptestaa_Lxk = phinuc2paa(phitestnuc_Lxk)
 91 |     ratio_Lxk = ptestaa_Lxk / PNNKAA_LXK
 92 |     maxp_l = np.max(ratio_Lxk, axis=1)
 93 |     M = np.prod(maxp_l)
 94 | 
 95 |     # compute test likelihoods of all data
 96 |     logptest_n = get_loglikelihood(seq_n, ptestaa_Lxk)
 97 |     paccept_n = np.exp(logptest_n - (np.log(M) + logptrain_n))
 98 |     return paccept_n, logptest_n
 99 | 
def rejection_sample_from_test_distribution(paccept_n):
    """
    Draws accept/reject decisions for every candidate, repeating until at least one is accepted.

    :param paccept_n: (n,) numpy array of acceptance probabilities
    :return: numpy array of indices of the accepted candidates (never empty)
    :raises ValueError: if no acceptance probability is positive (including empty input),
        since no candidate could ever be accepted and the resampling loop would never end
    """
    if not np.any(np.asarray(paccept_n) > 0):
        raise ValueError('At least one acceptance probability must be positive.')
    while True:
        accept_n = sc.stats.bernoulli.rvs(paccept_n)
        testsamp_idx = np.where(accept_n)[0]
        if testsamp_idx.size:
            return testsamp_idx
109 | 
110 | # ----- class for sequence-fitness data generation -----
111 | 
def one_hot_encode(seq, flatten: bool = True):
    """
    One-hot encodes an amino acid sequence using the column order of AA_ORDERED.

    :param seq: sequence of uppercase amino acid characters (keys of AA2IDX)
    :param flatten: bool, whether to return the encoding flattened into a single vector
    :return: numpy array of shape (len(seq) * len(AA_ORDERED),) if flatten,
        otherwise (len(seq), len(AA_ORDERED))
    """
    n_sites = len(seq)
    ohe_lxk = np.zeros((n_sites, len(AA_ORDERED)))
    for site in range(n_sites):
        ohe_lxk[site, AA2IDX[seq[site]]] = 1
    if flatten:
        return ohe_lxk.flatten()
    return ohe_lxk
118 | 
class DataGenerator(Sequence):
    """
    Batch generator that serves one-hot encoded sequences together with the mean and variance
    columns of their fitness estimates.
    """

    def __init__(self, seq_n, fitness_nx2 = None, ids = None, batch_size: int = 1000, shuffle: bool = True):
        """
        :param seq_n: indexable collection of sequences
        :param fitness_nx2: (n, 2) array of (estimates of) mean and variance of log enrichment score;
            zeros are used when omitted (e.g., when the generator is only used for prediction)
        :param ids: indices into seq_n and fitness_nx2 to serve; defaults to all of them
        :param batch_size: int, number of sequences per batch
        :param shuffle: bool, whether to reshuffle the serving order at the end of every epoch
        """
        self.seq_n = seq_n
        if fitness_nx2 is None:
            # dummy values if using for prediction
            fitness_nx2 = np.zeros([len(seq_n), 2])
        self.fitness_nx2 = fitness_nx2
        if ids is None:
            ids = range(len(seq_n))
        self.ids = ids
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.n_feat = len(self.seq_n[0]) * len(AA_ORDERED)
        self.on_epoch_end()

    def on_epoch_end(self):
        """
        Reset (and optionally reshuffle) the order in which ids are served.
        """
        order = np.arange(len(self.ids))
        self.idx = order
        if self.shuffle:
            np.random.shuffle(self.idx)

    def __len__(self):
        # number of complete batches; a trailing partial batch is not served
        n_batches = np.floor(len(self.ids) / self.batch_size)
        return int(n_batches)

    def __getitem__(self, index):
        # positions (within the current serving order) belonging to this batch
        start = index * self.batch_size
        stop = (index + 1) * self.batch_size
        batch_positions = self.idx[start : stop]

        # translate positions into ids
        batch_ids = [self.ids[pos] for pos in batch_positions]

        # fetch sequences and their (estimated) fitness mean and variance
        encoded = [one_hot_encode(self.seq_n[seq_id], flatten=True) for seq_id in batch_ids]
        X_bxp = np.array(encoded)
        y_nx2 = self.fitness_nx2[batch_ids]
        return [X_bxp, y_nx2[:, 0], y_nx2[:, 1]]
152 | 
153 | # ===== utilities and classes for fluorescence experiments =====
154 | 
155 | # ----- utilities for Walsh-Hadamard transform -----
156 | # adapted from David H. Brookes's code (https://github.com/dhbrookes/FitnessSparsity/blob/main/src/utils.py) for:
157 | # D. H. Brookes, A. Aghazadeh, J. Listgarten,
158 | # On the sparsity of fitness functions and implications for learning. PNAS, 119 (2022).
159 | 
def get_interactions(seq_len, order: int = None):
    """
    Lists the epistatic interactions (tuples of site indices) for a sequence of the given length,
    grouped by increasing order up to and including `order` (all orders if `order` is None).
    For example, get_interactions(3) returns [(), (0,), (1,), (2,), (0, 1), (0, 2), (1, 2), (0, 1, 2)].
    This sets the order used for regression coefficients.
    """
    max_order = seq_len if order is None else order
    positions = list(range(seq_len))
    interactions = []
    for k in range(max_order + 1):
        interactions.extend(combinations(positions, k))
    return interactions
171 | 
def walsh_hadamard_from_seqs(signedseq_nxl: np.array, order: int = None, normalize: bool = False):
    """
    Returns an n x p array of (truncated) Walsh-Hadamard encodings of a given n x l array of
    sequences encoded with entries -1 and 1, where p is the number of interaction terms of up to
    the given order (see get_interactions). The first column is the intercept.
    """
    n_seqs, seq_len = signedseq_nxl.shape
    terms = get_interactions(seq_len, order=order)
    X_nxp = np.zeros((n_seqs, len(terms)))
    for col, sites in enumerate(terms):
        if sites:
            # feature for an interaction is the product of the signed entries at its sites
            X_nxp[:, col] = np.prod(signedseq_nxl[:, sites], axis=1)
        else:
            # the empty interaction is the intercept
            X_nxp[:, col] = 1
    if normalize:
        X_nxp /= np.sqrt(np.power(2, seq_len))  # for proper WH matrix
    return X_nxp
187 | 
188 | # ----- sample training and designed data according to fluorescence experiments -----
189 | 
def get_training_and_designed_data(data, n, gamma, lmbda, seed: int = None):
    """
    Draw a training set and a single designed protein for the fluorescence experiments.

    Training inputs are sampled uniformly at random, with replacement, from the combinatorially
    complete data set (Poelwijk et al. 2019). A ridge regression model is fit to their noisy
    measurements, and one designed protein (w/ ground-truth label) is sampled according to the
    design algorithm in Eq. 6 of the main paper, i.e., with probability proportional to
    exp(lmbda * prediction).

    NOTE: seed only determines which sequences are selected. The measurement noise is drawn by
    data.get_measurements, which is called here without a seed.

    :param data: assay.PoelwijkData object
    :param n: int, number of training points, {96, 192, 384} in main paper
    :param gamma: float, ridge regularization strength
    :param lmbda: float, inverse temperature of design algorithm in Eq. 6, {0, 2, 4, 6} in main paper
    :param seed: int, random seed
    :return: numpy arrays of training sequences, training labels, designed sequence, label, and prediction
    """
    rng = np.random.default_rng(seed)

    # training inputs and their noisy measurements
    train_idx = rng.choice(data.n, n, replace=True)
    Xtrain_nxp = data.X_nxp[train_idx]
    ytrain_n = data.get_measurements(train_idx)

    # ridge regression coefficients
    beta_p = get_invcov_dot_xt(Xtrain_nxp, gamma, use_lapack=True).dot(ytrain_n)

    # unnormalized test input distribution over every sequence in the data set
    punnorm_n = np.exp(lmbda * data.X_nxp.dot(beta_p))
    Z = np.sum(punnorm_n)
    if lmbda:
        ptest_n = punnorm_n / Z
    else:
        ptest_n = None  # lmbda == 0: sample the designed sequence uniformly

    # index of the designed sequence, and its features
    test_idx = rng.choice(data.n, 1, p=ptest_n)
    Xtest_1xp = data.X_nxp[test_idx]

    # noisy measurement and model prediction for the designed sequence
    ytest_1 = data.get_measurements(test_idx)
    pred_1 = Xtest_1xp.dot(beta_p)
    return Xtrain_nxp, ytrain_n, Xtest_1xp, ytest_1, pred_1
225 | 
226 | # ----- classes for handling combinatorially complete data sets -----
227 | 
class Assay(ABC):
    """
    Abstract base class for a data set that can return measurements for sequences given by index.
    """

    def __init__(self):
        return None

    @abstractmethod
    def get_measurements(self, x_idx: np.array, seed: int = None):
        """
        Return measurements for the sequences with the given indices; subclasses must override.

        :param x_idx: indices of the sequences to measure
        :param seed: int, random seed
        """
        raise NotImplementedError()
235 | 
class PoelwijkData(Assay):
    """
    Combinatorially complete fluorescence data set parsed from Supplementary Data 3 of
    Poelwijk et al. (2019), featurized with (truncated) Walsh-Hadamard interaction terms and
    paired with an estimate of the per-sequence measurement noise SD.
    """

    # Directory holding supp_data_3.xlsx and the {red,blue}_noise.npz files. The default is
    # relative to the working directory; pass data_dir to __init__ to override it per instance.
    data_dir = '../fluorescence'

    def __init__(self, fitness: str, order: int = 1, noise_estimate_order: int = 7, sig_level: float = 0.01,
                 load_precomputed_noise: bool = True, data_dir: str = None):
        """
        :param fitness: str, which brightness measurement to use, 'red' or 'blue'
        :param order: int, maximum order of interaction terms used as features
        :param noise_estimate_order: int, maximum order of interaction terms in the linear model
            used to estimate measurement noise (only used if load_precomputed_noise is False)
        :param sig_level: float, significance level for selecting coefficients of that linear model
        :param load_precomputed_noise: bool, load the noise estimate from {fitness}_noise.npz
            instead of recomputing (and overwriting) it
        :param data_dir: str, directory containing the data files; defaults to the class
            attribute data_dir
        :raises ValueError: if fitness is neither 'red' nor 'blue'
        """
        if fitness not in ['red', 'blue']:
            raise ValueError('Unrecognized fitness name: {}'.format(fitness))
        if data_dir is not None:
            self.data_dir = data_dir
        noise_fname = '{}/{}_noise.npz'.format(self.data_dir, fitness)

        # ===== featurize sequences as higher-order interaction terms =====

        df = self.read_poelwijk_supp3()
        self.Xsigned_nxp = self.strarr2signedarr(df.binary_genotype)  # 1/-1 encoding of sequences
        self.X_nxp = walsh_hadamard_from_seqs(self.Xsigned_nxp, order=order) # featurize including intercept

        self.n, self.p = self.X_nxp.shape
        self.order = order
        print('Using {} order-{} features'.format(self.p, order))

        if fitness == 'blue':
            self.y_n = np.array(df.brightness_blue)
        elif fitness == 'red':
            self.y_n = np.array(df.brightness_red)

        # ===== estimate per-sequence measurement noise SD =====

        if load_precomputed_noise:
            # context manager closes the underlying file handle once the arrays are read
            with np.load(noise_fname) as d:
                self.se_n = d['se_n']
                # existing files are read with the key 'order_est_noise', whereas this class used to
                # save the same quantity as 'noise_estimate_order'; accept either spelling
                order_key = 'order_est_noise' if 'order_est_noise' in d.files else 'noise_estimate_order'
                print("Loading estimated measurement noise SD computed using order {} and significance level {}".format(
                    d[order_key], d['sig_level']))

        else:
            t0 = time.time()

            # ===== compute Walsh-Hadamard transform, truncated to order noise_estimate_order =====
            # best linear model of complete fitness landscape using terms of up to noise_estimate_order.
            # default value of noise_estimate_order = 7 taken from Poelwijk et al. (2019), who found
            # significant epistatic interactions of up to order 7 in the complete fitness landscape (see their Fig. 2e)

            # encode all 2^13 sequences with up to noise_estimate_order terms
            X_nxp = walsh_hadamard_from_seqs(self.Xsigned_nxp, order=noise_estimate_order)
            n_feat = X_nxp.shape[1]
            print('Estimating noise using {} interaction terms up to order {}'.format(n_feat, noise_estimate_order))

            # fit linear model using all 2^13 fitness measurements
            ols = LinearRegression(fit_intercept=False)  # featurization from walsh_hadamard_from_seqs has intercept
            ols.fit(X_nxp, self.y_n)

            # determine statistically significant coefficients
            # compute t-statistics
            pred_n = ols.predict(X_nxp)
            dof = self.n - n_feat
            sigmasq_hat = np.sum(np.square(self.y_n - pred_n)) / dof  # estimate of \sigma^2
            var_p = sigmasq_hat * (np.linalg.inv(np.dot(X_nxp.T, X_nxp)).diagonal())
            ts_p = ols.coef_ / np.sqrt(var_p)

            # two-sided p-values; the survival function is evaluated for all coefficients at once
            # and, unlike 1 - cdf, does not round very small p-values to exactly zero
            pvals = 2 * sc.stats.t.sf(np.abs(ts_p), dof)
            self.coef_pvals = pvals

            # use Bonferroni-Sidak correction as in Poelwijk et al. (2019) (Fig. 2e, S6)
            threshold = 1 - np.power(1 - sig_level, 1 / n_feat)
            sigterm_idx = np.where(pvals < threshold)[0]
            print("{} terms below {} for significance level {}. {:.1f} s".format(
                sigterm_idx.size, threshold, sig_level, time.time() - t0))

            # estimate per-sequence measurement noise SD by taking difference between measurements and
            # predictions made using the statistically significant coefficients
            pred_n = X_nxp[:, sigterm_idx].dot(ols.coef_[sigterm_idx])
            self.se_n = np.abs(pred_n - self.y_n)
            # the order is stored under both keys so that the file can be read back by the
            # loading branch above as well as by code expecting the previous key name
            np.savez(noise_fname,
                     se_n=self.se_n, noise_estimate_order=noise_estimate_order,
                     order_est_noise=noise_estimate_order, pvals=pvals, threshold=threshold,
                     sigterm_idx=sigterm_idx, n_feat=n_feat, sig_level=sig_level)

    def find(self, Xsigned_nxp):
        """
        Look up the row index in the data set of each given signed sequence.

        :param Xsigned_nxp: iterable of sequences in the 1/-1 encoding of self.Xsigned_nxp
        :return: numpy array with the index of the first matching row for each sequence
        :raises IndexError: if a sequence does not occur in the data set
        """
        return np.array([np.where((self.Xsigned_nxp == X_p).all(axis=1))[0][0] for X_p in Xsigned_nxp])

    def read_poelwijk_supp3(self):
        """
        Parse Poelwijk et al. (2019) Supplementary Data 3 for raw data.

        :return: pandas dataframe
        """
        df = pd.read_excel("{}/supp_data_3.xlsx".format(self.data_dir), skiprows=2, header=None)
        df.columns = ["binary_genotype", "amino_acid_sequence", "counts_input", "counts_red", "counts_blue",
                            "UNK1", "brightness_red", "brightness_blue", "UNK2", "brightness_combined"]
        # drop the first and last character of each genotype string, keeping only the binary digits
        df["binary_genotype"] = df["binary_genotype"].apply(lambda x: x[1:-1])
        return df

    def strarr2signedarr(self, binstrarr):
        """
        Convert array of strings of 0s and 1s to numpy array of -1s and 1s

        :param binstrarr: iterable containing strings of 0s and 1s
        :return: numpy array where each row corresponds to string
        """
        return np.array([[2 * int(b) - 1 for b in binstr] for binstr in binstrarr])

    def get_measurements(self, seqidx_n: np.array, seed: int = None):
        """
        Given indices of sequences, return noisy measurements (using estimated measurement noise SD).

        NOTE: this reseeds numpy's global random state on every call; with seed=None the state
        is reinitialized by numpy rather than left untouched.

        :param seqidx_n: iterable of ints, indices of which sequences to get measurements for
        :param seed: int, random seed
        :return: numpy array of noisy measurements corresponding to provided sequence indices
        """
        np.random.seed(seed)
        noisy_n = np.array([np.random.normal(loc=self.y_n[i], scale=self.se_n[i]) for i in seqidx_n])
        # enforce non-negative measurements by clipping negative draws to zero
        return np.fmax(noisy_n, 0)
344 | 
345 | 


--------------------------------------------------------------------------------
/calibrate.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Classes for full conformal prediction for exchangeable, standard, and feedback covariate shift data,
  3 | both for black-box predictive models and computationally optimized for ridge regression, and
  4 | functions for (random, exact-coverage) split conformal prediction under standard covariate shift.
  5 | Throughout this file, variable name suffixes denote the shape of the numpy array, where
  6 |     n: number of training points, or generic number of data points
  7 |     n1: n + 1
  8 |     p: number of features
  9 |     y: number of candidate labels, |Y|
 10 |     u: number of sequences in domain, |X|
 11 |     m: number of held-out calibration points for split conformal methods
 12 | """
 13 | 
 14 | import numpy as np
 15 | import time
 16 | import scipy as sc
 17 | 
 18 | from abc import ABC, abstractmethod
 19 | 
 20 | # ===== utilities for split conformal =====
 21 | 
 22 | def get_split_coverage(lu_nx2, y_n):
 23 |     """
 24 |     Computes empirical coverage of split conformal confidence interval
 25 |     :param lu_nx2: (n, 2) numpy array where first and second columns are lower and upper endpoints
 26 |     :param y_n: (n,) numpy array of true labels
 27 |     :return: float, empirical coverage
 28 |     """
 29 |     cov = np.sum((y_n >= lu_nx2[:, 0]) & (y_n <= lu_nx2[:, 1])) / y_n.size
 30 |     return cov
 31 | 
 32 | def get_randomized_staircase_coverage(C_n, y_n):
 33 |     """
 34 |     Computes empirical coverage and lengths of randomized staircase confidence sets.
 35 | 
 36 |     :param C_n: length-n list of outputs of get_randomized_staircase_confidence_set (i.e., list of tuples)
 37 |     :param y_n: (n,) numpy array of true labels
 38 |     :return: (n,) binary array of coverage and (n,) numpy array of lengths
 39 |     """
 40 |     def is_covered(confint_list, y):
 41 |         for confint_2 in confint_list:
 42 |             if y >= confint_2[0] and y <= confint_2[1]:
 43 |                 return True
 44 |         return False
 45 |     def get_len_conf_set(confint_list):
 46 |         return np.sum([confint_2[1] - confint_2[0] for confint_2 in confint_list])
 47 | 
 48 |     cov_n = np.array([is_covered(confset, y) for confset, y in zip(C_n, y_n)])
 49 |     len_n = np.array([get_len_conf_set(confset) for confset in C_n])
 50 |     return cov_n, len_n
 51 | 
 52 | def get_randomized_staircase_confidence_set(scores_m, weights_m1, predtest, alpha: float = 0.1):
 53 |     """
 54 |     Computes the "randomized staircase" confidence set in Alg. S1.
 55 | 
 56 |     :param scores_m: (m,) numpy array of calibration scores
 57 |     :param weights_m1: (m + 1) numpy array of calibration weights and single test weight
 58 |     :param predtest: float, prediction on test input
 59 |     :param alpha: miscoverage level
 60 |     :return: list of tuples (l, u), where l and u are floats denoting lower and upper
 61 |         endpoints of an interval.
 62 |     """
 63 |     lb_is_set = False
 64 |     idx = np.argsort(scores_m)
 65 |     sortedscores_m1 = np.hstack([0, scores_m[idx]])
 66 |     sortedweights_m1 = np.hstack([0, weights_m1[: -1][idx]])
 67 |     C = []
 68 | 
 69 |     # interval that is deterministically included in the confidence set
 70 |     # (color-coded green in Fig. S1)
 71 |     cdf_m1 = np.cumsum(sortedweights_m1) # CDF up to i-th sorted calibration score
 72 |     cdf_plus_test_weight_m1 = cdf_m1 + weights_m1[-1]
 73 |     deterministic_idx = np.where(cdf_plus_test_weight_m1 < 1 - alpha)[0]
 74 |     if deterministic_idx.size:
 75 |         i_det = np.max(deterministic_idx)
 76 |         C.append((predtest - sortedscores_m1[i_det + 1], predtest + sortedscores_m1[i_det + 1]))
 77 | 
 78 |     # intervals that are randomly included in the confidence set
 79 |     # (color-coded teal and blue in Fig. S1)
 80 |     for i in range(i_det + 1, sortedscores_m1.size - 1):
 81 |         assert(cdf_plus_test_weight_m1[i] >= 1 - alpha)
 82 |         if cdf_plus_test_weight_m1[i] >= 1 - alpha and cdf_m1[i] < 1 - alpha:
 83 |             if not lb_is_set:
 84 |                 lb_is_set = True
 85 |                 LF = cdf_m1[i]
 86 |             F = (cdf_plus_test_weight_m1[i] - (1 - alpha)) / (cdf_m1[i] + weights_m1[-1] - LF)
 87 |             if sc.stats.bernoulli.rvs(1 - F):
 88 |                 C.append((predtest + sortedscores_m1[i], predtest + sortedscores_m1[i + 1]))
 89 |                 C.append((predtest - sortedscores_m1[i + 1], predtest - sortedscores_m1[i]))
 90 | 
 91 |     # halfspaces that are randomly included in the confidence set
 92 |     # (color-coded purple in Fig. S1)
 93 |     if cdf_m1[-1] < 1 - alpha:  # sum of all calibration weights
 94 |         if not lb_is_set:
 95 |             LF = cdf_m1[-1]
 96 |         F = alpha / (1 - LF)
 97 |         if sc.stats.bernoulli.rvs(1 - F):
 98 |             C.append((predtest + sortedscores_m1[-1], np.inf))
 99 |             C.append((-np.inf, predtest - sortedscores_m1[-1]))
100 |     return C
101 | 
102 | 
103 | 
104 | # ========== full conformal utilities ==========
105 | 
def get_weighted_quantile(quantile, w_n1xy, scores_n1xy):
    """
    Compute the quantile of weighted scores for each candidate label y

    For every column, the weights are normalized to sum to one and the result is the smallest
    score whose cumulative normalized weight is at least `quantile`.

    :param quantile: float, quantile
    :param w_n1xy: (n + 1, |Y|) numpy array of weights (unnormalized); a 1-D array is treated
        as a single column
    :param scores_n1xy: (n + 1, |Y|) numpy array of scores
    :return: (|Y|,) numpy array of quantiles
    """
    if w_n1xy.ndim == 1:
        w_n1xy = w_n1xy[:, None]
        scores_n1xy = scores_n1xy[:, None]

    # normalize probabilities
    p_n1xy = w_n1xy / np.sum(w_n1xy, axis=0)

    # sort scores and their weights accordingly
    sorter_per_y_n1xy = np.argsort(scores_n1xy, axis=0)
    sortedscores_n1xy = np.take_along_axis(scores_n1xy, sorter_per_y_n1xy, axis=0)
    sortedp_n1xy = np.take_along_axis(p_n1xy, sorter_per_y_n1xy, axis=0)

    # locate quantiles of weighted scores per y
    cdf_n1xy = np.cumsum(sortedp_n1xy, axis=0)
    qidx_y = np.sum(cdf_n1xy < quantile, axis=0)  # equivalent to [np.searchsorted(cdf_n1, q) for cdf_n1 in cdf_n1xy]
    # Floating-point rounding can leave the final cumulative weight slightly below 1, so that
    # for quantile close to 1 every entry counts as "below" and the index runs one past the end;
    # clamp to the largest score in that case.
    qidx_y = np.minimum(qidx_y, cdf_n1xy.shape[0] - 1)
    q_y = sortedscores_n1xy[(qidx_y, range(qidx_y.size))]
    return q_y
132 | 
def is_covered(y, confset, y_increment):
    r"""
    Return if confidence set covers true label, i.e. whether at least one value in the
    confidence set lies strictly within half an increment of the true label

    :param y: true label
    :param confset: numpy array (or other array-like, e.g. list) of values in confidence set
    :param y_increment: float, \Delta increment between candidate label values, 0.01 in main paper
    :return: bool (numpy boolean); False when the confidence set is empty
    """
    # np.asarray lets plain Python sequences be subtracted from a Python float as well
    return np.any(np.abs(y - np.asarray(confset)) < (y_increment / 2))
143 | 
144 | 
145 | 
146 | # ========== utilities and classes for full conformal with ridge regression ==========
147 | 
def get_invcov_dot_xt(X_nxp, gamma, use_lapack: bool = True):
    r"""
    Compute (X^TX + \gamma I)^{-1} X^T

    The first feature is treated as the intercept and is not regularized.

    :param X_nxp: (n, p) numpy array encoding sequences
    :param gamma: float, ridge regularization strength
    :param use_lapack: bool, whether or not to use low-level LAPACK functions for inverting covariance (fastest).
        If the Cholesky-based LAPACK route reports a failure (e.g. the regularized covariance is not
        positive definite), the pseudo-inverse route used for use_lapack=False is taken instead.
    :return: (p, n) numpy array, (X^TX + \gamma I)^{-1} X^T
    """
    reg_pxp = gamma * np.eye(X_nxp.shape[1])
    reg_pxp[0, 0] = 0  # don't penalize intercept term
    cov_pxp = X_nxp.T.dot(X_nxp) + reg_pxp

    invcov_pxp = None
    if use_lapack:
        # fastest way to invert PD matrices from
        # https://stackoverflow.com/questions/40703042/more-efficient-way-to-invert-a-matrix-knowing-it-is-symmetric-and-positive-semi
        chol_pxp, chol_info = sc.linalg.lapack.dpotrf(cov_pxp, False, False)
        # a nonzero status means the factorization did not complete, so its output must not be
        # handed to dpotri (which could otherwise return a meaningless "inverse" without complaint)
        if chol_info == 0:
            invcovtri_pxp, inv_info = sc.linalg.lapack.dpotri(chol_pxp)
            if inv_info == 0:
                # only the upper triangle is valid; mirror it to obtain the full symmetric inverse
                invcov_pxp = np.triu(invcovtri_pxp) + np.triu(invcovtri_pxp, k=1).T
    if invcov_pxp is None:
        invcov_pxp = sc.linalg.pinvh(cov_pxp)
    return invcov_pxp.dot(X_nxp.T)
170 | 
171 | 
class ConformalRidge(ABC):
    """
    Abstract base for full conformal prediction whose computations are specialized to ridge regression.
    Subclasses decide how scores and weights are produced by implementing get_loo_scores_and_lrs().
    """
    def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True):
        """
        :param ptrain_fn: function that outputs likelihood of input under training input distribution, p_X
        :param ys: numpy array of candidate labels
        :param Xuniv_uxp: (u, p) numpy array encoding all sequences in domain (e.g., all 2^13 sequences
            in Poelwijk et al. 2019 data set), needed for computing normalizing constant
        :param gamma: float, ridge regularization strength
        :param use_lapack: bool, whether or not to use low-level LAPACK functions for inverting covariance (fastest)
        """
        # candidate label grid
        self.ys = ys
        self.n_y = ys.size
        # domain of all sequences and its feature dimension
        self.Xuniv_uxp = Xuniv_uxp
        self.p = Xuniv_uxp.shape[1]
        # training distribution and ridge settings
        self.ptrain_fn = ptrain_fn
        self.gamma = gamma
        self.use_lapack = use_lapack

    def get_normalizing_constant(self, beta_p, lmbda):
        """
        Sum exp(lmbda * prediction) over every sequence in the domain, for the linear model beta_p.

        :param beta_p: (p,) numpy array of regression coefficients
        :param lmbda: float, inverse temperature
        :return: float, normalizing constant
        """
        return np.sum(np.exp(lmbda * self.Xuniv_uxp.dot(beta_p)))

    def get_insample_scores(self, Xaug_n1xp, ytrain_n):
        """
        In-sample scores: absolute residuals of the model fit to all n + 1 points (no point left out),
        evaluated for every candidate label of the test point.

        :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + candidate test point)
        :param ytrain_n: (n,) numpy array of true labels for the n training points
        :return: (n + 1, |Y|) numpy array of scores
        """
        hat_pxn1 = get_invcov_dot_xt(Xaug_n1xp, self.gamma, use_lapack=self.use_lapack)

        # fitted value for point i is offset_n1[i] + slope_n1[i] * y, where y is the candidate test label
        coef_from_train_p = hat_pxn1[:, : -1].dot(ytrain_n)
        offset_n1 = coef_from_train_p.dot(Xaug_n1xp.T)
        slope_n1 = hat_pxn1[:, -1].dot(Xaug_n1xp.T)
        fitted_n1xy = offset_n1[:, None] + np.outer(slope_n1, self.ys)

        # residuals: training rows compare against observed labels, last row against the candidate labels
        residuals_n1xy = np.zeros([ytrain_n.size + 1, self.n_y])
        residuals_n1xy[: -1] = np.abs(ytrain_n[:, None] - fitted_n1xy[: -1])
        residuals_n1xy[-1] = np.abs(self.ys - fitted_n1xy[-1])
        return residuals_n1xy

    def compute_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda, compute_lrs: bool = True):
        """
        Leave-one-out scores: for each training point i, the absolute residual of the model fit to the
        other n points (remaining training points plus the candidate test point); for the test point,
        the absolute residual of the model fit to the n training points. Optionally also the weights.

        :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + candidate test point)
        :param ytrain_n: (n,) numpy array of true labels for the n training points
        :param lmbda: float, inverse temperature of design algorithm in Eq. 6, {0, 2, 4, 6} in main paper
        :param compute_lrs: bool: whether or not to compute likelihood ratios (this part takes the longest,
            so set to False if only want to compute scores)
        :return: (n + 1, |Y|) numpy arrays of scores S_i(X_test, y) and weights w_i^y(X_test) in Eq. 3 in main paper;
            the weights are None when compute_lrs is False
        """
        n = ytrain_n.size

        # For each left-out training point i the LOO prediction at X_i is linear in the candidate
        # label y: offset_n[i] + slope_n[i] * y. The coefficient vector is likewise
        # C_nxp[i] + y * An_nxp[i].
        offset_n = np.zeros([n])
        slope_n = np.zeros([n])
        C_nxp = np.zeros([n, self.p])
        An_nxp = np.zeros([n, self.p])
        for i in range(n):
            x_i = Xaug_n1xp[i]  # the held-out training input
            X_without_i = np.vstack((Xaug_n1xp[: i], Xaug_n1xp[i + 1 :]))  # n rows, test point last
            y_without_i = np.hstack((ytrain_n[: i], ytrain_n[i + 1 :]))  # n - 1 known labels
            A_without_i = get_invcov_dot_xt(X_without_i, self.gamma, use_lapack=self.use_lapack)

            known_part_p = A_without_i[:, : -1].dot(y_without_i)  # contribution of the known labels
            test_part_p = A_without_i[:, -1]  # multiplies the candidate test label

            offset_n[i] = known_part_p.dot(x_i)
            slope_n[i] = test_part_p.dot(x_i)
            C_nxp[i] = known_part_p
            An_nxp[i] = test_part_p

        # test point (i = n + 1): model fit to the n training points only, independent of y
        beta_p = get_invcov_dot_xt(Xaug_n1xp[: -1], self.gamma, use_lapack=self.use_lapack).dot(ytrain_n)
        alast = beta_p.dot(Xaug_n1xp[-1])

        # scores for every candidate label
        prediy_nxy = offset_n[:, None] + np.outer(slope_n, self.ys)
        scoresloo_n1xy = np.zeros([n + 1, self.n_y])
        scoresloo_n1xy[: -1] = np.abs(ytrain_n[:, None] - prediy_nxy)
        scoresloo_n1xy[-1] = np.abs(self.ys - alast)

        if not compute_lrs:
            return scoresloo_n1xy, None

        # likelihood ratios for every candidate label
        betaiy_nxpxy = C_nxp[:, :, None] + self.ys * An_nxp[:, :, None]
        # normalizing constant in Eq. 6 in main paper, one per (left-out point, candidate label)
        pred_nxyxu = np.tensordot(betaiy_nxpxy, self.Xuniv_uxp, axes=(1, 1))
        normconst_nxy = np.sum(np.exp(lmbda * pred_nxyxu), axis=2)
        ptrain_n = self.ptrain_fn(Xaug_n1xp[: -1])

        w_n1xy = np.zeros([n + 1, self.n_y])
        w_n1xy[: -1] = np.exp(lmbda * prediy_nxy) / (ptrain_n[:, None] * normconst_nxy)

        # weight of the test point does not depend on the candidate label
        Z = self.get_normalizing_constant(beta_p, lmbda)
        w_n1xy[-1] = np.exp(lmbda * alast) / (self.ptrain_fn(Xaug_n1xp[-1][None, :]) * Z)
        return scoresloo_n1xy, w_n1xy

    @abstractmethod
    def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda):
        pass

    def get_confidence_set(self, Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, alpha: float = 0.1, use_is_scores: bool = False):
        """
        Build confidence set(s) for the label of a single test input.

        :param Xtrain_nxp: (n, p) numpy array of training inputs
        :param ytrain_n: (n,) numpy array of training labels
        :param Xtest_1xp: (1, p) numpy array, the test input
        :param lmbda: float, inverse temperature
        :param alpha: float, miscoverage level (the 1 - alpha weighted quantile of the scores is used)
        :param use_is_scores: bool, whether to additionally build a set from in-sample scores
        :return: tuple of (LOO-score confidence set, in-sample-score confidence set or None), each a
            numpy array of the candidate labels that were retained
        """
        n_features = Xtrain_nxp.shape[1]
        if self.p != n_features:
            raise ValueError('Feature dimension {} differs from provided Xuniv_uxp {}'.format(
                n_features, self.Xuniv_uxp.shape))
        Xaug_n1xp = np.vstack([Xtrain_nxp, Xtest_1xp])

        # scores (in-sample ones only on request) and weights
        scoresis_n1xy = None
        if use_is_scores:
            scoresis_n1xy = self.get_insample_scores(Xaug_n1xp, ytrain_n)
        scoresloo_n1xy, w_n1xy = self.get_loo_scores_and_lrs(Xaug_n1xp, ytrain_n, lmbda)

        # keep every candidate label whose test-point score does not exceed the weighted quantile
        level = 1 - alpha
        loo_threshold_y = get_weighted_quantile(level, w_n1xy, scoresloo_n1xy)
        loo_cs = self.ys[scoresloo_n1xy[-1] <= loo_threshold_y]

        if not use_is_scores:
            return loo_cs, None

        is_threshold_y = get_weighted_quantile(level, w_n1xy, scoresis_n1xy)
        is_cs = self.ys[scoresis_n1xy[-1] <= is_threshold_y]
        return loo_cs, is_cs
312 | 
313 | 
class ConformalRidgeExchangeable(ConformalRidge):
    """
    Full conformal with ridge regression for exchangeable data: every data point gets the same weight.
    """
    def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True):
        super().__init__(ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack=use_lapack)

    def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda):
        """
        :return: (n + 1, |Y|) numpy arrays of LOO scores and of all-ones weights
        """
        # likelihood ratios are skipped: under exchangeability the weights are uniform
        loo_scores_n1xy = self.compute_loo_scores_and_lrs(
            Xaug_n1xp, ytrain_n, lmbda, compute_lrs=False)[0]
        n_points = Xaug_n1xp.shape[0]
        uniform_w_n1xy = np.ones([n_points, self.n_y])
        return loo_scores_n1xy, uniform_w_n1xy
326 | 
327 | 
class ConformalRidgeFeedbackCovariateShift(ConformalRidge):
    """
    Full conformal with ridge regression when the data are under feedback covariate shift
    (Eq. 6 in main paper).
    """
    def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True):
        super().__init__(ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack=use_lapack)

    def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda):
        """
        :return: (n + 1, |Y|) numpy arrays of LOO scores and likelihood-ratio weights, both taken
            directly from compute_loo_scores_and_lrs() with likelihood ratios enabled
        """
        return self.compute_loo_scores_and_lrs(Xaug_n1xp, ytrain_n, lmbda, compute_lrs=True)
338 | 
339 | 
class ConformalRidgeStandardCovariateShift(ConformalRidge):
    """
    Full conformal with ridge regression when the data are under standard covariate shift.
    """
    def __init__(self, ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack: bool = True):
        super().__init__(ptrain_fn, ys, Xuniv_uxp, gamma, use_lapack=use_lapack)

    def get_lrs(self, Xaug_n1xp, ytrain_n, lmbda):
        """
        Likelihood ratios (test density over training density) for all n + 1 inputs, where the test
        density is proportional to exp(lmbda * prediction) of the ridge model fit to the training data.

        :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + test point)
        :param ytrain_n: (n,) numpy array of training labels
        :param lmbda: float, inverse temperature
        :return: (n + 1,) numpy array of likelihood ratios
        """
        # ridge coefficients from the n training points (last row of Xaug_n1xp is the test point)
        beta_p = get_invcov_dot_xt(Xaug_n1xp[: -1], self.gamma, use_lapack=self.use_lapack).dot(ytrain_n)

        # normalizer of the test covariate distribution
        Z = self.get_normalizing_constant(beta_p, lmbda)

        # test density at each of the n + 1 inputs, divided by the training density
        ptest_n1 = np.exp(lmbda * Xaug_n1xp.dot(beta_p)) / Z
        return ptest_n1 / self.ptrain_fn(Xaug_n1xp)

    def get_loo_scores_and_lrs(self, Xaug_n1xp, ytrain_n, lmbda):
        """
        :return: (n + 1, |Y|) numpy arrays of LOO scores and weights; the weights are the same for
            every candidate label
        """
        # scores only; the weights come from get_lrs() instead
        loo_scores_n1xy = self.compute_loo_scores_and_lrs(
            Xaug_n1xp, ytrain_n, lmbda, compute_lrs=False)[0]

        # replicate the per-point likelihood ratio across all candidate labels
        w_n1 = self.get_lrs(Xaug_n1xp, ytrain_n, lmbda)
        n_points = Xaug_n1xp.shape[0]
        w_n1xy = w_n1[:, None] * np.ones([n_points, self.n_y])
        return loo_scores_n1xy, w_n1xy
369 | 
370 | 
371 | 
372 | # ========== utilities and classes for full conformal with black-box model ==========
373 | 
def get_scores(model, Xaug_nxp, yaug_n, use_loo_score: bool = False):
    """
    Absolute-residual scores for every point of the augmented data set. Note that the model
    is (re)fit in place.

    :param model: object with fit() and predict() methods
    :param Xaug_nxp: 2-D numpy array of inputs, one row per point (training + candidate test point)
    :param yaug_n: 1-D numpy array of labels, one per row of Xaug_nxp
    :param use_loo_score: bool, if True each point is scored by a model fit to all other points;
        otherwise a single model fit to all points scores every point (in-sample score)
    :return: 1-D numpy array of scores, one per point
    """
    if not use_loo_score:
        # in-sample score: one fit on everything
        model.fit(Xaug_nxp, yaug_n)
        return np.abs(yaug_n - model.predict(Xaug_nxp))

    n_points = yaug_n.size  # n + 1
    loo_scores = np.zeros([n_points])
    for held_out in range(n_points):
        # drop the held-out row and refit on the rest
        X_rest = np.vstack((Xaug_nxp[: held_out], Xaug_nxp[held_out + 1 :]))
        y_rest = np.hstack((yaug_n[: held_out], yaug_n[held_out + 1 :]))
        model.fit(X_rest, y_rest)

        # score the held-out point with that model
        held_out_pred = model.predict(Xaug_nxp[held_out][None, :])
        loo_scores[held_out] = np.abs(yaug_n[held_out] - held_out_pred[0])
    return loo_scores
393 | 
394 | 
class Conformal(ABC):
    """
    Abstract base for full conformal prediction around a black-box predictive model.
    Subclasses supply the weights by implementing get_lrs().
    """
    def __init__(self, model, ptrain_fn, ys, Xuniv_uxp):
        """
        :param model: object with predict() method
        :param ptrain_fn: function that outputs likelihood of input under training input distribution, p_X
        :param ys: (|Y|,) numpy array of candidate labels
        :param Xuniv_uxp: (u, p) numpy array encoding all sequences in domain (e.g., all 2^13 sequences
            in Poelwijk et al. 2019 data set), needed for computing normalizing constant
        """
        self.model = model
        self.ptrain_fn = ptrain_fn
        # candidate label grid
        self.ys = ys
        self.n_y = ys.size
        # domain of all sequences and its feature dimension
        self.Xuniv_uxp = Xuniv_uxp
        self.p = Xuniv_uxp.shape[1]

    @abstractmethod
    def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda):
        pass

    def get_confidence_set(self, Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda,
                           use_loo_score: bool = True, alpha: float = 0.1, print_every: int = 10, verbose: bool = True):
        """
        Build the confidence set for the label of a single test input by looping over candidate labels.

        :param Xtrain_nxp: (n, p) numpy array of training inputs
        :param ytrain_n: (n,) numpy array of training labels
        :param Xtest_1xp: (1, p) numpy array, the test input
        :param lmbda: float, inverse temperature, forwarded to get_lrs()
        :param use_loo_score: bool, forwarded to get_scores() (LOO scores if True, in-sample otherwise)
        :param alpha: float, miscoverage level (the 1 - alpha weighted quantile of the scores is used)
        :param print_every: int, report progress after every print_every candidate labels
        :param verbose: bool, whether to print progress at all
        :return: tuple of (numpy array of retained candidate labels, (n + 1, |Y|) numpy array of scores,
            (n + 1, |Y|) numpy array of weights)
        """
        n_features = Xtrain_nxp.shape[1]
        if self.p != n_features:
            raise ValueError('Feature dimension {} differs from provided Xuniv_uxp {}'.format(
                n_features, self.Xuniv_uxp.shape))

        np.set_printoptions(precision=3)  # note: changes numpy's global print settings
        start_time = time.time()
        n = ytrain_n.size
        n_candidates = self.ys.size
        Xaug_n1xp = np.vstack([Xtrain_nxp, Xtest_1xp])
        scores_n1xy = np.zeros([n + 1, self.n_y])
        w_n1xy = np.zeros([n + 1, self.n_y])
        retained = []

        for y_idx, y in enumerate(self.ys):
            # hypothesize that the test label equals this candidate value
            yaug_n1 = np.hstack([ytrain_n, y])

            # scores under that hypothesis
            scores_n1 = get_scores(self.model, Xaug_n1xp, yaug_n1, use_loo_score=use_loo_score)
            scores_n1xy[:, y_idx] = scores_n1

            # weights (likelihood ratios) under that hypothesis
            w_n1 = self.get_lrs(Xaug_n1xp, yaug_n1, lmbda)
            w_n1xy[:, y_idx] = w_n1

            # retain the candidate when the test point's score is at most the weighted quantile
            threshold = get_weighted_quantile(1 - alpha, w_n1, scores_n1)
            if scores_n1[-1] <= threshold:
                retained.append(y)

            # progress report
            n_done = y_idx + 1
            if verbose and n_done % print_every == 0:
                print("Done with {} / {} y values ({:.1f} s)".format(
                    n_done, n_candidates, time.time() - start_time))
        return np.array(retained), scores_n1xy, w_n1xy
454 | 
455 | 
class ConformalExchangeable(Conformal):
    """
    Full conformal with a black-box predictive model for exchangeable data: every point is weighted equally.
    """
    def __init__(self, model, ptrain_fn, ys, Xuniv_uxp):
        super().__init__(model, ptrain_fn, ys, Xuniv_uxp)

    def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda):
        """
        :return: numpy array of ones, one entry per row of Xaug_n1xp (labels and lmbda are not used)
        """
        n_points = Xaug_n1xp.shape[0]
        return np.ones([n_points])
465 | 
466 | 
class ConformalFeedbackCovariateShift(Conformal):
    """
    Full conformal with black-box predictive model under feedback covariate shift via Eq. 6 in main paper.
    """
    def __init__(self, model, ptrain_fn, ys, Xuniv_uxp):
        super().__init__(model, ptrain_fn, ys, Xuniv_uxp)

    def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda):
        """
        Compute one likelihood ratio per data point. For point i the model is refit (in place) on
        the other n points, and the test density of X_i is taken proportional to
        exp(lmbda * prediction), normalized over all sequences in self.Xuniv_uxp.

        :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + candidate test point)
        :param yaug_n1: (n + 1,) numpy array of labels (training labels + candidate test label)
        :param lmbda: float, inverse temperature
        :return: (n + 1,) numpy array of likelihood ratios
        :raises ValueError: if the ratio computed for a single point is not a single number
        """
        n1 = yaug_n1.size
        w_n1 = np.zeros([n1])
        for i in range(n1):

            # fit LOO model
            Xtr_nxp = np.vstack([Xaug_n1xp[: i], Xaug_n1xp[i + 1 :]])
            ytr_n = np.hstack([yaug_n1[: i], yaug_n1[i + 1 :]])
            self.model.fit(Xtr_nxp, ytr_n)

            # compute normalizing constant
            predall_u = self.model.predict(self.Xuniv_uxp)
            Z = np.sum(np.exp(lmbda * predall_u))

            # compute likelihood ratio of the held-out point
            xi_1xp = Xaug_n1xp[i][None, :]
            testpred = self.model.predict(xi_1xp)
            ptest = np.exp(lmbda * testpred) / Z

            # predict() / ptrain_fn typically return size-1 arrays; pull out the scalar explicitly
            # instead of relying on numpy's deprecated implicit array-to-scalar conversion
            w_n1[i] = np.asarray(ptest / self.ptrain_fn(xi_1xp)).item()
        return w_n1
493 | 
494 | 
class ConformalStandardCovariateShift(Conformal):
    """
    Full conformal with a black-box predictive model when the data are under standard covariate shift.
    """
    def __init__(self, model, ptrain_fn, ys, Xuniv_uxp):
        super().__init__(model, ptrain_fn, ys, Xuniv_uxp)

    def get_lrs(self, Xaug_n1xp, yaug_n1, lmbda):
        """
        Likelihood ratios (test density over training density) for all n + 1 inputs. The model is
        refit in place on the first n points; the candidate label in the last position is not used.

        :param Xaug_n1xp: (n + 1, p) numpy array encoding all n + 1 sequences (training + test point)
        :param yaug_n1: (n + 1,) numpy array of labels (training labels + candidate test label)
        :param lmbda: float, inverse temperature
        :return: likelihood ratios, one per input
        """
        # fit on the training portion only (everything but the last row / label)
        Xtrain_nxp, ytrain_n = Xaug_n1xp[: -1], yaug_n1[: -1]
        self.model.fit(Xtrain_nxp, ytrain_n)

        # normalizer of the test covariate distribution, summed over the whole domain
        Z = np.sum(np.exp(lmbda * self.model.predict(self.Xuniv_uxp)))

        # test density at each input, divided by the training density
        ptest_n1 = np.exp(lmbda * self.model.predict(Xaug_n1xp)) / Z
        return ptest_n1 / self.ptrain_fn(Xaug_n1xp)
513 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: conformal-design 
 2 | channels:
 3 |   - pytorch
 4 |   - anaconda
 5 |   - conda-forge
 6 | dependencies:
 7 |   - pip=20.1.1
 8 |   - python=3.7.7
 9 |   - numpy=1.18.1
10 |   - numpy-base=1.18.1
11 |   - matplotlib=3.1.3
12 |   - matplotlib-base=3.1.3
13 |   - scipy=1.4.1
14 |   - pytorch=1.4.0
15 |   - torchvision=0.5.0
16 |   - pandas=1.0.3
17 |   - pillow=7.1.2
18 |   - seaborn=0.10.1
19 |   - tqdm=4.47.0
20 |   - tensorflow-gpu==2.1
21 |   - ipykernel
22 |   - parse==1.19
23 |   - scikit-learn==1.0.1
24 |   - xlrd
25 | 


--------------------------------------------------------------------------------
/fluorescence/blue_n192_lambda0_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda0_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n192_lambda2_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda2_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n192_lambda4_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda4_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n192_lambda6_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n192_lambda6_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n384_lambda0_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda0_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n384_lambda2_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda2_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n384_lambda4_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda4_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n384_lambda6_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n384_lambda6_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n96_lambda0_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda0_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n96_lambda2_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda2_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n96_lambda4_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda4_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/blue_n96_lambda6_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_n96_lambda6_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/blue_noise.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/blue_noise.npz


--------------------------------------------------------------------------------
/fluorescence/red_n192_lambda0_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda0_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n192_lambda2_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda2_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n192_lambda4_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda4_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n192_lambda6_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n192_lambda6_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n384_lambda0_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda0_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n384_lambda2_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda2_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n384_lambda4_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda4_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n384_lambda6_alpha0.1_gamma1.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n384_lambda6_alpha0.1_gamma1.npz


--------------------------------------------------------------------------------
/fluorescence/red_n96_lambda0_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda0_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/red_n96_lambda2_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda2_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/red_n96_lambda4_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda4_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/red_n96_lambda6_alpha0.1_gamma10.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_n96_lambda6_alpha0.1_gamma10.npz


--------------------------------------------------------------------------------
/fluorescence/red_noise.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/red_noise.npz


--------------------------------------------------------------------------------
/fluorescence/supp_data_3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/supp_data_3.xlsx


--------------------------------------------------------------------------------
/fluorescence/supp_data_4.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clarafy/conformal-for-design/ec484b5344c688c07279ba5747f6051a6f97eeb2/fluorescence/supp_data_4.xlsx


--------------------------------------------------------------------------------
/notebooks/aav-experiments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "This notebook reproduces the AAV design experiments whose results are shown in Fig 5.\n",
  8 |     "\n",
  9 |     "Variable name suffixes in the following cells denote array dimensions, where\n",
 10 |     "\n",
 11 |     "n: number of calibration and test data points  \n",
 12 |     "l: number of values of the inverse temperature, lambda  \n",
 13 |     "L: length of sequence  \n",
 14 |     "t: number of trials of sampling from test distribution, per lambda  \n",
 15 |     "s: number of samples from test distribution  \n",
 16 |     "m: number of calibration data points  \n",
 17 |     "m1: m + 1  "
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import os\n",
 27 |     "import sys\n",
 28 |     "import time\n",
 29 |     "from importlib import reload\n",
 30 |     "module_path = os.path.abspath(os.path.join('..'))\n",
 31 |     "if module_path not in sys.path:\n",
 32 |     "    sys.path.append(module_path)\n",
 33 |     "    \n",
 34 |     "import numpy as np\n",
 35 |     "import scipy as sc\n",
 36 |     "from tensorflow import keras\n",
 37 |     "\n",
 38 |     "import assay\n",
 39 |     "import calibrate as cal"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "## Load held-out data and parameters of test sequence distributions"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 6,
 52 |    "metadata": {},
 53 |    "outputs": [
 54 |     {
 55 |      "name": "stdout",
 56 |      "output_type": "stream",
 57 |      "text": [
 58 |       "Loaded 1000000 held-out test and calibration data points.\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "# load held-out data (calibration and test data)\n",
 64 |     "d = np.load('../aav/test_and_calibration_aav_data.npz')\n",
 65 |     "seq_n = d['seq_n']  # list of strings\n",
 66 |     "y_n = d['y_n']      # true fitnesses\n",
 67 |     "n = y_n.size\n",
 68 |     "print('Loaded {} held-out test and calibration data points.'.format(n))"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 7,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "# load parameters of test sequence distributions\n",
 78 |     "d = np.load('../aav/models/constrained_maxent_parameters.npz')\n",
 79 |     "\n",
 80 |     "# phitestnuc_lxLxk[i] is an (L, k) numpy array of unnormalized probabilities of a categorical distribution\n",
 81 |     "# over k = 4 nucleotides at each of L sequence positions,\n",
 82 |     "# corresponding to phi in Eq. 5 of Supp. Materials and Methods here:\n",
 83 |     "# https://www.biorxiv.org/content/10.1101/2021.11.02.467003v2.full\n",
 84 |     "phitestnuc_lxLxk = d['phitestnuc_lxLxk']\n",
 85 |     "\n",
 86 |     "# note that lambda in bioRxiv above corresponds to 1 / lambda for us\n",
 87 |     "lambda_l = (1 / d['temperature_l']).astype(int)\n",
 88 |     "meanpredfit_l = d['meanpredfit_l']"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "## Construct confidence sets for designed sequences"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "Compute predictions and scores for all held-out data (calibration and test data)."
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 8,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "name": "stdout",
112 |      "output_type": "stream",
113 |      "text": [
114 |       "WARNING:tensorflow:Output lambda_1 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to lambda_1.\n",
115 |       "Mean predicted fitness for NNK (training) distribution: -0.47231370210647583\n"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "# load trained NN and predict fitness for all held-out sequences (calibration and test data)\n",
121 |     "datagen = assay.DataGenerator(seq_n)\n",
122 |     "model = keras.models.load_model('../aav/models/h100_0.npy')  # fixed path; no format placeholder needed\n",
123 |     "pred_n = model.predict_generator(datagen).reshape(n)\n",
124 |     "score_n = np.abs(pred_n - y_n)  # conformal score: absolute residual between prediction and true fitness\n",
125 |     "print(\"Mean predicted fitness for NNK (training) distribution: {}\".format(np.mean(pred_n)))"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "Use rejection sampling to sample from test distribution, and construct split conformal confidence intervals for resulting designed sequences."
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "n_trial = 500\n",
142 |     "alpha = 0.1\n",
143 |     "n_cal = 10000\n",
144 |     "save_results = True\n",
145 |     "savefile = '../aav/split-results.npz'\n",
146 |     "\n",
147 |     "# compute training likelihoods of all sequences\n",
148 |     "logptrain_n = assay.get_loglikelihood(seq_n, assay.PNNKAA_LXK)\n",
149 |     "\n",
150 |     "n_lambda = phitestnuc_lxLxk.shape[0]\n",
151 |     "cov_lxt = np.zeros([n_lambda, n_trial])\n",
152 |     "avglen_lxt = np.zeros([n_lambda, n_trial])\n",
153 |     "fracinf_lxt = np.zeros([n_lambda, n_trial])\n",
154 |     "len_lxt = {(l, t): None for l in range(n_lambda) for t in range(n_trial)}  # one slot per (lambda, trial)\n",
155 |     "fit_lxt = {(l, t): None for l in range(n_lambda) for t in range(n_trial)}  # one slot per (lambda, trial)\n",
156 |     "\n",
157 |     "for l in range(n_lambda - 1, -1, -1):\n",
158 |     "    t0 = time.time()\n",
159 |     "    print(\"Test distribution with lambda = {:.1f}\".format(lambda_l[l]))\n",
160 |     "    \n",
161 |     "    # compute acceptance probabilities for all sequences (for rejection sampling from test distribution)\n",
162 |     "    paccept_n, logptest_n = assay.get_rejection_sampling_acceptance_probabilities(\n",
163 |     "        seq_n, phitestnuc_lxLxk[l], logptrain_n)\n",
164 |     "\n",
165 |     "    # compute (unnormalized) weights for all data\n",
166 |     "    w_n = np.exp(logptest_n - logptrain_n)\n",
167 |     "\n",
168 |     "    for t in range(n_trial):\n",
169 |     "        \n",
170 |     "        # partition held-out data into calibration data and test data\n",
171 |     "        # (i.e., samples from proposal distribution for rejection sampling from test distribution)\n",
172 |     "        shuffle_idx = np.random.permutation(n)\n",
173 |     "        cal_idx, test_idx = shuffle_idx[: n_cal], shuffle_idx[n_cal :]\n",
174 |     "        \n",
175 |     "        # sample from test distribution using rejection sampling\n",
176 |     "        testsamp_idx = assay.rejection_sample_from_test_distribution(paccept_n[test_idx])\n",
177 |     "        n_test = testsamp_idx.size\n",
178 |     "        if t == 0:\n",
179 |     "            print(\"  On trial 0, sampled {} sequences from the test distribution.\".format(n_test))\n",
180 |     "\n",
181 |     "        # for each test sample, stack the calibration weights with that sample's own weight (m + 1 columns) and normalize each row\n",
182 |     "        p_sxm1 = np.hstack([np.tile(w_n[cal_idx], [n_test, 1]), w_n[test_idx[testsamp_idx]][:, None]])\n",
183 |     "        p_sxm1 /= np.sum(p_sxm1, axis=1, keepdims=True)\n",
184 |     "        \n",
185 |     "        # compute quantile of weighted calibration scores\n",
186 |     "        augscore_sxm1 = np.tile(np.hstack([score_n[cal_idx], [np.inf]]), (n_test, 1))  # append +inf as the test point's score\n",
187 |     "        q_sx1 = cal.get_weighted_quantile(1 - alpha, p_sxm1.T, augscore_sxm1.T)[:, None]\n",
188 |     "        \n",
189 |     "        # construct confidence intervals\n",
190 |     "        testpred_sx1 = pred_n[test_idx[testsamp_idx]][:, None]\n",
191 |     "        lu_sx2 =  np.hstack([testpred_sx1 - q_sx1, testpred_sx1 + q_sx1])\n",
192 |     "         \n",
193 |     "        # record confidence interval lengths, true fitnesses, and empirical coverage\n",
194 |     "        noninf_idx = np.where(np.logical_and(~np.isinf(lu_sx2[:, 0]), ~np.isinf(lu_sx2[:, 1])))[0]\n",
195 |     "        avglen_lxt[l, t] = np.mean(2 * q_sx1[noninf_idx]) if noninf_idx.size else np.nan\n",
196 |     "        fracinf_lxt[l, t] = (n_test - noninf_idx.size) / n_test\n",
197 |     "        len_lxt[(l, t)] = 2 * q_sx1.flatten()\n",
198 |     "        fit_lxt[(l, t)] = y_n[test_idx[testsamp_idx]]\n",
199 |     "        cov_lxt[l, t] = cal.get_split_coverage(lu_sx2, fit_lxt[(l, t)])\n",
200 |     "        \n",
201 |     "    print(\"  Empirical coverage: {:.4f}\\n  Average non-inf length: {:.2f}\\n  Fraction inf: {:.2f}\\n  ({:.1f} s)\".format(\n",
202 |     "        np.mean(cov_lxt[l]), np.nanmean(avglen_lxt[l]), np.mean(fracinf_lxt[l]), time.time() - t0))\n",
203 |     "    \n",
204 |     "    # save results after each lambda\n",
205 |     "    if save_results:\n",
206 |     "        np.savez(savefile, cov_lxt=cov_lxt, avglen_lxt=avglen_lxt,\n",
207 |     "                 fracinf_lxt=fracinf_lxt, len_lxt=len_lxt, fit_lxt=fit_lxt)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
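214 |     "Repeat the procedure above (rejection sampling from each test distribution), but construct randomized staircase confidence sets instead of split conformal intervals, to achieve exact coverage (Fig. 5)."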
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 14,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "name": "stdout",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "Test distribution with lambda = 7.0\n",
227 |       "  On trial 0, sampled 6 sequences from the test distribution.\n",
228 |       "  Average non-inf length 5.80\n",
229 |       "  Fraction inf 0.17\n",
230 |       "109.6 s\n",
231 |       "Test distribution with lambda = 6.0\n",
232 |       "  On trial 0, sampled 15 sequences from the test distribution.\n",
233 |       "  Average non-inf length 5.49\n",
234 |       "  Fraction inf 0.06\n",
235 |       "150.0 s\n",
236 |       "Test distribution with lambda = 5.0\n",
237 |       "  On trial 0, sampled 40 sequences from the test distribution.\n",
238 |       "  Average non-inf length 5.06\n",
239 |       "  Fraction inf 0.01\n",
240 |       "228.0 s\n",
241 |       "Test distribution with lambda = 4.0\n",
242 |       "  On trial 0, sampled 128 sequences from the test distribution.\n",
243 |       "  Average non-inf length 4.81\n",
244 |       "  Fraction inf 0.00\n",
245 |       "338.8 s\n",
246 |       "Test distribution with lambda = 3.0\n",
247 |       "  On trial 0, sampled 472 sequences from the test distribution.\n",
248 |       "  Average non-inf length 4.75\n",
249 |       "  Fraction inf 0.00\n",
250 |       "771.5 s\n",
251 |       "Test distribution with lambda = 2.0\n",
252 |       "  On trial 0, sampled 3233 sequences from the test distribution.\n",
253 |       "  Average non-inf length 4.69\n",
254 |       "  Fraction inf 0.00\n",
255 |       "3919.5 s\n",
256 |       "Test distribution with lambda = 1.0\n",
257 |       "  On trial 0, sampled 7130 sequences from the test distribution.\n",
258 |       "  Average non-inf length 4.79\n",
259 |       "  Fraction inf 0.00\n",
260 |       "7445.0 s\n"
261 |      ]
262 |     }
263 |    ],
264 |    "source": [
265 |     "reload(cal)\n",
266 |     "n_trial = 500\n",
267 |     "alpha = 0.1\n",
268 |     "n_cal = 10000\n",
269 |     "save_results = True\n",
270 |     "savefile = '../aav/randomized-staircase-results.npz'\n",
271 |     "\n",
272 |     "# compute training likelihoods of all sequences\n",
273 |     "logptrain_n = assay.get_loglikelihood(seq_n, assay.PNNKAA_LXK)\n",
274 |     "\n",
275 |     "n_lambda = phitestnuc_lxLxk.shape[0]\n",
276 |     "avglen_lxt = np.zeros([n_lambda, n_trial])\n",
277 |     "fracinf_lxt = np.zeros([n_lambda, n_trial])\n",
278 |     "len_lxt = {(l, t): None for l in range(n_lambda) for t in range(n_trial)}  # one slot per (lambda, trial)\n",
279 |     "fit_lxt = {(l, t): None for l in range(n_lambda) for t in range(n_trial)}  # one slot per (lambda, trial)\n",
280 |     "cov_lxt = {(l, t): None for l in range(n_lambda) for t in range(n_trial)}  # one slot per (lambda, trial)\n",
281 |     "\n",
282 |     "for l in range(n_lambda - 1, -1, -1):\n",
283 |     "    t0 = time.time()\n",
284 |     "    print(\"Test distribution with lambda = {:.1f}\".format(lambda_l[l]))\n",
285 |     "    \n",
286 |     "    # compute acceptance probabilities for all sequences (for rejection sampling from test distribution)\n",
287 |     "    paccept_n, logptest_n = assay.get_rejection_sampling_acceptance_probabilities(\n",
288 |     "        seq_n, phitestnuc_lxLxk[l], logptrain_n)\n",
289 |     "\n",
290 |     "    # compute (unnormalized) weights for all data\n",
291 |     "    w_n = np.exp(logptest_n - logptrain_n)\n",
292 |     "\n",
293 |     "    for t in range(n_trial):\n",
294 |     "        \n",
295 |     "        # partition held-out data into calibration data and test data\n",
296 |     "        # (i.e., samples from proposal distribution for rejection sampling from test distribution)\n",
297 |     "        shuffle_idx = np.random.permutation(n)\n",
298 |     "        cal_idx, test_idx = shuffle_idx[: n_cal], shuffle_idx[n_cal :]\n",
299 |     "        \n",
300 |     "        # sample from test distribution using rejection sampling\n",
301 |     "        testsamp_idx = assay.rejection_sample_from_test_distribution(paccept_n[test_idx])\n",
302 |     "        n_test = testsamp_idx.size\n",
303 |     "        if t == 0:  # example of how many sequences are sampled from test distribution on a trial\n",
304 |     "            print(\"  On trial 0, sampled {} sequences from the test distribution.\".format(n_test))\n",
305 |     "\n",
306 |     "        # for each test sample, stack the calibration weights with that sample's own weight (m + 1 columns) and normalize each row\n",
307 |     "        p_sxm1 = np.hstack([np.tile(w_n[cal_idx], [n_test, 1]), w_n[test_idx[testsamp_idx]][:, None]])\n",
308 |     "        p_sxm1 /= np.sum(p_sxm1, axis=1, keepdims=True)\n",
309 |     "        \n",
310 |     "        # construct randomized staircase confidence set\n",
311 |     "        testpred_s = pred_n[test_idx[testsamp_idx]]\n",
312 |     "        C_s = [cal.get_randomized_staircase_confidence_set(\n",
313 |     "            score_n[cal_idx], weights_m1, pred, alpha) for weights_m1, pred in zip(p_sxm1, testpred_s)]\n",
314 |     "         \n",
315 |     "        # record true fitnesses, empirical coverage, confidence set sizes\n",
316 |     "        fit_lxt[(l, t)] = y_n[test_idx[testsamp_idx]]\n",
317 |     "        cov_s, len_s = cal.get_randomized_staircase_coverage(C_s, fit_lxt[(l, t)])\n",
318 |     "        cov_lxt[(l, t)] = cov_s\n",
319 |     "        noninf_idx = np.where(~np.isinf(len_s))[0]\n",
320 |     "        avglen_lxt[l, t] = np.mean(len_s[noninf_idx]) if noninf_idx.size else np.nan\n",
321 |     "        fracinf_lxt[l, t] = (n_test - noninf_idx.size) / n_test\n",
322 |     "        len_lxt[(l, t)] = len_s\n",
323 |     "    \n",
324 |     "    cov = np.mean([np.mean(cov_lxt[(l, t)]) for t in range(n_trial)])\n",
325 |     "    print(\"  Empirical coverage: {:.4f}\\n  Average non-inf length: {:.2f}\\n  Fraction inf: {:.2f}\\n  ({:.1f} s)\".format(\n",
326 |     "        cov, np.nanmean(avglen_lxt[l]), np.mean(fracinf_lxt[l]), time.time() - t0))\n",
327 |     "        \n",
328 |     "    # save results after each lambda\n",
329 |     "    if save_results:\n",
330 |     "        np.savez(savefile, cov_lxt=cov_lxt, avglen_lxt=avglen_lxt,\n",
331 |     "                 fracinf_lxt=fracinf_lxt, len_lxt=len_lxt, fit_lxt=fit_lxt)"
332 |    ]
333 |   }
334 |  ],
335 |  "metadata": {
336 |   "kernelspec": {
337 |    "display_name": "TensorFlow-GPU-2.1.0",
338 |    "language": "python",
339 |    "name": "tf-gpu"
340 |   },
341 |   "language_info": {
342 |    "codemirror_mode": {
343 |     "name": "ipython",
344 |     "version": 3
345 |    },
346 |    "file_extension": ".py",
347 |    "mimetype": "text/x-python",
348 |    "name": "python",
349 |    "nbconvert_exporter": "python",
350 |    "pygments_lexer": "ipython3",
351 |    "version": "3.7.7"
352 |   }
353 |  },
354 |  "nbformat": 4,
355 |  "nbformat_minor": 4
356 | }
357 | 


--------------------------------------------------------------------------------
/notebooks/aav-figures.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "This notebook plots Fig. 5 from the results saved by aav-experiments.ipynb."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import time    \n",
 17 |     "import numpy as np\n",
 18 |     "\n",
 19 |     "import matplotlib.pyplot as plt\n",
 20 |     "import matplotlib.gridspec as gridspec\n",
 21 |     "plt.rcParams[\"font.size\"] = 8\n",
 22 |     "\n",
 23 |     "import seaborn as sns\n",
 24 |     "sns.set_style('whitegrid', {'grid.color': '0.9'})"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# load lambdas and mean predicted fitnesses of test sequence distributions\n",
 34 |     "d = np.load('../aav/models/constrained_maxent_parameters.npz')\n",
 35 |     "\n",
 36 |     "# note that lambda in the bioRxiv preprint (https://www.biorxiv.org/content/10.1101/2021.11.02.467003v2.full) corresponds to 1 / lambda for us\n",
 37 |     "lambda_l = (1 / d['temperature_l']).astype(int)\n",
 38 |     "meanpredfit_l = d['meanpredfit_l']"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# load coverage and sizes of randomized staircase confidence sets (constructed in aav-experiments.ipynb)\n",
 48 |     "fname = '../aav/randomized-staircase-results.npz'\n",
 49 |     "d = np.load(fname, allow_pickle=True)  # dict-valued entries were pickled by np.savez\n",
 50 |     "cov_lxt = d['cov_lxt'].item()  # .item() unwraps the 0-d object array into the dict keyed by (l, t)\n",
 51 |     "avglen_lxt = d['avglen_lxt']\n",
 52 |     "fracinf_lxt = d['fracinf_lxt']\n",
 53 |     "len_lxt = d['len_lxt'].item()\n",
 54 |     "fit_lxt = d['fit_lxt'].item()"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 4,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "# compute mean empirical coverage and mean true fitness per lambda (mean over trials of per-trial means)\n",
 64 |     "n_lambda, n_trial = lambda_l.size, avglen_lxt.shape[1]  # trial count taken from the saved (lambda, trial) array\n",
 65 |     "cov_l = np.zeros([n_lambda])\n",
 66 |     "truefit_l = np.zeros([n_lambda])\n",
 67 |     "for l in range(n_lambda):\n",
 68 |     "    cov_l[l] = np.mean([np.mean(cov_lxt[(l, t)]) for t in range(n_trial)])\n",
 69 |     "    truefit_l[l] = np.mean([np.mean(fit_lxt[(l, t)]) for t in range(n_trial)])"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 67,
 75 |    "metadata": {},
 76 |    "outputs": [
 77 |     {
 78 |      "data": {
 79 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcMAAACUCAYAAADvekIlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOydd3hU1fa/3zMtkx4S0giBQAgkYKF3QaT4EwVF6YpY4AoXRREUy0UQketF9IId9Gq4gsAXLqggiiIgICBIlRICAdJ7b1Myc35/jDOmTksySeC8z8ND5pyzz14TNnuds/danyWIoigiISEhISFxEyNragMkJCQkJCSaGskZSkhISEjc9EjOUEJCQkLipkdyhhISEhISNz2SM5SQkJCQuOmRnKGEhISExE2P5AwlJCQkJG56JGfYiJSUlDBjxow6zx85coRVq1a50CKJGwHzuNq2bRsrV66scV4aVxLOYG2+Ki4uZubMmS62yLVIzrAR2bJlC/fdd1+d5wcMGMCvv/6KRqNxoVUSLR1pXEk0BtbGlbe3N6GhoZw8edLFVrkOyRk2Irt27WLYsGEkJSUxdepUxo0bx/jx44mLi7Nc07t3bw4cONCEVkq0NMzjCiApKYmHH36Yu+++m++++85yjTSuJBzFPK70ej1Lly5lzJgxjB07ll9++QWAYcOGsWvXria2svFQNLUBNyo6nY7CwkJ8fX1RqVTExsaiUqk4f/48K1eu5LPPPgOga9eunDlzhlGjRjWxxRItgcrjCuDixYt8/fXXaLVaJk6cyJ133omnp6c0riQcovK4Wr9+PeXl5Xz77beAaYkUTHPVRx991JRmNiqSM2wk8vPz8fLyAkCv1/OPf/yD+Ph4ZDIZRUVFluv8/f3JyclpKjMlWhiVxxXA4MGD8fT0xNPTk5iYGC5fvkz37t2lcSXhEJXH1dGjR5kxYwaCIADg4+MD3PhzlbRM2ki4ubmh1+sBiI2NpX379nz77bd89dVX6HQ6y3U6nQ43N7emMlOihVF5XAGWCav6Z2lcSThC5XElimKNcQWmMaVSqVxtmsuQnGEj4efnR3l5OaIoUlJSQlBQEIIgsH379irXJSYm0rFjxyayUqKlUXlcARw8eJDS0lLy8vK4ePEinTp1AqRxJeEYlcfVgAED2Lx5M6IoIoqiZSUrKSnphh5TkjNsRHr27MmFCxeYOnUqGzZsYNKkSRQUFFS55sSJEwwaNKiJLJRoiZjHFcBtt93G008/zZQpU5g3bx6enp6ANK4kHMc8riZNmoRKpWLMmDHcf//9nDp1CoDjx48zePDgJray8RCkeoaNx4kTJ9i9ezevvPJKrecLCgp4/vnn+fzzz11smURLRhpXEo2BrXH1xBNP8O9//9sSvHWjIQXQNCK9evXi+vXrdZ7PyMhg/vz5rjNI4oZAGlcSjYG1cVVcXMzkyZNvWEcI0puhRAskMzOTWbNmceXKFU6dOoVC8dczXXx8PIsXL0YURZYsWUJ0dHStxyQkJCQqI+0ZSrQ4/Pz8iI2NpXv37jXOrV69mnfffZfVq1ezevXqOo9JSEhIVEZaJpVocbi5udWZNlBYWEhoaChg0lqs65iEhIREZVq0Mzx9+nSVSdFgMCCXyx26hzNtbvR21dtotdpa38KaI0ajscbPtR2ri+rLrlB33pU1nGnTUto1VF8Gg6HFjKv6Un2uagycnSOagsa01dn5qkU7Qzc3N2JiYiyfc3JyaN26tUP3cKbNjd6uepuLFy863K+zODvRmpHJZDV+ru1YXajV6ipjCqRx1Vh9uXJcNTWV5yqzgLparW7QPpz9d2kKqtu6fPlyJkyYQFRUlNV2x44d48KFC/z8888sX76c8PDwGtc4O65atDOUaFgKikr4v50/k5aRTdvQIMbfexd+Pl62GzYgb731Fi+//LLT7X19fcnIyEAQBIu8VG3HJCSair
lz5wKwdu3aJrak+RAWFsaxY8eIiooiISGBgwcPWs4NGjTI4iT79u1Lr169uHbtGt6+rVj71TdkZucRGhRQ7/nK5c5w+fLlnDt3jq5du/KPf/zDcvyll14iISEBtVrNxIkTGTNmjKtNu2kRRZG0zBzWbf2e/IIiQCQ1I5ut3+1lxpSxjdp39eUSexyhXq9n5syZxMXF8eSTTzJnzhxOnDjB7NmzeeaZZ5g3bx6iKLJ48WKAWo/d6GRmZpKXl2fzOoPBQHZ2tkP3dqaNuZ3BYCA4ONjhthI3LnFxcSiVSq5evWrX9Xv27OGuu+5i63d7SbieAoJAelZuvecrlzrD8+fPU15ezldffcXixYs5e/Yst912m+X8ypUrad++vStNuqkpKS3jwuXrnLt0lfzCYvIKChEEAZVCiYe7moxs25NpfXnyyScJDQ3l7rvvZtCgQSiVSpttlEolsbGxVY717dsXgOjoaDZu3FjlXG3HbnTy8vLo3LmzzX0ZvV5v1++8vm3AtDx47do1yRlKWCgtLWXnzp0sWLCApUuXAhAZGUlkZGSdbQ4dOsTSpUs5+P46jKKIALir3eo9X7nUGZ4+fZoBAwYAMHDgQM6cOWNxhoIgsHDhQvz8/Fi0aBFhYWGuNO2mwWAwcDUpjXOXrnI9OR3jn2mmXh7uBAa0QqPR4qZSotHpCA0MaHR7YmNjSU1N5ccff2TDhg34+/vzr3/9q9H7vRlobsEUzc0eiabH09OTBQsWAPDaa6/Z1eaNN94AICjAj4KiYgRBoFyrrfd85VJnWFRUZNnw9Pb25vLly5ZzZkf4+++/869//Yv33nvP5v0MBkOVkiIVFRUOlxhxpk1LbJdfWEz8tRSuJKah0ZqqZsgEgXZhQXSOaEtYSGtKyjT88MsxsvMKCPT3464B3V1SskUulyMIAkajsUpFBgmJhuLixYt88MEHlJSU8Pnnn7N27Vpmz57d1GZJ1IN7hg3kWvL/MBiMhAaa9gzrg0udoY+PjyXPq6SkxFInC0yJ1GCq0P3OO+/YdT+5XF4lIkmK+vuLgqIS/m/HHpJSM1Eo5Hi4qy1P5sGBAdwa3ZGYThF4uP8V0RYE/D2iXY2+nNkfspcnnngCf39/7r77bj766COp7JBEo/Dmm2/y4Ycf8vTTTyOXyzl69GiTOUMpHqJhULspae3vR0ArXx6bMLre93OpM+zevTubN29m9OjRHD58mAcffNByrqSkBC8vL65evVrFSd7sFBSVsPW7vaRlZBMaEsj9o+5ApVRQVq6lXKOlrFxT7W8t5RoNl64mUa7RASLoQF9hYNiAntwS3ZHg1v71Sl9oSD755BPkcjm5ubk18vsk6s/f/va3GsdGjhzJhAkT0Gg0PP3001XSTeqKcLx27Rpr1qxhxIgRDB06tNHsbSxEUcTX19cy7g0GQ5PZIjnDhkGrM60iuakc37+uDZuzz4EDBxgyZAiJiYnExsYyevRo+vTp41Rn3bp1Q6VSMXXqVKKjowkNDeXjjz9m9uzZLFiwgMJCUwDHkiVLnLr/jUZhcSmfbfyW7LwCEEUuJSTx70830drfz2bbco3WEgzj7m562xpxh3P/bo3J7t27Wb9+PWFhYaSkpDB16lQeeOCBpjZLohodOnRg3LhxFBcX27w2Li6Ow4cPk5GRQWhoKI8//rgLLLTOAw88wOzZs0lJSeHZZ5+t8iDuasxl3MyrYZWP5+TkOL1V4GyUb1PQELbqKyoY3D0KhUJeJbewoqLCqfvZdIaff/45Q4YM4ZNPPmHixIksW7aM//3vf051BlRJpwAsSxWffPKJ0/e8kSguLSP+ahLxV5NIy8wlIzvX8jQrCFBRYcDX2xN3tRvuajUe7m64q93wcFf/+bfp+Ne7fyEnrwClQoHeYHBJMIwzbNiwgQ0bNqBQKNDr9TzyyCM2nWFd6Tm//vorq1evxs3NjSVLlhAZGcn777/PTz/9hK+vL3fddVezmJhdibVcNrVazccff+xUZO
hvv/3Ghg0bLJ8ffvhh+vXrB8DWrVtZuHAhK1eubFKnU5kJEyYwYsQIkpOTadu2Lf7+/k1my4svvgjU/LdJT08nIiICtVrt1MqNs1G+TUFD2Fqu0VJYXILazc2SX2gwGDh//rxT97PpDEtLS0lLS0Mul9OjRw88PDyc6kiibkrLyom/lsylhCRSM/56WlIq5LTy8Uar16NWKTEYjYQGtbYrl2bK/aMsy6ttQgLrvbncWAiCQHZ2NqGhoWRnZ9ucBKyl53z44YfExsZSUlLC8uXLWbVqFWDKYR04cGCjf5cbmezsbHbv3o1GoyEqKop+/fpZnF91AgIC2L59O6mpqRQUFDSLsj933303c+fO5d577wVg/vz5dscmuBJ3d/emNqHFYC64JKs0Z9QnYtmmM3zqqadYtWoVs2fPRqvVcvvttzvdmcRflJVruHI9hbiERFLTsy0pDgq5nA7t2tClYzgd2oVRVq5xyqn5+XgxY8rYZi/RtHjxYpYuXUpRURE+Pj42k+KtpecAeHh44OHhQVJSkuXYypUr8fHxYeHChTWk1iTsIzAw0BL6bmsZz7zaM3HixEa3y14CAwO5dOkSZ8+eZeHChWRlZTW1SVaxtddrVrExU9sKwPvvv8/06dOtxmDs2rWL0aOrBp+cPHmS0NBQi7i9vbz55ptMmTIFrVZb5f9ZbXbs27ePIUOG8M477/Dqq6/Wer9t27YRExNDTExMrXaa58yGin+w6QzDwsJYsWIFubm5fPnll4wbN65BOr7ZqBzdqVIpcVe7WQIX5HIZkW3b0KVjOyIjwlBVWj5QKVuGU3OWtm3b8vHHH1s+26oqYS09B0xRt4WFhRY1i2nTpvHMM89w/fp1XnnlFb766iur96+ergMtM2XHYDDYvffkzB5Vffa1XJGuUx1BEHj++ef56aefmD17NkVFRS63wdVcv36dHTt20LNnT9577z0mTpyIt7c358+fJyMjg4ULF3Lq1CkCAgLYtWsXrVu3plevXmRkZODu7s5bb73F8OHDOXnyJIsXL2b58uV06tSJ/fv3W/7PZmZmsnbtWiIiIgDTCkJxcTH79+/H29ubLl261GrHhQsX6N+/P1evXmXTpk0kJSUxdepU9uzZw2OPPcabb76Jh4cHaWlptGrVilOnTtGnTx8+/fRTfHx86N27N4d+PYyHpydJidd58YUX6r30bdMZvvXWW6xbt47Vq1fTt29fXnnlFTZv3lyvTm82KgwGPtv4LZnZeYiIaHQ6yrVa+twWQ5fIdkS2b4vaTdXUZjYJTz/9dBU1meqfq2MtPeeFF15g3rx5hIWF0bNnT+CvIAXzf1ZbVE/XgZaZspOdnY1CobD51OxKBRqdTlfl9+vKYA9zBOfIkSOJjIzkP//5j8v6dgZbe7326JpGREQwZswYioqK6NGjB8OGDWP//v14eHjUeIgcMGAAAwYM4Msvv6RNmzaAabl77Nix/PHHH+Tm5uLl5cWkSZM4ceKEpd2JEycYOXIkXbp04aOPPrIcv/XWW4mPj6e0tLRWO86cOWPpY/LkySxdurTKWDUajbRv356YmBhCQkIAOH78OHfeeSe9e/dmxYoVqNXu3DV8BFfi40hISKi3M7RZ3Fej0aDT6dDr9dx3330tZoO2OSCKIleup7Buyy4ysnNBAJVSia+3Fx5qNQ/ecyfdOne8aR0hmMqtWPtcne7du3P06FEADh8+XKVUS48ePfjyyy+ZNWsWHTt2BP5608zLy2vScHpXo1QqLdURmgtardbl88eFCxcACA8P58iRIxw5coTMzEzuu+8+l9pRmfHjxzN+/PhG76dDhw5s2rSJoqIiVCrTHBMfH49ara5Rysz84GTeh4Oq+28BAQEUFxezefNmSktLLcd79erFr7/+yp49e6rcz9xnfHx8rXaYyc3NZdOmTbi5uREYGMjly5fZsmULer2edu3asWvXLtLS0gDo06cP+/fvZ+3atYwYMQIRs2CHzGZpNnsQxMrfvh
Y2bNjA3r17mTt3LtHR0SxevJi33nqr3h03BBcvXmy2JZyy8wr45chJElMzASgsLkFAwF2tskR3OiIq29QlnBprr23VqlWkpKTQo0cPTp8+TZs2bZg3b57VNsuWLePChQtER0cze/Zstm7dyuzZs/n44485fPgwrVq14vXXX6dVq1a89tprxMfHI4oi8+fPt2iY1kVt37U5jSt72xUUFJCenm6znStrgBqNRsLCwixv6405rsxs376dcePG8cEHH9Q49/TTTzdq35Wx57vW9/fR2NGkv/zyCykpKYAperg+NISt+YXFaHU6/Hy8q7xQVNe8thebzhBMQQvZ2dkMHz6c9PT0ZqMb2hydYVm5hsO//8EfcQkYRRG1m4oBvW4hom0o277fXyUQxpFyIzeqMzTf/9q1a3Ts2JHo6OhG68deW24EZ9iY7RqynqGrApqys7MJDAykvLycffv20a9fPwICXJduVPm7ZmaaHpCrC5Y3d2fYkDSErbkFRej1evx9fVBVSrx31hna3DNcunQpHh4eHD16lJEjR7Jo0SI+//xzhzu60TEYDJy+cJkjJ86h1emRCQLdu0UxsNetuKtNSe83ciBMfTBHjElINBYLFixg3bp1rFq1Cn9/f7766ivWr1/fJLYsWrQIqH1fsLy83Ok8w5sN0fhnNKnsr99VfbZCbDrDhIQE1q1bx7Rp0+rd2Y2IKIpcS05j/5FT5Bea1Dki2oYwdEBPWrdq+vwqCQkJU/AOQH5+Pi+//DIHDhyocU1dYg7Lli0jLi4OrVbLSy+9RK9evQBTPMXw4cN5++23GySPNTQ0lNTU1HpF6raUyiANYWtxaRmiUSQ3y72KpKCzDxI2naGHh4cl8ufChQt4e3s71dGNSE5+Ib8cOcn1lAwAWvl6M7R/Dzq2ayM92UlINCP69u3LtGnTmDFjBlqtFrVaXeW8NTGHhQsXolQqSU1N5fXXX7e80W3evNlSgb0h8PPzqyHR5ggtadWpIWx9/4st6PQVzJn+UJU9w8rSbI5g0xm++eabfPrpp7i7u/Ptt99aCjDerJjzBa8npyNicoAe7moG9LyF7t2iWsyTWXNl2bJlNST7JJoHlUXjndn3bkqqB2VVT62wJuZg3tsqKyuz7GnrdDrOnj1reUuUcC1GUUSnN2mQqlwl1O3v78+cOXMsSarNLVzb1WzYvpvE1AxLKK/BYOSJSfdVKYUk4TySI2xeiKJIUUkZufmFbPt+P3kFhQBcT0nny20/8PdHH0Qus5mh1eyxJeYwZ84czp49y4oVKwCTOsrYsWM5e/asXfevLDZgnkMbWnzAWcGGpqC+tmp1eioqKlAq5OTl5jaITTad4aJFi0hLSyMoKMhy7J///GeDdN7SuHwtmevJ6aZ8QYUCH28vKgwGyRHWg6KiIjZt2kRycjLh4eFMmjTJppalI0LdmZmZvPDCC+h0OubOnXvDaZQ21NuaKIqUlJaRk19Ibn4RufmF5OQXkpdfaHkCrywaDwYSU9L5aN3/aBsaRPuwENq1DSHAz6dFbhFYE3MAk+5teno6zz77LH369OHQoUN88MEHdjvDymIDM2bMAGjwJc2baZm0sLgUhUKBl6d7jfs4K+Zg0xmmpKTwxRdfOHXzGwWjKHL05DmOnDiHQiEHAXy8PNAbKpptNYiWwty5c5k0aRIjR44kLi6OZ555hv/+9791Xu+oUPenn37Kc889R5cuXZg1a5ZDztARRyOKIkZRpKCwmP99v5/0zGxCgwMZP3oYrXy9bTqI6n09NHoYXp7uaLU6NDo9Op0OjVaPVqdDa/5bp+fX42cpKilFJggkJKby4br/0evWLshkMmSCgCATkMtkps8yAZlg+luj1fH72Yvk5hehdlPh6+2FoY7EZQ93NQGtfNDp9ZSVa1DK5Wh1ehQKOTp9BVeT0riaZEqM9vJwp11YMO3CQmgfFoyXZ/MQ9n/88cerzGPPP/887777ruWztVqrOp0OlUqFp6cn7u7u5O
bmkp6ezpNPPklSUhL79++nW7dudguSDxkypOG+WAtn+fLlTJgwwebea3JyMl9//TUeHh5MmjSJ3LxCcvIKyMrJ57ON3zbIkr1d2qTr1q2jc+fOlmPmtfWbAZ1ezw/7f+PytWRkgsA9wwZw6WoS6c28GkRLQalUcs899wAmxQxb5cEcFeq+dOkSr776KoIg4OnpaSkiXReJiYn8+9//BuDC5WsUl5QT0akLJbf04K0PYjn04zeIAKKIiMkJdrutJzG39aS8rJStX8VaBIQBPlr1Nn0H3kFMt9spKS7k+2+2IAggIJgcpAADBg9D7dOarMwMjuz/AYAPV71tKVrab/Aw2nXoRHZmOvt/3FnF3jKNlp79hxAYEkZ2Rionjx5gx5+pPGbuHHUfgcGhJF27wm+H9gGg0eosdvYfOgpN6yA0RTmcPXEUtZsKtZsKtz///ufy5QQHB7Nt+zesen8DZeUaPNzVlkCxv82ZS0FxOTt27ODk8aNV+la7qXhuwUskpmXx6y97yUi+Rsd2bVCplDbFFRqCo0ePcvToURITE1m9ejVgWrKsLtRtrdbqc889R3FxMQaDgeeff57g4GDLOH3//ffp1auXQ5U5EhMTAWjfvn0DfcuWS1hYGMeOHSMqKoqEhAQOHjxoOTdo0CCLk/y///s//Pz8LPmJ3+07gr6iApVSSXp2Llu/2+uQiElt2HSGISEhFBcXV9Gju1mcYWFxCd/8eJDs3ALcVEruHT6QDuFtGNKve4takmjOyOVynnzySWJiYoiLi0OhUFgmrWeffbbG9Y4KdRsMBstbmZeXF0VFRVadodFotOzplJVrAJPTM4oiOq0efS2FQ/UGAxUVFRgMBoyiiFDlfiIGgwGdXo9Wp681NamwuASj0gujaHozEzHtRQPIZTLc1Sr8fb0QdT54ebgjk5ve9uQyGdl5haiUCrw81BQqFbi7qQhu3coiqyWKIt06tSMsvB1uYjmXz3mDCOnZeShkJofs7emOm0rJgP63k5tS9fdZodeTm5uLXC7HUKEjom0wRqMRmUyG0Wj6LkGtvImKaEtualdy065RWq6hrFxDebmWco2WQ8fOgCCg1ekpLS/nyvVkOrZrU+e/QUMSHh6OTCYjOTmZgQMHIooiCoWi1qoQddVaray5WZ1nnnnGYZvefPNNwLr+6M1AXFwcSqXS8n/VGhqNhiFDhpCRkcH+/fvJzs1HEAQUcjnubm5kZOfV2x67UiueeOIJy+ebRaQ7OT2LHT8dolyjpZWvNw/cPQR/v7pLoUg4R+WxZc/ykaNC3ZWje2vbC6pOhw4dLMu0n238lrTMHBRyGRUGI0Gt/Vn+8vfIZALCn8uNgiAgk8kQMOU3hbUJJT07F6VcbpHde3LyGJNDNRp5bf5TGI0iRqMRoygi/vn3V1//SFaOBxOnzcBgFAkNCmDm1Ptr2Lfg6RlVPldeXu3bpw8rliysc7norsF9efqpJyzfrbqdY8eOZezYup+uJ0yYwIQJE+p8EHzkkUd45JFHLJ8NBgNpWbmsWb8do1GkW/c+3DH0LjQ6Pf+Y+5jTIfCOEBYWRlhYGNHR0WzatInCwkKef/55Dhw4wLBhwxq9f4naKSsrY+fOnSxYsMCSoRAZGUlkZGSt1z/44INs2bKFiooKnnrqKU7EZ1BWrkGhkFOu1TbIdlWdYWBFRUUkJSWxe/dukpOTSU5O5vr16/zwww/17rS5c/biFbZ+t5dyjZaItqFMfWCU5Agbib59++Lt7Y3RaMRgMGAwGOjbt2+dGqKOCnV36dKFU6dOUVZWRmlpqdW3wuqMv/cu2gS3pqLCQJvg1kweOwIPdzVqNzfcVEqUCgUKudy0N/fn2+f4e+8iNDAAnU5PaGAA4++9y+Qw/3yKVSmVqN1UeLir8fJwx9vLE19vL6bcP4qwkEAMBiNtglsz4b7hdtlorlv592n3M2PKWLv3TWqzs6GRy+WEhwYR0TYUd3c3fLw8KNfpCAl0fZX5F198ke
joaE6fPo1cLmfdunUut0HiLzw8PFiwYAGApU6mNWJiYnjllVd47bXXCA4Oxs/XG6VCgSjSYOO3zjfDY8eO8fPPP5OammpZJlAoFEyePLnenTZXDEYj+w+f5PQF01JR79uiuaNf9yqVlCUallmzZhEUFFQlWtnaMry1vZ3qQt1gitx78cUX0Wq1Di9pOVMg2dmiyq4uxuzK/sbfe5dTBaobEvMy22effQaAHZLMEs2UwuISyjVa2oYGMfvRBxtsfq7TGY4YMYIRI0aQnp7ucMXjlki5RsuOnw6RnJ6FXC5j5B196da5Q1ObdcOj0WgcFnKoa29n9uzZlp/NhISEWI1OlWh8XO3oayMiIoK1a9dSWFhIbGxsgyrHSLiW5DRT8FPbkMAGfVGp0xmalUDmz59vWQISRRFBENiwYUODGdAcyM4r4JvdBygsLsXTQ83YkXfQJlgKjnEFDz30EMuXLycqKuqvpUYX1HqTuLlYsmQJ+/btY8yYMbRr147HHnusyWwx5xlKOEdymqnqR3ibYBtXOkadztBcRWD+/Pk3pOSQOfAgMTUDg8GIn48XYSGBjB11B97NJDfqZmDjxo0MHToUhcJmLJeEhNOYA2Y6duzIF198gY+PD717924SW2zV1JSoG1EUSU7/882wTZCNqx2jzgCaDRs2cO3aNVasWGEJoDH/qS/Lly9n6tSpLFu2rMrx+Ph4pkyZwuTJk4mLi6t3P9bY+t1ektIyKddo0elNIfOTxgyXHKGL8fPz46mnnmLcuHGWPxISDY257Nwnn3zC/fff36QqWvHx8cTHxzdZ/y2ZwuISikvKULupaO3vvKh5bdT5OP7ss8/yxRdfVAmgMVOfgWRNQWT16tW8++67yGQylixZwscff+x0P7ZIy8xBq9UhCAJeHu6IItLbSRMgCAJz5sypskxaW36hhER9KC0tJS0tDblcTo8ePfDwaLqH3pUrVwJSnqEzmPcLw0ODGjywsc7Zf+jQoQwdOpT777+/QZdJrSmIFBYWWoJ1zLlkjYF579NgNOKmVCKTyZok3FsCpk+f3tQmSNwEPPXUU6xatYrZs2ej1Wq5/fbbm9okCSdI+XOJtKH3C8GOpPuG3i+0piBirKSNaKxDJ7EylZXgwX4l9CvXU114ttoAACAASURBVJHLZKiUCpRKOQF+3tw1oLtDKurOqq63hHauVL+PiYlh48aNFBYWMm/evCpyTHVRl1D3999/z3/+8x8EQeCpp55ixIgRvPTSSyQkJKBWq5k4cSJjxoxpzK8j0UwxR8ebMee4SbQcRFG0BM809H4h2OEMGxprCiKVqxXL7CgLU1kJHuxTQi8tK+fE+Su4uakYO+oOQgJ8nAr3djZMvCW0q97GWRV4e3jhhReYOnUqa9asQaFQsG7dOqvKINaW2detW8eXX36JIAjMmDHDMvmtXLlS0oGUkGjhFBSVUFxajrvajYBW9mvB2otNj1NcXMzatWt5++23MRgM7Nu3r14dWlMQ8fX1JSMjg8zMTIeUQuxFFEV+PvQ7Gq2OiLahUh5hM8CcDG2WTbOVDF3bMruZDh06UF5eTllZmWX8CILAwoULmTVrFqmpqY30LSQkJBoby1thI+wXgh1vhpWf3M0yRvXR9LOmIPLMM88wb948RFFk8eLFTvdRF/HXkrl8PQWVUsHIIX1aZN21Gw1Hk6GtLbOPHDmScePGYTQaLUFeCxcuxM/Pj99//51//etfvPfee1bvr9Vqa9XMdObt2Nk36pbQriH60mq1Tt2jIfjf//7HQw891CR9P/30003Sb0vnr/3Chl8iBTucYWPIGNWlIBIdHc3GjRvrff/aKNdo2fvr7wAM7d8DHy/PRulHwjEqJ0OHh4fbTIa2tsy+evVqvvvuOwBmzpzJ4MGD8fMzhV/37t2bd955x6Y9lVcqJFo+5mC56nTr1q0JrDFRueSYhH2Y9gsbL3gG7FgmvVFkjPYdPkFZuZbwNkHcEl27MrpE0zBs2DBmzJ
jB8OG2xamtLbOrVCrUajXu7u7o9Xrgr6jkq1ev2qxYIXHj8eijjwKwaNGiKsejo6ObwhwAzp49y9mzZ5us/5ZIQVExJWXleLi7EdBIRRNsvhk2JxkjZ0lITOXilUSUCjmjhvSVhLdbMNaW2adMmcKUKVMAmDRpEmCKGiwsLEQQBJYsWdKElks0BR4eHsyfP5/ffvuNF198EfjrbXHFihVVrq0rSnnZsmXExcWh1Wp56aWX6NWrF6+99hrx8fEIgsDixYsdcq4ffPABIOUZOoJFjzQ0qNG2t2w6w+PHj+Pl5WXJyzl+/Dh9+vRpFGMaA41Wy56DxwEY3Od2/Hy8m9giifpS1zL7gw8+yIMPPljl3CeffOIyuySaH2vWrCEzM5OVK1fy3HPP1bnNYy1KeeHChSiVSlJTU3n99ddZu3YtM2fOJDw8nOvXr/POO+/w/vvvu/Jr3XRY9EhDG2eJFOxwhr/99htgepoyPwm1JGf4y9HTlJSV0ya4Nd1v6dzU5khISLiY4OBgXn/9dY4ePUpRUZHleFhYmOVna2IgSqUSMBWkNb8BmoO4FAqFXWlgEs4jiiJJaY0bPAN2OMPqkU9///vfG82YhuZ6cjrnLl1FIZdz99B+0vKohMRNyowZMxg6dGiVupmVsRalDDBnzhzOnj1bY2n13XffZdq0aTb7rywQotFoABpc2MKVYhn1xRFbC4pKKCouwV2twliha7TvaNMZbt261fJzdnY2+fn5jWJIQ6PV6fnx4DEABva+VapULyFxE+Pj48NTTz1l9XxdUcoAH374Ienp6Tz77LOWN8jY2FgiIyPtqn5RWSBErVYDNHhtx6asF+kojtiakpWPQqEgsn1bAgMDbV7vbNqPTWdoToYWBIGoqChLdFZz5+Cx0xSXlBES6E+vW7s0tTkSLYQTJ07UWPaqKzzfGs60aSntHG1jMBgoLtVgMBqRy2R4e6qRy+UYjUaXlYeTyWRWBeG7d+/O5s2bGT16NIcPH66y96zT6VCpVHh6euLu7g7AoUOHOHXqFKtWrXLYFkkKzjFSKgXPNCZWnaEoiuzcuZP//Oc/jWpEQ5OUlsmZC1eQy2SMGtpPWtNvxqSlpfHJJ59QWlrKihUr2LZtGxMmTGgye2QyGT169KhyrCEk7W6kdo62WfT2WkrKygHTQ7Wnh5o3FvyNU6dO2WxbV4Tnxx9/zIYNG3jooYeYN28eAAUFBSxevJj8/HwGDBhgCawCbEbBW4tSfu655yguLsZgMPD8888D8MYbb+Dl5cWjjz5Khw4dWLp0qd2/j86dpdgFe6lcv7Cx8gvNWHWGgiAQERHBrl276Natm8WpmNfWmyM6fQU/HTAtj/br0Y3ABq55JdGwvPLKKyxatIjXX38duVzOzp07bTpDR0LgJaHupiUtM9viCAHkMoGyco1dba1FeE6YMIEePXpw5MgRy/UffPABc+fOJTKyZh5x79692b17N0lJSYSHh3P33XfXuKauKOXqJewAdu/ebdd3qI1jx0zzk1Tk1za5BUWUlWvw9FDTyrdxMwFsvjKVlZVx8OBBPvnkEz766KNaB0Zz4vDvZykoKiEwwI++3WOa2hwJGxgMhiqTl61qJZUnSL1eXyV5eeHChaxfv55Vq1axZs0ay/GVK1fy5ZdfSo7QhRhFkWOnL7Dpmz2WY3KZgMEo4uGutuse1nRoW7duXWOp9vLly6xZs4Zp06bVeOt84YUXSExMpGvXriQmJvLCCy84+9XqzWeffWZR9JKwTuWUisaWz7S5ZzhgwADGjh1r+fzTTz81qkH1ITMnn5PnLiETBO4e2s+y3ynRfOnfvz+vvfYaWVlZLFu2jEGDBlm93tEQeLNQt5+fH4sWLaoSTi/ROJSWa/hh31Gup6Sj0+nx8nCnXKNFFEU8PdTMnHq/XfexFeFZnVOnTrF9+3Z8fX155plnqkg7ZmZmWuT47rjjDh555BEnv52EK0lxQUqFGavO0GAwsGXLFsaMGYMoip
bPI0eObHTDHKXCYODg8T8QRejbI4bg1lKx3pbAnDlziI+PZ8CAAXTo0MGmkoejIfD2CHVv3ryZzZs3AybZroyMjCrnRVGsccwWzrRpKe2stdHpKyguLaNzRCiR4UEYjSIKhRx9RQVtggL+fLo3tS8sLKwSqDJp0iSLchDYjvCsTkREhGWVoXqcQHBwMB9//DFdu3bl3LlzdkUlSjQtxir7hU3oDLdv3862bduIi4tj+vTpiKKISqViyJAhjW6UoxQUlbBm/Xaycwvw9FAT0ymiqU2SsJPHH3+cOXPmcM899wDwz3/+k5dffrnO6x0NgbdHqLvyJHzq1ClCQkKqnJcCaGy3MYoiv508z9GT5zCKImEhgQS08uHsxQTu6Hs7fbvfUqNdeno627Ztq7MfaxGetREREUFWVhZeXl4YDIYq51asWMFPP/3EpUuX6Nixo9U0C4nmQV5+IeUaLV6e7jWUw5YvX86ECRNsamXv2bOHo0eP0rZtW6ZPn251qbVOZzhu3DjGjRtXZdO6ubLxmx/JzitAEEAQZGz/4RdmTBlru6FEk1NSUsLWrVs5f/4806dPr7V8UmUcDYEvKSnBy8tLEupuRErKytm19zDJaVkIAvTr0ZWBvW7l6MnzKORy2oU5FwVoLcJzy5YtbNy4kYKCAoqKili8eDFz585l/vz5aDSaGmIhP/74o+WBSxRFfvjhB8tnieaJpUpFLfuFYWFhHDt2jKioKBISEjh48KDlXGhoKDExpngRd3d33N3dKS8vx2g0Wt06s7ln2NwdIUBObgFKhQI3lRIvT3cysvOa2iQJO1Gr1bz11lvExsaycOFCizpHXTgaAi8JdTcu11PS+X7fEcrKtXi4qxk9bADt24aw7/AJ2oYG8beH78dd7eb0/euK8JwwYUKNqONOnTrx5Zdf1nqfTZs2WZyfIAhVPruaV199tUn6bUkUFJXw3d7DFBQVIwgm4RQ/H1PB7ri4OJRKJVevXrV5n0GDBjFo0CB+/PFHjh07Zok3qA2bzrAlEBrcmvTsXJRyOeVaLaGBAU1tkoSd9O/fHzDlgR07dsyuaGVHQuAloe7GwWg0cvjEHxw7fQFRhHZtghl91wA8PdxJzcjm5Ll4/rh0lScnN48IXp1OR2FhIb6+vhQUFDRpYeH27ds3Wd8thf/b+TOFRSUIgkBxSRlbv9vLjCljKS0tZefOnSxYsMCS2xkZGVklIr3y6tJvv/3GmTNnSElJseSj1oVNZ1heXs6RI0eqCNw+8MADDn+5xmT8vXex9bu9pGVk0yYkkPH33tXUJknYICsri6CgIMaOHUtycjJgWt544403mtgyibooKCph63d7SUnPQhAE3NVuKBUKBva+hb49uiETBIyiiJ+vN8MH9TZFj9qZRtHYvPDCC/z9739HEAQEQbCUc2oKDhw4ANAs4y+aC0mpmSCASqnE0+Ov1T5PT0+Lgs9rr71m8z79+vWjX79+dvVp0xk++eSTVgVumwN+Pl7MmDK2RWnz3ex88803zJw5s9a3uX/+859NYJGELcyOsKxcg1EU0VcYeOqRBwivJJN1Li6Bg8fOcOeAnnTr3MFltqWkpNC2bds6z/fs2ZMNGza4zB5rrF+/HpCcYV2Ulpn290RRNKXluGi1z6YztCVwKyHhDDNnzgRMUWGCICCKIn/88Qddukg6ss0VkyPUIgJqlQo3lbKKIzRfo9HqUChck+O7evVqAC5dukRkZCTz5893Sb8SjcfRk+fx8/FCq9djMBoJDQxwyWqfTWdoS+BWQqI+PPbYY6xbt47Vq1eTl5dHRkaGVAG8GZKUmoFOX4FRNKJ2U+GmUhEaVPVp/Xz8VQb0uoVbYzrRNsQ1eXzmuejy5csUFha6pE+JxiO/sIizcVdQKhQ8MXkMrVv5uqxvm87QlsCthER9MMuvpaam8vbbbzNlypQmtkiiOklpmWzffQA/Hy8MBiM6vZ7QoKpP6/mFRfx04DiCIDBjyphGl84ys3r1av
R6PefPn+eLL76o87qWEPsgAYeOn8VoFLmlS0eXOkKwwxn27duXpKQksrKyEEXRFTZJ3ESEhYXx+OOP88ADD1BRUSFJ6DUzktMy2f7DL1RUGLi9axQjh/QlLze3xt68Sqmka1QECAKeHu4us8/8Znjo0CGr17WE2IebnfSsXOKvJqOQyxnY+1aX92/TGS5dupTi4mJOnjxJjx490Gg09OnTx6nOSkpKWLBgAQUFBUyePLnGk9ndd99tGayLFy+mU6dOTvUj0XJ46623qKioQKFQYDQa+fjjj5vaJIk/SU7PsjjCbp07MHJIX2S1vPElpmRw8Nhphg3sRWiwawPYVq9eTWRkJN7e1isaNKfYByliuiaiKHLwt9MA9LilM96eHi63waYzvHTpEhs2bGDatGmsXLmyhrKDI2zZsoV7772X0aNH8+ijjzJ69GhUKpXlvL+/f51JsxI3LgqFaRjKZDKbk5qEa0j50xHq/3SEo4b2q9URApz4I47MnHxSM7IJc9FeoZmOHTui0+m4cuUKQ4cOrfO65hT7EBzcuHX5WiLXU9JJTs9C7aZqsmpDdlW6NxqN+Pj48PXXX5OYmOh0Z6dOnWLx4sXI5XKio6O5du1alejBwsJCHn74YSIjI3n11Vdxc3NeuUJCQsI5UjOy2fbDL+j1FXSNirDqCNOzcrln2ADOx1+jezfrOpGNwfHjx1EoFHTt2tXqdfbEPjhSJzM+Pp7FixcjiiJLliyxKTBfmR9//BGAUaNG2d3mRsYoihw8ZirP1bd7V9RNNO/bdIYrV67EaDSyZMkSdu7caakG4AzFxcV4eZkkdby8vKpsZgN89dVX+Pn58cknn7B582YeffRRq/czGAzk5ORYPldUVFT5bA/OtLnR2znblzNIgQ3Ni9SMbLZ9v9/iCO++s3+djlCj1bL9h/0o5HKmPDAKRRPs9z766KP88ssvdO/e3ep1tmIfrBUSXrhwIUqlktTUVF5//XXWrl3L6tWreffdd5HJZCxZssSh5f2tW7cCkjM0E3flOtm5BXh7edDjls5NZodNZ+ju7s7nn39OYWEh8+bN4+DBgxYR1LrIzs62aEOaad26Nd7e3pSUlODm5kZpaWmNJTFzhYGRI0cSGxtr03i5XF5lI1+qLtAw7aq3yc7Odrhfe5ECG5oPaZkmR6jTVxDTybojBCjXaPHx8kSlVOLlwqCZynz//fc8+eSTfP7551a3cGzFPjhaJ7OwsJDQ0FAASxUVCcepMBj49bipQPeg3rc2yQOVGZvO8IUXXmDq1KmsWbMGhULBunXrGDZsmNU2gYGBte79ffHFFxw5coR77rmHixcv0rFjR8s5nU4HgEql4uTJk5aadY5QVFTk8MRtMBicmuxv1Hb+/v4ujehsToENNzNpmdn8b5fZEbbn/w2z7gjzCor4/cxFHrh7CHK53GWpFNWJiIjAw8PD5nxhK/bB0TqZ5pSg6j/XReVVLLMYfUOvvrhyRae+mG09d+kaeQVFtPL1IqiVd5Pab9MZajQahgwZwmeffQZQr/SKCRMmMH/+fNavX8/EiRNRqVQcOHAAo9HILbfcwsyZM/Hw8MDHx4e3337b4fuXlZURExPj0GSu1+stT36OcCO2MxgMxMfHu7TwaXMKbLhZScvMsTjC6Mh2/D8bb4QAB387zZXEVORyOcMH93aRpTUx607ef//9Vq+zFfvgaJ3MysWDqxcSrqt/82qLWm3Sa21o6ciWJEeZk5ODl7cP568koVAoGHFH3wabd5xdybLpDCMiIli7di2FhYXExsbaLKZoDS8vL9asWVPlWGV9vu3btzt9bzNSnprzNMXvThJ1aFqycgvYe+S0xRHeM2yAzcldo9UxpH93FAo5/Xvd4iJLaychIcGuJXZbsQ+O1sn09fUlIyMDQRAscRBNhbnQbatWraxed+3aNdasWcOIESMYMWJEg/VryydcvnyZQ4cOcf36dZ599ln8/f0B+P3MRTRaHW1Dg+gQ3qbe9t
QXm480S5YsISoqijFjxtCuXbsa5XNuVN5///0aAT7V2bVrV41jJ0+eJD093eH+3nzzTa5evVqjuG1tdvz8889oNBrefPPNOu+3bds2y71qs7O50LdvX0JCQpDJZJaKAhKNT0FRCR/EbuXzLd+TmpFN+7Bgm46woKiET7/6hvdit/H+F1vpeUuXJq9KYa0+XWXMsQ+xsbFMmzaNjIyMKucr18mUyWSWOpkAzz33HNOmTWPWrFk888wzADzzzDPMmzePZ599lrlz5zpk84oVK+oViFgdc6FbMD0cxMbGWv5UXu7t0KED48aNc3m/UVFRBAQEkJOTY1mlKi3TcOLcJQCG9Lvd8v9++fLlNZao62L//v3MmjWrwb6PXfUMQ0JCcHd3RxRFjhw5YvcAbEr+9re/1Tg2cuRIJkyYgEajsQxgo9GITCaroYd5/fp1duzYQc+ePXnvvfeYOHEi3t7enD9/noyMDJ5//nlOnTpFQEAAu3btonXr1vTq1YuMjAzc3d156623GD58OCdPnmTx4sUsX76cTp06sW/fPkuNvczMTNauXUtERARger0vLi5m//79eHt706VLF4sdt912Gx999BETJ07kwoUL9OnTh6tXr7Jp0yaSkpKYOnUqe/bs4bHHHuPNN9/Ew8ODtLQ0vLy8OHXqFH369OHTTz/Fx8eH3r17c/z4cVq1asWlS5eYN2+e5WnN1TSkqIOEfYiiSOz/fUdqxl/LSfmFxTbfCLfs/JnUDNOeTklZOTt//pWZU60vTzYX7Il9cKROZnR0NBs3bnTKFnOgYEPgSKHbhsTRfseOHYuPjw9paWl06dKFUxcuU1FhIKpDOKFBfy3tWqtgP2jQIMtb6IULF9BqtU7FltSFzTfDWbNmsXHjRn7//XdOnDjBiRMnGqzz5kxERARjxozB29ubHj16MGzYMEpKSvDw8Kjx5DJgwAAeffRRTp48aTkWEBDA2LFjUSqV5Obm4uXlxaRJk6osqZw4cYKRI0dy3333Vbnfrbfeik6no7S0tFY7KvcxefJkNBpNlTcqo9FI+/btGT58OCEhIYApH+vOO+/kb3/7G3v27AFg9OjRDBkyhISEhIb7xTnIpUuXePvtt2nTpg0rV65sMjtuFrJy8tn87R5SMrJAMFWfCGjlS2ZOvtV22bn5XLmeitFoxN/PB39fH5ttXMHq1atZtWoVq1atsozr2jDHPpi3AppSWnLHjh3s2LGj3vcxF7qdOnWqJYgnMjKSxx57zPKn8hJmdnY2u3fvZu/evaSmprqs3wMHDvDpp5+yd+9eFEo3Ply3lUPHz5GTV8BtMX8V5TU72GvXrtm04cCBA6Snp3Px4kUuXLjg9HepjF0BNOaKwi0Ja5UP1Gq15XxdASYdOnRg06ZNDB482KKSEx8fT2hoaI3oMYVCYSlDZKby/ltAQADFxcVs3ryZ0tJSy/FevXqxfv16S3FbM0VFRahUKuLj4y129O/fv4paD0Bubi6bNm3Czc2NwMBALl++zJYtW9Dr9bRr145du3YxYcIEAMub4cmTJxkxYoQlWVkQBLui4RqLhhR1kKibco2WX4+f5WzcFUQR1G4qZIIMD3c3q/XiKgwGLl9NJqh1K+RyGRqdDne1igqjwSU15mwxcOBAwOTc+vbtW+d1DRn7UF/MjnDMmDH1uk/1Qrc5OTmWAswZ2XmEBPoz/t678PMxPYAHBgbaVRDX0X5tMWTIEO644w6S0zJZt/V7CotLQABBENhz8LjdFewrY14eTU1NtSm4YC+CaOMRaceOHfzxxx9Vov3Gjx/fIJ3Xl4sXL1bJeTx37hy33OLYhr6rojt/+eUXUlJSMBgMNsUEGqI/Z9tdvHiRwMDAKlFp1X/PDUlWVhb+/v7k5+ezc+dO+vfvb7OvxlQKOXXqFD169KhyrCXnrxqNRs5eTODX38+i0eqQyQR6dOtM184RfPvjIdIysmkTElhl0jQjiiKbv91DamYO9w4fiNFo5NfjZ0nPzK
mzjSM21va7dpQjR45gNBrZv38/r776qtVr9+3bR0JCAh07duSuuxq/Pl5lKv8fMm/hNHSpspycHLb+cIjElHTUbioEQaBNcGtmTBnboP04QoXBQNyVRE6eu0R2bgHpWTnIBBlubkp8vDzR6vT8Y+5jDdqns/OVzTfDjRs3MnToUIt+pIRzmHUT9Xp9E1vSvHBU1MGVSiEtneT0LPYfPkFWbgEA7cOCuXNgL0tpnBlTxtbqRA0GA7+dvoCXhztdO3egtFyDt6cHYSGBdI3q0KxC+BUKhd1Lni0x9sFRMrNzqTAYKC3XgCiSkJhGUmoG4W2CXRqcVlqu4cyFy5y5cIWyclNepYe7muDW/pRrtKiUCjQ6XbNYXTBj08P5+fm1qKRog8EgpVc4icFgcHmfjoo6SEohtikuLePA0VPEJSQB4OvtydD+PegU0dbmhKjRaknLzOHIiXOolApmTh1LTFQEymb6MFxRUQHAjBkzrF43a9YsgoKCqqRh3IjOMCwkkJT0bAwGA+VaLXJEtny3D38/H7p3jaJr5wjcqm23NBT5hcV89fWPlsAsPx8v5HI5QQF+9Ly1C10i21NSWs7W7/ZWWZFoLtgc4YIgtJikaA8PD+Lj4x1q46zzvFHbuTqq1FFRh8ZQCtm8eTObN28G4KWXXqqhgtFSNG8zMzP549J1zsQlUFFhQC6XcXt0JLdGd0Ahl5Obm1tnf6IocuZiAmfjrjJm+ACiO7alXZsgSoqLm/y7WcMc2v/f//6XwYMH8/DDD9d6XUuNfXCU8ffeZdkzDAsNJCoinCuJKeQVFLH38AkOHjtNTFQEt3eNIijAel6iPRSXlpGYkkFSagaHT/yBRquzxE9o9Xoeu38kbUODLL7Dz8erzhWJpsamM5w+fbor7GgQfHx8WuzeTnNq50pJJEcDGxpDKWTSpElMmjQJMO1jVf99Nec9w4KiErZ8t5ek1AwEBLw83ZHL5cREdWBo/x74enva7A+ZaanRiAwRgaIyLfeOuKPBbKyrXfXAMWcwP5gfOXKEvLy8Oq976KGHWL58ebOIfXjvvfca7d5mZ1OZOwf25GpiKqfPXyYpLZOzFxM4ezGBNsGt6d41iqiO4XZrgmp1epLTMklMNTnAvIK/HpY0Wh0KuRy1mxvuahX6CgPhbVpOuSq7Kt1LSDQWS5YsYd++fRZRB1uKNK5UCjFH5lkLMnE1+ooKCotKKPjzz08HjlFQXGJZ4hYEgScm30f7sBCr9zF/t6TUDCoMRjqEt2HquFFEd2pPuxY0gW3fvp1x48bZXPJsTrEPZjk2VyGXyYjqEE5Uh3By8ws5e/EK5+OvkZaZQ1pmDh5HT3Jrl0hujelU4+HJYDSSkZVLYkoGiakZZGTlYqy0eqNSKghvE0z7sBCOnDxHbn4h7mrrEcrNlaYfGRI3PY4ENlRWComOjrYohcyePZvnnnuO4uJiDAaDpWqKWSlEFEUWL17skF0btu/menI6crmMq0mpfL5pB9PG30NAK1+b2p2OUtnxhgQFMGxgLwxG41+Or7CYguISSkrLq7TLLShEEATkMhmeHu7IZDKrjrCkrByVUsnnm3aQnVuAj7cH+UUl5OQVoFQoWpQjBOxWVGlOsQ9btmwBsKQ9uZKAVr4MG9iLQX1uI+5KImcuXCYrt4DfTl/g2JkLhIUEkpGVS05+oaUaSWXnJxMEwoJb065tCO3DQggJCkD+54pLh3ZtLEu0oYEBzWo/0B5atDNMTEzk3//+t+WzRqNhzJgxNVRmKjNmzBjGjBlDQUEBL774IhqNpsqT2vjx4xk1ahSZmZksWrSoRvtHHnmEIUOGkJyczCuvvFLj/IwZM+jbty/x8fG1JpE//PDDDB06lLNnz/LBBx/UOL9gwQI6d+7MsWPHLPto5u+mVqt59dVXad++PQcOHGD9+vU12r/xxhsEBwfz448/snXr1hrfb8WKFfj5+dWZ+GtewtmyZQs//fQTAPPmzatxXUPhTGCDq5RCcv
IKkckEDEYjBiOkZeXw363fo1IqCAkMIDQ4gNCg1oQEBTgkS2YURUrLyikoKvnT2RXzy9FTFJWUIRqNxF9LBYXs/gAAEi1JREFU5lpyOq39ayqVyGUyfLw98fPx/vMtVaSouBS1m4oKo5GQQNOeryiKiKJIamYOSSkZRHUI5/iZC8QlJDF25GBy8gupMBrQaHUE+vuh1elRKZvfdFBXGk1tKTPW0miaU+yD+f9VUzhDMyqlkttiOnFrdCTpWTmcPn+F+GtJnD5/GX1FBYIgUK7RUlauoXPHdrQPC6F92xDahgbhpqo9Vau2JdqWRPMb/RI3Fc05sCEowI9ryeUYjUYEBHx8PPH19qSwuJSktEyS0jIt1/r5eBEaFIBKqWTfkZNoNFrc1W6MHj4ItUppWdYsLC6hqLiUimqRu3kFRX86MNPEbTAYiYpoi6+PF618vfH18cLPxwtvL0/LW6koikRGhPHZxm/JzitE7aakz+1dOXb6AsfPXOSOvreTnVvA6QuXUamU+Hh74aZSotHqCA0KIDuvALWbqtmFuJuxlkZTW8qMtTSalhT74EpMuYiBtAkOZGh5D9764L8IgoBCIUelUGAURR6feC/Lly8nfMKEOh2hmd9//52TJ0+SkJDAyy+/3KDSc41Ni3aG7du3r5K4WnmDvrLKTG34+fmxdu3aOoMBgoODrbYPDw+3er5z5861njcHp9x2221W2/ft27fKfm11O4cMGVKl4kd1Ro0axahRo+r8fuY35NooKSlhwoQJlifX6uLhDUlzCmyogQBG0YjRKCKTmaTLCotLiYmKQCYI/HbqPD5enmh0Oi5dTSIpLZPikjJLRGxpuYYtO3/Gx9sThVxOcWkZ7mo3FHI5pWUaAgP88PfzITktE5lMhqHC5CBFUUQul3H5egq3d+2E0Siy9bt93N61E61b+fHzr79bft68Yw8yQcDLQ02ZRssP+48wYnAfNFoduflFRLYPQ6VSEhbSmqCAVgzqcxsyQSC8TXCzDXE3Yy2NpraUGWtpNFLsg2083dW0bxtCenYu7m6mfb82f+qG2qsZ2rt3b3r37s2HH35IcXGx5AwlJOylOQU2gOlJ2fzAkp2Tj5eHO6XlGtQqJUXFpXh7eVCh09HK1wu1m4rIdqH4+Xhx+OR5OrUP48S5eHR6PWqVSQGkXKulW6f2tPL14sS5y3TpGI6/nw9HT10gqn0bWvl6kZWTR7lGjlqlolxrSkgWBIGKigrKSssoVsjq/Fmvr8BdrUKpVKAyGCkuLSM4wIeH/t9gPN3VCIJATMcwAAoKCqp81wdGDqSiogKFQkGFTkNOjsau31FDpVYUFhZWCYCqHNUL1tNoakuZcbTgrkRNKqdmmPf9HBXl3rFjB+Hh4Q0qou0KmscMJHHT0pwCG8D0VmZ+k24TEkh6Vi4Bfj7oKwyEBAUwY/Jfb9OD+/W0/HxHf9PPZ+IS0Or0aHV6jKKIp4eaCWNMteNGDOlvuX5I/55V7vPZph1kZOXi6aFGX2EgNCigyv7LHf171frzpWuppGfnopTLwV0gNDCAtmH214ZryrQRX19ftm3bVuf11tJoakuZcbTgrkRNqu/7OaoZ+v333/P1119zxx13kJqaSlhYmEvsbghsapM2Z06fPo2bm1tTm3HDo9Vq6d69e6Pce86cOQDNIrABTJVEzBOpwWikpFRjES7w8lRbIufqwmAwUFyqwWA0IpfJ8PZU2yV64Exf9WnXHDAajfTq1avO8+fPn2fz5s0sXbqUJUuW8OCDD1qWSefMmcOiRYsQBMGyP1jbseaCNFe5DmfnqxbtDCVaPmYFkcpI+zsSZpYtW8aFCxeIjo5m9uzZbN26ldmzZxMXF8frr79uSZmJiYmp9ZiEhL1IzlBCQkJC4qanZaynSEhISEhINCKSM5SQkJCQuOmRnKGEhISExE2P5AwlJCQkJG56bghnmJmZybhx47j11lstxT7t4cyZM0yePJmpU6eyfPlyu9rEx8db2r
z88st2V9k288UXXzBlyhS7r09JSWHgwIFMmzaNJ554wqG+vv76a6ZPn860adPIzMy0ef2BAweYNm0a06ZNY/DgwezZs8eh/m40nBlXzowpaDnjytExBdK4cgXLly9n6tSpLFu2rKlNsYmz83WjI94AaDQasaCgQHzkkUdEvV5vd7usrCxRo9GIoiiKzz//vBgXF2ezjU6ns/z80ksviWfOnLG7P61WK7744ovi5MmT7W6TnJwszp8/3+7rzWRkZIgvv/yyw+3MjB8/XiwpKXG6/Y2AM+PKmTElii1jXNV3TImiNK4ag3Pnzon/+Mc/RFEUxddee82hsdMUODtfNzY3xJuhm5sbvr6+DrcLDAy0JMIqFAq7kqOVSmWVn81aiPawZcsWHnjgAYft/O2335g6dSqxsbF2tzl48CBGo5Hp06fzxhtvWOrd2UNycjIBAQF4elovDHuj48y4cmZMQcsYV/UZUyCNq8aiNg3X5oyz83Vjc0M4w/oSFxdHfn4+nTp1suv6n3/+mfvuu4+8vDy7hWj1ej3Hjv3/9u4/Jur6D+D4E7wQCOdgo2vgr1bRgf1ALCB1wgQXm5szCEQMKg4LW/YDIVnZxlZkm7bM/jgVkThKCBPmtA0U0xBRWFBWxFkpNMF+UEbQUDiO+/7B7vPluAPvIEzg9fjrOD7v1/t9n3vt3q/P5+D9rr/h9kTD3XHHHVRWVqLX66mtrcVgMDjU7s8//8RoNFJYWIi7uzsnTpxwuM9jx46xcuVKp8YprDmbU3Dr59V4cgokryZKV1eXsnH1rFmz+Pvvv//jEU1O034y7Ozs5M033yQ3N9fhNlFRURw9ehS1Ws2pU6ccanP48OERd4kYjZubG56enqhUKiIjI60WKx6Nl5cXjzzyCADh4eFcvHjR4T5PnjzJihW33i4Gk8VYcgpu/bwaT06B5NVEGW0NV+G4aT0Z9vf3k5WVxauvvoqvr69Dbfr6+pTHXl5eDq832NLSQnFxMVqtlp9++omioiKH2g3diqaxsZF58+Y51C4kJIQLFy4Ag1swzZkzx6F2HR0d3HbbbXh7ezt0vLA2lpyCyZFXY80pkLyaSMHBwZw7dw6A2traCVtHeKqbErtWGI1GNmzYgMFgQKvVkpGRwUMPPXTDdhUVFXz77bfKjvQZGRksWrRo1DbV1dXKdyzz589n2bJlDo0xKytLebxu3TqSk5MdatfQ0MD777+Pm5sbISEhDr0ugMDAQNzd3UlOTsbb25unn37aoXYnTpwgKirKoWOnurHk1VhyCiZHXo01p0DyaiItXLgQNzc3kpKS0Gg0ymLmt6qxfl5PNFmbVAghxLQ3rW+TCiGEECCToRBCCCGToRBCCCGToRBCiGlPJkMhhBDTnkyGQgghKCsro6ysjI6ODnQ6nd1jmpubaWpqcireUE1NTaxZswadTkdubi4mk4m6ujouX7487vGP15T4P0MhhBAjGxgYwNXVsWsfX19fNm7caPd3zc3NmEwmFi5cOKZxnD59moyMDJYvX648V19fz+LFi5k7d+6YYv5bJt1k2NzczHfffUd8fPxN77utrY3Lly87vQ7krT6O/Px8Hn30UYKCgti/fz9nz54lLy8PGFxjs6amhrS0tH+lr1uV5JXk1USpq6ujsLAQs9lMZ2cn69at49ChQ8ycOZM9e/YAkJOTQ0tLC+7u7mzfvp3r16+TmZlJf38/9913Hzk5OdTV1fHhhx8qcfLz860WPU9OTub++++noaGBuLg41q5dS3Z2Np6enrS2tpKfn2/Tj4eHBy+99BJ9fX3Mnj2bZcuW0dbWxs6dO9mxYwdVVVXs3buXmTNnsmnTJkpLS/nrr784d+4cO3bscCiexc8//0xpaSleXl709PTw8ccfk5+fT3l5OcePH2fJkiUEBARQU1NDd3c3AHv37qW3t5fXXnuNP/74Ax8fH7Zv386pU6fYs2cPnp6eaLVaZs2axbZt2/Dw8GD16tU88cQTTr9Pk+42aW
Bg4L/2gTUwMODU8e3t7cqyRzeLvTE6O47RXufAwACNjY0EBQUBkJqaitFoVJbr0mg0fPXVV06fq8lG8kryaiKZzWZ0Oh0RERF88803FBYWolar+f777zl58iR+fn7o9XrWr19PSUkJ3t7eFBQUUFxczD///ENra6sSa/fu3URERHD27FmbfqKjoykuLqa8vFxZ4i8kJIT9+/fb7aeqqooHH3zQZmKFwfdQp9Oh1+spKiri4YcfJiEhgbS0NN59912n482fP5/HH3+c7OxsYmJiAHB1dVWey87OBsDHx4e8vDzUajUXLlzg4MGDrFixAr1eT2hoKJWVlVRWVrJz5070ej3Lly/niy++IDMzE71eT1xc3Jjeo0l3ZVhXV0dtbS1LliyxqZK2bdvGM888w913341er8fX15eYmBir6iU+Pp5Dhw4Bg8tXDa8o4uLibKody3YjpaWlNDY28vXXXyuV3vDY5eXlI1aA9fX1FBQU0N/fT19fH7t27WL27Nk2/RkMBgoKCpQxajQaqyqxu7vbahxlZWWYTCbi4+P54IMPCA0NBbCK8fnnn9t9TQaDgTvvvFM5v2azma6uLlpaWnjggQeAwSRubm4e862RyUDySvJqIgUEBACDu4X4+Pgoj7u6urh48SKfffYZNTU19Pf3ExwcTGdnJzk5OXR3d9Pe3s7vv/9uFUetVitXT0MFBQUxY8YM/Pz8uHr1KoByfu314+LiohQsw9+Hq1ev4u/vj7u7O4DNbVZn4zl7rtRqtXJ+mpqa+OSTT+jt7WXVqlVs3LgRnU6HyWQiPT2dpKQkdDodn376KcnJyWNakm7SXRkON7RKeuyxx6ioqAAG701HRETYVC+XLl3CaDQq7YZXFPaqHYuEhARWr15NYWEhgN3Yo1WAAL29vezbt4/ExERKS0tH7G/oGIdXiWFhYVbjGIklhslkGvE1tba24u/vr/x8+PBh+vv7uXTpkvLc3LlzrX6eDiSvRiZ5NT4uLi7KY7PZzF133cWaNWsoKiqiuLiYjIwMjh49SnR0NEVFRSxatAh7q2bae85gMGAymbhy5Yoy6VomMXv9zJkzR9m+y5JLFj4+Ply5coXe3l5g8EpRpVIp+1g6G28kQ2OOdH7S0tIoKiqitLSUpKQk/P39yc3NJSEhgYKCAqX4y8zMZNeuXQ71azOOMbW6RQyvkiIjI8nLyyMxMZHbb78dT09Pm+qlr6+PpUuXKjGGVxT2qp2R2IttuUdurwJ0dXUlMDAQGLxNdObMGVxcXOz2N7SqGl4lLliwwGocw5PHYrSK0J5r165x5MgRsrKy+PLLL61iDu1jqpO8GiR5dXNERUXx1ltvkZKSAsBTTz1FeHg4W7ZsoaqqyqlYFRUVvP3228TGxuLm5nbDfqKjo3nxxRfRarU2Wz+5urry3HPP8eSTT+Lh4cELL7xAcHAw2dnZ/Pjjj2zdutWpeCMJDQ3lvffe4/z58/j5+dn8fu3atWzdupUDBw5gNpvZvHkzx44d4/z58/T09LBlyxZKSko4fvw4PT09bNiwwalzZjGpJ8OhzGYzKpUKf39/9u3bR3R0NPD/6iU1NRWAM2fOUF9fr7SzVBS//fYbr7/+OomJiVbHG41G5ViVSmX1HceNYtv7MLFsgWMwGJg3b55NDKPRSGNjo1VbS5UYGxvL5s2bmTFjhlKtweCGnpa4P/zwA2FhYVb92+vDYsGCBTQ0NACQl5dHamoq99xzDwcPHlSOaWtrY9WqVTd4B6YmySvJq/EKCwtTzl1sbKzy/KZNm5THb7zxhk27I0eO2I01PM5QWVlZqFT//1h/5513lMcuLi52+9m9e7fNc5ZdV1auXGmzIfOBAwdGHbe9eBZDX7Nlu7HFixfz0UcfjXqsZTwWw3e5CAsLc2oXFXsm/W3S4WJiYigpKSEyMhIYrIba29tJSUkhJSWF69evWx1fUlLC+vXrSU9PJzY21ub46upq5diAgAAaGxt5+eWXHYptj0qlQq
vVUlxcTEJCwqj9WYSHh1NQUMDzzz/PtWvXuPfee63GER4ezunTp0lPT7fb52h9aDQafvnlF3799VdaW1tZunQparWajo4O5ZiWlhblymO6kryyJXklphLZwukmsvyRxiuvvPJfD8XK0D+BH85gMFBdXc2zzz77H4xMOELySojxmzK3ScXYabXaEX+n0WjQaDQ3cTRiqpC8EpOJXBkKIYSY9qbcd4ZCCCGEs2QyFEIIMe3JZCiEEGLak8lQCCHEtCeToRBCiGlPJkMhhBDTnkyGQgghpj2ZDIUQQkx7/wOs4aWdsO2fDwAAAABJRU5ErkJggg==\n",
 80 |       "text/plain": [
 81 |        "<Figure size 504x115.2 with 5 Axes>"
 82 |       ]
 83 |      },
 84 |      "metadata": {},
 85 |      "output_type": "display_data"
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "# plot Fig. 5\n",
 90 |     "save_fig = True\n",
 91 |     "fname = '../figures/060822/fig5-aav.pdf'\n",
 92 |     "cmain = 'slategray'\n",
 93 |     "\n",
 94 |     "# single-column width: 3.42 inches or 8.7 cm\n",
 95 |     "# double-column width: 7 inches or 17.8 cm\n",
 96 |     "# maximum height: 8.85 inches or 22.5 cm\n",
 97 |     "# small: approx 9 cm x 6 cm (3.54 x 2.36 in)\n",
 98 |     "# medium: approx 11 cm x 11 cm (4.33 in)\n",
 99 |     "# large: approx 18 cm x 22 cm (7 x 8.66 in)\n",
100 |     "fig = plt.figure(figsize=(7, 1.6))\n",
101 |     "gs1 = gridspec.GridSpec(1, 3, figure=fig, wspace=0.65)\n",
102 |     "gs2 = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gs1[1], hspace=0.2)\n",
103 |     "ax1 = plt.subplot(gs1[0])\n",
104 |     "ax21 = plt.subplot(gs2[0])\n",
105 |     "ax22 = plt.subplot(gs2[1])\n",
106 |     "ax3 = plt.subplot(gs1[2])\n",
107 |     "\n",
108 |     "# ===== true fitness =====\n",
109 |     "\n",
110 |     "nnkmeanfitness = -0.7716402509409155\n",
111 |     "ax1.plot(lambda_l, truefit_l, '-ob', linewidth=2, markersize=4, alpha=0.8, c=cmain, label='__nolegend__');\n",
112 |     "ax1.axhline(nnkmeanfitness, linestyle=\"--\", alpha=0.8,\n",
113 |     "            color='k', label=r'training distribution')\n",
114 |     "ax1.set_xticks(lambda_l);\n",
115 |     "ax1.set_xticklabels(lambda_l);\n",
116 |     "ax1.set_ylabel('mean true fitness')\n",
117 |     "ax1.set_xlabel(r'inverse temperature ($\\lambda$)')\n",
118 |     "ax1.legend(fontsize=6, loc=(0.05, 0.1));\n",
119 |     "ax1.set_title('(a)')\n",
120 |     "\n",
121 |     "# ===== coverage =====\n",
122 |     "\n",
123 |     "ax21.plot(lambda_l, cov_l, '-o', linewidth=2, markersize=4,  alpha=0.8, c=cmain, label='__nolegend__');\n",
124 |     "ax21.set_ylabel('emp. cov.');\n",
125 |     "ax21.set_xticklabels([])\n",
126 |     "ax21.set_ylim([0.85, 1]);\n",
127 |     "ax21.set_yticks([0.85, 0.9, 0.95, 1])\n",
128 |     "ax21.set_xticks(lambda_l);\n",
129 |     "ax21.axhline(0.9, linestyle='--', alpha=0.8, c='k', label=r'$1 - \\alpha$');\n",
130 |     "ax21.legend(fontsize=6)\n",
131 |     "ax21.set_title('(b)')\n",
132 |     "\n",
133 |     "# ===== confidence set sizes =====\n",
134 |     "fmax = 8.798749497001769\n",
135 |     "fmin = -7.530085215864544\n",
136 |     "frange = fmax - fmin\n",
137 |     "\n",
138 |     "navglen_lxt = avglen_lxt / frange  # report average size as fraction of total range of fitness values\n",
139 |     "navglen_l = np.nanmean(navglen_lxt, axis=1)\n",
140 |     "ax22.plot(lambda_l, navglen_l, '-o', linewidth=2, markersize=4, alpha=0.8, c=cmain);\n",
141 |     "ax22.set_ylabel('mean size');\n",
142 |     "ax22.set_xticks(lambda_l);\n",
143 |     "ax22.set_xticklabels(lambda_l);\n",
144 |     "ax22.set_xlabel(r'inverse temperature ($\\lambda$)')\n",
145 |     "\n",
146 |     "# fraction of confidence sets with infinite size\n",
147 |     "ax22inf = ax22.twinx()\n",
148 |     "ax22inf.plot(lambda_l, np.mean(fracinf_lxt, axis=1), '--o', dashes=(1, 0.5),  linewidth=2, markersize=4, alpha=0.8, c=cmain);\n",
149 |     "ax22inf.set_ylabel('  frac. inf.', fontsize=5.5, rotation=270, labelpad=-5);\n",
150 |     "ax22inf.set_yticks([0, 0.16]);\n",
151 |     "\n",
152 |     "\n",
153 |     "# ===== trade-off with certainty =====\n",
154 |     "\n",
155 |     "nnkmeanpredfit = -0.4723137617111206  # computed in aav-experiments\n",
156 |     "ax3.plot(meanpredfit_l, navglen_l, '-o',  linewidth=2, markersize=4, alpha=0.8, c=cmain);\n",
157 |     "ax3.set_xlabel('mean predicted fitness');\n",
158 |     "ax3.set_ylabel('mean conf. set size', labelpad=-0.1);\n",
159 |     "ax3.set_title('(c)');\n",
160 |     "ax3.axvline(nnkmeanpredfit, linestyle=\"--\", alpha=0.8, color='k', label='training distribution')\n",
161 |     "ax3.legend(fontsize=6, loc=(0.1, 0.78));\n",
162 |     "\n",
163 |     "xoff = [-0.15, 0.06, -0.18, 0.08, 0.08, -0.4, -0.4]\n",
164 |     "yoff = [0.002, -0.003, 0.004, 0, 0, -0.001, -0.001]\n",
165 |     "for l, lmbda in enumerate(lambda_l):\n",
166 |     "    ax3.annotate(r'$\\lambda = {}$'.format(lmbda),\n",
167 |     "                 (meanpredfit_l[l] + xoff[l], navglen_l[l] + yoff[l]), fontsize=6)\n",
168 |     "\n",
169 |     "if save_fig:\n",
170 |     "    plt.savefig(fname, dpi=300, bbox_inches='tight')"
171 |    ]
172 |   }
173 |  ],
174 |  "metadata": {
175 |   "kernelspec": {
176 |    "display_name": "TensorFlow-GPU-2.1.0",
177 |    "language": "python",
178 |    "name": "tf-gpu"
179 |   },
180 |   "language_info": {
181 |    "codemirror_mode": {
182 |     "name": "ipython",
183 |     "version": 3
184 |    },
185 |    "file_extension": ".py",
186 |    "mimetype": "text/x-python",
187 |    "name": "python",
188 |    "nbconvert_exporter": "python",
189 |    "pygments_lexer": "ipython3",
190 |    "version": "3.7.7"
191 |   }
192 |  },
193 |  "nbformat": 4,
194 |  "nbformat_minor": 4
195 | }
196 | 


--------------------------------------------------------------------------------
/notebooks/fluorescence-experiments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "This notebook contains the script for reproducing the experimental results shown in Figs. 3, 4, and S2."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import time\n",
 19 |     "from importlib import reload\n",
 20 |     "module_path = os.path.abspath(os.path.join('../'))\n",
 21 |     "if module_path not in sys.path:\n",
 22 |     "    sys.path.append(module_path)\n",
 23 |     "    \n",
 24 |     "import numpy as np\n",
 25 |     "    \n",
 26 |     "import assay\n",
 27 |     "import calibrate as cal"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 3,
 33 |    "metadata": {},
 34 |    "outputs": [
 35 |     {
 36 |      "name": "stdout",
 37 |      "output_type": "stream",
 38 |      "text": [
 39 |       "Using 92 order-2 features\n",
 40 |       "Loading estimated measurement noise SD computed using order 7 and significance level 0.01\n",
 41 |       "red, 384, 6. 100 trials. SCS, FCS coverage: 0.8800, 0.8600. 752.7 s\n",
 42 |       "red, 384, 6. 200 trials. SCS, FCS coverage: 0.9050, 0.8950. 1523.3 s\n",
 43 |       "red, 384, 6. 300 trials. SCS, FCS coverage: 0.9100, 0.9000. 2321.2 s\n",
 44 |       "red, 384, 6. 400 trials. SCS, FCS coverage: 0.9050, 0.8975. 3115.3 s\n",
 45 |       "red, 384, 6. 500 trials. SCS, FCS coverage: 0.9040, 0.8960. 3886.5 s\n",
 46 |       "red, 384, 6. 600 trials. SCS, FCS coverage: 0.9083, 0.8967. 4682.9 s\n",
 47 |       "red, 384, 6. 700 trials. SCS, FCS coverage: 0.9129, 0.9014. 5508.1 s\n",
 48 |       "red, 384, 6. 800 trials. SCS, FCS coverage: 0.9100, 0.9012. 6339.8 s\n",
 49 |       "red, 384, 6. 900 trials. SCS, FCS coverage: 0.9156, 0.9056. 7130.1 s\n",
 50 |       "red, 384, 6. 1000 trials. SCS, FCS coverage: 0.9170, 0.9060. 7918.1 s\n",
 51 |       "red, 384, 6. 1100 trials. SCS, FCS coverage: 0.9182, 0.9082. 8694.7 s\n",
 52 |       "red, 384, 6. 1200 trials. SCS, FCS coverage: 0.9183, 0.9092. 9461.5 s\n",
 53 |       "red, 384, 6. 1300 trials. SCS, FCS coverage: 0.9192, 0.9092. 10214.7 s\n",
 54 |       "red, 384, 6. 1400 trials. SCS, FCS coverage: 0.9207, 0.9107. 10968.0 s\n",
 55 |       "red, 384, 6. 1500 trials. SCS, FCS coverage: 0.9233, 0.9140. 11721.7 s\n",
 56 |       "red, 384, 6. 1600 trials. SCS, FCS coverage: 0.9237, 0.9144. 12484.3 s\n",
 57 |       "red, 384, 6. 1700 trials. SCS, FCS coverage: 0.9259, 0.9165. 13244.3 s\n",
 58 |       "red, 384, 6. 1800 trials. SCS, FCS coverage: 0.9244, 0.9150. 13986.6 s\n",
 59 |       "red, 384, 6. 1900 trials. SCS, FCS coverage: 0.9253, 0.9163. 14725.1 s\n",
 60 |       "red, 384, 6. 2000 trials. SCS, FCS coverage: 0.9270, 0.9160. 15472.0 s\n"
 61 |      ]
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "reload(cal)\n",
 66 |     "reload(assay)\n",
 67 |     "\n",
 68 |     "alpha = 0.1                           # miscoverage\n",
 69 |     "n_trains = [96, 192, 384]             # number of training points\n",
 70 |     "ntrain2reg = {96: 10, 192: 1, 384: 1} # ridge regularization strength (gamma in code and paper)\n",
 71 |     "n_seed = 2000                         # number of random trials\n",
 72 |     "lmbdas = [0, 2, 4, 6]                 # lambda, inverse temperature\n",
 73 |     "y_increment = 0.02                    # grid spacing between candidate label values, \\Delta in paper\n",
 74 |     "ys = np.arange(0, 2.21, y_increment)  # candidate label values, \\mathcal{Y} in paper\n",
 75 |     "order = 2                             # complexity of features. 1 encodes the AA at each site,\n",
 76 |     "                                      # 2 the AAs at each pair of sites,\n",
 77 |     "                                      # 3 the AAs at each set of 3 sites, etc.\n",
 78 |     "        \n",
 79 |     "# likelihood under the training input distribution, p_X in paper (uniform: 1 / 2^13 for every input)\n",
 80 |     "ptrain_fn = lambda x: (1.0 / np.power(2, 13)) * np.ones([x.shape[0]])\n",
 81 |     "for fitness_str in ['red']:\n",
 82 |     "    \n",
 83 |     "    # featurize all sequences in combinatorially complete dataset\n",
 84 |     "    data = assay.PoelwijkData(fitness_str, order=order)\n",
 85 |     "    \n",
 86 |     "    for t, n_train in enumerate(n_trains):\n",
 87 |     "\n",
 88 |     "        reg = ntrain2reg[n_train]\n",
 89 |     "        fcs = cal.ConformalRidgeFeedbackCovariateShift(ptrain_fn, ys, data.X_nxp, reg)\n",
 90 |     "        scs = cal.ConformalRidgeStandardCovariateShift(ptrain_fn, ys, data.X_nxp, reg)\n",
 91 |     "\n",
 92 |     "        for l, lmbda in enumerate(lmbdas):\n",
 93 |     "\n",
 94 |     "            fset_s, sset_s = [], []\n",
 95 |     "            fcov_s, scov_s = np.zeros([n_seed]), np.zeros([n_seed])\n",
 96 |     "            ytest_s, predtest_s = np.zeros([n_seed]), np.zeros([n_seed])\n",
 97 |     "            t0 = time.time()\n",
 98 |     "\n",
 99 |     "            for seed in range(n_seed):\n",
100 |     "                \n",
101 |     "                # sample training and designed data\n",
102 |     "                Xtrain_nxp, ytrain_n, Xtest_1xp, ytest_1, pred_1 = assay.get_training_and_designed_data(\n",
103 |     "                    data, n_train, reg, lmbda, seed=seed )\n",
104 |     "                ytest_s[seed] = ytest_1[0]\n",
105 |     "                predtest_s[seed] = pred_1[0]\n",
106 |     "\n",
107 |     "                # construct confidence set under feedback covariate shift\n",
108 |     "                fset, _ = fcs.get_confidence_set(Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, alpha=alpha) \n",
109 |     "                fset_s.append(fset)\n",
110 |     "                fcov_s[seed] = cal.is_covered(ytest_s[seed], fset, y_increment)\n",
111 |     "\n",
112 |     "                # construct confidence set under standard covariate shift\n",
113 |     "                sset, _ = scs.get_confidence_set(Xtrain_nxp, ytrain_n, Xtest_1xp, lmbda, alpha=alpha) \n",
114 |     "                sset_s.append(sset)\n",
115 |     "                scov_s[seed] = cal.is_covered(ytest_s[seed], sset, y_increment)\n",
116 |     "\n",
117 |     "                if (seed + 1) % 100 == 0:\n",
118 |     "                    print(\"{}, {}, {}. {} trials. SCS, FCS coverage: {:.4f}, {:.4f}. {:.1f} s\".format(\n",
119 |     "                        fitness_str, n_train, lmbda, seed + 1,\n",
120 |     "                        np.mean(scov_s[: seed + 1]), np.mean(fcov_s[: seed + 1]), time.time() - t0))\n",
121 |     "\n",
122 |     "            np.savez('../fluorescence/{}_n{}_lambda{}_alpha{}_gamma{}.npz'.format(\n",
123 |     "                fitness_str, n_train, lmbda, alpha, reg),\n",
124 |     "                     ytest_s=ytest_s, predtest_s=predtest_s,\n",
125 |     "                     fset_s=fset_s, fcov_s=fcov_s, sset_s=sset_s, scov_s=scov_s, \n",
126 |     "                    )"
127 |    ]
128 |   }
129 |  ],
130 |  "metadata": {
131 |   "kernelspec": {
132 |    "display_name": "TensorFlow-GPU-2.1.0",
133 |    "language": "python",
134 |    "name": "tf-gpu"
135 |   },
136 |   "language_info": {
137 |    "codemirror_mode": {
138 |     "name": "ipython",
139 |     "version": 3
140 |    },
141 |    "file_extension": ".py",
142 |    "mimetype": "text/x-python",
143 |    "name": "python",
144 |    "nbconvert_exporter": "python",
145 |    "pygments_lexer": "ipython3",
146 |    "version": "3.7.7"
147 |   }
148 |  },
149 |  "nbformat": 4,
150 |  "nbformat_minor": 4
151 | }
152 | 


--------------------------------------------------------------------------------