├── EDBOLogo.png
├── LICENSE
├── README.md
├── edbo
    └── plus
    │   ├── __init__.py
    │   ├── benchmark
    │       ├── __init__.py
    │       └── multiobjective_benchmark.py
    │   ├── model.py
    │   ├── optimizer_botorch.py
    │   ├── scope_generator.py
    │   └── utils.py
├── examples
    ├── publication
    │   ├── BMS_yield_cost
    │   │   ├── 0_data_preprocessing.ipynb
    │   │   ├── 1_preprocess_data.py
    │   │   ├── 2_plot_ground_truth.py
    │   │   ├── 3_run_edbo_cost_yield_performance.py
    │   │   ├── 4_plot_performance_hypervol.py
    │   │   ├── 5_plot_MAE_and_RMSE.py
    │   │   ├── 6_distrib_plots.py
    │   │   ├── 7_plot_scope_expansion.py
    │   │   ├── 8_optimization_expanding_scope.py
    │   │   ├── 9_optimization_constraints.py
    │   │   └── data
    │   │   │   ├── PCI_PMI_cost_full.csv
    │   │   │   ├── PCI_PMI_cost_full_update.csv
    │   │   │   ├── base_dft.csv
    │   │   │   ├── clean_dft.csv
    │   │   │   ├── experiments_yield_and_cost.csv
    │   │   │   ├── ligand_dft.csv
    │   │   │   └── solvent_dft.csv
    │   ├── Crosscoupling
    │   │   ├── 1_run_experiments.py
    │   │   ├── campaigns
    │   │   │   ├── 0_recalculate_predictions.py
    │   │   │   ├── 1_analysis.py
    │   │   │   ├── challenging_campaign_cvt
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round1.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round2.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round3.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round4.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round5.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round6.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round7.csv
    │   │   │   │   ├── predictions_1.csv
    │   │   │   │   ├── predictions_2.csv
    │   │   │   │   ├── predictions_3.csv
    │   │   │   │   ├── predictions_4.csv
    │   │   │   │   ├── predictions_5.csv
    │   │   │   │   ├── predictions_6.csv
    │   │   │   │   └── predictions_7.csv
    │   │   │   ├── challenging_campaign_random
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round1.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round2.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round3.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round4.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round5.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round6.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round7.csv
    │   │   │   │   ├── predictions_1.csv
    │   │   │   │   ├── predictions_2.csv
    │   │   │   │   ├── predictions_3.csv
    │   │   │   │   ├── predictions_4.csv
    │   │   │   │   ├── predictions_5.csv
    │   │   │   │   ├── predictions_6.csv
    │   │   │   │   └── predictions_7.csv
    │   │   │   ├── crosscoupling_results_challenging_campaign_cvt.csv
    │   │   │   └── easy_campaign
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round0.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round1.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round2.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round3.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round4.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round5.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round6.csv
    │   │   │   │   ├── edbo_crosscoupling_photoredox_yield_ee_round7.csv
    │   │   │   │   ├── predictions_1.csv
    │   │   │   │   ├── predictions_2.csv
    │   │   │   │   ├── predictions_3.csv
    │   │   │   │   ├── predictions_4.csv
    │   │   │   │   ├── predictions_5.csv
    │   │   │   │   ├── predictions_6.csv
    │   │   │   │   └── predictions_7.csv
    │   │   └── edbo_crosscoupling_photoredox_yield_ee.csv
    │   ├── Suzuki
    │   │   ├── 0_clean_dft.py
    │   │   ├── 0_clean_mordred.py
    │   │   ├── 1_run_ohe.py
    │   │   ├── 2_run_dft.py
    │   │   ├── 3_run_mordred.py
    │   │   ├── 4_random_features.py
    │   │   ├── data
    │   │   │   ├── dataset_B1.csv
    │   │   │   ├── dataset_B2.csv
    │   │   │   ├── dataset_B2_DFT_clean.csv
    │   │   │   ├── dataset_B3.csv
    │   │   │   └── dataset_B3_Mordred_clean.csv
    │   │   ├── performance
    │   │   │   ├── 1_merge_all.py
    │   │   │   ├── 2_plot_ground_truth.py
    │   │   │   ├── 3_plot_decision_pathways_objectives.py
    │   │   │   ├── 4_plot_performance.py
    │   │   │   ├── 5_find_entry.py
    │   │   │   └── 7_plot_performance_acquisition_function.py
    │   │   └── performance_acq
    │   │   │   ├── 1_merge_all.py
    │   │   │   └── 2_plot_acq_batch.py
    │   └── Virtual-experimentation
    │   │   ├── 1_benchmark.py
    │   │   ├── data
    │   │       └── data.csv
    │   │   └── performance
    │   │       ├── 1_merge_all.py
    │   │       ├── 2_plot_ground_truth.py
    │   │       ├── 3_plot_performance_acquisition_function.py
    │   │       └── 4_hypervol_sampling.py
    └── tutorials
    │   ├── 1_CLI_example.ipynb
    │   └── 2_EDBO_WebApp_Tutorial.pdf
├── requirements.txt
├── setup.cfg
└── setup.py


/EDBOLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doyle-lab-ucla/edboplus/f5d1687e834d33c77598767c5383a919f9a58034/EDBOLogo.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Jose A. Garrido Torres
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # <img src="EDBOLogo.png" width="190">
 3 | 
 4 | ## **EDBO+**. Bayesian reaction optimization as a tool for chemical synthesis
 5 | 
 6 | WebApp: https://www.edbowebapp.com
 7 | 
 8 | **Reference:** Garrido Torres, Jose A.; Lau, Sii Hong; Anchuri, Pranay; Stevens, Jason M.; Tabora, Jose E.; Li, Jun; Borovika, Alina; Adams, Ryan P.; Doyle, Abigail G. "A Multi-Objective Active Learning Platform and Web App for Reaction Optimization".
 9 | 
10 | **DOI:** 
11 | 
12 | 10.26434/chemrxiv-2022-cljcp
13 | 
14 | 10.1021/jacs.2c08592
15 | 
16 | **Links**:
17 | [ChemRxiv](https://chemrxiv.org/engage/chemrxiv/article-details/62f6966269f3a5df46b5584b), 
18 | [JACS](https://pubs.acs.org/doi/full/10.1021/jacs.2c08592)
19 | 
20 | 
21 | <br>
22 | 
23 | ---
24 | 
25 | <br>
26 | 
27 | ### Installation:
28 | 
29 | <br>
30 | 
31 | (1) Create anaconda environment:
32 | 
33 | ```
34 | conda create --name edbo_env python=3.8
35 | ```
36 | 
37 | (2) Activate conda environment:
38 | 
39 | ```
40 | conda activate edbo_env
41 | ```
42 | 
43 | (3) Install EDBO+ dependencies:
44 | 
45 | ```
46 | pip install -e .
47 | ```
48 | 
49 | <br>
50 | 
51 | ---
52 | 
53 | <br>
54 | 
55 | #### **Note**: to run the notebook tutorials, install JupyterLab:
56 | 
57 | ```
58 | conda install jupyterlab
59 | ```
60 | 


--------------------------------------------------------------------------------
/edbo/plus/__init__.py:
--------------------------------------------------------------------------------
1 | from .optimizer_botorch import *


--------------------------------------------------------------------------------
/edbo/plus/benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doyle-lab-ucla/edboplus/f5d1687e834d33c77598767c5383a919f9a58034/edbo/plus/benchmark/__init__.py


--------------------------------------------------------------------------------
/edbo/plus/model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import torch
 3 | import gpytorch
 4 | from gpytorch.kernels import MaternKernel, ScaleKernel
 5 | from gpytorch.priors import GammaPrior
 6 | from gpytorch.constraints import GreaterThan
 7 | import numpy as np
 8 | 
 9 | tkwargs = {
10 |     "dtype": torch.double,
11 |     "device": torch.device("cpu"),
12 | }
13 | 
def build_and_optimize_model(train_x, train_y, training_iter=1000, lr=0.1):
    """Build an exact GP regression model and fit its hyperparameters.

    The model uses a constant mean and a scaled Matern kernel with one
    lengthscale per feature (ARD). Gamma priors are placed on the
    lengthscale, the output scale and the likelihood noise. The
    hyperparameters are fitted with Adam by minimising the negative exact
    marginal log likelihood.

    Parameters
    ----------
    train_x: torch.Tensor
        Training features; the second dimension is taken as the number of
        features.
    train_y: torch.Tensor
        Training targets. The last dimension is squeezed before it is
        passed to the marginal log likelihood.
    training_iter: int
        Number of Adam steps (default 1000).
    lr: float
        Adam learning rate (default 0.1).

    Returns
    -------
    tuple
        ``(model, likelihood)``, both switched to evaluation mode.
    """

    gp_options = {
        'ls_prior1': 2.0, 'ls_prior2': 0.2, 'ls_prior3': 5.0,
        'out_prior1': 5.0, 'out_prior2': 0.5, 'out_prior3': 8.0,
        'noise_prior1': 1.5, 'noise_prior2': 0.1, 'noise_prior3': 5.0,
        'noise_constraint': 1e-5,
    }

    n_features = np.shape(train_x)[1]

    class ExactGPModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(ExactGPModel, self).__init__(train_x, train_y,
                                               likelihood)
            self.mean_module = gpytorch.means.ConstantMean()

            kernels = MaternKernel(
                ard_num_dims=n_features,
                lengthscale_prior=GammaPrior(gp_options['ls_prior1'],
                                             gp_options['ls_prior2'])
            )

            self.covar_module = ScaleKernel(
                kernels,
                outputscale_prior=GammaPrior(gp_options['out_prior1'],
                                             gp_options['out_prior2']))

            # Initial lengthscale. Try the scalar form first and fall back
            # to an explicit per-feature tensor if the setter rejects it.
            ls_value = gp_options['ls_prior3']
            try:
                self.covar_module.base_kernel.lengthscale = ls_value
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still propagate.
                ls_init = torch.ones(n_features).to(**tkwargs) * ls_value
                self.covar_module.base_kernel.lengthscale = ls_init

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    # Initialize likelihood (with a Gamma prior on the noise) and model.
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        GammaPrior(gp_options['noise_prior1'], gp_options['noise_prior2'])
    )

    likelihood.noise = gp_options['noise_prior3']
    model = ExactGPModel(train_x, train_y, likelihood).to(**tkwargs)

    # Lower bound on the raw noise parameter.
    model.likelihood.noise_covar.register_constraint(
        "raw_noise", GreaterThan(gp_options['noise_constraint'])
    )

    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
    ], lr=lr)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    # The target does not change between iterations, so convert it once.
    target_y = train_y.squeeze(-1).to(**tkwargs)

    for _ in range(training_iter):
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, target_y)
        loss.backward()
        optimizer.step()

    model.eval()
    likelihood.eval()
    return model, likelihood  # Optimized model
90 | 
91 | 


--------------------------------------------------------------------------------
/edbo/plus/optimizer_botorch.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | import random
  4 | import sys
  5 | import warnings
  6 | 
  7 | from botorch.acquisition.monte_carlo import qExpectedImprovement
  8 | from botorch.acquisition.multi_objective.monte_carlo import \
  9 |     qExpectedHypervolumeImprovement, qNoisyExpectedHypervolumeImprovement
 10 | from botorch.models import SingleTaskGP, ModelListGP
 11 | from botorch.optim import optimize_acqf_discrete
 12 | from botorch.sampling.samplers import SobolQMCNormalSampler, IIDNormalSampler
 13 | from botorch.utils.multi_objective.box_decompositions import \
 14 |     NondominatedPartitioning
 15 | from idaes.surrogate.pysmo.sampling import LatinHypercubeSampling, CVTSampling
 16 | import numpy as np
 17 | from ordered_set import OrderedSet
 18 | import pandas as pd
 19 | from scipy.stats import norm
 20 | from sklearn.preprocessing import MinMaxScaler
 21 | from scipy.spatial.distance import cdist
 22 | import torch
 23 | 
 24 | from .utils import EDBOStandardScaler
 25 | from .model import build_and_optimize_model
 26 | from .scope_generator import create_reaction_scope
 27 | 
 28 | tkwargs = {
 29 |     "dtype": torch.double,
 30 |     "device": torch.device("cpu"),
 31 | }
 32 | 
 33 | 
 34 | class EDBOplus:
 35 | 
 36 |     def __init__(self):
 37 | 
 38 |         self.predicted_mean = []
 39 |         self.predicted_variance = []
 40 | 
 41 |     @staticmethod
 42 |     def generate_reaction_scope(components, directory='./', filename='reaction.csv',
 43 |                                 check_overwrite=True):
 44 |         """
 45 |         Creates a reaction scope from a dictionary of components and values.
 46 |         """
 47 |         print("Generating a reaction scope...")
 48 |         df, n_combinations = create_reaction_scope(components=components, directory=directory,
 49 |                                    filename=filename,
 50 |                                    check_overwrite=check_overwrite)
 51 |         print(f"The scope was generated and contains {n_combinations} possible reactions!")
 52 |         return df
 53 | 
 54 |     @staticmethod
 55 |     def _init_sampling(df, batch, sampling_method, seed):
 56 | 
 57 |         np.random.seed(seed)
 58 |         random.seed(seed)
 59 |         numeric_cols = df._get_numeric_data().columns
 60 |         ohe_columns = list(OrderedSet(df.columns) - OrderedSet(numeric_cols))
 61 |         if len(ohe_columns) > 0:
 62 |             print(f"The following columns are categorical and will be encoded"
 63 |                   f" using One-Hot-Encoding: {ohe_columns}")
 64 |         # Encode OHE.
 65 |         df_sampling = pd.get_dummies(df, prefix=ohe_columns,
 66 |                                      columns=ohe_columns, drop_first=True)
 67 |         
 68 |         class HiddenPrints:
 69 |             def __enter__(self):
 70 |                 self._original_stdout = sys.stdout
 71 |                 sys.stdout = open(os.devnull, 'w')
 72 | 
 73 |             def __exit__(self, exc_type, exc_val, exc_tb):
 74 |                 sys.stdout.close()
 75 |                 sys.stdout = self._original_stdout
 76 | 
 77 |         # Order df according to initial sampling method (random samples).
 78 |         with HiddenPrints():
 79 |             idaes = None
 80 |             if sampling_method == 'random':
 81 |                 samples = df_sampling.sample(n=batch, random_state=seed)
 82 |             elif sampling_method.lower() == 'lhs':
 83 |                 idaes = LatinHypercubeSampling(df_sampling, batch, sampling_type="selection")
 84 |             elif sampling_method.lower() == 'cvt':
 85 |                 idaes = CVTSampling(df_sampling, batch, sampling_type="selection")
 86 | 
 87 |             if idaes is not None:
 88 |                 samples = idaes.sample_points()
 89 |             
 90 |             # Sometimes the LHS or CVT sampling methods return less samples than requested. Add random samples in this case.
 91 |             additional_samples = None
 92 |             if len(samples) < batch:
 93 |                 additional_samples = df.sample(n=batch-len(samples), random_state=seed, replace=True)
 94 |                 additional_samples = additional_samples.reset_index(drop=True)
 95 |             # Add the additional samples to the samples dataframe. If some of the additional_samples are already in samples, generate new ones until the batch size is reached.
 96 |             extra_seed = 1
 97 |             while len(samples) < batch:
 98 |                 samples = pd.concat([samples,additional_samples]).drop_duplicates(ignore_index=True)
 99 |                 additional_samples = df.sample(n=batch-len(samples), random_state=seed+extra_seed, replace=True)
100 |                 extra_seed +=1
101 | 
102 |         # Get index of the best samples according to the random sampling method.
103 |         df_sampling_matrix = df_sampling.to_numpy()
104 |         priority_list = np.zeros_like(df_sampling.index)
105 | 
106 |         for sample in samples.to_numpy():
107 |             d_i = cdist([sample], df_sampling_matrix, metric='cityblock')
108 |             a = np.argmin(d_i)
109 |             priority_list[a] = 1.
110 |         df['priority'] = priority_list
111 | 
112 |         print(f"Generated {len(samples)} initial samples using {sampling_method} sampling (seed = {seed}). Run finished!")
113 | 
114 |         return df
115 |     
116 | 
117 |     def run(self,
118 |             objectives, objective_mode, objective_thresholds=None,
119 |             directory='.', filename='reaction.csv',
120 |             columns_features='all',
121 |             batch=5, init_sampling_method='cvt', seed=0,
122 |             scaler_features=MinMaxScaler(),
123 |             scaler_objectives=EDBOStandardScaler(),
124 |             acquisition_function='NoisyEHVI',
125 |             acquisition_function_sampler='SobolQMCNormalSampler'):
126 | 
127 |         """
128 |         Parameters
129 |         ----------
130 |         objectives: list
131 |             list of string containing the name for each objective.
132 |             Example:
133 |                 objectives = ['yield', 'cost', 'impurity']
134 | 
135 |         objective_mode: list
136 |             list to select whether the objective should be maximized or minimized.
137 |             Examples:
138 |                 A) Example for single-objective optimization:
139 |                     objective_mode = ['max']
140 |                 B) Example for multi-objective optimization:
141 |                     objective_mode = ['max', 'min', 'min']
142 | 
143 |         objective_thresholds: list
144 |             List of worst case values for each objective.
145 |             Example:
146 |                 objective_threshold = [50.0, 10.0, 10.0]
147 | 
148 |         columns_features: list
149 |             List containing the names of the columns to be included in the regression model. By default set to
150 |             'all', which means the algorithm will automatically select all the columns that are not in
151 |             the *objectives* list.
152 | 
153 |         batch: int
154 |             Number of experiments that you want to run in parallel. For instance *batch = 5* means that you
155 |             will run 5 experiments in each EDBO+ run. You can change this number at any stage of the optimization,
156 |             so don't worry if you change  your mind after creating or initializing the reaction scope.
157 | 
158 |         get_predictions: boolean
159 |             If True it will print out a *csv file* with the predictions.
160 |             You can also access the *predicted_mean* and *predicted_variance* through the EDBOPlus class.
161 | 
162 |         directory: string
163 |             name of the directory to save the results of the optimization.
164 | 
165 |         filename: string
166 |             Name of the file to save a *csv* with the priority list. If *get_predictions=True* EDBO+ will automatically
167 |             save a second file including the predictions (*pred_filename.csv*).
168 | 
169 |         init_sampling_method: string:
170 |             Method for selecting the first samples in the scope (in absence)  Choices are:
171 |             - 'random' : Random seed (as implemented in Pandas).
172 |             - 'lhs' : LatinHypercube sampling.
173 |             - 'cvt' : CVT sampling.
174 | 
175 |         scaler_features: sklearn class
176 |             sklearn.preprocessing class for transforming the features.
177 |             Example:
178 |                 sklearn.preprocessing.MinMaxScaler()
179 | 
180 |         scaler_objectives: sklearn class
181 |             sklearn.preprocessing class for transforming the objective values.
182 |             Examples:
183 |                 - sklearn.preprocessing.StandardScaler()
184 |             Default:
185 |                 EDBOStandardScaler()
186 | 
187 |         seed: int
188 |             Seed for the random initialization.
189 | 
190 |         acquisition_function_sampler: string
191 |             Options are: 'SobolQMCNormalSampler' or 'IIDNormalSampler'.
192 | 
193 |         """
194 | 
195 |         wdir = Path(directory)
196 |         csv_filename = wdir.joinpath(filename)
197 |         torch.manual_seed(seed=seed)
198 |         np.random.seed(seed)
199 |         self.acquisition_sampler = acquisition_function_sampler
200 | 
201 |         # 1. Safe checks.
202 |         self.objective_names = objectives
203 |         # Check whether the columns_features contains the objectives.
204 |         if columns_features != 'all':
205 |             for objective in objectives:
206 |                 if objective in columns_features:
207 |                     columns_features.remove(objective)
208 |                 if 'priority' in columns_features:
209 |                     columns_features.remove('priority')
210 | 
211 |         # Check objectives is a list (even for single objective optimization).
212 |         ohe_features = False
213 |         if type(objectives) != list:
214 |             objectives = [objectives]
215 |         if type(objective_mode) != list:
216 |             objective_mode = [objective_mode]
217 | 
218 |         # Check that the user's scope exists.
219 |         msg = "Scope was not found. Please create a scope (csv file)."
220 |         assert os.path.exists(csv_filename), msg
221 | 
222 |         # 2. Load reaction.
223 |         df = pd.read_csv(f"{csv_filename}")
224 |         df = df.dropna(axis='columns', how='all')
225 |         original_df = df.copy(deep=True)  # Make a copy of the original data.
226 | 
227 |         # 2.1. Initialize sampling (only in the first iteration).
228 |         obj_in_df = list(filter(lambda x: x in df.columns.values, objectives))
229 | 
230 |         # TODO CHECK: Check whether new objective has been added – if not add PENDING.
231 |         for obj_i in self.objective_names:
232 |             if obj_i not in original_df.columns.values:
233 |                 original_df[obj_i] = ['PENDING'] * len(original_df.values)
234 | 
235 |         if columns_features != 'all':
236 |             if 'priority' in df.columns.values:
237 |                 for obj_i in objectives:
238 |                     if obj_i not in df.columns.values:
239 |                         df[obj_i] = ['PENDING'] * len(df.values)
240 | 
241 |                 df = df[columns_features + objectives + ['priority']]
242 |             else:
243 |                 if len(obj_in_df) == 0:
244 |                     df = df[columns_features]
245 |                 else:
246 |                     df = df[columns_features + objectives]
247 | 
248 |         # No objectives columns in the scope? Then random initialization.
249 |         if len(obj_in_df) == 0:
250 |             print("There are no experimental observations yet. Random samples will be drawn.")
251 |             df = self._init_sampling(df=df, batch=batch, seed=seed,
252 |                                      sampling_method=init_sampling_method)
253 |             original_df['priority'] = df['priority']
254 |             # Append objectives.
255 |             for objective in objectives:
256 |                 if objective not in original_df.columns.values:
257 |                     original_df[objective] = ['PENDING'] * len(original_df)
258 | 
259 |             # Sort values and save dataframe.
260 |             original_df = original_df.sort_values('priority', ascending=False)
261 |             original_df = original_df.loc[:,~original_df.columns.str.contains('^Unnamed')]
262 |             original_df.to_csv(csv_filename, index=False)
263 |             return original_df
264 | 
265 |         if columns_features == 'all':  # replacing with actual list of all features for printout
266 |             columns_features = list(set(df.columns.tolist())- set(objectives) - set(['priority']))
267 |         print(f"This run will optimize for the following objectives: {objectives}")
268 |         print(f"The following features will be used: {columns_features}")
269 | 
270 |         # 3. Separate train and test data.
271 | 
272 |         # 3.1. Auto-detect dummy features (one-hot-encoding).
273 |         numeric_cols = df._get_numeric_data().columns
274 |         for nc in numeric_cols:
275 |             df[nc] = pd.to_numeric(df[nc], downcast='float')
276 |         ohe_columns = list(OrderedSet(df.columns) - OrderedSet(numeric_cols))
277 |         ohe_columns = list(OrderedSet(ohe_columns) - OrderedSet(objectives))
278 | 
279 |         if len(ohe_columns) > 0:
280 |             print(f"The following columns are categorical and will be encoded"
281 |                   f" using One-Hot-Encoding: {ohe_columns}")
282 |             ohe_features = True
283 | 
284 |         data = pd.get_dummies(df, prefix=ohe_columns, columns=ohe_columns, drop_first=True)
285 | 
286 |         # 3.2. Any sample with a value 'PENDING' in any objective is a test.
287 |         idx_test = (data[data.apply(lambda r: r.astype(str).str.contains('PENDING', case=False).any(), axis=1)]).index.values
288 |         idx_train = (data[~data.apply(lambda r: r.astype(str).str.contains('PENDING', case=False).any(), axis=1)]).index.values
289 | 
290 |         # Data only contains featurized information (train and test).
291 |         df_train_y = data.loc[idx_train][objectives]
292 |         if 'priority' in data.columns.tolist():
293 |             data = data.drop(columns=objectives + ['priority'])
294 |         else:
295 |             data = data.drop(columns=objectives)
296 |         df_train_x = data.loc[idx_train]
297 |         df_test_x = data.loc[idx_test]
298 | 
299 |         if len(df_train_x.values) == 0:
300 |             msg = 'The scope was already generated, please ' \
301 |                   'insert at least one experimental observation ' \
302 |                   'value and then press run.'
303 |             print(msg)
304 |             return original_df
305 | 
306 |         # Run the BO process.
307 |         priority_list = self._model_run(
308 |                 data=data,
309 |                 df_train_x=df_train_x,
310 |                 df_test_x=df_test_x,
311 |                 df_train_y=df_train_y,
312 |                 batch=batch,
313 |                 objective_mode=objective_mode,
314 |                 objective_thresholds=objective_thresholds,
315 |                 seed=seed,
316 |                 scaler_x=scaler_features,
317 |                 scaler_y=scaler_objectives,
318 |                 acquisition_function=acquisition_function
319 |         )
320 | 
321 |         # Low priority to the samples that have been already collected.
322 |         for i in range(0, len(idx_train)):
323 |             priority_list[idx_train[i]] = -1
324 | 
325 |         original_df['priority'] = priority_list
326 | 
327 |         cols_sort = ['priority'] + original_df.columns.values.tolist()
328 |         # Attach objectives predictions and expected improvement.
329 |         cols_for_preds = []
330 |         for idx_obj in range(0, len(objectives)):
331 |             name = objectives[idx_obj]
332 |             mean = self.predicted_mean[:, idx_obj]
333 |             var = self.predicted_variance[:, idx_obj]
334 |             ei = self.ei[:, idx_obj]
335 |             original_df[f"{name}_predicted_mean"] = mean
336 |             original_df[f"{name}_predicted_variance"] = var
337 |             original_df[f"{name}_expected_improvement"] = ei
338 |             cols_for_preds.append([f"{name}_predicted_mean",
339 |                                    f"{name}_predicted_variance",
340 |                                    f"{name}_expected_improvement"
341 |                                    ])
342 |         cols_for_preds = np.ravel(cols_for_preds)
343 | 
344 |         original_df = original_df.sort_values(cols_sort, ascending=False)
345 |         # Save extra df containing predictions, uncertainties and EI.
346 |         original_df.to_csv(f"{directory}/pred_{filename}", index=False)
347 |         # Drop predictions, uncertainties and EI.
348 |         original_df = original_df.drop(columns=cols_for_preds, axis='columns')
349 |         original_df = original_df.sort_values(cols_sort, ascending=False)
350 |         original_df.to_csv(csv_filename, index=False)
351 | 
352 |         print("Run finished!")
353 |         return original_df
354 | 
355 |     def _model_run(self, data, df_train_x,  df_test_x, df_train_y, batch,
356 |                    objective_mode, objective_thresholds, seed,
357 |                    scaler_x, scaler_y, acquisition_function):
358 |         """
359 |         Runs the surrogate machine learning model.
360 |         Returns a priority list for a given scope (top priority to low priority).
361 |         """
362 | 
363 |         # Check number of objectives.
364 |         n_objectives = len(df_train_y.columns.values)
365 | 
366 |         scaler_x.fit(df_train_x.to_numpy())
367 |         init_train = scaler_x.transform(df_train_x.to_numpy())
368 |         test_xnp = scaler_x.transform(df_test_x.to_numpy())
369 |         test_x = torch.tensor(test_xnp.tolist()).double().to(**tkwargs)
370 |         y = df_train_y.astype(float).to_numpy()  # not scaled.
371 | 
372 |         individual_models = []
373 |         for i in range(0, n_objectives):
374 |             if objective_mode[i].lower() == 'min':
375 |                 y[:, i] = -y[:, i]
376 |         y = scaler_y.fit_transform(y)
377 | 
378 |         print("Generating surrogate model...")
379 |         for i in range(0, n_objectives):
380 |             train_x = torch.tensor(init_train).to(**tkwargs).double()
381 |             train_y = np.array(y)[:, i]
382 |             train_y = (np.atleast_2d(train_y).reshape(len(train_y), -1))
383 |             train_y_i = torch.tensor(train_y.tolist()).to(**tkwargs).double()
384 | 
385 |             gp, likelihood = build_and_optimize_model(train_x=train_x, train_y=train_y_i,)
386 | 
387 |             model_i = SingleTaskGP(train_X=train_x, train_Y=train_y_i,
388 |                                    covar_module=gp.covar_module, likelihood=likelihood)
389 |             individual_models.append(model_i)
390 | 
391 |         print("Model generated!")
392 | 
393 |         # Reference point is the minimum seen so far.
394 |         ref_mins = np.min(y, axis=0)
395 |         if objective_thresholds is None:
396 |             ref_point = torch.tensor(ref_mins).double().to(**tkwargs)
397 |         else:
398 |             ref_point = np.zeros(n_objectives)
399 |             for i in range(0, n_objectives):
400 |                 if objective_thresholds[i] is None:
401 |                     ref_point[i] = ref_mins[i]
402 |                 else:
403 |                     ref_point[i] = objective_thresholds[i]
404 |                     if objective_mode[i].lower() == 'min':
405 |                         ref_point[i] = -ref_point[i]
406 |             # Scale.
407 |             ref_point = scaler_y.transform(np.array([ref_point]))
408 |             # Loop again.
409 |             for i in range(0, n_objectives):
410 |                 if objective_thresholds[i] is None:
411 |                     ref_point[0][i] = ref_mins[i]
412 |             ref_point = torch.tensor(ref_point[0]).double().to(**tkwargs)
413 | 
414 |         if len(data.values) > 100000:
415 |             sobol_num_samples = 64
416 |         elif len(data.values) > 50000:
417 |             sobol_num_samples = 128
418 |         elif len(data.values) > 10000:
419 |             sobol_num_samples = 256
420 |         else:
421 |             sobol_num_samples = 512
422 | 
423 |         y_torch = torch.tensor(y).to(**tkwargs).double()
424 | 
425 |         if self.acquisition_sampler == 'IIDNormalSampler':
426 |             sampler = IIDNormalSampler(num_samples=sobol_num_samples, collapse_batch_dims=True, seed=seed)
427 |         if self.acquisition_sampler == 'SobolQMCNormalSampler':
428 |             sampler = SobolQMCNormalSampler(num_samples=sobol_num_samples, collapse_batch_dims=True, seed=seed) 
429 | 
430 |         print ("Optimizing acqusition function...")
431 | 
432 |         surrogate_model = None
433 | 
434 |         if acquisition_function.lower() == 'ehvi':
435 | 
436 |             partitioning = NondominatedPartitioning(
437 |                 ref_point=ref_point,
438 |                 Y=y_torch)
439 |             
440 |             surrogate_model = ModelListGP(*individual_models)
441 |             individual_models = []  # empty to reuduce memory
442 |             
443 |             EHVI = qExpectedHypervolumeImprovement(
444 |                 model=surrogate_model, sampler=sampler,
445 |                 ref_point=ref_point,  # use known reference point
446 |                 partitioning=partitioning
447 |             )
448 | 
449 |             acq_result = optimize_acqf_discrete(
450 |                 acq_function=EHVI,
451 |                 choices=test_x,
452 |                 q=batch,
453 |                 unique=True
454 |             )
455 | 
456 | 
457 |         if acquisition_function.lower() == 'noisyehvi':
458 |             with warnings.catch_warnings():
459 |                 warnings.simplefilter("ignore")
460 |                 acq_fct = None
461 |                 if n_objectives > 1:  # NOTE: NoisyEHVI fails in case of n_objectives = 1 --> added that it uses EI in this case
462 |                     surrogate_model = ModelListGP(*individual_models)
463 |                     train_x = torch.tensor(init_train).to(**tkwargs).double()
464 |                     acq_fct = qNoisyExpectedHypervolumeImprovement(
465 |                         model=surrogate_model, sampler=sampler,
466 |                         ref_point=ref_point,
467 |                         alpha = 0.0,
468 |                         incremental_nehvi=True, X_baseline=train_x, prune_baseline=True
469 |                     )
470 |                 else:
471 |                     surrogate_model = individual_models[0]
472 |                     best_value = y_torch.max()
473 |                     acq_fct = qExpectedImprovement(
474 |                         model = surrogate_model, 
475 |                         best_f = best_value,
476 |                         sampler = sampler
477 |                     )
478 | 
479 |                 acq_result = optimize_acqf_discrete(
480 |                     acq_function=acq_fct,
481 |                     choices=test_x,
482 |                     q=batch,
483 |                     unique=True
484 |                 )
485 | 
486 |         best_samples = scaler_x.inverse_transform(acq_result[0].detach().cpu().numpy())
487 | 
488 |         print('Acquisition function optimized.')
489 | 
490 |         # Save rescaled predictions (only for first fantasy).
491 | 
492 |         # Get predictions in chunks.
493 |         chunk_size = 1000
494 |         n_chunks = len(data.values) // chunk_size
495 | 
496 |         if n_chunks == 0:
497 |             n_chunks = 1
498 | 
499 |         self.predicted_mean = np.zeros(shape=(len(data.values), n_objectives))
500 |         self.predicted_variance = np.zeros(shape=(len(data.values), n_objectives))
501 |         self.ei = np.zeros(shape=(len(data.values), n_objectives))
502 | 
503 |         observed_raw_values = df_train_y.astype(float).to_numpy()
504 | 
505 |         for i in range(0, len(data.values), n_chunks):
506 |             vals = data.values[i:i+n_chunks]
507 |             data_tensor = torch.tensor(scaler_x.transform(vals)).double().to(**tkwargs)
508 |             preds = surrogate_model.posterior(X=data_tensor)
509 |             self.predicted_mean[i:i+n_chunks] = scaler_y.inverse_transform(preds.mean.detach().cpu().numpy())
510 |             self.predicted_variance[i:i+n_chunks] = scaler_y.inverse_transform_var(preds.variance.detach().cpu().numpy())
511 | 
512 |             for j in range(0, len(objective_mode)):
513 |                 maximizing = False
514 |                 if objective_mode[j] == 'max':
515 |                     maximizing = True
516 |                 self.ei[i:i+n_chunks, j] = self.expected_improvement(
517 |                     train_y=observed_raw_values[:, j],
518 |                     mean=self.predicted_mean[i:i+n_chunks, j],
519 |                     variance=self.predicted_variance[i:i+n_chunks, j],
520 |                     maximizing=maximizing
521 |                 )
522 | 
523 |         print('Predictions and expected improvement obtained.')
524 | 
525 |         # Flip predictions if needed.
526 |         for i in range(0, len(objective_mode)):
527 |             if objective_mode[i] == 'min':
528 |                 self.predicted_mean[:, i] = -self.predicted_mean[:, i]
529 | 
530 |         # Rescale samples.
531 |         all_samples = data.values
532 | 
533 |         priority_list = [0] * len(data.values)
534 | 
535 |         # Find best samples in data.
536 |         for sample in best_samples:
537 |             d_i = cdist([sample], all_samples, metric='cityblock')
538 |             a = np.argmin(d_i)
539 |             priority_list[a] = 1.
540 | 
541 |         return priority_list
542 | 
543 |     def expected_improvement(self, train_y, mean, variance,
544 |                              maximizing=False):
545 |         """ expected_improvement
546 |         Expected improvement acquisition function.
547 |         Arguments:
548 |         ----------
549 |             mean: Numpy array.
550 |                 predicted mean of the Gaussian Process.
551 |             variance: Numpy array.
552 |                 predicted variance of the Gaussian Process.
553 |             train_y: Numpy array.
554 |                 Numpy array that contains the values of previously observed train targets.
555 |             maximizing: Boolean.
556 |                 Boolean flag that indicates whether the loss function is to be maximised or minimised.
557 |         """
558 | 
559 |         sigma = variance * 2.
560 | 
561 |         if maximizing:
562 |             loss_optimum = np.max(train_y)
563 |         else:
564 |             loss_optimum = np.min(train_y)
565 | 
566 |         scaling_factor = (-1) ** (not maximizing)
567 | 
568 |         # In case sigma equals zero
569 |         with np.errstate(divide='ignore'):
570 |             Z = scaling_factor * (mean - loss_optimum) / sigma
571 |             expected_improvement = scaling_factor * (mean - loss_optimum) * norm.cdf(Z) + sigma * norm.pdf(Z)
572 |             expected_improvement[sigma == 0.0] = 0.0
573 | 
574 |         return expected_improvement
575 | 
576 | 


--------------------------------------------------------------------------------
/edbo/plus/scope_generator.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import itertools
 3 | import pandas as pd
 4 | import os
 5 | from pathlib import Path
 6 | 
 7 | 
 8 | def create_reaction_scope(components, directory='./', filename='reaction.csv',
 9 |                           check_overwrite=True):
10 | 
11 |     """
12 |     Reaction scope generator. Pass components dictionary, each
13 |     dictionary key contains a list of the choices for a given component.
14 | 
15 |     ----------------------------------------------------------------------
16 |     Example:
17 | 
18 |     components = {'temperature': [30, 40, 50],
19 |                   'solvent': ['THF', 'DMSO'],
20 |                   'concentration': [0.1, 0.2, 0.3, 0.4, 0.5]}
21 |     ----------------------------------------------------------------------
22 | 
23 |     ----------------------------------------------------------------------
24 |     Note:
25 |         - All non-numerical choices are encoded using a One-Hot-Encoder.
26 |     ----------------------------------------------------------------------
27 | 
28 |     ----------------------------------------------------------------------
29 |     Returns:
30 |           A dataframe with name *{label}.csv* including the entire
31 |           set of choices (reaction scope).
32 |     ----------------------------------------------------------------------
33 |     """
34 | 
35 |     msg = "You need to pass a dictionary for components. \n"
36 |     assert type(components) == dict, msg
37 | 
38 |     wdir = Path(directory)
39 |     csv_filename = wdir.joinpath(filename)
40 |     # Ask to overwrite previous scope.
41 | 
42 |     if os.path.exists(csv_filename) and check_overwrite is True:
43 |         overwrite = input('Scope already exists. Overwrite? Y = yes, N = no\n')
44 |         if overwrite.lower() != 'y':
45 |             return
46 | 
47 |     # Predict how large will the scope be.
48 |     n_combinations = 0
49 |     for key in list(components.keys()):
50 |         if n_combinations == 0:
51 |             n_combinations = len(components[key])
52 |         else:
53 |             n_combinations *= len(components[key])
54 | 
55 |     # Generate initial scope.
56 |     keys = components.keys()
57 |     values = (components[key] for key in keys)
58 | 
59 |     scope = [dict(zip(keys, combination)) for combination in
60 |                 itertools.product(*values)]
61 |     df_scope = pd.DataFrame(scope)
62 |     df_scope.to_csv(csv_filename, index=False, mode='w',
63 |                     header=list(keys))
64 | 
65 |     return df_scope, n_combinations
66 | 


--------------------------------------------------------------------------------
/edbo/plus/utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | 
 4 | 
 5 | class EDBOStandardScaler:
 6 |     """
 7 |     Custom standard scaler for EDBO.
 8 |     """
 9 |     def __init__(self):
10 |         pass
11 | 
12 |     def fit(self, x):
13 |         self.mu  = np.mean(x, axis=0)
14 |         self.std = np.std(x, axis=0)
15 | 
16 |     def transform(self, x):
17 |         for obj in range(0, len(self.std)):
18 |             if self.std[obj] == 0.0:
19 |                 self.std[obj] = 1e-6
20 |         return (x-[self.mu])/[self.std]
21 | 
22 |     def fit_transform(self, x):
23 |         self.mu = np.mean(x, axis=0)
24 |         self.std = np.std(x, axis=0)
25 | 
26 |         for obj in range(0, len(self.std)):
27 |             if self.std[obj] == 0.0:
28 |                 self.std[obj] = 1e-6
29 |         return (x-[self.mu])/[self.std]
30 | 
31 |     def inverse_transform(self, x):
32 |         return x * [self.std] + [self.mu]
33 | 
34 |     def inverse_transform_var(self, x):
35 |         return x * [self.std]
36 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/0_data_preprocessing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "bfc9fc54",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import numpy as np\n",
 11 |     "import pandas as pd"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "id": "54a71f91",
 18 |    "metadata": {},
 19 |    "outputs": [
 20 |     {
 21 |      "data": {
 22 |       "text/plain": [
 23 |        "Index(['Ligand_inchi', 'Base_inchi', 'Solvent_inchi', 'Product_inchi',\n",
 24 |        "       'Electrophile_inchi', 'Nucleophile_inchi', 'Precatalyst_inchi', 'Base',\n",
 25 |        "       'Electrophile', 'Electrophile_PCI_Name', 'Ligand', 'Nucleophile',\n",
 26 |        "       'Nucleophile_PCI_Name', 'Precatalyst', 'Product', 'Solvent',\n",
 27 |        "       'Screen_ID', 'umol_Screen', 'Entry', 'Well', 'Row', 'Column',\n",
 28 |        "       'Base_Equiv', 'Electrophile_Equiv', 'Ligand_Equiv', 'Nucleophile_Equiv',\n",
 29 |        "       'Precatalyst_Equiv', 'Concentration', 'Time_h', 'Temp_C', 'SampleName',\n",
 30 |        "       'Vial', 'AP_ISO', 'AP_PDT', 'AP_STD', 'Mean_AP', 'Max_AP', 'SD_AP',\n",
 31 |        "       'Z_Score_AP', 'RelYield_PDT', 'Mean_RY', 'Max_RY', 'SD_RY',\n",
 32 |        "       'Z_Score_RY', 'Yield', 'Mean_Yield', 'Max_Yield', 'SD_Yield',\n",
 33 |        "       'Z_Score_Yield', 'Product_MW', 'Solvent_density', 'Solvent_mass',\n",
 34 |        "       'Product_mg', 'Base_Cost', 'Base_amt', 'Base_MW', 'Base_price.mol',\n",
 35 |        "       'Solvent_Cost', 'Solvent_amt', 'Solvent_MW', 'Solvent_price.mol',\n",
 36 |        "       'Ligand_Cost', 'Ligand_amt', 'Ligand_MW', 'Ligand_price.mol',\n",
 37 |        "       'Ligand_dol', 'Base_dol', 'Solvent_dol', 'reagent_cost',\n",
 38 |        "       'Nucleophile_MW', 'Electrophile_MW', 'Precatalyst_MW', 'Nucleophile_mg',\n",
 39 |        "       'Electrophile_mg', 'Precatalyst_mg', 'Ligand_mg', 'Base_mg', 'Total_mg',\n",
 40 |        "       'PMI', 'solvent mg', 'ligand_dol_will', 'base_dol_will',\n",
 41 |        "       'solvent_dol_will', 'total_cost_update'],\n",
 42 |        "      dtype='object')"
 43 |       ]
 44 |      },
 45 |      "execution_count": 2,
 46 |      "metadata": {},
 47 |      "output_type": "execute_result"
 48 |     }
 49 |    ],
 50 |    "source": [
 51 |     "df = pd.read_csv('./data/PCI_PMI_cost_full_update.csv')\n",
 52 |     "df.columns"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "id": "d4d208a8",
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "Ligand_inchi\n",
 66 |       "Base_inchi\n",
 67 |       "Solvent_inchi\n",
 68 |       "Base\n",
 69 |       "Ligand\n",
 70 |       "Solvent\n",
 71 |       "Screen_ID\n",
 72 |       "Entry\n",
 73 |       "Well\n",
 74 |       "Row\n",
 75 |       "Column\n",
 76 |       "Concentration\n",
 77 |       "Temp_C\n",
 78 |       "SampleName\n",
 79 |       "Vial\n",
 80 |       "AP_ISO\n",
 81 |       "AP_PDT\n",
 82 |       "AP_STD\n",
 83 |       "Mean_AP\n",
 84 |       "Max_AP\n",
 85 |       "SD_AP\n",
 86 |       "Z_Score_AP\n",
 87 |       "RelYield_PDT\n",
 88 |       "Mean_RY\n",
 89 |       "Max_RY\n",
 90 |       "SD_RY\n",
 91 |       "Z_Score_RY\n",
 92 |       "Yield\n",
 93 |       "Mean_Yield\n",
 94 |       "Max_Yield\n",
 95 |       "SD_Yield\n",
 96 |       "Z_Score_Yield\n",
 97 |       "Solvent_density\n",
 98 |       "Solvent_mass\n",
 99 |       "Product_mg\n",
100 |       "Base_Cost\n",
101 |       "Base_amt\n",
102 |       "Base_MW\n",
103 |       "Base_price.mol\n",
104 |       "Solvent_Cost\n",
105 |       "Solvent_amt\n",
106 |       "Solvent_MW\n",
107 |       "Solvent_price.mol\n",
108 |       "Ligand_Cost\n",
109 |       "Ligand_amt\n",
110 |       "Ligand_MW\n",
111 |       "Ligand_price.mol\n",
112 |       "Ligand_dol\n",
113 |       "Base_dol\n",
114 |       "Solvent_dol\n",
115 |       "reagent_cost\n",
116 |       "Ligand_mg\n",
117 |       "Base_mg\n",
118 |       "Total_mg\n",
119 |       "PMI\n",
120 |       "solvent mg\n",
121 |       "ligand_dol_will\n",
122 |       "base_dol_will\n",
123 |       "solvent_dol_will\n",
124 |       "total_cost_update\n"
125 |      ]
126 |     }
127 |    ],
128 |    "source": [
129 |     "for i in range(0, len(df.columns)):\n",
130 |     "    if len(np.unique(df[df.columns[i]])) > 1:\n",
131 |     "        print(df.columns[i])\n",
132 |     "\n",
133 |     "# np.unique(df['Concentration'].values)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 4,
139 |    "id": "d4a6a824",
140 |    "metadata": {},
141 |    "outputs": [
142 |     {
143 |      "data": {
144 |       "text/html": [
145 |        "<div>\n",
146 |        "<style scoped>\n",
147 |        "    .dataframe tbody tr th:only-of-type {\n",
148 |        "        vertical-align: middle;\n",
149 |        "    }\n",
150 |        "\n",
151 |        "    .dataframe tbody tr th {\n",
152 |        "        vertical-align: top;\n",
153 |        "    }\n",
154 |        "\n",
155 |        "    .dataframe thead th {\n",
156 |        "        text-align: right;\n",
157 |        "    }\n",
158 |        "</style>\n",
159 |        "<table border=\"1\" class=\"dataframe\">\n",
160 |        "  <thead>\n",
161 |        "    <tr style=\"text-align: right;\">\n",
162 |        "      <th></th>\n",
163 |        "      <th>base</th>\n",
164 |        "      <th>ligand</th>\n",
165 |        "      <th>solvent</th>\n",
166 |        "      <th>concentration</th>\n",
167 |        "      <th>temperature</th>\n",
168 |        "      <th>yield</th>\n",
169 |        "      <th>cost</th>\n",
170 |        "      <th>PMI</th>\n",
171 |        "    </tr>\n",
172 |        "  </thead>\n",
173 |        "  <tbody>\n",
174 |        "    <tr>\n",
175 |        "      <th>0</th>\n",
176 |        "      <td>KOAc</td>\n",
177 |        "      <td>BrettPhos</td>\n",
178 |        "      <td>DMAc</td>\n",
179 |        "      <td>0.100</td>\n",
180 |        "      <td>105</td>\n",
181 |        "      <td>5.47</td>\n",
182 |        "      <td>0.145775</td>\n",
183 |        "      <td>917.668323</td>\n",
184 |        "    </tr>\n",
185 |        "    <tr>\n",
186 |        "      <th>1</th>\n",
187 |        "      <td>KOAc</td>\n",
188 |        "      <td>PPhtBu2</td>\n",
189 |        "      <td>DMAc</td>\n",
190 |        "      <td>0.100</td>\n",
191 |        "      <td>105</td>\n",
192 |        "      <td>0.00</td>\n",
193 |        "      <td>0.043201</td>\n",
194 |        "      <td>inf</td>\n",
195 |        "    </tr>\n",
196 |        "    <tr>\n",
197 |        "      <th>2</th>\n",
198 |        "      <td>KOAc</td>\n",
199 |        "      <td>tBPh-CPhos</td>\n",
200 |        "      <td>DMAc</td>\n",
201 |        "      <td>0.100</td>\n",
202 |        "      <td>105</td>\n",
203 |        "      <td>78.95</td>\n",
204 |        "      <td>0.269140</td>\n",
205 |        "      <td>64.469151</td>\n",
206 |        "    </tr>\n",
207 |        "    <tr>\n",
208 |        "      <th>3</th>\n",
209 |        "      <td>KOAc</td>\n",
210 |        "      <td>PCy3 HBF4</td>\n",
211 |        "      <td>DMAc</td>\n",
212 |        "      <td>0.100</td>\n",
213 |        "      <td>105</td>\n",
214 |        "      <td>7.26</td>\n",
215 |        "      <td>0.032181</td>\n",
216 |        "      <td>691.080949</td>\n",
217 |        "    </tr>\n",
218 |        "    <tr>\n",
219 |        "      <th>4</th>\n",
220 |        "      <td>KOAc</td>\n",
221 |        "      <td>PPh3</td>\n",
222 |        "      <td>DMAc</td>\n",
223 |        "      <td>0.100</td>\n",
224 |        "      <td>105</td>\n",
225 |        "      <td>28.15</td>\n",
226 |        "      <td>0.026373</td>\n",
227 |        "      <td>178.881165</td>\n",
228 |        "    </tr>\n",
229 |        "    <tr>\n",
230 |        "      <th>...</th>\n",
231 |        "      <td>...</td>\n",
232 |        "      <td>...</td>\n",
233 |        "      <td>...</td>\n",
234 |        "      <td>...</td>\n",
235 |        "      <td>...</td>\n",
236 |        "      <td>...</td>\n",
237 |        "      <td>...</td>\n",
238 |        "      <td>...</td>\n",
239 |        "    </tr>\n",
240 |        "    <tr>\n",
241 |        "      <th>1723</th>\n",
242 |        "      <td>CsOPiv</td>\n",
243 |        "      <td>PPh2Me</td>\n",
244 |        "      <td>p-Xylene</td>\n",
245 |        "      <td>0.153</td>\n",
246 |        "      <td>120</td>\n",
247 |        "      <td>1.60</td>\n",
248 |        "      <td>0.110653</td>\n",
249 |        "      <td>2091.688946</td>\n",
250 |        "    </tr>\n",
251 |        "    <tr>\n",
252 |        "      <th>1724</th>\n",
253 |        "      <td>CsOPiv</td>\n",
254 |        "      <td>GorlosPhos HBF4</td>\n",
255 |        "      <td>p-Xylene</td>\n",
256 |        "      <td>0.153</td>\n",
257 |        "      <td>120</td>\n",
258 |        "      <td>8.39</td>\n",
259 |        "      <td>0.121732</td>\n",
260 |        "      <td>400.447659</td>\n",
261 |        "    </tr>\n",
262 |        "    <tr>\n",
263 |        "      <th>1725</th>\n",
264 |        "      <td>CsOPiv</td>\n",
265 |        "      <td>JackiePhos</td>\n",
266 |        "      <td>p-Xylene</td>\n",
267 |        "      <td>0.153</td>\n",
268 |        "      <td>120</td>\n",
269 |        "      <td>13.34</td>\n",
270 |        "      <td>0.439356</td>\n",
271 |        "      <td>252.868372</td>\n",
272 |        "    </tr>\n",
273 |        "    <tr>\n",
274 |        "      <th>1726</th>\n",
275 |        "      <td>CsOPiv</td>\n",
276 |        "      <td>CgMe-PPh</td>\n",
277 |        "      <td>p-Xylene</td>\n",
278 |        "      <td>0.153</td>\n",
279 |        "      <td>120</td>\n",
280 |        "      <td>19.13</td>\n",
281 |        "      <td>0.141130</td>\n",
282 |        "      <td>175.981223</td>\n",
283 |        "    </tr>\n",
284 |        "    <tr>\n",
285 |        "      <th>1727</th>\n",
286 |        "      <td>CsOPiv</td>\n",
287 |        "      <td>PPhMe2</td>\n",
288 |        "      <td>p-Xylene</td>\n",
289 |        "      <td>0.153</td>\n",
290 |        "      <td>120</td>\n",
291 |        "      <td>0.00</td>\n",
292 |        "      <td>0.111903</td>\n",
293 |        "      <td>inf</td>\n",
294 |        "    </tr>\n",
295 |        "  </tbody>\n",
296 |        "</table>\n",
297 |        "<p>1728 rows × 8 columns</p>\n",
298 |        "</div>"
299 |       ],
300 |       "text/plain": [
301 |        "        base           ligand   solvent  concentration  temperature  yield  \\\n",
302 |        "0       KOAc        BrettPhos      DMAc          0.100          105   5.47   \n",
303 |        "1       KOAc          PPhtBu2      DMAc          0.100          105   0.00   \n",
304 |        "2       KOAc       tBPh-CPhos      DMAc          0.100          105  78.95   \n",
305 |        "3       KOAc        PCy3 HBF4      DMAc          0.100          105   7.26   \n",
306 |        "4       KOAc             PPh3      DMAc          0.100          105  28.15   \n",
307 |        "...      ...              ...       ...            ...          ...    ...   \n",
308 |        "1723  CsOPiv           PPh2Me  p-Xylene          0.153          120   1.60   \n",
309 |        "1724  CsOPiv  GorlosPhos HBF4  p-Xylene          0.153          120   8.39   \n",
310 |        "1725  CsOPiv       JackiePhos  p-Xylene          0.153          120  13.34   \n",
311 |        "1726  CsOPiv         CgMe-PPh  p-Xylene          0.153          120  19.13   \n",
312 |        "1727  CsOPiv           PPhMe2  p-Xylene          0.153          120   0.00   \n",
313 |        "\n",
314 |        "          cost          PMI  \n",
315 |        "0     0.145775   917.668323  \n",
316 |        "1     0.043201          inf  \n",
317 |        "2     0.269140    64.469151  \n",
318 |        "3     0.032181   691.080949  \n",
319 |        "4     0.026373   178.881165  \n",
320 |        "...        ...          ...  \n",
321 |        "1723  0.110653  2091.688946  \n",
322 |        "1724  0.121732   400.447659  \n",
323 |        "1725  0.439356   252.868372  \n",
324 |        "1726  0.141130   175.981223  \n",
325 |        "1727  0.111903          inf  \n",
326 |        "\n",
327 |        "[1728 rows x 8 columns]"
328 |       ]
329 |      },
330 |      "execution_count": 4,
331 |      "metadata": {},
332 |      "output_type": "execute_result"
333 |     }
334 |    ],
335 |    "source": [
336 |     "df_sel = df[['Base', 'Ligand', 'Solvent', 'Concentration', 'Temp_C', 'Yield', 'total_cost_update', 'PMI']]\n",
337 |     "\n",
338 |     "df_all_exp_index = df_sel.rename(columns={'Base': 'base', 'Solvent': 'solvent',\n",
339 |     "                         'Ligand': 'ligand', 'Concentration': 'concentration',\n",
340 |     "                         'Temp_C': 'temperature', 'Yield': 'yield',\n",
341 |     "                         'total_cost_update': 'cost'\n",
342 |     "                        })\n",
343 |     "\n",
344 |     "# df_all_exp_index.to_csv('./data/experiment_index.csv', index=False)\n",
345 |     "df_all_exp_index"
346 |    ]
347 |   },
348 |   {
349 |    "cell_type": "code",
350 |    "execution_count": 5,
351 |    "id": "c81b98e3",
352 |    "metadata": {},
353 |    "outputs": [],
354 |    "source": [
355 |     "df_yield_cost = df_all_exp_index.drop(columns=['PMI'])\n",
356 |     "# df_yield_cost['new_index'] = np.arange(0, len(df_yield_cost))\n",
357 |     "df_yield_cost.to_csv('./data/experiments_yield_and_cost.csv')\n",
358 |     "\n"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 6,
364 |    "id": "3d0e53be",
365 |    "metadata": {},
366 |    "outputs": [],
367 |    "source": [
368 |     "df = pd.read_csv('./data/experiments_yield_and_cost.csv')\n",
369 |     "df.rename(columns={'Unnamed: 0': 'new_index'}, inplace=True)\n",
370 |     "df.to_csv('./data/experiments_yield_and_cost.csv', index=False)\n"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "code",
375 |    "execution_count": 18,
376 |    "id": "5469a317",
377 |    "metadata": {},
378 |    "outputs": [],
379 |    "source": []
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": 12,
384 |    "metadata": {
385 |     "collapsed": false,
386 |     "pycharm": {
387 |      "name": "#%%\n"
388 |     }
389 |    },
390 |    "outputs": [],
391 |    "source": []
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": 12,
396 |    "metadata": {
397 |     "collapsed": false,
398 |     "pycharm": {
399 |      "name": "#%%\n"
400 |     }
401 |    },
402 |    "outputs": [],
403 |    "source": []
404 |   }
405 |  ],
406 |  "metadata": {
407 |   "kernelspec": {
408 |    "display_name": "Python 3.7.5 ('edboplus')",
409 |    "language": "python",
410 |    "name": "python3"
411 |   },
412 |   "language_info": {
413 |    "codemirror_mode": {
414 |     "name": "ipython",
415 |     "version": 3
416 |    },
417 |    "file_extension": ".py",
418 |    "mimetype": "text/x-python",
419 |    "name": "python",
420 |    "nbconvert_exporter": "python",
421 |    "pygments_lexer": "ipython3",
422 |    "version": "3.7.5"
423 |   },
424 |   "vscode": {
425 |    "interpreter": {
426 |     "hash": "f6b50c482b94d49566f339c9bbaa80fe4f4c53d65f91d29ce8fa084769027490"
427 |    }
428 |   }
429 |  },
430 |  "nbformat": 4,
431 |  "nbformat_minor": 5
432 | }


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/1_preprocess_data.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import pandas as pd
 4 | 
 5 | df_exp = pd.read_csv('./data/experiments_yield_and_cost.csv')
 6 | 
 7 | 
 8 | # Base features.
 9 | for i in ['base', 'ligand', 'solvent']:
10 |     df_i = pd.read_csv(f"data/{i}_dft.csv")
11 |     df_i.rename(columns={f"{i}_file_name": i}, inplace=True)
12 |     df_exp = pd.merge(df_exp, df_i, on=i)
13 | 
14 | df_edbo = df_exp.copy(deep=True)
15 | # Remove correlated features.
16 | corr_matrix = df_edbo.corr().abs()
17 | # Select upper triangle of correlation matrix
18 | upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
19 | # Find features with correlation greater than 0.95.
20 | to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
21 | # Drop features
22 | df_edbo.drop(to_drop, axis=1, inplace=True)
23 | 
24 | # Remove columns that have only one or two unique values.
25 | extra_columns_to_remove = []
26 | for column in df_edbo.columns.values:
27 |     if len(np.unique(df_edbo[column].values)) <= 1:
28 |         extra_columns_to_remove.append(column)
29 | df_edbo.drop(extra_columns_to_remove, axis=1, inplace=True)
30 | 
31 | # Remove non numerical.
32 | df_edbo_numeric = df_edbo.select_dtypes(include=np.number)
33 | df_edbo_numeric.to_csv('./data/clean_dft.csv', index=0)
34 | print(df_edbo_numeric)
35 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/2_plot_ground_truth.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | 
 3 | import pandas as pd
 4 | import numpy as np
 5 | import matplotlib.pyplot as plt
 6 | import seaborn as sns
 7 | sns.set_style("ticks")
 8 | sns.despine()
 9 | import matplotlib as mpl
10 | mpl.rcParams['grid.linestyle'] = ':'
11 | mpl.rcParams['grid.linewidth'] = 0.1
12 | plt.rcParams['font.family'] = 'Helvetica'
13 | plt.rcParams['font.size'] = 10
14 | import pareto
15 | from edbo.plus.benchmark.multiobjective_benchmark import is_pareto
16 | from pymoo.mcdm.high_tradeoff import HighTradeoffPoints
17 | from sklearn.preprocessing import MinMaxScaler
18 | import seaborn as sns
19 | 
20 | 
def get_pareto_points(objective_values):
    """Compute the Pareto front of the ground truth objectives.

    NOTE: Assumes maximization of the two objective columns.

    Returns a tuple: the front found by `pareto.eps_sort` as a numpy
    array, and the result of `is_pareto` evaluated on the negated
    objective values.
    """
    front = pareto.eps_sort(
        tables=objective_values,
        objectives=np.arange(2),
        maximize_all=True,
    )
    # is_pareto receives the negated values (presumably it minimizes --
    # TODO confirm against its definition).
    pareto_flags = is_pareto(objectives=-objective_values)
    front_array = np.array(front)
    return front_array, pareto_flags
29 | 
30 | 
def get_high_tradeoff_points(pareto_points):
    """ Pass a numpy array with the pareto points and returns a numpy
        array with the high tradeoff points.

        The points are min-max scaled before the search. If the search
        fails, an empty list is returned instead (best effort)."""

    scaler_pareto = MinMaxScaler()
    pareto_scaled = scaler_pareto.fit_transform(pareto_points)
    try:
        tradeoff = HighTradeoffPoints()

        tradeoff_args = tradeoff.do(-pareto_scaled)  # Always minimizing.
        tradeoff_points = pareto_points[tradeoff_args]
    except Exception:
        # Deliberately best effort, but do not swallow KeyboardInterrupt
        # or SystemExit as a bare `except:` would.
        tradeoff_points = []
    return tradeoff_points
46 | 
47 | 
48 | df_exp = pd.read_csv('./data/experiments_yield_and_cost.csv')
49 | 
50 | df_exp['cost'] = -df_exp['cost']
51 | objective_vals = df_exp[['yield', 'cost']].values
52 | pareto_points, idx_pareto = get_pareto_points(objective_vals)
53 | high_tradeoff_points = get_high_tradeoff_points(pareto_points)
54 | 
55 | print(np.unique(df_exp['base'].values))
56 | 
57 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
58 | 
59 | 
60 | hues = ['ligand', 'base', 'solvent', 'concentration']
61 | 
62 | sns.scatterplot(x=df_exp['cost'], y=df_exp['yield'],
63 |                 hue=df_exp['ligand'], s=80,
64 |                 lw=0.01, edgecolor='black',
65 |                 ax=ax, palette='Spectral',
66 |                 style=df_exp['solvent'],
67 |                 )
68 | sns.lineplot(x=pareto_points[:, 1], y=pareto_points[:, 0],
69 |              linewidth=2, color='grey', ls='dotted', ax=ax)
70 | if not os.path.exists('results_plots'):
71 |     os.mkdir('results_plots')
72 | plt.savefig(f'./results_plots/dataset.svg', format='svg', dpi=500)
73 | plt.show()
74 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/3_run_edbo_cost_yield_performance.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import shutil
 3 | import pandas as pd
 4 | import numpy as np
 5 | import os
 6 | from edbo.plus.benchmark.multiobjective_benchmark import Benchmark
 7 | import os
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | import pandas as pd
12 | sns.set_style("darkgrid")
13 | sns.set_context("talk")
14 | 
15 | # Benchmark filename
16 | for batch in [1, 2, 3, 5]:
17 |     for acq_i in ['EHVI']:
18 |         for sampling_method in ['seed', 'lhs', 'cvtsampling']:
19 |             budget = 60
20 |             acq = acq_i
21 |             seed = 1
22 | 
23 |             df_exp = pd.read_csv('./data/clean_dft.csv')
24 |             sort_column = 'new_index'
25 | 
26 |             columns_regression = df_exp.columns
27 |             columns_regression = columns_regression.drop([sort_column, 'yield', 'cost']).tolist()
28 |             objectives = ['yield', 'cost']
29 |             objective_modes = ['max', 'min']
30 |             objective_thresholds = [None, None]
31 |             print(f"Columns for regression: {columns_regression}")
32 | 
33 |             label_benchmark = f"benchmark_dft_acq_{acq}_batch_{batch}_seed_{seed}_init_sampling_{sampling_method}.csv"
34 | 
35 |             if not os.path.exists(f"./results/{label_benchmark}"):
36 | 
37 |                 # Remove previous files
38 |                 if os.path.exists(label_benchmark):
39 |                     os.remove(label_benchmark)
40 | 
41 |                 if os.path.exists(f'pred_{label_benchmark}'):
42 |                     os.remove(f'pred_{label_benchmark}')
43 | 
44 |                 if os.path.exists(f'results_{label_benchmark}'):
45 |                     os.remove(f'results_{label_benchmark}')
46 | 
47 |                 bench = Benchmark(df_ground=df_exp,
48 |                                   features_regression=columns_regression,
49 |                                   objective_names=objectives,
50 |                                   objective_modes=objective_modes,
51 |                                   objective_thresholds=objective_thresholds,
52 |                                   filename=label_benchmark,
53 |                                   filename_results=f'results_{label_benchmark}',
54 |                                   index_column=sort_column,
55 |                                   acquisition_function=acq)
56 | 
57 |                 bench.run(steps=int(budget/batch), batch=batch, seed=seed,
58 |                           plot_ground=False,
59 |                           plot_predictions=False, plot_train=False,
60 |                           init_method=sampling_method)
61 | 
62 |                 # Move results.
63 |                 if not os.path.exists('results'):
64 |                     os.mkdir('results')
65 |                 shutil.move(label_benchmark, f'results/{label_benchmark}')
66 |                 shutil.move(f'pred_{label_benchmark}', f'results/pred_{label_benchmark}')
67 |                 shutil.move(f'results_{label_benchmark}', f'results/results_{label_benchmark}')
68 | 
69 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/4_plot_performance_hypervol.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | import pandas as pd
 6 | import os
 7 | 
 8 | 
 9 | # sns.set_style("ticks")
10 | # sns.set_context("paper")
11 | import matplotlib as mpl
12 | mpl.rcParams['grid.linestyle'] = ':'
13 | mpl.rcParams['grid.linewidth'] = 0.1
14 | 
15 | objective_1 = 'conversion'
16 | objective_2 = 'selectivity'
17 | 
18 | plt.rcParams['font.family'] = 'Helvetica'
19 | # mpl.rc('font', **{'family':'sans-serif', 'sans-serif':['HelveticaLight']})
20 | 
21 | # Best objectives.
22 | best_conversion_in_scope = 100.
23 | best_selectivity_in_scope = 100.
24 | n_steps = 60
25 | n_experiments = 60
26 | feat_iter = 0
27 | 
28 | if not os.path.exists('./results_plots'):
29 |     os.mkdir('results_plots')
30 | 
31 | fig, ax = plt.subplots(figsize=(7., 2.5), dpi=500, nrows=1, ncols=3)
32 | 
33 | colors_sampling = ['#DC143C', '#0343DF', '#FAC205', '#15B01A']
34 | 
35 | alphas = [0.4, 0.6, 0.7, 1.0]
36 | i = -1
37 | for sampling_method in ['seed', 'lhs', 'cvtsampling']:
38 | 
39 |     i += 1
40 |     j = -1
41 |     for batch in [1, 2, 3, 5]:
42 |         j += 1
43 |         acq = 'EHVI'
44 | 
45 |         df_i = pd.read_csv(f'./results/results_benchmark_dft_acq_{acq}_batch_{batch}_seed_1_init_sampling_{sampling_method}.csv')
46 |         df_i = df_i[df_i['n_experiments'] <= n_experiments]
47 | 
48 |         # Hypervolume.
49 |         hypervol = df_i['hypervolume completed (%)'].values[:]
50 | 
51 |         # Plot performance for each acquisition function.
52 |         n_exp = df_i['n_experiments'].values[:]
53 | 
54 |         ax[i].plot(n_exp, hypervol, color=colors_sampling[j], lw=2.5,
55 |                       label=f"{batch}",
56 |                    alpha=alphas[j])
57 | 
58 |         ax[i].set_title(f"{sampling_method}")
59 |         ax[i].set_xlabel('Samples')
60 |         ax[i].set_ylabel('Hypervolume (%)')
61 |         ax[i].set_ylim(0, 100)
62 | 
63 | ax[i].legend()
64 | plt.tight_layout()
65 | plt.savefig(f"results_plots/benchmark_hypervol.svg")
66 | 
67 | plt.show()
68 | 
69 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/5_plot_MAE_and_RMSE.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | import pareto
  6 | from pymoo.mcdm.high_tradeoff import HighTradeoffPoints
  7 | from sklearn.preprocessing import MinMaxScaler
  8 | 
  9 | from edbo.plus.benchmark.multiobjective_benchmark import is_pareto
 10 | 
 11 | sns.set_style("ticks")
 12 | import matplotlib as mpl
 13 | # mpl.rcParams['grid.linestyle'] = ':'
 14 | # mpl.rcParams['grid.linewidth'] = 0.1
 15 | plt.rcParams['font.family'] = 'Helvetica'
 16 | import joypy
 17 | from matplotlib import cm
 18 | 
 19 | ##############
 20 | 
 21 | def get_pareto_points(objective_values):
 22 |     """ Get pareto for the ground truth function.
 23 |     NOTE: Assumes maximization."""
 24 |     pareto_ground = pareto.eps_sort(tables=objective_values,
 25 |                                     objectives=np.arange(2),
 26 |                                     maximize_all=True)
 27 |     idx_pareto = is_pareto(objectives=-objective_values)
 28 |     return np.array(pareto_ground), idx_pareto
 29 | 
 30 | def get_high_tradeoff_points(pareto_points):
 31 |     """ Pass a numpy array with the pareto points and returns a numpy
 32 |         array with the high tradeoff points."""
 33 | 
 34 |     scaler_pareto = MinMaxScaler()
 35 |     pareto_scaled = scaler_pareto.fit_transform(pareto_points)
 36 |     try:
 37 |         tradeoff = HighTradeoffPoints()
 38 | 
 39 |         tradeoff_args = tradeoff.do(-pareto_scaled)  # Always minimizing.
 40 |         tradeoff_points = pareto_points[tradeoff_args]
 41 |     except:
 42 |         tradeoff_points = []
 43 |         pass
 44 |     return tradeoff_points
 45 | 
 46 | 
 47 | df_exp = pd.read_csv('./data/experiments_yield_and_cost.csv')
 48 | df_exp['cost'] = -df_exp['cost']
 49 | 
 50 | objective_vals = df_exp[['yield', 'cost']].values
 51 | pareto_points, idx_pareto = get_pareto_points(objective_vals)
 52 | high_tradeoff_points = get_high_tradeoff_points(pareto_points)
 53 | 
 54 | ######
 55 | 
 56 | 
 57 | samplings = ['seed', 'lhs', 'cvtsampling']
 58 | batch_sizes = [1, 2, 3, 5]
 59 | # colorpalettes = ['Blues', 'Reds', 'Greens', 'Oranges']
 60 | max_number_experiments = 45
 61 | objective_1 = 'yield'
 62 | objective_2 = 'cost'
 63 | 
 64 | colors = ['blue', 'green', 'red']
 65 | 
 66 | df_all = pd.read_csv(f'./results/results_benchmark_dft_acq_EHVI_batch_{batch_sizes[0]}_seed_1_init_sampling_{samplings[0]}.csv')
 67 | for i in batch_sizes:
 68 |     for j in samplings:
 69 |         df_i = pd.read_csv(f'./results/results_benchmark_dft_acq_EHVI_batch_{i}_seed_1_init_sampling_{j}.csv')
 70 |         df_i = df_i[df_i['n_experiments'] <= max_number_experiments]
 71 |         df_all = df_all.append(df_i, ignore_index=True)
 72 | 
 73 | 
 74 | df_all.drop_duplicates(inplace=True)
 75 | 
 76 | df_finish = df_all[(df_all['n_experiments'] < max_number_experiments+2) & (df_all['n_experiments'] > max_number_experiments-2)]
 77 | 
 78 | fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(14, 2.2))
 79 | 
 80 | sns.barplot(data=df_finish, x='init_method', y='MAE_yield',
 81 |             hue='batch', ax=ax[0], palette='Blues',
 82 |             lw=0.7, edgecolor='black', ci=None)
 83 | # ax[0].set_ylim((5, 18))
 84 | 
 85 | sns.barplot(data=df_finish, x='init_method', y='MAE_cost',
 86 |             hue='batch', ax=ax[1], palette='Reds',
 87 |             lw=0.7, edgecolor='black', ci=None)
 88 | # ax[1].set_ylim(0.01)
 89 | 
 90 | 
 91 | sns.barplot(data=df_finish, x='init_method', y='RMSE_yield',
 92 |             hue='batch', ax=ax[2], palette='Blues',
 93 |             lw=0.7, edgecolor='black', ci=None)
 94 | # ax[2].set_ylim(10, 25)
 95 | 
 96 | sns.barplot(data=df_finish, x='init_method', y='RMSE_cost',
 97 |             hue='batch', ax=ax[3], palette='Reds',
 98 |             lw=0.7, edgecolor='black', ci=None)
 99 | # ax[3].set_ylim(0.01, 0.06)
100 | 
101 | 
102 | plt.savefig('./results_plots/fig2c.svg', format='svg', dpi=500)
103 | plt.tight_layout()
104 | plt.show()
105 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/6_distrib_plots.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | sns.set_style("ticks")
 6 | import matplotlib as mpl
 7 | # mpl.rcParams['grid.linestyle'] = ':'
 8 | # mpl.rcParams['grid.linewidth'] = 0.1
 9 | plt.rcParams['font.family'] = 'Helvetica'
10 | import joypy
11 | from matplotlib import cm
12 | 
13 | samplings = ['seed', 'lhs', 'cvtsampling']
14 | objective_1 = 'yield'
15 | objective_2 = 'cost'
16 | max_num_experiments = 46
17 | 
18 | df_0 = pd.read_csv(f'./results/results_benchmark_dft_acq_EHVI_batch_3_seed_1_init_sampling_{samplings[0]}.csv')
19 | df_0['step'] += 1
20 | df_0 = df_0[df_0['n_experiments'] < max_num_experiments]
21 | 
22 | df_1 = pd.read_csv(f'./results/results_benchmark_dft_acq_EHVI_batch_3_seed_1_init_sampling_{samplings[1]}.csv')
23 | df_1['step'] += 1
24 | df_1 = df_1[df_1['n_experiments'] < max_num_experiments]
25 | 
26 | df_2 = pd.read_csv(f'./results/results_benchmark_dft_acq_EHVI_batch_3_seed_1_init_sampling_{samplings[2]}.csv')
27 | df_2['step'] += 1
28 | df_2 = df_2[df_2['n_experiments'] < max_num_experiments]
29 | 
30 | frames = [df_0, df_1, df_2]
31 | colormaps_obj_1 = [cm.Blues] * 3
32 | colormaps_obj_2 = [cm.Reds] * 3
33 | # colormaps_obj_2 = [cm.PuRd] * 3
34 | # colormaps = [cm.autumn_r, cm.autumn_r, cm.cool, cm.summer]
35 | # pal = sns.cubehelix_palette(10, rot=-.25, light=.7, as_cmap=True)
36 | 
37 | for i in range(0, 3):
38 |     df = pd.concat(frames)
39 | 
40 |     plt.figure()
41 |     ax, fig = joypy.joyplot(
42 |         data=eval(f"df_{i}")[['step', f"{objective_1}_collected_values"]],
43 |         by='step',
44 |         linecolor='black',
45 |         linewidth=0.7,
46 |         ylim='own',
47 |         column=['yield_collected_values'],
48 |         colormap=colormaps_obj_1[i],
49 |         legend=False,
50 |         alpha=0.95, #bins=10,
51 |         normalize=False,
52 |         grid=False,
53 |         figsize=(3, 3), #x_range=(0, 100)
54 |         x_range=(0, 100)
55 |     )
56 | 
57 |     plt.savefig(f'./results_plots/subplot_{samplings[i]}_{objective_1}.svg', format='svg', dpi=500)
58 |     plt.show()
59 |     ax, fig = joypy.joyplot(
60 |         data=eval(f"df_{i}")[['step', f"{objective_2}_collected_values"]],
61 |         by='step',
62 |         linecolor='black',
63 |         linewidth=0.7,
64 |         # hist=True,
65 |         ylim='own',
66 |         column=[f'{objective_2}_collected_values'],
67 |         # color=['#686de0'],
68 |         colormap=colormaps_obj_2[i],
69 |         legend=False,
70 |         alpha=0.95, #bins=10,
71 |         normalize=False, grid=False,
72 |         figsize=(3, 3),
73 |         x_range=(0, 0.4)
74 |     )
75 |     plt.savefig(f'./results_plots/subplot_{samplings[i]}_{objective_2}.svg', format='svg', dpi=500)
76 |     plt.show()
77 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/7_plot_scope_expansion.py:
--------------------------------------------------------------------------------
  1 | import os.path
  2 | 
  3 | import pandas as pd
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | import seaborn as sns
  7 | sns.set_style("ticks")
  8 | sns.despine()
  9 | import matplotlib as mpl
 10 | mpl.rcParams['grid.linestyle'] = ':'
 11 | mpl.rcParams['grid.linewidth'] = 0.1
 12 | plt.rcParams['font.family'] = 'Helvetica'
 13 | plt.rcParams['font.size'] = 10
 14 | import pareto
 15 | from edbo.plus.benchmark.multiobjective_benchmark import is_pareto
 16 | from pymoo.mcdm.high_tradeoff import HighTradeoffPoints
 17 | from sklearn.preprocessing import MinMaxScaler
 18 | import seaborn as sns
 19 | 
 20 | 
 21 | def get_pareto_points(objective_values):
 22 |     """ Get pareto for the ground truth function.
 23 |     NOTE: Assumes maximization."""
 24 |     pareto_ground = pareto.eps_sort(tables=objective_values,
 25 |                                     objectives=np.arange(2),
 26 |                                     maximize_all=True)
 27 |     idx_pareto = is_pareto(objectives=-objective_values)
 28 |     return np.array(pareto_ground), idx_pareto
 29 | 
 30 | 
 31 | def get_high_tradeoff_points(pareto_points):
 32 |     """ Pass a numpy array with the pareto points and returns a numpy
 33 |         array with the high tradeoff points."""
 34 | 
 35 |     scaler_pareto = MinMaxScaler()
 36 |     pareto_scaled = scaler_pareto.fit_transform(pareto_points)
 37 |     try:
 38 |         tradeoff = HighTradeoffPoints()
 39 | 
 40 |         tradeoff_args = tradeoff.do(-pareto_scaled)  # Always minimizing.
 41 |         tradeoff_points = pareto_points[tradeoff_args]
 42 |     except:
 43 |         tradeoff_points = []
 44 |         pass
 45 |     return tradeoff_points
 46 | 
 47 | 
 48 | df_exp = pd.read_csv('./data/experiments_yield_and_cost.csv')
 49 | 
 50 | df_exp['cost'] = -df_exp['cost']
 51 | objective_vals = df_exp[['yield', 'cost']].values
 52 | pareto_points, idx_pareto = get_pareto_points(objective_vals)
 53 | high_tradeoff_points = get_high_tradeoff_points(pareto_points)
 54 | 
 55 | print(np.unique(df_exp['base'].values))
 56 | 
 57 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
 58 | 
 59 | 
 60 | hues = ['ligand', 'base', 'solvent', 'concentration']
 61 | 
 62 | sns.scatterplot(x=df_exp['cost'], y=df_exp['yield'],
 63 |                 hue=df_exp['ligand'], s=80,
 64 |                 lw=0.01, edgecolor='black',
 65 |                 ax=ax, palette='Spectral',
 66 |                 style=df_exp['solvent'],
 67 |                 )
 68 | sns.lineplot(x=pareto_points[:, 1], y=pareto_points[:, 0],
 69 |              linewidth=2, color='grey', ls='dotted', ax=ax)
 70 | ax.set_xlim(-0.5, 0.02)
 71 | ax.set_ylim(-10, 110)
 72 | 
 73 | if not os.path.exists('results_plots'):
 74 |     os.mkdir('results_plots')
 75 | 
 76 | plt.savefig(f'./results_plots/dataset.svg', format='svg', dpi=500)
 77 | # plt.show()
 78 | 
 79 | # Reduced space
 80 | 
 81 | df_exp = pd.read_csv('./data/experiments_yield_and_cost.csv')
 82 | 
 83 | # Removing a ligand.
 84 | df_exp = df_exp[df_exp["ligand"].str.contains("CgMe-PPh")==False]
 85 | df_exp = df_exp[df_exp["ligand"].str.contains("PPh3")==False]
 86 | 
 87 | df_exp['cost'] = -df_exp['cost']
 88 | objective_vals = df_exp[['yield', 'cost']].values
 89 | pareto_points, idx_pareto = get_pareto_points(objective_vals)
 90 | high_tradeoff_points = get_high_tradeoff_points(pareto_points)
 91 | 
 92 | print(np.unique(df_exp['base'].values))
 93 | 
 94 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
 95 | 
 96 | hues = ['ligand', 'base', 'solvent', 'concentration']
 97 | 
 98 | sns.scatterplot(x=df_exp['cost'], y=df_exp['yield'],
 99 |                 hue=df_exp['ligand'], s=80,
100 |                 lw=0.01, edgecolor='black',
101 |                 ax=ax, palette='Spectral',                
102 |                 style=df_exp['solvent'],
103 |                 )
104 | sns.lineplot(x=pareto_points[:, 1], y=pareto_points[:, 0],
105 |              linewidth=2, color='grey', ls='dotted', ax=ax)
106 | ax.set_xlim(-0.5, 0.02)
107 | ax.set_ylim(-10, 110)
108 | 
109 | if not os.path.exists('results_plots'):
110 |     os.mkdir('results_plots')
111 | plt.savefig(f'./results_plots/dataset_reduced.svg', format='svg', dpi=500)
112 | # plt.show()
113 | 
114 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/8_optimization_expanding_scope.py:
--------------------------------------------------------------------------------
  1 | from edbo.plus.optimizer_botorch import EDBOplus
  2 | import pandas as pd
  3 | import numpy as np
  4 | import seaborn as sns
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | df_lookup = pd.read_csv('./data/experiments_yield_and_cost.csv')
  8 | df_large = pd.read_csv('./data/experiments_yield_and_cost.csv')
  9 | 
 10 | condition1 = df_large["ligand"].str.contains("CgMe-PPh")==False
 11 | condition2 = df_large["ligand"].str.contains("PPh3")==False
 12 | df_small = df_large[condition1 & condition2]
 13 | 
 14 | # Refereces for plots.
 15 | ref_best_yield_small_scope = np.max(df_small['yield'])
 16 | ref_best_cost_small_scope = np.min(df_small['cost'])
 17 | 
 18 | ref_best_yield_large_scope = np.max(df_large['yield'])
 19 | ref_best_cost_large_scope = np.min(df_large['cost'])
 20 | 
 21 | df_small.to_csv('./data/small_scope_lookup.csv', index=False)
 22 | df_large.to_csv('./data/large_scope_lookup.csv', index=False)
 23 | 
 24 | df_small.drop(columns=['yield', 'cost'], inplace=True)
 25 | df_large.drop(columns=['yield', 'cost'], inplace=True)
 26 | 
 27 | df_small.to_csv('./small_scope.csv', index=False)
 28 | df_large.to_csv('./large_scope.csv', index=False)
 29 | 
 30 | # Expand scope.
 31 | df_expand = df_large.copy()
 32 | condition1 = df_large["ligand"].str.contains("CgMe-PPh")==True
 33 | condition2 = df_large["ligand"].str.contains("PPh3")==True
 34 | df_expand = df_large[condition1 | condition2]
 35 | df_expand['priority'] = np.zeros(len(df_expand))
 36 | df_expand['yield'] = ['PENDING'] * len(df_expand)
 37 | df_expand['cost'] = ['PENDING'] * len(df_expand)
 38 | 
 39 | print('References:')
 40 | print('Small scope (best yield / best cost):', ref_best_yield_small_scope, ref_best_cost_small_scope)
 41 | print('Large scope (best yield / best cost):',ref_best_yield_large_scope, ref_best_cost_large_scope)
 42 | 
 43 | # Run optimization loops.
 44 | n_rounds_small = 6
 45 | n_round_large = 5
 46 | batch_size = 3
 47 | columns_regression = df_small.drop(columns=['new_index']).columns.tolist()
 48 | 
 49 | n_experiments = 0
 50 | 
 51 | track_results_dict = {
 52 |     'n_experiments': [],
 53 |     'best_yield': [],
 54 |     'best_cost': [],
 55 |     'max_ei_yield': [],
 56 |     'max_ei_cost': [],
 57 |     'max_uncertainty_yield': [],
 58 |     'max_uncertainty_cost': [],
 59 |     'avg_uncertainty_yield': [],
 60 |     'avg_uncertainty_cost': [],    
 61 |     }
 62 | 
 63 | collected_yields = []
 64 | collected_costs = []
 65 | 
 66 | for round in range(0, n_rounds_small):    
 67 |     EDBOplus().run(
 68 |         filename='small_scope.csv',  # Previously generated scope.
 69 |         objectives=['yield', 'cost'],  # Objectives to be optimized.
 70 |         objective_mode=['max', 'min'],  # Maximize yield and ee but minimize side_product.
 71 |         batch=batch_size,  # Number of experiments in parallel that we want to perform in this round.
 72 |         columns_features=columns_regression, # features to be included in the model.
 73 |         init_sampling_method='cvtsampling'  # initialization method.
 74 |     )
 75 |     
 76 |     n_experiments += batch_size
 77 |     # Update with experimental values (observations).
 78 |     df_results = pd.read_csv('small_scope.csv')    
 79 |     arg_lookup = df_results.loc[0:batch_size-1]['new_index'].values    
 80 |     
 81 |     for a in range(len(arg_lookup)):        
 82 |         df_results.at[a,'yield'] = df_lookup.loc[arg_lookup[a]]['yield']
 83 |         df_results.at[a,'cost'] = df_lookup.loc[arg_lookup[a]]['cost']
 84 |         collected_yields.append(df_lookup.loc[arg_lookup[a]]['yield'])
 85 |         collected_costs.append(df_lookup.loc[arg_lookup[a]]['cost'])
 86 |     
 87 |     df_results.to_csv('small_scope.csv', index=False)
 88 |     
 89 |     if round > 0:
 90 |         # Save all predicted values.
 91 |         df_pred = pd.read_csv('pred_small_scope.csv')
 92 |         max_ei_yield = np.max(df_pred['yield_expected_improvement'])
 93 |         max_ei_cost = np.max(df_pred['cost_expected_improvement'])
 94 |         max_uncertainty_yield = np.max((df_pred['yield_predicted_variance']))
 95 |         max_uncertainty_cost = np.max((df_pred['cost_predicted_variance']))
 96 |         avg_uncertainty_yield = np.average((df_pred['yield_predicted_variance']))
 97 |         avg_uncertainty_cost = np.average((df_pred['cost_predicted_variance']))                        
 98 |         best_yield = np.max(collected_yields)
 99 |         best_cost = np.min(collected_costs)        
100 |         track_results_dict['n_experiments'].append(n_experiments)
101 |         track_results_dict['best_yield'].append(best_yield)
102 |         track_results_dict['best_cost'].append(best_cost)    
103 |         track_results_dict['max_ei_yield'].append(max_ei_yield)                        
104 |         track_results_dict['max_ei_cost'].append(max_ei_cost)                        
105 |         track_results_dict['max_uncertainty_yield'].append(max_uncertainty_yield)                                
106 |         track_results_dict['max_uncertainty_cost'].append(max_uncertainty_cost)                                
107 |         track_results_dict['avg_uncertainty_yield'].append(avg_uncertainty_yield)                        
108 |         track_results_dict['avg_uncertainty_cost'].append(avg_uncertainty_cost)                        
109 | 
110 | # Plot before expanding:
111 | fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(7, 7))
112 | 
113 | sns.scatterplot(
114 |     x=np.array(track_results_dict['n_experiments']), 
115 |     y=np.array(track_results_dict['max_ei_yield']), ax=ax[0][0], color='C1', s=100,
116 |     zorder=100
117 |     )
118 | sns.scatterplot(
119 |     x=track_results_dict['n_experiments'], 
120 |     y=track_results_dict['max_ei_cost'], ax=ax[0][1], color='C1', s=100,
121 |     zorder=100
122 |     )
123 | sns.scatterplot(
124 |     x=track_results_dict['n_experiments'], 
125 |     y=track_results_dict['best_yield'], ax=ax[1][0], color='C1',  s=100,
126 |     zorder=100
127 |     )
128 | sns.scatterplot(    
129 |     x=track_results_dict['n_experiments'], 
130 |     y=track_results_dict['best_cost'], ax=ax[1][1], color='C1',s=100,
131 |     zorder=100
132 |     )
133 | 
134 | ax[0][0].set_xlabel('Number of experiments')
135 | ax[0][1].set_xlabel('Number of experiments')
136 | ax[1][0].set_xlabel('Number of experiments')
137 | ax[1][1].set_xlabel('Number of experiments')
138 | ax[0][0].set_ylabel('Max EI (yield)')
139 | ax[0][1].set_ylabel('Max EI (cost)')
140 | ax[1][0].set_ylabel('Highest yield found')
141 | ax[1][1].set_ylabel('Lowest cost found')
142 | 
143 | 
144 | # Expand scope:
145 | df_small = pd.read_csv('small_scope.csv')
146 | df_expand = df_expand.append(df_small)
147 | df_expand.sort_values(by=['priority'], ascending=False, inplace=True)
148 | df_expand.to_csv('expanded_scope.csv', index=False)
149 | 
150 | n_experiments -= batch_size
151 | 
152 | # Keep optimizing after expanding.
153 | for round in range(0, n_round_large):    
154 |     EDBOplus().run(
155 |         filename='expanded_scope.csv',  # Previously generated scope.
156 |         objectives=['yield', 'cost'],  # Objectives to be optimized.
157 |         objective_mode=['max', 'min'],  # Maximize yield and ee but minimize side_product.
158 |         batch=batch_size,  # Number of experiments in parallel that we want to perform in this round.
159 |         columns_features=columns_regression, # features to be included in the model.
160 |         init_sampling_method='cvtsampling'  # initialization method.
161 |     )
162 |     
163 |     n_experiments += batch_size
164 |     # Update with experimental values (observations).
165 |     df_results = pd.read_csv('expanded_scope.csv')    
166 |     arg_lookup = df_results.loc[0:batch_size-1]['new_index'].values    
167 |     
168 |     for a in range(len(arg_lookup)):        
169 |         df_results.at[a,'yield'] = df_lookup.loc[arg_lookup[a]]['yield']
170 |         df_results.at[a,'cost'] = df_lookup.loc[arg_lookup[a]]['cost']
171 |         collected_yields.append(df_lookup.loc[arg_lookup[a]]['yield'])
172 |         collected_costs.append(df_lookup.loc[arg_lookup[a]]['cost'])
173 |     
174 |     df_results.to_csv('expanded_scope.csv', index=False)
175 |     
176 |     if round > 0:
177 |         # Save all predicted values.
178 |         df_pred = pd.read_csv('pred_expanded_scope.csv')
179 |         max_ei_yield = np.max(df_pred['yield_expected_improvement'])
180 |         max_ei_cost = np.max(df_pred['cost_expected_improvement'])
181 |         max_uncertainty_yield = np.max((df_pred['yield_predicted_variance']))
182 |         max_uncertainty_cost = np.max((df_pred['cost_predicted_variance']))
183 |         avg_uncertainty_yield = np.average((df_pred['yield_predicted_variance']))
184 |         avg_uncertainty_cost = np.average((df_pred['cost_predicted_variance']))                        
185 |         best_yield = np.max(collected_yields)
186 |         best_cost = np.min(collected_costs)        
187 |         track_results_dict['n_experiments'].append(n_experiments)
188 |         track_results_dict['best_yield'].append(best_yield)
189 |         track_results_dict['best_cost'].append(best_cost)    
190 |         track_results_dict['max_ei_yield'].append(max_ei_yield)                        
191 |         track_results_dict['max_ei_cost'].append(max_ei_cost)                        
192 |         track_results_dict['max_uncertainty_yield'].append(max_uncertainty_yield)                                
193 |         track_results_dict['avg_uncertainty_yield'].append(avg_uncertainty_yield)                        
194 |         track_results_dict['avg_uncertainty_cost'].append(avg_uncertainty_cost)
195 |         
196 |         
197 | sns.scatterplot(
198 |     x=np.array(track_results_dict['n_experiments']), 
199 |     y=np.array(track_results_dict['max_ei_yield']), ax=ax[0][0], color='C0', s=95,
200 |     zorder=10
201 |     )
202 | sns.scatterplot(
203 |     x=track_results_dict['n_experiments'], 
204 |     y=track_results_dict['max_ei_cost'], ax=ax[0][1], color='C0', s=95,
205 |     )
206 | sns.scatterplot(
207 |     x=track_results_dict['n_experiments'], 
208 |     y=track_results_dict['best_yield'], ax=ax[1][0], color='C0',  s=95,
209 |     zorder=10
210 |     )
211 | sns.scatterplot(    
212 |     x=track_results_dict['n_experiments'], 
213 |     y=track_results_dict['best_cost'], ax=ax[1][1], color='C0',s=95,
214 |     zorder=10
215 |     )
216 | 
217 | plt.tight_layout()
218 | plt.savefig('./results_plots/expand_scope.svg', format='svg')
219 | plt.show()
220 | 
221 |     
222 |     
223 |     
224 |     
225 | 
226 |     
227 | 
228 | 
229 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/9_optimization_constraints.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from edbo.plus.optimizer_botorch import EDBOplus
  3 | import pandas as pd
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | import pareto
  7 | from edbo.plus.benchmark.multiobjective_benchmark import is_pareto
  8 | import torch
  9 | from botorch.utils.multi_objective.hypervolume import Hypervolume
 10 | import copy
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | import pandas as pd
 14 | import matplotlib as mpl
 15 | 
 16 | # # Metrics.
 17 | # def get_pareto_points(objective_values):
 18 | #     """ Get pareto for the ground truth function.
 19 | #     NOTE: Assumes maximization."""
 20 | #     pareto_ground = pareto.eps_sort(tables=objective_values,
 21 | #                                     objectives=np.arange(2),
 22 | #                                     maximize_all=True)
 23 | #     idx_pareto = is_pareto(objectives=-objective_values)
 24 | #     return np.array(pareto_ground), idx_pareto
 25 | 
 26 | # def get_hypervolume(pareto_points, ref_mins):
 27 | #     """
 28 | #     Calculate hypervolume.
 29 | #     """     
 30 | #     pareto_torch = torch.Tensor(pareto_points)
 31 | #     hv = Hypervolume(ref_point=torch.Tensor(ref_mins))
 32 | #     hypervolume = hv.compute(pareto_Y=pareto_torch)
 33 | #     return hypervolume
 34 | 
 35 | 
 36 | # # Combinations of constraints tested in this example.
 37 | # # Columns that remain constant after EDBO suggest the best sample using batch=1.
 38 | set_constraints = [
 39 |     ['ligand'],
 40 |     ['ligand', 'base'],
 41 |     ['solvent', 'concentration', 'temperature'],    
 42 | ]
 43 | 
 44 | # df_results = pd.DataFrame(columns=['seed', 'constraints', 
 45 | #                                    'n_exp', 'hypervolume'])
 46 | 
 47 | # for columns_to_constrain in set_constraints:
 48 | #     # Parameters.
 49 | #     batch_size = 5
 50 | #     # columns_to_constrain = ['solvent', 'concentration', 'temperature']  
 51 | #     n_rounds = 7 
 52 | #     n_seeds = 5   
 53 | #     # Load lookup tables.
 54 | #     df_hte = pd.read_csv('./data/experiments_yield_and_cost.csv')
 55 | #     # Get targets for hypervolume indicator.
 56 | #     targets_hte = np.zeros((len(df_hte), 2))
 57 | #     targets_hte[:, 0] = df_hte['yield'].to_numpy()
 58 | #     targets_hte[:, 1] = -df_hte['cost'].to_numpy()
 59 | #     worst_targets = np.min(targets_hte, axis=0)
 60 | #     pareto_ref = get_pareto_points(objective_values=targets_hte)[0]
 61 | #     hypervolume_ref = get_hypervolume(pareto_points=pareto_ref, ref_mins=worst_targets)
 62 | 
 63 | #     # Get columns names for regression and search space.
 64 | #     columns_search_space = df_hte.drop(columns=['yield', 'cost']).columns.tolist()
 65 | #     columns_regression = df_hte.drop(columns=['new_index', 'yield', 'cost']).columns.tolist()
 66 | #     df_full_space = df_hte[columns_search_space]
 67 | 
 68 | #     # Initialize optimization campaign.
 69 | #     for seed in range(0, n_seeds):
 70 | #         n_exp = 0
 71 | #         df_full_space.to_csv('optimization.csv', index=False)
 72 | #         for round in range(0, n_rounds):
 73 | #             EDBOplus().run(
 74 | #                 filename='optimization.csv',
 75 | #                 seed=seed, 
 76 | #                 objectives=['yield', 'cost'],  
 77 | #                 objective_mode=['max', 'min'],  # Maximize yield but minimize cost.
 78 | #                 batch=1,  
 79 | #                 columns_features=columns_regression, # features to be included in the model.
 80 | #                 init_sampling_method='cvtsampling'  # initialization method.
 81 | #             )
 82 |             
 83 | #             df_opt = pd.read_csv('optimization.csv')
 84 |                     
 85 | #             # Initial optimization to obtain the best sample in the entire search space.
 86 | #             best_suggested_sample = df_opt.loc[0]    
 87 | #             df_reduced_space = df_opt.copy()
 88 | #             for col in columns_to_constrain:
 89 | #                 df_reduced_space = df_reduced_space[df_reduced_space[col] == best_suggested_sample[col]]
 90 | 
 91 | #             df_reduced_space.drop(columns=['yield', 'cost', 'priority'], inplace=True)
 92 | #             df_reduced_space.to_csv('optimization_reduced.csv', index=False)
 93 |             
 94 | #             EDBOplus().run(
 95 | #                 filename='optimization_reduced.csv',  # Previously generated scope.
 96 | #                 objectives=['yield', 'cost'],  # Objectives to be optimized.
 97 | #                 objective_mode=['max', 'min'],  # Maximize yield but minimize cost.
 98 | #                 batch=batch_size,  
 99 | #                 seed=seed,
100 | #                 columns_features=columns_regression, # features to be included in the model.
101 | #                 init_sampling_method='cvtsampling'  # initialization method.
102 | #             )
103 |             
104 | #             df_opt_reduced = pd.read_csv('optimization_reduced.csv')    
105 |             
106 | #             idx_best_samples = df_opt_reduced['new_index'].values.tolist()[:batch_size]
107 | #             print('Index best samples:', idx_best_samples)
108 | #             df_opt = df_opt.sort_values(by='new_index')    
109 | #             df_opt.reset_index(inplace=True)
110 | #             df_opt.drop(columns=['index'], inplace=True)
111 |             
112 | #             for a in range(len(idx_best_samples)):        
113 | #                 df_opt.at[idx_best_samples[a],'yield'] = df_hte.loc[idx_best_samples[a]]['yield']
114 | #                 df_opt.at[idx_best_samples[a],'cost'] = df_hte.loc[idx_best_samples[a]]['cost']
115 | #                 df_opt.at[idx_best_samples[a],'priority'] = 1
116 |             
117 | #             df_opt = df_opt.sort_values(by='priority', ascending=False)        
118 | #             df_opt.to_csv('optimization.csv', index=False)
119 |             
120 | #             # Monitoring hypervolume.
121 | #             df_train = df_opt[df_opt['yield'] != 'PENDING']
122 | #             df_train['yield'] = copy.deepcopy(pd.to_numeric(df_train['yield']))
123 | #             df_train['cost'] = copy.deepcopy(pd.to_numeric(df_train['cost']))
124 |             
125 | #             targets_train = np.zeros((len(df_train), 2))
126 | #             targets_train[:, 0] = df_train['yield'].to_numpy()
127 | #             targets_train[:, 1] = -df_train['cost'].to_numpy()
128 | #             pareto_train = get_pareto_points(objective_values=targets_train)[0]
129 | #             hypervolume_train = get_hypervolume(pareto_points=pareto_train, 
130 | #                                             ref_mins=worst_targets)
131 | #             hypervolume_explored = (hypervolume_train/hypervolume_ref) * 100
132 |             
133 | #             n_exp += batch_size
134 | #             print(f"Number of samples: {n_exp}")        
135 | #             print(f"Hypervolume: {hypervolume_explored}")
136 |             
137 | #             dict_results = {'seed': seed,
138 | #                             'constraints': columns_to_constrain, 
139 | #                             'n_exp': n_exp, 
140 | #                             'hypervolume': hypervolume_explored}                    
141 | #             df_results = df_results.append(dict_results, ignore_index=True)
142 | #     df_results.to_csv('constraint_optimization_results.csv')
143 | 
144 | 
# Plot results.
#
# Reads the hypervolume values saved in 'constraint_optimization_results.csv'
# and draws, for every entry of `set_constraints`, the mean hypervolume
# against the number of experiments together with a shaded min-max envelope.
# The figure is written to './results_plots/optimization_constraints.svg'.
df_results = pd.read_csv('constraint_optimization_results.csv')
colors = ['#0343DF', '#FAC205', '#DC143C']

mpl.rcParams['grid.linestyle'] = ':'
mpl.rcParams['grid.linewidth'] = 0.1
plt.rcParams['font.family'] = 'Helvetica'

fig, ax = plt.subplots(figsize=(4., 4.0), dpi=500, nrows=1, ncols=1)

# Every n_exp value present in the results file, sorted ascending by np.unique.
all_n_exp = np.unique(df_results['n_exp'].values)

for idx, constraints in enumerate(set_constraints):
    # The 'constraints' column is matched against the string form of the
    # list (e.g. "['ligand', 'base']"); the same text is the legend label.
    label = str(constraints)
    df_constraint = df_results[df_results['constraints'] == label]

    # Aggregate only the 'hypervolume' column. Aggregating the whole frame
    # would also try to average the string-valued 'constraints' column,
    # which recent pandas versions reject with a TypeError.
    stats = (df_constraint
             .groupby('n_exp')['hypervolume']
             .agg(['mean', 'min', 'max']))

    # Take x from the group index so x and y always have the same length,
    # even if this constraint set does not cover every n_exp in the file.
    n_exp = stats.index.to_numpy()
    hypervol_avg = stats['mean'].to_numpy()
    hypervol_min = stats['min'].to_numpy()
    hypervol_max = stats['max'].to_numpy()

    # Wrap around the palette instead of raising IndexError when there are
    # more constraint sets than colors.
    color = colors[idx % len(colors)]

    ax.plot(n_exp, hypervol_avg, color=color, lw=2.5, label=label)
    # Shaded envelope between the worst and best value at each step.
    ax.fill_between(x=n_exp,
                    y1=hypervol_min,
                    y2=hypervol_max, color=color, alpha=0.3, lw=0.)
    ax.plot(n_exp, hypervol_min, color=color, alpha=1., lw=1., ls='--')
    ax.plot(n_exp, hypervol_max, color=color, alpha=1., lw=1., ls='--')

# 100 % reference line, drawn once after the curves so it stays on top.
ax.plot(all_n_exp, np.ones_like(all_n_exp) * 100,
        dashes=[8, 4], color='black', linewidth=0.8)

ax.set_xticks(np.arange(0, 120, 10))
# NOTE(review): the upper x-limit is the largest n_exp *excluding* the final
# one (all_n_exp[:-1]), so the last step lies beyond the visible range.
# Kept as in the original -- confirm this is intended.
ax.set_xlim(0, np.max(all_n_exp[:-1]))
ax.set_ylim(0, 100)
ax.set_xlabel('Number of experiments')
ax.set_ylabel('Hypervolume (%)')
ax.legend()
plt.savefig('./results_plots/optimization_constraints.svg', format='svg')
196 | 


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/data/base_dft.csv:
--------------------------------------------------------------------------------
1 | base_file_name,base_SMILES,base_stoichiometry,base_number_of_atoms,base_charge,base_multiplicity,base_convergence_criteria,base_dipole,base_molar_mass,base_molar_volume,base_electronic_spatial_extent,base_homo_energy,base_lumo_energy,base_electronegativity,base_hardness,base_electrophilicity,base_E_scf,base_zero_point_correction,base_E_thermal_correction,base_H_thermal_correction,base_G_thermal_correction,base_E_zpe,base_E,base_H,base_G,base_ES_root_dipole,base_ES_root_molar_volume,base_ES_root_electronic_spatial_extent,base_ES1_transition,base_ES1_osc_strength,base_ES1_<S**2>,base_ES2_transition,base_ES2_osc_strength,base_ES2_<S**2>,base_ES3_transition,base_ES3_osc_strength,base_ES3_<S**2>,base_ES4_transition,base_ES4_osc_strength,base_ES4_<S**2>,base_ES5_transition,base_ES5_osc_strength,base_ES5_<S**2>,base_ES6_transition,base_ES6_osc_strength,base_ES6_<S**2>,base_ES7_transition,base_ES7_osc_strength,base_ES7_<S**2>,base_ES8_transition,base_ES8_osc_strength,base_ES8_<S**2>,base_ES9_transition,base_ES9_osc_strength,base_ES9_<S**2>,base_ES10_transition,base_ES10_osc_strength,base_ES10_<S**2>,base_atom1_atom,base_atom1_Mulliken_charge,base_atom1_APT_charge,base_atom1_NPA_charge,base_atom1_NPA_core,base_atom1_NPA_valence,base_atom1_NPA_Rydberg,base_atom1_NPA_total,base_atom1_NMR_shift,base_atom1_NMR_anisotropy,base_atom1_ES_root_Mulliken_charge,base_atom1_ES_root_NPA_charge,base_atom1_ES_root_NPA_core,base_atom1_ES_root_NPA_valence,base_atom1_ES_root_NPA_Rydberg,base_atom1_ES_root_NPA_total,base_atom2_atom,base_atom2_Mulliken_charge,base_atom2_APT_charge,base_atom2_NPA_charge,base_atom2_NPA_core,base_atom2_NPA_valence,base_atom2_NPA_Rydberg,base_atom2_NPA_total,base_atom2_NMR_shift,base_atom2_NMR_anisotropy,base_atom2_ES_root_Mulliken_charge,base_atom2_ES_root_NPA_charge,base_atom2_ES_root_NPA_core,base_atom2_ES_root_NPA_valence,base_atom2_ES_root_NPA_Rydberg,base_atom2_ES_root_NPA_total,base_atom3_atom,base_atom3_Mulliken_charge,base_atom3_APT_charge,base_atom3_N
PA_charge,base_atom3_NPA_core,base_atom3_NPA_valence,base_atom3_NPA_Rydberg,base_atom3_NPA_total,base_atom3_NMR_shift,base_atom3_NMR_anisotropy,base_atom3_ES_root_Mulliken_charge,base_atom3_ES_root_NPA_charge,base_atom3_ES_root_NPA_core,base_atom3_ES_root_NPA_valence,base_atom3_ES_root_NPA_Rydberg,base_atom3_ES_root_NPA_total,base_c_min_atom_number,base_c_min_atom,base_c_min_atom=O,base_c_min_Mulliken_charge,base_c_min_APT_charge,base_c_min_NPA_charge,base_c_min_NPA_core,base_c_min_NPA_valence,base_c_min_NPA_Rydberg,base_c_min_NPA_total,base_c_min_NMR_shift,base_c_min_NMR_anisotropy,base_c_min_ES_root_Mulliken_charge,base_c_min_ES_root_NPA_charge,base_c_min_ES_root_NPA_core,base_c_min_ES_root_NPA_valence,base_c_min_ES_root_NPA_Rydberg,base_c_min_ES_root_NPA_total,base_c_min+1_atom_number,base_c_min+1_atom,base_c_min+1_atom=O,base_c_min+1_Mulliken_charge,base_c_min+1_APT_charge,base_c_min+1_NPA_charge,base_c_min+1_NPA_core,base_c_min+1_NPA_valence,base_c_min+1_NPA_Rydberg,base_c_min+1_NPA_total,base_c_min+1_NMR_shift,base_c_min+1_NMR_anisotropy,base_c_min+1_ES_root_Mulliken_charge,base_c_min+1_ES_root_NPA_charge,base_c_min+1_ES_root_NPA_core,base_c_min+1_ES_root_NPA_valence,base_c_min+1_ES_root_NPA_Rydberg,base_c_min+1_ES_root_NPA_total,base_c_max_atom_number,base_c_max_atom,base_c_max_atom=Cs,base_c_max_atom=K,base_c_max_Mulliken_charge,base_c_max_APT_charge,base_c_max_NPA_charge,base_c_max_NPA_core,base_c_max_NPA_valence,base_c_max_NPA_Rydberg,base_c_max_NPA_total,base_c_max_NMR_shift,base_c_max_NMR_anisotropy,base_c_max_ES_root_Mulliken_charge,base_c_max_ES_root_NPA_charge,base_c_max_ES_root_NPA_core,base_c_max_ES_root_NPA_valence,base_c_max_ES_root_NPA_Rydberg,base_c_max_ES_root_NPA_total,base_c_max-1_atom_number,base_c_max-1_atom,base_c_max-1_atom=C,base_c_max-1_Mulliken_charge,base_c_max-1_APT_charge,base_c_max-1_NPA_charge,base_c_max-1_NPA_core,base_c_max-1_NPA_valence,base_c_max-1_NPA_Rydberg,base_c_max-1_NPA_total,base_c_max-1_NMR_shift,base_c_max-1_NMR_anis
otropy,base_c_max-1_ES_root_Mulliken_charge,base_c_max-1_ES_root_NPA_charge,base_c_max-1_ES_root_NPA_core,base_c_max-1_ES_root_NPA_valence,base_c_max-1_ES_root_NPA_Rydberg,base_c_max-1_ES_root_NPA_total,base_vib_1_vibration,base_vib_1_standard_vibration,base_vib_1_correlation,base_vib_1_frequency,base_vib_1_reduced_mass,base_vib_1_frc_const,base_vib_1_IR_intensity,base_vib_1_dip_strength,base_vib_1_rot_strength,base_vib_1_E-M_angle,base_vib_1_standard_frequency,base_vib_1_standard_reduced_mass,base_vib_1_standard_frc_const,base_vib_1_standard_IR_intensity,base_vib_1_standard_dip_strength,base_vib_1_standard_rot_strength,base_vib_1_standard_E-M_angle,base_vib_2_vibration,base_vib_2_standard_vibration,base_vib_2_correlation,base_vib_2_frequency,base_vib_2_reduced_mass,base_vib_2_frc_const,base_vib_2_IR_intensity,base_vib_2_dip_strength,base_vib_2_rot_strength,base_vib_2_E-M_angle,base_vib_2_standard_frequency,base_vib_2_standard_reduced_mass,base_vib_2_standard_frc_const,base_vib_2_standard_IR_intensity,base_vib_2_standard_dip_strength,base_vib_2_standard_rot_strength,base_vib_2_standard_E-M_angle,base_vib_3_vibration,base_vib_3_standard_vibration,base_vib_3_correlation,base_vib_3_frequency,base_vib_3_reduced_mass,base_vib_3_frc_const,base_vib_3_IR_intensity,base_vib_3_dip_strength,base_vib_3_rot_strength,base_vib_3_E-M_angle,base_vib_3_standard_frequency,base_vib_3_standard_reduced_mass,base_vib_3_standard_frc_const,base_vib_3_standard_IR_intensity,base_vib_3_standard_dip_strength,base_vib_3_standard_rot_strength,base_vib_3_standard_E-M_angle,base_vib_4_vibration,base_vib_4_standard_vibration,base_vib_4_correlation,base_vib_4_frequency,base_vib_4_reduced_mass,base_vib_4_frc_const,base_vib_4_IR_intensity,base_vib_4_dip_strength,base_vib_4_rot_strength,base_vib_4_E-M_angle,base_vib_4_standard_frequency,base_vib_4_standard_reduced_mass,base_vib_4_standard_frc_const,base_vib_4_standard_IR_intensity,base_vib_4_standard_dip_strength,base_vib_4_standard_rot_strength,base_vi
b_4_standard_E-M_angle,base_vib_5_vibration,base_vib_5_standard_vibration,base_vib_5_correlation,base_vib_5_frequency,base_vib_5_reduced_mass,base_vib_5_frc_const,base_vib_5_IR_intensity,base_vib_5_dip_strength,base_vib_5_rot_strength,base_vib_5_E-M_angle,base_vib_5_standard_frequency,base_vib_5_standard_reduced_mass,base_vib_5_standard_frc_const,base_vib_5_standard_IR_intensity,base_vib_5_standard_dip_strength,base_vib_5_standard_rot_strength,base_vib_5_standard_E-M_angle,base_vib_6_vibration,base_vib_6_standard_vibration,base_vib_6_correlation,base_vib_6_frequency,base_vib_6_reduced_mass,base_vib_6_frc_const,base_vib_6_IR_intensity,base_vib_6_dip_strength,base_vib_6_rot_strength,base_vib_6_E-M_angle,base_vib_6_standard_frequency,base_vib_6_standard_reduced_mass,base_vib_6_standard_frc_const,base_vib_6_standard_IR_intensity,base_vib_6_standard_dip_strength,base_vib_6_standard_rot_strength,base_vib_6_standard_E-M_angle,base_vib_7_vibration,base_vib_7_standard_vibration,base_vib_7_correlation,base_vib_7_frequency,base_vib_7_reduced_mass,base_vib_7_frc_const,base_vib_7_IR_intensity,base_vib_7_dip_strength,base_vib_7_rot_strength,base_vib_7_E-M_angle,base_vib_7_standard_frequency,base_vib_7_standard_reduced_mass,base_vib_7_standard_frc_const,base_vib_7_standard_IR_intensity,base_vib_7_standard_dip_strength,base_vib_7_standard_rot_strength,base_vib_7_standard_E-M_angle,base_vib_8_vibration,base_vib_8_standard_vibration,base_vib_8_correlation,base_vib_8_frequency,base_vib_8_reduced_mass,base_vib_8_frc_const,base_vib_8_IR_intensity,base_vib_8_dip_strength,base_vib_8_rot_strength,base_vib_8_E-M_angle,base_vib_8_standard_frequency,base_vib_8_standard_reduced_mass,base_vib_8_standard_frc_const,base_vib_8_standard_IR_intensity,base_vib_8_standard_dip_strength,base_vib_8_standard_rot_strength,base_vib_8_standard_E-M_angle,base_atom1_%VBur,base_atom2_%VBur,base_atom3_%VBur,base_c_min_%VBur,base_c_min+1_%VBur,base_c_max_%VBur,base_c_max-1_%VBur
2 | CsOAc,O=C([O-])C.[Cs+],C2H3CsO2,8,0,1,met,10.1478,191.9499,406.01,876.2473,-0.16646,-0.05377,0.11011499999999999,0.056345,0.056345,-248.406264045,0.049939,0.056406,0.05735,0.015986,-248.356325,-248.349858,-248.348914,-248.390278,8.2546,1380.434,909.9558,498.95,0.0037,0.000,432.49,0.1696,0.000,429.03,0.0000,0.000,324.62,0.0000,0.000,312.85,0.0122,0.000,308.18,0.0425,0.000,299.13,0.0286,0.000,290.79,0.0138,0.000,290.15,0.0014,0.000,281.64,0.0007,0.000,O1,-0.630819,-0.948476,-0.82325,1.99975,6.80840,0.01510,8.82325,37.0240,325.6241,-0.367329,-0.45721,1.99977,6.44316,0.01428,8.45721,O2,-0.627311,-0.929421,-0.81310,1.99975,6.79835,0.01500,8.81310,12.2909,315.3062,-0.370284,-0.45523,1.99977,6.44127,0.01419,8.45523,Cs3,0.845450,0.946019,0.93442,53.99587,0.05739,0.01232,54.06558,77.7746,4.3447,0.102280,0.12092,54.00014,0.75463,0.12431,54.87908,1,O,1,-0.630819,-0.948476,-0.82325,1.99975,6.80840,0.01510,8.82325,37.0240,325.6241,-0.367329,-0.45721,1.99977,6.44316,0.01428,8.45721,3,O,1,-0.627311,-0.929421,-0.81310,1.99975,6.79835,0.01500,8.81310,12.2909,315.3062,-0.370284,-0.45523,1.99977,6.44127,0.01419,8.45523,5,Cs,1,0,0.845450,0.946019,0.93442,53.99587,0.05739,0.01232,54.06558,77.7746,4.3447,0.102280,0.12092,54.00014,0.75463,0.12431,54.87908,2,C,1,0.503244,1.052880,0.79419,1.99945,3.15120,0.05516,5.20581,23.9298,101.6692,0.597363,0.80166,1.99946,3.14820,0.05067,5.19834,6,6,1.,618.7872,2.451,0.5529,7.6617,49.3959,0.0056,89.9964,618.7872,2.451,0.5529,7.6617,49.3959,0.0056,89.9964,7,7,1.,646.437,6.0553,1.4909,18.1353,111.9193,-0.0052,90.08,646.437,6.0553,1.4909,18.1353,111.9193,-0.0052,90.08,8,8,1.0000000000000002,916.3218,7.2493,3.5863,16.7246,72.8142,0.0002,89.9953,916.3218,7.2493,3.5863,16.7246,72.8142,0.0002,89.9953,9,9,1.0000000000000002,1027.1507,1.4579,0.9063,6.8743,26.6993,0.0013,89.997,1027.1507,1.4579,0.9063,6.8743,26.6993,0.0013,89.997,10,10,1.,1063.8484,1.8673,1.2451,6.1805,23.1767,-0.0021,90.0022,1063.8484,1.8673,1.2451,6.1805,23.1767,-0.0021,90.0022,11,11,1.00
00000000000002,1377.1441,1.4156,1.5818,25.4311,73.6705,0.0003,89.9992,1377.1441,1.4156,1.5818,25.4311,73.6705,0.0003,89.9992,12,12,1.,1428.8053,5.1995,6.254,180.9276,505.1719,0.0002,89.9995,1428.8053,5.1995,6.254,180.9276,505.1719,0.0002,89.9995,15,15,0.9999999999999999,1677.2563,8.6498,14.3368,430.3053,1023.4925,0.,90.,1677.2563,8.6498,14.3368,430.3053,1023.4925,0.,90.,0.4621717912637346,0.4583096314481602,0.4355558216819856,0.4621717912637346,0.4583096314481602,0.4355558216819856,0.5387263854375617
3 | CsOPiv,O=C([O-])C(C)(C)C.[Cs+],C5H9CsO2,17,0,1,met,11.0112,234.0303,1542.838,1831.5572,-0.17387,-0.05538,0.114625,0.059245,0.059245,-366.348293098,0.135133,0.145329,0.146273,0.096898,-366.21316,-366.202964,-366.20202,-366.251395,8.9444,1218.03,1885.5803,469.32,0.0031,0.000,434.42,0.1679,0.000,409.02,0.0000,0.000,311.54,0.0012,0.000,307.21,0.0401,0.000,301.54,0.0076,0.000,297.70,0.0363,0.000,281.05,0.0009,0.000,280.56,0.0077,0.000,275.55,0.0018,0.000,O1,-0.626594,-0.907646,-0.80886,1.99975,6.79434,0.01477,8.80886,36.4140,345.8849,-0.363461,-0.43354,1.99977,6.41981,0.01396,8.43354,O2,-0.640045,-0.928433,-0.82255,1.99975,6.80799,0.01481,8.82255,56.9329,339.6416,-0.390175,-0.47421,1.99977,6.46010,0.01434,8.47421,Cs3,0.848316,0.958216,0.93820,53.99606,0.05620,0.00954,54.06180,77.7495,5.0811,0.075126,0.10338,53.99940,0.78274,0.11448,54.89662,3,O,1,-0.640045,-0.928433,-0.82255,1.99975,6.80799,0.01481,8.82255,56.9329,339.6416,-0.390175,-0.47421,1.99977,6.46010,0.01434,8.47421,1,O,1,-0.626594,-0.907646,-0.80886,1.99975,6.79434,0.01477,8.80886,36.4140,345.8849,-0.363461,-0.43354,1.99977,6.41981,0.01396,8.43354,8,Cs,1,0,0.848316,0.958216,0.93820,53.99606,0.05620,0.00954,54.06180,77.7495,5.0811,0.075126,0.10338,53.99940,0.78274,0.11448,54.89662,2,C,1,0.513009,0.941821,0.81589,1.99929,3.12499,0.05983,5.18411,16.0679,110.6588,0.611765,0.82753,1.99928,3.11710,0.05610,5.17247,15,6,0.9959074339135122,794.1344,6.9049,2.5656,5.4827,27.5427,0.0024,89.9968,618.7872,2.451,0.5529,7.6617,49.3959,0.0056,89.9964,14,7,0.9816918876799305,588.297,3.5036,0.7144,10.1352,68.7297,0.0015,89.864,646.437,6.0553,1.4909,18.1353,111.9193,-0.0052,90.08,17,8,-0.9966831868163546,888.0265,3.386,1.5732,30.4925,136.9854,0.009,89.8991,916.3218,7.2493,3.5863,16.7246,72.8142,0.0002,89.9953,22,9,0.9443745773600293,1059.5097,1.4077,0.9311,0.7714,2.9047,-0.0667,90.3188,1027.1507,1.4579,0.9063,6.8743,26.6993,0.0013,89.997,23,10,-0.9999999970969979,1243.8906,2.7116,2.4719,4.866,15.6061,-0.0039,90.0381,1063.8484,1.
8673,1.2451,6.1805,23.1767,-0.0021,90.0022,29,11,-0.999939389883036,1456.51,1.5233,1.904,45.7368,125.2737,0.0018,89.9899,1377.1441,1.4156,1.5818,25.4311,73.6705,0.0003,89.9992,26,12,0.9995646393653587,1395.7311,2.9236,3.3556,87.7367,250.7766,-0.0087,90.0441,1428.8053,5.1995,6.254,180.9276,505.1719,0.0002,89.9995,36,15,0.9971974621484632,1661.3087,11.2165,18.2392,378.225,908.2539,-0.0008,90.0001,1677.2563,8.6498,14.3368,430.3053,1023.4925,0.,90.,0.5230981108350049,0.539564683692105,0.43534624711834974,0.539564683692105,0.5230981108350049,0.43534624711834974,0.664291488278794
4 | KOAc,O=C([O-])C.[K+],C2H3KO2,8,0,1,met,7.1686,98.1428,545.524,641.7535,-0.19743,-0.04683,0.12212999999999999,0.0753,0.0753,-828.445320274,0.050381,0.056594,0.057539,0.018561,-828.394939,-828.388726,-828.387782,-828.426759,8.5245,606.138,685.0531,363.32,0.0031,0.000,327.15,0.0000,0.000,321.58,0.1037,0.000,261.01,0.0000,0.000,251.07,0.0078,0.000,244.35,0.0107,0.000,241.63,0.0001,0.000,239.66,0.0025,0.000,234.54,0.0124,0.000,232.06,0.0018,0.000,O1,-0.627463,-0.943989,-0.82067,1.99980,6.81192,0.00895,8.82067,30.0644,310.1108,-0.363511,-0.43458,1.99983,6.41968,0.01507,8.43458,O2,-0.627534,-0.927130,-0.81465,1.99980,6.79875,0.01610,8.81465,6.9248,303.5098,-0.370326,-0.43362,1.99983,6.41864,0.01515,8.43362,K3,0.725768,0.862762,0.91455,17.99508,0.06358,0.02679,18.08545,1286.0105,40.8048,0.001831,0.05032,18.00078,0.85275,0.09615,18.94968,1,O,1,-0.627463,-0.943989,-0.82067,1.99980,6.81192,0.00895,8.82067,30.0644,310.1108,-0.363511,-0.43458,1.99983,6.41968,0.01507,8.43458,3,O,1,-0.627534,-0.927130,-0.81465,1.99980,6.79875,0.01610,8.81465,6.9248,303.5098,-0.370326,-0.43362,1.99983,6.41864,0.01515,8.43362,5,K,0,1,0.725768,0.862762,0.91455,17.99508,0.06358,0.02679,18.08545,1286.0105,40.8048,0.001831,0.05032,18.00078,0.85275,0.09615,18.94968,2,C,1,0.582472,1.091107,0.78874,1.99958,3.15451,0.05718,5.21126,19.4377,106.5248,0.659809,0.78899,1.99960,3.15956,0.05185,5.21101,6,6,0.9999890370004872,616.9335,2.5135,0.5637,7.6394,49.4004,-0.0238,90.0144,618.7872,2.451,0.5529,7.6617,49.3959,0.0056,89.9964,7,7,0.9987724746696157,659.7903,6.1973,1.5895,29.1201,176.0738,0.0207,89.6665,646.437,6.0553,1.4909,18.1353,111.9193,-0.0052,90.08,8,8,-0.9998826675540469,925.2324,7.2377,3.6505,12.1374,52.3339,-0.0009,90.0186,916.3218,7.2493,3.5863,16.7246,72.8142,0.0002,89.9953,9,9,0.9999989809690724,1031.7256,1.4605,0.916,7.6401,29.5422,-0.0049,90.009,1027.1507,1.4579,0.9063,6.8743,26.6993,0.0013,89.997,10,10,0.9999999998768896,1069.0842,1.8415,1.2401,5.4846,20.4663,0.0076,89.9904,1063.8484,1.8673,1
.2451,6.1805,23.1767,-0.0021,90.0022,11,11,0.9999990327735793,1383.254,1.412,1.5918,21.8732,63.0837,-0.0014,90.0059,1377.1441,1.4156,1.5818,25.4311,73.6705,0.0003,89.9992,12,12,0.9997085947517004,1439.3398,4.9649,6.0602,223.0332,618.178,-0.0014,90.0038,1428.8053,5.1995,6.254,180.9276,505.1719,0.0002,89.9995,15,15,-0.9997468732478542,1644.7032,7.706,12.2816,409.2433,992.662,0.0008,89.9999,1677.2563,8.6498,14.3368,430.3053,1023.4925,0.,90.,0.5728870393102002,0.5692344540582618,0.8437172539744319,0.5728870393102002,0.5692344540582618,0.8437172539744319,0.6196521062243645
5 | KOPiv,O=C([O-])C(C)(C)C.[K+],C5H9KO2,17,0,1,met,7.7731,140.2232,1394.117,1500.593,-0.2019,-0.04847,0.125185,0.076715,0.076715,-946.387800763,0.135595,0.145534,0.146478,0.099461,-946.252206,-946.242267,-946.241322,-946.28834,8.9776,909.207,1563.3278,355.25,0.0024,0.000,330.15,0.1106,0.000,321.67,0.0000,0.000,256.20,0.0000,0.000,248.54,0.0117,0.000,247.09,0.0045,0.000,240.58,0.0065,0.000,236.56,0.0018,0.000,233.63,0.0109,0.000,229.18,0.0018,0.000,O1,-0.640102,-0.902338,-0.81130,1.99979,6.79620,0.01531,8.81130,27.8484,331.9497,-0.379796,-0.42010,1.99983,6.40559,0.01469,8.42010,O2,-0.649551,-0.924868,-0.82499,1.99979,6.81009,0.01510,8.82499,46.0592,327.5974,-0.397948,-0.45369,1.99983,6.43914,0.01472,8.45369,K3,0.731425,0.873667,0.91906,17.99520,0.06186,0.02389,18.08094,1287.9448,39.1010,-0.016351,0.04662,18.00055,0.86198,0.09086,18.95338,3,O,1,-0.649551,-0.924868,-0.82499,1.99979,6.81009,0.01510,8.82499,46.0592,327.5974,-0.397948,-0.45369,1.99983,6.43914,0.01472,8.45369,1,O,1,-0.640102,-0.902338,-0.81130,1.99979,6.79620,0.01531,8.81130,27.8484,331.9497,-0.379796,-0.42010,1.99983,6.40559,0.01469,8.42010,8,K,0,1,0.731425,0.873667,0.91906,17.99520,0.06186,0.02389,18.08094,1287.9448,39.1010,-0.016351,0.04662,18.00055,0.86198,0.09086,18.95338,2,C,1,0.614239,0.982954,0.81254,1.99942,3.12952,0.05852,5.18746,11.3401,114.6019,0.697336,0.81335,1.99941,3.12961,0.05763,5.18665,15,6,0.9985454480831925,791.4536,6.8855,2.5412,5.0505,25.4575,0.0014,89.9982,618.7872,2.451,0.5529,7.6617,49.3959,0.0056,89.9964,14,7,-0.9599107742386958,595.6456,3.6165,0.756,15.9905,107.098,0.0007,89.7512,646.437,6.0553,1.4909,18.1353,111.9193,-0.0052,90.08,17,8,0.9954281640750704,893.1298,3.5163,1.6526,27.9244,124.7315,0.0022,89.9738,916.3218,7.2493,3.5863,16.7246,72.8142,0.0002,89.9953,22,9,-0.9145365457535707,1060.0869,1.4056,0.9307,0.9054,3.4074,-0.0275,90.1271,1027.1507,1.4579,0.9063,6.8743,26.6993,0.0013,89.997,23,10,-0.9999999942993534,1242.9392,2.7008,2.4583,3.9264,12.6023,-0.0019,90.0937,1063.8
484,1.8673,1.2451,6.1805,23.1767,-0.0021,90.0022,29,11,0.9999328641835897,1460.0476,1.5532,1.9507,61.3992,167.7656,0.0017,89.9895,1377.1441,1.4156,1.5818,25.4311,73.6705,0.0003,89.9992,29,12,-0.999479732422525,1460.0476,1.5532,1.9507,61.3992,167.7656,0.0017,89.9895,1428.8053,5.1995,6.254,180.9276,505.1719,0.0002,89.9995,36,15,-0.9973077260375155,1625.8494,10.5985,16.5065,357.3476,876.8352,-0.0087,90.0138,1677.2563,8.6498,14.3368,430.3053,1023.4925,0.,90.,0.6338732373282236,0.6495613903775336,0.8435376186341725,0.6495613903775336,0.6338732373282236,0.8435376186341725,0.7457261758629982


--------------------------------------------------------------------------------
/examples/publication/BMS_yield_cost/data/solvent_dft.csv:
--------------------------------------------------------------------------------
1 | solvent_file_name,solvent_SMILES,solvent_stoichiometry,solvent_number_of_atoms,solvent_charge,solvent_multiplicity,solvent_convergence_criteria,solvent_dipole,solvent_molar_mass,solvent_molar_volume,solvent_electronic_spatial_extent,solvent_homo_energy,solvent_lumo_energy,solvent_electronegativity,solvent_hardness,solvent_electrophilicity,solvent_E_scf,solvent_zero_point_correction,solvent_E_thermal_correction,solvent_H_thermal_correction,solvent_G_thermal_correction,solvent_E_zpe,solvent_E,solvent_H,solvent_G,solvent_ES_root_dipole,solvent_ES_root_molar_volume,solvent_ES_root_electronic_spatial_extent,solvent_ES1_transition,solvent_ES1_osc_strength,solvent_ES1_<S**2>,solvent_ES2_transition,solvent_ES2_osc_strength,solvent_ES2_<S**2>,solvent_ES3_transition,solvent_ES3_osc_strength,solvent_ES3_<S**2>,solvent_ES4_transition,solvent_ES4_osc_strength,solvent_ES4_<S**2>,solvent_ES5_transition,solvent_ES5_osc_strength,solvent_ES5_<S**2>,solvent_ES6_transition,solvent_ES6_osc_strength,solvent_ES6_<S**2>,solvent_ES7_transition,solvent_ES7_osc_strength,solvent_ES7_<S**2>,solvent_ES8_transition,solvent_ES8_osc_strength,solvent_ES8_<S**2>,solvent_ES9_transition,solvent_ES9_osc_strength,solvent_ES9_<S**2>,solvent_ES10_transition,solvent_ES10_osc_strength,solvent_ES10_<S**2>,solvent_c_min_atom_number,solvent_c_min_atom,solvent_c_min_atom=N,solvent_c_min_atom=O,solvent_c_min_atom=C,solvent_c_min_Mulliken_charge,solvent_c_min_APT_charge,solvent_c_min_NPA_charge,solvent_c_min_NPA_core,solvent_c_min_NPA_valence,solvent_c_min_NPA_Rydberg,solvent_c_min_NPA_total,solvent_c_min_NMR_shift,solvent_c_min_NMR_anisotropy,solvent_c_min_ES_root_Mulliken_charge,solvent_c_min_ES_root_NPA_charge,solvent_c_min_ES_root_NPA_core,solvent_c_min_ES_root_NPA_valence,solvent_c_min_ES_root_NPA_Rydberg,solvent_c_min_ES_root_NPA_total,solvent_c_min+1_atom_number,solvent_c_min+1_atom,solvent_c_min+1_atom=C,solvent_c_min+1_atom=O,solvent_c_min+1_atom=N,solvent_c_min+1_Mulliken_charge,solvent_c_min+1_APT_c
harge,solvent_c_min+1_NPA_charge,solvent_c_min+1_NPA_core,solvent_c_min+1_NPA_valence,solvent_c_min+1_NPA_Rydberg,solvent_c_min+1_NPA_total,solvent_c_min+1_NMR_shift,solvent_c_min+1_NMR_anisotropy,solvent_c_min+1_ES_root_Mulliken_charge,solvent_c_min+1_ES_root_NPA_charge,solvent_c_min+1_ES_root_NPA_core,solvent_c_min+1_ES_root_NPA_valence,solvent_c_min+1_ES_root_NPA_Rydberg,solvent_c_min+1_ES_root_NPA_total,solvent_c_max_atom_number,solvent_c_max_atom,solvent_c_max_atom=C,solvent_c_max_atom=H,solvent_c_max_Mulliken_charge,solvent_c_max_APT_charge,solvent_c_max_NPA_charge,solvent_c_max_NPA_core,solvent_c_max_NPA_valence,solvent_c_max_NPA_Rydberg,solvent_c_max_NPA_total,solvent_c_max_NMR_shift,solvent_c_max_NMR_anisotropy,solvent_c_max_ES_root_Mulliken_charge,solvent_c_max_ES_root_NPA_charge,solvent_c_max_ES_root_NPA_core,solvent_c_max_ES_root_NPA_valence,solvent_c_max_ES_root_NPA_Rydberg,solvent_c_max_ES_root_NPA_total,solvent_c_max-1_atom_number,solvent_c_max-1_atom,solvent_c_max-1_atom=H,solvent_c_max-1_Mulliken_charge,solvent_c_max-1_APT_charge,solvent_c_max-1_NPA_charge,solvent_c_max-1_NPA_core,solvent_c_max-1_NPA_valence,solvent_c_max-1_NPA_Rydberg,solvent_c_max-1_NPA_total,solvent_c_max-1_NMR_shift,solvent_c_max-1_NMR_anisotropy,solvent_c_max-1_ES_root_Mulliken_charge,solvent_c_max-1_ES_root_NPA_charge,solvent_c_max-1_ES_root_NPA_core,solvent_c_max-1_ES_root_NPA_valence,solvent_c_max-1_ES_root_NPA_Rydberg,solvent_c_max-1_ES_root_NPA_total,solvent_c_min_%VBur,solvent_c_min+1_%VBur,solvent_c_max_%VBur,solvent_c_max-1_%VBur
2 | BuCN,CCCC#N,C4H7N,12,0,1,met,4.0491,69.106,914.079,571.8195,-0.3186,0.03549,0.141555,0.177045,0.177045,-211.38290967,0.103466,0.109411,0.110356,0.074482,-211.279443,-211.273498,-211.272554,-211.308428,3.124,796.555,571.8803,161.05,0.0001,0.000,154.99,0.0089,0.000,154.72,0.0015,0.000,134.54,0.0145,0.000,132.51,0.0177,0.000,127.49,0.0395,0.000,127.44,0.0007,0.000,125.59,0.0002,0.000,123.78,0.0156,0.000,122.66,0.0268,0.000,5,N,1,0,0,-0.472503,-0.316553,-0.32895,1.99965,5.30799,0.02132,7.32895,0.2592,455.8112,-0.443011,-0.26674,1.99970,5.23124,0.03580,7.26674,1,C,1,0,0,-0.446562,0.081423,-0.68089,1.99946,4.67343,0.00800,6.68089,174.3062,23.2239,-0.452535,-0.68587,1.99946,4.67726,0.00916,6.68587,4,C,1,0,0.347150,0.093010,0.28826,1.99942,3.67740,0.03492,5.71174,83.2576,312.0115,0.357169,0.29020,1.99955,3.66181,0.04844,5.70980,12,H,1,0.190629,-0.001691,0.27199,0.00000,0.72663,0.00138,0.72801,30.2477,6.3175,0.177540,0.26144,0.00000,0.73154,0.00702,0.73856,0.2941528696745606,0.42073590611059547,0.44292087063261576,0.41396964162749617
3 | BuOAc,CCCCOC(C)=O,C6H12O2,20,0,1,met,1.732,116.1596,1374.801,1567.8891,-0.26727,0.01633,0.12547,0.1418,0.1418,-386.334640247,0.176341,0.186348,0.187292,0.139915,-386.158299,-386.148293,-386.147349,-386.194725,1.1576,933.507,1569.2596,213.62,0.0012,0.000,159.76,0.1002,0.000,150.56,0.0020,0.000,142.27,0.0048,0.000,137.73,0.0011,0.000,137.18,0.0008,0.000,134.14,0.1183,0.000,133.21,0.0135,0.000,130.96,0.0016,0.000,128.95,0.0377,0.000,5,O,0,1,0,-0.456115,-0.901813,-0.56061,1.99975,6.54945,0.01141,8.56061,134.3177,142.8309,-0.443534,-0.51973,1.99977,6.50828,0.01167,8.51973,8,O,0,1,0,-0.470030,-0.684614,-0.60235,1.99978,6.58292,0.01964,8.60235,-71.7431,570.4859,-0.303557,-0.28504,1.99981,6.26508,0.02015,8.28504,6,C,1,0,0.600089,1.124279,0.82420,1.99949,3.13174,0.04457,5.17580,29.0632,84.0999,0.462388,0.48792,1.99952,3.47223,0.04032,5.51208,18,H,1,0.180845,0.019608,0.25616,0.00000,0.74297,0.00086,0.74384,30.2399,6.1567,0.160979,0.23070,0.00000,0.76480,0.00451,0.76930,0.5591748750037423,0.43049609293134933,0.5580371845154337,0.3342714289991317
4 | DMAc,CC(N(C)C)=O,C4H9NO,15,0,1,met,3.6595,87.1212,854.473,624.6972,-0.2338,0.03388,0.09996000000000001,0.13384000000000001,0.13384000000000001,-287.830205604,0.131005,0.138714,0.139658,0.099133,-287.699201,-287.691491,-287.690547,-287.731073,2.0521,881.599,624.4849,220.76,0.0009,0.000,178.41,0.2223,0.000,162.71,0.0264,0.000,156.10,0.0121,0.000,147.68,0.0172,0.000,142.30,0.0073,0.000,135.59,0.0005,0.000,132.75,0.0026,0.000,132.39,0.0150,0.000,130.70,0.0003,0.000,6,O,0,1,0,-0.508595,-0.777915,-0.63034,1.99980,6.61277,0.01778,8.63034,-65.8556,569.4492,-0.338968,-0.28910,1.99983,6.27046,0.01882,8.28910,3,N,0,0,1,-0.392215,-0.743329,-0.47855,1.99933,5.46881,0.01041,7.47855,155.0284,111.8179,-0.395470,-0.46828,1.99935,5.45833,0.01061,7.46828,2,C,1,0,0.577639,1.037174,0.69314,1.99938,3.26821,0.03928,5.30686,33.7389,96.6573,0.462343,0.37787,1.99940,3.58626,0.03647,5.62213,10,H,1,0.206649,0.071311,0.26442,0.00000,0.73335,0.00223,0.73558,27.7382,6.7158,0.180961,0.24084,0.00000,0.75824,0.00092,0.75916,0.4078321008353043,0.6535133678632375,0.5860303583725037,0.3953474446872848
5 | p-Xylene,CC1=CC=C(C)C=C1,C8H10,18,0,1,met,0.0011,106.167,1034.845,1072.0641,-0.22568,0.0068,0.10944,0.11624,0.11624,-310.884514007,0.15581,0.163925,0.164869,0.121251,-310.728704,-310.720589,-310.719645,-310.763263,0.0004,862.371,1072.3756,232.87,0.0037,0.000,206.12,0.0838,0.000,177.12,0.3578,0.000,175.96,0.7943,0.000,159.63,0.0002,0.000,159.63,0.0034,0.000,157.51,0.0000,0.000,156.96,0.0015,0.000,154.99,0.0042,0.000,150.37,0.0000,0.000,1,C,0,0,1,-0.529145,0.099551,-0.68665,1.99939,4.68003,0.00723,6.68665,167.9050,34.2593,-0.530333,-0.69452,1.99940,4.68738,0.00774,6.69452,6,C,1,0,0,-0.529145,0.099552,-0.68665,1.99939,4.68003,0.00723,6.68665,167.9049,34.2594,-0.530332,-0.69452,1.99940,4.68738,0.00774,6.69452,9,H,0,1,0.154870,-0.023372,0.23741,0.00000,0.76170,0.00089,0.76259,30.1558,7.2488,0.157494,0.23910,0.00000,0.75928,0.00162,0.76090,14,H,1,0.154871,-0.023375,0.23741,0.00000,0.76170,0.00089,0.76259,30.1556,7.2489,0.157495,0.23911,0.00000,0.75928,0.00162,0.76089,0.4245681267027933,0.4245980659261699,0.3513068471003862,0.3512469686536331


--------------------------------------------------------------------------------
/examples/publication/Crosscoupling/1_run_experiments.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Cross-coupling photoredox.
 3 | import pandas as pd
 4 | from edbo.plus.optimizer_botorch import EDBOplus
 5 | 
 6 | filename = 'edbo_crosscoupling_photoredox_yield_ee.csv'
 7 | 
 8 | df_to_opt = pd.read_csv(filename)
 9 | regression_columns = df_to_opt.columns.drop(['Ligand', 'priority']).values.tolist()
10 | 
11 | opt = EDBOplus()
12 | opt.run(
13 |         filename=filename,
14 |         objectives=['yield', 'ee'],
15 |         objective_mode=['max', 'max'],
16 |         objective_thresholds=[None, None],
17 |         batch=3,
18 |         init_sampling_method='cvtsampling',
19 |         columns_features=regression_columns
20 |         )
21 | 


--------------------------------------------------------------------------------
/examples/publication/Crosscoupling/campaigns/0_recalculate_predictions.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import matplotlib.pyplot as plt
 4 | import numpy as np
 5 | import pandas as pd
 6 | import shutil
 7 | 
 8 | for campaign in ['challenging_campaign_cvt', 'challenging_campaign_random', 'easy_campaign']:
 9 |     for round in range(1, 8):
10 |         df = pd.read_csv(f"{campaign}/edbo_crosscoupling_photoredox_yield_ee_round{round}.csv")
11 |         df.to_csv('optimization.csv', index=False)
12 | 
13 |         from edbo.plus.optimizer_botorch import EDBOplus
14 | 
15 |         filename = 'optimization.csv'
16 | 
17 |         regression_columns = df.columns.drop(['Ligand', 'priority']).values.tolist()
18 | 
19 |         opt = EDBOplus()
20 |         opt.run(
21 |             filename=filename,
22 |             objectives=['yield', 'ee'],
23 |             objective_mode=['max', 'max'],
24 |             objective_thresholds=[None, None],
25 |             batch=3,
26 |             init_sampling_method='cvtsampling',
27 |             columns_features=regression_columns
28 |         )
29 | 
30 |         shutil.copy('pred_optimization.csv', f"{campaign}/predictions_{round}.csv")


--------------------------------------------------------------------------------
/examples/publication/Crosscoupling/campaigns/1_analysis.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import matplotlib.pyplot as plt
  3 | import numpy as np
  4 | import pandas as pd
  5 | import shutil
  6 | import seaborn as sns
  7 | 
  8 | for campaign in ['challenging_campaign_cvt', 'challenging_campaign_random', 'easy_campaign']:
  9 | 
 10 |     fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(12, 4))
 11 | 
 12 |     av_uncertainties_yield = []
 13 |     max_uncertainties_yield = []
 14 |     av_uncertainties_ee = []
 15 |     max_uncertainties_ee = []
 16 | 
 17 |     for round in range(1, 8):
 18 |         df = pd.read_csv(f"{campaign}/predictions_{round}.csv")
 19 | 
 20 | 
 21 |         max_uncertainties_yield.append(df['yield_predicted_variance'].max())
 22 |         max_uncertainties_ee.append(df['ee_predicted_variance'].max())
 23 | 
 24 |         av_uncertainties_yield.append(df['yield_predicted_variance'].mean())
 25 |         av_uncertainties_ee.append(df['ee_predicted_variance'].mean())
 26 | 
 27 |     max_uncertainties_yield = np.sqrt(max_uncertainties_yield)
 28 |     max_uncertainties_ee = np.sqrt(max_uncertainties_ee)
 29 |     av_uncertainties_yield = np.sqrt(av_uncertainties_yield)
 30 |     av_uncertainties_ee = np.sqrt(av_uncertainties_ee)
 31 |     plt.title(f"{campaign}", loc='center')
 32 |     sns.scatterplot(x=np.arange(1, 8), y=av_uncertainties_yield, ax=ax[0], label='average_uncertainty_yield')
 33 |     sns.scatterplot(x=np.arange(1, 8), y=av_uncertainties_ee, ax=ax[0], label='average_uncertainty_ee')
 34 |     plt.title(f"{campaign}", loc='center')
 35 |     sns.scatterplot(x=np.arange(1, 8), y=max_uncertainties_yield, ax=ax[1], label='max_uncertainty_yield')
 36 |     sns.scatterplot(x=np.arange(1, 8), y=max_uncertainties_ee, ax=ax[1], label='max_uncertainty_ee')
 37 | 
 38 |     ax[0].set_xlabel('Round')
 39 |     ax[0].set_ylabel('Uncertainty')
 40 |     ax[1].set_xlabel('Round')
 41 |     ax[1].set_ylabel('Uncertainty')
 42 |     ax[0].set_xticks(np.arange(1, 8))
 43 |     ax[1].set_xticks(np.arange(1, 8))
 44 |     ax[0].set_ylim(0, 15)
 45 |     ax[1].set_ylim(0, 15)
 46 | 
 47 |     ax[0].legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
 48 |                  fancybox=True, shadow=True)
 49 |     ax[1].legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
 50 |                  fancybox=True, shadow=True)
 51 |     plt.tight_layout()
 52 | 
 53 | 
 54 |     # Expected improvement.
 55 |     av_eis_yield = []
 56 |     max_eis_yield = []
 57 |     av_eis_ee = []
 58 |     max_eis_ee = []
 59 | 
 60 |     for round in range(1, 8):
 61 |         df = pd.read_csv(f"{campaign}/predictions_{round}.csv")
 62 | 
 63 |         max_eis_yield.append(df['yield_expected_improvement'].max())
 64 | 
 65 |         max_eis_ee.append(df['ee_expected_improvement'].max())
 66 | 
 67 |         av_eis_yield.append(df['yield_expected_improvement'].mean())
 68 |         av_eis_ee.append(df['ee_expected_improvement'].mean())
 69 | 
 70 |     
 71 |     plt.title(f"{campaign}", loc='center')
 72 |     sns.scatterplot(x=np.arange(1, 8), y=av_eis_yield, ax=ax[2], label='average_EI_yield')
 73 |     sns.scatterplot(x=np.arange(1, 8), y=av_eis_ee, ax=ax[2], label='average_EI_ee')
 74 |     plt.title(f"{campaign}", loc='center')
 75 |     sns.scatterplot(x=np.arange(1, 8), y=max_eis_yield, ax=ax[3], label='max_EI_yield')
 76 |     sns.scatterplot(x=np.arange(1, 8), y=max_eis_ee, ax=ax[3], label='max_EI_ee')
 77 | 
 78 |     ax[2].set_xlabel('Round')
 79 |     ax[2].set_ylabel('EI')
 80 |     ax[3].set_xlabel('Round')
 81 |     ax[3].set_ylabel('EI')
 82 |     ax[2].set_xticks(np.arange(1, 8))
 83 |     ax[3].set_xticks(np.arange(1, 8))
 84 |     ax[2].set_ylim(0, 100)
 85 |     ax[3].set_ylim(0, 100)
 86 |     ax[2].legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
 87 |                  fancybox=True, shadow=True)
 88 |     ax[3].legend(loc='upper center', bbox_to_anchor=(0.5, -0.2),
 89 |                  fancybox=True, shadow=True)
 90 |     plt.tight_layout()
 91 |     plt.savefig(f"./plots/{campaign}.svg", format='svg')
 92 | 
 93 |     # Save results in csv file.
 94 |     df = pd.DataFrame([],
 95 |         columns=['max_uncertainty_yield', 'avg_uncertainty_yield', 'max_EI_yield', 'avg_EI_yield',
 96 |                  'max_uncertainty_ee', 'avg_uncertainty_ee', 'max_EI_ee', 'avg_EI_ee'])
 97 |     df['max_uncertainty_yield'] = max_uncertainties_yield
 98 |     df['max_uncertainty_ee'] = max_uncertainties_ee
 99 |     df['avg_uncertainty_yield'] = av_uncertainties_yield
100 |     df['avg_uncertainty_yield'] = av_uncertainties_ee
101 |     df['max_EI_yield'] = max_eis_yield
102 |     df['max_EI_ee'] = max_eis_ee
103 |     df['avg_EI_yield'] = av_eis_yield
104 |     df['avg_EI_ee'] = av_eis_ee
105 | 
106 |     df.to_csv(f'crosscoupling_results_{campaign}.csv')
107 |     plt.show()
108 | 
109 | 


--------------------------------------------------------------------------------
/examples/publication/Crosscoupling/campaigns/crosscoupling_results_challenging_campaign_cvt.csv:
--------------------------------------------------------------------------------
1 | ,max_uncertainty_yield,avg_uncertainty_yield,max_EI_yield,avg_EI_yield,max_uncertainty_ee,avg_uncertainty_ee,max_EI_ee,avg_EI_ee
2 | 0,6.827581272767426,2.9324467709552584,32.40782689891654,25.602683007714496,3.276152995193164,,7.60101466180589,5.928642147112224
3 | 1,8.210461679254594,5.773323515784001,46.574447681313785,11.421841208309262,9.962907192629936,,74.7088879188402,18.83098803650586
4 | 2,5.128887161645351,3.81806514719726,13.744451293043053,2.709265489045186,5.803851647525387,,21.650273058529116,5.9165346528746054
5 | 3,4.541302003122084,3.2123354371983703,5.162726577974803,0.40422385279901685,5.227188408641633,,16.10033337598427,2.30352295288218
6 | 4,3.859516872139973,2.863183469922444,0.5196149081752943,0.020525537970990253,4.08837032148018,,7.477211599451265,1.2544773840895274
7 | 5,3.795156763474525,2.436198804520339,0.2496501348370021,0.00964674815969636,3.8062975956252423,,6.226783841040771,0.6123865268781998
8 | 6,5.306888155590136,2.6604791781908226,2.778079942380365,0.1453910230805698,6.401147787936088,,12.269211324706786,1.6390130724559002
9 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/0_clean_dft.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | 
 6 | 
 7 | df_dft = pd.read_csv('data/dataset_B2.csv')
 8 | 
 9 | # # Remove correlated features.
10 | corr_matrix = df_dft.corr().abs()
11 | # Select upper triangle of correlation matrix.
12 | upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
13 | # Find features with correlation greater than 0.95.
14 | to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
15 | # Drop features
16 | df_dft.drop(to_drop, axis=1, inplace=True)
17 | 
18 | # Remove columns that have only one or two unique values.
19 | extra_columns_to_remove = []
20 | for column in df_dft.columns.values:
21 |     if len(np.unique(df_dft[column].values)) <= 1:
22 |         extra_columns_to_remove.append(column)
23 | df_dft.drop(extra_columns_to_remove, axis=1, inplace=True)
24 | 
25 | # Store SMILES.
26 | solvent_ohe = df_dft['solvent'].values
27 | base_ohe = df_dft['base'].values
28 | ligand_ohe = df_dft['ligand'].values
29 | 
30 | # Remove non numerical.
31 | df_edbo_numeric = df_dft.select_dtypes(include=np.number)
32 | 
33 | # Add back OHE features.
34 | df_edbo_numeric.insert(1, "solvent", solvent_ohe, False)
35 | df_edbo_numeric.insert(1, "base", base_ohe, False)
36 | df_edbo_numeric.insert(1, "ligand", ligand_ohe, False)
37 | 
38 | df_edbo_numeric.to_csv('./data/dataset_B2_DFT_clean.csv', index=0)
39 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/0_clean_mordred.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | 
 6 | 
 7 | df_dft = pd.read_csv('data/dataset_B3.csv')
 8 | 
 9 | # # Remove correlated features.
10 | corr_matrix = df_dft.corr().abs()
11 | # Select upper triangle of correlation matrix.
12 | upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
13 | # Find features with correlation greater than 0.95.
14 | to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
15 | # Drop features
16 | df_dft.drop(to_drop, axis=1, inplace=True)
17 | 
18 | # Remove columns that have only one or two unique values.
19 | extra_columns_to_remove = []
20 | for column in df_dft.columns.values:
21 |     if len(np.unique(df_dft[column].values)) <= 1:
22 |         extra_columns_to_remove.append(column)
23 | df_dft.drop(extra_columns_to_remove, axis=1, inplace=True)
24 | 
25 | # Store SMILES.
26 | solvent_ohe = df_dft['solvent'].values
27 | base_ohe = df_dft['base'].values
28 | ligand_ohe = df_dft['ligand'].values
29 | 
30 | # Remove non numerical.
31 | df_edbo_numeric = df_dft.select_dtypes(include=np.number)
32 | 
33 | # Add back OHE features.
34 | df_edbo_numeric.insert(1, "solvent", solvent_ohe, False)
35 | df_edbo_numeric.insert(1, "base", base_ohe, False)
36 | df_edbo_numeric.insert(1, "ligand", ligand_ohe, False)
37 | 
38 | df_edbo_numeric.to_csv('./data/dataset_B3_Mordred_clean.csv', index=0)
39 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/1_run_ohe.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import shutil
 3 | import pandas as pd
 4 | import numpy as np
 5 | import os
 6 | from edbo.plus.benchmark.multiobjective_benchmark import Benchmark
 7 | import os
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | import pandas as pd
12 | sns.set_style("darkgrid")
13 | sns.set_context("talk")
14 | 
15 | for acq_i in [
16 |     'EHVI',
17 |     'MOUCB',
18 |     'MOGreedy'
19 | ]:
20 |     for seed_i in np.arange(0, 5):
21 |         budget = 30
22 |         acq = acq_i
23 |         batch = 1
24 |         seed = seed_i
25 | 
26 |         df_exp = pd.read_csv('./data/dataset_B1.csv')
27 |         df_exp['new_index'] = np.arange(0, len(df_exp.values))
28 |         sort_column = 'new_index'
29 | 
30 |         # Select the features for the model.
31 |         columns_regression = df_exp.columns
32 |         columns_regression = columns_regression.drop([sort_column, 'objective_conversion', 'objective_selectivity']).tolist()
33 |         objectives = ['objective_conversion', 'objective_selectivity']
34 |         objective_modes = ['max', 'max']
35 |         objective_thresholds = [None, None]
36 |         print(f"Columns for regression: {columns_regression}")
37 |         ######################
38 | 
39 |         label_benchmark = f"benchmark_ohe_acq_{acq}_batch_{batch}_seed_{seed}.csv"
40 | 
41 |         if not os.path.exists(f"./results_ohe/{label_benchmark}"):
42 |             # Remove previous files
43 |             if os.path.exists(label_benchmark):
44 |                 os.remove(label_benchmark)
45 | 
46 |             if os.path.exists(f'pred_{label_benchmark}'):
47 |                 os.remove(f'pred_{label_benchmark}')
48 | 
49 |             if os.path.exists(f'results_{label_benchmark}'):
50 |                 os.remove(f'results_{label_benchmark}')
51 | 
52 |             bench = Benchmark(df_ground=df_exp,
53 |                               features_regression=columns_regression,
54 |                               objective_names=objectives,
55 |                               objective_modes=objective_modes,
56 |                               objective_thresholds=objective_thresholds,
57 |                               filename=label_benchmark,
58 |                               filename_results=f'results_{label_benchmark}',
59 |                               index_column=sort_column,
60 |                               acquisition_function=acq)
61 |             bench.run(steps=int(budget/batch), batch=batch, seed=seed,
62 |                       plot_predictions=False,
63 |                       plot_ground=False,
64 |                       plot_train=False,
65 |                       init_method='seed')
66 | 
67 |             # Move results.
68 |             if not os.path.exists('results_ohe'):
69 |                 os.mkdir('results_ohe')
70 |             shutil.move(label_benchmark, f'results_ohe/{label_benchmark}')
71 |             shutil.move(f'pred_{label_benchmark}', f'results_ohe/pred_{label_benchmark}')
72 |             shutil.move(f'results_{label_benchmark}', f'results_ohe/results_{label_benchmark}')
73 | 
74 | # Clean.
75 | if os.path.exists('results'):
76 |     shutil.rmtree('results')
77 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/2_run_dft.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import shutil
 3 | import pandas as pd
 4 | import numpy as np
 5 | import os
 6 | from edbo.plus.benchmark.multiobjective_benchmark import Benchmark
 7 | import os
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | import pandas as pd
12 | sns.set_style("darkgrid")
13 | sns.set_context("talk")
14 | 
15 | 
16 | for batch in [1, 2, 3, 5]:
17 |     for acq_i in [
18 |         'EHVI',
19 |         'MOUCB',
20 |         'MOGreedy'
21 |     ]:
22 |         for seed_i in np.arange(0, 5):
23 |             budget = 30
24 |             acq = acq_i
25 |             seed = seed_i
26 | 
27 |             df_exp = pd.read_csv('./data/dataset_B2_DFT_clean.csv')
28 |             df_exp['new_index'] = np.arange(0, len(df_exp.values))
29 |             sort_column = 'new_index'
30 | 
31 |             # Select the features for the model.
32 |             columns_regression = df_exp.columns
33 |             columns_regression = columns_regression.drop('solvent')
34 |             columns_regression = columns_regression.drop('ligand')
35 | 
36 |             columns_regression = columns_regression.drop([sort_column, 'objective_conversion', 'objective_selectivity']).tolist()
37 |             objectives = ['objective_conversion', 'objective_selectivity']
38 |             objective_modes = ['max', 'max']
39 |             objective_thresholds = [None, None]
40 |             print(f"Columns for regression: {columns_regression}")
41 |             ######################
42 | 
43 |             label_benchmark = f"benchmark_dft_acq_{acq}_batch_{batch}_seed_{seed}.csv"
44 | 
45 |             if not os.path.exists(f"./results_dft/{label_benchmark}"):
46 |                 # Remove previous files
47 |                 if os.path.exists(label_benchmark):
48 |                     os.remove(label_benchmark)
49 | 
50 |                 if os.path.exists(f'pred_{label_benchmark}'):
51 |                     os.remove(f'pred_{label_benchmark}')
52 | 
53 |                 if os.path.exists(f'results_{label_benchmark}'):
54 |                     os.remove(f'results_{label_benchmark}')
55 | 
56 |                 bench = Benchmark(df_ground=df_exp,
57 |                                   features_regression=columns_regression,
58 |                                   objective_names=objectives,
59 |                                   objective_modes=objective_modes,
60 |                                   objective_thresholds=objective_thresholds,
61 |                                   filename=label_benchmark,
62 |                                   filename_results=f'results_{label_benchmark}',
63 |                                   index_column=sort_column,
64 |                                   acquisition_function=acq)
65 |                 bench.run(steps=int(budget/batch), batch=batch, seed=seed,
66 |                           plot_predictions=False,
67 |                           plot_ground=False,
68 |                           plot_train=False)
69 | 
70 |                 # Move results.
71 |                 if not os.path.exists('results_dft'):
72 |                     os.mkdir('results_dft')
73 |                 shutil.move(label_benchmark, f'results_dft/{label_benchmark}')
74 |                 shutil.move(f'pred_{label_benchmark}', f'results_dft/pred_{label_benchmark}')
75 |                 shutil.move(f'results_{label_benchmark}', f'results_dft/results_{label_benchmark}')
76 | 
77 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/3_run_mordred.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import shutil
 3 | import pandas as pd
 4 | import numpy as np
 5 | import os
 6 | from edbo.plus.benchmark.multiobjective_benchmark import Benchmark
 7 | import os
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | import pandas as pd
12 | sns.set_style("darkgrid")
13 | sns.set_context("talk")
14 | 
15 | 
16 | for acq_i in [
17 |     'EHVI',
18 |     'MOUCB',
19 |     'MOGreedy'
20 | ]:
21 |     for seed_i in np.arange(0, 5):
22 |         budget = 30
23 |         acq = acq_i
24 |         batch = 1
25 |         seed = seed_i
26 | 
27 |         df_exp = pd.read_csv('./data/dataset_B3_Mordred_clean.csv')
28 |         df_exp['new_index'] = np.arange(0, len(df_exp.values))
29 |         sort_column = 'new_index'
30 | 
31 |         # Select the features for the model.
32 |         columns_regression = df_exp.columns
33 |         columns_regression = columns_regression.drop('solvent')
34 |         columns_regression = columns_regression.drop('ligand')
35 | 
36 |         columns_regression = columns_regression.drop([sort_column, 'objective_conversion', 'objective_selectivity']).tolist()
37 |         objectives = ['objective_conversion', 'objective_selectivity']
38 |         objective_modes = ['max', 'max']
39 |         objective_thresholds = [None, None]
40 |         print(f"Columns for regression: {columns_regression}")
41 |         ######################
42 | 
43 |         label_benchmark = f"benchmark_mordred_acq_{acq}_batch_{batch}_seed_{seed}.csv"
44 | 
45 |         if not os.path.exists(f"./results_mordred/{label_benchmark}"):
46 |             # Remove previous files
47 |             if os.path.exists(label_benchmark):
48 |                 os.remove(label_benchmark)
49 | 
50 |             if os.path.exists(f'pred_{label_benchmark}'):
51 |                 os.remove(f'pred_{label_benchmark}')
52 | 
53 |             if os.path.exists(f'results_{label_benchmark}'):
54 |                 os.remove(f'results_{label_benchmark}')
55 | 
56 |             bench = Benchmark(df_ground=df_exp,
57 |                               features_regression=columns_regression,
58 |                               objective_names=objectives,
59 |                               objective_modes=objective_modes,
60 |                               objective_thresholds=objective_thresholds,
61 |                               filename=label_benchmark,
62 |                               filename_results=f'results_{label_benchmark}',
63 |                               index_column=sort_column,
64 |                               acquisition_function=acq)
65 |             bench.run(steps=int(budget/batch), batch=batch, seed=seed,
66 |                       plot_predictions=False,
67 |                       plot_ground=False,
68 |                       plot_train=False)
69 | 
70 |             # Move results.
71 |             if not os.path.exists('results_mordred'):
72 |                 os.mkdir('results_mordred')
73 |             shutil.move(label_benchmark, f'results_mordred/{label_benchmark}')
74 |             shutil.move(f'pred_{label_benchmark}', f'results_mordred/pred_{label_benchmark}')
75 |             shutil.move(f'results_{label_benchmark}', f'results_mordred/results_{label_benchmark}')
76 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/4_random_features.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import shutil
 3 | import pandas as pd
 4 | import numpy as np
 5 | import os
 6 | from edbo.plus.benchmark.multiobjective_benchmark import Benchmark
 7 | import os
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | import pandas as pd
12 | sns.set_style("darkgrid")
13 | sns.set_context("talk")
14 | 
15 | for acq_i in [
16 |     'EHVI',
17 |     'MOUCB',
18 |     'MOGreedy'
19 | ]:
20 |     for seed_i in np.arange(0, 5):
21 |         budget = 30
22 |         acq = acq_i
23 |         batch = 1
24 |         seed = seed_i
25 | 
26 |         df_exp = pd.read_csv('./data/dataset_B1.csv')
27 |         df_exp['new_index'] = np.arange(0, len(df_exp.values))
28 |         sort_column = 'new_index'
29 | 
30 |         # Select the features for the model.
31 |         columns_regression = df_exp.columns
32 |         columns_regression = columns_regression.drop([sort_column, 'objective_conversion', 'objective_selectivity']).tolist()
33 |         objectives = ['objective_conversion', 'objective_selectivity']
34 |         objective_modes = ['max', 'max']
35 |         objective_thresholds = [None, None]
36 |         print(f"Columns for regression: {columns_regression}")
37 |         ######################
38 | 
39 |         label_benchmark = f"benchmark_random_acq_{acq}_batch_{batch}_seed_{seed}.csv"
40 | 
41 |         if not os.path.exists(f"./results_random/{label_benchmark}"):
42 |             # Remove previous files
43 |             if os.path.exists(label_benchmark):
44 |                 os.remove(label_benchmark)
45 | 
46 |             if os.path.exists(f'pred_{label_benchmark}'):
47 |                 os.remove(f'pred_{label_benchmark}')
48 | 
49 |             if os.path.exists(f'results_{label_benchmark}'):
50 |                 os.remove(f'results_{label_benchmark}')
51 | 
52 |             bench = Benchmark(df_ground=df_exp,
53 |                               features_regression=columns_regression,
54 |                               objective_names=objectives,
55 |                               objective_modes=objective_modes,
56 |                               objective_thresholds=objective_thresholds,
57 |                               filename=label_benchmark,
58 |                               filename_results=f'results_{label_benchmark}',
59 |                               index_column=sort_column,
60 |                               acquisition_function=acq)
61 |             bench.run(steps=int(budget/batch), batch=batch, seed=seed,
62 |                       plot_predictions=False,
63 |                       plot_ground=False,
64 |                       plot_train=False,
65 |                       random_sampling=True)
66 | 
67 |             # Move results.
68 |             if not os.path.exists('results_random'):
69 |                 os.mkdir('results_random')
70 |             shutil.move(label_benchmark, f'results_random/{label_benchmark}')
71 |             shutil.move(f'pred_{label_benchmark}', f'results_random/pred_{label_benchmark}')
72 |             shutil.move(f'results_{label_benchmark}', f'results_random/results_{label_benchmark}')
73 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/data/dataset_B1.csv:
--------------------------------------------------------------------------------
  1 | ligand,base,solvent,ligand_equivalent,objective_conversion,objective_selectivity
  2 | P(tBu)3,NaOH(aq.),MeOH,0.125,39.6,67.17171717171718
  3 | P(tBu)3,s. NaHCO3(aq.),MeOH,0.125,52.3,74.37858508604207
  4 | P(tBu)3,CsF(aq.),MeOH,0.125,50.8,74.01574803149606
  5 | P(tBu)3,1M K3PO4(aq.),MeOH,0.125,50.3,73.55864811133202
  6 | P(tBu)3,KOH(aq.),MeOH,0.125,61.5,78.21138211382113
  7 | P(tBu)3,Cs2CO3(aq.),MeOH,0.125,61.00000000000001,79.01639344262294
  8 | P(tBu)3,KOAc,MeOH,0.125,67.5,52.8888888888889
  9 | P(tBu)3,None,MeOH,0.125,74.9,53.271028037383175
 10 | P(Ph)3,NaOH(aq.),MeOH,0.125,99.2,73.79032258064517
 11 | P(Ph)3,s. NaHCO3(aq.),MeOH,0.125,86.60000000000001,85.10392609699768
 12 | P(Ph)3,CsF(aq.),MeOH,0.125,83.3,89.07563025210085
 13 | P(Ph)3,1M K3PO4(aq.),MeOH,0.125,81.5,92.14723926380368
 14 | P(Ph)3,KOH(aq.),MeOH,0.125,82.99999999999999,91.92771084337352
 15 | P(Ph)3,Cs2CO3(aq.),MeOH,0.125,82.2,93.06569343065692
 16 | P(Ph)3,KOAc,MeOH,0.125,81.4,95.0859950859951
 17 | P(Ph)3,None,MeOH,0.125,80.30000000000001,94.89414694894144
 18 | AmPhos,NaOH(aq.),MeOH,0.125,75.2,89.49468085106382
 19 | AmPhos,s. NaHCO3(aq.),MeOH,0.125,75.39999999999999,90.18567639257296
 20 | AmPhos,CsF(aq.),MeOH,0.125,77.3,90.03880983182407
 21 | AmPhos,1M K3PO4(aq.),MeOH,0.125,74.3,88.42530282637955
 22 | AmPhos,KOH(aq.),MeOH,0.125,56.900000000000006,78.55887521968366
 23 | AmPhos,Cs2CO3(aq.),MeOH,0.125,60.1,78.70216306156405
 24 | AmPhos,KOAc,MeOH,0.125,43.7,72.31121281464532
 25 | AmPhos,None,MeOH,0.125,39.3,70.22900763358778
 26 | P(Cy)3,NaOH(aq.),MeOH,0.125,46.5,71.82795698924731
 27 | P(Cy)3,s. NaHCO3(aq.),MeOH,0.125,33.9,70.50147492625368
 28 | P(Cy)3,CsF(aq.),MeOH,0.125,55.2,75.90579710144928
 29 | P(Cy)3,1M K3PO4(aq.),MeOH,0.125,46.7,73.23340471092077
 30 | P(Cy)3,KOH(aq.),MeOH,0.125,59.8,77.59197324414716
 31 | P(Cy)3,Cs2CO3(aq.),MeOH,0.125,84.39999999999999,92.53554502369668
 32 | P(Cy)3,KOAc,MeOH,0.125,80.60000000000001,94.04466501240694
 33 | P(Cy)3,None,MeOH,0.125,76.7,92.4380704041721
 34 | P(o-Tol)3,NaOH(aq.),MeOH,0.125,83.99999999999999,83.45238095238095
 35 | P(o-Tol)3,s. NaHCO3(aq.),MeOH,0.125,76.5,84.70588235294117
 36 | P(o-Tol)3,CsF(aq.),MeOH,0.125,83.79999999999998,82.69689737470168
 37 | P(o-Tol)3,1M K3PO4(aq.),MeOH,0.125,76.5,80.65359477124183
 38 | P(o-Tol)3,KOH(aq.),MeOH,0.125,74.5,75.16778523489933
 39 | P(o-Tol)3,Cs2CO3(aq.),MeOH,0.125,79.5,66.41509433962264
 40 | P(o-Tol)3,KOAc,MeOH,0.125,67.8,74.63126843657818
 41 | P(o-Tol)3,None,MeOH,0.125,59.39999999999999,76.26262626262627
 42 | CataCXium A,NaOH(aq.),MeOH,0.125,56.400000000000006,78.0141843971631
 43 | CataCXium A,s. NaHCO3(aq.),MeOH,0.125,66.3,81.14630467571644
 44 | CataCXium A,CsF(aq.),MeOH,0.125,47.7,74.8427672955975
 45 | CataCXium A,1M K3PO4(aq.),MeOH,0.125,60.3,79.93366500829188
 46 | CataCXium A,KOH(aq.),MeOH,0.125,63.8,80.87774294670847
 47 | CataCXium A,Cs2CO3(aq.),MeOH,0.125,45.99999999999999,73.47826086956523
 48 | CataCXium A,KOAc,MeOH,0.125,38.7,69.50904392764858
 49 | CataCXium A,None,MeOH,0.125,47.39999999999999,73.41772151898735
 50 | SPhos,NaOH(aq.),MeOH,0.0625,45.2,72.34513274336285
 51 | SPhos,s. NaHCO3(aq.),MeOH,0.0625,28.0,58.57142857142858
 52 | SPhos,CsF(aq.),MeOH,0.0625,38.39999999999999,67.44791666666667
 53 | SPhos,1M K3PO4(aq.),MeOH,0.0625,39.3,67.68447837150127
 54 | SPhos,KOH(aq.),MeOH,0.0625,36.9,66.66666666666667
 55 | SPhos,Cs2CO3(aq.),MeOH,0.0625,74.6,71.58176943699732
 56 | SPhos,KOAc,MeOH,0.0625,54.5,67.33944954128441
 57 | SPhos,None,MeOH,0.0625,49.7,70.4225352112676
 58 | dtbpf,NaOH(aq.),MeOH,0.0625,38.6,47.15025906735752
 59 | dtbpf,s. NaHCO3(aq.),MeOH,0.0625,19.6,21.428571428571427
 60 | dtbpf,CsF(aq.),MeOH,0.0625,20.7,25.120772946859905
 61 | dtbpf,1M K3PO4(aq.),MeOH,0.0625,19.7,21.82741116751269
 62 | dtbpf,KOH(aq.),MeOH,0.0625,19.8,24.24242424242425
 63 | dtbpf,Cs2CO3(aq.),MeOH,0.0625,15.8,21.51898734177215
 64 | dtbpf,KOAc,MeOH,0.0625,16.1,19.875776397515526
 65 | dtbpf,None,MeOH,0.0625,13.0,20.0
 66 | XPhos,NaOH(aq.),MeOH,0.0625,77.9,83.31193838254171
 67 | XPhos,s. NaHCO3(aq.),MeOH,0.0625,79.39999999999999,83.50125944584383
 68 | XPhos,CsF(aq.),MeOH,0.0625,72.7,82.80605226960111
 69 | XPhos,1M K3PO4(aq.),MeOH,0.0625,53.8,77.32342007434944
 70 | XPhos,KOH(aq.),MeOH,0.0625,46.0,72.6086956521739
 71 | XPhos,Cs2CO3(aq.),MeOH,0.0625,41.0,70.73170731707317
 72 | XPhos,KOAc,MeOH,0.0625,51.4,74.12451361867704
 73 | XPhos,None,MeOH,0.0625,33.5,63.28358208955224
 74 | dppf,NaOH(aq.),MeOH,0.0625,40.5,52.8395061728395
 75 | dppf,s. NaHCO3(aq.),MeOH,0.0625,36.3,67.49311294765839
 76 | dppf,CsF(aq.),MeOH,0.0625,35.3,65.43909348441927
 77 | dppf,1M K3PO4(aq.),MeOH,0.0625,36.0,62.77777777777778
 78 | dppf,KOH(aq.),MeOH,0.0625,28.3,49.1166077738516
 79 | dppf,Cs2CO3(aq.),MeOH,0.0625,35.4,40.96045197740113
 80 | dppf,KOAc,MeOH,0.0625,25.5,53.333333333333336
 81 | dppf,None,MeOH,0.0625,20.0,54.50000000000001
 82 | Xanthphos,NaOH(aq.),MeOH,0.0625,12.2,41.80327868852459
 83 | Xanthphos,s. NaHCO3(aq.),MeOH,0.0625,7.8,32.05128205128205
 84 | Xanthphos,CsF(aq.),MeOH,0.0625,9.7,32.98969072164949
 85 | Xanthphos,1M K3PO4(aq.),MeOH,0.0625,8.5,31.764705882352946
 86 | Xanthphos,KOH(aq.),MeOH,0.0625,10.2,40.19607843137255
 87 | Xanthphos,Cs2CO3(aq.),MeOH,0.0625,12.0,35.833333333333336
 88 | Xanthphos,KOAc,MeOH,0.0625,7.6,23.68421052631579
 89 | Xanthphos,None,MeOH,0.0625,7.399999999999999,24.324324324324326
 90 | P(tBu)3,NaOH(aq.),MeCN,0.125,38.2,69.63350785340315
 91 | P(tBu)3,s. NaHCO3(aq.),MeCN,0.125,42.8,52.10280373831775
 92 | P(tBu)3,CsF(aq.),MeCN,0.125,21.3,24.413145539906104
 93 | P(tBu)3,1M K3PO4(aq.),MeCN,0.125,29.8,54.0268456375839
 94 | P(tBu)3,KOH(aq.),MeCN,0.125,24.0,34.583333333333336
 95 | P(tBu)3,Cs2CO3(aq.),MeCN,0.125,20.1,54.72636815920397
 96 | P(tBu)3,KOAc,MeCN,0.125,18.4,10.326086956521738
 97 | P(tBu)3,None,MeCN,0.125,22.1,12.21719457013575
 98 | P(Ph)3,NaOH(aq.),MeCN,0.125,16.7,36.52694610778443
 99 | P(Ph)3,s. NaHCO3(aq.),MeCN,0.125,34.6,67.05202312138728
100 | P(Ph)3,CsF(aq.),MeCN,0.125,37.9,76.2532981530343
101 | P(Ph)3,1M K3PO4(aq.),MeCN,0.125,25.1,79.6812749003984
102 | P(Ph)3,KOH(aq.),MeCN,0.125,13.3,75.18796992481204
103 | P(Ph)3,Cs2CO3(aq.),MeCN,0.125,22.9,74.67248908296943
104 | P(Ph)3,KOAc,MeCN,0.125,12.1,60.33057851239669
105 | P(Ph)3,None,MeCN,0.125,27.1,79.33579335793357
106 | AmPhos,NaOH(aq.),MeCN,0.125,13.3,31.57894736842105
107 | AmPhos,s. NaHCO3(aq.),MeCN,0.125,31.8,62.8930817610063
108 | AmPhos,CsF(aq.),MeCN,0.125,31.6,63.29113924050633
109 | AmPhos,1M K3PO4(aq.),MeCN,0.125,30.8,62.66233766233766
110 | AmPhos,KOH(aq.),MeCN,0.125,29.4,62.24489795918368
111 | AmPhos,Cs2CO3(aq.),MeCN,0.125,25.3,58.49802371541502
112 | AmPhos,KOAc,MeCN,0.125,21.2,50.943396226415096
113 | AmPhos,None,MeCN,0.125,26.7,55.0561797752809
114 | P(Cy)3,NaOH(aq.),MeCN,0.125,33.2,67.46987951807229
115 | P(Cy)3,s. NaHCO3(aq.),MeCN,0.125,32.2,67.3913043478261
116 | P(Cy)3,CsF(aq.),MeCN,0.125,15.999999999999998,60.62500000000001
117 | P(Cy)3,1M K3PO4(aq.),MeCN,0.125,10.3,66.99029126213593
118 | P(Cy)3,KOH(aq.),MeCN,0.125,7.800000000000001,55.12820512820512
119 | P(Cy)3,Cs2CO3(aq.),MeCN,0.125,7.300000000000001,43.83561643835616
120 | P(Cy)3,KOAc,MeCN,0.125,3.8,23.68421052631579
121 | P(Cy)3,None,MeCN,0.125,7.0,30.0
122 | P(o-Tol)3,NaOH(aq.),MeCN,0.125,11.5,4.3478260869565215
123 | P(o-Tol)3,s. NaHCO3(aq.),MeCN,0.125,13.7,2.18978102189781
124 | P(o-Tol)3,CsF(aq.),MeCN,0.125,12.0,3.333333333333333
125 | P(o-Tol)3,1M K3PO4(aq.),MeCN,0.125,9.7,2.061855670103093
126 | P(o-Tol)3,KOH(aq.),MeCN,0.125,10.5,1.9047619047619049
127 | P(o-Tol)3,Cs2CO3(aq.),MeCN,0.125,10.3,1.9417475728155336
128 | P(o-Tol)3,KOAc,MeCN,0.125,8.6,3.488372093023256
129 | P(o-Tol)3,None,MeCN,0.125,9.1,1.0989010989010988
130 | CataCXium A,NaOH(aq.),MeCN,0.125,9.1,5.494505494505495
131 | CataCXium A,s. NaHCO3(aq.),MeCN,0.125,7.199999999999999,8.333333333333334
132 | CataCXium A,CsF(aq.),MeCN,0.125,7.1,11.267605633802818
133 | CataCXium A,1M K3PO4(aq.),MeCN,0.125,12.9,36.43410852713178
134 | CataCXium A,KOH(aq.),MeCN,0.125,12.4,35.483870967741936
135 | CataCXium A,Cs2CO3(aq.),MeCN,0.125,12.7,33.85826771653544
136 | CataCXium A,KOAc,MeCN,0.125,14.1,36.87943262411348
137 | CataCXium A,None,MeCN,0.125,0.0,0.0
138 | SPhos,NaOH(aq.),MeCN,0.0625,13.3,3.007518796992482
139 | SPhos,s. NaHCO3(aq.),MeCN,0.0625,10.5,3.8095238095238098
140 | SPhos,CsF(aq.),MeCN,0.0625,10.9,5.504587155963303
141 | SPhos,1M K3PO4(aq.),MeCN,0.0625,9.2,4.347826086956522
142 | SPhos,KOH(aq.),MeCN,0.0625,8.0,6.25
143 | SPhos,Cs2CO3(aq.),MeCN,0.0625,10.3,2.912621359223301
144 | SPhos,KOAc,MeCN,0.0625,7.8,7.6923076923076925
145 | SPhos,None,MeCN,0.0625,7.2,6.944444444444445
146 | dtbpf,NaOH(aq.),MeCN,0.0625,9.3,8.602150537634408
147 | dtbpf,s. NaHCO3(aq.),MeCN,0.0625,7.1,7.042253521126761
148 | dtbpf,CsF(aq.),MeCN,0.0625,7.7,9.09090909090909
149 | dtbpf,1M K3PO4(aq.),MeCN,0.0625,6.4,6.25
150 | dtbpf,KOH(aq.),MeCN,0.0625,4.2,11.904761904761903
151 | dtbpf,Cs2CO3(aq.),MeCN,0.0625,7.9,10.126582278481012
152 | dtbpf,KOAc,MeCN,0.0625,5.4,5.555555555555556
153 | dtbpf,None,MeCN,0.0625,4.1,12.195121951219514
154 | XPhos,NaOH(aq.),MeCN,0.0625,9.5,3.1578947368421053
155 | XPhos,s. NaHCO3(aq.),MeCN,0.0625,11.2,1.785714285714286
156 | XPhos,CsF(aq.),MeCN,0.0625,8.7,0.0
157 | XPhos,1M K3PO4(aq.),MeCN,0.0625,9.7,16.494845360824744
158 | XPhos,KOH(aq.),MeCN,0.0625,9.7,11.34020618556701
159 | XPhos,Cs2CO3(aq.),MeCN,0.0625,10.6,24.528301886792452
160 | XPhos,KOAc,MeCN,0.0625,9.2,13.043478260869565
161 | XPhos,None,MeCN,0.0625,9.2,17.39130434782609
162 | dppf,NaOH(aq.),MeCN,0.0625,4.9,10.20408163265306
163 | dppf,s. NaHCO3(aq.),MeCN,0.0625,5.6,17.857142857142858
164 | dppf,CsF(aq.),MeCN,0.0625,5.9,16.949152542372882
165 | dppf,1M K3PO4(aq.),MeCN,0.0625,4.8,20.833333333333336
166 | dppf,KOH(aq.),MeCN,0.0625,4.6,15.217391304347828
167 | dppf,Cs2CO3(aq.),MeCN,0.0625,6.0,15.0
168 | dppf,KOAc,MeCN,0.0625,4.5,15.555555555555555
169 | dppf,None,MeCN,0.0625,4.9,18.367346938775515
170 | Xanthphos,NaOH(aq.),MeCN,0.0625,4.8,0.0
171 | Xanthphos,s. NaHCO3(aq.),MeCN,0.0625,4.4,2.272727272727273
172 | Xanthphos,CsF(aq.),MeCN,0.0625,4.1,0.0
173 | Xanthphos,1M K3PO4(aq.),MeCN,0.0625,6.4,0.0
174 | Xanthphos,KOH(aq.),MeCN,0.0625,4.0,0.0
175 | Xanthphos,Cs2CO3(aq.),MeCN,0.0625,6.2,0.0
176 | Xanthphos,KOAc,MeCN,0.0625,5.4,0.0
177 | Xanthphos,None,MeCN,0.0625,3.4000000000000004,0.0
178 | P(tBu)3,NaOH(aq.),THF,0.125,10.8,9.25925925925926
179 | P(tBu)3,s. NaHCO3(aq.),THF,0.125,33.7,1.1869436201780417
180 | P(tBu)3,CsF(aq.),THF,0.125,6.800000000000001,5.88235294117647
181 | P(tBu)3,1M K3PO4(aq.),THF,0.125,5.5,9.090909090909092
182 | P(tBu)3,KOH(aq.),THF,0.125,7.6,5.2631578947368425
183 | P(tBu)3,Cs2CO3(aq.),THF,0.125,9.6,3.125
184 | P(tBu)3,KOAc,THF,0.125,10.1,1.98019801980198
185 | P(tBu)3,None,THF,0.125,15.3,1.3071895424836604
186 | P(Ph)3,NaOH(aq.),THF,0.125,13.7,15.328467153284672
187 | P(Ph)3,s. NaHCO3(aq.),THF,0.125,11.6,22.41379310344828
188 | P(Ph)3,CsF(aq.),THF,0.125,10.4,34.61538461538461
189 | P(Ph)3,1M K3PO4(aq.),THF,0.125,9.1,27.47252747252747
190 | P(Ph)3,KOH(aq.),THF,0.125,10.0,22.000000000000004
191 | P(Ph)3,Cs2CO3(aq.),THF,0.125,8.4,21.428571428571427
192 | P(Ph)3,KOAc,THF,0.125,4.5,26.666666666666668
193 | P(Ph)3,None,THF,0.125,8.0,15.0
194 | AmPhos,NaOH(aq.),THF,0.125,11.2,1.785714285714286
195 | AmPhos,s. NaHCO3(aq.),THF,0.125,11.1,0.9009009009009008
196 | AmPhos,CsF(aq.),THF,0.125,8.5,0.0
197 | AmPhos,1M K3PO4(aq.),THF,0.125,5.6,0.0
198 | AmPhos,KOH(aq.),THF,0.125,1.7,0.0
199 | AmPhos,Cs2CO3(aq.),THF,0.125,1.8,0.0
200 | AmPhos,KOAc,THF,0.125,3.9,5.128205128205129
201 | AmPhos,None,THF,0.125,6.7,1.4925373134328357
202 | P(Cy)3,NaOH(aq.),THF,0.125,7.6,3.947368421052632
203 | P(Cy)3,s. NaHCO3(aq.),THF,0.125,7.9,0.0
204 | P(Cy)3,CsF(aq.),THF,0.125,7.4,0.0
205 | P(Cy)3,1M K3PO4(aq.),THF,0.125,5.8,0.0
206 | P(Cy)3,KOH(aq.),THF,0.125,4.800000000000001,4.166666666666666
207 | P(Cy)3,Cs2CO3(aq.),THF,0.125,4.6,2.173913043478261
208 | P(Cy)3,KOAc,THF,0.125,6.5,0.0
209 | P(Cy)3,None,THF,0.125,4.5,0.0
210 | P(o-Tol)3,NaOH(aq.),THF,0.125,8.0,5.0
211 | P(o-Tol)3,s. NaHCO3(aq.),THF,0.125,7.6,6.578947368421052
212 | P(o-Tol)3,CsF(aq.),THF,0.125,7.700000000000001,6.493506493506493
213 | P(o-Tol)3,1M K3PO4(aq.),THF,0.125,6.6,6.060606060606061
214 | P(o-Tol)3,KOH(aq.),THF,0.125,7.1,5.633802816901409
215 | P(o-Tol)3,Cs2CO3(aq.),THF,0.125,2.5,16.0
216 | P(o-Tol)3,KOAc,THF,0.125,5.0,8.0
217 | P(o-Tol)3,None,THF,0.125,2.3000000000000003,8.695652173913043
218 | CataCXium A,NaOH(aq.),THF,0.125,4.6,0.0
219 | CataCXium A,s. NaHCO3(aq.),THF,0.125,4.5,0.0
220 | CataCXium A,CsF(aq.),THF,0.125,4.6,0.0
221 | CataCXium A,1M K3PO4(aq.),THF,0.125,2.0,0.0
222 | CataCXium A,KOH(aq.),THF,0.125,1.8,0.0
223 | CataCXium A,Cs2CO3(aq.),THF,0.125,1.8,0.0
224 | CataCXium A,KOAc,THF,0.125,1.8,0.0
225 | CataCXium A,None,THF,0.125,4.0,0.0
226 | SPhos,NaOH(aq.),THF,0.0625,9.5,0.0
227 | SPhos,s. NaHCO3(aq.),THF,0.0625,4.7,0.0
228 | SPhos,CsF(aq.),THF,0.0625,4.2,0.0
229 | SPhos,1M K3PO4(aq.),THF,0.0625,4.6,0.0
230 | SPhos,KOH(aq.),THF,0.0625,4.300000000000001,0.0
231 | SPhos,Cs2CO3(aq.),THF,0.0625,4.8,0.0
232 | SPhos,KOAc,THF,0.0625,2.0,0.0
233 | SPhos,None,THF,0.0625,4.6,0.0
234 | dtbpf,NaOH(aq.),THF,0.0625,5.300000000000001,11.32075471698113
235 | dtbpf,s. NaHCO3(aq.),THF,0.0625,5.4,9.25925925925926
236 | dtbpf,CsF(aq.),THF,0.0625,4.7,14.893617021276595
237 | dtbpf,1M K3PO4(aq.),THF,0.0625,4.9,20.40816326530612
238 | dtbpf,KOH(aq.),THF,0.0625,5.1,13.725490196078432
239 | dtbpf,Cs2CO3(aq.),THF,0.0625,2.0,0.0
240 | dtbpf,KOAc,THF,0.0625,2.4,12.5
241 | dtbpf,None,THF,0.0625,4.800000000000001,4.166666666666666
242 | XPhos,NaOH(aq.),THF,0.0625,5.0,6.0
243 | XPhos,s. NaHCO3(aq.),THF,0.0625,5.0,0.0
244 | XPhos,CsF(aq.),THF,0.0625,4.7,0.0
245 | XPhos,1M K3PO4(aq.),THF,0.0625,2.0,0.0
246 | XPhos,KOH(aq.),THF,0.0625,1.8,0.0
247 | XPhos,Cs2CO3(aq.),THF,0.0625,1.7,0.0
248 | XPhos,KOAc,THF,0.0625,1.7,0.0
249 | XPhos,None,THF,0.0625,1.9,0.0
250 | dppf,NaOH(aq.),THF,0.0625,8.0,2.5
251 | dppf,s. NaHCO3(aq.),THF,0.0625,8.2,0.0
252 | dppf,CsF(aq.),THF,0.0625,15.1,0.0
253 | dppf,1M K3PO4(aq.),THF,0.0625,11.8,0.0
254 | dppf,KOH(aq.),THF,0.0625,4.199999999999999,9.523809523809526
255 | dppf,Cs2CO3(aq.),THF,0.0625,4.9,10.20408163265306
256 | dppf,KOAc,THF,0.0625,3.4,14.705882352941178
257 | dppf,None,THF,0.0625,4.4,13.636363636363637
258 | Xanthphos,NaOH(aq.),THF,0.0625,3.7,5.405405405405405
259 | Xanthphos,s. NaHCO3(aq.),THF,0.0625,2.4,20.833333333333336
260 | Xanthphos,CsF(aq.),THF,0.0625,1.9,0.0
261 | Xanthphos,1M K3PO4(aq.),THF,0.0625,2.7,29.629629629629623
262 | Xanthphos,KOH(aq.),THF,0.0625,3.0,40.0
263 | Xanthphos,Cs2CO3(aq.),THF,0.0625,1.8,0.0
264 | Xanthphos,KOAc,THF,0.0625,1.8,0.0
265 | Xanthphos,None,THF,0.0625,2.6,0.0
266 | P(tBu)3,NaOH(aq.),DMF,0.125,24.0,0.0
267 | P(tBu)3,s. NaHCO3(aq.),DMF,0.125,21.0,0.0
268 | P(tBu)3,CsF(aq.),DMF,0.125,14.7,0.0
269 | P(tBu)3,1M K3PO4(aq.),DMF,0.125,13.7,0.0
270 | P(tBu)3,KOH(aq.),DMF,0.125,18.4,0.0
271 | P(tBu)3,Cs2CO3(aq.),DMF,0.125,19.2,0.0
272 | P(tBu)3,KOAc,DMF,0.125,19.6,0.0
273 | P(tBu)3,None,DMF,0.125,21.9,0.0
274 | P(Ph)3,NaOH(aq.),DMF,0.125,23.8,0.0
275 | P(Ph)3,s. NaHCO3(aq.),DMF,0.125,10.2,0.0
276 | P(Ph)3,CsF(aq.),DMF,0.125,3.4000000000000004,0.0
277 | P(Ph)3,1M K3PO4(aq.),DMF,0.125,3.3,0.0
278 | P(Ph)3,KOH(aq.),DMF,0.125,4.1,0.0
279 | P(Ph)3,Cs2CO3(aq.),DMF,0.125,4.1,0.0
280 | P(Ph)3,KOAc,DMF,0.125,5.3,32.075471698113205
281 | P(Ph)3,None,DMF,0.125,3.8,0.0
282 | AmPhos,NaOH(aq.),DMF,0.125,22.2,0.0
283 | AmPhos,s. NaHCO3(aq.),DMF,0.125,16.3,0.0
284 | AmPhos,CsF(aq.),DMF,0.125,14.9,0.0
285 | AmPhos,1M K3PO4(aq.),DMF,0.125,17.8,0.0
286 | AmPhos,KOH(aq.),DMF,0.125,14.8,0.0
287 | AmPhos,Cs2CO3(aq.),DMF,0.125,15.6,0.0
288 | AmPhos,KOAc,DMF,0.125,4.3,0.0
289 | AmPhos,None,DMF,0.125,67.10000000000001,0.0
290 | P(Cy)3,NaOH(aq.),DMF,0.125,33.9,0.0
291 | P(Cy)3,s. NaHCO3(aq.),DMF,0.125,15.3,0.0
292 | P(Cy)3,CsF(aq.),DMF,0.125,9.7,0.0
293 | P(Cy)3,1M K3PO4(aq.),DMF,0.125,5.4,0.0
294 | P(Cy)3,KOH(aq.),DMF,0.125,3.8,0.0
295 | P(Cy)3,Cs2CO3(aq.),DMF,0.125,11.8,0.0
296 | P(Cy)3,KOAc,DMF,0.125,15.8,0.0
297 | P(Cy)3,None,DMF,0.125,11.3,0.0
298 | P(o-Tol)3,NaOH(aq.),DMF,0.125,8.7,0.0
299 | P(o-Tol)3,s. NaHCO3(aq.),DMF,0.125,0.0,0.0
300 | P(o-Tol)3,CsF(aq.),DMF,0.125,0.0,0.0
301 | P(o-Tol)3,1M K3PO4(aq.),DMF,0.125,0.0,0.0
302 | P(o-Tol)3,KOH(aq.),DMF,0.125,0.0,0.0
303 | P(o-Tol)3,Cs2CO3(aq.),DMF,0.125,0.0,0.0
304 | P(o-Tol)3,KOAc,DMF,0.125,0.0,0.0
305 | P(o-Tol)3,None,DMF,0.125,0.0,0.0
306 | CataCXium A,NaOH(aq.),DMF,0.125,0.0,0.0
307 | CataCXium A,s. NaHCO3(aq.),DMF,0.125,0.0,0.0
308 | CataCXium A,CsF(aq.),DMF,0.125,0.0,0.0
309 | CataCXium A,1M K3PO4(aq.),DMF,0.125,0.0,0.0
310 | CataCXium A,KOH(aq.),DMF,0.125,0.0,0.0
311 | CataCXium A,Cs2CO3(aq.),DMF,0.125,12.6,0.0
312 | CataCXium A,KOAc,DMF,0.125,6.8,0.0
313 | CataCXium A,None,DMF,0.125,0.0,0.0
314 | SPhos,NaOH(aq.),DMF,0.0625,9.2,100.0
315 | SPhos,s. NaHCO3(aq.),DMF,0.0625,0.0,0.0
316 | SPhos,CsF(aq.),DMF,0.0625,0.0,0.0
317 | SPhos,1M K3PO4(aq.),DMF,0.0625,0.0,0.0
318 | SPhos,KOH(aq.),DMF,0.0625,0.0,0.0
319 | SPhos,Cs2CO3(aq.),DMF,0.0625,14.5,0.0
320 | SPhos,KOAc,DMF,0.0625,0.0,0.0
321 | SPhos,None,DMF,0.0625,0.0,0.0
322 | dtbpf,NaOH(aq.),DMF,0.0625,3.8,0.0
323 | dtbpf,s. NaHCO3(aq.),DMF,0.0625,0.0,0.0
324 | dtbpf,CsF(aq.),DMF,0.0625,0.0,0.0
325 | dtbpf,1M K3PO4(aq.),DMF,0.0625,3.1,0.0
326 | dtbpf,KOH(aq.),DMF,0.0625,3.4,0.0
327 | dtbpf,Cs2CO3(aq.),DMF,0.0625,0.0,0.0
328 | dtbpf,KOAc,DMF,0.0625,0.0,0.0
329 | dtbpf,None,DMF,0.0625,0.0,0.0
330 | XPhos,NaOH(aq.),DMF,0.0625,12.1,0.0
331 | XPhos,s. NaHCO3(aq.),DMF,0.0625,21.200000000000003,0.0
332 | XPhos,CsF(aq.),DMF,0.0625,24.2,0.0
333 | XPhos,1M K3PO4(aq.),DMF,0.0625,29.2,0.0
334 | XPhos,KOH(aq.),DMF,0.0625,27.3,0.0
335 | XPhos,Cs2CO3(aq.),DMF,0.0625,22.8,0.0
336 | XPhos,KOAc,DMF,0.0625,18.5,0.0
337 | XPhos,None,DMF,0.0625,22.3,0.0
338 | dppf,NaOH(aq.),DMF,0.0625,19.0,68.94736842105263
339 | dppf,s. NaHCO3(aq.),DMF,0.0625,7.7,55.84415584415584
340 | dppf,CsF(aq.),DMF,0.0625,2.1,0.0
341 | dppf,1M K3PO4(aq.),DMF,0.0625,1.9,0.0
342 | dppf,KOH(aq.),DMF,0.0625,14.4,70.83333333333333
343 | dppf,Cs2CO3(aq.),DMF,0.0625,15.9,72.95597484276729
344 | dppf,KOAc,DMF,0.0625,2.7,33.33333333333333
345 | dppf,None,DMF,0.0625,1.9,0.0
346 | Xanthphos,NaOH(aq.),DMF,0.0625,2.2,0.0
347 | Xanthphos,s. NaHCO3(aq.),DMF,0.0625,1.9,0.0
348 | Xanthphos,CsF(aq.),DMF,0.0625,1.9,0.0
349 | Xanthphos,1M K3PO4(aq.),DMF,0.0625,1.8,0.0
350 | Xanthphos,KOH(aq.),DMF,0.0625,2.1,0.0
351 | Xanthphos,Cs2CO3(aq.),DMF,0.0625,2.1,0.0
352 | Xanthphos,KOAc,DMF,0.0625,2.6,34.61538461538461
353 | Xanthphos,None,DMF,0.0625,1.7,0.0
354 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance/1_merge_all.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | batch = 1
 5 | 
 6 | objective_1 = 'objective_conversion'
 7 | objective_2 = 'objective_selectivity'
 8 | 
 9 | columns_to_keep = ['step', 'n_experiments',
10 |                    'dmaximin_tradeoff', 'hypervolume completed (%)',
11 |                    f'MAE_{objective_1}', f"MAE_{objective_2}",
12 |                    f'RMSE_{objective_1}', f'RMSE_{objective_2}',
13 |                    f'R2_{objective_1}', f'R2_{objective_2}',
14 |                    f'{objective_1}_best', f'{objective_2}_best'
15 |                    ]
16 | 
17 | for feat in ['ohe', 'dft', 'mordred', 'random']:
18 |     for acq in ['EHVI', 'MOUCB', 'MOGreedy']:
19 |         df_i = pd.read_csv(f"../results_{feat}/results_benchmark_{feat}_acq_{acq}_batch_{batch}_seed_0.csv")
20 |         columns_to_drop = list(set(df_i.columns.values) - set(columns_to_keep))
21 |         df_i.drop(columns=columns_to_drop, inplace=True)
22 |         for seed_i in range(0, 5):
23 |             df_j = pd.read_csv(f"../results_{feat}/results_benchmark_{feat}_acq_{acq}_batch_{batch}_seed_{seed_i}.csv")
24 |             df_j.drop(columns=columns_to_drop, inplace=True)
25 |             df_i = df_i.append(df_j)
26 | 
27 |         df_i.to_csv(f"./{feat}_{acq}_all.csv", index=False)
28 | 
29 |         df_av = df_i.groupby(['step', 'n_experiments']).agg([np.average])
30 |         df_av['step'] = np.unique(df_i.step.values)
31 |         df_av['n_experiments'] = np.unique(df_i.n_experiments.values)
32 |         df_av.to_csv(f"./{feat}_{acq}_avg.csv", index=False)
33 | 
34 |         df_min = df_i.groupby(['step', 'n_experiments']).agg([np.min])
35 |         df_min['step'] = np.unique(df_i.step.values)
36 |         df_min['n_experiments'] = np.unique(df_i.n_experiments.values)
37 |         df_min.to_csv(f"./{feat}_{acq}_min.csv", index=False)
38 | 
39 |         df_max = df_i.groupby(['step', 'n_experiments']).agg([np.max])
40 |         df_max['step'] = np.unique(df_i.step.values)
41 |         df_max['n_experiments'] = np.unique(df_i.n_experiments.values)
42 |         df_max.to_csv(f"./{feat}_{acq}_max.csv", index=False)
43 | 
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance/2_plot_ground_truth.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import pandas as pd
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | import seaborn as sns
  6 | sns.set_style("ticks")
  7 | sns.despine()
  8 | import matplotlib as mpl
  9 | mpl.rcParams['grid.linestyle'] = ':'
 10 | mpl.rcParams['grid.linewidth'] = 0.1
 11 | plt.rcParams['font.family'] = 'Helvetica'
 12 | plt.rcParams['font.size'] = 10
 13 | import pareto
 14 | from edbo.plus.benchmark.multiobjective_benchmark import is_pareto
 15 | from pymoo.mcdm.high_tradeoff import HighTradeoffPoints
 16 | from sklearn.preprocessing import MinMaxScaler
 17 | 
 18 | # Hue: Color (ligand), shape (base), filling (solvent), alpha (ligand_eq).
 19 | 
 20 | import seaborn as sns
 21 | 
# Identifies the benchmark results file that this script loads further down
# (results_<dataset>/results_benchmark_<dataset>_acq_<acq>_batch_<batch>_seed_<seed>.csv).
dataset = 'dft'
acq = 'EHVI'
batch = 1
# total_restarts and n_steps are not referenced anywhere else in this script.
total_restarts = 5
n_steps = 30
seed = 0
 28 | 
 29 | 
 30 | def get_pareto_points(objective_values):
 31 |     """ Get pareto for the ground truth function.
 32 |     NOTE: Assumes maximization."""
 33 |     pareto_ground = pareto.eps_sort(tables=objective_values,
 34 |                                     objectives=np.arange(2),
 35 |                                     maximize_all=True)
 36 |     idx_pareto = is_pareto(objectives=-objective_values)
 37 |     return np.array(pareto_ground), idx_pareto
 38 | 
 39 | 
 40 | def get_high_tradeoff_points(pareto_points):
 41 |     """ Pass a numpy array with the pareto points and returns a numpy
 42 |         array with the high tradeoff points."""
 43 | 
 44 |     scaler_pareto = MinMaxScaler()
 45 |     pareto_scaled = scaler_pareto.fit_transform(pareto_points)
 46 |     try:
 47 |         tradeoff = HighTradeoffPoints()
 48 | 
 49 |         tradeoff_args = tradeoff.do(-pareto_scaled)  # Always minimizing.
 50 |         tradeoff_points = pareto_points[tradeoff_args]
 51 |     except:
 52 |         tradeoff_points = []
 53 |         pass
 54 |     return tradeoff_points
 55 | 
 56 | 
 57 | df_exp = pd.read_csv('../data/dataset_B1.csv')
 58 | objective_vals = df_exp[['objective_conversion', 'objective_selectivity']].values
 59 | pareto_points, idx_pareto = get_pareto_points(objective_vals)
 60 | high_tradeoff_points = get_high_tradeoff_points(pareto_points)
 61 | 
 62 | 
 63 | df_benchmark = pd.read_csv(f'../results_{dataset}/results_benchmark_{dataset}_acq_{acq}_batch_{batch}_seed_{seed}.csv')
 64 | 
 65 | fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(9, 15))
 66 | 
 67 | palettes = [['tab10', 'viridis'], [None, 'Blues']]
 68 | 
 69 | hues = [['ligand', 'base'], ['solvent', 'ligand_equivalent']]
 70 | for i in range(0, 2):
 71 |     for j in range(0, 2):
 72 |         sns.scatterplot(x=df_exp['objective_conversion'], y=df_exp['objective_selectivity'],
 73 |                         hue=df_exp[hues[i][j]], s=40, lw=1., edgecolor='black', ax=ax[i][j], palette=palettes[i][j])
 74 |         sns.lineplot(x=pareto_points[:, 0], y=pareto_points[:, 1],
 75 |                      linewidth=1.2, color='grey', ls='dotted', ax=ax[i][j])
 76 |         ax[i][j].set_xlim(-5, 105)
 77 |         ax[i][j].set_ylim(-5, 105)
 78 |         ax[i][j].legend(loc=4)
 79 |         ax[i][j].set_title(hues[i][j])
 80 | 
 81 | plt.tight_layout()
 82 | plt.show()
 83 | 
 84 | palettes = ['tab10', None]
 85 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(7, 5))
 86 | hues = ['ligand', 'solvent']
 87 | 
 88 | for i in range(0, 2):
 89 |     sns.scatterplot(x=df_exp['objective_conversion'], y=df_exp['objective_selectivity'],
 90 |                     hue=df_exp[hues[i]], s=50, lw=1., edgecolor='black', ax=ax[i], palette=palettes[i])
 91 |     sns.lineplot(x=pareto_points[:, 0], y=pareto_points[:, 1],
 92 |                  linewidth=1.2, color='grey', ls='dotted', ax=ax[i])
 93 |     ax[i].set_xlim(-5, 105)
 94 |     ax[i].set_ylim(-5, 105)
 95 |     ax[i].legend(loc=4)
 96 |     ax[i].set_title(hues[i])
 97 | 
 98 | ax[0].legend(scatterpoints=1, loc='best', ncol=2, markerscale=1, fontsize=9)
 99 | ax[1].legend(scatterpoints=1, loc='best', ncol=2, markerscale=1, fontsize=9)
100 | 
101 | plt.tight_layout()
102 | plt.savefig('Fig2_scope.svg', dpi=500, format='svg')
103 | plt.show()
104 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance/3_plot_decision_pathways_objectives.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | import seaborn as sns
 6 | sns.set_style("ticks")
 7 | sns.despine()
 8 | import matplotlib as mpl
 9 | mpl.rcParams['grid.linestyle'] = ':'
10 | mpl.rcParams['grid.linewidth'] = 0.1
11 | plt.rcParams['font.family'] = 'Helvetica'
12 | 
13 | 
# For every featurization, draw one figure with four panels: for each of the
# two objectives, the values collected at each step (one trace per seed) and,
# next to it, a vertical density of those collected values.
datasets = ['ohe', 'dft', 'mordred', 'random']
acq = 'EHVI'
batch = 1
total_restarts = 5
n_steps = 30

# One sequential palette per featurization, one shade per seed.
color_paletes = [sns.color_palette("Blues", n_colors=total_restarts),
                 sns.color_palette("Reds", n_colors=total_restarts),
                 sns.color_palette("Greens", n_colors=total_restarts),
                 sns.color_palette("Oranges", n_colors=total_restarts)]

objectives = ['objective_conversion', 'objective_selectivity']
dict_ratios_plot = {'width_ratios': [0.5, 0.2, 0.5, 0.2], 'wspace': 0.4}

for cp, dataset in enumerate(datasets):
    fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(10, 3),
                           gridspec_kw=dict_ratios_plot)

    # Load each seed's results once per figure instead of once per objective.
    benchmarks = [
        pd.read_csv(f'../results_{dataset}/results_benchmark_{dataset}_acq_{acq}_batch_{batch}_seed_{seed}.csv')
        for seed in range(total_restarts)
    ]

    for obj_index, obj in enumerate(objectives):
        # Each objective owns two neighbouring panels: trace, then density.
        trace_ax = ax[2 * obj_index]
        dist_ax = ax[2 * obj_index + 1]

        for seed, df_benchmark in enumerate(benchmarks):
            seed_color = color_paletes[cp][seed]
            collected = df_benchmark[f"{obj}_collected_values"]

            # Only the first n_steps rows are traced.
            trace_x = df_benchmark['step'].values[:n_steps]
            trace_y = collected.values[:n_steps]

            trace_ax.scatter(trace_x, trace_y,
                             facecolor='white', s=50,
                             edgecolors=seed_color,
                             zorder=100)
            trace_ax.plot(trace_x, trace_y,
                          linestyle='dotted', c=seed_color,
                          lw=1.1, alpha=1.)
            trace_ax.set_xlim(-1, n_steps+1)
            trace_ax.set_ylim(-5, 100+10)
            sns.despine(trim=True, offset=2, ax=trace_ax)
            sns.distplot(a=df_benchmark, x=collected,
                         ax=dist_ax, vertical=True,
                         hist=False,
                         kde_kws={'shade': True,
                                  'color': seed_color,
                                  'alpha': 0.1},
                         color='black'
                         )

            dist_ax.set_xlim(0, 0.025)
            dist_ax.set_ylim(-5, 100+10)
            dist_ax.axvline(x=0.015, color='black', ls='dotted', alpha=0.5)

        trace_ax.set_title(dataset)
        trace_ax.set_xlabel('Number of samples collected')
        trace_ax.set_ylabel(f"{obj} (in %)")

    plt.savefig(f"fig_3_{cp}.svg", format='svg', dpi=500)
    plt.show()
    # NOTE(review): the former plt.tight_layout() call sat here, after
    # savefig/show, so it never affected the saved figure and is dropped.
    # With a blocking backend it would act on a fresh empty figure.  If a
    # tight layout is wanted, call fig.tight_layout() before savefig (this
    # will override the 'wspace' set above).
84 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance/4_plot_performance.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | import pandas as pd
  6 | import os
  7 | 
  8 | 
  9 | # sns.set_style("ticks")
 10 | # sns.set_context("paper")
 11 | import matplotlib as mpl
 12 | mpl.rcParams['grid.linestyle'] = ':'
 13 | mpl.rcParams['grid.linewidth'] = 0.1
 14 | 
 15 | objective_1 = 'conversion'
 16 | objective_2 = 'selectivity'
 17 | 
 18 | plt.rcParams['font.family'] = 'Helvetica'
 19 | # mpl.rc('font', **{'family':'sans-serif', 'sans-serif':['HelveticaLight']})
 20 | 
 21 | # Best objectives.
 22 | best_conversion_in_scope = 100.
 23 | best_selectivity_in_scope = 100.
 24 | n_steps = 30
 25 | feat_iter = 0
 26 | 
 27 | if not os.path.exists('./figures'):
 28 |     os.mkdir('figures')
 29 | 
 30 | 
 31 | for acq in ['EHVI', 'MOUCB', 'MOGreedy']:
 32 |     colors = ['#DC143C', '#0343DF', '#FAC205', '#15B01A']
 33 |     color_i = 0
 34 |     fig, ax = plt.subplots(figsize=(8., 8.0), dpi=500, nrows=2, ncols=2)
 35 | 
 36 |     for feat in ['ohe', 'dft', 'mordred', 'random']:
 37 |         avg = pd.read_csv(f"./{feat}_{acq}_avg.csv")
 38 |         avg = avg.apply(pd.to_numeric, errors='coerce')
 39 |         max = pd.read_csv(f"./{feat}_{acq}_max.csv")
 40 |         max = max.apply(pd.to_numeric, errors='coerce')
 41 |         min = pd.read_csv(f"./{feat}_{acq}_min.csv")
 42 |         min = min.apply(pd.to_numeric, errors='coerce')
 43 | 
 44 |         n_exp = avg['n_experiments'].values[1:]
 45 | 
 46 |         # Hypervolume.
 47 |         hypervol_max = max['hypervolume completed (%)'].values[1:]
 48 |         hypervol_min = min['hypervolume completed (%)'].values[1:]
 49 |         hypervol_avg = avg['hypervolume completed (%)'].values[1:]
 50 | 
 51 |             # Where hypervolume is 99% completed.
 52 |         try:
 53 |             hyper_complete_arg = np.argwhere(hypervol_avg > 99.0)[0]
 54 |             hyper_complete_y = [hypervol_avg[hyper_complete_arg]]
 55 |             hyper_complete_x = [n_exp[hyper_complete_arg]]
 56 |         except:
 57 |             conversion_complete_x = []
 58 |             conversion_complete_y = []
 59 | 
 60 |         # Distance pareto.
 61 |         dtradeoff_max = max['dmaximin_tradeoff'].values[1:]
 62 |         dtradeoff_min = min['dmaximin_tradeoff'].values[1:]
 63 |         dtradeoff_avg = avg['dmaximin_tradeoff'].values[1:]
 64 | 
 65 | 
 66 |         # Best samples at each run.
 67 |         bestconversion_max = max['objective_conversion_best'].values[1:]
 68 |         bestselectivity_max = max['objective_selectivity_best'].values[1:]
 69 |         bestconversion_min = min['objective_conversion_best'].values[1:]
 70 |         bestselectivity_min = min['objective_selectivity_best'].values[1:]
 71 |         bestconversion_avg = avg['objective_conversion_best'].values[1:]
 72 |         bestselectivity_avg = avg['objective_selectivity_best'].values[1:]
 73 | 
 74 |         # Where best conversion is sampled.
 75 |         try:
 76 |             conversion_complete_arg = np.argwhere(bestconversion_max == best_conversion_in_scope)[0]
 77 |             conversion_complete_y = [bestconversion_max[conversion_complete_arg]]
 78 |             conversion_complete_x = [n_exp[conversion_complete_arg]]
 79 |         except:
 80 |             conversion_complete_x = []
 81 |             conversion_complete_y = []
 82 | 
 83 |         # Where best selectivity is sampled.
 84 |         try:
 85 |             selectivity_complete_arg = np.argwhere(bestselectivity_min == best_selectivity_in_scope)[0]
 86 |             selectivity_complete_y = [bestselectivity_min[selectivity_complete_arg]]
 87 |             selectivity_complete_x = [n_exp[selectivity_complete_arg]]
 88 |         except:
 89 |             selectivity_complete_x = []
 90 |             selectivity_complete_y = []
 91 | 
 92 |         # Plot performance for each acquisition function.
 93 |         ax[0][0].plot(n_exp, hypervol_avg, color=colors[color_i], lw=2.5,
 94 |                  label=feat.upper())
 95 |         ax[0][0].fill_between(x=n_exp,
 96 |                         y1=hypervol_avg,
 97 |                         y2=hypervol_max, color=colors[color_i], alpha=0.3, lw=0.)
 98 |         ax[0][0].fill_between(x=n_exp,
 99 |                         y1=hypervol_min,
100 |                         y2=hypervol_avg, color=colors[color_i], alpha=0.3, lw=0.)
101 |         ax[0][0].plot(n_exp, hypervol_min, color=colors[color_i], alpha=1., lw=1., ls='--')
102 |         ax[0][0].plot(n_exp, hypervol_max, color=colors[color_i], alpha=1., lw=1., ls='--')
103 |         ax[0][0].plot(n_exp, np.ones_like(n_exp)*100,
104 |                  dashes=[8, 4], color='black', linewidth=0.8)
105 |         ax[0][0].scatter(n_exp, hypervol_avg, marker='o', s=0., color=colors[color_i])
106 | 
107 |         ax[0][0].set_xticks(np.arange(0, 120, 10))
108 |         ax[0][0].set_xlim(0, n_steps)
109 |         ax[0][0].set_ylim(0, 100)
110 |         ax[0][0].set_xlabel('Samples')
111 |         ax[0][0].set_ylabel('Hypervolume (%)')
112 | 
113 |         # Plot distance tradeoff.
114 |         ax[0][1].plot(n_exp, dtradeoff_avg, color=colors[color_i], lw=2.5,
115 |                    label=feat.upper())
116 |         ax[0][1].plot(n_exp, dtradeoff_min, color=colors[color_i], lw=1., ls='--',
117 |                       label=feat.upper())
118 |         ax[0][1].plot(n_exp, dtradeoff_max, color=colors[color_i], lw=1., ls='--',
119 |                       label=feat.upper())
120 | 
121 | 
122 |         ax[0][1].fill_between(x=n_exp,
123 |                            y1=dtradeoff_avg,
124 |                            y2=dtradeoff_max, color=colors[color_i], alpha=0.3,
125 |                            )
126 |         ax[0][1].fill_between(x=n_exp,
127 |                            y1=dtradeoff_min,
128 |                            y2=dtradeoff_avg, color=colors[color_i], alpha=0.3,
129 |                            )
130 |         ax[0][1].plot(n_exp, np.ones_like(n_exp) * 0,
131 |                    dashes=[8, 4], color='black', linewidth=0.8)
132 |         ax[0][1].scatter(n_exp, dtradeoff_avg, marker='o', s=0.,
133 |                       color=colors[color_i])
134 | 
135 | 
136 |         ax[0][1].set_xticks(np.arange(0, 120, 10))
137 |         ax[0][1].set_xlim(0, n_steps)
138 |         ax[0][1].set_ylim(0, 80)
139 |         ax[0][1].set_xlabel('Samples')
140 |         ax[0][1].set_ylabel(r'$d_{(trade-off)}$')
141 | 
142 |         # Plot best conversion.
143 |         ax[1][0].plot(n_exp, bestconversion_avg, color=colors[color_i], lw=2.5,
144 |                    label=feat)
145 |         ax[1][0].plot(n_exp, bestconversion_min, color=colors[color_i], lw=1, ls='--',
146 |                       label=feat, alpha=1.)
147 |         ax[1][0].plot(n_exp, bestconversion_max, color=colors[color_i], lw=1, ls='--',
148 |                       label=feat, alpha=1.)
149 |         ax[1][0].fill_between(x=n_exp,
150 |                            y1=bestconversion_avg,
151 |                            y2=bestconversion_max, color=colors[color_i], alpha=0.3,
152 |                            )
153 |         ax[1][0].fill_between(x=n_exp,
154 |                            y1=bestconversion_min,
155 |                            y2=bestconversion_avg, color=colors[color_i], alpha=0.3,
156 |                            )
157 | 
158 |         ax[1][0].plot(n_exp, np.ones_like(n_exp) * 0,
159 |                    dashes=[8, 4], color='black', linewidth=0.8)
160 |         ax[1][0].scatter(n_exp, bestconversion_avg, marker='o', s=0.,
161 |                       color=colors[color_i])
162 | 
163 |         ax[1][0].set_xticks(np.arange(0, 120, 10))
164 |         ax[1][0].set_xlim(0, n_steps)
165 |         ax[1][0].set_ylim(20, 100)
166 |         ax[1][0].set_xlabel('Samples')
167 |         ax[1][0].set_ylabel('Best conversion')
168 | 
169 |         # Plot best selectivity.
170 |         ax[1][1].plot(n_exp, bestselectivity_avg, color=colors[color_i], lw=2.5,
171 |                    label=feat.upper())
172 | 
173 |         ax[1][1].plot(n_exp, bestselectivity_min, color=colors[color_i], lw=1.0, ls='--',
174 |                       label=feat.upper())
175 |         ax[1][1].plot(n_exp, bestselectivity_max, color=colors[color_i], lw=1.0, ls='--',
176 |                       label=feat.upper())
177 | 
178 | 
179 |         ax[1][1].fill_between(x=n_exp,
180 |                            y1=bestselectivity_avg,
181 |                            y2=bestselectivity_max, color=colors[color_i], alpha=0.3,
182 |                            )
183 |         ax[1][1].fill_between(x=n_exp,
184 |                            y1=bestselectivity_min,
185 |                            y2=bestselectivity_avg, color=colors[color_i], alpha=0.3,
186 |                            )
187 |         ax[1][1].plot(n_exp, np.ones_like(n_exp) * 0,
188 |                    dashes=[8, 4], color='black', linewidth=0.8)
189 |         ax[1][1].scatter(n_exp, bestselectivity_avg, marker='o', s=0.,
190 |                       color=colors[color_i])
191 | 
192 | 
193 |         ax[1][1].set_xticks(np.arange(0, 120, 10))
194 |         ax[1][1].set_xlim(0, n_steps)
195 |         ax[1][1].set_ylim(0, 100.)
196 |         ax[1][1].set_xlabel('Samples')
197 |         ax[1][1].set_ylabel('Best selectivity')
198 | 
199 |         color_i += 1
200 |     plt.legend()
201 |     plt.tight_layout()
202 |     plt.savefig(f"figures/benchmark_{acq}.svg")
203 |     plt.show()
204 | 
205 | 
206 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance/5_find_entry.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | 
 4 | 
# Look up the rows of the B1 dataset that match one reaction condition.
df = pd.read_csv('../data/dataset_B1.csv')

# Candidate conditions (ligand, base, ligand equivalents, solvent).
# Each assignment below overwrites the previous one, so only the LAST tuple
# is actually used by the filter; the earlier ones are kept as a record of
# other entries that can be looked up by reordering/commenting lines.
c_ligand, c_base, c_leq, c_solvent = 'SPhos', 'NaOH(aq.)', 0.0625, 'DMF'

c_ligand, c_base, c_leq, c_solvent = 'P(Ph)3', 'KOAc', 0.125, 'MeOH'

c_ligand, c_base, c_leq, c_solvent = 'P(Cy)3', 'Cs2CO3(aq.)', 0.125, 'MeOH'

c_ligand, c_base, c_leq, c_solvent = 'P(Ph)3', 'NaOH(aq.)', 0.125, 'MeOH'

c_ligand, c_base, c_leq, c_solvent = 'P(Ph)3', 'CsF(aq.)', 0.125, 'MeCN'

# NOTE(review): c_leq is assigned but never used in the mask below, so rows
# with any ligand-equivalents value are returned. Confirm whether the dataset
# has a matching column that should be part of the filter.
df_new = df[(df['ligand'] == c_ligand) & (df['base'] == c_base) & (df['solvent'] == c_solvent)]

print(df_new)
20 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance/7_plot_performance_acquisition_function.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | import pandas as pd
  6 | import os
  7 | 
  8 | 
  9 | # sns.set_style("ticks")
 10 | # sns.set_context("paper")
 11 | import matplotlib as mpl
 12 | mpl.rcParams['grid.linestyle'] = ':'
 13 | mpl.rcParams['grid.linewidth'] = 0.1
 14 | 
 15 | objective_1 = 'conversion'
 16 | objective_2 = 'selectivity'
 17 | 
 18 | plt.rcParams['font.family'] = 'Helvetica'
 19 | # mpl.rc('font', **{'family':'sans-serif', 'sans-serif':['HelveticaLight']})
 20 | 
 21 | # Best objectives.
 22 | best_conversion_in_scope = 100.
 23 | best_selectivity_in_scope = 100.
 24 | n_steps = 30
 25 | feat_iter = 0
 26 | 
 27 | if not os.path.exists('./figures'):
 28 |     os.mkdir('figures')
 29 | 
 30 | 
 31 | colors = ['#DC143C', '#0343DF', '#FAC205']
 32 | feat = 'DFT'
 33 | color_i = 0
 34 | fig, ax = plt.subplots(figsize=(8., 8.), dpi=500, nrows=2, ncols=2)
 35 | for acq in ['EHVI', 'MOUCB', 'MOGreedy']:
 36 | 
 37 |     avg = pd.read_csv(f"./{feat}_{acq}_avg.csv")
 38 |     avg = avg.apply(pd.to_numeric, errors='coerce')
 39 |     max = pd.read_csv(f"./{feat}_{acq}_max.csv")
 40 |     max = max.apply(pd.to_numeric, errors='coerce')
 41 |     min = pd.read_csv(f"./{feat}_{acq}_min.csv")
 42 |     min = min.apply(pd.to_numeric, errors='coerce')
 43 | 
 44 |     n_exp = avg['n_experiments'].values[1:]
 45 | 
 46 |     # Hypervolume.
 47 |     hypervol_max = max['hypervolume completed (%)'].values[1:]
 48 |     hypervol_min = min['hypervolume completed (%)'].values[1:]
 49 |     hypervol_avg = avg['hypervolume completed (%)'].values[1:]
 50 | 
 51 |         # Where hypervolume is 99% completed.
 52 |     try:
 53 |         hyper_complete_arg = np.argwhere(hypervol_avg > 99.0)[0]
 54 |         hyper_complete_y = [hypervol_avg[hyper_complete_arg]]
 55 |         hyper_complete_x = [n_exp[hyper_complete_arg]]
 56 |     except:
 57 |         conversion_complete_x = []
 58 |         conversion_complete_y = []
 59 | 
 60 |     # Distance pareto.
 61 |     dtradeoff_max = max['dmaximin_tradeoff'].values[1:]
 62 |     dtradeoff_min = min['dmaximin_tradeoff'].values[1:]
 63 |     dtradeoff_avg = avg['dmaximin_tradeoff'].values[1:]
 64 | 
 65 | 
 66 |     # Best samples at each run.
 67 |     bestconversion_max = max['objective_conversion_best'].values[1:]
 68 |     bestselectivity_max = max['objective_selectivity_best'].values[1:]
 69 |     bestconversion_min = min['objective_conversion_best'].values[1:]
 70 |     bestselectivity_min = min['objective_selectivity_best'].values[1:]
 71 |     bestconversion_avg = avg['objective_conversion_best'].values[1:]
 72 |     bestselectivity_avg = avg['objective_selectivity_best'].values[1:]
 73 | 
 74 |     # Where best conversion is sampled.
 75 |     try:
 76 |         conversion_complete_arg = np.argwhere(bestconversion_max == best_conversion_in_scope)[0]
 77 |         conversion_complete_y = [bestconversion_max[conversion_complete_arg]]
 78 |         conversion_complete_x = [n_exp[conversion_complete_arg]]
 79 |     except:
 80 |         conversion_complete_x = []
 81 |         conversion_complete_y = []
 82 | 
 83 |     # Where best selectivity is sampled.
 84 |     try:
 85 |         selectivity_complete_arg = np.argwhere(bestselectivity_min == best_selectivity_in_scope)[0]
 86 |         selectivity_complete_y = [bestselectivity_min[selectivity_complete_arg]]
 87 |         selectivity_complete_x = [n_exp[selectivity_complete_arg]]
 88 |     except:
 89 |         selectivity_complete_x = []
 90 |         selectivity_complete_y = []
 91 | 
 92 |     # Plot performance for each acquisition function.
 93 |     ax[0][0].plot(n_exp, hypervol_avg, color=colors[color_i], lw=2.5, label=acq.upper())
 94 |     ax[0][0].fill_between(x=n_exp, y1=hypervol_avg, y2=hypervol_max, color=colors[color_i], alpha=0.3, lw=0.)
 95 |     ax[0][0].fill_between(x=n_exp, y1=hypervol_min, y2=hypervol_avg, color=colors[color_i], alpha=0.3, lw=0.)
 96 |     ax[0][0].plot(n_exp, hypervol_min, color=colors[color_i], alpha=1., lw=1., ls='--')
 97 |     ax[0][0].plot(n_exp, hypervol_max, color=colors[color_i], alpha=1., lw=1., ls='--')
 98 |     ax[0][0].plot(n_exp, np.ones_like(n_exp)*100, dashes=[8, 4], color='black', linewidth=0.8)
 99 |     ax[0][0].scatter(n_exp, hypervol_avg, marker='o', s=0., color=colors[color_i])
100 | 
101 |     ax[0][0].set_xticks(np.arange(0, 120, 10))
102 |     ax[0][0].set_xlim(0, n_steps)
103 |     ax[0][0].set_ylim(0, 100)
104 |     ax[0][0].set_xlabel('Samples')
105 |     ax[0][0].set_ylabel('Hypervolume (%)')
106 |     # plt.tick_params(axis="x", direction="in")
107 |     # plt.tick_params(axis="y", direction="in")
108 | 
109 |     # Plot distance tradeoff.
110 |     ax[0][1].plot(n_exp, dtradeoff_avg, color=colors[color_i], lw=2.5, label=acq.upper())
111 |     ax[0][1].plot(n_exp, dtradeoff_min, color=colors[color_i], lw=1., ls='--', label=acq.upper())
112 |     ax[0][1].plot(n_exp, dtradeoff_max, color=colors[color_i], lw=1., ls='--', label=acq.upper())
113 |     ax[0][1].fill_between(x=n_exp, y1=dtradeoff_avg, y2=dtradeoff_max, color=colors[color_i], alpha=0.3)
114 |     ax[0][1].fill_between(x=n_exp, y1=dtradeoff_min, y2=dtradeoff_avg, color=colors[color_i], alpha=0.3)
115 |     ax[0][1].plot(n_exp, np.ones_like(n_exp) * 0, dashes=[8, 4], color='black', linewidth=0.8)
116 |     ax[0][1].scatter(n_exp, dtradeoff_avg, marker='o', s=0., color=colors[color_i])
117 | 
118 |     ax[0][1].set_xticks(np.arange(0, 120, 10))
119 |     ax[0][1].set_xlim(0, n_steps)
120 |     ax[0][1].set_ylim(0, 80)
121 |     ax[0][1].set_xlabel('Samples')
122 |     ax[0][1].set_ylabel(r'$d_{(trade-off)}$')
123 | 
124 |     # Plot best conversion.
125 |     ax[1][0].plot(n_exp, bestconversion_avg, color=colors[color_i], lw=2.5, label=acq)
126 |     ax[1][0].plot(n_exp, bestconversion_min, color=colors[color_i], lw=1, ls='--', label=acq, alpha=1.)
127 |     ax[1][0].plot(n_exp, bestconversion_max, color=colors[color_i], lw=1, ls='--', label=acq, alpha=1.)
128 |     ax[1][0].fill_between(x=n_exp, y1=bestconversion_avg, y2=bestconversion_max, color=colors[color_i], alpha=0.3)
129 |     ax[1][0].fill_between(x=n_exp, y1=bestconversion_min, y2=bestconversion_avg, color=colors[color_i], alpha=0.3)
130 | 
131 |     ax[1][0].plot(n_exp, np.ones_like(n_exp) * 0,
132 |                dashes=[8, 4], color='black', linewidth=0.8)
133 |     ax[1][0].scatter(n_exp, bestconversion_avg, marker='o', s=0.,
134 |                   color=colors[color_i])
135 | 
136 |     ax[1][0].set_xticks(np.arange(0, 120, 10))
137 |     ax[1][0].set_xlim(0, n_steps)
138 |     ax[1][0].set_ylim(20, 100)
139 |     ax[1][0].set_xlabel('Samples')
140 |     ax[1][0].set_ylabel('Best conversion')
141 | 
142 |     # Plot best selectivity.
143 |     ax[1][1].plot(n_exp, bestselectivity_avg, color=colors[color_i], lw=2.5,
144 |                label=acq.upper())
145 | 
146 |     ax[1][1].plot(n_exp, bestselectivity_min, color=colors[color_i], lw=1.0, ls='--',
147 |                   label=acq.upper())
148 |     ax[1][1].plot(n_exp, bestselectivity_max, color=colors[color_i], lw=1.0, ls='--',
149 |                   label=acq.upper())
150 | 
151 | 
152 |     ax[1][1].fill_between(x=n_exp,
153 |                        y1=bestselectivity_avg,
154 |                        y2=bestselectivity_max, color=colors[color_i], alpha=0.3,
155 |                        )
156 |     ax[1][1].fill_between(x=n_exp,
157 |                        y1=bestselectivity_min,
158 |                        y2=bestselectivity_avg, color=colors[color_i], alpha=0.3,
159 |                        )
160 |     ax[1][1].plot(n_exp, np.ones_like(n_exp) * 0,
161 |                dashes=[8, 4], color='black', linewidth=0.8)
162 |     ax[1][1].scatter(n_exp, bestselectivity_avg, marker='o', s=0.,
163 |                   color=colors[color_i])
164 | 
165 | 
166 |     ax[1][1].set_xticks(np.arange(0, 120, 10))
167 |     ax[1][1].set_xlim(0, n_steps)
168 |     ax[1][1].set_ylim(0, 100.)
169 |     ax[1][1].set_xlabel('Samples')
170 |     ax[1][1].set_ylabel('Best selectivity')
171 | 
172 |     color_i += 1
173 | 
174 | ax[0][1].legend()
175 | plt.tight_layout()
176 | # plt.savefig(f"figures/benchmark_acquisition_functions.svg")
177 | plt.show()
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance_acq/1_merge_all.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | 
 5 | objective_1 = 'objective_conversion'
 6 | objective_2 = 'objective_selectivity'
 7 | columns_to_keep = ['step', 'n_experiments', 'hypervolume completed (%)']
 8 | 
 9 | for batch in [1, 2, 3, 5]:
10 |     for acq in ['EHVI', 'MOUCB', 'MOGreedy']:
11 | 
12 |         df_i = pd.read_csv(f"../results/results_benchmark_dft_acq_{acq}_batch_{batch}_seed_0.csv")
13 |         columns_to_drop = list(set(df_i.columns.values) - set(columns_to_keep))
14 |         df_i.drop(columns=columns_to_drop, inplace=True)
15 |         for seed_i in range(0, 5):
16 |             df_j = pd.read_csv(f"../results/results_benchmark_dft_acq_{acq}_batch_{batch}_seed_{seed_i}.csv")
17 |             df_j.drop(columns=columns_to_drop, inplace=True)
18 |             df_i = df_i.append(df_j)
19 | 
20 |         df_i.to_csv(f"./dft_{acq}_{batch}_all.csv", index=False)
21 | 
22 |         df_av = df_i.groupby(['step', 'n_experiments']).agg([np.average])
23 |         df_av['step'] = np.unique(df_i.step.values)
24 |         df_av['n_experiments'] = np.unique(df_i.n_experiments.values)
25 |         df_av.to_csv(f"./dft_{acq}_{batch}_avg.csv", index=False)
26 | 
27 |         df_min = df_i.groupby(['step', 'n_experiments']).agg([np.min])
28 |         df_min['step'] = np.unique(df_i.step.values)
29 |         df_min['n_experiments'] = np.unique(df_i.n_experiments.values)
30 |         df_min.to_csv(f"./dft_{acq}_{batch}_min.csv", index=False)
31 | 
32 |         df_max = df_i.groupby(['step', 'n_experiments']).agg([np.max])
33 |         df_max['step'] = np.unique(df_i.step.values)
34 |         df_max['n_experiments'] = np.unique(df_i.n_experiments.values)
35 |         df_max.to_csv(f"./dft_{acq}_{batch}_max.csv", index=False)
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/examples/publication/Suzuki/performance_acq/2_plot_acq_batch.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import matplotlib.pyplot as plt
 6 | 
 7 | 
 8 | n_steps = 30
 9 | colors = ['#DC143C', '#0343DF', '#FAC205']
10 | feat = 'dft'
11 | fig, ax = plt.subplots(figsize=(15., 4.), dpi=500, nrows=1, ncols=4)
12 | 
13 | batch_count = 0
14 | for batch in [1, 2, 3, 5]:
15 |     color_i = 0
16 |     for acq in ['EHVI', 'MOUCB', 'MOGreedy']:
17 |         avg = pd.read_csv(f"./{feat}_{acq}_{batch}_avg.csv")
18 |         avg = avg.apply(pd.to_numeric, errors='coerce')
19 |         max = pd.read_csv(f"./{feat}_{acq}_{batch}_max.csv")
20 |         max = max.apply(pd.to_numeric, errors='coerce')
21 |         min = pd.read_csv(f"./{feat}_{acq}_{batch}_min.csv")
22 |         min = min.apply(pd.to_numeric, errors='coerce')
23 |         n_exp = avg['n_experiments'].values[1:]
24 | 
25 |         hypervol_max = max['hypervolume completed (%)'].values[1:]
26 |         hypervol_min = min['hypervolume completed (%)'].values[1:]
27 |         hypervol_avg = avg['hypervolume completed (%)'].values[1:]
28 |         # Plot performance for each acquisition function.
29 |         ax[batch_count].plot(n_exp, hypervol_avg, color=colors[color_i], lw=2.5, label=acq.upper())
30 |         ax[batch_count].fill_between(x=n_exp, y1=hypervol_avg, y2=hypervol_max, color=colors[color_i], alpha=0.3, lw=0.)
31 |         ax[batch_count].fill_between(x=n_exp, y1=hypervol_min, y2=hypervol_avg, color=colors[color_i], alpha=0.3, lw=0.)
32 |         ax[batch_count].plot(n_exp, hypervol_min, color=colors[color_i], alpha=1., lw=1., ls='--')
33 |         ax[batch_count].plot(n_exp, hypervol_max, color=colors[color_i], alpha=1., lw=1., ls='--')
34 |         ax[batch_count].plot(n_exp, np.ones_like(n_exp) * 100, dashes=[8, 4], color='black', linewidth=0.8)
35 |         ax[batch_count].scatter(n_exp, hypervol_avg, marker='o', s=0., color=colors[color_i])
36 | 
37 |         ax[batch_count].set_xticks(np.arange(0, 120, 5))
38 |         ax[batch_count].set_xlim(0, n_steps)
39 |         ax[batch_count].set_ylim(0, 100)
40 |         ax[batch_count].set_xlabel('Samples')
41 |         ax[batch_count].set_ylabel('Hypervolume (%)')
42 |         color_i += 1
43 | 
44 |     batch_count += 1
45 |     plt.legend()
46 | 
47 | if not os.path.exists('figures'):
48 |     os.mkdir('figures')
49 | 
50 | plt.tight_layout()
51 | plt.savefig(f"figures/benchmark_acquisition_functions_batch.svg")
52 | plt.show()
53 | 
54 | 


--------------------------------------------------------------------------------
/examples/publication/Virtual-experimentation/1_benchmark.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import shutil
 3 | from edbo.plus.benchmark.multiobjective_benchmark import Benchmark
 4 | import os
 5 | import numpy as np
 6 | import pandas as pd
 7 | 
 8 | 
 9 | #######################
10 | # Benchmark inputs
11 | budget = 30
12 | 
13 | acq = 'EHVI'
14 | seed = 1
15 | for sampling_method in ['seed', 'lhs', 'cvtsampling']:
16 |     for batch in [1, 2, 3, 5]:
17 |         df_exp = pd.read_csv('./data/data.csv')
18 |         df_exp['new_index'] = np.arange(0, len(df_exp.values))
19 |         sort_column = 'new_index'
20 | 
21 |         # Select the features for the model.
22 |         columns_regression = ['Temperature', 'Volume', 'D',
23 |                               'SM2',
24 |                               'W',
25 |                               'Mixing',
26 |                               'Time',
27 |                               'WB'
28 |                               ]
29 | 
30 |         # Select objectives.
31 |         objectives = ['P', 'I1']
32 |         objective_modes = ['max', 'min']
33 |         objective_thresholds = [None, None]
34 |         print(f"Columns for regression: {columns_regression}")
35 | 
36 |         label_benchmark = f"benchmark_acq_{acq}_batch_{batch}_{sampling_method}.csv"
37 | 
38 |         # Remove previous files.
39 |         if os.path.exists(label_benchmark):
40 |             os.remove(label_benchmark)
41 | 
42 |         if os.path.exists(f'pred_{label_benchmark}'):
43 |             os.remove(f'pred_{label_benchmark}')
44 | 
45 |         if os.path.exists(f'results_{label_benchmark}'):
46 |             os.remove(f'results_{label_benchmark}')
47 | 
48 |         bench = Benchmark(
49 |             df_ground=df_exp,
50 |             features_regression=columns_regression,
51 |             objective_names=objectives,
52 |             objective_modes=objective_modes,
53 |             objective_thresholds=objective_thresholds,
54 |             filename=label_benchmark,
55 |             filename_results=f'results_{label_benchmark}',
56 |             index_column=sort_column,acquisition_function=acq
57 |         )
58 | 
59 |         bench.run(
60 |             steps=int(budget/batch), batch=batch, seed=seed,
61 |             init_method=sampling_method,
62 |             plot_train=False, plot_predictions=False
63 |         )
64 | 
65 |         if not os.path.exists('results'):
66 |             os.mkdir('results')
67 | 
68 |         shutil.move(label_benchmark, f'results/{label_benchmark}')
69 |         shutil.move(f'pred_{label_benchmark}', f'results/pred_{label_benchmark}')
70 |         shutil.move(f'results_{label_benchmark}', f'results/results_{label_benchmark}')
71 | 


--------------------------------------------------------------------------------
/examples/publication/Virtual-experimentation/performance/1_merge_all.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | batch = 1
 5 | 
 6 | objective_1 = 'P'
 7 | objective_2 = 'I1'
 8 | 
 9 | columns_to_keep = ['step', 'n_experiments',
10 |                    'dmaximin_tradeoff', 'hypervolume completed (%)',
11 |                    f'MAE_{objective_1}', f"MAE_{objective_2}",
12 |                    f'RMSE_{objective_1}', f'RMSE_{objective_2}',
13 |                    f'R2_{objective_1}', f'R2_{objective_2}',
14 |                    f'{objective_1}_best', f'{objective_2}_best'
15 |                    ]
16 | 
17 | acq = 'EHVI'
18 | for sampling in ['seed', 'lhs', 'cvtsampling']:
19 |     df_i = pd.read_csv(f"../results/results_benchmark_acq_{acq}_batch_{batch}_{sampling}.csv")
20 |     columns_to_drop = list(set(df_i.columns.values) - set(columns_to_keep))
21 |     df_i.drop(columns=columns_to_drop, inplace=True)
22 | 
23 |     df_i.to_csv(f"./{sampling}_all.csv", index=False)
24 | 
25 |     df_av = df_i.groupby(['step', 'n_experiments']).agg([np.average])
26 |     df_av['step'] = np.unique(df_i.step.values)
27 |     df_av['n_experiments'] = np.unique(df_i.n_experiments.values)
28 |     df_av.to_csv(f"./{sampling}_avg.csv", index=False)
29 | 
30 |     df_min = df_i.groupby(['step', 'n_experiments']).agg([np.min])
31 |     df_min['step'] = np.unique(df_i.step.values)
32 |     df_min['n_experiments'] = np.unique(df_i.n_experiments.values)
33 |     df_min.to_csv(f"./{sampling}_min.csv", index=False)
34 | 
35 |     df_max = df_i.groupby(['step', 'n_experiments']).agg([np.max])
36 |     df_max['step'] = np.unique(df_i.step.values)
37 |     df_max['n_experiments'] = np.unique(df_i.n_experiments.values)
38 |     df_max.to_csv(f"./{sampling}_max.csv", index=False)
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/examples/publication/Virtual-experimentation/performance/2_plot_ground_truth.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | 
 3 | import pandas as pd
 4 | import numpy as np
 5 | import matplotlib.pyplot as plt
 6 | import seaborn as sns
 7 | sns.set_style("ticks")
 8 | sns.despine()
 9 | import matplotlib as mpl
10 | mpl.rcParams['grid.linestyle'] = ':'
11 | mpl.rcParams['grid.linewidth'] = 0.1
12 | plt.rcParams['font.family'] = 'Helvetica'
13 | plt.rcParams['font.size'] = 10
14 | import pareto
15 | from edbo.plus.benchmark.multiobjective_benchmark import is_pareto
16 | from pymoo.mcdm.high_tradeoff import HighTradeoffPoints
17 | from sklearn.preprocessing import MinMaxScaler
18 | 
19 | 
20 | # Hue: Color (ligand), shape (base), filling (solvent), alpha (ligand_eq).
21 | 
22 | import seaborn as sns
23 | 
# NOTE(review): none of the settings below are referenced anywhere else in
# this script (the data path and objectives further down are hard-coded);
# they look like leftovers from another plotting script -- confirm before
# relying on them.
dataset = 'dft'
acq = 'EHVI'
batch = 1
total_restarts = 5
n_steps = 30
seed = 0
30 | 
31 | 
def get_pareto_points(objective_values):
    """Compute the Pareto set of a two-objective ground truth.

    Both objectives are treated as quantities to maximise.

    Args:
        objective_values: 2D array with one row per sample and the two
            objective values in its columns.

    Returns:
        A tuple ``(front, mask)``: ``front`` is the output of
        ``pareto.eps_sort`` converted to a numpy array, and ``mask`` is
        whatever ``is_pareto`` returns for the sign-flipped objectives.
    """
    front = pareto.eps_sort(
        tables=objective_values,
        objectives=np.arange(2),
        maximize_all=True,
    )
    # is_pareto receives the negated values (presumably because it minimises).
    mask = is_pareto(objectives=-objective_values)
    return np.array(front), mask
40 | 
def get_high_tradeoff_points(pareto_points):
    """Select the high trade-off points among a set of Pareto points.

    The points are min-max scaled per objective and negated before being
    handed to ``HighTradeoffPoints`` (the original author notes that it
    always minimises).

    Args:
        pareto_points: numpy array with one Pareto point per row.

    Returns:
        The rows of ``pareto_points`` selected by ``HighTradeoffPoints``, or
        an empty list if the selection fails (best effort).
    """

    scaler_pareto = MinMaxScaler()
    pareto_scaled = scaler_pareto.fit_transform(pareto_points)
    try:
        tradeoff = HighTradeoffPoints()

        tradeoff_args = tradeoff.do(-pareto_scaled)  # Always minimizing.
        tradeoff_points = pareto_points[tradeoff_args]
    except Exception:
        # Best effort: any failure of the selection yields "no points".
        # `Exception` (instead of a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        tradeoff_points = []
    return tradeoff_points
56 | 
57 | 
# Load the ground truth. I1 is sign-flipped before the Pareto analysis, which
# treats both objectives as quantities to maximise.
df_exp = pd.read_csv('../data/data.csv')
df_exp['I1'] = -df_exp['I1'].values
objective_vals = df_exp[['P', 'I1']].values
pareto_points, idx_pareto = get_pareto_points(objective_vals)
high_tradeoff_points = get_high_tradeoff_points(pareto_points)

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(12, 12))

print(df_exp.columns)

# Colour map and colouring variable for each panel of the 3x3 grid.
palettes = [
    ['Reds', 'Reds', 'Blues'],
    ['Greens', 'Oranges', 'Reds'],
    ['Blues', 'Greens', 'Oranges'],
]

hues = [
    ['Temperature', 'Temperature', 'Volume'],
    ['D', 'SM2', 'W'],
    ['Mixing', 'Time', 'WB'],
]

# Every panel shows the same P / I1 scatter, coloured by a different
# variable, with the Pareto points joined by a dotted grey line.
for axes_row, hue_row, palette_row in zip(ax, hues, palettes):
    for panel, hue_name, palette_name in zip(axes_row, hue_row, palette_row):
        sns.scatterplot(
            x=df_exp['P'],
            y=df_exp['I1'],
            hue=df_exp[hue_name],
            s=40,
            lw=1.,
            edgecolor='black',
            ax=panel,
            palette=palette_name,
        )
        sns.lineplot(
            x=pareto_points[:, 0],
            y=pareto_points[:, 1],
            linewidth=1.2,
            color='grey',
            ls='dotted',
            ax=panel,
        )
        panel.legend(loc=3)
        panel.set_title(hue_name)

# The top-left panel repeats 'Temperature', so it is removed from the figure.
fig.delaxes(ax[0][0])
plt.tight_layout()

if not os.path.exists('../plots'):
    os.mkdir('../plots')
plt.savefig('../plots/SI_ground_truth.svg', dpi=500, format='svg')
plt.show()
95 | 


--------------------------------------------------------------------------------
/examples/publication/Virtual-experimentation/performance/3_plot_performance_acquisition_function.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | import pandas as pd
  6 | import os
  7 | 
  8 | 
  9 | # sns.set_style("ticks")
 10 | # sns.set_context("paper")
 11 | import matplotlib as mpl
 12 | mpl.rcParams['grid.linestyle'] = ':'
 13 | mpl.rcParams['grid.linewidth'] = 0.1
 14 | 
 15 | objective_1 = 'P'
 16 | objective_2 = 'I1'
 17 | 
 18 | plt.rcParams['font.family'] = 'Helvetica'
 19 | # mpl.rc('font', **{'family':'sans-serif', 'sans-serif':['HelveticaLight']})
 20 | 
 21 | # Best objectives.
 22 | best_P_in_scope = 100.
 23 | best_I1_in_scope = 100.
 24 | n_steps = 30
 25 | 
 26 | if not os.path.exists('./figures'):
 27 |     os.mkdir('figures')
 28 | 
 29 | 
 30 | colors = ['#DC143C', '#0343DF', '#FAC205']
 31 | color_i = 0
 32 | fig, ax = plt.subplots(figsize=(8., 8.), dpi=500, nrows=2, ncols=2)
 33 | 
 34 | acq = 'EHVI'
 35 | for sampling in ['seed', 'lhs', 'cvtsampling']:
 36 | 
 37 |     avg = pd.read_csv(f"./{sampling}_avg.csv")
 38 | 
 39 |     avg = avg.apply(pd.to_numeric, errors='coerce')
 40 |     max = pd.read_csv(f"./{sampling}_max.csv")
 41 |     max = max.apply(pd.to_numeric, errors='coerce')
 42 |     min = pd.read_csv(f"./{sampling}_min.csv")
 43 |     min = min.apply(pd.to_numeric, errors='coerce')
 44 | 
 45 |     n_exp = avg['n_experiments'].values[1:]
 46 | 
 47 |     # Hypervolume.
 48 |     hypervol_max = max['hypervolume completed (%)'].values[1:]
 49 |     hypervol_min = min['hypervolume completed (%)'].values[1:]
 50 |     hypervol_avg = avg['hypervolume completed (%)'].values[1:]
 51 | 
 52 |         # Where hypervolume is 99% completed.
 53 |     try:
 54 |         hyper_complete_arg = np.argwhere(hypervol_avg > 99.0)[0]
 55 |         hyper_complete_y = [hypervol_avg[hyper_complete_arg]]
 56 |         hyper_complete_x = [n_exp[hyper_complete_arg]]
 57 |     except:
 58 |         P_complete_x = []
 59 |         P_complete_y = []
 60 | 
 61 |     # Distance pareto.
 62 |     dtradeoff_max = max['dmaximin_tradeoff'].values[1:]
 63 |     dtradeoff_min = min['dmaximin_tradeoff'].values[1:]
 64 |     dtradeoff_avg = avg['dmaximin_tradeoff'].values[1:]
 65 | 
 66 | 
 67 |     # Best samples at each run.
 68 |     bestP_max = max[f'{objective_1}_best'].values[1:]
 69 |     bestI1_max = max[f'{objective_2}_best'].values[1:]
 70 |     bestP_min = min[f'{objective_1}_best'].values[1:]
 71 |     bestI1_min = min[f'{objective_2}_best'].values[1:]
 72 |     bestP_avg = avg[f'{objective_1}_best'].values[1:]
 73 |     bestI1_avg = avg[f'{objective_2}_best'].values[1:]
 74 | 
 75 |     # Where best P is sampled.
 76 |     try:
 77 |         P_complete_arg = np.argwhere(bestP_max == best_P_in_scope)[0]
 78 |         P_complete_y = [bestP_max[P_complete_arg]]
 79 |         P_complete_x = [n_exp[P_complete_arg]]
 80 |     except:
 81 |         P_complete_x = []
 82 |         P_complete_y = []
 83 | 
 84 |     # Where best I1 is sampled.
 85 |     try:
 86 |         I1_complete_arg = np.argwhere(bestI1_min == best_I1_in_scope)[0]
 87 |         I1_complete_y = [bestI1_min[I1_complete_arg]]
 88 |         I1_complete_x = [n_exp[I1_complete_arg]]
 89 |     except:
 90 |         I1_complete_x = []
 91 |         I1_complete_y = []
 92 | 
 93 |     # Plot performance for each acquisition function.
 94 |     ax[0][0].plot(n_exp, hypervol_avg, color=colors[color_i], lw=2.5, label=sampling.upper())
 95 |     ax[0][0].fill_between(x=n_exp, y1=hypervol_avg, y2=hypervol_max, color=colors[color_i], alpha=0.3, lw=0.)
 96 |     ax[0][0].fill_between(x=n_exp, y1=hypervol_min, y2=hypervol_avg, color=colors[color_i], alpha=0.3, lw=0.)
 97 |     ax[0][0].plot(n_exp, hypervol_min, color=colors[color_i], alpha=1., lw=1., ls='--')
 98 |     ax[0][0].plot(n_exp, hypervol_max, color=colors[color_i], alpha=1., lw=1., ls='--')
 99 |     # ax[0][0].plot(n_exp, np.ones_like(n_exp)*100, dashes=[8, 4], color='black', linewidth=0.8)
100 |     ax[0][0].scatter(n_exp, hypervol_avg, marker='o', s=0., color=colors[color_i])
101 | 
102 |     ax[0][0].set_xticks(np.arange(0, 120, 5))
103 |     ax[0][0].set_xlim(0, n_steps)
104 | 
105 |     # ax[0][0].set_ylim(40, 100)
106 |     ax[0][0].set_xlabel('Samples')
107 |     ax[0][0].set_ylabel('Hypervolume (%)')
108 |     # plt.tick_params(axis="x", direction="in")
109 |     # plt.tick_params(axis="y", direction="in")
110 | 
111 |     # Plot distance tradeoff.
112 |     ax[0][1].plot(n_exp, dtradeoff_avg, color=colors[color_i], lw=2.5, label=sampling.upper())
113 |     ax[0][1].plot(n_exp, dtradeoff_min, color=colors[color_i], lw=1., ls='--', label=sampling.upper())
114 |     ax[0][1].plot(n_exp, dtradeoff_max, color=colors[color_i], lw=1., ls='--', label=sampling.upper())
115 |     ax[0][1].fill_between(x=n_exp, y1=dtradeoff_avg, y2=dtradeoff_max, color=colors[color_i], alpha=0.3)
116 |     ax[0][1].fill_between(x=n_exp, y1=dtradeoff_min, y2=dtradeoff_avg, color=colors[color_i], alpha=0.3)
117 |     # ax[0][1].plot(n_exp, np.ones_like(n_exp) * 0, dashes=[8, 4], color='black', linewidth=0.8)
118 |     ax[0][1].scatter(n_exp, dtradeoff_avg, marker='o', s=0., color=colors[color_i])
119 | 
120 |     ax[0][1].set_xticks(np.arange(0, 120, 5))
121 |     ax[0][1].set_xlim(0, n_steps)
122 |     # ax[0][1].set_ylim(0, 80)
123 |     ax[0][1].set_xlabel('Samples')
124 |     ax[0][1].set_ylabel(r'$d_{(trade-off)}$')
125 | 
126 |     # Plot best P.
127 |     ax[1][0].plot(n_exp, bestP_avg, color=colors[color_i], lw=2.5, label=sampling)
128 |     ax[1][0].plot(n_exp, bestP_min, color=colors[color_i], lw=1, ls='--', label=sampling, alpha=1.)
129 |     ax[1][0].plot(n_exp, bestP_max, color=colors[color_i], lw=1, ls='--', label=sampling, alpha=1.)
130 |     # ax[1][0].fill_between(x=n_exp, y1=bestP_avg, y2=bestP_max, color=colors[color_i], alpha=0.3)
131 |     # ax[1][0].fill_between(x=n_exp, y1=bestP_min, y2=bestP_avg, color=colors[color_i], alpha=0.3)
132 | 
133 |     # ax[1][0].plot(n_exp, np.ones_like(n_exp) * 0,
134 |     #            dashes=[8, 4], color='black', linewidth=0.8)
135 |     ax[1][0].scatter(n_exp, bestP_avg, marker='o', s=0.,
136 |                   color=colors[color_i])
137 | 
138 |     ax[1][0].set_xticks(np.arange(0, 120, 5))
139 |     ax[1][0].set_xlim(0, n_steps)
140 |     # ax[1][0].set_ylim(0.8, 1.1)
141 |     ax[1][0].set_xlabel('Samples')
142 |     ax[1][0].set_ylabel('Best P')
143 | 
144 |     # Plot best I1.
145 |     ax[1][1].plot(n_exp, bestI1_avg, color=colors[color_i], lw=2.5,
146 |                label=sampling.upper())
147 | 
148 |     ax[1][1].plot(n_exp, bestI1_min, color=colors[color_i], lw=1.0, ls='--',
149 |                   label=sampling.upper())
150 |     ax[1][1].plot(n_exp, bestI1_max, color=colors[color_i], lw=1.0, ls='--',
151 |                   label=sampling.upper())
152 | 
153 |     ax[1][1].fill_between(x=n_exp,
154 |                        y1=bestI1_avg,
155 |                        y2=bestI1_max, color=colors[color_i], alpha=0.3,
156 |                        )
157 |     ax[1][1].fill_between(x=n_exp,
158 |                        y1=bestI1_min,
159 |                        y2=bestI1_avg, color=colors[color_i], alpha=0.3,
160 |                        )
161 |     # ax[1][1].plot(n_exp, np.ones_like(n_exp) * 0,
162 |     #            dashes=[8, 4], color='black', linewidth=0.8)
163 |     ax[1][1].scatter(n_exp, bestI1_avg, marker='o', s=0.,
164 |                   color=colors[color_i])
165 | 
166 | 
167 |     ax[1][1].set_xticks(np.arange(0, 120, 5))
168 |     ax[1][1].set_xlim(0, n_steps)
169 |     ax[1][1].set_ylim(0.0, 0.005)
170 |     ax[1][1].set_xlabel('Samples')
171 |     ax[1][1].set_ylabel('Best I1')
172 | 
173 |     color_i += 1
174 | 
175 | ax[0][1].legend()
176 | plt.tight_layout()
177 | plt.savefig(f"figures/benchmark_sampling.svg")
178 | plt.show()
179 | 
180 | 
181 | 


--------------------------------------------------------------------------------
/examples/publication/Virtual-experimentation/performance/4_hypervol_sampling.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | import pandas as pd
 6 | import os
 7 | 
 8 | 
 9 | # sns.set_style("ticks")
10 | # sns.set_context("paper")
11 | import matplotlib as mpl
12 | mpl.rcParams['grid.linestyle'] = ':'
13 | mpl.rcParams['grid.linewidth'] = 0.1
14 | 
15 | objective_1 = 'P'
16 | objective_2 = 'I1'
17 | 
18 | plt.rcParams['font.family'] = 'Helvetica'
19 | # mpl.rc('font', **{'family':'sans-serif', 'sans-serif':['HelveticaLight']})
20 | 
21 | # Best objectives.
22 | best_conversion_in_scope = 100.
23 | best_selectivity_in_scope = 100.
24 | 
25 | n_experiments = 30
26 | feat_iter = 0
27 | 
28 | if not os.path.exists('./figures'):
29 |     os.mkdir('figures')
30 | 
31 | fig, ax = plt.subplots(figsize=(7., 4.0), dpi=500, nrows=1, ncols=3)
32 | 
33 | colors_sampling = ['#DC143C', '#0343DF', '#FAC205', '#15B01A']
34 | 
35 | alphas = [0.4, 0.6, 0.7, 1.0]
36 | i = -1
37 | for sampling_method in ['seed', 'lhs', 'cvtsampling']:
38 | 
39 |     i += 1
40 |     j = -1
41 |     for batch in [1, 2, 3, 5]:
42 |         j += 1
43 |         acq = 'EHVI'
44 | 
45 |         df_i = pd.read_csv(f'../results/results_benchmark_acq_{acq}_batch_{batch}_{sampling_method}.csv')
46 |         df_i = df_i[df_i['n_experiments'] <= n_experiments]
47 | 
48 |         # Hypervolume.
49 |         hypervol = df_i['hypervolume completed (%)'].values[:]
50 | 
51 |         # Plot performance for each acquisition function.
52 |         n_exp = df_i['n_experiments'].values[:]
53 | 
54 |         ax[i].plot(n_exp, hypervol, color=colors_sampling[j], lw=2.5,
55 |                       label=f"{batch}",
56 |                    alpha=alphas[j])
57 | 
58 |         ax[i].set_title(f"{sampling_method}")
59 |         ax[i].set_xlabel('Samples')
60 |         ax[i].set_ylabel('Hypervolume (%)')
61 |         ax[i].set_ylim(80, 100)
62 | 
63 | ax[i].legend()
64 | plt.tight_layout()
65 | plt.savefig(f"figures/benchmark_hypervol.svg")
66 | 
67 | plt.show()
68 | 
69 | 


--------------------------------------------------------------------------------
/examples/tutorials/2_EDBO_WebApp_Tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/doyle-lab-ucla/edboplus/f5d1687e834d33c77598767c5383a919f9a58034/examples/tutorials/2_EDBO_WebApp_Tutorial.pdf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | botorch==0.5.0
 2 | gpytorch==1.5.1
 3 | idaes-pse==1.5.1
 4 | ipykernel==6.5.1
 5 | ipython==7.29.0
 6 | ipywidgets==7.6.5
 7 | Jinja2==3.0.3
 8 | joypy==0.2.6
 9 | lxml==4.6.4
10 | mordred==1.2.0
11 | numpy==1.21.5
12 | ordered-set==4.0.2
13 | pandas==1.3.4
14 | pareto==1.1.1.post3
15 | pymoo==0.5.0
16 | scikit-learn==1.0.1
17 | scipy==1.7.2
18 | seaborn
19 | matplotlib
20 | sympy==1.9
21 | torch==1.10.0
22 | tqdm


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | # Replaces the legacy dash-separated `description-file` key, which setuptools
3 | # does not use for metadata; the `file:` directive loads the README as the
4 | # long description of the distribution.
5 | long_description = file: README.md
6 | long_description_content_type = text/markdown


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |    name='edbo',
 5 |    packages=['edbo'], 
 6 |    version='0.2.0',
 7 |    author='Jose A. Garrido Torres & Abigail Gutmann Doyle',
 8 |    author_email='josegarridotorres@me.com',
 9 |    url='https://github.com/doyle-lab-ucla/edboplus',
10 |    keywords=['Bayesian Optimization', 'Chemical Reaction Optimization'],
11 |    license='MIT',
12 |    description='Bayesian reaction optimization as a tool for chemical synthesis.',
13 |    install_requires=[
14 |         'botorch==0.5.0',
15 |         'gpytorch==1.5.1',
16 |         'idaes-pse==1.5.1',
17 |         'ipykernel==6.5.1',
18 |         'ipython==7.29.0',
19 |         'ipywidgets==7.6.5',
20 |         'Jinja2==3.0.3',
21 |         'joypy==0.2.6',
22 |         'lxml==4.6.4',
23 |         'mordred==1.2.0',
24 |         'numpy==1.21.5',
25 |         'ordered-set==4.0.2',
26 |         'pandas==1.3.4',
27 |         'pareto==1.1.1.post3',
28 |         'pymoo==0.5.0',
29 |         'scikit-learn==1.0.1',
30 |         'scipy==1.7.2',
31 |         'seaborn',
32 |         'matplotlib',
33 |         'sympy==1.9',
34 |         'torch==1.10.0',
35 |         'tqdm',
36 |     ],
37 |    classifiers=[
38 |     'Development Status :: 3 - Alpha',
39 |     'Intended Audience :: Science/Research', 
40 |     'Topic :: Scientific/Engineering :: Chemistry',
41 |     'License :: OSI Approved :: MIT License', 
42 |     'Programming Language :: Python :: 3.8',
43 |   ],
44 | )


--------------------------------------------------------------------------------