├── data ├── PCE_ID.xls ├── Theo_simu.xlsx ├── ID-1-purity.xlsx ├── MolFeatures.xlsx ├── Mol_Group_A.pdf ├── Mol_Group_A.xlsx ├── Mol_Group_B.pdf ├── Mol_Group_B.xlsx ├── TRPL for ML.xlsx └── dataset.csv ├── README.md ├── LICENSE ├── theoretical.py ├── main_stats.py ├── main_linear.py ├── main_MTGPR.py ├── main_rfm.py ├── .gitignore ├── features.py ├── GP_models.py ├── utils.py ├── RFM_model.py ├── main_model_selection.py └── reaction.py /data/PCE_ID.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/PCE_ID.xls -------------------------------------------------------------------------------- /data/Theo_simu.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/Theo_simu.xlsx -------------------------------------------------------------------------------- /data/ID-1-purity.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/ID-1-purity.xlsx -------------------------------------------------------------------------------- /data/MolFeatures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/MolFeatures.xlsx -------------------------------------------------------------------------------- /data/Mol_Group_A.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/Mol_Group_A.pdf -------------------------------------------------------------------------------- /data/Mol_Group_A.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/Mol_Group_A.xlsx -------------------------------------------------------------------------------- /data/Mol_Group_B.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/Mol_Group_B.pdf -------------------------------------------------------------------------------- /data/Mol_Group_B.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/Mol_Group_B.xlsx -------------------------------------------------------------------------------- /data/TRPL for ML.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimat-lab/perovskite_htm_screening/HEAD/data/TRPL for ML.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Screening hole transport materials for perovskite solar cells assisted by machine learning 2 | 3 | Repository for the code for the paper "Screening hole transport materials for perovskite solar cells assisted by machine learning". 4 | 5 | ## Requirements 6 | 7 | TODO 8 | 9 | ## Data 10 | 11 | The minimal data for the machine learning section of the paper can be found in [data](data). 12 | 13 | ## Code 14 | 15 | To rerun all fits and make the plots of the paper you can run the scripts with the `main_` prefix: 16 | 17 | * main_MTGPR.py 18 | * main_linear.py 19 | * main_model_selection.py 20 | * main_rfm.py 21 | * main_stats.py 22 | 23 | The other modules contain classes, functions and utilities for features and training. 
24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Artificial Intelligence for Materials Science group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
class TheoSimulation:
    """Table of simulated (theoretical) descriptors, addressed by molecule ID.

    The table is read from an Excel file on construction and indexed by its
    "ID" column. Rows are looked up with the concatenation of the two
    fragment IDs of an A/B combination.
    """

    def __init__(self,
                 file_name: str = "Theo_simu.xlsx",
                 data_dir_path: str = "data",
                 descriptor_list: list = None):
        """Load the table and choose which descriptor columns to expose.

        Args:
            file_name: Name of the Excel file inside ``data_dir_path``.
            data_dir_path: Directory that contains the Excel file.
            descriptor_list: Column names to return from
                ``labels_for_combos``; ``None`` selects the eight default
                columns.
        """
        default_descriptors = ["dipole", "homo", "lumo", "gap", "energy", "a", "b", "c"]
        self.data_path = data_dir_path
        self.file_path = os.path.join(data_dir_path, file_name)
        self.theo = None
        self.load_data()
        self.descriptor_list = default_descriptors if descriptor_list is None else descriptor_list

    def load_data(self):
        """Read the first sheet of the Excel file and index it by "ID"."""
        self.theo = pd.read_excel(self.file_path, header=0, sheet_name=0)
        self.theo.set_index("ID", inplace=True)

    def labels_for_combos(self, combos: list):
        """Return the row IDs and descriptor values for a list of combinations.

        Args:
            combos: Sequence of pairs; the first two entries of each pair are
                concatenated to form the row ID.

        Returns:
            A tuple ``(ids, values)`` with the list of concatenated IDs and a
            NumPy array holding the ``descriptor_list`` columns of those rows,
            in the same order as ``combos``.
        """
        ids = [pair[0] + pair[1] for pair in combos]
        rows = self.theo.loc[ids]
        return ids, np.array(rows[self.descriptor_list])


if __name__ == "__main__":
    data = TheoSimulation()
    print(data.labels_for_combos([("A9","B702"), ("A99","B172"), ("A1066","B2")]))
label="O", color="maroon", alpha=alpha) 16 | axs[0].hist(data["S"]/data["NumAtoms"], bins=20, label="S", color="gold", alpha=alpha) 17 | axs[0].hist(data["F"]/data["NumAtoms"], bins=20, label="F", color="seagreen", alpha=alpha) 18 | axs[0].set_xlabel("Mole Fraction") 19 | axs[0].set_ylabel("Counts") 20 | axs[0].set_ylim([0., 25.]) 21 | axs[0].legend(loc="upper right") 22 | 23 | 24 | axs[1].hist(data["AtomIsInRing"]/data["NumAtoms"], bins=20, label="Ring", color="#3b5b92", alpha=alpha) 25 | axs[1].hist(data["AtomIsAromatic"]/data["NumAtoms"], bins=20, label="Aromatic", color="goldenrod", alpha=alpha) 26 | axs[1].hist(data["NumRotatableBonds"]/data["NumBonds"], bins=20, label="Rotatable", color="seagreen", alpha=alpha) 27 | axs[1].hist(data["BondIsConjugated"]/data["NumBonds"], bins=20, label="Conjugated", color="indianred", alpha=alpha) 28 | axs[1].set_xlabel("Mole Fraction") 29 | # axs[0].set_ylim([0., 25.]) 30 | axs[1].legend(loc="upper left") 31 | 32 | 33 | axs[2].hist(data["homo"], bins=20, label="Homo", color="#3b5b92", alpha=alpha) 34 | axs[2].hist(data["lumo"], bins=20, label="Lumo", color="goldenrod", alpha=alpha) 35 | axs[2].hist(data["gap"], bins=20, label="Gap", color="seagreen", alpha=alpha) 36 | axs[2].set_xlabel("Energy [eV]") 37 | # axs[0].set_ylim([0., 25.]) 38 | axs[2].legend(loc="upper left") 39 | 40 | 41 | axs[3].hist(data["ExactMolWt"], bins=20, label="Weight", color="#3b5b92", alpha=alpha) 42 | axs[3].set_xlabel("Molar Mass [g/mol]") 43 | # axs[0].set_ylim([0., 25.]) 44 | axs[3].legend(loc="upper left") 45 | 46 | axs[0].text( 47 | -0.2, 1.0, 'A', fontsize=18, weight="bold", 48 | transform=axs[0].transAxes, 49 | ) 50 | 51 | 52 | plt.savefig("data_analysis.png", bbox_inches='tight', pad_inches=0.1) 53 | plt.show() 54 | -------------------------------------------------------------------------------- /main_linear.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 
from utils import generate_trainset, standardize_data, plot_scatter, leave_one_out_crossval


# Script: leave-one-out cross-validation of a linear regression on a fixed
# subset of eight features, followed by a predicted-vs-measured scatter plot.

# CSV file handed to generate_trainset.
DATA_PATH = './data/dataset.csv'
# Human-readable names of the feature columns.
# NOTE(review): assumed to follow the column order of X returned by
# generate_trainset(use_simulation=True) -- the boolean mask below relies on it.
features = ["C count", "N count", "O count", "H count", "S count",
            "F count", "Cl count", "atoms count", "atoms in ring",
            "aromatic atoms count", "bonds count", "conjugated bonds count",
            "aromatic bonds count", "rotatable bonds count", "carbonyl O (excl. COOH) count",
            "exact molecular weight", "Morgan FP density", "fraction of SP3 C", "log P",
            "molar refractivity", "has tertiary amine", "has secondary amine", "has imine",
            "has thiophene", "has pyrrole", "has benzimidazole", "has benzothiophene",
            "has naphthalene", "has biphenyl", "dipole", "homo level", "lumo level",
            "homo/lumo gap", "total energy", "rotation constant a", "rotation constant b",
            "rotation constant c", 'purity']

print('Generating trainset...')
X, y, samples_composition = generate_trainset(path=DATA_PATH, use_simulation=True, objective='PCE')


# forward optimization best r2, 8 features, r2: 0.456999140013172, bic: 381.1341164453419
single_task_best_features = ['has tertiary amine', 'rotation constant c', 'dipole', 'purity', 'aromatic bonds count', 'N count', 'log P', 'aromatic atoms count']
# Boolean column mask: True where the feature name is one of the selected eight.
MASK = [(f in single_task_best_features) for f in features]
# Selected feature names in the order in which they appear in `features`
# (i.e. the column order of X after masking).
feat_order = [f for f in features if f in single_task_best_features]
X = X[..., MASK]

# Per-fold predictions and ground-truth values, in original (unscaled) units.
preds = []
ground = []
# Per-fold regression coefficients, keyed by feature name, plus the intercept.
coefficients = {f: [] for f in feat_order}
coefficients['intercept'] = []
for xtrain, xtest, ytrain, ytest in leave_one_out_crossval(X, y, samples_composition, reject='both'):
    # Scalers are fitted on the training fold only, then applied to the test fold.
    xtrain, ytrain, x_scaler, y_scaler = standardize_data(xtrain, ytrain)
    xtest = x_scaler.transform(xtest)
    reg = LinearRegression().fit(xtrain, ytrain)
    # coef_[0]: first (presumably only) target row of the coefficient matrix.
    for f, c in zip(feat_order, reg.coef_[0]):
        coefficients[f].append(c)
    coefficients['intercept'].append(reg.intercept_)
    # Map the prediction back to the original target scale; .item() expects a
    # single held-out value per fold.
    preds.append(y_scaler.inverse_transform(reg.predict(xtest)).item())
    ground.append(ytest.item())
plot_scatter(preds, ground, save=True, name="linear_scatter.png")
COOH) count", 17 | "exact molecular weight", "Morgan FP density", "fraction of SP3 C", "log P", 18 | "molar refractivity", "has tertiary amine", "has secondary amine", "has imine", 19 | "has thiophene", "has pyrrole", "has benzimidazole", "has benzothiophene", 20 | "has naphthalene", "has biphenyl", "dipole", "homo level", "lumo level", 21 | "homo/lumo gap", "total energy", "rotation constant a", "rotation constant b", 22 | "rotation constant c", 'purity'] 23 | 24 | print('Generating trainset...') 25 | X, y, samples_composition = generate_trainset(path=DATA_PATH, use_simulation=True, objective='PCE', add_labels=ADD_OUTPUTS) 26 | 27 | ### ELIMINATE FEATURES THAT HAVE A SINGLE VALUES (constant) OR VERY NARROW DISTRIBUTIONS (spiked) ON THE TRAIN SET 28 | ####spiked = ['F', "O", 'S', "fr_C_O_noCOO", "has_Benzimidazole", "has_Benzothiophene", "has_Naphthalene", "has_Thiophene"] 29 | ####constant = ['Cl', "has_C=NC", "has_CNC", "has_Pyrrole"] 30 | SPIKED = [5, 2, 4, 14, 25, 26, 27, 23] 31 | CONSTANT = [6, 21, 22, 24] 32 | NOT_GOOD = sorted(CONSTANT+SPIKED) 33 | MASK_FEATURES = np.ones(X.shape[-1], dtype=bool) 34 | MASK_FEATURES[NOT_GOOD] = False 35 | X = X[..., MASK_FEATURES] 36 | 37 | task_labels = [ 38 | 'PCE', '$V_{oc}$', '$J_{sc}$', 'FF', '$D_{V_{0}}$', '$D_{J_{0}}$', 'CA', 39 | '$PLQY_{perov}$', '$PLQY_{glass}$', '$t1_{perov}$', '$t2_{perov}$', 40 | '$t1_{glass}$', '$t2_{glass}$', 41 | ] 42 | 43 | xtrain, ytrain, _, _ = standardize_data(X, y) 44 | regr = MTGPR(xtrain, ytrain) 45 | regr.fit() 46 | 47 | B = regr.model.covar_module.task_covar_module.covar_factor.detach().numpy() 48 | v = regr.model.covar_module.task_covar_module.var.detach().numpy() 49 | task_covar = np.matmul(B,B.T) + np.diag(v) 50 | 51 | plt.figure(figsize = (22,18)) 52 | plt.rcParams['font.size'] = 30 53 | 54 | print(np.abs(task_covar)) 55 | ax = sns.heatmap( 56 | np.abs(task_covar), 57 | linewidth=0.5, 58 | annot=True, 59 | fmt=".2f", 60 | xticklabels=task_labels, 61 | yticklabels=task_labels, 62 | 
# Script: fit an RFM model on 1000 random 80/20 splits, average the
# normalised diagonal of its M matrix over all splits and plot it as a
# per-feature importance bar chart.

# CSV file handed to generate_trainset.
DATA_PATH = './data/dataset.csv'
# Human-readable names of the feature columns.
# NOTE(review): assumed to follow the column order of X returned by
# generate_trainset(use_simulation=True) -- the index lists below rely on it.
features = ["C count", "N count", "O count", "H count", "S count",
            "F count", "Cl count", "atoms count", "atoms in ring",
            "aromatic atoms count", "bonds count", "conjugated bonds count",
            "aromatic bonds count", "rotatable bonds count", "carbonyl O (excl. COOH) count",
            "exact molecular weight", "Morgan FP density", "fraction of SP3 C", "log P",
            "molar refractivity", "has tertiary amine", "has secondary amine", "has imine",
            "has thiophene", "has pyrrole", "has benzimidazole", "has benzothiophene",
            "has naphthalene", "has biphenyl", "dipole", "homo level", "lumo level",
            "homo/lumo gap", "total energy", "rotation constant a", "rotation constant b",
            "rotation constant c", 'purity']

print('Generating trainset...')
X, y, samples_composition = generate_trainset(path=DATA_PATH, use_simulation=True, objective='PCE')

### ELIMINATE FEATURES THAT HAVE A SINGLE VALUE (constant) OR VERY NARROW DISTRIBUTIONS (spiked) ON THE TRAIN SET
### The two commented lists below name the descriptors; SPIKED / CONSTANT hold
### the corresponding column indices into `features`.
####spiked = ['F', "O", 'S', "fr_C_O_noCOO", "has_Benzimidazole", "has_Benzothiophene", "has_Naphthalene", "has_Thiophene"]
####constant = ['Cl', "has_C=NC", "has_CNC", "has_Pyrrole"]
SPIKED = [5, 2, 4, 14, 25, 26, 27, 23]
CONSTANT = [6, 21, 22, 24]
NOT_GOOD = sorted(CONSTANT+SPIKED)
# Boolean column mask: True for every column that is kept.
MASK_FEATURES = np.ones(X.shape[-1], dtype=bool)
MASK_FEATURES[NOT_GOOD] = False
X = X[..., MASK_FEATURES]
# Names of the kept features, in the column order of the masked X.
feat_order = [f for f, b in zip(features, MASK_FEATURES) if b]

# Accumulators over all random splits.
preds = []        # test predictions, original target scale
ground = []       # matching ground-truth values
importances = []  # normalised diagonal of M, one vector per split
matrices = []     # full M divided by the sum of its entries, one per split
r2s = []          # test-set R^2 per split
## run 1000 random splits
for _ in range(1000):
    # random_state=None: every iteration draws a different, non-reproducible split.
    xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=None)
    # Scalers are fitted on the training part only, then applied to the test part.
    xtrain, ytrain, x_scaler, y_scaler = standardize_data(xtrain, ytrain)
    xtest = x_scaler.transform(xtest)
    reg = RFM()
    # standardize_data returns torch tensors; RFM.fit is given NumPy arrays.
    reg.fit(
        xtrain.detach().numpy(), ytrain.detach().numpy(), reg=1e-3, num_iters=5,
        centering=True, verbose=False, diag_only=False,
    )
    matrix = reg.get_M()
    matrices.append(matrix/matrix.sum())
    # Diagonal of M, normalised to sum to one, is used as feature importance.
    M = np.diag(reg.get_M())
    importances.append(M / M.sum())
    # Predictions are mapped back to the original target scale before scoring.
    temp_preds = y_scaler.inverse_transform(reg.predict(xtest)).ravel()
    preds.append(temp_preds)
    ground.append(ytest.ravel())
    r2s.append(r2_score(ytest.ravel(), temp_preds))

preds = np.concatenate(preds, axis=0)
ground = np.concatenate(ground, axis=0)

# Average the normalised matrices and importance vectors over all splits.
matrix = 0
importance = 0
tot = 0
for i, M in enumerate(matrices):
    matrix += M
    importance += importances[i]
    tot += 1
matrix = matrix/tot
importance = importance/tot
# Mean importance per feature, sorted from most to least important.
feature_imp = pd.Series(importance, index=feat_order).sort_values(ascending=False)

plt.rcParams["figure.figsize"] = (10, 9)
ax = sns.barplot(x=feature_imp, y=feature_imp.index, color='royalblue', alpha=.7, edgecolor='black')
ax.tick_params(axis='y', labelsize=18)
ax.tick_params(axis='x', labelsize=18)
plt.tight_layout()
plt.show()
plt.close()
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
# Parsed substructure queries keyed by their SMILES string. Each query is
# parsed on first use and then reused, instead of being re-parsed for every
# molecule and every descriptor call.
_SUBSTRUCTURE_QUERIES = {}


def _count_symbol(mol, symbol):
    """Return the number of atoms in *mol* whose element symbol is *symbol*."""
    return sum(atom.GetSymbol() == symbol for atom in mol.GetAtoms())


def _count_substructure(mol, smiles):
    """Return the number of substructure matches of the *smiles* query in *mol*."""
    query = _SUBSTRUCTURE_QUERIES.get(smiles)
    if query is None:
        query = rdkit.Chem.MolFromSmiles(smiles)
        _SUBSTRUCTURE_QUERIES[smiles] = query
    return len(mol.GetSubstructMatches(query))


class MolFeatures:
    """Compute a fixed-order vector of RDKit-based descriptors per molecule.

    ``mol_rep`` maps a descriptor name to a callable taking a molecule and
    returning a number. An instance selects a subset of these names
    (``descriptor_list``); calling the instance on a list of molecules returns
    one row per molecule with the descriptors in that order.
    """

    # NOTE(review): rdkit.Chem.Lipinski and rdkit.Chem.Crippen are used below
    # but are not imported explicitly by this module -- presumably they become
    # available as a side effect of importing rdkit.Chem.Descriptors; confirm.
    mol_rep = {
        # Count atoms
        "C": lambda m: _count_symbol(m, "C"),
        "N": lambda m: _count_symbol(m, "N"),
        "O": lambda m: _count_symbol(m, "O"),
        "H": lambda m: _count_symbol(m, "H"),
        "S": lambda m: _count_symbol(m, "S"),
        "F": lambda m: _count_symbol(m, "F"),
        "Cl": lambda m: _count_symbol(m, "Cl"),
        # Count bonds
        "NumAtoms": lambda m: sum(1 for _ in m.GetAtoms()),
        "AtomIsInRing": lambda m: sum(x.IsInRing() for x in m.GetAtoms()),
        "AtomIsAromatic": lambda m: sum(x.GetIsAromatic() for x in m.GetAtoms()),
        "NumBonds": lambda m: sum(1 for _ in m.GetBonds()),
        "BondIsConjugated": lambda m: sum(x.GetIsConjugated() for x in m.GetBonds()),
        "BondIsAromatic": lambda m: sum(x.GetIsAromatic() for x in m.GetBonds()),
        "NumRotatableBonds": lambda m: rdkit.Chem.Lipinski.NumRotatableBonds(m),
        # Fractions
        "fr_Al_COO": lambda m: rdkit.Chem.Fragments.fr_Al_COO(m),
        "fr_Ar_COO": lambda m: rdkit.Chem.Fragments.fr_Ar_COO(m),
        "fr_Al_OH": lambda m: rdkit.Chem.Fragments.fr_Al_OH(m),
        "fr_Ar_OH": lambda m: rdkit.Chem.Fragments.fr_Ar_OH(m),
        "fr_C_O_noCOO": lambda m: rdkit.Chem.Fragments.fr_C_O_noCOO(m),
        "fr_NH2": lambda m: rdkit.Chem.Fragments.fr_NH2(m),
        "fr_SH": lambda m: rdkit.Chem.Fragments.fr_SH(m),
        "fr_sulfide": lambda m: rdkit.Chem.Fragments.fr_sulfide(m),
        "fr_alkyl_halide": lambda m: rdkit.Chem.Fragments.fr_alkyl_halide(m),
        # Descriptors
        "ExactMolWt": lambda m: rdkit.Chem.Descriptors.ExactMolWt(m),
        "FpDensityMorgan3": lambda m: rdkit.Chem.Descriptors.FpDensityMorgan3(m),
        "FractionCSP3": lambda m: rdkit.Chem.Lipinski.FractionCSP3(m),
        "MolLogP": lambda m: rdkit.Chem.Crippen.MolLogP(m),
        "MolMR": lambda m: rdkit.Chem.Crippen.MolMR(m),
        # Custom structures: despite the "has_" prefix these are match counts,
        # not booleans.
        "has_CN(C)C": lambda m: _count_substructure(m, "CN(C)C"),
        "has_CNC": lambda m: _count_substructure(m, "CNC"),  # or HasSubstructMatch()
        "has_C=NC": lambda m: _count_substructure(m, "C=NC"),
        "has_Thiophene": lambda m: _count_substructure(m, "c1cScc1"),
        "has_Pyrrole": lambda m: _count_substructure(m, "c1cNcc1"),
        "has_Benzimidazole": lambda m: _count_substructure(m, "Cn1cnc2ccccc21"),
        "has_Benzothiophene": lambda m: _count_substructure(m, "c1ccc2sccc2c1"),
        "has_Naphthalene": lambda m: _count_substructure(m, "c1ccc2ccccc2c1"),
        "has_Biphenyl": lambda m: _count_substructure(m, "c1ccc(-c2ccccc2)cc1"),
    }

    def __init__(self, descriptor_list: list = None):
        """Select the descriptors to compute.

        Args:
            descriptor_list: Names (keys of ``mol_rep``) in the desired output
                order; ``None`` selects the 29 default descriptors.
        """
        default_list = [
            "C", "N", "O", "H", "S", "F", "Cl",
            "NumAtoms", "AtomIsInRing", "AtomIsAromatic",
            "NumBonds", "BondIsConjugated", "BondIsAromatic", "NumRotatableBonds",
            "fr_C_O_noCOO",
            # "fr_Al_COO", "fr_Ar_COO", "fr_Al_OH", "fr_Ar_OH", "fr_NH2",
            # "fr_SH", "fr_sulfide", "fr_alkyl_halide"
            "ExactMolWt", "FpDensityMorgan3", "FractionCSP3",
            "MolLogP", "MolMR",
            "has_CN(C)C", "has_CNC", "has_C=NC", "has_Thiophene", "has_Pyrrole", "has_Benzimidazole",
            "has_Benzothiophene", "has_Naphthalene", "has_Biphenyl"
        ]
        self.descriptor_list = default_list if descriptor_list is None else descriptor_list

    def __call__(self, mol_list: list):
        """Return an array with one descriptor row per molecule in *mol_list*.

        A progress bar is shown while the molecules are processed.
        """
        self.info("Making features:")
        return np.array([self.map_descriptor(mol) for mol in tqdm(mol_list)])

    def info(self, *args, **kwargs):
        """Print a message prefixed with ``INFO:``."""
        print("INFO:", *args, **kwargs)

    def map_descriptor(self, m):
        """Return the descriptor values of molecule *m* in ``descriptor_list`` order.

        Raises:
            KeyError: If a name in ``descriptor_list`` is not a key of ``mol_rep``.
        """
        return [self.mol_rep[name](m) for name in self.descriptor_list]


if __name__ == "__main__":
    mk = MolFeatures()
    print(mk([rdkit.Chem.MolFromSmiles("CC"), rdkit.Chem.MolFromSmiles("CCOS")]))
    print(mk.descriptor_list)
class MTGPR:
    """Multi-task exact GP regressor trained with Adam on the marginal log-likelihood.

    Wraps a ``MultitaskGPModel`` together with a multitask Gaussian
    likelihood, an Adam optimizer (initial learning rate 0.1) and a StepLR
    schedule. The number of tasks is taken from the last dimension of
    ``train_y``.
    """

    def __init__(self, train_x, train_y):
        # train_x / train_y are stored and reused as the training data in fit().
        self.train_x = train_x
        self.train_y = train_y
        self.likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=train_y.shape[-1])
        self.model = MultitaskGPModel(train_x, train_y, self.likelihood)
        self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1)  # Includes likelihood parameters
        # Multiply the learning rate by 0.7 every 40 scheduler steps.
        self.scheduler = StepLR(self.optimizer, step_size=40, gamma=0.7)

    def fit(self, training_iterations=250):
        """Run ``training_iterations`` Adam steps on the negative marginal log-likelihood.

        The optimizer and scheduler are created in ``__init__``, so calling
        ``fit`` again continues from their current state.
        """
        self.model.train()
        self.likelihood.train()
        for i in range(training_iterations):
            self.optimizer.zero_grad()
            output = self.model(self.train_x)
            loss = -self.mll(output, self.train_y)
            loss.backward()
            #print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
            self.optimizer.step()
            # Keep decaying only while the learning rate is above 1e-2 ...
            if self.optimizer.param_groups[0]['lr'] > 1e-2:
                self.scheduler.step()
            # ... and once it has dropped below 1e-2, restart every parameter
            # group at 0.1. Both checks run in the same iteration, in this order.
            if self.optimizer.param_groups[0]['lr'] < 1e-2:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = 1e-1

    def predict(self, test_x):
        """Return ``(mean, std)`` of the predictive distribution at ``test_x``.

        The prediction goes through the likelihood, so it includes the
        likelihood's noise model.
        """
        self.model.eval()
        self.likelihood.eval()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            predictions = self.likelihood(self.model(test_x))
            mean = predictions.mean
            lower, upper = predictions.confidence_region()
        # gpytorch's confidence_region() spans mean -/+ 2 standard deviations,
        # so a quarter of its width is one standard deviation.
        return mean, (upper-lower)/4
def produce_mol_dataset(name: str = 'dataset', n_mol: int = 2000, rng=None):
    """Enumerate A x B product molecules and save them to disk in chunks.

    Fragment IDs are read from the ``ID`` column of ``./data/Mol_Group_A.xlsx``
    and ``./data/Mol_Group_B.xlsx``. Known-problematic A fragments are dropped,
    at most ``n_mol`` fragments are drawn from each group without replacement,
    and every (A, B) pair is reacted with ``ReactionAB``. Each chunk of at most
    ``LIMIT`` pairs is written to ``./data/Mols/<name>_chunk<i>.npy`` as a dict
    mapping the concatenated ID ``a + b`` to the product molecule.

    Args:
        name: Prefix of the chunk files.
        n_mol: Maximum number of fragments sampled from each group.
        rng: ``numpy.random.Generator`` used for sampling; a fresh default
            generator is created when ``None``.

    Returns:
        None. The result is only written to disk.
    """
    import math
    import os

    PROBLEMS = {'A263', 'A435', 'A439', 'A440', 'A485', 'A486', 'A518', 'A530', 'A546', 'A630', 'A688', 'A689',
                'A690', 'A879', 'A1115'}
    LIMIT = 100000
    out_dir = './data/Mols/'

    if rng is None:
        rng = np.random.default_rng()

    As = pd.read_excel('./data/Mol_Group_A.xlsx', sheet_name=0, header=0).loc[:, 'ID']
    Bs = pd.read_excel('./data/Mol_Group_B.xlsx', sheet_name=0, header=0).loc[:, 'ID']

    # Sort after the set difference: set iteration order is not stable between
    # interpreter runs, which would make the draw irreproducible even with a
    # seeded ``rng``.
    As = sorted(set(As) - PROBLEMS, key=str)
    n_as = min(len(As), n_mol)
    n_bs = min(len(Bs), n_mol)
    As = rng.choice(As, n_as, replace=False)
    Bs = rng.choice(Bs, n_bs, replace=False)

    # PROBLEMS were already removed from As, so no further filtering is needed.
    samples = list(product(As, Bs))

    # Ceil division avoids a trailing empty chunk when len(samples) is an exact
    # multiple of LIMIT; at least one chunk file is always written.
    n_chunks = max(1, math.ceil(len(samples) / LIMIT))

    os.makedirs(out_dir, exist_ok=True)

    # Build the reaction engine once (its constructor reads the fragment tables)
    # and point it at the same tables the IDs above were read from, as
    # generate_trainset does.
    reaction_engine = ReactionAB(file_name_a="Mol_Group_A.xlsx", file_name_b="Mol_Group_B.xlsx",
                                 data_dir_path='data')

    for i in range(n_chunks):
        chunk = samples[i*LIMIT:(i+1)*LIMIT]
        mols = reaction_engine.run_combos(chunk)
        data = {a + b: mol for (a, b), mol in zip(chunk, mols)}
        np.save(out_dir + name + '_chunk' + str(i) + '.npy', data)
def standardize_data(X, y):
    """Standardise features and targets to zero mean and unit variance.

    A separate ``StandardScaler`` is fitted on ``X`` and on ``y``; the scaled
    arrays are returned as float32 torch tensors together with the fitted
    scalers so predictions can be transformed back.

    Returns:
        Tuple ``(scaled_x, scaled_y, x_scaler, y_scaler)``.
    """
    def _fit_and_scale(values):
        scaler = StandardScaler(with_std=True, with_mean=True, copy=True).fit(values)
        return torch.tensor(scaler.transform(values)).float(), scaler

    scaled_x, x_scaler = _fit_and_scale(X)
    scaled_y, y_scaler = _fit_and_scale(y)
    return scaled_x, scaled_y, x_scaler, y_scaler
def euclidean_distances(samples, centers, M=None, squared=True):
    """Pairwise distances between the rows of ``samples`` and ``centers``.

    With ``M`` given, entry ``(i, j)`` is the quadratic form
    ``(s_i - c_j) @ M @ (s_i - c_j)`` expanded as
    ``s_i M s_i - 2 s_i M c_j + c_j M c_j``; with ``M=None`` the identity is
    used, i.e. the plain squared Euclidean distance.

    Args:
        samples: 2-D array, one point per row.
        centers: 2-D array, one point per row; may be the same object as
            ``samples``, in which case its norms are reused.
        M: Optional square weighting matrix, or ``None`` for the identity.
        squared: If ``True`` return the squared distances unchanged (rounding
            can leave tiny negative values); otherwise clamp negatives to 0
            and return the square root.

    Returns:
        Array of shape ``(len(samples), len(centers))``.
    """
    def _quadratic_norms(points):
        weighted = points if M is None else points @ M
        return np.sum(weighted * points, axis=1, keepdims=True)

    samples_norm = _quadratic_norms(samples)
    if samples is centers:
        centers_norm = samples_norm
    else:
        centers_norm = _quadratic_norms(centers)
    centers_norm = np.reshape(centers_norm, (1, -1))

    # The cross term must honour M=None as well; the previous code always
    # evaluated ``M @ centers.T`` and raised TypeError for the default M.
    if M is None:
        cross = samples @ centers.T
    else:
        cross = samples @ (M @ centers.T)

    distances = cross * -2 + samples_norm + centers_norm
    if not squared:
        distances = np.sqrt(np.maximum(distances, 0))

    return distances
def egop(G, verbose=False, diag_only=False):
    """Average gradient outer product over the leading axis of ``G``.

    ``G`` is processed in ``len(G) // 20 + 1`` batches. For every sample the
    product ``grad.T @ grad`` over the last two axes is accumulated and the
    sum is divided by ``len(G)``.

    Args:
        G: 3-D array; axis 0 indexes samples.
        verbose: Show a tqdm progress bar over the batches.
        diag_only: Accumulate only the diagonal and return it embedded in a
            diagonal matrix.

    Returns:
        Square matrix whose side equals the size of the last axis of ``G``.
    """
    def _batch_sum(grad):
        grad_t = np.swapaxes(grad, 1, 2)
        if diag_only:
            per_sample_diag = np.sum(grad_t * grad_t, axis=-1)
            return np.sum(per_sample_diag, axis=0)
        return np.sum(grad_t @ grad, axis=0)

    batches = np.array_split(G, len(G) // 20 + 1)
    batch_iter = tqdm(batches) if verbose else batches

    M = 0.
    for grad in batch_iter:
        M += _batch_sum(grad)
    M /= len(G)

    if diag_only:
        M = np.diag(M)
    return M
"""Model selection on the initial device data set.

Builds molecular (and optionally simulated) features for the first 100 entries
of ``data/dataset.csv``, writes them to ``data/MolFeatures.xlsx``, and compares
several regressors with 10-fold cross-validation. Three figures are produced:
``GP_labels.png``, ``learning_curve_nn.png`` and ``all_models.png``.
"""
import numpy as np
import rdkit
import keras as ks
import rdkit.Chem
import rdkit.Chem.AllChem
import rdkit.Chem.Draw
import pandas as pd
import sys
import json
import torch
import gpytorch
from torch.optim.lr_scheduler import StepLR
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.gaussian_process.kernels import (
    ConstantKernel, RBF, DotProduct, WhiteKernel, Matern, Exponentiation, ExpSineSquared, RationalQuadratic)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import r2_score
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor
from reaction import ReactionAB
# from experiment import ExperimentalDataset
from features import MolFeatures
from theoretical import TheoSimulation

seed = 42
np.random.seed(seed)
# ks.utils.set_random_seed(seed)

sys.path.append("..")

use_simulation_data = True

label_names = ["PCE", "Jsc", "Voc", "FF"]
new_data = pd.read_csv("data/dataset.csv")

# For the model selection only the first initial data is available
new_data = new_data.iloc[:100, :]

# Each "AB" entry is the string form of a tuple, e.g. "('A1', 'B2')".
samples = [
    x.replace("(", "").replace(")", "").replace("'", "").replace(" ", "").split(",")
    for x in new_data["AB"].values
]
targets = new_data.loc[:, label_names].values

feat_cols = [
    "C",
    "N",
    "O",
    "H",
    "S",
    "F",
    # "Cl",
    "NumAtoms",
    "AtomIsInRing",
    "AtomIsAromatic",
    "NumBonds",
    "BondIsConjugated",
    "BondIsAromatic",
    "NumRotatableBonds",
    # "fr_C_O_noCOO",
    # "fr_Al_COO",
    # "fr_Ar_COO",
    # "fr_Al_OH",
    # "fr_Ar_OH",
    # "fr_NH2",
    # "fr_SH",
    # "fr_sulfide",
    # "fr_alkyl_halide"
    "ExactMolWt",
    "FpDensityMorgan3",
    "MolLogP",
    "MolMR",
    "FractionCSP3",
    "has_CN(C)C",
    # "has_cnc",
    # "has_C=NC",
    # "has_Thiophene",
    # "has_Pyrrole",
    # "has_Benzimidazole",
    # "has_Benzothiophene",
    # "has_Naphthalene",
    "has_Biphenyl"
]
theo_cols = [
    "dipole",
    "homo",
    "lumo",
    "gap",
    "energy",
    "a",
    "b",
    "c"
]

# Make reaction
reaction_engine = ReactionAB(file_name_a="Mol_Group_A.xlsx", file_name_b="Mol_Group_B.xlsx",
                             data_dir_path="./data")
mols = reaction_engine.run_combos(samples)
# reaction_engine.draw_to_pdf_products(samples)
# reaction_engine.save_to_mol_folder_for_simulation(samples, make_conformers=True, optimize_conformer=True, add_hydrogen=True)

# Make features
feature_generator = MolFeatures(descriptor_list=feat_cols)
features = feature_generator(mols)

if use_simulation_data:
    theo_generator = TheoSimulation(file_name="Theo_simu.xlsx", descriptor_list=theo_cols, data_dir_path="./data")
    _, theos = theo_generator.labels_for_combos(samples)
    features = np.concatenate([features, theos], axis=-1)

columns = {"ID": ["".join(x) for x in samples]}
columns.update({x: features[:, i] for i, x in enumerate(feat_cols)})
if use_simulation_data:
    columns.update({x: theos[:, i] for i, x in enumerate(theo_cols)})
columns.update({x: targets[:, i] for i, x in enumerate(label_names)})
columns.update({"MolFormular": [rdkit.Chem.rdMolDescriptors.CalcMolFormula(m) for m in mols]})

frame = pd.DataFrame(columns)
frame.to_excel("data/MolFeatures.xlsx", index=False)

# Ignore samples with smaller than 0.2 PCE.
# NOTE(review): column 2 of ``label_names`` is "Voc", not "PCE" (column 0), so
# this filters on Voc > 0.2 -- confirm which column is intended.
ignore_samples_mask = targets[:, 2] > 0.2

selected_targets = targets[ignore_samples_mask]
selected_features = features[ignore_samples_mask]

# Std scaling needs to be replaced with better scaler.
y_scaler = StandardScaler(with_std=True, with_mean=True, copy=True)
x_scaler = StandardScaler(with_std=True, with_mean=True, copy=True)

scaled_targets = y_scaler.fit_transform(selected_targets)
scaled_features = x_scaler.fit_transform(selected_features)

# For validation, use a KFold() split.
kf = KFold(n_splits=10, random_state=None, shuffle=True)
split_indices = kf.split(X=scaled_features)


class MultitaskGPModel(gpytorch.models.ExactGP):
    """Exact multi-task GP: constant mean per task, ARD-RBF kernel, rank-1 task covariance."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(),
            num_tasks=train_y.shape[-1]
        )
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(ard_num_dims=train_x.shape[-1]),
            num_tasks=train_y.shape[-1],
            rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)


class MTGPR:
    """Multi-task GP regressor accepting numpy arrays or torch tensors (used for "MGP")."""

    def __init__(self, train_x, train_y):
        if isinstance(train_x, np.ndarray):
            self.train_x = torch.tensor(train_x).float()
        else:
            self.train_x = train_x
        if isinstance(train_y, np.ndarray):
            self.train_y = torch.tensor(train_y).float()
        else:
            self.train_y = train_y
        self.likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=train_y.shape[-1])
        self.model = MultitaskGPModel(self.train_x, self.train_y, self.likelihood)
        self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1)  # Includes likelihood parameters
        self.scheduler = StepLR(self.optimizer, step_size=40, gamma=0.7)

    def fit(self, training_iterations=250):
        self.model.train()
        self.likelihood.train()
        for i in range(training_iterations):
            self.optimizer.zero_grad()
            output = self.model(self.train_x)
            loss = -self.mll(output, self.train_y)
            loss.backward()
            # print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
            self.optimizer.step()
            # Decay only while above 1e-2; restart at 1e-1 once below it.
            if self.optimizer.param_groups[0]['lr'] > 1e-2:
                self.scheduler.step()
            if self.optimizer.param_groups[0]['lr'] < 1e-2:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = 1e-1

    def predict(self, test_x, return_std=False):
        if isinstance(test_x, np.ndarray):
            torch_test_x = torch.tensor(test_x).float()
        else:
            torch_test_x = test_x
        self.model.eval()
        self.likelihood.eval()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            predictions = self.likelihood(self.model(torch_test_x))
            mean = predictions.mean
            if return_std:
                lower, upper = predictions.confidence_region()
                return mean, (upper - lower) / 4
            else:
                return mean


print("Fitting model to data...")

fit_stats = []
validation_stats = []
models_fitted = {
    "Gauss": [],
    "Kernel": [],
    "NN": [],
    "Linear": [],
    "RF": []
}

for train_index, test_index in split_indices:
    ytrain = scaled_targets[train_index]
    ytest = scaled_targets[test_index]
    xtrain = scaled_features[train_index]
    xtest = scaled_features[test_index]

    for model_type in models_fitted:
        print(model_type)

        # One exclusive chain: previously "Linear" was a separate ``if`` and
        # then fell through to the trailing ``else`` of the next chain, so the
        # linear model was replaced by a neural network before prediction.
        if model_type == "Linear":
            model = LinearRegression().fit(xtrain, ytrain)

        elif model_type == "Gauss":
            kernel = Matern(length_scale=1.0) + WhiteKernel() + DotProduct()
            model = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(xtrain, ytrain)

        elif model_type == "MGP":
            print("Fitting model...")
            model = MTGPR(xtrain, ytrain)
            model.fit()

        elif model_type == "Kernel":
            params = {'alpha': 0.1, 'gamma': None, 'kernel': 'laplacian'}
            model = KernelRidge(**params).fit(xtrain, ytrain)

        elif model_type == "RF":
            params = {}
            model = RandomForestRegressor(**params).fit(xtrain, ytrain)

        elif model_type == "NN":
            model = ks.Sequential()
            model.add(ks.layers.Input(shape=[features.shape[-1]]))
            model.add(ks.layers.BatchNormalization())
            model.add(ks.layers.Dense(100,
                                      activation=ks.layers.LeakyReLU(alpha=0.05),
                                      kernel_regularizer=ks.regularizers.L1(1e-8)))
            model.add(ks.layers.Dense(100,
                                      activation=ks.layers.LeakyReLU(alpha=0.05),
                                      kernel_regularizer=ks.regularizers.L1(1e-8)))
            model.add(ks.layers.Dense(ytrain.shape[-1]))

            model.compile(optimizer=ks.optimizers.Adam(learning_rate=1e-4),
                          loss='mae', metrics=['mae', 'mse', 'mape'])

            history = model.fit(
                x=xtrain, y=ytrain,
                batch_size=16, epochs=100, verbose=1, callbacks=None, shuffle=True,
                validation_data=(xtest, ytest))
            fit_stats.append(history)

        else:
            raise ValueError("Unknown model type: {}".format(model_type))

        predicted = model.predict(xtest)
        predicted = y_scaler.inverse_transform(predicted)
        test_labels_rescaled = y_scaler.inverse_transform(ytest)
        # Stored per fold as [prediction, ground truth] in original units.
        models_fitted[model_type].append([predicted, test_labels_rescaled])


# Plot the GP results
validation_stats = models_fitted["Gauss"]
r2_stats = [
    r2_score(np.concatenate([x[1][:, i] for x in validation_stats], axis=0),
             np.concatenate([x[0][:, i] for x in validation_stats], axis=0)) for
    i in range(scaled_targets.shape[-1])]
# r2_stats = [r2_score(x[1],x[0]) for x in validation_stats]  # Not stable if r2 < 0
mae_stats = [np.mean(np.abs(x[1] - x[0]), axis=0) for x in validation_stats]

fig, axsg = plt.subplots(1, 4, figsize=(17.25, 3.75))
titles = ["PCE [%]", r"J$_{sc}$ [mA/cm$^2$]", "Voc [V]", "FF [%]"]
axs = axsg.flatten()
for j in range(scaled_targets.shape[-1]):
    axs[j].plot([np.amin(selected_targets[:, j]), np.amax(selected_targets[:, j])],
                [np.amin(selected_targets[:, j]), np.amax(selected_targets[:, j])], "--",
                label=r"r$^2$: {0:0.3f}, MAE: {1:0.4f} $\pm$ {2:0.3f}".format(r2_stats[j],
                                                                             np.mean(mae_stats, axis=0)[j],
                                                                             np.std(mae_stats, axis=0)[j]),
                c="indianred")
    for i in range(len(validation_stats)):
        x_pred, y_actual = validation_stats[i]
        axs[j].scatter(x_pred[:, j], y_actual[:, j], alpha=0.65, c="royalblue")

    axs[j].grid(True)
    axs[j].set_title(titles[j])
    axs[j].legend(loc="lower left")
    axs[j].set_xlabel("Predicted")
    if j == 0:
        axs[j].set_ylabel("True")

axs[0].text(
    -0.2, 1.0, 'C', fontsize=18, weight="bold",
    transform=axs[0].transAxes,
)
plt.savefig("GP_labels.png", bbox_inches='tight', pad_inches=0.1)
plt.show()

# Plot training curve (train in blue, validation in red; one line per fold).
# MAE is brought back to original units with the mean target scale.
plt.figure()
for x in fit_stats:
    plt.plot(np.array(x.history["mae"]) * np.mean(y_scaler.scale_), c="blue")
for x in fit_stats:
    plt.plot(np.array(x.history["val_mae"]) * np.mean(y_scaler.scale_), c="red")
# The previous labels were copied from a scatter plot and did not describe
# this figure's axes.
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.savefig("learning_curve_nn.png", bbox_inches='tight', pad_inches=0.1)
plt.show()

# Plot models
fig, axsg = plt.subplots(1, 5, figsize=(18.25, 3.75), sharey=True)
fig.subplots_adjust(wspace=0.)
axs = axsg.flatten()
full_title = {"Gauss": "Gaussian Process", "Kernel": "Kernel Ridge", "NN": "Neural Network", "Linear": "Linear",
              "RF": "Random Forest"}
for j, model_type in enumerate(models_fitted):

    validation_stats = models_fitted[model_type]
    r2_stats = [
        r2_score(np.concatenate([x[1][:, i] for x in validation_stats], axis=0),
                 np.concatenate([x[0][:, i] for x in validation_stats], axis=0)) for
        i in range(scaled_targets.shape[-1])]
    # r2_stats = [r2_score(x[1],x[0]) for x in validation_stats]  # Not stable if r2 < 0
    mae_stats = [np.mean(np.abs(x[1] - x[0]), axis=0) for x in validation_stats]

    axs[j].plot([np.amin(selected_targets[:, 0]), np.amax(selected_targets[:, 0])],
                [np.amin(selected_targets[:, 0]), np.amax(selected_targets[:, 0])], "--",
                label=r"r$^2$: {0:0.3f}, MAE: {1:0.4f} $\pm$ {2:0.3f}".format(r2_stats[0],
                                                                             np.mean(mae_stats, axis=0)[0],
                                                                             np.std(mae_stats, axis=0)[0]),
                c="indianred")
    for i in range(len(validation_stats)):
        x_pred, y_actual = validation_stats[i]
        axs[j].scatter(x_pred[:, 0], y_actual[:, 0], alpha=0.65, c="royalblue")

    axs[j].grid(True)
    axs[j].set_title(full_title[model_type])
    axs[j].legend(loc="lower left")
    axs[j].set_xlabel("Predicted PCE [%]")
    if j == 0:
        axs[j].set_ylabel("True PCE [%]")

axs[0].text(
    -0.2, 1.0, 'B', fontsize=18, weight="bold",
    transform=axs[0].transAxes,
)

plt.savefig("all_models.png", bbox_inches='tight', pad_inches=0.1)
plt.show()
def error(self, *args, **kwargs):
    """Print an error-level message to stdout.

    All arguments are forwarded to ``print`` after an ``ERROR:`` prefix.
    The prefix used to be ``INFO:``, which made errors indistinguishable
    from the output of ``info``.
    """
    print("ERROR:", *args, **kwargs)
59 | def _log(self, *args, **kwargs): 60 | print(*args, **kwargs) 61 | 62 | @staticmethod 63 | def load_json_file(fname): 64 | with open(fname, 'r') as json_file: 65 | file_read = json.load(json_file) 66 | return file_read 67 | 68 | @staticmethod 69 | def count_halogen(m, rgs: list = ['I', 'Br']): 70 | counts = 0 71 | for a in m.GetAtoms(): 72 | if str(a.GetSymbol()) in rgs: 73 | counts = counts + 1 74 | return counts 75 | 76 | @staticmethod 77 | def count_B_reacts(m, rgs: list = ["OBO", "FB(F)F"]): 78 | groups = [rdkit.Chem.MolFromSmiles(x) for x in rgs] 79 | counts = 0 80 | for x in groups: 81 | counts = counts + len(m.GetSubstructMatches(x)) 82 | return counts 83 | 84 | @staticmethod 85 | def count_A_reacts(m, rgs: list = ["CBr", "NBr", "CI", "NI"]): 86 | groups = [rdkit.Chem.MolFromSmiles(x) for x in rgs] 87 | counts = 0 88 | for x in groups: 89 | counts = counts + len(m.GetSubstructMatches(x)) 90 | return counts 91 | 92 | def check_reactive_groups(self): 93 | # Check individual groups 94 | ra_groups = [] 95 | rb_groups = [] 96 | rh_groups = [] 97 | for key, value in self.mol_a.items(): 98 | ra_groups.append(self.count_A_reacts(value)) 99 | rb_groups.append(self.count_B_reacts(value)) 100 | rh_groups.append(self.count_halogen(value)) 101 | self.info("Counts for X-Br in A:", 102 | {x: y for x, y in zip(*np.unique(ra_groups, return_counts=True))}) 103 | self.info("Counts for X-B(O)O in A:", 104 | {x: y for x, y in zip(*np.unique(rb_groups, return_counts=True))}) 105 | self.info("Compare additional Br's, miss:", 106 | (np.array(ra_groups) - np.array(rh_groups))[(np.array(ra_groups) != np.array(rh_groups))]) 107 | 108 | ra_groups = [] 109 | rb_groups = [] 110 | for key, value in self.mol_b.items(): 111 | ra_groups.append(self.count_A_reacts(value)) 112 | rb_groups.append(self.count_B_reacts(value)) 113 | self.info("Counts for X-Br in B", 114 | {x: y for x, y in zip(*np.unique(ra_groups, return_counts=True))}) 115 | self.info("Counts for X-B(O)O in B", 116 | {x: y 
@staticmethod
def remove_free_water(m, info: str = None):
    """Strip explicit water molecules from ``m`` and return the result.

    The water query is built from SMILES ``"O"`` with hydrogens added.
    Deletion is repeated while the query still matches, but at most once per
    atom of the input molecule. If anything was removed and ``info`` is
    given, a message naming ``info`` is printed.
    """
    # Explicit hydrogens on the query are important (kept from the original).
    water_query = rdkit.Chem.AddHs(rdkit.Chem.MolFromSmiles("O"))
    stripped_any = False
    passes_left = m.GetNumAtoms()  # Remove multiple times but at most #Atoms
    while passes_left > 0 and m.HasSubstructMatch(water_query):
        m = rdkit.Chem.DeleteSubstructs(m, water_query)
        stripped_any = True
        passes_left -= 1
    if stripped_any and info is not None:
        print("Removed free water from:", info)
    return m
or Y-B(O)O in group A for", ida, idb) 178 | is_valid = False 179 | 180 | if not is_valid: 181 | return 182 | 183 | run_racts = 0 184 | prod = m1 185 | for _ in range(c1a): 186 | reacts = (m2, prod) 187 | products = rxn.RunReactants(reacts) 188 | if len(products) > 0: 189 | prod = products[0][0] 190 | run_racts += 1 191 | rdkit.Chem.SanitizeMol(prod) 192 | if run_racts != c1a: 193 | self.error("Error: expected additional reaction for", ida, idb) 194 | 195 | # Finished product 196 | m = prod 197 | if sanitize: 198 | rdkit.Chem.SanitizeMol(m) 199 | if add_hydrogen: 200 | m = rdkit.Chem.AddHs(m) 201 | if make_conformers: 202 | rdkit.Chem.RemoveStereochemistry(m) 203 | rdkit.Chem.AssignStereochemistry(m) 204 | rdkit.Chem.AllChem.EmbedMolecule( 205 | m, randomSeed=randomSeed, maxAttempts=maxAttempts, 206 | useRandomCoords=useRandomCoords) 207 | if optimize_conformer and make_conformers: 208 | rdkit.Chem.AllChem.MMFFOptimizeMolecule(m) 209 | rdkit.Chem.AssignAtomChiralTagsFromStructure(m) 210 | rdkit.Chem.AssignStereochemistryFrom3D(m) 211 | rdkit.Chem.AssignStereochemistry(m) 212 | m.SetProp("_Name", ida + idb) 213 | 214 | return m 215 | 216 | def __getitem__(self, key: tuple): 217 | assert len(key) == 2, "Provide ['A...', 'B...'] keys" 218 | return self.run_reaction(key[0], key[1]) 219 | 220 | def show_reaction(self, ida, idb): 221 | m1 = self.mol_a[ida] 222 | m2 = self.mol_b[idb] 223 | prod = self.run_reaction(ida, idb) 224 | sm1 = rdkit.Chem.MolToSmarts(rdkit.Chem.RemoveHs(m1), isomericSmiles=True) 225 | sm2 = rdkit.Chem.MolToSmarts(rdkit.Chem.RemoveHs(m2), isomericSmiles=True) 226 | res = rdkit.Chem.MolToSmarts(rdkit.Chem.RemoveHs(prod), isomericSmiles=True) 227 | cr = rdkit.Chem.AllChem.ReactionFromSmarts(sm1 + "." 
+ sm2 + ">>" + res) 228 | img = rdkit.Chem.Draw.ReactionToImage(cr) 229 | return img 230 | 231 | def all_combos_possible(self): 232 | combos = [] 233 | for x in self.mol_a.keys(): 234 | for y in self.mol_b.keys(): 235 | combos.append((x, y)) 236 | return combos 237 | 238 | def run_combos(self, combos, **kwargs): 239 | mol_combos = [] # List here is better 240 | for i in tqdm(range(len(combos))): 241 | mol_combos.append(self.run_reaction(combos[i][0], combos[i][1], 242 | **kwargs)) 243 | return mol_combos 244 | 245 | def run_all_combos(self, num_workers=1, batch_size=1000, **kwargs): 246 | combos = self.all_combos_possible() 247 | mol_combos = [] # List here is better 248 | 249 | def wrapp_run_kwargs(kwargs): 250 | return self.run_reaction(**kwargs) 251 | 252 | if num_workers == 1: 253 | for i in tqdm(range(len(combos))): 254 | mol_combos.append(self.run_reaction(combos[i][0], combos[i][1], **kwargs)) 255 | else: 256 | self.info("Start parallel reactions:", flush=True) 257 | arg_list = [{"ida": x, "idb": y} for x, y in combos] 258 | for x in arg_list: 259 | x.update(kwargs) 260 | for i in range(0, len(arg_list), batch_size): 261 | with ThreadPoolExecutor(max_workers=num_workers) as executor: 262 | result = executor.map(wrapp_run_kwargs, arg_list[i:i + batch_size]) 263 | mol_combos.append(list(result)) 264 | self.info("Finished {} of {}".format(i + batch_size, len(arg_list))) 265 | return mol_combos 266 | 267 | @staticmethod 268 | def MolsToGridImageZoomed(mols, molsPerRow=3, subImgSize=(100, 100), legends=None, 269 | grid=True, 270 | **kwargs): 271 | nRows = len(mols) // molsPerRow 272 | if len(mols) % molsPerRow: nRows += 1 273 | fullSize = (molsPerRow * subImgSize[0], nRows * subImgSize[1]) 274 | full_image = Image.new('RGBA', fullSize) 275 | dy = subImgSize[1] 276 | dx = subImgSize[0] 277 | for ii, mol in enumerate(mols): 278 | if mol.GetNumConformers() == 0: 279 | rdkit.Chem.AllChem.Compute2DCoords(mol) 280 | column = ii % molsPerRow 281 | row = ii // molsPerRow 
282 | offset = (column * subImgSize[0], row * subImgSize[1]) 283 | d2d = rdMolDraw2D.MolDraw2DCairo(subImgSize[0], subImgSize[1]) 284 | d2d.DrawMolecule(mol) 285 | d2d.FinishDrawing() 286 | sub = Image.open(BytesIO(d2d.GetDrawingText())) 287 | full_image.paste(sub, box=offset) 288 | 289 | fnt = ImageFont.truetype("arial", size=12) 290 | txt = Image.new("RGBA", full_image.size, (255, 255, 255, 0)) 291 | d = ImageDraw.Draw(txt) 292 | if legends is not None: 293 | for ii, mol in enumerate(mols): 294 | column = ii % molsPerRow 295 | row = ii // molsPerRow 296 | offset = ((column + 0.5) * dx, (row + 1) * dy) 297 | d.text(offset, legends[ii], font=fnt, anchor="md", fill=(0, 0, 0, 255)) 298 | 299 | # Grid 300 | for ii, mol in enumerate(mols): 301 | column = ii % molsPerRow 302 | row = ii // molsPerRow 303 | gp = np.array([column * dx, row * dy], dtype="int") 304 | if grid: 305 | d.line(([gp[0], gp[1], 306 | gp[0] + dx, gp[1]]), 307 | fill=(0, 0, 0, 255), width=1) 308 | d.line(([gp[0], min(gp[1] + dy, full_image.size[1] - 1), 309 | gp[0] + dx, min(gp[1] + dy, full_image.size[1] - 1)]), 310 | fill=(0, 0, 0, 255), width=1) 311 | d.line(([gp[0], gp[1], 312 | gp[0], gp[1] + dy]), fill=(0, 0, 0, 255), width=1) 313 | d.line(([min(gp[0] + dx, full_image.size[0] - 1), gp[1], 314 | min(gp[0] + dx, full_image.size[0] - 1), gp[1] + dy]), fill=(0, 0, 0, 255), width=1) 315 | 316 | out = Image.alpha_composite(full_image, txt) 317 | background = Image.new("RGB", out.size, (255, 255, 255)) 318 | background.paste(out, mask=out.split()[3]) # 3 is the alpha channel 319 | return background 320 | 321 | def draw_to_pdf_products(self, reacts: list, 322 | filepath: str = "ReactionProductList.pdf", 323 | mols_per_page: int = 35, 324 | mols_per_row: int = 5): 325 | mols = self.run_combos(reacts, make_conformers=False) 326 | mol_copy = [ 327 | rdkit.Chem.MolFromMolBlock(rdkit.Chem.MolToMolBlock(x)) for x in mols] 328 | ids = [x[0] + x[1] for x in reacts] 329 | 330 | for m in mol_copy: 331 | 
rdkit.Chem.RemoveHs(m) 332 | rdkit.Chem.SanitizeMol(m) 333 | rdkit.Chem.AllChem.Compute2DCoords(m) 334 | 335 | grid_function = self.MolsToGridImageZoomed # or rdkit.Chem.Draw.MolsToGridImage 336 | image_m = [grid_function( 337 | mol_copy[i:i + mols_per_page], 338 | molsPerRow=mols_per_row, 339 | subImgSize=(500, 500), 340 | legends=ids[i:i + mols_per_page]) for i in range(0, len(mols), mols_per_page)] 341 | 342 | image_m[0].save(filepath, 343 | "PDF", dpi=(300, 300), save_all=True, 344 | append_images=image_m[1:]) 345 | 346 | def save_to_mol_folder_for_simulation(self, reacts: list, **kwargs): 347 | mols = self.run_combos(reacts, **kwargs) 348 | os.makedirs(self.conf_folder, exist_ok=True) 349 | react_dict = {} 350 | for x, m in zip(reacts, mols): 351 | folder = os.path.join(self.conf_folder, x[0] + x[1]) 352 | os.makedirs(folder, exist_ok=True) 353 | rdkit.Chem.MolToMolFile(m, os.path.join(folder, "conf_guess.mol")) 354 | rdkit.Chem.MolToXYZFile(m, os.path.join(folder, "conf_guess.xyz")) 355 | 356 | react_dict[x[0] + x[1]] = rdkit.Chem.MolToSmiles(m) 357 | 358 | with open(os.path.join( 359 | self.conf_folder, "exp_to_do_stock.yaml"), 'w') as yaml_file: 360 | yaml.dump(react_dict, yaml_file, default_flow_style=False) 361 | 362 | 363 | if __name__ == "__main__": 364 | react = ReactionAB() 365 | react.check_reactive_groups() 366 | # print(react["A1", "B1"]) 367 | # react.show_reaction("A1", "B1").show() 368 | # print("Number of possible reactions:", len(react.all_combos_possible())) 369 | # print("Run over a list of pairs:", flush=True) 370 | # react.run_combos([("A1", "B1"), ("A10", "B10"), ("A100", "B100"), ("A110", "B500")]) 371 | # react.draw_to_pdf_products([("A1", "B1"), ("A10", "B10"), ("A100", "B100"), ("A110", "B500")]) 372 | # Not practical, takes long. 
373 | # all = react.run_all_combos(num_workers=12, make_conformers=True, optimize_conformer=True) 374 | 375 | 376 | 377 | 378 | 379 | 380 | -------------------------------------------------------------------------------- /data/dataset.csv: -------------------------------------------------------------------------------- 1 | ID-1,Voc,Jsc,PCE,FF,D_V0,D_J0,AB,contact_angle,PLQY_perov,PLQY_glass,t1_perov,t2_perov,t1_glass,t2_glass 2 | 1,0.969192061296921,19.9255,14.65352,0.758792247497908,0.8242821097373962,298.81129455566406,"('A1', 'B1')",72.1,0.6538936120542039,0.9939450367475676,2.604170070614818,5.522340530889254,2.8355635214011694,4.885752111949565 3 | 2,0.936152371998422,19.3848333333333,13.6253333333333,0.750824650021253,1.3495174646377563,289.2644348144531,"('A2', 'B1')",106.5,0.5133525609805139,0.90892401684458,2.696652156149841,5.681877639268151,3.2039652121324464,4.70564880455431 4 | 3,0.969526821720342,21.2718333333333,15.3908,0.746270792949397,0.8589989542961121,311.6711120605469,"('A3', 'B1')",100.1,0.4166589896427485,0.8884259949766448,2.589266665112243,5.798728951965154,3.5610460826040513,5.347059910536027 5 | 4,0.907475988213601,19.1323333333333,12.89492,0.742703671653976,0.4745796620845794,319.1230163574219,"('A4', 'B1')",96.7,0.4506281476706301,0.7944113529009306,2.3739750890807185,5.264502018053306,2.9231615807191558,4.262539022051294 6 | 5,0.953545517386469,21.0443333333333,15.3956,0.767220180746633,1.066213607788086,363.4838562011719,"('A5', 'B1')",96.0,0.6377088954292008,0.6512995986153193,2.329227023940467,5.413251904640136,2.7862450486872667,4.232220669556607 7 | 6,0.970325460042217,20.7918333333333,15.64794,0.77561635841695,0.9308498501777648,333.4620361328125,"('A6', 'B1')",98.1,0.5613085227335738,0.7703752081097407,2.6232182655855123,5.701914063596173,2.7180005319553784,4.219360635538369 8 | 7,0.954858225853392,21.6118333333333,15.7872,0.765023202966358,0.7457088493817562,336.33026123046875,"('A7', 
'B1')",107.6,0.5346588440241988,0.7232594594743524,2.372111155642656,5.902250746563152,2.70805020110221,4.26689632742025 9 | 8,0.892852531803385,19.712,12.6698333333333,0.719880611676578,1.2241406440734863,281.1792984008789,"('A8', 'B1')",104.8,0.1997960420329058,0.1882834106904658,2.4423470353692043,5.061771909543635,2.6232182655855123,4.276666119016055 10 | 9,1.00117905488731,19.8938333333333,14.9758,0.751899508434698,0.7284704446792603,279.0131530761719,"('A9', 'B1')",94.1,0.5524180192600373,1.3235941454827085,1.7334238922150915,5.163299154869948,0.8372475245337022,5.476965517975683 11 | 10,0.908114354031057,18.8638333333333,12.9106266666667,0.753662370325359,0.8430207967758179,306.5643310546875,"('A10', 'B1')",101.3,0.5345190942953818,1.108819065748383,1.843719208158766,5.154504731182189,2.175887439948088,5.45907474542615 12 | 11,0.9668951871498238,22.045,17.18667999999999,0.8063108228707,0.1196127459406852,377.3582153320313,"('A11', 'B1')",94.5,0.5786420118704697,1.4258057532582622,1.840549633397487,5.286598973593838,2.222459048514761,5.288620503569173 13 | 12,0.925053650251033,21.0616666666667,14.8729333333333,0.763373315982562,0.7457088493817562,331.36549377441406,"('A12', 'B1')",93.4,0.2302013721962937,0.2122510031707061,2.0149030205422647,4.4075725416416205,2.005525858729668,4.09933210373314 14 | 13,0.9672753981453316,22.7545,17.47297999999999,0.7938702750334682,0.5526999831199646,378.8519134521485,"('A13', 'B1')",93.0,0.5702275796875482,1.5142233010990578,2.0268315914075385,5.805345790065642,2.7363136663750693,5.748373357984076 15 | 14,0.9657705232837248,22.93216666666667,17.68941999999999,0.7987199976616989,0.4500168263912201,386.5478515625,"('A14', 'B1')",78.5,0.5710377773288812,1.3300365442302713,2.919930560137709,5.827002554614793,2.589266665112243,5.24812872496475 16 | 15,0.8960757258048507,22.340666666666667,15.748086666666651,0.7866597953037301,0.6085071563720703,362.8600158691406,"('A15', 
'B1')",92.6,0.6713558580654283,0.7774756376790575,2.667922410011431,5.243491524264258,3.12982600803469,4.437697832044343 17 | 16,0.9104335348311724,22.3135,16.120666666666654,0.7935367686949607,0.7457088493817562,399.3815002441406,"('A16', 'B1')",94.9,0.5166092522831635,1.242524525592238,2.604909442182697,5.727140864233436,2.6210388241125804,4.634534794617996 18 | 17,0.931378875714626,20.787333333333333,15.04413333333332,0.777037556472657,0.6790651679039001,322.3023681640625,"('A17', 'B1')",81.5,0.346255448399423,0.4355419393914089,2.285438934159075,5.524296892681455,2.6973262377392344,4.394942859940428 19 | 18,0.9662229822775712,21.724,16.73783999999999,0.7974110594022649,0.4277629554271698,348.5682983398437,"('A18', 'B1')",93.5,0.5379497434604311,1.608774081838942,2.821973947420524,6.100700045314072,3.20761263258949,5.223755445221132 20 | 19,0.9388687980304742,22.5485,16.942799999999988,0.8003180680046688,0.4025917947292328,365.85157775878906,"('A19', 'B1')",96.3,0.5734222059431309,0.8209265466833475,2.1610215286722587,5.0158881403777045,2.6071242825122494,4.302577500800556 21 | 20,0.98804155954776,23.188166666666667,17.993219999999987,0.7853572627051948,0.5774725675582886,410.0501403808594,"('A20', 'B1')",94.6,0.5400451738927363,1.344155689292423,1.803358605071407,5.890953365511593,2.761274962339508,4.903421209789107 22 | 21,0.9081685784567416,20.187,14.327466666666655,0.7815039041330136,0.7849965691566467,368.7667541503906,"('A21', 'B1')",90.6,0.4663433517600799,0.5903754162799315,2.410542234499138,5.483343595749118,2.848391685655282,4.4728951466782725 23 | 22,0.964956412061777,22.1353333333333,16.58104,0.776279297767946,0.7457088493817562,367.37071228027344,"('A22', 'B1')",101.9,0.3728517540269125,0.6896622675640636,1.969905654611529,5.237026898104929,2.775708849576025,4.741796783519916 24 | 23,0.9527082483284808,22.37933333333333,16.561066666666655,0.7767499521164244,0.584176778793335,378.0126953125,"('A23', 
'B1')",96.1,0.7388135812215938,1.6230615974861191,1.5216989981260935,6.241873009570204,2.2925347571405443,5.009434573519737 25 | 24,0.8970427202570175,20.90383333333333,13.923579999999989,0.7425263229644067,0.9550488591194152,304.11988830566406,"('A24', 'B1')",93.4,0.4312370220734827,0.5967553488833869,2.6181254935742233,4.586802528629907,2.3978952727983707,4.653293461169819 26 | 25,0.959354348358631,21.872333333333334,16.142266666666654,0.7692904714034388,0.5307152271270752,333.8181915283203,"('A25', 'B1')",93.7,0.6534299095561985,1.3442180857153383,2.1004689088719113,5.83305481036034,2.6511270537025893,4.684443366882599 27 | 26,0.9637213477056992,21.108500000000003,14.880799999999988,0.7315052167195881,1.018460750579834,311.75836181640625,"('A26', 'B1')",94.7,0.5864178362599963,1.3171984903144236,2.60859812213055,6.01200219832553,2.3896797998449792,5.367703494033044 28 | 27,0.8882126080017356,19.962333333333333,13.935613333333322,0.7859553070276891,0.5333954095840454,339.7156066894531,"('A27', 'B1')",92.9,0.5943714766633755,0.774005580371138,2.1016921506146558,4.561740628060756,2.668616131856803,4.639088404191552 29 | 28,0.9411461894137362,21.298500000000004,15.483599999999988,0.772441923976495,0.5333443284034729,377.0221252441406,"('A28', 'B1')",94.8,0.5250093044350997,1.2752380413604862,2.0149030205422647,5.718572560128837,2.7107133185216936,5.086731764067556 30 | 29,0.906489838115277,20.820000000000004,14.564386666666657,0.771700062235351,0.6533699631690979,370.0816650390625,"('A29', 'B1')",96.7,0.7210836770697298,1.235692449542688,1.780024213009634,5.852001310115928,2.7517480563679295,5.094240877951052 31 | 30,0.939457021825418,19.5201666666667,14.0970666666667,0.76872023884106,0.9627365469932556,307.4132385253906,"('A30', 'B1')",93.3,0.6036964626394389,1.209323566569667,2.3896797998449792,6.249589067317437,2.5937610547000824,5.54502118227126 32 | 
31,0.9156974717140304,21.64416666666667,13.545986666666655,0.683467328872346,1.118086338043213,252.6215362548828,"('A1', 'B2')",86.6,0.385027296491453,0.3335586896042941,4.610555658264429,6.229339063022186,2.8160073426073025,5.524257005164247 33 | 32,0.930946549370994,17.379833333333337,10.795090666666658,0.6671998505531895,1.5501980781555176,186.1217269897461,"('A2', 'B2')",89.6,0.804320620584203,1.7570067699692986,2.740840023925201,6.424690726001537,3.0281994636914926,5.656586039825227 34 | 33,0.801191025337596,14.2713333333333,8.47198399999999,0.740942490962632,0.7457088493817562,207.37144470214844,"('A3', 'B2')",92.0,0.8960278705264125,1.6862821950864693,3.5959414584546674,6.080207756942371,3.0041963519661206,5.729710096666117 35 | 34,0.8341259555726195,14.62915,8.225914666666657,0.6741141939154506,1.7948462963104248,155.2179718017578,"('A6', 'B2')",83.5,0.7974317363095113,1.9995941413209808,2.489064659936664,6.6508480975151185,2.378619779270043,6.057860716747665 36 | 35,0.9180557297271424,17.698,11.266189333333324,0.6933999551966736,1.167419672012329,192.4375534057617,"('A7', 'B2')",84.3,0.7615925447185049,2.162432876791471,6.080116244702545,6.834894221414897,3.6826098411003407,5.750952279132591 37 | 36,0.8762722538827455,16.1139,9.51807599999999,0.6740768985416637,1.5899317264556885,177.66534423828125,"('A8', 'B2')",83.2,0.7118428905033934,0.5829520688759136,2.4807312783775197,5.844268792289402,2.3115448343655176,5.4169888962655355 38 | 37,0.9432954726266604,20.766666666666666,13.757139999999987,0.7022853597254757,1.1919080018997192,259.0177001953125,"('A10', 'B2')",75.9,0.887676372309389,1.7803726717896116,1.264126727145683,6.855450947352848,2.367436065313662,5.953139432786314 39 | 38,0.93614673253333,18.909,12.090738666666656,0.683030892489554,1.431490778923035,216.8745346069336,"('A11', 'B2')",82.9,0.7377245830541187,2.09359418077698,2.462149662665384,6.974291802782381,3.151025157960026,6.351757936569426 40 | 
39,0.827149806860096,11.3421166666667,6.06779866666666,0.646774485752231,1.8470876216888428,117.78759002685548,"('A12', 'B2')",86.0,0.6013399666546435,0.5563046279055263,2.278292400425001,5.63913728116267,2.6871669901857858,5.372960909543803 41 | 40,0.9269653529159012,19.77133333333333,13.354593333333325,0.7286705336695368,0.9830908179283142,251.69414520263672,"('A13', 'B2')",97.4,0.8991782771621643,2.104562889185302,3.58351893845611,6.152115482991541,2.551786178627545,6.007165637334066 42 | 41,0.9129249424246538,19.271833333333333,12.567106666666657,0.7142943211260108,1.1217660903930664,232.00569915771484,"('A14', 'B2')",76.5,0.8752656726191291,1.653732361852251,2.5626389983283526,6.152605026980459,3.044998514856909,5.372543188900008 43 | 42,0.881758624733483,15.5649666666667,9.39406799999999,0.684472159687382,1.3659489154815674,178.2980194091797,"('A15', 'B2')",86.3,0.8370338308804947,1.644101538904916,3.912023005428146,6.112818975520158,1.851599469584072,6.244069808571811 44 | 43,0.8909944790930316,14.49625,8.643179999999994,0.6691798795136102,1.672309398651123,155.67787170410156,"('A17', 'B2')",82.8,0.8590025163408033,1.3555692131772303,2.8414149131696336,6.282472333238322,3.0160249768217535,6.044246732546951 45 | 44,0.8921682636459731,20.915166666666668,13.670159999999989,0.7325976102330783,0.7331898808479309,270.1714324951172,"('A18', 'B2')",87.1,0.9898908350624076,1.8194014532065208,1.3428648031925547,6.045431763377924,2.7093826463359885,5.789378962272886 46 | 45,0.933489309302586,17.0435,11.435454,0.718762280087576,1.051593780517578,184.454704284668,"('A19', 'B2')",83.8,0.99342365337974,1.6933497866079783,2.696652156149841,6.207220879988174,3.3061538329752613,5.523937847737139 47 | 46,0.925887159408354,12.4149166666667,7.69010399999999,0.66900650910837,0.922232985496521,125.3464126586914,"('A20', 'B2')",87.5,0.8260705390730299,1.665170878633118,2.9407479652212314,6.248758285852109,2.514465452029545,5.481680047850402 48 | 
47,0.849645462734099,14.9288333333333,7.70529999999999,0.607471545765693,1.0058822631835938,83.66973495483398,"('A21', 'B2')",92.0,0.8251077128769112,1.3430213739587231,1.9768549529047348,6.169966317799071,2.468099531471619,5.915501008300943 49 | 48,0.864915531575342,18.5568333333333,11.21532,0.6987697668576,1.054625153541565,235.4025421142578,"('A22', 'B2')",89.6,0.7941459476611484,2.0633694295899776,2.528125768907977,6.428654394910388,5.8821238989988345,6.338965034125509 50 | 49,0.8247928322846068,16.273633333333333,9.212330666666656,0.6863412691188064,1.0742616653442385,187.12191009521484,"('A23', 'B2')",90.2,0.8553089818143494,1.5929412867612838,2.4664031782234406,6.447036412613682,2.331172549845958,5.9406448247780075 51 | 50,0.820647003133021,15.5156333333333,8.55982933333332,0.672263001405067,1.7791378498077393,174.0285186767578,"('A24', 'B2')",85.1,0.8292667144846438,1.646602939804419,2.180417459019838,6.238266029572827,2.1162555148025524,5.622500110036512 52 | 51,0.8446735720668319,12.368950000000002,7.306463999999993,0.6993353935586811,0.8376029133796692,165.94361114501953,"('A27', 'B2')",44.1,0.430733039789863,0.8542109172598732,2.8195915758351173,4.999304677753871,3.279029747687948,5.100110300523858 53 | 52,0.9012439928317748,18.618,12.383159999999988,0.7379995133304443,0.3734071552753448,244.9143829345703,"('A28', 'B2')",27.1,0.5866832086119395,1.6134619067393283,2.866192902199006,6.1889209914862064,3.795713883027763,6.3551523434145 54 | 53,0.962954783550635,14.74635,10.3756,0.730672543325574,1.0907914638519287,165.0103759765625,"('A29', 'B2')",76.2,0.6744272126586979,1.7125849884552415,4.997212273764115,6.729190656065888,2.3233676321765744,6.028085730563627 55 | 54,1.00030267935111,23.4831666666667,18.15702,0.772959037661726,0.5475579500198364,340.85223388671875,"('A30', 'B2')",59.8,0.850130209990933,1.6830288468937555,3.9064072666425105,6.215827354826921,2.8814431271518632,6.425371328847449 56 | 
55,0.9448904551546132,20.002,14.121939999999988,0.7472044971019796,0.2395762056112289,255.71453094482425,"('A31', 'B2')",103.8,0.467163662213274,0.7907534262147138,1.5973653311998313,6.287282185911616,1.6094379124341003,5.784594117392615 57 | 56,0.8726866332664335,20.707833333333333,13.718399999999988,0.7591200756143289,0.0761139094829559,280.4857482910156,"('A32', 'B2')",80.3,0.8186793174953579,1.5217673470852242,2.3702437414678603,6.3999251641959445,2.9882040071331994,6.1786492595679245 58 | 57,0.8695659513972184,20.593833333333333,13.307039999999986,0.7430905570063924,0.3462651669979095,262.7627716064453,"('A33', 'B2')",82.9,0.698229291132598,1.516902916021092,4.447111881329816,6.501965118050027,3.3439213138424253,6.282378890140428 59 | 58,1.0159047563158357,22.38653846153846,18.19932692307692,0.8002310292033326,0.4010657370090484,298.6084289550781,"('A30', 'B3')",90.9,0.6206982125609989,1.7608352627165196,1.5789787049493917,6.670943744902507,3.912023005428146,6.587343381907437 60 | 59,0.907756178481051,18.8478333333333,11.90532,0.695841660802429,0.3141034245491028,194.5270004272461,"('A31', 'B3')",77.1,0.6034125336772882,1.622604067030694,1.665818245870208,6.501694993791227,1.7749523509116738,6.618325012662868 61 | 60,0.94342302163101,16.6973333333333,10.731912,0.681276668273161,0.5453540682792664,146.36078262329102,"('A32', 'B3')",90.0,1.0611496219487513,0.7818872776691216,2.131796772013764,6.2262401826519,2.4336133554004498,5.30985060236171 62 | 61,0.84852038621655,20.985000000000003,13.283459999999986,0.7460019416075684,0.7457088493817562,280.6656188964844,"('A33', 'B3')",66.6,0.5364626223078431,1.4177537788085692,1.958685340544036,6.782101348945185,1.6863989535702288,6.185261673626961 63 | 62,0.9615428551510352,20.081333333333333,14.57773333333332,0.7549684644950271,0.5179274082183838,272.3524169921875,"('A30', 'B4')",83.6,0.6644617063405537,1.5667679254093043,4.638121286127777,6.783619607997977,1.9768549529047348,6.575647504226011 64 | 
63,0.7058614363363237,17.49721153846154,8.486099999999992,0.6870998397414236,0.8123644590377808,204.1861343383789,"('A31', 'B4')",76.1,1.1224007156050126,1.5569042210346098,4.284000269375321,6.566165972461445,2.1849270495258133,6.282958096643734 65 | 64,0.915808239694231,16.5699833333333,11.1779406666667,0.736605880810019,0.2237274944782257,202.92215728759768,"('A32', 'B4')",80.2,0.6630260950432887,1.53367576616205,3.8124236402053913,7.3866567051634835,4.280685807012206,6.580874893126198 66 | 65,0.841849308764966,20.654,12.9283866666667,0.74354250515771,0.6181590557098389,275.18902587890625,"('A33', 'B4')",77.8,0.6207048044763493,1.6183648398620267,3.8680711178989635,6.5044378629314314,3.8680711178989635,6.5044378629314314 67 | 66,0.952209510728935,21.2043333333333,15.5502666666667,0.770159546972405,0.74054354429245,331.91233825683594,"('A30', 'B5')",82.5,0.5380727346443241,1.4917893192704317,2.776954179749421,6.092237053930959,2.776954179749421,6.092237053930959 68 | 67,0.865490298753422,21.1306666666667,13.63248,0.745417296527251,0.6182990670204163,277.6234893798828,"('A31', 'B5')",91.2,0.5985162626673881,1.4619110048986643,2.3869262414277967,7.400773284098163,3.566994266123812,6.3072237421316535 69 | 68,0.770217471256775,18.9071666666667,10.3434,0.710270049431028,0.1641063541173935,232.4411163330078,"('A32', 'B5')",65.1,0.5636063015785159,1.5071976465821233,3.2204745462318978,6.987711829098064,3.5779478934066544,6.517183709463964 70 | 69,0.898407792802533,19.1761666666667,13.35054,0.77493187746119,0.7457088493817562,247.70818328857425,"('A33', 'B5')",80.6,0.6271257838884627,1.556013064208589,3.846951008684431,6.505933525788771,3.846951008684431,6.505933525788771 71 | 70,1.00027119822236,22.8285,18.13548,0.794207373394558,0.7336567044258118,378.1464080810547,"('A30', 'B6')",92.4,0.6126951923505842,1.3818254860865622,2.125847914493992,7.000817479427672,2.961140828784372,6.235841074830201 72 | 
71,0.889731433965337,19.2,12.312,0.720723103085264,0.7484471797943115,220.3172302246093,"('A31', 'B6')",92.3,0.5886130718756097,1.2679304730337326,3.264231526588998,6.1651447076866015,3.264231526588998,6.1651447076866015 73 | 72,0.873747542709479,19.2873333333333,12.38628,0.73499221603026,0.7147552371025085,239.0922698974609,"('A32', 'B6')",95.3,0.849110754240745,1.5770248152504618,3.1896529661912973,6.227741476812155,3.312730400339825,6.259122467728835 74 | 73,0.889509428196043,20.2633333333333,13.4442,0.745887828282719,0.7646552920341492,256.01002502441406,"('A33', 'B6')",85.2,0.6003043127538287,1.169384411913362,3.092405160814252,6.116752682250139,3.092405160814252,6.116752682250139 75 | 74,0.889143706455206,21.6191666666667,14.3886,0.748527277011931,0.7288943529129028,281.88250732421875,"('A30', 'B7')",77.4,0.6551522872985377,1.5759516715757844,3.7618975468505083,6.465883414827127,3.7618975468505083,6.465883414827127 76 | 75,0.789912052621111,18.9088333333333,10.6733866666667,0.71459301814683,1.0877550840377808,245.9346618652344,"('A31', 'B7')",80.2,0.4770937071092534,1.4831315577532092,3.214867803470662,6.373388046633112,3.214867803470662,6.373388046633112 77 | 76,0.910768942183162,18.1595,12.0241753333333,0.72701471524772,0.6716845631599426,226.6268920898437,"('A32', 'B7')",92.5,0.7229357253348008,1.435019847455516,2.386006701133118,7.391612502516408,3.246490991901174,6.282976775140115 78 | 77,0.915712965742097,18.7986666666667,12.7255666666667,0.739248955967061,0.7782513499259949,238.14224243164065,"('A33', 'B7')",79.2,0.5507325529133206,1.7447158215957768,2.6181254935742233,7.482861890726881,3.1945831322991562,6.744730339129773 79 | 78,0.933897896156244,18.208333333333336,13.185199999999991,0.7753845255429885,0.6532506346702576,249.3000946044922,"('A30', 'B8')",94.9,0.6389387761652563,1.5396978405691617,4.081765780015241,5.967120623718465,4.081765780015241,5.967120623718465 80 | 
79,0.72856073524625,15.723557692307692,7.92712307692307,0.6919887289601707,0.6911315321922302,174.71273803710938,"('A31', 'B8')",94.8,0.5472952896844744,1.4327824386358865,3.150596984114906,6.956840637672617,4.415582293202092,6.07182258967363 81 | 80,0.6923492051760907,16.685192307692308,8.32612307692307,0.7207529221693856,0.6799123287200928,218.86001586914065,"('A32', 'B8')",89.7,0.7224547145709733,1.540064355822416,3.912023005428146,5.400106512450071,4.048649602959971,6.265206151185283 82 | 81,0.6740831263516762,10.944038461538462,5.109199999999994,0.6925670681654947,0.8391420245170593,137.03339004516602,"('A33', 'B8')",111.0,0.4328903409555666,0.4044334417061869,2.906354462402774,5.186994667903771,2.906354462402774,5.186994667903771 83 | 82,0.6865457629583948,17.529326923076926,8.14974999999999,0.6771884693534668,0.7457088493817562,193.9578094482422,"('A30', 'B9')",98.6,0.5838693775789728,1.354033683991678,4.059407992341204,5.783455883378043,4.059407992341204,5.783455883378043 84 | 83,0.8122828013046043,17.239807692307693,10.468207692307686,0.7475369352062634,0.7728519439697266,239.3302230834961,"('A31', 'B9')",93.6,0.5811021650007446,1.3416638031181378,2.554899021608035,7.129001030613187,3.653770277000368,6.0913551299227775 85 | 84,0.7218069628179143,16.60798076923077,8.09776153846153,0.6755026905801205,0.5930888652801514,175.4158706665039,"('A32', 'B9')",85.6,0.8807805505162155,1.6297486561252814,3.912023005428146,5.395580184168247,4.065945087700403,6.263303019960951 86 | 85,0.7151387521362176,15.649038461538462,7.564630769230761,0.6759425170683063,0.7369089126586914,165.67908477783203,"('A33', 'B9')",92.1,0.6098029214821797,0.9542770028232068,3.912023005428146,7.658298345183371,4.614625344758847,6.312478634079945 87 | 86,0.8525817904709119,18.702596153846155,12.154361538461533,0.7622443284145063,0.6329440474510193,244.4916305541992,"('A30', 
'B10')",86.3,0.4634725295677839,1.3524848313828282,3.912023005428146,7.8202508182147294,3.7736800558064014,6.700128221820461 88 | 87,0.7073830055582437,15.494038461538462,7.094199999999992,0.6472680591053853,0.9022797346115112,137.02317810058594,"('A31', 'B10')",92.1,0.484432081228493,1.326967249758646,3.651177586929925,7.16369123376117,3.912023005428146,5.2936062868179175 89 | 88,0.6816555461131346,17.426826923076923,8.10769999999999,0.6825184252379143,0.4661506712436676,196.921875,"('A32', 'B10')",86.0,0.5254240647688724,1.5128399711425866,3.3112726741341683,7.278104305067688,6.617576759665009,8.197593804284399 90 | 89,0.7668929676302785,15.116153846153846,7.543846153846146,0.6507538772000355,0.631700336933136,124.8127899169922,"('A33', 'B10')",76.0,0.5504271040394415,1.35441603184745,4.110873864173311,7.209887444146602,4.110873864173311,7.209887444146602 91 | 90,0.7417430252275974,18.60298076923077,10.26155769230768,0.7436648982261492,0.5100787878036499,258.09657287597656,"('A30', 'B11')",95.3,0.5325121391768158,0.9923109280502284,3.912023005428146,5.365929284958057,3.5686872687855304,6.32496809581828 92 | 91,0.7927912394561732,19.356634615384618,11.27526153846153,0.7347471832598043,0.3140197098255157,227.600814819336,"('A31', 'B11')",85.8,0.5535381396158505,1.6888339887870896,3.912023005428146,5.432935668272798,3.912023005428146,5.343673068268834 93 | 92,0.7827802436724588,15.12923076923077,7.77996153846153,0.6569324978720315,0.5610160827636719,128.9163475036621,"('A32', 'B11')",95.5,0.5412860097770961,1.084138042245926,3.420346200500916,6.450154416453444,3.420346200500916,6.450154416453444 94 | 93,0.7836764332702176,13.38346153846154,6.928096153846147,0.6605545136771773,0.6026217937469482,118.78337478637695,"('A33', 'B11')",77.5,0.5280380218477846,1.3503782165284088,-0.9675840262617056,0.3074846997479607,-0.9675840262617056,0.3074846997479607 95 | 
94,0.9780512924744464,17.350333333333335,12.652975999999995,0.7456297541778804,0.5557070374488831,135.86166381835938,"('A30', 'B12')",95.8,0.4439681785992265,0.9482705697503784,3.17220341666977,6.1395827825604385,3.17220341666977,6.1395827825604385 96 | 95,1.0039247555896236,18.769,14.211539999999994,0.7542213881148225,0.5002891421318054,147.21249389648438,"('A31', 'B12')",69.6,0.5691045783782954,1.162177401393418,3.3555024224000256,6.2538672723743085,3.3555024224000256,6.2538672723743085 97 | 96,0.982431683483908,14.166883333333336,10.285701999999995,0.7390218180352649,0.5737015604972839,117.64452362060548,"('A32', 'B12')",94.2,0.6044185791938071,0.8727255164452474,3.100092288878234,6.140983086547454,3.100092288878234,6.140983086547454 98 | 97,0.9851939963917896,18.342166666666667,13.759339999999996,0.7614216310066139,0.5427629947662354,156.2816619873047,"('A33', 'B12')",97.1,0.3802322399845384,1.668001731022544,2.8027541365715076,6.573833786445484,2.821973947420524,5.801060414120672 99 | 98,0.5419412191581997,3.826778846153846,1.269119230769229,0.6119513206630561,0.9850916266441344,47.70678901672363,"('A30', 'B13')",81.0,0.3977435293787634,0.5329180415436852,2.0294631718735947,6.207120133456288,3.141130476243348,5.009901691742504 100 | 99,0.6139430978728448,17.99105769230769,7.440969230769221,0.6736660964636627,0.7258914709091187,206.2675170898437,"('A31', 'B13')",92.3,0.4748278948082133,1.8985136872727888,3.2402458506043934,6.2644072771746755,2.6932749155200555,4.014940539434832 101 | 100,0.5523988466347415,9.64346153846154,3.52107038461538,0.6609810226308206,1.121955394744873,119.69685745239258,"('A32', 'B13')",94.5,0.4326640503937349,1.7257997779119894,3.054001181677967,6.039540170339545,2.9796028916241073,4.533781589900438 102 | 101,0.6149525787817327,16.78451923076923,7.153615384615375,0.6930667332140056,0.5465410947799683,210.3163375854492,"('A33', 
'B13')",90.9,0.4441888647701001,1.9177013562445944,1.8421356765531218,6.132464852829423,2.7568403652716422,4.056642695038089 103 | 102,1.07370381233332,23.61625,20.641695,0.814047831887953,0.7457088493817562,302.1597900390625,"('A1087', 'B3')",94.8,0.4657450211145358,1.7628906098675727,2.5160822672564502,6.259772650376566,3.1170645587215158,5.054014017398201 104 | 103,1.07380875823598,23.633875,20.82075,0.82041670260357,0.7457088493817562,305.1864929199219,"('A1090', 'B3')",95.0,0.4005477409392413,1.2686456627029858,1.128171090909654,6.1940175608061825,2.865053949911875,4.593401202388092 105 | 104,1.07858271907674,23.680125,20.989915,0.821813424970907,0.7457088493817562,352.4723358154297,"('A1106', 'B3')",98.2,0.4495850167890869,1.8077397550154133,2.7893229212309465,6.247501141724217,2.488234439880675,5.393309313903605 106 | 105,0.947217799573576,22.216125,17.2994,0.82207772330792,0.7457088493817562,363.2457733154297,"('A30', 'B770')",95.0,0.307681264561769,1.214510965577439,2.756205242989257,5.8719205869336815,2.7298116928837226,5.090554917272129 107 | 106,0.937916429544553,23.315625,17.9915,0.822727791702103,0.7457088493817562,333.26690673828125,"('A1087', 'B770')",98.4,0.3836563140808195,1.086654387640518,2.263844264677615,5.6937321388027,3.1175073304799117,5.075985985334385 108 | 107,0.936378867112289,21.219875,16.0779,0.80916091759014,0.7457088493817562,294.16387939453125,"('A1090', 'B770')",93.5,0.2835812461166669,0.845832375204477,2.468946630209271,5.560027571011063,2.60859812213055,4.321347323862031 109 | 108,0.953042845917508,21.96375,17.6463,0.843013910385518,0.7457088493817562,346.41783142089844,"('A1106', 'B770')",94.9,0.3364058250362782,0.8140510644137561,2.2213750375685026,5.662265794453267,2.6347624053323777,4.672174414768565 110 | 109,1.04546591405595,24.034625,20.65822,0.822139794035742,0.7457088493817562,353.8546905517578,"('A30', 
'B772')",91.4,0.4111455474553925,1.8190321596422847,2.451005098112319,6.43952607451319,2.6071242825122494,4.162626069638506 111 | 110,0.833974291513192,18.19875,10.4839,0.690762333083177,0.3266535401344299,210.0936050415039,"('A525', 'B386')",82.5,0.3209785949206686,1.40961999488207,2.498973906999436,5.4772582151825135,2.1983350716202463,5.530103463243588 112 | 111,0.799167627113859,20.332,11.27184,0.693708205215509,0.7457088493817562,240.97146606445312,"('A1058', 'B3')",66.7,0.3419860641111923,1.5827296640741688,2.4336133554004498,5.737185012956527,2.1126345090356,6.006082186931363 113 | 112,1.00281346616283,22.56425,18.77601,0.829778633787981,0.7457088493817562,359.35655212402344,"('A1104', 'B1')",86.2,0.2912443311134763,1.0347741297656678,2.0744289998562917,5.455876516649401,3.26918863874179,5.616007169584166 114 | 113,1.03460570569076,22.09475,18.60518,0.813897875748036,0.7457088493817562,316.4782257080078,"('A1058', 'B6')",84.5,0.3731568626190597,1.7825429295771409,2.367436065313662,5.783055655550709,2.8914822521801917,6.016035201042187 115 | 114,0.978793702058942,21.754,18.50898,0.869265004188272,0.7457088493817562,322.0953063964844,"('A51', 'B12')",95.3,0.2048107102310019,0.1441197074935769,2.4570214462984645,5.786130216004954,2.9454910571172443,6.013592899180983 116 | 115,0.999948762104437,25.09525,20.46051,0.815355830573224,0.7457088493817562,320.6391296386719,"('A1021', 'B1')",87.3,0.3145988621526024,0.4272441909318276,2.533696813957432,6.437927634250219,2.67827804276854,4.146937012341271 117 | 116,0.991279646119977,23.95075,20.0109,0.842851987368066,0.7457088493817562,367.8880310058594,"('A1103', 'B1')",90.6,0.3919496164061768,1.0385748880452257,2.373043556642607,5.411779971458838,3.0969341540629585,5.693024818003241 118 | 117,0.963936600873682,22.323875,18.4408,0.856962276741806,0.7457088493817562,315.7584533691406,"('A52', 'B1')",89.9,0.3085126939488428,0.788662146810403,2.373043556642607,5.090000849476996,2.2915241456346207,5.150165325524179 119 | 
118,1.02602554499184,24.595875,20.1663,0.79910854158104,0.7457088493817562,345.59521484375,"('A359', 'B3')",78.2,0.3910499903760829,1.729974101470518,2.130609828254235,5.78775590843238,2.0228711901914416,6.068587612154564 120 | 119,0.947290240974313,23.423625,17.7916,0.801821756526296,0.7457088493817562,343.38145446777344,"('A1090', 'B1')",92.1,0.3368674955691861,1.5758383945824672,2.9183111265854063,5.915608894804446,2.1972245773362196,6.04455497568209 121 | 120,0.971326296978835,14.83975,7.76215999999999,0.538506387760133,0.5677120685577393,91.22993087768556,"('A1047', 'B3')",74.3,0.2357156253050978,0.321723146448361,-0.9675840262617056,0.3074846997479607,-0.9675840262617056,0.3074846997479607 122 | 121,1.00209021353218,24.791,20.367795,0.819866514477139,0.7457088493817562,339.6400146484375,"('A1058', 'B12')",93.3,0.3557801842947126,1.6033511491177812,2.7942278973432626,5.821832141430015,4.137244396537321,6.008567455007644 123 | 122,1.01618816150529,24.984875,20.73214,0.816568872719241,0.7457088493817562,358.7217559814453,"('A30', 'B771')",88.2,0.380462905652044,1.6112727358210492,2.4423470353692043,5.678840664351991,2.700018029404946,5.8771754007058545 124 | 123,1.03542507576215,24.844625,21.4786,0.834939188961373,0.7457088493817562,358.93389892578125,"('A1106', 'B771')",91.9,0.4269286600763737,1.537688981343461,2.924236271848921,6.098613462625025,3.5564902660681907,6.072883230769142 125 | 124,0.966868475092527,22.143875,17.1923,0.802995218774078,0.7457088493817562,288.1349334716797,"('A30', 'B773')",87.6,0.4256981738326856,1.5668961464204554,2.6483001966964363,5.687991788749877,2.182674796321488,6.044863123832677 126 | 125,1.03387605821556,24.107,20.48035,0.821723539238293,0.7457088493817562,323.28077697753906,"('A30', 'B769')",91.3,0.3885823272237163,1.921125543122787,2.5257286443082556,5.675417293099305,2.826129489167811,5.874368775231734 127 | 126,1.08368388255031,22.8726666666667,20.29568,0.818811986643522,0.7457088493817562,379.6998748779297,"('A1069', 
'B772')",83.3,0.4425322330601057,1.9673853933793848,1.9329696377795786,5.945839369081707,3.5832411220909393,6.127283945250464 128 | 127,1.0857708255824,23.2898333333333,20.38912,0.806294937851874,0.7457088493817562,367.4183654785156,"('A1069', 'B769')",70.5,0.4273640556778064,1.594371833906037,2.4423470353692043,6.305837234812808,2.8616292890305104,6.374411343785544 129 | 128,1.04058418037941,22.1251923076923,17.4985461538461,0.760042178261284,0.4106669425964355,307.54278564453125,"('A184', 'B12')",80.8,0.3728521092402596,0.669456847886798,2.0425181875752383,5.632644446822861,1.7950872593207297,5.733729551024153 130 | 129,1.07929996653754,22.038,19.1474646153846,0.805001951685208,0.3143788576126098,298.90464782714844,"('A1087', 'B769')",80.0,0.3588103579562869,0.7639096285072684,2.162172939277301,6.174056900566543,1.449269160281279,6.615047264282018 131 | 130,1.09473989022155,23.4043076923077,20.8776307692308,0.814843996793874,0.7457088493817562,278.24322509765625,"('A1090', 'B769')",89.9,0.5471415202728578,1.937973423366096,2.2945529212967815,6.426569360333832,0.9669838461896731,6.168270923213827 132 | 131,1.03564934127786,21.9835238095238,17.5758628571428,0.771981003829398,0.4961412250995636,321.0870361328125,"('A1104', 'B12')",86.7,0.532090914378523,1.0429885541347896,2.1644717908644115,5.629058928460265,1.9487632180377197,5.730489317484765 133 | 132,0.940927418716247,21.9061538461538,15.3445384615385,0.744443209163312,1.015432357788086,311.6299133300781,"('A1100', 'B10')",91.3,0.5567227503982234,2.0931601916633187,2.4840729690394228,6.55471657305235,1.264126727145683,6.1737444363671905 134 | 133,1.10491648983009,23.5392307692308,21.43296,0.824063031942389,0.7457088493817562,304.28221130371094,"('A1069', 'B771')",86.2,0.4527092086642579,2.0306351685942494,2.21375387928743,6.636287704837738,1.6845453849209058,6.794060126534253 135 | 134,1.05004815669135,21.7749230769231,17.7295630769231,0.775411409042942,0.6414093971252441,300.73699951171875,"('A184', 
'B1')",92.1,0.2506872210198598,0.289512940504517,2.2428350885882717,5.784686349880037,2.1471001901536506,6.066270489909225 136 | 135,1.01204887883152,23.8572580645161,18.5644064516129,0.768880863359991,0.1237399280071258,231.6332855224609,"('A1066', 'B12')",81.2,0.6162406845095915,1.917586057411101,1.9401794743463283,6.124420774356607,2.7232671669070703,6.462530112300095 137 | 136,0.955212099864689,21.4964285714286,15.0682448979592,0.733831825810906,1.3457001447677612,304.50103759765625,"('A1100', 'B769')",96.1,0.3451107839618475,0.5288537080967318,2.7180005319553784,5.865362630310336,2.5392369943330477,5.883489505442683 138 | 137,1.00865751696344,22.1319047619048,17.6048888888889,0.78862541018667,0.6743093132972717,364.52056884765625,"('A343', 'B12')",95.9,0.4772503262834406,1.934597039942917,2.2213750375685026,6.373763377184307,2.673458756332591,6.6549511702495 139 | 138,1.06499809146147,23.0904761904762,19.9314349206349,0.810507143755386,0.0263665933161973,386.9368743896485,"('A525', 'B12')",84.3,0.4998113552925376,2.291835599177482,2.8633430855082453,6.3122609864868116,0.9082585601768908,6.680503299623384 140 | 139,0.97909912454109,20.2840845070423,14.745785915493,0.742481865608738,0.9565617442131042,291.6895294189453,"('A1048', 'B770')",76.4,0.3677910955949128,1.814164602399896,2.3580197998821464,5.788154259875543,1.7047480922384253,5.980883767563795 141 | 140,0.947315623594484,21.8138461538462,15.4988461538461,0.750019269655838,0.9134892225265504,306.8052215576172,"('A1105', 'B12')",74.9,0.3273274005839718,0.7980699165403764,2.2905125117597764,5.581878459415514,3.0160249768217535,5.735474921388691 142 | 141,1.07406441346709,23.4163333333333,19.5110666666667,0.775767879272411,0.7457088493817562,186.1306228637696,"('A1103', 'B12')",97.2,0.3279295350767361,0.5184931718621972,2.066862759472976,5.5460754786261655,2.485739636091892,5.807601803295198 143 | 
142,0.99566109788133,20.694743130227,15.2375149342891,0.739507469914295,0.0167694296687841,165.90062713623047,"('A1021', 'B12')",84.0,0.3399275759170619,0.8001473112443377,2.531313022602156,5.782008147906709,2.0149030205422647,5.97581777228179 144 | 143,1.0069489624813,21.5439068100358,16.1902031063321,0.746311916998383,0.7457088493817562,199.93429565429688,"('A1106', 'B10')",86.1,0.4122296331214997,1.7429541869341776,2.1701959049483,6.244011548791433,2.2617630984737906,6.811805894930298 145 | 144,1.05198478087356,22.2516666666667,18.06558,0.771755808546023,0.5841480493545532,318.54002380371094,"('A1106', 'B2')",60.1,0.4049456567678402,1.944421928097098,2.481567748522486,6.113416657858728,3.007166651179654,6.454695292066289 146 | 145,1.00363816795439,21.54,17.72946,0.820111005940037,0.7457088493817562,321.9380798339844,"('A73', 'B12')",81.2,0.5581624011139122,1.482342394062698,2.8707357833793057,6.410273561307727,3.912023005428146,5.708338745737413 147 | 146,1.04058418037941,22.1251923076923,17.4985461538461,0.760042178261284,0.4106669425964355,307.54278564453125,"('A1087', 'B2')",76.9,0.3763383581057451,1.584837044385507,2.605648267484129,6.225904058164043,3.0973859272804907,6.256996864913979 148 | 147,0.984526918506248,21.6938333333333,17.26606,0.808405671179955,0.7457088493817562,355.55914306640625,"('A1105', 'B1')",88.0,0.2632666889552453,0.2914180035635253,2.5952547069568657,5.908653427582808,2.598979106047848,4.164647686064525 149 | 148,0.998985499549338,22.4370491803279,17.9469508196721,0.800692477648899,0.5508818030357361,384.3457794189453,"('A1069', 'B773')",80.5,0.4246862473835411,1.7167761359648857,2.2915241456346207,6.049521168549791,3.584906863730958,5.9160672826355345 150 | 149,1.03640840299237,20.9586153846154,16.84496,0.775490479798561,0.435497373342514,302.43919372558594,"('A1090', 'B6')",102.2,0.4252640999940562,1.4783390053829095,2.3758355547336385,6.240119887590915,2.451005098112319,6.8096020615736625 151 | 
--------------------------------------------------------------------------------