├── LICENSE └── gnu-gpl-v3.0.md ├── LICENSES_THIRD_PARTY ├── README.md ├── config └── setup.json ├── data ├── holdout_bace.csv ├── holdout_bbbp.csv ├── holdout_clintox.csv ├── holdout_esolv.csv ├── holdout_freesolv.csv ├── holdout_lipo.csv ├── holdout_qm7.csv ├── split.py ├── train_bace.csv ├── train_bbbp.csv ├── train_clintox.csv ├── train_esolv.csv ├── train_freesolv.csv ├── train_lipo.csv └── train_qm7.csv ├── molprop.yml ├── pics └── fig1.png └── src ├── DataLoader.py ├── DeepNetworks ├── SLEF.py └── __init__.py ├── __init__.py ├── holdout.py ├── train_tune.py └── utils.py /LICENSES_THIRD_PARTY: -------------------------------------------------------------------------------- 1 | MolPROP uses third-party libraries which may be distributed under different 2 | licenses. We have attempted to list all of these third party libraries and 3 | their licenses below. 4 | 5 | You must agree to the terms of these licenses, in addition to the MolPROP 6 | source code license, in order to use this software. 
7 | 8 | -------------------------------------------------- 9 | Third party python libraries listed by License type 10 | [Format: Name (scripts/package) - URL] 11 | -------------------------------------------------- 12 | 13 | MIT-0 License (http://opensource.org/licenses/MIT) 14 | * DeepChem (split) - https://github.com/deepchem/deepchem/blob/master/LICENSE 15 | * ChemBERTa-2 (MolPROP) - https://huggingface.co/DeepChem 16 | * torch_geometric (MolPROP) - https://github.com/pyg-team/pytorch_geometric/blob/master/LICENSE 17 | 18 | Apache Software License, Version 2.0 (http://opensource.org/licenses/apache2.0) 19 | * BERT language model architecture (MolPROP) - https://github.com/google-research/bert 20 | * kornia (MolPROP) - https://github.com/kornia/kornia/blob/master/LICENSE 21 | * ray (train_tune) - https://github.com/ray-project/ray/blob/master/LICENSE 22 | * tensorboard (train_tune) - https://github.com/tensorflow/tensorboard/blob/master/LICENSE 23 | 24 | GNU GPL 3 (https://www.gnu.org/licenses/gpl.txt) 25 | * RoBERTa (MolPROP) - https://github.com/dreamer/roberta/blob/master/LICENSE 26 | 27 | BSD License (http://www.opensource.org/licenses/bsd-license.php) 28 | * RDKit (MolPROP) - https://github.com/rdkit/rdkit/blob/master/license.txt 29 | * Biopython (MolPROP) - https://github.com/biopython/biopython/blob/master/LICENSE.rst 30 | * Pytorch (DataLoader) - https://github.com/intel/torch/blob/master/LICENSE.md 31 | * scikit-learn (MolPROP) - https://github.com/scikit-learn/scikit-learn/blob/main/COPYING 32 | * numpy (MolPROP) - https://github.com/numpy/numpy/blob/main/LICENSE.txt 33 | * pandas (split) - https://github.com/pandas-dev/pandas/blob/main/LICENSE 34 | 35 | Creative Commons License (https://creativecommons.org/licenses/by/4.0/) 36 | * TR-MISR (MolPROP) - https://github.com/Suanmd/TR-MISR 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # MolPROP: Molecular Property Predictions with Multimodal Language and Graph Fusion 2 | 3 | fuses molecular language and graph representation for property prediction 4 | 5 | current models include: 6 | - language -- ChemBERTa-2-77M-MLM, ChemBERTa-2-77M-MTR 7 | - graph -- GCN, GAT, GATv2 8 | 9 | ``` 10 | @article{rollins2024, 11 | title = {{MolPROP}: {Molecular} {Property} {Prediction} with {Multimodal} {Language} and {Graph} {Fusion}}, 12 | journal = {Journal of Cheminformatics}, 13 | author = {Rollins, Zachary A and Cheng, Alan C and Metwally, Essam}, 14 | month = may, 15 | year = {2024}} 16 | ``` 17 | 18 | ![image info](pics/fig1.png) 19 | 20 | ## requirements 21 | - git lfs (for locally stored language models) 22 | - install git lfs for your machine: https://github.com/git-lfs/git-lfs 23 | - chemberta requires local installation to config: https://huggingface.co/DeepChem 24 | - molprop.yml 25 | ``` 26 | conda env create --name molprop --file molprop.yml 27 | ``` 28 | 29 | ## preprocess data 30 | ### data splitting 31 | - data splits to reproduce manuscript results are provided: ./data/*csv 32 | - data should be in data directory (e.g., csv format) and organized such that SMILES string and endpoints are in separate columns 33 | - data can be split using the Bemis-Murcko scaffold split implemented by DeepChem 34 | - explore the help options to execute the data split procedure 35 | ``` 36 | python ./data/split.py --help 37 | ``` 38 | ## train and hyperparameter tune 39 | - training and tuning execution is specified by the configuration files (config/setup.json) 40 | ``` 41 | python ./src/train_tune.py 42 | ``` 43 | 44 | ### hyperparameter tune 45 | - setup["training"]["ray_tune"] == True 46 | - specify hyperparameter search space in the '__main__' of ./src/train_tune.py 47 | - ray cluster must be initialized before hyperparameter tuning execution 48 | - submit PBS script with 
specified num_cpus and num_gpus 49 | - start ray cluster 50 | ``` 51 | ray start --head --num-cpus=8 --num-gpus=4 --temp-dir="/absolute/path/to/temporary/storage/" 52 | python ./src/train_tune.py 53 | ``` 54 | 55 | ### inference and holdout 56 | - test trained/validated models on holdout by specifying config/setup.json 57 | - MolPROP models trained on data in manuscript are located in models/weights 58 | - setup['holdout']['model_path'] 59 | - setup['holdout']['holdout_data_path'] 60 | ``` 61 | python ./src/holdout.py 62 | ``` 63 | 64 | 65 | ### logging information and model storage 66 | - train_tune.log files are recorded and saved for every time stamped batch run 67 | - runs are also recorded on tensorboard 68 | - ***** = unique file identifier (e.g., time stamp or number) 69 | ``` 70 | logs/batch_*****/train_tune.log 71 | logs/batch_*****/events.out.tfevents.***** (setup["training"]["ray_tune"] == False) 72 | logs/batch_*****/ray_tune/hp_tune_*****/checkpoint_*****/events.out.tfevents.***** (setup["training"]["ray_tune"] == True) 73 | ``` 74 | 75 | - hyperparameter tune runs are implemented by ray tune and models are stored 76 | - non-hyperparameter tuned models are also stored 77 | ``` 78 | logs/batch_*****/ray_tune/hp_tune_*****/checkpoint_*****/dict_checkpoint.pkl (setup["training"]["ray_tune"] == True) 79 | models/weights/batch_*****/MOLPROP*****.pth (setup["training"]["ray_tune"] == False) 80 | ``` 81 | 82 | # License 83 | MolPROP fuses molecular language and graph for property prediction. 84 | Copyright © 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved. 85 | 86 | This program is free software: you can redistribute it and/or modify 87 | it under the terms of the GNU General Public License as published by 88 | the Free Software Foundation, either version 3 of the License, or 89 | (at your option) any later version. 
90 | 91 | This program is distributed in the hope that it will be useful, 92 | but WITHOUT ANY WARRANTY; without even the implied warranty of 93 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 94 | GNU General Public License for more details. 95 | 96 | You should have received a copy of the GNU General Public License 97 | along with this program. If not, see <https://www.gnu.org/licenses/>. 98 | -------------------------------------------------------------------------------- /config/setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "paths": { 3 | "data_directory": "data", 4 | "checkpoint_dir": "models/weights", 5 | "log_dir": "logs/", 6 | "precision": 32, 7 | "use_atomsets": "hDIST", 8 | "chemprop_maps": false, 9 | "median_ref": false, 10 | 11 | "**precision": "16, 32, or 64", 12 | "**use_atomsets": "hDIST", 13 | "**chemprop_maps": "add, channel, or false (if channel, encoder: in_channels = # of atomsets + 1)", 14 | "**median_ref": "true or false (if true, encoder: in_channels = (# of atomsets + resprop_maps) * 2)" 15 | }, 16 | 17 | "network": { 18 | "encoder": { 19 | "in_channels": 1, 20 | "num_layers" : 2, 21 | "kernel_size": 3, 22 | "n_hidden_fusion": 64 23 | }, 24 | "transformer": { 25 | "depth": 1, 26 | "heads": 8, 27 | "mlp_dim": 32, 28 | "dropout": 0.4849075056273958 29 | }, 30 | "decoder": { 31 | "kernel_size": 1 32 | }, 33 | "language": { 34 | "model": "chemberta-77m-mlm", 35 | "graph": "GATv2", 36 | "drop_rate": 0.25689688702296715, 37 | "freeze": false, 38 | "freeze_layer_count": 1, 39 | "mode": "continuous", 40 | "num_predictions": 1, 41 | 42 | "**model": "chemberta-77m-mlm (3), chemberta-77m-mtr (3), none, chemberta-77m-mlm-only (3), chemberta-77m-mtr-only (3)", 43 | "**graph": "false, GCN, GAT, GATv2", 44 | "**freeze_layer_count": "chemberta (0-3)", 45 | "**mode": "continuous or discrete" 46 | } 47 | }, 48 | 49 | "training": { 50 | "dataset": "train_qm7.csv", 51 | "train_all_data": false, 52 | "kfold": 10, 53 | 
"val_proportion": false, 54 | "ray_tune": true, 55 | "seed": 123, 56 | "verbose": false, 57 | "load_checkpoints": false, 58 | 59 | 60 | "min_loss": 5, 61 | "best_val_score": 300, 62 | 63 | "num_epochs": 50, 64 | "batch_size": 16, 65 | "n_workers": 0, 66 | 67 | "ens_L": 1, 68 | "set_L": 1, 69 | 70 | "lr_coder": 0.00015686882058242425, 71 | "lr_transformer": 1.2957759237665303e-06, 72 | "lr_lang": 1.2895820606332682e-08, 73 | "lr_strategy": 3, 74 | "lr_step": 1, 75 | "lr_decay": 0.8787585778193224, 76 | 77 | "combine_losses": false, 78 | "loss": "L3", 79 | "alpha1": 0.5, 80 | "alpha2": 0.5, 81 | 82 | "**lr_strategy": "0 = ReducePlateau, 1 = CosineAnneal, 2 = StepDecay, 3 = linear warmup inverse sqrt decay, 4 = Constant", 83 | "**losses": "L1 or L2 or combination (alpha1*L1 + alpha2*L2) or L3 (RMSE) or focal or BCE" 84 | }, 85 | 86 | "holdout":{ 87 | "dataset": "holdout_bbbp.csv", 88 | "model_path": "./logs/batch_16_time_2023-11-17-17-44-45-705974/", 89 | "atomsets": "hDIST", 90 | "filename": "hp_tune_92db243c_qm7_chemberta-77m-mlm_ensL1_kf10_epoch37_GATv2_32bit", 91 | "validation": false, 92 | "ray_tune": "hp_tune_cbfe0eae_38_lr_ens_coder=0.0006,lr_ens_transformer=0.0000,lr_lang=0.0000,lr_step=1,lr_strategy=3,kernel_size=1,in_channels_2023-11-18_07-55-50", 93 | "checkpoint": "checkpoint_000037", 94 | "resprop_maps": false, 95 | "validated_model": false, 96 | "epoch_num": false, 97 | "holdout_data_path": "data" 98 | } 99 | } -------------------------------------------------------------------------------- /data/holdout_bace.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 0,BACE_410,1,1.0,O1CCC(CC1)CNC(=O)C(Cc1cc2cc(ccc2nc1N)-c1ccccc1C)C 3 | 1,BACE_390,1,1.0,O1c2ncc(cc2C([NH2+]CC(O)C2NC(=O)C=3C=CC(=O)N(CCCCc4cc(C2)ccc4)C=3)CC12CCC2)CC(C)(C)C 4 | 2,BACE_389,1,1.0,O1c2ncc(cc2C([NH2+]CC(O)C(NC(=O)COC)Cc2cc(ccc2)-c2occn2)CC12CCC2)CC(C)(C)C 5 | 
3,BACE_387,1,1.0,O1c2ncc(cc2C([NH2+]CC(O)C(NC(=O)COC)Cc2cc(ccc2)-c2ncccc2)CC12CCC2)CC(C)(C)C 6 | 4,BACE_383,1,1.0,S1(=O)(=O)N(c2cc(cc3n(cc(CC1)c23)CC)C(=O)NC([C@H](O)C[NH2+]C1CC1)Cc1ccccc1)C 7 | 5,BACE_380,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)C)C(O)C[NH2+]C1(CC\C(=N\O)\CC1)c1cc(ccc1)C(C)(C)C 8 | 6,BACE_370,1,1.0,S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)C1)C(=O)NC(Cc1ccccc1)C(O)C[NH2+]Cc1cc(OC)ccc1)C 9 | 7,BACE_368,1,1.0,S1(Oc2cc(cc3c2n(cc3CC)CC1)C(=O)NC([C@H](O)C[NH2+]Cc1cc(ccc1)C(F)(F)F)Cc1ccccc1)(=O)=O 10 | 8,BACE_363,1,1.0,S1(=O)(=O)CC(Cc2cc(OC(C(F)(F)F)C(F)(F)F)c(N)c(F)c2)C(O)C([NH2+]Cc2noc(c2)CC(C)(C)C)C1 11 | 9,BACE_362,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)NC(C)c1ccc(F)cc1)-c1oc(cn1)C([NH3+])(Cc1ccccc1)C)C 12 | 10,BACE_359,1,1.0,s1cc(nc1)-c1cc(ccc1)CC(NC(=O)COC)C(O)C[NH2+]C1CC2(Oc3ncc(cc13)CC(C)(C)C)CCC2 13 | 11,BACE_351,1,1.0,Fc1c2c(ccc1)C(N=C2N)(C=1C=C(C)C(=O)N(C=1)C)c1cc(ccc1)C#CC1CC1 14 | 12,BACE_349,1,1.0,S1(=O)(=O)CC(Cc2cc(OC(C(F)(F)F)C(F)(F)F)c(N)c(F)c2)C(O)C([NH2+]Cc2cc3c(COC3(C)C)cc2)C1 15 | 13,BACE_346,1,1.0,S(=O)(=O)(N(c1cc(cc(N2CCCC2=O)c1)C(=O)NC(Cc1ccccc1)C(O)C[NH2+]Cc1cc(ccc1)C(F)(F)F)c1ccccc1)C 16 | 14,BACE_343,1,1.0,O1c2c(cc(cc2)CC)C([NH2+]CC(O)C2NC(=O)C=3C=C(c4ncccc4)C(=O)N(CCCCc4cc(C2)ccc4)C=3)CC12CCC2 17 | 15,BACE_342,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)C(N1CCC(NC(=O)C)(C(CC)C)C1=O)CCc1ccccc1)C(O)C1[NH2+]CC(Oc2cccnc2)C1 18 | 16,BACE_336,1,1.0,s1cncc1-c1cc(ccc1)CC(NC(=O)COC)C(O)C[NH2+]C1CC2(Oc3ncc(cc13)CC(C)(C)C)CCC2 19 | 17,BACE_330,1,1.0,S1(=O)(=O)N(c2cc(cc3n(cc(CC1)c23)CC)C(=O)NC([C@H](O)CNc1ccccc1)Cc1ccccc1)C 20 | 18,BACE_323,1,1.0,S(=O)(=O)(N(c1cc(cc(c1)C(=O)NC(Cc1ccccc1)C(O)C[NH2+]Cc1cc(ccc1)C(F)(F)F)C1CCCC1)c1ccccc1)C 21 | 19,BACE_305,1,1.0,O1c2ncc(cc2C([NH2+]CC(O)C2NC(=O)C=3C=CC(=O)N(CCCc4cc(C2)ccc4)C=3)CC12CCC2)CC(C)(C)C 22 | 20,BACE_303,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)C(N1CCC(NC(=O)C)(C(CC)C)C1=O)CCc1ccccc1)C(O)C1[NH2+]CC(Oc2ncccc2)C1 23 | 21,BACE_302,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)C(N1CCC(NC(=O)C)(C(CC)C)C1=O)CCc1ccccc1)C(O)C1[NH2+]CC(Oc2ccccc2)C1 24 | 
22,BACE_299,1,1.0,O1c2c(cc(cc2)CC)C([NH2+]CC(O)C2NC(=O)C=3C=C(N4CCCC4)C(=O)N(CCCCc4cc(C2)ccc4)C=3)CC12CCC2 25 | 23,BACE_297,1,1.0,Fc1c2c(ccc1)C(N=C2N)(C=1C=C(C)C(=O)N(C=1)CC)c1cc(NC(=O)c2ncccc2)ccc1 26 | 24,BACE_284,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)C(N1CCC(NC(=O)C)(C(CC)C)C1=O)CCc1ccccc1)C(O)C1[NH2+]CC(OCc2ncccc2)C1 27 | 25,BACE_283,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)C(N1CCC(NC(=O)C)(C(CC)C)C1=O)CCc1ccccc1)C(O)C1[NH2+]CC(OCc2cccnc2)C1 28 | 26,BACE_282,1,1.0,O1CCCCOc2cc(CC(NC(=O)c3cc(N4CCCC4=O)cc1c3)C(O)C[NH2+]C1(CC1)c1cc(ccc1)C(C)C)ccc2 29 | 27,BACE_271,1,1.0,Oc1ccc(cc1C1CCCCC1)CC[NH3+] 30 | 28,BACE_268,1,1.0,s1cc(cc1)-c1cc2c(nc(N)cc2)cc1 31 | 29,BACE_267,1,1.0,O=C(N(C)C1CCCCC1)CCc1cc2c(nc1N)cccc2 32 | 30,BACE_266,1,1.0,O=C(NCC1CCCCC1)CCc1cc2c(nc1N)cccc2 33 | 31,BACE_264,1,1.0,O=C1NC(CN1Cc1ccccc1)(Cc1ccccc1)C(=O)NC(Cc1ccccc1)C(O)C[NH2+]Cc1cc(N(C)C)ccc1 34 | 32,BACE_263,1,1.0,O=C1N(C)C(=[NH2+])NC1(c1ccccc1)c1ccccc1 35 | 33,BACE_262,1,1.0,O(c1cc2CN(C(CCC(=O)N(C)C3CCCCC3)C3CCCCC3)C(=[NH+]c2cc1)N)c1ccccc1 36 | 34,BACE_261,1,1.0,O=C1N(C)C(=[NH2+])NC1(c1cc(ccc1)-c1cccnc1)c1ccccc1 37 | 35,BACE_260,1,1.0,O(C)c1cc(ccc1)CN1CC(NC1=O)C(=O)NC(Cc1ccccc1)C(O)C[NH2+]Cc1cc(N(C)C)ccc1 38 | 36,BACE_258,1,1.0,O(C)c1cc(ccc1)-c1cc(ccc1)C1(NC(=[NH2+])N(C)C1=O)c1ccccc1 39 | 37,BACE_257,1,1.0,s1c(ccc1-c1cc(C#N)c(F)cc1)C12N=C(N)N(C)C(=O)C1CN(C2)c1ccccc1 40 | 38,BACE_255,1,1.0,s1cc(cc1C1(N=C(N)N(C)C(=O)C1)C)-c1cc(ccc1)C#N 41 | 39,BACE_252,1,1.0,Fc1ccc(NC(=O)C(CC([NH3+])C(NC(=O)c2cc(ccc2)C(=O)N(CCC)CCC)Cc2ccccc2)C)cc1 42 | 40,BACE_251,1,1.0,O1CCCCCCNC(=O)CC(NC(=O)C(NC1=O)C(C)C)C(=O)NC(C(O)CC(C(=O)NC(C(C)C)C(=O)NCc1ccccc1)C)CC(C)C 43 | 41,BACE_247,1,1.0,O=C(c1cc2CN(CCC(=O)N(C)C3CCCCC3)C(=[NH+]c2cc1)N)c1ccccc1 44 | 42,BACE_242,1,1.0,s1cc(cc1C12N=C(N)N(C)C(=O)C1CN(C2)C(=O)c1ccccc1)-c1cc(ccc1)C#N 45 | 43,BACE_241,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)NC(Cc1ccccc1)C(O)C[NH2+]Cc1cc(OC)ccc1)C(=O)NC(C)c1ccccc1)C 46 | 44,BACE_240,1,1.0,Clc1cc(sc1C1(NC(=[NH2+])N(C)C(=O)C1)C)-c1cc(cnc1)C#CC 47 | 
45,BACE_239,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)NC(C(O)CC(C(=O)NC(C(C)C)C(=O)NC(C)C)C)CC(C)C)C(=O)NC(C)c1ccccc1)C 48 | 46,BACE_235,1,1.0,S(=O)(=O)(N1CC([NH2+]CC1)C(O)C(NC(=O)c1cc(cc(c1)C)C(=O)N1CCCC1COC)Cc1cc(F)cc(F)c1)c1cc(ccc1)C 49 | 47,BACE_234,1,1.0,Fc1cc(cc(F)c1)CC(NC(=O)c1cc(cc(c1)C)C(=O)N1CCCC1COC)C(O)C1[NH2+]CC(Oc2ccccc2)C1 50 | 48,BACE_232,1,1.0,S(=O)(=O)(CC(NC(OCn1nc(cc1C)C)=O)C(=O)NC(C(O)CC(C(=O)NC(C(C)C)C(=O)NCC(C)C)C)CC(C)C)C 51 | 49,BACE_231,1,1.0,OC(C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C([NH3+])Cc1c2c([nH]c1)cccc2)Cc1c2c([nH]c1)cccc2)CO)CCC(=O)[O-])C(C)C)CC(=O)N)CC(C)C)CC(C(=O)NC(C(=O)NC(CCC(=O)[O-])C(=O)NC(Cc1ccccc1)C(=O)[O-])C)C 52 | 50,BACE_205,0,1.0,O=C1NC(=NC(=C1)CCC)N 53 | 51,BACE_204,0,1.0,O1CC[NH+](CC1)CC(O)COc1cc(ccc1C)C 54 | 52,BACE_203,0,1.0,n1c2c(ccc1N)cccc2 55 | 53,BACE_202,0,1.0,n1ccc2c(cccc2)c1N 56 | 54,BACE_201,0,1.0,Clc1cc(ccc1)CCc1cccnc1N 57 | 55,BACE_200,0,1.0,n1c2c(nc(N)c1N1CCCC1)cccc2 58 | 56,BACE_196,0,1.0,Fc1ccc(cc1)CC1CC[NH2+]CC1 59 | 57,BACE_194,0,1.0,n1cccc(NCc2ccccc2)c1N 60 | 58,BACE_193,0,1.0,o1cccc1C[N@H+](Cc1cc(ccc1)CCc1cccnc1N)C 61 | 59,BACE_191,0,1.0,O=C1NC(=NC(=C1)CCc1cc2[nH]ccc2cc1)N 62 | 60,BACE_190,0,1.0,O1CCC(OC(=O)[C@@H]2[NH2+]C[C@]3(C2)c2c(NC3=O)cccc2)CC1 63 | 61,BACE_188,0,1.0,[NH+]=1[C@](N=C(c2ccccc2)C=1N)(C)c1cc(ccc1)-c1cccnc1 64 | 62,BACE_187,0,1.0,O1[C@@H]2COCC[C@@]2(N=C1N)c1cc(ccc1)-c1cncnc1 65 | 63,BACE_186,0,1.0,[nH]1c2cc(ccc2cc1)CCc1nc(N)ccc1 66 | 64,BACE_182,0,1.0,S1CC[C@@](N=C1N)(C)c1ccc(OC)cc1 67 | 65,BACE_178,0,1.0,O=C([O-])[C@@H](\[NH+]=C\1/NC(Cc2c/1cccc2)(C)C)Cc1ccccc1 68 | 66,BACE_177,0,1.0,Clc1cc2nc(n(c2cc1)CCCC(=O)N(C)C1CCCCC1)N 69 | 67,BACE_176,0,1.0,O(C(=O)[C@@H]1[NH2+]C[C@]2(C1)c1c(NC2=O)cccc1)C1(CCCCC1)C#N 70 | 68,BACE_175,0,1.0,O(C)c1cc(ccc1)-c1cc(ccc1)CCc1nc(N)ccc1 71 | 69,BACE_174,0,1.0,S(=O)(=O)(Nc1cc(Nc2ncnc(c2)-c2ccccc2)ccc1C)C 72 | 70,BACE_173,0,1.0,Brc1cc2c(SCC[C@@H]2OC(=O)[C@@H]2[NH2+]C[C@]3(C2)c2c(NC3=O)cccc2)cc1 73 | 
71,BACE_172,0,1.0,O=C1N(C)C(N[C@](C1)(C)[C@@H]1C[C@H]1c1ccccc1)=N 74 | 72,BACE_168,0,1.0,[nH]1c2cc(ccc2cc1)-c1cc(ccc1)CNc1cccnc1N 75 | 73,BACE_167,0,1.0,Clc1cc2nc(n(c2cc1)[C@H](CC(=O)NC1C2CC3CC1CC(C2)C3)CC)N 76 | 74,BACE_166,0,1.0,O(C)c1ccc(cc1)[C@@]1([NH+]=C(N)CC1)c1cc(ccc1)-c1cncnc1 77 | 75,BACE_164,0,1.0,O=C1N(Cc2ccc(cc2)CNC(=O)Nc2ccc(cc2)C#N)C(N[C@@]1(CC(C)C)C)=N 78 | 76,BACE_163,0,1.0,Clc1cc(ccc1)-c1cc(ccc1)[C@@]1(OCCC(=[NH+]1)N)C 79 | 77,BACE_159,0,1.0,O(Cc1ncccc1)c1ccc(cc1CNc1cccnc1N)-c1cc2[nH]ccc2cc1 80 | 78,BACE_158,0,1.0,Brc1cc2c(S(=O)(=O)CC[C@@H]2OC(=O)[C@@H]2[NH2+]C[C@]3(C2)c2c(NC3=O)cccc2)cc1 81 | 79,BACE_157,0,1.0,O=C(NC(=[NH2+])N)Cn1c(ccc1-c1ccccc1)-c1ccccc1 82 | 80,BACE_154,0,1.0,O(C)c1ccc(cc1)[C@@]1([NH+]=C(N)[C@H](OC)C1)c1cc(ccc1)-c1cccnc1 83 | 81,BACE_152,0,1.0,Fc1cc(N2C3(CC[NH+](CC3)Cc3ccc(NC(=O)C)cc3)C(=NC2=O)NC2CCCCC2)ccc1 84 | 82,BACE_150,0,1.0,S(=O)(=O)(CCCCC)c1cc(ccc1)C(=O)N[C@H]([C@H](O)C[NH2+][C@H](C(=O)NC1CCCCC1)C)Cc1ccccc1 85 | 83,BACE_149,0,1.0,S(=O)(=O)(N(C)c1nc2CC[C@@H](CCc3cc(C[C@]([NH3+])(C)C(OCc(c1)c2)=O)ccc3)C)C 86 | 84,BACE_147,0,1.0,S(Oc1cc(cc(OCC(=O)NCCCCC[NH3+])c1)C(=O)N[C@H](C)c1ccc(F)cc1)(=O)(=O)Cc1ccccc1 87 | 85,BACE_145,0,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)[C@H](O)[C@@H]1[NH2+]CCN(C1)C(=O)c1ccccc1 88 | 86,BACE_144,0,1.0,Clc1cc(cc(Cl)c1NC(=O)C)C\N=C(\NC(=O)Cn1c2c(cccc2)cc1)/N 89 | 87,BACE_143,0,1.0,Brc1cc(ccc1O)C[C@@H]1CS(=O)(=O)C[C@H]([NH2+]Cc2cc(ccc2)C2CC2)[C@H]1O 90 | 88,BACE_141,0,1.0,Clc1cc2CC([NH+]=C(N[C@@H](Cc3ccccc3)C3=NOC(=O)[N-]3)c2cc1)(C)C 91 | 89,BACE_140,0,1.0,Clc1ccccc1-c1n(CC(=O)NC(=[NH2+])N)c(cc1)-c1ccc(Oc2ccc(cc2)C(=O)C)cc1 92 | 90,BACE_139,0,1.0,s1c2c(nc1C)C(=[NH+][C@@]2(c1cc(ccc1)-c1cncnc1)c1ccc(OC)cc1)N 93 | 91,BACE_137,0,1.0,O=C1N(C)C(N[C@@]1(CC1CCCCC1)CCC1CCCCC1)=N 94 | 92,BACE_135,0,1.0,Brc1cc(ccc1)C(=O)Nc1ccc(cc1)-c1n(CC(=O)NC(=[NH2+])N)c(cc1)-c1ccccc1 95 | 93,BACE_134,0,1.0,O=C1N[C@@H](C[C@@H](CCCCCCCC(=O)N[C@H]1C)C)[C@@H](O)C[C@H](C(=O)NCCCC)C 96 | 
94,BACE_133,0,1.0,Clc1ccccc1-c1scc(-c2ccc(OCCC)cc2)c1CC(=O)NC(=[NH2+])N 97 | 95,BACE_130,0,1.0,Clc1cc2CC([NH+]=C(N[C@@H](Cc3ccccc3)C=3NC(=O)C(=CN=3)C#N)c2cc1)(C)C 98 | 96,BACE_129,0,1.0,Clc1cc2CC([NH+]=C(N[C@@H](Cc3cscc3-c3cn[nH]c3)C(=O)[O-])c2cc1)(C)C 99 | 97,BACE_124,0,1.0,Clc1ccc(nc1)C(=O)Nc1cc(ccc1)[C@]1([NH+]=C(N)CN(C)C1=O)C 100 | 98,BACE_121,0,1.0,FC(F)(F)c1cc(ccc1)C[NH2+]C[C@@H](O)[C@@H](NC(=O)c1cc(N2CCCC2=O)c2nccc(c2c1)CC)Cc1ccccc1 101 | 99,BACE_117,0,1.0,O=C(NC[C@H]([NH3+])C(=O)N[C@@H](C(C)C)C(=O)N[C@@H](CC(C)C)C(=O)NC[C@](O)(CCc1ccccc1)C(=O)Nc1cc(ccc1)C(=O)[O-])c1nnn[n-]1 102 | 100,BACE_115,0,1.0,S1C[C@H](NC(=O)[C@@H](NC(=O)[C@@H]2[NH2+]CCC[C@@H]2SC\C=C\C1)C)[C@@H](O)C[C@H](C(=O)NCCCC)C 103 | 101,BACE_114,0,1.0,O(C(=O)[C@H]1N(CCC1)C(=O)[C@H](N(C(=O)[C@@H](NC(=O)[C@@H](NC(=O)C[C@H](O)[C@@H](NC(=O)[C@@H](N(C(=O)[C@@H](NC(=O)[C@@H](O)C)C(C)C)C)CCC(=O)N)CC1CCCCC1)CC(C)C)C)C)CC1CCCCC1)C 104 | 102,BACE_112,0,1.0,O=C1N(CCC1)c1cc(cc(NCC)c1)C(=O)N[C@H]([C@H](O)C[NH2+]C1CCCCC1)Cc1ccccc1 105 | 103,BACE_111,0,1.0,Clc1cc2CC([NH+]=C(N[C@@H](Cc3ccccc3)C=3NC(=O)c4c(N=3)ccnc4)c2cc1)(C)C 106 | 104,BACE_109,0,1.0,S1C[C@H](NC(OC(C)(C)C)=O)C(=O)N[C@@H](C)C(=O)N[C@@H](CSC\C=C\C1)[C@@H](O)C[C@H](C(=O)N[C@@H](C(C)C)C(=O)NCCCC)C 107 | 105,BACE_108,0,1.0,S1(=O)(=O)C[C@@H](Cc2cc(C[C@@H]3N(CCC)C(OC3)=O)c(O)cc2)[C@H](O)[C@@H]([NH2+]Cc2cc(ccc2)C(C)C)C1 108 | 106,BACE_106,0,1.0,S1(=O)(=O)C[C@@H](Cc2cc(F)c3NCC4(CCC(F)(F)CC4)c3c2)[C@H](O)[C@@H]([NH2+]Cc2cc(ccc2)C(C)(C)C)C1 109 | 107,BACE_105,0,1.0,S(=O)(=O)(N[C@@H]1C[C@H](C[C@@H](C1)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](CC1CCCCC1)C(=O)N1CCC[C@H]1C(OC)=O)C)C)CC1CCCCC1)C(=O)N[C@H](C)C1CCCCC1)C 110 | 108,BACE_104,0,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)[C@H](O)[C@@H]1[NH2+]CC[N@@H+](C1)Cc1ccccc1 111 | 109,BACE_103,0,1.0,S(=O)(=O)(C(CCC)CCC)C[C@@H](NC(OCc1ccccc1)=O)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(OC)ccc1)Cc1ccccc1 112 | 
110,BACE_100,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H]([C@H](O)C[NH2+]C1CC1)Cc1ccccc1)C(=O)c1ccccc1)C 113 | 111,BACE_99,1,1.0,Clc1ccc(nc1)C(=O)Nc1cc([C@]2([NH+]=C(N)[C@@H]3[C@H](C2)C3)C)c(F)cc1 114 | 112,BACE_94,1,1.0,[NH+]=1[C@](N=C(C)C=1N)(C1CC1)c1cc(ccc1)-c1cc(cnc1)C#CC 115 | 113,BACE_93,1,1.0,O1C[C@H](NC(=O)c2cc(cc(c2)[C@@H](O)[NH2+][C@H](C\C=C\C1)c1ccccc1)C)[C@H](O)C[NH2+]Cc1cc(ccc1)C(C)C 116 | 114,BACE_90,1,1.0,FC(F)(F)c1cc(ccc1)C[NH2+]C[C@@H](O)[C@@H](NC(=O)C=1C=C(N2CCCC2=O)C(=O)N(C=1)C1CCCC1)Cc1ccccc1 117 | 115,BACE_88,1,1.0,O=C(N)CC[C@H](N(C(=O)[C@@H](NC(=O)[C@@H](O)C)C(C)C)C)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](CC1CCCCC1)C(=O)[O-])C)C)CC1CCCCC1 118 | 116,BACE_87,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H]([C@H](O)[C@@H]1[NH2+]C(N(C)C1=O)(C)C)Cc1ccccc1)C(=O)N[C@H](C)c1ccc(F)cc1)C 119 | 117,BACE_84,1,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)C)[C@H](O)C[NH2+]C1(CCCCC1)c1cc(-n2nccc2)ccc1 120 | 118,BACE_83,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](Cc1ccccc1)C(=O)N1CCC[C@H]1C(OC)=O)C)C)Cc1ccccc1)C(=O)N[C@H](C)c1ccccc1)C 121 | 119,BACE_81,1,1.0,O=C1N(CC[C@]12[N@H+](CCC2)CCC)[C@H](C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(ccc1)C(C)C)Cc1ccccc1)C 122 | 120,BACE_75,1,1.0,O(C(C)(C)C)C(=O)N[C@@H](C(C)C)C(=O)N([C@@H](CCC(=O)N)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](Cc1ccccc1)C(=O)N1CCC[C@@H]1C(OC)=O)C)C)Cc1ccccc1)C 123 | 121,BACE_72,1,1.0,Clc1cc(cc(Cl)c1NC(=O)C)CNC(NC(=O)Cn1c2cc(ccc2cc1)C#N)=N 124 | 122,BACE_71,1,1.0,O=C(N[C@H]([C@H](O)C[NH2+][C@H]1CC(Cc2nn(cc12)CC(C)(C)C)(C)C)Cc1ccccc1)C 125 | 123,BACE_70,1,1.0,Fc1c(cccc1OC)-c1cc(ccc1)[C@]1([NH+]=C(N2CC(F)(F)CN=C12)N)c1ccncc1 126 | 124,BACE_66,1,1.0,O=C(N[C@H](C(=O)[O-])C)[C@@H]1CCC[C@H]1[C@H](O)[C@@H](NC(=O)[C@@H](NC(=O)[C@@H](NC(=O)[C@@H]([NH3+])CCC(=O)[O-])C(C)C)CC(=O)N)CC(C)C 127 | 
125,BACE_65,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](C(C)C)C(=O)Nc1cc(cc(c1)C(=O)[O-])C(=O)[O-])COc1cc(F)cc(F)c1)C(=O)N[C@H](C)c1ccccc1)C 128 | 126,BACE_64,1,1.0,O1c2c(cc(cc2)CC)[C@@H]([NH2+]C[C@@H](O)[C@H]2NC(=O)CCCCCCCc3cc(C2)ccc3)CC12CCC2 129 | 127,BACE_63,1,1.0,O=C1N(CCC1)c1cc(cc(NCC)c1)C(=O)N[C@H]([C@H](O)C[NH2+]C(CCCC(C)C)(C)C)Cc1ccccc1 130 | 128,BACE_60,1,1.0,O=C1N[C@@H](C[C@@H](CCCCCCCCC(=O)N(C)[C@H]1C)C)[C@H](O)C[NH2+]Cc1cc(ccc1)C(C)C 131 | 129,BACE_58,1,1.0,O1Cc2cc(cc(c2)C(=O)N[C@H](CCc2cc(C[C@]([NH3+])(C)C1=O)ccc2)c1ccccc1)-c1ccccc1C#N 132 | 130,BACE_57,1,1.0,O=C1N(Cc2ccc(cc2)CN2C[C@@H](NC2=O)CCC)C(N[C@@]1(CC1CCCCC1)CCC1CCCCC1)=N 133 | 131,BACE_56,1,1.0,S1(=O)(=O)N(c2cc(cc3N(CCN(CC1)c23)CC)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(OC)ccc1)Cc1ccccc1)C 134 | 132,BACE_53,1,1.0,O(C)c1cc(ccc1)C[NH2+]C[C@@H](O)[C@@H](NC(=O)c1cc(N2CCCC2=O)c2c(n(cc2)CC)c1)CC=1CCC=CC=1 135 | 133,BACE_47,1,1.0,O1c2c(cc(cc2)CC)[C@@H]([NH2+]C[C@@H](O)[C@H]2NC(=O)CCCCCCc3cc(C2)ccc3)CC12CCC2 136 | 134,BACE_43,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H]([C@H](O)C[NH2+]C1CC1)Cc1ccccc1)C(=O)N[C@H](C)c1ccccc1)C 137 | 135,BACE_37,1,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)C)[C@H](O)C[NH2+][C@]1(CCc2n[nH]cc2C1)c1cc(ccc1)C(C)(C)C 138 | 136,BACE_36,1,1.0,O(C)c1c(cc(cc1C)[C@@]1([NH+]=C(N)C(=N1)C)c1cc(ccc1)-c1cncnc1)C 139 | 137,BACE_35,1,1.0,S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H](C)c1ccc(F)cc1)-c1oc(nn1)[C@]([NH3+])(Cc1ccccc1)C)C 140 | 138,BACE_32,1,1.0,s1cc(cc1)C[C@H](NC(=O)c1cc(nc(NC[C@H]2C[C@@H]2C)c1)N(S(=O)(=O)C)C)[C@@H]([NH3+])CCCC 141 | 139,BACE_31,1,1.0,S(CC[C@H](NC(=O)[C@@H](NC(=O)C)CC(C)C)C(=O)N[C@H]([C@@H](O)[C@@H]1CC(=O)C[C@H]1C(=O)N[C@@H](C(C)C)C(=O)[NH2+]CCCC)CC(C)C)C 142 | 140,BACE_30,1,1.0,s1c2OC(C[C@H]([NH2+]C[C@@H](O)[C@@H](NC(=O)C)Cc3ccccc3)c2cc1CC(C)(C)C)(C)C 143 | 141,BACE_27,1,1.0,Clc1cc2CC([NH+]=C(N[C@@H](C[C@H]3[C@H](SC=C3c3cn[nH]c3)CCC)C(=O)[O-])c2cc1)(C)C 144 | 
142,BACE_20,1,1.0,S(=O)(=O)(N1C[C@@H]([NH2+]CC1)[C@@H](O)[C@@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)Cc1cc(F)cc(F)c1)c1ccccc1 145 | 143,BACE_19,1,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)[C@H](O)[C@@H]1[NH2+]CN(Cc2ccccc2)C1=O 146 | 144,BACE_17,1,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)[C@H](O)[C@@H]1[NH2+]C[C@H](OCc2ccccc2)C1 147 | 145,BACE_12,1,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)[C@H](O)[C@@H]1[NH2+]CCN(Cc2ccccc2)C1=O 148 | 146,BACE_11,1,1.0,Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(CCC)CCC)[C@H](O)[C@@H]1[NH2+]C[C@H](Oc2ccccc2)C1 149 | 147,BACE_10,1,1.0,O=C1N(CCCC1)C(C)(C)[C@@H]1C[C@@H](CCC1)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(ccc1)C(C)C)Cc1ccccc1 150 | 148,BACE_9,1,1.0,O1c2c(cc(cc2)CC)[C@@H]([NH2+]C[C@@H](O)[C@H]2NC(=O)C=3C=CC(=O)N(CCCCc4cc(C2)ccc4)C=3)CC12CCC2 151 | 149,BACE_8,1,1.0,Fc1c2c(ccc1)[C@@]([NH+]=C2N)(C=1C=C(C)C(=O)N(C=1)CC)c1cc(ccc1)-c1cc(cnc1)C#CC 152 | 150,BACE_7,1,1.0,S(=O)(=O)(CCCCC)C[C@@H](NC(=O)c1cccnc1)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(ccc1)CC)Cc1cc(F)cc(F)c1 153 | 151,BACE_1,1,1.0,O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2ccccc2C)C)CC1(C)C 154 | -------------------------------------------------------------------------------- /data/holdout_bbbp.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 0,Enalapril,0,1.0,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N2CCC[C@H]2C(O)=O 3 | 1,Efavirenz,0,1.0,FC(F)(F)[C@]1(OC(=O)Nc2ccc(Cl)cc12)C#CC3CC3 4 | 2,Dihydroergotamine,0,1.0,CN1C[C@@H](C[C@H]2[C@H]1Cc3c[nH]c4cccc2c34)C(=O)N[C@]5(C)O[C@@]6(O)[C@@H]7CCCN7C(=O)[C@H](Cc8ccccc8)N6C5=O 5 | 3,nicotine,1,1.0,CN1CCC[C@H]1c2cccnc2 6 | 4,MPTP,1,1.0,CN1CCC(=CC1)c2ccccc2 7 | 5,Dexrazoxane,0,1.0,C[C@@H](CN1CC(=O)NC(=O)C1)N2CC(=O)NC(=O)C2 8 | 6,Dacarbazine,1,1.0,CN(C)N\N=C/1N=CN=C1C(N)=O 9 | 7,methyprylon,1,1.0,CCC1(CC)C(=O)NCC(C)C1=O 10 | 8,Abacavir,1,1.0,Nc1nc(NC2CC2)c3ncn([C@@H]4C[C@H](CO)C=C4)c3n1 11 | 9,9-C7,1,1.0,c1(c(cc2n(ncc2c1)CCN)Cl)Cl 12 | 
10,9-C15,1,1.0,c1c2CCOc2c(cc1OC)CN[C@@H]1CCCN[C@H]1c1ccccc1 13 | 11,lorcainide,1,1.0,CC(C)N1CCC(CC1)N(C(=O)Cc2ccccc2)c3ccc(Cl)cc3 14 | 12,9-C1,1,1.0,n1(c(c2nc[nH]c2n(c1=O)C)=O)C 15 | 13,BBcpd10,0,1.0,c1(nccc(c1)c1nc([nH]n1)N)N(C)C 16 | 14,8-5,0,1.0,CN(C)Cc1oc(cc1)CSCCNC=1NC=C(CN1)Cc1cnc(cc1)C 17 | 15,kynurenic acid,1,1.0,OC(=O)C1=CC(=O)c2ccccc2N1 18 | 16,ketamine,1,1.0,CNC1(CCCCC1=O)c2ccccc2Cl 19 | 17,U50488,1,1.0,CN(C1CCCC[C@H]1N2CCCC2)C(=O)Cc3ccc(Cl)c(Cl)c3 20 | 18,tenoxicam,0,1.0,CN1C(=C(\O)Nc2ccccn2)/C(=O)c3sccc3[S]1(=O)=O 21 | 19,23,1,1.0,[NH]C(CC(C)C([N@@](C(C)(C)C)C(N)(C)N)(C)C)c1c(c(c[nH+][o+]1)C)[O-] 22 | 20,harman,1,1.0,Cc1nccc2c1[nH]c3ccccc23 23 | 21,harmalol,1,1.0,CC1=C2NC3=CC(=O)C=CC3=C2C=CN1 24 | 22,glaziovine,1,1.0,COc1cc2CCN(C)C3CC4(C=CC(=O)C=C4)c(c1O)c23 25 | 23,ethosuximide,1,1.0,CCC1(C)CC(=O)NC1=O 26 | 24,skb-i,0,1.0,c1cc2c(C(N[C@@H](CC)c3ccccc3)=O)c(c(nc2cc1)c1ccccc1)Cn1cncc1 27 | 25,skb-h,0,1.0,c1cc2c(c(c(c3ccccc3)nc2cc1)OCCNC([C@@H]1CCCN1)=O)C(=O)N[C@@H](CC)c1ccccc1 28 | 26,skb-g,0,1.0,c1cc2c(c(OCCNC(Cc3ncccc3)=O)c(c3ccccc3)nc2cc1)C(N[C@@H](CC)c1ccccc1)=O 29 | 27,skb-f,0,1.0,c1ccc2c(c(C(N[C@@H](CC)c3ccccc3)=O)c(OCCNC(Cc3c(cccc3)C(=O)O)=O)c(c3ccccc3)n2)c1 30 | 28,5-69,1,1.0,c1nc(C2CCN(CC2)C(NC2CCCCC2)=S)c[nH]1 31 | 29,droperidol,1,1.0,Fc1ccc(cc1)C(=O)CCCN2CCC(=CC2)N3C(=O)Nc4ccccc34 32 | 30,SB204484,1,1.0,c1(ccc(c(c1)Cl)Cl)CC(N1[C@H](CN2CCCC2)c2n(CC1)ncn2)=O 33 | 31,SB204454,0,1.0,c1(ccc(c(c1)Cl)Cl)CC(N1[C@H](C[C@]2(CC1)NC(NC2=O)=O)CN1CCCC1)=O 34 | 32,rufloxacin,0,1.0,CN1CCN(CC1)c2c(F)cc3C(=O)C(=CN4CCSc2c34)C(O)=O 35 | 33,Pirenzepine,1,1.0,CN1CCN(CC1)CC(=O)N2c3ccccc3C(=O)Nc4cccnc24 36 | 34,Delavirdine,0,1.0,CC(C)Nc1cccnc1N2CCN(CC2)C(=O)c3[nH]c4ccc(N[S](C)(=O)=O)cc4c3 37 | 35,roxindole,1,1.0,C[S](O)(=O)=O.Oc1ccc2[nH]cc(CCCCN3CCC(=CC3)c4ccccc4)c2c1 38 | 36,piroxicam,0,1.0,CN1C(=C(/O)Nc2ccccn2)/C(=O)c3ccccc3[S]1(=O)=O 39 | 37,meloxicam,0,1.0,CN1C(=C(/O)Nc2sc(C)cn2)/C(=O)c3ccccc3[S]1(=O)=O 40 | 
38,cyclophenazine,1,1.0,FC(F)(F)c1ccc2Sc3ccccc3N(CCCN4CCN(CC4)C5CC5)c2c1 41 | 39,cocaine,1,1.0,COC(=O)[C@H]1[C@H](CC2CCC1N2C)OC(=O)c3ccccc3 42 | 40,isoxicam,0,1.0,CN1C(=C(\O)Nc2cc(C)on2)/C(=O)c3ccccc3[S]1(=O)=O 43 | 41,ICI197067,1,1.0,CC(C)[C@@H](CN1CCCC1)N(C)C(=O)Cc2ccc(Cl)c(Cl)c2 44 | 42,chlorzoxazone,1,1.0,Clc1ccc2OC(=O)Nc2c1 45 | 43,chlordiazepoxide,1,1.0,CN=C1CN(O)C(=C2C=C(Cl)C=CC2=N1)c3ccccc3 46 | 44,centazolone,1,1.0,NN1C=Nc2cc3ccccc3cc2C1=O 47 | 45,Northioridazine,1,1.0,CSc1ccc2Sc3ccccc3N(CCC4CCCNC4)c2c1 48 | 46,cannabidiol,1,1.0,CCCCCc1cc(O)c(C2C=C(C)CC[C@H]2C(C)=C)c(O)c1 49 | 47,buclizine,1,1.0,CC(C)(C)c1ccc(CN2CCN(CC2)C(c3ccccc3)c4ccc(Cl)cc4)cc1 50 | 48,bromazepam,1,1.0,Brc1ccc2NC(=O)CN=C(c3ccccn3)c2c1 51 | 49,enoxacin,0,1.0,CCN1C=C(C(O)=O)C(=O)c2cc(F)c(nc12)N3CCNCC3 52 | 50,Methylcyclopentane,1,1.0,CC1CCCC1 53 | 51,Indinavir,1,1.0,CC(C)(C)NC(=O)[C@@H]1CN(CCN1C[C@@H](O)C[C@@H](Cc2ccccc2)C(=O)N[C@@H]3[C@H](O)Cc4ccccc34)Cc5cccnc5 54 | 52,biperiden,1,1.0,OC(CCN1CCCCC1)(C2CC3CC2C=C3)c4ccccc4 55 | 53,benztropine,1,1.0,CN1C2CCC1CC(C2)OC(c3ccccc3)c4ccccc4.O[S](O)(=O)=O 56 | 54,benperidol,1,1.0,Fc1ccc(cc1)C(=O)CCCN2CCC(CC2)N3C(=O)Nc4ccccc34 57 | 55,azaperone,1,1.0,Fc1ccc(cc1)C(=O)CCCN2CCN(CC2)c3ccccn3 58 | 56,apomorphine,1,1.0,CN1CCc2cccc3c2[C@H]1Cc4ccc(O)c(O)c34 59 | 57,difloxacin,0,1.0,CN1CCN(CC1)c2cc3N(C=C(C(O)=O)C(=O)c3cc2F)c4ccc(F)cc4 60 | 58,Flumazenil,1,1.0,CCOC(=O)c1ncn2c1CN(C)C(=O)c3cc(F)ccc23 61 | 59,warfarin,0,1.0,CC(=O)CC(c1ccccc1)C2=C(O)Oc3ccccc3C2=O 62 | 60,verapamil,0,1.0,COc1ccc(CCN(C)CCCC(C#N)(C(C)C)c2ccc(OC)c(OC)c2)cc1OC 63 | 61,valinomycin,0,1.0,CC(C)[C@@H]1NC(=O)[C@H](C)OC(=O)C(NC(=O)[C@H](OC(=O)[C@@H](NC(=O)[C@H](C)OC(=O)[C@H](NC(=O)[C@H](OC(=O)[C@@H](NC(=O)[C@H](C)OC(=O)[C@H](NC(=O)[C@H](OC1=O)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C 64 | 62,Triamterene,0,1.0,Nc1nc(N)c2nc(c3ccccc3)c(N)nc2n1 65 | 63,tolazamide,0,1.0,Cc1ccc(cc1)[S](=O)(=O)NC(=O)NN2CCCCCC2 66 | 64,thioguanine,0,1.0,NC1=NC(=S)c2[nH]cnc2N1 67 | 
65,BRL52871,1,1.0,c1(ccc(c(c1)Cl)Cl)CC(N1[C@@H](c2c(CC1)scc2)CN1CCCC1)=O 68 | 66,BRL52580,1,1.0,c1(cc(c(cc1)Cl)Cl)CC(N1[C@@H](c2c(CC1)cccc2)CN1CCCC1)=O 69 | 67,Tifluadom,1,1.0,CN1C(CNC(=O)c2cscc2)CN=C(c3ccccc3F)c4ccccc14 70 | 68,Mefloquine,1,1.0,OC(C1CCCCN1)c2cc(nc3c2cccc3C(F)(F)F)C(F)(F)F 71 | 69,Bretazenil,1,1.0,CC(C)(C)OC(=O)c1ncn2c3cccc(Br)c3C(=O)N4CCC[C@H]4c12 72 | 70,testolactone,0,1.0,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1CCC(=O)O2 73 | 71,Terconazole,0,1.0,CC(C)N1CCN(CC1)c2ccc(OC[C@H]3CO[C@@](Cn4cncn4)(O3)c5ccc(Cl)cc5Cl)cc2 74 | 72,sulfasalazine,0,1.0,OC(=O)C1=C\C(C=CC1=O)=N/Nc2ccc(cc2)[S](=O)(=O)Nc3ccccn3 75 | 73,Stanozolol,0,1.0,C[C@]1(O)CC[C@H]2[C@@H]3CC[C@H]4Cc5[nH]ncc5C[C@]4(C)[C@H]3CC[C@]12C 76 | 74,Spironolactone,0,1.0,CC(=O)S[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@H]3CC[C@@]4(C)[C@@H](CC[C@@]45CCC(=O)O5)[C@H]13 77 | 75,scopolamine,0,1.0,CN1[C@@H]2CC(C[C@H]1[C@@H]3O[C@H]23)OC(=O)[C@H](CO)c4ccccc4 78 | 76,rifampin,0,1.0,CC1C=CC=C(C(=O)NC2=C(C3=C(C(=C4C(=C3C(=O)C2=CNN5CCN(CC5)C)C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C 79 | 77,Thioperamide,1,1.0,S=C(NC1CCCCC1)N2CCC(CC2)c3[nH]cnc3 80 | 78,Zolantidine,1,1.0,OC(=O)\C=C/C(O)=O.OC(=O)\C=C/C(O)=O.C1CCN(CC1)Cc2cccc(OCCCNc3sc4ccccc4n3)c2 81 | 79,ribavirin,0,1.0,NC(=O)c1ncn(n1)[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O 82 | 80,puromycin,0,1.0,COc1ccc(C[C@H](N)C(=O)NC2[C@@H](O)[C@@H](O[C@@H]2CO)n3cnc4c(ncnc34)N(C)C)cc1 83 | 81,proscillaridin,0,1.0,CC1OC(O[C@H]2CC[C@]3(C)[C@H]4CC[C@]5(C)[C@H](CC[C@]5(O)[C@@H]4CCC3=C2)C6=COC(=O)C=C6)C(O)C(O)C1O 84 | 82,Vancomycin,0,1.0,CN[C@H](CC(C)C)C(=O)NC1[C@H](O)c2ccc(Oc3cc4cc(Oc5ccc(cc5Cl)[C@@H](O)[C@@H]6NC(=O)[C@H](NC(=O)[C@@H]4NC(=O)[C@H](CC(N)=O)NC1=O)c7ccc(O)c(c7)c8c(O)cc(O)cc8[C@@H](NC6=O)C(O)=O)c3O[C@@H]9O[C@H](CO)[C@@H](O)[C@H](O)[C@H]9O[C@H]%10C[C@](C)(N)[C@H](O)[C@H](C)O%10)c(Cl)c2 85 | 83,Thiotepa,1,1.0,S=[P](N1CC1)(N2CC2)N3CC3 86 | 84,Teniposide,1,1.0,COc1cc(cc(OC)c1O)[C@H]2[C@@H]3[C@H](COC3=O)[C@H](OC4OC5COC(OC5C(O)C4O)c6sccc6)c7cc8OCOc8cc27 87 | 
85,Guanadrel,0,1.0,NC(N)=NCC1COC2(CCCCC2)O1 88 | 86,Flucytosine,1,1.0,NC1=C(F)C=NC(=O)N1 89 | 87,Ceftazidime,1,1.0,CC(C)(O/N=C(C(=O)N[C@H]1[C@H]2SCC(=C(N2C1=O)C([O-])=O)C[n+]3ccccc3)/c4csc(N)n4)C(O)=O 90 | 88,Tibolone,1,1.0,C[C@@H]1CC2=C(CCC(=O)C2)[C@H]3CC[C@@]4(C)[C@@H](CC[C@@]4(O)C#C)[C@H]13 91 | 89,Phenserine,1,1.0,CN1CC[C@]2(C)[C@H]1N(C)c3ccc(OC(=O)Nc4ccccc4)cc23 92 | 90,Phenylbutazone,0,1.0,CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3 93 | 91,phenoxybenzamine,0,1.0,CC(COc1ccccc1)N(CCCl)Cc2ccccc2 94 | 92,Phenolphthalein,0,1.0,Oc1ccc(cc1)C2(OC(=O)c3ccccc23)c4ccc(O)cc4 95 | 93,phenazopyridine,0,1.0,Nc1ccc(N=Nc2ccccc2)c(N)n1 96 | 94,papaverine,0,1.0,COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC 97 | 96,AmphotericinB,0,1.0,C[C@H]1O[C@@H](O[C@@H]\2C[C@@H]3O[C@](O)(C[C@@H](O)C[C@@H](O)[C@H](O)CC[C@@H](O)C[C@@H](O)CC(=O)O[C@@H](C)[C@H](C)[C@H](O)[C@@H](C)\C=C/C=C\C=C/C=C\C=C/C=C\C=C2)C[C@H](O)[C@H]3C(O)=O)[C@@H](O)[C@@H](N)[C@@H]1O 98 | 97,Zonisamide,1,1.0,N[S](=O)(=O)Cc1noc2ccccc12 99 | 98,Topotecan,1,1.0,CC[C@@]1(O)C(=O)OCC2=C1C=C3N(Cc4cc5c(CN(C)C)c(O)ccc5nc34)C2=O 100 | 99,Tamoxifen,1,1.0,CC/C(c1ccccc1)=C(c2ccccc2)/c3ccc(OCCN(C)C)cc3 101 | 100,Nevirapine,1,1.0,Cc1ccnc2N(C3CC3)c4ncccc4C(=O)Nc12 102 | 101,Mirtazapine,1,1.0,CN1CCN2C(C1)c3ccccc3Cc4cccnc24 103 | 102,Oxaprozin,0,1.0,OC(=O)CCc1oc(c2ccccc2)c(n1)c3ccccc3 104 | 103,Oxandrolone,0,1.0,C[C@]1(O)CC[C@H]2[C@@H]3CC[C@H]4CC(=O)OC[C@]4(C)[C@H]3CC[C@]12C 105 | 104,Sulfafurazole,1,1.0,Cc1noc(N[S](=O)(=O)c2ccc(N)cc2)c1C 106 | 105,Sulfadiazine,1,1.0,Nc1ccc(cc1)[S](=O)(=O)Nc2ncccn2 107 | 106,Stavudine,1,1.0,CC1=CN([C@@H]2O[C@H](CO)C=C2)C(=O)NC1=O 108 | 107,Ritonavir,0,1.0,CC(C)[C@H](NC(=O)N(C)Cc1csc(n1)C(C)C)C(=O)N[C@H](C[C@H](O)[C@H](Cc2ccccc2)NC(=O)OCc3scnc3)Cc4ccccc4 109 | 108,Mianserin,1,1.0,CN1CCN2C(C1)c3ccccc3Cc4ccccc24 110 | 109,Lupitidine,0,1.0,Cc1ccc(CC2=CN=C(NCCSCc3oc(cc3)C(C)(C)N)NC2=O)cn1 111 | 110,methotrexate,0,1.0,CN(Cc1cnc2nc(N)nc(N)c2n1)c3ccc(cc3)C(=O)N[C@@H](CCC(O)=O)C(O)=O 112 | 
111,mepiramine,0,1.0,COc1ccc(CN(CCN(C)C)c2ccccn2)cc1 113 | 112,mepenzolatebromide,0,1.0,[Br-].C[N+]1(C)CCCC(C1)OC(=O)C(O)(c2ccccc2)c3ccccc3 114 | 113,Pyrimethamine,1,1.0,CCc1nc(N)nc(N)c1c2ccc(Cl)cc2 115 | 114,Pyridostigminebromide,1,1.0,[Br-].CN(C)C(=O)Oc1ccc[n+](C)c1 116 | 115,Minocycline,1,1.0,C1[C@@H]2[C@](C(=C3C(c4c(ccc(c4C[C@@H]13)N(C)C)O)=O)O)(C(C(C(N)=O)=C([C@H]2N(C)C)O)=O)O 117 | 116,Mifepristone,0,1.0,CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@H](C[C@]12C)c5ccc(cc5)N(C)C 118 | 117,Didanosine,0,1.0,OC[C@@H]1CC[C@@H](O1)n2cnc3C(=O)N=CNc23 119 | 118,Ketoconazole,0,1.0,CC(=O)N1CCN(CC1)c2ccc(OC[C@H]3CO[C@@](Cn4ccnc4)(O3)c5ccc(Cl)cc5Cl)cc2 120 | 119,Isoquercitrin,0,1.0,OC[C@H]1O[C@@H](OC2=C(Oc3cc(O)cc(O)c3C2=O)c4ccc(O)c(O)c4)[C@H](O)[C@@H](O)[C@@H]1O 121 | 120,Ketorolac,0,1.0,OC(=O)C1CCn2c1ccc2C(=O)c3ccccc3 122 | 121,Itraconazole,0,1.0,CCC(C)N1N=CN(C1=O)c2ccc(cc2)N3CCN(CC3)c4ccc(OC[C@H]5CO[C@@](Cn6cncn6)(O5)c7ccc(Cl)cc7Cl)cc4 123 | 122,1-48,1,1.0,CCCN(CCC)CCc1ccc(c2c1CC(N2)=C)O 124 | 123,hydralazine,0,1.0,NNc1nncc2ccccc12 125 | 124,guanethidinesulfate,0,1.0,NC(N)=NCCN1CCCCCCC1 126 | 125,Cycloserine,1,1.0,NC1CONC1=O 127 | 126,Cromoglicicacid,0,1.0,OC(COc1ccc2OC(=CC(=O)c2c1)C(O)=O)COc3cccc4OC(=CC(=O)c34)C(O)=O 128 | 127,Clofazimine,0,1.0,CC(C)N=C1C=C2N(c3ccc(Cl)cc3)c4ccccc4N=C2C=C1Nc5ccc(Cl)cc5 129 | 128,trimetozine,1,1.0,COc1cc(cc(OC)c1OC)C(=O)N2CCOCC2 130 | 129,trihexyphenidyl,1,1.0,OC(CCN1CCCCC1)(C2CCCCC2)c3ccccc3 131 | 130,glycopyrrolate,0,1.0,[Br-].C[N+]1(C)CCC(C1)OC(=O)C(O)(C2CCCC2)c3ccccc3 132 | 131,furosemide,0,1.0,N[S](=O)(=O)c1cc(C(O)=O)c(NCc2occc2)cc1Cl 133 | 132,fluorouracil,0,1.0,FC1=CNC(=O)NC1=O 134 | 133,fenoterol,0,1.0,CC(Cc1ccc(O)cc1)NCC(O)c2cc(O)cc(O)c2 135 | 134,estrone,0,1.0,C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CCC2=O 136 | 135,Clidiniumbromide,0,1.0,[Br-].C[N+]12CCC(CC1)C(C2)OC(=O)C(O)(c3ccccc3)c4ccccc4 137 | 136,Cisapride,1,1.0,O.COC1CN(CCCOc2ccc(F)cc2)CCC1NC(=O)c3cc(Cl)c(N)cc3OC 138 | 
137,Chloroquine,1,1.0,CCN(CC)CCCC(C)Nc1ccnc2cc(Cl)ccc12 139 | 138,Cefradine,0,1.0,CC1=C(N2[C@H](SC1)[C@H](NC(=O)[C@H](N)C3=CCC=CC3)C2=O)C(O)=O 140 | 139,trazodone,1,1.0,Clc1cccc(c1)N2CCN(CCCN3N=C4C=CC=CN4C3=O)CC2 141 | 140,thebaine,1,1.0,COC1=CC=C2[C@H]3Cc4ccc(OC)c5O[C@@H]1[C@]2(CCN3C)c45 142 | 141,thc,1,1.0,CCCCCc1cc(O)c2[C@@H]3C=C(C)CC[C@H]3C(C)(C)Oc2c1 143 | 142,enkephalin,0,1.0,CC(C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)CNC(=O)[C@@H](C)NC(=O)[C@@H](N)Cc2ccc(O)cc2)C(N)=O 144 | 143,Dyclonine,0,1.0,[Cl].CCCCOc1ccc(cc1)C(=O)CCN2CCCCC2 145 | 144,Bromocriptine,0,1.0,CC(C)C[C@@H]1N2C(=O)[C@](NC(=O)[C@H]3CN(C)[C@@H]4Cc5c(Br)[nH]c6cccc(C4=C3)c56)(O[C@@]2(O)[C@@H]7CCCN7C1=O)C(C)C 146 | 145,Bretyliumtosilate,0,1.0,CC[N+](C)(C)Cc1ccccc1Br.Cc2ccc(cc2)[S]([O-])(=O)=O 147 | 146,Bacitracin,0,1.0,CCC(C)C(N)C1=NC(CS1)C(=O)N[C@@H](CC(C)C)C(=O)N[C@H](CCC(O)=O)C(=O)N[C@@H]([C@@H](C)CC)C(=O)NCCCC[C@@H]2NC(=O)[C@H](CC(N)=O)NC(=O)[C@@H](CC(O)=O)NC(=O)[C@H](Cc3[nH]cnc3)NC(=O)[C@@H](Cc4ccccc4)NC(=O)[C@@H](NC(=O)[C@@H](CCCN)NC2=O)[C@@H](C)CC 148 | 147,Azithromycin,0,1.0,CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)C(C)[C@@H](O[C@@H]3O[C@H](C)C[C@@H]([C@H]3O)N(C)C)[C@](C)(O)C[C@@H](C)CN(C)[C@H](C)[C@@H](O)[C@]1(C)O 149 | 148,Atovaquone,0,1.0,OC1=C(C2CCC(CC2)c3ccc(Cl)cc3)C(=O)C(=O)c4ccccc14 150 | 149,spiclomazine,1,1.0,[H+].[Cl-].Clc1ccc2Sc3ccccc3N(CCCN4CCC5(CC4)NC(=O)CS5)c2c1 151 | 150,rescinnamine,1,1.0,CO[C@H]1[C@@H](C[C@@H]2CN3CCc4c([nH]c5cc(OC)ccc45)[C@H]3C[C@@H]2[C@@H]1C(=O)OC)OC(=O)\C=C\c6cc(OC)c(OC)c(OC)c6 152 | 151,pyridazinone,1,1.0,O=C1C=CNN=C1 153 | 152,Domperidone,0,1.0,Clc1ccc2N(C3CCN(CCCN4C(=O)Nc5ccccc45)CC3)C(=O)Nc2c1 154 | 153,Diethylstilbestrol,0,1.0,CC\C(c1ccc(O)cc1)=C(\CC)c2ccc(O)cc2 155 | 154,dicyclomine,0,1.0,CCN(CC)CCOC(=O)C1(CCCCC1)C2CCCCC2 156 | 155,Dehydrocholic acid,0,1.0,C[C@H](CCC(O)=O)[C@H]1CC[C@H]2[C@H]3[C@H](CC(=O)[C@]12C)[C@@]4(C)CCC(=O)C[C@H]4CC3=O 157 | 156,dapsone,0,1.0,Nc1ccc(cc1)[S](=O)(=O)c2ccc(N)cc2 158 | 
157,Trovafloxacin,1,1.0,NC1[C@H]2CN(C[C@@H]12)c3nc4N(C=C(C(O)=O)C(=O)c4cc3F)c5ccc(F)cc5F 159 | 158,Trimethoprim,1,1.0,COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC 160 | 159,Praziquantel,1,1.0,O=C1CN(CC2N1CCc3ccccc23)C(=O)C4CCCCC4 161 | 160,Pravastatin,0,1.0,CC[C@H](C)C(=O)O[C@H]1C[C@H](O)C=C2C=C[C@H](C)[C@H](CC[C@@H](O)C[C@@H](O)CC(O)=O)[C@@H]12 162 | 161,Plicamycin,1,1.0,CO[C@@H]([C@@H]1Cc2cc3cc(O[C@H]4C[C@@H](O[C@H]5C[C@@H](O)[C@H](O)[C@@H](C)O5)[C@H](O)[C@@H](C)O4)c(C)c(O)c3c(O)c2C(=O)[C@H]1O[C@H]6C[C@@H](O[C@H]7C[C@@H](O[C@H]8C[C@](C)(O)[C@H](O)[C@@H](C)O8)[C@H](O)[C@@H](C)O7)[C@H](O)[C@@H](C)O6)C(=O)[C@@H](O)[C@@H](C)O 163 | 162,procyclidine,1,1.0,OC(CCN1CCCC1)(C2CCCCC2)c3ccccc3 164 | 163,primidone,1,1.0,CCC1(C(=O)NCNC1=O)c2ccccc2 165 | 164,pinoxepin,1,1.0,OCCN1CCN(CC\C=C/2c3ccccc3COc4ccc(Cl)cc24)CC1 166 | 165,piflutixol,1,1.0,OCC[C@@H]1CCN(CC\C=C2/c3ccc(F)cc3Sc4ccc(cc24)C(F)(F)F)CC1 167 | 166,clotrimazole,0,1.0,Clc1ccccc1C(n2ccnc2)(c3ccccc3)c4ccccc4 168 | 167,chlorthalidone,0,1.0,N[S](=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc23 169 | 168,chlorothiazide,0,1.0,N[S](=O)(=O)c1cc2c(NC=N[S]2(=O)=O)cc1Cl 170 | 169,Pentostatin,1,1.0,OC[C@H]1O[C@H](C[C@@H]1O)n2cnc3[C@H](O)CN=CNc23 171 | 170,Omeprazole,0,1.0,COc1ccc2nc([nH]c2c1)[S](=O)Cc3ncc(C)c(OC)c3C 172 | 171,phencyclidine,1,1.0,C1CCN(CC1)C2(CCCCC2)c3ccccc3 173 | 172,cephapirin,0,1.0,CC(=O)OCC1=C(N2[C@H](SC1)[C@H](NC(=O)CSc3ccncc3)C2=O)C(O)=O 174 | 173,carteolol,0,1.0,CC(C)(C)NCC(O)COc1cccc2NC(=O)CCc12 175 | 174,Astemizole,0,1.0,COc1ccc(CCN2CCC(CC2)Nc3nc4ccccc4n3Cc5ccc(F)cc5)cc1 176 | 175,Nelfinavir,0,1.0,Cc1c(O)cccc1C(=O)N[C@@H](CSc2ccccc2)[C@H](O)CN3C[C@H]4CCCC[C@H]4C[C@H]3C(=O)NC(C)(C)C 177 | 176,Nalidixicacid,0,1.0,CCN1C=C(C(O)=O)C(=O)c2ccc(C)nc12 178 | 177,Nafcillin,0,1.0,[Na+].CCOc1ccc2ccccc2c1C(=O)N[C@H]3[C@H]4SC(C)(C)[C@@H](N4C3=O)C([O-])=O 179 | 178,Fluconazole,1,1.0,OC(Cn1cncn1)(Cn2cncn2)c3ccc(F)cc3F 180 | 
179,Etoposide,0,1.0,COc1cc(cc(OC)c1O)[C@H]2[C@@H]3C(COC3=O)[C@H](O[C@@H]4O[C@@H]5CO[C@@H](C)O[C@H]5[C@H](O)[C@H]4O)c6cc7OCOc7cc26 181 | 180,pecazine,1,1.0,CN1CCCC(C1)CN2c3ccccc3Sc4ccccc24 182 | 181,oxiperomide,1,1.0,O=C1Nc2ccccc2N1C3CCN(CCOc4ccccc4)CC3 183 | 182,oxaflumazine,1,1.0,OC(=O)CCC(O)=O.FC(F)(F)c1ccc2Sc3ccccc3N(CCCN4CCN(CCC5OCCCO5)CC4)c2c1 184 | 183,Amiodarone,0,1.0,CCCCc1oc2ccccc2c1C(=O)c3cc(I)c(OCCN(CC)CC)c(I)c3 185 | 184,aminophylline,0,1.0,CN1C(=O)N(C)c2nc[nH]c2C1=O.CN3C(=O)N(C)c4nc[nH]c4C3=O.NCCN 186 | 185,Alprostadil,0,1.0,CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1CCCCCCC(O)=O 187 | 186,allopurinol,0,1.0,O=C1N=CN=C2NNC=C12 188 | 187,ceftriaxone,1,1.0,CO/N=C(C(=O)N[C@H]1[C@H]2SCC(=C(N2C1=O)C(O)=O)CSC3=NC(=O)C(=O)NN3C)/c4csc(N)n4 189 | 188,compound 35,0,1.0,CN(C)Cc1oc(CSCCNC2C([N+]([O-])=O)=CC=N2)cc1 190 | 189,compound 38,1,1.0,CN(C)Cc1ccnc(c2cccc(NC3C([N+]([O-])=O)=CC=N3)c2)c1 191 | 190,SB-656104-A,1,1.0,ClC1=CC=CC(OC2CCN(CCC3CCCN3S(C4=CC(N([H])C=C5)=C5C=C4)(=O)=O)CC2)=C1 192 | 191,compound 36,1,1.0,CN(C)Cc1ccc(CSCCNC2=C([N+]([O-])=O)C(Cc3ccccc3)=CN2)o1 193 | 192,compound 37,1,1.0,CN(C)Cc1ccc(c2cccc(NC3C([N+]([O-])=O)=CC=N3)c2)o1 194 | 193,granisetron,1,1.0,CN1[C@H]2CCC[C@@H]1CC(C2)NC(=O)c3nn(C)c4ccccc34 195 | 194,2-(3'-Iodo-4'-aminophenyl)-6-hydroxybenzothiazole,1,1.0,IC1=C(N([H])[H])C=CC(C2=NC3=CC=C(O[H])C=C3S2)=C1 196 | 195,11a,0,1.0,CN(C)c1cc(C2=NC(N)=NN2)ccn1 197 | 196,piperacillin,1,1.0,O.CCN1CCN(C(=O)N[C@@H](C(=O)N[C@H]2[C@H]3SC(C)(C)[C@@H](N3C2=O)C(O)=O)c4ccccc4)C(=O)C1=O 198 | 197,clozapine,1,1.0,CN1CCN(CC1)C2=C3C=CC=CC3=Nc4ccc(Cl)cc4N2 199 | 198,sufentanil,1,1.0,CCC(=O)N(c1ccccc1)C2(CCN(CCc3sccc3)CC2)COC 200 | 199,lamotrigine,1,1.0,Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl 201 | 200,glyburide,1,1.0,COc1ccc(Cl)cc1C(=O)NCCc2ccc(cc2)[S](=O)(=O)NC(=O)NC3CCCCC3 202 | 201,ondansetron,1,1.0,Cn1c2CCC(Cn3ccnc3C)C(=O)c2c4ccccc14 203 | 202,rolitetracycline,1,1.0,CN(C)[C@H]1[C@@H]2C[C@H]3C(=C(O)c4c(O)cccc4[C@@]3(C)O)C(=O)[C@]2(O)C(=O)\C(=C(/O)NCN5CCCC5)C1=O 204 | 
203,cefoperazone,1,1.0,CCN1CCN(C(=O)N[C@@H](C(=O)N[C@H]2[C@H]3SCC(=C(N3C2=O)C(O)=O)CSc4nnnn4C)c5ccc(O)cc5)C(=O)C1=O 205 | -------------------------------------------------------------------------------- /data/holdout_clintox.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 0,1,0,1.0,C[C@@]1(C(=O)N2[C@H](C(=O)N3CCC[C@H]3[C@@]2(O1)O)Cc4ccccc4)NC(=O)[C@@H]5C[C@@H]6c7cccc8c7c(c[nH]8)C[C@H]6[NH+](C5)C 3 | 1,1,0,1.0,C[C@@]1([C@@H](N2[C@H](S1(=O)=O)CC2=O)C(=O)[O-])Cn3ccnn3 4 | 2,1,0,1.0,C[C@@](c1ccccc1)(c2ccc(cc2)Cl)OCC[C@H]3CCC[NH+]3C 5 | 3,1,0,1.0,c1nc2c(nc(nc2n1[C@@H]3C[C@@H](C=C3)CO)N)NC4CC4 6 | 4,1,0,1.0,c1nc2c(n1[C@H]3C[C@@H]([C@H](O3)CO)O)NC=[NH+]C[C@H]2O 7 | 5,1,0,1.0,c1nc(nn1[C@H]2[C@@H]([C@@H]([C@H](O2)CO)O)O)C(=O)N 8 | 6,1,0,1.0,c1cnc2cc3c(cc2n1)[C@@H]4C[C@H]3C[NH2+]C4 9 | 7,1,0,1.0,c1cnc(nc1)N2CC[NH+](CC2)CCCCN3C(=O)CC4(CCCC4)CC3=O 10 | 8,1,0,1.0,C1CN1P(=S)(N2CC2)N3CC3 11 | 9,0,1,1.0,C1CN(CCN1CC2=CC3=C(C=C2)OC(O3)(F)F)C(=O)NC4=C(C=CN=C4)Cl 12 | 10,1,0,1.0,C1CN(CCN1C(=O)CCBr)C(=O)CCBr 13 | 11,1,0,1.0,c1ccnc(c1)[N-]S(=O)(=O)c2ccc(cc2)N 14 | 12,1,0,1.0,c1ccnc(c1)[N-]S(=O)(=O)c2ccc(cc2)/N=N/c3ccc(c(c3)C(=O)[O-])O 15 | 13,1,0,1.0,c1ccnc(c1)[C@H](c2ccc(cc2)Cl)OC3CC[NH+](CC3)CCCC(=O)[O-] 16 | 14,0,1,1.0,C1CCN[C@@H](C1)C2(CN(C2)C(=O)C3=C(C(=C(C=C3)F)F)NC4=C(C=C(C=C4)I)F)O 17 | 15,1,0,1.0,c1ccn2c(c1)nn(c2=O)CCC[NH+]3CCN(CC3)c4cccc(c4)Cl 18 | 16,0,1,1.0,C1CCN(CC1)C(=O)C2=CC3=NON=C3C=C2 19 | 17,1,0,1.0,c1ccn(c(=S)c1)[O-] 20 | 18,1,0,1.0,C1CCCN(CCC1)CC[NH+]=C(N)N 21 | 19,1,0,1.0,c1ccc2c(c1)cnnc2NN 22 | 20,1,0,1.0,c1ccc2c(c1)CCN3C2CN(CC3=O)C(=O)C4CCCCC4 23 | 21,1,0,1.0,c1ccc2c(c1)cccc2CC3=[NH+]CCN3 24 | 22,1,0,1.0,c1ccc2c(c1)CCCC2C3=[NH+]CCN3 25 | 23,1,0,1.0,c1ccc2c(c1)C=Cc3ccccc3N2C(=O)N 26 | 24,1,0,1.0,c1ccc2c(c1)Cc3ccccc3N4C2C[NH+]=C4N 27 | 25,1,0,1.0,c1ccc2c(c1)CC(=O)c3ccccc3N2C(=O)N 28 | 
26,1,0,1.0,c1ccc2c(c1)CC(N(C2)C(=O)[C@H](CO)NC(=O)[C@H](Cc3cccs3)NC(=O)CNC(=O)C4C[C@H](CN4C(=O)C5CCCN5C(=O)C(CCC[NH+]=C(N)N)NC(=O)[C@@H](CCC[NH+]=C(N)N)[NH3+])O)C(=O)N6[C@H]7CCCC[C@H]7CC6C(=O)N[C@@H](CCC[NH+]=C(N)N)C(=O)[O-] 29 | 27,1,0,1.0,c1ccc2c(c1)cc(c(c2Cc3c4ccccc4cc(c3O)C(=O)[O-])O)C(=O)[O-] 30 | 28,1,0,1.0,c1ccc2c(c1)c3c([nH]2)CN(CC3)C(=O)CCS 31 | 29,1,0,1.0,c1ccc2c(c1)C3=[NH+]CCN3C2(c4ccc(cc4)Cl)O 32 | 30,1,0,1.0,c1ccc2c(c1)C(=O)O[Bi](O2)O 33 | 31,1,0,1.0,c1ccc2c(c1)C(=O)NC2(c3ccc(c(c3)S(=O)(=O)N)Cl)O 34 | 32,1,0,1.0,c1ccc2c(c1)c(ns2)N3CC[NH+](CC3)CCc4cc5c(cc4Cl)NC(=O)C5 35 | 33,1,0,1.0,c1ccc2c(c1)c(ns2)N3CC[NH+](CC3)C[C@@H]4CCCC[C@H]4CN5C(=O)[C@H]6[C@@H]7CC[C@@H](C7)[C@H]6C5=O 36 | 34,1,0,1.0,c1ccc2c(c1)c(no2)CS(=O)(=O)N 37 | 35,0,1,1.0,C1=CC=C2C(=C1)C(=NN=C2NC3=CC=C(C=C3)Cl)CC4=CC=NC=C4 38 | 36,1,0,1.0,c1ccc2c(c1)c(c[nH]2)C(=O)OC3C[C@H]4CC5CC(C3)[NH+]4CC5=O 39 | 37,1,0,1.0,c1ccc2c(c1)c(c3c([nH+]2)CCCC3)N 40 | 38,1,0,1.0,c1ccc2c(c1)c(c(c(=O)o2)Cc3c(c4ccccc4oc3=O)[O-])[O-] 41 | 39,1,0,1.0,c1ccc2c(c1)c(c(c(n2)C3CC3)/C=C/[C@H](C[C@H](CC(=O)[O-])O)O)c4ccc(cc4)F 42 | 40,1,0,1.0,c1ccc2c(c1)C(=[NH+]c3ccccc3S2)N4CC[NH+](CC4)CCOCCO 43 | 41,1,0,1.0,c1ccc2c(c1)[nH]c(=O)n2C3=CC[NH+](CC3)CCCC(=O)c4ccc(cc4)F 44 | 42,1,0,1.0,c1ccc2c(c1)[nH]c(=O)n2C3CC[NH+](CC3)CCCC(c4ccc(cc4)F)c5ccc(cc5)F 45 | 43,1,0,1.0,C1CCC2(CC1)OCC(O2)C[NH+]=C(N)N 46 | 44,1,0,1.0,c1ccc(cc1)OC(=O)c2ccc(cc2O)N 47 | 45,0,1,1.0,C1=CC=C(C=C1)NS(=O)(=O)C2=CC=CC(=C2)/C=C/C(=O)NO 48 | 46,1,0,1.0,c1ccc(cc1)n2c(ccn2)[N-]S(=O)(=O)c3ccc(cc3)N 49 | 47,1,0,1.0,c1ccc(cc1)CSCC2=NS(=O)(=O)c3cc(c(cc3N2)Cl)S(=O)(=O)N 50 | 48,1,0,1.0,c1ccc(cc1)COC(=O)c2ccccc2 51 | 49,1,0,1.0,c1ccc(cc1)CN2C3C[S+]4CCCC4C3N(C2=O)Cc5ccccc5 52 | 50,1,0,1.0,c1ccc(cc1)CN(CC2=[NH+]CCN2)c3ccccc3 53 | 51,1,0,1.0,c1ccc(cc1)CCCCOCCCCCC[NH2+]CC(c2ccc(c(c2)CO)O)O 54 | 52,1,0,1.0,c1ccc(cc1)CC2Nc3cc(c(cc3S(=O)(=O)N2)S(=O)(=O)N)C(F)(F)F 55 | 53,1,0,1.0,c1ccc(cc1)C2=NCC(=O)N(c3c2cc(cc3)Cl)CC4CC4 56 | 54,1,0,1.0,c1ccc(cc1)c2c(oc(n2)CCC(=O)[O-])c3ccccc3 57 
| 55,1,0,1.0,c1ccc(cc1)C2C(=O)[NH+]=C(O2)N 58 | 56,1,0,1.0,c1ccc(cc1)c2c(nc3c(n2)c(nc(n3)N)N)N 59 | 57,1,0,1.0,c1ccc(cc1)C(=O)OOC(=O)c2ccccc2 60 | 58,1,0,1.0,c1ccc(cc1)C(=O)NC(Cc2ccc(cc2)O)C(=O)Nc3ccc(cc3)C(=O)[O-] 61 | 59,1,0,1.0,c1ccc(cc1)C(=O)c2ccc3n2CCC3C(=O)[O-] 62 | 60,1,0,1.0,c1ccc(cc1)C(CCC[NH+]2CCCCC2)(c3ccccc3)O 63 | 61,1,0,1.0,c1ccc(cc1)C(CC[NH+]2CCCCC2)(C3CCCCC3)O 64 | 62,1,0,1.0,c1ccc(cc1)C(CC[NH+]2CCCCC2)(C3CCCC3)O 65 | 63,1,0,1.0,c1ccc(cc1)C(CC[NH+]2CCCCC2)(C3CC4CC3C=C4)O 66 | 64,1,0,1.0,c1ccc(cc1)C(CC[NH+]2CCCC2)(C3CCCCC3)O 67 | 65,1,0,1.0,c1ccc(cc1)C(c2ccccc2)(c3ccccc3Cl)n4ccnc4 68 | 66,1,0,1.0,c1ccc(cc1)C(c2ccccc2)(C(=O)OC3CC4CCC(C3)[N+]45CCCC5)O 69 | 67,1,0,1.0,c1ccc(cc1)C(c2ccccc2)([C@@H]3CC[NH+](C3)CCc4ccc5c(c4)CCO5)C(=O)N 70 | 68,1,0,1.0,c1ccc(cc1)[C@H]2CN3CCSC3=[NH+]2 71 | 69,1,0,1.0,c1ccc(cc1)[C@H]2c3ccccc3CCN2C(=O)O[C@H]4C[NH+]5CCC4CC5 72 | 70,1,0,1.0,c1ccc(cc1)[C@H](C(=O)N[C@H]2[C@H]3CCC(=C(N3C2=O)C(=O)[O-])Cl)[NH3+] 73 | 71,1,0,1.0,c1ccc(cc1)/N=N/c2ccc(nc2N)N 74 | 72,1,0,1.0,c1ccc(c(c1)C2=NCC(=S)N(c3c2cc(cc3)Cl)CC(F)(F)F)F 75 | 73,1,0,1.0,c1ccc(c(c1)c2nc(n(n2)c3ccc(cc3)C(=O)[O-])c4ccccc4O)O 76 | 74,0,1,1.0,C1=CC=C(C(=C1)C(=O)NCCC[C@@H](C(=O)O)NC(=O)C2=CC=C(C=C2)NCC3=CN=C4C(=N3)C(=NC(=N4)N)N)C(=O)O 77 | 75,1,0,1.0,c1cc2c3c(c1)C(=O)N(C[C@H]3CCC2)[C@@H]4C[NH+]5CCC4CC5 78 | 76,1,0,1.0,c1cc2c(csc2c(c1)Cl)COC(Cn3ccnc3)c4ccc(cc4Cl)Cl 79 | 77,1,0,1.0,c1cc2c(cc1OCCCCc3nnnn3C4CCCCC4)CCC(=O)N2 80 | 78,1,0,1.0,c1cc2c(cc1F)CCC(O2)C(C[NH2+]CC(C3CCc4cc(ccc4O3)F)O)O 81 | 79,1,0,1.0,c1cc2c(cc1Cl)[nH]c(=O)o2 82 | 80,1,0,1.0,c1cc2c(cc1Cl)[C@@](OC(=O)N2)(C#CC3CC3)C(F)(F)F 83 | 81,1,0,1.0,c1cc2c(cc1C#N)c(c[nH]2)CCCC[NH+]3CCN(CC3)c4ccc5c(c4)cc(o5)C(=O)N 84 | 82,1,0,1.0,c1cc2c(cc1)C(=O)C(=C(C2=O)[C@H]3CC[C@@H](CC3)c4ccc(cc4)Cl)[O-] 85 | 83,1,0,1.0,c1cc2c(cc(nc2c(c1)C(F)(F)F)C(F)(F)F)C(C3CCCC[NH2+]3)O 86 | 84,1,0,1.0,c1cc2c(c(c1)OCC(COc3cccc4c3c(=O)cc(o4)C(=O)[O-])O)c(=O)cc(o2)C(=O)[O-] 87 | 85,1,0,1.0,C1CC2(C1)C(=O)O[Pt]OC2=O 88 | 
86,1,0,1.0,c1cc(oc1/C=N/N2CCOC2=O)[N+](=O)[O-] 89 | 87,1,0,1.0,c1cc(oc1/C=N/N2CC(=O)NC2=O)[N+](=O)[O-] 90 | 88,1,0,1.0,c1cc(oc1)CNc2cc(c(cc2C(=O)[O-])S(=O)(=O)N)Cl 91 | 89,0,1,1.0,C1CC(=O)NC2=C1C=CC(=C2)OCCCCN3CCN(CC3)C4=C(C(=CC=C4)Cl)Cl 92 | 90,1,0,1.0,c1cc(ccc1NC(=[NH2+])NC(=[NH2+])NCCCCCCNC(=[NH2+])NC(=[NH2+])Nc2ccc(cc2)Cl)Cl 93 | 91,1,0,1.0,c1cc(ccc1N2CCOCC2=O)N3C[C@@H](OC3=O)CNC(=O)c4ccc(s4)Cl 94 | 92,1,0,1.0,c1cc(ccc1CSC(Cn2ccnc2)c3ccc(cc3Cl)Cl)Cl 95 | 93,1,0,1.0,c1cc(ccc1C[NH+]2CCCNCC[NH2+]CCCNCC2)C[NH+]3CCCNCC[NH2+]CCCNCC3 96 | 94,1,0,1.0,c1cc(ccc1c2ccc(o2)/C=N/N3CC(=O)NC3=O)[N+](=O)[O-] 97 | 95,1,0,1.0,c1cc(ccc1C2C[NH2+]CCc3c2cc(c(c3Cl)O)O)O 98 | 96,1,0,1.0,c1cc(ccc1c2c(c3ccc(cc3s2)O)C(=O)c4ccc(cc4)OCC[NH+]5CCCCC5)O 99 | 97,1,0,1.0,c1cc(ccc1C(=O)NCCC(=O)[O-])N/N=C\2/C=CC(=O)C(=C2)C(=O)[O-] 100 | 98,1,0,1.0,c1cc(ccc1C(=O)N[C@@H](CCC(=O)[O-])C(=O)[O-])NCc2cnc3c(n2)c(=O)nc([nH]3)N 101 | 99,1,0,1.0,c1cc(ccc1C(=O)CCC[NH+]2CCC(CC2)(c3ccc(cc3)Cl)O)F 102 | 100,1,1,1.0,C1=CC(=CC=C1C#N)C(C2=CC=C(C=C2)C#N)N3C=NC=N3 103 | 101,1,0,1.0,c1cc(ccc1[C@@H]2CC[NH2+]C[C@H]2COc3ccc4c(c3)OCO4)F 104 | 102,1,0,1.0,c1cc(ccc1[C@@H]2[C@H](C(=O)N2c3ccc(cc3)F)CC[C@@H](c4ccc(cc4)F)O)O 105 | 103,1,0,1.0,c1cc(c2c3c1C[C@@H]4[C@]5([C@]3(CC[NH+]4CC6CCC6)[C@@H](O2)[C@H](CC5)O)O)O 106 | 104,0,1,1.0,C1CC(C1)(C(=O)O)C(=O)O.N.N.[Pt] 107 | 105,1,0,1.0,c1cc(c(cc1OCC(F)(F)F)C(=O)NCC2CCCC[NH2+]2)OCC(F)(F)F 108 | 106,1,0,1.0,c1cc(c(cc1F)F)C(Cn2cncn2)(Cn3cncn3)O 109 | 107,1,0,1.0,c1cc(c(cc1Cl)Cl)CO/N=C(/Cn2ccnc2)\c3ccc(cc3Cl)Cl 110 | 108,0,1,1.0,C1=CC(=C(C=C1Cl)Cl)C(=O)NS(=O)(=O)C2=CC=C(S2)Br 111 | 109,1,0,1.0,c1cc(c(cc1Cl)Cl)C(Cn2ccnc2)OCc3ccsc3Cl 112 | 110,1,0,1.0,c1cc(c(cc1C(=O)Nc2c(cncc2Cl)Cl)OCC3CC3)OC(F)F 113 | 111,1,0,1.0,c1cc(c(cc1C(F)(F)F)[N+](=O)[O-])C(=O)[C-]2C(=O)CCCC2=O 114 | 112,1,0,1.0,c1cc(c(cc1[N+](=O)[O-])Cl)NC(=O)c2cc(ccc2O)Cl 115 | 113,1,0,1.0,c1cc(c(cc1/N=N/c2ccc(c(c2)C(=O)[O-])O)C(=O)[O-])O 116 | 114,1,0,1.0,c1cc(c(c2c1nsn2)NC3=[NH+]CCN3)Cl 117 | 
115,1,0,1.0,c1cc(c(c2c1nccn2)Br)NC3=[NH+]CCN3 118 | 116,1,0,1.0,c1cc(c(c2c1[NH+]=C3NC(=O)CN3C2)Cl)Cl 119 | 117,1,0,1.0,c1cc(c(c(c1)F)Cn2cc(nn2)C(=O)N)F 120 | 118,1,0,1.0,c1cc(c(c(c1)Cl)SC(CCc2ccc(cc2)Cl)Cn3ccnc3)Cl 121 | 119,1,0,1.0,c1cc(c(c(c1)Cl)Cl)N2CC[NH+](CC2)CCCCOc3ccc4c(c3)NC(=O)CC4 122 | 120,1,0,1.0,c1cc(c(c(c1)Cl)Cl)c2c(nc(nn2)N)N 123 | 121,1,0,1.0,c1c2=N/C(=C/3\C=CON3)/N=c2c4c(n1)CCOC4 124 | 122,1,0,1.0,c1c2c(cc(c1Cl)S(=O)(=O)N)S(=O)(=O)NC(N2)C3CC4CC3C=C4 125 | 123,1,0,1.0,c1c2c(c(c(c1F)N3CCCC[C@H](C3)[NH3+])Cl)n(cc(c2=O)C(=O)[O-])C4CC4 126 | 124,1,0,1.0,c1c2c([nH]n1)ncnc2O 127 | 125,1,0,1.0,c1c(nc(s1)N)/C(=C/CC(=O)[O-])/C(=O)N[C@H]2[C@@H]3N(C2=O)C(=CCS3)C(=O)[O-] 128 | 127,1,0,1.0,c1c(c(cc(c1F)F)F)C[C@H](CC(=O)N2CCn3c(nnc3C(F)(F)F)C2)[NH3+] 129 | 128,1,0,1.0,c1c(c(c(c(c1I)NC(=O)CCOCCOCCOCCOCCC(=O)Nc2c(cc(c(c2I)C(=O)[O-])I)I)I)C(=O)[O-])I 130 | 129,1,0,1.0,c1c(c(c(c(c1I)NC(=O)CCCCC(=O)Nc2c(cc(c(c2I)C(=O)[O-])I)I)I)C(=O)[O-])I 131 | 130,1,0,1.0,c1c([nH]cn1)C[C@@H](C(=O)N2CCC[C@H]2C(=O)N)NC(=O)[C@@H]3CCC(=O)N3 132 | 131,1,0,1.0,c1c([n+](c(nc1N2CCCCC2)N)[O-])N 133 | 132,1,0,1.0,c1[nH]c(=O)c2c(n1)n(cn2)[C@H]3CC[C@H](O3)CO 134 | 133,1,0,1.0,C1[C@H]2[C@@H]([C@@H](S1)CCCCC(=O)[O-])NC(=O)N2 135 | 134,1,0,1.0,C1[C@H](C(=O)NO1)[NH3+] 136 | 135,1,0,1.0,C1[C@H]([C@@H]([C@H]([C@@H](O1)O[C@@H]2CO[C@H]([C@@H]([C@H]2OS(=O)(=O)[O-])OS(=O)(=O)[O-])O)OS(=O)(=O)[O-])OS(=O)(=O)[O-])O 137 | 136,1,0,1.0,C1[C@@H]2N(C1=O)[C@H](/C(=C/CO)/O2)C(=O)[O-] 138 | 137,1,0,1.0,C1[C@@H]2C[C@@H]2N([C@@H]1C#N)C(=O)[C@H](C34CC5CC(C3)CC(C5)(C4)O)[NH3+] 139 | 138,1,0,1.0,C/C=C\1/C(=O)N[C@H](C(=O)O[C@H]\2CC(=O)N[C@@H](C(=O)N[C@H](CSSCC/C=C2)C(=O)N1)C(C)C)C(C)C 140 | 139,1,0,1.0,C/C=C(/C(=C/C)/c1ccc(cc1)O)\c2ccc(cc2)O 141 | 140,1,0,1.0,C/[NH+]=C\1/CN(C(=c2cc(ccc2=N1)Cl)c3ccccc3)[O-] 142 | 141,1,0,1.0,C([C@@H]1[C@H]([C@@H]([C@H](C(=O)O1)O)O)O)O 143 | 142,1,0,1.0,C([C@@H]1[C@@H]([C@@H]([C@H]([C@@H](O1)O[C@@H]2[C@H](O[C@@]([C@H]2O)(CO)O)CO)O)O)O)O 144 | 
143,1,0,1.0,C([C@@H]([C@@H]1C(=C(C(=O)O1)O)[O-])O)O 145 | 144,1,0,1.0,C#CCC(Cc1cnc2c(n1)c(nc(n2)N)N)c3ccc(cc3)C(=O)N[C@@H](CCC(=O)[O-])C(=O)[O-] 146 | 145,1,0,1.0,C#CC[NH2+][C@@H]1CCc2c1cccc2 147 | -------------------------------------------------------------------------------- /data/holdout_esolv.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 0,Pyrene,-6.176,1.0,c1cc2ccc3cccc4ccc(c1)c2c34 3 | 1,methylthiouracil,-2.436,1.0,Cc1cc(=O)[nH]c(=S)[nH]1 4 | 2,phenolphthalein,-2.9,1.0,Oc1ccc(cc1)C2(OC(=O)c3ccccc23)c4ccc(O)cc4 5 | 3,Benzo(a)pyrene,-8.699,1.0,c1ccc2c(c1)cc3ccc4cccc5ccc2c3c45 6 | 4,Acenapthylene,-3.96,1.0,C1=Cc2cccc3cccc1c23 7 | 5,"1,2-Propylene oxide",-0.59,1.0,CC1CO1 8 | 6,RTI 17,-4.706,1.0,CCN2c1ccccc1N(C)C(=S)c3cccnc23 9 | 7,Estrone,-3.955,1.0,CC12CCC3C(CCc4cc(O)ccc34)C2CCC1=O 10 | 8,Fluridone,-4.445,1.0,Cn2cc(c1ccccc1)c(=O)c(c2)c3cccc(c3)C(F)(F)F 11 | 9,triforine,-4.19,1.0,ClC(Cl)(Cl)C(NC=O)N1C=CN(C=C1)C(NC=O)C(Cl)(Cl)Cl 12 | 10,Methoxsalen,-3.664,1.0,COc2c1occc1cc3ccc(=O)oc23 13 | 11,piroxicam,-4.16,1.0,CN2C(=C(O)c1ccccc1S2(=O)=O)C(=O)Nc3ccccn3 14 | 12,Riboflavin,-3.685,1.0,Cc3cc2nc1c(=O)[nH]c(=O)nc1n(CC(O)C(O)C(O)CO)c2cc3C 15 | 13,p-terphenyl,-7.11,1.0,c1ccc(cc1)c2ccc(cc2)c3ccccc3 16 | 14,adrenosterone,-3.48,1.0,CC34CC(=O)C1C(CCC2=CC(=O)CCC12C)C3CCC4(=O) 17 | 15,Dibenzothiophene,-4.38,1.0,c1ccc2c(c1)sc3ccccc23 18 | 16,Danazol,-5.507,1.0,CC23Cc1cnoc1C=C2CCC4C3CCC5(C)C4CCC5(O)C#C 19 | 17,benzoin,-2.85,1.0,OC(C(=O)c1ccccc1)c2ccccc2 20 | 18,Salicin,-0.85,1.0,OCC2OC(Oc1ccccc1CO)C(O)C(O)C2O 21 | 19,Santonin,-3.09,1.0,CC3C2CCC1(C)C=CC(=O)C(=C1C2OC3=O)C 22 | 20,piperonal,-1.63,1.0,O=Cc2ccc1OCOc1c2 23 | 21,Siduron,-4.11,1.0,CC1CCCCC1NC(=O)Nc2ccccc2 24 | 22,acetyl sulfisoxazole,-3.59,1.0,CC(=O)N(S(=O)c1ccc(N)cc1)c2onc(C)c2C 25 | 23,isocarbamid,-2.15,1.0,C1N(C(=O)NCC(C)C)C(=O)NC1 26 | 24,Dioxacarb,-1.57,1.0,CNC(=O)Oc1ccccc1C2OCCO2 27 | 25,pyracarbolid,-2.56,1.0,CC1=C(CCCO1)C(=O)Nc2ccccc2 28 | 
26,Methazole,-2.82,1.0,Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O 29 | 27,Acenapthene,-4.63,1.0,C1Cc2cccc3cccc1c23 30 | 28,Benzo(k)fluoranthene,-8.49,1.0,c1ccc2cc3c4cccc5cccc(c3cc2c1)c45 31 | 29,Minoxidil,-1.989,1.0,Nc1cc(nc(N)n1=O)N2CCCCC2 32 | 30,Coumatetralyl,-2.84,1.0,O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2 33 | 31,Fluvalinate,-8.003,1.0,CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2 34 | 32,Tetrafluthrin,-7.321,1.0,Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F 35 | 33,Benzotriazole,-0.78,1.0,c2ccc1[nH]nnc1c2 36 | 34,Benzoxazole,-1.16,1.0,c2ccc1ocnc1c2 37 | 35,Etomidate,-4.735,1.0,CCOC(=O)c1cncn1C(C)c2ccccc2 38 | 36,RTI 19,-4.749,1.0,CCN2c1ccccc1N(C)C(=O)c3ccccc23 39 | 37,Glafenine,-4.571,1.0,OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23 40 | 38,Lactose,-0.244,1.0,OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O 41 | 39,Equilenin,-5.24,1.0,CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O 42 | 40,Dienochlor,-7.278,1.0,ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl 43 | 41,Kepone,-5.259,1.0,ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl 44 | 42,Morin,-3.083,1.0,Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O 45 | 43,rhodanine,-1.77,1.0,C1SC(=S)NC1(=O) 46 | 44,captafol,-5.4,1.0,ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O 47 | 45,Aldrin,-6.307,1.0,ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl 48 | 46,acetazolamide,-2.36,1.0,CC(=O)Nc1nnc(s1)S(N)(=O)=O 49 | 47,Carboxin,-3.14,1.0,CC1=C(SCCO1)C(=O)Nc2ccccc2 50 | 48,Mefenacet,-4.873,1.0,CN(C(=O)COc1nc2ccccc2s1)c3ccccc3 51 | 49,Methyldymron,-3.35,1.0,CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2 52 | 50,aminothiazole,-0.36,1.0,Nc1nccs1 53 | 51,Amitraz,-5.47,1.0,CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C 54 | 52,chloralose,-1.84,1.0,OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl 55 | 53,Triamterene,-2.404,1.0,Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3 56 | 54,Metolazone,-3.78,1.0,CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O 57 | 55,Buthidazole,-1.877,1.0,CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C 58 | 56,Reposal,-2.696,1.0,CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3 59 | 
57,Lovastatin,-6.005,1.0,CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23 60 | 58,Indapamide,-3.586,1.0,CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O 61 | 59,Dibenzofurane,-4.6,1.0,o1c2ccccc2c3ccccc13 62 | 60,Carbanilide,-3.15,1.0,O=C(Nc1ccccc1)Nc2ccccc2 63 | 61,Benzo(b)fluoranthene,-8.23,1.0,c1ccc2c(c1)c3cccc4c3c2cc5ccccc54 64 | 62,Ancymidol,-2.596,1.0,COc1ccc(cc1)C(O)(C2CC2)c3cncnc3 65 | 63,"1,7-phenantroline",-2.68,1.0,c1cnc2c(c1)ccc3ncccc23 66 | 64,Raffinose,-0.41,1.0,OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O 67 | 65,Chlorimuron-ethyl (ph 7),-4.576,1.0,CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2 68 | 66,Equilin,-5.282,1.0,CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O 69 | 67,Pyrolan,-2.09,1.0,CN(C)C(=O)Oc1cc(C)nn1c2ccccc2 70 | 68,Flutriafol,-3.37,1.0,OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F 71 | 69,"7,12-Dimethylbenz(a)anthracene",-7.02,1.0,Cc1c2ccccc2c(C)c3ccc4ccccc4c13 72 | 70,Triazolam,-4.09,1.0,Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34 73 | 71,nevirapine,-3.19,1.0,Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34 74 | 72,Perylene,-8.804,1.0,c1cc2cccc3c4cccc5cccc(c(c1)c23)c54 75 | 73,"Etoposide (148-167,25mg/ml)",-3.571,1.0,COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67 76 | 74,Hypoxanthine,-2.296,1.0,O=c1[nH]cnc2nc[nH]c12 77 | 75,Diosgenin,-7.32,1.0,C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21 78 | 76,Methaqualone,-2.925,1.0,Cc1ccccc1n3c(C)nc2ccccc2c3=O 79 | 77,Etofenprox,-8.6,1.0,CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3 80 | 78,Glutethimide,-2.337,1.0,CCC1(CCC(=O)NC1=O)c2ccccc2 81 | 79,cycloheximide,-1.13,1.0,CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2 82 | 80,pregnenolone,-4.65,1.0,CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C 83 | 81,Tricresyl phosphate,-6.01,1.0,Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1 84 | 82,Metribuzin,-2.253,1.0,CSc1nnc(c(=O)n1N)C(C)(C)C 85 | 83,tubercidin,-1.95,1.0,Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O 86 | 84,Cyclopropyl-5-spirobarbituric acid,-1.886,1.0,O=C2NC(=O)C1(CC1)C(=O)N2 87 | 85,Indan,-3.04,1.0,C1Cc2ccccc2C1 88 | 
86,Isoquinoline,-1.45,1.0,c1ccc2cnccc2c1 89 | 87,Inosine,-1.23,1.0,OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23 90 | 88,chlorquinox,-5.43,1.0,c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl) 91 | 89,styrene oxide,-1.6,1.0,C1OC1c2ccccc2 92 | 90,diethylstilbestrol,-4.07,1.0,CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2 93 | 91,Fluoranthene,-6.0,1.0,c1ccc2c(c1)c3cccc4cccc2c34 94 | 92,Flucythrinate,-6.876,1.0,CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3 95 | 93,Propiconazole,-3.493,1.0,CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl 96 | 94,nitrofurantoin,-3.38,1.0,O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2 97 | 95,Pyrazinamide,-0.667,1.0,NC(=O)c1cnccn1 98 | 96,"Atovaquone(0,430mg/ml) - neutral",-5.931,1.0,OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O 99 | 97,Benznidazole,-2.81,1.0,O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2 100 | 98,cyclobarbital,-2.17,1.0,CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2 101 | 99,nifedipine,-4.76,1.0,COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C 102 | 100,Cyclobutyl-5-spirobarbituric acid,-1.655,1.0,O=C2NC(=O)C1(CCC1)C(=O)N2 103 | 101,Piperophos,-4.15,1.0,CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C 104 | 102,Diphenylamine,-3.504,1.0,N(c1ccccc1)c2ccccc2 105 | 103,"P,P'-DDE",-6.9,1.0,ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2 106 | 104,Lenacil,-4.594,1.0,O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3 107 | 105,Primidone,-2.64,1.0,CCC1(C(=O)NCNC1=O)c2ccccc2 108 | 106,2-pyrrolidone,1.07,1.0,O=C1CCCN1 109 | 107,Rotenone,-4.42,1.0,COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C 110 | 108,Dieldrin,-6.29,1.0,ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl 111 | 109,Thiophene,-1.33,1.0,c1ccsc1 112 | 110,Picene,-7.87,1.0,c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43 113 | 111,Fenfuram,-3.3,1.0,Cc1occc1C(=O)Nc2ccccc2 114 | 112,Amigdalin,-0.77,1.0,OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O 115 | -------------------------------------------------------------------------------- /data/holdout_freesolv.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 0,imidazole,-9.63,1.0,c1cnc[nH]1 3 | 
1,1-methylimidazole,-8.41,1.0,Cn1ccnc1 4 | 2,4-methyl-1H-imidazole,-10.27,1.0,Cc1c[nH]cn1 5 | 3,1-methylpiperazine,-7.77,1.0,CN1CCNCC1 6 | 4,"1,4-dimethylpiperazine",-7.58,1.0,CN1CCN(CC1)C 7 | 5,piperazine,-7.4,1.0,C1CNCCN1 8 | 6,simazine,-10.22,1.0,CCNc1nc(nc(n1)Cl)NCC 9 | 7,"2-N-ethyl-6-(methylsulfanyl)-4-N-(propan-2-yl)-1,3,5-triazine-2,4-diamine",-7.65,1.0,CCNc1nc(nc(n1)SC)NC(C)C 10 | 8,terbutryn,-6.68,1.0,CCNc1nc(nc(n1)SC)NC(C)(C)C 11 | 9,methyl cyclopropanecarboxylate,-4.1,1.0,COC(=O)C1CC1 12 | 10,cyclopropane,0.75,1.0,C1CC1 13 | 11,1-cyclopropylethanone,-4.61,1.0,CC(=O)C1CC1 14 | 12,2-ethylpyrazine,-5.45,1.0,CCc1cnccn1 15 | 13,2-isobutylpyrazine,-5.04,1.0,CC(C)Cc1cnccn1 16 | 14,2-methylpyrazine,-5.51,1.0,Cc1cnccn1 17 | 15,dialifor,-5.74,1.0,CCOP(=S)(OCC)S[C@@H](CCl)N1C(=O)c2ccccc2C1=O 18 | 16,phthalimide,-9.61,1.0,c1ccc2c(c1)C(=O)NC2=O 19 | 17,"3,5,5-trimethylcyclohex-2-en-1-one",-5.18,1.0,CC1=CC(=O)CC(C1)(C)C 20 | 18,6-isopropyl-3-methyl-1-cyclohex-2-enone,-4.51,1.0,CC1=CC(=O)[C@@H](CC1)C(C)C 21 | 19,1-methylpyrrole,-2.89,1.0,Cn1cccc1 22 | 20,pyrrole,-4.78,1.0,c1cc[nH]c1 23 | 21,2-methylthiophene,-1.38,1.0,Cc1cccs1 24 | 22,thiophene,-1.4,1.0,c1ccsc1 25 | 23,1-pyrrolidin-1-ylethanone,-9.8,1.0,CC(=O)N1CCCC1 26 | 24,pyrrolidine,-5.48,1.0,C1CCNC1 27 | 25,piperidine,-5.11,1.0,C1CCNCC1 28 | 26,1-methylpiperidine,-3.88,1.0,CN1CCCCC1 29 | 27,4-methylmorpholine,-6.32,1.0,CN1CCOCC1 30 | 28,morpholine,-7.17,1.0,C1COCCN1 31 | 29,"1,4-dioxane",-5.06,1.0,C1COCCO1 32 | 30,cyclopentene,0.56,1.0,C1CC=CC1 33 | 31,pyrene,-4.52,1.0,c1cc2ccc3cccc4c3c2c(c1)cc4 34 | 32,endrin,-4.82,1.0,C1[C@@H]2[C@H]3[C@@H]([C@H]1[C@H]4[C@@H]2O4)[C@@]5(C(=C([C@]3(C5(Cl)Cl)Cl)Cl)Cl)Cl 35 | 33,endosulfan alpha,-4.23,1.0,C1[C@@H]2[C@H](COS(=O)O1)[C@@]3(C(=C([C@]2(C3(Cl)Cl)Cl)Cl)Cl)Cl 36 | 34,octafluorocyclobutane,3.43,1.0,C1(C(C(C1(F)F)(F)F)(F)F)(F)F 37 | 35,profluralin,-2.45,1.0,CCC[N@@](CC1CC1)c2c(cc(cc2[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-] 38 | 36,quinoline,-5.72,1.0,c1ccc2c(c1)cccn2 39 | 
37,captan,-9.01,1.0,C1C=CC[C@@H]2[C@@H]1C(=O)N(C2=O)SC(Cl)(Cl)Cl 40 | 38,ketoprofen,-10.78,1.0,C[C@@H](c1cccc(c1)C(=O)c2ccccc2)C(=O)O 41 | 39,Amitriptyline,-7.43,1.0,CN(C)CCC=C1c2ccccc2CCc3c1cccc3 42 | 40,1-benzylimidazole,-7.63,1.0,c1ccc(cc1)Cn2ccnc2 43 | 41,9H-fluorene,-3.35,1.0,c1ccc-2c(c1)Cc3c2cccc3 44 | 42,"1,1-diphenylethene",-2.78,1.0,C=C(c1ccccc1)c2ccccc2 45 | 43,diphenyl ether,-2.87,1.0,c1ccc(cc1)Oc2ccccc2 46 | 44,[2-benzhydryloxyethyl]-dimethyl-amine,-9.34,1.0,CN(C)CCOC(c1ccccc1)c2ccccc2 47 | 45,chlordane,-3.44,1.0,C1[C@H]([C@@H]2[C@H]([C@H]1Cl)[C@]3(C(=C([C@@]2(C3(Cl)Cl)Cl)Cl)Cl)Cl)Cl 48 | 46,3-methyl-1H-indole,-5.88,1.0,Cc1c[nH]c2c1cccc2 49 | 47,sulfolane,-8.61,1.0,C1CC[S+2](C1)([O-])[O-] 50 | 48,anthracene,-3.95,1.0,c1ccc2cc3ccccc3cc2c1 51 | 49,heptachlor,-2.55,1.0,C1=C[C@@H]([C@@H]2[C@H]1[C@@]3(C(=C([C@]2(C3(Cl)Cl)Cl)Cl)Cl)Cl)Cl 52 | 50,cyclopentanone,-4.7,1.0,C1CCC(=O)C1 53 | 51,"3-(dimethoxyphosphinothioylsulfanylmethyl)-1,2,3-benzotriazin-4-one",-10.03,1.0,COP(=S)(OC)SCn1c(=O)c2ccccc2nn1 54 | 52,cyanuric acid,-18.06,1.0,c1(=O)[nH]c(=O)[nH]c(=O)[nH]1 55 | 53,quinone,-6.5,1.0,C1=CC(=O)C=CC1=O 56 | 54,phenanthrene,-3.88,1.0,c1ccc2c(c1)ccc3c2cccc3 57 | 55,caffeine,-12.64,1.0,Cn1cnc2c1c(=O)n(c(=O)n2C)C 58 | 56,carbofuran,-9.61,1.0,CC1(Cc2cccc(c2O1)OC(=O)NC)C 59 | 57,acenaphthene,-3.15,1.0,c1cc2cccc3c2c(c1)CC3 60 | 58,"9,10-dihydroanthracene",-3.78,1.0,c1ccc2c(c1)Cc3ccccc3C2 61 | 59,"cyclohepta-1,3,5-triene",-0.99,1.0,C1C=CC=CC=C1 62 | 60,5-Amino-4-chloro-2-phenylpyridazin-3(2H)-one,-16.43,1.0,c1ccc(cc1)n2c(=O)c(c(cn2)N)Cl 63 | 61,azetidine,-5.56,1.0,C1CNC1 64 | 62,"2-(2,3-dimethylphenyl)aminobenzoic acid",-6.78,1.0,Cc1cccc(c1C)Nc2ccccc2C(=O)O 65 | 63,cycloheptanol,-5.48,1.0,C1CCCC(CC1)O 66 | 64,indane,-1.46,1.0,c1ccc2c(c1)CCC2 67 | -------------------------------------------------------------------------------- /data/holdout_lipo.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 
0,CHEMBL1807872,-0.03,1.0,O[C@@H](CNCCCOCCNCCc1cccc(Cl)c1)c2ccc(O)c3NC(=O)Sc23 3 | 1,CHEMBL274642,2.55,1.0,NC1=NN(CC1)c2cccc(c2)C(F)(F)F 4 | 2,CHEMBL79537,1.93,1.0,COc1cc(ccc1Cn2ccc3ccc(cc23)C(=O)NCC4CCCC4)C(=O)NS(=O)(=O)c5ccccc5 5 | 3,CHEMBL1734492,3.3,1.0,NC(=O)NC(=O)C(Nc1ccc2CCCc2c1)c3ccccc3 6 | 4,CHEMBL1645105,3.2,1.0,COc1ccc(cn1)c2ccc3nc(N)sc3c2 7 | 5,CHEMBL402553,2.87,1.0,Clc1cc(Nc2ncnc3[nH]nc(OCCN4CCCC4)c23)ccc1OCc5ccccn5 8 | 6,CHEMBL1649771,1.39,1.0,Nc1ncc([nH]1)c2ccc(F)cc2 9 | 7,CHEMBL2071095,1.4,1.0,O=C(NC1(CC1)C#N)[C@@H]2CCCC[C@H]2C(=O)N3CCN(CC3)c4nc5ncccc5s4 10 | 8,CHEMBL559696,2.18,1.0,O=C1NC=Nc2scc(c3cccs3)c12 11 | 9,CHEMBL1938681,3.0,1.0,Clc1ccc(cc1)c2oc(cc2)C(=O)N(Cc3ccccn3)c4ccc(cc4)N5CCNCC5 12 | 10,CHEMBL1481,1.73,1.0,CCC1=C(C)CN(C(=O)NCCc2ccc(cc2)S(=O)(=O)NC(=O)N[C@@H]3CC[C@@H](C)CC3)C1=O 13 | 11,CHEMBL518760,2.52,1.0,COc1cc2ncc(C(=O)N)c(Nc3ccc(F)cc3F)c2cc1NCCN4CCCCC4 14 | 12,CHEMBL1081627,4.1,1.0,Oc1ccc(Nc2nc(cs2)c3ccc(cc3)C#N)cc1 15 | 13,CHEMBL499864,2.94,1.0,Cn1cnc(c2ccccc2)c1C#Cc3ccnc(N)n3 16 | 14,CHEMBL878,1.64,1.0,CC1Nc2cc(Cl)c(cc2C(=O)N1c3ccccc3C)S(=O)(=O)N 17 | 15,CHEMBL1762536,3.8,1.0,CN(c1ccnc(Nc2cc(cc(c2)N3CCOCC3)N4CCCC4)n1)c5cc(CO)ccc5C 18 | 16,CHEMBL1927039,3.01,1.0,N#Cc1ccc(Nc2nccc(NC3CC3)n2)cc1 19 | 17,CHEMBL1950730,1.32,1.0,CN[C@@H](C)C(=O)N[C@@H](C1CCCCC1)C(=O)N[C@H]2CCCN(C2)C(=O)Cc3ccccc3 20 | 18,CHEMBL1682810,3.8,1.0,CN1CCOc2nc(ccc12)C#Cc3ccccc3 21 | 19,CHEMBL2062564,3.44,1.0,CC(=O)Nc1ccc2cnn(c3cc(NC4CC4)n5ncc(C#N)c5n3)c2c1 22 | 20,CHEMBL550777,1.19,1.0,NC1CCN(CC1)c2nccc(C(=O)NCC34CC5CC(CC(C5)C3)C4)c2Cl 23 | 21,CHEMBL2315158,1.9,1.0,CCCCNc1nc(N)c2NC(=O)N(Cc3ccc(OCCN(C)C)nc3)c2n1 24 | 22,CHEMBL2152906,4.4,1.0,CCN1C=C(C=C(C)C1=O)[C@@]2(N=C(N)c3c(F)cccc23)c4cccc(c4)c5cc(ccn5)C#CC 25 | 23,CHEMBL273235,2.4,1.0,Fc1ccc(cc1)n2cc(C3CCNCC3)c4cc(Cl)ccc24 26 | 25,CHEMBL235789,1.84,1.0,Clc1ccc(CN2CCNCC2)cc1C(=O)NCC34CC5CC(CC(C5)C3)C4 27 | 26,CHEMBL515839,2.82,1.0,CC(C)n1c(C)ncc1c2ccnc(Nc3ccc(cc3)S(=O)(=O)CC4CCCO4)n2 28 | 
27,CHEMBL2035039,1.6,1.0,NC1(CCC1)c2ccc(cc2)N3C(=O)c4ccccc4N=C3c5ccccc5 29 | 28,CHEMBL313907,1.25,1.0,O=S(=O)(Nc1nc(nn1Cc2ccccc2)c3ccccc3)c4ccccc4 30 | 29,CHEMBL384536,2.04,1.0,CC(C)CN1C(=O)N(C)C(=O)c2c1sc(Cc3c[nH]c4ncccc34)c2C(=O)N5C[C@H](O)CO5 31 | 30,CHEMBL567175,2.7,1.0,Oc1ccc2C[C@H]3N(CC4CC4)CC[C@@]56[C@@H](Oc1c25)c7[nH]c8ccccc8c7C[C@@]36O 32 | 31,CHEMBL91428,3.9,1.0,COc1cc2ncnc(Nc3cc4ccccc4cn3)c2cc1OC 33 | 32,CHEMBL2170985,0.9,1.0,Nc1ncnc2c1ncn2[C@@H]3O[C@H](CSCCCNC(=O)NCc4ccccc4)[C@@H](O)[C@H]3O 34 | 33,CHEMBL1385161,-0.2,1.0,CC(=O)Nc1nc(N)n(n1)c2ccccc2 35 | 34,CHEMBL2158834,1.71,1.0,OC(=O)c1cccnc1N2CCC(CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4)CC2 36 | 35,CHEMBL760,1.86,1.0,Clc1ccc2N=C3NC(=O)CN3Cc2c1Cl 37 | 36,CHEMBL392559,1.45,1.0,Clc1ccc(CN2C[C@@H]3C[C@H]2CN3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5 38 | 37,CHEMBL1349,-1.3,1.0,CC(C)[C@H](N)C(=O)OCCOCn1cnc2C(=O)N=C(N)Nc12 39 | 38,CHEMBL20210,2.8,1.0,CCOC(=O)\C=C\[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@@H](CC(=O)[C@@H](NC(=O)c2cc(C)on2)C(C)C)Cc3ccc(F)cc3 40 | 39,CHEMBL2036782,2.21,1.0,CC(C)N1CCN[C@H](C1)C(=O)N2CCN(CC2)C(=O)Nc3ccc(Cl)c(Cl)c3 41 | 40,CHEMBL1940279,3.88,1.0,O[C@]1(CN2CCC1CC2)C#Cc3ccc(Oc4ccc(cc4)C(=O)NCc5ccnc6ccccc56)cc3 42 | 41,CHEMBL184271,3.19,1.0,COc1cc2ncnc(Nc3cc(NC(=O)c4ccnc(c4)N5CCOCC5)ccc3C)c2cc1OCCN6CCOCC6 43 | 42,CHEMBL2335904,3.2,1.0,C[C@@H](NC1=CC(=O)CC1)c2ccc(Nc3ncc4cc(ccc4n3)c5ccncc5)cc2 44 | 43,CHEMBL393501,3.8,1.0,CN1C(=O)C=C(CCc2cccc(c2)c3ccccc3)N=C1N 45 | 44,CHEMBL1499,1.4,1.0,C[C@]12CCC(=O)C=C1CC[C@H]3[C@@H]4CC[C@](O)(C(=O)CO)[C@@]4(C)CC(=O)[C@H]23 46 | 45,CHEMBL264026,2.0,1.0,CC[S+]([O-])c1ncccc1C2(O)CCN(CC34CC(c5ccccc35)c6ccccc46)CC2 47 | 46,CHEMBL92484,1.57,1.0,CCc1cccc(c1)N(C)C(=N)Nc2cccc3ccccc23 48 | 47,CHEMBL284954,2.79,1.0,CCOC(=O)[C@H](Cc1ccc(cc1)[N+](=O)[O-])NC(=O)c2ccccc2 49 | 48,CHEMBL1458025,2.7,1.0,Cc1sc2ncnc(SCC(=O)N3CCN(CC3)C(=O)c4occc4)c2c1C 50 | 49,CHEMBL1605684,3.2,1.0,CC(=O)C1=CN(Cc2ccccc2)C(=O)N(Cc3ccc(F)cc3)C1=O 51 | 50,CHEMBL505521,1.85,1.0,CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occc4 52 | 
51,CHEMBL1541257,2.33,1.0,C1Cc2ncc(c3ccc4OCCOc4c3)n2C1 53 | 52,CHEMBL71917,3.5,1.0,CC(C)OC(=O)C1=CN(Cc2c(F)cccc2F)c3sc(c(CN(C)Cc4ccccc4)c3C1=O)c5ccc(NC(=O)C(C)C)cc5 54 | 53,CHEMBL2035031,1.7,1.0,NC1(CCC1)c2ccc(cc2)c3ncc4cccnc4c3c5ccccc5 55 | 54,CHEMBL460,1.54,1.0,CCc1c(C)[nH]c2CCC(CN3CCOCC3)C(=O)c12 56 | 55,CHEMBL2035034,1.7,1.0,NC1(CCC1)c2ccc(cc2)c3c(ncc4nccn34)c5ccccc5 57 | 56,CHEMBL468297,3.15,1.0,C1CC1Nc2ccc3nnc(c4ccccc4)n3n2 58 | 57,CHEMBL533154,1.39,1.0,Cc1cc(N)c2cc(NC(=O)CCC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1 59 | 58,CHEMBL69863,2.11,1.0,Oc1cc(O)cc(\C=C\c2ccc(O)c(O)c2)c1 60 | 59,CHEMBL2164850,3.44,1.0,O=C(NC1=CC(=CNC1=O)c2ccncc2)[C@H](Cc3ccccc3)NC4(CC4)c5ccccn5 61 | 60,CHEMBL219861,2.14,1.0,CC(C)CN1C(=O)N(C)C(=O)c2c1sc(Cc3ccnc4ccccc34)c2C(=O)N5C[C@H](O)CO5 62 | 61,CHEMBL2035035,1.7,1.0,NC1(CCC1)c2ccc(cc2)c3c(ccn4ncnc34)c5ccccc5 63 | 62,CHEMBL420857,2.86,1.0,Oc1ccc(CCNCCS(=O)(=O)CCCOCCc2oc3ccccc3c2)c4sc(O)nc14 64 | 63,CHEMBL2094541,1.4,1.0,COc1cc(OC)nc(n1)N2N=CC(=C(Cl)C2=O)Cl 65 | 64,CHEMBL1289926,3.8,1.0,CNC(=O)c1ccccc1Sc2ccc3c(\C=C\c4ccccn4)n[nH]c3c2 66 | 65,CHEMBL1538857,2.3,1.0,Cc1cc(Nc2ccccc2)n(CCC#N)n1 67 | 66,CHEMBL1951081,2.82,1.0,COc1cc(OC)c(cc1NC(=O)CSc2ccncc2)S(=O)(=O)N3C(C)CCc4ccccc34 68 | 67,CHEMBL1779014,3.76,1.0,COc1cc(F)ccc1c2cncc(CNC(=O)c3ccccc3)c2 69 | 68,CHEMBL364069,0.7,1.0,O=C(N[C@H]1CN2CCC1CC2)c3ccc(s3)c4ccccn4 70 | 69,CHEMBL217354,3.44,1.0,CC(C)c1ccccc1Cc2cc(C(=O)Nc3ccc(cc3)S(=O)(=O)c4ccccc4C(C)(C)C)c(O)c(O)c2O 71 | 70,CHEMBL391565,1.72,1.0,COc1ccc(NC(=O)c2ccnc(N)n2)cc1 72 | 71,CHEMBL2086656,3.9,1.0,CC(C)(C)OC(=O)N1CCN(CC1)c2ccc(OCc3ccc(cc3)S(=O)(=O)C)cc2 73 | 72,CHEMBL1454,0.81,1.0,C1CN2C[C@@H](N=C2S1)c3ccccc3 74 | 73,CHEMBL472618,2.59,1.0,Nc1c(NC2CCCC2)nc(nc1N3CCOCC3)C#N 75 | 74,CHEMBL205465,4.07,1.0,CC(C)CN1C(=O)N(C)C(=O)c2c1sc(Cc3ccccc3C(F)(F)F)c2C(=O)N4CCCCC4 76 | 75,CHEMBL1922267,2.87,1.0,C[C@H]1[C@@H]2CN(CCN3CCOCC3)CC[C@H]2Cc4[nH]c5ccc(cc5c14)C(F)(F)F 77 | 
76,CHEMBL514201,3.6,1.0,C[C@@H]1CN(Cc2ccc(F)cc2)[C@@H](C)CN1C(=O)c3cc4c(cn(C)c4cc3Cl)C(=O)C(=O)N(C)C 78 | 77,CHEMBL1887445,3.2,1.0,CCCC(=O)N1CCN(CC1)c2nnc(c3ccccc3)c4ccccc24 79 | 78,CHEMBL1779045,2.24,1.0,Cc1cccc(c1)N(Cc2cc(F)c(F)cc2F)C(=O)O[C@H]3CN4CCC3CC4 80 | 79,CHEMBL1287891,0.77,1.0,NC(=O)c1cnc(N[C@H]2CCCNC2)c3cc(sc13)c4ccncc4 81 | 80,CHEMBL393714,2.61,1.0,CN1C(=O)c2c(onc2c3ccccc3)C=C1c4ccncc4 82 | 81,CHEMBL1212988,2.53,1.0,Cc1cc(C)c2c(N)c(sc2n1)C(=O)NC3CC3 83 | 82,CHEMBL1198335,3.9,1.0,COc1cccc(Nc2nc(NCC3CCCO3)c4ccccc4n2)c1 84 | 83,CHEMBL250124,3.61,1.0,COc1ccc2c(C)cc(N[C@H]3CCC[C@@H](C3)NCc4cccc(OC(F)(F)F)c4)nc2c1 85 | 84,CHEMBL2177608,1.11,1.0,CC1(CC1)c2c(cnn2c3ccc(cc3)C(=O)O)C(=O)NC4C5CC6CC(CC4C6)C5 86 | 85,CHEMBL1399152,2.58,1.0,[O-][N+](=O)c1ccc2c(c1)nc3CCCCCn23 87 | 86,CHEMBL1624529,0.3,1.0,NC1C2CN(CC12)c3nc4N(C=C(C(=O)O)C(=O)c4cc3F)c5ccc(F)cc5F 88 | 87,CHEMBL2207285,3.67,1.0,O[C@H](CNC(=O)C1=CNC(=O)c2cc(ccc12)S(=O)(=O)NC3CC3)CN4CCC(CC4)Oc5ccc(Cl)c(Cl)c5 89 | 88,CHEMBL22033,1.8,1.0,COc1cc(ccc1Cn2ncc3ccc(NC(=O)OC4CCCC4)cc23)C(=O)NS(=O)(=O)c5ccccc5 90 | 89,CHEMBL2177390,1.3,1.0,CC(C)NC[C@@H](C(=O)N1CCN(CC1)c2ncnc3[C@H](O)C[C@@H](C)c23)c4ccc(Cl)cc4 91 | 90,CHEMBL424378,1.9,1.0,CCCc1c(O)c(ccc1OCc2cccc(NC(=O)c3ccccc3C(=O)O)c2)C(=O)C 92 | 91,CHEMBL51,2.9,1.0,Fc1ccc(cc1)C(=O)C2CCN(CCN3C(=O)Nc4ccccc4C3=O)CC2 93 | 92,CHEMBL326642,2.68,1.0,Oc1ccc(CCNCCS(=O)(=O)CCCOCCSc2ccccc2)c3sc(O)nc13 94 | 93,CHEMBL1464,0.86,1.0,CC(=O)CC(C1=C(O)c2ccccc2OC1=O)c3ccccc3 95 | 94,CHEMBL561368,3.57,1.0,CC(C)NC(=O)c1cnc(N2CCC(CC2)N3C(=O)OCc4ccccc34)c(Cl)c1 96 | 95,CHEMBL354788,1.2,1.0,NC1=NC(Nc2c(F)ccc(F)c12)c3occc3 97 | 96,CHEMBL184721,3.2,1.0,Cc1ccc(NC(=O)c2cccc(c2)N3CCOCC3)cc1NC(=O)c4ccc(OCc5ccccn5)cc4 98 | 97,CHEMBL254370,2.14,1.0,CC[C@@H](NC1=C(Nc2cccc(C(=O)N(C)C)c2O)C(=O)C1=O)c3ccccc3 99 | 98,CHEMBL1064,3.7,1.0,CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@@H]12 100 | 99,CHEMBL1807817,2.54,1.0,O[C@@H](CNCCSCCCOCCc1ccccc1)c2ccc(O)c3NC(=O)Sc23 
101 | 100,CHEMBL2325701,2.8,1.0,CS(=O)(=O)Cc1cc(nc(n1)c2cccc3cc[nH]c23)N4CCOCC4 102 | 101,CHEMBL2086689,3.4,1.0,C[C@@H]1CN(CCN1c2ncc(OCc3ccncc3C#N)cn2)c4ncc(F)cn4 103 | 102,CHEMBL126,0.67,1.0,CC(=O)NC[C@H]1CN(C(=O)O1)c2ccc(N3CCOCC3)c(F)c2 104 | 103,CHEMBL2216902,2.9,1.0,CC(C)c1nc2ccccc2n1c3nc(N4CCOCC4)c5nc(OC6CN(C6)C7CCS(=O)(=O)CC7)n(C)c5n3 105 | 104,CHEMBL1486187,2.06,1.0,FC(F)(F)c1nnc2ccc(NCc3cccnc3)nn12 106 | 105,CHEMBL213294,2.94,1.0,COc1ccc(NC(=O)CSc2nnc(C)n2c3ccc(C)cc3)cc1 107 | 106,CHEMBL1720347,1.52,1.0,Cc1nc2nc(nn2c(O)c1Cc3ccccc3)c4cccnc4 108 | 107,CHEMBL1312533,2.79,1.0,CC(=O)NC1CC2CCCC(C1)N2C(=O)Nc3ccc(Cl)c(c3)C(F)(F)F 109 | 108,CHEMBL101,0.45,1.0,CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3 110 | 109,CHEMBL2151319,2.74,1.0,C[C@H](Nc1ccc2ncn(c3cc([nH]n3)C4CC4)c2n1)c5ncc(F)cn5 111 | 110,CHEMBL216749,2.7,1.0,COc1cc2c(NC(=O)Nc3c(Cl)cccc3Cl)ncnc2cc1OCC4CCN(C)CC4 112 | 111,CHEMBL2107486,2.1,1.0,C[C@@H](O)[C@@H]1[C@H]2[C@@H](C)C(=C(N2C1=O)C(=O)OCOC(=O)C(C)(C)C)SC3CN(C3)C4=NCCS4 113 | 112,CHEMBL331019,3.6,1.0,O=C(CSc1ccncc1)Nc2ccc(cc2)C(=O)c3ccccc3 114 | 113,CHEMBL1405,3.4,1.0,C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CCC2=O 115 | 114,CHEMBL135,3.72,1.0,C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CC[C@@H]2O 116 | 115,CHEMBL1870155,3.01,1.0,O=C(NC1CCCCC1)c2cccnc2Sc3ccccc3 117 | 116,CHEMBL2088121,-0.62,1.0,COc1c(N2CCO[C@@H](CN(C)C)C2)c(F)cc3C(=O)C(=CN(C4CC4)c13)C(=O)O 118 | 117,CHEMBL1683934,2.16,1.0,O[C@@H](CNCCCCCCCCCN1CCC(CC1)OC(=O)Nc2ccccc2c3ccccc3)c4ccc(O)c5NC(=O)C=Cc45 119 | 118,CHEMBL1562923,2.5,1.0,Cc1ccc(cc1)n2c(C)cc(C(=O)NS(=O)(=O)c3ccc(C)cc3)c2C 120 | 119,CHEMBL219445,4.04,1.0,COc1cc2ncnc(N3CC[C@H](C3)Oc4cnc5ccccc5n4)c2cc1OC 121 | 120,CHEMBL1302214,0.98,1.0,Nc1ccc(cc1)S(=O)(=O)N2CCCC2 122 | 121,CHEMBL1345229,2.26,1.0,CC1CCN(CC1)C(=O)c2scc3OCCOc23 123 | 122,CHEMBL586702,3.04,1.0,FC(F)c1nc2ccccc2n1c3nc(nc(n3)N4CCOCC4)N5CCOCC5 124 | 123,CHEMBL445102,2.25,1.0,OC1(CCN(Cc2c[nH]c3ccccc23)CC1)c4ccc(Cl)cc4 125 | 
124,CHEMBL1783285,2.75,1.0,COc1cc(c(OC)nn1)c2c(F)ccc3c(N)c(nnc23)C(=O)NC4CC4 126 | 125,CHEMBL414017,2.22,1.0,Nc1nc(N)c2cc(NCc3ccc(Cl)c(Cl)c3)ccc2n1 127 | 126,CHEMBL1684984,1.2,1.0,CS(=O)(=O)N1CCc2c1nc(nc2c3cnc(N)nc3)N4CCOCC4 128 | 127,CHEMBL473967,1.32,1.0,CC1(C)Oc2ncnc(N)c2N=C1c3ccc(cc3)[C@@H]4CC[C@@H](CC(=O)O)CC4 129 | 128,CHEMBL521851,3.3,1.0,CS(=O)(=O)N1CCN(Cc2cc3nc(nc(N4CCOCC4)c3s2)c5cccc6[nH]ncc56)CC1 130 | 129,CHEMBL446519,1.6,1.0,Nc1c(NC2CCOC2)nc(nc1N3CCOCC3)C#N 131 | 130,CHEMBL480618,1.75,1.0,COc1cc2c(Nc3ccc(F)cc3F)c(cnc2cc1N4CCN(C)CC4)C(=O)N 132 | 131,CHEMBL595,2.8,1.0,CCc1ccc(CCOc2ccc(CC3SC(=O)NC3=O)cc2)nc1 133 | 132,CHEMBL1952147,2.19,1.0,NS(=O)(=O)c1ccc(CCNc2ccc3nnc(c4ccccc4)n3n2)cc1 134 | 133,CHEMBL62746,0.36,1.0,NC(=N)c1ccc(cc1)C(=O)N2CCN(CC2)S(=O)(=O)c3ccc4cc(Br)ccc4c3 135 | 134,CHEMBL193240,3.95,1.0,FC(F)Oc1ccc(cc1OCC2CC2)C(=O)Nc3c(Cl)cncc3Cl 136 | 135,CHEMBL1818341,1.9,1.0,CC(C)CNCc1ccc(cc1)c2ccccc2S(=O)(=O)N3CCCC3 137 | 136,CHEMBL140642,3.0,1.0,CC(O)(C(=O)Nc1ccc2c(c1)c3ccccc3S2(=O)=O)C(F)(F)F 138 | 137,CHEMBL1972934,1.19,1.0,Nc1ncnc2sccc12 139 | 138,CHEMBL1329237,3.37,1.0,CCN(CC)c1ccc(cc1)c2nn3c(nnc3s2)c4[nH]nc5CCCc45 140 | 139,CHEMBL2087844,3.8,1.0,CN1C(=O)N(CC2CC2)c3nn(Cc4ccnc5ccc(Cl)cc45)c(c3C1=O)c6ncnn6C 141 | 140,CHEMBL263655,1.2,1.0,CN1SC(=NC1=O)NCc2ccccc2 142 | 141,CHEMBL2010843,2.5,1.0,CN(C1CCN(Cc2nc3CCCCc3s2)CC1)C(=O)Cc4ccc(cc4)n5cnnn5 143 | 142,CHEMBL2208437,1.21,1.0,CS(=O)(=O)c1ccccc1C(=O)NCC(O)CNC2CCN(Cc3ccc(Cl)c(Cl)c3)CC2 144 | 143,CHEMBL1870384,3.38,1.0,COc1ccc(C=NN=Cc2ccc(OC)c(OC)c2)cc1OC 145 | 144,CHEMBL1545465,2.42,1.0,COc1ccc(cc1)C(=O)C2CCN(CC2)C(=O)c3occc3 146 | 145,CHEMBL2110360,3.13,1.0,CCN(CC)CCOc1ccc(cc1OC)N(C)C(=O)c2ccc(cc2)c3ccc(cc3)C(F)(F)F 147 | 146,CHEMBL422329,2.16,1.0,NC(=N)Nc1cc(Cl)nc(n1)c2ccccc2 148 | 147,CHEMBL213029,1.68,1.0,COc1ccc2c(c1)c(CC(=O)O)c(C)n2c3ccnc4cc(Cl)ccc34 149 | 148,CHEMBL1950725,0.81,1.0,CN[C@@H](C)C(=O)N[C@@H](C1CCCCC1)C(=O)N[C@H]2CCN(C2)C(=O)Cc3ccccc3 150 | 
149,CHEMBL216769,2.11,1.0,COc1cc2c(Nc3cc(CC(=O)Nc4cccc(F)c4)[nH]n3)ncnc2cc1OCCCN5CCC(CO)CC5 151 | 150,CHEMBL11144,2.22,1.0,CC(C)C(NC(=O)CN1C(=O)C(=CN=C1c2ccc(F)cc2)NC(=O)OCc3ccncc3)C(=O)C(F)(F)F 152 | 151,CHEMBL1477550,3.44,1.0,O=C(Nc1ccccc1)c2oc3ccccc3c2 153 | 152,CHEMBL254359,1.2,1.0,Nc1ccccc1NC(=O)c2ccc(cc2)N3CCOCC3 154 | 153,CHEMBL408194,2.55,1.0,COC1=CC(=N/C/1=C\c2[nH]c(C)cc2C)c3cc4ccccc4[nH]3 155 | 154,CHEMBL2035028,2.0,1.0,NC1(CCC1)c2ccc(cc2)c3ncc4ncccc4c3c5ccccc5 156 | 155,CHEMBL11428,2.87,1.0,CC(C)C(NC(=O)CN1C(=O)C(=CN=C1C2CCCCC2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F 157 | 156,CHEMBL1079772,2.65,1.0,Nc1ncnc2sc(nc12)c3c(ncn3C[C@H]4CCCO4)c5ccccc5 158 | 157,CHEMBL311862,2.7,1.0,COc1cc(ccc1Cn2ccc3ccc(NC(=O)OC4CCCC4)cc23)c5nn[nH]n5 159 | 158,CHEMBL288524,0.29,1.0,NC12CCC(CC1)(CC2)c3ccccc3 160 | 159,CHEMBL809,3.15,1.0,CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c3ccccc13 161 | 160,CHEMBL1762539,2.61,1.0,CN(c1ccnc(Nc2cc(cc(c2)N3CCN(C)CC3)N4CCOCC4)n1)c5cc(CO)ccc5C 162 | 161,CHEMBL1201,3.16,1.0,CN(C)S(=O)(=O)c1ccc2Sc3ccccc3\C(=C\CCN4CCN(C)CC4)\c2c1 163 | 162,CHEMBL1449167,1.06,1.0,Nc1n[nH]c(Nc2ccccc2)n1 164 | 163,CHEMBL1940315,1.4,1.0,O[C@]1(CN2CCC1CC2)C#Cc3ccc(Oc4ccc(cc4)C(=O)NC5CCCS(=O)(=O)C5)cc3 165 | 164,CHEMBL464552,2.61,1.0,CNC(=O)c1c(F)cccc1Nc2nc(Nc3cc4N(CCc4cc3OC)C(=O)CN(C)C)nc5[nH]ccc25 166 | 165,CHEMBL2153184,2.61,1.0,CCCSc1nc(NC[C@@H]2CC[C@H](CC2)C(=O)O)ccc1C(=O)NC3CCCCC3 167 | 166,CHEMBL201307,2.89,1.0,COc1cc2c(Nc3ncc(CC(=O)Nc4cccc(F)c4)s3)ncnc2cc1OCCCN5CCC(CO)CC5 168 | 167,CHEMBL564607,2.48,1.0,OC(=O)c1ccc(cc1)c2ccc(Cl)c(c2)C(=O)NCC34CC5CC(CC(C5)C3)C4 169 | 168,CHEMBL313206,0.53,1.0,Cc1cc(N)c2cc(NC(=O)CCCCC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1 170 | 169,CHEMBL2062555,2.3,1.0,Nc1cc(nc2c(cnn12)C#N)c3cccs3 171 | 170,CHEMBL191974,4.0,1.0,Oc1ccc(cc1)c2ccc3cc(O)ccc3c2 172 | 171,CHEMBL562310,3.28,1.0,O=C(Nc1cccc2cccnc12)c3ccc(cc3)N4C(=O)[C@@H]5[C@H]6C[C@H](C=C6)[C@@H]5C4=O 173 | 172,CHEMBL1809223,3.5,1.0,COc1cc(ccc1Nc2ncc(Cl)c(n2)c3cnc4cc(ccn34)N5CCCC5)N6CCN(CC6)C(=O)C 174 | 
173,CHEMBL2147569,3.8,1.0,C[C@]12Cc3cnn(c3C=C1CC[C@@]2(O)CCc4ccc(F)cc4C(=O)N)c5ccc(F)cc5 175 | 174,CHEMBL456381,3.7,1.0,Oc1ccc(cc1)c2csc(n2)c3ccc(O)cc3 176 | 175,CHEMBL1508448,-0.2,1.0,CC(C)(C)c1cc(O)n2nc(N)nc2n1 177 | 176,CHEMBL2087421,-0.4,1.0,COc1cc(C)c(c(C)c1)S(=O)(=O)N(C)CCOCC(=O)N2CCN(CC2)C3CCN(C)CC3 178 | 177,CHEMBL2107366,2.9,1.0,C[C@H](C1=CNC(=S)N1)c2cccc(C)c2C 179 | 178,CHEMBL472455,3.09,1.0,COc1ccc(N(C(C(=O)NC2CCCC2)c3ccccc3F)C(=O)c4occc4)c(OC)c1 180 | 179,CHEMBL1762541,3.0,1.0,CN(c1ccnc(Nc2cc(CN3CCOCC3)cc(c2)N4CCOCC4)n1)c5cc(CO)ccc5C 181 | 180,CHEMBL409494,2.76,1.0,Nc1ccccc1NC(=O)c2ccc(cc2)C3CCN(CCC(=O)Nc4ccccc4F)CC3 182 | 181,CHEMBL2048864,0.68,1.0,Clc1cccc(\C=C\2/SC(=O)NC2=O)c1N3CCNCC3 183 | 182,CHEMBL2335905,3.02,1.0,C[C@@H](NC1=CC(=O)NCC1)c2ccc(Nc3ncc4cc(ccc4n3)c5ccncc5)cc2 184 | 183,CHEMBL358325,2.58,1.0,CN1CCN(CC1)c2nc(CCOc3ccc(C[C@H](Nc4ccccc4C(=O)c5ccccc5)C(=O)O)cc3)c(C)s2 185 | 184,CHEMBL1835918,2.62,1.0,Cc1onc(n1)c2ccc(OCC3CN(C3)c4ccc(C)nn4)cc2 186 | 185,CHEMBL2104625,0.67,1.0,O=C(Nc1nnn[nH]1)C2=CC(=C3C=CC=CN3C2=O)Oc4ccccc4 187 | 186,CHEMBL2062570,3.57,1.0,CC(=O)Nc1ccc2ccn(c3cc(Nc4ccn(C)n4)n5ncc(C#N)c5n3)c2c1 188 | 187,CHEMBL652,1.13,1.0,FC(F)(F)COc1ccc(OCC(F)(F)F)c(c1)C(=O)NCC2CCCCN2 189 | 188,CHEMBL480601,3.84,1.0,CC1(C)[C@@H]2CC[C@@]1(CS(=O)(=O)NC3CCN(CC3)c4ccc(cn4)C(F)(F)F)C(=O)C2 190 | 189,CHEMBL2312933,2.6,1.0,CCCN(c1cccnc1)P(=O)(c2ccccc2)c3ccccc3 191 | 190,CHEMBL2104196,1.7,1.0,NS(=O)(=O)c1ccc(NCc2ccccc2)cc1 192 | 191,CHEMBL1947153,1.99,1.0,O[C@@H](CNCCc1cccc(CN2CCC(CC2)c3ccccc3)c1)c4ccc(O)c5NC(=O)Sc45 193 | 192,CHEMBL1201203,1.49,1.0,CN1[C@@H]2CC[C@H]1C[C@H](C2)OC(c3ccccc3)c4ccccc4 194 | 193,CHEMBL66953,2.03,1.0,OC1=CC=Cc2cc(O)c(O)c(O)c2C1=O 195 | 194,CHEMBL1824832,2.94,1.0,CCOc1ccc(Oc2ccc(cc2)S(=O)(=O)NC(=O)c3cccc(c3)c4ccc(Cl)c(Cl)c4)cc1 196 | 195,CHEMBL279053,3.92,1.0,CCCc1c(OCCCSc2ccc(CC(=O)O)cc2Cl)ccc3c(CC)noc13 197 | 196,CHEMBL1950864,1.64,1.0,CN[C@@H](C)C(=O)N[C@@H](C1CCCCC1)C(=O)N[C@H]2C[C@@H]3CC[C@H]2N(CCc4ccccc4)C3 198 | 
197,CHEMBL2164377,3.9,1.0,FC(F)(F)Oc1ccc(CNC(=O)C2N(CC3CC(F)(F)C3)C(=O)c4ccccc24)cc1 199 | 198,CHEMBL318021,2.4,1.0,CC(CCc1cccc(OCc2ccc3ccccc3n2)c1)CC(=O)O 200 | 199,CHEMBL1730860,3.3,1.0,Oc1ccc(cc1)c2cnc3ccccc3n2 201 | 200,CHEMBL1762537,4.2,1.0,CN(c1ccnc(Nc2cc(cc(c2)N3CCOCC3)N4CCCCC4)n1)c5cc(CO)ccc5C 202 | 201,CHEMBL1559397,-0.8,1.0,OC(=O)c1cc(nc2ccc(F)cc12)c3ccncc3 203 | 202,CHEMBL440464,0.5,1.0,CN1CCCC1c2cccnc2 204 | 203,CHEMBL128514,3.5,1.0,CNC(=O)c1ccc(CCC(COc2ccc(cc2)c3cccc(c3)[N+](=O)[O-])N4C(=O)CSC4=O)cc1 205 | 204,CHEMBL605785,2.98,1.0,COC1(CCOCC1)c2ccc(NC(=O)C3=CC(=O)c4cc(F)cc(c4O3)c5c(C)nn(C)c5C)cn2 206 | 205,CHEMBL1430943,3.44,1.0,CCCS(=O)(=O)N1CCCC(C1)C(=O)N2CCC(Cc3ccccc3)CC2 207 | 206,CHEMBL251202,2.65,1.0,COc1cccc(c1)C(=O)N2CCC(CC2)N3C(=O)Nc4ccccc34 208 | 207,CHEMBL272282,1.63,1.0,CN1C(=N)N(CC(=O)c2ccc(Cl)cc2)c3ccccc13 209 | 208,CHEMBL1574150,4.36,1.0,O=C(NC1CCCC1)C(N(Cc2occc2)C(=O)c3ccc([nH]3)c4ccccc4)c5ccncc5 210 | 209,CHEMBL1946026,2.75,1.0,O=C(N1CCCC1)c2ccc(cc2)C(=C3CCN(Cc4cscn4)CC3)c5cccc6cccnc56 211 | 210,CHEMBL376813,2.7,1.0,COc1cc(N2CC[C@H](C2)N(C)C)c3NC(=CC(=O)c3c1)C(=O)Nc4ccc(cc4)N5CCOCC5 212 | 211,CHEMBL230641,4.21,1.0,CC(C)c1ccccc1Nc2nc(SCc3cccs3)n[nH]2 213 | 212,CHEMBL1831130,1.55,1.0,COc1ccc2nc(C)cc(N3CC(CNC(=O)C4CC4)OC3=O)c2c1 214 | 213,CHEMBL107920,3.84,1.0,Fc1cc(F)cc(c1)c2cc(on2)N(CCCN3CCCCCC3)Cc4ccc5OCOc5c4 215 | 214,CHEMBL2062566,3.3,1.0,CC(=O)Nc1ccc2CCN(c3cc(NC4CC4)n5ncc(C#N)c5n3)c2c1 216 | 215,CHEMBL2172001,1.93,1.0,C[C@@H]1CN(CCN1C(=O)[C@@H]2CCCC[C@H]2C(=O)NC3(CC3)C#N)c4ccc5c(C)nnc(C)c5c4 217 | 216,CHEMBL453053,1.22,1.0,NC(=O)Nc1sc(cc1C(=O)N)c2ccc(CNC3CCCC3)cc2 218 | 217,CHEMBL2062571,2.24,1.0,CC(=O)Nc1ccc2ccn(c3cc(NC4COC4)n5ncc(C#N)c5n3)c2c1 219 | 218,CHEMBL2110732,4.1,1.0,COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)\C=C\CN4CCCCC4 220 | 220,CHEMBL1945294,-0.28,1.0,O[C@@H](CNCCc1cccc(CN2CCCC2)c1)c3ccc(O)c4NC(=O)Sc34 221 | 221,CHEMBL2152918,3.4,1.0,CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(cc3C24N=C(C)C(=N4)N)c5cncc(Br)c5 222 | 
222,CHEMBL259850,2.9,1.0,COc1ccc(CNC(=O)Nc2ncc(s2)[N+](=O)[O-])cc1 223 | 223,CHEMBL2164668,3.01,1.0,O=C(NC1(CC1)C#N)[C@@H]2CCCC[C@H]2C(=O)N3CCc4oc5ccccc5c4C3 224 | 224,CHEMBL440335,4.5,1.0,[O-]c1c2c(nn1c3ccc(Cl)cc3)c4ccccc4c[n+]2Cc5ccccc5 225 | 225,CHEMBL1491421,1.9,1.0,CC(C)(C)c1cc2nc3C(=O)N(Cc4ccccc4)Cc3c(O)n2n1 226 | 226,CHEMBL488881,4.2,1.0,O=C1C=C(Oc2c1cccc2c3cccc(c3)c4ccsc4)N5CCOCC5 227 | 227,CHEMBL2070947,1.49,1.0,CS(=O)(=O)c1ccc2nc(sc2c1)N3CCN(CC3)C(=O)[C@@H]4CCCC[C@H]4C(=O)NC5(CC5)C#N 228 | 228,CHEMBL10975,2.02,1.0,CC(C)C(NC(=O)CN1C(=O)C(=CN=C1c2cccs2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F 229 | 229,CHEMBL452530,1.2,1.0,NC(=O)Nc1sc(cc1C(=O)N)c2ccc(OCCN3CCCCC3)cc2 230 | 230,CHEMBL1346348,3.16,1.0,O=C(N1CCCCC1)c2scc3CCCCc23 231 | 231,CHEMBL1914480,0.83,1.0,COc1ccc2N=CC(=O)N(CCN3CCC(CC3)NCc4cc5OCCOc5cn4)c2n1 232 | 232,CHEMBL1956334,3.01,1.0,CC(=O)c1ncccc1NC(=O)[C@@H]2CC[C@H](CC2)N3C(=O)[C@@H]4[C@@H]5CC[C@@H](C5)[C@@H]4C3=O 233 | 233,CHEMBL621,2.67,1.0,Clc1cccc(c1)N2CCN(CCCN3N=C4C=CC=CN4C3=O)CC2 234 | 234,CHEMBL14516,2.0,1.0,CC(C)C(NC(=O)CN1C(=O)C(=CC=C1c2ccccc2)NC(=O)CN3CCOCC3)C(=O)C(F)(F)F 235 | 235,CHEMBL1682836,3.1,1.0,CN1CCOc2cc(COC3CCCCC3)cnc12 236 | 236,CHEMBL481611,2.9,1.0,CC(C)(F)C[C@H](N[C@@H](c1ccc(cc1)c2ccc(cc2)S(=O)(=O)C)C(F)(F)F)C(=O)NC3(CC3)C#N 237 | 237,CHEMBL2208432,4.46,1.0,O[C@H](CNC(=O)N1C(=O)Nc2ccccc12)CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4 238 | 238,CHEMBL260091,2.03,1.0,C[C@@H](O)[C@H](NC(=O)c1ccc(cc1)C#Cc2ccc(CN3CCOCC3)cc2)C(=O)NO 239 | 239,CHEMBL1917462,0.92,1.0,Cc1c(Sc2ccc(Cl)cc2)c3c(Cl)nccc3n1CC(=O)O 240 | 240,CHEMBL1950722,1.65,1.0,CN[C@@H](C)C(=O)N[C@@H](C1CCCCC1)C(=O)N[C@H]2CCN(CCc3ccccc3)C2 241 | 241,CHEMBL1721498,2.72,1.0,Clc1ccc(cc1)C(=O)C2CCN(CC2)C(=O)C3CC3 242 | 242,CHEMBL280713,2.1,1.0,OC(=O)c1ccccc1Cn2nnc(n2)c3cccc(OCc4ccc5ccccc5n4)c3 243 | 243,CHEMBL1073,0.31,1.0,Cc1cnc(cn1)C(=O)NCCc2ccc(cc2)S(=O)(=O)NC(=O)NC3CCCCC3 244 | 244,CHEMBL2151323,2.41,1.0,CC(C)Oc1cc(n[nH]1)n2cnc3cnc(N[C@@H](C)c4ncc(F)cn4)nc23 245 | 
245,CHEMBL2333941,0.8,1.0,C[C@]1(CCSC(=N1)N)c2cc(c(F)cc2F)c3cncnc3 246 | 246,CHEMBL1916086,1.9,1.0,Cc1ccc(cc1)S(=O)(=O)Nc2c(cnn2c3ccccc3)C(=O)NCc4ccccc4 247 | 247,CHEMBL1592130,4.2,1.0,c1ccc(cc1)c2nnc(s2)c3ccccc3 248 | 248,CHEMBL521809,2.13,1.0,O=C(N1CCN(CC1)c2ncccn2)c3cccc(CC4=NNC(=O)c5ccccc45)c3 249 | 249,CHEMBL1807855,0.47,1.0,Nc1nc(OCc2nccs2)nc3c1ncn3[C@@H]4O[C@H](CF)[C@@H](O)[C@H]4O 250 | 250,CHEMBL301265,3.15,1.0,CCCN[C@H]1CCc2nc(N)sc2C1 251 | 251,CHEMBL235579,2.18,1.0,Clc1ccc(O[C@H]2CCCNC2)cc1C(=O)NCC34CC5CC(CC(C5)C3)C4 252 | 252,CHEMBL1094250,3.48,1.0,C[C@@H](CNC(=O)c1c(O)c(O)cc2c(O)c(c(C)cc12)c3c(C)cc4c(C(=O)NC[C@H](C)c5ccccc5)c(O)c(O)cc4c3O)c6ccccc6 253 | 253,CHEMBL2107729,3.02,1.0,Clc1ccc(NS(=O)(=O)c2ccc(Cl)s2)c(c1)C(=O)Nc3ccc(cc3)S(=O)(=O)N4CCOCC4 254 | 254,CHEMBL1608462,3.8,1.0,CC(C)n1ncc2c(cc(nc12)C3CC3)C(=O)NCc4c(C)cc(C)nc4O 255 | 255,CHEMBL1762157,3.73,1.0,CC1(C)N(Cc2ccnc(c2)N3CCOCC3)C(=O)N(C1=O)c4ccc(SC(F)(F)F)cc4 256 | 256,CHEMBL1190521,1.89,1.0,CCOC(=O)C1(CCCN(CC2CC2)C1)c3cccc(O)c3 257 | 257,CHEMBL1382733,1.2,1.0,Cn1c(nc2ccccc12)c3ccc(O)nc3 258 | 258,CHEMBL193757,0.81,1.0,CC(C)Oc1cc(OCCc2cccnc2)cc(c1)C(=O)Nc3ccc(cn3)C(=O)O 259 | 259,CHEMBL1290746,2.6,1.0,Cc1ccc(cc1NC(=O)c2cnn(c2N)c3ccccc3F)C(=O)Nc4ccon4 260 | 260,CHEMBL1703518,3.9,1.0,CCN(CC)S(=O)(=O)c1ccc(cc1)c2oc(SCC(=O)N3CCc4ccccc34)nn2 261 | 261,CHEMBL2086691,3.1,1.0,C[C@@H]1CN(CCN1c2ncc(OCc3ccncc3C#N)cn2)c4noc(n4)C5CC5 262 | 262,CHEMBL2029359,1.92,1.0,CS(=O)(=O)Cc1cc(nc(n1)c2cnc3[nH]ccc3c2)N4CCOCC4 263 | 263,CHEMBL193646,0.73,1.0,C(CNCc1ccccc1)NCc2ccccc2 264 | 264,CHEMBL1684,1.78,1.0,NS(=O)(=O)c1cc2c(NC(Cc3ccccc3)NS2(=O)=O)cc1C(F)(F)F 265 | 265,CHEMBL184659,2.7,1.0,COc1cc2ncnc(Nc3cc(NC(=O)c4ccnc(c4)N5CCOCC5)ccc3C)c2cc1OCCN6CCCC6 266 | 266,CHEMBL1682824,3.2,1.0,O=C1CCOc2nc(\C=C\c3ccccc3)ccc12 267 | 267,CHEMBL2216859,3.49,1.0,CC(C)N1CCN(Cc2oc(nc2)c3cc(cc4[nH]ncc34)c5cccc6[nH]ccc56)CC1 268 | 268,CHEMBL1889452,2.11,1.0,CC1N(C(=O)c2ccccc2)c3ccccc3NC1=O 269 | 
269,CHEMBL2208429,2.95,1.0,O[C@H](CNC(=O)c1n[nH]c2ccccc12)CN3CCC(CC3)Oc4ccc(Cl)c(Cl)c4 270 | 270,CHEMBL2325983,3.4,1.0,NC1(CCN(CC1)c2ncnc3[nH]ccc23)C(=O)NC(C4CC4)c5ccc(Cl)cc5 271 | 271,CHEMBL387125,2.23,1.0,CON(C)C(=O)c1c(Cn2c(C)nc(Cl)c2Cl)sc3N(CC(C)C)C(=O)N(C)C(=O)c13 272 | 272,CHEMBL2036576,2.36,1.0,CN1CCN(CC1)c2ccc(NC(=O)c3oc(Nc4ccccc4F)nn3)cn2 273 | 273,CHEMBL2177165,2.83,1.0,Nc1ncc(nc1C(=O)Nc2cccnc2)c3ccc(cc3)S(=O)(=O)N4CCOCC4 274 | 274,CHEMBL1604571,-0.2,1.0,CCOC(=O)C(CCc1ccccc1)NC2CCc3ccccc3N(CC(=O)O)C2=O 275 | 275,CHEMBL1770477,4.23,1.0,CCOc1ccc2oc(C(=O)NC(CCSC)c3nc4ncccc4[nH]3)c(C)c2c1 276 | 276,CHEMBL741,1.38,1.0,Nc1nnc(c(N)n1)c2cccc(Cl)c2Cl 277 | 277,CHEMBL2036739,3.72,1.0,Clc1ccc(cc1)C2(CCCC2)C(=O)Nc3oc(nn3)C(=O)Nc4ccc(cc4)N5CCOCC5 278 | 278,CHEMBL2346972,2.8,1.0,Fc1ccc(CN2CCN(CC2)C3=Nn4c(CC3)nnc4C(F)(F)F)cc1 279 | 279,CHEMBL216981,2.32,1.0,CC[C@@H](NC1=C(Nc2cccc(C(=O)N(C)C)c2O)C(=O)C1=O)c3oc(C)cc3 280 | 280,CHEMBL1783119,3.5,1.0,COc1ccc(cc1)C(=O)NCc2cn(nn2)c3cc(C)nc4ccc(OC)cc34 281 | 281,CHEMBL585,1.16,1.0,Nc1nc(N)c2nc(c(N)nc2n1)c3ccccc3 282 | 282,CHEMBL335744,2.7,1.0,CC(C)C(=O)Nc1nc(cc2ccccc12)c3ccccn3 283 | 283,CHEMBL474432,0.84,1.0,COc1cc2ncn(c3cc(OCc4ccccc4C(F)(F)F)c(s3)C(=O)O)c2cc1OC 284 | 284,CHEMBL1886726,1.73,1.0,CC1CN(C(=O)c2ccccc2)c3ccccc3NC1=O 285 | 285,CHEMBL575448,3.16,1.0,C[C@]1(CCCN1c2nc(Nc3cc([nH]n3)C4CC4)c5cccn5n2)C(=O)Nc6ccc(F)nc6 286 | 286,CHEMBL2057372,0.96,1.0,CC(C)(C)NS(=O)(=O)c1cncc(c1)c2ccc3nc(NC(=O)NCC(=O)N4CCOCC4)nn3c2 287 | 287,CHEMBL499028,0.91,1.0,COc1ccccc1c2nnc(N)[nH]2 288 | 288,CHEMBL654,2.82,1.0,CN1CCN2C(C1)c3ccccc3Cc4cccnc24 289 | 289,CHEMBL1682822,3.0,1.0,O=C1CCOc2nc(ccc12)C#CC3CCCC3 290 | 290,CHEMBL250961,4.0,1.0,Cc1ccc(F)cc1S(=O)(=O)N[C@@H]2CCN(Cc3ccc(cc3)c4ccccc4)C2 291 | 291,CHEMBL1068,1.28,1.0,NC(=O)N1c2ccccc2CC(=O)c3ccccc13 292 | 292,CHEMBL2142704,0.56,1.0,CC1CC(=O)Nc2ccccc2N1 293 | 293,CHEMBL1359777,3.46,1.0,Cc1cc(C(=O)CN2C=C(C=CC2=O)C(F)(F)F)c(C)n1Cc3ccccc3 294 | 
294,CHEMBL2062561,2.97,1.0,COc1ccc(cc1)c2cc(Nc3ccn(C)n3)n4ncc(C#N)c4n2 295 | 295,CHEMBL2147033,3.32,1.0,O=C1COC2(CCN(CC2)S(=O)(=O)c3ccc(cc3)c4ccc5cnccc5c4)CN1C6CC6 296 | 296,CHEMBL1789941,2.55,1.0,N#CC[C@H](C1CCCC1)n2cc(cn2)c3ncnc4[nH]ccc34 297 | 297,CHEMBL418822,3.05,1.0,CC(C)C(NC(=O)CN1C(=O)C(=CC=C1c2cccnc2)NC(=O)OCc3ccccc3)C(=O)C(F)(F)F 298 | 298,CHEMBL379787,3.3,1.0,[O-][N+](=O)c1cccc(c1)C(=O)Nc2nc3ccccc3n2CCN4CCOCC4 299 | 299,CHEMBL1770471,3.69,1.0,CSCCC(NC(=O)c1sccc1Cl)c2nc3ccccc3[nH]2 300 | 300,CHEMBL2035018,3.3,1.0,NC1(CCC1)c2ccc(cc2)c3nn4cccc4cc3c5ccccc5 301 | 301,CHEMBL1371,2.16,1.0,Clc1ccc2OC(=O)Nc2c1 302 | 302,CHEMBL1682825,2.5,1.0,O=C1CCOc2nc(CCc3ccccc3)ccc12 303 | 303,CHEMBL2041980,2.4,1.0,CS(=O)(=O)c1ccc(cc1)c2cnc(N)c(c2)c3ccc(nc3)C(F)(F)F 304 | 304,CHEMBL1916532,1.46,1.0,COc1ccc2ncc(F)c(CCN3CCC(CC3)NCc4cc5OCCOc5cn4)c2n1 305 | 305,CHEMBL119385,2.74,1.0,Fc1ccc(SC2=NN3C=NC(=O)C(=C3C=C2)c4c(Cl)cccc4Cl)c(F)c1 306 | 306,CHEMBL1934423,0.78,1.0,C[C@H](NC(=O)c1c(C)nn(C2CCOC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C 307 | 307,CHEMBL1467,-0.42,1.0,O=C1NC=Nc2[nH]ncc12 308 | 308,CHEMBL1213085,3.55,1.0,Cc1ccccc1N2C(=Nc3cccc(C)c3C2=O)Cn4nc(c5ccc(O)c(F)c5)c6c(N)ncnc46 309 | 309,CHEMBL976,2.43,1.0,O=C(C1CCCCC1)N2CC3N(CCc4ccccc34)C(=O)C2 310 | 310,CHEMBL1607641,0.97,1.0,Cc1nnsc1C(=O)Nc2ccccc2 311 | 311,CHEMBL237616,0.75,1.0,O=C(COc1ccccc1)N2CCOCC2 312 | 312,CHEMBL1559730,3.2,1.0,CN1CCN(CC1)c2ncnc3c2sc4nc(C)cc(C)c34 313 | 313,CHEMBL74355,1.7,1.0,COc1cccc([C@H](O)C2CCN(CCc3ccc(F)cc3)CC2)c1OC 314 | 314,CHEMBL1975242,1.94,1.0,O=C(Nc1nc2C(=O)NC=Nc2s1)c3ccccc3 315 | 315,CHEMBL75880,3.02,1.0,C(C(C1CCCCC1)C2CCCCC2)C3CCCCN3 316 | 316,CHEMBL813,-0.9,1.0,CCCCc1ncc(\C=C(/Cc2cccs2)\C(=O)O)n1Cc3ccc(cc3)C(=O)O 317 | 317,CHEMBL1180288,0.7,1.0,Oc1c2C(=O)N(NC(=O)c2nc3cc(Cl)ccc13)C(C4CC4)c5ccncc5 318 | 318,CHEMBL182150,3.13,1.0,CN(C)c1nc(N[C@@H]2CC[C@@H](CC2)NC(=O)c3ccc(F)c(F)c3)nc4ccccc14 319 | 319,CHEMBL2164670,3.57,1.0,COc1ccccc1C(NC(=O)[C@@H]2CCCC[C@H]2C(=O)N3CCN(Cc4ccc(F)cc4)CC3)C#N 320 | 
320,CHEMBL409493,2.31,1.0,Nc1ccccc1NC(=O)c2ccc(cc2)C3CCN(Cc4ccc(cc4)C(=O)NC5CC5)CC3 321 | 321,CHEMBL142715,3.3,1.0,C(CN1CCCCC1)C2CCc3cc(OCc4ccc(cc4)c5ccccc5)ccc3C2 322 | 322,CHEMBL205807,2.35,1.0,CC(C)CN1C(=O)N(C)C(=O)c2c1sc(Cc3c[nH]c4ccccc34)c2C(=O)N5CC[C@@H](O)C5 323 | 323,CHEMBL1165028,3.3,1.0,O=C(NCCSCc1ccccc1)c2ccccc2 324 | 324,CHEMBL1950724,0.82,1.0,CN[C@@H](C)C(=O)N[C@@H](C1CCCCC1)C(=O)N[C@H]2CCN(C2)C(=O)c3ccccc3 325 | 325,CHEMBL306262,3.2,1.0,CCCCCC[C@H]1OC(=O)CNC(=O)[C@H](NC(=O)[C@H](CO)NC(=O)[C@@H](NC(=O)[C@H](CC(C)C)N(C)C(=O)[C@@H]1C)[C@H](C)CC)[C@H](C)O 326 | 326,CHEMBL366080,1.8,1.0,CN(C1CCN(C)CC1)c2nccc(Nc3cc(NC(=O)c4ccnc(c4)N5CCOCC5)ccc3C)n2 327 | 327,CHEMBL1917461,0.35,1.0,Cc1ccc2c(n1)c(Sc3ccc(Cl)cc3)c(C)n2CC(=O)O 328 | 328,CHEMBL14690,1.13,1.0,Clc1ccc(CNC(=N)SCCCc2c[nH]cn2)cc1 329 | 329,CHEMBL551765,2.53,1.0,OC(=O)C1CCN(CC1)c2ncccc2c3ccc(Cl)c(c3)C(=O)NCC4CCCCCC4 330 | 330,CHEMBL204476,1.98,1.0,CC(C)CN1C(=O)N(C)C(=O)c2c1sc(Cc3cccc4ncccc34)c2C(=O)N5CC[C@@H](O)C5 331 | 331,CHEMBL1180299,1.1,1.0,Clc1ccc2C(=O)C3=C(Nc2c1)C(=O)NN(Cc4ccccn4)C3=O 332 | 332,CHEMBL1515447,2.36,1.0,CN1CCCN(CC1)C(c2ccccc2)c3ccc(Cl)cc3 333 | 333,CHEMBL1628691,3.62,1.0,Clc1cccc(c1Cl)n2nnnc2NCc3ccccc3Oc4ccccn4 334 | 334,CHEMBL1689115,1.01,1.0,OC(=O)COc1ccc(Cl)cc1CN2CCCN(CC2)S(=O)(=O)Cc3ccccc3 335 | 335,CHEMBL1623,3.39,1.0,Cc1cccc(CN2CCN(CC2)C(c3ccccc3)c4ccc(Cl)cc4)c1 336 | 336,CHEMBL1947155,2.28,1.0,C[C@@H](NCc1ccccc1c2ccc(CCNC[C@H](O)c3ccc(O)c4NC(=O)Sc34)cc2)c5ccccc5 337 | 337,CHEMBL1987793,3.89,1.0,Cc1ccccc1c2c(C(=O)O)n(CCCOc3cccc4ccccc34)c5ccccc25 338 | 338,CHEMBL1076211,1.65,1.0,CC(CN1CCCCC1)C(=O)c2ccc(C)cc2 339 | 339,CHEMBL1471868,3.19,1.0,Fc1ccc(NC(=O)c2cccnc2Oc3ccccc3)cc1 340 | 340,CHEMBL235581,1.49,1.0,Clc1ccc(CC2CCNCC2)cc1C(=O)NCC34CC5CC(CC(C5)C3)C4 341 | 341,CHEMBL316388,1.5,1.0,C[C@@H]1CN([C@@H](C)CN1C(=O)c2ccc(cc2)C#N)C(=O)[C@@](C)(O)C(F)(F)F 342 | 342,CHEMBL2035030,2.2,1.0,NC1(CCC1)c2ccc(cc2)c3ncc4ccncc4c3c5ccccc5 343 | 
343,CHEMBL401193,4.13,1.0,CN(C)C(=O)CN1CCN(CCc2c([nH]c3sc(cc23)C(C)(C)C(=O)N4C5CCC4CC5)c6cc(C)cc(C)c6)CC1 344 | 344,CHEMBL1480987,2.31,1.0,CC1CC(CC(C)(C)C1)OC(=O)C(O)c2ccccc2 345 | 345,CHEMBL2181927,2.51,1.0,CCOc1ncc(C)c2NC(=C([C@@H](c3ccc(cc3OC)C#N)c12)C(=O)N)C 346 | 346,CHEMBL1233655,0.55,1.0,CC(=O)Nc1nc(C)c(s1)c2cnc(F)c(NS(=O)(=O)c3sc(C)nc3C)c2 347 | 347,CHEMBL150606,3.2,1.0,COc1cc2c(Nc3ccc(Cl)cc3F)ncnc2cc1OCCn4cncn4 348 | 348,CHEMBL87388,3.52,1.0,COc1ccc(Cn2cc3N(CC(C)C)C(=O)N(C)C(=O)c3c2)cc1 349 | 349,CHEMBL256576,2.82,1.0,C[C@@H](Oc1cccc2ncnc(Nc3ccc4c(cnn4Cc5ccccn5)c3)c12)C(=O)N6CCOCC6 350 | 350,CHEMBL2141746,1.55,1.0,CCCNC(=O)CSc1ccc(cn1)S(=O)(=O)N2CCCC2 351 | 351,CHEMBL1732699,3.0,1.0,FC(F)(F)c1nnc2ccc(nn12)N3CCCCC3 352 | 352,CHEMBL1332736,1.3,1.0,O=C(Oc1ccccc1)N2CCOCC2 353 | 353,CHEMBL1779497,1.26,1.0,Clc1ccc(cc1C(=O)NCC2CCCCC2)N3N=CC(=O)NC3=O 354 | 354,CHEMBL205245,1.61,1.0,CC(C)CN1C(=O)N(C)C(=O)c2c1sc(Cn3c(C)nc(Cl)c3Cl)c2C(=O)N4CC[C@@H](O)C4 355 | 355,CHEMBL370252,0.74,1.0,CC1CCc2cc(F)cc3C(=O)C(=CN1c23)C(=O)O 356 | 356,CHEMBL1940108,3.11,1.0,COc1cccc(NC(=O)Cn2cc(Oc3ncnc4cc(OC)c(OC)cc34)cn2)c1 357 | 357,CHEMBL1481866,1.7,1.0,NC(=O)c1ccc(Oc2cccc3cccnc23)c(c1)[N+](=O)[O-] 358 | 358,CHEMBL1909734,2.68,1.0,Clc1ccccc1c2cnn[nH]2 359 | 359,CHEMBL551360,2.62,1.0,OCC(CO)N1C=Cc2c(NC(=O)CC34CC5CC(CC(C5)C3)C4)cccc2C1=O 360 | 360,CHEMBL1945035,1.58,1.0,O[C@@H](CNCCc1ccccc1CNCCc2ccccc2F)c3ccc(O)c4NC(=O)Sc34 361 | 361,CHEMBL1290331,3.07,1.0,Clc1ccc(N2CCN(CC2)C(=O)COCc3cscn3)c(Cl)c1 362 | 362,CHEMBL1086580,3.07,1.0,Oc1nc(nc2CCSCc12)c3ccc(cc3)C(F)(F)F 363 | 363,CHEMBL197536,1.0,1.0,Oc1ccc2C(=O)C=C(Oc2c1)N3CCOCC3 364 | 365,CHEMBL2178393,2.0,1.0,Nc1nnc(CCSCCc2nnc(NC(=O)Cc3ccccc3)s2)s1 365 | 366,CHEMBL90531,3.2,1.0,[O-][N+](=O)c1ccc2OC(CN(c2c1)c3cccc[n+]3[O-])(C(F)F)C(F)F 366 | 367,CHEMBL271023,0.79,1.0,[O-][S+](c1ccccc1)c2ccc3nnnn3n2 367 | 368,CHEMBL144960,3.33,1.0,O=C1C=COc2cc(OCc3ccccc3)ccc12 368 | 369,CHEMBL373250,2.4,1.0,Cn1ncnc1COc2nn3c(nnc3cc2C(C)(C)C)c4cc(F)ccc4F 369 
| 370,CHEMBL42,3.08,1.0,CN1CCN(CC1)C2=Nc3cc(Cl)ccc3Nc4ccccc24 370 | 371,CHEMBL470915,3.53,1.0,Nc1c(NC2CC3CCC2C3)nc(nc1N4CCOCC4)C#N 371 | 372,CHEMBL1962631,-0.56,1.0,NC(=N)NC(=O)c1nc(Cl)c(NCc2ccccn2)nc1N 372 | 373,CHEMBL181502,3.2,1.0,CN(C1CCN(CCC(c2ccccc2)c3ccccc3)CC1)C(=O)C4CCC4 373 | 374,CHEMBL1499868,4.48,1.0,CC(C(Cc1nc2ccccc2[nH]1)c3nc4ccccc4[nH]3)c5ccccc5 374 | 375,CHEMBL1079175,2.5,1.0,NC1(CCC1)c2ccc(cc2)c3nc4C=CN5C(=O)NN=C5c4cc3c6ccccc6 375 | 376,CHEMBL404957,1.11,1.0,CC(C)C[C@H](N)c1oc(nn1)S(=O)(=O)Cc2ccc(F)cc2 376 | 377,CHEMBL1617016,3.12,1.0,Nc1c2CCN(c3ccccc3)c2nc4ccc(Br)cc14 377 | 378,CHEMBL1392670,4.4,1.0,Oc1ccccc1OC(=O)c2cccc3ccccc23 378 | 379,CHEMBL2208436,3.09,1.0,O[C@@H](CNC1CCN(Cc2ccc(Cl)c(Cl)c2)CC1)COc3cccc(c3)C#N 379 | 380,CHEMBL1712280,4.28,1.0,C1Oc2ccccc2C3=NN(CC13)c4ccccc4 380 | 381,CHEMBL284275,0.2,1.0,COc1ccc(Cc2c(N)n[nH]c2N)cc1 381 | 382,CHEMBL2216830,2.04,1.0,CCN(CC)CCCCNc1ncc2CN(C(=O)N(Cc3cccc(NC(=O)C=C)c3)c2n1)c4c(Cl)c(OC)cc(OC)c4Cl 382 | 383,CHEMBL2171169,2.2,1.0,CC(C)N(CCCNC(=O)Nc1ccc(cc1)C(C)(C)C)C[C@H]2O[C@H]([C@H](O)[C@@H]2O)n3cnc4c(N)ncnc34 383 | 384,CHEMBL1852688,3.7,1.0,CCN1CCN(CC1)c2ccc(Nc3cc(ncn3)N(C)C(=O)Nc4c(Cl)c(OC)cc(OC)c4Cl)cc2 384 | 385,CHEMBL15245,1.65,1.0,COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@H]12 385 | 386,CHEMBL143305,3.72,1.0,Cc1cc(CCCOc2c(Cl)cc(cc2Cl)C3=NCCO3)on1 386 | 387,CHEMBL21731,0.89,1.0,CNCCCC12CCC(c3ccccc13)c4ccccc24 387 | 388,CHEMBL492234,2.6,1.0,COC(=O)c1ccc(C)c(NS(=O)(=O)c2ccc3N(C)SC(=O)c3c2)c1 388 | 389,CHEMBL2158534,2.0,1.0,CC1(CC1)c2nc(ncc2C(=O)N[C@@H]3C4CC5CC3C[C@@](O)(C5)C4)N6CCOCC6 389 | 390,CHEMBL1650698,2.47,1.0,C[C@H](Nc1ncc(F)c(Nc2cc([nH]n2)C3CC3)n1)c4ncc(F)cn4 390 | 391,CHEMBL1784338,1.43,1.0,Cc1cc(CCC2CCN(CC2)S(=O)(=O)CC3(CCOCC3)N(O)C=O)c(C)cn1 391 | 392,CHEMBL235392,1.55,1.0,Clc1ccc(CN2CC3CNCC(C2)O3)cc1C(=O)NCC45CC6CC(CC(C6)C4)C5 392 | 393,CHEMBL1682820,3.0,1.0,O=C1CCOc2cc(COc3ccccc3)ccc12 393 | 394,CHEMBL299672,3.13,1.0,COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCCC4 
394 | 395,CHEMBL2152917,3.4,1.0,CO[C@@H]1CC[C@@]2(CC1)Cc3ccc(OCC(C)C)cc3C24N=C(C)C(=N4)N 395 | 396,CHEMBL239513,2.87,1.0,O=C(COc1ccccc1)c2ccccc2 396 | 397,CHEMBL1412343,2.7,1.0,Cc1ccccc1NC(=O)CCS(=O)(=O)c2ccc(Br)s2 397 | 398,CHEMBL12980,1.19,1.0,OC(C(=O)OC1CN2CCC1CC2)(c3ccccc3)c4ccccc4 398 | 399,CHEMBL1082812,1.71,1.0,O=C1NC(=NC(=C1C#N)c2ccccc2)SCCc3ccccc3 399 | 400,CHEMBL1415894,3.8,1.0,N(c1ccccc1)c2cc(Nc3ccccc3)[nH]n2 400 | 401,CHEMBL1934414,2.0,1.0,C[C@H](NC(=O)c1c(C)nn(C2CCCC2)c1NS(=O)(=O)c3ccc(C)cc3)C(C)(C)C 401 | 402,CHEMBL1310086,3.47,1.0,COc1ccccc1Cn2c(C)nc3ccccc23 402 | 403,CHEMBL1807822,2.28,1.0,O[C@@H](CNCCCOCCOCCc1cccc2ccccc12)c3ccc(O)c4NC(=O)Sc34 403 | 404,CHEMBL1096,1.1,1.0,CC(C)c1ccc2Oc3nc(N)c(cc3C(=O)c2c1)C(=O)O 404 | 405,CHEMBL1234354,2.2,1.0,COc1ccc(cn1)C2=Cc3c(C)nc(N)nc3N([C@@H]4CC[C@H](CC4)OCCO)C2=O 405 | 406,CHEMBL578288,2.91,1.0,NC(=NC#N)c1sc(Nc2ccccc2)nc1N 406 | 407,CHEMBL168899,1.4,1.0,OB1N(C(=O)Nc2ccccc12)c3ccccc3 407 | 408,CHEMBL1235773,2.55,1.0,COc1ccc2ncc(C#N)c(CCN3CCC(CC3)NCc4cc5SCOc5cn4)c2c1 408 | 409,CHEMBL2087849,4.33,1.0,Cn1cncc1c2c3C(=O)N(CC4CC4)C(=O)N(CC5CC5)c3nn2Cc6ccnc7ccc(Cl)cc67 409 | 410,CHEMBL2031236,2.06,1.0,CN1CCN(CC1)c2ccc3N=CN(C(=O)c3c2)c4cc(NC(=O)c5cscn5)ccc4C 410 | 411,CHEMBL2325699,2.6,1.0,CS(=O)(=O)C1(CC1)c2cc(nc(n2)c3cccc4[nH]ccc34)N5CC6CCC(C5)O6 411 | 412,CHEMBL475386,1.14,1.0,CNC1=Nc2ncccc2C(=NC1c3cccs3)c4occn4 412 | 413,CHEMBL528217,2.27,1.0,C(CCCCNc1cc(nc2ccccc12)c3ccccc3)CCCNc4cc(nc5ccccc45)c6ccccc6 413 | 414,CHEMBL1289785,3.93,1.0,Clc1ccc(N2CCN(CC2)C(=O)CCCc3ccncc3)c(Cl)c1 414 | 415,CHEMBL469790,3.15,1.0,COc1cc(ccc1N2CC[C@@H](O)C2)N3N=Nc4cc(sc4C3=O)c5ccc(Cl)cc5 415 | 416,CHEMBL190044,3.77,1.0,CCC(COC(=O)c1cc(OC)c(OC)c(OC)c1)(N(C)C)c2ccccc2 416 | 417,CHEMBL2153181,1.95,1.0,CCCSc1ncccc1C(=O)N2CCCC2c3ccncc3 417 | 418,CHEMBL1527751,2.25,1.0,Oc1ncnc2scc(c3ccsc3)c12 418 | 419,CHEMBL317462,3.14,1.0,OC1(CN2CCC1CC2)C#Cc3ccc(cc3)c4ccccc4 419 | 
-------------------------------------------------------------------------------- /data/split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # MolPROP fuses molecular language and graph for property prediction. 5 | # Copyright © 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved. 6 | 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | 17 | # You should have received a copy of the GNU General Public License 18 | # along with this program. If not, see https://www.gnu.org/licenses/. 
19 | 20 | import os 21 | import time 22 | import random 23 | import argparse 24 | import torch 25 | import pandas as pd 26 | from tokenizers import Tokenizer 27 | from transformers import BertModel 28 | from transformers.models.roberta import RobertaModel, RobertaTokenizer 29 | import pandas as pd 30 | import numpy as np 31 | import deepchem as dc 32 | import rdkit 33 | import os 34 | import argparse 35 | 36 | 37 | if __name__ == '__main__': 38 | os.chdir('pathway/to/data') 39 | 40 | # arg for data 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument('--data', type=str, default='esolv') 43 | parser.add_argument('--X', type=str, default='Compound ID') 44 | parser.add_argument('--y', type=str, default='measured log solubility in mols per litre') 45 | parser.add_argument('--ids', type=str, default='smiles') 46 | parser.add_argument('--frac_train', type=float, default=0.9) 47 | parser.add_argument('--seed', type=int, default=123) 48 | 49 | args = parser.parse_args() 50 | 51 | data = pd.read_csv(args.data + '.csv') 52 | print(data.head()) 53 | 54 | dataset = dc.data.DiskDataset.from_numpy(X=data[args.X], y=data[args.y],ids=data[args.ids]) 55 | scaffoldsplitter = dc.splits.ScaffoldSplitter() 56 | train, holdout = scaffoldsplitter.train_test_split(dataset, frac_train=args.frac_train, seed=args.seed) 57 | train= train.to_dataframe() 58 | holdout = holdout.to_dataframe() 59 | train.to_csv("train_" + args.data + ".csv") 60 | holdout.to_csv("holdout_" + args.data + ".csv") 61 | -------------------------------------------------------------------------------- /data/train_freesolv.csv: -------------------------------------------------------------------------------- 1 | ,X,y,w,ids 2 | 0,methanesulfonyl chloride,-4.87,1.0,CS(=O)(=O)Cl 3 | 1,3-methylbut-1-ene,1.83,1.0,CC(C)C=C 4 | 2,heptan-1-ol,-4.21,1.0,CCCCCCCO 5 | 3,"2,3-dimethylbutane",2.34,1.0,CC(C)C(C)C 6 | 4,2-methylpentan-2-ol,-3.92,1.0,CCCC(C)(C)O 7 | 5,butan-2-ol,-4.62,1.0,CC[C@H](C)O 8 | 
6,dibromomethane,-1.96,1.0,C(Br)Br 9 | 7,2-methylpentan-3-ol,-3.88,1.0,CC[C@H](C(C)C)O 10 | 8,ethyl pentanoate,-2.49,1.0,CCCCC(=O)OCC 11 | 9,"(2Z)-3,7-dimethylocta-2,6-dien-1-ol",-4.78,1.0,CC(=CCC/C(=C\CO)/C)C 12 | 10,"2,2-dimethylpentane",2.88,1.0,CCCC(C)(C)C 13 | 11,2-acetoxyethyl acetate,-6.34,1.0,CC(=O)OCCOC(=O)C 14 | 12,ethion,-6.1,1.0,CCOP(=S)(OCC)SCSP(=S)(OCC)OCC 15 | 13,pentanenitrile,-3.52,1.0,CCCCC#N 16 | 14,2-methylpropan-2-ol,-4.47,1.0,CC(C)(C)O 17 | 15,"2,4-dimethylpentan-3-one",-2.74,1.0,CC(C)C(=O)C(C)C 18 | 16,propanal,-3.43,1.0,CCC=O 19 | 17,"N,N-dimethylformamide",-7.81,1.0,CN(C)C=O 20 | 18,"penta-1,4-diene",0.93,1.0,C=CCC=C 21 | 19,N-ethylethanamine,-4.07,1.0,CCNCC 22 | 20,isopentyl formate,-2.13,1.0,CC(C)CCOC=O 23 | 21,decan-1-ol,-3.64,1.0,CCCCCCCCCCO 24 | 22,ethyl propanoate,-2.68,1.0,CCC(=O)OCC 25 | 23,nonane,3.13,1.0,CCCCCCCCC 26 | 24,N-methylacetamide,-10.0,1.0,CC(=O)NC 27 | 25,non-1-ene,2.06,1.0,CCCCCCCC=C 28 | 26,"(2R,3R,4R,5R)-Hexan-1,2,3,4,5,6-hexol",-23.62,1.0,C([C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O)O 29 | 27,methyl butanoate,-2.83,1.0,CCCC(=O)OC 30 | 28,N-propylpropan-1-amine,-3.65,1.0,CCCNCCC 31 | 29,tetrafluoromethane,3.12,1.0,C(F)(F)(F)F 32 | 30,2-methylbutan-1-ol,-4.42,1.0,CC[C@@H](C)CO 33 | 31,but-1-yne,-0.16,1.0,CCC#C 34 | 32,hydrazine,-9.3,1.0,NN 35 | 33,octan-1-amine,-3.65,1.0,CCCCCCCCN 36 | 34,ammonia,-4.29,1.0,N 37 | 35,"1,1-difluoroethane",-0.11,1.0,CC(F)F 38 | 36,butadiene,0.56,1.0,C=CC=C 39 | 37,"N,N-dimethylmethanamine",-3.2,1.0,CN(C)C 40 | 38,hexanamide,-9.31,1.0,CCCCCC(=O)N 41 | 39,isobutyl nitrate,-1.88,1.0,CC(C)CO[N+](=O)[O-] 42 | 40,2-(nitrooxy)ethan-1-ol,-8.18,1.0,C(CO[N+](=O)[O-])O 43 | 41,octan-2-one,-2.88,1.0,CCCCCCC(=O)C 44 | 42,ethanamine,-4.5,1.0,CCN 45 | 43,"1,1-dichloroethane",-0.84,1.0,CC(Cl)Cl 46 | 44,1-bromooctane,0.52,1.0,CCCCCCCCBr 47 | 45,n-butane,2.1,1.0,CCCC 48 | 46,chloromethane,-0.55,1.0,CCl 49 | 47,1-bromo-2-methyl-propane,-0.03,1.0,CC(C)CBr 50 | 48,2-isopropylsulfanylpropane,-1.21,1.0,CC(C)SC(C)C 
51 | 49,heptane,2.67,1.0,CCCCCCC 52 | 50,diiodomethane,-2.49,1.0,C(I)I 53 | 51,"N,N-dipropyl(propylsulfanyl)formamide",-4.13,1.0,CCCN(CCC)C(=O)SCCC 54 | 52,nitromethane,-4.02,1.0,C[N+](=O)[O-] 55 | 53,methoxyethane,-2.1,1.0,CCOC 56 | 54,"2-chloro-1,1,1-trimethoxy-ethane",-4.59,1.0,COC(CCl)(OC)OC 57 | 55,isobutane,2.3,1.0,CC(C)C 58 | 56,3-methylbutanoic acid,-6.09,1.0,CC(C)CC(=O)O 59 | 57,1-chloropropane,-0.33,1.0,CCCCl 60 | 58,1-propylsulfanylpropane,-1.28,1.0,CCCSCCC 61 | 59,hexan-3-ol,-4.06,1.0,CCC[C@H](CC)O 62 | 60,acetonitrile,-3.88,1.0,CC#N 63 | 61,"2-chloro-2-(difluoromethoxy)-1,1,1-trifluoro-ethane",0.1,1.0,[C@@H](C(F)(F)F)(OC(F)F)Cl 64 | 62,"hexa-1,5-diene",1.01,1.0,C=CCCC=C 65 | 63,methyl acetate,-3.13,1.0,CC(=O)OC 66 | 64,propanethiol,-1.1,1.0,CCCS 67 | 65,heptan-2-one,-3.04,1.0,CCCCCC(=O)C 68 | 66,ethyl hexanoate,-2.23,1.0,CCCCCC(=O)OCC 69 | 67,1-methoxypropane,-1.66,1.0,CCCOC 70 | 68,N-methylmethanamine,-4.29,1.0,CNC 71 | 69,"1,1,2,3,3,3-hexafluoroprop-1-ene",2.93,1.0,C(=C(F)F)(C(F)(F)F)F 72 | 70,iodomethane,-0.89,1.0,CI 73 | 71,"(E)-1,2-dichloroethylene",-0.78,1.0,C(=C/Cl)\Cl 74 | 72,n-pentane,2.3,1.0,CCCCC 75 | 73,butanenitrile,-3.64,1.0,CCCC#N 76 | 74,"2-bromo-1,1,1,2-tetrafluoro-ethane",0.5,1.0,[C@@H](C(F)(F)F)(F)Br 77 | 75,[(2S)-butan-2-yl] nitrate,-1.82,1.0,CC[C@H](C)O[N+](=O)[O-] 78 | 76,"2,3-dimethylpentane",2.52,1.0,CC[C@@H](C)C(C)C 79 | 77,4-methylpentan-2-ol,-3.73,1.0,C[C@H](CC(C)C)O 80 | 78,"1,1-dichloroethylene",0.25,1.0,C=C(Cl)Cl 81 | 79,2-methylpropan-1-ol,-4.5,1.0,CC(C)CO 82 | 80,propyl propanoate,-2.44,1.0,CCCOC(=O)CC 83 | 81,hexachloroethane,-0.64,1.0,C(C(Cl)(Cl)Cl)(Cl)(Cl)Cl 84 | 82,2-chloro-2-methyl-propane,1.09,1.0,CC(C)(C)Cl 85 | 83,isoprene,0.68,1.0,CC(=C)C=C 86 | 84,ethylene glycol,-9.3,1.0,C(CO)O 87 | 85,propyl formate,-2.48,1.0,CCCOC=O 88 | 86,hexanoic acid,-6.21,1.0,CCCCCC(=O)O 89 | 87,diethyl butanedioate,-5.71,1.0,CCOC(=O)CCC(=O)OCC 90 | 88,methyl 2-chloroacetate,-4.0,1.0,COC(=O)CCl 91 | 89,butanal,-3.18,1.0,CCCC=O 92 | 
90,"1,1,2-trichloroethylene",-0.44,1.0,C(=C(Cl)Cl)Cl 93 | 91,"N,N-diethylethanamine",-3.22,1.0,CCN(CC)CC 94 | 92,3-chloroprop-1-ene,-0.57,1.0,C=CCCl 95 | 93,formaldehyde,-2.75,1.0,C=O 96 | 94,"1,1,1-trimethoxyethane",-4.42,1.0,CC(OC)(OC)OC 97 | 95,2-methoxypropane,-2.01,1.0,CC(C)OC 98 | 96,"1,1,2-trichloroethane",-1.99,1.0,C(C(Cl)Cl)Cl 99 | 97,isopropyl acetate,-2.64,1.0,CC(C)OC(=O)C 100 | 98,"2,3-dimethylbuta-1,3-diene",0.4,1.0,CC(=C)C(=C)C 101 | 99,hex-1-ene,1.58,1.0,CCCCC=C 102 | 100,hydrogen sulfide,-0.7,1.0,S 103 | 101,ethoxyethane,-1.59,1.0,CCOCC 104 | 102,hexyl acetate,-2.26,1.0,CCCCCCOC(=O)C 105 | 103,pentanoic acid,-6.16,1.0,CCCCC(=O)O 106 | 104,bromoethane,-0.74,1.0,CCBr 107 | 105,hexan-1-ol,-4.4,1.0,CCCCCCO 108 | 106,hexyl nitrate,-1.66,1.0,CCCCCCO[N+](=O)[O-] 109 | 107,bromoform,-2.13,1.0,C(Br)(Br)Br 110 | 108,2-propoxyethanol,-6.4,1.0,CCCOCCO 111 | 109,butyric acid,-6.35,1.0,CCCC(=O)O 112 | 110,"1,1,1-trifluoro-2,2,2-trimethoxyethane",-0.8,1.0,COC(C(F)(F)F)(OC)OC 113 | 111,bromo-trifluoro-methane,1.79,1.0,C(F)(F)(F)Br 114 | 112,butan-1-ol,-4.72,1.0,CCCCO 115 | 113,ethyl acetate,-2.94,1.0,CCOC(=O)C 116 | 114,isobutyl 2-methylpropanoate,-1.69,1.0,CC(C)COC(=O)C(C)C 117 | 115,2-methoxy-2-methyl-propane,-2.21,1.0,CC(C)(C)OC 118 | 116,pentan-3-one,-3.41,1.0,CCC(=O)CC 119 | 117,"methyl 2,2,2-trifluoroacetate",-1.1,1.0,COC(=O)C(F)(F)F 120 | 118,"3,3-dimethylbutan-2-one",-3.11,1.0,CC(=O)C(C)(C)C 121 | 119,methyl methanesulfonate,-4.87,1.0,COS(=O)(=O)C 122 | 120,N-isopropylpropan-2-amine,-3.22,1.0,CC(C)NC(C)C 123 | 121,heptan-1-amine,-3.79,1.0,CCCCCCCN 124 | 122,propane,2.0,1.0,CCC 125 | 123,dichloromethane,-1.31,1.0,C(Cl)Cl 126 | 124,"methyl 2,2-dimethylpropanoate",-2.4,1.0,CC(C)(C)C(=O)OC 127 | 125,trichloro(nitro)methane,-1.45,1.0,C([N+](=O)[O-])(Cl)(Cl)Cl 128 | 126,nonan-2-one,-2.49,1.0,CCCCCCCC(=O)C 129 | 127,butan-1-amine,-4.24,1.0,CCCCN 130 | 128,ethyl butanoate,-2.49,1.0,CCCC(=O)OCC 131 | 129,1-iodohexane,0.08,1.0,CCCCCCI 132 | 
130,"1,1,2-trichloro-1,2,2-trifluoro-ethane",1.77,1.0,C(C(F)(Cl)Cl)(F)(F)Cl 133 | 131,trimethyl phosphate,-8.7,1.0,COP(=O)(OC)OC 134 | 132,isohexane,2.51,1.0,CCCC(C)C 135 | 133,"2-chloro-1,1,1-trifluoro-ethane",0.06,1.0,C(C(F)(F)F)Cl 136 | 134,ethylene,1.28,1.0,C=C 137 | 135,1-iodopentane,-0.14,1.0,CCCCCI 138 | 136,trimethoxymethane,-4.42,1.0,COC(OC)OC 139 | 137,decane,3.16,1.0,CCCCCCCCCC 140 | 138,"1,2-dinitroxypropane",-4.95,1.0,C[C@@H](CO[N+](=O)[O-])O[N+](=O)[O-] 141 | 139,prop-1-ene,1.32,1.0,CC=C 142 | 140,"(1R)-2,2,2-trichloro-1-dimethoxyphosphoryl-ethanol",-12.74,1.0,COP(=O)([C@H](C(Cl)(Cl)Cl)O)OC 143 | 141,"(2E)-3,7-dimethylocta-2,6-dien-1-ol",-4.45,1.0,CC(=CCC/C(=C/CO)/C)C 144 | 142,"2,3,4-trimethylpentane",2.56,1.0,CC(C)C(C)C(C)C 145 | 143,3-methylbutan-2-one,-3.24,1.0,CC(C)C(=O)C 146 | 144,N-butylbutan-1-amine,-3.24,1.0,CCCCNCCCC 147 | 145,butane-1-thiol,-0.99,1.0,CCCCS 148 | 146,chloro-difluoro-methane,-0.5,1.0,C(F)(F)Cl 149 | 147,prop-1-yne,-0.48,1.0,CC#C 150 | 148,nonanal,-2.07,1.0,CCCCCCCCC=O 151 | 149,propionic acid,-6.46,1.0,CCC(=O)O 152 | 150,chloroform,-1.08,1.0,C(Cl)(Cl)Cl 153 | 151,methane,2.0,1.0,C 154 | 152,1-chlorobutane,-0.16,1.0,CCCCCl 155 | 153,isobutyl formate,-2.22,1.0,CC(C)COC=O 156 | 154,"1,2-diethoxyethane",-3.54,1.0,CCOCCOCC 157 | 155,pentyl propanoate,-2.11,1.0,CCCCCOC(=O)CC 158 | 156,2-methylbut-2-ene,1.31,1.0,CC=C(C)C 159 | 157,"1,2-dichloroethane",-1.79,1.0,C(CCl)Cl 160 | 158,"3,3-dimethylpentane",2.56,1.0,CCC(C)(C)CC 161 | 159,"2,2-dichloro-1,1-difluoro-1-methoxy-ethane",-1.12,1.0,COC(C(Cl)Cl)(F)F 162 | 160,2-ethoxyethyl acetate,-5.31,1.0,CCOCCOC(=O)C 163 | 161,2-methylbutan-2-ol,-4.43,1.0,CCC(C)(C)O 164 | 162,dimethoxymethane,-2.93,1.0,COCOC 165 | 163,pentan-3-ol,-4.35,1.0,CCC(CC)O 166 | 164,undecan-2-one,-2.15,1.0,CCCCCCCCCC(=O)C 167 | 165,1-bromo-2-chloro-ethane,-1.95,1.0,C(CBr)Cl 168 | 166,iodoethane,-0.74,1.0,CCI 169 | 167,2-bromopropane,-0.48,1.0,CC(C)Br 170 | 168,methylsulfonylmethane,-10.08,1.0,CS(=O)(=O)C 171 | 
169,pent-2-ene,1.31,1.0,CC/C=C\C 172 | 170,ethane,1.83,1.0,CC 173 | 171,ethylsulfanylethane,-1.46,1.0,CCSCC 174 | 172,chloro-fluoro-methane,-0.77,1.0,C(F)Cl 175 | 173,pent-1-ene,1.68,1.0,CCCC=C 176 | 174,hept-1-yne,0.6,1.0,CCCCCC#C 177 | 175,decan-2-one,-2.34,1.0,CCCCCCCCC(=O)C 178 | 176,pentanal,-3.03,1.0,CCCCC=O 179 | 177,1-nitrobutane,-3.09,1.0,CCCC[N+](=O)[O-] 180 | 178,1-chloro-2-(2-chloroethoxy)ethane,-4.23,1.0,C(CCl)OCCCl 181 | 179,1-nitroethane,-3.71,1.0,CC[N+](=O)[O-] 182 | 180,bromomethane,-0.82,1.0,CBr 183 | 181,methanol,-5.1,1.0,CO 184 | 182,heptanal,-2.67,1.0,CCCCCCC=O 185 | 183,octanal,-2.29,1.0,CCCCCCCC=O 186 | 184,fluoromethane,-0.22,1.0,CF 187 | 185,methylsulfinylmethane,-9.28,1.0,CS(=O)C 188 | 186,1-bromobutane,-0.4,1.0,CCCCBr 189 | 187,nonan-1-ol,-3.88,1.0,CCCCCCCCCO 190 | 188,"1,1,2,2-tetrachloroethylene",0.1,1.0,C(=C(Cl)Cl)(Cl)Cl 191 | 189,2-bromo-2-methyl-propane,0.84,1.0,CC(C)(C)Br 192 | 190,"1,1,1,2-tetramethoxyethane",-5.73,1.0,COCC(OC)(OC)OC 193 | 191,acetamide,-9.71,1.0,CC(=O)N 194 | 192,dimethyl sulfate,-5.1,1.0,COS(=O)(=O)OC 195 | 193,"1,1,2,2-tetrachloroethane",-2.37,1.0,C(C(Cl)Cl)(Cl)Cl 196 | 194,1-bromohexane,0.18,1.0,CCCCCCBr 197 | 195,1-bromoheptane,0.34,1.0,CCCCCCCBr 198 | 196,"3,3,3-trimethoxypropanenitrile",-6.4,1.0,COC(CC#N)(OC)OC 199 | 197,2-chlorobutane,0.0,1.0,CC[C@H](C)Cl 200 | 198,diethyl propanedioate,-6.0,1.0,CCOC(=O)CC(=O)OCC 201 | 199,methylsulfanylmethane,-1.61,1.0,CSC 202 | 200,methyl pentanoate,-2.56,1.0,CCCCC(=O)OC 203 | 201,2-methylpent-1-ene,1.47,1.0,CCCC(=C)C 204 | 202,chloroethylene,-0.59,1.0,C=CCl 205 | 203,heptan-4-one,-2.92,1.0,CCCC(=O)CCC 206 | 204,propyl butanoate,-2.28,1.0,CCCC(=O)OCCC 207 | 205,2-ethoxyethanol,-6.69,1.0,CCOCCO 208 | 206,3-methylpentane,2.51,1.0,CCC(C)CC 209 | 207,1-nitropropane,-3.34,1.0,CCC[N+](=O)[O-] 210 | 208,ethanol,-5.0,1.0,CCO 211 | 209,ethyl formate,-2.56,1.0,CCOC=O 212 | 210,octan-1-ol,-4.09,1.0,CCCCCCCCO 213 | 211,but-1-ene,1.38,1.0,CCC=C 214 | 212,carbon 
tetrachloride,0.08,1.0,C(Cl)(Cl)(Cl)Cl 215 | 213,methyldisulfanylmethane,-1.83,1.0,CSSC 216 | 214,"2,3-diacetoxypropyl acetate",-8.84,1.0,CC(=O)OCC(COC(=O)C)OC(=O)C 217 | 215,methoxymethane,-1.91,1.0,COC 218 | 216,hexane,2.48,1.0,CCCCCC 219 | 217,"1,2-dibromoethane",-2.33,1.0,C(CBr)Br 220 | 218,"1,1,1,2,2-pentachloroethane",-1.23,1.0,C(C(Cl)(Cl)Cl)(Cl)Cl 221 | 219,propyl acetate,-2.79,1.0,CCCOC(=O)C 222 | 220,ethanethiol,-1.14,1.0,CCS 223 | 221,ethyldisulfanylethane,-1.64,1.0,CCSSCC 224 | 222,pentan-2-one,-3.52,1.0,CCCC(=O)C 225 | 223,acetic acid,-6.69,1.0,CC(=O)O 226 | 224,acetaldehyde,-3.5,1.0,CC=O 227 | 225,hex-1-yne,0.29,1.0,CCCCC#C 228 | 226,propanenitrile,-3.84,1.0,CCC#N 229 | 227,butyl acetate,-2.64,1.0,CCCCOC(=O)C 230 | 228,aldicarb,-9.84,1.0,CC(C)(/C=N\OC(=O)NC)SC 231 | 229,2-methylpropanal,-2.86,1.0,CC(C)C=O 232 | 230,propionamide,-9.4,1.0,CCC(=O)N 233 | 231,1-bromopropane,-0.56,1.0,CCCBr 234 | 232,2-chloropropane,-0.25,1.0,CC(C)Cl 235 | 233,"1,3-dichloropropane",-1.89,1.0,C(CCl)CCl 236 | 234,"1,2-dichloropropane",-1.27,1.0,C[C@@H](CCl)Cl 237 | 235,diethoxy-(ethylsulfanylmethylsulfanyl)-thioxo-$l^{5}-phosphane,-4.37,1.0,CCOP(=S)(OCC)SCSCC 238 | 236,acetone,-3.8,1.0,CC(=O)C 239 | 237,2-methylprop-1-ene,1.16,1.0,CC(=C)C 240 | 238,1-nitropentane,-2.82,1.0,CCCCC[N+](=O)[O-] 241 | 239,(2E)-hex-2-enal,-3.68,1.0,CCC/C=C/C=O 242 | 240,octane,2.88,1.0,CCCCCCCC 243 | 241,"(3R)-3,7-Dimethylocta-1,6-dien-3-yl acetate",-2.49,1.0,CC(=CCC[C@](C)(C=C)OC(=O)C)C 244 | 242,"1,3-bis-(nitrooxy)butane",-4.29,1.0,C[C@@H](CCO[N+](=O)[O-])O[N+](=O)[O-] 245 | 243,2-isopropoxypropane,-0.53,1.0,CC(C)OC(C)C 246 | 244,2-methylhexane,2.93,1.0,CCCCC(C)C 247 | 245,"2-bromo-2-chloro-1,1,1-trifluoro-ethane",-0.11,1.0,[C@@H](C(F)(F)F)(Cl)Br 248 | 246,1-butoxybutane,-0.83,1.0,CCCCOCCCC 249 | 247,"2,4-dimethylpentane",2.83,1.0,CC(C)CC(C)C 250 | 248,but-2-enal,-4.22,1.0,C/C=C/C=O 251 | 249,3-methylhexane,2.71,1.0,CCC[C@H](C)CC 252 | 250,"1,2-dimethoxyethane",-4.84,1.0,COCCOC 253 | 
251,isopentyl acetate,-2.21,1.0,CC(C)CCOC(=O)C 254 | 252,1-chlorohexane,0.0,1.0,CCCCCCCl 255 | 253,4-methylpentan-2-one,-3.05,1.0,CC(C)CC(=O)C 256 | 254,hexanal,-2.81,1.0,CCCCCC=O 257 | 255,"1,1-diethoxyethane",-3.28,1.0,CCOC(C)OCC 258 | 256,methanethiol,-1.2,1.0,CS 259 | 257,prop-2-en-1-ol,-5.03,1.0,C=CCO 260 | 258,methylsulfanylethane,-1.5,1.0,CCSC 261 | 259,pentyl acetate,-2.51,1.0,CCCCCOC(=O)C 262 | 260,chloroethane,-0.63,1.0,CCCl 263 | 261,3-methylbutan-1-ol,-4.42,1.0,CC(C)CCO 264 | 262,pentan-1-amine,-4.09,1.0,CCCCCN 265 | 263,"2,2,5-trimethylhexane",2.93,1.0,CC(C)CCC(C)(C)C 266 | 264,2-butoxyethanol,-6.25,1.0,CCCCOCCO 267 | 265,2-nitropropane,-3.13,1.0,CC(C)[N+](=O)[O-] 268 | 266,glycerol,-13.43,1.0,C(C(CO)O)O 269 | 267,1-iodopropane,-0.53,1.0,CCCI 270 | 268,2-methoxyethanamine,-6.55,1.0,COCCN 271 | 269,"1,1,1,2-tetrachloroethane",-1.43,1.0,C(C(Cl)(Cl)Cl)Cl 272 | 270,methyl propanoate,-2.93,1.0,CCC(=O)OC 273 | 271,2-methoxyethanol,-6.62,1.0,COCCO 274 | 272,methyl formate,-2.78,1.0,COC=O 275 | 273,"1,4-dichlorobutane",-2.32,1.0,C(CCCl)CCl 276 | 274,nitroxyacetone,-5.99,1.0,CC(=O)CO[N+](=O)[O-] 277 | 275,methyl hexanoate,-2.49,1.0,CCCCCC(=O)OC 278 | 276,"1,1,1-trifluoropropan-2-ol",-4.16,1.0,C[C@@H](C(F)(F)F)O 279 | 277,1-bromo-pentane,-0.1,1.0,CCCCCBr 280 | 278,oct-1-ene,1.92,1.0,CCCCCCC=C 281 | 279,propan-2-ol,-4.74,1.0,CC(C)O 282 | 280,hexan-1-amine,-3.95,1.0,CCCCCCN 283 | 281,3-nitrooxypropyl nitrate,-4.8,1.0,C(CO[N+](=O)[O-])CO[N+](=O)[O-] 284 | 282,pentan-1-ol,-4.57,1.0,CCCCCO 285 | 283,pentan-2-ol,-4.39,1.0,CCC[C@@H](C)O 286 | 284,3-methylheptane,2.97,1.0,CCCC[C@@H](C)CC 287 | 285,diethyl (2R)-2-dimethoxyphosphinothioylsulfanylbutanedioate,-8.15,1.0,CCOC(=O)C[C@H](C(=O)OCC)SP(=S)(OC)OC 288 | 286,hept-2-ene,1.68,1.0,CCCC/C=C/C 289 | 287,1-propoxypropane,-1.16,1.0,CCCOCCC 290 | 288,"2,2,4-trimethylpentane",2.89,1.0,CC(C)CC(C)(C)C 291 | 289,nonan-5-one,-2.64,1.0,CCCCC(=O)CCCC 292 | 290,pebulate,-3.64,1.0,CCCCN(CC)C(=O)SCCC 293 | 
291,hept-1-ene,1.66,1.0,CCCCCC=C 294 | 292,isopropyl formate,-2.02,1.0,CC(C)OC=O 295 | 293,1-acetoxyethyl acetate,-4.97,1.0,CC(OC(=O)C)OC(=O)C 296 | 294,isopentane,2.38,1.0,CCC(C)C 297 | 295,butyl nitrate,-2.09,1.0,CCCCO[N+](=O)[O-] 298 | 296,"1,1,1-trichloroethane",-0.19,1.0,CC(Cl)(Cl)Cl 299 | 297,1-iodoheptane,0.27,1.0,CCCCCCCI 300 | 298,1-chloro-pentane,-0.1,1.0,CCCCCCl 301 | 299,isobutyl acetate,-2.36,1.0,CC(C)COC(=O)C 302 | 300,"2,2-dimethylbutane",2.51,1.0,CCC(C)(C)C 303 | 301,methyl 2-cyanoacetate,-6.72,1.0,COC(=O)CC#N 304 | 302,methanamine,-4.55,1.0,CN 305 | 303,"(Z)-1,2-dichloroethylene",-1.17,1.0,C(=C\Cl)\Cl 306 | 304,hexan-2-one,-3.28,1.0,CCCCC(=O)C 307 | 305,"1,2-dinitroxyethane",-5.73,1.0,C(CO[N+](=O)[O-])O[N+](=O)[O-] 308 | 306,triethylphosphate,-7.5,1.0,CCOP(=O)(OCC)OCC 309 | 307,"2,2,2-trifluoroethanol",-4.31,1.0,C(C(F)(F)F)O 310 | 308,1-butoxy-2-propanol,-5.73,1.0,CCCCOC[C@H](C)O 311 | 309,propan-1-ol,-4.85,1.0,CCCO 312 | 310,neopentane,2.51,1.0,CC(C)(C)C 313 | 311,pent-1-yne,0.01,1.0,CCCC#C 314 | 312,1-iodobutane,-0.25,1.0,CCCCI 315 | 313,2-iodopropane,-0.46,1.0,CC(C)I 316 | 314,111-trifluoropropan-2-ol,-4.2,1.0,C[C@H](C(F)(F)F)O 317 | 315,propan-1-amine,-4.39,1.0,CCCN 318 | 316,oct-2-enal,-3.43,1.0,CCCCC/C=C/C=O 319 | 317,oct-1-yne,0.71,1.0,CCCCCCC#C 320 | 318,methyl octanoate,-2.04,1.0,CCCCCCCC(=O)OC 321 | 319,1-chloroheptane,0.29,1.0,CCCCCCCCl 322 | 320,"4-methoxy-N,N-dimethyl-benzamide",-11.01,1.0,CN(C)C(=O)c1ccc(cc1)OC 323 | 321,"3,5-dimethylphenol",-6.27,1.0,Cc1cc(cc(c1)O)C 324 | 322,benzenethiol,-2.55,1.0,c1ccc(cc1)S 325 | 323,ethoxybenzene,-2.22,1.0,CCOc1ccccc1 326 | 324,4-bromophenol,-5.85,1.0,c1cc(ccc1O)Br 327 | 325,benzonitrile,-4.1,1.0,c1ccc(cc1)C#N 328 | 326,p-xylene,-0.8,1.0,Cc1ccc(cc1)C 329 | 327,"N,N-dimethylbenzamide",-9.29,1.0,CN(C)C(=O)c1ccccc1 330 | 328,4-tert-butylphenol,-5.91,1.0,CC(C)(C)c1ccc(cc1)O 331 | 329,"1,2,4-trichlorobenzene",-1.12,1.0,c1cc(c(cc1Cl)Cl)Cl 332 | 330,2-hydroxybenzaldehyde,-4.68,1.0,c1ccc(c(c1)C=O)O 333 | 
331,aniline,-5.49,1.0,c1ccc(cc1)N 334 | 332,2-iodophenol,-6.2,1.0,c1ccc(c(c1)O)I 335 | 333,"2,6-dimethoxyphenol",-6.96,1.0,COc1cccc(c1O)OC 336 | 334,trifluoromethylbenzene,-0.25,1.0,c1ccc(cc1)C(F)(F)F 337 | 335,"1,2-bis(trifluoromethyl)benzene",1.07,1.0,c1ccc(c(c1)C(F)(F)F)C(F)(F)F 338 | 336,methyl paraben,-9.51,1.0,COC(=O)c1ccc(cc1)O 339 | 337,pentylbenzene,-0.23,1.0,CCCCCc1ccccc1 340 | 338,3-methoxyphenol,-7.66,1.0,COc1cccc(c1)O 341 | 339,phenylmethanol,-6.62,1.0,c1ccc(cc1)CO 342 | 340,"2-chloro-1-(2,4-dichlorophenyl)ethenyl diethyl phosphate",-7.07,1.0,CCOP(=O)(OCC)O/C(=C/Cl)/c1ccc(cc1Cl)Cl 343 | 341,"N-methyl-N-(2,2,2-trifluoroethyl)aniline",-1.92,1.0,CN(CC(F)(F)F)c1ccccc1 344 | 342,m-xylene,-0.83,1.0,Cc1cccc(c1)C 345 | 343,trimethoxymethylbenzene,-4.04,1.0,COC(c1ccccc1)(OC)OC 346 | 344,ethyl benzoate,-3.64,1.0,CCOC(=O)c1ccccc1 347 | 345,benzyl bromide,-2.38,1.0,c1ccc(cc1)CBr 348 | 346,3-hydroxybenzonitrile,-9.65,1.0,c1cc(cc(c1)O)C#N 349 | 347,propylbenzene,-0.53,1.0,CCCc1ccccc1 350 | 348,4-chlorophenol,-7.03,1.0,c1cc(ccc1O)Cl 351 | 349,"3,5-dichloro-2,6-dimethoxyphenol",-6.44,1.0,COc1c(cc(c(c1O)OC)Cl)Cl 352 | 350,methylsulfanylbenzene,-2.73,1.0,CSc1ccccc1 353 | 351,2-ethylphenol,-5.66,1.0,CCc1ccccc1O 354 | 352,1-isopropyl-4-methyl-benzene,-0.68,1.0,Cc1ccc(cc1)C(C)C 355 | 353,"1,2-dichlorobenzene",-1.36,1.0,c1ccc(c(c1)Cl)Cl 356 | 354,ethylbenzene,-0.79,1.0,CCc1ccccc1 357 | 355,benzaldehyde,-4.02,1.0,c1ccc(cc1)C=O 358 | 356,1-(p-tolyl)ethanone,-4.7,1.0,Cc1ccc(cc1)C(=O)C 359 | 357,1-chloro-2-methyl-benzene,-1.14,1.0,Cc1ccccc1Cl 360 | 358,butylbenzene,-0.4,1.0,CCCCc1ccccc1 361 | 359,"N,N-dimethylaniline",-3.45,1.0,CN(C)c1ccccc1 362 | 360,N-methylaniline,-4.69,1.0,CNc1ccccc1 363 | 361,benzene,-0.9,1.0,c1ccccc1 364 | 362,"1,2,3-trichlorobenzene",-1.24,1.0,c1cc(c(c(c1)Cl)Cl)Cl 365 | 363,4-chlorophenyl)sulfanylmethylsulfanyl-diethoxy-thioxo-$l^{5}-phosphane,-6.5,1.0,CCOP(=S)(OCC)SCSc1ccc(cc1)Cl 366 | 364,butyl paraben,-8.72,1.0,CCCCOC(=O)c1ccc(cc1)O 367 | 
365,4-ethylphenol,-6.13,1.0,CCc1ccc(cc1)O 368 | 366,phenyl formate,-3.82,1.0,c1ccc(cc1)OC=O 369 | 367,fluorobenzene,-0.8,1.0,c1ccc(cc1)F 370 | 368,acetylsalicylic acid,-9.94,1.0,CC(=O)Oc1ccccc1C(=O)O 371 | 369,3-hydroxybenzaldehyde,-9.52,1.0,c1cc(cc(c1)O)C=O 372 | 370,"2,6-dimethylphenol",-5.26,1.0,Cc1cccc(c1O)C 373 | 371,m-cresol,-5.49,1.0,Cc1cccc(c1)O 374 | 372,4-methylaniline,-5.57,1.0,Cc1ccc(cc1)N 375 | 373,"1,3-dichlorobenzene",-0.98,1.0,c1cc(cc(c1)Cl)Cl 376 | 374,cumene,-0.3,1.0,CC(C)c1ccccc1 377 | 375,"2,6-dichlorosyringaldehyde",-8.68,1.0,COc1c(c(c(c(c1Cl)C=O)Cl)OC)O 378 | 376,methyl 4-nitrobenzoate,-6.88,1.0,COC(=O)c1ccc(cc1)[N+](=O)[O-] 379 | 377,"1,2,3-trimethylbenzene",-1.21,1.0,Cc1cccc(c1C)C 380 | 378,benzyl chloride,-1.93,1.0,c1ccc(cc1)CCl 381 | 379,4-methyl-2-methoxyphenol,-5.8,1.0,Cc1ccc(c(c1)OC)O 382 | 380,toluene,-0.9,1.0,Cc1ccccc1 383 | 381,ethyl paraben,-9.2,1.0,CCOC(=O)c1ccc(cc1)O 384 | 382,4-propylphenol,-5.21,1.0,CCCc1ccc(cc1)O 385 | 383,3-methoxyaniline,-7.29,1.0,COc1cccc(c1)N 386 | 384,alachlor,-8.21,1.0,CCc1cccc(c1N(COC)C(=O)CCl)CC 387 | 385,iodobenzene,-1.74,1.0,c1ccc(cc1)I 388 | 386,4-propylguaiacol,-5.26,1.0,CCCc1ccc(c(c1)OC)O 389 | 387,1-bromo-4-methyl-benzene,-1.39,1.0,Cc1ccc(cc1)Br 390 | 388,4-hydroxybenzonitrile,-10.17,1.0,c1cc(ccc1C#N)O 391 | 389,3-ethylphenol,-6.25,1.0,CCc1cccc(c1)O 392 | 390,"1,4-dibromobenzene",-2.3,1.0,c1cc(ccc1Br)Br 393 | 391,dicamba,-9.86,1.0,COc1c(ccc(c1C(=O)O)Cl)Cl 394 | 392,"1,2-dimethoxybenzene",-5.33,1.0,COc1ccccc1OC 395 | 393,"3,4-dichlorophenol",-7.29,1.0,c1cc(c(cc1O)Cl)Cl 396 | 394,anisole,-2.45,1.0,COc1ccccc1 397 | 395,"2,5-dimethylphenol",-5.91,1.0,Cc1ccc(c(c1)O)C 398 | 396,"1,4-dichlorobenzene",-1.01,1.0,c1cc(ccc1Cl)Cl 399 | 397,"1,2,3,4-tetrachlorobenzene",-1.34,1.0,c1cc(c(c(c1Cl)Cl)Cl)Cl 400 | 398,chlorobenzene,-1.12,1.0,c1ccc(cc1)Cl 401 | 399,2-nitroaniline,-7.37,1.0,c1ccc(c(c1)N)[N+](=O)[O-] 402 | 400,nitrobenzene,-4.12,1.0,c1ccc(cc1)[N+](=O)[O-] 403 | 401,2-methylaniline,-5.53,1.0,Cc1ccccc1N 
404 | 402,1-ethyl-4-methyl-benzene,-0.95,1.0,CCc1ccc(cc1)C 405 | 403,hexylbenzene,-0.04,1.0,CCCCCCc1ccccc1 406 | 404,2-chlorosyringaldehyde,-7.78,1.0,COc1cc(c(c(c1O)OC)Cl)C=O 407 | 405,m-bis(trifluoromethyl)benzene,1.07,1.0,c1cc(cc(c1)C(F)(F)F)C(F)(F)F 408 | 406,4-fluorophenol,-6.19,1.0,c1cc(ccc1O)F 409 | 407,2-chloroaniline,-4.91,1.0,c1ccc(c(c1)N)Cl 410 | 408,"1,2,4-trimethylbenzene",-0.86,1.0,Cc1ccc(c(c1)C)C 411 | 409,1-ethyl-2-methylbenzene,-0.85,1.0,CCc1ccccc1C 412 | 410,"2,3-dimethylphenol",-6.16,1.0,Cc1cccc(c1C)O 413 | 411,methylparathion,-7.19,1.0,COP(=S)(OC)Oc1ccc(cc1)[N+](=O)[O-] 414 | 412,diethoxy-(4-nitrophenoxy)-thioxo-$l^{5}-phosphane,-6.74,1.0,CCOP(=S)(OCC)Oc1ccc(cc1)[N+](=O)[O-] 415 | 413,"1-N,1-N-diethyl-2,6-dinitro-4-(trifluoromethyl)benzene-1,3-diamine",-5.66,1.0,CCN(CC)c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-] 416 | 414,nitralin,-7.98,1.0,CCCN(CCC)c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)C)[N+](=O)[O-] 417 | 415,"N,N-4-trimethylbenzamide",-9.76,1.0,Cc1ccc(cc1)C(=O)N(C)C 418 | 416,methyl benzoate,-3.92,1.0,COC(=O)c1ccccc1 419 | 417,4-methylbenzaldehyde,-4.27,1.0,Cc1ccc(cc1)C=O 420 | 418,mesitylene,-0.9,1.0,Cc1cc(cc(c1)C)C 421 | 419,dichlobenil,-4.71,1.0,c1cc(c(c(c1)Cl)C#N)Cl 422 | 420,"1,2,4,5-tetrachlorobenzene",-1.34,1.0,c1c(c(cc(c1Cl)Cl)Cl)Cl 423 | 421,diethoxymethoxybenzene,-5.23,1.0,CCOC(OCC)Oc1ccccc1 424 | 422,3-nitrophenol,-9.62,1.0,c1cc(cc(c1)O)[N+](=O)[O-] 425 | 423,2-phenylethanol,-6.79,1.0,c1ccc(cc1)CCO 426 | 424,fenuron,-9.13,1.0,CN(C)C(=O)Nc1ccccc1 427 | 425,"2,6-dimethylaniline",-5.21,1.0,Cc1cccc(c1N)C 428 | 426,3-nitroaniline,-8.84,1.0,c1cc(cc(c1)[N+](=O)[O-])N 429 | 427,2-methoxyaniline,-6.12,1.0,COc1ccccc1N 430 | 428,phenol,-6.6,1.0,c1ccc(cc1)O 431 | 429,o-cresol,-5.9,1.0,Cc1ccccc1O 432 | 430,4-nitrophenol,-10.64,1.0,c1cc(ccc1[N+](=O)[O-])O 433 | 431,4-chloroaniline,-5.9,1.0,c1cc(ccc1N)Cl 434 | 432,2-methylbenzaldehyde,-3.93,1.0,Cc1ccccc1C=O 435 | 433,"N,N-dimethyl-4-nitro-benzamide",-11.95,1.0,CN(C)C(=O)c1ccc(cc1)[N+](=O)[O-] 436 | 
434,trifluralin,-3.25,1.0,CCCN(CCC)c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-] 437 | 435,pentachloronitrobenzene,-5.22,1.0,c1(c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)N(=O)=O 438 | 436,"1,2,2-trifluoroethoxybenzene",-1.29,1.0,c1ccc(cc1)O[C@@H](C(F)F)F 439 | 437,sec-butylbenzene,-0.45,1.0,CC[C@H](C)c1ccccc1 440 | 438,3-phenylpropan-1-ol,-6.92,1.0,c1ccc(cc1)CCCO 441 | 439,"2-[(1R)-1-methylpropyl]-4,6-dinitro-phenolate",-6.23,1.0,CC[C@@H](C)c1cc(cc(c1O)[N+](=O)[O-])[N+](=O)[O-] 442 | 440,methyl 4-methoxybenzoate,-5.33,1.0,COc1ccc(cc1)C(=O)OC 443 | 441,"N-(3,4-dichlorophenyl)propanimidic acid",-7.78,1.0,CCC(=O)Nc1ccc(c(c1)Cl)Cl 444 | 442,3-chloroaniline,-5.82,1.0,c1cc(cc(c1)Cl)N 445 | 443,"N-butyl-N-ethyl-2,6-dinitro-4-(trifluoromethyl)aniline",-3.51,1.0,CCCC[N@](CC)c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-] 446 | 444,1-(4-methoxyphenyl)ethanone,-4.4,1.0,CC(=O)c1ccc(cc1)OC 447 | 445,"1,2,3,5-tetrachlorobenzene",-1.62,1.0,c1c(cc(c(c1Cl)Cl)Cl)Cl 448 | 446,1-phenylethanone,-4.58,1.0,CC(=O)c1ccccc1 449 | 447,"1,3,5-trichlorobenzene",-0.78,1.0,c1c(cc(cc1Cl)Cl)Cl 450 | 448,propyl paraben,-9.37,1.0,CCCOC(=O)c1ccc(cc1)O 451 | 449,3-chlorophenol,-6.62,1.0,c1cc(cc(c1)Cl)O 452 | 450,benzamide,-11.0,1.0,c1ccc(cc1)C(=O)N 453 | 451,1-methyl-3-nitro-benzene,-3.45,1.0,Cc1cccc(c1)[N+](=O)[O-] 454 | 452,tert-butylbenzene,-0.44,1.0,CC(C)(C)c1ccccc1 455 | 453,"2,4-dimethylphenol",-6.01,1.0,Cc1ccc(c(c1)C)O 456 | 454,ibuprofen,-7.0,1.0,C[C@@H](c1ccc(cc1)CC(C)C)C(=O)O 457 | 455,"3,4-dimethylphenol",-6.5,1.0,Cc1ccc(cc1C)O 458 | 456,4-chloro-3-methyl-phenol,-6.79,1.0,Cc1cc(ccc1Cl)O 459 | 457,isopropenylbenzene,-1.24,1.0,CC(=C)c1ccccc1 460 | 458,bromobenzene,-1.46,1.0,c1ccc(cc1)Br 461 | 459,1-methyl-2-nitro-benzene,-3.58,1.0,Cc1ccccc1[N+](=O)[O-] 462 | 460,4-nitroaniline,-9.82,1.0,c1cc(ccc1N)N(=O)=O 463 | 461,4-methoxyaniline,-7.48,1.0,COc1ccc(cc1)N 464 | 462,isobutylbenzene,0.16,1.0,CC(C)Cc1ccccc1 465 | 463,2-chlorophenol,-4.55,1.0,c1ccc(c(c1)O)Cl 466 | 464,2-fluorophenol,-5.29,1.0,c1ccc(c(c1)O)F 467 | 
465,styrene,-1.24,1.0,C=Cc1ccccc1 468 | 466,o-xylene,-0.9,1.0,Cc1ccccc1C 469 | 467,p-cresol,-6.13,1.0,Cc1ccc(cc1)O 470 | 468,2-methoxyphenol,-5.94,1.0,COc1ccccc1O 471 | 469,2-nitrophenol,-4.58,1.0,c1ccc(c(c1)[N+](=O)[O-])O 472 | 470,hexachlorobenzene,-2.33,1.0,c1(c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)Cl 473 | 471,4-hydroxybenzaldehyde,-8.83,1.0,c1cc(ccc1C=O)O 474 | 472,2-ethylpyridine,-4.33,1.0,CCc1ccccn1 475 | 473,2-methylpyridine,-4.63,1.0,Cc1ccccn1 476 | 474,"2,4-dimethylpyridine",-4.86,1.0,Cc1ccnc(c1)C 477 | 475,"3,4-dimethylpyridine",-5.22,1.0,Cc1ccncc1C 478 | 476,2-chloropyridine,-4.39,1.0,c1ccnc(c1)Cl 479 | 477,4-ethylpyridine,-4.73,1.0,CCc1ccncc1 480 | 478,chlorpyrifos,-5.04,1.0,CCOP(=S)(OCC)Oc1c(cc(c(n1)Cl)Cl)Cl 481 | 479,1-(3-pyridyl)ethanone,-8.26,1.0,CC(=O)c1cccnc1 482 | 480,pyridine,-4.69,1.0,c1ccncc1 483 | 481,"2,6-dimethylpyridine",-4.59,1.0,Cc1cccc(n1)C 484 | 482,pyridine-3-carbaldehyde,-7.1,1.0,c1cc(cnc1)C=O 485 | 483,pyridine-3-carbonitrile,-6.75,1.0,c1cc(cnc1)C#N 486 | 484,pyridine-4-carbaldehyde,-7.0,1.0,c1cnccc1C=O 487 | 485,3-chloropyridine,-4.01,1.0,c1cc(cnc1)Cl 488 | 486,4-methylpyridine,-4.93,1.0,Cc1ccncc1 489 | 487,3-methylpyridine,-4.77,1.0,Cc1cccnc1 490 | 488,"2,3-dimethylpyridine",-4.82,1.0,Cc1cccnc1C 491 | 489,pyridine-4-carbonitrile,-6.02,1.0,c1cnccc1C#N 492 | 490,1-(4-pyridyl)ethanone,-7.62,1.0,CC(=O)c1ccncc1 493 | 491,"2,5-dimethylpyridine",-4.72,1.0,Cc1ccc(nc1)C 494 | 492,3-ethylpyridine,-4.59,1.0,CCc1cccnc1 495 | 493,"3,5-dimethylpyridine",-4.84,1.0,Cc1cc(cnc1)C 496 | 494,"1,2,4,5-tetrachloro-3-(3,4-dichlorophenyl)benzene",-4.38,1.0,c1cc(c(cc1c2c(c(cc(c2Cl)Cl)Cl)Cl)Cl)Cl 497 | 495,"1,4-dichloro-2-phenyl-benzene",-2.46,1.0,c1ccc(cc1)c2cc(ccc2Cl)Cl 498 | 496,"1,2,3,4-tetrachloro-5-phenyl-benzene",-3.48,1.0,c1ccc(cc1)c2cc(c(c(c2Cl)Cl)Cl)Cl 499 | 497,"1,2,3,4-tetrachloro-5-(3,4,5-trichlorophenyl)benzene",-3.17,1.0,c1c(cc(c(c1Cl)Cl)Cl)c2cc(c(c(c2Cl)Cl)Cl)Cl 500 | 
498,"1,2,3,4-tetrachloro-5-(3,4-dichlorophenyl)benzene",-3.04,1.0,c1cc(c(cc1c2cc(c(c(c2Cl)Cl)Cl)Cl)Cl)Cl 501 | 499,"1,2,3,4,5-pentachloro-6-(2,3,4,5,6-pentachlorophenyl)benzene",-2.98,1.0,c1(c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)c2c(c(c(c(c2Cl)Cl)Cl)Cl)Cl 502 | 500,1-chloro-2-phenyl-benzene,-2.69,1.0,c1ccc(cc1)c2ccccc2Cl 503 | 501,"1,2,3,4-tetrachloro-5-(2,3,4-trichlorophenyl)benzene",-4.4,1.0,c1cc(c(c(c1c2cc(c(c(c2Cl)Cl)Cl)Cl)Cl)Cl)Cl 504 | 502,"1,3-dichloro-2-(2,6-dichlorophenyl)benzene",-2.28,1.0,c1cc(c(c(c1)Cl)c2c(cccc2Cl)Cl)Cl 505 | 503,diflunisal,-9.4,1.0,c1cc(c(cc1c2ccc(cc2F)F)C(=O)O)O 506 | 504,biphenyl,-2.7,1.0,c1ccc(cc1)c2ccccc2 507 | 505,flurbiprofen,-8.42,1.0,C[C@@H](c1ccc(c(c1)F)c2ccccc2)C(=O)O 508 | 506,"1,2,3,4-tetrachloro-5-(2,3,4,6-tetrachlorophenyl)benzene",-4.61,1.0,c1c(c(c(c(c1Cl)Cl)Cl)Cl)c2c(cc(c(c2Cl)Cl)Cl)Cl 509 | 507,"1,2,3-trichloro-5-(2,5-dichlorophenyl)benzene",-3.61,1.0,c1cc(c(cc1Cl)c2cc(c(c(c2)Cl)Cl)Cl)Cl 510 | 508,"1,3,5-trichloro-2-(2,6-dichlorophenyl)benzene",-1.96,1.0,c1cc(c(c(c1)Cl)c2c(cc(cc2Cl)Cl)Cl)Cl 511 | 509,"1,3,5-trichloro-2-phenyl-benzene",-2.16,1.0,c1ccc(cc1)c2c(cc(cc2Cl)Cl)Cl 512 | 510,naphthalen-2-ol,-8.11,1.0,c1ccc2cc(ccc2c1)O 513 | 511,"2,6-dimethylnaphthalene",-2.63,1.0,Cc1ccc2cc(ccc2c1)C 514 | 512,naphthalene,-2.4,1.0,c1ccc2ccccc2c1 515 | 513,naphthalen-1-yl N-methylcarbamate,-9.45,1.0,CNC(=O)Oc1cccc2c1cccc2 516 | 514,"1,3-dimethylnaphthalene",-2.47,1.0,Cc1cc(c2ccccc2c1)C 517 | 515,"2,3-dimethylnaphthalene",-2.78,1.0,Cc1cc2ccccc2cc1C 518 | 516,naphthalen-1-amine,-7.28,1.0,c1ccc2c(c1)cccc2N 519 | 517,"1,4-dimethylnaphthalene",-2.82,1.0,Cc1ccc(c2c1cccc2)C 520 | 518,1-ethylnaphthalene,-2.4,1.0,CCc1cccc2c1cccc2 521 | 519,naphthalen-1-ol,-7.67,1.0,c1ccc2c(c1)cccc2O 522 | 520,naproxen,-10.21,1.0,C[C@@H](c1ccc2cc(ccc2c1)OC)C(=O)O 523 | 521,naphthalen-2-amine,-7.47,1.0,c1ccc2cc(ccc2c1)N 524 | 522,1-methylnaphthalene,-2.44,1.0,Cc1cccc2c1cccc2 525 | 523,"2,3-dichlorodibenzo-p-dioxin",-3.56,1.0,c1ccc2c(c1)Oc3cc(c(cc3O2)Cl)Cl 526 | 
524,"1,2,3,7-tetrachlorodibenzo-p-dioxin",-3.84,1.0,c1cc2c(cc1Cl)Oc3cc(c(c(c3O2)Cl)Cl)Cl 527 | 525,2-chlorodibenzo-p-dioxin,-3.1,1.0,c1ccc2c(c1)Oc3ccc(cc3O2)Cl 528 | 526,"1,2,3,4,7-pentachlorodibenzo-p-dioxin",-4.15,1.0,c1cc2c(cc1Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2 529 | 527,"1,2,3,4,6,7,8,9-octachlorodibenzo-p-dioxin",-4.53,1.0,c12c(c(c(c(c1Cl)Cl)Cl)Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2 530 | 528,"1,2,4-trichlorodibenzo-p-dioxin",-4.05,1.0,c1ccc2c(c1)Oc3c(cc(c(c3O2)Cl)Cl)Cl 531 | 529,"2,7-dichlorodibenzo-p-dioxin",-3.67,1.0,c1cc2c(cc1Cl)Oc3ccc(cc3O2)Cl 532 | 530,"1,2,3,4-tetrachlorodibenzo-p-dioxin",-3.81,1.0,c1ccc2c(c1)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2 533 | 531,dibenzo-p-dioxin,-3.15,1.0,c1ccc2c(c1)Oc3ccccc3O2 534 | 532,1-chlorodibenzo-p-dioxin,-3.52,1.0,c1ccc2c(c1)Oc3cccc(c3O2)Cl 535 | 533,"2,3,7,8-tetrachlorodibenzo-p-dioxin",-3.37,1.0,c1c2c(cc(c1Cl)Cl)Oc3cc(c(cc3O2)Cl)Cl 536 | 534,"1,2,3,4,7,8-hexachlorodibenzo-p-dioxin",-3.71,1.0,c1c2c(cc(c1Cl)Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2 537 | 535,"1,2-dimethylcyclohexane",1.58,1.0,C[C@@H]1CCCC[C@@H]1C 538 | 536,cyclohexane,1.23,1.0,C1CCCCC1 539 | 537,cyclohexanamine,-4.59,1.0,C1CCC(CC1)N 540 | 538,methylcyclohexane,1.7,1.0,CC1CCCCC1 541 | 539,methyl cyclohexanecarboxylate,-3.3,1.0,COC(=O)C1CCCCC1 542 | 540,"1,4-dimethylcyclohexane",2.11,1.0,CC1CCC(CC1)C 543 | 541,cyclohexanol,-5.46,1.0,C1CCC(CC1)O 544 | 542,1-cyclohexylethanone,-3.9,1.0,CC(=O)C1CCCCC1 545 | 543,"(1R,2S,5R)-2-isopropyl-5-methylcyclohexanol",-3.2,1.0,C[C@@H]1CC[C@H]([C@@H](C1)O)C(C)C 546 | 544,5-bromouracil,-18.17,1.0,c1c(c(=O)[nH]c(=O)[nH]1)Br 547 | 545,bromacil,-9.73,1.0,CC[C@H](C)n1c(=O)c(c([nH]c1=O)C)Br 548 | 546,6-chlorouracil,-15.83,1.0,c1c(=O)[nH]c(=O)[nH]c1Cl 549 | 547,5-iodouracil,-18.72,1.0,c1c(c(=O)[nH]c(=O)[nH]1)I 550 | 548,5-fluorouracil,-16.92,1.0,c1c(c(=O)[nH]c(=O)[nH]1)F 551 | 549,5-trifluoromethyluracil,-15.46,1.0,c1c(c(=O)[nH]c(=O)[nH]1)C(F)(F)F 552 | 550,terbacil,-11.14,1.0,Cc1c(c(=O)n(c(=O)[nH]1)C(C)(C)C)Cl 553 | 
551,5-chlorouracil,-17.74,1.0,c1c(c(=O)[nH]c(=O)[nH]1)Cl 554 | 552,methylcyclopentane,1.59,1.0,CC1CCCC1 555 | 553,cyclopentanol,-5.49,1.0,C1CCC(C1)O 556 | 554,pentylcyclopentane,2.55,1.0,CCCCCC1CCCC1 557 | 555,propylcyclopentane,2.13,1.0,CCCC1CCCC1 558 | 556,cyclopentane,1.2,1.0,C1CCCC1 559 | 557,"1-(2-hydroxyethylamino)-9,10-anthraquinone",-14.21,1.0,c1ccc2c(c1)C(=O)c3cccc(c3C2=O)NCCO 560 | 558,"1-amino-4-hydroxy-9,10-anthracenedione",-9.53,1.0,c1ccc2c(c1)C(=O)c3c(ccc(c3C2=O)O)N 561 | 559,"1,4-diamino-9,10-anthracenedione",-11.85,1.0,c1ccc2c(c1)C(=O)c3c(ccc(c3C2=O)N)N 562 | 560,"1-amino-9,10-anthracenedione",-9.44,1.0,c1ccc2c(c1)C(=O)c3cccc(c3C2=O)N 563 | 561,"2-amino-9,10-anthraquinone",-11.53,1.0,c1ccc2c(c1)C(=O)c3ccc(cc3C2=O)N 564 | 562,cyclohexene,0.14,1.0,C1CCC=CC1 565 | 563,1-methylcyclohexene,0.67,1.0,CC1=CCCCC1 566 | 564,"(1S,5R)-2-methyl-5-(1-methylethenyl)-2-cyclohexen-1-ol",-4.44,1.0,CC1=CC[C@H](C[C@@H]1O)C(=C)C 567 | 565,4-(1-Methylethenyl)-1-cyclohexene-1-carboxaldehyde,-4.09,1.0,CC(=C)[C@H]1CCC(=CC1)C=O 568 | 566,cyclohexanone,-4.91,1.0,C1CCC(=O)CC1 569 | 567,"(2S,5R)-2-isopropyl-5-methylcyclohexanone",-2.53,1.0,C[C@@H]1CC[C@H](C(=O)C1)C(C)C 570 | 568,"(2R,5R)-2-methyl-5-(1-methylethenyl)-cyclohexanone",-3.75,1.0,C[C@@H]1CC[C@H](CC1=O)C(=C)C 571 | 569,2-methyltetrahydrofuran,-3.3,1.0,C[C@H]1CCCO1 572 | 570,"2,5-dimethyltetrahydrofuran",-2.92,1.0,C[C@H]1CC[C@@H](O1)C 573 | 571,tetrahydrofuran,-3.47,1.0,C1CCOC1 574 | 572,tetrahydropyran,-3.12,1.0,C1CCOCC1 575 | 573,"(2S,3R,4S,5R)-oxane-2,3,4,5-tetrol",-20.52,1.0,C1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O 576 | 574,"(2R,3R,4S,5S,6R)-6-(hydroxymethyl)tetrahydropyran-2,3,4,5-tetrol",-25.47,1.0,C([C@@H]1[C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)O 577 | 575,pirimor,-9.41,1.0,Cc1c(nc(nc1OC(=O)N(C)C)N(C)C)C 578 | 576,diazinon,-6.48,1.0,CCOP(=S)(OCC)Oc1cc(nc(n1)C(C)C)C 579 | -------------------------------------------------------------------------------- /molprop.yml: 
-------------------------------------------------------------------------------- 1 | name: molprop 2 | channels: 3 | - https://conda.anaconda.org/pyg 4 | - https://conda.anaconda.org/pytorch 5 | - https://conda.anaconda.org/conda-forge 6 | - https://conda.anaconda.org/salilab 7 | - https://conda.anaconda.org/bioconda 8 | - https://conda.anaconda.org/pytorch/ 9 | - https://conda.anaconda.org/conda-forge/ 10 | - defaults 11 | dependencies: 12 | - _libgcc_mutex=0.1=conda_forge 13 | - _openmp_mutex=4.5=2_kmp_llvm 14 | - blas=2.116=mkl 15 | - blas-devel=3.9.0=16_linux64_mkl 16 | - brotlipy=0.7.0=py38h0a891b7_1005 17 | - bzip2=1.0.8=h7f98852_4 18 | - ca-certificates=2022.9.24=ha878542_0 19 | - certifi=2024.2.2 20 | - cffi=1.15.1=py38h4a40e3a_2 21 | - charset-normalizer=2.1.1=pyhd8ed1ab_0 22 | - colorama=0.4.6=pyhd8ed1ab_0 23 | - cryptography=38.0.4=py38h80a4ca7_0 24 | - cudatoolkit=11.3.1=h9edb442_10 25 | - ffmpeg=4.3=hf484d3e_0 26 | - freetype=2.12.1=hca18f0e_1 27 | - gmp=6.2.1=h58526e2_0 28 | - gnutls=3.6.13=h85f3911_1 29 | - idna=3.4=pyhd8ed1ab_0 30 | - jinja2=3.1.2=pyhd8ed1ab_1 31 | - joblib=1.2.0=pyhd8ed1ab_0 32 | - jpeg=9e=h166bdaf_2 33 | - lame=3.100=h166bdaf_1003 34 | - lcms2=2.14=h6ed2654_0 35 | - ld_impl_linux-64=2.39=hcc3a1bd_1 36 | - lerc=4.0.0=h27087fc_0 37 | - libblas=3.9.0=16_linux64_mkl 38 | - libcblas=3.9.0=16_linux64_mkl 39 | - libdeflate=1.14=h166bdaf_0 40 | - libffi=3.4.2=h7f98852_5 41 | - libgcc-ng=12.2.0=h65d4601_19 42 | - libgfortran-ng=12.2.0=h69a702a_19 43 | - libgfortran5=12.2.0=h337968e_19 44 | - libgomp=12.2.0=h65d4601_19 45 | - libiconv=1.17=h166bdaf_0 46 | - liblapack=3.9.0=16_linux64_mkl 47 | - liblapacke=3.9.0=16_linux64_mkl 48 | - libnsl=2.0.0=h7f98852_0 49 | - libpng=1.6.39=h753d276_0 50 | - libsqlite=3.40.0=h753d276_0 51 | - libstdcxx-ng=12.2.0=h46fd767_19 52 | - libtiff=4.4.0=h55922b4_4 53 | - libwebp-base=1.2.4=h166bdaf_0 54 | - libxcb=1.13=h7f98852_1004 55 | - libzlib=1.2.13=h166bdaf_4 56 | - llvm-openmp=15.0.5=he0ac6c6_0 57 | - 
markupsafe=2.1.1=py38h0a891b7_2 58 | - mkl=2022.1.0=h84fe81f_915 59 | - mkl-devel=2022.1.0=ha770c72_916 60 | - mkl-include=2022.1.0=h84fe81f_915 61 | - ncurses=6.3=h27087fc_1 62 | - nettle=3.6=he412f7d_0 63 | - numpy=1.23.5=py38h7042d01_0 64 | - openh264=2.1.1=h780b84a_0 65 | - openjpeg=2.5.0=h7d73246_1 66 | - openssl=3.0.7=h166bdaf_0 67 | - pillow=9.2.0=py38h9eb91d8_3 68 | - pip=22.3.1=pyhd8ed1ab_0 69 | - pthread-stubs=0.4=h36c2ea0_1001 70 | - pycparser=2.21=pyhd8ed1ab_0 71 | - pyopenssl=22.1.0=pyhd8ed1ab_0 72 | - pyparsing=3.0.9=pyhd8ed1ab_0 73 | - pysocks=1.7.1=pyha2e5f31_6 74 | - python=3.8.12=h0744224_3_cpython 75 | - python_abi=3.8=3_cp38 76 | - pytorch=1.12.1=py3.8_cuda11.3_cudnn8.3.2_0 77 | - pytorch-cluster=1.6.0=py38_torch_1.12.0_cu113 78 | - pytorch-mutex=1.0=cuda 79 | - pytorch-scatter=2.1.0=py38_torch_1.12.0_cu113 80 | - pytorch-sparse=0.6.15=py38_torch_1.12.0_cu113 81 | - readline=8.1.2=h0f457ee_0 82 | - requests=2.28.1=pyhd8ed1ab_1 83 | - scikit-learn=1.1.3=py38h4c4ba11_1 84 | - setuptools=65.5.1=pyhd8ed1ab_0 85 | - sqlite=3.40.0=h4ff8645_0 86 | - tbb=2021.7.0=h924138e_0 87 | - threadpoolctl=3.1.0=pyh8a188c0_0 88 | - tk=8.6.12=h27826a3_0 89 | - torchaudio=0.12.1=py38_cu113 90 | - torchvision=0.13.1=py38_cu113 91 | - tqdm=4.64.1=pyhd8ed1ab_0 92 | - urllib3=1.26.13=pyhd8ed1ab_0 93 | - wheel=0.38.4=pyhd8ed1ab_0 94 | - xorg-libxau=1.0.9=h7f98852_0 95 | - xorg-libxdmcp=1.1.3=h7f98852_0 96 | - xz=5.2.6=h166bdaf_0 97 | - zlib=1.2.13=h166bdaf_4 98 | - zstd=1.5.2=h6239696_4 99 | - pip: 100 | - ablang==0.2.2 101 | - absl-py==1.3.0 102 | - aiohttp==3.8.4 103 | - aiosignal==1.3.1 104 | - antlr4-python3-runtime==4.8 105 | - anyio==3.6.2 106 | - appdirs==1.4.4 107 | - argon2-cffi==21.3.0 108 | - argon2-cffi-bindings==21.2.0 109 | - argparse==1.4.0 110 | - astropy==5.2.2 111 | - asttokens==2.2.0 112 | - astunparse==1.6.3 113 | - async-timeout==4.0.2 114 | - atom3d==0.2.6 115 | - attrs==22.1.0 116 | - backcall==0.2.0 117 | - beautifulsoup4==4.11.1 118 | - 
biopython==1.81 119 | - bitarray==2.6.0 120 | - bleach==5.0.1 121 | - blosc2==2.0.0 122 | - cachetools==5.2.0 123 | - captum==0.6.0 124 | - click==8.0.4 125 | - click-plugins==1.1.1 126 | - cligj==0.7.2 127 | - configspace==0.6.0 128 | - contourpy==1.0.6 129 | - cycler==0.11.0 130 | - cython==0.29.32 131 | - debugpy==1.6.4 132 | - decorator==5.1.1 133 | - deepchem==2.7.1 134 | - defusedxml==0.7.1 135 | - dill==0.3.6 136 | - distlib==0.3.6 137 | - easy-parallel==0.1.6 138 | - easydict==1.10 139 | - einops==0.6.0 140 | - entrypoints==0.4 141 | - exceptiongroup==1.0.4 142 | - executing==1.2.0 143 | - fair-esm==2.0.0 144 | - fairseq==0.12.2 145 | - fastjsonschema==2.16.2 146 | - filelock==3.8.2 147 | - fiona==1.8.22 148 | - flatbuffers==22.11.23 149 | - fonttools==4.38.0 150 | - freesasa==2.2.0.post3 151 | - frozenlist==1.3.3 152 | - fsspec==2023.5.0 153 | - gast==0.4.0 154 | - git-lfs==1.6 155 | - google-auth==2.14.1 156 | - google-auth-oauthlib==0.4.6 157 | - google-pasta==0.2.0 158 | - grpcio==1.50.0 159 | - gvp==0.1.1 160 | - h5py==3.7.0 161 | - hpbandster==0.7.4 162 | - huggingface-hub==0.12.0 163 | - hydra-core==1.0.7 164 | - imageio==2.22.4 165 | - importlib-metadata==5.1.0 166 | - importlib-resources==5.10.0 167 | - iniconfig==1.1.1 168 | - ipdb==0.13.9 169 | - ipykernel==6.17.1 170 | - ipython==8.7.0 171 | - ipython-genutils==0.2.0 172 | - ipywidgets==8.0.2 173 | - jedi==0.18.2 174 | - jsonschema==4.17.1 175 | - jupyter==1.0.0 176 | - jupyter-client==7.4.7 177 | - jupyter-console==6.4.4 178 | - jupyter-core==5.1.0 179 | - jupyter-server==1.23.3 180 | - jupyterlab-pygments==0.2.2 181 | - jupyterlab-widgets==3.0.3 182 | - keras==2.11.0 183 | - kiwisolver==1.4.4 184 | - kornia==0.7.0 185 | - libclang==14.0.6 186 | - lightning-utilities==0.8.0 187 | - line-profiler==4.0.1 188 | - llvmlite==0.39.1 189 | - lmdb==1.4.1 190 | - lxml==4.9.1 191 | - markdown==3.4.1 192 | - matplotlib==3.6.2 193 | - matplotlib-inline==0.1.6 194 | - mistune==2.0.4 195 | - msgpack==1.0.4 
196 | - multidict==6.0.4 197 | - multipledispatch==0.6.0 198 | - multiprocess==0.70.14 199 | - munch==2.5.0 200 | - nbclassic==0.4.8 201 | - nbclient==0.7.2 202 | - nbconvert==7.2.5 203 | - nbformat==5.7.0 204 | - nest-asyncio==1.5.6 205 | - netifaces==0.11.0 206 | - networkx==2.8.8 207 | - ninja==1.11.1 208 | - notebook==6.5.2 209 | - notebook-shim==0.2.2 210 | - numba==0.56.4 211 | - numexpr==2.8.4 212 | - oauthlib==3.2.2 213 | - omegaconf==2.0.6 214 | - opt-einsum==3.3.0 215 | - packaging==21.3 216 | - pandas==1.5.2 217 | - pandocfilters==1.5.0 218 | - parso==0.8.3 219 | - pathos==0.3.0 220 | - patsy==0.5.3 221 | - pexpect==4.8.0 222 | - pickleshare==0.7.5 223 | - pkgutil-resolve-name==1.3.10 224 | - platformdirs==2.5.4 225 | - plotly==5.11.0 226 | - pluggy==1.0.0 227 | - portalocker==2.6.0 228 | - pox==0.3.2 229 | - ppft==1.7.6.6 230 | - prometheus-client==0.15.0 231 | - prompt-toolkit==3.0.33 232 | - protobuf==3.19.6 233 | - psutil==5.9.4 234 | - ptyprocess==0.7.0 235 | - pure-eval==0.2.2 236 | - py-cpuinfo==9.0.0 237 | - pyasn1==0.4.8 238 | - pyasn1-modules==0.2.8 239 | - pyerfa==2.0.0.3 240 | - pygments==2.13.0 241 | - pyro4==4.82 242 | - pyrr==0.10.3 243 | - pyrsistent==0.19.2 244 | - pytest==7.2.0 245 | - python-dateutil==2.8.2 246 | - python-dotenv==1.0.0 247 | - pytorch-lightning==2.0.3 248 | - pytorch-msssim==0.2.1 249 | - pytz==2022.6 250 | - pywavelets==1.4.1 251 | - pyyaml==6.0.1 252 | - pyzmq==24.0.1 253 | - qtconsole==5.4.0 254 | - qtpy==2.3.0 255 | - ray==2.1.0 256 | - rdkit==2023.3.3 257 | - rdkit-pypi==2022.9.5 258 | - regex==2022.10.31 259 | - requests-oauthlib==1.3.1 260 | - retrying==1.3.4 261 | - rsa==4.9 262 | - sacrebleu==2.3.1 263 | - scikit-image==0.19.3 264 | - scipy==1.9.3 265 | - seaborn==0.12.1 266 | - send2trash==1.8.0 267 | - sentencepiece==0.1.99 268 | - serpent==1.41 269 | - six==1.16.0 270 | - sklearn==0.0.post1 271 | - sniffio==1.3.0 272 | - soupsieve==2.3.2.post1 273 | - stack-data==0.6.2 274 | - statsmodels==0.13.5 275 | - 
suds-community==1.1.2 276 | - tables==3.8.0 277 | - tabulate==0.9.0 278 | - tenacity==8.1.0 279 | - tensorboard==2.11.0 280 | - tensorboard-data-server==0.6.1 281 | - tensorboard-plugin-wit==1.8.1 282 | - tensorboardx==2.5.1 283 | - tensorflow==2.11.0 284 | - tensorflow-estimator==2.11.0 285 | - tensorflow-io-gcs-filesystem==0.28.0 286 | - termcolor==2.1.1 287 | - terminado==0.17.0 288 | - tifffile==2022.10.10 289 | - tinycss2==1.2.1 290 | - tokenizers==0.13.2 291 | - toml==0.10.2 292 | - tomli==2.0.1 293 | - torch-geometric==2.3.1 294 | - torch-tb-profiler==0.4.0 295 | - torchdrug==0.2.1 296 | - torchinfo==1.8.0 297 | - torchmetrics==0.11.4 298 | - torchsummary==1.5.1 299 | - tornado==6.2 300 | - traitlets==5.6.0 301 | - transformers==4.26.0 302 | - typing-extensions==4.5.0 303 | - url-normalize==1.4.3 304 | - virtualenv==20.17.1 305 | - wcwidth==0.2.5 306 | - webencodings==0.5.1 307 | - websocket-client==1.4.2 308 | - werkzeug==2.2.2 309 | - wget==3.2 310 | - widgetsnbextension==4.0.3 311 | - wrapt==1.14.1 312 | - xmltodict==0.13.0 313 | - yarl==1.9.2 314 | - zipp==3.11.0 315 | - zope-event==5.0 316 | - zope-interface==6.1 317 | prefix: /home/ 318 | -------------------------------------------------------------------------------- /pics/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merck/MolPROP/7eea0a70f472ac679f6a815549c37927c3e215be/pics/fig1.png -------------------------------------------------------------------------------- /src/DataLoader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # MolPROP fuses molecular language and graph for property prediction. 5 | # Copyright © 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved. 
6 | 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | 17 | # You should have received a copy of the GNU General Public License 18 | # along with this program. If not, see https://www.gnu.org/licenses/. 19 | 20 | """ MolPROP module and functions to load, augment, and preprocess batches of data """ 21 | from collections import OrderedDict 22 | import numpy as np 23 | import pandas as pd 24 | import math 25 | import torch 26 | import os 27 | from os.path import join, exists, dirname, basename, isfile 28 | import sys 29 | import heapq 30 | import glob 31 | from collections import defaultdict 32 | import torch 33 | from torch.utils.data import Dataset 34 | from tqdm import tqdm 35 | import pdb 36 | import ablang 37 | import torch_geometric 38 | from torch_geometric.utils import smiles 39 | from rdkit import Chem 40 | import networkx as nx 41 | from rdkit import Chem 42 | from rdkit.Chem import Descriptors 43 | from rdkit.Chem import AllChem 44 | from rdkit import DataStructs 45 | from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect 46 | from torch.utils import data 47 | from torch_geometric.data import Data 48 | from torch_geometric.data import InMemoryDataset 49 | from torch_geometric.data import Batch 50 | from itertools import repeat, product, chain 51 | from transformers.models.roberta import RobertaModel, RobertaTokenizer 52 | from tokenizers import Tokenizer 53 | from tokenizers import Regex 54 | from tokenizers.pre_tokenizers import Split 55 | from tokenizers.models import WordLevel 56 | from
from itertools import chain, repeat, islice
import json

# allowable node and edge features
# (features are encoded as the position of the value inside these lists)
allowable_features = {
    'possible_atomic_num_list' : list(range(1, 119)),
    'possible_formal_charge_list' : [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5],
    'possible_chirality_list' : [
        Chem.rdchem.ChiralType.CHI_UNSPECIFIED,
        Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
        Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
        Chem.rdchem.ChiralType.CHI_OTHER
    ],
    'possible_hybridization_list' : [
        Chem.rdchem.HybridizationType.S,
        Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
        Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,
        Chem.rdchem.HybridizationType.SP3D2, Chem.rdchem.HybridizationType.UNSPECIFIED
    ],
    'possible_numH_list' : [0, 1, 2, 3, 4, 5, 6, 7, 8],
    'possible_implicit_valence_list' : [0, 1, 2, 3, 4, 5, 6],
    'possible_degree_list' : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'possible_bonds' : [
        Chem.rdchem.BondType.SINGLE,
        Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE,
        Chem.rdchem.BondType.AROMATIC
    ],
    'possible_bond_dirs' : [ # only for double bond stereo information
        Chem.rdchem.BondDir.NONE,
        Chem.rdchem.BondDir.ENDUPRIGHT,
        Chem.rdchem.BondDir.ENDDOWNRIGHT
    ]
}


def mol_to_graph_data_obj_simple(mol):
    """
    Converts rdkit mol object to graph Data object required by the pytorch
    geometric package. NB: Uses simplified atom and bond features, and represent
    as indices
    :param mol: rdkit mol object
    :return: graph data object with the attributes: x, edge_index, edge_attr
        x has shape [num_atoms, 2] (atom type index, chirality tag index),
        edge_index has shape [2, 2 * num_bonds] (each bond is stored in both
        directions) and edge_attr has shape [2 * num_bonds, 2] (bond type
        index, bond direction index)
    :raises ValueError: (from list.index) when an atom or bond carries a value
        that is not listed in `allowable_features`
    """
    # atoms
    num_atom_features = 2   # atom type, chirality tag
    atomic_nums = allowable_features['possible_atomic_num_list']
    chiralities = allowable_features['possible_chirality_list']
    atom_features_list = [
        [atomic_nums.index(atom.GetAtomicNum()),
         chiralities.index(atom.GetChiralTag())]
        for atom in mol.GetAtoms()
    ]
    # reshape so that a molecule without atoms still yields a [0, 2] matrix,
    # mirroring the explicit empty-bond handling below
    x = torch.tensor(
        np.array(atom_features_list, dtype=np.int64).reshape(-1, num_atom_features),
        dtype=torch.long)

    # bonds
    num_bond_features = 2   # bond type, bond direction
    if len(mol.GetBonds()) > 0: # mol has bonds
        bond_types = allowable_features['possible_bonds']
        bond_dirs = allowable_features['possible_bond_dirs']
        edges_list = []
        edge_features_list = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            edge_feature = [bond_types.index(bond.GetBondType()),
                            bond_dirs.index(bond.GetBondDir())]
            # store the bond in both directions with the same features
            edges_list.append((i, j))
            edge_features_list.append(edge_feature)
            edges_list.append((j, i))
            edge_features_list.append(edge_feature)

        # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
        edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)

        # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
        edge_attr = torch.tensor(np.array(edge_features_list),
                                 dtype=torch.long)
    else:   # mol has no bonds
        edge_index = torch.empty((2, 0), dtype=torch.long)
        edge_attr = torch.empty((0, num_bond_features), dtype=torch.long)

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)

    return data
def get_patch(img, x, y, size=32):
    """
    *** currently NOT implemented, future update will cut cdrs and cdr3s from distance maps on the fly ***

    Slices out a square patch from `ensemble structure` starting from the (x,y) top-left corner.
    The slice is taken over the last two axes, so if `img` is a 3D array of shape (l, n, m),
    the same (x,y) is broadcasted across the first dimension and the output has shape (l, size, size).
    Args:
        img: numpy.ndarray (..., n, m), input image or stack of images
        x, y: int, top-left corner of the patch
        size: int, patch size
    Returns:
        patch: numpy.ndarray (..., size, size); slicing clamps to the array bounds,
               so the patch is smaller when it would run past the last row/column
    """
    return img[..., x:(x + size), y:(y + size)]  # using ellipsis to slice arbitrary ndarrays


def create_tokenizer_custom(file):
    """
    custom tokenizer for progen2 models
    Args:
        file: path of the serialized tokenizer; its text is handed to `Tokenizer.from_str`
    Returns:
        the object returned by `Tokenizer.from_str`
    """
    # explicit encoding so loading does not depend on the platform default
    with open(file, 'r', encoding='utf-8') as f:
        return Tokenizer.from_str(f.read())


class DataSet(OrderedDict):
    """
    An OrderedDict derived class to group the assets of a dataset.
    Sources: https://github.com/Suanmd/TR-MISR
    """
    def __init__(self, *args, **kwargs):
        super(DataSet, self).__init__(*args, **kwargs)

    def __repr__(self):
        """One line per asset: shape/class/dtype for array-likes, class and value otherwise."""
        # .get() keeps repr() usable (e.g. in a debugger) when no 'name' asset was stored
        lines = [f"{'name':>10} : {self.get('name')}"]
        for name, v in self.items():
            if hasattr(v, 'shape'):
                lines.append(f"{name:>10} : {v.shape} {v.__class__.__name__} ({v.dtype})")
            else:
                lines.append(f"{name:>10} : {v.__class__.__name__} ({v})")
        return "\n".join(lines)


# language options that only use sequence tokens (no distance-map ensemble is loaded for them)
_LANG_ONLY = frozenset({
    "ablang-only", "protbert-only", "protbert-bfd-only",
    "prot-t5-xl-bfd-only", "progen-medium-only", "progen-oas-only",
})

# settings of the tokenizers that are fed space-separated residues:
# base language -> (transformers class name, local model path, replacement for '-', extra call kwargs)
# *** NEED LOCAL INSTALL OF PROTBERT/PROTBERT-BFD/PROT-T5-XL-BFD/PROGEN-MEDIUM/PROGEN-OAS (located in config/) ***
# NOTE(review): SOURCE shows an empty replacement string for prot-t5-xl-bfd; a pad-token
# literal may have been lost in extraction -- confirm against the upstream file.
_SPACED_TOKENIZERS = {
    "protbert": ("BertTokenizer", "config/bert/prot_bert", '[PAD]', {}),
    "protbert-bfd": ("BertTokenizer", "config/bert/prot_bert_bfd", '[PAD]', {}),
    "prot-t5-xl-bfd": ("T5Tokenizer", "config/bert/prot_t5_xl_bfd", '', {"add_special_tokens": True}),
}

_INVALID_LANG_MSG = 'invalid language argument (must be "ablang", "protbert", "protbert-bfd", "prot-t5-xl-bfd", "progen-medium", "progen-oas", "none", or "ablang-only", "protbert-only", "protbert-bfd-only", "prot-t5-xl-bfd-only", "progen-medium-only", "progen-oas-only",)'


def _read_chain(directory, chain):
    """Parse the first `<chain>_DAB*.txt` file of `directory` with np.genfromtxt (IndexError if none)."""
    return np.genfromtxt(glob.glob(join(directory, f'{chain}_DAB*.txt'))[0], dtype='str').tolist()


def _encode_spaced(tokenizer, residues, gap_token, **tokenizer_kwargs):
    """Tokenize residues as one space-separated string; return (string, token ids, attention mask)."""
    text = " ".join(gap_token if res == '-' else res for res in residues)
    token = tokenizer(text, padding=True, return_tensors="pt", **tokenizer_kwargs)['input_ids']
    # the mask is rebuilt from the ids: 0 where the token id is 0, 1 elsewhere
    mask = torch.ones(token.shape, device=token.device).masked_fill(token == 0, 0)
    return text, token, mask


def _encode_progen(tokenizer, residues):
    """Tokenize residues with the progen2 tokenizer; return (id list, token ids, attention mask)."""
    ids = [3, *tokenizer.encode("".join(residues)).ids, 4]  # ids 3 and 4 wrap the sequence
    token = torch.tensor(ids)
    mask = torch.ones(token.shape, device=token.device).masked_fill(token == 0, 0)
    return ids, token, mask


def _tokenize_chains(directory, lang):
    """
    Tokenize the light (L) and heavy (H) chain files of `directory` for language option `lang`.
    Returns (lc_seq, hc_seq, lc_token, lc_attention_mask, hc_token, hc_attention_mask),
    all None for lang == "none". Raises ValueError for an unknown `lang`.
    """
    base = lang[:-len("-only")] if lang in _LANG_ONLY else lang
    if base == "none":
        return None, None, None, None, None, None

    if base == "ablang":
        # collect HC & LC sequences --> tokenize w/ AbLang convention
        tokenizer = ablang.ABtokenizer(os.path.join("config/", "vocab.json"))
        lc_seq = [_read_chain(directory, 'L')]
        hc_seq = [_read_chain(directory, 'H')]
        lc_token = tokenizer(lc_seq, pad=True)
        hc_token = tokenizer(hc_seq, pad=True)
        # unlike the other languages, this mask is 1 exactly where the token id equals 21
        lc_mask = torch.zeros(lc_token.shape, device=lc_token.device).masked_fill(lc_token == 21, 1)
        hc_mask = torch.zeros(hc_token.shape, device=hc_token.device).masked_fill(hc_token == 21, 1)
        return lc_seq, hc_seq, lc_token, lc_mask, hc_token, hc_mask

    if base in _SPACED_TOKENIZERS:
        os.environ['CURL_CA_BUNDLE'] = ''
        # function-scope import: the file only imports the Roberta classes at module level
        import transformers
        class_name, model_path, gap_token, extra = _SPACED_TOKENIZERS[base]
        tokenizer = getattr(transformers, class_name).from_pretrained(model_path, do_lower_case=False)
        lc_residues = _read_chain(directory, 'L')
        hc_residues = _read_chain(directory, 'H')
        lc_seq, lc_token, lc_mask = _encode_spaced(tokenizer, lc_residues, gap_token, **extra)
        hc_seq, hc_token, hc_mask = _encode_spaced(tokenizer, hc_residues, gap_token, **extra)
        return lc_seq, hc_seq, lc_token, lc_mask, hc_token, hc_mask

    if base in ("progen-medium", "progen-oas"):
        os.environ['CURL_CA_BUNDLE'] = ''
        tokenizer = create_tokenizer_custom('config/progen/progen2/tokenizer.json')
        lc_residues = _read_chain(directory, 'L')
        hc_residues = _read_chain(directory, 'H')
        lc_seq, lc_token, lc_mask = _encode_progen(tokenizer, lc_residues)
        hc_seq, hc_token, hc_mask = _encode_progen(tokenizer, hc_residues)
        return lc_seq, hc_seq, lc_token, lc_mask, hc_token, hc_mask

    raise ValueError(_INVALID_LANG_MSG)


def _load_json(path):
    """Read a JSON file and close the handle again."""
    with open(path) as handle:
        return json.load(handle)


def _build_chemprop_map(directory, cdr_patch, lc_seq, hc_seq, ens_w, ens_h):
    """Build the pairwise residue property map of one directory from config/chemprop.json."""
    if cdr_patch == 'cdrs' or cdr_patch == 'cdr3s':
        dir_name = basename(directory)
        lc_anarci = _load_json(join(directory, 'L_%s.json' % dir_name))
        hc_anarci = _load_json(join(directory, 'H_%s.json' % dir_name))

        def padded(anarci, key, width):
            # residues of one CDR, right-padded with '-' to a fixed width
            residues = [anarci['seq'][x] for x in anarci[key]]
            return residues + ['-'] * (width - len(residues))

        pads = (('cdr1', 20), ('cdr2', 12), ('cdr3', 32))
        cdrl1, cdrl2, cdrl3 = (padded(lc_anarci, key, width) for key, width in pads)
        cdrh1, cdrh2, cdrh3 = (padded(hc_anarci, key, width) for key, width in pads)
        if cdr_patch == 'cdrs':
            Fv = ''.join(cdrl1 + cdrl2 + cdrl3 + cdrh1 + cdrh2 + cdrh3)
        else:
            Fv = ''.join(cdrl3 + cdrh3)
    else:
        if lc_seq is None or hc_seq is None:
            raise ValueError("residue property maps need the LC/HC sequences (lang must not be \"none\")")
        Fv = str(np.char.add(lc_seq[0], hc_seq[0]))

    chemprop_grid = pd.read_json(os.path.join("config/", "chemprop.json"))
    chemprop_map = np.empty((ens_w, ens_h), dtype=float)
    for i, residue1 in enumerate(Fv):
        for j, residue2 in enumerate(Fv):
            pair_prop = chemprop_grid[residue1][residue2]
            pair_prop_inv = chemprop_grid[residue2][residue1]
            # the grid may hold a pair in one orientation only; try both
            if not math.isnan(pair_prop):
                chemprop_map[i][j] = pair_prop
            elif not math.isnan(pair_prop_inv):
                chemprop_map[i][j] = pair_prop_inv
            else:
                raise ValueError("residue pair not found")
    return chemprop_map


def read_dataset(superset_dir, seed=None, ens_L=None, lang=None, graph=False, chemprop=False,
                 median_ref=False, cdr_patch=False):
    """
    NOT IMPLEMENTED ... only for future SLEF update

    Loads the ensemble, sequence tokens and target property found in a list of directories.
    Args:
        superset_dir: list of str, directories to read (the DataSet name is the basename of the first)
        seed, ens_L: unused by this function, kept for interface compatibility
        lang: str, "ablang", "protbert", "protbert-bfd", "prot-t5-xl-bfd", "progen-medium",
              "progen-oas", "none", or one of the first six with an "-only" suffix
              (tokens only, no distance-map ensemble)
        graph: when truthy, `DAB*.pt` objects are loaded with torch.load instead of `cdrs_DAB*.npy` arrays
        chemprop: False, 'add' (add the residue property map to every ensemble member) or
                  'channel' (append the map as an extra channel after the last directory)
        median_ref: when truthy (and not graph), append the per-channel median over the
                    ensemble as additional reference channels
        cdr_patch: False, 'cdrs' or 'cdr3s'; selects `chemprop.npy`, `cdrs_chemprop.npy` or
                   `cdr3s_chemprop.npy` as the residue property map file
    Returns:
        DataSet with the keys name, lr, ens_attention_mask, lc_token, lc_attention_mask,
        hc_token, hc_attention_mask, prop. Tokens, masks and prop come from the last
        directory that was fully processed.
    Raises:
        ValueError: empty `superset_dir`, unknown `lang`, unknown `chemprop` option, or
                    `chemprop` combined with `graph` / a sequence-only language
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing; the final
    # stacking and `return` are placed after the directory loop -- confirm against upstream.
    if len(superset_dir) == 0:
        raise ValueError("superset_dir must contain at least one directory")

    lang_only = lang in _LANG_ONLY
    last = len(superset_dir) - 1
    lr_ens_c = []
    for dir_, directory in enumerate(superset_dir):
        # --- ensemble -----------------------------------------------------------------
        if graph:
            # names are sorted as strings, i.e. lexicographically
            idx_names = np.sort(np.array([basename(path)[3:-3]
                                          for path in glob.glob(join(directory, 'DAB*.pt'))]))
            # NOTE: torch.load unpickles the files; only use it on trusted data
            lr_ens = [torch.load(join(directory, f'DAB{i}.pt')) for i in idx_names]
        elif lang_only:
            lr_ens = None
        else:
            idx_names = np.sort(np.array([basename(path)[8:-4]
                                          for path in glob.glob(join(directory, 'cdrs_DAB*.npy'))]))
            lr_ens = np.array([np.load(join(directory, f'cdrs_DAB{i}.npy')) for i in idx_names],
                              dtype=np.float32)
            ens_len, ens_h, ens_w = lr_ens.shape

        # --- sequence tokens ------------------------------------------------------------
        (lc_seq, hc_seq, lc_token, lc_attention_mask,
         hc_token, hc_attention_mask) = _tokenize_chains(directory, lang)

        # --- attention masks for the ensemble of distance maps --------------------------
        if lang_only:
            ens_attention_mask = None
        elif graph is not False:
            ens_attention_mask = np.array(0)
        else:
            ens_attention_mask = (lr_ens != 0).astype(np.float64)

        # --- add residue property maps (for LEF only) -----------------------------------
        if chemprop and graph:
            raise ValueError("residue property maps only for AbLEF, not implemented for graph representations")
        if chemprop and cdr_patch in (False, 'cdrs', 'cdr3s'):
            if lr_ens is None:
                raise ValueError("residue property maps need the distance-map ensemble (not available for '-only' languages)")
            prfx = cdr_patch + "_" if cdr_patch else ''
            map_path = join(directory, '%schemprop.npy' % prfx)
            if exists(map_path):
                chemprop_map = np.array(np.load(map_path), dtype=np.float32)
            else:
                # first use: build the map and cache it next to the data
                chemprop_map = _build_chemprop_map(directory, cdr_patch, lc_seq, hc_seq, ens_w, ens_h)
                np.save(map_path, chemprop_map)

            if chemprop == 'add':
                for i in range(len(lr_ens)):
                    lr_ens[i] = lr_ens[i] + chemprop_map
                lr_ens_c.append(lr_ens)
            elif chemprop == 'channel':
                lr_ens_c.append(lr_ens)
                if dir_ != last:
                    # the property channel (and prop) are only handled for the last directory
                    continue
                lr_ens_c.append(np.stack([chemprop_map] * ens_len, axis=0))
            else:
                raise ValueError("invalid ensemble residue property map option")
        else:
            lr_ens_c.append(lr_ens)

        # --- property to predict --------------------------------------------------------
        prop_path = join(directory, 'prop.npy')
        prop = np.array(np.load(prop_path), dtype=np.float32) if exists(prop_path) else None

    # --- stack the channels collected over all directories --------------------------------
    if graph:
        lr_ens_stacked = lr_ens_c
    elif lang_only:
        lr_ens_stacked = None   # nothing to stack (and nothing to take a median of)
    else:
        lr_ens_stacked = np.stack(lr_ens_c, axis=0)
        if median_ref:
            refs = np.median(lr_ens_stacked[:, :ens_len, :, :], axis=1, keepdims=True)
            refs_stacked = np.repeat(refs, ens_len, axis=1)
            lr_ens_stacked = np.concatenate((lr_ens_stacked, refs_stacked), axis=0)

    # organize all assets into a DataSet (OrderedDict)
    dataset = DataSet(name=basename(superset_dir[0]),
                      lr=lr_ens_stacked, ens_attention_mask=ens_attention_mask,
                      lc_token=lc_token, lc_attention_mask=lc_attention_mask,
                      hc_token=hc_token, hc_attention_mask=hc_attention_mask,
                      prop=prop)
    return dataset
class SuperSet(Dataset):
    """
    NOT IMPLEMENTED FOR MolPROP ... only for future SLEF update
    Derived SuperSet class for loading many datasets from a list of directories.

    Directories sharing the same basename are grouped and served as one item.
    """

    def __init__(self, superset_dir, setup, seed=None, ens_L=-1, lang=None, graph=False, chemprop=False, median_ref=False, **kwargs):
        """
        Arguments:
            superset_dir: iterable of str, dataset directories
            setup: accepted for interface compatibility; not read by this class
            seed, ens_L, lang, graph, chemprop, median_ref: stored and forwarded
                unchanged to read_dataset() by __getitem__
            **kwargs: accepted and ignored
        """
        super().__init__()
        # group directories by basename, keeping first-seen order
        self.name_to_dir = defaultdict(list)
        for s_dir in superset_dir:
            self.name_to_dir[basename(s_dir)].append(s_dir)

        # one entry per basename; every entry wraps its directory list in a
        # single-element list that __getitem__ unwraps again
        self.superset_dir = [[dirs] for dirs in self.name_to_dir.values()]
        self.seed = seed  # seed for random patches
        self.ens_L = ens_L
        self.lang = lang
        self.graph = graph
        self.chemprop = chemprop
        self.median_ref = median_ref

    def __len__(self):
        """Number of distinct dataset names (basenames)."""
        return len(self.superset_dir)

    def __getitem__(self, index):
        """Return the DataSet built from all directories of the `index`-th name.

        numpy assets are converted to torch tensors; token / attention-mask
        assets are passed through exactly as read_dataset() produced them.
        """
        directories = self.superset_dir[index][0]
        sample = read_dataset(superset_dir=directories,
                              seed=self.seed,
                              ens_L=self.ens_L, lang=self.lang, graph=self.graph,
                              chemprop=self.chemprop, median_ref=self.median_ref)

        lr = sample['lr']
        if lr is None:
            # language-only configuration: nothing to convert
            pass
        elif type(lr[0][0]) is torch_geometric.data.data.Data:
            # graph representation: keep the first graph object only
            sample['lr'] = lr[0][0]
            sample['ens_attention_mask'] = torch.from_numpy(sample['ens_attention_mask'].astype(np.bool_))
        elif type(lr[0]) is np.ndarray:
            sample['lr'] = torch.from_numpy(lr.astype(np.float32))
            sample['ens_attention_mask'] = torch.from_numpy(sample['ens_attention_mask'].astype(np.bool_))

        if sample['prop'] is not None:
            sample['prop'] = torch.from_numpy(sample['prop'].astype(np.float32))

        return sample
class SuperSet_csv(Dataset):
    """MolPROP Superset class serving one molecule per item from tabular data."""

    def __init__(self, data, X='SMILES', y='target'):
        """
        Arguments:
            data: column-indexable table (e.g. a pandas DataFrame) holding the samples
            X (string): name of the column with the SMILES strings
            y (string): name of the column with the target values
        """
        self.superdata = data
        self.strings = self.superdata[X]
        self.target = self.superdata[y]

    def __len__(self):
        """Number of samples (rows of the SMILES column)."""
        return len(self.strings)

    @staticmethod
    def _take(column, index):
        """Return the `index`-th element of `column` by position.

        Samplers hand out positions 0..len-1, but `series[i]` on a pandas
        Series is a label lookup, which fails (or returns another row) when
        the frame's index is not the default 0..n-1 range. `.iloc` is used
        when available; plain sequences are indexed directly.
        """
        positional = getattr(column, 'iloc', None)
        return column[index] if positional is None else positional[index]

    def __getitem__(self, index):
        """Convert the `index`-th (SMILES, target) pair with convert_data()."""
        strings = self._take(self.strings, index)
        target = self._take(self.target, index)
        return convert_data(strings, target)
# Built on first use by _get_smiles_tokenizer(); loading the vocabulary from
# disk for every sample made each __getitem__ call pay for file I/O.
_SMILES_TOKENIZER = None


def _get_smiles_tokenizer():
    """Return the word-level SMILES tokenizer, building it on first use."""
    global _SMILES_TOKENIZER
    if _SMILES_TOKENIZER is None:
        # reformat ChemBERTa tokenizer for 'word' types with [***] pattern
        tokenizer = Tokenizer(WordLevel.from_file('config/chembert/ChemBERTa-77M-MLM/vocab.json', unk_token='[UNK]'))
        # raw string: same pattern as before, without the invalid "\[" escape
        tokenizer.pre_tokenizer = Split(pattern=Regex(r"\[(.*?)\]|.*?"), behavior='isolated')
        _SMILES_TOKENIZER = tokenizer
    return _SMILES_TOKENIZER


def convert_data(strings, target):
    """
    Arguments:
        strings: a single SMILES string
        target: target value to predict for that string
    Returns:
        dataset: DataSet containing the data assets
            name: one-element list holding the SMILES string
            token: 1-D tensor of 200 token ids (start id 12, tokens, pad id 11, end id 13)
            attention_mask: tensor of the same shape, 0 where token == 11 and 1 elsewhere
            lr: graph representation returned by mol_to_graph_data_obj_simple
            ens_attention_mask, alpha: scalar zero placeholders
            prop: target value with a trailing dimension added

    """

    # sequence padding tools
    def pad_infinite(iterable, padding=None):
        return chain(iterable, repeat(padding))

    def pad(iterable, size, padding=None):
        return islice(pad_infinite(iterable, padding), size)

    tok = list(_get_smiles_tokenizer().encode(strings).ids)
    # add start token, pad (or truncate) to 199 ids, then add the end token -> 200 ids
    tok.insert(0, 12)
    tok = list(pad(tok, 199, 11))
    tok.append(13)
    tok = torch.tensor(tok)
    # ids that are overwritten with the pad id and therefore masked out below
    # NOTE(review): the reason for choosing these particular ids is not visible here
    mask_toks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 17, 18, 20, 21, 22, 24,
                 26, 29, 30, 31, 32, 38, 39, 43, 58, 60, 63, 64, 72, 76, 98, 124, 170, 202,
                 217, 298, 327, 355, 370, 379, 388, 389, 398, 399, 419, 450]
    for t in mask_toks:
        tok[tok == t] = 11
    attention_mask = torch.ones(tok.shape, device=tok.device).masked_fill(tok == 11, 0)

    # preferred customizable implementation for smiles to graph conversion
    # NOTE(review): Chem.MolFromSmiles returns None for unparsable SMILES; that
    # value is passed on unchecked -- confirm the helper's behavior for None
    graph_rdkit = Chem.MolFromSmiles(strings)
    graph = mol_to_graph_data_obj_simple(graph_rdkit)

    y = torch.from_numpy(np.array(target))

    dataset = DataSet(lr=graph, ens_attention_mask=torch.from_numpy(np.array(0)), alpha=torch.from_numpy(np.array(0)),
                      token=tok, attention_mask=attention_mask,
                      prop=y.unsqueeze(-1), name=[strings])

    return dataset
def patch_iterator(img, positions, size):
    """Lazily yield one square patch of `img` per (x, y) pair in `positions`.

    Each patch is produced by get_patch(img=img, x=x, y=y, size=size).
    """
    yield from (
        get_patch(img=img, x=col, y=row, size=size)
        for col, row in positions
    )
def data_import(dataset, setup=None):
    """Build the holdout DataLoader.

    Arguments:
        dataset: str, csv file name inside the data directory (graph mode only)
        setup: dict, configuration; when omitted the module-level `setup`
            (defined when this file runs as a script) is used, as before
    Returns:
        dataloaders: dict with the single key 'holdout'
    Raises:
        ValueError: graph mode was requested but `dataset` is empty / None
    """
    if setup is None:
        # backward compatible fallback for callers relying on the global
        setup = globals()["setup"]

    data_directory = setup["paths"]["data_directory"]
    print(data_directory)

    language = setup["network"]["language"]
    graph = language["graph"]
    n_workers = setup["training"]["n_workers"]
    set_L = setup["training"]["set_L"]

    if not graph:
        holdout_list = getDataSetDirectories(setup, os.path.join(data_directory, "holdout"))
        holdout_dataset = SuperSet(superset_dir=holdout_list,
                                   setup=setup["training"],
                                   ens_L=setup["training"]["ens_L"], lang=language["model"], graph=graph,
                                   chemprop=setup["paths"]["chemprop_maps"],
                                   median_ref=setup["paths"]["median_ref"])
    else:
        if not dataset:
            # str() keeps the message buildable when dataset is None
            raise ValueError("dataset not found in specified directory: %s"
                             % os.path.join(data_directory, str(dataset)))
        data = pd.read_csv(os.path.join(data_directory, dataset))
        holdout_dataset = SuperSet_csv(data=data, X='ids', y='y')

    # samples are evaluated one at a time and in file order
    holdout_dataloader = DataLoader(holdout_dataset,
                                    batch_size=int(1),
                                    shuffle=False,
                                    num_workers=n_workers,
                                    collate_fn=collateFunction(setup=setup, set_L=set_L),
                                    pin_memory=True)

    torch.cuda.empty_cache()
    return {'holdout': holdout_dataloader}
def test_model(fusion_model, filename, dataloaders, setup):
    """Evaluate `fusion_model` on dataloaders['holdout'].

    Arguments:
        fusion_model: model called as
            fusion_model(lrs, ens_attention_masks, alphas, tokens, attention_masks, fusion_size=...)
        filename: str, suffix of the scatter-plot file (continuous mode without kfold)
        dataloaders: dict holding the 'holdout' DataLoader
        setup: dict, configuration
    Returns:
        continuous mode: (holdout_score, r2, pearson, spearman, PROPs, PREDs)
        discrete mode:   (holdout_score, acc, ap, auc, PROPs, PREDs)
    Raises:
        ValueError: unsupported mode / num_predictions combination
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    fusion_model.eval()

    precision = setup["paths"]["precision"]
    language = setup['network']['language']
    training = setup['training']
    float_casts = {"16": torch.Tensor.half, "32": torch.Tensor.float, "64": torch.Tensor.double}
    fusion_size = 320
    n_total = len(dataloaders['holdout'].dataset)

    PROPs = []
    PREDs = []
    holdout_score = 0

    # inference only: no autograd graph is needed
    with torch.no_grad():
        for lrs, ens_attention_masks, alphas, tokens, attention_masks, props, names in tqdm(dataloaders['holdout']):
            if type(lrs) is torch.Tensor or lrs is None:
                to_float = float_casts.get(precision)
                if to_float is not None:
                    if lrs is not None:
                        lrs = to_float(lrs).to(device)
                    ens_attention_masks = ens_attention_masks.bool().to(device)
                    # precision "16" casts alphas to half, "32"/"64" to bool (kept as is)
                    alphas = (alphas.half() if precision == "16" else alphas.bool()).to(device)
                    tokens = tokens.int().to(device)
                    # The previous `attention_masks.bool().to(device)` discarded its
                    # result, so the model always received the un-cast mask. The
                    # dtype is left untouched to keep predictions identical; only
                    # the device move is applied.
                    attention_masks = attention_masks.to(device)
                    props = to_float(props).to(device)
            else:
                # list of graph objects -> one batched graph
                from torch_geometric.data import Batch
                lrs = Batch.from_data_list([lr.to(device) for lr in lrs]).to(device)
                tokens = tokens.int().to(device)
                attention_masks = attention_masks.to(device)  # see note above
                props = props.float().to(device)

            prop_preds = fusion_model(lrs, ens_attention_masks, alphas, tokens, attention_masks, fusion_size=fusion_size)

            for i in range(prop_preds.shape[0]):
                PROPs.append(props[i].detach().cpu().numpy())
                PREDs.append(prop_preds[i].detach().cpu().numpy())

            # compute loss for the batch
            if training['combine_losses']:
                score1 = get_loss(prop_preds, props, metric='L1')
                score2 = get_loss(prop_preds, props, metric='L2')
                score = training['alpha1'] * score1 + training['alpha2'] * score2
            else:
                score = get_loss(prop_preds, props, metric=training['loss'], num_predictions=language['num_predictions'])

            score = torch.mean(score)
            # weight the batch mean by its share of the whole holdout set
            holdout_score += score.detach().cpu().numpy() * len(props) / n_total

    PROPs = np.squeeze(np.array(PROPs))
    PREDs = np.squeeze(np.array(PREDs))

    if language['mode'] == 'continuous':
        r2 = r2_score(PROPs, PREDs)
        pearson = stats.pearsonr(PROPs, PREDs).statistic
        spearman = stats.spearmanr(PROPs, PREDs).correlation

        if not training["kfold"]:
            # plot predictions vs properties
            fig, ax = plt.subplots()
            ax.scatter(PROPs, PREDs, c='black', edgecolors=(0, 0, 0))
            ax.plot([PROPs.min(), PROPs.max()], [PROPs.min(), PROPs.max()], 'k--', lw=4)
            ax.set_xlabel('Measured')
            ax.set_ylabel('Predicted')
            ax.set_title('R\u00b2: ' + "{:.2f}".format(r2))
            text_box2 = AnchoredText('R\u00b2: ' + "{:.2f}".format(r2) + "\n" + '$r_{p}$: ' + "{:.2f}".format(pearson) + "\n" + '$r_{s}$: ' + "{:.2f}".format(spearman), frameon=True, prop=dict(color='black'), loc=4, pad=0.5)
            plt.setp(text_box2.patch, facecolor='white', alpha=0.5)
            plt.gca().add_artist(text_box2)
            # NOTE: `path` is the module-level output directory bound in the
            # __main__ block; it is not a parameter of this function
            plt.savefig(r'%sholdout_%s.png' % (path + "/", filename), dpi=300, bbox_inches='tight')
            plt.close(fig)  # release the figure once it is on disk
        return holdout_score, r2, pearson, spearman, PROPs, PREDs

    if language['mode'] == 'discrete':
        # NOTE(review): ap / auc are computed on hard labels (rounded or
        # argmax-ed), not on scores -- kept as is, confirm this is intended
        if language['num_predictions'] == 1:
            PREDs = torch.sigmoid(torch.from_numpy(PREDs)).numpy()
            PREDs = np.round(PREDs)
            acc = accuracy_score(PROPs, PREDs)
            ap = average_precision_score(PROPs, PREDs)
            auc = roc_auc_score(PROPs, PREDs)
            print('')
            print('acc: ', acc, 'prc: ', ap, 'roc_auc: ', auc)
            print('val_score: ', holdout_score)
            print('')
        elif language['num_predictions'] > 1:
            # was `> 2`, which left acc / ap / auc unbound for two-class outputs
            PREDs = PREDs.argmax(axis=1)
            acc = None
            ap = average_precision_score(PROPs, PREDs)
            auc = roc_auc_score(PROPs, PREDs)
            print('')
            print('prc: ', ap, 'roc_auc: ', auc)
            print('val_score: ', holdout_score)
            print('')
        else:
            raise ValueError("num_predictions must be >= 1 in discrete mode")
        return holdout_score, acc, ap, auc, PROPs, PREDs

    raise ValueError("unsupported mode: %s" % str(language['mode']))
def _load_fusion_model(network_cfg, state_dict, device):
    """Build a SLEFNet, load `state_dict` into it and wrap it in DataParallel."""
    # checkpoints saved from a DataParallel model carry a "module." prefix;
    # strip it only where present instead of blindly cutting 7 characters
    consume_prefix_in_state_dict_if_present(state_dict, "module.")
    model = SLEFNet(network_cfg)
    model.load_state_dict(state_dict)
    return torch.nn.DataParallel(model, device_ids=[0]).to(device)


def _evaluate_and_print(model, filename, dataloaders, setup):
    """Run test_model() and print its metrics with the labels matching the mode."""
    result = test_model(model, filename, dataloaders, setup)
    holdout_score, _, second, third = result[:4]
    if setup['network']['language']['mode'] == 'continuous':
        print('r2: ', result[1], 'r_p: ', second, 'r_s: ', third)
    else:
        print('prc: ', second, 'roc_auc: ', third)
    print('')
    print('holdout_score: ', holdout_score)
    return result


def _report_kfold(cv_data, metric_names, kfold, plot, path, filename):
    """Print mean / std of every collected metric; optionally save the k-fold scatter plot."""
    for key in ('holdout_score',) + tuple(metric_names):
        print('%s_avg: ' % key, np.mean(cv_data[key]))
        print('%s_std: ' % key, np.std(cv_data[key]))
    if not plot:
        return

    def summary(label, values):
        # "<label><mean> ± <standard error over folds>"
        return (label + "{:.2f}".format(np.mean(values)) + " " + u"\u00B1"
                + " {:.2f}".format(np.std(values) / np.sqrt(kfold)))

    # average / std across folds, per sample
    props_avg = np.mean(cv_data['props'], axis=0)
    preds_avg = np.mean(cv_data['preds'], axis=0)
    props_std = np.std(cv_data['props'], axis=0)
    preds_std = np.std(cv_data['preds'], axis=0)

    # plot predictions vs properties as scatter plot with error bars
    fig, ax = plt.subplots()
    ax.errorbar(props_avg, preds_avg, xerr=props_std, yerr=preds_std / np.sqrt(kfold),
                fmt='o', color='black', ecolor='black', elinewidth=3, capsize=0)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    ax.plot([props_avg.min(), props_avg.max()], [props_avg.min(), props_avg.max()], 'k--', lw=4)
    text_box2 = AnchoredText("\n".join([summary('R\u00b2: ', cv_data['r2']),
                                        summary('$r_{p}$: ', cv_data['rp']),
                                        summary('$r_{s}$: ', cv_data['rs'])]),
                             frameon=True, prop=dict(color='black'), loc=4, pad=0.5)
    plt.setp(text_box2.patch, facecolor='white', alpha=0.5)
    plt.gca().add_artist(text_box2)
    plt.savefig(r'%sholdout_%s.png' % (path + "/", filename), dpi=300, bbox_inches='tight')
    plt.close(fig)


def main(dataset, path, model_path, filename, epoch_num, validation, ray_tune, checkpoint, setup):
    """Evaluate saved model(s) on the holdout set and print / plot the metrics.

    Arguments:
        dataset: str, holdout csv file name (forwarded to data_import)
        path: str, directory receiving the k-fold scatter plot
        model_path: str, directory prefix of the saved models
        filename: str, suffix of the plot file name
        epoch_num: if truthy, evaluate 'SLEF_<epoch_num>.pth'
        validation: if truthy (and no epoch_num), evaluate 'SLEF_validation.pth'
        ray_tune: if truthy, name of the ray_tune trial directory to load from
        checkpoint: str, checkpoint directory name inside the trial directory
        setup: dict, configuration
    Raises:
        ValueError: setup mode is neither 'continuous' nor 'discrete'
    """
    # random seeds for reproducibility
    np.random.seed(setup["training"]["seed"])
    torch.manual_seed(setup["training"]["seed"])

    mode = setup['network']['language']['mode']
    if mode not in ('continuous', 'discrete'):
        raise ValueError("unsupported mode: %s" % str(mode))
    kfold = setup["training"]["kfold"]

    # define compute specifications
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # import holdout data
    dataloaders = data_import(dataset)

    def read_state(file_name):
        # map_location lets checkpoints saved on GPU load on CPU-only machines
        return torch.load(os.path.join(model_path, file_name), map_location=device)["model_state_dict"]

    fold_states = None   # iterable of per-fold state dicts (k-fold evaluation)
    single_state = None  # state dict of a single model
    if ray_tune:
        trial_dir = model_path + 'ray_tune/' + ray_tune
        with open(trial_dir + '/params.json', "r") as read_file:
            network_cfg = json.load(read_file)["network"]
        tuned = Checkpoint.from_directory(trial_dir + '/' + checkpoint + '/')
        tuned_state = tuned.to_dict().get("model_state_dict")
        if kfold:
            fold_states = (tuned_state[fold] for fold in range(kfold))
        else:
            single_state = tuned_state
    else:
        network_cfg = setup["network"]
        if epoch_num:
            single_state = read_state('SLEF_' + str(epoch_num) + '.pth')
        elif validation:
            print("validation")
            single_state = read_state('SLEF_validation.pth')
        elif kfold:
            # lazy: only one fold's checkpoint is held in memory at a time
            fold_states = (read_state('SLEF_f%s_validation.pth' % str(fold + 1)) for fold in range(kfold))
        else:
            single_state = read_state('SLEF.pth')

    if fold_states is None:
        fusion_model = _load_fusion_model(network_cfg, single_state, device)
        _evaluate_and_print(fusion_model, filename, dataloaders, setup)
        return

    metric_names = ('r2', 'rp', 'rs') if mode == 'continuous' else ('prc', 'roc_auc')
    cv_data = {key: [] for key in ('holdout_score', 'props', 'preds') + metric_names}
    for state in tqdm(fold_states, total=kfold):
        fusion_model = _load_fusion_model(network_cfg, state, device)
        holdout_score, first, second, third, props, preds = _evaluate_and_print(fusion_model, filename, dataloaders, setup)
        cv_data['props'].append(props)
        cv_data['preds'].append(preds)
        cv_data['holdout_score'].append(holdout_score)
        # discrete mode: the first metric (accuracy) is not aggregated
        fold_metrics = (first, second, third) if mode == 'continuous' else (second, third)
        for key, value in zip(metric_names, fold_metrics):
            cv_data[key].append(value)

    _report_kfold(cv_data, metric_names, kfold, mode == 'continuous', path, filename)
if __name__ == '__main__':
    # Script entry point: read the holdout options from the local config, then
    # evaluate with the configuration stored next to the trained model.

    with open('./config/setup.json', "r") as read_file:
        setup = json.load(read_file)

    # holdout options always come from ./config/setup.json
    dataset = setup["holdout"]["dataset"]
    # NOTE: `path` must stay a module-level name; test_model() reads it as a global
    path = setup["holdout"]["holdout_data_path"]
    model_path = setup["holdout"]["model_path"]
    filename = setup["holdout"]["filename"]
    epoch_num = setup["holdout"]["epoch_num"]
    ray_tune = setup["holdout"]["ray_tune"]
    checkpoint = setup["holdout"]["checkpoint"]
    validation = setup["holdout"]["validation"]

    # `setup` is rebound to the configuration saved with the model; model_path is
    # concatenated as-is, so it has to end with a path separator.
    # NOTE: `setup` must stay a module-level name; data_import() reads it as a global
    with open(model_path + 'setup.json', "r") as read_file:
        setup = json.load(read_file)

    main(dataset, path, model_path, filename, epoch_num, validation, ray_tune, checkpoint, setup)
"""MolPROP utilities """
from torch import nn
import torch
import torch_geometric
import math
from torch.optim.lr_scheduler import LambdaLR
from kornia.losses.focal import FocalLoss, BinaryFocalLossWithLogits
import csv
import numpy as np
import os
import time
import matplotlib.pyplot as plt
from ray import tune
import sys
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler


def getDataSetDirectories(setup, data_dir):
    """
    function collects and returns a list of paths to directories, one for every dataset.
    Arguments:
        setup: dict, configuration; setup["paths"]["use_atomsets"] holds the
            atomset directory names as a single ', '-separated string
        data_dir: str, path/dir of the dataset
    Returns:
        dataset_dirs: list of str, dataset directories
    Raises:
        ValueError: if use_atomsets is False or None
    """

    atomsets = setup["paths"]["use_atomsets"]
    print(atomsets)
    # Both "unset" spellings must be tested explicitly: `x is not False or None`
    # parses as `(x is not False) or None`, which lets None through to .split().
    if atomsets is False or atomsets is None:
        raise ValueError("atomsets must be specified in setup file by directory name")
    atomsets = atomsets.split(', ')

    dataset_dirs = []
    for channel_dir in atomsets:
        path = os.path.join(data_dir, channel_dir)
        for dataset_name in os.listdir(path):
            dataset_dirs.append(os.path.join(path, dataset_name))
    return dataset_dirs


class collateFunction():
    """ pad batches of data with variable number of structures. """

    def __init__(self, setup, set_L=32):
        """
        Args:
            setup: configuration object, stored on the instance
            set_L: int, pad length
        """
        self.setup = setup
        self.set_L = set_L

    def __call__(self, batch):
        return self.collateFunction(batch)

    def collateFunction(self, batch):
        """
        collate function to adjust for a variable number of structures.

        Every sample is a mapping with the keys 'lr', 'ens_attention_mask',
        'token', 'attention_mask', 'prop' and 'name'. Three kinds of 'lr' are
        handled:
            * 4-D torch.Tensor (C, L, H, W): cropped or zero-padded along
              dim 1 to exactly set_L entries
            * torch_geometric Data object: collected into a plain list
            * None: no structure input for this sample

        Args:
            batch: non-empty list of samples
        Returns:
            padded_lr_batch: stacked tensor (B, C, set_L, H, W) for tensor
                input, list of Data objects for graph input, otherwise None
            padded_ens_attention_mask_batch: stacked masks for tensor input,
                otherwise None
            alpha_batch: tensor (B, set_L) for tensor input (1 if genuine,
                0 if padded), otherwise None
            tokens_batch: stacked 'token' entries with dim 1 squeezed, or None
                when the last sample carries no tokens
            attention_masks_batch: stacked 'attention_mask' entries with dim 1
                squeezed, or None when the last sample carries no tokens
            prop_batch: stacked 'prop' entries when every sample has one,
                otherwise the list collected before the first missing 'prop'
            isn_batch: list of sample names
        Raises:
            ValueError: if batch is empty
            TypeError: if a sample's 'lr' is not a Tensor, a Data object or None
        """
        if len(batch) == 0:
            raise ValueError("cannot collate an empty batch")

        lr_batch = []  # batch of structure inputs
        ens_attention_mask_batch = []  # batch of attention masks for the structures
        alpha_batch = []  # batch of indicators (0 if padded, 1 if genuine)
        tokens_batch = []  # batch of token sequences
        attention_masks_batch = []  # batch of token attention masks
        prop_batch = []  # batch of properties to predict
        isn_batch = []  # batch of names

        # stays True only while every sample seen so far carries a property
        train_batch = True

        for dataset in batch:
            lrs = dataset['lr']
            ens_attention_masks = dataset['ens_attention_mask']
            tokens = dataset['token']
            attention_masks = dataset['attention_mask']

            if isinstance(lrs, torch.Tensor):
                C, L, H, W = lrs.shape
                if L >= self.set_L:
                    # keep only the first set_L structures
                    lr_batch.append(lrs[:, :self.set_L, :, :])
                    ens_attention_mask_batch.append(ens_attention_masks[:self.set_L])
                    alpha_batch.append(torch.ones(self.set_L))
                else:
                    # 0 pad along dim 1 up to set_L
                    pad = torch.full((C, self.set_L - L, H, W), fill_value=0)
                    lr_batch.append(torch.cat([lrs, pad], dim=1))
                    # NOTE(review): the crop branch slices the mask along dim 0
                    # while this branch concatenates a 4-D pad along dim 1; the
                    # two only agree for particular mask shapes -- confirm the
                    # shape of 'ens_attention_mask' against the dataset class.
                    ens_attention_mask_batch.append(torch.cat([ens_attention_masks, pad], dim=1))
                    alpha_batch.append(torch.cat([torch.ones(L), torch.zeros(self.set_L - L)], dim=0))
            elif isinstance(lrs, torch_geometric.data.data.Data):
                # graphs are kept as a list, there is nothing to pad or mask
                L = None
                lr_batch.append(lrs)
                ens_attention_mask_batch = None
                alpha_batch = None
            elif lrs is None:
                L = None
                ens_attention_mask_batch = None
                alpha_batch = None
            else:
                # skipping the sample would leave the batch lists misaligned
                raise TypeError(
                    "unsupported type for dataset['lr']: %s" % type(lrs).__name__)

            tokens_batch.append(tokens)
            attention_masks_batch.append(attention_masks)

            # append batch of properties to predict
            prop = dataset['prop']
            if train_batch and prop is not None:
                prop_batch.append(prop)
            else:
                train_batch = False

            isn_batch.append(dataset['name'])

        # convert lists batch to torch stacks batch; the kind of the batch is
        # decided by the last sample
        if L is not None:
            padded_lr_batch = torch.stack(lr_batch, dim=0)
            padded_ens_attention_mask_batch = torch.stack(ens_attention_mask_batch, dim=0)
            alpha_batch = torch.stack(alpha_batch, dim=0)
        elif isinstance(lrs, torch_geometric.data.data.Data):
            padded_lr_batch = lr_batch
            padded_ens_attention_mask_batch = None
            alpha_batch = None
        else:
            padded_lr_batch = None
            padded_ens_attention_mask_batch = None
            alpha_batch = None

        if tokens is not None:
            tokens_batch = torch.stack(tokens_batch, dim=0).squeeze(1)
            attention_masks_batch = torch.stack(attention_masks_batch, dim=0).squeeze(1)
        else:
            tokens_batch = None
            attention_masks_batch = None

        if train_batch:
            prop_batch = torch.stack(prop_batch, dim=0)

        return padded_lr_batch, padded_ens_attention_mask_batch, alpha_batch, tokens_batch, attention_masks_batch, prop_batch, isn_batch


def get_loss(prop_preds, props, metric='L2', num_predictions=1):
    """
    compute loss for batch instance.
    Args:
        prop_preds: tensor, batch property predictions
        props: tensor, batch properties (targets)
        metric: str, one of
            'L1'    mean absolute error
            'L2'    mean squared error
            'L3'    square root of the mean squared error
            'focal' focal loss (binary when num_predictions == 1,
                    multi-class when num_predictions > 2)
            'BCE'   binary cross entropy with logits
        num_predictions: int, number of model outputs; only used by 'focal'
    Returns:
        loss: scalar tensor, loss averaged over the batch
    Raises:
        ValueError: if metric is unknown, or if metric is 'focal' and
            num_predictions is neither 1 nor greater than 2
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    props = props.to(device)
    prop_preds = prop_preds.to(device)

    if metric == 'L1':
        loss = torch.nn.L1Loss().to(device)
        return loss(props, prop_preds)

    if metric == 'L2':
        loss = torch.nn.MSELoss().to(device)
        return loss(props, prop_preds)

    if metric == 'L3':
        loss = torch.nn.MSELoss().to(device)
        return torch.sqrt(loss(props, prop_preds))

    if metric == 'focal':
        if num_predictions > 2:
            # not sure if this works for multi-class but close enough for now
            alpha = torch.tensor([1/num_predictions]*num_predictions)
            alpha = alpha.float()
            props = props.to(torch.int64)
            loss = FocalLoss(alpha = alpha, gamma = 2.0, reduction = 'mean').to(device)
            return loss(prop_preds, props)
        if num_predictions == 1:
            alpha = 0.5
            props = props.to(torch.int64)
            loss = BinaryFocalLossWithLogits(alpha = alpha, gamma = 2.0, reduction = 'mean').to(device)
            return loss(prop_preds, props)
        # no focal variant is configured for any other output count
        raise ValueError(
            "focal loss requires num_predictions == 1 or num_predictions > 2, got %r"
            % (num_predictions,))

    if metric == 'BCE':
        loss = torch.nn.BCEWithLogitsLoss().to(device)
        return loss(prop_preds, props)

    # returning None here would only fail later, far away from the typo
    raise ValueError("unknown loss metric: %r" % (metric,))


def get_inverse_sqrt_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
    """ Create a schedule whose learning-rate multiplier increases linearly for
    num_warmup_steps scheduler steps, reaching num_warmup_steps ** -0.5, and
    thereafter decays proportional to the inverse square root of the step count.
    The multiplier scales each parameter group's configured learning rate.

    Args:
        optimizer: wrapped optimizer
        num_warmup_steps: number of warmup steps; when it is 0 or negative there
            is no warmup and the multiplier is step ** -0.5 from the first step
        num_training_steps: unused, kept so the signature stays compatible
        last_epoch: index of the last step when resuming, -1 to start fresh
    Returns:
        LambdaLR scheduler wrapping the optimizer

    Sources:
        https://github.com/jcyk/copyisallyouneed/blob/master/optim.py
    """
    # Hoisted out of the lambda; guarded because 0 ** -1.5 raises
    # ZeroDivisionError and a negative base yields a complex number.
    if num_warmup_steps > 0:
        warmup_slope = float(num_warmup_steps) ** -1.5
    else:
        warmup_slope = None

    def lr_lambda(current_step):
        current_step = max(1, current_step)
        decay = float(current_step) ** -0.5
        if warmup_slope is None:
            return decay
        return min(decay, float(current_step) * warmup_slope)

    return LambdaLR(optimizer, lr_lambda, last_epoch)