├── LICENSE ├── README.md ├── configs └── bbbp │ └── bbbp.yaml ├── data └── bbbp │ ├── processed │ ├── bbbp.pt │ ├── pre_filter.pt │ ├── pre_transform.pt │ ├── train_valid_test_bbbp_seed_2021.ckpt │ ├── train_valid_test_bbbp_seed_2022.ckpt │ ├── train_valid_test_bbbp_seed_2023.ckpt │ ├── train_valid_test_bbbp_seed_2024.ckpt │ ├── train_valid_test_bbbp_seed_2025.ckpt │ ├── train_valid_test_bbbp_seed_2026.ckpt │ ├── train_valid_test_bbbp_seed_2027.ckpt │ ├── train_valid_test_bbbp_seed_2028.ckpt │ ├── train_valid_test_bbbp_seed_2029.ckpt │ └── train_valid_test_bbbp_seed_2030.ckpt │ └── raw │ └── bbbp.csv ├── dataset ├── bace.csv ├── bbbp.csv ├── clintox.csv ├── delaney.csv ├── freesolv.csv ├── hiv.csv ├── lipo.csv ├── muv.csv ├── sider.csv ├── tox21.csv └── toxcast.csv ├── example ├── auc_bace.ipynb ├── count_brics.ipynb ├── edge_tox21.ipynb ├── edge_tox21_gat.ipynb ├── fa_lipo.ipynb ├── fa_tox21.ipynb ├── interpretation_bace.ipynb ├── interpretation_bbbp.ipynb ├── rmse_esol.ipynb └── visualization_bbbp.ipynb ├── hignn.png ├── requirements.txt ├── source ├── config.py ├── cross_validate.py ├── dataset.py ├── loss.py ├── model.py ├── train.py └── utils.py └── test ├── best_bbbp_seed2021_random.yaml ├── logs └── bbbp_seed2021_random_2022-02-23.log └── round_22 ├── fold_0 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645630520.node03.247884.1050 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645630521.node03.247884.1051 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645630521.node03.247884.1052 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645630521.node03.247884.1053 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645630521.node03.247884.1054 ├── fold_1 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645630605.node03.247884.1055 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645630606.node03.247884.1056 │ ├── scalar_auc_valid_auc │ └── 
events.out.tfevents.1645630606.node03.247884.1057 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645630606.node03.247884.1058 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645630606.node03.247884.1059 ├── fold_2 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645630669.node03.247884.1060 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645630670.node03.247884.1061 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645630670.node03.247884.1062 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645630670.node03.247884.1063 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645630670.node03.247884.1064 ├── fold_3 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645630751.node03.247884.1065 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645630752.node03.247884.1066 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645630752.node03.247884.1067 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645630752.node03.247884.1068 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645630752.node03.247884.1069 ├── fold_4 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645630846.node03.247884.1070 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645630847.node03.247884.1071 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645630847.node03.247884.1072 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645630847.node03.247884.1073 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645630847.node03.247884.1074 ├── fold_5 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645630956.node03.247884.1075 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645630957.node03.247884.1076 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645630957.node03.247884.1077 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645630957.node03.247884.1078 │ └── scalar_loss_valid_loss │ └── 
events.out.tfevents.1645630957.node03.247884.1079 ├── fold_6 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645631062.node03.247884.1080 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645631063.node03.247884.1081 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645631063.node03.247884.1082 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645631063.node03.247884.1083 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645631063.node03.247884.1084 ├── fold_7 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645631187.node03.247884.1085 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645631188.node03.247884.1086 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645631188.node03.247884.1087 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645631188.node03.247884.1088 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645631188.node03.247884.1089 ├── fold_8 ├── checkpoints │ └── best_ckpt.pth └── tensorboard │ ├── events.out.tfevents.1645631304.node03.247884.1090 │ ├── scalar_auc_train_auc │ └── events.out.tfevents.1645631305.node03.247884.1091 │ ├── scalar_auc_valid_auc │ └── events.out.tfevents.1645631305.node03.247884.1092 │ ├── scalar_loss_train_loss │ └── events.out.tfevents.1645631305.node03.247884.1093 │ └── scalar_loss_valid_loss │ └── events.out.tfevents.1645631305.node03.247884.1094 └── fold_9 ├── checkpoints └── best_ckpt.pth └── tensorboard ├── events.out.tfevents.1645631374.node03.247884.1095 ├── scalar_auc_train_auc └── events.out.tfevents.1645631375.node03.247884.1096 ├── scalar_auc_valid_auc └── events.out.tfevents.1645631375.node03.247884.1097 ├── scalar_loss_train_loss └── events.out.tfevents.1645631375.node03.247884.1098 └── scalar_loss_valid_loss └── events.out.tfevents.1645631375.node03.247884.1099 /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 idrugLab 4 | 5 
| Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | HiGNN is a well-designed hierarchical and interactive informative graph neural network framework for predicting molecular properties by utilizing co-representation learning of molecular graphs and chemically synthesizable BRICS fragments. Meanwhile, a plug-and-play feature-wise attention block was first designed in the HiGNN architecture to adaptively recalibrate atomic features after the message passing phase. [HiGNN](https://pubs.acs.org/doi/10.1021/acs.jcim.2c01099) has been accepted for publication in [Journal of Chemical Information and Modeling](https://pubs.acs.org/journal/jcisd8/). 
3 | ![overview](https://github.com/idrugLab/hignn/blob/main/hignn.png) 4 | Fig.1 The overview of HiGNN 5 | 6 | ## Requirements 7 | This project is developed using python 3.7.10, and mainly requires the following libraries. 8 | ```txt 9 | rdkit==2021.03.1 10 | scikit_learn==1.1.1 11 | torch==1.7.1+cu101 12 | torch_geometric==1.7.1 13 | torch_scatter==2.0.7 14 | ``` 15 | To install [requirements](https://github.com/idrugLab/hignn/blob/main/requirements.txt): 16 | ```txt 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Usage 21 | ### File description 22 | 1. `source:` the source code for HiGNN. 23 | - `config.py` 24 | - `dataset.py` 25 | - `utils.py` 26 | - `model.py` 27 | - `loss.py` 28 | - `train.py` 29 | - `cross_validate.py` 30 | 2. `configs:` HiGNN used [yacs](https://github.com/rbgirshick/yacs) for experimental configuration, where you can customize the relevant hyperparameters for each experiment with a yaml file. 31 | 3. `data:` the dataset for training. 32 | - `raw:` where to store the original csv dataset. 33 | - `processed:` the dataset objects generated by [PyG](https://pytorch-geometric.readthedocs.io/en/latest/notes/create_dataset.html). 34 | 4. `test:` where the training logs, checkpoints and tensorboards are saved. 35 | 5. `example:` jupyter notebook codes for fragments counting, t-SNE visualization, interpretation and so on. 36 | 6. `dataset:` 11 real-world drug-discovery-related datasets used in this study. 
37 | 38 | ### Training example 39 | Taking the BBBP dataset as an example, experiment can be run via: 40 | ```shell 41 | git clone https://github.com/idruglab/hignn 42 | cd ./hignn 43 | 44 | # For one random seed 45 | python ./source/train.py --cfg ./configs/bbbp/bbbp.yaml --opts 'SEED' 2022 'MODEL.BRICS' True 'MODEL.F_ATT' True --tag seed_2022 46 | 47 | # For 10 different random seeds (2021~2030) 48 | python ./source/cross_validate.py --cfg ./configs/bbbp/bbbp.yaml --opts 'MODEL.BRICS' True 'MODEL.F_ATT' True 'HYPER' False --tag 10_seeds 49 | 50 | # For hyperparameters optimization 51 | python ./source/cross_validate.py --cfg ./configs/bbbp/bbbp.yaml --opts 'MODEL.BRICS' True 'MODEL.F_ATT' True --tag hignn # HiGNN 52 | python ./source/cross_validate.py --cfg ./configs/bbbp/bbbp.yaml --opts 'MODEL.F_ATT' True --tag w/o_hi # the variant (w/o HI) 53 | python ./source/cross_validate.py --cfg ./configs/bbbp/bbbp.yaml --opts 'MODEL.BRICS' True --tag w/o_fa # the variant (w/o FA) 54 | python ./source/cross_validate.py --cfg ./configs/bbbp/bbbp.yaml --tag vanilla # the variant (w/o All) 55 | ``` 56 | And more hyperparameter details can be found in [config.py](https://github.com/idrugLab/hignn/blob/main/source/config.py "config.py"). 57 | 58 | ### Interpretation 59 | The interpretability of HiGNN can refer to [interpretation_bace](https://github.com/idrugLab/hignn/blob/main/example/interpretation_bace.ipynb "interpretation_bace.ipynb") and [interpretation_bbbp](https://github.com/idrugLab/hignn/blob/main/example/interpretation_bbbp.ipynb "interpretation_bbbp.ipynb"). 60 | 61 | ## Data 62 | - The datasets used in this study are available in [dataset](https://github.com/idrugLab/hignn/blob/main/dataset) or [MoleculeNet](https://moleculenet.org/). 63 | - The training logs, checkpoints, and tensorboards for each dataset can be found in [BaiduNetdisk](https://pan.baidu.com/s/1NDDrsjWuL_5PhOeSD7RM5w?pwd=scut). 
64 | ## Results 65 | In the present study, we evaluated the proposed HiGNN model on 11 commonly used and publicly available drug discovery-related datasets from [Wu et al.](https://pubs.rsc.org/en/content/articlelanding/2018/SC/C7SC02664A), including classification and regression tasks. According to previous studies, 14 learning tasks were designed based on 11 benchmark datasets, including 11 classification tasks based random- and scaffold-splitting methods and three regression tasks based on random-splitting method. 66 | 67 | Table 1 Predictive performance results of HiGNN on the drug discovery-related benchmark datasets. 68 | 69 | | Dataset | Split Type | Metric | Chemprop | GCN | GAT | Attentive FP | HRGCN+ | XGBoost | HiGNN | 70 | |---------|------------|---------|----------|-------|-------|--------------|--------|---------|---------| 71 | | BACE | random | ROC-AUC | **0.898** | **0.898** | 0.886 | 0.876 | 0.891 | 0.889 | 0.890 | 72 | | | scaffold | ROC-AUC | 0.857 | | | | | | **0.882** | 73 | | HIV | random | ROC-AUC | 0.827 | **0.834** | 0.826 | 0.822 | 0.824 | 0.816 | 0.816 | 74 | | | scaffold | ROC-AUC | 0.794 | | | | | | **0.802** | 75 | | MUV | random | PRC-AUC | 0.053 | 0.061 | 0.057 | 0.038 | 0.082 | 0.068 | **0.186** | 76 | | Tox21 | random | ROC-AUC | 0.854 | 0.836 | 0.835 | 0.852 | 0.848 | 0.836 | **0.856** | 77 | | ToxCast | random | ROC-AUC | 0.764 | 0.770 | 0.768 | **0.794** | 0.793 | 0.774 | 0.781 | 78 | | BBBP | random | ROC-AUC | 0.917 | 0.903 | 0.898 | 0.887 | 0.926 | 0.926 | **0.932** | 79 | | | scaffold | ROC-AUC | 0.886 | | | | | | **0.927** | 80 | | ClinTox | random | ROC-AUC | 0.897 | 0.895 | 0.888 | 0.904 | 0.899 | 0.911 | **0.930** | 81 | | SIDER | random | ROC-AUC | **0.658** | 0.634 | 0.627 | 0.623 | 0.641 | 0.642 | 0.651 | 82 | | FreeSolv | random | RMSE | 1.009 | 1.149 | 1.304 | 1.091 | 0.926 | 1.025 | **0.915** | 83 | | ESOL | random | RMSE | 0.587 | 0.708 | 0.658 | 0.587 | 0.563 | 0.582 | **0.532** | 84 | | Lipo | random | RMSE | 
0.563 | 0.664 | 0.683 | 0.553 | 0.603 | 0.574 | **0.549** | 85 | 86 | ## Acknowledgments 87 | The code was partly built based on [chemprop](https://github.com/chemprop/chemprop), [TrimNet](https://github.com/yvquanli/trimnet) and [Swin Transformer](https://github.com/microsoft/Swin-Transformer). Thanks a lot for their open source codes! 88 | 89 | -------------------------------------------------------------------------------- /configs/bbbp/bbbp.yaml: -------------------------------------------------------------------------------- 1 | OUTPUT_DIR: './test/bbbp' 2 | TAG: 'default' 3 | SEED: 2021 4 | NUM_FOLDS: 10 5 | HYPER: True 6 | HYPER_REMOVE: None 7 | NUM_ITERS: 30 8 | 9 | 10 | DATA: 11 | BATCH_SIZE: 64 12 | DATASET: 'bbbp' 13 | DATA_PATH: './data/bbbp/' 14 | TASK_TYPE: 'classification' 15 | METRIC: 'auc' 16 | SPLIT_TYPE: 'random' 17 | 18 | 19 | MODEL: 20 | F_ATT: False 21 | BRICS: False 22 | 23 | 24 | LOSS: 25 | FL_LOSS: False 26 | CL_LOSS: False 27 | 28 | 29 | TRAIN: 30 | EARLY_STOP: 50 31 | MAX_EPOCHS: 200 32 | OPTIMIZER: 33 | TYPE: 'adam' 34 | LR_SCHEDULER: 35 | TYPE: 'reduce' 36 | -------------------------------------------------------------------------------- /data/bbbp/processed/bbbp.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/bbbp.pt -------------------------------------------------------------------------------- /data/bbbp/processed/pre_filter.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/pre_filter.pt -------------------------------------------------------------------------------- /data/bbbp/processed/pre_transform.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/pre_transform.pt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2021.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2021.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2022.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2022.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2023.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2023.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2024.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2024.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2025.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2025.ckpt 
-------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2026.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2026.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2027.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2027.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2028.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2028.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2029.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2029.ckpt -------------------------------------------------------------------------------- /data/bbbp/processed/train_valid_test_bbbp_seed_2030.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/data/bbbp/processed/train_valid_test_bbbp_seed_2030.ckpt -------------------------------------------------------------------------------- /dataset/delaney.csv: 
-------------------------------------------------------------------------------- 1 | smiles,logSolubility 2 | OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O,-0.77 3 | Cc1occc1C(=O)Nc2ccccc2,-3.3 4 | CC(C)=CCCC(C)=CC(=O),-2.06 5 | c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43,-7.87 6 | c1ccsc1,-1.33 7 | c2ccc1scnc1c2,-1.5 8 | Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl,-7.32 9 | CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O,-5.03 10 | ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,-6.29 11 | COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C,-4.42 12 | O=C1CCCN1,1.07 13 | Clc1ccc2ccccc2c1,-4.14 14 | CCCC=C,-2.68 15 | CCC1(C(=O)NCNC1=O)c2ccccc2,-2.64 16 | CCCCCCCCCCCCCC,-7.96 17 | CC(C)Cl,-1.41 18 | CCC(C)CO,-0.47 19 | N#Cc1ccccc1,-1 20 | CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C,-3.64 21 | CCCCCCCCCC(C)O,-2.94 22 | Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl,-7.43 23 | O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3,-4.593999999999999 24 | CCOP(=S)(OCC)SCSCC,-4.11 25 | CCOc1ccc(NC(=O)C)cc1,-2.35 26 | CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O,-5.47 27 | CCCCCCCO,-1.81 28 | Cn1c(=O)n(C)c2nc[nH]c2c1=O,-1.39 29 | CCCCC1(CC)C(=O)NC(=O)NC1=O,-1.661 30 | ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-6.9 31 | CCCCCCCC(=O)OC,-3.17 32 | CCc1ccc(CC)cc1,-3.75 33 | CCOP(=S)(OCC)SCSC(C)(C)C,-4.755 34 | COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1,-4.805 35 | ClC(=C)Cl,-1.64 36 | Cc1cccc2c1Cc3ccccc32,-5.22 37 | CCCCC=O,-0.85 38 | N(c1ccccc1)c2ccccc2,-3.5039999999999996 39 | CN(C)C(=O)SCCCCOc1ccccc1,-3.927 40 | CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C,-4.15 41 | CCCCCCCI,-4.81 42 | c1c(Cl)cccc1c2ccccc2,-4.88 43 | OCCCC=C,-0.15 44 | O=C2NC(=O)C1(CCC1)C(=O)N2,-1.655 45 | CC(C)C1CCC(C)CC1O,-2.53 46 | CC(C)OC=O,-0.63 47 | CCCCCC(C)O,-1.55 48 | CC(=O)Nc1ccc(Br)cc1,-3.083 49 | c1ccccc1n2ncc(N)c(Br)c2(=O),-3.127 50 | COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C,-4.76 51 | c2c(C)cc1nc(C)ccc1c2,-1.94 52 | CCCCCCC#C,-3.66 53 | CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2,-2.17 54 | c1ccc2c(c1)ccc3c4ccccc4ccc23,-8.057 55 | CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O,-2.523 56 | 
Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl,-8.6 57 | Cc1ccccc1O,-0.62 58 | CC(C)CCC(C)(C)C,-5.05 59 | Cc1ccc(C)c2ccccc12,-4.14 60 | Cc1cc2c3ccccc3ccc2c4ccccc14,-6.57 61 | CCCC(=O)C,-0.19 62 | Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl,-9.15 63 | CCCOC(=O)CC,-0.82 64 | CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO,-3.68 65 | Nc1ccc(O)cc1,-0.8 66 | O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2,-2.81 67 | OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O,-5.931 68 | CCNc1nc(Cl)nc(n1)N(CC)CC,-4.06 69 | NC(=O)c1cnccn1,-0.667 70 | CCC(Br)(CC)C(=O)NC(N)=O,-2.68 71 | Clc1ccccc1c2ccccc2Cl,-5.27 72 | O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2,-3.38 73 | Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2,-5.46 74 | CC1(C)C2CCC1(C)C(=O)C2,-1.96 75 | O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1,-2.369 76 | CCCCC(=O)OCC,-2.25 77 | CC(C)CCOC(=O)C,-1.92 78 | O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-5.886 79 | Clc1cccc(c1)c2cc(Cl)ccc2Cl,-6.01 80 | CCCBr,-1.73 81 | CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl,-3.4930000000000003 82 | COP(=S)(OC)SCC(=O)N(C)C=O,-1.995 83 | Cc1ncnc2nccnc12,-0.466 84 | NC(=S)N,0.32 85 | Cc1ccc(C)cc1,-2.77 86 | CCc1ccccc1CC,-3.28 87 | ClC(Cl)(Cl)C(Cl)(Cl)Cl,-3.67 88 | CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3,-6.876 89 | CCCN(=O)=O,-0.8 90 | CC(C)C1CCC(C)CC1=O,-2.35 91 | CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23,-5.36 92 | O=N(=O)c1c(Cl)c(Cl)ccc1,-3.48 93 | CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O,-3.46 94 | c1ccc2c(c1)c3cccc4cccc2c34,-6 95 | CCCOC(C)C,-1.34 96 | Cc1cc(C)c2ccccc2c1,-4.29 97 | CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.07 98 | c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1,-5.64 99 | Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2,-7.25 100 | C1OC1c2ccccc2,-1.6 101 | CC(C)c1ccccc1,-3.27 102 | CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO,-3.45 103 | c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl),-5.43 104 | C1OC(O)C(O)C(O)C1O,0.39 105 | ClCCl,-0.63 106 | CCc1cccc2ccccc12,-4.17 107 | COC=O,0.58 108 | Oc1ccccc1N(=O)=O,-1.74 109 | Cc1c[nH]c(=O)[nH]c1=O,-1.506 110 | CC(C)C,-2.55 111 | OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23,-1.23 112 | 
Oc1c(I)cc(C#N)cc1I,-3.61 113 | Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O,-4.7 114 | CCCCC,-3.18 115 | c1ccccc1O,0 116 | Nc3ccc2cc1ccccc1cc2c3,-5.17 117 | Cn1cnc2n(C)c(=O)[nH]c(=O)c12,-2.523 118 | c1ccc2cnccc2c1,-1.45 119 | COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1,-4.4319999999999995 120 | CCCCCCc1ccccc1,-5.21 121 | Clc1ccccc1c2ccccc2,-4.54 122 | CCCC(=C)C,-3.03 123 | CC(C)C(C)C(C)C,-4.8 124 | Clc1cc(Cl)c(Cl)c(Cl)c1Cl,-5.65 125 | Oc1cccc(c1)N(=O)=O,-1.01 126 | CCCCCCCCC=C,-5.51 127 | CC(=O)OCC(COC(=O)C)OC(=O)C,-0.6 128 | CCCCc1c(C)nc(nc1O)N(C)C,-2.24 129 | CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2,-7.337000000000001 130 | c1ccncc1,0.76 131 | CCCCCCCBr,-4.43 132 | Cc1ccncc1C,0.36 133 | CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO,-3.43 134 | CCSCc1ccccc1OC(=O)NC,-2.09 135 | CCOC(=O)CC(=O)OCC,-0.82 136 | CC1=CCC(CC1)C(C)=C,-4.26 137 | C1Cc2ccccc2C1,-3.04 138 | CC(C)(C)c1ccc(O)cc1,-2.41 139 | O=C2NC(=O)C1(CC1)C(=O)N2,-1.886 140 | Clc1cccc(I)c1,-3.55 141 | Brc1cccc2ccccc12,-4.35 142 | CC/C=C/C,-2.54 143 | Cc1cccc(C)n1,0.45 144 | ClC=C(Cl)Cl,-1.96 145 | Nc1cccc2ccccc12,-1.92 146 | Cc1cccc(C)c1,-2.82 147 | Oc2ncc1nccnc1n2,-1.9469999999999998 148 | CO,1.57 149 | CCC1(CCC(C)C)C(=O)NC(=O)NC1=O,-2.468 150 | CCC(=O)C,0.52 151 | Fc1c[nH]c(=O)[nH]c1=O,-1.077 152 | Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O,-1.95 153 | Oc1cccc(O)c1,0.81 154 | CCCCCCO,-1.24 155 | CCCCCCl,-2.73 156 | C=CC=C,-1.87 157 | CCCOC(=O)C,-0.72 158 | Oc2ccc1CCCCc1c2,-1.99 159 | NC(=O)CCl,-0.02 160 | COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl,-6.62 161 | Cc1ccc(Cl)cc1,-3.08 162 | CSc1nnc(c(=O)n1N)C(C)(C)C,-2.253 163 | Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1,-6.01 164 | CCCCCC=O,-1.3 165 | CCCCOC(=O)c1ccc(N)cc1,-3.082 166 | O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23,-3.043 167 | CC(C)=CCC/C(C)=C\CO,-2.46 168 | Clc1ccc(cc1)c2ccccc2Cl,-5.28 169 | O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-6.523 170 | CCN(=O)=O,-0.22 171 | CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-6.124 172 | Clc1ccc(Cl)c(Cl)c1Cl,-4.57 173 | 
CCCC(C)(COC(N)=O)COC(N)=O,-1.807 174 | CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C,-4.65 175 | CI,-1 176 | CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2,-1.13 177 | O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-6.301 178 | CC1=CC(=O)CC(C)(C)C1,-1.06 179 | O=C1NC(=O)NC(=O)C1(CC)C(C)CC,-2.39 180 | CCCCC(=O)CCCC,-2.58 181 | CCC1(CCC(=O)NC1=O)c2ccccc2,-2.3369999999999997 182 | CCC(C)CC,-3.68 183 | CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3,-8.6 184 | Cc1ccccc1n3c(C)nc2ccccc2c3=O,-2.925 185 | ClCC#N,-0.092 186 | CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl,-5.752000000000001 187 | CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C,-5.66 188 | c1ccnnc1,1.1 189 | Clc1cc(Cl)c(Cl)c(Cl)c1,-4.63 190 | C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21,-7.32 191 | Nc1ccccc1O,-0.72 192 | CCCCCCCCC(=O)OCC,-3.8 193 | COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C,-1.601 194 | CNC(=O)Oc1ccccc1OC(C)C,-2.05 195 | CCC(C)Cl,-1.96 196 | Oc1ccc2ccccc2c1,-2.28 197 | CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C,-5.696000000000001 198 | CCCCC#C,-2.36 199 | CCCCCCCC#C,-4.24 200 | Cc1ccccc1Cl,-3.52 201 | CC(C)OC(C)C,-1.1 202 | Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2,-3.094 203 | CNN,1.34 204 | CC#C,-0.41 205 | CCOP(=S)(OCC)ON=C(C#N)c1ccccc1,-4.862 206 | CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C,-3.408 207 | C=CC=O,0.57 208 | O=c1[nH]cnc2nc[nH]c12,-2.296 209 | Oc2ccc1ncccc1c2,-2.16 210 | Fc1ccccc1,-1.8 211 | CCCCl,-1.47 212 | CCOC(=O)C,-0.04 213 | CCCC(C)(C)C,-4.36 214 | Cc1cc(C)c(C)c(C)c1C,-4 215 | CC12CCC(CC1)C(C)(C)O2,-1.64 216 | CCCCOC(=O)CCCCCCCCC(=O)OCCCC,-3.8960000000000004 217 | Clc1ccc(cc1)c2ccc(Cl)cc2,-6.56 218 | Cc1cccnc1C,0.38 219 | CC(=C)C1CC=C(C)C(=O)C1,-2.06 220 | CCOP(=S)(OCC)SCSc1ccc(Cl)cc1,-5.736000000000001 221 | COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67,-3.571 222 | c1cc2cccc3c4cccc5cccc(c(c1)c23)c54,-8.804 223 | Cc1ccc(cc1N(=O)=O)N(=O)=O,-2.82 224 | c1c(Br)ccc2ccccc12,-4.4 225 | CNC(=O)Oc1cccc(N=CN(C)C)c1,-2.34 226 | COc2cnc1ncncc1n2,-1.139 227 | Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34,-3.19 228 | 
CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C,-3.658 229 | CC(=C)C=C,-2.03 230 | CC(C)=CCCC(O)(C)C=C,-1.99 231 | COP(=S)(OC)Oc1ccc(SC)c(C)c1,-4.57 232 | OC1CCCCC1,-0.44 233 | O=C1NC(=O)NC(=O)C1(C)CC=C,-1.16 234 | CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O,-4.16 235 | OCC(O)C(O)C(O)C(O)CO,0.06 236 | Cc1ccc(cc1)c2ccccc2,-4.62 237 | CCNc1nc(Cl)nc(NC(C)C)n1,-3.85 238 | NC(=S)Nc1ccccc1,-1.77 239 | CCCC(=O)CCC,-1.3 240 | CC(=O)C(C)(C)C,-0.72 241 | Oc1ccc(Cl)cc1,-0.7 242 | O=C1CCCCC1,-0.6 243 | Cc1cccc(N)c1,-0.85 244 | ClC(Cl)(Cl)C#N,-2.168 245 | CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl,-4.046 246 | CCCCCCCCC(=O)C,-3.3 247 | CCN(CC)c1nc(Cl)nc(NC(C)C)n1,-3.785 248 | CCOC(=O)c1ccc(N)cc1,-2.616 249 | Clc1ccc(Cl)c(Cl)c1,-3.59 250 | Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34,-4.09 251 | Oc1ccccc1O,0.62 252 | CCN2c1ncccc1N(C)C(=O)c3cccnc23,-2.62 253 | CSC,-0.45 254 | Cc1ccccc1Br,-2.23 255 | CCOC(=O)N,0.85 256 | CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O,-5.35 257 | CC(C)C(O)C(C)C,-1.22 258 | c1ccc2ccccc2c1,-3.6 259 | CCNc1ccccc1,-1.7 260 | O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3,-4.0969999999999995 261 | Cc1c2ccccc2c(C)c3ccc4ccccc4c13,-7.02 262 | CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O,-6.34 263 | COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl,-6.89 264 | Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F,-7.28 265 | O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-4.678 266 | CN(C)C(=O)Nc1ccc(Cl)cc1,-2.89 267 | OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F,-3.37 268 | CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O,-4.13 269 | CCCCBr,-2.37 270 | Brc1cc(Br)c(Br)cc1Br,-6.98 271 | CC(C)CC(=O)C,-0.74 272 | CCSC(=O)N(CC)C1CCCCC1,-3.4 273 | COc1ccc(Cl)cc1,-2.78 274 | CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.402000000000001 275 | CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.016 276 | COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1,-4.04 277 | Ic1cccc2ccccc12,-4.55 278 | OCC(O)C(O)C(O)C(O)CO,1.09 279 | CCS,-0.6 280 | ClCC(Cl)Cl,-1.48 281 | CN(C)C(=O)Oc1cc(C)nn1c2ccccc2,-2.09 282 | NC(=O)c1ccccc1O,-1.82 283 | 
Cc1ccccc1N(=O)=O,-2.33 284 | O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C,-2.766 285 | CCc1ccccc1C,-3.21 286 | CCCCCCCCl,-4 287 | O=C1NC(=O)NC(=O)C1(CC)CC,-2.4 288 | C(Cc1ccccc1)c2ccccc2,-4.62 289 | ClC(Cl)C(Cl)Cl,-1.74 290 | CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23,-5.153 291 | Cc1ccc2c(ccc3ccccc32)c1,-5.84 292 | CCCCOC(=O)c1ccccc1C(=O)OCCCC,-4.4 293 | COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl,-4.02 294 | CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl,0.523 295 | CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O,-5.282 296 | CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2,-4.5760000000000005 297 | COc1ccc(cc1)N(=O)=O,-2.41 298 | CCCCCCCl,-3.12 299 | Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl,-9.16 300 | OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O,-0.41 301 | CCCCCCCCCCCCCCCCCCCCCCCCCC,-8.334 302 | CCN2c1ccccc1N(C)C(=O)c3cccnc23,-3.324 303 | CC(Cl)Cl,-1.29 304 | Nc1ccc(cc1)S(N)(=O)=O,-1.34 305 | CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O,-6.49 306 | ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,-4.64 307 | CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C,-4.194 308 | Clc1cccc(Cl)c1Cl,-4 309 | ClC(Cl)(Cl)Cl,-2.31 310 | O=N(=O)c1cc(Cl)c(Cl)cc1,-3.2 311 | OC1CCCCCCC1,-1.29 312 | CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C,-3.9989999999999997 313 | CCOc1ccc(NC(N)=O)cc1,-2.17 314 | C/C1CCC(\C)CC1,-4.47 315 | c1cnc2c(c1)ccc3ncccc23,-2.68 316 | COC(C)(C)C,-0.24 317 | COc1ccc(C=CC)cc1,-3.13 318 | CCCCCCCCCCCCCCCCO,-7 319 | O=c1cc[nH]c(=O)[nH]1,-1.4880000000000002 320 | Nc1ncnc2nc[nH]c12,-2.12 321 | Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl,-7.21 322 | COc1ccc(cc1)C(O)(C2CC2)c3cncnc3,-2.596 323 | c1ccc2c(c1)c3cccc4c3c2cc5ccccc54,-8.23 324 | O=C(Nc1ccccc1)Nc2ccccc2,-3.15 325 | CCC1(C(=O)NC(=O)NC1=O)c2ccccc2,-2.322 326 | Clc1ccc(cc1)c2cccc(Cl)c2Cl,-6.29 327 | CC(C)c1ccc(NC(=O)N(C)C)cc1,-3.536 328 | CCN(CC)C(=O)CSc1ccc(Cl)nn1,-1.716 329 | CCC(C)(C)CO,-1.04 330 | CCCOC(=O)CCC,-1.75 331 | Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O,-3.22 332 | CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1,-4.2 333 | C1CCCCCC1,-3.51 334 | CCCOC=O,-0.49 335 | CC(C)c1ccccc1C,-3.76 336 | 
Nc1cccc(Cl)c1,-1.37 337 | CC(C)CC(C)C,-4.26 338 | o1c2ccccc2c3ccccc13,-4.6 339 | CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C,-3.42 340 | CN(C)C(=O)Nc1cccc(c1)C(F)(F)F,-3.43 341 | c3ccc2nc1ccccc1cc2c3,-3.67 342 | CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO,-3.11 343 | OCC1OC(O)C(O)C(O)C1O,0.74 344 | Cc1cccc(O)c1,-0.68 345 | CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O,-3.5860000000000003 346 | CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23,-6.005 347 | O=N(=O)c1ccc(cc1)N(=O)=O,-3.39 348 | CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3,-2.696 349 | CCCCCCCCCC(=O)OCC,-4.1 350 | CN(C)C(=O)Nc1ccccc1,-1.6 351 | CCCOCC,-0.66 352 | CC(C)O,0.43 353 | Cc1ccc2ccccc2c1,-3.77 354 | ClC(Br)Br,-1.9 355 | CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.43 356 | CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,-3.37 357 | ClCc1ccccc1,-2.39 358 | C/C=C/C=O,0.32 359 | CON(C)C(=O)Nc1ccc(Br)c(Cl)c1,-3.924 360 | Cc1c2ccccc2c(C)c3ccccc13,-6.57 361 | CCCCCC(=O)OC,-1.87 362 | CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C,-4.328 363 | CC(=O)Nc1ccc(F)cc1,-1.78 364 | CCc1cccc(CC)c1N(COC)C(=O)CCl,-3.26 365 | C1CCC=CC1,-2.59 366 | CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO,-3.09 367 | c1cncnc1,1.1 368 | Clc1ccc(cc1)N(=O)=O,-2.92 369 | CCC(=O)OC,-0.14 370 | Clc1ccccc1N(=O)=O,-2.55 371 | CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-4.77 372 | CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C,-1.8769999999999998 373 | O=N(=O)c1ccccc1,-1.8 374 | Ic1ccccc1,-3.01 375 | CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O,-3.78 376 | COc1ccccc1OCC(O)COC(N)=O,-0.985 377 | CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC,-4.19 378 | Oc1cccc(Cl)c1Cl,-1.3 379 | CCCC(=O)OC,-1.92 380 | CCC(=O)Nc1ccc(Cl)c(Cl)c1,-3 381 | Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3,-2.404 382 | CCCCCC(=O)OCC,-2.35 383 | OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl,-1.84 384 | CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C,-5.47 385 | COc1nc(NC(C)C)nc(NC(C)C)n1,-2.478 386 | CCCCCCC=C,-4.44 387 | Cc1ccc(N)cc1,-1.21 388 | Nc1nccs1,-0.36 389 | c1ccccc1(OC(=O)NC),-1.8030000000000002 390 | CCCC(O)CC,-0.8 391 | c3ccc2c(O)c1ccccc1cc2c3,-4.73 392 | 
Cc1ccc2cc3ccccc3cc2c1,-6.96 393 | Cc1cccc(C)c1C,-3.2 394 | CNC(=O)Oc1ccc(N(C)C)c(C)c1,-2.36 395 | CCCCCCCC(C)O,-2.74 396 | CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2,-3.35 397 | CCCC(=O)CC,-0.83 398 | Oc1c(Br)cc(C#N)cc1Br,-3.33 399 | Clc1ccc(cc1Cl)c2ccccc2,-6.39 400 | CN(C(=O)COc1nc2ccccc2s1)c3ccccc3,-4.873 401 | Oc1cccc2ncccc12,-2.54 402 | CC1=C(SCCO1)C(=O)Nc2ccccc2,-3.14 403 | CCOc2ccc1nc(sc1c2)S(N)(=O)=O,-3.81 404 | Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,-4.28 405 | ClCBr,-0.89 406 | CCC1(CC)C(=O)NC(=O)N(C)C1=O,-2.23 407 | CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-4.63 408 | NC(=O)NCc1ccccc1,-0.95 409 | CN(C)C(=O)Nc1ccc(C)c(Cl)c1,-3.483 410 | CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.592 411 | OC1CCCCCC1,-0.88 412 | CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl,-2.154 413 | CCCC(C)C1(CC)C(=O)NC(=S)NC1=O,-3.36 414 | CC(=O)Nc1nnc(s1)S(N)(=O)=O,-2.36 415 | Oc1ccc(cc1)N(=O)=O,-0.74 416 | ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl,-6.307 417 | C1CCOC1,0.49 418 | Nc1ccccc1N(=O)=O,-1.96 419 | Clc1cccc(c1Cl)c2cccc(Cl)c2Cl,-7.28 420 | CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3,-3.81 421 | Cc1c(cccc1N(=O)=O)N(=O)=O,-3 422 | CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C,-4.42 423 | CCN(CC)c1nc(Cl)nc(n1)N(CC)CC,-4.4110000000000005 424 | ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O,-5.4 425 | c1(Br)c(Br)cc(Br)cc1,-4.5 426 | OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O,-3.952 427 | O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C,-2.356 428 | c1(O)c(C)ccc(C(C)C)c1,-2.08 429 | C1SC(=S)NC1(=O),-1.77 430 | Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O,-3.083 431 | ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl,-5.2589999999999995 432 | CCN(CC)C(=S)SSC(=S)N(CC)CC,-4.86 433 | C1CCCCC1,-3.1 434 | ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl,-7.278 435 | CN(C)C=Nc1ccc(Cl)cc1C,-2.86 436 | CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O,-5.24 437 | CCCCCCCCO,-2.39 438 | CCSCC,-1.34 439 | ClCCCl,-1.06 440 | CCC(C)(C)Cl,-2.51 441 | ClCCBr,-1.32 442 | Nc1ccc(cc1)N(=O)=O,-2.37 443 | OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O,-0.244 444 | 
CCN2c1ncccc1N(CC)C(=O)c3cccnc23,-2.86 445 | Clc1ccccc1,-2.38 446 | CCCCCCCC=C,-5.05 447 | Brc1ccc(I)cc1,-4.56 448 | CCC(C)(O)CC,-0.36 449 | CCCCCc1ccccc1,-4.64 450 | NC(=O)NC1NC(=O)NC1=O,-1.6 451 | OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23,-4.571000000000001 452 | ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-7.2 453 | CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-5.184 454 | Clc1cccc2ccccc12,-3.93 455 | CCN2c1ccccc1N(C)C(=O)c3ccccc23,-4.749 456 | CCCCC(C)O,-0.89 457 | CCCC1CCCC1,-4.74 458 | CCOC(=O)c1cncn1C(C)c2ccccc2,-4.735 459 | Oc1ccc(Cl)c(Cl)c1,-1.25 460 | CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.017000000000001 461 | c2ccc1ocnc1c2,-1.16 462 | CCCCCO,-0.6 463 | CCN(CC)c1ccccc1,-3.03 464 | Fc1cccc(F)c1,-2 465 | ClCCC#N,-0.29 466 | CC(C)(C)Cc1ccccc1,-4.15 467 | O=C1NC(=O)NC(=O)C1(CC)c1ccccc1,-2.322 468 | Clc1ccccc1I,-3.54 469 | c2ccc1[nH]nnc1c2,-0.78 470 | CNC(=O)Oc1cccc2CC(C)(C)Oc12,-2.8 471 | Cc1cccc(C)c1O,-1.29 472 | CC(C)C(C)O,-0.18 473 | c1ccccc1C(O)c2ccccc2,-2.55 474 | CCCCCCCCCC(=O)OC,-4.69 475 | COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O,-4.31 476 | CC(C)CBr,-2.43 477 | CCI,-1.6 478 | CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C,-1.95 479 | CCCCCCBr,-3.81 480 | CCCC(C)C,-3.74 481 | Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F,-7.321000000000001 482 | CCc1cccc(C)c1N(C(C)COC)C(=O)CCl,-2.73 483 | ON=Cc1ccc(o1)N(=O)=O,-2.19 484 | CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.003 485 | Nc1nc[nH]n1,0.522 486 | BrC(Br)Br,-1.91 487 | COP(=O)(OC)C(O)C(Cl)(Cl)Cl,-0.22 488 | CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12,-5.233 489 | OCc1ccccc1,-0.4 490 | O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2,-2.84 491 | Oc1ccc(Br)cc1,-1.09 492 | CC(C)Br,-1.59 493 | CC(C)CC(C)(C)C,-4.74 494 | O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O,-2.89 495 | CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O,-3.7960000000000003 496 | CCC,-1.94 497 | Nc1cc(nc(N)n1=O)N2CCCCC2,-1.989 498 | Nc2cccc3nc1ccccc1cc23,-4.22 499 | c1ccc2cc3c4cccc5cccc(c3cc2c1)c45,-8.49 500 | OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,-5.666 501 | 
C1Cc2cccc3cccc1c23,-4.63 502 | CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O,-6.34 503 | Brc1ccc(Br)cc1,-4.07 504 | Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O,-2.82 505 | Oc1ccc(cc1)c2ccccc2,-3.48 506 | CC1=C(CCCO1)C(=O)Nc2ccccc2,-2.56 507 | CCOC=C,-0.85 508 | CCC#C,-1.24 509 | COc1ncnc2nccnc12,-1.11 510 | CCCCC(C)(O)CC,-1.6 511 | Clc1ccc(Cl)cc1,-3.27 512 | O=C1N(COC(=O)C)C(=O)C(N1)(c2ccccc2)c3ccccc3,-4.47 513 | CSCS(=O)CC(CO)NC(=O)C=Cc1c(C)[nH]c(=O)[nH]c1=O,-1.9809999999999999 514 | Cc1c[nH]c2ccccc12,-2.42 515 | COc2ncc1nccnc1n2,-1.11 516 | CNC(=O)Oc1ccccc1C2OCCO2,-1.57 517 | C1N(C(=O)NCC(C)C)C(=O)NC1,-2.15 518 | CC#N,0.26 519 | CCOC(=O)NCCOc2ccc(Oc1ccccc1)cc2,-4.7 520 | CC(=O)N(S(=O)c1ccc(N)cc1)c2onc(C)c2C,-3.59 521 | ClCC(Cl)(Cl)Cl,-2.18 522 | CCCCO,0 523 | CC1CCCCC1NC(=O)Nc2ccccc2,-4.11 524 | Clc1cc(Cl)cc(Cl)c1,-4.48 525 | O=Cc1ccco1,-0.1 526 | CC(C)CCO,-0.51 527 | O=Cc2ccc1OCOc1c2,-1.63 528 | CC(=C)C,-2.33 529 | O=Cc1ccccc1,-1.19 530 | CC(=C)C(=C)C,-2.4 531 | CCOC(=O)CCN(SN(C)C(=O)Oc1cccc2CC(C)(C)Oc21)C(C)C,-4.71 532 | O2c1ccccc1N(C)C(=O)c3cccnc23,-3.6719999999999997 533 | C1c2ccccc2c3ccccc13,-5 534 | CC1CCCCC1,-3.85 535 | NC(=N)NS(=O)(=O)c1ccc(N)cc1,-1.99 536 | COC(=O)c1ccc(O)cc1,-1.827 537 | CC1CCCO1,0.11 538 | CC3C2CCC1(C)C=CC(=O)C(=C1C2OC3=O)C,-3.09 539 | OCC2OC(Oc1ccccc1CO)C(O)C(O)C2O,-0.85 540 | CCCI,-2.29 541 | CCNc1nc(NC(C)C)nc(SC)n1,-3.04 542 | CCCO,0.62 543 | CC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C,-3.8169999999999997 544 | CCCC(C)O,-0.29 545 | OC(C(=O)c1ccccc1)c2ccccc2,-2.85 546 | Cc1ccc(O)c(C)c1,-1.19 547 | Clc1cccc(c1)N(=O)=O,-2.77 548 | Cc2c(N)c(=O)n(c1ccccc1)n2C,-0.624 549 | Clc1ccc(c(Cl)c1)c2cc(Cl)ccc2Cl,-6.57 550 | ClC(=C(Cl)C(=C(Cl)Cl)Cl)Cl,-4.92 551 | CCNc1nc(NC(C)(C)C)nc(SC)n1,-4 552 | CCC(C)CCO,-0.71 553 | Cc2ncc1nccnc1n2,-0.12 554 | CC23Cc1cnoc1C=C2CCC4C3CCC5(C)C4CCC5(O)C#C,-5.507000000000001 555 | CCCCI,-2.96 556 | Brc1ccc2ccccc2c1,-4.4 557 | CC1OC(CC(O)C1O)OC2C(O)CC(OC2C)OC8C(O)CC(OC7CCC3(C)C(CCC4C3CC(O)C5(C)C(CCC45O)C6=CC(=O)OC6)C7)OC8C,-4.081 558 
| FC(F)(F)c1ccccc1,-2.51 559 | CCCCCCOC(=O)c1ccccc1C(=O)OCCCCCC,-6.144 560 | c1ccc2c(c1)sc3ccccc23,-4.38 561 | Clc1ccc(c(Cl)c1)c2ccc(Cl)c(Cl)c2Cl,-7.8 562 | Clc1ccc(c(Cl)c1Cl)c2ccc(Cl)c(Cl)c2Cl,-8.01 563 | CC(=O)CC(c1ccccc1)c3c(O)c2ccccc2oc3=O,-3.8930000000000002 564 | c1ccccc1C(O)C(O)c2ccccc2,-1.93 565 | COC(=O)c1ccccc1C(=O)OC,-1.66 566 | CCCCCCCC(=O)OCC,-3.39 567 | CCSSCC,-2.42 568 | CCOCCOCC,-0.77 569 | Clc1cc(Cl)c(Cl)cc1Cl,-5.56 570 | Nc1ccc(cc1)c2ccc(N)cc2,-2.7 571 | CCCCCC=C,-3.73 572 | CCCCc1c(C)nc(NCC)[nH]c1=O,-3.028 573 | O=C1NC(=O)NC(=O)C1(CC)C(C)CCC,-2.39 574 | Nc1ccccc1Cl,-1.52 575 | COc1cccc(Cl)c1,-2.78 576 | CCCCN(CC)C(=O)SCCC,-3.53 577 | CCCCOC=O,-1.37 578 | CC12CC(O)C3C(CCC4=CC(=O)C=CC34C)C2CCC1(O)C(=O)CO,-3.18 579 | BrC(Cl)Cl,-1.54 580 | CC34CC(=O)C1C(CCC2=CC(=O)CCC12C)C3CCC4(=O),-3.48 581 | c1ccc(cc1)c2ccc(cc2)c3ccccc3,-7.11 582 | Oc1ccc(C=O)cc1,-0.96 583 | CBr,-0.79 584 | Cc1cc(ccc1NS(=O)(=O)C(F)(F)F)S(=O)(=O)c2ccccc2,-3.8 585 | CC(=O)CC(c1ccc(Cl)cc1)c2c(O)c3ccccc3oc2=O,-5.8389999999999995 586 | CCc1ccc2ccccc2c1,-4.29 587 | Nc1c(C)c[nH]c(=O)n1,-1.4580000000000002 588 | Clc2c(Cl)c(Cl)c(c1ccccc1)c(Cl)c2Cl,-7.92 589 | c1c(NC(=O)c2ccccc2(I))cccc1,-4.21 590 | Cc3cc2nc1c(=O)[nH]c(=O)nc1n(CC(O)C(O)C(O)CO)c2cc3C,-3.685 591 | Fc1ccccc1Br,-2.7 592 | Oc1ccc(Cl)cc1Cl,-1.55 593 | CC1(C)C(C=C(Cl)Cl)C1C(=O)OCc2cccc(Oc3ccccc3)c2,-6.291 594 | CN2C(=C(O)c1ccccc1S2(=O)=O)C(=O)Nc3ccccn3,-4.16 595 | O=C1N(COC(=O)CC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-4.907 596 | C1CCCC1,-2.64 597 | Cc1ccccc1N,-2.21 598 | c1(OC)ccc(CC=C)cc1,-2.92 599 | CN(C)C(=O)Nc1cccc(OC(=O)NC(C)(C)C)c1,-2.93 600 | CC(C)C=C,-2.73 601 | Oc1ccccn1,1.02 602 | CC,-1.36 603 | Clc1ccccc1Cl,-3.05 604 | Sc2nc1ccccc1s2,-3.18 605 | Clc1c(Cl)c(Cl)c(c(Cl)c1Cl)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl,-11.6 606 | COc2c1occc1cc3ccc(=O)oc23,-3.6639999999999997 607 | CC(=O)N,1.58 608 | Cc1cccc2ccccc12,-3.7 609 | CCN(CC)C(=O)C(C)Oc1cccc2ccccc12,-3.57 610 | CC(O)C(C)(C)C,-0.62 611 | CCCC(=O)OCC,-1.36 612 | CC2=CC(=O)c1ccccc1C2=O,-3.03 
613 | c1ccc2c(c1)ccc3ccccc32,-5.26 614 | Cc1ccnc(C)c1,0.38 615 | CCCCCCCCCO,-3.01 616 | BrCBr,-1.17 617 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO,-3.59 618 | Cc1ccc2cc(C)ccc2c1,-4.89 619 | CCSC(=O)N(CC(C)C)CC(C)C,-3.68 620 | O=N(=O)OCC(CON(=O)=O)ON(=O)=O,-2.22 621 | Nc1cccc(c1)N(=O)=O,-2.19 622 | CCCCCl,-2.03 623 | ClC(Cl)(Cl)C(NC=O)N1C=CN(C=C1)C(NC=O)C(Cl)(Cl)Cl,-4.19 624 | Cn2cc(c1ccccc1)c(=O)c(c2)c3cccc(c3)C(F)(F)F,-4.445 625 | Nc3cc2c1ccccc1ccc2c4ccccc34,-6.2 626 | CC12CCC3C(CCc4cc(O)ccc34)C2CCC1=O,-3.955 627 | CCN2c1ccccc1N(C)C(=S)c3cccnc23,-4.706 628 | CC1CO1,-0.59 629 | O=C3CN=C(c1ccccc1)c2cc(ccc2N3)N(=O)=O,-3.7960000000000003 630 | CCNC(=S)NCC,-1.46 631 | Oc1cc(Cl)cc(Cl)c1Cl,-2.67 632 | CCCCC(=O)OC,-1.34 633 | Nc1ccccc1,-0.41 634 | Cc1cccc2c(C)cccc12,-4.678999999999999 635 | NS(=O)(=O)c2cc1c(NCNS1(=O)=O)cc2Cl,-2.63 636 | C1=Cc2cccc3cccc1c23,-3.96 637 | CCCCCOC(=O)CC,-1.28 638 | CCNc1nc(NC(C)C)nc(OC)n1,-2.084 639 | c1ccc2c(c1)cc3ccc4cccc5ccc2c3c45,-8.699 640 | CCBr,-1.09 641 | CCC#CCC,-1.99 642 | CC1OC(CC(O)C1O)OC2C(O)CC(OC2C)OC8C(O)CC(OC7CCC3(C)C(CCC4C3CCC5(C)C(CCC45O)C6=CC(=O)OC6)C7)OC8C,-5.292999999999999 643 | CCC(=C)C,-2.73 644 | Oc1cccc2cccnc12,-2.42 645 | C1CCc2ccccc2C1,-4.37 646 | Oc1ccc(cc1)C2(OC(=O)c3ccccc23)c4ccc(O)cc4,-2.9 647 | Brc1cc(Br)cc(Br)c1,-5.6 648 | COP(=S)(OC)Oc1cc(Cl)c(Cl)cc1Cl,-5.72 649 | Cc1cc(=O)[nH]c(=S)[nH]1,-2.436 650 | COc1cc(CC=C)ccc1O,-1.56 651 | O=C1NC(=O)NC(=O)C1(C(C)C)CC=C,-1.7080000000000002 652 | c1cc2ccc3cccc4ccc(c1)c2c34,-6.176 653 | CCOC(C)OCC,-0.43 654 | CC1(C)CON(Cc2ccccc2Cl)C1=O,-2.338 655 | CCCCOCCO,-0.42 656 | Clc1c(Cl)c(Cl)c(N(=O)=O)c(Cl)c1Cl,-5.82 657 | CC12CCC(O)CC1CCC3C2CCC4(C)C3CCC4=O,-4.402 658 | FC(F)(F)c1cccc(c1)N2CC(CCl)C(Cl)C2=O,-4.047 659 | c1ccc2ncccc2c1,-1.3 660 | COC(=O)c1cc(O)c(O)c(O)c1,-1.24 661 | OC(Cn1cncn1)(Cn2cncn2)c3ccc(F)cc3F,-1.8 662 | Clc2ccc1oc(=O)[nH]c1c2,-2.8310000000000004 663 | Clc1ccc(c(Cl)c1)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl,-7.92 664 | O=C1NC(=O)C(=O)C(=O)N1,-1.25 665 | 
ClCCCCl,-1.62 666 | Fc1cccc(Br)c1,-2.67 667 | Clc1ccc(Br)cc1,-3.63 668 | CC(C)C(C)C,-3.65 669 | CCC=C,-1.94 670 | Clc1ccc(Cl)c(c1)c2cc(Cl)c(Cl)c(Cl)c2Cl,-7.68 671 | Nc1cc[nH]c(=O)n1,-1.155 672 | FC(F)(Cl)C(F)(Cl)Cl,-3.04 673 | CCC#N,0.28 674 | ClC(Cl)C(c1ccc(Cl)cc1)c2ccccc2Cl,-6.51 675 | COc1ccccc1N(=O)=O,-1.96 676 | CC34CCC1C(CC=C2CC(O)CCC12C)C3CCC4=O,-4.12 677 | CC12CC2(C)C(=O)N(C1=O)c3cc(Cl)cc(Cl)c3,-4.8 678 | c1cc2ccc3ccc4ccc5cccc6c(c1)c2c3c4c56,-9.017999999999999 679 | CCC(C)c1cc(cc(N(=O)=O)c1O)N(=O)=O,-3.38 680 | c1c(OC)c(OC)C2C(=O)OCC2c1,-1.899 681 | OCC(O)CO,1.12 682 | COc1ccccc1O,-1.96 683 | CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl,-5.67 684 | Cc1c2ccccc2cc3ccccc13,-5.89 685 | Cc1cc(=O)n(c2ccccc2)n1C,0.715 686 | CCCCOC,-0.99 687 | Cc2cnc1cncnc1n2,-0.8540000000000001 688 | CCNc1nc(Cl)nc(NCC)n1,-4.55 689 | CN(C)C(=O)C,1.11 690 | CSc1nc(nc(n1)N(C)C)N(C)C,-2.676 691 | C=C,-0.4 692 | CC(C)(C)CCO,-0.5 693 | O=C1NC(=O)NC(=O)C1(CC)CC=C,-1.614 694 | Oc1ccc(Cl)c(Cl)c1Cl,-2.67 695 | COc1ccccc1,-1.85 696 | c1ccc(Cl)cc1C(c2ccc(Cl)cc2)(O)C(=O)OC(C)C,-4.53 697 | CC13CCC(=O)C=C1CCC4C2CCC(C(=O)CO)C2(CC(O)C34)C=O,-3.85 698 | COc2ccc(Oc1ccc(NC(=O)N(C)C)cc1)cc2,-4.16 699 | CCc1ccc(C)cc1,-3.11 700 | CC(C)SC(C)C,-2.24 701 | O=N(=O)c1cccc(c1)N(=O)=O,-2.29 702 | CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,-5.54 703 | CCC1(C(C)C)C(=O)NC(=O)NC1=O,-2.21 704 | CC(=O)OCC(=O)C3(O)CCC4C2CCC1=CC(=O)CCC1(C)C2C(=O)CC34C,-4.21 705 | Cc1ncc(N(=O)=O)n1CCO,-1.22 706 | Nc1ccc(Cl)cc1,-1.66 707 | CCCC(C)(C)CO,-1.52 708 | c1ccoc1,-0.82 709 | COCCCNc1nc(NC(C)C)nc(SC)n1,-2.928 710 | CN(C)C(=O)NC1CC2CC1C3CCCC23,-3.1710000000000003 711 | CC(C)(C)c1ccccc1,-3.66 712 | CC(=O)CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3,-3.27 713 | CC(=O)OCC(=O)C3(O)CCC4C2CCC1=CC(=O)C=CC1(C)C2C(O)CC34C,-4.37 714 | CCCOC,-0.39 715 | CC(C)OC(=O)C,-0.55 716 | Brc1ccccc1,-2.55 717 | CCOC(=O)c1ccc(O)cc1,-2.35 718 | O=C1N(COC(=O)CCC)C(=O)C(N1)(c2ccccc2)c3ccccc3,-5.071000000000001 719 | CCC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-5.37 720 | 
c1cc2ccc3ccc4ccc5ccc6ccc1c7c2c3c4c5c67,-9.332 721 | O=c1[nH]cnc2[nH]ncc12,-2.266 722 | ClC=C,-1.75 723 | CN(C)C(=O)C(c1ccccc1)c2ccccc2,-2.98 724 | BrC(Br)(Br)Br,-3.14 725 | CCN2c1cc(N(C)C)cc(C)c1NC(=O)c3cccnc23,-4.871 726 | O=C1NC(=O)c2ccccc12,-2.61 727 | OC(c1ccc(Cl)cc1)(c2cncnc2)c3ccccc3Cl,-4.38 728 | COC(=O)c1ccccc1,-1.85 729 | Cn1ccc(=O)[nH]c1=O,-0.807 730 | CCCCC1C(=O)N(N(C1=O)c2ccc(O)cc2)c3ccccc3,-3.73 731 | Clc1ccc(Cl)c(c1)c2cccc(Cl)c2Cl,-6.47 732 | CCC2NC(=O)c1cc(c(Cl)cc1N2)S(N)(=O)=O,-3.29 733 | CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.8 734 | C1CC=CC1,-2.1 735 | C1(=O)NC(=O)NC(=O)C1(O)C2(O)C(=O)NC(=O)NC2(=O),-1.99 736 | CCCCCCCCC,-5.88 737 | Oc1ccccc1Cl,-1.06 738 | c1cccc2c3c(C)cc4ccccc4c3ccc12,-6.59 739 | CCOc1ccccc1,-2.33 740 | CCOC(=O)C=Cc1ccccc1,-3 741 | Cc1[nH]c(=O)n(c(=O)c1Cl)C(C)(C)C,-2.484 742 | Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O,-3.4989999999999997 743 | Cc1ccc(cc1)S(=O)(=O)N,-1.74 744 | CC(OC(=O)Nc1cccc(Cl)c1)C#C,-2.617 745 | CCCCCC(C)C,-5.08 746 | CC1(C)C(C=C(Cl)C(F)(F)F)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2,-8.176 747 | CCCC1C(=O)N3N(C1=O)c2cc(C)ccc2N=C3N(C)C,-3.5380000000000003 748 | CN2C(=O)CN=C(c1ccccc1)c3cc(Cl)ccc23,-3.7539999999999996 749 | CCC(O)C(C)C,-0.7 750 | CCOP(=S)(OCC)Oc1ccc(cc1)S(C)=O,-2.3 751 | CC1(C)C2CCC1(C)C(O)C2,-2.32 752 | CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1O,-4.02 753 | CCCCCCC,-4.53 754 | Oc1cccc2ccccc12,-2.22 755 | C/C1CCCCC1\C,-4.3 756 | COc2cc1c(N)nc(nc1c(OC)c2OC)N3CCN(CC3)C(=O)OCC(C)(C)O,-3.638 757 | C1Cc2c3c1cccc3cc4c2ccc5ccccc54,-7.85 758 | CC(=O)C3(C)CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C,-5.27 759 | CCCCCC(=O)C,-1.45 760 | COP(=O)(NC(C)=O)SC,0.54 761 | CCCCSP(=O)(SCCCC)SCCCC,-5.14 762 | c1cC2C(=O)NC(=O)C2cc1,-2.932 763 | NS(=O)(=O)c2cc1c(NC(NS1(=O)=O)C(Cl)Cl)cc2Cl,-2.68 764 | CC=C(C)C,-2.56 765 | Cc1ccc(C)c(C)c1,-3.31 766 | Oc1cc(Cl)c(Cl)cc1Cl,-2.21 767 | c1ccc2c(c1)cnc3ccccc23,-2.78 768 | CCCC(C)(O)CC,-0.98 769 | CCCCCCCC,-5.24 770 | c1ccc2cc3ccccc3cc2c1,-6.35 771 | NNc1ccccc1,0.07 772 | CCC=O,0.58 773 | C1CCCCCCC1,-4.15 774 
| O=C1NC(=O)NC(=O)C1(CC=C)CC=C,-2.077 775 | ClC(Cl)Cl,-1.17 776 | Sc1nccc(=O)[nH]1,-2.273 777 | Clc1ccc(CN(C2CCCC2)C(=O)Nc3ccccc3)cc1,-5.915 778 | CC1=CCCCC1,-3.27 779 | CCCCC(CC)C=O,-2.13 780 | COc2c1occc1c(OC)c3c(=O)cc(C)oc23,-3.0210000000000004 781 | O=C1NC(=O)NC(=O)C1(CC)CCC(C)C,-2.658 782 | c1ccc2c3c(ccc2c1)c4cccc5cccc3c45,-8 783 | CCC(CC)C=O,-1.52 784 | CCCOCCC,-1.62 785 | CCCCCCCCCCCCCCO,-5.84 786 | Oc1c(Cl)ccc(Cl)c1Cl,-2.64 787 | NC(=O)N,0.96 788 | CCCC#C,-1.64 789 | Brc1cccc(Br)c1,-3.54 790 | CCCCCCCCCCCCCCCCCCO,-8.4 791 | CC(=O)Nc1ccccc1,-1.33 792 | c1cc(O)c(O)c2OCC3(O)CC4=CC(=O)C(O)=CC4=C3c21,-2.7 793 | c1nccc(C(=O)NN)c1,0.009000000000000001 794 | OC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,-5.46 795 | CC(C)CCOC=O,-1.52 796 | CC(=O)c1ccccc1,-1.28 797 | c2ccc1nc(ccc1c2)c4ccc3ccccc3n4,-5.4 798 | CCOP(=O)(OCC)OCC,0.43 799 | CC2(C)C1CCC(C)(C1)C2=O,-1.85 800 | COc2cnc1cncnc1n2,-0.91 801 | ClC2=C(Cl)C3(Cl)C1C=CCC1C2(Cl)C3(Cl)Cl,-5.64 802 | CC(C)N(=O)=O,-0.62 803 | c1ccc2c(c1)[nH]c3ccccc32,-5.27 804 | OCC(O)C(O)CO,0.7 805 | CCCOC(=O)c1ccc(N)cc1,-2.452 806 | CNC(=O)C=C(C)OP(=O)(OC)OC,0.6509999999999999 807 | O=C1CCC(=O)N1,0.3 808 | CCC(C)C(C)C,-4.28 809 | CCCCc1c(C)nc(NCC)nc1OS(=O)(=O)N(C)C,-4.16 810 | CCN2c1ncccc1N(C)C(=S)c3cccnc23,-4.6339999999999995 811 | O2c1ccccc1N(CC)C(=O)c3ccccc23,-3.68 812 | C1CCOCC1,-0.03 813 | CCCCCC#C,-3.01 814 | c1cc2ccc(OC)c(CC=C(C)(C))c2oc1=O,-4.314 815 | c1cc(C)cc2c1c3cc4cccc5CCc(c45)c3cc2,-7.92 816 | CCOC(=O)c1ccccc1,-2.32 817 | ClCC(C)C,-2 818 | CC34CCC1C(CCc2cc(O)ccc12)C3CCC4(O)C#C,-4.3 819 | CCCCCCCCCCCC(=O)OC,-4.69 820 | CCCSCCC,-2.58 821 | c1ccc2cc3cc4ccccc4cc3cc2c1,-8.6 822 | CCCCCBr,-3.08 823 | CCCC/C=C/C,-3.82 824 | Cc1ncc(N(=O)=O)n1CCO,-1.26 825 | CCCCCC1CCCC1,-6.08 826 | Clc1ccc(Cl)c(c1)c2c(Cl)c(Cl)cc(Cl)c2Cl,-7.42 827 | O=C1NC(=O)NC(=O)C1(CC)C(C)C,-2.148 828 | CC(Cl)(Cl)Cl,-2 829 | CON(C)C(=O)Nc1ccc(Cl)cc1,-2.57 830 | O=C2NC(=O)C1(CCCCC1)C(=O)N2,-3.06 831 | CN(C)C(=O)OC1=CC(=O)CC(C)(C)C1,-0.85 832 | Cc1ccc(Br)cc1,-3.19 
833 | CCOCC,-0.09 834 | CC(C)NC(=O)N1CC(=O)N(C1=O)c2cc(Cl)cc(Cl)c2,-4.376 835 | CCCCN(CC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-5.53 836 | Cc1cc(C)c(O)c(C)c1,-2.05 837 | c1ccccc1,-1.64 838 | Clc1ccc(I)cc1,-4.03 839 | COc1ccc(NC(=O)N(C)C)cc1Cl,-2.5639999999999996 840 | CC(C)N(C(=O)CCl)c1ccccc1,-2.48 841 | C=Cc1ccccc1,-2.82 842 | COCOC,0.48 843 | Cc1ccccc1C,-2.8 844 | CCC(C)O,0.47 845 | Oc1ccc(O)cc1,-0.17 846 | CC34CCC1C(CCc2cc(O)ccc12)C3CC(O)C4O,-4.955 847 | C1c2ccccc2c3cc4ccccc4cc13,-8.04 848 | O=C1CNC(=O)N1,-0.4 849 | c1(O)cc(O)ccc1CCCCCC,-2.59 850 | C=CCS(=O)SCC=C,-0.83 851 | CCOP(=S)(OCC)Oc2ccc1oc(=O)c(Cl)c(C)c1c2,-5.382000000000001 852 | Cc1c(C)c2c3ccccc3ccc2c4ccccc14,-7.01 853 | CCCCC(=O)OC3(C(C)CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)C(=O)CO,-4.71 854 | O=c2[nH]c(=O)c1[nH]c(=O)[nH]c1[nH]2,-3.93 855 | Oc1c(Cl)cc(Cl)c(Cl)c1Cl,-3.1 856 | Clc1cccc(Cl)c1,-3.04 857 | Clc1ccc(cc1)C(c2ccc(Cl)cc2)C(Cl)(Cl)Cl,-7.15 858 | CC(C)COC=O,-1.01 859 | c1ccccc1SC,-2.39 860 | CCN2c1nc(C)cc(C(F)(F)F)c1NC(=O)c3cccnc23,-4.207 861 | CCCCCC,-3.84 862 | COC(=O)c1cccnc1,-0.46 863 | NS(=O)(=O)c3cc2c(NC(Cc1ccccc1)NS2(=O)=O)cc3C(F)(F)F,-3.59 864 | Clc1ccc(cc1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl,-7.82 865 | CC1(OC(=O)N(C1=O)c2cc(Cl)cc(Cl)c2)C=C,-4.925 866 | CCNc1nc(Cl)nc(NC(C)(C)C#N)n1,-3.15 867 | c1ccc2c(c1)c3ccccc3c4ccccc24,-6.726 868 | CC=C(C(=CC)c1ccc(O)cc1)c2ccc(O)cc2,-4.95 869 | CCCCC(CC)COC(=O)c1ccccc1C(=O)OCC(CC)CCCC,-6.96 870 | CCc1ccccn1,0.51 871 | COP(=O)(OC)OC(Br)C(Cl)(Cl)Br,-2.28 872 | c1ccc(cc1)c2ccccc2,-4.345 873 | Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cc(Cl)cc2Cl,-8.71 874 | CN(C)c1nc(nc(n1)N(C)C)N(C)C,-3.364 875 | CC(C)CC(C)(C)O,-0.92 876 | O=C2NC(=O)C1(CCCCCC1)C(=O)N2,-3.168 877 | OCC1OC(O)(CO)C(O)C1O,0.64 878 | Cc1cc(C)cc(O)c1,-1.4 879 | ClCC#CCOC(=O)Nc1cccc(Cl)c1,-4.37 880 | CC(=O)Nc1ccc(Cl)cc1,-2.843 881 | Clc1ccc(Cl)c(c1)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl,-8.94 882 | CCC(C)(C)C,-3.55 883 | CNc1ccccc1,-1.28 884 | C=CCC=C,-2.09 885 | CC(=O)OCC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3C(O)CC21C,-4.88 886 | 
Cc1cc(cc(N(=O)=O)c1O)N(=O)=O,-1.456 887 | OC3N=C(c1ccccc1Cl)c2cc(Cl)ccc2NC3=O,-3.6039999999999996 888 | Oc1cccc(Cl)c1,-0.7 889 | Clc1cccc(Br)c1,-3.21 890 | NS(=O)(=O)c2cc1c(N=CNS1(=O)=O)cc2Cl,-3.05 891 | O=C1NC(=O)NC(=O)C1(C)CC,-1.228 892 | OCCOc1ccccc1,-0.7 893 | C(c1ccccc1)c2ccccc2,-4.08 894 | CCCCCC(O)CC,-1.98 895 | CCN(Cc1c(F)cccc1Cl)c2c(cc(cc2N(=O)=O)C(F)(F)F)N(=O)=O,-6.78 896 | CC(C)Nc1nc(Cl)nc(NC(C)C)n1,-4.43 897 | CCCC(C)CO,-1.11 898 | CCCCC(C)(C)O,-1.08 899 | CCc1ccccc1,-2.77 900 | O=C1NC(=O)NC(=O)C1(CC)CC=C(C)C,-2.253 901 | ClC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,-6.317 902 | CCC(C)C1(CC(Br)=C)C(=O)NC(=O)NC1=O,-2.647 903 | CC1(C)C(C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2)C1(C)C,-6.025 904 | COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C,-5.19 905 | CCOC(=O)CC,-0.66 906 | CSc1nc(NC(C)C)nc(NC(C)C)n1,-4.1 907 | CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1,-3.9 908 | Cc1cc2ccccc2cc1C,-4.72 909 | Clc1ccc(cc1)c2cc(Cl)ccc2Cl,-6.25 910 | Clc1ccc(c(Cl)c1)c2cc(Cl)c(Cl)c(Cl)c2Cl,-7.39 911 | NC(N)=NC#N,-0.31 912 | ClC(Cl)(Cl)N(=O)=O,-2 913 | Clc1cccc(Cl)c1c2ccccc2,-5.21 914 | COc1ccc(C=O)cc1,-1.49 915 | CC(=O)Nc1ccc(cc1)N(=O)=O,-2.6919999999999997 916 | CCCCCCC(=O)OCC,-2.74 917 | CC(=O)Nc1ccc(O)cc1,-1.03 918 | c2ccc1[nH]ncc1c2,-2.16 919 | CC5(C)OC4CC3C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC3(C)C4(O5)C(=O)CO,-4.31 920 | Nc2nc1[nH]cnc1c(=O)[nH]2,-3.583 921 | COC(=O)C,0.46 922 | CC34CCC1C(CCC2CC(=O)CCC12C)C3CCC4O,-4.743 923 | CCCC(O)C=C,-0.59 924 | OC(C1=CC2C5C(C1C2=C(c3ccccc3)c4ccccn4)C(=O)NC5=O)(c6ccccc6)c7ccccn7,-3.931 925 | CCCCOCCCC,-1.85 926 | CCCCCCCCCCCCO,-4.8 927 | CCN2c1nc(N(C)(CCO))ccc1NC(=O)c3cccnc23,-3.36 928 | CCCC(C)(C)O,-0.49 929 | Nc1nc(=O)[nH]cc1F,-0.972 930 | CCCCOc1ccc(C(=O)OCC)c(c1)N(CC)CC,-3.84 931 | CCCCCC(C)(C)O,-1.72 932 | Cc1c(C)c(C)c(C)c(C)c1C,-5.23 933 | CC(C)c1ccc(C)cc1O,-2.22 934 | c2cnc1ncncc1n2,0.02 935 | CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O,-4.66 936 | C,-0.9 937 | c2ccc1NCCc1c2,-1.04 938 | O=N(=O)c1cccc2ccccc12,-3.54 939 | CCC(C)C(=O)C,-0.67 940 | 
Nc1nc(O)nc2nc[nH]c12,-3.4010000000000002 941 | OC(CC(c1ccccc1)c3c(O)c2ccccc2oc3=O)c4ccc(cc4)c5ccc(Br)cc5,-4.445 942 | CN(=O)=O,0.26 943 | CC(C)N(C(C)C)C(=O)SCC(Cl)=C(Cl)Cl,-4.88 944 | C=CCCC=C,-2.68 945 | c2ccc1[nH]ccc1c2,-1.52 946 | CC34CCC1C(CCC2=CC(=O)CCC12C)C3CCC4=O,-3.69 947 | CCCCC=C,-3.23 948 | Cc1cccc(C)c1NC(=O)c2cc(c(Cl)cc2O)S(N)(=O)=O,-3.79 949 | CCC1CCCCC1,-4.25 950 | CCCCCCCC(=O)C,-2.58 951 | COC(=O)Nc2nc1ccc(cc1[nH]2)C(=O)c3ccccc3,-3.88 952 | CC(C)OC(=O)Nc1cccc(Cl)c1,-3.38 953 | CCN2c1nc(Cl)ccc1N(C)C(=O)c3cccnc23,-4.114 954 | CNC(=O)Oc1cccc2ccccc12,-3.2239999999999998 955 | C#C,0.29 956 | Cc1cncc(C)c1,0.38 957 | C1C=CCC=C1,-2.06 958 | CCOC(=O)N(C)C(=O)CSP(=S)(OCC)OCC,-2.5180000000000002 959 | CC(O)c1ccccc1,-0.92 960 | CC(Cl)CCl,-1.6 961 | CCCC=C(CC)C=O,-2.46 962 | CCOP(=S)(OCC)SCCSCC,-4.23 963 | CC(=O)OC3(C)CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C,-5.284 964 | Clc1ccc(cc1)c2c(Cl)cccc2Cl,-6.14 965 | Fc1cccc(F)c1C(=O)NC(=O)Nc2ccc(Cl)cc2,-6.02 966 | Oc1cc(Cl)ccc1Oc2ccc(Cl)cc2Cl,-4.46 967 | c1(C(=O)OCCCCCC(C)(C))c(C(=O)OCCCCCC(C)(C))cccc1,-6.6370000000000005 968 | CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO,-3.24 969 | Cc1cc(C)cc(C)c1,-3.4 970 | CCCCCCCCOC(=O)c1ccccc1C(=O)OCCCCCCCC,-5.115 971 | CCCCCCCCCCCCCCCO,-6.35 972 | Clc1cccc(Cl)c1c2c(Cl)cccc2Cl,-7.39 973 | O=C1NC(=O)NC(=O)C1(C)C,-1.742 974 | CC(C)I,-2.09 975 | O=N(=O)c1ccccc1N(=O)=O,-3.1 976 | CC(C)C(=O)C,-0.12 977 | CCCCCCCCCCCCCCCC,-8.4 978 | CC12CCC(CC1)C(C)(C)O2,-1.74 979 | Cc2cccc3sc1nncn1c23,-2.07 980 | CCCCCCC(=O)C,-2.05 981 | CCCCCCCCC(=O)OC,-3.38 982 | Fc1ccc(F)cc1,-1.97 983 | O=C1N(C2CCC(=O)NC2=O)C(=O)c3ccccc13,-2.676 984 | CCCN(CCC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-5.68 985 | CCO,1.1 986 | O=C2NC(=O)C1(CCCC1)C(=O)N2,-2.349 987 | c1c(NC(=O)OC(C)C(=O)NCC)cccc1,-1.83 988 | CC(C)=CC3C(C(=O)OCc2cccc(Oc1ccccc1)c2)C3(C)C,-5.24 989 | CN(C)C(=O)NC1CCCCCCC1,-2.218 990 | ClC1(C2(Cl)C3(Cl)C4(Cl)C5(Cl)C1(Cl)C3(Cl)Cl)C5(Cl)C(Cl)(Cl)C24Cl,-6.8 991 | CCCCCCCCBr,-5.06 992 | 
CCCCNC(=O)n1c(NC(=O)OC)nc2ccccc12,-4.883 993 | CN(C)c2c(C)n(C)n(c1ccccc1)c2=O,-0.364 994 | CCC(O)CC,-0.24 995 | Cc1ccc(cc1)N(=O)=O,-2.49 996 | CC(C)CCCO,-1.14 997 | CC34CCC1C(CCC2=CC(=O)CCC12O)C3CCC4(O)C#C,-4.57 998 | CC(C)OC(=O)C(O)(c1ccc(Br)cc1)c2ccc(Br)cc2,-4.93 999 | Nc2cnn(c1ccccc1)c(=O)c2Cl,-2.878 1000 | CCC(C)(C)O,0.15 1001 | Cc1ccc(O)cc1,-0.73 1002 | CCOC=O,0.15 1003 | CN(C)c1ccccc1,-1.92 1004 | C1CCC2CCCCC2C1,-5.19 1005 | CCCCS,-2.18 1006 | c1ccc2c(c1)c3cccc4ccc5cccc2c5c43,-7.8 1007 | ClC(=C(Cl)Cl)Cl,-2.54 1008 | CCC(=O)CC,-0.28 1009 | C=CC#N,0.15 1010 | CC1CC2C3CC(F)C4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO,-5.6129999999999995 1011 | CCCCC(=O)C,-0.8 1012 | CCNc1nc(NC(C)(C)C)nc(OC)n1,-3.239 1013 | CCCCC(C)CC,-5.16 1014 | BrCCBr,-1.68 1015 | CNC(=O)Oc1ccccc1C(C)C,-2.863 1016 | O=C1NCCN1c2ncc(s2)N(=O)=O,-3.22 1017 | C1c2ccccc2c3ccc4ccccc4c13,-6.68 1018 | COc1ccccc1Cl,-2.46 1019 | COP(=S)(OC)Oc1cc(Cl)c(Br)cc1Cl,-6.09 1020 | ClC(Cl)CC(=O)NC2=C(Cl)C(=O)c1ccccc1C2=O,-5.03 1021 | ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2,-7.2 1022 | COC(=O)C=C,-0.22 1023 | CN(C)C(=O)Nc2ccc(Oc1ccc(Cl)cc1)cc2,-4.89 1024 | N(=Nc1ccccc1)c2ccccc2,-4.45 1025 | CC(C)c1ccc(C)cc1,-3.77 1026 | Oc1c(Cl)cccc1Cl,-1.79 1027 | OCC2OC(OC1(CO)OC(CO)C(O)C1O)C(O)C(O)C2O,0.79 1028 | OC1C(O)C(O)C(O)C(O)C1O,0.35 1029 | Cn2c(=O)n(C)c1ncn(CC(O)CO)c1c2=O,-0.17 1030 | OCC(NC(=O)C(Cl)Cl)C(O)c1ccc(cc1)N(=O)=O,-2.1109999999999998 1031 | CCC(O)(CC)CC,-0.85 1032 | CC45CCC2C(CCC3CC1SC1CC23C)C4CCC5O,-5.41 1033 | Brc1ccccc1Br,-3.5 1034 | Oc1c(Cl)cc(Cl)cc1Cl,-2.34 1035 | CCCN(CCC)c1c(cc(cc1N(=O)=O)S(N)(=O)=O)N(=O)=O,-5.16 1036 | C2c1ccccc1N(CCF)C(=O)c3ccccc23,-4.7989999999999995 1037 | CC(C)C(=O)C(C)C,-1.3 1038 | O=C1NC(=O)NC(=O)C1(C(C)C)CC=C(C)C,-2.593 1039 | c1c(O)C2C(=O)C3cc(O)ccC3OC2cc1(OC),-2.943 1040 | Cn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.8759999999999999 1041 | CC(=O)SC4CC1=CC(=O)CCC1(C)C5CCC2(C)C(CCC23CCC(=O)O3)C45,-4.173 1042 | Cc1ccc(O)cc1C,-1.38 1043 | O(c1ccccc1)c2ccccc2,-3.96 1044 | 
Clc1cc(Cl)c(cc1Cl)c2cc(Cl)c(Cl)cc2Cl,-8.56 1045 | NC(=O)c1cccnc1,0.61 1046 | Sc1ccccc1,-2.12 1047 | CNC(=O)Oc1cc(C)cc(C)c1,-2.5810000000000004 1048 | ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl,-6.86 1049 | CSSC,-1.44 1050 | NC(=O)c1ccccc1,-0.96 1051 | Clc1ccccc1Br,-3.19 1052 | COC(=O)c1ccccc1OC2OC(COC3OCC(O)C(O)C3O)C(O)C(O)C2O,-0.742 1053 | CCCCC(O)CC,-1.47 1054 | CCN2c1nc(C)cc(C)c1NC(=O)c3cccnc23,-4.553999999999999 1055 | Oc1cc(Cl)cc(Cl)c1,-1.34 1056 | Cc1cccc2c1ccc3ccccc32,-5.85 1057 | CCCCC(CC)CO,-2.11 1058 | CC(C)N(C(C)C)C(=O)SCC(=CCl)Cl,-4.2860000000000005 1059 | Cc1ccccc1,-2.21 1060 | Clc1cccc(n1)C(Cl)(Cl)Cl,-3.76 1061 | C1CCC=CCC1,-3.18 1062 | CN(C)C(=S)SSC(=S)N(C)C,-3.9 1063 | COC1=CC(=O)CC(C)C13Oc2c(Cl)c(OC)cc(OC)c2C3=O,-3.2460000000000004 1064 | CCCCCCCCCCO,-3.63 1065 | CCC(C)(C)CC,-4.23 1066 | CNC(=O)C(C)SCCSP(=O)(OC)(OC),1.1440000000000001 1067 | Oc1cc(Cl)c(Cl)c(Cl)c1Cl,-3.15 1068 | CCCC=O,-0.01 1069 | CC4CC3C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC3(C)C4(O)C(=O)COC(C)=O,-4.9 1070 | CCCC,-2.57 1071 | COc1ccccc1O,-1.96 1072 | CC1CC2C3CCC(O)(C(=O)C)C3(C)CC(O)C2(F)C4(C)C=CC(=O)C=C14,-4.099 1073 | ClC(Cl)C(Cl)(Cl)Cl,-2.6 1074 | CCOC(=O)c1ccccc1C(=O)OCC,-2.35 1075 | CC(C)CO,0.1 1076 | CC(C)Cc1ccccc1,-4.12 1077 | ICI,-2.34 1078 | CCCC(O)CCC,-1.4 1079 | CCCCCOC(=O)C,-1.89 1080 | Oc1c(Cl)c(Cl)cc(Cl)c1Cl,-3.37 1081 | CCCc1ccccc1,-3.37 1082 | FC(F)(Cl)C(F)(F)Cl,-2.74 1083 | CC=CC=O,0.32 1084 | CN(C)C(=O)N(C)C,0.94 1085 | Cc1cc(C)c(C)cc1C,-4.59 1086 | CC(=O)OC3(CCC4C2CCC1=CC(=O)CCC1C2CCC34C)C#C,-4.8 1087 | CCOP(=S)(OCC)N2C(=O)c1ccccc1C2=O,-3.35 1088 | c1ccccc1NC(=O)c2c(O)cccc2,-3.59 1089 | CCN(CC)C(=S)SCC(Cl)=C,-3.39 1090 | ClCC,-1.06 1091 | CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(C)cc1C,-3.24 1092 | O=C(C=CC=Cc2ccc1OCOc1c2)N3CCCCC3,-3.46 1093 | CC/C=C\C,-2.54 1094 | CNC(=O)ON=C(CSC)C(C)(C)C,-1.62 1095 | O=C2NC(=O)C1(CCCCCCC1)C(=O)N2,-2.9819999999999998 1096 | c1(C(C)(C)C)cc(C(C)(C)C)cc(OC(=O)NC)c1,-4.24 1097 | Oc2cc(O)c1C(=O)CC(Oc1c2)c3ccc(O)c(O)c3,-3.62 1098 | 
O=C(c1ccccc1)c2ccccc2,-3.12 1099 | CCCCCCCCCCCCCCCCCCCC,-8.172 1100 | N(Nc1ccccc1)c2ccccc2,-2.92 1101 | CCC(CC)CO,-1.17 1102 | Oc1ccncc1,1.02 1103 | Cl\C=C/Cl,-1.3 1104 | CC1CCCC1,-3.3 1105 | CC(C)CC(C)O,-0.8 1106 | O2c1ccc(N)cc1N(C)C(=O)c3cc(C)ccc23,-3.928 1107 | CC(C)(C)CO,-0.4 1108 | CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n2cncn2,-3.61 1109 | Cc1cc(no1)C(=O)NNCc2ccccc2,-2.461 1110 | CC=C,-1.08 1111 | Oc1ccc(Cl)cc1Cc2cc(Cl)ccc2O,-3.9530000000000003 1112 | CCOC(=O)Nc2cccc(OC(=O)Nc1ccccc1)c2,-4.632 1113 | O=C1c2ccccc2C(=O)c3ccccc13,-5.19 1114 | CCCCCCC(C)O,-2.09 1115 | CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1,-2.281 1116 | CCCCc1ccccc1,-4.06 1117 | O=C1NC(=O)C(=O)N1,-0.4 1118 | COP(=S)(OC)Oc1ccc(Sc2ccc(OP(=S)(OC)OC)cc2)cc1,-6.237 1119 | NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc23,-3.451 1120 | CC(C)COC(=O)C,-1.21 1121 | CC(C)C(C)(C)C,-4.36 1122 | Clc1ccc(c(Cl)c1Cl)c2c(Cl)cc(Cl)c(Cl)c2Cl,-7.66 1123 | N#Cc1ccccc1C#N,-2.38 1124 | Cc1cccc(c1)N(=O)=O,-2.44 1125 | FC(F)(F)C(Cl)Br,-1.71 1126 | CNC(=O)ON=C(SC)C(=O)N(C)C,0.106 1127 | CCSCCSP(=S)(OC)OC,-3.091 1128 | CCC(C)C,-3.18 1129 | COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl,-4.522 1130 | -------------------------------------------------------------------------------- /dataset/freesolv.csv: -------------------------------------------------------------------------------- 1 | smiles,freesolv 2 | CN(C)C(=O)c1ccc(cc1)OC,-11.01 3 | CS(=O)(=O)Cl,-4.87 4 | CC(C)C=C,1.83 5 | CCc1cnccn1,-5.45 6 | CCCCCCCO,-4.21 7 | Cc1cc(cc(c1)O)C,-6.27 8 | CC(C)C(C)C,2.34 9 | CCCC(C)(C)O,-3.92 10 | C[C@@H]1CCCC[C@@H]1C,1.58 11 | CC[C@H](C)O,-4.62 12 | C(Br)Br,-1.96 13 | CC[C@H](C(C)C)O,-3.88 14 | CCc1ccccn1,-4.33 15 | CCCCC(=O)OCC,-2.49 16 | c1ccc(cc1)S,-2.55 17 | CC(=CCC/C(=C\CO)/C)C,-4.78 18 | c1ccc2c(c1)CCC2,-1.46 19 | CCOc1ccccc1,-2.22 20 | c1cc(ccc1O)Br,-5.85 21 | CCCC(C)(C)C,2.88 22 | CC(=O)OCCOC(=O)C,-6.34 23 | CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,-6.1 24 | C1CCCC(CC1)O,-5.48 25 | COC(=O)C1CC1,-4.1 26 | c1ccc(cc1)C#N,-4.1 27 | CCCCC#N,-3.52 28 | CC(C)(C)O,-4.47 29 | 
CC(C)C(=O)C(C)C,-2.74 30 | CCC=O,-3.43 31 | CN(C)C=O,-7.81 32 | Cc1ccc(cc1)C,-0.8 33 | C=CCC=C,0.93 34 | Cc1cccc(c1C)Nc2ccccc2C(=O)O,-6.78 35 | CN(C)C(=O)c1ccccc1,-9.29 36 | CCNCC,-4.07 37 | CC(C)(C)c1ccc(cc1)O,-5.91 38 | CC(C)CCOC=O,-2.13 39 | CCCCCCCCCCO,-3.64 40 | CCC(=O)OCC,-2.68 41 | CCCCCCCCC,3.13 42 | CC(=O)NC,-10 43 | CCCCCCCC=C,2.06 44 | c1ccc2cc(ccc2c1)O,-8.11 45 | c1cc(c(cc1Cl)Cl)Cl,-1.12 46 | C([C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O)O,-23.62 47 | CCCC(=O)OC,-2.83 48 | c1ccc(c(c1)C=O)O,-4.68 49 | C1CNC1,-5.56 50 | CCCNCCC,-3.65 51 | c1ccc(cc1)N,-5.49 52 | C(F)(F)(F)F,3.12 53 | CC[C@@H](C)CO,-4.42 54 | c1ccc(c(c1)O)I,-6.2 55 | COc1cccc(c1O)OC,-6.96 56 | CCC#C,-0.16 57 | c1ccc(cc1)C(F)(F)F,-0.25 58 | NN,-9.3 59 | Cc1ccccn1,-4.63 60 | CCNc1nc(nc(n1)Cl)NCC,-10.22 61 | c1ccc2c(c1)Oc3cc(c(cc3O2)Cl)Cl,-3.56 62 | CCCCCCCCN,-3.65 63 | N,-4.29 64 | c1ccc(c(c1)C(F)(F)F)C(F)(F)F,1.07 65 | COC(=O)c1ccc(cc1)O,-9.51 66 | CCCCCc1ccccc1,-0.23 67 | CC(F)F,-0.11 68 | c1ccc(cc1)n2c(=O)c(c(cn2)N)Cl,-16.43 69 | C=CC=C,0.56 70 | CN(C)C,-3.2 71 | CCCCCC(=O)N,-9.31 72 | CC(C)CO[N+](=O)[O-],-1.88 73 | c1ccc2c(c1)C(=O)c3cccc(c3C2=O)NCCO,-14.21 74 | C(CO[N+](=O)[O-])O,-8.18 75 | CCCCCCC(=O)C,-2.88 76 | CN1CCNCC1,-7.77 77 | CCN,-4.5 78 | C1C=CC=CC=C1,-0.99 79 | c1ccc2c(c1)Cc3ccccc3C2,-3.78 80 | CC(Cl)Cl,-0.84 81 | COc1cccc(c1)O,-7.66 82 | c1cc2cccc3c2c(c1)CC3,-3.15 83 | CCCCCCCCBr,0.52 84 | c1ccc(cc1)CO,-6.62 85 | c1c(c(=O)[nH]c(=O)[nH]1)Br,-18.17 86 | CCCC,2.1 87 | CCl,-0.55 88 | CC(C)CBr,-0.03 89 | CC(C)SC(C)C,-1.21 90 | CCCCCCC,2.67 91 | c1cnc[nH]1,-9.63 92 | c1cc2c(cc1Cl)Oc3cc(c(c(c3O2)Cl)Cl)Cl,-3.84 93 | CC[C@H](C)n1c(=O)c(c([nH]c1=O)C)Br,-9.73 94 | C(I)I,-2.49 95 | CCCN(CCC)C(=O)SCCC,-4.13 96 | C[N+](=O)[O-],-4.02 97 | CCOC,-2.1 98 | COC(CCl)(OC)OC,-4.59 99 | CC(C)C,2.3 100 | CC(C)CC(=O)O,-6.09 101 | CCOP(=O)(OCC)O/C(=C/Cl)/c1ccc(cc1Cl)Cl,-7.07 102 | CCCCl,-0.33 103 | CCCSCCC,-1.28 104 | CCC[C@H](CC)O,-4.06 105 | CC#N,-3.88 106 | CN(CC(F)(F)F)c1ccccc1,-1.92 107 | 
[C@@H](C(F)(F)F)(OC(F)F)Cl,0.1 108 | C=CCCC=C,1.01 109 | Cc1cccc(c1)C,-0.83 110 | CC(=O)OC,-3.13 111 | COC(c1ccccc1)(OC)OC,-4.04 112 | CCOC(=O)c1ccccc1,-3.64 113 | CCCS,-1.1 114 | CCCCCC(=O)C,-3.04 115 | CC1(Cc2cccc(c2O1)OC(=O)NC)C,-9.61 116 | c1ccc(cc1)CBr,-2.38 117 | CCCCCC(=O)OCC,-2.23 118 | CCCOC,-1.66 119 | CN1CCOCC1,-6.32 120 | c1cc(cc(c1)O)C#N,-9.65 121 | c1cc(c(cc1c2c(c(cc(c2Cl)Cl)Cl)Cl)Cl)Cl,-4.38 122 | CCCc1ccccc1,-0.53 123 | Cn1cnc2c1c(=O)n(c(=O)n2C)C,-12.64 124 | CNC,-4.29 125 | C(=C(F)F)(C(F)(F)F)F,2.93 126 | c1cc(ccc1O)Cl,-7.03 127 | C1CCNCC1,-5.11 128 | c1ccc2c(c1)ccc3c2cccc3,-3.88 129 | CI,-0.89 130 | COc1c(cc(c(c1O)OC)Cl)Cl,-6.44 131 | C(=C/Cl)\Cl,-0.78 132 | CCCCC,2.3 133 | CCCC#N,-3.64 134 | [C@@H](C(F)(F)F)(F)Br,0.5 135 | CC(C)Cc1cnccn1,-5.04 136 | CC[C@H](C)O[N+](=O)[O-],-1.82 137 | c1ccc(cc1)c2cc(ccc2Cl)Cl,-2.46 138 | c1ccc(cc1)c2cc(c(c(c2Cl)Cl)Cl)Cl,-3.48 139 | CC[C@@H](C)C(C)C,2.52 140 | C[C@H](CC(C)C)O,-3.73 141 | C1CCOCC1,-3.12 142 | C1CC1,0.75 143 | c1c(cc(c(c1Cl)Cl)Cl)c2cc(c(c(c2Cl)Cl)Cl)Cl,-3.17 144 | C=C(Cl)Cl,0.25 145 | CC(C)CO,-4.5 146 | CCCOC(=O)CC,-2.44 147 | C(C(Cl)(Cl)Cl)(Cl)(Cl)Cl,-0.64 148 | CSc1ccccc1,-2.73 149 | CCc1ccccc1O,-5.66 150 | CC(C)(C)Cl,1.09 151 | CC(=C)C=C,0.68 152 | Cc1ccc(cc1)C(C)C,-0.68 153 | Cn1ccnc1,-8.41 154 | C(CO)O,-9.3 155 | c1ccc(c(c1)Cl)Cl,-1.36 156 | c1c(=O)[nH]c(=O)[nH]c1Cl,-15.83 157 | CCCOC=O,-2.48 158 | c1ccc2c(c1)Oc3ccc(cc3O2)Cl,-3.1 159 | CCCCCC(=O)O,-6.21 160 | CCOC(=O)CCC(=O)OCC,-5.71 161 | Cc1ccnc(c1)C,-4.86 162 | C1CCC=CC1,0.14 163 | CN1CCN(CC1)C,-7.58 164 | c1cc(c(cc1c2cc(c(c(c2Cl)Cl)Cl)Cl)Cl)Cl,-3.04 165 | C1=CC(=O)C=CC1=O,-6.5 166 | COC(=O)CCl,-4 167 | CCCC=O,-3.18 168 | CCc1ccccc1,-0.79 169 | C(=C(Cl)Cl)Cl,-0.44 170 | CCN(CC)CC,-3.22 171 | c1cc2c(cc1Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2,-4.15 172 | Cc1ccncc1C,-5.22 173 | c1(=O)[nH]c(=O)[nH]c(=O)[nH]1,-18.06 174 | c1ccc(cc1)C=O,-4.02 175 | c1ccnc(c1)Cl,-4.39 176 | C=CCCl,-0.57 177 | Cc1ccc(cc1)C(=O)C,-4.7 178 | C=O,-2.75 179 | Cc1ccccc1Cl,-1.14 
180 | CC(=O)N1CCCC1,-9.8 181 | CC(OC)(OC)OC,-4.42 182 | CCCCc1ccccc1,-0.4 183 | CN(C)c1ccccc1,-3.45 184 | CC(C)OC,-2.01 185 | c12c(c(c(c(c1Cl)Cl)Cl)Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2,-4.53 186 | c1(c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)c2c(c(c(c(c2Cl)Cl)Cl)Cl)Cl,-2.98 187 | C(C(Cl)Cl)Cl,-1.99 188 | CNc1ccccc1,-4.69 189 | CC(C)OC(=O)C,-2.64 190 | c1ccccc1,-0.9 191 | c1cc(c(c(c1)Cl)Cl)Cl,-1.24 192 | CCOP(=S)(OCC)SCSc1ccc(cc1)Cl,-6.5 193 | COP(=S)(OC)SCn1c(=O)c2ccccc2nn1,-10.03 194 | c1ccc2c(c1)Oc3c(cc(c(c3O2)Cl)Cl)Cl,-4.05 195 | CC(=C)C(=C)C,0.4 196 | CCCCC=C,1.58 197 | S,-0.7 198 | CCOCC,-1.59 199 | CCNc1nc(nc(n1)SC)NC(C)C,-7.65 200 | CCCCOC(=O)c1ccc(cc1)O,-8.72 201 | CCCCCCOC(=O)C,-2.26 202 | C1CCC(=O)C1,-4.7 203 | CCCCC(=O)O,-6.16 204 | CCBr,-0.74 205 | Cc1ccc2cc(ccc2c1)C,-2.63 206 | CCCCCCO,-4.4 207 | c1ccc(cc1)c2ccccc2Cl,-2.69 208 | CC1=CCCCC1,0.67 209 | CCCCCCO[N+](=O)[O-],-1.66 210 | C(Br)(Br)Br,-2.13 211 | CCc1ccc(cc1)O,-6.13 212 | CCCOCCO,-6.4 213 | c1ccc(cc1)OC=O,-3.82 214 | c1c(c(=O)[nH]c(=O)[nH]1)I,-18.72 215 | CCCC(=O)O,-6.35 216 | COC(C(F)(F)F)(OC)OC,-0.8 217 | C1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O,-20.52 218 | C(F)(F)(F)Br,1.79 219 | CCCCO,-4.72 220 | c1ccc(cc1)F,-0.8 221 | CCOC(=O)C,-2.94 222 | CC(C)COC(=O)C(C)C,-1.69 223 | CC(C)(C)OC,-2.21 224 | C1=C[C@@H]([C@@H]2[C@H]1[C@@]3(C(=C([C@]2(C3(Cl)Cl)Cl)Cl)Cl)Cl)Cl,-2.55 225 | CCC(=O)CC,-3.41 226 | COC(=O)C(F)(F)F,-1.1 227 | c1ccc2ccccc2c1,-2.4 228 | c1cc(c(c(c1c2cc(c(c(c2Cl)Cl)Cl)Cl)Cl)Cl)Cl,-4.4 229 | CC(=O)Oc1ccccc1C(=O)O,-9.94 230 | CC(=O)C(C)(C)C,-3.11 231 | COS(=O)(=O)C,-4.87 232 | CCc1ccncc1,-4.73 233 | CC(C)NC(C)C,-3.22 234 | c1cc2c(cc1Cl)Oc3ccc(cc3O2)Cl,-3.67 235 | CCCCCCCN,-3.79 236 | CC1CCCC1,1.59 237 | CCC,2 238 | C[C@H]1CCCO1,-3.3 239 | CNC(=O)Oc1cccc2c1cccc2,-9.45 240 | c1cc(cc(c1)O)C=O,-9.52 241 | c1ccc2cc3ccccc3cc2c1,-3.95 242 | C(Cl)Cl,-1.31 243 | CC(C)(C)C(=O)OC,-2.4 244 | C([N+](=O)[O-])(Cl)(Cl)Cl,-1.45 245 | C1CC[S+2](C1)([O-])[O-],-8.61 246 | Cc1cccc(c1O)C,-5.26 247 | Cc1cccc(c1)O,-5.49 248 | 
c1ccc2c(c1)C(=O)c3c(ccc(c3C2=O)O)N,-9.53 249 | c1ccc2c(c1)C(=O)c3c(ccc(c3C2=O)N)N,-11.85 250 | CCCCCCCC(=O)C,-2.49 251 | CCCCN,-4.24 252 | CCCC(=O)OCC,-2.49 253 | Cc1ccc(cc1)N,-5.57 254 | CCCCCCI,0.08 255 | C(C(F)(Cl)Cl)(F)(F)Cl,1.77 256 | COP(=O)(OC)OC,-8.7 257 | c1cc(cc(c1)Cl)Cl,-0.98 258 | Cc1cc(c2ccccc2c1)C,-2.47 259 | CCCC(C)C,2.51 260 | CCOP(=S)(OCC)Oc1c(cc(c(n1)Cl)Cl)Cl,-5.04 261 | C(C(F)(F)F)Cl,0.06 262 | C=C,1.28 263 | CCCCCI,-0.14 264 | COC(OC)OC,-4.42 265 | CCCCCCCCCC,3.16 266 | C[C@@H](CO[N+](=O)[O-])O[N+](=O)[O-],-4.95 267 | CC=C,1.32 268 | Cc1c[nH]c2c1cccc2,-5.88 269 | COP(=O)([C@H](C(Cl)(Cl)Cl)O)OC,-12.74 270 | C1CCCCC1,1.23 271 | CC(=CCC/C(=C/CO)/C)C,-4.45 272 | CC(C)c1ccccc1,-0.3 273 | CC(C)C(C)C(C)C,2.56 274 | CC(C)C(=O)C,-3.24 275 | CCCCNCCCC,-3.24 276 | CCCCS,-0.99 277 | c1ccc2c(c1)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2,-3.81 278 | COc1c(c(c(c(c1Cl)C=O)Cl)OC)O,-8.68 279 | C1CCC(CC1)N,-4.59 280 | C(F)(F)Cl,-0.5 281 | COC(=O)c1ccc(cc1)[N+](=O)[O-],-6.88 282 | CC(=O)c1cccnc1,-8.26 283 | CC#C,-0.48 284 | CCCCCCCCC=O,-2.07 285 | CCC(=O)O,-6.46 286 | C(Cl)(Cl)Cl,-1.08 287 | Cc1cccc(c1C)C,-1.21 288 | C,2 289 | c1ccc(cc1)CCl,-1.93 290 | CC1CCCCC1,1.7 291 | Cc1cccs1,-1.38 292 | c1ccncc1,-4.69 293 | CCCCCl,-0.16 294 | C[C@H]1CC[C@@H](O1)C,-2.92 295 | Cc1ccc(c(c1)OC)O,-5.8 296 | C1[C@H]([C@@H]2[C@H]([C@H]1Cl)[C@]3(C(=C([C@@]2(C3(Cl)Cl)Cl)Cl)Cl)Cl)Cl,-3.44 297 | Cc1ccccc1,-0.9 298 | CC(C)COC=O,-2.22 299 | CCOC(=O)c1ccc(cc1)O,-9.2 300 | CCOCCOCC,-3.54 301 | CCCCCOC(=O)CC,-2.11 302 | CCCc1ccc(cc1)O,-5.21 303 | CC=C(C)C,1.31 304 | C(CCl)Cl,-1.79 305 | CCC(C)(C)CC,2.56 306 | Cc1cc2ccccc2cc1C,-2.78 307 | Cc1cccc(n1)C,-4.59 308 | COC(C(Cl)Cl)(F)F,-1.12 309 | CCOCCOC(=O)C,-5.31 310 | COc1cccc(c1)N,-7.29 311 | c1cc(cnc1)C=O,-7.1 312 | CCC(C)(C)O,-4.43 313 | CCc1cccc(c1N(COC)C(=O)CCl)CC,-8.21 314 | Cn1cccc1,-2.89 315 | COCOC,-2.93 316 | CCC(CC)O,-4.35 317 | CCCCCCCCCC(=O)C,-2.15 318 | C(CBr)Cl,-1.95 319 | c1ccc(cc1)I,-1.74 320 | CC1=CC(=O)CC(C1)(C)C,-5.18 321 | CCI,-0.74 322 | 
CCCc1ccc(c(c1)OC)O,-5.26 323 | CC(C)Br,-0.48 324 | Cc1ccc(cc1)Br,-1.39 325 | c1cc(ccc1C#N)O,-10.17 326 | CS(=O)(=O)C,-10.08 327 | CCc1cccc(c1)O,-6.25 328 | CC1=CC[C@H](C[C@@H]1O)C(=C)C,-4.44 329 | c1cc(ccc1Br)Br,-2.3 330 | COc1c(ccc(c1C(=O)O)Cl)Cl,-9.86 331 | CC/C=C\C,1.31 332 | CC,1.83 333 | COc1ccccc1OC,-5.33 334 | CCSCC,-1.46 335 | c1cc(cnc1)C#N,-6.75 336 | c1cc(c(cc1O)Cl)Cl,-7.29 337 | COc1ccccc1,-2.45 338 | Cc1ccc(c(c1)O)C,-5.91 339 | c1cc(ccc1Cl)Cl,-1.01 340 | C(F)Cl,-0.77 341 | CCCC=C,1.68 342 | c1cc(c(c(c1Cl)Cl)Cl)Cl,-1.34 343 | CCCCCC#C,0.6 344 | CCCCCCCCC(=O)C,-2.34 345 | c1ccc(cc1)Cl,-1.12 346 | CN(C)CCOC(c1ccccc1)c2ccccc2,-9.34 347 | CCCCC=O,-3.03 348 | c1ccc(cc1)Oc2ccccc2,-2.87 349 | C1CCC(=O)CC1,-4.91 350 | CCCC[N+](=O)[O-],-3.09 351 | c1cnccc1C=O,-7 352 | C(CCl)OCCCl,-4.23 353 | CC[N+](=O)[O-],-3.71 354 | c1cc(cnc1)Cl,-4.01 355 | CBr,-0.82 356 | CO,-5.1 357 | CCCCCCC=O,-2.67 358 | c1cc(c(c(c1)Cl)c2c(cccc2Cl)Cl)Cl,-2.28 359 | c1ccc(c(c1)N)[N+](=O)[O-],-7.37 360 | CN1CCCCC1,-3.88 361 | CCCCCCCC=O,-2.29 362 | c1ccc(cc1)[N+](=O)[O-],-4.12 363 | C[C@@H]1CC[C@H](C(=O)C1)C(C)C,-2.53 364 | C([C@@H]1[C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)O,-25.47 365 | CF,-0.22 366 | CS(=O)C,-9.280000000000001 367 | c1ccc2c(c1)Oc3ccccc3O2,-3.15 368 | Cc1ccccc1N,-5.53 369 | CCCCBr,-0.4 370 | CCCCCCCCCO,-3.88 371 | Cc1ccncc1,-4.93 372 | C(=C(Cl)Cl)(Cl)Cl,0.1 373 | CC(C)(C)Br,0.84 374 | C=C(c1ccccc1)c2ccccc2,-2.78 375 | CCc1ccc(cc1)C,-0.95 376 | Cc1cccnc1,-4.77 377 | COCC(OC)(OC)OC,-5.73 378 | c1ccc-2c(c1)Cc3c2cccc3,-3.35 379 | CC(=O)N,-9.71 380 | COS(=O)(=O)OC,-5.1 381 | C(C(Cl)Cl)(Cl)Cl,-2.37 382 | COC(=O)C1CCCCC1,-3.3 383 | CCCCCCBr,0.18 384 | CCCCCCCBr,0.34 385 | c1ccc2c(c1)Oc3cccc(c3O2)Cl,-3.52 386 | COC(CC#N)(OC)OC,-6.4 387 | CC[C@H](C)Cl,0 388 | CCCCCCc1ccccc1,-0.04 389 | COc1cc(c(c(c1O)OC)Cl)C=O,-7.78 390 | c1cc(cc(c1)C(F)(F)F)C(F)(F)F,1.07 391 | c1ccc(cc1)Cn2ccnc2,-7.63 392 | c1ccc2c(c1)cccc2N,-7.28 393 | CCOC(=O)CC(=O)OCC,-6 394 | CC(=O)C1CC1,-4.61 395 | c1cc[nH]c1,-4.78 
396 | c1cc(c(cc1c2ccc(cc2F)F)C(=O)O)O,-9.4 397 | CC1CCC(CC1)C,2.11 398 | C1CCC(CC1)O,-5.46 399 | CN(C)CCC=C1c2ccccc2CCc3c1cccc3,-7.43 400 | c1cc(ccc1O)F,-6.19 401 | c1ccc(c(c1)N)Cl,-4.91 402 | Cc1ccc(c(c1)C)C,-0.86 403 | CCc1ccccc1C,-0.85 404 | C[C@@H]1CC[C@H](CC1=O)C(=C)C,-3.75 405 | c1ccc(cc1)c2ccccc2,-2.7 406 | Cc1cccc(c1C)O,-6.16 407 | COP(=S)(OC)Oc1ccc(cc1)[N+](=O)[O-],-7.19 408 | CCOP(=S)(OCC)Oc1ccc(cc1)[N+](=O)[O-],-6.74 409 | CCN(CC)c1c(cc(c(c1[N+](=O)[O-])N)C(F)(F)F)[N+](=O)[O-],-5.66 410 | CSC,-1.61 411 | C[C@@H](c1cccc(c1)C(=O)c2ccccc2)C(=O)O,-10.78 412 | C1CCC(C1)O,-5.49 413 | CCCCC(=O)OC,-2.56 414 | CCCC(=C)C,1.47 415 | C[C@@H](c1ccc(c(c1)F)c2ccccc2)C(=O)O,-8.42 416 | CCCN(CCC)c1c(cc(cc1[N+](=O)[O-])S(=O)(=O)C)[N+](=O)[O-],-7.98 417 | C=CCl,-0.59 418 | Cc1ccc(cc1)C(=O)N(C)C,-9.76 419 | CCCC(=O)CCC,-2.92 420 | COC(=O)c1ccccc1,-3.92 421 | Cc1ccc(cc1)C=O,-4.27 422 | CCCC(=O)OCCC,-2.28 423 | C1CNCCN1,-7.4 424 | CCOP(=S)(OCC)S[C@@H](CCl)N1C(=O)c2ccccc2C1=O,-5.74 425 | CCOCCO,-6.69 426 | CCC(C)CC,2.51 427 | Cc1cnccn1,-5.51 428 | CCC[N+](=O)[O-],-3.34 429 | Cc1cc(cc(c1)C)C,-0.9 430 | c1c(c(=O)[nH]c(=O)[nH]1)F,-16.92 431 | CCO,-5 432 | Cc1ccc(c2c1cccc2)C,-2.82 433 | c1c2c(cc(c1Cl)Cl)Oc3cc(c(cc3O2)Cl)Cl,-3.37 434 | c1cc(c(c(c1)Cl)C#N)Cl,-4.71 435 | CCOC=O,-2.56 436 | c1c(c(cc(c1Cl)Cl)Cl)Cl,-1.34 437 | CCOC(OCC)Oc1ccccc1,-5.23 438 | c1cc(cc(c1)O)[N+](=O)[O-],-9.62 439 | CCCCCCCCO,-4.09 440 | CCC=C,1.38 441 | C(Cl)(Cl)(Cl)Cl,0.08 442 | c1ccc(cc1)CCO,-6.79 443 | CN(C)C(=O)Nc1ccccc1,-9.13 444 | CSSC,-1.83 445 | C1C=CC[C@@H]2[C@@H]1C(=O)N(C2=O)SC(Cl)(Cl)Cl,-9.01 446 | CC(=O)OCC(COC(=O)C)OC(=O)C,-8.84 447 | COC,-1.91 448 | CCCCCC,2.48 449 | C(CBr)Br,-2.33 450 | C(C(Cl)(Cl)Cl)(Cl)Cl,-1.23 451 | c1c(c(=O)[nH]c(=O)[nH]1)C(F)(F)F,-15.46 452 | Cc1cccc(c1N)C,-5.21 453 | CCCOC(=O)C,-2.79 454 | c1ccc2c(c1)cccn2,-5.72 455 | CCS,-1.14 456 | CCSSCC,-1.64 457 | c1ccsc1,-1.4 458 | CCc1cccc2c1cccc2,-2.4 459 | CCCC(=O)C,-3.52 460 | 
c1c(c(c(c(c1Cl)Cl)Cl)Cl)c2c(cc(c(c2Cl)Cl)Cl)Cl,-4.61 461 | CCC[N@@](CC1CC1)c2c(cc(cc2[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],-2.45 462 | CC(=O)O,-6.69 463 | CC=O,-3.5 464 | c1cc(cc(c1)[N+](=O)[O-])N,-8.84 465 | CCCCC#C,0.29 466 | COc1ccccc1N,-6.12 467 | c1ccc(cc1)O,-6.6 468 | CCC#N,-3.84 469 | c1ccc2c(c1)cccc2O,-7.67 470 | CCCCOC(=O)C,-2.64 471 | CC(C)(/C=N\OC(=O)NC)SC,-9.84 472 | Cc1ccccc1O,-5.9 473 | CC(C)C=O,-2.86 474 | CCC(=O)N,-9.4 475 | CCCBr,-0.56 476 | CC(C)Cl,-0.25 477 | C(CCl)CCl,-1.89 478 | c1cc(ccc1[N+](=O)[O-])O,-10.64 479 | C[C@@H](CCl)Cl,-1.27 480 | c1cc(ccc1N)Cl,-5.9 481 | c1ccc2c(c1)C(=O)c3cccc(c3C2=O)N,-9.44 482 | Cc1cccnc1C,-4.82 483 | c1cnccc1C#N,-6.02 484 | CCOP(=S)(OCC)SCSCC,-4.37 485 | CC(=O)C1CCCCC1,-3.9 486 | Cc1ccccc1C=O,-3.93 487 | CC(=O)c1ccncc1,-7.62 488 | c1c2c(cc(c1Cl)Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2,-3.71 489 | CC(=O)C,-3.8 490 | CC(=C)C,1.16 491 | c1cc(c(cc1Cl)c2cc(c(c(c2)Cl)Cl)Cl)Cl,-3.61 492 | CCCCC[N+](=O)[O-],-2.82 493 | CCC/C=C/C=O,-3.68 494 | CN(C)C(=O)c1ccc(cc1)[N+](=O)[O-],-11.95 495 | C1CCOC1,-3.47 496 | CCCCCCCC,2.88 497 | CCCN(CCC)c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],-3.25 498 | CC(=CCC[C@](C)(C=C)OC(=O)C)C,-2.49 499 | C[C@@H](CCO[N+](=O)[O-])O[N+](=O)[O-],-4.29 500 | CC(C)OC(C)C,-0.53 501 | CCCCC(C)C,2.93 502 | c1(c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)N(=O)=O,-5.22 503 | [C@@H](C(F)(F)F)(Cl)Br,-0.11 504 | CCCCOCCCC,-0.83 505 | CCCCCC1CCCC1,2.55 506 | CC(C)CC(C)C,2.83 507 | Cc1ccc(nc1)C,-4.72 508 | C/C=C/C=O,-4.22 509 | CCC[C@H](C)CC,2.71 510 | c1cc(c(c(c1)Cl)c2c(cc(cc2Cl)Cl)Cl)Cl,-1.96 511 | c1ccc(cc1)O[C@@H](C(F)F)F,-1.29 512 | COCCOC,-4.84 513 | CC[C@H](C)c1ccccc1,-0.45 514 | c1ccc(cc1)CCCO,-6.92 515 | CC[C@@H](C)c1cc(cc(c1O)[N+](=O)[O-])[N+](=O)[O-],-6.23 516 | COc1ccc(cc1)C(=O)OC,-5.33 517 | CCC(=O)Nc1ccc(c(c1)Cl)Cl,-7.78 518 | C[C@@H](c1ccc2cc(ccc2c1)OC)C(=O)O,-10.21 519 | C1(C(C(C1(F)F)(F)F)(F)F)(F)F,3.43 520 | CC(C)CCOC(=O)C,-2.21 521 | CCCCCCCl,0 522 | CC(C)CC(=O)C,-3.05 523 | CCCCCC=O,-2.81 524 | c1cc(cc(c1)Cl)N,-5.82 525 | 
C1COCCN1,-7.17 526 | CCOC(C)OCC,-3.28 527 | CCCC[N@](CC)c1c(cc(cc1[N+](=O)[O-])C(F)(F)F)[N+](=O)[O-],-3.51 528 | CS,-1.2 529 | C1[C@@H]2[C@H](COS(=O)O1)[C@@]3(C(=C([C@]2(C3(Cl)Cl)Cl)Cl)Cl)Cl,-4.23 530 | CC(=O)c1ccc(cc1)OC,-4.4 531 | C=CCO,-5.03 532 | CCSC,-1.5 533 | CCCCCOC(=O)C,-2.51 534 | c1c(cc(c(c1Cl)Cl)Cl)Cl,-1.62 535 | CC(=O)c1ccccc1,-4.58 536 | CCCl,-0.63 537 | CCCC1CCCC1,2.13 538 | c1c(cc(cc1Cl)Cl)Cl,-0.78 539 | CCCOC(=O)c1ccc(cc1)O,-9.37 540 | c1cc(cc(c1)Cl)O,-6.62 541 | CC(C)CCO,-4.42 542 | CCCCCN,-4.09 543 | Cc1c(c(=O)n(c(=O)[nH]1)C(C)(C)C)Cl,-11.14 544 | CC(C)CCC(C)(C)C,2.93 545 | CCCCOCCO,-6.25 546 | C1[C@@H]2[C@H]3[C@@H]([C@H]1[C@H]4[C@@H]2O4)[C@@]5(C(=C([C@]3(C5(Cl)Cl)Cl)Cl)Cl)Cl,-4.82 547 | c1ccc(cc1)C(=O)N,-11 548 | CC(C)[N+](=O)[O-],-3.13 549 | C(C(CO)O)O,-13.43 550 | CCCI,-0.53 551 | COCCN,-6.55 552 | C(C(Cl)(Cl)Cl)Cl,-1.43 553 | CCC(=O)OC,-2.93 554 | C1CCCC1,1.2 555 | CCc1cccnc1,-4.59 556 | Cc1cc(cnc1)C,-4.84 557 | COCCO,-6.619999999999999 558 | COC=O,-2.78 559 | c1ccc2cc(ccc2c1)N,-7.47 560 | Cc1c[nH]cn1,-10.27 561 | Cc1cccc(c1)[N+](=O)[O-],-3.45 562 | C(CCCl)CCl,-2.32 563 | CC(=O)CO[N+](=O)[O-],-5.99 564 | CC(C)(C)c1ccccc1,-0.44 565 | CCCCCC(=O)OC,-2.49 566 | C[C@@H](C(F)(F)F)O,-4.16 567 | CCCCCBr,-0.1 568 | CCCCCCC=C,1.92 569 | CC1=CC(=O)[C@@H](CC1)C(C)C,-4.51 570 | CC(C)O,-4.74 571 | CCCCCCN,-3.95 572 | C(CO[N+](=O)[O-])CO[N+](=O)[O-],-4.8 573 | Cc1ccc(c(c1)C)O,-6.01 574 | CCCCCO,-4.57 575 | CCC[C@@H](C)O,-4.39 576 | CCCC[C@@H](C)CC,2.97 577 | C[C@@H](c1ccc(cc1)CC(C)C)C(=O)O,-7 578 | CCOC(=O)C[C@H](C(=O)OCC)SP(=S)(OC)OC,-8.15 579 | Cc1ccc(cc1C)O,-6.5 580 | Cc1cc(ccc1Cl)O,-6.79 581 | CCCC/C=C/C,1.68 582 | CCCOCCC,-1.16 583 | C[C@@H]1CC[C@H]([C@@H](C1)O)C(C)C,-3.2 584 | CCNc1nc(nc(n1)SC)NC(C)(C)C,-6.68 585 | CC(C)CC(C)(C)C,2.89 586 | CCCCC(=O)CCCC,-2.64 587 | CCCCN(CC)C(=O)SCCC,-3.64 588 | CCCCCC=C,1.66 589 | CC(C)OC=O,-2.02 590 | CC(OC(=O)C)OC(=O)C,-4.97 591 | c1c(c(=O)[nH]c(=O)[nH]1)Cl,-17.74 592 | CC(=C)c1ccccc1,-1.24 593 | CCC(C)C,2.38 594 
| CCCCO[N+](=O)[O-],-2.09 595 | c1ccc(cc1)Br,-1.46 596 | CC(Cl)(Cl)Cl,-0.19 597 | CC(=C)[C@H]1CCC(=CC1)C=O,-4.09 598 | Cc1ccccc1[N+](=O)[O-],-3.58 599 | CCCCCCCI,0.27 600 | c1cc2ccc3cccc4c3c2c(c1)cc4,-4.52 601 | CCCCCCl,-0.1 602 | CC(C)COC(=O)C,-2.36 603 | CCC(C)(C)C,2.51 604 | c1cc(ccc1N)N(=O)=O,-9.82 605 | COC(=O)CC#N,-6.72 606 | COc1ccc(cc1)N,-7.48 607 | CC(C)Cc1ccccc1,0.16 608 | c1ccc(cc1)c2c(cc(cc2Cl)Cl)Cl,-2.16 609 | CN,-4.55 610 | c1ccc(c(c1)O)Cl,-4.55 611 | c1ccc2c(c1)C(=O)c3ccc(cc3C2=O)N,-11.53 612 | C(=C\Cl)\Cl,-1.17 613 | CCCCC(=O)C,-3.28 614 | C(CO[N+](=O)[O-])O[N+](=O)[O-],-5.73 615 | c1ccc(c(c1)O)F,-5.29 616 | Cc1c(nc(nc1OC(=O)N(C)C)N(C)C)C,-9.41 617 | C=Cc1ccccc1,-1.24 618 | CCOP(=O)(OCC)OCC,-7.5 619 | C(C(F)(F)F)O,-4.31 620 | CCCCOC[C@H](C)O,-5.73 621 | CCCO,-4.85 622 | Cc1ccccc1C,-0.9 623 | CC(C)(C)C,2.51 624 | CCCC#C,0.01 625 | c1ccc2c(c1)C(=O)NC2=O,-9.61 626 | CCCCI,-0.25 627 | Cc1ccc(cc1)O,-6.13 628 | CC(C)I,-0.46 629 | COc1ccccc1O,-5.94 630 | C1CC=CC1,0.56 631 | C[C@H](C(F)(F)F)O,-4.2 632 | CCCN,-4.39 633 | c1ccc(c(c1)[N+](=O)[O-])O,-4.58 634 | Cc1cccc2c1cccc2,-2.44 635 | c1(c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-2.33 636 | CCCCC/C=C/C=O,-3.43 637 | CCCCCCC#C,0.71 638 | CCOP(=S)(OCC)Oc1cc(nc(n1)C(C)C)C,-6.48 639 | CCCCCCCC(=O)OC,-2.04 640 | C1CCNC1,-5.48 641 | c1cc(ccc1C=O)O,-8.83 642 | CCCCCCCCl,0.29 643 | C1COCCO1,-5.06 644 | -------------------------------------------------------------------------------- /example/count_brics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 72, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from rdkit import Chem\n", 10 | "from rdkit.Chem.BRICS import BRICSDecompose\n", 11 | "import pandas as pd\n", 12 | "\n", 13 | "from rdkit import RDLogger\n", 14 | "RDLogger.DisableLog('rdApp.*')" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 73, 20 | "metadata": {}, 21 | "outputs": 
[], 22 | "source": [ 23 | "df = pd.read_csv('lipo.csv')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 74, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "smiles = df['smiles']" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 75, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "0 Cn1c(CN2CCN(c3ccc(Cl)cc3)CC2)nc2ccccc21\n", 44 | "1 COc1cc(OC)c(S(=O)(=O)N2c3ccccc3CCC2C)cc1NC(=O)...\n", 45 | "2 COC(=O)[C@H](c1ccccc1Cl)N1CCc2sccc2C1\n", 46 | "3 O=C(NC1Cc2ccccc2N(C[C@@H](O)CO)C1=O)c1cc2cc(Cl...\n", 47 | "4 Cc1cccc(C[C@H](NC(=O)c2cc(C(C)(C)C)nn2C)C(=O)N...\n", 48 | " ... \n", 49 | "4195 CC(C)c1c(C(=O)NC2C3CC4CC(C3)CC2C4)cnn1-c1ccc(C...\n", 50 | "4196 Nc1ccc(OCCc2ccccc2)cc1\n", 51 | "4197 O=C(Nc1cccc(O)c1)c1ccc(OCCCN2CCCC2)cc1OCc1cccnc1\n", 52 | "4198 Cc1ccnc(NCc2c(O)ccc3ccccc23)c1\n", 53 | "4199 COc1cc(N2CC3CN(CCO)CC(C2)O3)ccc1Nc1ncc(Cl)c(-c...\n", 54 | "Name: smiles, Length: 4200, dtype: object" 55 | ] 56 | }, 57 | "execution_count": 75, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "smiles" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 76, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "def fragment_count(smiles):\n", 73 | " mol = Chem.MolFromSmiles(smiles)\n", 74 | " res = list(BRICSDecompose(mol))\n", 75 | " count = len(res)\n", 76 | " return count" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 77, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "ones = []\n", 86 | "counts = []\n", 87 | "for i in smiles:\n", 88 | " count = fragment_count(i)\n", 89 | " if count == 1:\n", 90 | " ones.append(count)\n", 91 | " counts.append(count)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 78, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "0.0380952380952381" 103 | ] 104 | }, 105 | 
"execution_count": 78, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "len(ones)/len(counts)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 79, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "160" 123 | ] 124 | }, 125 | "execution_count": 79, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "len(ones)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 80, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "df_dist = pd.DataFrame(counts)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 82, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "df_dist.to_csv('lipo_fragment.csv', index=False)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python [conda env:pytorch]", 163 | "language": "python", 164 | "name": "conda-env-pytorch-py" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.7.10" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 4 181 | } 182 | -------------------------------------------------------------------------------- /example/fa_tox21.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 加载数据集" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | 
"import os\n", 17 | "import numpy as np\n", 18 | "import pandas as pd\n", 19 | "from tqdm import tqdm\n", 20 | "from random import Random\n", 21 | "from collections import defaultdict\n", 22 | "\n", 23 | "import torch\n", 24 | "from torch_geometric.data import Data, InMemoryDataset\n", 25 | "from torch_geometric.data import DataLoader\n", 26 | "\n", 27 | "from rdkit import Chem\n", 28 | "from rdkit.Chem.BRICS import FindBRICSBonds\n", 29 | "from rdkit.Chem.Scaffolds import MurckoScaffold\n", 30 | "from rdkit import RDLogger" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# -------------------------------------\n", 40 | "# attentive_fp fashion featurization\n", 41 | "# -------------------------------------\n", 42 | "def onehot_encoding(x, allowable_set):\n", 43 | " if x not in allowable_set:\n", 44 | " raise Exception(\"input {0} not in allowable set{1}:\".format(\n", 45 | " x, allowable_set))\n", 46 | " return [x == s for s in allowable_set]\n", 47 | "\n", 48 | "\n", 49 | "def onehot_encoding_unk(x, allowable_set):\n", 50 | " \"\"\"Maps inputs not in the allowable set to the last element.\"\"\"\n", 51 | " if x not in allowable_set:\n", 52 | " x = allowable_set[-1]\n", 53 | " return [x == s for s in allowable_set]\n", 54 | "\n", 55 | "\n", 56 | "def atom_attr(mol, explicit_H=False, use_chirality=True, pharmaco=True, scaffold=True):\n", 57 | " if pharmaco:\n", 58 | " mol = tag_pharmacophore(mol)\n", 59 | " if scaffold:\n", 60 | " mol = tag_scaffold(mol)\n", 61 | "\n", 62 | " feat = []\n", 63 | " for i, atom in enumerate(mol.GetAtoms()):\n", 64 | " results = onehot_encoding_unk(\n", 65 | " atom.GetSymbol(),\n", 66 | " ['B', 'C', 'N', 'O', 'F', 'Si', 'P', 'S', 'Cl', 'As', 'Se', 'Br', 'Te', 'I', 'At', 'other'\n", 67 | " ]) + onehot_encoding_unk(atom.GetDegree(),\n", 68 | " [0, 1, 2, 3, 4, 5, 'other']) + \\\n", 69 | " [atom.GetFormalCharge(), atom.GetNumRadicalElectrons()] + 
\\\n", 70 | " onehot_encoding_unk(atom.GetHybridization(), [\n", 71 | " Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,\n", 72 | " Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,\n", 73 | " Chem.rdchem.HybridizationType.SP3D2, 'other'\n", 74 | " ]) + [atom.GetIsAromatic()]\n", 75 | " if not explicit_H:\n", 76 | " results = results + onehot_encoding_unk(atom.GetTotalNumHs(),\n", 77 | " [0, 1, 2, 3, 4])\n", 78 | " if use_chirality:\n", 79 | " try:\n", 80 | " results = results + onehot_encoding_unk(\n", 81 | " atom.GetProp('_CIPCode'),\n", 82 | " ['R', 'S']) + [atom.HasProp('_ChiralityPossible')]\n", 83 | " # print(one_of_k_encoding_unk(atom.GetProp('_CIPCode'), ['R', 'S']) + [atom.HasProp('_ChiralityPossible')])\n", 84 | " except:\n", 85 | " results = results + [0, 0] + [atom.HasProp('_ChiralityPossible')]\n", 86 | " if pharmaco:\n", 87 | " results = results + [int(atom.GetProp('Hbond_donor'))] + [int(atom.GetProp('Hbond_acceptor'))] + \\\n", 88 | " [int(atom.GetProp('Basic'))] + [int(atom.GetProp('Acid'))] + \\\n", 89 | " [int(atom.GetProp('Halogen'))]\n", 90 | " if scaffold:\n", 91 | " results = results + [int(atom.GetProp('Scaffold'))]\n", 92 | " feat.append(results)\n", 93 | "\n", 94 | " return np.array(feat)\n", 95 | "\n", 96 | "\n", 97 | "def bond_attr(mol, use_chirality=True):\n", 98 | " feat = []\n", 99 | " index = []\n", 100 | " n = mol.GetNumAtoms()\n", 101 | " for i in range(n):\n", 102 | " for j in range(n):\n", 103 | " if i != j:\n", 104 | " bond = mol.GetBondBetweenAtoms(i, j)\n", 105 | " if bond is not None:\n", 106 | " bt = bond.GetBondType()\n", 107 | " bond_feats = [\n", 108 | " bt == Chem.rdchem.BondType.SINGLE, bt == Chem.rdchem.BondType.DOUBLE,\n", 109 | " bt == Chem.rdchem.BondType.TRIPLE, bt == Chem.rdchem.BondType.AROMATIC,\n", 110 | " bond.GetIsConjugated(),\n", 111 | " bond.IsInRing()\n", 112 | " ]\n", 113 | " if use_chirality:\n", 114 | " bond_feats = bond_feats + onehot_encoding_unk(\n", 115 | " 
str(bond.GetStereo()),\n", 116 | " [\"STEREONONE\", \"STEREOANY\", \"STEREOZ\", \"STEREOE\"])\n", 117 | " feat.append(bond_feats)\n", 118 | " index.append([i, j])\n", 119 | "\n", 120 | " return np.array(index), np.array(feat)\n", 121 | "\n", 122 | "\n", 123 | "def bond_break(mol):\n", 124 | " results = np.array(sorted(list(FindBRICSBonds(mol))), dtype=np.long)\n", 125 | "\n", 126 | " if results.size == 0:\n", 127 | " cluster_idx = []\n", 128 | " Chem.rdmolops.GetMolFrags(mol, asMols=True, frags=cluster_idx)\n", 129 | " fra_edge_index, fra_edge_attr = bond_attr(mol)\n", 130 | "\n", 131 | " else:\n", 132 | " bond_to_break = results[:, 0, :]\n", 133 | " bond_to_break = bond_to_break.tolist()\n", 134 | " with Chem.RWMol(mol) as rwmol:\n", 135 | " for i in bond_to_break:\n", 136 | " rwmol.RemoveBond(*i)\n", 137 | " rwmol = rwmol.GetMol()\n", 138 | " cluster_idx = []\n", 139 | " Chem.rdmolops.GetMolFrags(rwmol, asMols=True, sanitizeFrags=False, frags=cluster_idx)\n", 140 | " fra_edge_index, fra_edge_attr = bond_attr(rwmol)\n", 141 | " cluster_idx = torch.LongTensor(cluster_idx)\n", 142 | "\n", 143 | " return fra_edge_index, fra_edge_attr, cluster_idx" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 3, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# ---------------------------------------------\n", 153 | "# Scaffold and pharmacophore information utils\n", 154 | "# ---------------------------------------------\n", 155 | "# tag pharmoco features to each atom\n", 156 | "fun_smarts = {\n", 157 | " 'Hbond_donor': '[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0]),n&H1&+0]',\n", 158 | " 'Hbond_acceptor': '[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&X2&H0&+0,$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]',\n", 159 | " 'Basic': 
'[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))]),$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))]),$([n;X2;+0;-0])]',\n", 160 | " 'Acid': '[C,S](=[O,S,P])-[O;H1,-1]',\n", 161 | " 'Halogen': '[F,Cl,Br,I]'\n", 162 | " }\n", 163 | "FunQuery = dict([(pharmaco, Chem.MolFromSmarts(s)) for (pharmaco, s) in fun_smarts.items()])\n", 164 | "\n", 165 | "\n", 166 | "def tag_pharmacophore(mol):\n", 167 | " for fungrp, qmol in FunQuery.items():\n", 168 | " matches = mol.GetSubstructMatches(qmol)\n", 169 | " match_idxes = []\n", 170 | " for mat in matches:\n", 171 | " match_idxes.extend(mat)\n", 172 | " for i, atom in enumerate(mol.GetAtoms()):\n", 173 | " tag = '1' if i in match_idxes else '0'\n", 174 | " atom.SetProp(fungrp, tag)\n", 175 | " return mol\n", 176 | "\n", 177 | "\n", 178 | "# tag scaffold information to each atom\n", 179 | "def tag_scaffold(mol):\n", 180 | " core = MurckoScaffold.GetScaffoldForMol(mol)\n", 181 | " match_idxes = mol.GetSubstructMatch(core)\n", 182 | " for i, atom in enumerate(mol.GetAtoms()):\n", 183 | " tag = '1' if i in match_idxes else '0'\n", 184 | " atom.SetProp('Scaffold', tag)\n", 185 | " return mol" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 4, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "# ---------------------------------\n", 195 | "# data and dataset\n", 196 | "# ---------------------------------\n", 197 | "class MolData(Data):\n", 198 | " def __init__(self, fra_edge_index=None, fra_edge_attr=None, cluster_index=None, **kwargs):\n", 199 | " super(MolData, self).__init__(**kwargs)\n", 200 | " self.cluster_index = cluster_index\n", 201 | " self.fra_edge_index = fra_edge_index\n", 202 | " self.fra_edge_attr = fra_edge_attr\n", 203 | "\n", 204 | " def __inc__(self, key, value, *args, **kwargs):\n", 205 | " if key == 'cluster_index':\n", 206 | " return int(self.cluster_index.max()) + 1\n", 207 | " else:\n", 208 | " return 
super().__inc__(key, value, *args, **kwargs)\n", 209 | "\n", 210 | "\n", 211 | "class MolDataset(InMemoryDataset):\n", 212 | "\n", 213 | " def __init__(self, root, dataset, task_type, tasks, logger=None,\n", 214 | " transform=None, pre_transform=None, pre_filter=None):\n", 215 | "\n", 216 | " self.tasks = tasks\n", 217 | " self.dataset = dataset\n", 218 | " self.task_type = task_type\n", 219 | "\n", 220 | " super(MolDataset, self).__init__(root, transform, pre_transform, pre_filter)\n", 221 | " self.data, self.slices = torch.load(self.processed_paths[0])\n", 222 | "\n", 223 | " @property\n", 224 | " def raw_file_names(self):\n", 225 | " return ['{}.csv'.format(self.dataset)]\n", 226 | "\n", 227 | " @property\n", 228 | " def processed_file_names(self):\n", 229 | " return ['{}.pt'.format(self.dataset)]\n", 230 | "\n", 231 | " def download(self):\n", 232 | " pass\n", 233 | "\n", 234 | " def process(self):\n", 235 | " df = pd.read_csv(self.raw_paths[0])\n", 236 | " smilesList = df.smiles.values\n", 237 | " print(f'number of all smiles: {len(smilesList)}')\n", 238 | " remained_smiles = []\n", 239 | " canonical_smiles_list = []\n", 240 | " for smiles in smilesList:\n", 241 | " try:\n", 242 | " canonical_smiles_list.append(Chem.MolToSmiles(Chem.MolFromSmiles(smiles), isomericSmiles=True))\n", 243 | " remained_smiles.append(smiles)\n", 244 | " except:\n", 245 | " print(f'not successfully processed smiles: {smiles}')\n", 246 | " pass\n", 247 | " print(f'number of successfully processed smiles: {len(remained_smiles)}')\n", 248 | "\n", 249 | " df = df[df[\"smiles\"].isin(remained_smiles)].reset_index()\n", 250 | " target = df[self.tasks].values\n", 251 | " smilesList = df.smiles.values\n", 252 | " data_list = []\n", 253 | "\n", 254 | " for i, smi in enumerate(tqdm(smilesList)):\n", 255 | "\n", 256 | " mol = Chem.MolFromSmiles(smi)\n", 257 | " data = self.mol2graph(mol)\n", 258 | "\n", 259 | " if data is not None:\n", 260 | " label = target[i]\n", 261 | " 
label[np.isnan(label)] = 666\n", 262 | " data.y = torch.LongTensor([label])\n", 263 | " if self.task_type == 'regression':\n", 264 | " data.y = torch.FloatTensor([label])\n", 265 | " data_list.append(data)\n", 266 | "\n", 267 | " if self.pre_filter is not None:\n", 268 | " data_list = [data for data in data_list if self.pre_filter(data)]\n", 269 | " if self.pre_transform is not None:\n", 270 | " data_list = [self.pre_transform(data) for data in data_list]\n", 271 | "\n", 272 | " data, slices = self.collate(data_list)\n", 273 | " torch.save((data, slices), self.processed_paths[0])\n", 274 | "\n", 275 | " def mol2graph(self, mol):\n", 276 | " smiles = Chem.MolToSmiles(mol)\n", 277 | " if mol is None: return None\n", 278 | " node_attr = atom_attr(mol)\n", 279 | " edge_index, edge_attr = bond_attr(mol)\n", 280 | " fra_edge_index, fra_edge_attr, cluster_index = bond_break(mol)\n", 281 | " data = MolData(\n", 282 | " x=torch.FloatTensor(node_attr),\n", 283 | " edge_index=torch.LongTensor(edge_index).t(),\n", 284 | " edge_attr=torch.FloatTensor(edge_attr),\n", 285 | " fra_edge_index=torch.LongTensor(fra_edge_index).t(),\n", 286 | " fra_edge_attr=torch.FloatTensor(fra_edge_attr),\n", 287 | " cluster_index=torch.LongTensor(cluster_index),\n", 288 | " y=None,\n", 289 | " smiles=smiles,\n", 290 | " )\n", 291 | " return data" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "## 定义模型" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 5, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "import torch\n", 308 | "from torch import nn\n", 309 | "import torch.nn.functional as F\n", 310 | "from torch.nn import Linear, Sequential, Parameter, Bilinear\n", 311 | "\n", 312 | "from torch_scatter import scatter\n", 313 | "from torch_geometric.nn import global_add_pool, GATConv\n", 314 | "from torch_geometric.nn.conv import MessagePassing\n", 315 | "from torch_geometric.nn.inits import glorot, 
reset\n", 316 | "from torch_geometric.nn.pool.pool import pool_batch\n", 317 | "from torch_geometric.nn.pool.consecutive import consecutive_cluster" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 6, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# ---------------------------------------\n", 327 | "# Attention layers\n", 328 | "# ---------------------------------------\n", 329 | "class FeatureAttention(nn.Module):\n", 330 | " def __init__(self, channels, reduction):\n", 331 | " super().__init__()\n", 332 | " self.mlp = Sequential(\n", 333 | " Linear(channels, channels // reduction, bias=False),\n", 334 | " nn.ReLU(inplace=True),\n", 335 | " Linear(channels // reduction, channels, bias=False),\n", 336 | " )\n", 337 | "\n", 338 | " self.reset_parameters()\n", 339 | "\n", 340 | " def reset_parameters(self):\n", 341 | " reset(self.mlp)\n", 342 | "\n", 343 | " def forward(self, x, batch, size=None):\n", 344 | " max_result = scatter(x, batch, dim=0, dim_size=size, reduce='max')\n", 345 | " sum_result = scatter(x, batch, dim=0, dim_size=size, reduce='sum')\n", 346 | " max_out = self.mlp(max_result)\n", 347 | " sum_out = self.mlp(sum_result)\n", 348 | " y = torch.sigmoid(max_out + sum_out)\n", 349 | " y_ = y\n", 350 | " y = y[batch]\n", 351 | " return x * y, y_" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 7, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "# ---------------------------------------\n", 361 | "# Neural tensor networks conv\n", 362 | "# ---------------------------------------\n", 363 | "class NTNConv(MessagePassing):\n", 364 | "\n", 365 | " def __init__(self, in_channels, out_channels, slices, dropout, edge_dim=None, **kwargs):\n", 366 | " kwargs.setdefault('aggr', 'add')\n", 367 | " super(NTNConv, self).__init__(node_dim=0, **kwargs)\n", 368 | "\n", 369 | " self.in_channels = in_channels\n", 370 | " self.out_channels = out_channels\n", 371 | " self.slices = 
slices\n", 372 | " self.dropout = dropout\n", 373 | " self.edge_dim = edge_dim\n", 374 | "\n", 375 | " self.weight_node = Parameter(torch.Tensor(in_channels,\n", 376 | " out_channels))\n", 377 | " if edge_dim is not None:\n", 378 | " self.weight_edge = Parameter(torch.Tensor(edge_dim,\n", 379 | " out_channels))\n", 380 | " else:\n", 381 | " self.weight_edge = self.register_parameter('weight_edge', None)\n", 382 | "\n", 383 | " self.bilinear = Bilinear(out_channels, out_channels, slices, bias=False)\n", 384 | "\n", 385 | " if self.edge_dim is not None:\n", 386 | " self.linear = Linear(3 * out_channels, slices)\n", 387 | " else:\n", 388 | " self.linear = Linear(2 * out_channels, slices)\n", 389 | "\n", 390 | " self._alpha = None\n", 391 | "\n", 392 | " self.reset_parameters()\n", 393 | "\n", 394 | " def reset_parameters(self):\n", 395 | " glorot(self.weight_node)\n", 396 | " glorot(self.weight_edge)\n", 397 | " self.bilinear.reset_parameters()\n", 398 | " self.linear.reset_parameters()\n", 399 | "\n", 400 | " def forward(self, x, edge_index, edge_attr=None, return_attention_weights=None):\n", 401 | "\n", 402 | " x = torch.matmul(x, self.weight_node)\n", 403 | "\n", 404 | " if self.weight_edge is not None:\n", 405 | " assert edge_attr is not None\n", 406 | " edge_attr = torch.matmul(edge_attr, self.weight_edge)\n", 407 | "\n", 408 | " out = self.propagate(edge_index, x=x, edge_attr=edge_attr)\n", 409 | "\n", 410 | " alpha = self._alpha\n", 411 | " self._alpha = None\n", 412 | "\n", 413 | " if isinstance(return_attention_weights, bool):\n", 414 | " assert alpha is not None\n", 415 | " return out, (edge_index, alpha)\n", 416 | " else:\n", 417 | " return out\n", 418 | "\n", 419 | " def message(self, x_i, x_j, edge_attr):\n", 420 | " score = self.bilinear(x_i, x_j)\n", 421 | " if edge_attr is not None:\n", 422 | " vec = torch.cat((x_i, edge_attr, x_j), 1)\n", 423 | " block_score = self.linear(vec) # bias already included\n", 424 | " else:\n", 425 | " vec = torch.cat((x_i, 
x_j), 1)\n", 426 | " block_score = self.linear(vec)\n", 427 | " scores = score + block_score\n", 428 | " alpha = torch.tanh(scores)\n", 429 | " self._alpha = alpha\n", 430 | " alpha = F.dropout(alpha, p=self.dropout, training=self.training)\n", 431 | "\n", 432 | " dim_split = self.out_channels // self.slices\n", 433 | " out = x_j.view(-1, self.slices, dim_split)\n", 434 | "\n", 435 | " out = out * alpha.view(-1, self.slices, 1)\n", 436 | " out = out.view(-1, self.out_channels)\n", 437 | " return out\n", 438 | "\n", 439 | " def __repr__(self):\n", 440 | " return '{}({}, {}, slices={})'.format(self.__class__.__name__,\n", 441 | " self.in_channels,\n", 442 | " self.out_channels, self.slices)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 8, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "# ---------------------------------------\n", 452 | "# HiGNN backbone\n", 453 | "# ---------------------------------------\n", 454 | "class HiGNN(torch.nn.Module):\n", 455 | " \"\"\"Hierarchical informative graph neural network for molecular representation.\n", 456 | "\n", 457 | " \"\"\"\n", 458 | "\n", 459 | " def __init__(self, in_channels, hidden_channels, out_channels, edge_dim, num_layers,\n", 460 | " slices, dropout, f_att=False, r=4, brics=True, cl=False):\n", 461 | " super(HiGNN, self).__init__()\n", 462 | "\n", 463 | " self.hidden_channels = hidden_channels\n", 464 | " self.num_layers = num_layers\n", 465 | " self.dropout = dropout\n", 466 | "\n", 467 | " self.f_att = f_att\n", 468 | " self.brics = brics\n", 469 | " self.cl = cl\n", 470 | "\n", 471 | " # atom feature transformation\n", 472 | " self.lin_a = Linear(in_channels, hidden_channels)\n", 473 | " self.lin_b = Linear(edge_dim, hidden_channels)\n", 474 | "\n", 475 | " # convs block\n", 476 | " self.atom_convs = torch.nn.ModuleList()\n", 477 | " for _ in range(num_layers):\n", 478 | " conv = NTNConv(hidden_channels, hidden_channels, slices=slices,\n", 479 | " 
dropout=dropout, edge_dim=hidden_channels)\n", 480 | " self.atom_convs.append(conv)\n", 481 | "\n", 482 | " self.lin_gate = Linear(3 * hidden_channels, hidden_channels)\n", 483 | "\n", 484 | " if self.f_att:\n", 485 | " self.feature_att = FeatureAttention(channels=hidden_channels, reduction=r)\n", 486 | "\n", 487 | " if self.brics:\n", 488 | " # mol-fra attention\n", 489 | " self.cross_att = GATConv(hidden_channels, hidden_channels, heads=4,\n", 490 | " dropout=dropout, add_self_loops=False,\n", 491 | " negative_slope=0.01, concat=False)\n", 492 | "\n", 493 | " if self.brics:\n", 494 | " self.out = Linear(2 * hidden_channels, out_channels)\n", 495 | " else:\n", 496 | " self.out = Linear(hidden_channels, out_channels)\n", 497 | "\n", 498 | " if self.cl:\n", 499 | " self.lin_project = Linear(hidden_channels, int(hidden_channels/2))\n", 500 | "\n", 501 | " self.reset_parameters()\n", 502 | "\n", 503 | " def reset_parameters(self):\n", 504 | "\n", 505 | " self.lin_a.reset_parameters()\n", 506 | " self.lin_b.reset_parameters()\n", 507 | "\n", 508 | " for conv in self.atom_convs:\n", 509 | " conv.reset_parameters()\n", 510 | "\n", 511 | " self.lin_gate.reset_parameters()\n", 512 | "\n", 513 | " if self.f_att:\n", 514 | " self.feature_att.reset_parameters()\n", 515 | "\n", 516 | " if self.brics:\n", 517 | " self.cross_att.reset_parameters()\n", 518 | "\n", 519 | " self.out.reset_parameters()\n", 520 | "\n", 521 | " if self.cl:\n", 522 | " self.lin_project.reset_parameters()\n", 523 | "\n", 524 | " def forward(self, data):\n", 525 | " # get mol input\n", 526 | " x = data.x\n", 527 | " edge_index = data.edge_index\n", 528 | " edge_attr = data.edge_attr\n", 529 | " batch = data.batch\n", 530 | "\n", 531 | " x = F.relu(self.lin_a(x)) # (N, 46) -> (N, hidden_channels)\n", 532 | " edge_attr = F.relu(self.lin_b(edge_attr)) # (N, 10) -> (N, hidden_channels)\n", 533 | "\n", 534 | " fa = []\n", 535 | " # mol conv block\n", 536 | " for i in range(0, self.num_layers):\n", 537 | " h = 
F.relu(self.atom_convs[i](x, edge_index, edge_attr))\n", 538 | " beta = self.lin_gate(torch.cat([x, h, x - h], 1)).sigmoid()\n", 539 | " x = beta * x + (1 - beta) * h\n", 540 | " if self.f_att:\n", 541 | " x, y_ = self.feature_att(x, batch)\n", 542 | " fa.append(y_)\n", 543 | "\n", 544 | " mol_vec = global_add_pool(x, batch).relu_()\n", 545 | "\n", 546 | " if self.brics:\n", 547 | " # get fragment input\n", 548 | " fra_x = data.x\n", 549 | " fra_edge_index = data.fra_edge_index\n", 550 | " fra_edge_attr = data.fra_edge_attr\n", 551 | " cluster = data.cluster_index\n", 552 | "\n", 553 | " fra_x = F.relu(self.lin_a(fra_x)) # (N, 46) -> (N, hidden_channels)\n", 554 | " fra_edge_attr = F.leaky_relu_(self.lin_b(fra_edge_attr)) # (N, 10) -> (N, hidden_channels)\n", 555 | "\n", 556 | " # fragment convs block\n", 557 | " for i in range(0, self.num_layers):\n", 558 | " fra_h = F.relu(self.atom_convs[i](fra_x, fra_edge_index, fra_edge_attr))\n", 559 | " beta = self.lin_gate(torch.cat([fra_x, fra_h, fra_x - fra_h], 1)).sigmoid()\n", 560 | " fra_x = beta * fra_x + (1 - beta) * fra_h\n", 561 | " if self.f_att:\n", 562 | " fra_x, _ = self.feature_att(fra_x, cluster)\n", 563 | "\n", 564 | " fra_x = global_add_pool(fra_x, cluster).relu_()\n", 565 | "\n", 566 | " # get fragment batch\n", 567 | " cluster, perm = consecutive_cluster(cluster)\n", 568 | " fra_batch = pool_batch(perm, data.batch)\n", 569 | "\n", 570 | " # molecule-fragment attention\n", 571 | " row = torch.arange(fra_batch.size(0), device=batch.device)\n", 572 | " mol_fra_index = torch.stack([row, fra_batch], dim=0)\n", 573 | " fra_vec = self.cross_att((fra_x, mol_vec), mol_fra_index)\n", 574 | " fra_vec = fra_vec.relu()\n", 575 | "\n", 576 | " vectors_concat = list()\n", 577 | " vectors_concat.append(mol_vec)\n", 578 | " vectors_concat.append(fra_vec)\n", 579 | "\n", 580 | " out = torch.cat(vectors_concat, 1)\n", 581 | "\n", 582 | " # molecule-fragment contrastive\n", 583 | " if self.cl:\n", 584 | " out = 
F.dropout(out, p=self.dropout, training=self.training)\n", 585 | " return self.out(out), self.lin_project(mol_vec).relu_(), self.lin_project(fra_vec).relu_()\n", 586 | " else:\n", 587 | " out = F.dropout(out, p=self.dropout, training=self.training)\n", 588 | " return self.out(out), fa\n", 589 | "\n", 590 | " else:\n", 591 | " assert self.cl is False\n", 592 | " out = F.dropout(mol_vec, p=self.dropout, training=self.training)\n", 593 | " return self.out(out)" 594 | ] 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": {}, 599 | "source": [ 600 | "## 加载模型" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 9, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "def load_best_result(model):\n", 610 | " best_ckpt_path = 'E:/3-Code/Jupternote book/HiGNN_Vis/tox21_best_ckpt.pth'\n", 611 | " ckpt = torch.load(best_ckpt_path, map_location=torch.device('cpu'))\n", 612 | " model.load_state_dict(ckpt['model'])\n", 613 | "\n", 614 | " return model" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 12, 620 | "metadata": {}, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "HiGNN(\n", 627 | " (lin_a): Linear(in_features=46, out_features=256, bias=True)\n", 628 | " (lin_b): Linear(in_features=10, out_features=256, bias=True)\n", 629 | " (atom_convs): ModuleList(\n", 630 | " (0): NTNConv(256, 256, slices=2)\n", 631 | " (1): NTNConv(256, 256, slices=2)\n", 632 | " (2): NTNConv(256, 256, slices=2)\n", 633 | " )\n", 634 | " (lin_gate): Linear(in_features=768, out_features=256, bias=True)\n", 635 | " (feature_att): FeatureAttention(\n", 636 | " (mlp): Sequential(\n", 637 | " (0): Linear(in_features=256, out_features=64, bias=False)\n", 638 | " (1): ReLU(inplace=True)\n", 639 | " (2): Linear(in_features=64, out_features=256, bias=False)\n", 640 | " )\n", 641 | " )\n", 642 | " (cross_att): GATConv(256, 256, heads=4)\n", 643 | " (out): 
Linear(in_features=512, out_features=24, bias=True)\n", 644 | ")\n" 645 | ] 646 | } 647 | ], 648 | "source": [ 649 | "# ---------------------------------------\n", 650 | "# Build HiGNN \n", 651 | "# ---------------------------------------\n", 652 | "model = HiGNN(in_channels=46,\n", 653 | " hidden_channels=256,\n", 654 | " out_channels=24,\n", 655 | " edge_dim=10,\n", 656 | " num_layers=3,\n", 657 | " dropout=0.5,\n", 658 | " slices=2,\n", 659 | " f_att=True,\n", 660 | " r=4,\n", 661 | " brics=True,\n", 662 | " cl=False)\n", 663 | "\n", 664 | "model = load_best_result(model)\n", 665 | "print(model)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "metadata": {}, 671 | "source": [ 672 | "## 进行预测" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 13, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "import csv\n", 682 | "def get_header(path):\n", 683 | " with open(path) as f:\n", 684 | " header = next(csv.reader(f))\n", 685 | "\n", 686 | " return header\n", 687 | "\n", 688 | "\n", 689 | "def get_task_names(path, use_compound_names=False):\n", 690 | " index = 2 if use_compound_names else 1\n", 691 | " task_names = get_header(path)[index:]\n", 692 | "\n", 693 | " return task_names\n", 694 | "\n", 695 | "task_names = get_task_names('E:/3-Code/Jupternote book/HiGNN_Vis/raw/tox21.csv')" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 14, 701 | "metadata": {}, 702 | "outputs": [ 703 | { 704 | "name": "stdout", 705 | "output_type": "stream", 706 | "text": [ 707 | "7831\n" 708 | ] 709 | } 710 | ], 711 | "source": [ 712 | "path = 'E:/3-Code/Jupternote book/HiGNN_Vis'\n", 713 | "dataset = 'tox21'\n", 714 | "task_type = 'classification'\n", 715 | "tasks = task_names\n", 716 | "tox21 = MolDataset(root=path, dataset=dataset, task_type=task_type, tasks=tasks)\n", 717 | "print(len(tox21))" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 15, 723 | "metadata": {}, 724 
| "outputs": [ 725 | { 726 | "data": { 727 | "text/plain": [ 728 | "784" 729 | ] 730 | }, 731 | "execution_count": 15, 732 | "metadata": {}, 733 | "output_type": "execute_result" 734 | } 735 | ], 736 | "source": [ 737 | "seed = 2029\n", 738 | "random = Random(seed)\n", 739 | "indices = list(range(len(tox21)))\n", 740 | "random.seed(seed)\n", 741 | "random.shuffle(indices)\n", 742 | "\n", 743 | "train_size = int(0.8 * len(tox21))\n", 744 | "val_size = int(0.1 * len(tox21))\n", 745 | "test_size = len(tox21) - train_size - val_size\n", 746 | "\n", 747 | "trn_id, val_id, test_id = indices[:train_size], \\\n", 748 | " indices[train_size:(train_size + val_size)], \\\n", 749 | " indices[(train_size + val_size):]\n", 750 | "len(test_id)" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": 18, 756 | "metadata": {}, 757 | "outputs": [], 758 | "source": [ 759 | "tox21_test = tox21[test_id]" 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 19, 765 | "metadata": {}, 766 | "outputs": [ 767 | { 768 | "data": { 769 | "text/plain": [ 770 | "Batch(batch=[14848], cluster_index=[14848], edge_attr=[30878, 10], edge_index=[2, 30878], fra_edge_attr=[26728, 10], fra_edge_index=[2, 26728], ptr=[785], smiles=[784], x=[14848, 46], y=[784, 12])" 771 | ] 772 | }, 773 | "execution_count": 19, 774 | "metadata": {}, 775 | "output_type": "execute_result" 776 | } 777 | ], 778 | "source": [ 779 | "loader = DataLoader(tox21_test, batch_size=784)\n", 780 | "iter_ = iter(loader)\n", 781 | "batch = next(iter_)\n", 782 | "batch" 783 | ] 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": 20, 788 | "metadata": {}, 789 | "outputs": [], 790 | "source": [ 791 | "model.eval() # 关闭dropout\n", 792 | "output = model(batch)" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": 21, 798 | "metadata": {}, 799 | "outputs": [ 800 | { 801 | "data": { 802 | "text/plain": [ 803 | "(tensor([[ 1.0534, -0.9555, 1.7339, ..., 
-2.6404, 2.5901, -2.7638],\n", 804 | " [ 0.9031, -0.9170, 1.1977, ..., -0.7447, 0.3027, -0.4066],\n", 805 | " [-3.2785, 3.7095, -2.5252, ..., -0.5109, 1.3867, -1.5189],\n", 806 | " ...,\n", 807 | " [ 0.8243, -0.8440, 0.6335, ..., 1.4691, -1.6419, 1.4477],\n", 808 | " [-0.1385, 0.2545, 1.5754, ..., 1.5246, -0.5056, 0.4561],\n", 809 | " [-3.0913, 3.2826, -2.1624, ..., 0.3497, 0.1309, -0.6398]],\n", 810 | " grad_fn=),\n", 811 | " [tensor([[0.3651, 0.1889, 0.2367, ..., 0.3447, 0.4278, 0.3908],\n", 812 | " [0.0697, 0.0539, 0.0185, ..., 0.2967, 0.1847, 0.1615],\n", 813 | " [0.0366, 0.0010, 0.0083, ..., 0.0289, 0.3105, 0.0251],\n", 814 | " ...,\n", 815 | " [0.0202, 0.0933, 0.0034, ..., 0.0880, 0.0623, 0.1130],\n", 816 | " [0.0848, 0.4221, 0.0122, ..., 0.2098, 0.1414, 0.2107],\n", 817 | " [0.1586, 0.0085, 0.0413, ..., 0.0190, 0.3240, 0.0879]],\n", 818 | " grad_fn=),\n", 819 | " tensor([[0.3221, 0.2468, 0.3001, ..., 0.3851, 0.3962, 0.4558],\n", 820 | " [0.2039, 0.2449, 0.1982, ..., 0.5176, 0.3347, 0.3741],\n", 821 | " [0.0910, 0.0310, 0.2031, ..., 0.0650, 0.3833, 0.0340],\n", 822 | " ...,\n", 823 | " [0.0378, 0.5996, 0.0107, ..., 0.6107, 0.1029, 0.4877],\n", 824 | " [0.2904, 0.5256, 0.0573, ..., 0.5281, 0.2559, 0.2842],\n", 825 | " [0.2073, 0.3152, 0.0647, ..., 0.0493, 0.3610, 0.1006]],\n", 826 | " grad_fn=),\n", 827 | " tensor([[0.3529, 0.1748, 0.3074, ..., 0.3466, 0.3840, 0.4094],\n", 828 | " [0.3103, 0.3147, 0.3613, ..., 0.5429, 0.3880, 0.4388],\n", 829 | " [0.9104, 0.0557, 0.2807, ..., 0.0112, 0.5828, 0.0215],\n", 830 | " ...,\n", 831 | " [0.0519, 0.3966, 0.0350, ..., 0.4764, 0.1824, 0.3906],\n", 832 | " [0.2906, 0.4100, 0.0939, ..., 0.4763, 0.3089, 0.2490],\n", 833 | " [0.9202, 0.2403, 0.0529, ..., 0.0131, 0.5191, 0.0315]],\n", 834 | " grad_fn=)])" 835 | ] 836 | }, 837 | "execution_count": 21, 838 | "metadata": {}, 839 | "output_type": "execute_result" 840 | } 841 | ], 842 | "source": [ 843 | "output" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | 
"execution_count": 22, 849 | "metadata": {}, 850 | "outputs": [], 851 | "source": [ 852 | "fa = output[1]" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": 23, 858 | "metadata": {}, 859 | "outputs": [], 860 | "source": [ 861 | "fa = [i.detach().numpy() for i in fa]" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": 24, 867 | "metadata": {}, 868 | "outputs": [], 869 | "source": [ 870 | "fa = [i.mean(0) for i in fa]" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": 25, 876 | "metadata": {}, 877 | "outputs": [], 878 | "source": [ 879 | "df_out = pd.DataFrame(fa)" 880 | ] 881 | }, 882 | { 883 | "cell_type": "code", 884 | "execution_count": 26, 885 | "metadata": {}, 886 | "outputs": [], 887 | "source": [ 888 | "df_out = df_out.T" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": 107, 894 | "metadata": {}, 895 | "outputs": [], 896 | "source": [ 897 | "df_out.to_csv('tox21_att.csv', index=False)" 898 | ] 899 | } 900 | ], 901 | "metadata": { 902 | "kernelspec": { 903 | "display_name": "Python [conda env:pytorch]", 904 | "language": "python", 905 | "name": "conda-env-pytorch-py" 906 | }, 907 | "language_info": { 908 | "codemirror_mode": { 909 | "name": "ipython", 910 | "version": 3 911 | }, 912 | "file_extension": ".py", 913 | "mimetype": "text/x-python", 914 | "name": "python", 915 | "nbconvert_exporter": "python", 916 | "pygments_lexer": "ipython3", 917 | "version": "3.7.10" 918 | } 919 | }, 920 | "nbformat": 4, 921 | "nbformat_minor": 2 922 | } 923 | -------------------------------------------------------------------------------- /hignn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/hignn.png -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | hyperopt==0.2.5 2 | numpy==1.19.2 3 | pandas==1.2.4 4 | PyYAML==6.0 5 | rdkit==2021.03.1 6 | scikit_learn==1.1.1 7 | termcolor==1.1.0 8 | torch==1.7.1+cu101 9 | torch_geometric==1.7.1 10 | torch_scatter==2.0.7 11 | tqdm==4.62.2 12 | yacs==0.1.8 13 | -------------------------------------------------------------------------------- /source/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @Author : Weimin Zhu 4 | @Time : 2021-09-28 5 | @File : config.py 6 | """ 7 | 8 | import os 9 | import yaml 10 | from yacs.config import CfgNode as CN 11 | 12 | 13 | _C = CN() 14 | 15 | # ----------------------------------------------------------------------------- 16 | # Experiment settings 17 | # ----------------------------------------------------------------------------- 18 | # Path to output folder 19 | _C.OUTPUT_DIR = "" 20 | # Tag of experiment, overwritten by command line argument 21 | _C.TAG = 'default' 22 | # Fixed random seed 23 | _C.SEED = 1 24 | # Number of folds to run 25 | _C.NUM_FOLDS = 10 26 | # Whether to show individual scores for each task 27 | _C.SHOW_EACH_SCORES = False 28 | # Perform evaluation only, overwritten by command line argument 29 | _C.EVAL_MODE = False 30 | # Frequency to show training epoch 31 | _C.SHOW_FREQ = 5 32 | 33 | # Hyperopt setting 34 | _C.HYPER = False 35 | _C.HYPER_COUNT = 1 36 | _C.HYPER_REMOVE = None 37 | # Number of hyperparameters choice to try 38 | _C.NUM_ITERS = 20 39 | 40 | # ----------------------------------------------------------------------------- 41 | # Data settings 42 | # ----------------------------------------------------------------------------- 43 | _C.DATA = CN() 44 | # Batch size, overwritten by command line argument 45 | _C.DATA.BATCH_SIZE = 64 46 | # Path to dataset, overwritten by command line argument 47 | _C.DATA.DATA_PATH = '../data/' 48 | # Dataset 
name 49 | _C.DATA.DATASET = 'bace' 50 | # Tasks name, override by ~get_task_names~(utlis.py 152) function 51 | _C.DATA.TASK_NAME = None 52 | # Dataset type, 'classification' or 'regression' 53 | _C.DATA.TASK_TYPE = 'classification' 54 | # Metric, choice from ['auc', 'prc', 'rmse', 'mae'] 55 | _C.DATA.METRIC = 'auc' 56 | # How to split data, 'random', 'scaffold' or 'noise' 57 | _C.DATA.SPLIT_TYPE = 'random' 58 | # anti-noise rate for hiv dataset, only works when DATA.SPLIT_TYPE is 'noise' 59 | _C.DATA.RATE = None 60 | 61 | # ----------------------------------------------------------------------------- 62 | # Model settings 63 | # ----------------------------------------------------------------------------- 64 | _C.MODEL = CN() 65 | # Hidden size of HiGNN model 66 | _C.MODEL.HID = 64 67 | # Output size of HiGNN model, override by dataset.py 474 68 | _C.MODEL.OUT_DIM = None 69 | # Number of layers 70 | _C.MODEL.DEPTH = 3 71 | # Number of heads 72 | _C.MODEL.SLICES = 2 73 | # Dropout 74 | _C.MODEL.DROPOUT = 0.2 75 | # Feature attention 76 | _C.MODEL.F_ATT = True 77 | # reduction value 78 | _C.MODEL.R = 4 79 | # Whether to use BRICS information, if set to False, the option LOSS.CL_LOSS is set to False 80 | _C.MODEL.BRICS = True 81 | 82 | # ----------------------------------------------------------------------------- 83 | # Loss settings 84 | # ----------------------------------------------------------------------------- 85 | _C.LOSS = CN() 86 | # Whether to adopt focal loss 87 | _C.LOSS.FL_LOSS = False 88 | # Whether to adopt molecule-fragment contrastive learning 89 | _C.LOSS.CL_LOSS = False 90 | # Alpha 91 | _C.LOSS.ALPHA = 0.1 92 | # Scale logits by the inverse of the temperature 93 | _C.LOSS.TEMPERATURE = 0.1 94 | 95 | # ----------------------------------------------------------------------------- 96 | # Training settings 97 | # ----------------------------------------------------------------------------- 98 | _C.TRAIN = CN() 99 | # Checkpoint to resume, overwritten 
by command line argument 100 | _C.TRAIN.RESUME = None 101 | _C.TRAIN.START_EPOCH = 0 102 | _C.TRAIN.MAX_EPOCHS = 100 103 | # early stopping 104 | _C.TRAIN.EARLY_STOP = -1 105 | 106 | # Tensorboard 107 | _C.TRAIN.TENSORBOARD = CN() 108 | _C.TRAIN.TENSORBOARD.ENABLE = True 109 | 110 | # Optimizer 111 | _C.TRAIN.OPTIMIZER = CN() 112 | _C.TRAIN.OPTIMIZER.TYPE = 'adam' 113 | # Learning rate 114 | _C.TRAIN.OPTIMIZER.BASE_LR = 1e-3 115 | # FPN Learning rate 116 | _C.TRAIN.OPTIMIZER.FP_LR = 4e-5 117 | # SGD momentum 118 | _C.TRAIN.OPTIMIZER.MOMENTUM = 0.9 119 | # Weight decay 120 | _C.TRAIN.OPTIMIZER.WEIGHT_DECAY = 1e-4 121 | 122 | # LR scheduler 123 | _C.TRAIN.LR_SCHEDULER = CN() 124 | _C.TRAIN.LR_SCHEDULER.TYPE = 'reduce' 125 | # NoamLR parameters 126 | _C.TRAIN.LR_SCHEDULER.WARMUP_EPOCHS = 2.0 127 | _C.TRAIN.LR_SCHEDULER.INIT_LR = 1e-4 128 | _C.TRAIN.LR_SCHEDULER.MAX_LR = 1e-2 129 | _C.TRAIN.LR_SCHEDULER.FINAL_LR = 1e-4 130 | # ReduceLRonPlateau 131 | _C.TRAIN.LR_SCHEDULER.FACTOR = 0.7 132 | _C.TRAIN.LR_SCHEDULER.PATIENCE = 10 133 | _C.TRAIN.LR_SCHEDULER.MIN_LR = 1e-5 134 | 135 | 136 | def _update_config_from_file(config, cfg_file): 137 | config.defrost() 138 | with open(cfg_file, 'r') as f: 139 | yaml_cfg = yaml.load(f, Loader=yaml.FullLoader) 140 | 141 | for cfg in yaml_cfg.setdefault('BASE', ['']): 142 | if cfg: 143 | _update_config_from_file( 144 | config, os.path.join(os.path.dirname(cfg_file), cfg) 145 | ) 146 | config.merge_from_file(cfg_file) 147 | config.freeze() 148 | 149 | 150 | def update_config(cfg, args): 151 | _update_config_from_file(cfg, args.cfg) 152 | 153 | cfg.defrost() 154 | if args.opts: 155 | cfg.merge_from_list(args.opts) 156 | # merge from specific arguments 157 | if args.batch_size: 158 | cfg.DATA.BATCH_SIZE = args.batch_size 159 | if args.lr_scheduler: 160 | cfg.TRAIN.LR_SCHEDULER.TYPE = args.lr_scheduler 161 | if args.resume: 162 | cfg.TRAIN.RESUME = args.resume 163 | if args.tag: 164 | cfg.TAG = args.tag 165 | if args.eval: 166 | 
cfg.EVAL_MODE = True 167 | 168 | # output folder 169 | cfg.OUTPUT_DIR = os.path.join(cfg.OUTPUT_DIR, cfg.TAG) 170 | 171 | cfg.freeze() 172 | 173 | 174 | def get_config(args): 175 | """Get a yacs CfgNode object with default values.""" 176 | # Return a clone so that the defaults will not be altered 177 | # This is for the "local variable" use pattern 178 | cfg = _C.clone() 179 | update_config(cfg, args) 180 | 181 | return cfg 182 | -------------------------------------------------------------------------------- /source/cross_validate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @Author : Weimin Zhu 4 | @Time : 2021-10-01 5 | @File : cross_validate.py 6 | """ 7 | 8 | import os 9 | import yaml 10 | import numpy as np 11 | from copy import deepcopy 12 | from hyperopt import fmin, hp, tpe 13 | 14 | import torch 15 | 16 | from model import build_model 17 | from train import train, parse_args 18 | from utils import create_logger, get_task_names 19 | 20 | 21 | # --------------------------------------- 22 | # 10 folds cross validation 23 | # --------------------------------------- 24 | def cross_validate(cfg, logger): 25 | """k-fold cross-validation. 
26 | 27 | """ 28 | 29 | # Initialize relevant variables 30 | init_seed = cfg.SEED 31 | out_dir = cfg.OUTPUT_DIR 32 | task_names = get_task_names(os.path.join(cfg.DATA.DATA_PATH, 'raw/{}.csv'.format(cfg.DATA.DATASET))) 33 | 34 | # Run training on different random seeds for each fold 35 | all_scores = [] 36 | for fold_num in range(cfg.NUM_FOLDS): 37 | cfg.defrost() 38 | cfg.SEED = init_seed + fold_num 39 | cfg.OUTPUT_DIR = os.path.join(out_dir, f'fold_{fold_num}') 40 | cfg.freeze() 41 | logger.info(f'Fold {fold_num}') 42 | model_scores = train(cfg, logger) 43 | all_scores.append(model_scores) 44 | all_scores = np.array(all_scores) 45 | 46 | # Report results 47 | cfg.defrost() 48 | cfg.OUTPUT_DIR = out_dir 49 | cfg.freeze() 50 | logger.info(f'{cfg.NUM_FOLDS}-fold cross validation') 51 | 52 | # Report scores for each fold 53 | for fold_num, scores in enumerate(all_scores): 54 | logger.info(f'Seed {init_seed + fold_num} ==> test {cfg.DATA.METRIC} = {np.nanmean(scores):.3f}') 55 | if cfg.SHOW_EACH_SCORES: 56 | for task_name, score in zip(task_names, scores): 57 | logger.info(f'Seed {init_seed + fold_num} ==> test {task_name} {cfg.DATA.METRIC} = {score:.3f}') 58 | 59 | # Report scores across models 60 | avg_scores = np.nanmean(all_scores, axis=1) 61 | mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores) 62 | logger.info(f'Overall test {cfg.DATA.METRIC} = {mean_score:.3f} ± {std_score:.3f}') 63 | 64 | if cfg.SHOW_EACH_SCORES: 65 | for task_num, task_name in enumerate(task_names): 66 | logger.info(f'Overall test {task_name} {cfg.DATA.METRIC} = ' 67 | f'{np.nanmean(all_scores[:, task_num]):.3f} ± {np.nanstd(all_scores[:, task_num]):.3f}') 68 | 69 | return mean_score, std_score 70 | 71 | 72 | # --------------------------------------- 73 | # Hyperparameters optimization 74 | # --------------------------------------- 75 | SPACE = { 76 | 'MODEL.HID': hp.choice('dim', [64, 128, 256]), 77 | 'MODEL.SLICES': hp.choice('slices', [1, 2, 4]), 78 | 'MODEL.DROPOUT': 
hp.quniform('dropout', low=0.0, high=0.5, q=0.1), 79 | 'MODEL.DEPTH': hp.choice('depth', [2, 3, 4]), 80 | 'TRAIN.OPTIMIZER.BASE_LR': hp.loguniform('lr', np.log(1e-4), np.log(1e-2)), 81 | 'TRAIN.OPTIMIZER.WEIGHT_DECAY': hp.choice('l2', [1e-4, 1e-5, 1e-6]), 82 | } 83 | INT_KEYS = ['MODEL.HID', 'MODEL.DEPTH', 'MODEL.SLICES'] 84 | 85 | 86 | def hyperopt(cfg, logger): 87 | """Runs hyperparameter optimization on a HiGNN model. 88 | 89 | """ 90 | # Save path for best hyperparameters 91 | yaml_name = "best_{}_{}.yaml".format(cfg.DATA.DATASET, cfg.TAG) 92 | cfg_save_path = os.path.join(cfg.OUTPUT_DIR, yaml_name) 93 | # Run 94 | results = [] 95 | 96 | # Define hyperparameter optimization 97 | def objective(hyperparams): 98 | # Convert hyperparams from float to int when necessary 99 | for key in INT_KEYS: 100 | hyperparams[key] = int(hyperparams[key]) 101 | 102 | # Update args with hyperparams 103 | hyper_cfg = deepcopy(cfg) 104 | if hyper_cfg.OUTPUT_DIR is not None: 105 | folder_name = f'round_{hyper_cfg.HYPER_COUNT}' 106 | hyper_cfg.defrost() 107 | hyper_cfg.OUTPUT_DIR = os.path.join(hyper_cfg.OUTPUT_DIR, folder_name) 108 | hyper_cfg.freeze() 109 | hyper_cfg.defrost() 110 | opts = list() 111 | for key, value in hyperparams.items(): 112 | opts.append(key) 113 | opts.append(value) 114 | hyper_cfg.merge_from_list(opts) 115 | hyper_cfg.freeze() 116 | 117 | # Record hyperparameters 118 | cfg.defrost() 119 | cfg.HYPER_COUNT += 1 120 | cfg.freeze() 121 | logger.info(f'round_{hyper_cfg.HYPER_COUNT - 1}') 122 | logger.info(hyperparams) 123 | 124 | # Cross validate 125 | mean_score, std_score = cross_validate(hyper_cfg, logger) 126 | 127 | # Record results 128 | temp_model = build_model(hyper_cfg) 129 | num_params = sum(param.numel() for param in temp_model.parameters() if param.requires_grad) 130 | logger.info(f'num params: {num_params:,}') 131 | logger.info(f'{mean_score} ± {std_score} {hyper_cfg.DATA.METRIC}') 132 | 133 | results.append({ 134 | 'mean_score': mean_score, 135 | 
'std_score': std_score, 136 | 'hyperparams': hyperparams, 137 | 'num_params': num_params 138 | }) 139 | 140 | # Deal with nan 141 | if np.isnan(mean_score): 142 | if hyper_cfg.DATA.TASK_TYPE == 'classification': 143 | mean_score = 0 144 | else: 145 | raise ValueError('Can\'t handle nan score for non-classification dataset.') 146 | 147 | return (-1 if hyper_cfg.DATA.TASK_TYPE == 'classification' else 1) * mean_score 148 | 149 | fmin(objective, SPACE, algo=tpe.suggest, max_evals=cfg.NUM_ITERS, verbose=False) 150 | 151 | # Report best result 152 | results = [result for result in results if not np.isnan(result['mean_score'])] 153 | best_result = \ 154 | min(results, key=lambda result: (-1 if cfg.DATA.TASK_TYPE == 'classification' else 1) * result['mean_score']) 155 | logger.info('best result') 156 | logger.info(best_result['hyperparams']) 157 | logger.info(f'num params: {best_result["num_params"]:,}') 158 | logger.info(f'{best_result["mean_score"]} ± {best_result["std_score"]} {cfg.DATA.METRIC}') 159 | 160 | # Save best hyperparameter settings as yaml config file 161 | with open(cfg_save_path, 'w') as f: 162 | yaml.dump(best_result['hyperparams'], f, indent=4, sort_keys=True) 163 | 164 | 165 | if __name__ == '__main__': 166 | _, cfg = parse_args() 167 | 168 | logger = create_logger(cfg) 169 | 170 | # print device mode 171 | if torch.cuda.is_available(): 172 | logger.info('GPU mode...') 173 | else: 174 | logger.info('CPU mode...') 175 | 176 | # training 177 | if cfg.HYPER: 178 | # Add MODEL.R of the feture attention module 179 | if cfg.MODEL.F_ATT: 180 | SPACE.update({'MODEL.R': hp.choice('R', [1, 2, 4])}) 181 | INT_KEYS.append('MODEL.R') 182 | # Add LOSS.ALPHA and LOSS.TEMPERATURE of the contrastive block 183 | if cfg.MODEL.BRICS and cfg.LOSS.CL_LOSS: 184 | SPACE.update({'LOSS.ALPHA': hp.choice('alpha', [0.1, 0.15, 0.2, 0.25])}) 185 | SPACE.update({'LOSS.TEMPERATURE': hp.choice('temperature', [0.07, 0.1, 0.2])}) 186 | # Delete the parameters you don’t want to optimize 
187 | if cfg.HYPER_REMOVE is not None: 188 | for i in cfg.HYPER_REMOVE: 189 | del SPACE[i] 190 | INT_KEYS = [i for i in INT_KEYS if i not in cfg.HYPER_REMOVE] 191 | hyperopt(cfg, logger) 192 | 193 | else: 194 | logger.info(cfg.dump()) 195 | cross_validate(cfg, logger) 196 | 197 | -------------------------------------------------------------------------------- /source/dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @Author : Weimin Zhu 4 | @Time : 2021-09-28 5 | @File : dataset.py 6 | """ 7 | 8 | import os 9 | import numpy as np 10 | import pandas as pd 11 | from tqdm import tqdm 12 | from random import Random 13 | from collections import defaultdict 14 | 15 | import torch 16 | from torch_geometric.data import Data, InMemoryDataset 17 | from torch_geometric.data import DataLoader 18 | 19 | from rdkit import Chem 20 | from rdkit.Chem.BRICS import FindBRICSBonds 21 | from rdkit.Chem.Scaffolds import MurckoScaffold 22 | from rdkit import RDLogger 23 | 24 | from utils import get_task_names 25 | 26 | RDLogger.DisableLog('rdApp.*') 27 | 28 | 29 | # ------------------------------------- 30 | # attentive_fp fashion featurization 31 | # ------------------------------------- 32 | def onehot_encoding(x, allowable_set): 33 | if x not in allowable_set: 34 | raise Exception("input {0} not in allowable set{1}:".format( 35 | x, allowable_set)) 36 | return [x == s for s in allowable_set] 37 | 38 | 39 | def onehot_encoding_unk(x, allowable_set): 40 | """Maps inputs not in the allowable set to the last element.""" 41 | if x not in allowable_set: 42 | x = allowable_set[-1] 43 | return [x == s for s in allowable_set] 44 | 45 | 46 | def atom_attr(mol, explicit_H=False, use_chirality=True, pharmaco=True, scaffold=True): 47 | if pharmaco: 48 | mol = tag_pharmacophore(mol) 49 | if scaffold: 50 | mol = tag_scaffold(mol) 51 | 52 | feat = [] 53 | for i, atom in enumerate(mol.GetAtoms()): 54 | results = 
onehot_encoding_unk( 55 | atom.GetSymbol(), 56 | ['B', 'C', 'N', 'O', 'F', 'Si', 'P', 'S', 'Cl', 'As', 'Se', 'Br', 'Te', 'I', 'At', 'other' 57 | ]) + onehot_encoding_unk(atom.GetDegree(), 58 | [0, 1, 2, 3, 4, 5, 'other']) + \ 59 | [atom.GetFormalCharge(), atom.GetNumRadicalElectrons()] + \ 60 | onehot_encoding_unk(atom.GetHybridization(), [ 61 | Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2, 62 | Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D, 63 | Chem.rdchem.HybridizationType.SP3D2, 'other' 64 | ]) + [atom.GetIsAromatic()] 65 | if not explicit_H: 66 | results = results + onehot_encoding_unk(atom.GetTotalNumHs(), 67 | [0, 1, 2, 3, 4]) 68 | if use_chirality: 69 | try: 70 | results = results + onehot_encoding_unk( 71 | atom.GetProp('_CIPCode'), 72 | ['R', 'S']) + [atom.HasProp('_ChiralityPossible')] 73 | # print(one_of_k_encoding_unk(atom.GetProp('_CIPCode'), ['R', 'S']) + [atom.HasProp('_ChiralityPossible')]) 74 | except: 75 | results = results + [0, 0] + [atom.HasProp('_ChiralityPossible')] 76 | if pharmaco: 77 | results = results + [int(atom.GetProp('Hbond_donor'))] + [int(atom.GetProp('Hbond_acceptor'))] + \ 78 | [int(atom.GetProp('Basic'))] + [int(atom.GetProp('Acid'))] + \ 79 | [int(atom.GetProp('Halogen'))] 80 | if scaffold: 81 | results = results + [int(atom.GetProp('Scaffold'))] 82 | feat.append(results) 83 | 84 | return np.array(feat) 85 | 86 | 87 | def bond_attr(mol, use_chirality=True): 88 | feat = [] 89 | index = [] 90 | n = mol.GetNumAtoms() 91 | for i in range(n): 92 | for j in range(n): 93 | if i != j: 94 | bond = mol.GetBondBetweenAtoms(i, j) 95 | if bond is not None: 96 | bt = bond.GetBondType() 97 | bond_feats = [ 98 | bt == Chem.rdchem.BondType.SINGLE, bt == Chem.rdchem.BondType.DOUBLE, 99 | bt == Chem.rdchem.BondType.TRIPLE, bt == Chem.rdchem.BondType.AROMATIC, 100 | bond.GetIsConjugated(), 101 | bond.IsInRing() 102 | ] 103 | if use_chirality: 104 | bond_feats = bond_feats + onehot_encoding_unk( 105 | 
str(bond.GetStereo()), 106 | ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"]) 107 | feat.append(bond_feats) 108 | index.append([i, j]) 109 | 110 | return np.array(index), np.array(feat) 111 | 112 | 113 | def bond_break(mol): 114 | results = np.array(sorted(list(FindBRICSBonds(mol))), dtype=np.long) 115 | 116 | if results.size == 0: 117 | cluster_idx = [] 118 | Chem.rdmolops.GetMolFrags(mol, asMols=True, frags=cluster_idx) 119 | fra_edge_index, fra_edge_attr = bond_attr(mol) 120 | 121 | else: 122 | bond_to_break = results[:, 0, :] 123 | bond_to_break = bond_to_break.tolist() 124 | with Chem.RWMol(mol) as rwmol: 125 | for i in bond_to_break: 126 | rwmol.RemoveBond(*i) 127 | rwmol = rwmol.GetMol() 128 | cluster_idx = [] 129 | Chem.rdmolops.GetMolFrags(rwmol, asMols=True, sanitizeFrags=False, frags=cluster_idx) 130 | fra_edge_index, fra_edge_attr = bond_attr(rwmol) 131 | cluster_idx = torch.LongTensor(cluster_idx) 132 | 133 | return fra_edge_index, fra_edge_attr, cluster_idx 134 | 135 | 136 | # --------------------------------------------- 137 | # Scaffold and pharmacophore information utils 138 | # --------------------------------------------- 139 | # tag pharmoco features to each atom 140 | fun_smarts = { 141 | 'Hbond_donor': '[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0]),n&H1&+0]', 142 | 'Hbond_acceptor': '[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&X2&H0&+0,$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]', 143 | 'Basic': '[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))]),$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))]),$([n;X2;+0;-0])]', 144 | 'Acid': '[C,S](=[O,S,P])-[O;H1,-1]', 145 | 'Halogen': '[F,Cl,Br,I]' 146 | } 147 | FunQuery = dict([(pharmaco, Chem.MolFromSmarts(s)) for (pharmaco, s) in fun_smarts.items()]) 148 | 149 | 150 | def tag_pharmacophore(mol): 151 | for fungrp, qmol in FunQuery.items(): 152 | matches = mol.GetSubstructMatches(qmol) 153 | match_idxes = [] 154 
class MolData(Data):
    """PyG ``Data`` carrying two graphs per molecule: the atom-level graph
    (x / edge_index / edge_attr, handled by the base class) and the BRICS
    fragment graph (fra_edge_index / fra_edge_attr), plus ``cluster_index``,
    which assigns every atom to its fragment.
    """
    def __init__(self, fra_edge_index=None, fra_edge_attr=None, cluster_index=None, **kwargs):
        super(MolData, self).__init__(**kwargs)
        # atom -> fragment assignment vector (LongTensor, one entry per atom)
        self.cluster_index = cluster_index
        # edges and edge features of the BRICS-fragmented graph
        self.fra_edge_index = fra_edge_index
        self.fra_edge_attr = fra_edge_attr

    def __inc__(self, key, value, *args, **kwargs):
        # When samples are collated into a mini-batch, 'cluster_index' values
        # must be shifted by the number of fragments already in the batch
        # (max fragment id + 1) rather than by the number of atoms.  Every
        # other key falls back to PyG's default increment (which offsets
        # *_index keys by num_nodes, so fra_edge_index is handled correctly).
        if key == 'cluster_index':
            return int(self.cluster_index.max()) + 1
        else:
            return super().__inc__(key, value, *args, **kwargs)
self.logger.info(f'number of all smiles: {len(smilesList)}') 217 | remained_smiles = [] 218 | canonical_smiles_list = [] 219 | for smiles in smilesList: 220 | try: 221 | canonical_smiles_list.append(Chem.MolToSmiles(Chem.MolFromSmiles(smiles), isomericSmiles=True)) 222 | remained_smiles.append(smiles) 223 | except: 224 | self.logger.info(f'not successfully processed smiles: {smiles}') 225 | pass 226 | self.logger.info(f'number of successfully processed smiles: {len(remained_smiles)}') 227 | 228 | df = df[df["smiles"].isin(remained_smiles)].reset_index() 229 | target = df[self.tasks].values 230 | smilesList = df.smiles.values 231 | data_list = [] 232 | 233 | for i, smi in enumerate(tqdm(smilesList)): 234 | 235 | mol = Chem.MolFromSmiles(smi) 236 | data = self.mol2graph(mol) 237 | 238 | if data is not None: 239 | label = target[i] 240 | label[np.isnan(label)] = 666 241 | data.y = torch.LongTensor([label]) 242 | if self.task_type == 'regression': 243 | data.y = torch.FloatTensor([label]) 244 | data_list.append(data) 245 | 246 | if self.pre_filter is not None: 247 | data_list = [data for data in data_list if self.pre_filter(data)] 248 | if self.pre_transform is not None: 249 | data_list = [self.pre_transform(data) for data in data_list] 250 | 251 | data, slices = self.collate(data_list) 252 | torch.save((data, slices), self.processed_paths[0]) 253 | 254 | def mol2graph(self, mol): 255 | smiles = Chem.MolToSmiles(mol) 256 | if mol is None: return None 257 | node_attr = atom_attr(mol) 258 | edge_index, edge_attr = bond_attr(mol) 259 | fra_edge_index, fra_edge_attr, cluster_index = bond_break(mol) 260 | data = MolData( 261 | x=torch.FloatTensor(node_attr), 262 | edge_index=torch.LongTensor(edge_index).t(), 263 | edge_attr=torch.FloatTensor(edge_attr), 264 | fra_edge_index=torch.LongTensor(fra_edge_index).t(), 265 | fra_edge_attr=torch.FloatTensor(fra_edge_attr), 266 | cluster_index=torch.LongTensor(cluster_index), 267 | y=None, 268 | smiles=smiles, 269 | ) 270 | return 
data 271 | 272 | 273 | # --------------------------------- 274 | # load dataset 275 | # --------------------------------- 276 | def load_dataset_random(path, dataset, seed, task_type, tasks=None, logger=None): 277 | save_path = path + 'processed/train_valid_test_{}_seed_{}.ckpt'.format(dataset, seed) 278 | if os.path.isfile(save_path): 279 | trn, val, test = torch.load(save_path) 280 | return trn, val, test 281 | pyg_dataset = MolDataset(root=path, dataset=dataset, task_type=task_type, tasks=tasks, logger=logger) 282 | del pyg_dataset.data.smiles 283 | 284 | # Seed randomness 285 | random = Random(seed) 286 | indices = list(range(len(pyg_dataset))) 287 | random.seed(seed) 288 | random.shuffle(indices) 289 | 290 | train_size = int(0.8 * len(pyg_dataset)) 291 | val_size = int(0.1 * len(pyg_dataset)) 292 | test_size = len(pyg_dataset) - train_size - val_size 293 | 294 | trn_id, val_id, test_id = indices[:train_size], \ 295 | indices[train_size:(train_size + val_size)], \ 296 | indices[(train_size + val_size):] 297 | 298 | trn, val, test = pyg_dataset[torch.LongTensor(trn_id)], \ 299 | pyg_dataset[torch.LongTensor(val_id)], \ 300 | pyg_dataset[torch.LongTensor(test_id)] 301 | 302 | logger.info(f'Total smiles = {len(pyg_dataset):,} | ' 303 | f'train smiles = {train_size:,} | ' 304 | f'val smiles = {val_size:,} | ' 305 | f'test smiles = {test_size:,}') 306 | 307 | assert task_type == 'classification' or 'regression' 308 | if task_type == 'classification': 309 | weights = [] 310 | for i in range(len(tasks)): 311 | validId = np.where((pyg_dataset.data.y[:, i] == 0) | (pyg_dataset.data.y[:, i] == 1))[0] 312 | pos_len = (pyg_dataset.data.y[:, i][validId].sum()).item() 313 | neg_len = len(pyg_dataset.data.y[:, i][validId]) - pos_len 314 | weights.append([(neg_len + pos_len) / neg_len, (neg_len + pos_len) / pos_len]) 315 | trn.weights = weights 316 | 317 | else: 318 | trn.weights = None 319 | 320 | torch.save([trn, val, test], save_path) 321 | return load_dataset_random(path, 
# anti-noise experiments for hiv dataset
def load_dataset_noise(path, dataset, seed, task_type, tasks, rate, logger=None):
    """80/10/10 random split with a fraction ``rate`` of train/val labels
    flipped, used for the anti-noise experiments on the hiv dataset.
    """
    # Cached split: re-use a previously saved (noisy) partition if present.
    save_path = path + 'processed/train_valid_test_{}_seed_{}_noise_{}.ckpt'.format(dataset, seed, int(100*rate))
    if os.path.isfile(save_path):
        trn, val, test = torch.load(save_path)
        return trn, val, test
    pyg_dataset = MolDataset(root=path, dataset=dataset, task_type=task_type, tasks=tasks, logger=logger)
    del pyg_dataset.data.smiles  # smiles no longer needed once featurized

    train_size = int(0.8 * len(pyg_dataset))
    val_size = int(0.1 * len(pyg_dataset))
    test_size = len(pyg_dataset) - train_size - val_size

    # perm maps positions in the shuffled dataset back to indices in the
    # underlying collated storage, so we can flip labels in-place below.
    pyg_dataset, perm = pyg_dataset.shuffle(return_perm=True)
    trn_perm, val_perm = perm[:train_size], perm[train_size:(train_size + val_size)]
    trn_cutoff, val_cutoff = int(train_size * rate), int(val_size*rate)
    trn_noise_perm, val_noise_perm = trn_perm[:trn_cutoff], val_perm[:val_cutoff]
    noise_perm = torch.cat([trn_noise_perm, val_noise_perm])

    # add same rate noise to train set and val set (simply flip the label)
    # NOTE(review): assumes a single binary 0/1 task — flipping 1-y would
    # corrupt multi-task or 666-masked labels; verify for other datasets.
    pyg_dataset.data.y[noise_perm] = 1 - pyg_dataset.data.y[noise_perm]

    trn, val, test = pyg_dataset[:train_size], \
                     pyg_dataset[train_size:(train_size + val_size)], \
                     pyg_dataset[(train_size + val_size):]

    logger.info(f'Total smiles = {len(pyg_dataset):,} | '
                f'train smiles = {train_size:,} | '
                f'val smiles = {val_size:,} | '
                f'test smiles = {test_size:,}')

    # Class-balance weights computed over the whole (already noisy) dataset.
    weights = []
    pos_len = (pyg_dataset.data.y.sum()).item()
    neg_len = len(pyg_dataset) - pos_len
    weights.append([(neg_len + pos_len) / neg_len, (neg_len + pos_len) / pos_len])
    trn.weights = weights
    logger.info(weights)

    torch.save([trn, val, test], save_path)
    # Reload through the cache branch so callers always get the saved split.
    return load_dataset_noise(path, dataset, seed, task_type, tasks, rate)
def generate_scaffold(mol, include_chirality=False):
    """
    Computes the Bemis-Murcko scaffold for a SMILES string.
    :param mol: A SMILES or an RDKit molecule.
    :param include_chirality: Whether to include chirality in the computed scaffold.
    :return: The Bemis-Murcko scaffold for the molecule.
    """
    # isinstance is the idiomatic type check (and also accepts str subclasses,
    # which `type(mol) == str` rejected).
    mol = Chem.MolFromSmiles(mol) if isinstance(mol, str) else mol
    scaffold = MurckoScaffold.MurckoScaffoldSmiles(mol=mol, includeChirality=include_chirality)

    return scaffold
def scaffold_split(pyg_dataset, task_type, tasks, sizes=(0.8, 0.1, 0.1), balanced=True, seed=1, logger=None):
    """Split a dataset by Bemis-Murcko scaffold so molecules sharing a
    scaffold never appear in different partitions (adapted from chemprop).

    :param pyg_dataset: dataset whose ``data.smiles`` holds the SMILES list.
    :param task_type: 'classification' or 'regression'.
    :param tasks: list of task (label column) names.
    :param sizes: (train, val, test) fractions; must sum to 1.
    :param balanced: if True, oversized scaffold sets are forced into train
        and the rest are shuffled; otherwise sets are ordered by size.
    :param seed: RNG seed for the balanced shuffle.
    :return: (train_ids, val_ids, test_ids, weights).
    """
    # Tolerant float comparison: the original `assert sum(sizes) == 1` fails
    # for splits such as (0.7, 0.2, 0.1) due to binary floating point.
    assert abs(sum(sizes) - 1.0) < 1e-6

    # Split
    logger.info('generating scaffold......')
    num = len(pyg_dataset)
    train_size, val_size, test_size = sizes[0] * num, sizes[1] * num, sizes[2] * num
    train_ids, val_ids, test_ids = [], [], []
    train_scaffold_count, val_scaffold_count, test_scaffold_count = 0, 0, 0

    # Map from scaffold to index in the data
    scaffold_to_indices = scaffold_to_smiles(pyg_dataset.data.smiles, use_indices=True)

    # Seed randomness
    random = Random(seed)

    if balanced:  # Put stuff that's bigger than half the val/test size into train, rest just order randomly
        index_sets = list(scaffold_to_indices.values())
        big_index_sets = []
        small_index_sets = []
        for index_set in index_sets:
            if len(index_set) > val_size / 2 or len(index_set) > test_size / 2:
                big_index_sets.append(index_set)
            else:
                small_index_sets.append(index_set)
        random.seed(seed)
        random.shuffle(big_index_sets)
        random.shuffle(small_index_sets)
        index_sets = big_index_sets + small_index_sets
    else:  # Sort from largest to smallest scaffold sets
        index_sets = sorted(list(scaffold_to_indices.values()),
                            key=lambda index_set: len(index_set),
                            reverse=True)

    # Greedily fill train, then val; whatever does not fit goes to test.
    for index_set in index_sets:
        if len(train_ids) + len(index_set) <= train_size:
            train_ids += index_set
            train_scaffold_count += 1
        elif len(val_ids) + len(index_set) <= val_size:
            val_ids += index_set
            val_scaffold_count += 1
        else:
            test_ids += index_set
            test_scaffold_count += 1

    logger.info(f'Total scaffolds = {len(scaffold_to_indices):,} | '
                f'train scaffolds = {train_scaffold_count:,} | '
                f'val scaffolds = {val_scaffold_count:,} | '
                f'test scaffolds = {test_scaffold_count:,}')

    logger.info(f'Total smiles = {num:,} | '
                f'train smiles = {len(train_ids):,} | '
                f'val smiles = {len(val_ids):,} | '
                f'test smiles = {len(test_ids):,}')

    assert len(train_ids) + len(val_ids) + len(test_ids) == len(pyg_dataset)

    # Compute weights
    # The original `assert task_type == 'classification' or 'regression'` was
    # always True (a non-empty string is truthy); test membership instead.
    assert task_type in ('classification', 'regression')
    if task_type == 'classification':
        weights = []
        for i in range(len(tasks)):
            # NOTE(review): missing labels are encoded as 666 upstream and are
            # not masked out here (unlike load_dataset_random) — confirm this
            # is intentional before relying on these weights for sparse tasks.
            pos_len = (pyg_dataset.data.y[:, i].sum()).item()
            neg_len = len(pyg_dataset) - pos_len
            weights.append([(neg_len + pos_len) / neg_len, (neg_len + pos_len) / pos_len])
    else:
        weights = None

    return train_ids, val_ids, test_ids, weights
def build_loader(cfg, logger):
    """Create the train/valid/test DataLoaders and return them together with
    the class-balance weights attached to the training split.
    """
    trn_set, val_set, tst_set = build_dataset(cfg, logger)
    bs = cfg.DATA.BATCH_SIZE

    trn_loader = DataLoader(trn_set, batch_size=bs, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=bs)
    tst_loader = DataLoader(tst_set, batch_size=bs)

    return trn_loader, val_loader, tst_loader, trn_set.weights
class AlignLoss(nn.Module):
    """Mean-squared-error alignment between two embedding batches."""

    def __init__(self):
        super(AlignLoss, self).__init__()
        # mean reduction over all elements
        self.mse = nn.MSELoss(reduction='mean')

    def forward(self, out0, out1):
        return self.mse(out0, out1)
class JointLoss(nn.Module):
    """Supervised loss, optionally blended with a contrastive loss.

    When ``cl_loss`` is given, the result is
    ``alpha * cl_loss(vec0, vec1) + (1 - alpha) * loss(output, target)``;
    otherwise it is plain ``loss(output, target)``.
    """

    def __init__(self, loss, cl_loss=None, alpha=0.5):
        super(JointLoss, self).__init__()
        self.loss = loss
        self.cl_loss = cl_loss
        self.alpha = alpha

    def forward(self, output, target, vec0=None, vec1=None):
        supervised = self.loss(output, target)
        if self.cl_loss is None:
            return supervised
        contrastive = self.cl_loss(vec0, vec1)
        return self.alpha * contrastive + (1 - self.alpha) * supervised
pool_batch 18 | from torch_geometric.nn.pool.consecutive import consecutive_cluster 19 | 20 | 21 | # --------------------------------------- 22 | # Attention layers 23 | # --------------------------------------- 24 | class FeatureAttention(nn.Module): 25 | def __init__(self, channels, reduction): 26 | super().__init__() 27 | self.mlp = Sequential( 28 | Linear(channels, channels // reduction, bias=False), 29 | nn.ReLU(inplace=True), 30 | Linear(channels // reduction, channels, bias=False), 31 | ) 32 | 33 | self.reset_parameters() 34 | 35 | def reset_parameters(self): 36 | reset(self.mlp) 37 | 38 | def forward(self, x, batch, size=None): 39 | max_result = scatter(x, batch, dim=0, dim_size=size, reduce='max') 40 | sum_result = scatter(x, batch, dim=0, dim_size=size, reduce='sum') 41 | max_out = self.mlp(max_result) 42 | sum_out = self.mlp(sum_result) 43 | y = torch.sigmoid(max_out + sum_out) 44 | y = y[batch] 45 | return x * y 46 | 47 | 48 | # --------------------------------------- 49 | # Neural tensor networks conv 50 | # --------------------------------------- 51 | class NTNConv(MessagePassing): 52 | 53 | def __init__(self, in_channels, out_channels, slices, dropout, edge_dim=None, **kwargs): 54 | kwargs.setdefault('aggr', 'add') 55 | super(NTNConv, self).__init__(node_dim=0, **kwargs) 56 | 57 | self.in_channels = in_channels 58 | self.out_channels = out_channels 59 | self.slices = slices 60 | self.dropout = dropout 61 | self.edge_dim = edge_dim 62 | 63 | self.weight_node = Parameter(torch.Tensor(in_channels, 64 | out_channels)) 65 | if edge_dim is not None: 66 | self.weight_edge = Parameter(torch.Tensor(edge_dim, 67 | out_channels)) 68 | else: 69 | self.weight_edge = self.register_parameter('weight_edge', None) 70 | 71 | self.bilinear = Bilinear(out_channels, out_channels, slices, bias=False) 72 | 73 | if self.edge_dim is not None: 74 | self.linear = Linear(3 * out_channels, slices) 75 | else: 76 | self.linear = Linear(2 * out_channels, slices) 77 | 78 | 
    def message(self, x_i, x_j, edge_attr):
        # Neural-tensor scoring: a bilinear interaction between the two
        # endpoint embeddings plus a linear term over their concatenation
        # (optionally including the edge features).  Produces one score per
        # slice.
        score = self.bilinear(x_i, x_j)
        if edge_attr is not None:
            vec = torch.cat((x_i, edge_attr, x_j), 1)
            block_score = self.linear(vec)  # bias already included
        else:
            vec = torch.cat((x_i, x_j), 1)
            block_score = self.linear(vec)
        scores = score + block_score
        alpha = torch.tanh(scores)
        # stash pre-dropout attention so forward() can return it when
        # return_attention_weights is requested
        self._alpha = alpha
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        # Split the channels into `slices` groups; each group is modulated by
        # its own attention coefficient.
        dim_split = self.out_channels // self.slices
        # NOTE(review): element-wise max of neighbour and edge features —
        # this line raises if edge_attr is None, so this conv appears to
        # require edge features in practice; confirm with callers.
        out = torch.max(x_j, edge_attr).view(-1, self.slices, dim_split)

        out = out * alpha.view(-1, self.slices, 1)
        out = out.view(-1, self.out_channels)
        return out
    def __init__(self, in_channels, hidden_channels, out_channels, edge_dim, num_layers,
                 slices, dropout, f_att=False, r=4, brics=True, cl=False):
        """Build the HiGNN modules.

        :param in_channels: atom feature dimension.
        :param hidden_channels: hidden width used throughout the network.
        :param out_channels: output dimension (2 * n_tasks for classification).
        :param edge_dim: bond feature dimension.
        :param num_layers: number of stacked NTNConv layers.
        :param slices: attention slices for NTNConv.
        :param dropout: dropout probability.
        :param f_att: enable channel-wise FeatureAttention.
        :param r: reduction ratio of FeatureAttention.
        :param brics: use the BRICS fragment branch and mol-fragment attention.
        :param cl: additionally return projected vectors for contrastive loss.
        """
        super(HiGNN, self).__init__()

        self.hidden_channels = hidden_channels
        self.num_layers = num_layers
        self.dropout = dropout

        self.f_att = f_att
        self.brics = brics
        self.cl = cl

        # atom feature transformation
        self.lin_a = Linear(in_channels, hidden_channels)
        self.lin_b = Linear(edge_dim, hidden_channels)

        # convs block: the same stack is reused for both the molecule graph
        # and the BRICS fragment graph in forward()
        self.atom_convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = NTNConv(hidden_channels, hidden_channels, slices=slices,
                           dropout=dropout, edge_dim=hidden_channels)
            self.atom_convs.append(conv)

        # gate for the residual mix of layer input and conv output
        self.lin_gate = Linear(3 * hidden_channels, hidden_channels)

        if self.f_att:
            self.feature_att = FeatureAttention(channels=hidden_channels, reduction=r)

        if self.brics:
            # mol-fra attention
            self.cross_att = GATConv(hidden_channels, hidden_channels, heads=4,
                                     dropout=dropout, add_self_loops=False,
                                     negative_slope=0.01, concat=False)

        # output head width doubles when mol and fragment vectors are concatenated
        if self.brics:
            self.out = Linear(2 * hidden_channels, out_channels)
        else:
            self.out = Linear(hidden_channels, out_channels)

        if self.cl:
            # projection head for the molecule-vs-fragment contrastive loss
            self.lin_project = Linear(hidden_channels, int(hidden_channels/2))

        self.reset_parameters()
    def forward(self, data):
        """Run the molecule branch and (optionally) the BRICS fragment branch.

        Returns logits, or (logits, mol_projection, fragment_projection) when
        contrastive learning is enabled.
        """
        # get mol input
        x = data.x
        edge_index = data.edge_index
        edge_attr = data.edge_attr
        batch = data.batch

        x = F.relu(self.lin_a(x))  # (N, 46) -> (N, hidden_channels)
        edge_attr = F.relu(self.lin_b(edge_attr))  # (N, 10) -> (N, hidden_channels)

        # mol conv block: gated residual update per layer
        for i in range(0, self.num_layers):
            h = F.relu(self.atom_convs[i](x, edge_index, edge_attr))
            # beta in (0,1) mixes previous features with the conv output
            beta = self.lin_gate(torch.cat([x, h, x - h], 1)).sigmoid()
            x = beta * x + (1 - beta) * h
            if self.f_att:
                x = self.feature_att(x, batch)

        mol_vec = global_add_pool(x, batch).relu_()

        if self.brics:
            # get fragment input: same atom features, but edges restricted to
            # the BRICS-fragmented graph
            fra_x = data.x
            fra_edge_index = data.fra_edge_index
            fra_edge_attr = data.fra_edge_attr
            cluster = data.cluster_index

            fra_x = F.relu(self.lin_a(fra_x))  # (N, 46) -> (N, hidden_channels)
            fra_edge_attr = F.relu(self.lin_b(fra_edge_attr))  # (N, 10) -> (N, hidden_channels)

            # fragment convs block (weights shared with the molecule block)
            for i in range(0, self.num_layers):
                fra_h = F.relu(self.atom_convs[i](fra_x, fra_edge_index, fra_edge_attr))
                beta = self.lin_gate(torch.cat([fra_x, fra_h, fra_x - fra_h], 1)).sigmoid()
                fra_x = beta * fra_x + (1 - beta) * fra_h
                if self.f_att:
                    # attention pooled per fragment, not per molecule
                    fra_x = self.feature_att(fra_x, cluster)

            # pool atoms into one vector per fragment
            fra_x = global_add_pool(fra_x, cluster).relu_()

            # get fragment batch: map each fragment back to its molecule
            cluster, perm = consecutive_cluster(cluster)
            fra_batch = pool_batch(perm, data.batch)

            # molecule-fragment attention over bipartite fragment->molecule edges
            row = torch.arange(fra_batch.size(0), device=batch.device)
            mol_fra_index = torch.stack([row, fra_batch], dim=0)

            fra_vec = self.cross_att((fra_x, mol_vec), mol_fra_index).relu_()

            vectors_concat = list()
            vectors_concat.append(mol_vec)
            vectors_concat.append(fra_vec)

            out = torch.cat(vectors_concat, 1)

            # molecule-fragment contrastive
            if self.cl:
                out = F.dropout(out, p=self.dropout, training=self.training)
                return self.out(out), self.lin_project(mol_vec).relu_(), self.lin_project(fra_vec).relu_()
            else:
                out = F.dropout(out, p=self.dropout, training=self.training)
                return self.out(out)

        else:
            # BRICS branch disabled: predict from the molecule vector alone
            assert self.cl is False
            out = F.dropout(mol_vec, p=self.dropout, training=self.training)
            return self.out(out)
def train_one_epoch(cfg, model, criterion, trainloader, optimizer, lr_scheduler, device, logger):
    """Train for one epoch and return (mean_loss, mean_metric_over_tasks).

    For classification, ``criterion`` is a per-task list (class-weighted);
    for regression it is a single loss.  Labels equal to 666 mark missing
    targets and are masked out per task.
    """
    model.train()

    losses = []
    # per-task accumulators, keyed by task index and filled lazily below
    y_pred_list = {}
    y_label_list = {}

    for data in trainloader:
        data = data.to(device)
        output = model(data)
        # contrastive models return (logits, vec1, vec2); plain models return logits
        if isinstance(output, tuple):
            output, vec1, vec2 = output
        else:
            output, vec1, vec2 = output, None, None
        loss = 0

        for i in range(len(cfg.DATA.TASK_NAME)):
            if cfg.DATA.TASK_TYPE == 'classification':
                # two logit columns per task
                y_pred = output[:, i * 2:(i + 1) * 2]
                y_label = data.y[:, i].squeeze()
                # keep only samples whose label is a real 0/1 (666 = missing)
                validId = np.where((y_label.cpu().numpy() == 0) | (y_label.cpu().numpy() == 1))[0]

                if len(validId) == 0:
                    continue
                if y_label.dim() == 0:
                    # squeeze() collapsed a batch of one to a scalar
                    y_label = y_label.unsqueeze(0)

                y_pred = y_pred[torch.tensor(validId).to(device)]
                y_label = y_label[torch.tensor(validId).to(device)]

                loss += criterion[i](y_pred, y_label, vec1, vec2)
                # positive-class probability for the metric
                y_pred = F.softmax(y_pred.detach().cpu(), dim=-1)[:, 1].view(-1).numpy()
            else:
                y_pred = output[:, i]
                y_label = data.y[:, i]
                # regression uses a single shared criterion, not a list
                loss += criterion(y_pred, y_label, vec1, vec2)
                y_pred = y_pred.detach().cpu().numpy()

            # lazily create the per-task lists on first use
            try:
                y_label_list[i].extend(y_label.cpu().numpy())
                y_pred_list[i].extend(y_pred)
            except:
                y_label_list[i] = []
                y_pred_list[i] = []
                y_label_list[i].extend(y_label.cpu().numpy())
                y_pred_list[i].extend(y_pred)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # NoamLR steps per batch; other schedulers step per epoch elsewhere
        if isinstance(lr_scheduler, NoamLR):
            lr_scheduler.step()

        losses.append(loss.item())

    # Compute metric
    results = []
    metric_func = get_metric_func(metric=cfg.DATA.METRIC)
    for i, task in enumerate(cfg.DATA.TASK_NAME):
        if cfg.DATA.TASK_TYPE == 'classification':
            # AUC-style metrics are undefined when only one class is present
            nan = False
            if all(target == 0 for target in y_label_list[i]) or all(target == 1 for target in y_label_list[i]):
                nan = True
                logger.info(f'Warning: Found task "{task}" with targets all 0s or all 1s while training')

            if nan:
                results.append(float('nan'))
                continue

            if len(y_label_list[i]) == 0:
                continue

        results.append(metric_func(y_label_list[i], y_pred_list[i]))

    # nan-mean: single-class tasks contribute nan and are ignored
    avg_results = np.nanmean(results)
    trn_loss = np.array(losses).mean()

    return trn_loss, avg_results
y_pred[torch.tensor(validId).to(device)] 167 | y_label = y_label[torch.tensor(validId).to(device)] 168 | 169 | loss += criterion[i](y_pred, y_label, vec1, vec2) 170 | y_pred = F.softmax(y_pred.detach().cpu(), dim=-1)[:, 1].view(-1).numpy() 171 | else: 172 | y_pred = output[:, i] 173 | y_label = data.y[:, i] 174 | loss += criterion(y_pred, y_label, vec1, vec2) 175 | y_pred = y_pred.detach().cpu().numpy() 176 | 177 | try: 178 | y_label_list[i].extend(y_label.cpu().numpy()) 179 | y_pred_list[i].extend(y_pred) 180 | except: 181 | y_label_list[i] = [] 182 | y_pred_list[i] = [] 183 | y_label_list[i].extend(y_label.cpu().numpy()) 184 | y_pred_list[i].extend(y_pred) 185 | losses.append(loss.item()) 186 | 187 | # Compute metric 188 | val_results = [] 189 | metric_func = get_metric_func(metric=cfg.DATA.METRIC) 190 | for i, task in enumerate(cfg.DATA.TASK_NAME): 191 | if cfg.DATA.TASK_TYPE == 'classification': 192 | nan = False 193 | if all(target == 0 for target in y_label_list[i]) or all(target == 1 for target in y_label_list[i]): 194 | nan = True 195 | logger.info(f'Warning: Found task "{task}" with targets all 0s or all 1s while validating') 196 | 197 | if nan: 198 | val_results.append(float('nan')) 199 | continue 200 | 201 | if len(y_label_list[i]) == 0: 202 | continue 203 | 204 | val_results.append(metric_func(y_label_list[i], y_pred_list[i])) 205 | 206 | avg_val_results = np.nanmean(val_results) 207 | val_loss = np.array(losses).mean() 208 | if eval_mode: 209 | logger.info(f'Seed {cfg.SEED} Dataset {cfg.DATA.DATASET} ==> ' 210 | f'The best epoch:{epoch} test_loss:{val_loss:.3f} test_scores:{avg_val_results:.3f}') 211 | return val_results 212 | 213 | return val_loss, avg_val_results 214 | 215 | 216 | def train(cfg, logger): 217 | seed_set(cfg.SEED) 218 | # step 1: dataloder loading, get number of tokens 219 | train_loader, val_loader, test_loader, weights = build_loader(cfg, logger) 220 | # step 2: model loading 221 | model = build_model(cfg) 222 | logger.info(model) 
223 | # device mode 224 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 225 | model.to(device) 226 | 227 | # step 3: optimizer loading 228 | optimizer = build_optimizer(cfg, model) 229 | n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) 230 | logger.info(f"number of params: {n_parameters}") 231 | 232 | # step 4: lr_scheduler loading 233 | lr_scheduler = build_scheduler(cfg, optimizer, steps_per_epoch=len(train_loader)) 234 | 235 | # step 5: loss function loading 236 | if weights is not None: 237 | criterion = [bulid_loss(cfg, torch.Tensor(w).to(device)) for w in weights] 238 | else: 239 | criterion = bulid_loss(cfg) 240 | 241 | # step 6: tensorboard loading 242 | if cfg.TRAIN.TENSORBOARD.ENABLE: 243 | tensorboard_dir = os.path.join(cfg.OUTPUT_DIR, "tensorboard") 244 | if not os.path.exists(tensorboard_dir): 245 | os.makedirs(tensorboard_dir) 246 | else: 247 | tensorboard_dir = None 248 | 249 | if tensorboard_dir is not None: 250 | writer = SummaryWriter(log_dir=tensorboard_dir) 251 | else: 252 | writer = None 253 | 254 | # step 7: model resuming (if training is interrupted, this will work.) 
255 | best_epoch, best_score = 0, 0 if cfg.DATA.TASK_TYPE == 'classification' else float('inf') 256 | if cfg.TRAIN.RESUME: 257 | best_epoch, best_score = load_checkpoint(cfg, model, optimizer, lr_scheduler, logger) 258 | validate(cfg, model, criterion, val_loader, best_epoch, device, logger) 259 | 260 | if cfg.EVAL_MODE: 261 | return 262 | 263 | # step 8: training loop 264 | logger.info("Start training") 265 | early_stop_cnt = 0 266 | start_time = time.time() 267 | for epoch in range(cfg.TRAIN.START_EPOCH, cfg.TRAIN.MAX_EPOCHS): 268 | 269 | # 1: Results after one epoch training 270 | trn_loss, trn_score = train_one_epoch(cfg, model, criterion, train_loader, optimizer, 271 | lr_scheduler, device, logger) 272 | val_loss, val_score = validate(cfg, model, criterion, val_loader, epoch, device, logger) 273 | # Just for observing the testset results during training 274 | test_loss, test_score = validate(cfg, model, criterion, test_loader, epoch, device, logger) 275 | 276 | # 2: Upadate learning rate 277 | if not isinstance(lr_scheduler, NoamLR): 278 | lr_scheduler.step(val_loss) 279 | 280 | # 3: Print results 281 | if epoch % cfg.SHOW_FREQ == 0 or epoch == cfg.TRAIN.MAX_EPOCHS - 1: 282 | lr_cur = lr_scheduler.optimizer.param_groups[0]['lr'] 283 | logger.info(f'Epoch:{epoch} {cfg.DATA.DATASET} trn_loss:{trn_loss:.3f} ' 284 | f'trn_{cfg.DATA.METRIC}:{trn_score:.3f} lr:{lr_cur:.5f}') 285 | logger.info(f'Epoch:{epoch} {cfg.DATA.DATASET} val_loss:{val_loss:.3f} ' 286 | f'val_{cfg.DATA.METRIC}:{val_score:.3f} lr:{lr_cur:.5f}') 287 | logger.info(f'Epoch:{epoch} {cfg.DATA.DATASET} test_loss:{test_loss:.3f} ' 288 | f'test_{cfg.DATA.METRIC}:{test_score:.3f} lr:{lr_cur:.5f}') 289 | 290 | # 4: Tensorboard for training visualization. 
291 | loss_dict, acc_dict = {"train_loss": trn_loss}, {f"train_{cfg.DATA.METRIC}": trn_score} 292 | loss_dict["valid_loss"], acc_dict[f"valid_{cfg.DATA.METRIC}"] = val_loss, val_score 293 | 294 | if cfg.TRAIN.TENSORBOARD.ENABLE: 295 | writer.add_scalars(f"scalar/{cfg.DATA.METRIC}", acc_dict, epoch) 296 | writer.add_scalars("scalar/loss", loss_dict, epoch) 297 | 298 | # 5: Save best results. 299 | if cfg.DATA.TASK_TYPE == 'classification' and val_score > best_score or \ 300 | cfg.DATA.TASK_TYPE == 'regression' and val_score < best_score: 301 | best_score, best_epoch = val_score, epoch 302 | save_best_checkpoint(cfg, epoch, model, best_score, best_epoch, optimizer, lr_scheduler, logger) 303 | early_stop_cnt = 0 304 | else: 305 | early_stop_cnt += 1 306 | # 6: Early stopping. 307 | if early_stop_cnt > cfg.TRAIN.EARLY_STOP > 0: 308 | logger.info('Early stop hitted!') 309 | break 310 | 311 | if cfg.TRAIN.TENSORBOARD.ENABLE: 312 | writer.close() 313 | # 7: Record training time. 314 | total_time = time.time() - start_time 315 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 316 | logger.info(f'Training time {total_time_str}') 317 | 318 | # 8: Evaluation. 
319 | model, best_epoch = load_best_result(cfg, model, logger) 320 | score = validate(cfg, model, criterion, test_loader, best_epoch, device, logger=logger, eval_mode=True) 321 | 322 | return score 323 | 324 | 325 | if __name__ == "__main__": 326 | _, cfg = parse_args() 327 | 328 | logger = create_logger(cfg) 329 | 330 | # print config 331 | logger.info(cfg.dump()) 332 | # print device mode 333 | if torch.cuda.is_available(): 334 | logger.info('GPU mode...') 335 | else: 336 | logger.info('CPU mode...') 337 | # training 338 | train(cfg, logger) 339 | 340 | 341 | -------------------------------------------------------------------------------- /source/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @Author : Weimin Zhu 4 | @Time : 2021-09-28 5 | @File : utils.py 6 | """ 7 | 8 | import os 9 | import csv 10 | import time 11 | import math 12 | import random 13 | import logging 14 | import numpy as np 15 | from termcolor import colored 16 | 17 | import torch 18 | from torch.optim.lr_scheduler import _LRScheduler 19 | 20 | from sklearn.metrics import auc, mean_absolute_error, mean_squared_error, precision_recall_curve, roc_auc_score 21 | 22 | 23 | # ----------------------------------------------------------------------------- 24 | # Set seed for random, numpy, torch, cuda. 25 | # ----------------------------------------------------------------------------- 26 | def seed_set(seed=2021): 27 | random.seed(seed) 28 | os.environ['PYTHONHASHSEED'] = str(seed) 29 | np.random.seed(seed) 30 | torch.manual_seed(seed) 31 | torch.cuda.manual_seed(seed) 32 | torch.cuda.manual_seed_all(seed) 33 | torch.backends.cudnn.deterministic = True 34 | 35 | 36 | # ----------------------------------------------------------------------------- 37 | # Model resuming & checkpoint loading and saving. 
38 | # ----------------------------------------------------------------------------- 39 | def load_checkpoint(cfg, model, optimizer, lr_scheduler, logger): 40 | logger.info(f"==============> Resuming form {cfg.TRAIN.RESUME}....................") 41 | 42 | checkpoint = torch.load(cfg.TRAIN.RESUME, map_location='cpu') 43 | msg = model.load_state_dict(checkpoint['model'], strict=False) 44 | logger.info(msg) 45 | best_epoch, best_auc = 0, 0.0 46 | if not cfg.EVAL_MODE and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: 47 | optimizer.load_state_dict(checkpoint['optimizer']) 48 | lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) 49 | cfg.defrost() 50 | cfg.TRAIN.START_EPOCH = checkpoint['epoch'] + 1 51 | cfg.freeze() 52 | logger.info(f"=> loaded successfully '{cfg.TRAIN.RESUME}' (epoch {checkpoint['epoch']})") 53 | if 'best_auc' in checkpoint: 54 | best_auc = checkpoint['best_auc'] 55 | if 'best_epoch' in checkpoint: 56 | best_epoch = checkpoint['best_epoch'] 57 | 58 | del checkpoint 59 | torch.cuda.empty_cache() 60 | return best_epoch, best_auc 61 | 62 | 63 | def save_best_checkpoint(cfg, epoch, model, best_auc, best_epoch, optimizer, lr_scheduler, logger): 64 | save_state = {'model': model.state_dict(), 65 | 'optimizer': optimizer.state_dict(), 66 | 'lr_scheduler': lr_scheduler.state_dict(), 67 | 'best_auc': best_auc, 68 | 'best_epoch': best_epoch, 69 | 'epoch': epoch, 70 | 'config': cfg} 71 | 72 | ckpt_dir = os.path.join(cfg.OUTPUT_DIR, "checkpoints") 73 | if not os.path.exists(ckpt_dir): 74 | os.makedirs(ckpt_dir) 75 | save_path = os.path.join(ckpt_dir, f'best_ckpt.pth') 76 | torch.save(save_state, save_path) 77 | logger.info(f"best_ckpt saved !!!") 78 | 79 | 80 | def load_best_result(cfg, model, logger): 81 | ckpt_dir = os.path.join(cfg.OUTPUT_DIR, "checkpoints") 82 | best_ckpt_path = os.path.join(ckpt_dir, f'best_ckpt.pth') 83 | logger.info(f'Ckpt loading: {best_ckpt_path}') 84 | ckpt = torch.load(best_ckpt_path) 85 | 
model.load_state_dict(ckpt['model']) 86 | best_epoch = ckpt['best_epoch'] 87 | 88 | return model, best_epoch 89 | 90 | 91 | # ----------------------------------------------------------------------------- 92 | # Log 93 | # ----------------------------------------------------------------------------- 94 | def create_logger(cfg): 95 | # log name 96 | dataset_name = cfg.DATA.DATASET 97 | tag_name = cfg.TAG 98 | time_str = time.strftime("%Y-%m-%d") 99 | log_name = "{}_{}_{}.log".format(dataset_name, tag_name, time_str) 100 | 101 | # log dir 102 | log_dir = os.path.join(cfg.OUTPUT_DIR, "logs") 103 | if not os.path.exists(log_dir): 104 | os.makedirs(log_dir) 105 | 106 | # create logger 107 | logger = logging.getLogger(log_name) 108 | logger.setLevel(logging.DEBUG) 109 | logger.propagate = False 110 | 111 | # create formatter 112 | fmt = '[%(asctime)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 113 | color_fmt = \ 114 | colored('[%(asctime)s]', 'green') + \ 115 | colored('(%(filename)s %(lineno)d): ', 'yellow') + \ 116 | colored('%(levelname)-5s', 'magenta') + ' %(message)s' 117 | 118 | # create console handlers for master process 119 | console_handler = logging.StreamHandler() 120 | console_handler.setLevel(logging.DEBUG) 121 | console_handler.setFormatter( 122 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 123 | logger.addHandler(console_handler) 124 | 125 | # create file handlers 126 | file_handler = logging.FileHandler(os.path.join(log_dir, log_name)) 127 | file_handler.setLevel(logging.DEBUG) 128 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 129 | logger.addHandler(file_handler) 130 | 131 | return logger 132 | 133 | 134 | # ----------------------------------------------------------------------------- 135 | # Data utils 136 | # ----------------------------------------------------------------------------- 137 | def get_header(path): 138 | with open(path) as f: 139 | header = next(csv.reader(f)) 140 | 
141 | return header 142 | 143 | 144 | def get_task_names(path, use_compound_names=False): 145 | index = 2 if use_compound_names else 1 146 | task_names = get_header(path)[index:] 147 | 148 | return task_names 149 | 150 | 151 | # ----------------------------------------------------------------------------- 152 | # Optimizer 153 | # ----------------------------------------------------------------------------- 154 | def build_optimizer(cfg, model): 155 | params = model.parameters() 156 | 157 | opt_lower = cfg.TRAIN.OPTIMIZER.TYPE.lower() 158 | optimizer = None 159 | 160 | if opt_lower == 'sgd': 161 | optimizer = torch.optim.SGD( 162 | params, 163 | lr=cfg.TRAIN.OPTIMIZER.BASE_LR, 164 | momentum=cfg.TRAIN.OPTIMIZER.MOMENTUM, 165 | weight_decay=cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY, 166 | nesterov=True, 167 | ) 168 | elif opt_lower == 'adam': 169 | optimizer = torch.optim.Adam( 170 | params, 171 | lr=cfg.TRAIN.OPTIMIZER.BASE_LR, 172 | weight_decay=cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY, 173 | ) 174 | return optimizer 175 | 176 | 177 | # ----------------------------------------------------------------------------- 178 | # Lr_scheduler 179 | # ----------------------------------------------------------------------------- 180 | def build_scheduler(cfg, optimizer, steps_per_epoch): 181 | if cfg.TRAIN.LR_SCHEDULER.TYPE == "reduce": 182 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 183 | optimizer, 184 | mode='min', 185 | factor=cfg.TRAIN.LR_SCHEDULER.FACTOR, 186 | patience=cfg.TRAIN.LR_SCHEDULER.PATIENCE, 187 | min_lr=cfg.TRAIN.LR_SCHEDULER.MIN_LR 188 | ) 189 | elif cfg.TRAIN.LR_SCHEDULER.TYPE == "noam": 190 | scheduler = NoamLR( 191 | optimizer, 192 | warmup_epochs=[cfg.TRAIN.LR_SCHEDULER.WARMUP_EPOCHS], 193 | total_epochs=[cfg.TRAIN.MAX_EPOCHS], 194 | steps_per_epoch=steps_per_epoch, 195 | init_lr=[cfg.TRAIN.LR_SCHEDULER.INIT_LR], 196 | max_lr=[cfg.TRAIN.LR_SCHEDULER.MAX_LR], 197 | final_lr=[cfg.TRAIN.LR_SCHEDULER.FINAL_LR] 198 | ) 199 | else: 200 | raise 
NotImplementedError("Unsupported LR Scheduler: {}".format(cfg.TRAIN.LR_SCHEDULER.TYPE)) 201 | 202 | return scheduler 203 | 204 | 205 | class NoamLR(_LRScheduler): 206 | def __init__(self, optimizer, warmup_epochs, total_epochs, steps_per_epoch, 207 | init_lr, max_lr, final_lr): 208 | 209 | assert len(optimizer.param_groups) == len(warmup_epochs) == len(total_epochs) == len(init_lr) == \ 210 | len(max_lr) == len(final_lr) 211 | 212 | self.num_lrs = len(optimizer.param_groups) 213 | 214 | self.optimizer = optimizer 215 | self.warmup_epochs = np.array(warmup_epochs) 216 | self.total_epochs = np.array(total_epochs) 217 | self.steps_per_epoch = steps_per_epoch 218 | self.init_lr = np.array(init_lr) 219 | self.max_lr = np.array(max_lr) 220 | self.final_lr = np.array(final_lr) 221 | 222 | self.current_step = 0 223 | self.lr = init_lr 224 | self.warmup_steps = (self.warmup_epochs * self.steps_per_epoch).astype(int) 225 | self.total_steps = self.total_epochs * self.steps_per_epoch 226 | self.linear_increment = (self.max_lr - self.init_lr) / self.warmup_steps 227 | 228 | self.exponential_gamma = (self.final_lr / self.max_lr) ** (1 / (self.total_steps - self.warmup_steps)) 229 | 230 | super(NoamLR, self).__init__(optimizer) 231 | 232 | def get_lr(self): 233 | 234 | return list(self.lr) 235 | 236 | def step(self, current_step=None): 237 | 238 | if current_step is not None: 239 | self.current_step = current_step 240 | else: 241 | self.current_step += 1 242 | 243 | for i in range(self.num_lrs): 244 | if self.current_step <= self.warmup_steps[i]: 245 | self.lr[i] = self.init_lr[i] + self.current_step * self.linear_increment[i] 246 | elif self.current_step <= self.total_steps[i]: 247 | self.lr[i] = self.max_lr[i] * (self.exponential_gamma[i] ** (self.current_step - self.warmup_steps[i])) 248 | else: # theoretically this case should never be reached since training should stop at total_steps 249 | self.lr[i] = self.final_lr[i] 250 | 251 | self.optimizer.param_groups[i]['lr'] = 
self.lr[i] 252 | 253 | 254 | # ----------------------------------------------------------------------------- 255 | # Metric utils 256 | # ----------------------------------------------------------------------------- 257 | def prc_auc(targets, preds): 258 | precision, recall, _ = precision_recall_curve(targets, preds) 259 | return auc(recall, precision) 260 | 261 | 262 | def rmse(targets, preds): 263 | return math.sqrt(mean_squared_error(targets, preds)) 264 | 265 | 266 | def mse(targets, preds): 267 | return mean_squared_error(targets, preds) 268 | 269 | 270 | def get_metric_func(metric): 271 | 272 | if metric == 'auc': 273 | return roc_auc_score 274 | 275 | if metric == 'prc': 276 | return prc_auc 277 | 278 | if metric == 'rmse': 279 | return rmse 280 | 281 | if metric == 'mae': 282 | return mean_absolute_error 283 | 284 | raise ValueError(f'Metric "{metric}" not supported.') 285 | 286 | -------------------------------------------------------------------------------- /test/best_bbbp_seed2021_random.yaml: -------------------------------------------------------------------------------- 1 | MODEL.DEPTH: 2 2 | MODEL.DROPOUT: 0.4 3 | MODEL.HID: 128 4 | MODEL.R: 4 5 | MODEL.SLICES: 1 6 | TRAIN.OPTIMIZER.BASE_LR: 0.0015370819867509245 7 | TRAIN.OPTIMIZER.WEIGHT_DECAY: 1.0e-06 8 | -------------------------------------------------------------------------------- /test/round_22/fold_0/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_0/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_0/tensorboard/events.out.tfevents.1645630520.node03.247884.1050: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_0/tensorboard/events.out.tfevents.1645630520.node03.247884.1050 -------------------------------------------------------------------------------- /test/round_22/fold_0/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630521.node03.247884.1051: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_0/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630521.node03.247884.1051 -------------------------------------------------------------------------------- /test/round_22/fold_0/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630521.node03.247884.1052: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_0/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630521.node03.247884.1052 -------------------------------------------------------------------------------- /test/round_22/fold_0/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630521.node03.247884.1053: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_0/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630521.node03.247884.1053 -------------------------------------------------------------------------------- /test/round_22/fold_0/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630521.node03.247884.1054: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_0/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630521.node03.247884.1054 -------------------------------------------------------------------------------- /test/round_22/fold_1/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_1/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_1/tensorboard/events.out.tfevents.1645630605.node03.247884.1055: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_1/tensorboard/events.out.tfevents.1645630605.node03.247884.1055 -------------------------------------------------------------------------------- /test/round_22/fold_1/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630606.node03.247884.1056: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_1/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630606.node03.247884.1056 -------------------------------------------------------------------------------- /test/round_22/fold_1/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630606.node03.247884.1057: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_1/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630606.node03.247884.1057 -------------------------------------------------------------------------------- 
/test/round_22/fold_1/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630606.node03.247884.1058: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_1/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630606.node03.247884.1058 -------------------------------------------------------------------------------- /test/round_22/fold_1/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630606.node03.247884.1059: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_1/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630606.node03.247884.1059 -------------------------------------------------------------------------------- /test/round_22/fold_2/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_2/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_2/tensorboard/events.out.tfevents.1645630669.node03.247884.1060: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_2/tensorboard/events.out.tfevents.1645630669.node03.247884.1060 -------------------------------------------------------------------------------- /test/round_22/fold_2/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630670.node03.247884.1061: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_2/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630670.node03.247884.1061 -------------------------------------------------------------------------------- /test/round_22/fold_2/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630670.node03.247884.1062: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_2/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630670.node03.247884.1062 -------------------------------------------------------------------------------- /test/round_22/fold_2/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630670.node03.247884.1063: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_2/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630670.node03.247884.1063 -------------------------------------------------------------------------------- /test/round_22/fold_2/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630670.node03.247884.1064: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_2/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630670.node03.247884.1064 -------------------------------------------------------------------------------- /test/round_22/fold_3/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_3/checkpoints/best_ckpt.pth 
-------------------------------------------------------------------------------- /test/round_22/fold_3/tensorboard/events.out.tfevents.1645630751.node03.247884.1065: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_3/tensorboard/events.out.tfevents.1645630751.node03.247884.1065 -------------------------------------------------------------------------------- /test/round_22/fold_3/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630752.node03.247884.1066: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_3/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630752.node03.247884.1066 -------------------------------------------------------------------------------- /test/round_22/fold_3/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630752.node03.247884.1067: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_3/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630752.node03.247884.1067 -------------------------------------------------------------------------------- /test/round_22/fold_3/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630752.node03.247884.1068: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_3/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630752.node03.247884.1068 -------------------------------------------------------------------------------- /test/round_22/fold_3/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630752.node03.247884.1069: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_3/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630752.node03.247884.1069 -------------------------------------------------------------------------------- /test/round_22/fold_4/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_4/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_4/tensorboard/events.out.tfevents.1645630846.node03.247884.1070: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_4/tensorboard/events.out.tfevents.1645630846.node03.247884.1070 -------------------------------------------------------------------------------- /test/round_22/fold_4/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630847.node03.247884.1071: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_4/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630847.node03.247884.1071 -------------------------------------------------------------------------------- /test/round_22/fold_4/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630847.node03.247884.1072: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_4/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630847.node03.247884.1072 
-------------------------------------------------------------------------------- /test/round_22/fold_4/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630847.node03.247884.1073: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_4/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630847.node03.247884.1073 -------------------------------------------------------------------------------- /test/round_22/fold_4/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630847.node03.247884.1074: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_4/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630847.node03.247884.1074 -------------------------------------------------------------------------------- /test/round_22/fold_5/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_5/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_5/tensorboard/events.out.tfevents.1645630956.node03.247884.1075: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_5/tensorboard/events.out.tfevents.1645630956.node03.247884.1075 -------------------------------------------------------------------------------- /test/round_22/fold_5/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630957.node03.247884.1076: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_5/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645630957.node03.247884.1076 -------------------------------------------------------------------------------- /test/round_22/fold_5/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630957.node03.247884.1077: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_5/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645630957.node03.247884.1077 -------------------------------------------------------------------------------- /test/round_22/fold_5/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630957.node03.247884.1078: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_5/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645630957.node03.247884.1078 -------------------------------------------------------------------------------- /test/round_22/fold_5/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630957.node03.247884.1079: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_5/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645630957.node03.247884.1079 -------------------------------------------------------------------------------- /test/round_22/fold_6/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_6/checkpoints/best_ckpt.pth 
-------------------------------------------------------------------------------- /test/round_22/fold_6/tensorboard/events.out.tfevents.1645631062.node03.247884.1080: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_6/tensorboard/events.out.tfevents.1645631062.node03.247884.1080 -------------------------------------------------------------------------------- /test/round_22/fold_6/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631063.node03.247884.1081: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_6/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631063.node03.247884.1081 -------------------------------------------------------------------------------- /test/round_22/fold_6/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631063.node03.247884.1082: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_6/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631063.node03.247884.1082 -------------------------------------------------------------------------------- /test/round_22/fold_6/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631063.node03.247884.1083: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_6/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631063.node03.247884.1083 -------------------------------------------------------------------------------- /test/round_22/fold_6/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631063.node03.247884.1084: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_6/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631063.node03.247884.1084 -------------------------------------------------------------------------------- /test/round_22/fold_7/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_7/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_7/tensorboard/events.out.tfevents.1645631187.node03.247884.1085: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_7/tensorboard/events.out.tfevents.1645631187.node03.247884.1085 -------------------------------------------------------------------------------- /test/round_22/fold_7/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631188.node03.247884.1086: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_7/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631188.node03.247884.1086 -------------------------------------------------------------------------------- /test/round_22/fold_7/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631188.node03.247884.1087: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_7/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631188.node03.247884.1087 
-------------------------------------------------------------------------------- /test/round_22/fold_7/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631188.node03.247884.1088: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_7/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631188.node03.247884.1088 -------------------------------------------------------------------------------- /test/round_22/fold_7/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631188.node03.247884.1089: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_7/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631188.node03.247884.1089 -------------------------------------------------------------------------------- /test/round_22/fold_8/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_8/checkpoints/best_ckpt.pth -------------------------------------------------------------------------------- /test/round_22/fold_8/tensorboard/events.out.tfevents.1645631304.node03.247884.1090: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_8/tensorboard/events.out.tfevents.1645631304.node03.247884.1090 -------------------------------------------------------------------------------- /test/round_22/fold_8/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631305.node03.247884.1091: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_8/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631305.node03.247884.1091 -------------------------------------------------------------------------------- /test/round_22/fold_8/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631305.node03.247884.1092: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_8/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631305.node03.247884.1092 -------------------------------------------------------------------------------- /test/round_22/fold_8/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631305.node03.247884.1093: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_8/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631305.node03.247884.1093 -------------------------------------------------------------------------------- /test/round_22/fold_8/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631305.node03.247884.1094: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_8/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631305.node03.247884.1094 -------------------------------------------------------------------------------- /test/round_22/fold_9/checkpoints/best_ckpt.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_9/checkpoints/best_ckpt.pth 
-------------------------------------------------------------------------------- /test/round_22/fold_9/tensorboard/events.out.tfevents.1645631374.node03.247884.1095: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_9/tensorboard/events.out.tfevents.1645631374.node03.247884.1095 -------------------------------------------------------------------------------- /test/round_22/fold_9/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631375.node03.247884.1096: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_9/tensorboard/scalar_auc_train_auc/events.out.tfevents.1645631375.node03.247884.1096 -------------------------------------------------------------------------------- /test/round_22/fold_9/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631375.node03.247884.1097: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_9/tensorboard/scalar_auc_valid_auc/events.out.tfevents.1645631375.node03.247884.1097 -------------------------------------------------------------------------------- /test/round_22/fold_9/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631375.node03.247884.1098: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_9/tensorboard/scalar_loss_train_loss/events.out.tfevents.1645631375.node03.247884.1098 -------------------------------------------------------------------------------- /test/round_22/fold_9/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631375.node03.247884.1099: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/idrugLab/hignn/4b5c5b4ea61da2a78728fe6d709ea90962503d12/test/round_22/fold_9/tensorboard/scalar_loss_valid_loss/events.out.tfevents.1645631375.node03.247884.1099 --------------------------------------------------------------------------------