├── data └── ADMETlab_data │ ├── chirality_pretrain_canonical.csv │ ├── CHEMBL_Final_random_selected_50.csv │ ├── CYP2C19-sub_canonical.csv │ ├── SkinSen_canonical.csv │ ├── CYP1A2-sub_canonical.csv │ ├── DILI_canonical.csv │ └── CYP2C9-sub_canonical.csv ├── .gitignore ├── figure └── Knowledge-based BERT.png ├── experiment ├── __pycache__ │ ├── my_nn.cpython-37.pyc │ ├── build_data.cpython-36.pyc │ └── build_data.cpython-37.pyc ├── build_dataset_for_tasks.py ├── build_contrastive_dataset_for_tasks.py ├── generate_drugbank_embedding.py ├── contrastive_aug.py ├── atom_embedding_generator.py ├── k_bert_pretrain_chirality_R_S.py ├── k_bert_pretrain_chirality.py ├── build_pretrain_selected_tasks.py ├── build_contrastive_pretrain_selected_tasks.py ├── K_BERT_WCL_pretrain.py ├── K_BERT_pretrain.py ├── build_pretrain_chirality_R_S.py ├── downstream_task.py └── build_data.py ├── .idea ├── inspectionProfiles │ ├── profiles_settings.xml │ └── Project_Default.xml ├── vcs.xml ├── misc.xml ├── modules.xml ├── standard_code.iml ├── Knowledge-based BERT.iml └── workspace.xml └── README.md /data/ADMETlab_data/chirality_pretrain_canonical.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .idea/workspace.xml 3 | data/task_data/drug_smiles.csv 4 | experiment/__pycache__/build_data.cpython-37.pyc 5 | -------------------------------------------------------------------------------- /figure/Knowledge-based BERT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/figure/Knowledge-based BERT.png -------------------------------------------------------------------------------- /experiment/__pycache__/my_nn.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/experiment/__pycache__/my_nn.cpython-37.pyc -------------------------------------------------------------------------------- /experiment/__pycache__/build_data.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/experiment/__pycache__/build_data.cpython-36.pyc -------------------------------------------------------------------------------- /experiment/__pycache__/build_data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/experiment/__pycache__/build_data.cpython-37.pyc -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /experiment/build_dataset_for_tasks.py: -------------------------------------------------------------------------------- 1 | from experiment import build_data 2 | task_list = ['Pgp-sub', 'HIA', 'F(20%)', 'F(30%)', 
"""Generate K-BERT global embeddings for every molecule of a dataset CSV.

For each task in ``task_list`` the script reads
``./data/<task>_canonical.csv``, embeds the ``canonical_smiles`` column and
writes the rows that could be embedded, extended with one
``pretrain_feature_<k>`` column per embedding dimension, to
``./data/embedding/<task>_k_bert_embedding.csv``.
"""
import pandas as pd
import numpy as np

task_list = ['drugbank']


def add_pretrain_features(dataset, embed, smiles_column='canonical_smiles'):
    """Append global-embedding columns to the rows of *dataset* that embed.

    Args:
        dataset: DataFrame with one SMILES string per row in *smiles_column*.
        embed: Callable taking a SMILES string and returning a
            ``(global_embedding, atom_embedding)`` pair; only the first
            element is used.
        smiles_column: Name of the column holding the SMILES strings.

    Returns:
        A new DataFrame containing the rows for which *embed* succeeded,
        followed by ``pretrain_feature_1 .. pretrain_feature_<dim>``.
        When no row can be embedded, the result is empty and carries only
        the original columns (the embedding width is unknown in that case).
    """
    smiles_list = dataset[smiles_column].values.tolist()
    total = len(smiles_list)
    kept_rows = []
    features = []
    for i, smiles in enumerate(smiles_list):
        print("{}/{}".format(i + 1, total))
        try:
            h_global, _ = embed(smiles)
        except Exception as err:
            # Best effort: a molecule that cannot be embedded is dropped from
            # the output instead of aborting the whole run.
            print("skipping {!r}: {}".format(smiles, err))
            continue
        kept_rows.append(i)
        features.append(np.asarray(h_global).ravel())

    kept = dataset.iloc[kept_rows]
    if not features:
        return kept.copy()

    columns = ['pretrain_feature_' + str(k + 1) for k in range(len(features[0]))]
    # Build all feature columns at once rather than inserting them one by one.
    feature_frame = pd.DataFrame(np.vstack(features), columns=columns, index=kept.index)
    return pd.concat([kept, feature_frame], axis=1)


def main():
    """Embed every task listed in ``task_list`` and write the result CSVs."""
    # Imported here so that ``add_pretrain_features`` stays importable without
    # the project package being on the path.
    from experiment.atom_embedding_generator import bert_atom_embedding

    def embed(smiles):
        return bert_atom_embedding(smiles, pretrain_model='pretrain_k_bert_epoch_7.pth')

    for task_name in task_list:
        print(task_name)
        dataset = pd.read_csv('./data/' + task_name + '_canonical.csv', index_col=None)
        dataset = add_pretrain_features(dataset, embed)
        dataset.to_csv('./data/embedding/' + task_name + '_k_bert_embedding.csv', index=False)


if __name__ == "__main__":
    main()
def task_dataset_augmentation(task_name, input_path, output_path, augmentation_num=5):
    """Add randomized-SMILES columns to a dataset CSV and save the result.

    Reads *input_path*, and for each of ``augmentation_num - 1`` rounds adds a
    column ``aug_smiles_<round>`` (0-based) holding a randomized SMILES string
    of every molecule in the ``smiles`` column. Together with the original
    ``smiles`` column this gives ``augmentation_num`` SMILES per molecule.
    A SMILES string that cannot be parsed or re-written is printed and copied
    unchanged into the augmented column.

    Args:
        task_name: Label printed after each augmentation round.
        input_path: CSV file containing a ``smiles`` column.
        output_path: Destination CSV file (written without the index).
        augmentation_num: Total number of SMILES variants per molecule,
            counting the original one.
    """
    origin_dataset = pd.read_csv(input_path, index_col=None)
    smiles_list = origin_dataset['smiles'].values.tolist()
    total = len(smiles_list)

    for i in range(augmentation_num - 1):
        aug_smiles = []
        for j, smiles in enumerate(smiles_list):
            print('{}/{}'.format(j + 1, total))
            randomized = None
            try:
                mol = Chem.MolFromSmiles(smiles)
                # MolFromSmiles signals an unparsable string by returning None.
                if mol is not None:
                    randomized = Chem.MolToSmiles(mol, doRandom=True)
            except Exception:
                # e.g. a non-string cell; fall back to the original value
                # below, but let KeyboardInterrupt/SystemExit propagate.
                randomized = None
            if randomized is None:
                print(smiles)
                randomized = smiles
            aug_smiles.append(randomized)
        origin_dataset['aug_smiles' + '_' + str(i)] = aug_smiles
        print(task_name)
    origin_dataset.to_csv(output_path, index=False)
"""Generate K-BERT global and atom embeddings for a single SMILES string."""
import functools
from experiment.build_data import construct_input_from_smiles
import torch
from experiment.my_nn import EarlyStopping, set_random_seed, BERT_atom_embedding_generator
import os
import numpy as np

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
set_random_seed()


@functools.lru_cache(maxsize=2)
def _load_model(pretrain_model, device):
    """Build the embedding model, load *pretrain_model* into it and cache it.

    Cached per (checkpoint, device) so that embedding many molecules does not
    rebuild the network and reload the checkpoint for every SMILES string.
    """
    model = BERT_atom_embedding_generator(d_model=768, n_layers=6, vocab_size=47,
                                          maxlen=201, d_k=64, d_v=64, n_heads=12, d_ff=768 * 4,
                                          global_label_dim=1, atom_label_dim=15, use_atom=False)
    stopper = EarlyStopping(pretrained_model=pretrain_model,
                            pretrain_layer=6,
                            mode='higher')
    model.to(device)
    stopper.load_pretrained_model(model)
    # Inference only: switch off training-time behaviour such as dropout, if
    # the model has any.
    model.eval()
    return model


def bert_atom_embedding(smiles, pretrain_model='pretrain_k_bert_epoch_7.pth'):
    """Return the K-BERT global and atom embeddings of one molecule.

    Args:
        smiles: SMILES string of the molecule.
        pretrain_model: Name of the pre-trained checkpoint handed to
            ``EarlyStopping`` for loading.

    Returns:
        Tuple ``(h_global, h_atom)`` of NumPy arrays: the two model outputs
        moved to the CPU with singleton dimensions squeezed out.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Tokenise first so that an invalid SMILES fails before any model loading.
    token_idx, _, _, atom_mask_list = construct_input_from_smiles(smiles)
    model = _load_model(pretrain_model, device)

    token_idx = torch.tensor([token_idx]).long().to(device)
    # Positions whose mask value is 1 (the atom tokens, per the mask's name).
    atom_mask_index = np.where(np.array(atom_mask_list) == 1)
    with torch.no_grad():
        h_global, h_atom = model(token_idx, atom_mask_index)
    h_global = h_global.cpu().squeeze().detach().numpy()
    h_atom = h_atom.cpu().squeeze().detach().numpy()
    return h_global, h_atom
"""Continue K-BERT pre-training on the chirality data set.

Starts from the ``pretrain_k_bert_epoch_7.pth`` checkpoint and runs
``run_a_contrastive_pretrain_epoch`` for ``num_epochs`` epochs.
"""
from experiment import build_data
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from experiment.my_nn import collate_pretrain_data, EarlyStopping, run_a_contrastive_pretrain_epoch, \
    set_random_seed, K_BERT
import time
set_random_seed()

# define parameters of model
args = {
    'device': "cuda" if torch.cuda.is_available() else "cpu",
    'batch_size': 32,
    'num_epochs': 50,
    'd_model': 768,
    'n_layers': 6,
    'vocab_size': 47,
    'maxlen': 201,
    'd_k': 64,
    'd_v': 64,
    'd_ff': 768 * 4,
    'n_heads': 12,
    'global_labels_dim': 154,
    'atom_labels_dim': 15,
    'pretrain_layer': 5,
    'lr': 0.00003,
    'pretrain_model': 'pretrain_k_bert_epoch_7.pth',
    'task_name': 'k_bert_chirality',
    # Repo-relative location, matching the other pre-training scripts; the
    # previous value was an absolute path on the author's cluster.
    'pretrain_data_path': '../data/pretrain_data/chirality_pretrain_maccs',
}

pretrain_set = build_data.load_data_for_contrastive_aug_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(dataset=pretrain_set,
                             batch_size=args['batch_size'],
                             shuffle=True,
                             collate_fn=collate_pretrain_data)

# reduction='none' keeps per-element losses; any masking/averaging is left to
# the epoch runner.
loss_criterion_global = torch.nn.BCEWithLogitsLoss(reduction='none')
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(reduction='none')
model = K_BERT(d_model=args['d_model'], n_layers=args['n_layers'], vocab_size=args['vocab_size'],
               maxlen=args['maxlen'], d_k=args['d_k'], d_v=args['d_v'], n_heads=args['n_heads'],
               d_ff=args['d_ff'], global_label_dim=args['global_labels_dim'],
               atom_label_dim=args['atom_labels_dim'])
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(pretrained_model=args['pretrain_model'],
                        pretrain_layer=args['pretrain_layer'],
                        task_name=args['task_name'])
model.to(args['device'])
stopper.load_pretrained_model(model)


for epoch in range(args['num_epochs']):
    start = time.time()
    # Train
    run_a_contrastive_pretrain_epoch(args, epoch, model, pretrain_loader,
                                     loss_criterion_global=loss_criterion_global,
                                     loss_criterion_atom=loss_criterion_atom, optimizer=optimizer)
    # Hand the epoch to the stopper (see EarlyStopping.pretrain_step)
    stopper.pretrain_step(epoch, model)
    elapsed = (time.time() - start)
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
Cc1ccc(C(=O)Nc2ccc3c(c2)N(C)C(=O)c2ccccc2S3)cc1 24 | CC[C@H](C)[C@@H]1O[C@@]2(C=CC1C)CC1C[C@@H](CC=C(C)C(O[C@H]3CC(OC)C(O[C@H]4CC(OC)C(O)C(C)O4)C(C)O3)C(C)C=CC=C3CO[C@@H]4C(O)C(C)=C[C@@H](C(=O)O1)[C@]34O)O2 25 | C/C(S)=N/c1c(C#N)c(-c2ccc(Br)cc2)cn1-c1ccc(S(N)(=O)=O)cc1 26 | CCCc1c(C(=O)OCC)c(C(=O)OCC)c2c(-c3ccc(F)cc3)cc(N3CCOCC3)nn12 27 | O=C(Nc1cc[nH]c(=O)c1)c1ccc(Cl)cc1Oc1ccc(C(F)(F)F)nc1 28 | O=C(COc1ccc(F)cc1F)N1CCCCCCC1 29 | O=C1N(c2ccccc2)c2nc(CN3CCCCC3)[nH]c(=O)c2C2C(N3CCCC3)CCCCN12 30 | CC1(C)[C@@H](OCc2cccc([N+](=O)[O-])c2)CC[C@@]2(C)[C@H]1CC[C@]1(C)[C@@H]2C(=O)C=C2[C@@H]3C[C@@](C)(C(=O)O)CC[C@]3(C)CC[C@]21C 31 | COC(=O)N[C@H](C(=O)N1CCC[C@H]1c1ncc(-c2ccc3c(c2)OC(c2cccc(C4CC4)c2)n2c-3cc3cc(-c4cnc([C@@H]5CCCN5C(=O)[C@@H](NC(=O)OC)C(C)C)[nH]4)ccc32)[nH]1)C(C)C 32 | COC(=O)/C=C/c1cccc(N(Cc2ccc(-c3ccc(SC)cc3)cc2)C(=O)C(C)C)c1 33 | NC[C@@H](CC(=O)O)c1ccc(Cl)cc1 34 | CN1CCN(CC(=O)Nc2ccc3ncnc(Nc4ccc(Cl)cc4)c3c2)CC1 35 | CC(C)n1ncnc1-n1cc2c(n1)-c1ccccc1OCC2 36 | Cc1cc(C)c(CN2C=[N+](Cc3c(C)cc(C)cc3C)C3CCCCC32)c(C)c1 37 | CC(C)n1nnc2cc(-c3nn[nH]n3)ccc21 38 | COc1cc(-c2nnc(SC3=CS(=O)(=O)c4ccccc43)o2)cc(OC)c1OC 39 | C[C@H](NC(=O)C1CC1)c1ccc(CN2Cc3ccc(OCC4CC4(F)F)cc3C(F)C2)cc1 40 | CCc1ccc2sc3c(=O)[nH]c(CN(C)C)nc3c2c1 41 | O=C(NCCc1cccc2ccccc12)C1CCC1 42 | N#CC(C#N)=CC1=C2OC(c3ccccc3)=CC(c3ccccc3)=C2CCC1 43 | N=C(N)c1cnc(OC2CCCC2)nc1N 44 | COc1ccc(-c2csc(-c3cc(C(=N)N)sc3SC)n2)cc1 45 | COc1ccc(N2CC(C(=O)NCCC3=CCCCC3)CC2=O)cc1 46 | CN(C)Cc1ccc(-c2nnc(Nc3cccnc3Oc3ccccc3C(C)(C)C)s2)cc1 47 | C=CC(C)(C)c1c(O)cc(O)c2c1O[C@]13[C@H]4C[C@H](C(=O)[C@]1(/C=C/C(C)(C)O)OC4(C)C)[C@H](OC)[C@H]3C2=O 48 | COc1ccc(CCN/C=C2/C=C(Br)C(=O)OC2=O)cc1 49 | CCc1c(-c2nnc(C3(c4ccc(Cl)cc4)CCC3)s2)nc(-c2ccc(Cl)cc2Cl)n1-c1ccc(Br)cc1 50 | O=C(Cn1cncn1)C12CC3CC(CC(C3)C1)C2 51 | Oc1ccc(-c2cn(-c3cc(O)cc(O)c3)nn2)cc1 52 | -------------------------------------------------------------------------------- /experiment/build_pretrain_selected_tasks.py: 
"""Build MACCS pre-training data for the CHEMBL SMILES set in parallel.

The SMILES list is processed in 10 batches to limit memory use; each batch is
divided among 8 worker processes, and every worker writes its own
``<task>_maccs_<index>.npy`` file (index 1..80).
"""
import multiprocessing
import pandas as pd

task_name = 'CHEMBL'


def split_bounds(n_items, n_parts):
    """Return ``n_parts`` contiguous ``(start, stop)`` slices of ``range(n_items)``.

    Every part holds ``n_items // n_parts`` items except the last, which also
    takes the remainder, so that no item is left out.
    """
    size = n_items // n_parts
    bounds = [(k * size, (k + 1) * size) for k in range(n_parts - 1)]
    bounds.append(((n_parts - 1) * size, n_items))
    return bounds


if __name__ == "__main__":
    # Imported here so that ``split_bounds`` stays importable without the
    # project package being on the path.
    from experiment.build_data import build_maccs_pretrain_data_and_save

    n_thread = 8
    n_batch = 10
    # Same directory the outputs are written to (and where contrastive_aug.py
    # reads CHEMBL.csv from).
    data = pd.read_csv('../data/pretrain_data/' + task_name + '.csv')
    smiles_list = data['smiles'].values.tolist()
    # To avoid running out of memory, the data set is processed in 10 batches.
    for i, (batch_start, batch_stop) in enumerate(split_bounds(len(smiles_list), n_batch)):
        smiles_split = smiles_list[batch_start:batch_stop]

        # creating processes: one per slice of the current batch
        workers = []
        for k, (lo, hi) in enumerate(split_bounds(len(smiles_split), n_thread)):
            save_path = '../data/pretrain_data/' + task_name + '_maccs_' + str(i * n_thread + k + 1) + '.npy'
            workers.append(multiprocessing.Process(target=build_maccs_pretrain_data_and_save,
                                                   args=(smiles_split[lo:hi], save_path)))

        # start all workers of this batch
        for worker in workers:
            worker.start()

        # wait until all workers of this batch have finished
        for worker in workers:
            worker.join()

        # all processes of this batch finished
        print("Done!")
'../data/pretrain_data/'+task_name+'_maccs_contrastive_'+str(i*8+3)+'.npy')) 26 | p4 = multiprocessing.Process(target=build_maccs_pretrain_contrastive_data_and_save, args=(smiles_split[3*n_mol:4*n_mol], 27 | '../data/pretrain_data/'+task_name+'_maccs_contrastive_'+str(i*8+4)+'.npy')) 28 | p5 = multiprocessing.Process(target=build_maccs_pretrain_contrastive_data_and_save, args=(smiles_split[4*n_mol:5*n_mol], 29 | '../data/pretrain_data/'+task_name+'_maccs_contrastive_'+str(i*8+5)+'.npy')) 30 | p6 = multiprocessing.Process(target=build_maccs_pretrain_contrastive_data_and_save, args=(smiles_split[5*n_mol:6*n_mol], 31 | '../data/pretrain_data/'+task_name+'_maccs_contrastive_'+str(i*8+6)+'.npy')) 32 | p7 = multiprocessing.Process(target=build_maccs_pretrain_contrastive_data_and_save, args=(smiles_split[6*n_mol:7*n_mol], 33 | '../data/pretrain_data/'+task_name+'_maccs_contrastive_'+str(i*8+7)+'.npy')) 34 | p8 = multiprocessing.Process(target=build_maccs_pretrain_contrastive_data_and_save, args=(smiles_split[7*n_mol:], 35 | '../data/pretrain_data/'+task_name+'_maccs_contrastive_'+str(i*8+8)+'.npy')) 36 | 37 | # starting my_scaffold_split 1&2 38 | p1.start() 39 | p2.start() 40 | p3.start() 41 | p4.start() 42 | p5.start() 43 | p6.start() 44 | p7.start() 45 | p8.start() 46 | 47 | # wait until my_scaffold_split 1&2 is finished 48 | p1.join() 49 | p2.join() 50 | p3.join() 51 | p4.join() 52 | p5.join() 53 | p6.join() 54 | p7.join() 55 | p8.join() 56 | 57 | 58 | # both processes finished 59 | print("Done!") 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /experiment/K_BERT_WCL_pretrain.py: -------------------------------------------------------------------------------- 1 | from experiment import build_data 2 | import torch 3 | from torch.optim import Adam 4 | from torch.utils.data import DataLoader 5 | from experiment.my_nn import collate_pretrain_data, EarlyStopping, run_a_pretrain_epoch, \ 6 | set_random_seed, K_BERT_WCL 7 | 
import os
import time

# Restrict the process to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
set_random_seed()

# Pre-training script for K_BERT_WCL: loads the MACCS pre-training set, builds the
# model and runs `num_epochs` epochs of multi-label pre-training.

# Model / training hyper-parameters (passed as one dict to the epoch runner).
args = {}
args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
args['batch_size'] = 80
args['num_epochs'] = 50
args['d_model'] = 768
args['n_layers'] = 6
args['vocab_size'] = 47
args['maxlen'] = 201
args['d_k'] = 64
args['d_v'] = 64
args['d_ff'] = 768*4
args['n_heads'] = 12
args['global_labels_dim'] = 154
args['atom_labels_dim'] = 15
args['lr'] = 0.00003
args['task_name'] = 'k_bert_wcl'
args['pretrain_data_path'] = '../data/pretrain_data/CHEMBL_maccs'
pretrain_set = build_data.load_data_for_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(dataset=pretrain_set,
                             batch_size=args['batch_size'],
                             shuffle=True,
                             collate_fn=collate_pretrain_data)

# Positive-class weights for the global (molecule-level) labels:
# 154 entries, one per label (matches args['global_labels_dim']).
global_pos_weight = torch.tensor([
    884.17, 70.71, 43.32, 118.73, 428.67, 829.0, 192.84, 67.89, 533.86, 18.46,
    707.55, 160.14, 23.19, 26.33, 13.38, 12.45, 44.91, 173.58, 40.14, 67.25,
    171.12, 8.84, 8.36, 43.63, 5.87, 10.2, 3.06, 161.72, 101.75, 20.01,
    4.35, 12.62, 331.79, 31.17, 23.19, 5.91, 53.58, 15.73, 10.75, 6.84,
    3.92, 6.52, 6.33, 6.74, 24.7, 2.67, 6.64, 5.4, 6.71, 6.51,
    1.35, 24.07, 5.2, 0.74, 4.78, 6.1, 62.43, 6.1, 12.57, 9.44,
    3.33, 5.71, 4.67, 0.98, 8.2, 1.28, 9.13, 1.1, 1.03, 2.46,
    2.95, 0.74, 6.24, 0.96, 1.72, 2.25, 2.16, 2.87, 1.8, 1.62,
    0.76, 1.78, 1.74, 1.08, 0.65, 0.97, 0.71, 5.08, 0.75, 0.85,
    3.3, 4.79, 1.72, 0.78, 1.46, 1.8, 2.97, 2.18, 0.61, 0.61,
    1.83, 1.19, 4.68, 3.08, 2.83, 0.51, 0.77, 6.31, 0.47, 0.29,
    0.58, 2.76, 1.48, 0.25, 1.33, 0.69, 1.03, 0.97, 3.27, 1.31,
    1.22, 0.85, 1.75, 1.02, 1.13, 0.16, 1.02, 2.2, 1.72, 2.9,
    0.26, 0.69, 0.6, 0.23, 0.76, 0.73, 0.47, 1.13, 0.48, 0.53,
    0.72, 0.38, 0.35, 0.48, 0.12, 0.52, 0.15, 0.28, 0.36, 0.08,
    0.06, 0.03, 0.07, 0.01])
# Positive-class weights for the atom-level labels:
# 15 entries (matches args['atom_labels_dim']).
atom_pos_weight = torch.tensor([4.81, 1.0, 2.23, 53.49, 211.94, 0.49, 2.1, 1.13, 1.22, 1.93,
                                5.74, 15.42, 70.09, 61.47, 23.2])

# Move the weights to the device that was actually selected. The previous
# hard-coded 'cuda' crashed on CPU-only machines even though args['device']
# already falls back to "cpu".
loss_criterion_global = torch.nn.BCEWithLogitsLoss(reduction='none',
                                                   pos_weight=global_pos_weight.to(args['device']))
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(reduction='none',
                                                 pos_weight=atom_pos_weight.to(args['device']))
model = K_BERT_WCL(d_model=args['d_model'], n_layers=args['n_layers'], vocab_size=args['vocab_size'],
                   maxlen=args['maxlen'], d_k=args['d_k'], d_v=args['d_v'], n_heads=args['n_heads'],
                   d_ff=args['d_ff'], global_label_dim=args['global_labels_dim'],
                   atom_label_dim=args['atom_labels_dim'])
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(task_name=args['task_name'])
model.to(args['device'])

for epoch in range(args['num_epochs']):
    start = time.time()
    # Train for one epoch.
    run_a_pretrain_epoch(args, epoch, model, pretrain_loader, loss_criterion_global=loss_criterion_global,
                         loss_criterion_atom=loss_criterion_atom, optimizer=optimizer)
    # NOTE(review): presumably saves a per-epoch checkpoint -- confirm in
    # EarlyStopping.pretrain_step.
    stopper.pretrain_step(epoch, model)
    # Report the wall-clock time of this epoch as h:m:s.
    elapsed = (time.time() - start)
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
from experiment import build_data
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from experiment.my_nn import collate_pretrain_data, EarlyStopping, run_a_contrastive_pretrain_epoch, \
    set_random_seed, K_BERT
import os
import time
set_random_seed()

# Pre-training script for K_BERT: loads the contrastive-augmentation MACCS
# pre-training set, builds the model and runs `num_epochs` pre-training epochs.

# Model / training hyper-parameters (passed as one dict to the epoch runner).
args = {}
args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
args['batch_size'] = 32
args['num_epochs'] = 50
args['d_model'] = 768
args['n_layers'] = 6
args['vocab_size'] = 47
args['maxlen'] = 201
args['d_k'] = 64
args['d_v'] = 64
args['d_ff'] = 768*4
args['n_heads'] = 12
args['global_labels_dim'] = 154
args['atom_labels_dim'] = 15
args['lr'] = 0.00003
args['task_name'] = 'k_bert'
args['pretrain_data_path'] = '../data/pretrain_data/CHEMBL_maccs'

pretrain_set = build_data.load_data_for_contrastive_aug_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(dataset=pretrain_set,
                             batch_size=args['batch_size'],
                             shuffle=True,
                             collate_fn=collate_pretrain_data)

# Positive-class weights for the global (molecule-level) labels:
# 154 entries, one per label (matches args['global_labels_dim']).
global_pos_weight = torch.tensor([
    884.17, 70.71, 43.32, 118.73, 428.67, 829.0, 192.84, 67.89, 533.86, 18.46,
    707.55, 160.14, 23.19, 26.33, 13.38, 12.45, 44.91, 173.58, 40.14, 67.25,
    171.12, 8.84, 8.36, 43.63, 5.87, 10.2, 3.06, 161.72, 101.75, 20.01,
    4.35, 12.62, 331.79, 31.17, 23.19, 5.91, 53.58, 15.73, 10.75, 6.84,
    3.92, 6.52, 6.33, 6.74, 24.7, 2.67, 6.64, 5.4, 6.71, 6.51,
    1.35, 24.07, 5.2, 0.74, 4.78, 6.1, 62.43, 6.1, 12.57, 9.44,
    3.33, 5.71, 4.67, 0.98, 8.2, 1.28, 9.13, 1.1, 1.03, 2.46,
    2.95, 0.74, 6.24, 0.96, 1.72, 2.25, 2.16, 2.87, 1.8, 1.62,
    0.76, 1.78, 1.74, 1.08, 0.65, 0.97, 0.71, 5.08, 0.75, 0.85,
    3.3, 4.79, 1.72, 0.78, 1.46, 1.8, 2.97, 2.18, 0.61, 0.61,
    1.83, 1.19, 4.68, 3.08, 2.83, 0.51, 0.77, 6.31, 0.47, 0.29,
    0.58, 2.76, 1.48, 0.25, 1.33, 0.69, 1.03, 0.97, 3.27, 1.31,
    1.22, 0.85, 1.75, 1.02, 1.13, 0.16, 1.02, 2.2, 1.72, 2.9,
    0.26, 0.69, 0.6, 0.23, 0.76, 0.73, 0.47, 1.13, 0.48, 0.53,
    0.72, 0.38, 0.35, 0.48, 0.12, 0.52, 0.15, 0.28, 0.36, 0.08,
    0.06, 0.03, 0.07, 0.01])
# Tile the 154 weights five times (770 values in total).
# NOTE(review): presumably one copy per SMILES view (original + 4 augmentations)
# produced by the contrastive data builder -- confirm against
# run_a_contrastive_pretrain_epoch.
global_pos_weight = torch.cat([global_pos_weight] * 5, 0)
# Positive-class weights for the atom-level labels:
# 15 entries (matches args['atom_labels_dim']).
atom_pos_weight = torch.tensor([4.81, 1.0, 2.23, 53.49, 211.94, 0.49, 2.1, 1.13, 1.22, 1.93,
                                5.74, 15.42, 70.09, 61.47, 23.2])

# Move the weights to the device that was actually selected. The previous
# hard-coded 'cuda' crashed on CPU-only machines even though args['device']
# already falls back to "cpu".
loss_criterion_global = torch.nn.BCEWithLogitsLoss(reduction='none',
                                                   pos_weight=global_pos_weight.to(args['device']))
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(reduction='none',
                                                 pos_weight=atom_pos_weight.to(args['device']))
model = K_BERT(d_model=args['d_model'], n_layers=args['n_layers'], vocab_size=args['vocab_size'],
               maxlen=args['maxlen'], d_k=args['d_k'], d_v=args['d_v'], n_heads=args['n_heads'],
               d_ff=args['d_ff'], global_label_dim=args['global_labels_dim'],
               atom_label_dim=args['atom_labels_dim'])
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(task_name=args['task_name'])
model.to(args['device'])

for epoch in range(args['num_epochs']):
    start = time.time()
    # Train for one epoch.
    run_a_contrastive_pretrain_epoch(args, epoch, model, pretrain_loader,
                                     loss_criterion_global=loss_criterion_global,
                                     loss_criterion_atom=loss_criterion_atom, optimizer=optimizer)
    # NOTE(review): presumably saves a per-epoch checkpoint -- confirm in
    # EarlyStopping.pretrain_step.
    stopper.pretrain_step(epoch, model)
    # Report the wall-clock time of this epoch as h:m:s.
    elapsed = (time.time() - start)
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
from experiment.build_data import build_pretrain_chirality_R_S_contrastive_data_and_save
import multiprocessing
import pandas as pd

task_name = 'chirality_pretrain_R_S'
if __name__ == "__main__":
    # Build the chirality (R/S) pre-training shards in parallel.
    #
    # The augmented CSV is cut into `n_part` consecutive parts that are processed
    # one after another; each part is split across `n_thread` worker processes,
    # and every worker writes its own .npy shard.
    n_thread = 8   # worker processes per part
    n_part = 10    # sequential parts
    data = pd.read_csv('../data/ADMETlab_scaffold_augmentation/'+task_name+'_5_consensus_aug.csv')
    smiles_name_list = ['smiles', 'aug_smiles_0', 'aug_smiles_1', 'aug_smiles_2', 'aug_smiles_3']
    smiles_list = data[smiles_name_list].values.tolist()
    global_list = data['labels'].values.tolist()

    # To avoid running out of memory, the dataset is processed in `n_part` parts.
    n_split = len(smiles_list) // n_part
    for i in range(n_part):
        part_start = i * n_split
        # The last part is open-ended so the `len % n_part` trailing molecules are
        # not silently dropped (previously every part had exactly n_split rows).
        part_stop = (i + 1) * n_split if i < n_part - 1 else len(smiles_list)
        smiles_split = smiles_list[part_start:part_stop]
        global_split = global_list[part_start:part_stop]

        n_mol = len(smiles_split) // n_thread

        # Create one worker per slice; the last slice takes the remainder.
        workers = []
        for k in range(n_thread):
            lo = k * n_mol
            hi = (k + 1) * n_mol if k < n_thread - 1 else len(smiles_split)
            # Shards are numbered 1..n_part*n_thread across all parts.
            out_path = ('../data/BERT_maccs_data/'+task_name+'_maccs_consensus_'
                        + str(i*n_thread+k+1)+'.npy')
            workers.append(multiprocessing.Process(
                target=build_pretrain_chirality_R_S_contrastive_data_and_save,
                args=(smiles_split[lo:hi], global_split[lo:hi], out_path)))

        # Start all workers of this part ...
        for worker in workers:
            worker.start()

        # ... and wait for all of them before moving on, which bounds memory use
        # to one part at a time.
        for worker in workers:
            worker.join()

        # All workers of this part have finished.
        print("Done!")
from experiment import build_data
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from experiment.my_nn import collate_data, EarlyStopping, run_a_train_global_epoch, run_an_eval_global_epoch,\
    set_random_seed, K_BERT_WCL, pos_weight
import os
import numpy as np
import pandas as pd
set_random_seed()

# Fine-tuning script: for every downstream task, fine-tune the pre-trained
# K_BERT_WCL model `times` times on freshly shuffled random splits, early-stop
# on the validation score, and save the metrics of the best checkpoint to CSV.

# Fixed model / training hyper-parameters.
args = {}
args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
args['metric_name'] = 'roc_auc'
args['batch_size'] = 128
args['num_epochs'] = 200
args['d_model'] = 768
args['n_layers'] = 6
args['vocab_size'] = 47
args['maxlen'] = 201
args['d_k'] = 64
args['d_v'] = 64
args['d_ff'] = 768 * 4
args['n_heads'] = 12
args['global_labels_dim'] = 1
args['atom_labels_dim'] = 15
args['lr'] = 3e-5
args['pretrain_layer'] = 5
args['mode'] = 'higher'
args['patience'] = 20
args['times'] = 10
# Alternative checkpoint: 'pretrain_k_bert_epoch_7.pth'
args['pretrain_model'] = 'pretrain_k_bert_wcl_epoch_7.pth'

args['task_name_list'] = ['Pgp-sub', 'HIA', 'F(20%)', 'F(30%)', 'FDAMDD', 'CYP1A2-sub', 'CYP2C19-sub', 'CYP2C9-sub',
                          'CYP2D6-sub', 'CYP3A4-sub', 'T12', 'DILI', 'SkinSen', 'Carcinogenicity', 'Respiratory']

# Create the output directory up front so a missing folder cannot make to_csv
# fail after training has already been done.
result_dir = '../result/maccs/'
os.makedirs(result_dir, exist_ok=True)

for task in args['task_name_list']:
    args['task_name'] = task
    args['data_path'] = '../data/task_data/' + args['task_name'] + '.npy'

    # Headline metric of every repetition, per split.
    all_times_train_result = []
    all_times_val_result = []
    all_times_test_result = []
    # One row per metric; three columns (train/val/test) are added per repetition.
    result_pd = pd.DataFrame()
    result_pd['index'] = ['roc_auc', 'accuracy', 'sensitivity', 'specificity', 'f1-score', 'precision', 'recall',
                          'error rate', 'mcc']

    for time_id in range(args['times']):
        # A different but reproducible seed for every repetition.
        set_random_seed(2020+time_id)
        train_set, val_set, test_set, _task_number = build_data.load_data_for_random_splited(
            data_path=args['data_path'], shuffle=True
        )
        print("Molecule graph is loaded!")
        train_loader = DataLoader(dataset=train_set,
                                  batch_size=args['batch_size'],
                                  shuffle=True,
                                  collate_fn=collate_data)

        val_loader = DataLoader(dataset=val_set,
                                batch_size=args['batch_size'],
                                collate_fn=collate_data)

        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_data)
        pos_weight_task = pos_weight(train_set)
        print('***************************************************************************************************')
        print('{}, {}/{} time'.format(args['task_name'], time_id+1, args['times']))
        print('***************************************************************************************************')

        loss_criterion = torch.nn.BCEWithLogitsLoss(reduction='none', pos_weight=pos_weight_task.to(args['device']))
        model = K_BERT_WCL(d_model=args['d_model'], n_layers=args['n_layers'], vocab_size=args['vocab_size'],
                           maxlen=args['maxlen'], d_k=args['d_k'], d_v=args['d_v'], n_heads=args['n_heads'],
                           d_ff=args['d_ff'], global_label_dim=args['global_labels_dim'],
                           atom_label_dim=args['atom_labels_dim'])
        stopper = EarlyStopping(patience=args['patience'], pretrained_model=args['pretrain_model'],
                                pretrain_layer=args['pretrain_layer'],
                                task_name=args['task_name']+'_downstream_k_bert_wcl', mode=args['mode'])
        model.to(args['device'])
        stopper.load_pretrained_model(model)
        optimizer = Adam(model.parameters(), lr=args['lr'])
        for epoch in range(args['num_epochs']):
            train_score = run_a_train_global_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)
            # NOTE(review): this result is discarded. The call is kept on purpose:
            # iterating the shuffled train_loader advances the RNG, so removing
            # it would change later shuffles and therefore the reported numbers.
            _ = run_an_eval_global_epoch(args, model, train_loader)[0]
            val_score = run_an_eval_global_epoch(args, model, val_loader)[0]
            test_score = run_an_eval_global_epoch(args, model, test_loader)[0]
            # For the first 5 epochs the stopper is fed a score of 0 instead of the
            # real validation score (presumably a warm-up so that very early
            # checkpoints are not selected -- confirm in EarlyStopping.step).
            if epoch < 5:
                early_stop = stopper.step(0, model)
            else:
                early_stop = stopper.step(val_score, model)
            print('epoch {:d}/{:d}, {}, lr: {:.6f}, train: {:.4f}, valid: {:.4f}, best valid {:.4f}, '
                  'test: {:.4f}'.format(
                      epoch + 1, args['num_epochs'], args['metric_name'], optimizer.param_groups[0]['lr'],
                      train_score, val_score, stopper.best_score, test_score))
            if early_stop:
                break

        # Restore the best checkpoint and evaluate every split exactly once. The
        # first entry of each metric list is the headline score, so the previous
        # second evaluation pass per split was redundant.
        stopper.load_checkpoint(model)
        stop_train_list = run_an_eval_global_epoch(args, model, train_loader)
        stop_val_list = run_an_eval_global_epoch(args, model, val_loader)
        stop_test_list = run_an_eval_global_epoch(args, model, test_loader)
        train_score = stop_train_list[0]
        val_score = stop_val_list[0]
        test_score = stop_test_list[0]

        result_pd['train_' + str(time_id + 1)] = stop_train_list
        result_pd['val_' + str(time_id + 1)] = stop_val_list
        result_pd['test_' + str(time_id + 1)] = stop_test_list
        print(result_pd[['index', 'train_' + str(time_id + 1), 'val_' + str(time_id + 1), 'test_' + str(time_id + 1)]])
        print('********************************{}, {}_times_result*******************************'.format(args['task_name'],
                                                                                                          time_id + 1))
        print("training_result:", round(train_score, 4))
        print("val_result:", round(val_score, 4))
        print("test_result:", round(test_score, 4))

        all_times_train_result.append(round(train_score, 4))
        all_times_val_result.append(round(val_score, 4))
        all_times_test_result.append(round(test_score, 4))

        # Running summary over the repetitions finished so far for this task.
        print("************************************{}_times_result************************************".format(
            time_id + 1))
        print('the train result of all tasks ({}): '.format(args['metric_name']), np.array(all_times_train_result))
        print('the average train result of all tasks ({}): {:.3f}'.format(args['metric_name'],
                                                                          np.array(all_times_train_result).mean()))
        print('the train result of all tasks (std): {:.3f}'.format(np.array(all_times_train_result).std()))
        print('the train result of all tasks (var): {:.3f}'.format(np.array(all_times_train_result).var()))

        print('the val result of all tasks ({}): '.format(args['metric_name']), np.array(all_times_val_result))
        print('the average val result of all tasks ({}): {:.3f}'.format(args['metric_name'],
                                                                        np.array(all_times_val_result).mean()))
        print('the val result of all tasks (std): {:.3f}'.format(np.array(all_times_val_result).std()))
        print('the val result of all tasks (var): {:.3f}'.format(np.array(all_times_val_result).var()))

        print('the test result of all tasks ({}):'.format(args['metric_name']), np.array(all_times_test_result))
        print('the average test result of all tasks ({}): {:.3f}'.format(args['metric_name'],
                                                                         np.array(all_times_test_result).mean()))
        print('the test result of all tasks (std): {:.3f}'.format(np.array(all_times_test_result).std()))
        print('the test result of all tasks (var): {:.3f}'.format(np.array(all_times_test_result).var()))
        # Rewritten after every repetition so partial results survive an
        # interrupted run; the file after the last repetition holds all columns.
        result_pd.to_csv(result_dir + args['task_name'] + '_K_BERT_WCL_result.csv', index=False)
CCCC(C)(COC(N)=O)COC(=O)NC(C)C,training,1 20 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,1 21 | CC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O,training,1 22 | CN(C)CCC(c1ccc(Br)cc1)c1ccccn1,training,1 23 | CCCC(CCC)C(=O)O,training,1 24 | CCC1(c2ccccc2)C(=O)NC(=O)N(C)C1=O,training,1 25 | CCC1(c2ccccc2)C(=O)NCNC1=O,training,1 26 | COc1ccc2c(c1)[C@]13CCCC[C@@H]1[C@H](C2)N(C)CC3,training,1 27 | CN1C(=O)OC(C)(C)C1=O,training,1 28 | C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,training,1 29 | CCC(=O)C1(c2cccc(O)c2)CCN(C)CC1,training,1 30 | CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,training,1 31 | CN(C)CCC=C1c2ccccc2COc2ccccc21,training,1 32 | O=C1CN=C(c2ccccn2)c2cc(Br)ccc2N1,training,1 33 | CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,training,1 34 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21,training,1 35 | O=P1(NCCCl)OCCCN1CCCl,training,1 36 | CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,training,1 37 | COc1ccc(Cl)cc1C(=O)NCCc1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1,training,1 38 | CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1 39 | CO[C@H]1/C=C/O[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N/N5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)/C(C)=C\C=C\[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C,training,1 40 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,training,1 41 | CN(C)CCOC1=Cc2ccccc2Sc2ccc(Cl)cc21,training,1 42 | CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccc(O)cc3)C(=O)N2[C@H]1C(=O)O,training,1 43 | CC(C)(C)NCC(O)COc1nsnc1N1CCOCC1,training,1 44 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](c6cccs6)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,1 45 | Cc1cn([C@H]2C[C@H](N=[N+]=[N-])[C@@H](CO)O2)c(=O)[nH]c1=O,training,1 46 | CCc1cc2c(s1)N(C)C(=O)CN=C2c1ccccc1Cl,training,1 47 | Fc1ccccc1C1=NCC(=S)N(CC(F)(F)F)c2ccc(Cl)cc21,training,1 48 | CN(C)Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2,training,1 49 | CCCCN1CCCCC1C(=O)Nc1c(C)cccc1C,training,1 50 | CCc1cc2c(s1)-n1c(C)nnc1CN=C2c1ccccc1Cl,training,1 51 | COc1ccc(CC(C)NCC(O)c2ccc(O)c(NC=O)c2)cc1,training,1 52 | 
C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3C(=C)C[C@@]21CC,training,1 53 | CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1 54 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1,training,1 55 | CN1CCCC(n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1,training,1 56 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21,training,1 57 | CC1(C)NC(=O)N(c2ccc([N+](=O)[O-])c(C(F)(F)F)c2)C1=O,training,1 58 | NS(=O)(=O)Cc1noc2ccccc12,training,1 59 | O=C(NCCN1CCOCC1)c1ccc(Cl)cc1,training,1 60 | CO[C@H]1O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]([C@H]1C)[C@]42OO3,training,1 61 | COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1,training,1 62 | COc1cc2nc(N3CCN(C(=O)C4COc5ccccc5O4)CC3)nc(N)c2cc1OC,training,1 63 | CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,training,1 64 | COc1cc(N)c(Cl)cc1C(=O)N[C@@H]1CCN(CCCOc2ccc(F)cc2)C[C@@H]1OC,training,1 65 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(OC)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,1 66 | CNCC[C@@H](Oc1ccccc1C)c1ccccc1,training,1 67 | O=C1NC(c2ccccc2)(c2ccccc2)C(=O)N1COP(=O)(O)O,training,1 68 | CC(=O)N[C@@H]1[C@@H]([C@@H](O)[C@H](O)CO)O[C@@](Oc2ccc3c(C)cc(=O)oc3c2)(C(=O)O)C[C@H]1O,training,0 69 | CCCCCCCCCCCCCCCCCCCCCCO,training,0 70 | CN1c2c(oc(=O)n(-c3ccccn3)c2=O)-c2ccccc2S1(=O)=O,training,0 71 | CC(CN1c2ccccc2Sc2cccnc21)N(C)C,training,0 72 | CCOC(=O)C1(c2ccccc2)CCN(CCC(C#N)(c2ccccc2)c2ccccc2)CC1,training,0 73 | CCC(=O)[C@@]1(C)[C@H](C)C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@H]3[C@@H](O)C[C@@]21C,training,0 74 | Clc1ccc(CSC(Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1,training,0 75 | CCCCCCCCCCCCCCCC(=O)OC[C@@H](COP(=O)(O)OCCN)OC(=O)CCCCCCCCCCCCCCC,training,0 76 | CC(N)=O,training,0 77 | Clc1ccc(C(OCCN2CCCCC2)c2ccccc2)cc1,training,0 78 | Cn1cnc(N)c2ncnc1-2,training,0 79 | CC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)[C@H](N)c3ccc(O)cc3)[C@H]2SC1,training,0 80 | O=c1ccn([C@H]2C[C@@H](O)[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1,training,0 81 | C[C@H](N)C(=O)O,training,0 82 | 
O=C(NC(=O)c1cc(F)c(F)cc1Cl)Nc1ccc(C(=O)O)cc1OC(F)(F)F,training,0 83 | C[N+](C)(C)CCO.[Cl-],training,0 84 | NCC1CCC(C(=O)O)CC1,training,0 85 | Cc1cc(N)nc(CCNC(=O)c2ccc(C#N)cc2)c1,training,0 86 | CCNC(=O)c1cn2ncnc(Nc3cc(C(=O)NOC)ccc3C)c2c1C,training,0 87 | O=C(O)C(CP(=O)(O)O)=C(Cl)Cl,training,0 88 | O=C(CO)[C@@H](O)[C@H](O)COP(=O)(O)O,training,0 89 | CCN(CC)CCOc1ccc(C(=C(Cl)c2ccccc2)c2ccccc2)cc1,training,0 90 | O=C([O-])[O-].O=C([O-])[O-].O=C([O-])[O-].[La+3].[La+3],training,0 91 | Cc1nc(-c2ccccn2)nc(NCCc2ccccc2)c1Cl,training,0 92 | CC(C)(C)Nc1nc(C(F)(F)F)nc2ccc(-c3cccc(N)c3)cc12,training,0 93 | NC(=O)c1ccc(Nc2nc(OCC3CCCCC3)c3nc[nH]c3n2)cc1,training,0 94 | O=C1CCO1,training,0 95 | C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C,training,0 96 | C[N+]1(CC(=O)c2ccc(-c3ccccc3)cc2)[C@H]2CC[C@@H]1CC(OC(=O)[C@H](CO)c1ccccc1)C2,training,0 97 | C[C@H](Cc1cc2c(c(C(N)=O)c1)N(CCCO)CC2)NCCOc1ccccc1OCC(F)(F)F,training,0 98 | O=C(O)C1=C[C@@H](O)[C@H](OS(=O)(=O)O)CO1,training,0 99 | O=C(O)CCc1nc2c(=O)[nH]c(=O)[nH]c2n(C[C@H](O)[C@@H](O)[C@H](O)CO)c1=O,training,0 100 | O=C1CN(S(=O)(=O)/C=C/c2ccc(Cl)s2)CCN1Cc1cc2cnccc2[nH]1,training,0 101 | O=C(O)c1ccc(Cc2ccccc2Cl)o1,training,0 102 | CCCCCC(=O)N[C@@H](CCS)C(=O)O,training,0 103 | O=CN(O)CCOc1cccc(C(=O)c2ccccc2)c1,training,0 104 | CC(C)Nc1cc(NC2CCC(N)CC2)nc2c(C#N)cnn12,training,0 105 | OC(c1ccccc1)(c1ccccc1)C12CC[N+](CCOCc3ccccc3)(CC1)CC2,training,0 106 | CC(=O)O[C@H](C(=O)Nc1cccc2c1C(=O)C(=O)NC2=O)c1cccc(Cl)c1,training,0 107 | CCCOc1ccnc2[nH]cc(-c3ccnc(N)n3)c12,training,0 108 | C[C@H]1CC(=O)NN=C1c1ccc(NC2=C(Cc3cccc(I)c3)C(=O)CCC2)cc1,training,0 109 | Nc1nc(=O)n([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@@H]2O)cc1Br,training,0 110 | N[C@H](COc1cncc2nc(-c3ccncc3)ccc12)Cc1c[nH]c2ccccc12,training,0 111 | CNS(=O)(=O)Nc1ccc(-c2ccccc2)n(CC(=O)N[C@H](C(=O)C(F)(F)F)C(C)C)c1=O,training,0 112 | COC(=O)[C@@H](C)c1ccc(-c2ccccc2)c(F)c1,training,0 113 | COc1ccc(F)c(F)c1C(=O)c1cnc(NC2CCN(S(C)(=O)=O)CC2)nc1N,training,0 114 | 
O=C(Nc1ccccc1)Nc1ccccc1,training,0 115 | N#Cc1ccc(C(c2ccc(C#N)cc2)n2cncn2)cc1,training,0 116 | O=C(N/N=C/c1cc(Br)c(O)c(Br)c1O)c1ccc(Cl)cc1,training,0 117 | O=C(O)CCc1c[nH]c2ccccc12,training,0 118 | Cc1cn[nH]c1,training,0 119 | CCC(=O)CCC(C)(C)O,training,0 120 | C=CCN(C)CCCCCCOc1ccc2c(-c3ccc(Br)cc3)coc2c1,training,0 121 | O=C(O)[C@@H](Cc1c[nH]c2ccccc12)NS(=O)(=O)c1ccc(N2CCC(c3ccccc3)CC2)cc1,training,0 122 | O=C(O)CCCc1cccs1,training,0 123 | COc1ccc(S(=O)(=O)N2CCc3cc(O)c(OC)cc3[C@@H]2C(=O)NO)cc1,training,0 124 | O=S(=O)(CCNCCCO)c1ccc(Nc2nc(OCC3CCCCC3)c3nc[nH]c3n2)cc1,training,0 125 | CC(C)(C)OC(=O)N[C@H](C=O)C1CCCCC1,training,0 126 | Cc1ccc(C(=O)c2ccc(CC(=O)O)n2C)cc1,training,0 127 | Clc1ccc2c(c1Cl)CNCCC2,training,0 128 | CN(C)C=O,training,0 129 | CC[C@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H](C2[C@H]5O)N3[C@@H]1O,training,0 130 | CN1CCN(c2ccc(C(=O)Nc3n[nH]c4cn(C(=O)Cc5cccs5)cc34)cc2)CC1,training,0 131 | Cc1cc(C)cc(Oc2c(CSCc3ccco3)c(C)[nH]c(=O)c2I)c1,training,0 132 | CC(CCc1ccccc1)NCC(O)c1ccc(O)c(C(N)=O)c1,training,0 133 | Oc1ccc([C@H]2Sc3cc(O)ccc3O[C@@H]2c2ccc(OCCN3CCCCC3)cc2)cc1,training,0 134 | CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O,training,0 135 | CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3,training,0 136 | CC(=O)N1N=C(c2ccccc2Cl)C[C@H]1c1cccc(O)c1,training,0 137 | CNCC(=O)c1nnc(C(C)(C)C)o1,training,0 138 | C[C@@H]1CCN(C(=O)CC#N)C[C@@H]1N(C)c1ncnc2[nH]ccc12,training,0 139 | Cc1ncc(COP(=O)(O)O)c(CNCCCC[C@@H](N)C(=O)O)c1O,training,0 140 | N[C@H](C(=O)O)c1ccc(O[C@H]2O[C@@H](CO)[C@H](O)[C@@H](O)[C@H]2O[C@H]2O[C@@H](CO)[C@H](O)[C@@H](O)[C@@H]2O)cc1,training,0 141 | NC(=O)c1cnccn1,training,0 142 | CCC1(CC)C(=O)NCC(C)C1=O,training,0 143 | O=[N+]([O-])c1ccc(OP(=O)(O)Oc2ccc([N+](=O)[O-])cc2)cc1,training,0 144 | OC[C@@H]1O[C@H](O[C@@H]2[C@@H](O)C[C@@H](O)O[C@@H]2CO)[C@@H](O)[C@H](O)[C@H]1O,training,0 145 | CC(=O)O[C@H]1C[C@@H]2CC[C@@H]3[C@H](CC[C@@]4(C)[C@H]3C[C@H]([N+]3(C)CCCCC3)[C@@H]4OC(C)=O)[C@@]2(C)C[C@@H]1N1CCCCC1,training,0 146 | 
CCN(CC)CC#CCOC(=O)C(O)(c1ccccc1)C1CCCCC1,training,0 147 | Cc1cccc2c(=O)[nH]c(CCCN3CC=C(c4ccc(F)cc4)CC3)nc12,training,0 148 | O=C(NO)[C@@H](O)[C@@H](O)COP(=O)(O)O,training,0 149 | C[C@@H](CC(=O)O)[C@H](N)C(=O)O,training,0 150 | O=C(O)c1sccc1S(=O)(=O)Nc1ccc(Cl)cc1,training,0 151 | c1ccc(C2CC2)c(OCC2=NCCN2)c1,training,0 152 | NC(=O)c1ncn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c1N,training,0 153 | CC(SC(=O)c1cccs1)C(=O)NCC(=O)O,training,0 154 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1,training,0 155 | NS(=O)(=O)c1ccc(C(=O)NCc2ccc(F)cc2F)cc1,training,0 156 | c1ccc2c(CCc3ccncc3)c[nH]c2c1,training,0 157 | Nc1nc(N)nc(-c2cc3ccccc3cc2Br)n1,training,0 158 | NC(=O)C1=CN([C@@H]2O[C@@H](COP(=O)([O-])OP(=O)([O-])OC[C@@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H]([O-])[C@H]3[O-])[C@H](O)[C@H]2O)C=C[C@H]1n1cc2c(=S)n([C@@H](C(=O)N3CCCCCC3)c3ccccc3)cnc2n1,training,0 159 | CC=CC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)[C@H](N)c3ccc(O)cc3)[C@H]2SC1,training,0 160 | NC(=O)C1=C[N+]=CC([C@@H]2O[C@@H](COP(=O)([O-])OP(=O)(O)OC[C@@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@H]3O)[C@H](O)[C@H]2O)=C1,training,0 161 | O=C(O)C[C@H](N=C(O)CP(=O)(O)O)C(=O)O,training,0 162 | O=C(O)c1ccccc1/N=N/c1ccc(O)c2ccccc12,training,0 163 | CCC(C)n1ncn(-c2ccc(N3CCN(c4ccc(OC[C@H]5CO[C@](Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,training,0 164 | Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1,training,0 165 | CC(C)(COP(=O)(O)OP(=O)(O)OC[C@@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCC[C@H](F)C(=O)O,training,0 166 | Nc1cccc(-c2cnn3ccc(Nc4cccc(Cl)c4)nc23)c1,training,0 167 | O=c1cc[nH]c(=O)[nH]1,training,0 168 | COCCOc1cc(C)nc(N)n1,training,0 169 | Oc1ccc(C2=Cc3ccc(O)cc3OC2)cc1,training,0 170 | O=P1(N(CCCl)CCCl)NCCCO1,training,1 171 | CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2,training,1 172 | CCCC(C)(COC(N)=O)COC(N)=O,training,1 173 | COc1ccc2[nH]cc(CCNC(C)=O)c2c1,training,1 174 | CN(C)CCOC(c1ccc(Cl)cc1)c1ccccn1,training,1 175 | Cc1cnc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)cn1,training,1 176 | 
CN/C(=C\[N+](=O)[O-])NCCSCc1ccc(CN(C)C)o1,training,1 177 | NC(N)=Nc1nc(CSCCC(N)=NS(N)(=O)=O)cs1,training,1 178 | Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1,training,1 179 | COc1ccc(-c2oc3ncnc(NCCO)c3c2-c2ccc(OC)cc2)cc1,training,0 180 | CN(C)c1cccc2c(S(=O)(=O)N[C@H](Cc3ccc(OS(=O)(=O)c4cccc5c(N(C)C)cccc45)cc3)C(=O)O)cccc12,training,0 181 | Nc1nc(=O)c2sc(=O)n([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O)c2[nH]1,training,0 182 | CC12C=CC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)COC(=O)c1cccc(S(=O)(=O)[O-])c1.[Na+],training,0 183 | C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,training,0 184 | C[C@H](O)[C@H](CCc1cccc2nc(-c3ccc(Cl)cc3)oc12)n1cnc(C(N)=O)c1,training,0 185 | NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,training,0 186 | C[C@]12CC[C@H]3[C@@H](CC[C@H]4C[C@@H](O)CC[C@@]43C)[C@@H]1C[C@@H](Br)C2=O,training,0 187 | O=CN1CCCCC1,training,0 188 | Cc1cccc(Nc2ccccc2C(=O)O)c1C,training,0 189 | CCN[C@@H]1CN(CCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,0 190 | C=CC(C)(C)OC[C@@H]1O[C@H](O[C@H]2C3=C([C@H](C)COC(C)=O)C[C@@H](O)[C@]3(C)C=C3[C@H](COC)CC[C@@H]3[C@@H](C)[C@@H]2O)[C@@H](O)[C@H](OC(C)=O)[C@H]1O,training,0 191 | CCC1(c2ccccc2)NC(=O)N(C)C1=O,training,1 192 | O=C1CCC(N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1 193 | O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1,training,1 194 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1,training,1 195 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCc4cc(O)ccc4[C@H]3CC[C@@]21C,training,1 196 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1 197 | CN1[C@H]2CC[C@@H]1CC(OC(c1ccccc1)c1ccccc1)C2,training,1 198 | CN1C(=O)CC(=O)N(c2ccccc2)c2cc(Cl)ccc21,training,1 199 | COc1ccc(C(CN(C)C)C2(O)CCCCC2)cc1,training,1 200 | N=C(N)c1ccc(C=Cc2ccc(C(=N)N)cc2O)cc1.O=S(=O)(O)CCO.O=S(=O)(O)CCO,training,0 201 | O=C1CC[C@@]2(O)[C@H]3Cc4ccc(O)c5c4[C@@]2(CCN3CC2CC2)[C@H]1O5,training,0 202 | COc1ccc(N2CCN(c3cccc(C)c3)CC2)nn1,training,0 203 | O=C(N[C@@H]1Cc2ccccc2[C@H]1O)c1cc2cc(Cl)sc2[nH]1,training,0 
204 | O=S1(=O)NCCCNc2nc(ncc2Br)Nc2cccc1c2,training,0 205 | N=C(O)CN1CCCC1=O,training,0 206 | O=C(O)[C@@H](CF)OP(=O)(O)O,training,0 207 | NC(=O)c1c[n+]([C@@H]2O[C@@H](COP(=O)(O)OP(=O)(O)OC[C@@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@H]3O)[C@H](O)[C@H]2O)ccc1[C@H](O)C(=O)C[C@H](N)C(=O)O,training,0 208 | CC(C)(C)c1snc([O-])c1C[C@H]([N+])C(=O)O,training,0 209 | Cc1ccc(-n2nccn2)c(C(=O)N2CCN(c3nc4cc(Cl)ccc4o3)CC[C@H]2C)c1,training,0 210 | CCCc1cc(=O)[nH]c(=S)[nH]1,training,0 211 | CCC[C@H]1COc2ccsc2C(=N)N1,training,0 212 | CN[C@@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,0 213 | NNCCc1ccccc1,test,1 214 | O=C(N[C@H](CO)[C@H](O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,test,1 215 | CCOc1ccc(NC(C)=O)cc1,test,1 216 | COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O,test,1 217 | CC(C)/N=C(\N)N=C(N)Nc1ccc(Cl)cc1,test,1 218 | C#CCN(C)C(C)Cc1ccccc1,test,1 219 | CC(C)Cc1ccc(C(C)C(=O)O)cc1,test,1 220 | COC(=O)[C@@H](NC(=O)[C@@H](NC(=O)CC[C@H](O)[C@H](Cc1ccccc1)NC(=O)[C@@H](C)NC(=O)[C@H](C)N)C(C)C)C(C)C,test,0 221 | COc1ccc(CC(N)=O)cc1,test,0 222 | NC(N)=NCc1cccc(I)c1,test,0 223 | COc1ccc(S(=O)(=O)N[C@H](C)C(=O)O)cc1,test,0 224 | N[C@@H](CSCCC(=O)c1ccc(Cl)c(Cl)c1)C(=O)O,test,0 225 | O[C@H](CCCl)c1ccccc1,test,0 226 | N#C[C@@H](O)c1ccccc1,test,0 227 | CC(C)c1cccc(C(C)C)c1O,test,1 228 | N[C@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O,test,1 229 | CC(=O)N[C@H]1[C@H](O)[C@@H](F)[C@H](C(=O)O)O[C@H]1[C@H](O)[C@@H](O)CO,test,0 230 | N[C@H]1[C@H](O)O[C@H](CO)[C@@H](OP(=O)(O)O)[C@@H]1O,test,0 231 | CC(=O)N[C@@H](Cc1cccc2ccccc12)[B-](O)(O)O,test,0 232 | CNCCCN1c2ccccc2CCc2ccccc21,test,1 233 | CN(C)CCCN1c2ccccc2CCc2ccccc21,test,1 234 | CN1C(=O)C(O)N=C(c2ccccc2)c2cc(Cl)ccc21,test,1 235 | CN1C(=O)CN=C(c2ccccc2F)c2cc([N+](=O)[O-])ccc21,test,1 236 | CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,val,1 237 | CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,val,1 238 | CC(C)C(=O)Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,val,1 239 | CN(C)c1ccc(C(=O)O)cc1,val,0 240 | CC(=O)Nc1ccc(C(=O)O)cc1N,val,0 241 | COCCCCC(=NOCCN)c1ccc(C(F)(F)F)cc1,val,0 242 | 
CN(C)C(=O)Oc1cccc([N+](C)(C)C)c1,val,0 243 | COc1cccc(C=O)c1OP(=O)(O)O,val,0 244 | COC[C@@H](O)[C@H](CC(C)C)NC(=O)[C@H](CC(C)C)NC(=O)OCc1ccccc1,val,0 245 | COc1ccc(/C=C/C(=O)O)cc1O,val,0 246 | CC(=O)C(N)Cc1ccccc1,val,0 247 | NCCc1ccc(O)c(O)c1,val,1 248 | O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1,val,0 249 | COCCc1ccc(OCC(O)CNC(C)C)cc1,val,1 250 | O=C(O)/C=C/c1ccc(B(O)O)cc1,val,0 251 | CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](OS(=O)(=O)O)[C@@H]1O,val,0 252 | OC1OC[C@H](O)[C@@H](O)[C@@H]1O,val,0 253 | CC(C)NCC(O)COc1cccc2ccccc12,val,1 254 | CN(C/C=C/C#CC(C)(C)C)Cc1cccc2ccccc12,val,1 255 | CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,val,1 256 | CC(CN(C)C)CN1c2ccccc2CCc2ccccc21,val,1 257 | O=C1Nc2ccc(Cl)cc2C(c2ccccc2Cl)=NC1O,val,0 258 | CN1C(=O)CN=C(c2ccccc2F)c2cc(Cl)ccc21,val,0 259 | CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,val,1 260 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 12 | 13 | 15 | 16 | 17 | 18 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 60 | 61 | 62 | 82 | 83 | 84 | 104 | 105 | 106 | 126 | 127 | 128 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 1632899824522 165 | 172 | 173 | 174 | 175 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /data/ADMETlab_data/SkinSen_canonical.csv: -------------------------------------------------------------------------------- 1 | smiles,group,SkinSen 2 | CC(=O)OCC(=O)C1(O)C(C)CC2C3CCC4=CC(=O)C=CC4(C)C3=CCC21C,training,0 3 | CCCOc1ccc(Br)c(C(=O)c2ccc(OC)cc2O)c1,training,0 4 | 
CC=C(C)C=O,training,0 5 | O=C1C(=P(c2ccccc2)(c2ccccc2)c2ccccc2)CCN1c1ccccc1,training,0 6 | C=CC1(C)CC(OC(=O)CSC2CC3CCC(C2)N3C)C2(C)C(C)CCC3(CCC(=O)C32)C(C)C1O,training,0 7 | CCCCc1ccc(C(=O)CC(=O)c2cc(C)c(C)c(C)c2C)cc1,training,0 8 | NN1C=CC=CN1,training,0 9 | CCOc1ccc(C(=O)Cc2ccc(S(C)(=O)=O)cc2)cc1,training,0 10 | CC(C)(C)OC(=O)N1CC(F)CC1C(=O)O,training,0 11 | Cc1nc(COc2ccc(CC(NC(=O)OC(C)(C)C)C3CO3)cc2)cs1,training,0 12 | CC(C)(C)OC(=O)NC(C(=O)N1CC(F)CC1C#N)C(c1ccc(F)cc1)c1ccc(F)cc1,training,0 13 | O=c1[nH]c(=S)[nH]c2c1CCC2,training,0 14 | Cc1ccc(C)c(C(=O)CC(=O)c2cc(C)ccc2C)c1,training,0 15 | CC1C(c2ccccc2)NC(=O)N1C,training,0 16 | CCCOc1ccc(C=CC(=O)N2C(=O)N(C)C(C)C2c2ccccc2)c(C(=O)c2ccc(OC)cc2OCc2ccccc2)c1,training,0 17 | COc1ccc(C2(C#N)CCC(=O)CC2)cc1OC1CCCC1,training,0 18 | CC(=O)c1ccc(Cc2ccc(F)cc2)o1,training,0 19 | CCOc1ccc(-c2nn3ncccc3c2-c2ccc(S(C)(=O)=O)cc2)cc1,training,0 20 | CC(=O)CCCN1C(=O)c2ccccc2C1=O,training,0 21 | Cc1nc(COc2ccc(CC(NC(=O)OC3COC4OCCC34)C(O)CN(CC(C)C)S(=O)(=O)c3ccc4c(c3)OCO4)cc2)cs1,training,0 22 | CC(C)N(C(=O)Cn1c(=O)c(=NOCc2ccccc2)c(=O)n(-c2ccccc2)c2ccccc21)c1ccccc1,training,0 23 | Cc1c2ccc(N)cc2nn1C,training,0 24 | COc1cc(C)c2c(Oc3cccc(C(F)(F)F)c3)c(OC)cc(N)c2n1,training,0 25 | CC(=O)C1CCCCC1=O,training,0 26 | CS(=O)(=O)CCN,training,0 27 | C=C(C)C(=O)OCC(C)O,training,0 28 | CCC(C)C1C(=O)NC(C2Cc3ccccc3C2)C(=O)N1C(C(=O)N1CCOCC1)c1coc(C)n1,training,0 29 | O=C(O)c1c(O)c(-c2ccccc2)nc2ccccc12,training,0 30 | CC1C=CC(C(C)C)CC1,training,0 31 | O=Cc1ccc(-c2ccc(C(F)(F)F)cc2)cc1,training,0 32 | COc1ccc(C2(C#N)CCC(C(=O)O)CC2)cc1OC1CCCC1,training,0 33 | NC(=O)C1CC(F)CN1,training,0 34 | C=C1CC(C(=C)C)CCC1C,training,0 35 | C=C1CCC(C(C)C)CC1,training,0 36 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O,training,0 37 | COc1cc(C)c2c(Cl)c(OC)ccc2n1,training,0 38 | Cc1cc2c(cc1C(F)(F)F)NCC2,training,0 39 | C=CC1(C)CC(OC(=O)CO)C2(C)C(C)CCC3(CCC(=O)C32)C(C)C1O,training,0 40 | Cc1ccc2oc(=O)ccc2c1,training,0 41 | O=C(O)CCCCC(=O)O,training,0 42 | 
CCNc1nc(Cl)nc(NC(C)C)n1,training,0 43 | CCCCCCCCCBr,training,0 44 | C=C1CC=C(C(C)C)CC1,training,0 45 | CCCCO,training,0 46 | CCCCCCCCCCl,training,0 47 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(OC)CC(C)C(=O)C(C)C(O)C1(C)O,training,0 48 | O=c1ccc2ccccc2o1,training,0 49 | Nc1cc[nH]c(=O)n1,training,0 50 | COC(=O)CCC(C#N)(CCC(=O)OC)c1ccc(OC)c(OC2CCCC2)c1,training,0 51 | COC(=O)OC,training,0 52 | CCOC(=O)C(=NOC(C)(C)C(=O)OC(C)(C)C)c1csc(NC(c2ccccc2)(c2ccccc2)c2ccccc2)n1,training,0 53 | CCOC(=O)c1nnc[nH]1,training,0 54 | CCOC(=O)CC(=O)c1cc(F)c(Cl)nc1Cl,training,0 55 | O=C(C(=O)c1ccco1)c1ccco1,training,0 56 | OCC(O)CO,training,0 57 | CCCCCC,training,0 58 | CCCCCCI,training,0 59 | CC(C)O,training,0 60 | CC(C)N(C#N)C#N,training,0 61 | NCC1OC(OC2C(N)CC(N)C(OC3OC(CO)C(O)C(N)C3O)C2O)C(O)C(O)C1O,training,0 62 | CC(O)C(=O)O,training,0 63 | Cc1c2ccc(Nc3ccnc(Cl)n3)cc2nn1C,training,0 64 | Cc1c2ccc(N(C)c3ccnc(Cl)n3)cc2nn1C,training,0 65 | CCCCN1CCC(CNC(=O)c2c3n(c4ccccc24)CCCO3)CC1,training,0 66 | CC(C)(C)OC(=O)NC(C(=O)O)C(c1ccc(F)cc1)c1ccc(F)cc1,training,0 67 | CCCCCCCC(=O)O,training,0 68 | CCOc1cc(Oc2ccc(C(F)(F)F)cc2Cl)ccc1[N+](=O)[O-],training,0 69 | C=CC(=O)OCC(CO)(COC(=O)C=C)COC(=O)C=C,training,0 70 | Cc1cc(F)ccc1C1CC(=O)C=CN1C(=O)OCc1ccccc1,training,0 71 | Fc1ccc(Oc2ccnc3cc(Cl)cc(Cl)c23)cc1,training,0 72 | COC1C=COC2(C)Oc3c(C)c(O)c4c(O)c(cc(O)c4c3C2=O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,training,0 73 | O=C1NS(=O)(=O)c2ccccc21,training,0 74 | O=C(O)CCC(=O)O,training,0 75 | O=C(O)C(O)C(O)C(=O)O,training,0 76 | C=CC(=O)OCC(CC)(COC(=O)C=C)COC(=O)C=C,training,0 77 | C=C(Cl)Cl,training,0 78 | CCCCCCCC=CC=O,training,1 79 | O=c1[nH]sc2ccccc12,training,1 80 | N#CCCC(Br)(C#N)CBr,training,1 81 | C=CC(=O)N1C(=O)N(C)C(C)C1c1ccccc1,training,1 82 | C(CCCOCC1CO1)CCOCC1CO1,training,1 83 | OCCCCCCCCCCCCBr,training,1 84 | O=C1C=CC2(O)C3Cc4ccc(O)c5c4C2(CCN3)C1O5,training,1 85 | CCCCCCCCCCCCCCCCCCCl,training,1 86 | CCCCCCCCCCI,training,1 87 | 
CN(N=O)C(=N)N[N+](=O)[O-],training,1 88 | Oc1cccc2ccccc12,training,1 89 | CCCC=CC=O,training,1 90 | CC(C)(C)N(CC(=O)c1ccc(O)c(CO)c1)Cc1ccccc1,training,1 91 | C=CC(=O)OCCO,training,1 92 | Sc1nc2ccccc2s1,training,1 93 | CCCCCCCCCC(C)C=O,training,1 94 | CC(CC(=O)Cl)CC(C)(C)C,training,1 95 | CC1(C)CC(CBr)C(=O)O1,training,1 96 | CN(C)CCCN,training,1 97 | Nc1cc(F)cc(-c2cccnc2)c1,training,1 98 | Cn1sc(Cl)cc1=O,training,1 99 | CC(=O)C(=O)CC(C)C,training,1 100 | C=CCC1(C)C=C(OC)C(=O)CC1,training,1 101 | CCN(CC)CCCCCCO,training,1 102 | CCC=CCCCCC=O,training,1 103 | CC(C)(c1ccc(OCC2CO2)cc1)c1ccc(OCC2CO2)cc1,training,1 104 | C=C1C=CC(C(C)C)CC1,training,1 105 | CCCCCCCCCCCCCCCCCCCCCCBr,training,1 106 | CCCCCCCCCCCCCCCCCCCCBr,training,1 107 | CCCCCCCCCCCCCCCCCBr,training,1 108 | CCCCCCCCCCCCCCCCBr,training,1 109 | CCCCCCBr,training,1 110 | CCCCCCCCCCCCCCCCCCBr,training,1 111 | CCCCCCCCCCCCCCCBr,training,1 112 | CCCCCCCCCCCCCCBr,training,1 113 | CCCCCCCCCCCCCBr,training,1 114 | C=CC(=O)OCCCC,training,1 115 | CCCCOCC1CO1,training,1 116 | CCCCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1 117 | CCCCCCCCCCCCCCCl,training,1 118 | CC(C)=CCCC(C)=CC=O,training,1 119 | CCCCCCCCCCCCCCCCCl,training,1 120 | CCOS(=O)(=O)OCC,training,1 121 | CCC(C=O)CC,training,1 122 | NCCNCCN,training,1 123 | NCCCNCCCN,training,1 124 | CCCCCCCCCCCCOS(C)(=O)=O,training,1 125 | CCOC(=O)CC1CC2CCC(C1)N2C,training,1 126 | C=CC(=O)OCC,training,1 127 | CCC(O)C(CO)CO,training,1 128 | C=C(C)C(=O)OCCOC(=O)C(=C)C,training,1 129 | C=C(C)C(=O)O,training,1 130 | CC(CC=O)CCCC(C)(C)O,training,1 131 | CCCCCCCCCCCCI,training,1 132 | CCCCCCCCCCCCCCCCI,training,1 133 | CC(C)CCCCCC(=O)Cl,training,1 134 | CCCCCCCCCCCCCC(=O)OC(C)C,training,1 135 | CCCCCC=CCC=CCCCCCCCC(=O)O,training,1 136 | CCCCCCC#CC(=O)OC,training,1 137 | C=CC(=O)OC,training,1 138 | CCCCCCCCCCCCCCC=CS(=O)(=O)OC,training,1 139 | C=C(C)C(=O)OC,training,1 140 | CCCCCCCCC(=O)Cl,training,1 141 | CCCCCCCCO,training,1 142 | CCCCCCCCC=CCCCCCCCC(=O)O,training,1 143 | 
CCOC=C1N=C(c2ccccc2)OC1=O,training,1 144 | CCCCCCCCCCCCCCCC(=O)Cl,training,1 145 | O=C(Oc1ccccc1)c1ccccc1,training,1 146 | CC(CCCCN)C(C)(C)N,training,1 147 | C=CCCCCCCCCC(=O)O,training,1 148 | CC(C)N(C(=O)CN1C(=O)C(NC(=O)Nc2cccc(C(=O)OC(C)(C)C)c2)C(=O)N(c2ccccc2)c2ccccc21)c1ccccc1,training,1 149 | NC1CCCCC1N,training,1 150 | O=S(=O)(Cl)c1ccc2c(c1)OCO2,training,1 151 | ClCc1ccc2ccc3cccc4ccc1c2c34,training,1 152 | Cc1cc(=O)n(-c2ccc(C)c(C)c2)[nH]1,training,1 153 | CCC(C)(C)C1CCC(CC=O)CC1,training,1 154 | CC(C)(C)C(=O)CC(=O)C(C)(C)C,training,1 155 | C=C(C)C(=O)OCC(O)COc1ccc(C(C)(C)c2ccc(OCC(O)COC(=O)C(=C)C)cc2)cc1,training,1 156 | Clc1ccnc(Cl)n1,training,1 157 | CCC=CC=CC=O,training,1 158 | COc1cc(C)c2c(Oc3cccc(C(F)(F)F)c3)c(OC)cc([N+](=O)[O-])c2n1,training,1 159 | Nc1ccccc1Nc1ccccc1,training,1 160 | CCCCCCCCCCCCC(Br)C(=O)O,training,1 161 | O=[N+]([O-])c1ccc(OCc2cccc(F)c2)c(Cl)c1,training,1 162 | Cc1nc2ccccc2c(=O)o1,training,1 163 | O=C(Nc1ccc(Cl)c(Cl)c1)c1cc(Cl)cc(Cl)c1O,training,1 164 | O=C1CCc2ccccc2O1,training,1 165 | CC1(C)CC(N)CC(C)(CN)C1,training,1 166 | C=C1NS(=O)(=O)N=C1c1ccccc1,training,1 167 | CC(C)(O)CCCC1=CCC(C=O)CC1,training,1 168 | O=C(C(=O)c1ccccc1)C1=CCC(Br)(Br)C=C1,training,1 169 | CC(Br)CCCN1C(=O)c2ccccc2C1=O,training,1 170 | CC(I)CCCN1C(=O)c2ccccc2C1=O,training,1 171 | C=C1CC(C(=C)C)CC=C1C,training,1 172 | C=C1CC(C)(C)OC1=O,training,1 173 | O=C(O)c1cccc(-c2cc(Cl)cc([N+](=O)[O-])c2O)c1,training,1 174 | COc1cc(C)c2c(Cl)c(OC)cc([N+](=O)[O-])c2n1,training,1 175 | COc1cc([N+](=O)[O-])c2[nH]c(=O)cc(C)c2c1Cl,training,1 176 | COc1cc2c(cc1C(F)(F)F)NCC2,training,1 177 | Cc1ncccc1Oc1ccc(N)cn1,training,1 178 | CCN(CC)CCCCCCBr,training,1 179 | Cc1c2ccccc2c(C)c2c1ccc1ccccc12,training,1 180 | CON=C1CN(c2nc3c(cc2F)c(=O)c(C(=O)O)cn3C2CC2)CC1CN,training,1 181 | CCCCCCCC(Br)CCCCCC,training,1 182 | CC(C)C1=CC2=CCC3C(C)(C(=O)O)CCCC3(C)C2CC1,training,1 183 | CC1=CCC(C(C)C)C=C1,training,1 184 | CC1=CC=C(C(C)C)CC1,training,1 185 | 
c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34,training,1 186 | O=C1C=CC(=O)C=C1,training,1 187 | O=C1CCO1,training,1 188 | CCCCCCCCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1 189 | CCCCCCC1=NC(C)(C)C(=O)O1,training,1 190 | CCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1 191 | C=C(C)C1CCC(C)=C(N=O)C1,training,1 192 | Clc1ccccc1C(c1ccccc1)(c1ccccc1)n1ccnc1,training,1 193 | C(=NC1CCCCC1)=NC1CCCCC1,training,1 194 | CCOC(=O)C=CC(=O)OCC,training,1 195 | COS(=O)(=O)OC,training,1 196 | CS(C)=O,training,1 197 | CN1C2CCC1CC(OS(C)(=O)=O)C2,training,1 198 | NCCN,training,1 199 | CC(C)=CCCC(C)=CCCC(C)=CC=O,training,1 200 | CC(C)=CCCC(C)=CCO,training,1 201 | O=CCCCC=O,training,1 202 | O=C(CS)OCC(O)CO,training,1 203 | NCCNCCO,training,1 204 | O=C(NCNC(=O)NC1C(=O)NC(=O)N1CO)NC1C(=O)NC(=O)N1CO,training,1 205 | CCCCCCCCCCCCOS(=O)(=O)O,training,1 206 | CCC=CCC=CCC=CCCCCCCCC(=O)O,training,1 207 | CCCCCCCCCCCCS(=O)(=O)OC,training,1 208 | COS(C)(=O)=O,training,1 209 | COC(=O)C(C)=O,training,1 210 | CCCCCCCCCCCCCCCCC(CS(=O)(=O)O)C(=O)OC,training,1 211 | CCN(CC)CCN(Cc1ccc(-c2ccc(C(F)(F)F)cc2)cc1)C(=O)Cn1c(SCc2ccc(F)cc2)nc(=O)c2c1CCC2,training,1 212 | CCN(N=O)C(N)=O,training,1 213 | CC(C)N(C(=O)CNc1ccccc1Nc1ccccc1)c1ccccc1,training,1 214 | CN(N=O)C(N)=O,training,1 215 | COC1=CC=C2C3Cc4ccc(O)c5c4C2(CCN3C)C1O5,training,1 216 | O=C(O)C(=O)O,training,1 217 | O=c1[nH]cco1,training,1 218 | CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O,training,1 219 | C=C(C)C1CC=C(C=O)CC1,training,1 220 | CC(C)(C)c1ccc(OCC2CO2)cc1,training,1 221 | c1ccncc1,training,1 222 | C=C(C)C1CC=C(C)C(=O)C1,training,1 223 | O=C(Oc1ccc([N+](=O)[O-])cc1)OC1COC2OCCC12,training,1 224 | CCOC(=S)S,training,1 225 | CCCCCCCCCCCC(=O)OC(C)C(=O)OC(C)C(=O)O,training,1 226 | CC(C)=CCCC(C)=CCCC(C)=CCCC=C(C)CCC=C(C)CCC=C(C)C,training,1 227 | CN(C)C(=S)SSC(=S)N(C)C,training,1 228 | C=CCCCCCCCCC=O,training,1 229 | C=Cc1ccncc1,training,1 230 | C[N+](C)(C)c1ccc2c(c1)C(=NNc1ccc(N)c([N+](=O)[O-])c1)C(=O)C=C2,training,0 231 | CCCCCCCCC(=O)O,training,0 232 | 
CNC1C(OC2C(OC3C(O)C(O)C(NC(=N)N)C(O)C3NC(=N)N)OC(C)C2(O)C=O)OC(CO)C(O)C1O,training,0 233 | Cc1cc(=O)n(-c2ccccc2)[nH]1,training,1 234 | Nc1ccc(N)c2c1C(=O)c1ccccc1C2=O,training,1 235 | O=C1C=CC(=O)O1,training,1 236 | O=C1OC(=O)c2ccccc21,training,1 237 | C=C(C=CCC(C)C)CC,training,0 238 | CC(C)(CC1Cc2ccccc2C1)NCC(O)COc1cc(CCC(=O)O)cc(F)c1F,training,0 239 | CCCOc1ccc2c(c1)C(O)(c1ccc(OC)cc1OCc1ccccc1)C(C(=O)N1C(=O)N(C)C(C)C1c1ccccc1)C2c1ccc2c(c1)OCO2,training,0 240 | CC(C)N(C(=O)CN1C(=O)C(N)C(=O)N(c2ccccc2)c2ccccc21)c1ccccc1,training,0 241 | CCCCN1C(=O)C(C(O)C2CCCCC2)NC(=O)C12CCN(Cc1ccc(Oc3ccc(C(=O)O)cc3)cc1)CC2,training,0 242 | OCCCCCCCl,training,0 243 | COc1cc(N)c2nccc(C)c2c1,training,0 244 | COc1ccc(Nc2ccc(CCNCC(O)c3ccc(O)c4[nH]c(=O)ccc34)cc2)cc1-c1ccccc1,training,0 245 | CCCCCCCCCCCCCCCCCCI,training,0 246 | O=CNc1cc(C(O)CBr)ccc1OCc1ccccc1,training,0 247 | CC(O)CO,training,0 248 | CN(C)CCOC(C)(c1ccccc1)c1ccc(Br)cc1,training,1 249 | C=CC(=O)OCC(CC)CCCC,training,1 250 | O=C(OCc1ccccc1)c1ccccc1,training,1 251 | CCCCCCCCCCCBr,training,1 252 | CCCCC1=NC(C)(C)C(=O)O1,training,1 253 | CCCCCCCCCI,training,1 254 | CCCCCCCCCCCCCCI,training,1 255 | C=C(C)C1CC=C(C)CC1,training,1 256 | C=CC(C)(O)CCC=C(C)C=O,training,1 257 | O=C(O)CCCCCCCCCCCBr,training,1 258 | Clc1ncnc2ccc(I)cc12,training,1 259 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O,training,1 260 | O=CC=O,training,1 261 | CCCCCCCCC=CCCCCCCCCOS(C)(=O)=O,training,1 262 | O=c1c(-c2ccccc2)c1-c1ccccc1,training,1 263 | O=c1c(O)c(O)c1=O,training,1 264 | CC(C)(C)OC(=O)N1CC(F)CC1C(N)=O,training,0 265 | N#CC1CC(F)CN1C(=O)C(N)C(c1ccc(F)cc1)c1ccc(F)cc1,training,0 266 | CC(C)(C)N(Cc1ccccc1)CC(O)c1ccc(O)c(CO)c1,training,0 267 | COc1cc(C)c2c(Oc3cccc(C(F)(F)F)c3)c(OC)cc(NC(C)CCCN3C(=O)c4ccccc4C3=O)c2n1,training,0 268 | COc1ccc2nc(Cl)cc(C)c2c1,training,0 269 | O=c1[nH]cnc2ccc(I)cc12,training,0 270 | COc1ccc2[nH]c(=O)cc(C)c2c1,training,0 271 | CCCCBr,training,0 272 | 
O=C(O)C=CC(=O)O,training,0 273 | O=C(O)c1ccc2c(c1)C(=O)OC2=O,training,1 274 | O=CC=C(c1ccccc1)c1ccccc1,training,1 275 | CCCCCCCCCCCCBr,training,1 276 | O=C1OC2(c3ccc(O)cc3Oc3cc(O)ccc32)c2ccc(N=C=S)cc21,training,1 277 | FOC1=C2CCC(C2)C1,training,1 278 | CC=CC=CC=O,training,1 279 | CN1C2CCC1CC(O)C2,training,1 280 | Clc1nc(Cl)nc(Cl)n1,training,1 281 | CC1=C(C=O)C(C)(C)CC=C1,training,1 282 | CCC=C1OC(=O)c2ccccc21,training,1 283 | CCCCCn1c(=O)[nH]c(=O)c2[nH]c(Cl)nc21,training,1 284 | N=C1CC(=Nc2ccc(N)cc2)C(=N)CC1=Nc1ccc(N)cc1,training,1 285 | CC(=O)C(C)=O,training,1 286 | CCCCCCCCCCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1 287 | C=CC(=O)OCC(COC(=O)C=C)C(CC)OC(=O)C=C,training,1 288 | CCCCNC(=O)OC#CCI,training,1 289 | CCCCCCCCCCCCCCCCS(=O)(=O)OC,training,1 290 | O=C1OC(=O)C2CCCCC12,training,1 291 | O=C(OOC(=O)c1ccccc1)c1ccccc1,training,1 292 | COc1cc(C(=O)CC(=O)C(C)(C)C)cc(OC)c1OC,test,0 293 | CNC(C)c1cc(C(F)(F)F)cc(C(F)(F)F)c1,test,0 294 | O=C(O)C(=NOCc1ccccc1)C(=O)O,test,0 295 | CCCOc1cccc(C(=O)O)c1,test,0 296 | Cc1ccc(N)cc1S(N)(=O)=O,test,0 297 | CCOC(=O)c1ccc(N)cc1,test,0 298 | CCOC(=O)c1ccccc1C(=O)OCC,test,0 299 | CCOC(=O)CC(=O)c1ccccc1,test,0 300 | COC(=O)c1ccccc1O,test,0 301 | Nc1ccc(S(=O)(=O)O)cc1,test,0 302 | CC(C)(C)OC(=O)c1cccc(N)c1,test,0 303 | COc1cc(C=O)ccc1O,test,0 304 | CCCCCC(=O)CC(=O)c1ccccc1,test,1 305 | CCc1ccc(CC)c(C(=O)CC(C)=O)c1,test,1 306 | O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1,test,1 307 | Nc1ccc(OCCO)c(N)c1,test,1 308 | O=Cc1cc(O)ccc1Br,test,1 309 | COc1cc(C)ccc1O,test,1 310 | Nc1ccc(Cl)c(Cl)c1,test,1 311 | C=CCc1cc(C)c(OC)c(O)c1,test,1 312 | C=CCc1ccc(OC)cc1,test,1 313 | Nc1ccc(N)cc1,test,1 314 | CCCCCC(C=O)=Cc1ccccc1,test,1 315 | CCCCC(C=O)=Cc1ccccc1,test,1 316 | CC(=O)C=Cc1ccccc1,test,1 317 | CCCc1ccc(O)c(OC)c1,test,1 318 | CC=Cc1ccc(O)c(OC)c1,test,1 319 | CCCCCCCCCCCCOC(=O)c1cc(O)c(O)c(O)c1,test,1 320 | Oc1cccc(O)c1,test,1 321 | COc1ccc(C=O)cc1OC,test,1 322 | O=Cc1cc(O)ccc1C=C(CC(=O)O)C(=O)O,test,1 323 | CC(=O)C(=O)c1ccccc1,test,1 324 | 
Cc1cc(N)ccc1N,test,1 325 | Nc1cc([N+](=O)[O-])cc(Cl)c1O,test,1 326 | Cc1ccc(NCCO)cc1O,test,1 327 | CC=Cc1ccc(O)c(OC)c1C,test,1 328 | CCN(CCNS(C)(=O)=O)c1ccc(N)c(C)c1,test,1 329 | NCc1cccc(CN)c1,test,1 330 | CC(C)N(C(=N)N)C(=N)Nc1ccc(Cl)c(Cl)c1,test,1 331 | Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,test,1 332 | CCCN(CCC)c1c([N+](=O)[O-])cc(C(F)(F)F)cc1[N+](=O)[O-],test,1 333 | CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1,test,0 334 | Cc1cc(O)ccc1N,test,1 335 | O=C(O)Cc1ccc(C(=O)O)cc1,test,1 336 | Nc1ccc(N(CO)CCO)cc1[N+](=O)[O-],test,1 337 | C=CCc1cc(OC)c(O)cc1C,test,1 338 | OCC=Cc1ccccc1,test,1 339 | O=CC=Cc1ccccc1,test,1 340 | CC(C=O)Cc1ccc(C(C)C)cc1,test,1 341 | C=CCc1ccc(O)c(OC)c1,test,1 342 | O=[N+]([O-])c1ccc(CBr)cc1,test,1 343 | Cc1ccc(CCC=O)cc1,test,1 344 | COc1ccc(C(C)=O)cc1,test,0 345 | CCOC(=O)C(Cc1ccc(O)cc1)NC(=O)OC(C)(C)C,test,0 346 | CC=Cc1cc(C)c(O)c(OC)c1,test,1 347 | O=[N+]([O-])c1ccc(S(=O)(=O)O)c([N+](=O)[O-])c1,test,1 348 | Nc1cccc(N)c1,test,1 349 | CCOCc1cc(OC)c(B(O)O)c(OC)c1,val,0 350 | CCC(N)c1ccccc1,val,0 351 | CCCOc1ccc(Br)c(C(=O)O)c1,val,0 352 | NS(=O)(=O)c1cccc(CCCCOCCCCCCBr)c1,val,0 353 | COc1ccc(C=O)cc1O,val,0 354 | O=C(O)c1ccc(O)cc1,val,0 355 | Nc1ccccc1C(=O)O,val,0 356 | O=Cc1ccccc1,val,0 357 | Clc1ccccc1,val,0 358 | C=CCc1ccc(O)c(OC(C)C)c1,val,0 359 | CCC(=O)c1cccc(Cl)c1,val,0 360 | CCCOC(=O)c1ccc(O)cc1,val,0 361 | Nc1ccc(S(N)(=O)=O)cc1,val,0 362 | CC=Cc1ccc(OC)cc1,val,1 363 | Nc1ccc(NCCO)c([N+](=O)[O-])c1,val,1 364 | O=C(CC(=O)C(F)(F)F)c1ccccc1,val,1 365 | CNc1ccc(O)cc1,val,1 366 | Cc1ccc(N)cc1O,val,1 367 | C=CCc1cc(C)c(O)c(OC)c1,val,1 368 | CCCCCCC(C=O)=Cc1ccccc1,val,1 369 | CC(C=O)=Cc1ccccc1,val,1 370 | CC(C=O)c1ccccc1,val,1 371 | Nc1ccccc1,val,1 372 | BrCc1ccccc1,val,1 373 | CC=CC(=O)Oc1c(C(C)CCCCCC)cc([N+](=O)[O-])cc1[N+](=O)[O-],val,1 374 | CC=Cc1ccc(O)c(OC(C)C)c1,val,1 375 | CNc1cccc(NC)c1,val,1 376 | COc1ccc(NC(=O)CC(C)=O)cc1,val,1 377 | CC(=O)CC(=O)c1cc(C)c(C)c(C)c1C,val,1 378 | CCC(=O)C=Cc1ccc(OC)cc1,val,1 379 | Oc1ccc(O)cc1,val,1 380 | 
CC(=O)C(C)C(=O)c1ccccc1,val,1 381 | Nc1cccc(O)c1,val,1 382 | O=C(Cl)c1ccc(F)c(Cl)c1,val,1 383 | CCOC(=O)CC(=O)c1cc(C)c(C)c(C)c1C,val,1 384 | CCOC(=O)c1ccc(CBr)cc1,val,1 385 | N#Cc1c(Cl)c(Cl)c(Cl)c(C#N)c1Cl,val,1 386 | CCCc1ccc(OC)c(O)c1,val,1 387 | CCOC(=O)c1ccc(I)cc1,val,1 388 | O=CCc1ccccc1,val,1 389 | CC(C=O)Cc1ccc(C(C)(C)C)cc1,val,1 390 | CC(CC(=O)Oc1ccc(S(=O)(=O)O)cc1)CC(C)(C)C,val,1 391 | CN(C)c1ccc(N=O)cc1,val,1 392 | CCCOC(=O)c1cc(O)c(O)c(O)c1,val,1 393 | CCOc1cc(C=O)ccc1O,val,0 394 | COC(=O)c1ccc(O)cc1,val,0 395 | CC(=O)CC(=O)c1cc(C)ccc1C,val,1 396 | Nc1ccccc1O,val,1 397 | Nc1ccc(N)c([N+](=O)[O-])c1,val,1 398 | CCCCCCCCC(=O)NCCC(CCC(=O)O)c1ccccc1S(=O)(=O)O,val,1 399 | COc1cc(CC#N)c([N+](=O)[O-])cc1C(F)(F)F,val,0 400 | O=C(O)c1ccccc1O,val,0 401 | N#CSc1ccc(N)c([N+](=O)[O-])c1,val,1 402 | CC(C)CC=C(C=O)c1ccccc1,val,1 403 | CCCCCCCCCCCCCC[N+](C)(C)Cc1ccccc1,val,1 404 | CC=Cc1ccc(OC(C)C)c(OC)c1,val,1 405 | COC(=O)c1ccc(CBr)cc1,val,1 406 | Cc1cccc(O)c1O,val,1 407 | -------------------------------------------------------------------------------- /data/ADMETlab_data/CYP1A2-sub_canonical.csv: -------------------------------------------------------------------------------- 1 | smiles,group,CYP1A2-sub 2 | C=C(NC)C(=O)O,training,0 3 | O=P(O)(O)O[C@@H]1C(O)[C@H](OP(=O)(O)O)[C@@H](O)C(O)[C@H]1O,training,0 4 | C[C@@H](C(=O)N1CCOCC1)N1CC[C@H](NS(=O)(=O)c2n[nH]c(-c3ccc(Cl)s3)n2)C1=O,training,0 5 | O=C(Nc1ccccn1)Nc1cccc2c1[C@H]1CCCN1C2=O,training,0 6 | C1CCN(C2CCNCC2)CC1,training,0 7 | c1ccc([C@@H]2CO2)cc1,training,0 8 | CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,training,0 9 | Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](CC(C)C)NC(=O)c1ccc(N)cc1,training,0 10 | CC(C)C[C@H](N)P(=O)(O)O,training,0 11 | OCCOCCOCCO,training,0 12 | CN1CC[C@]23c4c5ccc(O)c4O[C@H]2CCC[C@H]3[C@H]1C5,training,0 13 | CSCC[C@@H](NC(=O)CCC(=O)O)C(=O)O,training,0 14 | O=C1NC(=O)C(Cc2ccc3cc(OCc4ccccc4F)ccc3c2)S1,training,0 15 | 
Cc1c(NC(=O)NC(=O)c2ccc(Cl)cc2Cl)ccc(OCCCC(=O)O)c1C,training,0 16 | CN(C)C[C@@H](O)COc1ccc(Nc2nccc(Nc3cc(Cl)ccc3Cl)n2)cc1,training,0 17 | CN(CC[C@H](N)CC(=O)N[C@H]1C=C[C@H](N2C=C[C@@](N)(O)NC2=O)O[C@@H]1C(=O)O)C(=N)N,training,0 18 | CC(CC1c2ccccc2CCc2ccccc21)CN(C)C,training,0 19 | CCOC(=O)c1c(C)nn(-c2cccc([N+](=O)[O-])c2)c1C,training,0 20 | N[C@@H](C[SeH](=O)=O)C(=O)O,training,0 21 | OC[C@H]1O[C@@H](n2cnc3c2N=CNC[C@H]3O)C[C@@H]1O,training,0 22 | CCOC(=O)c1c(C)nn(-c2ccc(N)cc2)c1C,training,0 23 | O=c1ncccn1[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,training,0 24 | O=c1ccn([C@@H]2O[C@@H](COP(=O)(O)OP(=O)(O)O[C@@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3F)[C@H](O)[C@H]2O)c(=O)[nH]1,training,0 25 | NCc1ccccc1CC(=O)N[C@@H]1C(=O)N2C(C(=O)O)=C(CSc3nnnn3CC(=O)O)CS[C@H]12,training,0 26 | CC(=O)[C@@H](O)[C@H](O)COP(=O)(O)O,training,0 27 | CC1=CC(=O)N2CC(=O)N(C)c3ccc(Cl)cc3C2(c2ccccc2)O1,training,0 28 | CC1(C)SCCN(S(=O)(=O)c2ccc(OCC#CCN)cc2)[C@H]1C(=O)NO,training,0 29 | Cc1ccc(C(=O)Nc2ccc(S(=O)(=O)O)c3cc(S(=O)(=O)O)cc(S(=O)(=O)O)c23)cc1NC(=O)c1cccc(NC(=O)Nc2cccc(C(=O)Nc3cc(C(=O)Nc4ccc(S(=O)(=O)O)c5cc(S(=O)(=O)O)cc(S(=O)(=O)O)c45)ccc3C)c2)c1,training,0 30 | O=C(Nc1ccc2[nH]ccc2c1)c1cc(F)cc(N2CCOCC2)c1,training,0 31 | CN(C(=O)N1CC(c2cc(F)ccc2F)=C[C@H]1c1ccccc1)C1CCNCC1,training,0 32 | Nc1ncnc2c1ncn2CCOCP1(=O)OCC[C@@H](c2cccc(Cl)c2)O1,training,0 33 | CCOC(=O)N1CCC(Nc2cc(C)ccn2)CC1,training,0 34 | N#C/C(=C1/SC[C@@H](c2ccc(Cl)cc2Cl)S1)n1ccnc1,training,0 35 | CC(C)(CO)[C@@H](O)C(=O)OP(=O)(O)OC[C@@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@H]1O,training,0 36 | O=C(O)c1ccccc1/N=N/c1ccc(O)cc1,training,0 37 | C[C@H](NC(=O)[C@@H]1CCCN1C(=O)[C@@H]1CCCN1C(=O)[C@@H](O)[C@H](N)Cc1ccccc1)C(N)=O,training,0 38 | COc1cc(O)c(C(=O)/C=C/c2ccc(O)cc2)cc1CC=C(C)C,training,0 39 | C[C@H](O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O[C@@H]1O[C@H](CO)[C@H](O)[C@H](O)[C@H]1O[C@@H]1O[C@@H](C)[C@@H](O)[C@@H](O)[C@@H]1O,training,0 40 | NCCC(=O)NCC[C@H](N)C(=O)O,training,0 41 | 
O=C1NC(=O)C(c2ccc(Oc3ccccc3)cc2)(N2CCN(c3ncccn3)CC2)C(=O)N1,training,0 42 | O=C(O)C[C@H]1CC[C@@H](C(=O)O)N1,training,0 43 | OC[C@@H]1[C@H](O)[C@@H](O)[C@H](O)c2nc(-c3ccccc3)cn21,training,0 44 | O=C([O-])c1ccc[nH]1,training,0 45 | OC[C@H](O)CNc1ncnc2[nH]c(-c3ccccc3)c(-c3ccccc3)c12,training,0 46 | Cc1ncc(-c2ccnc(Nc3ccc(N4CCN(C(=O)CO)CC4)cc3)n2)n1C(C)C,training,0 47 | Cc1ncc(COP(=O)(O)O)c(CN[C@H](CCC(=O)O)C(=O)O)c1O,training,0 48 | CCC(CC)(CC(=O)Nc1cccc(/C=C/c2nc(C3CCC3)cs2)c1)C(=O)O,training,0 49 | Cn1sc(=O)c2cc(S(N)(=O)=O)ccc21,training,0 50 | CO[C@@H]1[C@@H](OC(N)=O)[C@@H](O)[C@H](Oc2ccc3c(O)c(NC(=O)c4ccc(O)c(CC=C(C)C)c4)c(=O)oc3c2C)OC1(C)C,training,0 51 | Nc1ccc2cc3ccc(N)cc3nc2c1,training,0 52 | NCC(=O)Nc1ccc(OCc2ccccc2)cc1,training,0 53 | C[C@H]1CNC(=O)c2[nH]c3ccc(C(=O)Nc4nc(C(=O)NCCN(C)C)cs4)cc3c21,training,0 54 | Cc1ncc(CO)c(C=O)c1O,training,0 55 | COc1cc(CC2=C(N)N=C(N)[N+]=C2)cc(OC)c1OC,training,0 56 | N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O,training,0 57 | Cc1cccc([C@H](C)c2c[nH]cn2)c1C,training,0 58 | C[C@H]1CN=C(N)c2sccc2O1,training,0 59 | CC(C)N=c1cc2n(-c3ccc(Cl)cc3)c3ccccc3nc-2cc1Nc1ccc(Cl)cc1,training,0 60 | Brc1cnc2c(NCc3cncnc3)cc(-c3ccccc3)cn12,training,0 61 | CCOc1cccc(-c2ccc(NC(=O)/C(C#N)=C(/C)O)cc2)c1,training,0 62 | CC(C)(C)C(=O)Oc1ccc2c(c1)nc(NC(=O)c1cccc([N+](=O)[O-])c1)n2CCCO,training,0 63 | OC[C@@H]1O[C@@H](OCCCCCCC2CCCCC2)[C@H](O)[C@@H](O)[C@@H]1O[C@@H]1O[C@@H](CO)[C@H](O)[C@@H](O)[C@@H]1O,training,0 64 | CO[C@H]1C[C@H]2OC[C@@]2(OC(C)=O)[C@H]2[C@H](OC(=O)c3ccccc3)[C@]3(O)C[C@H](OC(=O)[C@H](O)[C@@H](NC(=O)OC(C)(C)C)c4ccccc4)C(C)=C([C@@H](OC)C(=O)[C@]12C)C3(C)C,training,0 65 | CCC(CC)NC(=O)C[C@@H](C(N)=O)C(C)(C)C,training,0 66 | O=C(O)Cn1c(=O)n(Cc2ccc(Br)cc2F)c(=O)c2ccc(Cl)cc21,training,0 67 | O=C1N[C@@H](Cc2ccc(O)cc2)C(=O)N[C@H]1Cc1ccc(O)cc1,training,0 68 | CC(C)CCC[C@@](C)(O)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@@]21C,training,0 69 | CC(C)OP(=O)(O)OC[C@@H](N)C(=O)O,training,0 70 | 
CCCNC(=O)[C@H]1O[C@@H]1C(=O)N[C@H](C(=O)N1CCC[C@H]1C(=O)OC)[C@@H](C)CC,training,0 71 | N[C@@H](CC(=O)N1CCC[C@H]1CNC(=O)c1ccccc1)Cc1ccccc1F,training,0 72 | CC[C@@H](C)[C@H](N)C(=O)O,training,0 73 | O=c1cnn([C@@H]2O[C@@H](COP(=O)(O)O)[C@H](O)[C@H]2O)c(=O)[nH]1,training,0 74 | COc1cc2c(Nc3c(Cl)ccc4c3OCO4)ncnc2cc1OCCCN1CCCCC1,training,0 75 | CCCCCCCCCCC1=C(C)C(=O)C(OC)=C(OC)C1=O,training,0 76 | CCOC(O)=N[C@@H]1CC[C@@H]2[C@@H](C1)C[C@H]1C(=O)O[C@H](C)[C@H]1[C@H]2/C=C/c1ccc(-c2cccc(F)c2)cn1,training,0 77 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,0 78 | O=C(O)[C@@H](Cc1ccccc1)[C@H](Cc1ccc2c(c1)OCO2)C(=O)O,training,0 79 | O=C(Cc1ccc(O)cc1)Nc1ncc(-c2ccc(O)cc2)nc1Cc1ccccc1,training,0 80 | N[C@@H](CCSC(F)F)C(=O)O,training,0 81 | CCC(=O)N(c1ccccc1F)C1(c2ccccc2)CCN(CCn2nnn(CC)c2=O)CC1,training,0 82 | CN1CCC23NC(=O)CC(c4cc(Cl)ccc4O2)C3C1,training,0 83 | CC(C)NCC(O)COc1cccc2c1C=CC2,training,0 84 | NCCCCN,training,0 85 | Nc1nc2c(c(=O)[nH]1)N=C(COP(=O)(O)OP(=O)(O)OP(=O)(O)OP(=O)(O)OC[C@@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@H]1O)CN2,training,0 86 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1,training,0 87 | O=C(c1ccccc1)c1ccc(O)c(O)c1[N+](=O)[O-],training,0 88 | COc1ccc(-c2ccccc2)cc1N1CC(=O)NS1(=O)=O,training,0 89 | CN1CCN(c2ccccc2C/N=N\C(N)=S)CC1,training,0 90 | Oc1c(Br)cc(-c2nc3ccccc3o2)cc1Br,training,0 91 | Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1,training,0 92 | CS(=O)(=O)CC[C@@H](N)C(=O)O,training,0 93 | CC(C)CCCCCCCCC(=O)N(C)[C@H](CO)C(=O)N[C@H](C)C(=O)NCC(=O)N(C)[C@@H]1C(=O)N[C@H](C)C(=O)N[C@H](C(=O)O)Cc2ccc(O)c(c2)-c2cc1ccc2O,training,0 94 | CCC(N)=O,training,0 95 | CC(=O)CCC(=O)O,training,0 96 | CCOP(=O)(O)OP(=O)(O)O,training,0 97 | NC(=O)[C@@H](CS)NC(=O)CCCCCNC(=O)[C@@H](Cc1ccc(C(F)(F)P(=O)(O)O)cc1)NC(=O)[C@@H](CC(=O)O)NC(=O)Cc1ccc(C(F)(F)P(=O)(O)O)cc1,training,0 98 | O=C(N/N=C/c1cc(Br)c(O)c(Br)c1O)c1ccc(Cl)cc1,training,0 99 | COC(=O)[C@H]1[C@@H](OC(=O)c2ccccc2)C[C@@H]2CC[C@H]1N2C,training,0 100 | O=c1[nH]c(=O)n(COCCO)cc1Cc1ccccc1,training,0 101 | 
O=C(CCC(=O)Nc1ccc2c(c1)C(=O)C(=O)NC2=O)Nc1cccc(OCCF)c1,training,0 102 | C1=CC2C3=CC=CC4C5C=CC=CN5[Pt](N2C=C1)N34,training,0 103 | C[C@@H](NC(N)=O)C(=O)O,training,0 104 | Cc1ncc([N+](=O)[O-])n1CCO,training,0 105 | O=C1/C(=N\O)c2c(/C=C/c3ccccc3)cccc2N1Cc1cc(F)cc2c1OCOC2,training,0 106 | CC(C)=CCNc1ncnc2[nH]cnc12,training,0 107 | C[C@@H](N)[C@H]1N[C@@H](CN)C(=O)N1CC(=O)O,training,0 108 | O=S(=O)(O)c1ccc(O)c(/N=N/c2c(S(=O)(=O)O)cc3c(S(=O)(=O)O)c(Nc4ncnc(Nc5ccc6c(O)c(/N=N/c7cc(S(=O)(=O)O)ccc7O)c(S(=O)(=O)O)cc6c5S(=O)(=O)O)n4)ccc3c2O)c1,training,0 109 | CN(C)C[C@H](O)Cn1c2ccc(Br)cc2c2cc(Br)ccc21,training,0 110 | O=C(O)[C@H](S)[C@H](S)C(=O)O,training,0 111 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)(C)C)cc2)nc(-c2ncccn2)nc1OCCO,training,0 112 | O=C(O)c1cn(-c2ccc(F)cc2)c2cc(N3CCNCC3)c(F)cc2c1=O,training,0 113 | CC(C)(C)c1onc([O-])c1C[C@@H]([N+])C(=O)[O-],training,0 114 | O=C1NC(=O)c2c1c(-c1ccccc1)cc1[nH]c3ccc(O)cc3c21,training,0 115 | O=c1ccn([C@@H]2O[C@@H](CO)[C@@H](OP(=O)(O)O)[C@H]2F)c(=O)[nH]1,training,0 116 | OC(c1ccccc1)(c1ccccc1)C12CC[N+](CCOCc3ccccc3)(CC1)CC2,training,0 117 | NC(=O)NO,training,0 118 | CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CS[C@H](c2cccs2)CN(CC(=O)O)C1=O,training,0 119 | OC[C@H]1O[C@@H](n2cnc3cc(Cl)c(Cl)cc32)[C@H](O)[C@@H]1O,training,0 120 | CNCCNS(=O)(=O)c1cccc2cnccc12,training,0 121 | CCNC(=O)[C@H]1O[C@@H](n2cnc3c(N)nc(C#CCC4CCC(C(=O)OC)CC4)nc32)[C@H](O)[C@@H]1O,training,0 122 | O=C([O-])[O-],training,0 123 | CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0 124 | CC(C)(C)NCC(O)COc1cccc2c1CCC(=O)N2,training,0 125 | CC(C)[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O,training,0 126 | CC1(C)Cc2c(-c3ccccc3)c(-c3ccc(Cl)cc3)c(CC(=O)O)n2C1,training,0 127 | NCC1CCC(C(=O)O)CC1,training,0 128 | O=C(N[C@@H]1Cc2ccccc2NC1=O)c1cc2cc(Cl)sc2[nH]1,training,0 129 | Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](Cc1ccccc1)NC(=O)[C@H](C(C)C)N1CCCNC1=O,training,0 130 | 
CO[C@@]1(NC(=O)Cc2cccs2)C(=O)N2C(C(=O)O)=C(COC(N)=O)CS[C@@H]21,training,0 131 | CN(C)CCOC1=Cc2ccccc2Sc2ccc(Cl)cc21,training,1 132 | CN(C)CCc1c[nH]c2ccc(C[C@H]3COC(=O)N3)cc12,training,1 133 | O=C1Cc2cc(CCN3CCN(c4nsc5ccccc45)CC3)c(Cl)cc2N1,training,1 134 | CC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O,training,1 135 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C=O)[C@H]3[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,1 136 | CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,1 137 | Nc1nc(N)c2nc(-c3ccccc3)c(N)nc2n1,training,1 138 | CN(C)CCOc1ccc(/C(=C(/CCCl)c2ccccc2)c2ccccc2)cc1,training,1 139 | Clc1ccc2nsnc2c1NC1=NCCN1,training,1 140 | c1ccc2[nH]c(-c3cscn3)nc2c1,training,1 141 | CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2,training,1 142 | O=c1[nH]c(=O)n(C2CCCO2)cc1F,training,1 143 | Nc1c2c(nc3ccccc13)CCCC2,training,1 144 | CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,training,1 145 | CCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,1 146 | CS(=O)(=O)c1ccc(C2=C(c3ccccc3)C(=O)OC2)cc1,training,1 147 | CC(C)c1nc(CN(C)C(=O)N[C@H](C(=O)N[C@@H](Cc2ccccc2)C[C@H](O)[C@H](Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1,training,1 148 | Nc1nc2ccc(OC(F)(F)F)cc2s1,training,1 149 | CO[C@H]1/C=C/O[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N/N5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)/C(C)=C\C=C\[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C,training,1 150 | CO[C@H]1/C=C/O[C@@]2(C)Oc3c(C)c(O)c4c(c3C2=O)C2=NC3(CCN(CC(C)C)CC3)NC2=C(NC(=O)/C(C)=C\C=C\[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C)C4=O,training,1 151 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/CO)C(C)(C)CCC1,training,1 152 | C#CCN[C@@H]1CCc2ccccc21,training,1 153 | CN/C(=C\[N+](=O)[O-])NCCSCc1ccc(CN(C)C)o1,training,1 154 | C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,training,1 155 | NC(=O)c1cnccn1,training,1 156 | CCCNCC(O)COc1ccccc1C(=O)CCc1ccccc1,training,1 157 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,training,1 158 | 
COc1cc(NC(C)CCCN)c2ncccc2c1,training,1 159 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1,training,1 160 | Cc1ccc(=O)n(-c2ccccc2)c1,training,1 161 | O=c1[nH]c2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,training,1 162 | Cc1cc(=O)n(-c2ccccc2)n1C,training,1 163 | COc1ccnc(CS(=O)c2nc3ccc(OC(F)F)cc3[nH]2)c1OC,training,1 164 | O=C1O[Pt]2([N+][C@@H]3CCCC[C@H]3[N+]2)OC1=O,training,1 165 | COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1,training,1 166 | Cc1cc2c(s1)Nc1ccccc1N=C2N1CCN(C)CC1,training,1 167 | CNCCC=C1c2ccccc2CCc2ccccc21,training,1 168 | CN1CCC[C@H]1c1cccnc1,training,1 169 | CN1CCN2c3ccccc3Cc3ccccc3C2C1,training,1 170 | CC1=CC(=O)c2ccccc2C1=O,training,1 171 | COc1ccc2[nH]cc(CCNC(C)=O)c2c1,training,1 172 | CNCCCC12CCC(c3ccccc31)c1ccccc12,training,1 173 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNC(C)C3)c(F)c21,training,1 174 | CCCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,1 175 | CC(C)Cn1cnc2c(N)nc3ccccc3c21,training,1 176 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,training,1 177 | COC1=C(OC)C(=O)C(CCCCCCCCCCO)=C(C)C1=O,training,1 178 | CN1C(=O)NC(=O)C(C)(C2=CCCCC2)C1=O,training,1 179 | COC1=CC(=O)C[C@@H](C)[C@]12Oc1c(Cl)c(OC)cc(OC)c1C2=O,training,1 180 | CN1[C@H]2CCC[C@@H]1CC(NC(=O)c1nn(C)c3ccccc13)C2,training,1 181 | CN[C@@H]1CCc2[nH]c3ccc(C(N)=O)cc3c2C1,training,1 182 | CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1 183 | O=c1[nH]cc(F)c(=O)[nH]1,training,1 184 | Fc1ccc(C(c2ccc(F)cc2)N2CCN(C/C=C/c3ccccc3)CC2)cc1,training,1 185 | O=C(NCC1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F,training,1 186 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](C)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,1 187 | CCO,training,1 188 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2O,training,1 189 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,1 190 | CNCC[C@H](Oc1cccc2ccccc12)c1cccs1,training,1 191 | 
CN(C)CCC=C1c2ccccc2COc2ccccc21,training,1 192 | CCN(CC)C(=S)SSC(=S)N(CC)CC,training,1 193 | CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1C/C=C\CCCC(=O)O,training,1 194 | CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,training,1 195 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(C)=O)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1,training,1 196 | CN(C)N=Nc1[nH]cnc1C(N)=O,training,1 197 | CN(C)CCC=C1c2ccccc2C=Cc2ccccc21,training,1 198 | COC(=O)[C@H](c1ccccc1Cl)N1CCc2sccc2C1,training,1 199 | COc1cc(N)c(Cl)cc1C(=O)N[C@@H]1CCN(CCCOc2ccc(F)cc2)C[C@@H]1OC,training,1 200 | COc1ccccc1OCCNCC(O)COc1cccc2[nH]c3ccccc3c12,training,1 201 | O=NN(CCCl)C(=O)NCCCl,training,1 202 | NC(=O)N1c2ccccc2C=Cc2ccccc21,training,1 203 | CCCCN1CCCCC1C(=O)Nc1c(C)cccc1C,training,1 204 | O=C1CN=C(c2ccccn2)c2cc(Br)ccc2N1,training,1 205 | CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,training,1 206 | CN1CCCC(n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1,training,1 207 | C[C@@H](O[C@H]1OCCN(Cc2n[nH]c(=O)[nH]2)[C@H]1c1ccc(F)cc1)c1cc(C(F)(F)F)cc(C(F)(F)F)c1,training,1 208 | O=C1CN2Cc3c(ccc(Cl)c3Cl)N=C2N1,training,1 209 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,1 210 | Cc1c(N(C)C)c(=O)n(-c2ccccc2)n1C,training,1 211 | Cc1[nH]cnc1CN1CCc2c(c3ccccc3n2C)C1=O,training,1 212 | CCCSc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1 213 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1,training,1 214 | Clc1cccc(Cl)c1NC1=NCCN1,training,1 215 | CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1 216 | N=C(N)c1ccc(OCCCCCOc2ccc(C(=N)N)cc2)cc1,training,1 217 | CC(c1cc2ccccc2s1)N(O)C(N)=O,training,1 218 | CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12,training,1 219 | CCC(=O)NCC[C@@H]1CCc2ccc3c(c21)CCO3,training,1 220 | C[C@@H](NCCCc1cccc(C(F)(F)F)c1)c1cccc2ccccc12,training,1 221 | O=C1CCC(N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1 222 | COc1ccc([C@@H]2CC(=O)c3c(O)cc(O)cc3O2)cc1O,training,1 223 | Cc1oncc1C(=O)Nc1ccc(C(F)(F)F)cc1,training,1 224 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-],training,1 225 | O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1,training,1 226 | 
Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1,training,1 227 | Cn1c(=O)c2[n-]cnc2n(C)c1=O,training,1 228 | CN(C)S(=O)(=O)c1ccc2c(c1)N(CCCN1CCC(CCO)CC1)c1ccccc1S2,training,1 229 | Cc1ccc(-c2ncc(Cl)cc2-c2ccc(S(C)(=O)=O)cc2)cn1,training,1 230 | C[C@H]1CNCCc2ccc(Cl)cc21,training,1 231 | CCCC(=O)OCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl,training,1 232 | CCCN(CCc1cccs1)[C@H]1CCc2c(O)cccc2C1,training,1 233 | CC1=NN(c2ccc(C)c(C)c2)C(=O)/C1=N\Nc1cccc(-c2cccc(C(=O)O)c2)c1O,training,1 234 | CN1CC2c3ccccc3Oc3ccc(Cl)cc3C2C1,training,1 235 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O,training,1 236 | Cn1c(CCCC(=O)O)nc2cc(N(CCCl)CCCl)ccc21,training,1 237 | CC(=O)[C@@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@@H](c3ccc(N(C)C)cc3)C[C@@]21C,training,1 238 | CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21,training,0 239 | CC(C)CN(C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,training,0 240 | C=C(C)[C@@H]1CC[C@@]2(C)O[C@H]2C1,training,0 241 | CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)N/C=C/S,training,0 242 | CCCOc1ccc(S(=O)(=O)NCCC2CCCN2C)cc1-c1nc(=O)c2c([nH]1)c(CCC)nn2C,training,0 243 | C=C[C@H](N)C(=O)O,training,0 244 | CC(=O)OC[C@@H]1CS[C@H]2[C@H](NC(=O)CCCC[C@@H](NC(=O)C[N+])C(=O)[O-])C(=O)N2[C@H]1C(=O)[O-],training,0 245 | CCCCCCCCCCS,training,0 246 | CC(C)N1CCC(NC(=O)c2cc3ccccc3n2CC(=O)Nc2ccc(Cl)cc2)CC1,training,0 247 | N[C@@H](CP(=O)(O)O)C(=O)O,training,0 248 | COc1c(C(=O)NC2CCN3CCCC2C3)ccc(N)c1Cl,training,0 249 | Cc1cnc(NCC(F)(F)c2ccccn2)c(=O)n1CC(=O)NCc1ncccc1F,training,0 250 | NS(=O)(=O)c1cc(C(=O)O)cc(N2CCCC2)c1Oc1ccccc1,training,0 251 | COCCNS(=O)(=O)c1ccc(Nc2nccc(-c3cnc(C)n3C(C)C)n2)cc1,training,0 252 | COc1cc(C2c3cc4c(cc3C(O)C(CO)C2C(=O)O)OCO4)cc(OC)c1OC,training,0 253 | O=c1[nH]cnc2sc3c(c12)CCCC3,training,0 254 | Cc1cc(O)c(C(=O)N[C@@H](C(=O)N[C@@H]2C(=O)N3C(C(=O)O)=C(CSc4nnnn4C)CS[C@H]23)c2ccc(O)cc2)cn1,training,0 255 | 
CC[C@H]1OC(=O)[C@H](C)C(=O)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(OC)C[C@@H](C)C(=O)[C@H](C)[C@H]2N(CCCCn3cnc(-c4cccnc4)c3)C(=O)O[C@]12C,training,1 256 | CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,training,1 257 | O=c1[nH]c2ccccc2n1CCCN1CCC(n2c(=O)[nH]c3cc(Cl)ccc32)CC1,training,1 258 | CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,training,1 259 | C(=Cc1ccccc1)CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,1 260 | O=c1[nH]c2cc(Cl)ccc2o1,training,1 261 | CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1 262 | CC(C)NCC(O)COc1ccc(CCOCC2CC2)cc1,training,1 263 | COc1cc(C(C)=O)ccc1OCCCN1CCC(c2noc3cc(F)ccc23)CC1,training,1 264 | CCOc1ccc(Cc2cc([C@@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)ccc2Cl)cc1,training,1 265 | COc1ccc(-n2nc(C(N)=O)c3c2C(=O)N(c2ccc(N4CCCCC4=O)cc2)CC3)cc1,training,1 266 | CN=C(O)c1ccccc1Sc1ccc2c(/C=C/c3ccccn3)[nH]nc2c1,training,1 267 | Nc1cccc2c1C(=O)N(C1CCC(=O)NC1=O)C2=O,training,1 268 | c1cc(Nc2cc(C3CC3)n[nH]2)nc(Nc2ccc3[nH]cnc3c2)n1,training,0 269 | C=C(O)C1=C(C)C2=Cc3c(C)c(CCC(=O)O)c4n3[Fe@]35n6c(c(C)c(C(C)=O)c6=CC6=[N+]3C(=C4)C(CCC(=O)O)=C6C)=CC1=[N+]25,training,0 270 | Cn1c(=O)ccc2ccccc21,training,0 271 | CN[C@H]1[C@@H](O)[C@@H](NC)[C@H](O)[C@H]2O[C@@H]3O[C@H](C)CC(=O)[C@]3(O)O[C@H]12,training,0 272 | O=c1[nH]c(=O)n(COCCO)c(O)c1Cc1cccc(OCc2ccccc2)c1,training,0 273 | CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@@H](c3ccc(N(C)C)cc3)C[C@@]21C,training,0 274 | [N+]CCCC[C@H](N)C(N)=O,training,0 275 | CC(=O)N/C(=C\c1ccc(CC(=O)O)c(C=O)c1)C(=O)N[C@@H]1CCCCN(Cc2ccc(-c3ccccc3)cc2)C1=O,training,0 276 | C=CC1=C(C)C2=Cc3c(C=C)c(C)c4n3[Fe]35n6c(c(C)c(CCC(=O)O)c6=C(c6ccccc6)C6=[N+]3C(=C4)C(C)=C6CCC(=O)O)=CC1=[N+]25,training,0 277 | Cc1cc(C(=O)N[C@@H](C)C(=O)N[C@H](C(=O)N[C@@H](CC(C)C)C(=O)N[C@H](/C=C/C(=O)OCc2ccccc2)C[C@@H]2CCNC2=O)C(C)C)no1,training,0 278 | CC/C=C/C[C@@H]1C(=O)C=C[C@H]1CCCCCCCC(=O)O,training,0 279 | CC(C(O)c1ccc(O)cc1)N1CCC(Cc2ccccc2)CC1,training,0 280 | CC1=C(C(=O)O)N[C@H]([C@H](NC(=O)Cc2cccs2)C(=O)O)SC1,training,0 281 | 
CNc1nc[nH]c2c([C@@H]3O[C@@H](CO)[C@H](O)[C@H]3O)nnc1-2,training,0 282 | O=C(OCCN1CCOCC1)c1cccnc1Nc1cccc(C(F)(F)F)c1,training,0 283 | O[C@@H]1Cc2c(ccc3ccc4ccccc4c23)[C@@H](O)[C@@H]1O,training,0 284 | COc1ccnc2[nH]cc(-c3ccnc(N)n3)c12,training,0 285 | CN(C)C[C@H](O)COc1ccc(Nc2cc(Nc3c(F)cccc3F)ncn2)cc1,training,0 286 | Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1,training,1 287 | COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1 288 | CN1CCN(CC/C=C2/c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1,training,1 289 | CC1CN(c2cc3c(cc2F)c(=O)c(C(=O)O)cn3-c2ccc(F)cc2F)CCN1,training,1 290 | CCCN(CCC)CCc1cccc2c1CC(=O)N2,training,1 291 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1 292 | Cc1nccn1CC1CCc2c(c3ccccc3n2C)C1=O,training,1 293 | CN1CCN2c3ncccc3Cc3ccccc3C2C1,training,1 294 | COC(F)(F)C(Cl)Cl,training,1 295 | O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1,training,1 296 | CN1C(=O)CN=C(c2ccccc2F)c2cc([N+](=O)[O-])ccc21,training,1 297 | CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4C(=O)[C@H](Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)C[C@H]21,training,1 298 | CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,training,1 299 | O=C1c2cccc3c2[C@H](CCC3)CN1[C@@H]1CN2CCC1CC2,training,1 300 | CN(C)CCc1c[nH]c2ccc(Cn3cncn3)cc12,training,1 301 | CC(=O)CC(c1ccc([N+](=O)[O-])cc1)c1c(O)c2ccccc2oc1=O,training,1 302 | CNCc1ccc(-c2[nH]c3cc(F)cc4c3c2CCNC4=O)cc1,training,1 303 | O=C(NO)c1ccccc1O,test,0 304 | O=C(O)C(=O)/C=C/c1ccccc1O,test,0 305 | O=C(O)[C@@H](CS)CCCc1ccccc1,test,0 306 | CC(C)Cc1ccc(C(C)C(O)=NO)cc1,test,0 307 | C[C@](N)(C(=O)O)c1ccc(C(=O)O)cc1,test,0 308 | CC(C)c1cccc(C(C)C)c1O,test,1 309 | CC(C)/N=C(\N)N=C(N)Nc1ccc(Cl)cc1,test,1 310 | CNNCc1ccc(C(=O)NC(C)C)cc1,test,1 311 | CCOc1ccc(NC(C)=O)cc1,test,1 312 | COCCCCC(=NOCCN)c1ccc(C(F)(F)F)cc1,test,1 313 | CCN[C@@H](C)Cc1cccc(C(F)(F)F)c1,test,1 314 | CC(C)(C)NCC(O)c1cc(Cl)c(N)c(Cl)c1,test,1 315 | COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O,test,1 316 | CC(NC(C)(C)C)C(=O)c1cccc(Cl)c1,test,1 317 | OCc1ccccc1,test,1 318 | 
CC(C)OC(=O)c1cc(NC(=S)OC(C)C)ccc1Cl,test,0 319 | CO[C@H]1[C@@H](O)O[C@@H](C)[C@H](O)[C@H]1O,test,0 320 | Oc1ccc2ccccc2c1O,test,0 321 | CC(C)NCC(O)COc1cccc2ccccc12,test,1 322 | Cn1c(=O)c2[nH]cnc2n(C)c1=O,test,1 323 | Cn1cnc2c1c(=O)[nH]c(=O)n2C,test,1 324 | CC(=O)CCCCn1c(=O)c2c(ncn2C)n(C)c1=O,test,1 325 | CC[C@H](OC(C)=O)C(C[C@@H](C)N(C)C)(c1ccccc1)c1ccccc1,test,0 326 | CCN(CC)CCOC(=O)C(O)(c1ccccc1)c1ccccc1,test,0 327 | CC[C@H](OC(C)=O)C(C[C@H](C)N(C)C)(c1ccccc1)c1ccccc1,test,0 328 | CCC(=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1,test,1 329 | O=C(OCCOCCO)c1ccccc1Nc1cccc(C(F)(F)F)c1,test,0 330 | Cc1ccc(Nc2c(F)cccc2Cl)c(CC(=O)O)c1,test,1 331 | CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,test,1 332 | CN(C)CCCN1c2ccccc2Sc2ccccc21,test,1 333 | CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,test,1 334 | C[C@]12CC[C@@H]3c4ccc(OS(=O)(=O)[O-])cc4CC[C@H]3[C@@H]1CCC2=O,test,1 335 | O=C(O)C(=O)Cc1ccc(O)cc1,val,0 336 | N[C@@H](Cc1ccccc1)C(=O)O,val,0 337 | N[C@@H](CCC(=O)N[C@H](CSc1ccc([N+](=O)[O-])cc1[N+](=O)[O-])C(=O)NCC(=O)O)C(=O)O,val,0 338 | N[C@@H](CCC(=O)N[C@H](CS[C@H](O)N(O)c1ccc(Br)cc1)C(=O)NCC(=O)O)C(=O)O,val,0 339 | COC[C@@H](NC(C)=O)C(=O)NCc1ccccc1,val,0 340 | O=C(CCCl)NCc1ccccc1,val,0 341 | CC(Cc1ccccc1)NN,val,0 342 | C=CC(=O)c1ccc(CCCCCC)cc1,val,0 343 | CC(=O)Nc1ccc(O)cc1,val,1 344 | CCN(CC)CC(=O)Nc1c(C)cccc1C,val,1 345 | CC(C)C(=O)Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,val,1 346 | NCCc1ccc(O)c(O)c1,val,1 347 | NC(N)=NN=Cc1c(Cl)cccc1Cl,val,1 348 | CC(C)C[C@H](N=C(O)CN=C(O)c1cc(Cl)ccc1Cl)B(O)O,val,1 349 | NS(=O)(=O)c1ccc(CCC(=O)O)cc1,val,0 350 | C#CCN(C)C(C)Cc1ccccc1,val,1 351 | Cc1cccc(C)c1OCC(C)N,val,1 352 | C1CCOCC1,val,0 353 | CC(=O)N[C@H]1[C@H](O)C[C@H](P(=O)(O)O)O[C@H]1[C@H](O)[C@@H](O)CO,val,0 354 | O=C(O)[C@@H]1C[C@H](O)[C@@H](O)[C@H](O)O1,val,0 355 | CC(=O)N[C@@H](Cc1cccc2ccccc12)[B-](O)(O)O,val,0 356 | CN(C/C=C/C#CC(C)(C)C)Cc1cccc2ccccc12,val,1 357 | COc1ccc2cc(CCC(C)=O)ccc2c1,val,1 358 | COc1ccc2cc([C@H](C)C(=O)O)ccc2c1,val,1 359 | Cn1c(=O)c2c(ncn2C)n(C)c1=O,val,1 360 | 
CN(C)CCC(O)(c1ccccc1)c1ccccc1Cl,val,0 361 | CN(C)CCOC(c1ccccc1)c1ccccc1,val,1 362 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1 363 | CC(CN(C)C)CN1c2ccccc2Sc2ccc(C#N)cc21,val,1 364 | CN(C)CCCN1c2ccccc2CCc2ccccc21,val,1 365 | CNCCCN1c2ccccc2CCc2ccccc21,val,1 366 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CCC2=O,val,1 367 | C[C@]12CC[C@@H]3c4ccc(OS(=O)(=O)O)cc4CC[C@H]3[C@@H]1CCC2=O,val,1 368 | -------------------------------------------------------------------------------- /data/ADMETlab_data/DILI_canonical.csv: -------------------------------------------------------------------------------- 1 | smiles,group,DILI 2 | CC(=O)OCC[N+](C)(C)C,training,0 3 | C[N+](C)(C)CC(=O)[O-],training,0 4 | NCCCCCC(=O)O,training,0 5 | CS(C)=O,training,0 6 | CC(C)(CO)C(O)C(=O)NCCC(=O)O,training,0 7 | NC(=O)c1cnccn1,training,1 8 | Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1,training,0 9 | O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,training,1 10 | Nc1c2c(nc3ccccc13)CCCC2,training,0 11 | CCCSc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1 12 | NCCCC(O)(P(=O)(O)O)P(=O)(O)O,training,0 13 | NC12CC3CC(CC(C3)C1)C2,training,0 14 | NCCCNCCSP(=O)(O)O,training,0 15 | CCC1(c2ccc(N)cc2)CCC(=O)NC1=O,training,0 16 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,0 17 | CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,training,1 18 | COc1cc(NS(C)(=O)=O)ccc1Nc1c2ccccc2nc2ccccc12,training,1 19 | COc1ccc(CCN2CCC(Nc3nc4ccccc4n3Cc3ccc(F)cc3)CC2)cc1,training,0 20 | O=C(O)COc1nn(Cc2ccccc2)c2ccccc12,training,1 21 | CCc1oc2ccccc2c1C(=O)c1cc(Br)c(O)c(Br)c1,training,1 22 | CC(Cc1ccccc1)N(C)Cc1ccccc1,training,0 23 | CC(C[N+](C)(C)C)OC(N)=O,training,0 24 | OC(CCN1CCCCC1)(c1ccccc1)C1CC2C=CC1C2,training,0 25 | Oc1c(Cl)cc(Cl)cc1Sc1cc(Cl)cc(Cl)c1O,training,1 26 | CCCCN1CCCCC1C(=O)Nc1c(C)cccc1C,training,0 27 | O=C1CC2(CCCC2)CC(=O)N1CCCCN1CCN(c2ncccn2)CC1,training,0 28 | CS(=O)(=O)OCCCCOS(C)(=O)=O,training,1 29 | COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1 30 | NC(=O)N1c2ccccc2C=Cc2ccccc21,training,1 31 | CCN(CC)CCOCCOC(=O)C1(c2ccccc2)CCCC1,training,0 32 
| CCCC(C)(COC(N)=O)COC(=O)NC(C)C,training,0 33 | CC1=C(C(=O)O)N2C(=O)C(NC(=O)C(N)c3ccc(O)cc3)C2SC1,training,1 34 | Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,training,1 35 | OC(O)C(Cl)(Cl)Cl,training,0 36 | O=c1[nH]c2cc(Cl)ccc2o1,training,1 37 | Cc1cc(C2CCCCC2)n(O)c(=O)c1,training,0 38 | O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1,training,0 39 | O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O,training,1 40 | Clc1cccc(Cl)c1NC1=NCCN1,training,1 41 | CN1CCN(C2=c3ccccc3=Nc3ccc(Cl)cc3N2)CC1,training,1 42 | CN1CCC(=C2c3ccccc3C=Cc3ccccc32)CC1,training,1 43 | O=C1CN(N=Cc2ccc(-c3ccc([N+](=O)[O-])cc3)o2)C(=O)N1,training,1 44 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,1 45 | CC(=O)N(O)CCCCCNC(=O)CCC(=O)N(O)CCCCCNC(=O)CCC(=O)N(O)CCCCCN,training,0 46 | CNCCCN1c2ccccc2CCc2ccccc21,training,0 47 | CCN(CC)CCOC(=O)C1(C2CCCCC2)CCCCC1,training,0 48 | O=C(O)c1cc(-c2ccc(F)cc2F)ccc1O,training,1 49 | OCC(S)CS,training,1 50 | OCCN(CCO)c1nc(N2CCCCC2)c2nc(N(CCO)CCO)nc(N3CCCCC3)c2n1,training,1 51 | CCN(CC)C(=S)SSC(=S)N(CC)CC,training,1 52 | O=C(CCCN1CC=C(n2c(=O)[nH]c3ccccc32)CC1)c1ccc(F)cc1,training,0 53 | CC(O)(P(=O)(O)O)P(=O)(O)O,training,0 54 | CCc1cccc2c3c([nH]c12)C(CC)(CC(=O)O)OCC3,training,1 55 | CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1,training,0 56 | CC(C)(C(=O)O)c1ccc(C(O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0 57 | O=C(COc1ccc(Cl)cc1)N1CCN(Cc2ccc3c(c2)OCO3)CC1,training,1 58 | Nc1[nH]c(=O)ncc1F,training,1 59 | O=c1[nH]cc(F)c(=O)[nH]1,training,1 60 | CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,0 61 | O=C1OCCN1N=Cc1ccc([N+](=O)[O-])o1,training,1 62 | NS(=O)(=O)c1cc(C(=O)O)c(NCc2ccco2)cc1Cl,training,1 63 | CNC(C)C1CCC(N)C(OC2C(N)CC(N)C(OC3OCC(C)(O)C(NC)C3O)C2O)O1,training,0 64 | O=C(OCC(O)CO)c1ccccc1Nc1ccnc2cc(Cl)ccc12,training,1 65 | NC(N)=NCCN1CCCCCCC1,training,0 66 | NS(=O)(=O)c1cc2c(cc1Cl)NCNS2(=O)=O,training,1 67 | OCCOCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1,training,0 68 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1,training,1 69 | 
CC(=O)N(CC(O)CN(C(C)=O)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I,training,0 70 | Cc1cc(C(=O)NNCc2ccccc2)no1,training,1 71 | CC(COc1ccccc1)NC(C)C(O)c1ccc(O)cc1,training,0 72 | CNC1(c2ccccc2Cl)CCCCC1=O,training,0 73 | O=C(c1ccccc1)c1ccc2n1CCC2C(=O)O,training,1 74 | Cc1oncc1C(=O)Nc1ccc(C(F)(F)F)cc1,training,1 75 | N#Cc1ccc(C(c2ccc(C#N)cc2)n2cncn2)cc1,training,1 76 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNC(C)C3)c(F)c21,training,1 77 | CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1,training,0 78 | CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1,training,1 79 | CNCCCC12CCC(c3ccccc31)c1ccccc12,training,0 80 | COC(=O)Nc1nc2ccc(C(=O)c3ccccc3)cc2[nH]1,training,1 81 | Cc1cccc(CN2CCN(C(c3ccccc3)c3ccc(Cl)cc3)CC2)c1,training,0 82 | OC(c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)C1CCCCN1,training,0 83 | CN1CCCCC1CCN1c2ccccc2Sc2ccc(S(C)=O)cc21,training,1 84 | Cc1ncc([N+](=O)[O-])n1CCO,training,1 85 | N=c1nc(N2CCCCC2)cc(N)n1O,training,0 86 | O=C1c2c(O)ccc(O)c2C(=O)c2c(NCCNCCO)ccc(NCCNCCO)c21,training,1 87 | NCC1OC(OC2C(CO)OC(OC3C(O)C(N)CC(N)C3OC3OC(CN)C(O)C(O)C3N)C2O)C(N)C(O)C1O,training,0 88 | Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1,training,1 89 | O=C(CCNNC(=O)c1ccncc1)NCc1ccccc1,training,1 90 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc2)C1c1cccc([N+](=O)[O-])c1,training,1 91 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-],training,1 92 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)C1c1cccc([N+](=O)[O-])c1,training,1 93 | CN1Cc2c(N)cccc2C(c2ccccc2)C1,training,1 94 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNCC3)cc21,training,1 95 | O=C(O)CCc1nc(-c2ccccc2)c(-c2ccccc2)o1,training,1 96 | CCCOc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1 97 | CCN(CC)CC#CCOC(=O)C(O)(c1ccccc1)C1CCCCC1,training,0 98 | NCCC(O)(P(=O)(O)O)P(=O)(O)O,training,0 99 | CC(C)(CO)C(O)C(=O)NCCCO,training,0 100 | COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC,training,1 101 | NC1=NC(=O)C(c2ccccc2)O1,training,1 102 | C1CCC(C(CC2CCCCN2)C2CCCCC2)CC1,training,1 103 | CC(COc1ccccc1)N(CCCl)Cc1ccccc1,training,0 104 | 
CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O,training,1 105 | CC(C)NCC(O)COc1cccc2[nH]ccc12,training,0 106 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1,training,0 107 | CCC1(c2ccccc2)C(=O)NCNC1=O,training,0 108 | CC(C)(Sc1cc(C(C)(C)C)c(O)c(C(C)(C)C)c1)Sc1cc(C(C)(C)C)c(O)c(C(C)(C)C)c1,training,0 109 | CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1 110 | OC(CCN1CCCC1)(c1ccccc1)C1CCCCC1,training,0 111 | CCCNCC(O)COc1ccccc1C(=O)CCc1ccccc1,training,0 112 | CNCCCC1c2ccccc2C=Cc2ccccc21,training,0 113 | CN(C)C(=O)Oc1ccc[n+](C)c1,training,0 114 | CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,training,1 115 | Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCCC2,training,1 116 | CN1C2CC(OC(=O)C(CO)c3ccccc3)CC1C1OC12,training,0 117 | C[N+](C)(C)CCOC(=O)CCC(=O)OCC[N+](C)(C)C,training,0 118 | Cc1nnc(NS(=O)(=O)c2ccc(N)cc2)s1,training,1 119 | Nc1ccc(S(=O)(=O)Nc2nccs2)cc1,training,1 120 | CC1=C(CC(=O)O)c2cc(F)ccc2C1=Cc1ccc(S(C)=O)cc1,training,1 121 | Cn1nnc2c(C(N)=O)ncn2c1=O,training,0 122 | CC(C)(C)c1ccc(C(O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0 123 | Clc1ccccc1CN1CCc2sccc2C1,training,1 124 | Cc1ccc(S(=O)(=O)NC(=O)NN2CCCCCC2)cc1,training,1 125 | Cc1ccc(C(=O)c2ccc(CC(=O)O)n2C)cc1,training,1 126 | O=c1n(CCCN2CCN(c3cccc(Cl)c3)CC2)nc2ccccn12,training,1 127 | OC(CCN1CCCCC1)(c1ccccc1)C1CCCCC1,training,0 128 | COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,training,1 129 | Cc1c(C)c2c(c(C)c1O)CCC(C)(COc1ccc(CC3SC(=O)NC3=O)cc1)O2,training,1 130 | COc1cc(C(=O)NS(=O)(=O)c2ccccc2C)ccc1Cc1cn(C)c2ccc(NC(=O)OC3CCCC3)cc12,training,1 131 | NS(=O)(=O)Cc1noc2ccccc12,training,1 132 | COC12C(COC(N)=O)C3=C(C(=O)C(C)=C(N)C3=O)N1CC1NC12,training,0 133 | CC[N+](C)(CC)CCOC(=O)C(O)(c1ccccc1)C1CCCCC1,training,0 134 | CC12CCC3c4ccc(O)cc4CCC3C1CCC2O,training,0 135 | O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1F,training,1 136 | CC12COC(=O)CC1CCC1C2CCC2(C)C1CCC2(C)O,training,1 137 | CC(=O)Nc1ccc2c(c1)Cc1ccccc1-2,training,1 138 | O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1I,training,1 139 | CCC1C(=O)OCC1Cc1cncn1C,training,0 140 | 
CNC(=O)Oc1ccc2c(c1)C1(C)CCN(C)C1N2C,training,0 141 | CC(=O)OCC1=C(C(=O)O)N2C(=O)C(NC(=O)Cc3cccs3)C2SC1,training,1 142 | NCC1OC(OC2C(N)CC(N)C(OC3OC(CO)C(O)C(N)C3O)C2O)C(O)C(O)C1O,training,0 143 | Cc1onc(-c2ccccc2Cl)c1C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O,training,1 144 | OCC1OC(OC2C(CO)OC(O)C(O)C2O)C(O)C(O)C1O,training,0 145 | CSCCC(N)C(=O)O,training,0 146 | COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1C(NC(C)=O)CC2,training,0 147 | C=C1CCC(O)CC1=CC=C1CCCC2(C)C1CCC2C(C)CCCC(C)C,training,0 148 | NC(N)=NCCCC(N)C(=O)O,training,0 149 | CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,0 150 | c1ccc2c(c1)Nc1ccccc1S2,training,1 151 | CCC(CO)NC(=O)C1C=C2c3cccc4[nH]cc(c34)CC2N(C)C1,training,0 152 | O=C(O)c1cc(-c2ccccc2)nc2ccccc12,training,1 153 | CCOC(=O)C1(c2ccccc2)CCN(CCc2ccc(N)cc2)CC1,training,0 154 | CC(=O)CC(c1ccc([N+](=O)[O-])cc1)c1c(O)oc2ccccc2c1=O,training,1 155 | COC1C(OC(N)=O)C(O)C(Oc2ccc3c(=O)c(NC(=O)c4ccc(O)c(CC=C(C)C)c4)c(O)oc3c2C)OC1(C)C,training,1 156 | Nc1ncn(C2OC(CO)C(O)C2O)c(=O)n1,training,1 157 | CC12CC(=CO)C(=O)CC1CCC1C2CCC2(C)C1CCC2(C)O,training,1 158 | C[N+](C)(C)CC(O)CC(=O)[O-],training,0 159 | OCC1OC(OC2C(CO)OC(O)(CO)C2O)C(O)C(O)C1O,training,0 160 | Cn1c(=O)c2[nH]c(Br)nc2n(C)c1=O,training,0 161 | CC1C(c2ccccc2)OCCN1C,training,0 162 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)C(=O)C(C)C(O)C1(C)O,training,1 163 | CCC1(O)CC2CN(CCc3c([nH]c4ccccc34)C(C(=O)OC)(c3cc4c(cc3OC)N(C)C3C(O)(C(=O)OC)C(OC(C)=O)C5(CC)C=CCN6CCC43C65)C2)C1,training,0 164 | CCOC(=O)C1(c2ccccc2)CCN(CCC(C#N)(c2ccccc2)c2ccccc2)CC1,training,0 165 | COc1cc2c(c3oc(=O)c4c(c13)CCC4=O)C1C=COC1O2,training,1 166 | Cc1c(C)c2c(c(C)c1O)CCC(C)(CCCC(C)CCCC(C)CCCC(C)C)O2,training,0 167 | CNC1C(O)C(NC)C2OC3(O)C(=O)CC(C)OC3OC2C1O,training,0 168 | NC(N)=NC(=O)c1nc(Cl)c(N)nc1N,training,1 169 | OCCN1CCN(CCC=C2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,0 170 | Cc1cn(C2C=CC(CO)O2)c(=O)[nH]c1=O,training,1 171 | Cc1onc(-c2c(Cl)cccc2Cl)c1C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O,training,1 172 | 
CNC1C(OC2C(OC3C(O)C(O)C(N=C(N)N)C(O)C3N=C(N)N)OC(C)C2(O)C=O)OC(CO)C(O)C1O,training,0 173 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(Cl)C(O)CC2(C)C1(O)C(=O)CO,training,0 174 | CN(C)CCCC1c2ccccc2Nc2ccc(Cl)cc21,training,1 175 | CCc1c(C)[nH]c2c1C(=O)C(CN1CCOCC1)CC2,training,0 176 | CC(=O)OC1CC2CCC3C(CCC4(C)C3CC([N+]3(C)CCCCC3)C4OC(C)=O)C2(C)CC1[N+]1(C)CCCCC1,training,0 177 | O=C(O)Cc1csc(-c2ccc(Cl)cc2)n1,training,1 178 | CC(=O)Oc1ccc(C2(c3ccc(OC(C)=O)cc3)C(=O)N(C(C)=O)c3ccccc32)cc1,training,1 179 | CN(N=O)C(=O)NC1C(O)OC(CO)C(O)C1O,training,1 180 | CC1OC(OC2C(O)CC(OC3C(O)CC(OC4CCC5(C)C(CCC6C5CC(O)C5(C)C(C7=CC(=O)OC7)CCC65O)C4)OC3C)OC2C)CC(O)C1O,training,0 181 | CC(CCC(=O)O)C1CCC2C3C(O)CC4CC(O)CCC4(C)C3CCC12C,training,0 182 | COc1ccc2c(C(=O)c3ccc(Cl)cc3)c(C)n(CC(=O)O)c2c1,training,1 183 | CCOC(=O)Nc1ccc2c(c1)N(C(=O)CCN1CCOCC1)c1ccccc1S2,training,1 184 | Cc1cn(C2CC(N=[N+]=[N-])C(CO)O2)c(=O)[nH]c1=O,training,1 185 | CC(=O)OC1C(=O)C2(C)C(O)CC3OCC3(OC(C)=O)C2C(OC(=O)c2ccccc2)C2(O)CC(OC(=O)C(O)C(NC(=O)c3ccccc3)c3ccccc3)C(C)=C1C2(C)C,training,1 186 | CC(CCc1ccc(O)cc1)NCCc1ccc(O)c(O)c1,training,0 187 | CC(C)(C)NCC(O)COc1ccccc1C1CCCC1,training,0 188 | NCCC(O)C(=O)NC1CC(N)C(OC2OC(CN)C(O)C(O)C2O)C(O)C1OC1OC(CO)C(O)C(N)C1O,training,0 189 | O=C(O)COc1ccc(C(=O)c2cccs2)c(Cl)c1Cl,training,1 190 | CC(C)(C)NCC(O)COc1cccc2c1CC(O)C(O)C2,training,0 191 | COc1ccc(C2Sc3ccccc3N(CCN(C)C)C(=O)C2OC(C)=O)cc1,training,1 192 | CC(C(=O)O)c1ccc2oc(-c3ccc(Cl)cc3)nc2c1,training,1 193 | CC1(C)C(C=C(Cl)Cl)C1C(=O)OCc1cccc(Oc2ccccc2)c1,training,0 194 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)CC(O)(C(=O)CO)CC3OC1CC(N)C(O)C(C)O1,training,1 195 | CN(C)CCn1nnnc1SCC1=C(C(=O)O)N2C(=O)C(NC(=O)Cc3csc(N)n3)C2SC1,training,1 196 | CC(=O)N1CCN(c2ccc(OCC3COC(Cn4ccnc4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1,training,1 197 | O=c1nc[nH]c2c1ncn2C1CCC(CO)O1,training,1 198 | CCC(C)C(=O)OC1CC(C)C=C2C=CC(C)C(CCC3CC(O)CC(=O)O3)C21,training,0 199 | CCC(C)(C)C(=O)OC1CC(C)C=C2C=CC(C)C(CCC3CC(O)CC(=O)O3)C21,training,0 200 | 
CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1Cc2ccccc2CC1C(=O)O,training,1 201 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O,training,0 202 | CCC(C)n1ncn(-c2ccc(N3CCN(c4ccc(OCC5COC(Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,training,1 203 | CC(O)(CS(=O)(=O)c1ccc(F)cc1)C(=O)Nc1ccc(C#N)c(C(F)(F)F)c1,training,1 204 | CC(C)(C)NC(=O)C1CCC2C3CCC4NC(=O)C=CC4(C)C3CCC12C,training,0 205 | CC1CN(c2cc3c(cc2F)c(=O)c(C(=O)O)cn3-c2ccc(F)cc2F)CCN1,training,1 206 | Nc1ncnc2c1ncn2CCOCP(=O)(O)O,training,1 207 | CC(c1cc2ccccc2s1)N(O)C(N)=O,training,1 208 | COCC(=O)OC1(CCN(C)CCCc2nc3ccccc3[nH]2)CCc2cc(F)ccc2C1C(C)C,training,0 209 | CCC1(O)C(=O)OCc2c1cc1n(c2=O)Cc2cc3c(CN(C)C)c(O)ccc3nc2-1,training,0 210 | CC(O)CN1CCN(CC(=O)[O-])CCN(CC(=O)[O-])CCN(CC(=O)[O-])CC1,training,0 211 | CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1,training,1 212 | CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CCC(O)CC(O)CC(=O)O,training,1 213 | Nc1ccn(C2CSC(CO)O2)c(=O)n1,training,1 214 | Nc1nc(NC2CC2)c2ncn(C3C=CC(CO)C3)c2n1,training,1 215 | Cc1cc(Br)c(O)c2ncccc12,training,1 216 | CNC1CCC(c2ccc(Cl)c(Cl)c2)c2ccccc21,training,0 217 | CN1c2ccccc2C(NCCCCCCC(=O)O)c2ccc(Cl)cc2S1(=O)=O,training,1 218 | Cc1c(F)c(N2CCNC(C)C2)cc2c1c(=O)c(C(=O)O)cn2C1CC1,training,1 219 | CN(CCOc1ccc(CC2SC(=O)NC2=O)cc1)c1ccccn1,training,1 220 | C=C(c1ccc(C(=O)O)cc1)c1cc2c(cc1C)C(C)(C)CCC2(C)C,training,1 221 | CCCCOCC(CN1C(=O)N(CC(COCCCC)OC(N)=O)C(=O)C(CC)(c2ccccc2)C1=O)OC(N)=O,training,1 222 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(Cl)C(O)CC2(C)C1(O)C(=O)CCl,training,0 223 | CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,training,1 224 | CC(=O)OC12COC1CC(O)C1(C)C(=O)C(O)C3=C(C)C(OC(=O)C(O)C(NC(=O)OC(C)(C)C)c4ccccc4)CC(O)(C(OC(=O)c4ccccc4)C21)C3(C)C,training,1 225 | CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23,training,1 226 | CCCCCC(O)C=CC1C(O)CC(=O)C1CCCCCCC(=O)O,training,0 227 | COc1c(N2CC3CCCNC3C2)c(F)cc2c(=O)c(C(=O)O)cn(C3CC3)c12,training,1 228 
| CCOC1OC(=O)CC1NC(=O)C1CCCN2C(=O)CCC(NC(=O)c3nccc4ccccc34)C(=O)N12,training,1 229 | C=C1C(CO)C(O)CC1n1cnc2c(=O)nc(N)[nH]c21,training,1 230 | NCC1OC(OC2C(CO)OC(OC3C(O)C(N)CC(N)C3OC3OC(CO)C(O)C(O)C3N)C2O)C(N)C(O)C1O,training,0 231 | O=C(O)CCCCC1SCC2NC(=O)NC21,training,0 232 | CN1C2CCC1CC(OC(=O)C(CO)c1ccccc1)C2,training,0 233 | COC(c1ccccc1)(c1ccccc1)C(Oc1nc(C)cc(C)n1)C(=O)O,training,1 234 | COC1CC(OC2C(C)C(=O)OC(C)C(C)C(OC(C)=O)C(C)C(=O)C3(CO3)CC(C)C(OC3OC(C)CC(N(C)C)C3OC(C)=O)C2C)OC(C)C1OC(C)=O,training,1 235 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1,training,1 236 | CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1COC2OCCC12)S(=O)(=O)c1ccc(N)cc1,training,1 237 | CCOC(Nc1ccc(C(=O)O)cc1)C(=O)c1ccc(-c2ccccc2)cc1,training,1 238 | NC(C(=O)O)C1CC(Cl)=NO1,training,0 239 | CC(C)c1nc(CN(C)C(=O)NC(C(=O)NC(Cc2ccccc2)CC(O)C(Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1,training,1 240 | CC1OC(OC2C(CO)OC(OC3C(CO)OC(O)C(O)C3O)C(O)C2O)C(O)C(O)C1NC1C=C(CO)C(O)C(O)C1O,training,1 241 | COC1(NC(=O)Cc2cccs2)C(=O)N2C(C(=O)O)=C(COC(N)=O)CSC21,training,1 242 | NC1CC1c1ccccc1,training,0 243 | CC(C)=CCN1CCC2(C)c3cc(O)ccc3CC1C2C,training,0 244 | C=CC[N+]1(C2CC3C4CCC5CC(O)C(N6CCOCC6)CC5(C)C4CCC3(C)C2OC(C)=O)CCCC1,training,0 245 | CC(CO)NC(=O)C1C=C2c3cccc4[nH]cc(c34)CC2N(C)C1,training,0 246 | CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CCO,training,0 247 | Cn1nnnc1SCC1=C(C(=O)O)N2C(=O)C(NC(=O)C(O)c3ccccc3)C2SC1,training,1 248 | CC1(C)SC2C(NC(=O)C(NC(=O)N3CCN(S(C)(=O)=O)C3=O)c3ccccc3)C(=O)N2C1C(=O)O,training,1 249 | S=c1nc[nH]c2nc[nH]c12,training,1 250 | CN1CCCN=C1C=Cc1cccs1,training,0 251 | CN1CCN(CCC=C2c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1,training,1 252 | Cn1cc[nH]c1=S,training,1 253 | CC(C)n1c(C=CC(O)CC(O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,training,0 254 | Nc1nc(=S)c2[nH]cnc2[nH]1,training,1 255 | CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1 256 | NCCCC(N)CC(=O)NCC1NC(=O)C(CO)NC(=O)C(N)CNC(=O)C(C2CCN=C(N)N2)NC(=O)C(=CNC(N)=O)NC1=O,training,0 257 | 
CCCCN1C(=O)C(C(O)C2CCCCC2)NC(=O)C12CCN(Cc1ccc(Oc3ccc(C(=O)O)cc3)cc1)CC2,training,1 258 | CN(C)Cc1ccc(CSCCNC(=C[N+](=O)[O-])NCc2ccc3c(c2)OCO3)o1,training,1 259 | CCCCCCCCCCCC(CC1OC(=O)C1CCCCCC)OC(=O)C(CC(C)C)NC=O,training,1 260 | COc1ccc(CC(C)NCC(O)c2ccc(O)c(NC=O)c2)cc1,training,0 261 | CN1C(=C(O)Nc2ccccn2)C(=O)c2ccccc2S1(=O)=O,training,1 262 | C=C1CCC(O)CC1=CC=C1CCCC2(C)C1CCC2C(C)C=CC(C)C(C)C,training,0 263 | CC1c2cccc(O)c2C(O)=C2C(=O)C3(O)C(=O)C(=C(N)O)C(=O)C(N(C)C)C3C(O)C21,training,1 264 | Cc1ccc(C(=CCN2CCCC2)c2ccccn2)cc1,training,0 265 | COc1ccc2c3c1OC1C(O)C=CC4C(C2)N(C)CCC341,training,0 266 | C=C1CCC2(O)C3Cc4ccc(O)c5c4C2(CCN3CC2CC2)C1O5,training,0 267 | CN1CCC23c4c5ccc(O)c4OC2C(O)C=CC3C1C5,training,0 268 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C,training,1 269 | CN(C)NN=C1N=CN=C1C(N)=O,training,1 270 | O=C1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)N1,training,1 271 | O=C(O)C1=CC(=NNc2ccc(S(=O)(=O)Nc3ccccn3)cc2)C=CC1=O,training,1 272 | COc1ccc2c(c1)C13CCCCC1C(C2)N(C)CC3,training,0 273 | CON=C(C(=O)NC1C(=O)N2C(C(=O)O)=C(CSc3nc(=O)c(=O)[nH]n3C)CSC12)c1csc(N)n1,training,1 274 | CCOC(=O)C(CCc1ccccc1)NC1CCc2ccccc2N(CC(=O)O)C1=O,training,1 275 | COC1C=COC2(C)Oc3c(C)c(O)c4c(c3C2=O)C(=O)C(=CNN2CCN(C)CC2)C(=C4O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,training,1 276 | CCC1OC(=O)C(C)C(=O)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(OC)CC(C)C(=O)C(C)C2N(CCCCn3cnc(-c4cccnc4)c3)C(=O)OC12C,training,1 277 | CON=C(N)c1ccc(-c2ccc(-c3ccc(C(N)=NOC)cc3)o2)cc1,training,1 278 | CC(C)CC(CN)CC(=O)O,training,0 279 | CC1CN(CC(Cc2ccccc2)C(=O)NCC(=O)O)CCC1(C)c1cccc(O)c1,training,0 280 | O=C1C=CC=CC1=C1NC(=C2C=CC=CC2=O)N(c2ccc(C(=O)O)cc2)N1,training,1 281 | COC1C=COC2(C)Oc3c(C)c(O)c4c(c3C2=O)C2=NC3(CCN(CC(C)C)CC3)NC2=C(NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C)C4=O,training,1 282 | CCCCCCCCC=CCCCCCCCC(=O)O,training,0 283 | CC(=O)Oc1cc2c(s1)CCN(C(C(=O)C1CC1)c1ccccc1F)C2,training,0 284 | CN1CCC(Nc2ncc3ncnc(Nc4ccc(F)c(Cl)c4)c3n2)CC1,training,1 285 | 
Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O,training,1 286 | C[N+]1(C)C2CCC1CC(OC(=O)C(O)c1ccccc1)C2,training,0 287 | [C-]#N,training,0 288 | C=C1c2cccc(O)c2C(O)=C2C(=O)C3(O)C(O)=C(C(N)=O)C(=O)C(N(C)C)C3C(O)C12,training,1 289 | O=C1OC(C(O)CO)C(O)=C1O,training,0 290 | CN(C)C1C(=O)C(C(N)=O)=C(O)C2(O)C(=O)C3=C(O)c4c(O)ccc(Cl)c4C(O)C3CC12,training,1 291 | OCC(O)C(O)C(O)C(O)CO,training,0 292 | Cc1ccc2cc3c(ccc4ccccc43)c3c2c1CC3,training,1 293 | Cn1cnc([N+](=O)[O-])c1Sc1ncnc2nc[nH]c12,training,1 294 | CC(=O)Oc1ccc(C(=C2CCCCC2)c2ccc(OC(C)=O)cc2)cc1,training,1 295 | COc1cc2c(cc1OC)C(=O)C(CC1CCN(Cc3ccccc3)CC1)C2,training,0 296 | OC(Cn1cncn1)(Cn1cncn1)c1ccc(F)cc1F,training,1 297 | CC(C(=O)O)c1ccc(-c2ccccc2)c(F)c1,training,1 298 | Nc1cc(-c2ccncc2)c[nH]c1=O,training,1 299 | CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1,training,0 300 | CC12CC3CC(C)(C1)CC(N)(C3)C2,training,0 301 | CN(C)C(=N)N=C(N)N,training,0 302 | Cc1ccccc1N1C(=O)c2cc(S(N)(=O)=O)c(Cl)cc2NC1C,training,1 303 | CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21,training,1 304 | CC1(C)NC(=O)N(c2ccc([N+](=O)[O-])c(C(F)(F)F)c2)C1=O,training,1 305 | COc1ccc(CN(CCN(C)C)c2ccccn2)cc1,training,0 306 | Nc1nc2ccc(OC(F)(F)F)cc2s1,training,1 307 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,training,1 308 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)no1,training,1 309 | O=C1C(CCS(=O)c2ccccc2)C(=O)N(c2ccccc2)N1c1ccccc1,training,0 310 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO,training,0 311 | CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O,training,0 312 | O=c1[nH]cc(N(CCCl)CCCl)c(=O)[nH]1,training,1 313 | NC(CO)(CO)CO,training,0 314 | CCC(CO)NC(=O)C1C=C2c3cccc4c3c(cn4C)CC2N(C)C1,training,0 315 | COC(=O)Nc1nc2ccccc2[nH]1,training,1 316 | CN1CCCC1CCOC(C)(c1ccccc1)c1ccc(Cl)cc1,training,0 317 | CC(=O)NC1C(O)OC(CO)C(OS(=O)(=O)[O-])C1OC1OC(C(=O)[O-])C(O)C(O)C1O,training,1 318 | CC(C(=O)O)c1ccc(N2CC=CC2)c(Cl)c1,training,1 319 | COc1cc(C2c3cc4c(cc3C(OC3OC5COC(C)OC5C(O)C3O)C3COC(=O)C23)OCO4)cc(OC)c1O,training,1 320 | 
Nc1nc(=O)n(C2CSC(CO)O2)cc1F,training,1 321 | CCc1oc2ccccc2c1C(=O)c1ccc(O)cc1,training,1 322 | CC(Cn1cnc2c(N)ncnc21)OCP(=O)(O)O,training,1 323 | CCCCCC(O)C=CC1C(O)CC(=O)C1CC=CCCCC(=O)O,training,0 324 | CC(C)(O)c1ccccc1CCC(SCC1(CC(=O)O)CC1)c1cccc(C=Cc2ccc3ccc(Cl)cc3n2)c1,training,0 325 | CC(C)(C)NC(=O)C1CN(Cc2cccnc2)CCN1CC(O)CC(Cc1ccccc1)C(=O)NC1c2ccccc2CC1O,training,1 326 | CO[Si](C)(C)O[Si](C)(C)C,training,0 327 | Cc1ccc(-n2[nH]c(C)c(NN=C3C=CC=C(c4cccc(C(=O)O)c4)C3=O)c2=O)cc1C,training,1 328 | Oc1cccc2cccnc12,training,1 329 | CC(=O)Nc1nnc(S(N)(=O)=O)s1,training,1 330 | CC(=O)c1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1,training,1 331 | O=c1ncnc2[nH][nH]cc1-2,training,1 332 | CN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1,training,0 333 | CN1C(=O)CCS(=O)(=O)C1c1ccc(Cl)cc1,training,1 334 | Cc1c(-c2ccccc2)oc2c(C(=O)OCCN3CCCCC3)cccc2c1=O,training,0 335 | CC(C)(C(=O)c1cccnc1)c1cccnc1,training,0 336 | Cc1ncc2n1-c1ccc(Cl)cc1C(c1ccccc1F)=NC2,training,0 337 | Nc1ccc(N=Nc2ccccc2)c(N)n1,training,0 338 | CC(=CCC1=C(C)C(=O)c2ccccc2C1=O)CCCC(C)CCCC(C)CCCC(C)C,training,0 339 | COc1cc(NC(C)CCCN)c2ncccc2c1,training,0 340 | COc1cc(C(=O)NCc2ccc(OCCN(C)C)cc2)cc(OC)c1OC,training,1 341 | Cc1ccc(N(CC2=NCCN2)c2cccc(O)c2)cc1,training,0 342 | CC12C=CC(=O)C=C1CCC1C2C(=O)CC2(C)C1CCC2(O)C(=O)CO,training,0 343 | COc1ccc(CC(N)C(=O)NC2C(CO)OC(n3cnc4c(N(C)C)ncnc43)C2O)cc1,training,1 344 | CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1)C2,training,0 345 | O=c1[nH]c2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,training,0 346 | C[Si](C)(C)O[Si](C)(C)O[Si](C)(C)C,training,0 347 | CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(=O)O,training,0 348 | OCC1OC(n2cnc3c2NC=NCC3O)CC1O,training,1 349 | C=C1CC2C(CCC3(C)C(=O)CCC23)C2(C)C=CC(=O)C=C12,training,1 350 | CNCCC(Oc1cccc2ccccc12)c1cccs1,training,1 351 | Cc1nc(C)c2c(n1)N(Cc1ccc(-c3ccccc3-c3nn[nH]n3)cc1)C(=O)CC2,training,1 352 | NC1C2CN(c3nc4c(cc3F)c(=O)c(C(=O)O)cn4-c3ccc(F)cc3F)CC12,training,1 353 | CN1c2c(oc(=O)n(-c3ccccn3)c2=O)-c2ccccc2S1(=O)=O,training,1 354 | 
NC(N)=Nc1nc(CSCCN=CNS(=O)(=O)c2ccc(Br)cc2)cs1,training,1 355 | Cc1nc([N+](=O)[O-])cn1-c1ccc([N+](=O)[O-])cc1,training,1 356 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(OC)CC(C)C(=O)C(C)C(O)C1(C)O,training,1 357 | CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1 358 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)(C)C)cc2)nc(-c2ncccn2)nc1OCCO,training,1 359 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21,training,0 360 | COC1=CC(=O)CC(C)C12Oc1c(Cl)c(OC)cc(OC)c1C2=O,training,1 361 | COCCNC(=O)CN(CCN(CCN(CC(=O)[O-])CC(=O)NCCOC)CC(=O)[O-])CC(=O)[O-],training,0 362 | CCCc1cc(=O)[nH]c(=S)[nH]1,training,1 363 | CCCC(CCC)C(=O)O,training,1 364 | CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)[O-],training,0 365 | O=C(NC(CO)C(O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,test,0 366 | CC(NC(C)(C)C)C(=O)c1cccc(Cl)c1,test,0 367 | CC(C)(C)NCC(O)c1ccc(O)c(CO)c1,test,0 368 | CC(=O)Oc1ccccc1C(=O)O,test,0 369 | CC(C)NCC(O)COc1ccc(CC(N)=O)cc1,test,0 370 | CC(C)C(=O)Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,test,1 371 | NNCCc1ccccc1,test,0 372 | CC(C)NCC(O)c1ccc(O)c(O)c1,test,0 373 | CC(C)NCC(O)c1cc(O)cc(O)c1,test,0 374 | COc1ccc(OC)c(C(O)CNC(=O)CN)c1,test,0 375 | CCOc1ccc(NC(C)=O)cc1,test,1 376 | CC(C)(N)Cc1ccccc1,test,0 377 | CCCCNc1ccc(C(=O)OCCN(C)C)cc1,test,0 378 | NC(Cc1ccc(O)c(O)c1)C(=O)O,test,0 379 | CCCCNc1ccc(C(=O)OCCOCCOCCOCCOCCOCCOCCOCCOCCOC)cc1,test,0 380 | CC(C)Cc1ccc(CC(=O)O)cc1,test,1 381 | C#CCN(C)C(C)Cc1ccccc1,test,0 382 | CC(N)C(O)c1ccccc1,test,0 383 | CC(Cc1ccc(O)c(O)c1)(NN)C(=O)O,test,0 384 | CC(O)C(=O)Nc1c(I)c(C(=O)NC(CO)CO)c(I)c(C(=O)NC(CO)CO)c1I,test,0 385 | CC(N)(Cc1ccc(O)cc1)C(=O)O,test,0 386 | CC(=O)Nc1c(I)c(NC(C)=O)c(I)c(C(=O)O)c1I,test,0 387 | CC(=O)Nc1cccc(O)c1,test,0 388 | CCN(CC)CCOc1cccc(OCCN(CC)CC)c1OCCN(CC)CC,test,0 389 | O=C(O)c1ccccc1O,test,0 390 | NC(=O)c1ccccc1O,test,0 391 | C=CCOc1ccc(CC(=O)O)cc1Cl,test,1 392 | NCC1(CC(=O)O)CCCCC1,test,0 393 | NCC1CCC(C(=O)O)CC1,test,0 394 | O=C(O)c1cccnc1,test,1 395 | CC(C)NNC(=O)c1ccncc1,test,1 396 | 
CC(N=C(NC#N)Nc1ccncc1)C(C)(C)C,test,0 397 | S=C=Nc1cccc2ccccc12,test,1 398 | CC(C)NCC(O)COc1cccc2ccccc12,test,0 399 | COc1ccc2c(C(=S)N(C)CC(=O)O)cccc2c1C(F)(F)F,test,1 400 | Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1,test,1 401 | C[N+](C)(C)CCOP(=O)([O-])OP(=O)(O)OCC1OC(n2ccc(N)nc2=O)C(O)C1O,test,0 402 | Nc1ccn(C2CCC(CO)O2)c(=O)n1,test,1 403 | NC(Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O,test,0 404 | CN(C)CCOC(c1ccc(Cl)cc1)c1ccccn1,test,0 405 | Cc1ccc(Nc2c(F)cccc2Cl)c(CC(=O)O)c1,test,1 406 | Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl,test,1 407 | Clc1ccc(C(c2ccccc2Cl)C(Cl)Cl)cc1,test,0 408 | Cc1ccc(O)c(C(CCN(C(C)C)C(C)C)c2ccccc2)c1,test,0 409 | Cc1ccccc1C(OCCN(C)C)c1ccccc1,test,0 410 | CC(C(=O)O)c1cccc(C(=O)c2ccccc2)c1,test,1 411 | O=C(c1cc(O)c(O)c(O)c1)c1ccc(O)c(O)c1O,test,1 412 | Cc1ccc(C(=O)c2cc(O)c(O)c([N+](=O)[O-])c2)cc1,test,1 413 | CC(C)OC(=O)C(C)(C)Oc1ccc(C(=O)c2ccc(Cl)cc2)cc1,test,1 414 | C#CC1(O)CCC2C3CCC4=CC(=O)CCC4C3CCC21C,test,0 415 | CC12CCC(=O)C=C1CCC1C3CCC(O)(C(=O)CO)C3(C)CC(O)C12F,test,0 416 | COc1ccc2c3c1OC1C(=O)CCC4C(C2)N(C)CCC314,test,0 417 | NCCc1ccc(O)c(O)c1,val,0 418 | Nc1ccc(C(=O)O)cc1,val,0 419 | CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,val,1 420 | CC[N+](C)(C)c1cccc(O)c1,val,0 421 | N#CC(C#N)=NNc1ccc(OC(F)(F)F)cc1,val,1 422 | COCCCCC(=NOCCN)c1ccc(C(F)(F)F)cc1,val,0 423 | COc1ccccc1OCC(O)CO,val,0 424 | CCCCCCc1ccc(O)cc1O,val,0 425 | CCN(CC)CC(=O)Nc1c(C)cccc1C,val,0 426 | CC(=O)Oc1cc(C(C)C)c(OCCN(C)C)cc1C,val,1 427 | CC(C)c1cccc(C(C)C)c1O,val,0 428 | Nc1ccc(S(N)(=O)=O)cc1,val,1 429 | CCCCCCCCNC(C)C(O)c1ccc(SC(C)C)cc1,val,1 430 | CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,val,1 431 | CC(N)Cc1ccccc1,val,0 432 | COc1ccc(OC)c(C(O)C(C)N)c1,val,0 433 | CNC(C)C(O)c1ccccc1,val,0 434 | CNC(C)Cc1ccccc1,val,0 435 | NC(=O)NS(=O)(=O)c1ccc(N)cc1,val,1 436 | CN(C)C(=O)Oc1cc(OC(=O)N(C)C)cc(C(O)CNC(C)(C)C)c1,val,0 437 | COC(=O)CCc1ccc(OCC(O)CNC(C)C)cc1,val,0 438 | CCCCCCCN(CC)CCCC(O)c1ccc(NS(C)(=O)=O)cc1,val,0 439 | NCC(O)c1ccc(O)c(O)c1,val,0 440 | COc1cc(CNC(=O)CCCCC=CC(C)C)ccc1O,val,0 441 | 
C#CCN(C)Cc1ccccc1,val,0 442 | CNCC(O)c1ccc(O)c(O)c1,val,0 443 | COCCCOc1cc(CC(CC(N)C(O)CC(C(=O)NCC(C)(C)C(N)=O)C(C)C)C(C)C)ccc1OC,val,0 444 | ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,val,0 445 | OC1C(O)C(O)C(O)C(O)C1O,val,0 446 | NC1CCCCC1N,val,1 447 | Cc1ncc(CO)c(CO)c1O,val,0 448 | CN(CC=CC#CC(C)(C)C)Cc1cccc2ccccc12,val,1 449 | CCCCCOC(=O)Nc1nc(=O)n(C2OC(C)C(O)C2O)cc1F,val,1 450 | CCCCNc1cc(C(=O)O)cc(S(N)(=O)=O)c1Oc1ccccc1,val,1 451 | CC(C(=O)O)c1cccc(Oc2ccccc2)c1,val,1 452 | CS(=O)(=O)Nc1ccc([N+](=O)[O-])cc1Oc1ccccc1,val,1 453 | CN(C)CCC(c1ccc(Cl)cc1)c1ccccn1,val,0 454 | CN(C)CCOC(C)(c1ccccc1)c1ccccn1,val,0 455 | CN(C)CCC(c1ccc(Br)cc1)c1ccccn1,val,0 456 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1 457 | Cc1cccc(Nc2ccccc2C(=O)O)c1C,val,1 458 | C[N+](C)CCOC(c1ccccc1)c1ccccc1,val,0 459 | CN(C)CCOC(c1ccccc1)c1ccccc1,val,0 460 | COc1cc(O)c(C(=O)c2ccccc2)cc1S(=O)(=O)O,val,0 461 | Nc1c(CC(=O)O)cccc1C(=O)c1ccc(Br)cc1,val,1 462 | CC12CCC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)CO,val,0 463 | CC12CCC3C4CCC(=O)C=C4CCC3C1CCC2O,val,1 464 | CC(=O)C1(O)CCC2C3CC(C)C4=CC(=O)CCC4(C)C3CCC21C,val,0 465 | C#CC1(O)CCC2C3CCC4=CC(=O)CCC4C3CCC21CC,val,0 466 | CN1CCC23c4c5ccc(O)c4OC2C(=O)CCC3C1C5,val,0 467 | COc1ccc2c3c1OC1C(=O)CCC4(O)C(C2)N(C)CCC314,val,0 468 | CN1CCC23c4c5ccc(O)c4OC2C(=O)CCC3(O)C1C5,val,0 469 | -------------------------------------------------------------------------------- /experiment/build_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd from rdkit.Chem import MolFromSmiles, MACCSkeys, AllChem import numpy as np from rdkit.ML.Descriptors import MoleculeDescriptors import multiprocessing as mp import torch from rdkit import Chem import math import random from rdkit.Chem import ChemicalFeatures from rdkit import RDConfig import os # knowledge-based transformer pre-train model # from rdkit_des import Chem # smi = '' # random_equivalent_smiles = Chem.MolFromSmiles(Chem.MolToSmiles(smi, doRandom=True)) def smi_tokenizer(smi): """ 
Tokenize a SMILES molecule or reaction """ import re pattern = "(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])" regex = re.compile(pattern) tokens = [token for token in regex.findall(smi)] # assert smi == ''.join(tokens) # return ' '.join(tokens) return tokens def one_of_k_encoding(x, allowable_set): if x not in allowable_set: raise Exception("input {0} not in allowable set{1}:".format( x, allowable_set)) return [x == s for s in allowable_set] def one_of_k_encoding_unk(x, allowable_set): """Maps inputs not in the allowable set to the last element.""" if x not in allowable_set: x = allowable_set[-1] return [x == s for s in allowable_set] def atom_labels(atom, use_chirality=True): results = one_of_k_encoding(atom.GetDegree(), [0, 1, 2, 3, 4, 5, 6]) + \ one_of_k_encoding_unk(atom.GetHybridization(), [ Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2, Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D, Chem.rdchem.HybridizationType.SP3D2, 'other']) + [atom.GetIsAromatic()] \ + one_of_k_encoding_unk(atom.GetTotalNumHs(), [0, 1, 2, 3, 4]) if use_chirality: try: results = results + one_of_k_encoding_unk( atom.GetProp('_CIPCode'), ['R', 'S']) + [atom.HasProp('_ChiralityPossible')] except: results = results + [False, False ] + [atom.HasProp('_ChiralityPossible')] atom_labels_list = np.array(results).tolist() atom_selected_index = [1, 2, 3, 4, 7, 8, 9, 13, 14, 15, 16, 17, 19, 20, 21] atom_labels_selected = [atom_labels_list[x] for x in atom_selected_index] return atom_labels_selected def global_maccs_data(smiles): mol = Chem.MolFromSmiles(smiles) maccs = MACCSkeys.GenMACCSKeys(mol) global_maccs_list = np.array(maccs).tolist() # 选择负/正样本比例小于1000且大于0.001的数据 selected_index = [3, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 
66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165] selected_global_list = [global_maccs_list[x] for x in selected_index] return selected_global_list def global_ecfp4_data(smiles): mol = Chem.MolFromSmiles(smiles) ecfp4 = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024) global_ecfp4_list = np.array(ecfp4).tolist() return global_ecfp4_list def global_rdkit_des_data(smiles): descriptors_name = ['MaxEStateIndex', 'MinEStateIndex', 'MaxAbsEStateIndex', 'MinAbsEStateIndex', 'qed', 'MolWt', 'HeavyAtomMolWt', 'ExactMolWt', 'NumValenceElectrons', 'NumRadicalElectrons', 'MaxPartialCharge', 'MinPartialCharge', 'MaxAbsPartialCharge', 'MinAbsPartialCharge', 'FpDensityMorgan1', 'FpDensityMorgan2', 'FpDensityMorgan3', 'BalabanJ', 'BertzCT', 'Chi0', 'Chi0n', 'Chi0v', 'Chi1', 'Chi1n', 'Chi1v', 'Chi2n', 'Chi2v', 'Chi3n', 'Chi3v', 'Chi4n', 'Chi4v', 'HallKierAlpha', 'Ipc', 'Kappa1', 'Kappa2', 'Kappa3', 'LabuteASA', 'PEOE_VSA1', 'PEOE_VSA10', 'PEOE_VSA11', 'PEOE_VSA12', 'PEOE_VSA13', 'PEOE_VSA14', 'PEOE_VSA2', 'PEOE_VSA3', 'PEOE_VSA4', 'PEOE_VSA5', 'PEOE_VSA6', 'PEOE_VSA7', 'PEOE_VSA8', 'PEOE_VSA9', 'SMR_VSA1', 'SMR_VSA10', 'SMR_VSA2', 'SMR_VSA3', 'SMR_VSA4', 'SMR_VSA5', 'SMR_VSA6', 'SMR_VSA7', 'SMR_VSA8', 'SMR_VSA9', 'SlogP_VSA1', 'SlogP_VSA10', 'SlogP_VSA11', 'SlogP_VSA12', 'SlogP_VSA2', 'SlogP_VSA3', 'SlogP_VSA4', 'SlogP_VSA5', 'SlogP_VSA6', 'SlogP_VSA7', 'SlogP_VSA8', 'SlogP_VSA9', 'TPSA', 'EState_VSA1', 'EState_VSA10', 'EState_VSA11', 'EState_VSA2', 'EState_VSA3', 'EState_VSA4', 'EState_VSA5', 'EState_VSA6', 'EState_VSA7', 'EState_VSA8', 'EState_VSA9', 
'VSA_EState1', 'VSA_EState10', 'VSA_EState2', 'VSA_EState3', 'VSA_EState4', 'VSA_EState5', 'VSA_EState6', 'VSA_EState7', 'VSA_EState8', 'VSA_EState9', 'FractionCSP3', 'HeavyAtomCount', 'NHOHCount', 'NOCount', 'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles', 'NumAliphaticRings', 'NumAromaticCarbocycles', 'NumAromaticHeterocycles', 'NumAromaticRings', 'NumHAcceptors', 'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds', 'NumSaturatedCarbocycles', 'NumSaturatedHeterocycles', 'NumSaturatedRings', 'RingCount', 'MolLogP', 'MolMR', 'fr_Al_COO', 'fr_Al_OH', 'fr_Al_OH_noTert', 'fr_ArN', 'fr_Ar_COO', 'fr_Ar_N', 'fr_Ar_NH', 'fr_Ar_OH', 'fr_COO', 'fr_COO2', 'fr_C_O', 'fr_C_O_noCOO', 'fr_C_S', 'fr_HOCCN', 'fr_Imine', 'fr_NH0', 'fr_NH1', 'fr_NH2', 'fr_N_O', 'fr_Ndealkylation1', 'fr_Ndealkylation2', 'fr_Nhpyrrole', 'fr_SH', 'fr_aldehyde', 'fr_alkyl_carbamate', 'fr_alkyl_halide', 'fr_allylic_oxid', 'fr_amide', 'fr_amidine', 'fr_aniline', 'fr_aryl_methyl', 'fr_azide', 'fr_azo', 'fr_barbitur', 'fr_benzene', 'fr_benzodiazepine', 'fr_bicyclic', 'fr_diazo', 'fr_dihydropyridine', 'fr_epoxide', 'fr_ester', 'fr_ether', 'fr_furan', 'fr_guanido', 'fr_halogen', 'fr_hdrzine', 'fr_hdrzone', 'fr_imidazole', 'fr_imide', 'fr_isocyan', 'fr_isothiocyan', 'fr_ketone', 'fr_ketone_Topliss', 'fr_lactam', 'fr_lactone', 'fr_methoxy', 'fr_morpholine', 'fr_nitrile', 'fr_nitro', 'fr_nitro_arom', 'fr_nitro_arom_nonortho', 'fr_nitroso', 'fr_oxazole', 'fr_oxime', 'fr_para_hydroxylation', 'fr_phenol', 'fr_phenol_noOrthoHbond', 'fr_phos_acid', 'fr_phos_ester', 'fr_piperdine', 'fr_piperzine', 'fr_priamide', 'fr_prisulfonamd', 'fr_pyridine', 'fr_quatN', 'fr_sulfide', 'fr_sulfonamd', 'fr_sulfone', 'fr_term_acetylene', 'fr_tetrazole', 'fr_thiazole', 'fr_thiocyan', 'fr_thiophene', 'fr_unbrch_alkane', 'fr_urea'] m = Chem.MolFromSmiles(smiles) desc_calc = MoleculeDescriptors.MolecularDescriptorCalculator(descriptors_name) descriptors = np.array(desc_calc.CalcDescriptors(m)).tolist() return descriptors def 
construct_input_from_smiles(smiles, max_len=200, global_feature='MACCS'): try: # built a pretrain data from smiles atom_list = [] atom_token_list = ['c', 'C', 'O', 'N', 'n', '[C@H]', 'F', '[C@@H]', 'S', 'Cl', '[nH]', 's', 'o', '[C@]', '[C@@]', '[O-]', '[N+]', 'Br', 'P', '[n+]', 'I', '[S+]', '[N-]', '[Si]', 'B', '[Se]', '[other_atom]'] all_token_list = ['[PAD]', '[GLO]', 'c', 'C', '(', ')', 'O', '1', '2', '=', 'N', '3', 'n', '4', '[C@H]', 'F', '[C@@H]', '-', 'S', '/', 'Cl', '[nH]', 's', 'o', '5', '#', '[C@]', '[C@@]', '\\', '[O-]', '[N+]', 'Br', '6', 'P', '[n+]', '7', 'I', '[S+]', '8', '[N-]', '[Si]', 'B', '9', '[2H]', '[Se]', '[other_atom]', '[other_token]'] # 构建token转化成idx的字典 word2idx = {} for i, w in enumerate(all_token_list): word2idx[w] = i # 构建token_list 并加上padding和global token_list = smi_tokenizer(smiles) padding_list = ['[PAD]' for x in range(max_len-len(token_list))] tokens = ['[GLO]'] + token_list + padding_list mol = MolFromSmiles(smiles) atom_example = mol.GetAtomWithIdx(0) atom_labels_example = atom_labels(atom_example) atom_mask_labels = [2 for x in range(len(atom_labels_example))] atom_labels_list = [] atom_mask_list = [] index = 0 tokens_idx = [] for i, token in enumerate(tokens): if token in atom_token_list: atom = mol.GetAtomWithIdx(index) an_atom_labels = atom_labels(atom) atom_labels_list.append(an_atom_labels) atom_mask_list.append(1) index = index + 1 tokens_idx.append(word2idx[token]) else: if token in all_token_list: atom_labels_list.append(atom_mask_labels) tokens_idx.append(word2idx[token]) atom_mask_list.append(0) elif '[' in list(token): atom = mol.GetAtomWithIdx(index) tokens[i] = '[other_atom]' an_atom_labels = atom_labels(atom) atom_labels_list.append(an_atom_labels) atom_mask_list.append(1) index = index + 1 tokens_idx.append(word2idx['[other_atom]']) else: tokens[i] = '[other_token]' atom_labels_list.append(atom_mask_labels) tokens_idx.append(word2idx['[other_token]']) atom_mask_list.append(0) if global_feature == 'MACCS': 
global_label_list = global_maccs_data(smiles) elif global_feature == 'ECFP4': global_label_list = global_ecfp4_data(smiles) elif global_feature == 'RDKIT_des': global_label_list = global_rdkit_des_data(smiles) tokens_idx = [word2idx[x] for x in tokens] if len(tokens_idx) == max_len + 1: return tokens_idx, global_label_list, atom_labels_list, atom_mask_list else: return 0, 0, 0, 0 except: return 0, 0, 0, 0 def build_maccs_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature='MACCS'): smiles_list = smiles_list tokens_idx_list = [] global_label_list = [] atom_labels_list = [] atom_mask_list = [] for i, smiles in enumerate(smiles_list): tokens_idx, global_labels, atom_labels, atom_mask = construct_input_from_smiles(smiles, global_feature=global_feature) if tokens_idx != 0: tokens_idx_list.append(tokens_idx) global_label_list.append(global_labels) atom_labels_list.append(atom_labels) atom_mask_list.append(atom_mask) print('{}/{} is transformed!'.format(i+1, len(smiles_list))) else: print('{} is transformed failed!'.format(smiles)) pretrain_data_list = [tokens_idx_list, global_label_list, atom_labels_list, atom_mask_list] pretrain_data_np = np.array(pretrain_data_list) np.save(output_smiles_path, pretrain_data_np) def build_ECFP4_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature='ECFP4'): smiles_list = smiles_list tokens_idx_list = [] global_label_list = [] atom_labels_list = [] atom_mask_list = [] for i, smiles in enumerate(smiles_list): tokens_idx, global_labels, atom_labels, atom_mask = construct_input_from_smiles(smiles, global_feature=global_feature) if tokens_idx != 0: tokens_idx_list.append(tokens_idx) global_label_list.append(global_labels) atom_labels_list.append(atom_labels) atom_mask_list.append(atom_mask) print('{}/{} is transformed!'.format(i+1, len(smiles_list))) else: print('{} is transformed failed!'.format(smiles)) pretrain_data_list = [tokens_idx_list, global_label_list, atom_labels_list, atom_mask_list] 
pretrain_data_np = np.array(pretrain_data_list) np.save(output_smiles_path, pretrain_data_np) def build_rdkit_des_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature='RDKIT_des'): smiles_list = smiles_list tokens_idx_list = [] global_label_list = [] atom_labels_list = [] atom_mask_list = [] for i, smiles in enumerate(smiles_list): tokens_idx, global_labels, atom_labels, atom_mask = construct_input_from_smiles(smiles, global_feature=global_feature) if tokens_idx != 0: tokens_idx_list.append(tokens_idx) global_label_list.append(global_labels) atom_labels_list.append(atom_labels) atom_mask_list.append(atom_mask) print('{}/{} is transformed!'.format(i+1, len(smiles_list))) else: print('{} is transformed failed!'.format(smiles)) pretrain_data_list = [tokens_idx_list, global_label_list, atom_labels_list, atom_mask_list] pretrain_data_np = np.array(pretrain_data_list) np.save(output_smiles_path, pretrain_data_np) def build_chirality_pretrain_data_and_save(smiles_list, labels_list, output_smiles_path): tokens_idx_list = [] global_label_list = [] atom_labels_list = [] atom_mask_list = [] for i, smiles in enumerate(smiles_list): tokens_idx, _, atom_labels, atom_mask = construct_input_from_smiles(smiles) if tokens_idx != 0: tokens_idx_list.append(tokens_idx) global_label_list.append([labels_list[i]]) atom_labels_list.append(atom_labels) atom_mask_list.append(atom_mask) print('{}/{} is transformed!'.format(i+1, len(smiles_list))) else: print('{} is transformed failed!'.format(smiles)) pretrain_data_list = [tokens_idx_list, global_label_list, atom_labels_list, atom_mask_list] pretrain_data_np = np.array(pretrain_data_list) np.save(output_smiles_path, pretrain_data_np) def build_mask(labels_list, mask_value=100): mask = [] for i in labels_list: if i == mask_value: mask.append(0) else: mask.append(1) return mask def multi_task_build_dataset(dataset_smiles, labels_list, smiles_name, global_feature='ECFP4'): dataset = [] failed_molecule = [] labels = 
dataset_smiles[labels_list] split_index = dataset_smiles['group'] smilesList = dataset_smiles[smiles_name] molecule_number = len(smilesList) for i, smiles in enumerate(smilesList): token_idx, _, _, _ = construct_input_from_smiles(smiles, global_feature=global_feature) if token_idx != 0: mask = build_mask(labels.loc[i], mask_value=123456) molecule = [smiles, token_idx, labels.loc[i].values.tolist(), mask, split_index.loc[i]] dataset.append(molecule) print('{}/{} molecule is transformed! {} is transformed failed!'.format(i + 1, molecule_number, len(failed_molecule))) else: print('{} is transformed failed!'.format(smiles)) molecule_number = molecule_number - 1 failed_molecule.append(smiles) print('{}({}) is transformed failed!'.format(failed_molecule, len(failed_molecule))) return dataset def built_data_and_save_for_splited( origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv', save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy', task_list_selected=None): data_origin = pd.read_csv(origin_path) data_origin = data_origin.fillna(123456) labels_list = [x for x in data_origin.columns if x not in ['smiles', 'group']] if task_list_selected is not None: labels_list = task_list_selected data_set_gnn = multi_task_build_dataset(dataset_smiles=data_origin, labels_list=labels_list, smiles_name='smiles') smiles, token_idx, labels, mask, split_index = map(list, zip(*data_set_gnn)) dataset_list = [smiles, token_idx, labels, mask, split_index] dataset_np = np.array(dataset_list) np.save(save_path, dataset_np) print('Molecules graph is saved!') def built_ECFP4_data_and_save_for_splited( origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv', save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy', task_list_selected=None): data_origin = pd.read_csv(origin_path) data_origin = data_origin.fillna(123456) labels_list = [x for x in data_origin.columns if x not in ['smiles', 'group']] if task_list_selected is not None: labels_list = task_list_selected data_set_gnn = 
multi_task_build_dataset(dataset_smiles=data_origin, labels_list=labels_list, smiles_name='smiles', global_feature='ECFP4') smiles, token_idx, labels, mask, split_index = map(list, zip(*data_set_gnn)) dataset_list = [smiles, token_idx, labels, mask, split_index] dataset_np = np.array(dataset_list) np.save(save_path, dataset_np) print('Molecules graph is saved!') def built_rdkit_des_data_and_save_for_splited( origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv', save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy', task_list_selected=None): data_origin = pd.read_csv(origin_path) data_origin = data_origin.fillna(123456) labels_list = [x for x in data_origin.columns if x not in ['smiles', 'group']] if task_list_selected is not None: labels_list = task_list_selected data_set_gnn = multi_task_build_dataset(dataset_smiles=data_origin, labels_list=labels_list, smiles_name='smiles', global_feature='RDKIT_des') smiles, token_idx, labels, mask, split_index = map(list, zip(*data_set_gnn)) dataset_list = [smiles, token_idx, labels, mask, split_index] dataset_np = np.array(dataset_list) np.save(save_path, dataset_np) print('Molecules graph is saved!') def contrastive_aug_build_dataset(dataset_smiles, labels_list, smiles_name_list): dataset = [] failed_molecule = [] labels = dataset_smiles[labels_list] split_index = dataset_smiles['group'] smilesList = dataset_smiles[smiles_name_list].values.tolist() molecule_number = len(smilesList) for i, _ in enumerate(smilesList): token_idx_list = [construct_input_from_smiles(smiles)[0] for smiles in smilesList[i]] if 0 not in token_idx_list: mask = build_mask(labels.loc[i], mask_value=123456) molecule = [smilesList[i][0], labels.loc[i].values.tolist(), mask, split_index.loc[i], token_idx_list] dataset.append(molecule) print('{}/{} molecule is transformed! 
{} is transformed failed!'.format(i + 1, molecule_number, len(failed_molecule))) else: print('{} is transformed failed!'.format(smilesList[i][0])) molecule_number = molecule_number - 1 failed_molecule.append(smilesList[i][0]) print('{}({}) is transformed failed!'.format(failed_molecule, len(failed_molecule))) return dataset def built_data_and_save_for_contrastive_splited( origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv', save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy'): data_origin = pd.read_csv(origin_path) data_origin = data_origin.fillna(123456) smiles_list = ['smiles', 'aug_smiles_0', 'aug_smiles_1', 'aug_smiles_2', 'aug_smiles_3'] labels_list = [x for x in data_origin.columns if x not in ['smiles', 'group']+smiles_list ] data_set = contrastive_aug_build_dataset(dataset_smiles=data_origin, labels_list=labels_list, smiles_name_list=smiles_list) smiles, labels, mask, split_index, token_idx, = map(list, zip(*data_set)) dataset_list = [smiles, token_idx, labels, mask, split_index] dataset_np = np.array(dataset_list) np.save(save_path, dataset_np) print('Molecules graph is saved!') def build_maccs_pretrain_contrastive_data_and_save(smiles_list, output_smiles_path, global_feature='MACCS'): # all smiles list smiles_list = smiles_list tokens_idx_all_list = [] global_label_list = [] atom_labels_list = [] atom_mask_list = [] for i, smiles_one_mol in enumerate(smiles_list): tokens_idx_list = [construct_input_from_smiles(smiles, global_feature=global_feature)[0] for smiles in smiles_one_mol] if 0 not in tokens_idx_list: _ , global_labels, atom_labels, atom_mask = construct_input_from_smiles(smiles_one_mol[0], global_feature=global_feature) tokens_idx_all_list.append(tokens_idx_list) global_label_list.append(global_labels) atom_labels_list.append(atom_labels) atom_mask_list.append(atom_mask) print('{}/{} is transformed!'.format(i+1, len(smiles_list))) else: print('{} is transformed failed!'.format(smiles_one_mol[0])) pretrain_data_list = [tokens_idx_all_list, 
global_label_list, atom_labels_list, atom_mask_list] pretrain_data_np = np.array(pretrain_data_list, dtype=object) np.save(output_smiles_path, pretrain_data_np) def build_pretrain_chirality_R_S_contrastive_data_and_save(smiles_list, global_all_label_list, output_smiles_path, global_feature='MACCS'): # all smiles list smiles_list = smiles_list tokens_idx_all_list = [] global_label_list = [] atom_labels_list = [] atom_mask_list = [] for i, smiles_one_mol in enumerate(smiles_list): tokens_idx_list = [construct_input_from_smiles(smiles, global_feature=global_feature)[0] for smiles in smiles_one_mol] if 0 not in tokens_idx_list: _ , global_labels, atom_labels, atom_mask = construct_input_from_smiles(smiles_one_mol[0], global_feature=global_feature) tokens_idx_all_list.append(tokens_idx_list) global_label_list.append(global_all_label_list[i]) atom_labels_list.append(atom_labels) atom_mask_list.append(atom_mask) print('{}/{} is transformed!'.format(i+1, len(smiles_list))) else: print('{} is transformed failed!'.format(smiles_one_mol[0])) pretrain_data_list = [tokens_idx_all_list, global_label_list, atom_labels_list, atom_mask_list] pretrain_data_np = np.array(pretrain_data_list, dtype=object) np.save(output_smiles_path, pretrain_data_np) def load_data_for_pretrain(pretrain_data_path='./data/CHEMBL_wash_500_pretrain'): tokens_idx_list = [] global_labels_list = [] atom_labels_list = [] atom_mask_list = [] for i in range(80): pretrain_data = np.load(pretrain_data_path+'_{}.npy'.format(i+1), allow_pickle=True) tokens_idx_list = tokens_idx_list + [x for x in pretrain_data[0]] global_labels_list = global_labels_list + [x for x in pretrain_data[1]] atom_labels_list = atom_labels_list + [x for x in pretrain_data[2]] atom_mask_list = atom_mask_list + [x for x in pretrain_data[3]] print(pretrain_data_path+'_{}.npy'.format(i+1) + ' is loaded') pretrain_data_final = [] for i in range(len(tokens_idx_list)): a_pretrain_data = [tokens_idx_list[i], global_labels_list[i], 
atom_labels_list[i], atom_mask_list[i]] pretrain_data_final.append(a_pretrain_data) return pretrain_data_final def load_data_for_contrastive_aug_pretrain(pretrain_data_path='./data/CHEMBL_wash_500_pretrain'): tokens_idx_list = [] global_labels_list = [] atom_labels_list = [] atom_mask_list = [] for i in range(80): pretrain_data = np.load(pretrain_data_path+'_contrastive_{}.npy'.format(i+1), allow_pickle=True) tokens_idx_list = tokens_idx_list + [x for x in pretrain_data[0]] global_labels_list = global_labels_list + [x for x in pretrain_data[1]] atom_labels_list = atom_labels_list + [x for x in pretrain_data[2]] atom_mask_list = atom_mask_list + [x for x in pretrain_data[3]] print(pretrain_data_path+'_contrastive_{}.npy'.format(i+1) + ' is loaded') pretrain_data_final = [] for i in range(len(tokens_idx_list)): a_pretrain_data = [tokens_idx_list[i], global_labels_list[i], atom_labels_list[i], atom_mask_list[i]] pretrain_data_final.append(a_pretrain_data) return pretrain_data_final def load_data_for_pretrain_rdkit_des(pretrain_data_path='./data/CHEMBL_wash_500_pretrain'): tokens_idx_list = [] global_labels_list = [] atom_labels_list = [] atom_mask_list = [] for i in range(80): pretrain_data = np.load(pretrain_data_path+'_{}.npy'.format(i+1), allow_pickle=True) tokens_idx_list = tokens_idx_list + [x for x in pretrain_data[0]] global_labels_list = global_labels_list + [x for x in pretrain_data[1]] atom_labels_list = atom_labels_list + [x for x in pretrain_data[2]] atom_mask_list = atom_mask_list + [x for x in pretrain_data[3]] print(pretrain_data_path+'_{}.npy'.format(i+1) + ' is loaded') global_labels_pd = pd.DataFrame(global_labels_list) global_labels_normal = global_labels_pd.apply(lambda x: (x - x.mean()) / math.sqrt(sum((x - x.min()) ** 2 / len(x)))) global_labels_normal_final = global_labels_normal.dropna(axis=1, how='any') pretrain_data_final = [] for i in range(len(tokens_idx_list)): a_pretrain_data = [tokens_idx_list[i], 
global_labels_normal_final.iloc[i].values.tolist(), atom_labels_list[i], atom_mask_list[i]] pretrain_data_final.append(a_pretrain_data) global_labels_dim = len(global_labels_normal_final.iloc[1].values.tolist()) return pretrain_data_final, global_labels_dim def load_data_for_augmentation_pretrain(pretrain_data_path='./data/CHEMBL_wash_500_pretrain'): tokens_idx_list = [] global_labels_list = [] atom_labels_list = [] atom_mask_list = [] for i in range(40): pretrain_data = np.load(pretrain_data_path+'_{}.npy'.format(i+1), allow_pickle=True) tokens_idx_list = tokens_idx_list + [x for x in pretrain_data[0]] global_labels_list = global_labels_list + [x for x in pretrain_data[1]] atom_labels_list = atom_labels_list + [x for x in pretrain_data[2]] atom_mask_list = atom_mask_list + [x for x in pretrain_data[3]] print(pretrain_data_path+'_{}.npy'.format(i+1) + ' is loaded') pretrain_data_final = [] for i in range(len(tokens_idx_list)): a_pretrain_data = [tokens_idx_list[i], global_labels_list[i], atom_labels_list[i], atom_mask_list[i]] pretrain_data_final.append(a_pretrain_data) return pretrain_data_final def load_data_for_splited(data_path='example.npy'): data = np.load(data_path, allow_pickle=True) smiles_list = data[0] tokens_idx_list = data[1] labels_list = data[2] mask_list = data[3] group_list = data[4] train_set = [] val_set = [] test_set = [] task_number = len(labels_list[1]) for i, group in enumerate(group_list): molecule = [smiles_list[i], tokens_idx_list[i], labels_list[i], mask_list[i]] if group == 'training': train_set.append(molecule) elif group == 'val': val_set.append(molecule) else: test_set.append(molecule) print('Training set: {}, Validation set: {}, Test set: {}, task number: {}'.format( len(train_set), len(val_set), len(test_set), task_number)) return train_set, val_set, test_set, task_number def load_data_for_random_splited(data_path='example.npy', shuffle=True): data = np.load(data_path, allow_pickle=True) smiles_list = data[0] tokens_idx_list = 
data[1] labels_list = data[2] mask_list = data[3] group_list = data[4] if shuffle: random.shuffle(group_list) print(group_list) train_set = [] val_set = [] test_set = [] task_number = len(labels_list[1]) for i, group in enumerate(group_list): molecule = [smiles_list[i], tokens_idx_list[i], labels_list[i], mask_list[i]] if group == 'training': train_set.append(molecule) elif group == 'val': val_set.append(molecule) else: test_set.append(molecule) print('Training set: {}, Validation set: {}, Test set: {}, task number: {}'.format( len(train_set), len(val_set), len(test_set), task_number)) return train_set, val_set, test_set, task_number def task_dataset_analyze(data_path='example.npy'): data = np.load(data_path, allow_pickle=True) smiles_list = data[0] tokens_idx_list = data[1] labels_list = data[2] mask_list = data[3] group_list = data[4] train_set_pad = 0 val_set_pad = 0 test_set_pad = 0 train_set_other_atom = 0 val_set_other_atom = 0 test_set_other_atom = 0 train_set_other_token = 0 val_set_other_token = 0 test_set_other_token = 0 train_set = [] val_set = [] test_set = [] task_number = len(labels_list[1]) for i, group in enumerate(group_list): tokens_idx_np = np.array(tokens_idx_list[i]) pad_count = len(np.where(tokens_idx_np == 0)[0]) other_atom_count = len(np.where(tokens_idx_np == 45)[0]) other_token_count = len(np.where(tokens_idx_np == 46)[0]) if group == 'training': train_set.append(tokens_idx_np) train_set_pad = train_set_pad + pad_count train_set_other_atom = train_set_other_atom + other_atom_count train_set_other_token = train_set_other_token + other_token_count elif group == 'val': val_set.append(tokens_idx_np) val_set_pad = val_set_pad + pad_count val_set_other_atom = val_set_other_atom + other_atom_count val_set_other_token = val_set_other_token + other_token_count else: test_set.append(tokens_idx_np) test_set_pad = test_set_pad + pad_count test_set_other_atom = test_set_other_atom + other_atom_count test_set_other_token = test_set_other_token + 
other_token_count print('Training set, mol count: {}, pad count: {} {}%, other atom count: {}, other token count: {}'.format( len(train_set), train_set_pad, round(train_set_pad/(len(train_set)*201)*100, 2), train_set_other_atom, train_set_other_token)) print('Validation set, mol count: {}, pad count: {} {}%, other atom count: {}, other token count: {}'.format( len(val_set), val_set_pad, round(val_set_pad/(len(val_set)*201)*100, 2), val_set_other_atom, val_set_other_token)) print('Test set, mol count: {}, pad count: {} {}%, other atom count: {}, other token count: {}'.format( len(test_set), test_set_pad, round(test_set_pad/(len(test_set)*201)*100, 2), test_set_other_atom, test_set_other_token)) -------------------------------------------------------------------------------- /data/ADMETlab_data/CYP2C9-sub_canonical.csv: -------------------------------------------------------------------------------- 1 | smiles,group,CYP2C9-sub 2 | COCc1c(C(=O)OC(C)C)ncc2[nH]c3ccc(OCc4ccccc4)cc3c12,training,0 3 | CC(=O)Nc1nnc(S(N)(=O)=O)s1,training,0 4 | CC(C)=O,training,0 5 | CC[C@@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H]([C@H]2[C@H]5O)N3[C@@H]1O,training,0 6 | CCC(=O)N(c1ccccc1)C1(COC)CCN(CCn2nnn(CC)c2=O)CC1,training,0 7 | CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12,training,0 8 | CCCN(CCC)C(=O)Cc1c(-c2ccc(Cl)cc2)nc2ccc(Cl)cn12,training,0 9 | Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2,training,0 10 | Nc1c(Br)cc(Br)cc1CNC1CCC(O)CC1,training,0 11 | CC[C@]1(c2ccc(N)cc2)CCC(=O)NC1=O,training,0 12 | CNn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc21,training,0 13 | CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,training,0 14 | O=C1c2ccccc2C(=O)c2ccccc21,training,0 15 | CCC[C@H]1C(=O)N2C(N(C)C)=Nc3ccc(C)cc3N2C1=O,training,0 16 | CN1CCc2cccc3c2[C@@H]1Cc1ccc(O)c(O)c1-3,training,0 17 | C[C@@H](O[C@H]1OCCN(Cc2n[nH]c(=O)[nH]2)[C@H]1c1ccc(F)cc1)c1cc(C(F)(F)F)cc(C(F)(F)F)c1,training,0 18 | CCN(CC)CCCN(c1ccccc1)C1Cc2ccccc2C1,training,0 19 | 
C[C@H]1CCN(C(=O)[C@H](CCCN=C(N)N)NS(=O)(=O)c2ccc3c(c2)C[C@@H](C)CN3)[C@@H](C(=O)O)C1,training,0 20 | O=C1CCc2ccc(OCCCCN3CCN(c4cccc(Cl)c4Cl)CC3)cc2N1,training,0 21 | C[C@@H]1CC[C@H]2[C@@H](C)C(=O)O[C@@H]3O[C@@]4(C)CC[C@@H]1[C@]32OO4,training,0 22 | C[C@@H]1CC[C@H]2[C@@H](C)[C@@H](OC(=O)CCC(=O)O)O[C@@H]3O[C@@]4(C)CC[C@@H]1[C@@]23OO4,training,0 23 | COC(=O)N[C@H](C(=O)N[C@@H](Cc1ccccc1)[C@H](O)CN(Cc1ccc(-c2ccccn2)cc1)NC(=O)[C@H](NC(=O)OC)C(C)(C)C)C(C)(C)C,training,0 24 | O=C1C(=O)c2ccccc2C(O)=C1C1CCC(c2ccc(Cl)cc2)CC1,training,0 25 | CNS(=O)(=O)Cc1ccc2[nH]cc(CCCN3CCN(c4ncncc4OC)CC3)c2c1,training,0 26 | CC[C@]1(C)C[C@@H](OC(=O)CSc2n[nH]c(N)n2)[C@]2(C)[C@@H](C)CC[C@]3(CCC(=O)[C@H]32)[C@@H](C)[C@@H]1O,training,0 27 | CN1CCC(=C2c3ccccc3CCc3cccnc32)CC1,training,0 28 | CN1CCC[C@H](n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1,training,0 29 | CN1CCN(CCCCN2C(=O)CN(/N=C\c3ccc(-c4ccc(Cl)cc4)o3)C2=O)CC1,training,0 30 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)CN(C)[C@H](C)[C@@H](O)[C@]1(C)O,training,0 31 | COC(=O)C1=C(C)NC(C)=C(C(=O)O[C@H]2CCN(Cc3ccccc3)C2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0 32 | C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(Cl)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,training,0 33 | COC(=O)C1=C(C)NC(C)=C(C(=O)O[C@@H]2CCCN(Cc3ccccc3)C2)[C@@H]1c1cccc([N+](=O)[O-])c1,training,0 34 | C[C@@H](Cc1ccccc1)N(C)Cc1ccccc1,training,0 35 | CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,training,0 36 | CC(C)COC[C@@H](CN(Cc1ccccc1)c1ccccc1)N1CCCC1,training,0 37 | C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,training,0 38 | CC(C)NC[C@H](O)COc1ccc(CCOCC2CC2)cc1,training,0 39 | CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(=O)O,training,0 40 | c1ccc(-c2ccc([C@@H](c3ccccc3)n3ccnc3)cc2)cc1,training,0 41 | O[C@@](CCN1CCCCC1)(c1ccccc1)[C@@H]1C[C@@H]2C=C[C@H]1C2,training,0 42 | CCc1oc2ccccc2c1C(=O)c1cc(Br)c(O)c(Br)c1,training,0 43 | 
C[C@]12C=CC(=O)C=C1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12,training,0 44 | CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,0 45 | COc1cc(Br)c2oc(C3CCNCC3)cc2c1,training,0 46 | O=C1CN=C(c2ccccn2)c2cc(Br)ccc2N1,training,0 47 | CC(C)C[C@H]1C(=O)N2CCC[C@H]2[C@]2(O)O[C@](NC(=O)[C@@H]3C=C4c5cccc6[nH]c(Br)c(c56)C[C@H]4N(C)C3)(C(C)C)C(=O)N12,training,0 48 | O=C(CCCN1CCC(O)(c2ccc(Br)cc2)CC1)c1ccc(F)cc1,training,0 49 | Nc1nc(=O)c(Br)c(-c2ccccc2)[nH]1,training,0 50 | Cc1nnc2n1-c1sc(Br)cc1C(c1ccccc1Cl)=NC2,training,0 51 | CC(C)(C)N1CCC(c2ccccc2)(c2ccccc2)CC1,training,0 52 | CO[C@]12CC[C@@]3(C[C@@H]1[C@](C)(O)C(C)(C)C)[C@H]1Cc4ccc(O)c5c4[C@@]3(CCN1CC1CC1)[C@@H]2O5,training,0 53 | O=C1CC2(CCCC2)CC(=O)N1CCCCN1CCN(c2ncccn2)CC1,training,0 54 | CS(=O)(=O)OCCCCOS(C)(=O)=O,training,0 55 | CC(C)c1nc(COC(N)=O)n(Cc2ccncc2)c1Sc1cc(Cl)cc(Cl)c1,training,0 56 | CCOC(=O)n1ccn(C)c1=S,training,0 57 | CCC[C@@](C)(COC(N)=O)COC(=O)NC(C)C,training,0 58 | CC(C)(C)NC[C@H](O)COc1cccc2c1CCC(=O)N2,training,0 59 | COCc1c(C(C)C)nc(C(C)C)c(/C=C\[C@@H](O)C[C@@H](O)CC(=O)O)c1-c1ccc(F)cc1,training,0 60 | O=C(O)COCCN1CCN([C@H](c2ccccc2)c2ccc(Cl)cc2)CC1,training,0 61 | C[C@H]1O[C@]2(CS1)CN1CCC2CC1,training,0 62 | CCN(CC)CCC[C@H](C)Nc1ccnc2cc(Cl)ccc12,training,0 63 | CN(C)CC[C@H](c1ccc(Cl)cc1)c1ccccn1,training,0 64 | OC(O)C(Cl)(Cl)Cl,training,0 65 | C=C1CC[C@H](O)C/C1=C/C=C1\CCC[C@@]2(C)[C@H]1CC[C@@H]2[C@H](C)CCCC(C)C,training,0 66 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC/C=C/c2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0 67 | O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O,training,0 68 | CC(C)(Oc1ccc([C@@H]2CC2(Cl)Cl)cc1)C(=O)O,training,0 69 | CNC1=Nc2ccc(Cl)cc2C(c2ccccc2)=[N+]([O-])C1,training,0 70 | CCC[C@@H]1C[C@H](C(=O)N[C@H]([C@H](C)Cl)[C@H]2O[C@H](SC)[C@H](O)[C@@H](O)[C@H]2O)N(C)C1,training,0 71 | CC(C)/N=c1/cc2n(-c3ccc(Cl)cc3)c3ccccc3nc-2cc1Nc1ccc(Cl)cc1,training,0 72 | Cc1ncsc1CCCl,training,0 73 | OCCN1CCN(CC/C=C2/c3ccccc3Sc3ccc(Cl)cc32)CC1,training,0 74 | 
Clc1ccccc1C(c1ccccc1)(c1ccccc1)n1ccnc1,training,0 75 | COC(=O)[C@H]1[C@@H](OC(=O)c2ccccc2)C[C@@H]2CC[C@H]1N2C,training,0 76 | COc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0 77 | COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2,training,0 78 | C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,training,0 79 | CN1C(=O)CC[C@H]1c1cccnc1,training,0 80 | CN(C)CCC=C1c2ccccc2C=Cc2ccccc21,training,0 81 | CC(=O)[C@@]1(O)CC[C@H]2[C@@H]3C=C(Cl)C4=CC(=O)[C@@H]5C[C@@H]5[C@]4(C)[C@H]3CC[C@@]21C,training,0 82 | CCN(CC)CCS(=O)(=O)[C@@H]1CCN2C(=O)c3coc(n3)CC(=O)C[C@H](O)C=C(C)C=CCNC(=O)C=C[C@@H](C)[C@@H](C(C)C)OC(=O)[C@@H]12,training,0 83 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(C)=O)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1,training,0 84 | O=C(O)c1ccc(OCCn2ccnc2)cc1,training,0 85 | CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N(CC(=O)O)C1Cc2ccccc2C1,training,0 86 | CNCCCN1c2ccccc2CCc2ccccc21,training,0 87 | C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,training,0 88 | Cc1cccc([C@H](C)c2c[nH]cn2)c1C,training,0 89 | ClC1=C(Cl)[C@]2(Cl)[C@@H]3[C@@H]4C[C@H]([C@@H]3[C@@]1(Cl)C2(Cl)Cl)[C@H]1O[C@@H]41,training,0 90 | CC/C(=C(\CC)c1ccc(O)cc1)c1ccc(O)cc1,training,0 91 | C[C@H]1O[C@@H](O[C@H]2[C@@H](O)C[C@H](O[C@H]3[C@@H](O)C[C@H](O[C@H]4CC[C@]5(C)[C@H]6CC[C@]7(C)[C@@H](C8=CC(=O)OC8)CC[C@]7(O)[C@@H]6CC[C@@H]5C4)O[C@@H]3C)O[C@@H]2C)C[C@H](O)[C@@H]1O,training,0 92 | C[C@H]1O[C@@H](O[C@H]2[C@@H](O)C[C@H](O[C@H]3[C@@H](O)C[C@H](O[C@H]4CC[C@]5(C)[C@H]6C[C@@H](O)[C@]7(C)[C@@H](C8=CC(=O)OC8)CC[C@]7(O)[C@@H]6CC[C@@H]5C4)O[C@@H]3C)O[C@@H]2C)C[C@H](O)[C@@H]1O,training,0 93 | NNc1nnc(NN)c2ccccc12,training,0 94 | COc1ccc(-c2cc(=O)c3c(O)cc(O[C@@H]4O[C@H](CO[C@@H]5O[C@@H](C)[C@H](O)[C@@H](O)[C@H]5O)[C@@H](O)[C@H](O)[C@H]4O)cc3o2)cc1O,training,0 95 | CCC(C)(C)NC[C@H](O)COc1ccccc1C(=O)CCc1ccccc1,training,0 96 | CCN(CC)C(=S)SSC(=S)N(CC)CC,training,0 97 | CS(C)=O,training,0 98 | 
CN(CCOc1ccc(NS(C)(=O)=O)cc1)CCc1ccc(NS(C)(=O)=O)cc1,training,0 99 | O=c1[nH]c2ccccc2n1CCCN1CCC(n2c(=O)[nH]c3cc(Cl)ccc32)CC1,training,0 100 | COc1cc2c(cc1OC)C(=O)[C@H](CC1CCN(Cc3ccccc3)CC1)C2,training,0 101 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(=O)CO)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1,training,0 102 | C[C@H]1c2cccc(O)c2C(=O)C2=C(O)[C@]3(O)C(=O)C(C(N)=O)=C(O)[C@@H](N(C)C)[C@@H]3[C@@H](O)[C@@H]21,training,0 103 | C[C@]12CC[C@H]3[C@@H](CC[C@H]4NC(=O)C=C[C@]34C)[C@@H]1CC[C@@H]2C(=O)Nc1cc(C(F)(F)F)ccc1C(F)(F)F,training,0 104 | CC(C)(C)c1ccc(C(=O)CCCN2CCC(OC(c3ccccc3)c3ccccc3)CC2)cc1,training,0 105 | NC(N)=Nc1nc(CSCC/N=C/NS(=O)(=O)c2ccc(Br)cc2)cs1,training,0 106 | CNC(=O)c1cccc(NCC(=O)NCCc2ccc(OC)c(OC)c2)c1,training,0 107 | Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1,training,0 108 | CC1=C(C(=O)OCCN(Cc2ccccc2)c2ccccc2)[C@H](c2cccc([N+](=O)[O-])c2)C(P2(=O)OCC(C)(C)CO2)=C(C)N1,training,0 109 | CCOCn1c(Cc2ccccc2)c(C(C)C)c(=O)[nH]c1=O,training,0 110 | COc1ccc(C(=O)Nc2ccccc2CC[C@H]2CCCCN2C)cc1,training,0 111 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNCC3)nc21,training,0 112 | NC1=NC[C@@H]2c3ccccc3Cc3ccccc3N12,training,0 113 | COC(=O)[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@@]23O[C@@H]2C[C@@]2(C)[C@@H](CC[C@@]24CCC(=O)O4)[C@H]13,training,0 114 | CCCCc1ncc(/C=C(/Cc2cccs2)C(=O)O)n1Cc1ccc(C(=O)O)cc1,training,0 115 | C[C@H](CO)NC(=O)[C@@H]1C=C2c3cccc4[nH]cc(c34)C[C@H]2N(C)C1,training,0 116 | CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4C(=O)[C@H](Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)C[C@H]21,training,0 117 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,0 118 | CC[C@]1(C)CC(=O)NC1=O,training,0 119 | CCN1C(=O)N[C@H](c2ccccc2)C1=O,training,0 120 | CCOc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0 121 | CCOC(=O)c1cncn1[C@H](C)c1ccccc1,training,0 122 | 
C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3C(=C)C[C@@]21CC,training,0 123 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](C)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,0 124 | COc1ccc(C(C)C)cc1CN[C@H]1C2CCN(CC2)[C@H]1C(c1ccccc1)c1ccccc1,training,0 125 | NC(N)=Nc1nc(CSCC/C(N)=N\S(N)(=O)=O)cs1,training,0 126 | CC(C)OC(=O)C(C)(C)Oc1ccc(C(=O)c2ccc(Cl)cc2)cc1,training,0 127 | CC(C)(C(=O)O)c1ccc([C@@H](O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0 128 | CC(C)(C)NC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4NC(=O)C=C[C@]4(C)[C@H]3CC[C@]12C,training,0 129 | O=C(NC[C@H]1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F,training,0 130 | Cn1cc(S(C)=O)c(=O)c2ccc(F)cc21,training,0 131 | Fc1cccc2c1O[C@H]1CNC[C@@H]1O2,training,0 132 | CN1CCN(C2=Nc3cc(F)ccc3Cc3ccccc32)CC1,training,0 133 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,0 134 | O=c1[nH]cc(F)c(=O)[nH]1,training,0 135 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@](C)(F)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,0 136 | CCC(=O)O[C@]1(C(=O)SCF)[C@H](C)C[C@H]2[C@@H]3C[C@H](F)C4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@@]21C,training,0 137 | CN[C@@H]1CCc2[nH]c3ccc(C(N)=O)cc3c2C1,training,0 138 | Cc1nc2c([nH]1)c(=O)n(C)c(=O)n2Cc1ccco1,training,0 139 | COc1ccc2c3c1O[C@H]1C[C@@H](O)C=C[C@@]31CCN(C)C2,training,0 140 | COc1ccc(CCN(C)CCC[C@@](C#N)(c2cc(OC)c(OC)c(OC)c2)C(C)C)cc1OC,training,0 141 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4C[C@](C)(O)CC[C@]4(C)[C@H]3CC[C@]12C,training,0 142 | COc1c(N2CCN[C@@H](C)C2)c(F)cc2c(=O)c(C(=O)O)cn(C3CC3)c12,training,0 143 | CC1(C)CC(=O)N(CCCCN2CCN(c3ncccn3)CC2)C(=O)C1,training,0 144 | CN1[C@H]2CCC[C@@H]1CC(NC(=O)c1nn(C)c3ccccc13)C2,training,0 145 | Cc1c(F)c(N2CCN[C@H](C)C2)cc2c1c(=O)c(C(=O)O)cn2C1CC1,training,0 146 | COC1=CC(=O)C[C@@H](C)[C@]12Oc1c(Cl)c(OC)cc(OC)c1C2=O,training,0 147 | NNc1nncc2ccccc12,training,0 148 | CN1CCCN([C@H](c2ccccc2)c2ccc(Cl)cc2)CC1,training,0 149 
| CCN(CCO)CCC[C@H](C)Nc1ccnc2cc(Cl)ccc12,training,0 150 | CC[C@H]1CN2CC[C@H]1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12,training,0 151 | COc1ccc2c3c1O[C@H]1C(=O)CC[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0 152 | OCCOCCN1CCN([C@H](c2ccccc2)c2ccc(Cl)cc2)CC1,training,0 153 | COc1cc(C(C)=O)ccc1OCCCN1CCC(c2noc3cc(F)ccc23)CC1,training,0 154 | O=C(NC1CCN(CCc2c[nH]c3ccccc23)CC1)c1ccccc1,training,0 155 | CCc1c2c(nc3ccc(OC(=O)N4CCC(N5CCCCC5)CC4)cc13)-c1cc3c(c(=O)n1C2)COC(=O)[C@]3(O)CC,training,0 156 | O=C(O)CCCC/C=C(\c1ccccc1)c1cccnc1,training,0 157 | Clc1ccc([C@@H](Cn2ccnc2)OCc2c(Cl)cccc2Cl)c(Cl)c1,training,0 158 | FC(F)O[C@@H](Cl)C(F)(F)F,training,0 159 | NNC(=O)c1ccncc1,training,0 160 | O=[N+]([O-])O[C@H]1CO[C@H]2[C@@H]1OC[C@H]2O[N+](=O)[O-],training,0 161 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc2nonc12,training,0 162 | CO[C@@H]1[C@@H](O[C@@H]2O[C@H](C)[C@@H](O[C@H]3C[C@@](C)(O)[C@@H](OC(=O)CC(C)C)[C@H](C)O3)[C@H](N(C)C)[C@H]2O)[C@@H](CC=O)C[C@@H](C)[C@@H](O)C=CC=CC[C@@H](C)OC(=O)C[C@H]1OC(C)=O,training,0 163 | CC(=O)N1CCN(c2ccc(OC[C@H]3CO[C@](Cn4ccnc4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1,training,0 164 | C[C@@H](C(=O)O)c1cccc(C(=O)c2ccccc2)c1,training,0 165 | C[C@@H](CCc1ccccc1)NC[C@H](O)c1ccc(O)c(C(N)=O)c1,training,0 166 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)(C)CN(C)CCC(c2ccccc2)c2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0 167 | N#Cc1ccc(C(c2ccc(C#N)cc2)n2cncn2)cc1,training,0 168 | CN(C)c1ccc([C@H]2C[C@@]3(C)[C@@H](CC[C@@]3(O)/C=C/CO)[C@@H]3CCC4=CC(=O)CCC4=C32)cc1,training,0 169 | CC(=O)NC[C@H]1CN(c2ccc(N3CCOCC3)c(F)c2)C(=O)O1,training,0 170 | N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O,training,0 171 | CCN(CC)C(=O)N[C@H]1C=C2c3cccc4[nH]cc(c34)C[C@H]2N(C)C1,training,0 172 | c1ccc2cc(COC3CCNCC3)ccc2c1,training,0 173 | CN1[C@H](C[C@H](O)c2ccccc2)CCC[C@@H]1CC(=O)c1ccccc1,training,0 174 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN[C@H](C)C3)c(F)c21,training,0 175 | O=NN(CCCl)C(=O)NC1CCCCC1,training,0 176 | 
Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](Cc1ccccc1)NC(=O)[C@H](C(C)C)N1CCCNC1=O,training,0 177 | CCCCN(CCCC)C[C@@H](O)c1cc(Cl)cc2c1-c1ccc(Cl)cc1/C2=C\c1ccc(Cl)cc1,training,0 178 | CCOC(=O)C[C@@H](SP(=S)(OC)OC)C(=O)OCC,training,0 179 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN2CCN(C(c3ccccc3)c3ccccc3)CC2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0 180 | CNCCCC12CCC(c3ccccc31)c1ccccc12,training,0 181 | COC(=O)Nc1nc2cc(C(=O)c3ccccc3)ccc2[nH]1,training,0 182 | CN1CCN=C(c2ccccc2)c2cc(Cl)ccc21,training,0 183 | CN(C)CC(Oc1ccccc1)Oc1ccccc1,training,0 184 | O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1,training,0 185 | CCOC(=O)C1(c2ccccc2)CCN(C)CC1,training,0 186 | COC(=O)[C@H](c1ccccc1)[C@H]1CCCCN1,training,0 187 | CC[C@]1(c2ccccc2)C(=O)NC(=O)N(C)C1=O,training,0 188 | C[C@H]1C[C@H]2[C@@H]3CC[C@](O)(C(=O)CO)[C@@]3(C)C[C@H](O)[C@@H]2[C@@]2(C)C=CC(=O)C=C12,training,0 189 | c1ccc2c(c1)Sc1ccccc1N2C[C@H]1CN2CCC1CC2,training,0 190 | Cc1ccccc1-n1c(C)nc2ccccc2c1=O,training,0 191 | CNC(=O)O/N=C(\C)SC,training,0 192 | COc1c2occc2cc2ccc(=O)oc12,training,0 193 | CC(C)(C(=O)c1cccnc1)c1cccnc1,training,0 194 | CC[C@H](C)C(=O)O[C@H]1CCC=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0 195 | C[C@@H]1CO[C@]2(c3ccccc3Cl)c3cc(Cl)ccc3NC(=O)CN12,training,0 196 | COCC(=O)O[C@]1(CCN(C)CCCc2nc3ccccc3[nH]2)CCc2cc(F)ccc2[C@@H]1C(C)C,training,0 197 | Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1,training,0 198 | CO/N=C\C1=CCCN(C)C1,training,0 199 | Cc1cc(-c2ccccc2)nnc1NCCN1CCOCC1,training,0 200 | Nc1cc(N2CCCCC2)nc(N)[n+]1[O-],training,0 201 | CN(c1nccc(=O)[nH]1)C1CCN(c2nc3ccccc3n2Cc2ccc(F)cc2)CC1,training,0 202 | CCc1c(C)[nH]c2c1C(=O)[C@@H](CN1CCOCC1)CC2,training,0 203 | CCOC(=O)Nc1ccc2c(c1)N(C(=O)CCN1CCOCC1)c1ccccc1S2,training,0 204 | CCOc1cc(N)c(Cl)cc1C(=O)NC[C@@H]1CN(Cc2ccc(F)cc2)CCO1,training,0 205 | CCOc1ccc2ccccc2c1C(=O)N[C@@H]1C(=O)N2[C@@H](C(=O)O)C(C)(C)S[C@H]12,training,0 206 | CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21,training,0 207 | Cc1cccc(C)c1NC(=O)CN1CCCC1=O,training,0 
208 | O=[N+]([O-])c1cncn1CCN1CCOCC1,training,0 209 | O=[N+]([O-])OCC(CO[N+](=O)[O-])O[N+](=O)[O-],training,0 210 | Fc1ccc(Cn2c(NC3CCNCC3)nc3ccccc32)cc1,training,0 211 | COc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)NCC[C@]314,training,0 212 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21C,training,0 213 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21CC,training,0 214 | CNCCC=C1c2ccccc2CCc2ccccc21,training,0 215 | Cc1cc2c(s1)Nc1ccccc1N=C2N1CCN(C)CC1,training,0 216 | CN(C)CC/C=C1\c2ccccc2COc2ccc(CC(=O)O)cc21,training,0 217 | Cc1c(-c2cnccn2)ssc1=S,training,0 218 | CN(C)c1ccc([C@H]2C[C@@]3(C)[C@@H](CC[C@]3(O)CCCO)[C@@H]3CCC4=CC(=O)CCC4=C32)cc1,training,0 219 | OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1,training,0 220 | NC(=O)N1c2ccccc2CC(=O)c2ccccc21,training,0 221 | Clc1ccc(CO/N=C(\Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1,training,0 222 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc2c1OCO2,training,0 223 | COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC,training,0 224 | CCC(=O)NS(=O)(=O)c1ccc(-c2c(-c3ccccc3)noc2C)cc1,training,0 225 | Fc1ccc([C@@H]2CCNC[C@H]2COc2ccc3c(c2)OCO3)cc1,training,0 226 | N=C(N)c1ccc(OCCCCCOc2ccc(C(=N)N)cc2)cc1,training,0 227 | CC(C)=CCN1CC[C@]2(C)c3cc(O)ccc3C[C@H]1[C@H]2C,training,0 228 | CCC[C@@H](C)C1(CC)C(=O)NC(=O)NC1=O,training,0 229 | CCCN1C[C@H](CSC)C[C@@H]2c3cccc4[nH]cc(c34)C[C@H]21,training,0 230 | O=C1[C@H]2CCCC[C@H]2C(=O)N1CCCCN1CCN(c2nsc3ccccc23)CC1,training,0 231 | c1ccc(C2(N3CCCCC3)CCCCC2)cc1,training,0 232 | CN1C(=O)C[C@@H](c2ccccc2)C1=O,training,0 233 | CC[C@@H]1C(=O)OC[C@@H]1Cc1cncn1C,training,0 234 | Cc1cccc(C)c1NC(=O)CC12CCCN1CCC2,training,0 235 | COc1ccc(-c2nc3cc(C4=NNC(=O)C[C@H]4C)ccc3[nH]2)cc1,training,0 236 | O=c1[nH]c2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,training,0 237 | C[C@H](N/C(=N\C#N)Nc1ccncc1)C(C)(C)C,training,0 238 | CCn1cc(C(=O)O)c(=O)c2cnc(N3CCNCC3)nc21,training,0 239 | C#CC[C@]12CCC(=O)C=C1CC[C@H]1[C@@H]3CCC(=O)[C@@]3(C)CC[C@@H]12,training,0 240 | CCCN[C@H]1CCc2nc(N)sc2C1,training,0 241 | 
COC(=O)C1=C(C)NC(C)=C(C(=O)OC/C=C/c2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0 242 | C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43C)[C@@H]1CCC2=O,training,0 243 | C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,training,0 244 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,training,0 245 | CCC1(c2ccccc2)C(=O)NCNC1=O,training,0 246 | CCCO,training,0 247 | CN(C)C(=O)OC1=CC=[C-][N+](C)(Br)C1,training,0 248 | COc1ccc(CN(CCN(C)C)c2ccccn2)cc1,training,0 249 | CCCN1CCC[C@@H]2Cc3nc(N)ncc3C[C@H]21,training,0 250 | C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@H](O)c1ccnc2ccc(OC)cc12,training,0 251 | COc1ccccc1OC[C@@H](O)CN1CCN(CC(=O)Nc2c(C)cccc2C)CC1,training,0 252 | O=C(N[C@H](Cc1cc(=O)[nH]c2ccccc12)C(=O)O)c1ccc(Cl)cc1,training,0 253 | CCOc1ccccc1O[C@H](c1ccccc1)[C@H]1CNCCO1,training,0 254 | C[C@@](Cc1ccccc1)(NC(=O)CN)c1ccccc1,training,0 255 | COC(=O)[C@H]1[C@H]2C[C@@H]3c4[nH]c5cc(OC)ccc5c4CCN3C[C@H]2C[C@@H](OC(=O)c2cc(OC)c(OC)c(OC)c2)[C@@H]1OC,training,0 256 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(c3C2=O)C2=NC3(CCN(CC(C)C)CC3)NC2=C(NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C)C4=O,training,0 257 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(cc(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@H]1C,training,0 258 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N\N5CCN(C6CCCC6)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@H]1C,training,0 259 | Nc1nc2ccc(OC(F)(F)F)cc2s1,training,0 260 | Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCCC2,training,0 261 | CC[C@@]1(c2ccncc2)CCC(=O)NC1=O,training,0 262 | CCCC(=O)O[C@H]1[C@H](C)O[C@@H](O[C@@H]2[C@@H](C)O[C@@H](O[C@@H]3[C@@H](OC)[C@H](O)CC(=O)O[C@H](C)CC=CC=C[C@H](O)[C@H](C)C[C@@H]3CC=O)[C@H](O)[C@H]2N(C)C)C[C@@]1(C)OC(=O)CC,training,0 263 | CCCN(CCC)CCc1cccc2c1CC(=O)N2,training,0 264 | CCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,0 265 | O=C(CO)NCCCOc1cccc(CN2CCCCC2)c1,training,0 266 
| OCc1cc([C@H](O)CNCCCCCCOCCCCc2ccccc2)ccc1O,training,0 267 | C=CCC1([C@@H](C)CCC)C(=O)NC(=O)NC1=O,training,0 268 | O=C1NCCN1CCN1CCC(c2cn(-c3ccc(F)cc3)c3ccc(Cl)cc23)CC1,training,0 269 | FCOC(C(F)(F)F)C(F)(F)F,training,0 270 | C1CCN2C[C@H]3C[C@H](CN4CCCC[C@H]34)[C@@H]2C1,training,0 271 | CC(=O)S[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@H]2CC[C@@]3(C)[C@@H](CC[C@@]34CCC(=O)O4)[C@@H]21,training,0 272 | CC(C)(C)[C@@H](O)/C=C\c1ccc2c(c1)OCO2,training,0 273 | CCC(=O)N(c1ccccc1)C1(COC)CCN(CCc2cccs2)CC1,training,0 274 | Clc1ccc(CS[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1,training,0 275 | Cc1onc(NS(=O)(=O)c2ccc(N)cc2)c1C,training,0 276 | O=C(O)c1cc(/N=N\c2ccc(S(=O)(=O)Nc3ccccn3)cc2)ccc1O,training,0 277 | COc1cc(OC)nc(NS(=O)(=O)c2ccc(N)cc2)n1,training,0 278 | COc1ncnc(NS(=O)(=O)c2ccc(N)cc2)c1OC,training,0 279 | Cc1ccnc(NS(=O)(=O)c2ccc(N)cc2)n1,training,0 280 | Nc1ccc(S(=O)(=O)Nc2ccccn2)cc1,training,0 281 | CC1=C(CC(=O)O)c2cc(F)ccc2/C1=C\c1ccc(S(C)=O)cc1,training,0 282 | CCN1CCC[C@@H]1CNC(=O)c1cc(S(N)(=O)=O)ccc1OC,training,0 283 | Nc1c2c(nc3ccccc13)CCCC2,training,0 284 | C=CC[C@@H]1C=C(C)C[C@H](C)C[C@H](OC)[C@@H]2O[C@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC,training,0 285 | Cc1nc(C)c2c(n1)N(Cc1ccc(-c3ccccc3-c3nnn[nH]3)cc1)C(=O)CC2,training,0 286 | CCCCOC(=O)C(=O)Nc1cccc(-c2nnn[nH]2)c1,training,0 287 | CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2ccccc2C(=O)O)cc1,training,0 288 | CC(C)(C)c1ccc([C@H](O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0 289 | CCN(CC)C(=O)N[C@H]1C[C@@H]2c3cccc4[nH]cc(c34)C[C@H]2N(C)C1,training,0 290 | C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)C=C[C@@]43C)[C@@H]1CCC(=O)O2,training,0 291 | CN(C)[C@@H]1C(O)=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)[C@H]3C[C@@H]12,training,0 292 | CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,0 293 | c1ccc2[nH]c(-c3cscn3)nc2c1,training,0 294 | S=P(N1CC1)(N1CC1)N1CC1,training,0 295 | Cc1ccsc1C(=CCCN1CCC[C@@H](C(=O)O)C1)c1sccc1C,training,0 296 | 
CC(C)(C)NC[C@H](O)COc1nsnc1N1CCOCC1,training,0 297 | Clc1ccc([C@@H](Cn2ccnc2)OCc2ccsc2Cl)c(Cl)c1,training,0 298 | CCC[C@@]1(CCc2ccccc2)CC(=O)C([C@H](CC)c2cccc(NS(=O)(=O)c3ccc(C(F)(F)F)cn3)c2)=C(O)O1,training,0 299 | Cc1ccc(C(=O)c2cc(O)c(O)c([N+](=O)[O-])c2)cc1,training,0 300 | Cc1ccc(C(=O)[C@H](C)CN2CCCCC2)cc1,training,0 301 | CC1(C)O[C@@H]2CO[C@@]3(COS(N)(=O)=O)OC(C)(C)O[C@H]3[C@@H]2O1,training,0 302 | CCN(CC)c1cc(C)nc2ncnn12,training,0 303 | O=c1n(CCCN2CCN(c3cccc(Cl)c3)CC2)nc2ccccn12,training,0 304 | ClC=C(Cl)Cl,training,0 305 | O=C(CCCN1CCC(O)(c2cccc(C(F)(F)F)c2)CC1)c1ccc(F)cc1,training,0 306 | COc1cc(NCc2ccc3nc(N)nc(N)c3c2C)cc(OC)c1OC,training,0 307 | C[C@H](CN(C)C)CN1c2ccccc2CCc2ccccc21,training,0 308 | COc1ccc(CN2CCNCC2)c(OC)c1OC,training,0 309 | CN(C)CCN(Cc1ccccc1)c1ccccn1,training,0 310 | CO[C@H]1C[C@H](O[C@@H]2[C@@H](C)C(=O)O[C@H](C)[C@H](C)[C@H](OC(C)=O)[C@@H](C)C(=O)[C@@]3(CO3)C[C@H](C)[C@H](O[C@@H]3O[C@H](C)C[C@H](N(C)C)[C@H]3OC(C)=O)[C@H]2C)O[C@@H](C)[C@@H]1OC(C)=O,training,0 311 | O=C(OC1C[C-]2CC[C-](C1)[N+2]21(Cl)[C-2]CC[C-2]1)C(O)(c1ccccc1)c1ccccc1,training,0 312 | Fc1ccc(C(OCCN2CCN(CCCc3ccccc3)CC2)c2ccc(F)cc2)cc1,training,0 313 | COc1ccc(C(=O)N2CCN(c3ccc4c(c3)CCC(=O)N4)CC2)cc1OC,training,0 314 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C)[C@H]3[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,0 315 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C)[C@H]3[C@@](O)(C(N)=O)[C@H](O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,0 316 | CCC1=C[C@@H]2CN(C1)Cc1c([nH]c3ccccc13)[C@@](C(=O)OC)(c1cc3c(cc1OC)N(C)[C@H]1[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]4(CC)C=CCN5CC[C@]31[C@@H]54)C2,training,0 317 | Cn1nnc2ccc([C@H](c3ccc(Cl)cc3)n3cncn3)cc21,training,0 318 | COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@@H]21,training,0 319 | CCN(C(C)=O)c1cccc(C2=[N+]3[N-]CC(C#N)=C3NC=C2)c1,training,0 320 | CN1[C@H]2CC[C@@H]1CC(NC(=O)c1cc(Cl)cc3c1OC(C)(C)C3)C2,training,0 321 | 
O=C1Cc2cc(CCN3CCN(c4nsc5ccccc45)CC3)c(Cl)cc2N1,training,0 322 | CN(C)CCc1c[nH]c2ccc(C[C@H]3COC(=O)N3)cc12,training,0 323 | NS(=O)(=O)Cc1noc2ccccc12,training,0 324 | Nc1nc2cc(Cl)ccc2o1,training,0 325 | CCc1cccc2cc([C@H](O)CNC(C)(C)C)oc12,training,1 326 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@H]1CC=C(C=O)C[C@H]21,training,1 327 | COCC[C@@H]1C[C@@H]2CN3CCc4c([nH]c5ccccc45)[C@@](C(=O)OC)(C2)[C@@H]13,training,1 328 | CCCc1ccc2ccccc2n1,training,1 329 | COC(=O)[C@H](c1ccccc1Cl)N1CCC2=C(CC(=O)S2)C1,training,1 330 | CC(C)(C)C1CCC(C2=C(O)C(=O)c3ccccc3C2=O)CC1,training,1 331 | CCOc1ccc2nc3ccc(=O)cc-3oc2c1,training,1 332 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1[C@@H](O)C=C(C)C[C@@H]21,training,1 333 | C[C@@H](C#Cc1ccc(Cc2ccccc2)s1)N(O)C(N)=O,training,1 334 | Cc1[nH]cnc1CN1CCc2c(c3ccccc3n2C)C1=O,training,1 335 | CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,training,1 336 | Clc1ccc2c(c1)C(N1CCNCC1)=Nc1ccccc1O2,training,1 337 | CC(C)CN(C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,training,1 338 | CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,training,1 339 | CNCC[C@H](Oc1ccccc1C)c1ccccc1,training,1 340 | COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1,training,1 341 | O=C(O)C/C=C\C[C@@H]1[C@@H](NS(=O)(=O)c2ccccc2)[C@H]2CC[C@@H]1C2,training,1 342 | CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1 343 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)(C)C)cc2)nc(-c2ncccn2)nc1OCCO,training,1 344 | COc1ccc2c(C(=O)c3cc(OC)c(OC)c(OC)c3)c[nH]c2c1,training,1 345 | CCOc1nc2cccc(C(=O)O)c2n1Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1,training,1 346 | NC(=O)N1c2ccccc2C=Cc2ccccc21,training,1 347 | COc1ccccc1OCCNC[C@H](O)COc1cccc2[nH]c3ccccc3c12,training,1 348 | CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl,training,1 349 | C(=C/c1ccccc1)\CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,1 350 | CN1C(=O)CC(=O)N(c2ccccc2)c2cc(Cl)ccc21,training,1 351 | CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,training,1 352 | COC(=O)[C@@H](c1ccccc1Cl)N1CCc2sccc2C1,training,1 353 | CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,training,1 354 | 
NC(=O)c1c(Cl)c(-c2cccnc2)n2c1CCCC2,training,1 355 | Cc1ccc(S(=O)(=O)Nc2ccnn2-c2ccccc2)cc1,training,1 356 | Oc1ccc(OC(F)(F)F)cc1CN[C@@H]1CCCN[C@H]1c1ccccc1,training,1 357 | O=P1(N(CCCl)CCCl)NCCCO1,training,1 358 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,1 359 | COC(=O)c1cc(OC)c2c(c1-c1c(C(=O)OC)cc(OC)c3c1OCO3)OCO2,training,1 360 | CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,training,1 361 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@H]1CCC(C)=C[C@H]21,training,1 362 | CCN(CC)C(=O)SC,training,1 363 | COc1ccc2c(c1)[C@]13CCCC[C@@H]1[C@H](C2)N(C)CC3,training,1 364 | C=CCSSCC=C,training,1 365 | c1ccc2c(c1)ccc1cc3c(ccc4ccccc43)cc12,training,1 366 | c1ccc2c(c1)cc1ccc3cccc4c5ccccc5c2c1c34,training,1 367 | O=c1oc2ccccc2c(O)c1Cc1c(O)c2ccccc2oc1=O,training,1 368 | COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1,training,1 369 | Cc1c2ccccc2c(C)c2c1ccc1ccccc12,training,1 370 | O=C(OC1C[C@@H]2CC3C[C@H](C1)N2CC3=O)c1c[nH]c2ccccc12,training,1 371 | CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,1 372 | COc1cc2c(CCN3CCN(c4cccc(Cl)c4C)CC3)nn(Cc3c[nH]cn3)c2cc1OC,training,1 373 | CCCCc1nc(Cl)c(C=O)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1,training,1 374 | CN1CCC[C@@H]1C[C@@H]1CNc2ccc(CCS(=O)(=O)c3ccccc3)cc21,training,1 375 | Cc1c2ccncc2c(C)c2c1[nH]c1ccccc12,training,1 376 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CCC2=O,training,1 377 | CCc1cccc2c3c([nH]c12)[C@@](CC)(CC(=O)O)OCC3,training,1 378 | Cc1ccc(-c2ncc(Cl)cc2-c2ccc(S(C)(=O)=O)cc2)cn1,training,1 379 | CNCC[C@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1 380 | C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1,training,1 381 | CC(C)n1c(/C=C/[C@H](O)C[C@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,training,1 382 | COc1ccc(-c2coc3cc(O)ccc3c2=O)cc1,training,1 383 | CCC1=C(C)CN(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCC(C)CC3)cc2)C1=O,training,1 384 | Cc1cnc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)cn1,training,1 385 | COc1ccc(Cl)cc1C(=O)NCCc1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1,training,1 386 | O=S(Cc1cc(OCC2CC2)ccn1)c1nc2cc(F)ccc2[nH]1,training,1 
387 | FC(F)(F)[C@H](Cl)Br,training,1 388 | COc1ccc2c(c1)[nH]c1c(C)nccc12,training,1 389 | CN1C(=O)[C@@](C)(C2=CCCCC2)C(=O)N=C1O,training,1 390 | O=P1(NCCCl)OCCCN1CCCl,training,1 391 | CN(C)CCCN1c2ccccc2CCc2ccccc21,training,1 392 | CC(C)(C)NC(=O)[C@@H]1CN(Cc2cccnc2)CCN1C[C@@H](O)C[C@@H](Cc1ccccc1)C(=O)N[C@H]1c2ccccc2C[C@H]1O,training,1 393 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1,training,1 394 | COc1ccc2c(c1)c(CC(=O)NCCc1ccccc1)c(C)n2C(=O)c1ccc(Cl)cc1,training,1 395 | CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1,training,1 396 | CN[C@@]1(c2ccccc2Cl)CCCCC1=O,training,1 397 | CCC(=O)C1(c2cccc(O)c2)CCN(C)CC1,training,1 398 | COc1ccc(N2CCc3c(NCCO)nc4c(OC(F)(F)F)cccc4c32)c(C)c1,training,1 399 | CCCCCCCCCCCC(=O)O,training,1 400 | CC1(C)Cc2c(-c3ccccc3)c(-c3ccc(Cl)cc3)c(CC(=O)OC[C@@H]3O[C@@H](O)[C@@H](O)[C@H](O)[C@@H]3O)n2C1,training,1 401 | CCCCC/C=C/C/C=C/CCCCCCCC(=O)O,training,1 402 | CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1,training,1 403 | CN1C(C(=O)Nc2ccccn2)=C(O)c2sc(Cl)cc2S1(=O)=O,training,1 404 | CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1,training,1 405 | Cc1ccccc1C(=O)N1CC/C(=N\OS(=O)(=O)O)c2ccc(Cl)cc21,training,1 406 | CCN[C@@H](C)Cc1ccc2c(c1)OCO2,training,1 407 | Cc1cnc(NC(=O)C2=C(O)c3ccccc3S(=O)(=O)N2C)s1,training,1 408 | CC[C@]1(c2ccccc2)NC(=O)N(C)C1=O,training,1 409 | CN1CCN2c3ccccc3Cc3ccccc3[C@@H]2C1,training,1 410 | CN1CCN2c3ncccc3Cc3ccccc3[C@H]2C1,training,1 411 | CN1CCN2c3ccccc3Cc3cccnc3[C@@H]2C1,training,1 412 | CN1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5,training,1 413 | COc1ccc(OC(=O)N(CC(=O)O)Cc2ccc(OCCc3nc(-c4ccccc4)oc3C)cc2)cc1,training,1 414 | CCCCCN(CCCCC)N=O,training,1 415 | CC(C)C1CCC(C(=O)N[C@H](Cc2ccccc2)C(=O)O)CC1,training,1 416 | CCN(CC)N=O,training,1 417 | CCCN(CCC)CCc1ccc(OC)c(OCCc2ccccc2)c1,training,1 418 | Cc1c(O)cccc1C(=O)N[C@@H](CSc1ccccc1)[C@H](O)CN1C[C@H]2CCCC[C@H]2C[C@H]1C(=O)NC(C)(C)C,training,1 419 | Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1,training,1 420 | CN1CCC[C@H]1c1cccnc1,training,1 421 | 
CCCCN(C)N=O,training,1 422 | CCN(C)N=O,training,1 423 | Cc1nccn1C[C@H]1CCc2c(c3ccccc3n2C)C1=O,training,1 424 | CN1CCN(CCCN2c3ccccc3Sc3ccccc32)CC1,training,1 425 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1 426 | c1ccc2c(c1)ccc1ccccc12,training,1 427 | CCC1(c2ccccc2)C(=O)NC(=O)NC1=O,training,1 428 | CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O,training,1 429 | O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,training,1 430 | CCOP(=S)(OCC)SCSCC,training,1 431 | CN1C(C(=O)Nc2ccccn2)=C(O)c2ccccc2S1(=O)=O,training,1 432 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3[C@H]2C1,training,1 433 | Fc1ccccc1C1=NCC(=S)N(CC(F)(F)F)c2ccc(Cl)cc21,training,1 434 | OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1,training,1 435 | O=C1C=C2CN([C@@H](C(=O)C3CC3)c3ccccc3F)CC[C@H]2S1,training,1 436 | COCCCOc1ccnc(CSc2nc3ccccc3[nH]2)c1C,training,1 437 | CCOc1cc(CC(=O)N[C@@H](CC(C)C)c2ccccc2N2CCCCC2)ccc1C(=O)O,training,1 438 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N\N5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@H]1C,training,1 439 | O=C(O)CCC/C=C\C[C@@H]1[C@@H](NS(=O)(=O)c2ccccc2)[C@H]2CC[C@@H]1C2,training,1 440 | Cc1ccsc1-c1ccc([C@@H](C)C(=O)O)cc1,training,1 441 | C=CCc1ccc2c(c1)OCO2,training,1 442 | CC1=C(C)C(=O)C([C@@H](CCCCCC(=O)O)c2ccccc2)=C(C)C1=O,training,1 443 | CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,training,1 444 | C[C@@H]1O[C@@]2(CS1)CN1CCC2CC1,training,1 445 | Nc1ccc(S(=O)(=O)Nc2ncccn2)cc1,training,1 446 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)no1,training,1 447 | O=C1C(CCS(=O)c2ccccc2)C(=O)N(c2ccccc2)N1c1ccccc1,training,1 448 | C[C@@H](C(=O)O)c1ccc(C(=O)c2cccs2)cc1,training,1 449 | O=c1[nH]c(=O)n([C@@H]2CCCO2)cc1F,training,1 450 | CN1/C(=C(/O)Nc2ccccn2)C(=O)c2sccc2S1(=O)=O,training,1 451 | O=C(O)CC[C@@H]1[C@@H](NS(=O)(=O)c2ccccc2)[C@H]2CC[C@@H]1C2,training,1 452 | O=C1CC[C@H](N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1 453 | O=C(Cn1c(=O)sc2ccc(Cl)cc21)N1CCN(CCO)CC1,training,1 454 | O=C(O)COc1ccc(C(=O)c2cccs2)c(Cl)c1Cl,training,1 455 | 
CNCC[C@@H](Oc1ccccc1C)c1ccccc1,training,1 456 | CN(C)CCOc1ccc(/C(=C(\CCCl)c2ccccc2)c2ccccc2)cc1,training,1 457 | COc1cccc(OC)c1-c1ccc(C[C@H](NC(=O)c2c(Cl)cccc2Cl)C(=O)O)cc1,training,1 458 | COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,training,1 459 | Cc1c(C)c2c(c(C)c1O)CC[C@](C)(COc1ccc(C[C@@H]3SC(=O)NC3=O)cc1)O2,training,1 460 | CN1[C@H]2CC[C@@H]1CC(OC(=O)c1c[nH]c3ccccc13)C2,training,1 461 | Cc1onc(-c2ccccc2)c1-c1ccc(S(N)(=O)=O)cc1,training,1 462 | CCCC(CCC)C(=O)O,training,1 463 | CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1)[C@H](C(=O)O)C(C)C,training,1 464 | COc1ccc([C@@H](CN(C)C)C2(O)CCCCC2)cc1,training,1 465 | COC[C@@H](c1ccc(C(F)(F)F)cc1)N1CCN(C2(C)CCN(C(=O)c3c(C)ncnc3C)CC2)C[C@@H]1C,training,1 466 | O=c1ccc2nc3ccc(OCOCc4ccccc4)cc3oc-2c1,training,1 467 | C[C@@H](c1ncncc1F)[C@](O)(Cn1cncn1)c1ccc(F)cc1F,training,1 468 | CC(=O)C[C@H](c1ccccc1)c1c(O)oc2ccccc2c1=O,training,1 469 | CC(=O)C[C@@H](c1ccccc1)c1c(O)oc2ccccc2c1=O,training,1 470 | COc1cc(C(=O)NS(=O)(=O)c2ccccc2C)ccc1Cc1cn(C)c2ccc(NC(=O)OC3CCCC3)cc12,training,1 471 | C[C@@H](C(=O)O)c1ccc2c(c1)CC(=O)c1ccccc1S2,training,1 472 | Cc1cn([C@H]2C[C@H](N=[N+][N-])[C@@H](CO)O2)c(=O)[nH]c1=O,training,1 473 | C[C@@H](c1cc2ccccc2s1)N(O)C(N)=O,training,1 474 | Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1,training,1 475 | CC[C@@]1(c2ccccc2)NC(=O)N(C)C1=O,training,1 476 | O=C1CC[C@@H](N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1 477 | COc1ccc(CCN(C)CCC[C@@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1 478 | CN1C(=O)[C@](C)(C2=CCCCC2)C(=O)N=C1O,training,1 479 | CC(=O)[C@@H]1CC[C@@H]2[C@H]3CCC4=CC(=O)CC[C@@]4(C)[C@@H]3CC[C@]12C,training,1 480 | CN(C)CC[C@@H](c1ccc(Br)cc1)c1ccccn1,training,1 481 | FC(F)(F)[C@@H](Cl)Br,training,1 482 | C(=Cc1ccccc1)CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,1 483 | CN1CC[C@]23c4c5ccc(O)c4O[C@H]2C(=O)CC[C@H]3[C@H]1C5,training,1 484 | CN(C)CCO[C@@H](c1ccc(Cl)cc1)c1ccccn1,training,1 485 | CN(C)CCC=C1c2ccccc2COc2ccccc21,training,1 486 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21,training,1 487 | 
Nc1ccn([C@H]2CC[C@@H](CO)O2)c(=O)n1,training,1 488 | CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1 489 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(C=NN5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C,training,1 490 | Cc1ccc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)nc1,training,1 491 | CN[C@]1(c2ccccc2Cl)CCCCC1=O,training,1 492 | CCCCC[C@H](O)C=C[C@H]1[C@H](O)C[C@@H]2OC(=CCCCC(=O)O)C[C@@H]21,training,1 493 | C[C@H](C(=O)O)c1ccc(C(=O)c2cccs2)cc1,training,1 494 | CCc1cccc2c3c([nH]c12)[C@](CC)(CC(=O)O)OCC3,training,1 495 | CN1CCN(C(=O)O[C@@H]2c3nccnc3C(=O)N2c2ccc(Cl)cn2)CC1,training,1 496 | COc1ccc(C[C@H](C)NC[C@@H](O)c2ccc(O)c(NC=O)c2)cc1,training,1 497 | CC(C)(C)c1ccc([C@@H](O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,1 498 | CS(=O)(=O)Nc1ccc([N+](=O)[O-])cc1Oc1ccccc1,training,1 499 | Fc1ccc(C(c2ccc(F)cc2)N2CCN(CC=Cc3ccccc3)CC2)cc1,training,1 500 | CNCC[C@@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1 501 | CC(=O)[C@]1(O)Cc2c(O)c3c(c(O)c2[C@@H](O[C@H]2C[C@H](N)[C@H](O)[C@H](C)O2)C1)C(=O)c1ccccc1C3=O,training,1 502 | CN1C(C(=O)Nc2ccccn2)=C(O)c2sccc2S1(=O)=O,training,1 503 | O=C(NCCN1CCOCC1)c1ccc(Cl)cc1,training,1 504 | COc1ccccc1OCCNC[C@@H](O)COc1cccc2[nH]c3ccccc3c12,training,1 505 | COc1cc(N)c(Cl)cc1C(=O)N[C@@H]1CCN(CCCOc2ccc(F)cc2)C[C@H]1OC,training,1 506 | CN1C2CCC1CC(OC(=O)c1c[nH]c3ccccc13)C2,training,1 507 | CC(C)n1c(C=C[C@H](O)C[C@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,training,1 508 | CC1=C(C)C(=O)C([C@H](CCCCCC(=O)O)c2ccccc2)=C(C)C1=O,training,1 509 | COC(=O)[C@H](c1ccccc1Cl)N1CCc2sccc2C1,training,1 510 | COc1cc2c(cc1O)CCN[C@]21CS[C@@H]2c3c(OC(C)=O)c(C)c4c(c3[C@H](COC1=O)N1[C@@H](O)[C@@H]3Cc5cc(C)c(OC)c(O)c5[C@H]([C@H]21)N3C)OCO4,training,1 511 | CCN[C@@H]1C[C@@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,1 512 | CC(C)C[C@H](NC(=O)[C@@H](Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1 513 | CCOC(=O)CN[C@@H](C(=O)N1CC[C@H]1C(=O)NCc1ccc(C=NNO)cc1)C1CCCCC1,training,1 514 | 
CC(C)CN(C[C@@H](OP(=O)(O)O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,training,1 515 | CC(C)c1nc(N(C)S(C)(=O)=O)nc(-c2ccc(F)cc2)c1C=C[C@H](O)C[C@@H](O)CC(=O)O,training,1 516 | CC(C)(C#N)c1cc(Cn2cncn2)cc(C(C)(C)C#N)c1,training,0 517 | COc1ccc(CCN2CCC(Nc3nc4ccccc4n3Cc3ccc(F)cc3)CC2)cc1,training,0 518 | CC1=CC(=O)C=C2CC[C@H]3[C@@H]4CCC(=O)[C@@]4(C)CC[C@@H]3[C@@]12C,training,0 519 | CC(C)c1cc(C(C)C)c(CC(=O)NS(=O)(=O)Oc2c(C(C)C)cccc2C(C)C)c(C(C)C)c1,training,0 520 | OC(c1ccccc1)(c1ccccc1)C1CCNCC1,training,0 521 | CC[C@@H](c1ccc(O)c(F)c1)[C@H](C)c1ccc(O)c(F)c1,training,0 522 | COc1cc(OC)c(C(=O)CCCN2CCCC2)c(OC)c1,training,0 523 | CCCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,0 524 | C[C@H](CS)C(=O)N1CCC[C@H]1C(=O)O,training,0 525 | C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](CC[C@@]21C)[C@@]1(C)CC[C@@H](O)C[C@@H]1C[C@H]3O,training,0 526 | O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1,training,0 527 | CN1CCC[C@@H]1CCO[C@](C)(c1ccccc1)c1ccc(Cl)cc1,training,0 528 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=Cc5oncc5C[C@]4(C)[C@H]3CC[C@@]21C,training,0 529 | O=C1CN(/N=C\c2ccc(-c3ccc([N+](=O)[O-])cc3)o2)C(=O)N1,training,0 530 | COc1ccc2c3c1O[C@H]1[C@@H](O)CC[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0 531 | CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4C(=O)[C@H](Cc4ccccc4)N3C2=O)C[C@@H]2c3cccc4[nH]cc(c34)C[C@H]21,training,0 532 | CCOCCn1c(N2CCCN(C)CC2)nc2ccccc21,training,0 533 | CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[C@H]1C(=O)O,training,0 534 | N#Cc1ccc([C@H]2CCCc3cncn32)cc1,training,0 535 | CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1,training,0 536 | Cc1onc(-c2c(F)cccc2Cl)c1C(=O)N[C@@H]1C(=O)N2[C@@H](C(=O)O)C(C)(C)S[C@H]12,training,0 537 | N=C(N)NC[C@@H]1COc2ccccc2O1,training,0 538 | CC(C)Cn1cnc2c(N)nc3ccccc3c21,training,0 539 | CC[C@H](C)n1ncn(-c2ccc(N3CCN(c4ccc(OC[C@H]5CO[C@](Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,training,0 540 | Clc1cccc([C@H](c2ccc3nc[nH]c3c2)n2ccnc2)c1,training,0 541 | COC1=CC(=O)O[C@H]1[C@H](O)c1ccccc1Cl,training,0 542 | 
CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0 543 | O=C1c2c(O)ccc(O)c2C(=O)c2c(NCCNCCO)ccc(NCCNCCO)c21,training,0 544 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CCOc1ccccc1,training,0 545 | O=C(N[C@H]1CCS[C@H]2CCC[C@@H](C(=O)O)N2C1=O)[C@@H](S)Cc1ccccc1,training,0 546 | CC(C)NC[C@@H]1CCc2cc(CO)c([N+](=O)[O-])cc2N1,training,0 547 | C[C@H](CN(C)C)CN1c2ccccc2S(=O)(=O)c2ccccc21,training,0 548 | CC(C)(C)NC[C@H](O)COc1ccccc1C1CCCC1,training,0 549 | C1CCC(C(C[C@H]2CCCCN2)C2CCCCC2)CC1,training,0 550 | C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,training,0 551 | COc1cc(N[C@@H](C)CCCN)c2ncccc2c1,training,0 552 | CCC(=O)O[C@](Cc1ccccc1)(c1ccccc1)[C@H](C)CN(C)C,training,0 553 | CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,training,0 554 | CCN(CC)CCC[C@@H](C)Nc1c2ccc(Cl)cc2nc2ccc(OC)cc12,training,0 555 | CCN1CCC[C@H]1CNC(=O)c1c(OC)ccc(Br)c1OC,training,0 556 | CN(C(=O)c1c(O)n(C)c2ccccc2c1=O)c1ccccc1,training,0 557 | CC(C)C[C@H](N(C)C)C1(c2ccc(Cl)cc2)CCC1,training,0 558 | Nc1ccc(S(=O)(=O)Nc2ccnn2-c2ccccc2)cc1,training,0 559 | CCOc1ccccc1OCCN[C@H](C)Cc1ccc(OC)c(S(N)(=O)=O)c1,training,0 560 | O=C1Nc2c(O)cc(Cl)cc2[C@@](C#CC2CC2)(C(F)(F)F)O1,training,1 561 | C[C@H](NC(N)=O)c1cc2ccccc2s1,training,1 562 | C[C@@H](C#Cc1ccc(Cc2ccccc2)s1)NC(N)=O,training,1 563 | CCCSc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1 564 | CN(C)c1cn(C)n(-c2ccccc2)c1=O,training,1 565 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,1 566 | ClC(Cl)Cl,training,1 567 | COc1cc(N)c(Cl)cc1C(=O)N[C@H]1CCN(CCCOc2ccc(F)cc2)C[C@H]1OC,training,1 568 | O=C(OCc1ccccc1)c1ccccc1-c1c2ccc(=O)cc-2oc2cc(OCc3ccccc3)ccc12,training,1 569 | CN(C)CC/C=C1/c2ccccc2COc2ccccc21,training,1 570 | CC/C=C\C/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,training,1 571 | COC(OC)[C@@]1(C)Oc2ccc(N)cc2[C@H](N(Cc2nnn(C)n2)c2ccc(Cl)cc2)[C@H]1O,training,1 572 | CC(C)(O)c1ccccc1CC[C@@H](SCC1(CC(=O)O)CC1)c1cccc(/C=C/c2ccc3ccc(Cl)cc3n2)c1,training,1 573 | CN(C)N=O,training,1 574 | 
CN(CCOc1ccc(C[C@@H]2SC(=O)NC2=O)cc1)c1ccccn1,training,1 575 | CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12,training,1 576 | O=C1C(CCSc2ccccc2)C(=O)N(c2ccccc2)N1c1ccccc1,training,1 577 | CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1 578 | Cc1cccc(Nc2ccncc2S(=O)(=O)NC(=O)NC(C)C)c1,training,1 579 | CN1C(=O)OC(C)(C)C1=O,training,1 580 | CCNc1ncn(Cc2ccc(OC)c(OC3CCCC3)c2)c2nc(C(C)C)nc1-2,training,1 581 | CN1CCN(C(=O)O[C@H]2c3nccnc3C(=O)N2c2ccc(Cl)cn2)CC1,training,1 582 | O=C(O)CCc1nc(-c2ccccc2)c(-c2ccccc2)o1,training,1 583 | Nc1nc(NC2CC2)c2ncn([C@H]3C=C[C@@H](CO)C3)c2n1,training,0 584 | C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43C)[C@@H]1CC=C2c1cccnc1,training,0 585 | CN(C)Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2,training,0 586 | CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O,training,0 587 | CCC[C@H]1O[C@@H]2C[C@H]3[C@@H]4CCC5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1,training,0 588 | c1ccc(C2(c3ccccc3)C[C@H]2C2=NCCN2)cc1,training,0 589 | CN(C)CCC[C@@]1(c2ccc(F)cc2)OCc2cc(C#N)ccc21,training,0 590 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(OC)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,0 591 | N=C(N)N1CCc2ccccc2C1,training,0 592 | Clc1ccc2c(c1)CCc1cccnc1C2=C1CCNCC1,training,0 593 | CCN(CC)C(=O)N1CCN(C)CC1,training,0 594 | CN1CCN(c2cc3c(cc2F)c(=O)c(C(=O)O)cn3-c2ccc(F)cc2)CC1,training,0 595 | C[C@]12CC[C@H]3[C@H]([C@@H]1[C@@H]1C[C@@H]1[C@@]21CCC(=O)O1)[C@H]1C[C@H]1C1=CC(=O)CC[C@@]13C,training,0 596 | FC(F)OC(F)(F)[C@@H](F)Cl,training,0 597 | C=C1C[C@@H]2[C@H](CC[C@]3(C)C(=O)CC[C@@H]23)[C@@]2(C)C=CC(=O)C=C12,training,0 598 | C#C[C@]1(O)C=C[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21CC,training,0 599 | C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23,training,0 600 | CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@@H](c3ccc(N(C)C)cc3)C[C@@]21C,training,0 601 | 
CCN(CC)C(=O)[C@]1(c2ccccc2)C[C@@H]1CN,training,0 602 | CC1(C)NC(=O)N(c2ccc([N+](=O)[O-])c(C(F)(F)F)c2)C1=O,training,0 603 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNCC3)cc21,training,0 604 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc21,training,0 605 | CCCNC[C@H](O)COc1ccccc1C(=O)CCc1ccccc1,training,0 606 | O=C(c1ccc(OCCN2CCCCC2)cc1)c1c(-c2ccc(O)cc2)sc2cc(O)ccc12,training,0 607 | CN/C(=C\[N+](=O)[O-])NCCSCc1ccc(CN(C)C)o1,training,0 608 | CCOCc1nc2c(N)nc3ccccc3c2n1CC(C)(C)O,training,0 609 | Cc1cncc(CN2CCC(=C3c4ccc(Cl)cc4CCc4cccnc43)CC2)c1,training,0 610 | CC(C)(C)NC(=O)[C@@H]1C[C@@H]2CCCC[C@@H]2CN1C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)[C@H](CC(N)=O)NC(=O)c1ccc2ccccc2n1,training,0 611 | CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0 612 | CN1CC(=O)N2[C@H](Cc3c([nH]c4ccccc34)[C@H]2c2ccc3c(c2)OCO3)C1=O,training,0 613 | CC(C)(C)c1cc(C[C@H]2SCNC2=O)cc(C(C)(C)C)c1O,training,0 614 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](c6cccs6)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,0 615 | C[C@H](Cn1cnc2c(N)ncnc21)OCP(=O)(O)O,training,0 616 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)C)cn2)nc(-c2ccnc(-c3nn[nH]n3)c2)nc1OCCO,training,0 617 | CN1CCN(CC/C=C2\c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1,training,0 618 | C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)C3=CC[C@]2(C)[C@H]1C(=O)CN1CCN(c2cc(N3CCCC3)nc(N3CCCC3)n2)CC1,training,0 619 | N[C@@H]1C[C@H]1c1ccccc1,training,0 620 | Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2,training,0 621 | Cc1ccc(/C(=C/CN2CCCC2)c2ccccn2)cc1,training,0 622 | O=P1(N(CCCl)CCCl)OCCCN1CCCl,training,0 623 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C=O)[C@H]3[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,0 624 | COc1ccc2c(c1)Oc1cc(O)ccc1[C@@]21OC(=O)c2ccccc21,training,1 625 | Cc1cc(=O)n(-c2ccccc2)n1C,training,1 626 | c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34,training,1 627 | O=c1cc(C(F)(F)F)c2ccc(OCc3ccccc3)cc2o1,training,1 628 | C=CC1CO1,training,1 
629 | Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,training,1 630 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3C(=C)C[C@@]21CC,training,1 631 | CC(C)N(CC[C@@](C(N)=O)(c1ccccc1)c1ccccn1)C(C)C,training,1 632 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CC,training,1 633 | Fc1ccc(C(c2ccc(F)cc2)N2CCN(C/C=C/c3ccccc3)CC2)cc1,training,1 634 | Cc1ccc(S(=O)(=O)NC(=O)NN2C[C@H]3CCC[C@H]3C2)cc1,training,1 635 | O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1,training,1 636 | CN1CC[C@@]23c4c5ccc(O)c4O[C@@H]2C(=O)CC[C@@H]3[C@@H]1C5,training,1 637 | CC1(C)Cc2c(-c3ccccc3)c(-c3ccc(Cl)cc3)c(CC(=O)O)n2C1,training,1 638 | Cc1ncc2n1-c1ccc(Cl)cc1C(c1ccccc1F)=NC2,training,1 639 | CSc1nc(-c2ccnc(N[C@H](C)c3ccccc3)c2)c(-c2ccc(F)cc2)[nH]1,training,1 640 | CCCN(CCC)N=O,training,1 641 | CN(CCCC(=O)c1cccnc1)N=O,training,1 642 | C[C@H]1Cc2c(Cl)cc(C(=O)N[C@@H](Cc3ccccc3)C(=O)O)c(O)c2C(=O)O1,training,1 643 | CN1CCCN=C1/C=C\c1cccs1,training,1 644 | C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12,training,1 645 | CC(C)c1nc(CN(C)C(=O)N[C@H](C(=O)N[C@@H](Cc2ccccc2)C[C@H](O)[C@H](Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1,training,1 646 | CC(C)c1nc(N(C)S(C)(=O)=O)nc(-c2ccc(F)cc2)c1CC[C@@H](O)C[C@@H](O)CC(=O)O,training,1 647 | O=c1c2ccccc2nc2n1CCc1c-2[nH]c2ccccc12,training,1 648 | CN(C)S(=O)(=O)CCNC(=O)N(CCCl)N=O,training,1 649 | COc1ccc(CCN(C)CCC[C@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1 650 | CC(C)C1CCC(C(=O)N[C@@H](Cc2ccccc2)C(=O)O)CC1,training,1 651 | CN/C(=N\C#N)NCCSCc1nc[nH]c1C,test,0 652 | Cn1ccnc1S,test,0 653 | CCS(=O)(=O)CCn1c([N+](=O)[O-])cnc1C,test,0 654 | Cc1ncc([N+](=O)[O-])n1CCO,test,1 655 | CC(=O)Nc1ccccc1,test,0 656 | C=CCc1ccccc1OC[C@@H](O)CNC(C)C,test,0 657 | Cc1cc(N(C)C)ccc1C[C@H](C)N,test,0 658 | C[C@H](N)Cc1ccccc1,test,0 659 | Nc1ccccc1,test,0 660 | Cc1ccc(Cl)c(OC[C@@H](O)CNC(C)(C)C)c1,test,0 661 | CC(C)(Oc1ccc(Cl)cc1)C(=O)O,test,0 662 | CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1,test,0 663 | Cc1cc(Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O,test,0 664 | CCN[C@H](C)Cc1cccc(C(F)(F)F)c1,test,0 665 
| Cc1ccc(C)c(OCCCC(C)(C)C(=O)O)c1,test,0 666 | CN[C@@H](C)Cc1ccccc1,test,0 667 | COCCc1ccc(OC[C@@H](O)CNC(C)C)cc1,test,0 668 | C=CCOc1ccccc1OC[C@@H](O)CNC(C)C,test,0 669 | O=C(O)c1ccccc1O,test,0 670 | Nc1ccc(S(N)(=O)=O)cc1,test,0 671 | Cc1cccc(C)c1NC(=O)[C@H](C)N,test,0 672 | CC(=O)Nc1ccc(O)cc1,test,1 673 | CC(C)/N=C(N)\N=C(/N)Nc1ccc(Cl)cc1,test,1 674 | C=CCc1ccc(O)c(OC)c1,test,1 675 | CC(C)Cc1ccc([C@H](C)C(=O)O)cc1,test,1 676 | CC(C)c1cccc(C(C)C)c1O,test,1 677 | C#CCN(C)[C@H](C)Cc1ccccc1,test,1 678 | CC(C)NC[C@H](O)COc1ccc(COCCOC(C)C)cc1,test,0 679 | C[C@@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1,test,0 680 | CCN(CC)CCNC(=O)c1ccc(N)cc1,test,0 681 | Cc1ccccc1,test,0 682 | CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,test,1 683 | c1ccccc1,test,0 684 | CCCCCCCN(CC)CCC[C@@H](O)c1ccc(NS(C)(=O)=O)cc1,test,0 685 | COc1cc(CNC(=O)CCCCC=CC(C)C)ccc1O,test,1 686 | CC1=C(/C=C/C(C)=C\C=C\C(C)=C/C(=O)O)C(C)(C)CCC1,test,1 687 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=O)C(C)(C)CCC1,test,1 688 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/CO)C(C)(C)CCC1,test,1 689 | CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC(=O)O,test,1 690 | CC1=C(/C=C\C(C)=C\C=C/C(C)=C\C(=O)O)C(C)(C)CCC1,test,0 691 | NCCc1c[nH]c2ccc(O)cc12,test,1 692 | COc1ccc2[nH]cc(CCNC(C)=O)c2c1,test,1 693 | CC(C)NC[C@H](O)COc1cccc2ccccc12,test,0 694 | COc1ccc2cc([C@H](C)C(=O)O)ccc2c1,test,1 695 | CN(C/C=C/C#CC(C)(C)C)Cc1cccc2ccccc12,test,1 696 | O=c1ccc2ccccc2o1,test,0 697 | CCCCOc1ccc2c(C(F)(F)F)cc(=O)oc2c1,test,1 698 | CCCCNCc1cc(=O)oc2cc(OC)ccc12,test,1 699 | C[C@@H](O)CCCCn1c(=O)c2c(ncn2C)n(C)c1=O,test,0 700 | CC(=O)CCCCn1c(=O)c2c(ncn2C)n(C)c1=O,test,0 701 | Cn1cnc2c1c(=O)[nH]c(=O)n2C,test,0 702 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)=O)[C@@H]1c1ccccc1[N+](=O)[O-],test,0 703 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc(Cl)c1Cl,test,0 704 | CCCOCCOC(=O)C1=C(C)NC(C)=C(C(=O)OCCOCCC)C1c1cccc([N+](=O)[O-])c1,test,0 705 | COC(=O)C1=C(C#N)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc([N+](=O)[O-])c1,test,0 706 | CCOC(=O)C1=C(COCCN)NC(C)=C(C(=O)OC)[C@@H]1c1ccccc1Cl,test,0 707 | 
O=C(O)c1ccccc1Nc1cccc(C(F)(F)F)c1,test,0 708 | Cc1cccc(Nc2ccccc2C(=O)O)c1C,test,1 709 | Cc1ccc(O)c([C@@H](CCN(C(C)C)C(C)C)c2ccccc2)c1,test,1 710 | COc1ccc([C@@H](c2ccc(O)c(O)c2)C(Cl)(Cl)Cl)cc1,test,1 711 | COc1ccc([C@@H](c2ccc(O)cc2)C(Cl)(Cl)Cl)cc1,test,1 712 | CCC(=O)C(C[C@H](C)N(C)C)(c1ccccc1)c1ccccc1,test,1 713 | CN(C)CCCCCN1c2ccccc2Sc2ccc(C(F)(F)F)cc21,test,1 714 | CN(C)CCCN1c2ccccc2Sc2ccccc21,test,1 715 | C[C@H](CN(C)C)CN1c2ccccc2Sc2ccc(C#N)cc21,test,1 716 | O=c1c(O)c(-c2ccccc2)oc2cc(O)cc(O)c12,test,1 717 | COc1ccc(-c2oc3cc(O)cc(O)c3c(=O)c2O)cc1,test,1 718 | COc1ccc2c(c1)CC[C@@H]1[C@@H]2CC[C@]2(C)[C@H]1CC[C@H]2O,test,1 719 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCc4cc(O)ccc4[C@H]3CC[C@@]21C,test,1 720 | O=C1CN=C(c2ccccc2)c2cc(Cl)ccc2N1,test,0 721 | CN1C(=O)CN=C(c2ccccc2F)c2cc([N+](=O)[O-])ccc21,test,1 722 | CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,test,1 723 | CCN(CC)CCN1C(=O)CN=C(c2ccccc2F)c2cc(Cl)ccc21,test,0 724 | C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2C(=O)CO,test,0 725 | C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,test,1 726 | COCCCOc1ccnc(CS(=O)c2nc3ccccc3[nH]2)c1C,test,0 727 | O=S(Cc1ccccn1)c1nc2ccccc2[nH]1,test,0 728 | COc1ccc2[nH]c(S(=O)Cc3ncc(C)c(OC)c3C)nc2c1,test,1 729 | CC(=O)C[C@@H](c1ccc([N+](=O)[O-])cc1)c1c(O)c2ccccc2oc1=O,test,1 730 | CC[C@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,test,1 731 | CC(=O)C[C@H](c1ccc([N+](=O)[O-])cc1)c1c(O)c2ccccc2oc1=O,test,1 732 | CN(C)/N=N\c1[nH]cnc1C(N)=O,val,0 733 | NCCc1c[nH]cn1,val,1 734 | CCCC(=O)Nc1ccc(OC[C@@H](O)CNC(C)C)c(C(C)=O)c1,val,0 735 | CC(C)(C)NC[C@@H](O)c1ccc(O)c(CO)c1,val,0 736 | CC(C)(C)NC[C@H](O)COc1ccccc1C#N,val,0 737 | C[C@H](N)C(=O)c1ccccc1,val,0 738 | CCN(CC)C(=O)/C(C#N)=C\c1cc(O)c(O)c([N+](=O)[O-])c1,val,0 739 | CNC[C@H](O)c1ccc(O)c(O)c1,val,0 740 | CCc1ccccc1,val,0 741 | NC(=O)OCC(COC(N)=O)c1ccccc1,val,0 742 | COCCCC/C(=N/OCCN)c1ccc(C(F)(F)F)cc1,val,0 743 | N=C(N)N/N=C\c1c(Cl)cccc1Cl,val,0 744 | CCN(CC)CCNC(=O)c1cc(Cl)c(N)cc1OC,val,0 745 | C#CCN(C)Cc1ccccc1,val,0 746 | 
NC(N)=N/C(N)=N/CCc1ccccc1,val,0 747 | Oc1ccccc1,val,0 748 | CCCN(CCC)S(=O)(=O)c1ccc(C(=O)O)cc1,val,0 749 | CC(C)Cc1ccc([C@@H](C)C(=O)O)cc1,val,1 750 | COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O,val,1 751 | CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,val,1 752 | COc1ccc(OC(F)(F)F)cc1CN,val,1 753 | Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,val,1 754 | CCN(CC)CC(=O)Nc1c(C)cccc1C,val,1 755 | C=CCc1ccc(OC)c(OC)c1,val,1 756 | CCOc1ccc(NC(C)=O)cc1,val,1 757 | NCCc1ccc(O)c(O)c1,val,1 758 | C[C@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1,val,1 759 | COc1ccc([N+](=O)[O-])cc1,val,1 760 | C=Cc1ccccc1,val,1 761 | CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,val,1 762 | O=C(N[C@H](CO)[C@H](O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,val,0 763 | Cc1cccc(C)c1OC[C@@H](C)N,val,0 764 | CC(=O)Oc1ccccc1C(=O)O,val,1 765 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C(=O)O)C(C)(C)CCC1,val,1 766 | C=C(C)[C@@H]1CC=C(C)CC1,val,1 767 | CC(C)NC[C@H](O)COc1cccc2[nH]ccc12,val,0 768 | CCCCC/N=C(\N)N/N=C\c1c[nH]c2ccc(CO)cc12,val,0 769 | COc1ccc2[nH]cc(CCN(C(C)C)C(C)C)c2c1,val,1 770 | CNC(=O)Oc1cccc2ccccc12,val,1 771 | CN(CC=CC#CC(C)(C)C)Cc1cccc2ccccc12,val,1 772 | CCOc1ccc2cc(C#N)c(=O)oc2c1,val,1 773 | COc1ccc2c(C(F)(F)F)cc(=O)oc2c1,val,1 774 | CCOc1ccc2ccc(=O)oc2c1,val,1 775 | CCOc1ccc2c(C(F)(F)F)cc(=O)oc2c1,val,1 776 | Cn1c(=O)[nH]c2ncn(C)c2c1=O,val,0 777 | Cn1c(=O)c2c(ncn2C)n(C)c1=O,val,1 778 | Cn1c(=O)c2[nH]cnc2n(C)c1=O,val,1 779 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OCC)C1c1ccccc1/C=C\C(=O)OC(C)(C)C,val,0 780 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-],val,0 781 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)C)[C@H]1c1ccccc1[N+](=O)[O-],val,0 782 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc([N+](=O)[O-])c1,val,0 783 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@@H]1c1cccc([N+](=O)[O-])c1,val,0 784 | O=C(O)Cc1cc(O)ccc1Nc1c(Cl)cccc1Cl,val,1 785 | O=C(O)COC(=O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1 786 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1 787 | Cc1ccc(Nc2c(F)cccc2Cl)c(CC(=O)O)c1,val,1 788 | OCCc1ccccc1Nc1c(Cl)cccc1Cl,val,1 789 | NC(=O)CS(=O)C(c1ccccc1)c1ccccc1,val,0 790 | 
CCCC(C(=O)OCCN(CC)CC)(c1ccccc1)c1ccccc1,val,0 791 | COc1ccc(C(c2ccc(OC)cc2)C(Cl)(Cl)Cl)cc1,val,1 792 | Cc1ccc(O)c([C@H](CCN(C(C)C)C(C)C)c2ccccc2)c1,val,1 793 | Cc1ccccc1[C@H](OCCN(C)C)c1ccccc1,val,0 794 | C[C@H](CN1c2ccccc2Sc2ccccc21)N(C)C,val,0 795 | CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,val,1 796 | O=c1c(O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12,val,0 797 | COc1ccc(-c2cc(=O)c3c(O)c(OC)c(O)cc3o2)cc1OC,val,1 798 | COc1ccc(-c2oc3cc(O)cc(O)c3c(=O)c2O)cc1O,val,1 799 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2O,val,1 800 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCc4cc(OC)ccc4[C@H]3CC[C@@]21C,val,1 801 | C[C@@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@H]2O,val,1 802 | O=C1CN=C(c2ccccc2Cl)c2cc([N+](=O)[O-])ccc2N1,val,0 803 | O=C1CN=C(c2ccccc2F)c2cc(Cl)ccc2N1CC(F)(F)F,val,1 804 | CN1C(=O)[C@H](O)N=C(c2ccccc2)c2cc(Cl)ccc21,val,1 805 | C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,val,0 806 | CC(=O)[C@@]1(O)CC[C@H]2[C@@H]3C[C@H](C)C4=CC(=O)CC[C@]4(C)[C@H]3CC[C@@]21C,val,0 807 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,val,1 808 | CCC(=O)O[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,val,1 809 | COc1ccnc(CS(=O)c2nc3ccc(OC(F)F)cc3[nH]2)c1OC,val,0 810 | Cc1c(OCC(F)(F)F)ccnc1CS(=O)c1nc2ccccc2[nH]1,val,1 811 | CC(=O)C[C@@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,val,1 812 | CC(=O)C[C@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,val,1 813 | --------------------------------------------------------------------------------