├── data
│   └── ADMETlab_data
│       ├── chirality_pretrain_canonical.csv
│       ├── CHEMBL_Final_random_selected_50.csv
│       ├── CYP2C19-sub_canonical.csv
│       ├── SkinSen_canonical.csv
│       ├── CYP1A2-sub_canonical.csv
│       ├── DILI_canonical.csv
│       └── CYP2C9-sub_canonical.csv
├── .gitignore
├── figure
│   └── Knowledge-based BERT.png
├── experiment
│   ├── __pycache__
│   │   ├── my_nn.cpython-37.pyc
│   │   ├── build_data.cpython-36.pyc
│   │   └── build_data.cpython-37.pyc
│   ├── build_dataset_for_tasks.py
│   ├── build_contrastive_dataset_for_tasks.py
│   ├── generate_drugbank_embedding.py
│   ├── contrastive_aug.py
│   ├── atom_embedding_generator.py
│   ├── k_bert_pretrain_chirality_R_S.py
│   ├── k_bert_pretrain_chirality.py
│   ├── build_pretrain_selected_tasks.py
│   ├── build_contrastive_pretrain_selected_tasks.py
│   ├── K_BERT_WCL_pretrain.py
│   ├── K_BERT_pretrain.py
│   ├── build_pretrain_chirality_R_S.py
│   ├── downstream_task.py
│   └── build_data.py
├── .idea
│   ├── inspectionProfiles
│   │   ├── profiles_settings.xml
│   │   └── Project_Default.xml
│   ├── vcs.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── standard_code.iml
│   ├── Knowledge-based BERT.iml
│   └── workspace.xml
└── README.md
/data/ADMETlab_data/chirality_pretrain_canonical.csv:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | .idea/workspace.xml
3 | data/task_data/drug_smiles.csv
4 | experiment/__pycache__/build_data.cpython-37.pyc
5 |
--------------------------------------------------------------------------------
/figure/Knowledge-based BERT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/figure/Knowledge-based BERT.png
--------------------------------------------------------------------------------
/experiment/__pycache__/my_nn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/experiment/__pycache__/my_nn.cpython-37.pyc
--------------------------------------------------------------------------------
/experiment/__pycache__/build_data.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/experiment/__pycache__/build_data.cpython-36.pyc
--------------------------------------------------------------------------------
/experiment/__pycache__/build_data.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wzxxxx/Knowledge-based-BERT/HEAD/experiment/__pycache__/build_data.cpython-37.pyc
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/experiment/build_dataset_for_tasks.py:
--------------------------------------------------------------------------------
"""Convert every ADMETlab downstream-task CSV into its ``.npy`` counterpart.

All paths are relative to the current working directory.
"""
from experiment import build_data

# Downstream task names; each one selects `<task>_canonical.csv` as input.
task_list = [
    'Pgp-sub', 'HIA', 'F(20%)', 'F(30%)', 'FDAMDD',
    'CYP1A2-sub', 'CYP2C19-sub', 'CYP2C9-sub', 'CYP2D6-sub', 'CYP3A4-sub',
    'T12', 'DILI', 'SkinSen', 'Carcinogenicity', 'Respiratory',
]

for task in task_list:
    source_csv = '../data/ADMETlab_data/' + task + '_canonical.csv'
    target_npy = '../data/task_data/' + task + '.npy'
    # The conversion itself lives in build_data (not visible from this script).
    build_data.built_data_and_save_for_splited(origin_path=source_csv,
                                               save_path=target_npy)
--------------------------------------------------------------------------------
/.idea/standard_code.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/Knowledge-based BERT.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/experiment/build_contrastive_dataset_for_tasks.py:
--------------------------------------------------------------------------------
"""Convert the contrastive-augmented task CSVs into their ``.npy`` counterparts.

One file is processed per (task, augmentation count) pair.  All paths are
relative to the current working directory.
"""
from experiment import build_data

# Downstream task names used to locate the augmented CSV files.
task_name_list = [
    'Pgp-sub', 'HIA', 'F(20%)', 'F(30%)', 'FDAMDD',
    'CYP1A2-sub', 'CYP2C19-sub', 'CYP2C9-sub', 'CYP2D6-sub', 'CYP3A4-sub',
    'T12', 'DILI', 'SkinSen', 'Carcinogenicity', 'Respiratory',
]
# Augmentation counts that appear in the input/output file names.
aug_times = [5]

for task_name in task_name_list:
    for times in aug_times:
        # Shared "<task>_<times>" part of the input and output file names.
        name_prefix = task_name + '_' + str(times)
        source_csv = '../data/contrastive_data/' + name_prefix + '_contrastive_aug.csv'
        target_npy = '../data/task_data/' + name_prefix + '_contrastive_aug.npy'
        build_data.built_data_and_save_for_contrastive_splited(
            origin_path=source_csv,
            save_path=target_npy)
--------------------------------------------------------------------------------
/experiment/generate_drugbank_embedding.py:
--------------------------------------------------------------------------------
"""Export K-BERT global embeddings for every SMILES of the listed data sets.

For each task the script reads ``./data/<task>_canonical.csv``, embeds the
``canonical_smiles`` column with ``bert_atom_embedding`` and writes the rows
that could be embedded, extended by ``pretrain_feature_<k>`` columns, to
``./data/embedding/<task>_k_bert_embedding.csv``.
"""
import numpy as np
import pandas as pd

from experiment.atom_embedding_generator import bert_atom_embedding

# Number of feature columns to emit when not a single molecule could be
# embedded (so the output still gets a header of the usual width).
EMBEDDING_DIM = 768

task_list = ['drugbank']
for task_name in task_list:
    print(task_name)
    dataset = pd.read_csv('./data/' + task_name + '_canonical.csv', index_col=None)
    smiles_list = dataset['canonical_smiles'].values.tolist()
    n_smiles = len(smiles_list)

    embeddings = []
    # embedded[i] is True when row i produced an embedding.
    embedded = np.zeros(n_smiles, dtype=bool)
    for i, smiles in enumerate(smiles_list):
        print("{}/{}".format(i + 1, n_smiles))
        try:
            h_global, _ = bert_atom_embedding(smiles, pretrain_model='pretrain_k_bert_epoch_7.pth')
        except Exception as err:
            # Best effort: report the failing molecule and leave its row out
            # of the output.  KeyboardInterrupt/SystemExit still propagate.
            print('embedding failed for {!r}: {}'.format(smiles, err))
            continue
        embeddings.append(np.asarray(h_global).reshape(-1))
        embedded[i] = True

    n_features = len(embeddings[0]) if embeddings else EMBEDDING_DIM
    feature_names = ['pretrain_feature_' + str(k + 1) for k in range(n_features)]
    # Build all feature columns at once instead of inserting them one by one.
    features = pd.DataFrame(
        np.vstack(embeddings) if embeddings else np.empty((0, n_features)),
        columns=feature_names,
        index=dataset.index[embedded],
    )
    dataset = pd.concat([dataset[embedded], features], axis=1)
    dataset.to_csv('./data/embedding/' + task_name + '_k_bert_embedding.csv', index=False)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Knowledge-based-BERT
2 | K-BERT is a BERT-based model that can extract molecular features from molecules like a computational chemist. Three pre-training tasks are used in K-BERT: the atom feature prediction task, the global feature prediction task, and the contrastive learning task. The atom feature prediction task allows the model to learn the manually extracted information used in graph-based methods (the initial atomic information). The global feature prediction task allows the model to learn the manually extracted information used in descriptor-based methods (molecular descriptors/molecular fingerprints). The contrastive learning task makes the embeddings of different SMILES strings of the same molecule more similar, enabling K-BERT to generalize to SMILES in formats other than canonical SMILES.
3 |
4 | ![image](figure/Knowledge-based%20BERT.png)
5 |
6 |
7 |
8 | **requirements:**
9 | python 3.7
10 | anaconda
11 | xgboost
12 | rdkit
13 | pytorch
14 | sklearn
15 |
16 |
17 |
18 | The datasets and pre-trained models can be downloaded from the following link: https://pan.baidu.com/s/1yzhHwhELuJG-3lxlrVtRPA (fetch code: WZXX)
19 |
20 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
18 |
19 |
20 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/experiment/contrastive_aug.py:
--------------------------------------------------------------------------------
"""Create randomized-SMILES augmentations for the task and pre-training CSVs."""
from rdkit import Chem
import pandas as pd


def _parse_smiles(smiles):
    """Return the RDKit molecule for ``smiles``, or ``None`` if parsing fails."""
    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        return None


def task_dataset_augmentation(task_name, input_path, output_path, augmentation_num=5):
    """Add randomized-SMILES columns to a CSV file and save the result.

    Reads ``input_path``, and for each ``i`` in ``range(augmentation_num - 1)``
    adds a column ``aug_smiles_<i>`` holding one randomized SMILES per row of
    the ``smiles`` column.  Rows whose SMILES cannot be parsed or re-written
    keep the original string in every augmented column.

    Args:
        task_name: Label printed once the augmentation has finished.
        input_path: CSV file with a ``smiles`` column.
        output_path: Destination CSV file (written without the index).
        augmentation_num: Total number of SMILES variants per molecule,
            counting the original; ``augmentation_num - 1`` columns are added.
    """
    origin_dataset = pd.read_csv(input_path, index_col=None)
    smiles_list = origin_dataset['smiles'].values.tolist()
    n_smiles = len(smiles_list)
    # Parse every molecule once instead of once per augmentation round.
    mols = [_parse_smiles(smiles) for smiles in smiles_list]

    for i in range(augmentation_num - 1):
        aug_smiles = []
        for j, (smiles, mol) in enumerate(zip(smiles_list, mols)):
            print('{}/{}'.format(j + 1, n_smiles))
            try:
                aug_smiles.append(Chem.MolToSmiles(mol, doRandom=True))
            except Exception:
                # Unparsable molecule (mol is None) or RDKit failure: report
                # it and fall back to the input string.
                print(smiles)
                aug_smiles.append(smiles)
        origin_dataset['aug_smiles' + '_' + str(i)] = aug_smiles
    print(task_name)
    origin_dataset.to_csv(output_path, index=False)


task_name_list = ['Pgp-sub', 'HIA', 'F(20%)', 'F(30%)', 'FDAMDD', 'CYP1A2-sub', 'CYP2C19-sub', 'CYP2C9-sub',
                  'CYP2D6-sub', 'CYP3A4-sub', 'T12', 'DILI', 'SkinSen', 'Carcinogenicity', 'Respiratory']

aug_times = [5]

# DOWNSTREAM TASKS
for task_name in task_name_list:
    for times in aug_times:
        print(task_name)
        input_path = '../data/ADMETlab_data/' + task_name + '_canonical.csv'
        output_path = '../data/contrastive_data/' + task_name + '_' + str(times) + '_contrastive_aug.csv'
        task_dataset_augmentation(task_name, input_path, output_path, augmentation_num=times)

# PRETRAIN TASKS
# The output name follows the `<task>_<times>_contrastive_aug.csv` pattern that
# build_contrastive_pretrain_selected_tasks.py reads.
task_dataset_augmentation('CHEMBL', input_path='../data/pretrain_data/CHEMBL.csv',
                          output_path='../data/pretrain_data/CHEMBL_5_contrastive_aug.csv', augmentation_num=5)
--------------------------------------------------------------------------------
/experiment/atom_embedding_generator.py:
--------------------------------------------------------------------------------
"""Generate K-BERT global and per-atom embeddings for a single SMILES string."""
import functools
import os

import numpy as np
import torch

from experiment.build_data import construct_input_from_smiles
from experiment.my_nn import EarlyStopping, set_random_seed, BERT_atom_embedding_generator

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
set_random_seed()

# Constructor arguments of the embedding model.
# NOTE(review): presumably these must match the checkpoint being loaded — confirm.
_MODEL_CONFIG = {
    'd_model': 768,
    'n_layers': 6,
    'vocab_size': 47,
    'maxlen': 201,
    'd_k': 64,
    'd_v': 64,
    'n_heads': 12,
    'd_ff': 768 * 4,
    'global_label_dim': 1,
    'atom_label_dim': 15,
}
# Arguments forwarded to EarlyStopping, which performs the checkpoint loading.
_PRETRAIN_LAYER = 6
_MODE = 'higher'


@functools.lru_cache(maxsize=4)
def _load_generator(pretrain_model, device):
    """Build the embedding model on ``device`` and load ``pretrain_model`` into it.

    Cached per (checkpoint, device) so that embedding many molecules does not
    rebuild the network and reload the checkpoint for every SMILES.
    """
    model = BERT_atom_embedding_generator(use_atom=False, **_MODEL_CONFIG)
    stopper = EarlyStopping(pretrained_model=pretrain_model,
                            pretrain_layer=_PRETRAIN_LAYER,
                            mode=_MODE)
    model.to(device)
    stopper.load_pretrained_model(model)
    # NOTE(review): the model is left in its default (training) mode, as
    # before; if it contains dropout, `model.eval()` is probably wanted — confirm.
    return model


def bert_atom_embedding(smiles, pretrain_model='pretrain_k_bert_epoch_7.pth'):
    """Return the global and atom-level embeddings of one molecule.

    Args:
        smiles: SMILES string, tokenized by ``construct_input_from_smiles``.
        pretrain_model: Checkpoint name handed to ``EarlyStopping`` for loading.

    Returns:
        ``(h_global, h_atom)`` as NumPy arrays with singleton dimensions
        squeezed out.

    Raises:
        Whatever ``construct_input_from_smiles`` or the model raise for an
        input they cannot handle; nothing is caught here.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Only the token ids and the atom mask are needed for inference; the label
    # lists returned alongside them are ignored.
    token_idx, _, _, atom_mask_list = construct_input_from_smiles(smiles)

    model = _load_generator(pretrain_model, device)

    token_idx = torch.tensor([token_idx]).long().to(device)
    # Indices of the positions whose mask value is 1.
    atom_mask_index = np.where(np.array(atom_mask_list) == 1)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        h_global, h_atom = model(token_idx, atom_mask_index)
    h_global = h_global.cpu().squeeze().detach().numpy()
    h_atom = h_atom.cpu().squeeze().detach().numpy()
    return h_global, h_atom
--------------------------------------------------------------------------------
/experiment/k_bert_pretrain_chirality_R_S.py:
--------------------------------------------------------------------------------
"""Run the K-BERT chirality (R/S) pre-training stage.

A checkpoint is loaded into the model through
``EarlyStopping.load_pretrained_model`` before training starts.
"""
import time

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

from experiment import build_data
from experiment.my_nn import (
    collate_pretrain_data,
    EarlyStopping,
    run_a_contrastive_R_S_pretrain_epoch,
    set_random_seed,
    K_BERT,
)

set_random_seed()

# Model and training configuration.  The whole dict is also handed to the
# epoch runner, so every key is kept even if it is not read in this script.
args = {
    'device': "cuda" if torch.cuda.is_available() else "cpu",
    'batch_size': 32,
    'num_epochs': 50,
    'd_model': 768,
    'n_layers': 6,
    'vocab_size': 47,
    'maxlen': 201,
    'd_k': 64,
    'd_v': 64,
    'd_ff': 768*4,
    'n_heads': 12,
    'global_labels_dim': 1,
    'atom_labels_dim': 15,
    'lr': 0.00003,
    'pretrain_layer': 5,
    'pretrain_model': 'pretrain_k_bert_epoch_7.pth',
    'task_name': 'k_bert_chirality_R_S',
    'pretrain_data_path': '../data/pretrain_data/chirality_pretrain_R_S_maccs',
}

pretrain_set = build_data.load_data_for_contrastive_aug_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(
    dataset=pretrain_set,
    batch_size=args['batch_size'],
    shuffle=True,
    collate_fn=collate_pretrain_data,
)

# Unreduced losses: reduction/masking is left to the epoch runner.
loss_criterion_global = torch.nn.BCEWithLogitsLoss(reduction='none')
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(reduction='none')

model = K_BERT(
    d_model=args['d_model'],
    n_layers=args['n_layers'],
    vocab_size=args['vocab_size'],
    maxlen=args['maxlen'],
    d_k=args['d_k'],
    d_v=args['d_v'],
    n_heads=args['n_heads'],
    d_ff=args['d_ff'],
    global_label_dim=args['global_labels_dim'],
    atom_label_dim=args['atom_labels_dim'],
)
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(
    pretrained_model=args['pretrain_model'],
    pretrain_layer=args['pretrain_layer'],
    task_name=args['task_name'],
)
model.to(args['device'])
stopper.load_pretrained_model(model)

for epoch in range(args['num_epochs']):
    start = time.time()
    # One pass over the pre-training data.
    run_a_contrastive_R_S_pretrain_epoch(
        args, epoch, model, pretrain_loader,
        loss_criterion_global=loss_criterion_global,
        loss_criterion_atom=loss_criterion_atom,
        optimizer=optimizer)
    # Per-epoch bookkeeping is delegated to the stopper (defined in my_nn).
    stopper.pretrain_step(epoch, model)
    # Report the wall-clock time of this epoch as h:m:s.
    elapsed = time.time() - start
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
--------------------------------------------------------------------------------
/experiment/k_bert_pretrain_chirality.py:
--------------------------------------------------------------------------------
"""Run the K-BERT chirality pre-training stage.

A checkpoint is loaded into the model through
``EarlyStopping.load_pretrained_model`` before training starts.
"""
import time

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

from experiment import build_data
from experiment.my_nn import (
    collate_pretrain_data,
    EarlyStopping,
    run_a_contrastive_pretrain_epoch,
    set_random_seed,
    K_BERT,
)

set_random_seed()

# Model and training configuration.  The whole dict is also handed to the
# epoch runner, so every key is kept even if it is not read in this script.
args = {
    'device': "cuda" if torch.cuda.is_available() else "cpu",
    'batch_size': 32,
    'num_epochs': 50,
    'd_model': 768,
    'n_layers': 6,
    'vocab_size': 47,
    'maxlen': 201,
    'd_k': 64,
    'd_v': 64,
    'd_ff': 768*4,
    'n_heads': 12,
    'global_labels_dim': 154,
    'atom_labels_dim': 15,
    'pretrain_layer': 5,
    'lr': 0.00003,
    'pretrain_model': 'pretrain_k_bert_epoch_7.pth',
    'task_name': 'k_bert_chirality',
    # Relative to the working directory, like the data paths of the other
    # pre-training scripts, so the script is not tied to one machine.
    'pretrain_data_path': '../data/pretrain_data/chirality_pretrain_maccs',
}

pretrain_set = build_data.load_data_for_contrastive_aug_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(
    dataset=pretrain_set,
    batch_size=args['batch_size'],
    shuffle=True,
    collate_fn=collate_pretrain_data,
)

# Unreduced losses: reduction/masking is left to the epoch runner.
loss_criterion_global = torch.nn.BCEWithLogitsLoss(reduction='none')
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(reduction='none')

model = K_BERT(
    d_model=args['d_model'],
    n_layers=args['n_layers'],
    vocab_size=args['vocab_size'],
    maxlen=args['maxlen'],
    d_k=args['d_k'],
    d_v=args['d_v'],
    n_heads=args['n_heads'],
    d_ff=args['d_ff'],
    global_label_dim=args['global_labels_dim'],
    atom_label_dim=args['atom_labels_dim'],
)
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(
    pretrained_model=args['pretrain_model'],
    pretrain_layer=args['pretrain_layer'],
    task_name=args['task_name'],
)
model.to(args['device'])
stopper.load_pretrained_model(model)

for epoch in range(args['num_epochs']):
    start = time.time()
    # One pass over the pre-training data.
    run_a_contrastive_pretrain_epoch(
        args, epoch, model, pretrain_loader,
        loss_criterion_global=loss_criterion_global,
        loss_criterion_atom=loss_criterion_atom,
        optimizer=optimizer)
    # Per-epoch bookkeeping is delegated to the stopper (defined in my_nn).
    stopper.pretrain_step(epoch, model)
    # Report the wall-clock time of this epoch as h:m:s.
    elapsed = time.time() - start
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
--------------------------------------------------------------------------------
/data/ADMETlab_data/CHEMBL_Final_random_selected_50.csv:
--------------------------------------------------------------------------------
1 | smiles
2 | CP(=S)(Nn1cnnc1)Oc1ccccc1
3 | OC12CC3CC(C1)CC(Nc1nc(N[C@H]4CC[C@H](O)CC4)ncc1C(F)(F)F)(C3)C2
4 | Cc1ccc(C(=O)N(c2c(C)n(C)n(-c3ccccc3)c2=O)C(C)C)cc1
5 | CCC[C@H](NC(=O)OCC(Cl)(Cl)Cl)C(=O)N[C@H](C)c1nc2ccc(F)cc2s1
6 | CC(=O)c1cc2c(C)c(C(=O)c3ccccc3)oc2cc1O
7 | CCn1c(=O)c2sccc2n(CC(=O)NCCC2=CCCCC2)c1=O
8 | C=CCc1ccc(OC)c(-c2cc(CC=C)ccc2O[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)c1
9 | NS(=O)(=O)c1ccc(CCNc2ccn3nc(-c4ccc(OCCN5CCOCC5)cc4)cc3n2)cc1
10 | COc1ccc(CNS(=O)(=O)c2cc(-c3cc(C)no3)ccc2C)cc1OC
11 | COC1(OC)C[C@@H](C(=O)O)N(C(=O)[C@H](C)CS)C1
12 | CCOC(=O)c1c(C)[nH]c(C(=O)/C(C#N)=C/c2ccc(OCC)cc2)c1C
13 | CC(=O)CCC(=O)Oc1cc(Cl)ccc1Oc1ccc(Cl)cc1Cl
14 | Nc1cnc(-c2ccc(-c3ccccc3S(=O)(=O)N3CC(O)C3)cc2F)cn1
15 | Cn1ccnc1CN1CCC(Cn2cc(-c3ccncc3)nn2)CC1
16 | C=C(C)[C@@H](O)CC[C@@H](C)[C@@H]1CC[C@]2(C)C3=C(CC[C@@]12C)[C@@]1(C)CC[C@H](O)C(C)(C)[C@@H]1CC3=O
17 | N#C[C@H]1CC[C@@H](Nc2ncnc3c(=O)[nH]ccc23)C1
18 | CCCc1cnc(N2CCC(Oc3ccn(-c4ccc([S+](C)[O-])cc4)c(=O)c3)CC2)nc1
19 | c1ccc2[nH]c(-c3ccc(NC4=NCCNC4)cc3)nc2c1
20 | C[C@@H](n1cnc2ccc(F)cc2c1=O)[C@](O)(Cn1cncn1)c1ccc(F)cc1F
21 | O=[N+]([O-])c1ccccc1N/N=C/c1cc2ccccc2nc1Cl
22 | Cn1c(SCC(=O)N2CCCc3ccccc32)ncc(C(=O)Nc2ccc(F)cc2)c1=O
23 | Cc1ccc(C(=O)Nc2ccc3c(c2)N(C)C(=O)c2ccccc2S3)cc1
24 | CC[C@H](C)[C@@H]1O[C@@]2(C=CC1C)CC1C[C@@H](CC=C(C)C(O[C@H]3CC(OC)C(O[C@H]4CC(OC)C(O)C(C)O4)C(C)O3)C(C)C=CC=C3CO[C@@H]4C(O)C(C)=C[C@@H](C(=O)O1)[C@]34O)O2
25 | C/C(S)=N/c1c(C#N)c(-c2ccc(Br)cc2)cn1-c1ccc(S(N)(=O)=O)cc1
26 | CCCc1c(C(=O)OCC)c(C(=O)OCC)c2c(-c3ccc(F)cc3)cc(N3CCOCC3)nn12
27 | O=C(Nc1cc[nH]c(=O)c1)c1ccc(Cl)cc1Oc1ccc(C(F)(F)F)nc1
28 | O=C(COc1ccc(F)cc1F)N1CCCCCCC1
29 | O=C1N(c2ccccc2)c2nc(CN3CCCCC3)[nH]c(=O)c2C2C(N3CCCC3)CCCCN12
30 | CC1(C)[C@@H](OCc2cccc([N+](=O)[O-])c2)CC[C@@]2(C)[C@H]1CC[C@]1(C)[C@@H]2C(=O)C=C2[C@@H]3C[C@@](C)(C(=O)O)CC[C@]3(C)CC[C@]21C
31 | COC(=O)N[C@H](C(=O)N1CCC[C@H]1c1ncc(-c2ccc3c(c2)OC(c2cccc(C4CC4)c2)n2c-3cc3cc(-c4cnc([C@@H]5CCCN5C(=O)[C@@H](NC(=O)OC)C(C)C)[nH]4)ccc32)[nH]1)C(C)C
32 | COC(=O)/C=C/c1cccc(N(Cc2ccc(-c3ccc(SC)cc3)cc2)C(=O)C(C)C)c1
33 | NC[C@@H](CC(=O)O)c1ccc(Cl)cc1
34 | CN1CCN(CC(=O)Nc2ccc3ncnc(Nc4ccc(Cl)cc4)c3c2)CC1
35 | CC(C)n1ncnc1-n1cc2c(n1)-c1ccccc1OCC2
36 | Cc1cc(C)c(CN2C=[N+](Cc3c(C)cc(C)cc3C)C3CCCCC32)c(C)c1
37 | CC(C)n1nnc2cc(-c3nn[nH]n3)ccc21
38 | COc1cc(-c2nnc(SC3=CS(=O)(=O)c4ccccc43)o2)cc(OC)c1OC
39 | C[C@H](NC(=O)C1CC1)c1ccc(CN2Cc3ccc(OCC4CC4(F)F)cc3C(F)C2)cc1
40 | CCc1ccc2sc3c(=O)[nH]c(CN(C)C)nc3c2c1
41 | O=C(NCCc1cccc2ccccc12)C1CCC1
42 | N#CC(C#N)=CC1=C2OC(c3ccccc3)=CC(c3ccccc3)=C2CCC1
43 | N=C(N)c1cnc(OC2CCCC2)nc1N
44 | COc1ccc(-c2csc(-c3cc(C(=N)N)sc3SC)n2)cc1
45 | COc1ccc(N2CC(C(=O)NCCC3=CCCCC3)CC2=O)cc1
46 | CN(C)Cc1ccc(-c2nnc(Nc3cccnc3Oc3ccccc3C(C)(C)C)s2)cc1
47 | C=CC(C)(C)c1c(O)cc(O)c2c1O[C@]13[C@H]4C[C@H](C(=O)[C@]1(/C=C/C(C)(C)O)OC4(C)C)[C@H](OC)[C@H]3C2=O
48 | COc1ccc(CCN/C=C2/C=C(Br)C(=O)OC2=O)cc1
49 | CCc1c(-c2nnc(C3(c4ccc(Cl)cc4)CCC3)s2)nc(-c2ccc(Cl)cc2Cl)n1-c1ccc(Br)cc1
50 | O=C(Cn1cncn1)C12CC3CC(CC(C3)C1)C2
51 | Oc1ccc(-c2cn(-c3cc(O)cc(O)c3)nn2)cc1
52 |
--------------------------------------------------------------------------------
/experiment/build_pretrain_selected_tasks.py:
--------------------------------------------------------------------------------
"""Pre-compute the MACCS pre-training arrays for the CHEMBL SMILES set.

The SMILES list is processed in 10 sequential rounds; within a round the work
is spread over ``n_thread`` worker processes, each of which saves one
``<task>_maccs_<k>.npy`` file.
"""
import multiprocessing

import pandas as pd

from experiment.build_data import build_maccs_pretrain_data_and_save

task_name = 'CHEMBL'


def _split_bounds(n_items, n_parts):
    """Return ``n_parts`` contiguous ``(start, stop)`` pairs covering ``range(n_items)``.

    Every part holds ``n_items // n_parts`` items except the last one, which
    also takes the remainder so that no item is dropped.
    """
    size = n_items // n_parts
    bounds = [(k * size, (k + 1) * size) for k in range(n_parts - 1)]
    bounds.append(((n_parts - 1) * size, n_items))
    return bounds


if __name__ == "__main__":
    n_thread = 8
    # Same directory the output files (and the other scripts) use.
    data = pd.read_csv('../data/pretrain_data/' + task_name + '.csv')
    smiles_list = data['smiles'].values.tolist()

    # Split the data set into 10 rounds to avoid running out of memory.
    for i, (round_start, round_stop) in enumerate(_split_bounds(len(smiles_list), 10)):
        smiles_split = smiles_list[round_start:round_stop]

        # One worker process per slice; output files are numbered 1..10*n_thread.
        workers = []
        for k, (start, stop) in enumerate(_split_bounds(len(smiles_split), n_thread), start=1):
            save_path = '../data/pretrain_data/' + task_name + '_maccs_' + str(i * n_thread + k) + '.npy'
            workers.append(multiprocessing.Process(
                target=build_maccs_pretrain_data_and_save,
                args=(smiles_split[start:stop], save_path)))

        for worker in workers:
            worker.start()

        # Wait for the whole round before starting the next one.
        for worker in workers:
            worker.join()

    print("Done!")
--------------------------------------------------------------------------------
/experiment/build_contrastive_pretrain_selected_tasks.py:
--------------------------------------------------------------------------------
"""Pre-compute the contrastive MACCS pre-training arrays for the CHEMBL set.

Every row contributes its original SMILES plus four augmented ones.  The rows
are processed in 10 sequential rounds; within a round the work is spread over
``n_thread`` worker processes, each of which saves one
``<task>_maccs_contrastive_<k>.npy`` file.
"""
import multiprocessing

import pandas as pd

from experiment.build_data import build_maccs_pretrain_contrastive_data_and_save

task_name = 'CHEMBL'


def _split_bounds(n_items, n_parts):
    """Return ``n_parts`` contiguous ``(start, stop)`` pairs covering ``range(n_items)``.

    Every part holds ``n_items // n_parts`` items except the last one, which
    also takes the remainder so that no item is dropped.
    """
    size = n_items // n_parts
    bounds = [(k * size, (k + 1) * size) for k in range(n_parts - 1)]
    bounds.append(((n_parts - 1) * size, n_items))
    return bounds


if __name__ == "__main__":
    n_thread = 8
    data = pd.read_csv('../data/pretrain_data/' + task_name + '_5_contrastive_aug.csv')
    smiles_name_list = ['smiles', 'aug_smiles_0', 'aug_smiles_1', 'aug_smiles_2', 'aug_smiles_3']
    # One list of five SMILES strings per molecule.
    smiles_list = data[smiles_name_list].values.tolist()

    # Split the data set into 10 rounds to avoid running out of memory.
    for i, (round_start, round_stop) in enumerate(_split_bounds(len(smiles_list), 10)):
        smiles_split = smiles_list[round_start:round_stop]

        # One worker process per slice; output files are numbered 1..10*n_thread.
        workers = []
        for k, (start, stop) in enumerate(_split_bounds(len(smiles_split), n_thread), start=1):
            save_path = ('../data/pretrain_data/' + task_name + '_maccs_contrastive_'
                         + str(i * n_thread + k) + '.npy')
            workers.append(multiprocessing.Process(
                target=build_maccs_pretrain_contrastive_data_and_save,
                args=(smiles_split[start:stop], save_path)))

        for worker in workers:
            worker.start()

        # Wait for the whole round before starting the next one.
        for worker in workers:
            worker.join()

    print("Done!")
--------------------------------------------------------------------------------
/experiment/K_BERT_WCL_pretrain.py:
--------------------------------------------------------------------------------
"""Pre-train the K_BERT_WCL model on the CHEMBL MACCS data."""
import os
import time

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

from experiment import build_data
from experiment.my_nn import (
    collate_pretrain_data,
    EarlyStopping,
    run_a_pretrain_epoch,
    set_random_seed,
    K_BERT_WCL,
)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
set_random_seed()

# Model and training configuration.  The whole dict is also handed to the
# epoch runner, so every key is kept even if it is not read in this script.
args = {
    'device': "cuda" if torch.cuda.is_available() else "cpu",
    'batch_size': 80,
    'num_epochs': 50,
    'd_model': 768,
    'n_layers': 6,
    'vocab_size': 47,
    'maxlen': 201,
    'd_k': 64,
    'd_v': 64,
    'd_ff': 768*4,
    'n_heads': 12,
    'global_labels_dim': 154,
    'atom_labels_dim': 15,
    'lr': 0.00003,
    'task_name': 'k_bert_wcl',
    'pretrain_data_path': '../data/pretrain_data/CHEMBL_maccs',
}

pretrain_set = build_data.load_data_for_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(
    dataset=pretrain_set,
    batch_size=args['batch_size'],
    shuffle=True,
    collate_fn=collate_pretrain_data,
)

# Positive-class weights: one per global label (154) and one per atom label (15).
global_pos_weight = torch.tensor([884.17, 70.71, 43.32, 118.73, 428.67, 829.0, 192.84, 67.89, 533.86, 18.46, 707.55, 160.14, 23.19, 26.33, 13.38, 12.45, 44.91, 173.58, 40.14, 67.25, 171.12, 8.84, 8.36, 43.63, 5.87, 10.2, 3.06, 161.72, 101.75, 20.01, 4.35, 12.62, 331.79, 31.17, 23.19, 5.91, 53.58, 15.73, 10.75, 6.84, 3.92, 6.52, 6.33, 6.74, 24.7, 2.67, 6.64, 5.4, 6.71, 6.51, 1.35, 24.07, 5.2, 0.74, 4.78, 6.1, 62.43, 6.1, 12.57, 9.44, 3.33, 5.71, 4.67, 0.98, 8.2, 1.28, 9.13, 1.1, 1.03, 2.46, 2.95, 0.74, 6.24, 0.96, 1.72, 2.25, 2.16, 2.87, 1.8, 1.62, 0.76, 1.78, 1.74, 1.08, 0.65, 0.97, 0.71, 5.08, 0.75, 0.85, 3.3, 4.79, 1.72, 0.78, 1.46, 1.8, 2.97, 2.18, 0.61, 0.61, 1.83, 1.19, 4.68, 3.08, 2.83, 0.51, 0.77, 6.31, 0.47, 0.29, 0.58, 2.76, 1.48, 0.25, 1.33, 0.69, 1.03, 0.97, 3.27, 1.31, 1.22, 0.85, 1.75, 1.02, 1.13, 0.16, 1.02, 2.2, 1.72, 2.9, 0.26, 0.69, 0.6, 0.23, 0.76, 0.73, 0.47, 1.13, 0.48, 0.53, 0.72, 0.38, 0.35, 0.48, 0.12, 0.52, 0.15, 0.28, 0.36, 0.08, 0.06, 0.03, 0.07, 0.01])
atom_pos_weight = torch.tensor([4.81, 1.0, 2.23, 53.49, 211.94, 0.49, 2.1, 1.13, 1.22, 1.93, 5.74, 15.42, 70.09, 61.47, 23.2])
# Place the weights on the same device as the model, so the script also runs
# on a machine without CUDA (where args['device'] is "cpu").
loss_criterion_global = torch.nn.BCEWithLogitsLoss(
    reduction='none', pos_weight=global_pos_weight.to(args['device']))
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(
    reduction='none', pos_weight=atom_pos_weight.to(args['device']))

model = K_BERT_WCL(
    d_model=args['d_model'],
    n_layers=args['n_layers'],
    vocab_size=args['vocab_size'],
    maxlen=args['maxlen'],
    d_k=args['d_k'],
    d_v=args['d_v'],
    n_heads=args['n_heads'],
    d_ff=args['d_ff'],
    global_label_dim=args['global_labels_dim'],
    atom_label_dim=args['atom_labels_dim'],
)
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(task_name=args['task_name'])
model.to(args['device'])

for epoch in range(args['num_epochs']):
    start = time.time()
    # One pass over the pre-training data.
    run_a_pretrain_epoch(
        args, epoch, model, pretrain_loader,
        loss_criterion_global=loss_criterion_global,
        loss_criterion_atom=loss_criterion_atom,
        optimizer=optimizer)
    # Per-epoch bookkeeping is delegated to the stopper (defined in my_nn).
    stopper.pretrain_step(epoch, model)
    # Report the wall-clock time of this epoch as h:m:s.
    elapsed = time.time() - start
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
--------------------------------------------------------------------------------
/experiment/K_BERT_pretrain.py:
--------------------------------------------------------------------------------
1 | from experiment import build_data
2 | import torch
3 | from torch.optim import Adam
4 | from torch.utils.data import DataLoader
5 | from experiment.my_nn import collate_pretrain_data, EarlyStopping, run_a_contrastive_pretrain_epoch, \
6 | set_random_seed, K_BERT
7 | import os
8 | import time
set_random_seed()

# Hyper-parameters of the model and of the pre-training run.
args = {}
args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
args['batch_size'] = 32
args['num_epochs'] = 50
args['d_model'] = 768
args['n_layers'] = 6
args['vocab_size'] = 47
args['maxlen'] = 201
args['d_k'] = 64
args['d_v'] = 64
args['d_ff'] = 768*4
args['n_heads'] = 12
args['global_labels_dim'] = 154
args['atom_labels_dim'] = 15
args['lr'] = 0.00003
args['task_name'] = 'k_bert'
args['pretrain_data_path'] = '../data/pretrain_data/CHEMBL_maccs'

# Load the pre-computed pre-training set and wrap it in a shuffling loader.
pretrain_set = build_data.load_data_for_contrastive_aug_pretrain(
    pretrain_data_path=args['pretrain_data_path'])
print("Pretrain data generation is complete !")

pretrain_loader = DataLoader(dataset=pretrain_set,
                             batch_size=args['batch_size'],
                             shuffle=True,
                             collate_fn=collate_pretrain_data)

# Per-label positive-class weights for the molecule-level (global) labels.
global_pos_weight = torch.tensor([884.17, 70.71, 43.32, 118.73, 428.67, 829.0, 192.84, 67.89, 533.86, 18.46, 707.55, 160.14, 23.19, 26.33, 13.38, 12.45, 44.91, 173.58, 40.14, 67.25, 171.12, 8.84, 8.36, 43.63, 5.87, 10.2, 3.06, 161.72, 101.75, 20.01, 4.35, 12.62, 331.79, 31.17, 23.19, 5.91, 53.58, 15.73, 10.75, 6.84, 3.92, 6.52, 6.33, 6.74, 24.7, 2.67, 6.64, 5.4, 6.71, 6.51, 1.35, 24.07, 5.2, 0.74, 4.78, 6.1, 62.43, 6.1, 12.57, 9.44, 3.33, 5.71, 4.67, 0.98, 8.2, 1.28, 9.13, 1.1, 1.03, 2.46, 2.95, 0.74, 6.24, 0.96, 1.72, 2.25, 2.16, 2.87, 1.8, 1.62, 0.76, 1.78, 1.74, 1.08, 0.65, 0.97, 0.71, 5.08, 0.75, 0.85, 3.3, 4.79, 1.72, 0.78, 1.46, 1.8, 2.97, 2.18, 0.61, 0.61, 1.83, 1.19, 4.68, 3.08, 2.83, 0.51, 0.77, 6.31, 0.47, 0.29, 0.58, 2.76, 1.48, 0.25, 1.33, 0.69, 1.03, 0.97, 3.27, 1.31, 1.22, 0.85, 1.75, 1.02, 1.13, 0.16, 1.02, 2.2, 1.72, 2.9, 0.26, 0.69, 0.6, 0.23, 0.76, 0.73, 0.47, 1.13, 0.48, 0.53, 0.72, 0.38, 0.35, 0.48, 0.12, 0.52, 0.15, 0.28, 0.36, 0.08, 0.06, 0.03, 0.07, 0.01])
# The weight vector is tiled five times.
# NOTE(review): presumably one copy per SMILES view (original + 4 augmentations) - confirm
# against the collate function and against args['global_labels_dim'].
global_pos_weight = torch.cat((global_pos_weight, global_pos_weight, global_pos_weight, global_pos_weight, global_pos_weight), 0)
# Per-label positive-class weights for the 15 atom-level labels.
atom_pos_weight = torch.tensor([4.81, 1.0, 2.23, 53.49, 211.94, 0.49, 2.1, 1.13, 1.22, 1.93, 5.74, 15.42, 70.09, 61.47, 23.2])

# Place the weights on args['device'] (the device the model is moved to below) rather than
# a hard-coded 'cuda', so the script also starts on machines where CUDA is unavailable.
loss_criterion_global = torch.nn.BCEWithLogitsLoss(reduction='none',
                                                   pos_weight=global_pos_weight.to(args['device']))
loss_criterion_atom = torch.nn.BCEWithLogitsLoss(reduction='none',
                                                 pos_weight=atom_pos_weight.to(args['device']))

model = K_BERT(d_model=args['d_model'], n_layers=args['n_layers'], vocab_size=args['vocab_size'],
               maxlen=args['maxlen'], d_k=args['d_k'], d_v=args['d_v'], n_heads=args['n_heads'], d_ff=args['d_ff'],
               global_label_dim=args['global_labels_dim'], atom_label_dim=args['atom_labels_dim'])
optimizer = Adam(model.parameters(), lr=args['lr'])
stopper = EarlyStopping(task_name=args['task_name'])
model.to(args['device'])

for epoch in range(args['num_epochs']):
    start = time.time()
    # Train for one pass over the pre-training data.
    run_a_contrastive_pretrain_epoch(args, epoch, model, pretrain_loader,
                                     loss_criterion_global=loss_criterion_global,
                                     loss_criterion_atom=loss_criterion_atom, optimizer=optimizer)
    # NOTE(review): presumably stores a per-epoch checkpoint - confirm in my_nn.EarlyStopping.
    stopper.pretrain_step(epoch, model)
    # Report the wall-clock time of the epoch as h:m:s.
    elapsed = (time.time() - start)
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    print("An epoch time used:", "{:d}:{:d}:{:d}".format(int(h), int(m), int(s)))
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/experiment/build_pretrain_chirality_R_S.py:
--------------------------------------------------------------------------------
1 | from experiment.build_data import build_pretrain_chirality_R_S_contrastive_data_and_save
2 | import multiprocessing
3 | import pandas as pd
4 |
task_name = 'chirality_pretrain_R_S'


def split_bounds(n_items, n_parts):
    """Return ``n_parts`` contiguous ``(start, stop)`` index pairs covering ``range(n_items)``.

    Every part holds ``n_items // n_parts`` items, except the last one, which
    additionally absorbs the remainder so that no item is left out.

    >>> split_bounds(25, 10)[-1]
    (18, 25)
    """
    size = n_items // n_parts
    bounds = [(k * size, (k + 1) * size) for k in range(n_parts - 1)]
    bounds.append(((n_parts - 1) * size, n_items))
    return bounds


if __name__ == "__main__":
    n_thread = 8    # worker processes started per chunk
    n_chunks = 10   # the data set is processed in this many sequential chunks
    data = pd.read_csv('../data/ADMETlab_scaffold_augmentation/'+task_name+'_5_consensus_aug.csv')
    smiles_name_list = ['smiles', 'aug_smiles_0', 'aug_smiles_1', 'aug_smiles_2', 'aug_smiles_3']
    smiles_list = data[smiles_name_list].values.tolist()
    global_list = data['labels'].values.tolist()

    # To avoid running out of memory, the data set is split into 10 parts that are
    # processed one after another. The last part also takes the rows left over by the
    # integer division, which were previously dropped.
    for i, (chunk_start, chunk_stop) in enumerate(split_bounds(len(smiles_list), n_chunks)):
        smiles_split = smiles_list[chunk_start:chunk_stop]
        global_split = global_list[chunk_start:chunk_stop]

        # One process per slice of the chunk; output files are numbered
        # i*n_thread+1 ... i*n_thread+n_thread, the same names as before.
        workers = []
        for j, (lo, hi) in enumerate(split_bounds(len(smiles_split), n_thread)):
            out_path = '../data/BERT_maccs_data/'+task_name+'_maccs_consensus_'+str(i*n_thread+j+1)+'.npy'
            workers.append(multiprocessing.Process(
                target=build_pretrain_chirality_R_S_contrastive_data_and_save,
                args=(smiles_split[lo:hi], global_split[lo:hi], out_path)))

        # Start all workers of this chunk, then wait for every one of them before the
        # next chunk begins, so only one chunk is in flight at a time.
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    # All chunks have been processed.
    print("Done!")
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/experiment/downstream_task.py:
--------------------------------------------------------------------------------
1 | from experiment import build_data
2 | import torch
3 | from torch.optim import Adam
4 | from torch.utils.data import DataLoader
5 | from experiment.my_nn import collate_data, EarlyStopping, run_a_train_global_epoch, run_an_eval_global_epoch,\
6 | set_random_seed, K_BERT_WCL, pos_weight
7 | import os
8 | import numpy as np
9 | import pandas as pd
set_random_seed()


# Fixed hyper-parameters of the model and of the fine-tuning run.
args = {}
args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
args['metric_name'] = 'roc_auc'
args['batch_size'] = 128
args['num_epochs'] = 200
args['d_model'] = 768
args['n_layers'] = 6
args['vocab_size'] = 47
args['maxlen'] = 201
args['d_k'] = 64
args['d_v'] = 64
args['d_ff'] = 768 * 4
args['n_heads'] = 12
args['global_labels_dim'] = 1
args['atom_labels_dim'] = 15
args['lr'] = 3e-5
args['pretrain_layer'] = 5
args['mode'] = 'higher'
args['patience'] = 20
args['times'] = 10
args['pretrain_model'] = 'pretrain_k_bert_wcl_epoch_7.pth'
# args['pretrain_model'] = 'pretrain_k_bert_epoch_7.pth'

args['task_name_list'] = ['Pgp-sub', 'HIA', 'F(20%)', 'F(30%)', 'FDAMDD', 'CYP1A2-sub', 'CYP2C19-sub', 'CYP2C9-sub',
                          'CYP2D6-sub', 'CYP3A4-sub', 'T12', 'DILI', 'SkinSen', 'Carcinogenicity', 'Respiratory']

# Create the output directory up front: without it, to_csv would fail only after a
# complete training run has already finished.
result_dir = '../result/maccs/'
os.makedirs(result_dir, exist_ok=True)

for task in args['task_name_list']:
    args['task_name'] = task
    args['data_path'] = '../data/task_data/' + args['task_name'] + '.npy'

    # Metric of each finished repetition of this task, one entry per repetition.
    all_times_train_result = []
    all_times_val_result = []
    all_times_test_result = []
    # One row per metric; three columns (train/val/test) are added per repetition.
    result_pd = pd.DataFrame()
    result_pd['index'] = ['roc_auc', 'accuracy', 'sensitivity', 'specificity', 'f1-score', 'precision', 'recall',
                          'error rate', 'mcc']

    for time_id in range(args['times']):
        # A different seed per repetition.
        set_random_seed(2020+time_id)
        train_set, val_set, test_set, _task_number = build_data.load_data_for_random_splited(
            data_path=args['data_path'], shuffle=True
        )
        print("Molecule graph is loaded!")
        train_loader = DataLoader(dataset=train_set,
                                  batch_size=args['batch_size'],
                                  shuffle=True,
                                  collate_fn=collate_data)

        val_loader = DataLoader(dataset=val_set,
                                batch_size=args['batch_size'],
                                collate_fn=collate_data)

        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_data)
        pos_weight_task = pos_weight(train_set)
        print('***************************************************************************************************')
        print('{}, {}/{} time'.format(args['task_name'], time_id+1, args['times']))
        print('***************************************************************************************************')

        loss_criterion = torch.nn.BCEWithLogitsLoss(reduction='none', pos_weight=pos_weight_task.to(args['device']))
        model = K_BERT_WCL(d_model=args['d_model'], n_layers=args['n_layers'], vocab_size=args['vocab_size'],
                           maxlen=args['maxlen'], d_k=args['d_k'], d_v=args['d_v'], n_heads=args['n_heads'],
                           d_ff=args['d_ff'],
                           global_label_dim=args['global_labels_dim'], atom_label_dim=args['atom_labels_dim'])
        stopper = EarlyStopping(patience=args['patience'], pretrained_model=args['pretrain_model'],
                                pretrain_layer=args['pretrain_layer'],
                                task_name=args['task_name']+'_downstream_k_bert_wcl', mode=args['mode'])
        model.to(args['device'])
        stopper.load_pretrained_model(model)
        optimizer = Adam(model.parameters(), lr=args['lr'])
        for epoch in range(args['num_epochs']):
            train_score = run_a_train_global_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)
            # NOTE(review): the result of this extra pass over the training set is discarded.
            # It is kept because iterating the shuffled loader may advance the RNG state and
            # thereby affect later epochs - confirm before removing.
            _ = run_an_eval_global_epoch(args, model, train_loader)[0]
            # Element 0 of the returned list is the metric used for early stopping.
            val_score = run_an_eval_global_epoch(args, model, val_loader)[0]
            test_score = run_an_eval_global_epoch(args, model, test_loader)[0]
            # The first five epochs report a score of 0 to the stopper instead of the
            # validation score.
            if epoch < 5:
                early_stop = stopper.step(0, model)
            else:
                early_stop = stopper.step(val_score, model)
            print('epoch {:d}/{:d}, {}, lr: {:.6f}, train: {:.4f}, valid: {:.4f}, best valid {:.4f}, '
                  'test: {:.4f}'.format(
                      epoch + 1, args['num_epochs'], args['metric_name'], optimizer.param_groups[0]['lr'],
                      train_score, val_score, stopper.best_score, test_score))
            if early_stop:
                break

        # Evaluate the reloaded checkpoint once per split; the headline score is element 0
        # of each metric list, so no second evaluation pass is needed.
        stopper.load_checkpoint(model)
        stop_train_list = run_an_eval_global_epoch(args, model, train_loader)
        stop_val_list = run_an_eval_global_epoch(args, model, val_loader)
        stop_test_list = run_an_eval_global_epoch(args, model, test_loader)
        train_score = stop_train_list[0]
        val_score = stop_val_list[0]
        test_score = stop_test_list[0]

        result_pd['train_' + str(time_id + 1)] = stop_train_list
        result_pd['val_' + str(time_id + 1)] = stop_val_list
        result_pd['test_' + str(time_id + 1)] = stop_test_list
        print(result_pd[['index', 'train_' + str(time_id + 1), 'val_' + str(time_id + 1), 'test_' + str(time_id + 1)]])
        print('********************************{}, {}_times_result*******************************'.format(args['task_name'],
                                                                                                           time_id + 1))
        print("training_result:", round(train_score, 4))
        print("val_result:", round(val_score, 4))
        print("test_result:", round(test_score, 4))

        all_times_train_result.append(round(train_score, 4))
        all_times_val_result.append(round(val_score, 4))
        all_times_test_result.append(round(test_score, 4))

        # Running summary over the repetitions of the current task finished so far.
        print("************************************{}_times_result************************************".format(
            time_id + 1))
        print('the train result of all tasks ({}): '.format(args['metric_name']), np.array(all_times_train_result))
        print('the average train result of all tasks ({}): {:.3f}'.format(args['metric_name'],
                                                                          np.array(all_times_train_result).mean()))
        print('the train result of all tasks (std): {:.3f}'.format(np.array(all_times_train_result).std()))
        print('the train result of all tasks (var): {:.3f}'.format(np.array(all_times_train_result).var()))

        print('the val result of all tasks ({}): '.format(args['metric_name']), np.array(all_times_val_result))
        print('the average val result of all tasks ({}): {:.3f}'.format(args['metric_name'],
                                                                        np.array(all_times_val_result).mean()))
        print('the val result of all tasks (std): {:.3f}'.format(np.array(all_times_val_result).std()))
        print('the val result of all tasks (var): {:.3f}'.format(np.array(all_times_val_result).var()))

        print('the test result of all tasks ({}):'.format(args['metric_name']), np.array(all_times_test_result))
        print('the average test result of all tasks ({}): {:.3f}'.format(args['metric_name'],
                                                                         np.array(all_times_test_result).mean()))
        print('the test result of all tasks (std): {:.3f}'.format(np.array(all_times_test_result).std()))
        print('the test result of all tasks (var): {:.3f}'.format(np.array(all_times_test_result).var()))
        # Rewritten after every repetition, so finished repetitions are already on disk
        # if a later one is interrupted; the file after the last repetition holds all columns.
        result_pd.to_csv(result_dir + args['task_name'] + '_K_BERT_WCL_result.csv', index=False)
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
--------------------------------------------------------------------------------
/data/ADMETlab_data/CYP2C19-sub_canonical.csv:
--------------------------------------------------------------------------------
1 | smiles,group,CYP2C19-sub
2 | CCC1(c2ccccc2)C(=O)NC(=O)NC1=O,training,1
3 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2O,training,1
4 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,1
5 | COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1
6 | CN1CCC[C@H]1c1cccnc1,training,1
7 | CN1C(=O)NC(=O)C(C)(C2=CCCCC2)C1=O,training,1
8 | O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,training,1
9 | CCOC(=O)C1(c2ccccc2)CCN(C)CC1,training,1
10 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,training,1
11 | C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,training,1
12 | Cc1c(N(C)C)c(=O)n(-c2ccccc2)n1C,training,1
13 | CN(C)CCCN1c2ccccc2Sc2ccccc21,training,1
14 | Cc1cc(=O)n(-c2ccccc2)n1C,training,1
15 | CNCCC=C1c2ccccc2CCc2ccccc21,training,1
16 | CCCC(C)C1(CC)C(=O)NC(=O)NC1=O,training,1
17 | CCC(=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1,training,1
18 | CN1C(=O)CC(C)(c2ccccc2)C1=O,training,1
19 | CCCC(C)(COC(N)=O)COC(=O)NC(C)C,training,1
20 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,1
21 | CC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O,training,1
22 | CN(C)CCC(c1ccc(Br)cc1)c1ccccn1,training,1
23 | CCCC(CCC)C(=O)O,training,1
24 | CCC1(c2ccccc2)C(=O)NC(=O)N(C)C1=O,training,1
25 | CCC1(c2ccccc2)C(=O)NCNC1=O,training,1
26 | COc1ccc2c(c1)[C@]13CCCC[C@@H]1[C@H](C2)N(C)CC3,training,1
27 | CN1C(=O)OC(C)(C)C1=O,training,1
28 | C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,training,1
29 | CCC(=O)C1(c2cccc(O)c2)CCN(C)CC1,training,1
30 | CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,training,1
31 | CN(C)CCC=C1c2ccccc2COc2ccccc21,training,1
32 | O=C1CN=C(c2ccccn2)c2cc(Br)ccc2N1,training,1
33 | CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,training,1
34 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21,training,1
35 | O=P1(NCCCl)OCCCN1CCCl,training,1
36 | CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,training,1
37 | COc1ccc(Cl)cc1C(=O)NCCc1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1,training,1
38 | CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1
39 | CO[C@H]1/C=C/O[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N/N5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)/C(C)=C\C=C\[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C,training,1
40 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,training,1
41 | CN(C)CCOC1=Cc2ccccc2Sc2ccc(Cl)cc21,training,1
42 | CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccc(O)cc3)C(=O)N2[C@H]1C(=O)O,training,1
43 | CC(C)(C)NCC(O)COc1nsnc1N1CCOCC1,training,1
44 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](c6cccs6)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,1
45 | Cc1cn([C@H]2C[C@H](N=[N+]=[N-])[C@@H](CO)O2)c(=O)[nH]c1=O,training,1
46 | CCc1cc2c(s1)N(C)C(=O)CN=C2c1ccccc1Cl,training,1
47 | Fc1ccccc1C1=NCC(=S)N(CC(F)(F)F)c2ccc(Cl)cc21,training,1
48 | CN(C)Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2,training,1
49 | CCCCN1CCCCC1C(=O)Nc1c(C)cccc1C,training,1
50 | CCc1cc2c(s1)-n1c(C)nnc1CN=C2c1ccccc1Cl,training,1
51 | COc1ccc(CC(C)NCC(O)c2ccc(O)c(NC=O)c2)cc1,training,1
52 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3C(=C)C[C@@]21CC,training,1
53 | CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1
54 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1,training,1
55 | CN1CCCC(n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1,training,1
56 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21,training,1
57 | CC1(C)NC(=O)N(c2ccc([N+](=O)[O-])c(C(F)(F)F)c2)C1=O,training,1
58 | NS(=O)(=O)Cc1noc2ccccc12,training,1
59 | O=C(NCCN1CCOCC1)c1ccc(Cl)cc1,training,1
60 | CO[C@H]1O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]([C@H]1C)[C@]42OO3,training,1
61 | COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1,training,1
62 | COc1cc2nc(N3CCN(C(=O)C4COc5ccccc5O4)CC3)nc(N)c2cc1OC,training,1
63 | CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,training,1
64 | COc1cc(N)c(Cl)cc1C(=O)N[C@@H]1CCN(CCCOc2ccc(F)cc2)C[C@@H]1OC,training,1
65 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(OC)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,1
66 | CNCC[C@@H](Oc1ccccc1C)c1ccccc1,training,1
67 | O=C1NC(c2ccccc2)(c2ccccc2)C(=O)N1COP(=O)(O)O,training,1
68 | CC(=O)N[C@@H]1[C@@H]([C@@H](O)[C@H](O)CO)O[C@@](Oc2ccc3c(C)cc(=O)oc3c2)(C(=O)O)C[C@H]1O,training,0
69 | CCCCCCCCCCCCCCCCCCCCCCO,training,0
70 | CN1c2c(oc(=O)n(-c3ccccn3)c2=O)-c2ccccc2S1(=O)=O,training,0
71 | CC(CN1c2ccccc2Sc2cccnc21)N(C)C,training,0
72 | CCOC(=O)C1(c2ccccc2)CCN(CCC(C#N)(c2ccccc2)c2ccccc2)CC1,training,0
73 | CCC(=O)[C@@]1(C)[C@H](C)C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@H]3[C@@H](O)C[C@@]21C,training,0
74 | Clc1ccc(CSC(Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1,training,0
75 | CCCCCCCCCCCCCCCC(=O)OC[C@@H](COP(=O)(O)OCCN)OC(=O)CCCCCCCCCCCCCCC,training,0
76 | CC(N)=O,training,0
77 | Clc1ccc(C(OCCN2CCCCC2)c2ccccc2)cc1,training,0
78 | Cn1cnc(N)c2ncnc1-2,training,0
79 | CC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)[C@H](N)c3ccc(O)cc3)[C@H]2SC1,training,0
80 | O=c1ccn([C@H]2C[C@@H](O)[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1,training,0
81 | C[C@H](N)C(=O)O,training,0
82 | O=C(NC(=O)c1cc(F)c(F)cc1Cl)Nc1ccc(C(=O)O)cc1OC(F)(F)F,training,0
83 | C[N+](C)(C)CCO.[Cl-],training,0
84 | NCC1CCC(C(=O)O)CC1,training,0
85 | Cc1cc(N)nc(CCNC(=O)c2ccc(C#N)cc2)c1,training,0
86 | CCNC(=O)c1cn2ncnc(Nc3cc(C(=O)NOC)ccc3C)c2c1C,training,0
87 | O=C(O)C(CP(=O)(O)O)=C(Cl)Cl,training,0
88 | O=C(CO)[C@@H](O)[C@H](O)COP(=O)(O)O,training,0
89 | CCN(CC)CCOc1ccc(C(=C(Cl)c2ccccc2)c2ccccc2)cc1,training,0
90 | O=C([O-])[O-].O=C([O-])[O-].O=C([O-])[O-].[La+3].[La+3],training,0
91 | Cc1nc(-c2ccccn2)nc(NCCc2ccccc2)c1Cl,training,0
92 | CC(C)(C)Nc1nc(C(F)(F)F)nc2ccc(-c3cccc(N)c3)cc12,training,0
93 | NC(=O)c1ccc(Nc2nc(OCC3CCCCC3)c3nc[nH]c3n2)cc1,training,0
94 | O=C1CCO1,training,0
95 | C=CC(=O)Nc1cc(Nc2nccc(-c3cn(C)c4ccccc34)n2)c(OC)cc1N(C)CCN(C)C,training,0
96 | C[N+]1(CC(=O)c2ccc(-c3ccccc3)cc2)[C@H]2CC[C@@H]1CC(OC(=O)[C@H](CO)c1ccccc1)C2,training,0
97 | C[C@H](Cc1cc2c(c(C(N)=O)c1)N(CCCO)CC2)NCCOc1ccccc1OCC(F)(F)F,training,0
98 | O=C(O)C1=C[C@@H](O)[C@H](OS(=O)(=O)O)CO1,training,0
99 | O=C(O)CCc1nc2c(=O)[nH]c(=O)[nH]c2n(C[C@H](O)[C@@H](O)[C@H](O)CO)c1=O,training,0
100 | O=C1CN(S(=O)(=O)/C=C/c2ccc(Cl)s2)CCN1Cc1cc2cnccc2[nH]1,training,0
101 | O=C(O)c1ccc(Cc2ccccc2Cl)o1,training,0
102 | CCCCCC(=O)N[C@@H](CCS)C(=O)O,training,0
103 | O=CN(O)CCOc1cccc(C(=O)c2ccccc2)c1,training,0
104 | CC(C)Nc1cc(NC2CCC(N)CC2)nc2c(C#N)cnn12,training,0
105 | OC(c1ccccc1)(c1ccccc1)C12CC[N+](CCOCc3ccccc3)(CC1)CC2,training,0
106 | CC(=O)O[C@H](C(=O)Nc1cccc2c1C(=O)C(=O)NC2=O)c1cccc(Cl)c1,training,0
107 | CCCOc1ccnc2[nH]cc(-c3ccnc(N)n3)c12,training,0
108 | C[C@H]1CC(=O)NN=C1c1ccc(NC2=C(Cc3cccc(I)c3)C(=O)CCC2)cc1,training,0
109 | Nc1nc(=O)n([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@@H]2O)cc1Br,training,0
110 | N[C@H](COc1cncc2nc(-c3ccncc3)ccc12)Cc1c[nH]c2ccccc12,training,0
111 | CNS(=O)(=O)Nc1ccc(-c2ccccc2)n(CC(=O)N[C@H](C(=O)C(F)(F)F)C(C)C)c1=O,training,0
112 | COC(=O)[C@@H](C)c1ccc(-c2ccccc2)c(F)c1,training,0
113 | COc1ccc(F)c(F)c1C(=O)c1cnc(NC2CCN(S(C)(=O)=O)CC2)nc1N,training,0
114 | O=C(Nc1ccccc1)Nc1ccccc1,training,0
115 | N#Cc1ccc(C(c2ccc(C#N)cc2)n2cncn2)cc1,training,0
116 | O=C(N/N=C/c1cc(Br)c(O)c(Br)c1O)c1ccc(Cl)cc1,training,0
117 | O=C(O)CCc1c[nH]c2ccccc12,training,0
118 | Cc1cn[nH]c1,training,0
119 | CCC(=O)CCC(C)(C)O,training,0
120 | C=CCN(C)CCCCCCOc1ccc2c(-c3ccc(Br)cc3)coc2c1,training,0
121 | O=C(O)[C@@H](Cc1c[nH]c2ccccc12)NS(=O)(=O)c1ccc(N2CCC(c3ccccc3)CC2)cc1,training,0
122 | O=C(O)CCCc1cccs1,training,0
123 | COc1ccc(S(=O)(=O)N2CCc3cc(O)c(OC)cc3[C@@H]2C(=O)NO)cc1,training,0
124 | O=S(=O)(CCNCCCO)c1ccc(Nc2nc(OCC3CCCCC3)c3nc[nH]c3n2)cc1,training,0
125 | CC(C)(C)OC(=O)N[C@H](C=O)C1CCCCC1,training,0
126 | Cc1ccc(C(=O)c2ccc(CC(=O)O)n2C)cc1,training,0
127 | Clc1ccc2c(c1Cl)CNCCC2,training,0
128 | CN(C)C=O,training,0
129 | CC[C@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H](C2[C@H]5O)N3[C@@H]1O,training,0
130 | CN1CCN(c2ccc(C(=O)Nc3n[nH]c4cn(C(=O)Cc5cccs5)cc34)cc2)CC1,training,0
131 | Cc1cc(C)cc(Oc2c(CSCc3ccco3)c(C)[nH]c(=O)c2I)c1,training,0
132 | CC(CCc1ccccc1)NCC(O)c1ccc(O)c(C(N)=O)c1,training,0
133 | Oc1ccc([C@H]2Sc3cc(O)ccc3O[C@@H]2c2ccc(OCCN3CCCCC3)cc2)cc1,training,0
134 | CC(C)(C)c1cc(C(C)(C)C)c(NC(=O)c2c[nH]c3ccccc3c2=O)cc1O,training,0
135 | CN1CCc2cccc3c2[C@H]1Cc1ccc(O)c(O)c1-3,training,0
136 | CC(=O)N1N=C(c2ccccc2Cl)C[C@H]1c1cccc(O)c1,training,0
137 | CNCC(=O)c1nnc(C(C)(C)C)o1,training,0
138 | C[C@@H]1CCN(C(=O)CC#N)C[C@@H]1N(C)c1ncnc2[nH]ccc12,training,0
139 | Cc1ncc(COP(=O)(O)O)c(CNCCCC[C@@H](N)C(=O)O)c1O,training,0
140 | N[C@H](C(=O)O)c1ccc(O[C@H]2O[C@@H](CO)[C@H](O)[C@@H](O)[C@H]2O[C@H]2O[C@@H](CO)[C@H](O)[C@@H](O)[C@@H]2O)cc1,training,0
141 | NC(=O)c1cnccn1,training,0
142 | CCC1(CC)C(=O)NCC(C)C1=O,training,0
143 | O=[N+]([O-])c1ccc(OP(=O)(O)Oc2ccc([N+](=O)[O-])cc2)cc1,training,0
144 | OC[C@@H]1O[C@H](O[C@@H]2[C@@H](O)C[C@@H](O)O[C@@H]2CO)[C@@H](O)[C@H](O)[C@H]1O,training,0
145 | CC(=O)O[C@H]1C[C@@H]2CC[C@@H]3[C@H](CC[C@@]4(C)[C@H]3C[C@H]([N+]3(C)CCCCC3)[C@@H]4OC(C)=O)[C@@]2(C)C[C@@H]1N1CCCCC1,training,0
146 | CCN(CC)CC#CCOC(=O)C(O)(c1ccccc1)C1CCCCC1,training,0
147 | Cc1cccc2c(=O)[nH]c(CCCN3CC=C(c4ccc(F)cc4)CC3)nc12,training,0
148 | O=C(NO)[C@@H](O)[C@@H](O)COP(=O)(O)O,training,0
149 | C[C@@H](CC(=O)O)[C@H](N)C(=O)O,training,0
150 | O=C(O)c1sccc1S(=O)(=O)Nc1ccc(Cl)cc1,training,0
151 | c1ccc(C2CC2)c(OCC2=NCCN2)c1,training,0
152 | NC(=O)c1ncn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c1N,training,0
153 | CC(SC(=O)c1cccs1)C(=O)NCC(=O)O,training,0
154 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1,training,0
155 | NS(=O)(=O)c1ccc(C(=O)NCc2ccc(F)cc2F)cc1,training,0
156 | c1ccc2c(CCc3ccncc3)c[nH]c2c1,training,0
157 | Nc1nc(N)nc(-c2cc3ccccc3cc2Br)n1,training,0
158 | NC(=O)C1=CN([C@@H]2O[C@@H](COP(=O)([O-])OP(=O)([O-])OC[C@@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H]([O-])[C@H]3[O-])[C@H](O)[C@H]2O)C=C[C@H]1n1cc2c(=S)n([C@@H](C(=O)N3CCCCCC3)c3ccccc3)cnc2n1,training,0
159 | CC=CC1=C(C(=O)O)N2C(=O)[C@@H](NC(=O)[C@H](N)c3ccc(O)cc3)[C@H]2SC1,training,0
160 | NC(=O)C1=C[N+]=CC([C@@H]2O[C@@H](COP(=O)([O-])OP(=O)(O)OC[C@@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@H]3O)[C@H](O)[C@H]2O)=C1,training,0
161 | O=C(O)C[C@H](N=C(O)CP(=O)(O)O)C(=O)O,training,0
162 | O=C(O)c1ccccc1/N=N/c1ccc(O)c2ccccc12,training,0
163 | CCC(C)n1ncn(-c2ccc(N3CCN(c4ccc(OC[C@H]5CO[C@](Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,training,0
164 | Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1,training,0
165 | CC(C)(COP(=O)(O)OP(=O)(O)OC[C@@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCC[C@H](F)C(=O)O,training,0
166 | Nc1cccc(-c2cnn3ccc(Nc4cccc(Cl)c4)nc23)c1,training,0
167 | O=c1cc[nH]c(=O)[nH]1,training,0
168 | COCCOc1cc(C)nc(N)n1,training,0
169 | Oc1ccc(C2=Cc3ccc(O)cc3OC2)cc1,training,0
170 | O=P1(N(CCCl)CCCl)NCCCO1,training,1
171 | CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2,training,1
172 | CCCC(C)(COC(N)=O)COC(N)=O,training,1
173 | COc1ccc2[nH]cc(CCNC(C)=O)c2c1,training,1
174 | CN(C)CCOC(c1ccc(Cl)cc1)c1ccccn1,training,1
175 | Cc1cnc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)cn1,training,1
176 | CN/C(=C\[N+](=O)[O-])NCCSCc1ccc(CN(C)C)o1,training,1
177 | NC(N)=Nc1nc(CSCCC(N)=NS(N)(=O)=O)cs1,training,1
178 | Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1,training,1
179 | COc1ccc(-c2oc3ncnc(NCCO)c3c2-c2ccc(OC)cc2)cc1,training,0
180 | CN(C)c1cccc2c(S(=O)(=O)N[C@H](Cc3ccc(OS(=O)(=O)c4cccc5c(N(C)C)cccc45)cc3)C(=O)O)cccc12,training,0
181 | Nc1nc(=O)c2sc(=O)n([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O)c2[nH]1,training,0
182 | CC12C=CC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)COC(=O)c1cccc(S(=O)(=O)[O-])c1.[Na+],training,0
183 | C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,training,0
184 | C[C@H](O)[C@H](CCc1cccc2nc(-c3ccc(Cl)cc3)oc12)n1cnc(C(N)=O)c1,training,0
185 | NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,training,0
186 | C[C@]12CC[C@H]3[C@@H](CC[C@H]4C[C@@H](O)CC[C@@]43C)[C@@H]1C[C@@H](Br)C2=O,training,0
187 | O=CN1CCCCC1,training,0
188 | Cc1cccc(Nc2ccccc2C(=O)O)c1C,training,0
189 | CCN[C@@H]1CN(CCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,0
190 | C=CC(C)(C)OC[C@@H]1O[C@H](O[C@H]2C3=C([C@H](C)COC(C)=O)C[C@@H](O)[C@]3(C)C=C3[C@H](COC)CC[C@@H]3[C@@H](C)[C@@H]2O)[C@@H](O)[C@H](OC(C)=O)[C@H]1O,training,0
191 | CCC1(c2ccccc2)NC(=O)N(C)C1=O,training,1
192 | O=C1CCC(N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1
193 | O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1,training,1
194 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1,training,1
195 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCc4cc(O)ccc4[C@H]3CC[C@@]21C,training,1
196 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1
197 | CN1[C@H]2CC[C@@H]1CC(OC(c1ccccc1)c1ccccc1)C2,training,1
198 | CN1C(=O)CC(=O)N(c2ccccc2)c2cc(Cl)ccc21,training,1
199 | COc1ccc(C(CN(C)C)C2(O)CCCCC2)cc1,training,1
200 | N=C(N)c1ccc(C=Cc2ccc(C(=N)N)cc2O)cc1.O=S(=O)(O)CCO.O=S(=O)(O)CCO,training,0
201 | O=C1CC[C@@]2(O)[C@H]3Cc4ccc(O)c5c4[C@@]2(CCN3CC2CC2)[C@H]1O5,training,0
202 | COc1ccc(N2CCN(c3cccc(C)c3)CC2)nn1,training,0
203 | O=C(N[C@@H]1Cc2ccccc2[C@H]1O)c1cc2cc(Cl)sc2[nH]1,training,0
204 | O=S1(=O)NCCCNc2nc(ncc2Br)Nc2cccc1c2,training,0
205 | N=C(O)CN1CCCC1=O,training,0
206 | O=C(O)[C@@H](CF)OP(=O)(O)O,training,0
207 | NC(=O)c1c[n+]([C@@H]2O[C@@H](COP(=O)(O)OP(=O)(O)OC[C@@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@H]3O)[C@H](O)[C@H]2O)ccc1[C@H](O)C(=O)C[C@H](N)C(=O)O,training,0
208 | CC(C)(C)c1snc([O-])c1C[C@H]([N+])C(=O)O,training,0
209 | Cc1ccc(-n2nccn2)c(C(=O)N2CCN(c3nc4cc(Cl)ccc4o3)CC[C@H]2C)c1,training,0
210 | CCCc1cc(=O)[nH]c(=S)[nH]1,training,0
211 | CCC[C@H]1COc2ccsc2C(=N)N1,training,0
212 | CN[C@@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,0
213 | NNCCc1ccccc1,test,1
214 | O=C(N[C@H](CO)[C@H](O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,test,1
215 | CCOc1ccc(NC(C)=O)cc1,test,1
216 | COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O,test,1
217 | CC(C)/N=C(\N)N=C(N)Nc1ccc(Cl)cc1,test,1
218 | C#CCN(C)C(C)Cc1ccccc1,test,1
219 | CC(C)Cc1ccc(C(C)C(=O)O)cc1,test,1
220 | COC(=O)[C@@H](NC(=O)[C@@H](NC(=O)CC[C@H](O)[C@H](Cc1ccccc1)NC(=O)[C@@H](C)NC(=O)[C@H](C)N)C(C)C)C(C)C,test,0
221 | COc1ccc(CC(N)=O)cc1,test,0
222 | NC(N)=NCc1cccc(I)c1,test,0
223 | COc1ccc(S(=O)(=O)N[C@H](C)C(=O)O)cc1,test,0
224 | N[C@@H](CSCCC(=O)c1ccc(Cl)c(Cl)c1)C(=O)O,test,0
225 | O[C@H](CCCl)c1ccccc1,test,0
226 | N#C[C@@H](O)c1ccccc1,test,0
227 | CC(C)c1cccc(C(C)C)c1O,test,1
228 | N[C@H]1C(O)O[C@H](CO)[C@@H](O)[C@@H]1O,test,1
229 | CC(=O)N[C@H]1[C@H](O)[C@@H](F)[C@H](C(=O)O)O[C@H]1[C@H](O)[C@@H](O)CO,test,0
230 | N[C@H]1[C@H](O)O[C@H](CO)[C@@H](OP(=O)(O)O)[C@@H]1O,test,0
231 | CC(=O)N[C@@H](Cc1cccc2ccccc12)[B-](O)(O)O,test,0
232 | CNCCCN1c2ccccc2CCc2ccccc21,test,1
233 | CN(C)CCCN1c2ccccc2CCc2ccccc21,test,1
234 | CN1C(=O)C(O)N=C(c2ccccc2)c2cc(Cl)ccc21,test,1
235 | CN1C(=O)CN=C(c2ccccc2F)c2cc([N+](=O)[O-])ccc21,test,1
236 | CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,val,1
237 | CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,val,1
238 | CC(C)C(=O)Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,val,1
239 | CN(C)c1ccc(C(=O)O)cc1,val,0
240 | CC(=O)Nc1ccc(C(=O)O)cc1N,val,0
241 | COCCCCC(=NOCCN)c1ccc(C(F)(F)F)cc1,val,0
242 | CN(C)C(=O)Oc1cccc([N+](C)(C)C)c1,val,0
243 | COc1cccc(C=O)c1OP(=O)(O)O,val,0
244 | COC[C@@H](O)[C@H](CC(C)C)NC(=O)[C@H](CC(C)C)NC(=O)OCc1ccccc1,val,0
245 | COc1ccc(/C=C/C(=O)O)cc1O,val,0
246 | CC(=O)C(N)Cc1ccccc1,val,0
247 | NCCc1ccc(O)c(O)c1,val,1
248 | O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1,val,0
249 | COCCc1ccc(OCC(O)CNC(C)C)cc1,val,1
250 | O=C(O)/C=C/c1ccc(B(O)O)cc1,val,0
251 | CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](OS(=O)(=O)O)[C@@H]1O,val,0
252 | OC1OC[C@H](O)[C@@H](O)[C@@H]1O,val,0
253 | CC(C)NCC(O)COc1cccc2ccccc12,val,1
254 | CN(C/C=C/C#CC(C)(C)C)Cc1cccc2ccccc12,val,1
255 | CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,val,1
256 | CC(CN(C)C)CN1c2ccccc2CCc2ccccc21,val,1
257 | O=C1Nc2ccc(Cl)cc2C(c2ccccc2Cl)=NC1O,val,0
258 | CN1C(=O)CN=C(c2ccccc2F)c2cc(Cl)ccc21,val,0
259 | CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,val,1
260 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 | 1632899824522
165 |
166 |
167 | 1632899824522
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
--------------------------------------------------------------------------------
/data/ADMETlab_data/SkinSen_canonical.csv:
--------------------------------------------------------------------------------
1 | smiles,group,SkinSen
2 | CC(=O)OCC(=O)C1(O)C(C)CC2C3CCC4=CC(=O)C=CC4(C)C3=CCC21C,training,0
3 | CCCOc1ccc(Br)c(C(=O)c2ccc(OC)cc2O)c1,training,0
4 | CC=C(C)C=O,training,0
5 | O=C1C(=P(c2ccccc2)(c2ccccc2)c2ccccc2)CCN1c1ccccc1,training,0
6 | C=CC1(C)CC(OC(=O)CSC2CC3CCC(C2)N3C)C2(C)C(C)CCC3(CCC(=O)C32)C(C)C1O,training,0
7 | CCCCc1ccc(C(=O)CC(=O)c2cc(C)c(C)c(C)c2C)cc1,training,0
8 | NN1C=CC=CN1,training,0
9 | CCOc1ccc(C(=O)Cc2ccc(S(C)(=O)=O)cc2)cc1,training,0
10 | CC(C)(C)OC(=O)N1CC(F)CC1C(=O)O,training,0
11 | Cc1nc(COc2ccc(CC(NC(=O)OC(C)(C)C)C3CO3)cc2)cs1,training,0
12 | CC(C)(C)OC(=O)NC(C(=O)N1CC(F)CC1C#N)C(c1ccc(F)cc1)c1ccc(F)cc1,training,0
13 | O=c1[nH]c(=S)[nH]c2c1CCC2,training,0
14 | Cc1ccc(C)c(C(=O)CC(=O)c2cc(C)ccc2C)c1,training,0
15 | CC1C(c2ccccc2)NC(=O)N1C,training,0
16 | CCCOc1ccc(C=CC(=O)N2C(=O)N(C)C(C)C2c2ccccc2)c(C(=O)c2ccc(OC)cc2OCc2ccccc2)c1,training,0
17 | COc1ccc(C2(C#N)CCC(=O)CC2)cc1OC1CCCC1,training,0
18 | CC(=O)c1ccc(Cc2ccc(F)cc2)o1,training,0
19 | CCOc1ccc(-c2nn3ncccc3c2-c2ccc(S(C)(=O)=O)cc2)cc1,training,0
20 | CC(=O)CCCN1C(=O)c2ccccc2C1=O,training,0
21 | Cc1nc(COc2ccc(CC(NC(=O)OC3COC4OCCC34)C(O)CN(CC(C)C)S(=O)(=O)c3ccc4c(c3)OCO4)cc2)cs1,training,0
22 | CC(C)N(C(=O)Cn1c(=O)c(=NOCc2ccccc2)c(=O)n(-c2ccccc2)c2ccccc21)c1ccccc1,training,0
23 | Cc1c2ccc(N)cc2nn1C,training,0
24 | COc1cc(C)c2c(Oc3cccc(C(F)(F)F)c3)c(OC)cc(N)c2n1,training,0
25 | CC(=O)C1CCCCC1=O,training,0
26 | CS(=O)(=O)CCN,training,0
27 | C=C(C)C(=O)OCC(C)O,training,0
28 | CCC(C)C1C(=O)NC(C2Cc3ccccc3C2)C(=O)N1C(C(=O)N1CCOCC1)c1coc(C)n1,training,0
29 | O=C(O)c1c(O)c(-c2ccccc2)nc2ccccc12,training,0
30 | CC1C=CC(C(C)C)CC1,training,0
31 | O=Cc1ccc(-c2ccc(C(F)(F)F)cc2)cc1,training,0
32 | COc1ccc(C2(C#N)CCC(C(=O)O)CC2)cc1OC1CCCC1,training,0
33 | NC(=O)C1CC(F)CN1,training,0
34 | C=C1CC(C(=C)C)CCC1C,training,0
35 | C=C1CCC(C(C)C)CC1,training,0
36 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O,training,0
37 | COc1cc(C)c2c(Cl)c(OC)ccc2n1,training,0
38 | Cc1cc2c(cc1C(F)(F)F)NCC2,training,0
39 | C=CC1(C)CC(OC(=O)CO)C2(C)C(C)CCC3(CCC(=O)C32)C(C)C1O,training,0
40 | Cc1ccc2oc(=O)ccc2c1,training,0
41 | O=C(O)CCCCC(=O)O,training,0
42 | CCNc1nc(Cl)nc(NC(C)C)n1,training,0
43 | CCCCCCCCCBr,training,0
44 | C=C1CC=C(C(C)C)CC1,training,0
45 | CCCCO,training,0
46 | CCCCCCCCCCl,training,0
47 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(OC)CC(C)C(=O)C(C)C(O)C1(C)O,training,0
48 | O=c1ccc2ccccc2o1,training,0
49 | Nc1cc[nH]c(=O)n1,training,0
50 | COC(=O)CCC(C#N)(CCC(=O)OC)c1ccc(OC)c(OC2CCCC2)c1,training,0
51 | COC(=O)OC,training,0
52 | CCOC(=O)C(=NOC(C)(C)C(=O)OC(C)(C)C)c1csc(NC(c2ccccc2)(c2ccccc2)c2ccccc2)n1,training,0
53 | CCOC(=O)c1nnc[nH]1,training,0
54 | CCOC(=O)CC(=O)c1cc(F)c(Cl)nc1Cl,training,0
55 | O=C(C(=O)c1ccco1)c1ccco1,training,0
56 | OCC(O)CO,training,0
57 | CCCCCC,training,0
58 | CCCCCCI,training,0
59 | CC(C)O,training,0
60 | CC(C)N(C#N)C#N,training,0
61 | NCC1OC(OC2C(N)CC(N)C(OC3OC(CO)C(O)C(N)C3O)C2O)C(O)C(O)C1O,training,0
62 | CC(O)C(=O)O,training,0
63 | Cc1c2ccc(Nc3ccnc(Cl)n3)cc2nn1C,training,0
64 | Cc1c2ccc(N(C)c3ccnc(Cl)n3)cc2nn1C,training,0
65 | CCCCN1CCC(CNC(=O)c2c3n(c4ccccc24)CCCO3)CC1,training,0
66 | CC(C)(C)OC(=O)NC(C(=O)O)C(c1ccc(F)cc1)c1ccc(F)cc1,training,0
67 | CCCCCCCC(=O)O,training,0
68 | CCOc1cc(Oc2ccc(C(F)(F)F)cc2Cl)ccc1[N+](=O)[O-],training,0
69 | C=CC(=O)OCC(CO)(COC(=O)C=C)COC(=O)C=C,training,0
70 | Cc1cc(F)ccc1C1CC(=O)C=CN1C(=O)OCc1ccccc1,training,0
71 | Fc1ccc(Oc2ccnc3cc(Cl)cc(Cl)c23)cc1,training,0
72 | COC1C=COC2(C)Oc3c(C)c(O)c4c(O)c(cc(O)c4c3C2=O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,training,0
73 | O=C1NS(=O)(=O)c2ccccc21,training,0
74 | O=C(O)CCC(=O)O,training,0
75 | O=C(O)C(O)C(O)C(=O)O,training,0
76 | C=CC(=O)OCC(CC)(COC(=O)C=C)COC(=O)C=C,training,0
77 | C=C(Cl)Cl,training,0
78 | CCCCCCCC=CC=O,training,1
79 | O=c1[nH]sc2ccccc12,training,1
80 | N#CCCC(Br)(C#N)CBr,training,1
81 | C=CC(=O)N1C(=O)N(C)C(C)C1c1ccccc1,training,1
82 | C(CCCOCC1CO1)CCOCC1CO1,training,1
83 | OCCCCCCCCCCCCBr,training,1
84 | O=C1C=CC2(O)C3Cc4ccc(O)c5c4C2(CCN3)C1O5,training,1
85 | CCCCCCCCCCCCCCCCCCCl,training,1
86 | CCCCCCCCCCI,training,1
87 | CN(N=O)C(=N)N[N+](=O)[O-],training,1
88 | Oc1cccc2ccccc12,training,1
89 | CCCC=CC=O,training,1
90 | CC(C)(C)N(CC(=O)c1ccc(O)c(CO)c1)Cc1ccccc1,training,1
91 | C=CC(=O)OCCO,training,1
92 | Sc1nc2ccccc2s1,training,1
93 | CCCCCCCCCC(C)C=O,training,1
94 | CC(CC(=O)Cl)CC(C)(C)C,training,1
95 | CC1(C)CC(CBr)C(=O)O1,training,1
96 | CN(C)CCCN,training,1
97 | Nc1cc(F)cc(-c2cccnc2)c1,training,1
98 | Cn1sc(Cl)cc1=O,training,1
99 | CC(=O)C(=O)CC(C)C,training,1
100 | C=CCC1(C)C=C(OC)C(=O)CC1,training,1
101 | CCN(CC)CCCCCCO,training,1
102 | CCC=CCCCCC=O,training,1
103 | CC(C)(c1ccc(OCC2CO2)cc1)c1ccc(OCC2CO2)cc1,training,1
104 | C=C1C=CC(C(C)C)CC1,training,1
105 | CCCCCCCCCCCCCCCCCCCCCCBr,training,1
106 | CCCCCCCCCCCCCCCCCCCCBr,training,1
107 | CCCCCCCCCCCCCCCCCBr,training,1
108 | CCCCCCCCCCCCCCCCBr,training,1
109 | CCCCCCBr,training,1
110 | CCCCCCCCCCCCCCCCCCBr,training,1
111 | CCCCCCCCCCCCCCCBr,training,1
112 | CCCCCCCCCCCCCCBr,training,1
113 | CCCCCCCCCCCCCBr,training,1
114 | C=CC(=O)OCCCC,training,1
115 | CCCCOCC1CO1,training,1
116 | CCCCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1
117 | CCCCCCCCCCCCCCCl,training,1
118 | CC(C)=CCCC(C)=CC=O,training,1
119 | CCCCCCCCCCCCCCCCCl,training,1
120 | CCOS(=O)(=O)OCC,training,1
121 | CCC(C=O)CC,training,1
122 | NCCNCCN,training,1
123 | NCCCNCCCN,training,1
124 | CCCCCCCCCCCCOS(C)(=O)=O,training,1
125 | CCOC(=O)CC1CC2CCC(C1)N2C,training,1
126 | C=CC(=O)OCC,training,1
127 | CCC(O)C(CO)CO,training,1
128 | C=C(C)C(=O)OCCOC(=O)C(=C)C,training,1
129 | C=C(C)C(=O)O,training,1
130 | CC(CC=O)CCCC(C)(C)O,training,1
131 | CCCCCCCCCCCCI,training,1
132 | CCCCCCCCCCCCCCCCI,training,1
133 | CC(C)CCCCCC(=O)Cl,training,1
134 | CCCCCCCCCCCCCC(=O)OC(C)C,training,1
135 | CCCCCC=CCC=CCCCCCCCC(=O)O,training,1
136 | CCCCCCC#CC(=O)OC,training,1
137 | C=CC(=O)OC,training,1
138 | CCCCCCCCCCCCCCC=CS(=O)(=O)OC,training,1
139 | C=C(C)C(=O)OC,training,1
140 | CCCCCCCCC(=O)Cl,training,1
141 | CCCCCCCCO,training,1
142 | CCCCCCCCC=CCCCCCCCC(=O)O,training,1
143 | CCOC=C1N=C(c2ccccc2)OC1=O,training,1
144 | CCCCCCCCCCCCCCCC(=O)Cl,training,1
145 | O=C(Oc1ccccc1)c1ccccc1,training,1
146 | CC(CCCCN)C(C)(C)N,training,1
147 | C=CCCCCCCCCC(=O)O,training,1
148 | CC(C)N(C(=O)CN1C(=O)C(NC(=O)Nc2cccc(C(=O)OC(C)(C)C)c2)C(=O)N(c2ccccc2)c2ccccc21)c1ccccc1,training,1
149 | NC1CCCCC1N,training,1
150 | O=S(=O)(Cl)c1ccc2c(c1)OCO2,training,1
151 | ClCc1ccc2ccc3cccc4ccc1c2c34,training,1
152 | Cc1cc(=O)n(-c2ccc(C)c(C)c2)[nH]1,training,1
153 | CCC(C)(C)C1CCC(CC=O)CC1,training,1
154 | CC(C)(C)C(=O)CC(=O)C(C)(C)C,training,1
155 | C=C(C)C(=O)OCC(O)COc1ccc(C(C)(C)c2ccc(OCC(O)COC(=O)C(=C)C)cc2)cc1,training,1
156 | Clc1ccnc(Cl)n1,training,1
157 | CCC=CC=CC=O,training,1
158 | COc1cc(C)c2c(Oc3cccc(C(F)(F)F)c3)c(OC)cc([N+](=O)[O-])c2n1,training,1
159 | Nc1ccccc1Nc1ccccc1,training,1
160 | CCCCCCCCCCCCC(Br)C(=O)O,training,1
161 | O=[N+]([O-])c1ccc(OCc2cccc(F)c2)c(Cl)c1,training,1
162 | Cc1nc2ccccc2c(=O)o1,training,1
163 | O=C(Nc1ccc(Cl)c(Cl)c1)c1cc(Cl)cc(Cl)c1O,training,1
164 | O=C1CCc2ccccc2O1,training,1
165 | CC1(C)CC(N)CC(C)(CN)C1,training,1
166 | C=C1NS(=O)(=O)N=C1c1ccccc1,training,1
167 | CC(C)(O)CCCC1=CCC(C=O)CC1,training,1
168 | O=C(C(=O)c1ccccc1)C1=CCC(Br)(Br)C=C1,training,1
169 | CC(Br)CCCN1C(=O)c2ccccc2C1=O,training,1
170 | CC(I)CCCN1C(=O)c2ccccc2C1=O,training,1
171 | C=C1CC(C(=C)C)CC=C1C,training,1
172 | C=C1CC(C)(C)OC1=O,training,1
173 | O=C(O)c1cccc(-c2cc(Cl)cc([N+](=O)[O-])c2O)c1,training,1
174 | COc1cc(C)c2c(Cl)c(OC)cc([N+](=O)[O-])c2n1,training,1
175 | COc1cc([N+](=O)[O-])c2[nH]c(=O)cc(C)c2c1Cl,training,1
176 | COc1cc2c(cc1C(F)(F)F)NCC2,training,1
177 | Cc1ncccc1Oc1ccc(N)cn1,training,1
178 | CCN(CC)CCCCCCBr,training,1
179 | Cc1c2ccccc2c(C)c2c1ccc1ccccc12,training,1
180 | CON=C1CN(c2nc3c(cc2F)c(=O)c(C(=O)O)cn3C2CC2)CC1CN,training,1
181 | CCCCCCCC(Br)CCCCCC,training,1
182 | CC(C)C1=CC2=CCC3C(C)(C(=O)O)CCCC3(C)C2CC1,training,1
183 | CC1=CCC(C(C)C)C=C1,training,1
184 | CC1=CC=C(C(C)C)CC1,training,1
185 | c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34,training,1
186 | O=C1C=CC(=O)C=C1,training,1
187 | O=C1CCO1,training,1
188 | CCCCCCCCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1
189 | CCCCCCC1=NC(C)(C)C(=O)O1,training,1
190 | CCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1
191 | C=C(C)C1CCC(C)=C(N=O)C1,training,1
192 | Clc1ccccc1C(c1ccccc1)(c1ccccc1)n1ccnc1,training,1
193 | C(=NC1CCCCC1)=NC1CCCCC1,training,1
194 | CCOC(=O)C=CC(=O)OCC,training,1
195 | COS(=O)(=O)OC,training,1
196 | CS(C)=O,training,1
197 | CN1C2CCC1CC(OS(C)(=O)=O)C2,training,1
198 | NCCN,training,1
199 | CC(C)=CCCC(C)=CCCC(C)=CC=O,training,1
200 | CC(C)=CCCC(C)=CCO,training,1
201 | O=CCCCC=O,training,1
202 | O=C(CS)OCC(O)CO,training,1
203 | NCCNCCO,training,1
204 | O=C(NCNC(=O)NC1C(=O)NC(=O)N1CO)NC1C(=O)NC(=O)N1CO,training,1
205 | CCCCCCCCCCCCOS(=O)(=O)O,training,1
206 | CCC=CCC=CCC=CCCCCCCCC(=O)O,training,1
207 | CCCCCCCCCCCCS(=O)(=O)OC,training,1
208 | COS(C)(=O)=O,training,1
209 | COC(=O)C(C)=O,training,1
210 | CCCCCCCCCCCCCCCCC(CS(=O)(=O)O)C(=O)OC,training,1
211 | CCN(CC)CCN(Cc1ccc(-c2ccc(C(F)(F)F)cc2)cc1)C(=O)Cn1c(SCc2ccc(F)cc2)nc(=O)c2c1CCC2,training,1
212 | CCN(N=O)C(N)=O,training,1
213 | CC(C)N(C(=O)CNc1ccccc1Nc1ccccc1)c1ccccc1,training,1
214 | CN(N=O)C(N)=O,training,1
215 | COC1=CC=C2C3Cc4ccc(O)c5c4C2(CCN3C)C1O5,training,1
216 | O=C(O)C(=O)O,training,1
217 | O=c1[nH]cco1,training,1
218 | CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O,training,1
219 | C=C(C)C1CC=C(C=O)CC1,training,1
220 | CC(C)(C)c1ccc(OCC2CO2)cc1,training,1
221 | c1ccncc1,training,1
222 | C=C(C)C1CC=C(C)C(=O)C1,training,1
223 | O=C(Oc1ccc([N+](=O)[O-])cc1)OC1COC2OCCC12,training,1
224 | CCOC(=S)S,training,1
225 | CCCCCCCCCCCC(=O)OC(C)C(=O)OC(C)C(=O)O,training,1
226 | CC(C)=CCCC(C)=CCCC(C)=CCCC=C(C)CCC=C(C)CCC=C(C)C,training,1
227 | CN(C)C(=S)SSC(=S)N(C)C,training,1
228 | C=CCCCCCCCCC=O,training,1
229 | C=Cc1ccncc1,training,1
230 | C[N+](C)(C)c1ccc2c(c1)C(=NNc1ccc(N)c([N+](=O)[O-])c1)C(=O)C=C2,training,0
231 | CCCCCCCCC(=O)O,training,0
232 | CNC1C(OC2C(OC3C(O)C(O)C(NC(=N)N)C(O)C3NC(=N)N)OC(C)C2(O)C=O)OC(CO)C(O)C1O,training,0
233 | Cc1cc(=O)n(-c2ccccc2)[nH]1,training,1
234 | Nc1ccc(N)c2c1C(=O)c1ccccc1C2=O,training,1
235 | O=C1C=CC(=O)O1,training,1
236 | O=C1OC(=O)c2ccccc21,training,1
237 | C=C(C=CCC(C)C)CC,training,0
238 | CC(C)(CC1Cc2ccccc2C1)NCC(O)COc1cc(CCC(=O)O)cc(F)c1F,training,0
239 | CCCOc1ccc2c(c1)C(O)(c1ccc(OC)cc1OCc1ccccc1)C(C(=O)N1C(=O)N(C)C(C)C1c1ccccc1)C2c1ccc2c(c1)OCO2,training,0
240 | CC(C)N(C(=O)CN1C(=O)C(N)C(=O)N(c2ccccc2)c2ccccc21)c1ccccc1,training,0
241 | CCCCN1C(=O)C(C(O)C2CCCCC2)NC(=O)C12CCN(Cc1ccc(Oc3ccc(C(=O)O)cc3)cc1)CC2,training,0
242 | OCCCCCCCl,training,0
243 | COc1cc(N)c2nccc(C)c2c1,training,0
244 | COc1ccc(Nc2ccc(CCNCC(O)c3ccc(O)c4[nH]c(=O)ccc34)cc2)cc1-c1ccccc1,training,0
245 | CCCCCCCCCCCCCCCCCCI,training,0
246 | O=CNc1cc(C(O)CBr)ccc1OCc1ccccc1,training,0
247 | CC(O)CO,training,0
248 | CN(C)CCOC(C)(c1ccccc1)c1ccc(Br)cc1,training,1
249 | C=CC(=O)OCC(CC)CCCC,training,1
250 | O=C(OCc1ccccc1)c1ccccc1,training,1
251 | CCCCCCCCCCCBr,training,1
252 | CCCCC1=NC(C)(C)C(=O)O1,training,1
253 | CCCCCCCCCI,training,1
254 | CCCCCCCCCCCCCCI,training,1
255 | C=C(C)C1CC=C(C)CC1,training,1
256 | C=CC(C)(O)CCC=C(C)C=O,training,1
257 | O=C(O)CCCCCCCCCCCBr,training,1
258 | Clc1ncnc2ccc(I)cc12,training,1
259 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O,training,1
260 | O=CC=O,training,1
261 | CCCCCCCCC=CCCCCCCCCOS(C)(=O)=O,training,1
262 | O=c1c(-c2ccccc2)c1-c1ccccc1,training,1
263 | O=c1c(O)c(O)c1=O,training,1
264 | CC(C)(C)OC(=O)N1CC(F)CC1C(N)=O,training,0
265 | N#CC1CC(F)CN1C(=O)C(N)C(c1ccc(F)cc1)c1ccc(F)cc1,training,0
266 | CC(C)(C)N(Cc1ccccc1)CC(O)c1ccc(O)c(CO)c1,training,0
267 | COc1cc(C)c2c(Oc3cccc(C(F)(F)F)c3)c(OC)cc(NC(C)CCCN3C(=O)c4ccccc4C3=O)c2n1,training,0
268 | COc1ccc2nc(Cl)cc(C)c2c1,training,0
269 | O=c1[nH]cnc2ccc(I)cc12,training,0
270 | COc1ccc2[nH]c(=O)cc(C)c2c1,training,0
271 | CCCCBr,training,0
272 | O=C(O)C=CC(=O)O,training,0
273 | O=C(O)c1ccc2c(c1)C(=O)OC2=O,training,1
274 | O=CC=C(c1ccccc1)c1ccccc1,training,1
275 | CCCCCCCCCCCCBr,training,1
276 | O=C1OC2(c3ccc(O)cc3Oc3cc(O)ccc32)c2ccc(N=C=S)cc21,training,1
277 | FOC1=C2CCC(C2)C1,training,1
278 | CC=CC=CC=O,training,1
279 | CN1C2CCC1CC(O)C2,training,1
280 | Clc1nc(Cl)nc(Cl)n1,training,1
281 | CC1=C(C=O)C(C)(C)CC=C1,training,1
282 | CCC=C1OC(=O)c2ccccc21,training,1
283 | CCCCCn1c(=O)[nH]c(=O)c2[nH]c(Cl)nc21,training,1
284 | N=C1CC(=Nc2ccc(N)cc2)C(=N)CC1=Nc1ccc(N)cc1,training,1
285 | CC(=O)C(C)=O,training,1
286 | CCCCCCCCCCCCCCCCCC1=NC(C)(C)C(=O)O1,training,1
287 | C=CC(=O)OCC(COC(=O)C=C)C(CC)OC(=O)C=C,training,1
288 | CCCCNC(=O)OC#CCI,training,1
289 | CCCCCCCCCCCCCCCCS(=O)(=O)OC,training,1
290 | O=C1OC(=O)C2CCCCC12,training,1
291 | O=C(OOC(=O)c1ccccc1)c1ccccc1,training,1
292 | COc1cc(C(=O)CC(=O)C(C)(C)C)cc(OC)c1OC,test,0
293 | CNC(C)c1cc(C(F)(F)F)cc(C(F)(F)F)c1,test,0
294 | O=C(O)C(=NOCc1ccccc1)C(=O)O,test,0
295 | CCCOc1cccc(C(=O)O)c1,test,0
296 | Cc1ccc(N)cc1S(N)(=O)=O,test,0
297 | CCOC(=O)c1ccc(N)cc1,test,0
298 | CCOC(=O)c1ccccc1C(=O)OCC,test,0
299 | CCOC(=O)CC(=O)c1ccccc1,test,0
300 | COC(=O)c1ccccc1O,test,0
301 | Nc1ccc(S(=O)(=O)O)cc1,test,0
302 | CC(C)(C)OC(=O)c1cccc(N)c1,test,0
303 | COc1cc(C=O)ccc1O,test,0
304 | CCCCCC(=O)CC(=O)c1ccccc1,test,1
305 | CCc1ccc(CC)c(C(=O)CC(C)=O)c1,test,1
306 | O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1,test,1
307 | Nc1ccc(OCCO)c(N)c1,test,1
308 | O=Cc1cc(O)ccc1Br,test,1
309 | COc1cc(C)ccc1O,test,1
310 | Nc1ccc(Cl)c(Cl)c1,test,1
311 | C=CCc1cc(C)c(OC)c(O)c1,test,1
312 | C=CCc1ccc(OC)cc1,test,1
313 | Nc1ccc(N)cc1,test,1
314 | CCCCCC(C=O)=Cc1ccccc1,test,1
315 | CCCCC(C=O)=Cc1ccccc1,test,1
316 | CC(=O)C=Cc1ccccc1,test,1
317 | CCCc1ccc(O)c(OC)c1,test,1
318 | CC=Cc1ccc(O)c(OC)c1,test,1
319 | CCCCCCCCCCCCOC(=O)c1cc(O)c(O)c(O)c1,test,1
320 | Oc1cccc(O)c1,test,1
321 | COc1ccc(C=O)cc1OC,test,1
322 | O=Cc1cc(O)ccc1C=C(CC(=O)O)C(=O)O,test,1
323 | CC(=O)C(=O)c1ccccc1,test,1
324 | Cc1cc(N)ccc1N,test,1
325 | Nc1cc([N+](=O)[O-])cc(Cl)c1O,test,1
326 | Cc1ccc(NCCO)cc1O,test,1
327 | CC=Cc1ccc(O)c(OC)c1C,test,1
328 | CCN(CCNS(C)(=O)=O)c1ccc(N)c(C)c1,test,1
329 | NCc1cccc(CN)c1,test,1
330 | CC(C)N(C(=N)N)C(=N)Nc1ccc(Cl)c(Cl)c1,test,1
331 | Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl,test,1
332 | CCCN(CCC)c1c([N+](=O)[O-])cc(C(F)(F)F)cc1[N+](=O)[O-],test,1
333 | CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1,test,0
334 | Cc1cc(O)ccc1N,test,1
335 | O=C(O)Cc1ccc(C(=O)O)cc1,test,1
336 | Nc1ccc(N(CO)CCO)cc1[N+](=O)[O-],test,1
337 | C=CCc1cc(OC)c(O)cc1C,test,1
338 | OCC=Cc1ccccc1,test,1
339 | O=CC=Cc1ccccc1,test,1
340 | CC(C=O)Cc1ccc(C(C)C)cc1,test,1
341 | C=CCc1ccc(O)c(OC)c1,test,1
342 | O=[N+]([O-])c1ccc(CBr)cc1,test,1
343 | Cc1ccc(CCC=O)cc1,test,1
344 | COc1ccc(C(C)=O)cc1,test,0
345 | CCOC(=O)C(Cc1ccc(O)cc1)NC(=O)OC(C)(C)C,test,0
346 | CC=Cc1cc(C)c(O)c(OC)c1,test,1
347 | O=[N+]([O-])c1ccc(S(=O)(=O)O)c([N+](=O)[O-])c1,test,1
348 | Nc1cccc(N)c1,test,1
349 | CCOCc1cc(OC)c(B(O)O)c(OC)c1,val,0
350 | CCC(N)c1ccccc1,val,0
351 | CCCOc1ccc(Br)c(C(=O)O)c1,val,0
352 | NS(=O)(=O)c1cccc(CCCCOCCCCCCBr)c1,val,0
353 | COc1ccc(C=O)cc1O,val,0
354 | O=C(O)c1ccc(O)cc1,val,0
355 | Nc1ccccc1C(=O)O,val,0
356 | O=Cc1ccccc1,val,0
357 | Clc1ccccc1,val,0
358 | C=CCc1ccc(O)c(OC(C)C)c1,val,0
359 | CCC(=O)c1cccc(Cl)c1,val,0
360 | CCCOC(=O)c1ccc(O)cc1,val,0
361 | Nc1ccc(S(N)(=O)=O)cc1,val,0
362 | CC=Cc1ccc(OC)cc1,val,1
363 | Nc1ccc(NCCO)c([N+](=O)[O-])c1,val,1
364 | O=C(CC(=O)C(F)(F)F)c1ccccc1,val,1
365 | CNc1ccc(O)cc1,val,1
366 | Cc1ccc(N)cc1O,val,1
367 | C=CCc1cc(C)c(O)c(OC)c1,val,1
368 | CCCCCCC(C=O)=Cc1ccccc1,val,1
369 | CC(C=O)=Cc1ccccc1,val,1
370 | CC(C=O)c1ccccc1,val,1
371 | Nc1ccccc1,val,1
372 | BrCc1ccccc1,val,1
373 | CC=CC(=O)Oc1c(C(C)CCCCCC)cc([N+](=O)[O-])cc1[N+](=O)[O-],val,1
374 | CC=Cc1ccc(O)c(OC(C)C)c1,val,1
375 | CNc1cccc(NC)c1,val,1
376 | COc1ccc(NC(=O)CC(C)=O)cc1,val,1
377 | CC(=O)CC(=O)c1cc(C)c(C)c(C)c1C,val,1
378 | CCC(=O)C=Cc1ccc(OC)cc1,val,1
379 | Oc1ccc(O)cc1,val,1
380 | CC(=O)C(C)C(=O)c1ccccc1,val,1
381 | Nc1cccc(O)c1,val,1
382 | O=C(Cl)c1ccc(F)c(Cl)c1,val,1
383 | CCOC(=O)CC(=O)c1cc(C)c(C)c(C)c1C,val,1
384 | CCOC(=O)c1ccc(CBr)cc1,val,1
385 | N#Cc1c(Cl)c(Cl)c(Cl)c(C#N)c1Cl,val,1
386 | CCCc1ccc(OC)c(O)c1,val,1
387 | CCOC(=O)c1ccc(I)cc1,val,1
388 | O=CCc1ccccc1,val,1
389 | CC(C=O)Cc1ccc(C(C)(C)C)cc1,val,1
390 | CC(CC(=O)Oc1ccc(S(=O)(=O)O)cc1)CC(C)(C)C,val,1
391 | CN(C)c1ccc(N=O)cc1,val,1
392 | CCCOC(=O)c1cc(O)c(O)c(O)c1,val,1
393 | CCOc1cc(C=O)ccc1O,val,0
394 | COC(=O)c1ccc(O)cc1,val,0
395 | CC(=O)CC(=O)c1cc(C)ccc1C,val,1
396 | Nc1ccccc1O,val,1
397 | Nc1ccc(N)c([N+](=O)[O-])c1,val,1
398 | CCCCCCCCC(=O)NCCC(CCC(=O)O)c1ccccc1S(=O)(=O)O,val,1
399 | COc1cc(CC#N)c([N+](=O)[O-])cc1C(F)(F)F,val,0
400 | O=C(O)c1ccccc1O,val,0
401 | N#CSc1ccc(N)c([N+](=O)[O-])c1,val,1
402 | CC(C)CC=C(C=O)c1ccccc1,val,1
403 | CCCCCCCCCCCCCC[N+](C)(C)Cc1ccccc1,val,1
404 | CC=Cc1ccc(OC(C)C)c(OC)c1,val,1
405 | COC(=O)c1ccc(CBr)cc1,val,1
406 | Cc1cccc(O)c1O,val,1
407 |
--------------------------------------------------------------------------------
/data/ADMETlab_data/CYP1A2-sub_canonical.csv:
--------------------------------------------------------------------------------
1 | smiles,group,CYP1A2-sub
2 | C=C(NC)C(=O)O,training,0
3 | O=P(O)(O)O[C@@H]1C(O)[C@H](OP(=O)(O)O)[C@@H](O)C(O)[C@H]1O,training,0
4 | C[C@@H](C(=O)N1CCOCC1)N1CC[C@H](NS(=O)(=O)c2n[nH]c(-c3ccc(Cl)s3)n2)C1=O,training,0
5 | O=C(Nc1ccccn1)Nc1cccc2c1[C@H]1CCCN1C2=O,training,0
6 | C1CCN(C2CCNCC2)CC1,training,0
7 | c1ccc([C@@H]2CO2)cc1,training,0
8 | CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,training,0
9 | Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](CC(C)C)NC(=O)c1ccc(N)cc1,training,0
10 | CC(C)C[C@H](N)P(=O)(O)O,training,0
11 | OCCOCCOCCO,training,0
12 | CN1CC[C@]23c4c5ccc(O)c4O[C@H]2CCC[C@H]3[C@H]1C5,training,0
13 | CSCC[C@@H](NC(=O)CCC(=O)O)C(=O)O,training,0
14 | O=C1NC(=O)C(Cc2ccc3cc(OCc4ccccc4F)ccc3c2)S1,training,0
15 | Cc1c(NC(=O)NC(=O)c2ccc(Cl)cc2Cl)ccc(OCCCC(=O)O)c1C,training,0
16 | CN(C)C[C@@H](O)COc1ccc(Nc2nccc(Nc3cc(Cl)ccc3Cl)n2)cc1,training,0
17 | CN(CC[C@H](N)CC(=O)N[C@H]1C=C[C@H](N2C=C[C@@](N)(O)NC2=O)O[C@@H]1C(=O)O)C(=N)N,training,0
18 | CC(CC1c2ccccc2CCc2ccccc21)CN(C)C,training,0
19 | CCOC(=O)c1c(C)nn(-c2cccc([N+](=O)[O-])c2)c1C,training,0
20 | N[C@@H](C[SeH](=O)=O)C(=O)O,training,0
21 | OC[C@H]1O[C@@H](n2cnc3c2N=CNC[C@H]3O)C[C@@H]1O,training,0
22 | CCOC(=O)c1c(C)nn(-c2ccc(N)cc2)c1C,training,0
23 | O=c1ncccn1[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,training,0
24 | O=c1ccn([C@@H]2O[C@@H](COP(=O)(O)OP(=O)(O)O[C@@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3F)[C@H](O)[C@H]2O)c(=O)[nH]1,training,0
25 | NCc1ccccc1CC(=O)N[C@@H]1C(=O)N2C(C(=O)O)=C(CSc3nnnn3CC(=O)O)CS[C@H]12,training,0
26 | CC(=O)[C@@H](O)[C@H](O)COP(=O)(O)O,training,0
27 | CC1=CC(=O)N2CC(=O)N(C)c3ccc(Cl)cc3C2(c2ccccc2)O1,training,0
28 | CC1(C)SCCN(S(=O)(=O)c2ccc(OCC#CCN)cc2)[C@H]1C(=O)NO,training,0
29 | Cc1ccc(C(=O)Nc2ccc(S(=O)(=O)O)c3cc(S(=O)(=O)O)cc(S(=O)(=O)O)c23)cc1NC(=O)c1cccc(NC(=O)Nc2cccc(C(=O)Nc3cc(C(=O)Nc4ccc(S(=O)(=O)O)c5cc(S(=O)(=O)O)cc(S(=O)(=O)O)c45)ccc3C)c2)c1,training,0
30 | O=C(Nc1ccc2[nH]ccc2c1)c1cc(F)cc(N2CCOCC2)c1,training,0
31 | CN(C(=O)N1CC(c2cc(F)ccc2F)=C[C@H]1c1ccccc1)C1CCNCC1,training,0
32 | Nc1ncnc2c1ncn2CCOCP1(=O)OCC[C@@H](c2cccc(Cl)c2)O1,training,0
33 | CCOC(=O)N1CCC(Nc2cc(C)ccn2)CC1,training,0
34 | N#C/C(=C1/SC[C@@H](c2ccc(Cl)cc2Cl)S1)n1ccnc1,training,0
35 | CC(C)(CO)[C@@H](O)C(=O)OP(=O)(O)OC[C@@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@H]1O,training,0
36 | O=C(O)c1ccccc1/N=N/c1ccc(O)cc1,training,0
37 | C[C@H](NC(=O)[C@@H]1CCCN1C(=O)[C@@H]1CCCN1C(=O)[C@@H](O)[C@H](N)Cc1ccccc1)C(N)=O,training,0
38 | COc1cc(O)c(C(=O)/C=C/c2ccc(O)cc2)cc1CC=C(C)C,training,0
39 | C[C@H](O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O[C@@H]1O[C@H](CO)[C@H](O)[C@H](O)[C@H]1O[C@@H]1O[C@@H](C)[C@@H](O)[C@@H](O)[C@@H]1O,training,0
40 | NCCC(=O)NCC[C@H](N)C(=O)O,training,0
41 | O=C1NC(=O)C(c2ccc(Oc3ccccc3)cc2)(N2CCN(c3ncccn3)CC2)C(=O)N1,training,0
42 | O=C(O)C[C@H]1CC[C@@H](C(=O)O)N1,training,0
43 | OC[C@@H]1[C@H](O)[C@@H](O)[C@H](O)c2nc(-c3ccccc3)cn21,training,0
44 | O=C([O-])c1ccc[nH]1,training,0
45 | OC[C@H](O)CNc1ncnc2[nH]c(-c3ccccc3)c(-c3ccccc3)c12,training,0
46 | Cc1ncc(-c2ccnc(Nc3ccc(N4CCN(C(=O)CO)CC4)cc3)n2)n1C(C)C,training,0
47 | Cc1ncc(COP(=O)(O)O)c(CN[C@H](CCC(=O)O)C(=O)O)c1O,training,0
48 | CCC(CC)(CC(=O)Nc1cccc(/C=C/c2nc(C3CCC3)cs2)c1)C(=O)O,training,0
49 | Cn1sc(=O)c2cc(S(N)(=O)=O)ccc21,training,0
50 | CO[C@@H]1[C@@H](OC(N)=O)[C@@H](O)[C@H](Oc2ccc3c(O)c(NC(=O)c4ccc(O)c(CC=C(C)C)c4)c(=O)oc3c2C)OC1(C)C,training,0
51 | Nc1ccc2cc3ccc(N)cc3nc2c1,training,0
52 | NCC(=O)Nc1ccc(OCc2ccccc2)cc1,training,0
53 | C[C@H]1CNC(=O)c2[nH]c3ccc(C(=O)Nc4nc(C(=O)NCCN(C)C)cs4)cc3c21,training,0
54 | Cc1ncc(CO)c(C=O)c1O,training,0
55 | COc1cc(CC2=C(N)N=C(N)[N+]=C2)cc(OC)c1OC,training,0
56 | N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O,training,0
57 | Cc1cccc([C@H](C)c2c[nH]cn2)c1C,training,0
58 | C[C@H]1CN=C(N)c2sccc2O1,training,0
59 | CC(C)N=c1cc2n(-c3ccc(Cl)cc3)c3ccccc3nc-2cc1Nc1ccc(Cl)cc1,training,0
60 | Brc1cnc2c(NCc3cncnc3)cc(-c3ccccc3)cn12,training,0
61 | CCOc1cccc(-c2ccc(NC(=O)/C(C#N)=C(/C)O)cc2)c1,training,0
62 | CC(C)(C)C(=O)Oc1ccc2c(c1)nc(NC(=O)c1cccc([N+](=O)[O-])c1)n2CCCO,training,0
63 | OC[C@@H]1O[C@@H](OCCCCCCC2CCCCC2)[C@H](O)[C@@H](O)[C@@H]1O[C@@H]1O[C@@H](CO)[C@H](O)[C@@H](O)[C@@H]1O,training,0
64 | CO[C@H]1C[C@H]2OC[C@@]2(OC(C)=O)[C@H]2[C@H](OC(=O)c3ccccc3)[C@]3(O)C[C@H](OC(=O)[C@H](O)[C@@H](NC(=O)OC(C)(C)C)c4ccccc4)C(C)=C([C@@H](OC)C(=O)[C@]12C)C3(C)C,training,0
65 | CCC(CC)NC(=O)C[C@@H](C(N)=O)C(C)(C)C,training,0
66 | O=C(O)Cn1c(=O)n(Cc2ccc(Br)cc2F)c(=O)c2ccc(Cl)cc21,training,0
67 | O=C1N[C@@H](Cc2ccc(O)cc2)C(=O)N[C@H]1Cc1ccc(O)cc1,training,0
68 | CC(C)CCC[C@@](C)(O)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@@]21C,training,0
69 | CC(C)OP(=O)(O)OC[C@@H](N)C(=O)O,training,0
70 | CCCNC(=O)[C@H]1O[C@@H]1C(=O)N[C@H](C(=O)N1CCC[C@H]1C(=O)OC)[C@@H](C)CC,training,0
71 | N[C@@H](CC(=O)N1CCC[C@H]1CNC(=O)c1ccccc1)Cc1ccccc1F,training,0
72 | CC[C@@H](C)[C@H](N)C(=O)O,training,0
73 | O=c1cnn([C@@H]2O[C@@H](COP(=O)(O)O)[C@H](O)[C@H]2O)c(=O)[nH]1,training,0
74 | COc1cc2c(Nc3c(Cl)ccc4c3OCO4)ncnc2cc1OCCCN1CCCCC1,training,0
75 | CCCCCCCCCCC1=C(C)C(=O)C(OC)=C(OC)C1=O,training,0
76 | CCOC(O)=N[C@@H]1CC[C@@H]2[C@@H](C1)C[C@H]1C(=O)O[C@H](C)[C@H]1[C@H]2/C=C/c1ccc(-c2cccc(F)c2)cn1,training,0
77 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,0
78 | O=C(O)[C@@H](Cc1ccccc1)[C@H](Cc1ccc2c(c1)OCO2)C(=O)O,training,0
79 | O=C(Cc1ccc(O)cc1)Nc1ncc(-c2ccc(O)cc2)nc1Cc1ccccc1,training,0
80 | N[C@@H](CCSC(F)F)C(=O)O,training,0
81 | CCC(=O)N(c1ccccc1F)C1(c2ccccc2)CCN(CCn2nnn(CC)c2=O)CC1,training,0
82 | CN1CCC23NC(=O)CC(c4cc(Cl)ccc4O2)C3C1,training,0
83 | CC(C)NCC(O)COc1cccc2c1C=CC2,training,0
84 | NCCCCN,training,0
85 | Nc1nc2c(c(=O)[nH]1)N=C(COP(=O)(O)OP(=O)(O)OP(=O)(O)OP(=O)(O)OC[C@@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@H]1O)CN2,training,0
86 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1,training,0
87 | O=C(c1ccccc1)c1ccc(O)c(O)c1[N+](=O)[O-],training,0
88 | COc1ccc(-c2ccccc2)cc1N1CC(=O)NS1(=O)=O,training,0
89 | CN1CCN(c2ccccc2C/N=N\C(N)=S)CC1,training,0
90 | Oc1c(Br)cc(-c2nc3ccccc3o2)cc1Br,training,0
91 | Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1,training,0
92 | CS(=O)(=O)CC[C@@H](N)C(=O)O,training,0
93 | CC(C)CCCCCCCCC(=O)N(C)[C@H](CO)C(=O)N[C@H](C)C(=O)NCC(=O)N(C)[C@@H]1C(=O)N[C@H](C)C(=O)N[C@H](C(=O)O)Cc2ccc(O)c(c2)-c2cc1ccc2O,training,0
94 | CCC(N)=O,training,0
95 | CC(=O)CCC(=O)O,training,0
96 | CCOP(=O)(O)OP(=O)(O)O,training,0
97 | NC(=O)[C@@H](CS)NC(=O)CCCCCNC(=O)[C@@H](Cc1ccc(C(F)(F)P(=O)(O)O)cc1)NC(=O)[C@@H](CC(=O)O)NC(=O)Cc1ccc(C(F)(F)P(=O)(O)O)cc1,training,0
98 | O=C(N/N=C/c1cc(Br)c(O)c(Br)c1O)c1ccc(Cl)cc1,training,0
99 | COC(=O)[C@H]1[C@@H](OC(=O)c2ccccc2)C[C@@H]2CC[C@H]1N2C,training,0
100 | O=c1[nH]c(=O)n(COCCO)cc1Cc1ccccc1,training,0
101 | O=C(CCC(=O)Nc1ccc2c(c1)C(=O)C(=O)NC2=O)Nc1cccc(OCCF)c1,training,0
102 | C1=CC2C3=CC=CC4C5C=CC=CN5[Pt](N2C=C1)N34,training,0
103 | C[C@@H](NC(N)=O)C(=O)O,training,0
104 | Cc1ncc([N+](=O)[O-])n1CCO,training,0
105 | O=C1/C(=N\O)c2c(/C=C/c3ccccc3)cccc2N1Cc1cc(F)cc2c1OCOC2,training,0
106 | CC(C)=CCNc1ncnc2[nH]cnc12,training,0
107 | C[C@@H](N)[C@H]1N[C@@H](CN)C(=O)N1CC(=O)O,training,0
108 | O=S(=O)(O)c1ccc(O)c(/N=N/c2c(S(=O)(=O)O)cc3c(S(=O)(=O)O)c(Nc4ncnc(Nc5ccc6c(O)c(/N=N/c7cc(S(=O)(=O)O)ccc7O)c(S(=O)(=O)O)cc6c5S(=O)(=O)O)n4)ccc3c2O)c1,training,0
109 | CN(C)C[C@H](O)Cn1c2ccc(Br)cc2c2cc(Br)ccc21,training,0
110 | O=C(O)[C@H](S)[C@H](S)C(=O)O,training,0
111 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)(C)C)cc2)nc(-c2ncccn2)nc1OCCO,training,0
112 | O=C(O)c1cn(-c2ccc(F)cc2)c2cc(N3CCNCC3)c(F)cc2c1=O,training,0
113 | CC(C)(C)c1onc([O-])c1C[C@@H]([N+])C(=O)[O-],training,0
114 | O=C1NC(=O)c2c1c(-c1ccccc1)cc1[nH]c3ccc(O)cc3c21,training,0
115 | O=c1ccn([C@@H]2O[C@@H](CO)[C@@H](OP(=O)(O)O)[C@H]2F)c(=O)[nH]1,training,0
116 | OC(c1ccccc1)(c1ccccc1)C12CC[N+](CCOCc3ccccc3)(CC1)CC2,training,0
117 | NC(=O)NO,training,0
118 | CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CS[C@H](c2cccs2)CN(CC(=O)O)C1=O,training,0
119 | OC[C@H]1O[C@@H](n2cnc3cc(Cl)c(Cl)cc32)[C@H](O)[C@@H]1O,training,0
120 | CNCCNS(=O)(=O)c1cccc2cnccc12,training,0
121 | CCNC(=O)[C@H]1O[C@@H](n2cnc3c(N)nc(C#CCC4CCC(C(=O)OC)CC4)nc32)[C@H](O)[C@@H]1O,training,0
122 | O=C([O-])[O-],training,0
123 | CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0
124 | CC(C)(C)NCC(O)COc1cccc2c1CCC(=O)N2,training,0
125 | CC(C)[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O,training,0
126 | CC1(C)Cc2c(-c3ccccc3)c(-c3ccc(Cl)cc3)c(CC(=O)O)n2C1,training,0
127 | NCC1CCC(C(=O)O)CC1,training,0
128 | O=C(N[C@@H]1Cc2ccccc2NC1=O)c1cc2cc(Cl)sc2[nH]1,training,0
129 | Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](Cc1ccccc1)NC(=O)[C@H](C(C)C)N1CCCNC1=O,training,0
130 | CO[C@@]1(NC(=O)Cc2cccs2)C(=O)N2C(C(=O)O)=C(COC(N)=O)CS[C@@H]21,training,0
131 | CN(C)CCOC1=Cc2ccccc2Sc2ccc(Cl)cc21,training,1
132 | CN(C)CCc1c[nH]c2ccc(C[C@H]3COC(=O)N3)cc12,training,1
133 | O=C1Cc2cc(CCN3CCN(c4nsc5ccccc45)CC3)c(Cl)cc2N1,training,1
134 | CC(=O)CC(c1ccccc1)c1c(O)c2ccccc2oc1=O,training,1
135 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C=O)[C@H]3[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,1
136 | CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,1
137 | Nc1nc(N)c2nc(-c3ccccc3)c(N)nc2n1,training,1
138 | CN(C)CCOc1ccc(/C(=C(/CCCl)c2ccccc2)c2ccccc2)cc1,training,1
139 | Clc1ccc2nsnc2c1NC1=NCCN1,training,1
140 | c1ccc2[nH]c(-c3cscn3)nc2c1,training,1
141 | CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2,training,1
142 | O=c1[nH]c(=O)n(C2CCCO2)cc1F,training,1
143 | Nc1c2c(nc3ccccc13)CCCC2,training,1
144 | CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,training,1
145 | CCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,1
146 | CS(=O)(=O)c1ccc(C2=C(c3ccccc3)C(=O)OC2)cc1,training,1
147 | CC(C)c1nc(CN(C)C(=O)N[C@H](C(=O)N[C@@H](Cc2ccccc2)C[C@H](O)[C@H](Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1,training,1
148 | Nc1nc2ccc(OC(F)(F)F)cc2s1,training,1
149 | CO[C@H]1/C=C/O[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N/N5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)/C(C)=C\C=C\[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C,training,1
150 | CO[C@H]1/C=C/O[C@@]2(C)Oc3c(C)c(O)c4c(c3C2=O)C2=NC3(CCN(CC(C)C)CC3)NC2=C(NC(=O)/C(C)=C\C=C\[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C)C4=O,training,1
151 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/CO)C(C)(C)CCC1,training,1
152 | C#CCN[C@@H]1CCc2ccccc21,training,1
153 | CN/C(=C\[N+](=O)[O-])NCCSCc1ccc(CN(C)C)o1,training,1
154 | C=C[C@H]1CN2CC[C@H]1C[C@H]2[C@H](O)c1ccnc2ccc(OC)cc12,training,1
155 | NC(=O)c1cnccn1,training,1
156 | CCCNCC(O)COc1ccccc1C(=O)CCc1ccccc1,training,1
157 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,training,1
158 | COc1cc(NC(C)CCCN)c2ncccc2c1,training,1
159 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1,training,1
160 | Cc1ccc(=O)n(-c2ccccc2)c1,training,1
161 | O=c1[nH]c2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,training,1
162 | Cc1cc(=O)n(-c2ccccc2)n1C,training,1
163 | COc1ccnc(CS(=O)c2nc3ccc(OC(F)F)cc3[nH]2)c1OC,training,1
164 | O=C1O[Pt]2([N+][C@@H]3CCCC[C@H]3[N+]2)OC1=O,training,1
165 | COc1ccc2nc(S(=O)Cc3ncc(C)c(OC)c3C)[nH]c2c1,training,1
166 | Cc1cc2c(s1)Nc1ccccc1N=C2N1CCN(C)CC1,training,1
167 | CNCCC=C1c2ccccc2CCc2ccccc21,training,1
168 | CN1CCC[C@H]1c1cccnc1,training,1
169 | CN1CCN2c3ccccc3Cc3ccccc3C2C1,training,1
170 | CC1=CC(=O)c2ccccc2C1=O,training,1
171 | COc1ccc2[nH]cc(CCNC(C)=O)c2c1,training,1
172 | CNCCCC12CCC(c3ccccc31)c1ccccc12,training,1
173 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNC(C)C3)c(F)c21,training,1
174 | CCCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,1
175 | CC(C)Cn1cnc2c(N)nc3ccccc3c21,training,1
176 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,training,1
177 | COC1=C(OC)C(=O)C(CCCCCCCCCCO)=C(C)C1=O,training,1
178 | CN1C(=O)NC(=O)C(C)(C2=CCCCC2)C1=O,training,1
179 | COC1=CC(=O)C[C@@H](C)[C@]12Oc1c(Cl)c(OC)cc(OC)c1C2=O,training,1
180 | CN1[C@H]2CCC[C@@H]1CC(NC(=O)c1nn(C)c3ccccc13)C2,training,1
181 | CN[C@@H]1CCc2[nH]c3ccc(C(N)=O)cc3c2C1,training,1
182 | CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1
183 | O=c1[nH]cc(F)c(=O)[nH]1,training,1
184 | Fc1ccc(C(c2ccc(F)cc2)N2CCN(C/C=C/c3ccccc3)CC2)cc1,training,1
185 | O=C(NCC1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F,training,1
186 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](C)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,1
187 | CCO,training,1
188 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2O,training,1
189 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,1
190 | CNCC[C@H](Oc1cccc2ccccc12)c1cccs1,training,1
191 | CN(C)CCC=C1c2ccccc2COc2ccccc21,training,1
192 | CCN(CC)C(=S)SSC(=S)N(CC)CC,training,1
193 | CCCCC[C@H](O)/C=C/[C@H]1[C@H](O)CC(=O)[C@@H]1C/C=C\CCCC(=O)O,training,1
194 | CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,training,1
195 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(C)=O)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1,training,1
196 | CN(C)N=Nc1[nH]cnc1C(N)=O,training,1
197 | CN(C)CCC=C1c2ccccc2C=Cc2ccccc21,training,1
198 | COC(=O)[C@H](c1ccccc1Cl)N1CCc2sccc2C1,training,1
199 | COc1cc(N)c(Cl)cc1C(=O)N[C@@H]1CCN(CCCOc2ccc(F)cc2)C[C@@H]1OC,training,1
200 | COc1ccccc1OCCNCC(O)COc1cccc2[nH]c3ccccc3c12,training,1
201 | O=NN(CCCl)C(=O)NCCCl,training,1
202 | NC(=O)N1c2ccccc2C=Cc2ccccc21,training,1
203 | CCCCN1CCCCC1C(=O)Nc1c(C)cccc1C,training,1
204 | O=C1CN=C(c2ccccn2)c2cc(Br)ccc2N1,training,1
205 | CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,training,1
206 | CN1CCCC(n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1,training,1
207 | C[C@@H](O[C@H]1OCCN(Cc2n[nH]c(=O)[nH]2)[C@H]1c1ccc(F)cc1)c1cc(C(F)(F)F)cc(C(F)(F)F)c1,training,1
208 | O=C1CN2Cc3c(ccc(Cl)c3Cl)N=C2N1,training,1
209 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,1
210 | Cc1c(N(C)C)c(=O)n(-c2ccccc2)n1C,training,1
211 | Cc1[nH]cnc1CN1CCc2c(c3ccccc3n2C)C1=O,training,1
212 | CCCSc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1
213 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1,training,1
214 | Clc1cccc(Cl)c1NC1=NCCN1,training,1
215 | CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1
216 | N=C(N)c1ccc(OCCCCCOc2ccc(C(=N)N)cc2)cc1,training,1
217 | CC(c1cc2ccccc2s1)N(O)C(N)=O,training,1
218 | CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12,training,1
219 | CCC(=O)NCC[C@@H]1CCc2ccc3c(c21)CCO3,training,1
220 | C[C@@H](NCCCc1cccc(C(F)(F)F)c1)c1cccc2ccccc12,training,1
221 | O=C1CCC(N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1
222 | COc1ccc([C@@H]2CC(=O)c3c(O)cc(O)cc3O2)cc1O,training,1
223 | Cc1oncc1C(=O)Nc1ccc(C(F)(F)F)cc1,training,1
224 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-],training,1
225 | O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1,training,1
226 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1,training,1
227 | Cn1c(=O)c2[n-]cnc2n(C)c1=O,training,1
228 | CN(C)S(=O)(=O)c1ccc2c(c1)N(CCCN1CCC(CCO)CC1)c1ccccc1S2,training,1
229 | Cc1ccc(-c2ncc(Cl)cc2-c2ccc(S(C)(=O)=O)cc2)cn1,training,1
230 | C[C@H]1CNCCc2ccc(Cl)cc21,training,1
231 | CCCC(=O)OCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl,training,1
232 | CCCN(CCc1cccs1)[C@H]1CCc2c(O)cccc2C1,training,1
233 | CC1=NN(c2ccc(C)c(C)c2)C(=O)/C1=N\Nc1cccc(-c2cccc(C(=O)O)c2)c1O,training,1
234 | CN1CC2c3ccccc3Oc3ccc(Cl)cc3C2C1,training,1
235 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O,training,1
236 | Cn1c(CCCC(=O)O)nc2cc(N(CCCl)CCCl)ccc21,training,1
237 | CC(=O)[C@@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@@H](c3ccc(N(C)C)cc3)C[C@@]21C,training,1
238 | CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21,training,0
239 | CC(C)CN(C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,training,0
240 | C=C(C)[C@@H]1CC[C@@]2(C)O[C@H]2C1,training,0
241 | CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)N/C=C/S,training,0
242 | CCCOc1ccc(S(=O)(=O)NCCC2CCCN2C)cc1-c1nc(=O)c2c([nH]1)c(CCC)nn2C,training,0
243 | C=C[C@H](N)C(=O)O,training,0
244 | CC(=O)OC[C@@H]1CS[C@H]2[C@H](NC(=O)CCCC[C@@H](NC(=O)C[N+])C(=O)[O-])C(=O)N2[C@H]1C(=O)[O-],training,0
245 | CCCCCCCCCCS,training,0
246 | CC(C)N1CCC(NC(=O)c2cc3ccccc3n2CC(=O)Nc2ccc(Cl)cc2)CC1,training,0
247 | N[C@@H](CP(=O)(O)O)C(=O)O,training,0
248 | COc1c(C(=O)NC2CCN3CCCC2C3)ccc(N)c1Cl,training,0
249 | Cc1cnc(NCC(F)(F)c2ccccn2)c(=O)n1CC(=O)NCc1ncccc1F,training,0
250 | NS(=O)(=O)c1cc(C(=O)O)cc(N2CCCC2)c1Oc1ccccc1,training,0
251 | COCCNS(=O)(=O)c1ccc(Nc2nccc(-c3cnc(C)n3C(C)C)n2)cc1,training,0
252 | COc1cc(C2c3cc4c(cc3C(O)C(CO)C2C(=O)O)OCO4)cc(OC)c1OC,training,0
253 | O=c1[nH]cnc2sc3c(c12)CCCC3,training,0
254 | Cc1cc(O)c(C(=O)N[C@@H](C(=O)N[C@@H]2C(=O)N3C(C(=O)O)=C(CSc4nnnn4C)CS[C@H]23)c2ccc(O)cc2)cn1,training,0
255 | CC[C@H]1OC(=O)[C@H](C)C(=O)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(OC)C[C@@H](C)C(=O)[C@H](C)[C@H]2N(CCCCn3cnc(-c4cccnc4)c3)C(=O)O[C@]12C,training,1
256 | CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC,training,1
257 | O=c1[nH]c2ccccc2n1CCCN1CCC(n2c(=O)[nH]c3cc(Cl)ccc32)CC1,training,1
258 | CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,training,1
259 | C(=Cc1ccccc1)CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,1
260 | O=c1[nH]c2cc(Cl)ccc2o1,training,1
261 | CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1
262 | CC(C)NCC(O)COc1ccc(CCOCC2CC2)cc1,training,1
263 | COc1cc(C(C)=O)ccc1OCCCN1CCC(c2noc3cc(F)ccc23)CC1,training,1
264 | CCOc1ccc(Cc2cc([C@@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)ccc2Cl)cc1,training,1
265 | COc1ccc(-n2nc(C(N)=O)c3c2C(=O)N(c2ccc(N4CCCCC4=O)cc2)CC3)cc1,training,1
266 | CN=C(O)c1ccccc1Sc1ccc2c(/C=C/c3ccccn3)[nH]nc2c1,training,1
267 | Nc1cccc2c1C(=O)N(C1CCC(=O)NC1=O)C2=O,training,1
268 | c1cc(Nc2cc(C3CC3)n[nH]2)nc(Nc2ccc3[nH]cnc3c2)n1,training,0
269 | C=C(O)C1=C(C)C2=Cc3c(C)c(CCC(=O)O)c4n3[Fe@]35n6c(c(C)c(C(C)=O)c6=CC6=[N+]3C(=C4)C(CCC(=O)O)=C6C)=CC1=[N+]25,training,0
270 | Cn1c(=O)ccc2ccccc21,training,0
271 | CN[C@H]1[C@@H](O)[C@@H](NC)[C@H](O)[C@H]2O[C@@H]3O[C@H](C)CC(=O)[C@]3(O)O[C@H]12,training,0
272 | O=c1[nH]c(=O)n(COCCO)c(O)c1Cc1cccc(OCc2ccccc2)c1,training,0
273 | CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@@H](c3ccc(N(C)C)cc3)C[C@@]21C,training,0
274 | [N+]CCCC[C@H](N)C(N)=O,training,0
275 | CC(=O)N/C(=C\c1ccc(CC(=O)O)c(C=O)c1)C(=O)N[C@@H]1CCCCN(Cc2ccc(-c3ccccc3)cc2)C1=O,training,0
276 | C=CC1=C(C)C2=Cc3c(C=C)c(C)c4n3[Fe]35n6c(c(C)c(CCC(=O)O)c6=C(c6ccccc6)C6=[N+]3C(=C4)C(C)=C6CCC(=O)O)=CC1=[N+]25,training,0
277 | Cc1cc(C(=O)N[C@@H](C)C(=O)N[C@H](C(=O)N[C@@H](CC(C)C)C(=O)N[C@H](/C=C/C(=O)OCc2ccccc2)C[C@@H]2CCNC2=O)C(C)C)no1,training,0
278 | CC/C=C/C[C@@H]1C(=O)C=C[C@H]1CCCCCCCC(=O)O,training,0
279 | CC(C(O)c1ccc(O)cc1)N1CCC(Cc2ccccc2)CC1,training,0
280 | CC1=C(C(=O)O)N[C@H]([C@H](NC(=O)Cc2cccs2)C(=O)O)SC1,training,0
281 | CNc1nc[nH]c2c([C@@H]3O[C@@H](CO)[C@H](O)[C@H]3O)nnc1-2,training,0
282 | O=C(OCCN1CCOCC1)c1cccnc1Nc1cccc(C(F)(F)F)c1,training,0
283 | O[C@@H]1Cc2c(ccc3ccc4ccccc4c23)[C@@H](O)[C@@H]1O,training,0
284 | COc1ccnc2[nH]cc(-c3ccnc(N)n3)c12,training,0
285 | CN(C)C[C@H](O)COc1ccc(Nc2cc(Nc3c(F)cccc3F)ncn2)cc1,training,0
286 | Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1,training,1
287 | COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1
288 | CN1CCN(CC/C=C2/c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1,training,1
289 | CC1CN(c2cc3c(cc2F)c(=O)c(C(=O)O)cn3-c2ccc(F)cc2F)CCN1,training,1
290 | CCCN(CCC)CCc1cccc2c1CC(=O)N2,training,1
291 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1
292 | Cc1nccn1CC1CCc2c(c3ccccc3n2C)C1=O,training,1
293 | CN1CCN2c3ncccc3Cc3ccccc3C2C1,training,1
294 | COC(F)(F)C(Cl)Cl,training,1
295 | O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1,training,1
296 | CN1C(=O)CN=C(c2ccccc2F)c2cc([N+](=O)[O-])ccc21,training,1
297 | CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4C(=O)[C@H](Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)C[C@H]21,training,1
298 | CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,training,1
299 | O=C1c2cccc3c2[C@H](CCC3)CN1[C@@H]1CN2CCC1CC2,training,1
300 | CN(C)CCc1c[nH]c2ccc(Cn3cncn3)cc12,training,1
301 | CC(=O)CC(c1ccc([N+](=O)[O-])cc1)c1c(O)c2ccccc2oc1=O,training,1
302 | CNCc1ccc(-c2[nH]c3cc(F)cc4c3c2CCNC4=O)cc1,training,1
303 | O=C(NO)c1ccccc1O,test,0
304 | O=C(O)C(=O)/C=C/c1ccccc1O,test,0
305 | O=C(O)[C@@H](CS)CCCc1ccccc1,test,0
306 | CC(C)Cc1ccc(C(C)C(O)=NO)cc1,test,0
307 | C[C@](N)(C(=O)O)c1ccc(C(=O)O)cc1,test,0
308 | CC(C)c1cccc(C(C)C)c1O,test,1
309 | CC(C)/N=C(\N)N=C(N)Nc1ccc(Cl)cc1,test,1
310 | CNNCc1ccc(C(=O)NC(C)C)cc1,test,1
311 | CCOc1ccc(NC(C)=O)cc1,test,1
312 | COCCCCC(=NOCCN)c1ccc(C(F)(F)F)cc1,test,1
313 | CCN[C@@H](C)Cc1cccc(C(F)(F)F)c1,test,1
314 | CC(C)(C)NCC(O)c1cc(Cl)c(N)c(Cl)c1,test,1
315 | COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O,test,1
316 | CC(NC(C)(C)C)C(=O)c1cccc(Cl)c1,test,1
317 | OCc1ccccc1,test,1
318 | CC(C)OC(=O)c1cc(NC(=S)OC(C)C)ccc1Cl,test,0
319 | CO[C@H]1[C@@H](O)O[C@@H](C)[C@H](O)[C@H]1O,test,0
320 | Oc1ccc2ccccc2c1O,test,0
321 | CC(C)NCC(O)COc1cccc2ccccc12,test,1
322 | Cn1c(=O)c2[nH]cnc2n(C)c1=O,test,1
323 | Cn1cnc2c1c(=O)[nH]c(=O)n2C,test,1
324 | CC(=O)CCCCn1c(=O)c2c(ncn2C)n(C)c1=O,test,1
325 | CC[C@H](OC(C)=O)C(C[C@@H](C)N(C)C)(c1ccccc1)c1ccccc1,test,0
326 | CCN(CC)CCOC(=O)C(O)(c1ccccc1)c1ccccc1,test,0
327 | CC[C@H](OC(C)=O)C(C[C@H](C)N(C)C)(c1ccccc1)c1ccccc1,test,0
328 | CCC(=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1,test,1
329 | O=C(OCCOCCO)c1ccccc1Nc1cccc(C(F)(F)F)c1,test,0
330 | Cc1ccc(Nc2c(F)cccc2Cl)c(CC(=O)O)c1,test,1
331 | CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,test,1
332 | CN(C)CCCN1c2ccccc2Sc2ccccc21,test,1
333 | CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,test,1
334 | C[C@]12CC[C@@H]3c4ccc(OS(=O)(=O)[O-])cc4CC[C@H]3[C@@H]1CCC2=O,test,1
335 | O=C(O)C(=O)Cc1ccc(O)cc1,val,0
336 | N[C@@H](Cc1ccccc1)C(=O)O,val,0
337 | N[C@@H](CCC(=O)N[C@H](CSc1ccc([N+](=O)[O-])cc1[N+](=O)[O-])C(=O)NCC(=O)O)C(=O)O,val,0
338 | N[C@@H](CCC(=O)N[C@H](CS[C@H](O)N(O)c1ccc(Br)cc1)C(=O)NCC(=O)O)C(=O)O,val,0
339 | COC[C@@H](NC(C)=O)C(=O)NCc1ccccc1,val,0
340 | O=C(CCCl)NCc1ccccc1,val,0
341 | CC(Cc1ccccc1)NN,val,0
342 | C=CC(=O)c1ccc(CCCCCC)cc1,val,0
343 | CC(=O)Nc1ccc(O)cc1,val,1
344 | CCN(CC)CC(=O)Nc1c(C)cccc1C,val,1
345 | CC(C)C(=O)Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,val,1
346 | NCCc1ccc(O)c(O)c1,val,1
347 | NC(N)=NN=Cc1c(Cl)cccc1Cl,val,1
348 | CC(C)C[C@H](N=C(O)CN=C(O)c1cc(Cl)ccc1Cl)B(O)O,val,1
349 | NS(=O)(=O)c1ccc(CCC(=O)O)cc1,val,0
350 | C#CCN(C)C(C)Cc1ccccc1,val,1
351 | Cc1cccc(C)c1OCC(C)N,val,1
352 | C1CCOCC1,val,0
353 | CC(=O)N[C@H]1[C@H](O)C[C@H](P(=O)(O)O)O[C@H]1[C@H](O)[C@@H](O)CO,val,0
354 | O=C(O)[C@@H]1C[C@H](O)[C@@H](O)[C@H](O)O1,val,0
355 | CC(=O)N[C@@H](Cc1cccc2ccccc12)[B-](O)(O)O,val,0
356 | CN(C/C=C/C#CC(C)(C)C)Cc1cccc2ccccc12,val,1
357 | COc1ccc2cc(CCC(C)=O)ccc2c1,val,1
358 | COc1ccc2cc([C@H](C)C(=O)O)ccc2c1,val,1
359 | Cn1c(=O)c2c(ncn2C)n(C)c1=O,val,1
360 | CN(C)CCC(O)(c1ccccc1)c1ccccc1Cl,val,0
361 | CN(C)CCOC(c1ccccc1)c1ccccc1,val,1
362 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1
363 | CC(CN(C)C)CN1c2ccccc2Sc2ccc(C#N)cc21,val,1
364 | CN(C)CCCN1c2ccccc2CCc2ccccc21,val,1
365 | CNCCCN1c2ccccc2CCc2ccccc21,val,1
366 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CCC2=O,val,1
367 | C[C@]12CC[C@@H]3c4ccc(OS(=O)(=O)O)cc4CC[C@H]3[C@@H]1CCC2=O,val,1
368 |
--------------------------------------------------------------------------------
/data/ADMETlab_data/DILI_canonical.csv:
--------------------------------------------------------------------------------
1 | smiles,group,DILI
2 | CC(=O)OCC[N+](C)(C)C,training,0
3 | C[N+](C)(C)CC(=O)[O-],training,0
4 | NCCCCCC(=O)O,training,0
5 | CS(C)=O,training,0
6 | CC(C)(CO)C(O)C(=O)NCCC(=O)O,training,0
7 | NC(=O)c1cnccn1,training,1
8 | Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1,training,0
9 | O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,training,1
10 | Nc1c2c(nc3ccccc13)CCCC2,training,0
11 | CCCSc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1
12 | NCCCC(O)(P(=O)(O)O)P(=O)(O)O,training,0
13 | NC12CC3CC(CC(C3)C1)C2,training,0
14 | NCCCNCCSP(=O)(O)O,training,0
15 | CCC1(c2ccc(N)cc2)CCC(=O)NC1=O,training,0
16 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,0
17 | CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,training,1
18 | COc1cc(NS(C)(=O)=O)ccc1Nc1c2ccccc2nc2ccccc12,training,1
19 | COc1ccc(CCN2CCC(Nc3nc4ccccc4n3Cc3ccc(F)cc3)CC2)cc1,training,0
20 | O=C(O)COc1nn(Cc2ccccc2)c2ccccc12,training,1
21 | CCc1oc2ccccc2c1C(=O)c1cc(Br)c(O)c(Br)c1,training,1
22 | CC(Cc1ccccc1)N(C)Cc1ccccc1,training,0
23 | CC(C[N+](C)(C)C)OC(N)=O,training,0
24 | OC(CCN1CCCCC1)(c1ccccc1)C1CC2C=CC1C2,training,0
25 | Oc1c(Cl)cc(Cl)cc1Sc1cc(Cl)cc(Cl)c1O,training,1
26 | CCCCN1CCCCC1C(=O)Nc1c(C)cccc1C,training,0
27 | O=C1CC2(CCCC2)CC(=O)N1CCCCN1CCN(c2ncccn2)CC1,training,0
28 | CS(=O)(=O)OCCCCOS(C)(=O)=O,training,1
29 | COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1
30 | NC(=O)N1c2ccccc2C=Cc2ccccc21,training,1
31 | CCN(CC)CCOCCOC(=O)C1(c2ccccc2)CCCC1,training,0
32 | CCCC(C)(COC(N)=O)COC(=O)NC(C)C,training,0
33 | CC1=C(C(=O)O)N2C(=O)C(NC(=O)C(N)c3ccc(O)cc3)C2SC1,training,1
34 | Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,training,1
35 | OC(O)C(Cl)(Cl)Cl,training,0
36 | O=c1[nH]c2cc(Cl)ccc2o1,training,1
37 | Cc1cc(C2CCCCC2)n(O)c(=O)c1,training,0
38 | O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1,training,0
39 | O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O,training,1
40 | Clc1cccc(Cl)c1NC1=NCCN1,training,1
41 | CN1CCN(C2=c3ccccc3=Nc3ccc(Cl)cc3N2)CC1,training,1
42 | CN1CCC(=C2c3ccccc3C=Cc3ccccc32)CC1,training,1
43 | O=C1CN(N=Cc2ccc(-c3ccc([N+](=O)[O-])cc3)o2)C(=O)N1,training,1
44 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,1
45 | CC(=O)N(O)CCCCCNC(=O)CCC(=O)N(O)CCCCCNC(=O)CCC(=O)N(O)CCCCCN,training,0
46 | CNCCCN1c2ccccc2CCc2ccccc21,training,0
47 | CCN(CC)CCOC(=O)C1(C2CCCCC2)CCCCC1,training,0
48 | O=C(O)c1cc(-c2ccc(F)cc2F)ccc1O,training,1
49 | OCC(S)CS,training,1
50 | OCCN(CCO)c1nc(N2CCCCC2)c2nc(N(CCO)CCO)nc(N3CCCCC3)c2n1,training,1
51 | CCN(CC)C(=S)SSC(=S)N(CC)CC,training,1
52 | O=C(CCCN1CC=C(n2c(=O)[nH]c3ccccc32)CC1)c1ccc(F)cc1,training,0
53 | CC(O)(P(=O)(O)O)P(=O)(O)O,training,0
54 | CCc1cccc2c3c([nH]c12)C(CC)(CC(=O)O)OCC3,training,1
55 | CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1,training,0
56 | CC(C)(C(=O)O)c1ccc(C(O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0
57 | O=C(COc1ccc(Cl)cc1)N1CCN(Cc2ccc3c(c2)OCO3)CC1,training,1
58 | Nc1[nH]c(=O)ncc1F,training,1
59 | O=c1[nH]cc(F)c(=O)[nH]1,training,1
60 | CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,0
61 | O=C1OCCN1N=Cc1ccc([N+](=O)[O-])o1,training,1
62 | NS(=O)(=O)c1cc(C(=O)O)c(NCc2ccco2)cc1Cl,training,1
63 | CNC(C)C1CCC(N)C(OC2C(N)CC(N)C(OC3OCC(C)(O)C(NC)C3O)C2O)O1,training,0
64 | O=C(OCC(O)CO)c1ccccc1Nc1ccnc2cc(Cl)ccc12,training,1
65 | NC(N)=NCCN1CCCCCCC1,training,0
66 | NS(=O)(=O)c1cc2c(cc1Cl)NCNS2(=O)=O,training,1
67 | OCCOCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1,training,0
68 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1,training,1
69 | CC(=O)N(CC(O)CN(C(C)=O)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I,training,0
70 | Cc1cc(C(=O)NNCc2ccccc2)no1,training,1
71 | CC(COc1ccccc1)NC(C)C(O)c1ccc(O)cc1,training,0
72 | CNC1(c2ccccc2Cl)CCCCC1=O,training,0
73 | O=C(c1ccccc1)c1ccc2n1CCC2C(=O)O,training,1
74 | Cc1oncc1C(=O)Nc1ccc(C(F)(F)F)cc1,training,1
75 | N#Cc1ccc(C(c2ccc(C#N)cc2)n2cncn2)cc1,training,1
76 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNC(C)C3)c(F)c21,training,1
77 | CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1,training,0
78 | CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1,training,1
79 | CNCCCC12CCC(c3ccccc31)c1ccccc12,training,0
80 | COC(=O)Nc1nc2ccc(C(=O)c3ccccc3)cc2[nH]1,training,1
81 | Cc1cccc(CN2CCN(C(c3ccccc3)c3ccc(Cl)cc3)CC2)c1,training,0
82 | OC(c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)C1CCCCN1,training,0
83 | CN1CCCCC1CCN1c2ccccc2Sc2ccc(S(C)=O)cc21,training,1
84 | Cc1ncc([N+](=O)[O-])n1CCO,training,1
85 | N=c1nc(N2CCCCC2)cc(N)n1O,training,0
86 | O=C1c2c(O)ccc(O)c2C(=O)c2c(NCCNCCO)ccc(NCCNCCO)c21,training,1
87 | NCC1OC(OC2C(CO)OC(OC3C(O)C(N)CC(N)C3OC3OC(CN)C(O)C(O)C3N)C2O)C(N)C(O)C1O,training,0
88 | Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1,training,1
89 | O=C(CCNNC(=O)c1ccncc1)NCc1ccccc1,training,1
90 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc2)C1c1cccc([N+](=O)[O-])c1,training,1
91 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-],training,1
92 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)C1c1cccc([N+](=O)[O-])c1,training,1
93 | CN1Cc2c(N)cccc2C(c2ccccc2)C1,training,1
94 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNCC3)cc21,training,1
95 | O=C(O)CCc1nc(-c2ccccc2)c(-c2ccccc2)o1,training,1
96 | CCCOc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1
97 | CCN(CC)CC#CCOC(=O)C(O)(c1ccccc1)C1CCCCC1,training,0
98 | NCCC(O)(P(=O)(O)O)P(=O)(O)O,training,0
99 | CC(C)(CO)C(O)C(=O)NCCCO,training,0
100 | COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC,training,1
101 | NC1=NC(=O)C(c2ccccc2)O1,training,1
102 | C1CCC(C(CC2CCCCN2)C2CCCCC2)CC1,training,1
103 | CC(COc1ccccc1)N(CCCl)Cc1ccccc1,training,0
104 | CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O,training,1
105 | CC(C)NCC(O)COc1cccc2[nH]ccc12,training,0
106 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3C2C1,training,0
107 | CCC1(c2ccccc2)C(=O)NCNC1=O,training,0
108 | CC(C)(Sc1cc(C(C)(C)C)c(O)c(C(C)(C)C)c1)Sc1cc(C(C)(C)C)c(O)c(C(C)(C)C)c1,training,0
109 | CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1
110 | OC(CCN1CCCC1)(c1ccccc1)C1CCCCC1,training,0
111 | CCCNCC(O)COc1ccccc1C(=O)CCc1ccccc1,training,0
112 | CNCCCC1c2ccccc2C=Cc2ccccc21,training,0
113 | CN(C)C(=O)Oc1ccc[n+](C)c1,training,0
114 | CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,training,1
115 | Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCCC2,training,1
116 | CN1C2CC(OC(=O)C(CO)c3ccccc3)CC1C1OC12,training,0
117 | C[N+](C)(C)CCOC(=O)CCC(=O)OCC[N+](C)(C)C,training,0
118 | Cc1nnc(NS(=O)(=O)c2ccc(N)cc2)s1,training,1
119 | Nc1ccc(S(=O)(=O)Nc2nccs2)cc1,training,1
120 | CC1=C(CC(=O)O)c2cc(F)ccc2C1=Cc1ccc(S(C)=O)cc1,training,1
121 | Cn1nnc2c(C(N)=O)ncn2c1=O,training,0
122 | CC(C)(C)c1ccc(C(O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0
123 | Clc1ccccc1CN1CCc2sccc2C1,training,1
124 | Cc1ccc(S(=O)(=O)NC(=O)NN2CCCCCC2)cc1,training,1
125 | Cc1ccc(C(=O)c2ccc(CC(=O)O)n2C)cc1,training,1
126 | O=c1n(CCCN2CCN(c3cccc(Cl)c3)CC2)nc2ccccn12,training,1
127 | OC(CCN1CCCCC1)(c1ccccc1)C1CCCCC1,training,0
128 | COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,training,1
129 | Cc1c(C)c2c(c(C)c1O)CCC(C)(COc1ccc(CC3SC(=O)NC3=O)cc1)O2,training,1
130 | COc1cc(C(=O)NS(=O)(=O)c2ccccc2C)ccc1Cc1cn(C)c2ccc(NC(=O)OC3CCCC3)cc12,training,1
131 | NS(=O)(=O)Cc1noc2ccccc12,training,1
132 | COC12C(COC(N)=O)C3=C(C(=O)C(C)=C(N)C3=O)N1CC1NC12,training,0
133 | CC[N+](C)(CC)CCOC(=O)C(O)(c1ccccc1)C1CCCCC1,training,0
134 | CC12CCC3c4ccc(O)cc4CCC3C1CCC2O,training,0
135 | O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1F,training,1
136 | CC12COC(=O)CC1CCC1C2CCC2(C)C1CCC2(C)O,training,1
137 | CC(=O)Nc1ccc2c(c1)Cc1ccccc1-2,training,1
138 | O=c1[nH]c(=O)n(C2CC(O)C(CO)O2)cc1I,training,1
139 | CCC1C(=O)OCC1Cc1cncn1C,training,0
140 | CNC(=O)Oc1ccc2c(c1)C1(C)CCN(C)C1N2C,training,0
141 | CC(=O)OCC1=C(C(=O)O)N2C(=O)C(NC(=O)Cc3cccs3)C2SC1,training,1
142 | NCC1OC(OC2C(N)CC(N)C(OC3OC(CO)C(O)C(N)C3O)C2O)C(O)C(O)C1O,training,0
143 | Cc1onc(-c2ccccc2Cl)c1C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O,training,1
144 | OCC1OC(OC2C(CO)OC(O)C(O)C2O)C(O)C(O)C1O,training,0
145 | CSCCC(N)C(=O)O,training,0
146 | COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1C(NC(C)=O)CC2,training,0
147 | C=C1CCC(O)CC1=CC=C1CCCC2(C)C1CCC2C(C)CCCC(C)C,training,0
148 | NC(N)=NCCCC(N)C(=O)O,training,0
149 | CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,0
150 | c1ccc2c(c1)Nc1ccccc1S2,training,1
151 | CCC(CO)NC(=O)C1C=C2c3cccc4[nH]cc(c34)CC2N(C)C1,training,0
152 | O=C(O)c1cc(-c2ccccc2)nc2ccccc12,training,1
153 | CCOC(=O)C1(c2ccccc2)CCN(CCc2ccc(N)cc2)CC1,training,0
154 | CC(=O)CC(c1ccc([N+](=O)[O-])cc1)c1c(O)oc2ccccc2c1=O,training,1
155 | COC1C(OC(N)=O)C(O)C(Oc2ccc3c(=O)c(NC(=O)c4ccc(O)c(CC=C(C)C)c4)c(O)oc3c2C)OC1(C)C,training,1
156 | Nc1ncn(C2OC(CO)C(O)C2O)c(=O)n1,training,1
157 | CC12CC(=CO)C(=O)CC1CCC1C2CCC2(C)C1CCC2(C)O,training,1
158 | C[N+](C)(C)CC(O)CC(=O)[O-],training,0
159 | OCC1OC(OC2C(CO)OC(O)(CO)C2O)C(O)C(O)C1O,training,0
160 | Cn1c(=O)c2[nH]c(Br)nc2n(C)c1=O,training,0
161 | CC1C(c2ccccc2)OCCN1C,training,0
162 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)C(=O)C(C)C(O)C1(C)O,training,1
163 | CCC1(O)CC2CN(CCc3c([nH]c4ccccc34)C(C(=O)OC)(c3cc4c(cc3OC)N(C)C3C(O)(C(=O)OC)C(OC(C)=O)C5(CC)C=CCN6CCC43C65)C2)C1,training,0
164 | CCOC(=O)C1(c2ccccc2)CCN(CCC(C#N)(c2ccccc2)c2ccccc2)CC1,training,0
165 | COc1cc2c(c3oc(=O)c4c(c13)CCC4=O)C1C=COC1O2,training,1
166 | Cc1c(C)c2c(c(C)c1O)CCC(C)(CCCC(C)CCCC(C)CCCC(C)C)O2,training,0
167 | CNC1C(O)C(NC)C2OC3(O)C(=O)CC(C)OC3OC2C1O,training,0
168 | NC(N)=NC(=O)c1nc(Cl)c(N)nc1N,training,1
169 | OCCN1CCN(CCC=C2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,0
170 | Cc1cn(C2C=CC(CO)O2)c(=O)[nH]c1=O,training,1
171 | Cc1onc(-c2c(Cl)cccc2Cl)c1C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O,training,1
172 | CNC1C(OC2C(OC3C(O)C(O)C(N=C(N)N)C(O)C3N=C(N)N)OC(C)C2(O)C=O)OC(CO)C(O)C1O,training,0
173 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(Cl)C(O)CC2(C)C1(O)C(=O)CO,training,0
174 | CN(C)CCCC1c2ccccc2Nc2ccc(Cl)cc21,training,1
175 | CCc1c(C)[nH]c2c1C(=O)C(CN1CCOCC1)CC2,training,0
176 | CC(=O)OC1CC2CCC3C(CCC4(C)C3CC([N+]3(C)CCCCC3)C4OC(C)=O)C2(C)CC1[N+]1(C)CCCCC1,training,0
177 | O=C(O)Cc1csc(-c2ccc(Cl)cc2)n1,training,1
178 | CC(=O)Oc1ccc(C2(c3ccc(OC(C)=O)cc3)C(=O)N(C(C)=O)c3ccccc32)cc1,training,1
179 | CN(N=O)C(=O)NC1C(O)OC(CO)C(O)C1O,training,1
180 | CC1OC(OC2C(O)CC(OC3C(O)CC(OC4CCC5(C)C(CCC6C5CC(O)C5(C)C(C7=CC(=O)OC7)CCC65O)C4)OC3C)OC2C)CC(O)C1O,training,0
181 | CC(CCC(=O)O)C1CCC2C3C(O)CC4CC(O)CCC4(C)C3CCC12C,training,0
182 | COc1ccc2c(C(=O)c3ccc(Cl)cc3)c(C)n(CC(=O)O)c2c1,training,1
183 | CCOC(=O)Nc1ccc2c(c1)N(C(=O)CCN1CCOCC1)c1ccccc1S2,training,1
184 | Cc1cn(C2CC(N=[N+]=[N-])C(CO)O2)c(=O)[nH]c1=O,training,1
185 | CC(=O)OC1C(=O)C2(C)C(O)CC3OCC3(OC(C)=O)C2C(OC(=O)c2ccccc2)C2(O)CC(OC(=O)C(O)C(NC(=O)c3ccccc3)c3ccccc3)C(C)=C1C2(C)C,training,1
186 | CC(CCc1ccc(O)cc1)NCCc1ccc(O)c(O)c1,training,0
187 | CC(C)(C)NCC(O)COc1ccccc1C1CCCC1,training,0
188 | NCCC(O)C(=O)NC1CC(N)C(OC2OC(CN)C(O)C(O)C2O)C(O)C1OC1OC(CO)C(O)C(N)C1O,training,0
189 | O=C(O)COc1ccc(C(=O)c2cccs2)c(Cl)c1Cl,training,1
190 | CC(C)(C)NCC(O)COc1cccc2c1CC(O)C(O)C2,training,0
191 | COc1ccc(C2Sc3ccccc3N(CCN(C)C)C(=O)C2OC(C)=O)cc1,training,1
192 | CC(C(=O)O)c1ccc2oc(-c3ccc(Cl)cc3)nc2c1,training,1
193 | CC1(C)C(C=C(Cl)Cl)C1C(=O)OCc1cccc(Oc2ccccc2)c1,training,0
194 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)CC(O)(C(=O)CO)CC3OC1CC(N)C(O)C(C)O1,training,1
195 | CN(C)CCn1nnnc1SCC1=C(C(=O)O)N2C(=O)C(NC(=O)Cc3csc(N)n3)C2SC1,training,1
196 | CC(=O)N1CCN(c2ccc(OCC3COC(Cn4ccnc4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1,training,1
197 | O=c1nc[nH]c2c1ncn2C1CCC(CO)O1,training,1
198 | CCC(C)C(=O)OC1CC(C)C=C2C=CC(C)C(CCC3CC(O)CC(=O)O3)C21,training,0
199 | CCC(C)(C)C(=O)OC1CC(C)C=C2C=CC(C)C(CCC3CC(O)CC(=O)O3)C21,training,0
200 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1Cc2ccccc2CC1C(=O)O,training,1
201 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)CN(C)C(C)C(O)C1(C)O,training,0
202 | CCC(C)n1ncn(-c2ccc(N3CCN(c4ccc(OCC5COC(Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,training,1
203 | CC(O)(CS(=O)(=O)c1ccc(F)cc1)C(=O)Nc1ccc(C#N)c(C(F)(F)F)c1,training,1
204 | CC(C)(C)NC(=O)C1CCC2C3CCC4NC(=O)C=CC4(C)C3CCC12C,training,0
205 | CC1CN(c2cc3c(cc2F)c(=O)c(C(=O)O)cn3-c2ccc(F)cc2F)CCN1,training,1
206 | Nc1ncnc2c1ncn2CCOCP(=O)(O)O,training,1
207 | CC(c1cc2ccccc2s1)N(O)C(N)=O,training,1
208 | COCC(=O)OC1(CCN(C)CCCc2nc3ccccc3[nH]2)CCc2cc(F)ccc2C1C(C)C,training,0
209 | CCC1(O)C(=O)OCc2c1cc1n(c2=O)Cc2cc3c(CN(C)C)c(O)ccc3nc2-1,training,0
210 | CC(O)CN1CCN(CC(=O)[O-])CCN(CC(=O)[O-])CCN(CC(=O)[O-])CC1,training,0
211 | CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1,training,1
212 | CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CCC(O)CC(O)CC(=O)O,training,1
213 | Nc1ccn(C2CSC(CO)O2)c(=O)n1,training,1
214 | Nc1nc(NC2CC2)c2ncn(C3C=CC(CO)C3)c2n1,training,1
215 | Cc1cc(Br)c(O)c2ncccc12,training,1
216 | CNC1CCC(c2ccc(Cl)c(Cl)c2)c2ccccc21,training,0
217 | CN1c2ccccc2C(NCCCCCCC(=O)O)c2ccc(Cl)cc2S1(=O)=O,training,1
218 | Cc1c(F)c(N2CCNC(C)C2)cc2c1c(=O)c(C(=O)O)cn2C1CC1,training,1
219 | CN(CCOc1ccc(CC2SC(=O)NC2=O)cc1)c1ccccn1,training,1
220 | C=C(c1ccc(C(=O)O)cc1)c1cc2c(cc1C)C(C)(C)CCC2(C)C,training,1
221 | CCCCOCC(CN1C(=O)N(CC(COCCCC)OC(N)=O)C(=O)C(CC)(c2ccccc2)C1=O)OC(N)=O,training,1
222 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(Cl)C(O)CC2(C)C1(O)C(=O)CCl,training,0
223 | CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,training,1
224 | CC(=O)OC12COC1CC(O)C1(C)C(=O)C(O)C3=C(C)C(OC(=O)C(O)C(NC(=O)OC(C)(C)C)c4ccccc4)CC(O)(C(OC(=O)c4ccccc4)C21)C3(C)C,training,1
225 | CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23,training,1
226 | CCCCCC(O)C=CC1C(O)CC(=O)C1CCCCCCC(=O)O,training,0
227 | COc1c(N2CC3CCCNC3C2)c(F)cc2c(=O)c(C(=O)O)cn(C3CC3)c12,training,1
228 | CCOC1OC(=O)CC1NC(=O)C1CCCN2C(=O)CCC(NC(=O)c3nccc4ccccc34)C(=O)N12,training,1
229 | C=C1C(CO)C(O)CC1n1cnc2c(=O)nc(N)[nH]c21,training,1
230 | NCC1OC(OC2C(CO)OC(OC3C(O)C(N)CC(N)C3OC3OC(CO)C(O)C(O)C3N)C2O)C(N)C(O)C1O,training,0
231 | O=C(O)CCCCC1SCC2NC(=O)NC21,training,0
232 | CN1C2CCC1CC(OC(=O)C(CO)c1ccccc1)C2,training,0
233 | COC(c1ccccc1)(c1ccccc1)C(Oc1nc(C)cc(C)n1)C(=O)O,training,1
234 | COC1CC(OC2C(C)C(=O)OC(C)C(C)C(OC(C)=O)C(C)C(=O)C3(CO3)CC(C)C(OC3OC(C)CC(N(C)C)C3OC(C)=O)C2C)OC(C)C1OC(C)=O,training,1
235 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1,training,1
236 | CC(C)CN(CC(O)C(Cc1ccccc1)NC(=O)OC1COC2OCCC12)S(=O)(=O)c1ccc(N)cc1,training,1
237 | CCOC(Nc1ccc(C(=O)O)cc1)C(=O)c1ccc(-c2ccccc2)cc1,training,1
238 | NC(C(=O)O)C1CC(Cl)=NO1,training,0
239 | CC(C)c1nc(CN(C)C(=O)NC(C(=O)NC(Cc2ccccc2)CC(O)C(Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1,training,1
240 | CC1OC(OC2C(CO)OC(OC3C(CO)OC(O)C(O)C3O)C(O)C2O)C(O)C(O)C1NC1C=C(CO)C(O)C(O)C1O,training,1
241 | COC1(NC(=O)Cc2cccs2)C(=O)N2C(C(=O)O)=C(COC(N)=O)CSC21,training,1
242 | NC1CC1c1ccccc1,training,0
243 | CC(C)=CCN1CCC2(C)c3cc(O)ccc3CC1C2C,training,0
244 | C=CC[N+]1(C2CC3C4CCC5CC(O)C(N6CCOCC6)CC5(C)C4CCC3(C)C2OC(C)=O)CCCC1,training,0
245 | CC(CO)NC(=O)C1C=C2c3cccc4[nH]cc(c34)CC2N(C)C1,training,0
246 | CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CCO,training,0
247 | Cn1nnnc1SCC1=C(C(=O)O)N2C(=O)C(NC(=O)C(O)c3ccccc3)C2SC1,training,1
248 | CC1(C)SC2C(NC(=O)C(NC(=O)N3CCN(S(C)(=O)=O)C3=O)c3ccccc3)C(=O)N2C1C(=O)O,training,1
249 | S=c1nc[nH]c2nc[nH]c12,training,1
250 | CN1CCCN=C1C=Cc1cccs1,training,0
251 | CN1CCN(CCC=C2c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1,training,1
252 | Cn1cc[nH]c1=S,training,1
253 | CC(C)n1c(C=CC(O)CC(O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,training,0
254 | Nc1nc(=S)c2[nH]cnc2[nH]1,training,1
255 | CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1
256 | NCCCC(N)CC(=O)NCC1NC(=O)C(CO)NC(=O)C(N)CNC(=O)C(C2CCN=C(N)N2)NC(=O)C(=CNC(N)=O)NC1=O,training,0
257 | CCCCN1C(=O)C(C(O)C2CCCCC2)NC(=O)C12CCN(Cc1ccc(Oc3ccc(C(=O)O)cc3)cc1)CC2,training,1
258 | CN(C)Cc1ccc(CSCCNC(=C[N+](=O)[O-])NCc2ccc3c(c2)OCO3)o1,training,1
259 | CCCCCCCCCCCC(CC1OC(=O)C1CCCCCC)OC(=O)C(CC(C)C)NC=O,training,1
260 | COc1ccc(CC(C)NCC(O)c2ccc(O)c(NC=O)c2)cc1,training,0
261 | CN1C(=C(O)Nc2ccccn2)C(=O)c2ccccc2S1(=O)=O,training,1
262 | C=C1CCC(O)CC1=CC=C1CCCC2(C)C1CCC2C(C)C=CC(C)C(C)C,training,0
263 | CC1c2cccc(O)c2C(O)=C2C(=O)C3(O)C(=O)C(=C(N)O)C(=O)C(N(C)C)C3C(O)C21,training,1
264 | Cc1ccc(C(=CCN2CCCC2)c2ccccn2)cc1,training,0
265 | COc1ccc2c3c1OC1C(O)C=CC4C(C2)N(C)CCC341,training,0
266 | C=C1CCC2(O)C3Cc4ccc(O)c5c4C2(CCN3CC2CC2)C1O5,training,0
267 | CN1CCC23c4c5ccc(O)c4OC2C(O)C=CC3C1C5,training,0
268 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C,training,1
269 | CN(C)NN=C1N=CN=C1C(N)=O,training,1
270 | O=C1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)N1,training,1
271 | O=C(O)C1=CC(=NNc2ccc(S(=O)(=O)Nc3ccccn3)cc2)C=CC1=O,training,1
272 | COc1ccc2c(c1)C13CCCCC1C(C2)N(C)CC3,training,0
273 | CON=C(C(=O)NC1C(=O)N2C(C(=O)O)=C(CSc3nc(=O)c(=O)[nH]n3C)CSC12)c1csc(N)n1,training,1
274 | CCOC(=O)C(CCc1ccccc1)NC1CCc2ccccc2N(CC(=O)O)C1=O,training,1
275 | COC1C=COC2(C)Oc3c(C)c(O)c4c(c3C2=O)C(=O)C(=CNN2CCN(C)CC2)C(=C4O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,training,1
276 | CCC1OC(=O)C(C)C(=O)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(OC)CC(C)C(=O)C(C)C2N(CCCCn3cnc(-c4cccnc4)c3)C(=O)OC12C,training,1
277 | CON=C(N)c1ccc(-c2ccc(-c3ccc(C(N)=NOC)cc3)o2)cc1,training,1
278 | CC(C)CC(CN)CC(=O)O,training,0
279 | CC1CN(CC(Cc2ccccc2)C(=O)NCC(=O)O)CCC1(C)c1cccc(O)c1,training,0
280 | O=C1C=CC=CC1=C1NC(=C2C=CC=CC2=O)N(c2ccc(C(=O)O)cc2)N1,training,1
281 | COC1C=COC2(C)Oc3c(C)c(O)c4c(c3C2=O)C2=NC3(CCN(CC(C)C)CC3)NC2=C(NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C)C4=O,training,1
282 | CCCCCCCCC=CCCCCCCCC(=O)O,training,0
283 | CC(=O)Oc1cc2c(s1)CCN(C(C(=O)C1CC1)c1ccccc1F)C2,training,0
284 | CN1CCC(Nc2ncc3ncnc(Nc4ccc(F)c(Cl)c4)c3n2)CC1,training,1
285 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O,training,1
286 | C[N+]1(C)C2CCC1CC(OC(=O)C(O)c1ccccc1)C2,training,0
287 | [C-]#N,training,0
288 | C=C1c2cccc(O)c2C(O)=C2C(=O)C3(O)C(O)=C(C(N)=O)C(=O)C(N(C)C)C3C(O)C12,training,1
289 | O=C1OC(C(O)CO)C(O)=C1O,training,0
290 | CN(C)C1C(=O)C(C(N)=O)=C(O)C2(O)C(=O)C3=C(O)c4c(O)ccc(Cl)c4C(O)C3CC12,training,1
291 | OCC(O)C(O)C(O)C(O)CO,training,0
292 | Cc1ccc2cc3c(ccc4ccccc43)c3c2c1CC3,training,1
293 | Cn1cnc([N+](=O)[O-])c1Sc1ncnc2nc[nH]c12,training,1
294 | CC(=O)Oc1ccc(C(=C2CCCCC2)c2ccc(OC(C)=O)cc2)cc1,training,1
295 | COc1cc2c(cc1OC)C(=O)C(CC1CCN(Cc3ccccc3)CC1)C2,training,0
296 | OC(Cn1cncn1)(Cn1cncn1)c1ccc(F)cc1F,training,1
297 | CC(C(=O)O)c1ccc(-c2ccccc2)c(F)c1,training,1
298 | Nc1cc(-c2ccncc2)c[nH]c1=O,training,1
299 | CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1,training,0
300 | CC12CC3CC(C)(C1)CC(N)(C3)C2,training,0
301 | CN(C)C(=N)N=C(N)N,training,0
302 | Cc1ccccc1N1C(=O)c2cc(S(N)(=O)=O)c(Cl)cc2NC1C,training,1
303 | CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21,training,1
304 | CC1(C)NC(=O)N(c2ccc([N+](=O)[O-])c(C(F)(F)F)c2)C1=O,training,1
305 | COc1ccc(CN(CCN(C)C)c2ccccn2)cc1,training,0
306 | Nc1nc2ccc(OC(F)(F)F)cc2s1,training,1
307 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,training,1
308 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)no1,training,1
309 | O=C1C(CCS(=O)c2ccccc2)C(=O)N(c2ccccc2)N1c1ccccc1,training,0
310 | CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO,training,0
311 | CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O,training,0
312 | O=c1[nH]cc(N(CCCl)CCCl)c(=O)[nH]1,training,1
313 | NC(CO)(CO)CO,training,0
314 | CCC(CO)NC(=O)C1C=C2c3cccc4c3c(cn4C)CC2N(C)C1,training,0
315 | COC(=O)Nc1nc2ccccc2[nH]1,training,1
316 | CN1CCCC1CCOC(C)(c1ccccc1)c1ccc(Cl)cc1,training,0
317 | CC(=O)NC1C(O)OC(CO)C(OS(=O)(=O)[O-])C1OC1OC(C(=O)[O-])C(O)C(O)C1O,training,1
318 | CC(C(=O)O)c1ccc(N2CC=CC2)c(Cl)c1,training,1
319 | COc1cc(C2c3cc4c(cc3C(OC3OC5COC(C)OC5C(O)C3O)C3COC(=O)C23)OCO4)cc(OC)c1O,training,1
320 | Nc1nc(=O)n(C2CSC(CO)O2)cc1F,training,1
321 | CCc1oc2ccccc2c1C(=O)c1ccc(O)cc1,training,1
322 | CC(Cn1cnc2c(N)ncnc21)OCP(=O)(O)O,training,1
323 | CCCCCC(O)C=CC1C(O)CC(=O)C1CC=CCCCC(=O)O,training,0
324 | CC(C)(O)c1ccccc1CCC(SCC1(CC(=O)O)CC1)c1cccc(C=Cc2ccc3ccc(Cl)cc3n2)c1,training,0
325 | CC(C)(C)NC(=O)C1CN(Cc2cccnc2)CCN1CC(O)CC(Cc1ccccc1)C(=O)NC1c2ccccc2CC1O,training,1
326 | CO[Si](C)(C)O[Si](C)(C)C,training,0
327 | Cc1ccc(-n2[nH]c(C)c(NN=C3C=CC=C(c4cccc(C(=O)O)c4)C3=O)c2=O)cc1C,training,1
328 | Oc1cccc2cccnc12,training,1
329 | CC(=O)Nc1nnc(S(N)(=O)=O)s1,training,1
330 | CC(=O)c1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1,training,1
331 | O=c1ncnc2[nH][nH]cc1-2,training,1
332 | CN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1,training,0
333 | CN1C(=O)CCS(=O)(=O)C1c1ccc(Cl)cc1,training,1
334 | Cc1c(-c2ccccc2)oc2c(C(=O)OCCN3CCCCC3)cccc2c1=O,training,0
335 | CC(C)(C(=O)c1cccnc1)c1cccnc1,training,0
336 | Cc1ncc2n1-c1ccc(Cl)cc1C(c1ccccc1F)=NC2,training,0
337 | Nc1ccc(N=Nc2ccccc2)c(N)n1,training,0
338 | CC(=CCC1=C(C)C(=O)c2ccccc2C1=O)CCCC(C)CCCC(C)CCCC(C)C,training,0
339 | COc1cc(NC(C)CCCN)c2ncccc2c1,training,0
340 | COc1cc(C(=O)NCc2ccc(OCCN(C)C)cc2)cc(OC)c1OC,training,1
341 | Cc1ccc(N(CC2=NCCN2)c2cccc(O)c2)cc1,training,0
342 | CC12C=CC(=O)C=C1CCC1C2C(=O)CC2(C)C1CCC2(O)C(=O)CO,training,0
343 | COc1ccc(CC(N)C(=O)NC2C(CO)OC(n3cnc4c(N(C)C)ncnc43)C2O)cc1,training,1
344 | CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1)C2,training,0
345 | O=c1[nH]c2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,training,0
346 | C[Si](C)(C)O[Si](C)(C)O[Si](C)(C)C,training,0
347 | CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(=O)O,training,0
348 | OCC1OC(n2cnc3c2NC=NCC3O)CC1O,training,1
349 | C=C1CC2C(CCC3(C)C(=O)CCC23)C2(C)C=CC(=O)C=C12,training,1
350 | CNCCC(Oc1cccc2ccccc12)c1cccs1,training,1
351 | Cc1nc(C)c2c(n1)N(Cc1ccc(-c3ccccc3-c3nn[nH]n3)cc1)C(=O)CC2,training,1
352 | NC1C2CN(c3nc4c(cc3F)c(=O)c(C(=O)O)cn4-c3ccc(F)cc3F)CC12,training,1
353 | CN1c2c(oc(=O)n(-c3ccccn3)c2=O)-c2ccccc2S1(=O)=O,training,1
354 | NC(N)=Nc1nc(CSCCN=CNS(=O)(=O)c2ccc(Br)cc2)cs1,training,1
355 | Cc1nc([N+](=O)[O-])cn1-c1ccc([N+](=O)[O-])cc1,training,1
356 | CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(OC)CC(C)C(=O)C(C)C(O)C1(C)O,training,1
357 | CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1
358 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)(C)C)cc2)nc(-c2ncccn2)nc1OCCO,training,1
359 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21,training,0
360 | COC1=CC(=O)CC(C)C12Oc1c(Cl)c(OC)cc(OC)c1C2=O,training,1
361 | COCCNC(=O)CN(CCN(CCN(CC(=O)[O-])CC(=O)NCCOC)CC(=O)[O-])CC(=O)[O-],training,0
362 | CCCc1cc(=O)[nH]c(=S)[nH]1,training,1
363 | CCCC(CCC)C(=O)O,training,1
364 | CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)[O-],training,0
365 | O=C(NC(CO)C(O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,test,0
366 | CC(NC(C)(C)C)C(=O)c1cccc(Cl)c1,test,0
367 | CC(C)(C)NCC(O)c1ccc(O)c(CO)c1,test,0
368 | CC(=O)Oc1ccccc1C(=O)O,test,0
369 | CC(C)NCC(O)COc1ccc(CC(N)=O)cc1,test,0
370 | CC(C)C(=O)Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,test,1
371 | NNCCc1ccccc1,test,0
372 | CC(C)NCC(O)c1ccc(O)c(O)c1,test,0
373 | CC(C)NCC(O)c1cc(O)cc(O)c1,test,0
374 | COc1ccc(OC)c(C(O)CNC(=O)CN)c1,test,0
375 | CCOc1ccc(NC(C)=O)cc1,test,1
376 | CC(C)(N)Cc1ccccc1,test,0
377 | CCCCNc1ccc(C(=O)OCCN(C)C)cc1,test,0
378 | NC(Cc1ccc(O)c(O)c1)C(=O)O,test,0
379 | CCCCNc1ccc(C(=O)OCCOCCOCCOCCOCCOCCOCCOCCOCCOC)cc1,test,0
380 | CC(C)Cc1ccc(CC(=O)O)cc1,test,1
381 | C#CCN(C)C(C)Cc1ccccc1,test,0
382 | CC(N)C(O)c1ccccc1,test,0
383 | CC(Cc1ccc(O)c(O)c1)(NN)C(=O)O,test,0
384 | CC(O)C(=O)Nc1c(I)c(C(=O)NC(CO)CO)c(I)c(C(=O)NC(CO)CO)c1I,test,0
385 | CC(N)(Cc1ccc(O)cc1)C(=O)O,test,0
386 | CC(=O)Nc1c(I)c(NC(C)=O)c(I)c(C(=O)O)c1I,test,0
387 | CC(=O)Nc1cccc(O)c1,test,0
388 | CCN(CC)CCOc1cccc(OCCN(CC)CC)c1OCCN(CC)CC,test,0
389 | O=C(O)c1ccccc1O,test,0
390 | NC(=O)c1ccccc1O,test,0
391 | C=CCOc1ccc(CC(=O)O)cc1Cl,test,1
392 | NCC1(CC(=O)O)CCCCC1,test,0
393 | NCC1CCC(C(=O)O)CC1,test,0
394 | O=C(O)c1cccnc1,test,1
395 | CC(C)NNC(=O)c1ccncc1,test,1
396 | CC(N=C(NC#N)Nc1ccncc1)C(C)(C)C,test,0
397 | S=C=Nc1cccc2ccccc12,test,1
398 | CC(C)NCC(O)COc1cccc2ccccc12,test,0
399 | COc1ccc2c(C(=S)N(C)CC(=O)O)cccc2c1C(F)(F)F,test,1
400 | Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1,test,1
401 | C[N+](C)(C)CCOP(=O)([O-])OP(=O)(O)OCC1OC(n2ccc(N)nc2=O)C(O)C1O,test,0
402 | Nc1ccn(C2CCC(CO)O2)c(=O)n1,test,1
403 | NC(Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O,test,0
404 | CN(C)CCOC(c1ccc(Cl)cc1)c1ccccn1,test,0
405 | Cc1ccc(Nc2c(F)cccc2Cl)c(CC(=O)O)c1,test,1
406 | Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl,test,1
407 | Clc1ccc(C(c2ccccc2Cl)C(Cl)Cl)cc1,test,0
408 | Cc1ccc(O)c(C(CCN(C(C)C)C(C)C)c2ccccc2)c1,test,0
409 | Cc1ccccc1C(OCCN(C)C)c1ccccc1,test,0
410 | CC(C(=O)O)c1cccc(C(=O)c2ccccc2)c1,test,1
411 | O=C(c1cc(O)c(O)c(O)c1)c1ccc(O)c(O)c1O,test,1
412 | Cc1ccc(C(=O)c2cc(O)c(O)c([N+](=O)[O-])c2)cc1,test,1
413 | CC(C)OC(=O)C(C)(C)Oc1ccc(C(=O)c2ccc(Cl)cc2)cc1,test,1
414 | C#CC1(O)CCC2C3CCC4=CC(=O)CCC4C3CCC21C,test,0
415 | CC12CCC(=O)C=C1CCC1C3CCC(O)(C(=O)CO)C3(C)CC(O)C12F,test,0
416 | COc1ccc2c3c1OC1C(=O)CCC4C(C2)N(C)CCC314,test,0
417 | NCCc1ccc(O)c(O)c1,val,0
418 | Nc1ccc(C(=O)O)cc1,val,0
419 | CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,val,1
420 | CC[N+](C)(C)c1cccc(O)c1,val,0
421 | N#CC(C#N)=NNc1ccc(OC(F)(F)F)cc1,val,1
422 | COCCCCC(=NOCCN)c1ccc(C(F)(F)F)cc1,val,0
423 | COc1ccccc1OCC(O)CO,val,0
424 | CCCCCCc1ccc(O)cc1O,val,0
425 | CCN(CC)CC(=O)Nc1c(C)cccc1C,val,0
426 | CC(=O)Oc1cc(C(C)C)c(OCCN(C)C)cc1C,val,1
427 | CC(C)c1cccc(C(C)C)c1O,val,0
428 | Nc1ccc(S(N)(=O)=O)cc1,val,1
429 | CCCCCCCCNC(C)C(O)c1ccc(SC(C)C)cc1,val,1
430 | CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,val,1
431 | CC(N)Cc1ccccc1,val,0
432 | COc1ccc(OC)c(C(O)C(C)N)c1,val,0
433 | CNC(C)C(O)c1ccccc1,val,0
434 | CNC(C)Cc1ccccc1,val,0
435 | NC(=O)NS(=O)(=O)c1ccc(N)cc1,val,1
436 | CN(C)C(=O)Oc1cc(OC(=O)N(C)C)cc(C(O)CNC(C)(C)C)c1,val,0
437 | COC(=O)CCc1ccc(OCC(O)CNC(C)C)cc1,val,0
438 | CCCCCCCN(CC)CCCC(O)c1ccc(NS(C)(=O)=O)cc1,val,0
439 | NCC(O)c1ccc(O)c(O)c1,val,0
440 | COc1cc(CNC(=O)CCCCC=CC(C)C)ccc1O,val,0
441 | C#CCN(C)Cc1ccccc1,val,0
442 | CNCC(O)c1ccc(O)c(O)c1,val,0
443 | COCCCOc1cc(CC(CC(N)C(O)CC(C(=O)NCC(C)(C)C(N)=O)C(C)C)C(C)C)ccc1OC,val,0
444 | ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl,val,0
445 | OC1C(O)C(O)C(O)C(O)C1O,val,0
446 | NC1CCCCC1N,val,1
447 | Cc1ncc(CO)c(CO)c1O,val,0
448 | CN(CC=CC#CC(C)(C)C)Cc1cccc2ccccc12,val,1
449 | CCCCCOC(=O)Nc1nc(=O)n(C2OC(C)C(O)C2O)cc1F,val,1
450 | CCCCNc1cc(C(=O)O)cc(S(N)(=O)=O)c1Oc1ccccc1,val,1
451 | CC(C(=O)O)c1cccc(Oc2ccccc2)c1,val,1
452 | CS(=O)(=O)Nc1ccc([N+](=O)[O-])cc1Oc1ccccc1,val,1
453 | CN(C)CCC(c1ccc(Cl)cc1)c1ccccn1,val,0
454 | CN(C)CCOC(C)(c1ccccc1)c1ccccn1,val,0
455 | CN(C)CCC(c1ccc(Br)cc1)c1ccccn1,val,0
456 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1
457 | Cc1cccc(Nc2ccccc2C(=O)O)c1C,val,1
458 | C[N+](C)CCOC(c1ccccc1)c1ccccc1,val,0
459 | CN(C)CCOC(c1ccccc1)c1ccccc1,val,0
460 | COc1cc(O)c(C(=O)c2ccccc2)cc1S(=O)(=O)O,val,0
461 | Nc1c(CC(=O)O)cccc1C(=O)c1ccc(Br)cc1,val,1
462 | CC12CCC(=O)C=C1CCC1C2C(O)CC2(C)C1CCC2(O)C(=O)CO,val,0
463 | CC12CCC3C4CCC(=O)C=C4CCC3C1CCC2O,val,1
464 | CC(=O)C1(O)CCC2C3CC(C)C4=CC(=O)CCC4(C)C3CCC21C,val,0
465 | C#CC1(O)CCC2C3CCC4=CC(=O)CCC4C3CCC21CC,val,0
466 | CN1CCC23c4c5ccc(O)c4OC2C(=O)CCC3C1C5,val,0
467 | COc1ccc2c3c1OC1C(=O)CCC4(O)C(C2)N(C)CCC314,val,0
468 | CN1CCC23c4c5ccc(O)c4OC2C(=O)CCC3(O)C1C5,val,0
469 |
--------------------------------------------------------------------------------
/experiment/build_data.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
from rdkit.Chem import MolFromSmiles, MACCSkeys, AllChem
import numpy as np
from rdkit.ML.Descriptors import MoleculeDescriptors
import multiprocessing as mp
import torch
from rdkit import Chem
import math
import random
from rdkit.Chem import ChemicalFeatures
from rdkit import RDConfig
import os
# knowledge-based transformer pre-train model
# from rdkit_des import Chem
# smi = ''
# random_equivalent_smiles = Chem.MolFromSmiles(Chem.MolToSmiles(smi, doRandom=True))
def smi_tokenizer(smi):
    """Split a SMILES molecule or reaction string into a list of tokens.

    Bracket expressions (``[C@H]``, ``[N+]``, ...), the two-letter halogens
    ``Br`` and ``Cl``, two-digit ring closures (``%12``) and every other
    single symbol listed in the pattern each become one token. Characters
    that the pattern does not match are skipped, so ``''.join(tokens)`` is
    not guaranteed to reproduce ``smi``.

    :param smi: SMILES string to tokenize.
    :return: list of token strings in the order they occur in ``smi``.
    """
    import re
    # Raw string literal: the former plain literal depended on invalid escape
    # sequences (backslash + bracket/parenthesis), which newer Python versions
    # report with a warning. The compiled regex is unchanged.
    pattern = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])"
    return re.compile(pattern).findall(smi)
def one_of_k_encoding(x, allowable_set):
    """One-hot encode ``x`` against the entries of ``allowable_set``.

    :param x: value to encode.
    :param allowable_set: sequence of permitted values.
    :return: list with one comparison result per entry of ``allowable_set``
        (``x == entry``).
    :raises Exception: when ``x`` is not contained in ``allowable_set``.
    """
    if x in allowable_set:
        return [x == candidate for candidate in allowable_set]
    raise Exception("input {0} not in allowable set{1}:".format(
        x, allowable_set))
def one_of_k_encoding_unk(x, allowable_set):
    """One-hot encode ``x``; values outside ``allowable_set`` count as its last entry.

    :param x: value to encode.
    :param allowable_set: sequence of permitted values; the final entry doubles
        as the bucket for unknown values.
    :return: list with one comparison result per entry of ``allowable_set``.
    """
    effective = x if x in allowable_set else allowable_set[-1]
    return [effective == candidate for candidate in allowable_set]
def atom_labels(atom, use_chirality=True):
    """Build the label vector of one atom.

    The full encoding is laid out as: degree 0-6 (positions 0-6),
    hybridization SP/SP2/SP3/SP3D/SP3D2/other (7-12), aromaticity (13),
    total hydrogen count 0-4 (14-18) and, when ``use_chirality`` is set,
    CIP code R/S (19-20) plus the ``_ChiralityPossible`` flag (21).
    Only a fixed subset of these positions is returned.

    :param atom: RDKit atom to describe.
    :param use_chirality: append the three chirality entries when True.
    :return: list of the selected label entries; 15 values with chirality,
        12 without.
    :raises Exception: propagated from ``one_of_k_encoding`` when the atom
        degree is outside 0-6.
    """
    results = one_of_k_encoding(atom.GetDegree(),
                                [0, 1, 2, 3, 4, 5, 6]) + \
        one_of_k_encoding_unk(atom.GetHybridization(), [
            Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
            Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,
            Chem.rdchem.HybridizationType.SP3D2, 'other']) + [atom.GetIsAromatic()] \
        + one_of_k_encoding_unk(atom.GetTotalNumHs(),
                                [0, 1, 2, 3, 4])
    if use_chirality:
        try:
            cip_labels = one_of_k_encoding_unk(atom.GetProp('_CIPCode'), ['R', 'S'])
        except KeyError:
            # GetProp raises KeyError when no CIP code is stored on the atom:
            # encode that case as "neither R nor S".
            cip_labels = [False, False]
        results = results + cip_labels + [atom.HasProp('_ChiralityPossible')]
    # The array round-trip is kept from the original implementation; it
    # coerces the mixed entries to one common element type.
    atom_labels_list = np.array(results).tolist()
    atom_selected_index = [1, 2, 3, 4, 7, 8, 9, 13, 14, 15, 16, 17, 19, 20, 21]
    # Positions 19-21 only exist when chirality is encoded; skipping missing
    # positions keeps use_chirality=False from failing with an IndexError.
    return [atom_labels_list[x] for x in atom_selected_index
            if x < len(atom_labels_list)]
def global_maccs_data(smiles):
    """Return the selected MACCS key bits of a molecule.

    :param smiles: SMILES string of the molecule.
    :return: list holding the values of the retained MACCS bit positions.
    """
    # Keep only the keys whose negative/positive sample ratio is below 1000
    # and above 0.001 (positions 0-2, 4-7, 9-10, 12, 31, 35 and 166 are dropped).
    kept_positions = [3, 8, 11, *range(13, 31), 32, 33, 34, *range(36, 166)]
    fingerprint = MACCSkeys.GenMACCSKeys(Chem.MolFromSmiles(smiles))
    all_bits = np.array(fingerprint).tolist()
    return [all_bits[position] for position in kept_positions]
def global_ecfp4_data(smiles):
    """Return the 1024-bit Morgan fingerprint (radius 2) of a molecule as a list.

    :param smiles: SMILES string of the molecule.
    :return: list with one entry per fingerprint bit.
    """
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(smiles), 2, nBits=1024)
    return np.array(fingerprint).tolist()
def global_rdkit_des_data(smiles):
    """Compute a fixed set of RDKit molecular descriptors for one molecule.

    :param smiles: SMILES string of the molecule.
    :return: list of descriptor values, in the order of the names below.
    """
    selected_descriptors = [
        'MaxEStateIndex', 'MinEStateIndex', 'MaxAbsEStateIndex', 'MinAbsEStateIndex', 'qed', 'MolWt',
        'HeavyAtomMolWt', 'ExactMolWt', 'NumValenceElectrons', 'NumRadicalElectrons',
        'MaxPartialCharge', 'MinPartialCharge', 'MaxAbsPartialCharge', 'MinAbsPartialCharge',
        'FpDensityMorgan1', 'FpDensityMorgan2', 'FpDensityMorgan3', 'BalabanJ', 'BertzCT', 'Chi0',
        'Chi0n', 'Chi0v', 'Chi1', 'Chi1n', 'Chi1v', 'Chi2n', 'Chi2v', 'Chi3n', 'Chi3v', 'Chi4n',
        'Chi4v', 'HallKierAlpha', 'Ipc', 'Kappa1', 'Kappa2', 'Kappa3', 'LabuteASA', 'PEOE_VSA1',
        'PEOE_VSA10', 'PEOE_VSA11', 'PEOE_VSA12', 'PEOE_VSA13', 'PEOE_VSA14', 'PEOE_VSA2',
        'PEOE_VSA3', 'PEOE_VSA4', 'PEOE_VSA5', 'PEOE_VSA6', 'PEOE_VSA7', 'PEOE_VSA8', 'PEOE_VSA9',
        'SMR_VSA1', 'SMR_VSA10', 'SMR_VSA2', 'SMR_VSA3', 'SMR_VSA4', 'SMR_VSA5', 'SMR_VSA6',
        'SMR_VSA7', 'SMR_VSA8', 'SMR_VSA9', 'SlogP_VSA1', 'SlogP_VSA10', 'SlogP_VSA11',
        'SlogP_VSA12', 'SlogP_VSA2', 'SlogP_VSA3', 'SlogP_VSA4', 'SlogP_VSA5', 'SlogP_VSA6',
        'SlogP_VSA7', 'SlogP_VSA8', 'SlogP_VSA9', 'TPSA', 'EState_VSA1', 'EState_VSA10',
        'EState_VSA11', 'EState_VSA2', 'EState_VSA3', 'EState_VSA4', 'EState_VSA5', 'EState_VSA6',
        'EState_VSA7', 'EState_VSA8', 'EState_VSA9', 'VSA_EState1', 'VSA_EState10', 'VSA_EState2',
        'VSA_EState3', 'VSA_EState4', 'VSA_EState5', 'VSA_EState6', 'VSA_EState7', 'VSA_EState8',
        'VSA_EState9', 'FractionCSP3', 'HeavyAtomCount', 'NHOHCount', 'NOCount',
        'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles', 'NumAliphaticRings',
        'NumAromaticCarbocycles', 'NumAromaticHeterocycles', 'NumAromaticRings', 'NumHAcceptors',
        'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds', 'NumSaturatedCarbocycles',
        'NumSaturatedHeterocycles', 'NumSaturatedRings', 'RingCount', 'MolLogP', 'MolMR',
        'fr_Al_COO', 'fr_Al_OH', 'fr_Al_OH_noTert', 'fr_ArN', 'fr_Ar_COO', 'fr_Ar_N', 'fr_Ar_NH',
        'fr_Ar_OH', 'fr_COO', 'fr_COO2', 'fr_C_O', 'fr_C_O_noCOO', 'fr_C_S', 'fr_HOCCN', 'fr_Imine',
        'fr_NH0', 'fr_NH1', 'fr_NH2', 'fr_N_O', 'fr_Ndealkylation1', 'fr_Ndealkylation2',
        'fr_Nhpyrrole', 'fr_SH', 'fr_aldehyde', 'fr_alkyl_carbamate', 'fr_alkyl_halide',
        'fr_allylic_oxid', 'fr_amide', 'fr_amidine', 'fr_aniline', 'fr_aryl_methyl', 'fr_azide',
        'fr_azo', 'fr_barbitur', 'fr_benzene', 'fr_benzodiazepine', 'fr_bicyclic', 'fr_diazo',
        'fr_dihydropyridine', 'fr_epoxide', 'fr_ester', 'fr_ether', 'fr_furan', 'fr_guanido',
        'fr_halogen', 'fr_hdrzine', 'fr_hdrzone', 'fr_imidazole', 'fr_imide', 'fr_isocyan',
        'fr_isothiocyan', 'fr_ketone', 'fr_ketone_Topliss', 'fr_lactam', 'fr_lactone', 'fr_methoxy',
        'fr_morpholine', 'fr_nitrile', 'fr_nitro', 'fr_nitro_arom', 'fr_nitro_arom_nonortho',
        'fr_nitroso', 'fr_oxazole', 'fr_oxime', 'fr_para_hydroxylation', 'fr_phenol',
        'fr_phenol_noOrthoHbond', 'fr_phos_acid', 'fr_phos_ester', 'fr_piperdine', 'fr_piperzine',
        'fr_priamide', 'fr_prisulfonamd', 'fr_pyridine', 'fr_quatN', 'fr_sulfide', 'fr_sulfonamd',
        'fr_sulfone', 'fr_term_acetylene', 'fr_tetrazole', 'fr_thiazole', 'fr_thiocyan',
        'fr_thiophene', 'fr_unbrch_alkane', 'fr_urea',
    ]
    molecule = Chem.MolFromSmiles(smiles)
    calculator = MoleculeDescriptors.MolecularDescriptorCalculator(selected_descriptors)
    return np.array(calculator.CalcDescriptors(molecule)).tolist()
def construct_input_from_smiles(smiles, max_len=200, global_feature='MACCS'):
    """Turn one SMILES string into the model inputs used for pre-training.

    The SMILES is tokenized, prefixed with ``[GLO]`` and padded with
    ``[PAD]`` to ``max_len + 1`` tokens. Every token that is treated as an
    atom receives the label vector of the next atom of the parsed molecule;
    every other token receives a placeholder vector filled with 2.

    :param smiles: SMILES string of the molecule.
    :param max_len: maximum number of SMILES tokens (excluding ``[GLO]``).
    :param global_feature: ``'MACCS'``, ``'ECFP4'`` or ``'RDKIT_des'``;
        selects the molecule-level label vector.
    :return: ``(tokens_idx, global_label_list, atom_labels_list,
        atom_mask_list)`` on success, where ``atom_mask_list`` holds 1 for
        atom tokens and 0 otherwise. On any failure (unparsable SMILES, more
        than ``max_len`` tokens, unknown ``global_feature``, error while
        computing labels) the sentinel ``(0, 0, 0, 0)`` is returned, which
        callers detect with ``tokens_idx != 0``.
    """
    failure = (0, 0, 0, 0)
    atom_token_list = ['c', 'C', 'O', 'N', 'n', '[C@H]', 'F', '[C@@H]', 'S', 'Cl', '[nH]', 's', 'o', '[C@]',
                       '[C@@]', '[O-]', '[N+]', 'Br', 'P', '[n+]', 'I', '[S+]', '[N-]', '[Si]', 'B', '[Se]', '[other_atom]']
    all_token_list = ['[PAD]', '[GLO]', 'c', 'C', '(', ')', 'O', '1', '2', '=', 'N', '3', 'n', '4', '[C@H]', 'F', '[C@@H]', '-', 'S', '/', 'Cl', '[nH]', 's', 'o', '5', '#', '[C@]', '[C@@]', '\\', '[O-]', '[N+]', 'Br', '6', 'P', '[n+]', '7', 'I', '[S+]', '8', '[N-]', '[Si]', 'B', '9', '[2H]', '[Se]', '[other_atom]', '[other_token]']
    # Dictionary mapping each vocabulary token to its index.
    word2idx = {w: i for i, w in enumerate(all_token_list)}
    atom_tokens = set(atom_token_list)
    try:
        token_list = smi_tokenizer(smiles)
        # Over-long inputs were always rejected at the end; reject them before
        # doing any of the molecule parsing and fingerprint work.
        if len(token_list) > max_len:
            return failure
        # Token sequence with the global token in front and padding behind.
        tokens = ['[GLO]'] + token_list + ['[PAD]'] * (max_len - len(token_list))
        mol = MolFromSmiles(smiles)
        # Placeholder label vector for non-atom tokens, as long as a real one.
        atom_mask_labels = [2] * len(atom_labels(mol.GetAtomWithIdx(0)))
        atom_labels_list = []
        atom_mask_list = []
        tokens_idx = []
        index = 0
        # NOTE(review): vocabulary tokens that are not in atom_token_list
        # (e.g. '[2H]') and unknown tokens without '[' do not advance the atom
        # index. If the parsed molecule still contains an atom for such a
        # token, the labels of all later atoms would be shifted - confirm.
        for token in tokens:
            if token in atom_tokens:
                is_atom, vocab_token = True, token
            elif token in word2idx:
                is_atom, vocab_token = False, token
            elif '[' in token:
                # Bracket expression outside the vocabulary: unknown atom.
                is_atom, vocab_token = True, '[other_atom]'
            else:
                is_atom, vocab_token = False, '[other_token]'
            if is_atom:
                atom_labels_list.append(atom_labels(mol.GetAtomWithIdx(index)))
                atom_mask_list.append(1)
                index = index + 1
            else:
                atom_labels_list.append(atom_mask_labels)
                atom_mask_list.append(0)
            tokens_idx.append(word2idx[vocab_token])
        if global_feature == 'MACCS':
            global_label_list = global_maccs_data(smiles)
        elif global_feature == 'ECFP4':
            global_label_list = global_ecfp4_data(smiles)
        elif global_feature == 'RDKIT_des':
            global_label_list = global_rdkit_des_data(smiles)
        else:
            # Previously an unbound-variable error swallowed by the handler;
            # the outcome for callers is the same failure sentinel.
            return failure
        return tokens_idx, global_label_list, atom_labels_list, atom_mask_list
    except Exception:
        # Best-effort conversion: a molecule that cannot be processed is
        # reported through the sentinel. Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit stop a long conversion run.
        return failure
def _build_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature):
    """Encode every SMILES with the given global feature and save the result with ``np.save``."""
    tokens_idx_list = []
    global_label_list = []
    atom_labels_list = []
    atom_mask_list = []
    total = len(smiles_list)
    for i, smiles in enumerate(smiles_list):
        tokens_idx, global_labels, token_atom_labels, atom_mask = construct_input_from_smiles(
            smiles, global_feature=global_feature)
        # construct_input_from_smiles signals failure with tokens_idx == 0.
        if tokens_idx != 0:
            tokens_idx_list.append(tokens_idx)
            global_label_list.append(global_labels)
            atom_labels_list.append(token_atom_labels)
            atom_mask_list.append(atom_mask)
            print('{}/{} is transformed!'.format(i+1, total))
        else:
            print('{} is transformed failed!'.format(smiles))
    pretrain_data_list = [tokens_idx_list, global_label_list, atom_labels_list, atom_mask_list]
    # The four columns have different nesting depths and lengths. Recent NumPy
    # versions refuse to build such a ragged array implicitly, so the object
    # dtype is requested explicitly (as the contrastive builders in this file do).
    pretrain_data_np = np.array(pretrain_data_list, dtype=object)
    np.save(output_smiles_path, pretrain_data_np)


def build_maccs_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature='MACCS'):
    """Build pre-training data whose global labels default to MACCS keys and save it.

    :param smiles_list: sequence of SMILES strings.
    :param output_smiles_path: target path handed to ``np.save``.
    :param global_feature: global feature name passed to
        ``construct_input_from_smiles``.
    :return: None. The saved array holds, in order, the token indices, global
        labels, atom labels and atom masks of all successfully converted
        molecules; failures are only reported on stdout.
    """
    _build_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature)


def build_ECFP4_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature='ECFP4'):
    """Build pre-training data whose global labels default to ECFP4 bits and save it.

    :param smiles_list: sequence of SMILES strings.
    :param output_smiles_path: target path handed to ``np.save``.
    :param global_feature: global feature name passed to
        ``construct_input_from_smiles``.
    :return: None. Same saved layout as ``build_maccs_pretrain_data_and_save``.
    """
    _build_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature)


def build_rdkit_des_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature='RDKIT_des'):
    """Build pre-training data whose global labels default to RDKit descriptors and save it.

    :param smiles_list: sequence of SMILES strings.
    :param output_smiles_path: target path handed to ``np.save``.
    :param global_feature: global feature name passed to
        ``construct_input_from_smiles``.
    :return: None. Same saved layout as ``build_maccs_pretrain_data_and_save``.
    """
    _build_pretrain_data_and_save(smiles_list, output_smiles_path, global_feature)
def build_chirality_pretrain_data_and_save(smiles_list, labels_list, output_smiles_path):
    """Build pre-training data with externally supplied global labels and save it.

    :param smiles_list: sequence of SMILES strings.
    :param labels_list: one label per SMILES, indexed like ``smiles_list``;
        each is stored as a one-element global label list.
    :param output_smiles_path: target path handed to ``np.save``.
    :return: None. The saved array holds, in order, the token indices, global
        labels, atom labels and atom masks of all successfully converted
        molecules; failures are only reported on stdout.
    """
    tokens_idx_list = []
    global_label_list = []
    atom_labels_list = []
    atom_mask_list = []
    total = len(smiles_list)
    for i, smiles in enumerate(smiles_list):
        # The computed global labels are discarded in favour of labels_list.
        tokens_idx, _, token_atom_labels, atom_mask = construct_input_from_smiles(smiles)
        if tokens_idx != 0:
            tokens_idx_list.append(tokens_idx)
            global_label_list.append([labels_list[i]])
            atom_labels_list.append(token_atom_labels)
            atom_mask_list.append(atom_mask)
            print('{}/{} is transformed!'.format(i+1, total))
        else:
            print('{} is transformed failed!'.format(smiles))
    pretrain_data_list = [tokens_idx_list, global_label_list, atom_labels_list, atom_mask_list]
    # Ragged nesting: recent NumPy versions require an explicit object dtype.
    pretrain_data_np = np.array(pretrain_data_list, dtype=object)
    np.save(output_smiles_path, pretrain_data_np)
def build_mask(labels_list, mask_value=100):
    """Flag which labels are present.

    :param labels_list: iterable of label values.
    :param mask_value: value that marks a missing label.
    :return: list with 0 where the label equals ``mask_value`` and 1 elsewhere.
    """
    return [0 if label == mask_value else 1 for label in labels_list]
def multi_task_build_dataset(dataset_smiles, labels_list, smiles_name, global_feature='ECFP4'):
    """Encode every molecule of a task table into a training record.

    :param dataset_smiles: DataFrame with the SMILES column, the label
        columns and a ``'group'`` column.
    :param labels_list: list of label column names.
    :param smiles_name: name of the SMILES column.
    :param global_feature: global feature name passed to
        ``construct_input_from_smiles`` (its global labels are not used here).
    :return: list of ``[smiles, token_idx, labels, mask, group]`` records for
        the molecules that could be converted; failures are reported on stdout.
    """
    dataset = []
    failed_molecule = []
    labels = dataset_smiles[labels_list]
    split_index = dataset_smiles['group']
    smilesList = dataset_smiles[smiles_name]
    molecule_number = len(smilesList)
    for i, smiles in enumerate(smilesList):
        token_idx, _, _, _ = construct_input_from_smiles(smiles, global_feature=global_feature)
        if token_idx != 0:
            # i is a position produced by enumerate, so rows are fetched by
            # position; label-based .loc only matched for a default 0..n-1 index.
            row_labels = labels.iloc[i]
            # 123456 is the filler the callers in this file put into missing labels.
            mask = build_mask(row_labels, mask_value=123456)
            molecule = [smiles, token_idx, row_labels.values.tolist(), mask, split_index.iloc[i]]
            dataset.append(molecule)
            print('{}/{} molecule is transformed! {} is transformed failed!'.format(i + 1, molecule_number,
                                                                                   len(failed_molecule)))
        else:
            print('{} is transformed failed!'.format(smiles))
            molecule_number = molecule_number - 1
            failed_molecule.append(smiles)
    print('{}({}) is transformed failed!'.format(failed_molecule, len(failed_molecule)))
    return dataset
def _build_split_dataset_and_save(origin_path, save_path, task_list_selected, **build_kwargs):
    """Read a task CSV, encode it with ``multi_task_build_dataset`` and save the columns."""
    # Missing labels are replaced by the filler 123456, which the mask
    # construction in multi_task_build_dataset recognises.
    data_origin = pd.read_csv(origin_path).fillna(123456)
    if task_list_selected is not None:
        labels_list = task_list_selected
    else:
        labels_list = [x for x in data_origin.columns if x not in ['smiles', 'group']]
    data_set_gnn = multi_task_build_dataset(dataset_smiles=data_origin, labels_list=labels_list,
                                            smiles_name='smiles', **build_kwargs)
    smiles, token_idx, labels, mask, split_index = map(list, zip(*data_set_gnn))
    dataset_list = [smiles, token_idx, labels, mask, split_index]
    # Columns mix strings and lists of different lengths; recent NumPy
    # versions need an explicit object dtype for such ragged input.
    dataset_np = np.array(dataset_list, dtype=object)
    np.save(save_path, dataset_np)
    print('Molecules graph is saved!')


def built_data_and_save_for_splited(
        origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv',
        save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy',
        task_list_selected=None):
    """Encode a pre-split task CSV and save it as a ``.npy`` file.

    :param origin_path: CSV with a ``smiles`` column, a ``group`` column and
        one column per task.
    :param save_path: target path handed to ``np.save``.
    :param task_list_selected: task columns to keep; all non-``smiles``/
        ``group`` columns when None.
    :return: None. Saved rows are smiles, token indices, labels, masks, groups.
    :raises ValueError: from unpacking when no molecule could be converted.
    """
    # No global feature is passed: multi_task_build_dataset uses its own default.
    _build_split_dataset_and_save(origin_path, save_path, task_list_selected)


def built_ECFP4_data_and_save_for_splited(
        origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv',
        save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy',
        task_list_selected=None):
    """Same as ``built_data_and_save_for_splited`` with ``global_feature='ECFP4'``.

    :param origin_path: CSV with a ``smiles`` column, a ``group`` column and
        one column per task.
    :param save_path: target path handed to ``np.save``.
    :param task_list_selected: task columns to keep; all when None.
    :return: None.
    """
    _build_split_dataset_and_save(origin_path, save_path, task_list_selected,
                                  global_feature='ECFP4')


def built_rdkit_des_data_and_save_for_splited(
        origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv',
        save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy',
        task_list_selected=None):
    """Same as ``built_data_and_save_for_splited`` with ``global_feature='RDKIT_des'``.

    :param origin_path: CSV with a ``smiles`` column, a ``group`` column and
        one column per task.
    :param save_path: target path handed to ``np.save``.
    :param task_list_selected: task columns to keep; all when None.
    :return: None.
    """
    _build_split_dataset_and_save(origin_path, save_path, task_list_selected,
                                  global_feature='RDKIT_des')
def contrastive_aug_build_dataset(dataset_smiles, labels_list, smiles_name_list):
    """Encode every molecule together with its augmented SMILES variants.

    :param dataset_smiles: DataFrame with the SMILES columns, the label
        columns and a ``'group'`` column.
    :param labels_list: list of label column names.
    :param smiles_name_list: SMILES column names; the first one names the
        molecule in the output and in the log messages.
    :return: list of ``[smiles, labels, mask, group, token_idx_list]``
        records, where ``token_idx_list`` holds one token index list per
        SMILES column. A molecule is dropped when any of its variants fails.
    """
    dataset = []
    failed_molecule = []
    labels = dataset_smiles[labels_list]
    split_index = dataset_smiles['group']
    smilesList = dataset_smiles[smiles_name_list].values.tolist()
    molecule_number = len(smilesList)
    for i, smiles_variants in enumerate(smilesList):
        token_idx_list = [construct_input_from_smiles(smiles)[0] for smiles in smiles_variants]
        # A failed conversion is reported as the integer 0.
        if 0 not in token_idx_list:
            # i is a position produced by enumerate, so rows are fetched by
            # position; label-based .loc only matched for a default 0..n-1 index.
            row_labels = labels.iloc[i]
            mask = build_mask(row_labels, mask_value=123456)
            molecule = [smiles_variants[0], row_labels.values.tolist(), mask, split_index.iloc[i], token_idx_list]
            dataset.append(molecule)
            print('{}/{} molecule is transformed! {} is transformed failed!'.format(i + 1, molecule_number,
                                                                                   len(failed_molecule)))
        else:
            print('{} is transformed failed!'.format(smiles_variants[0]))
            molecule_number = molecule_number - 1
            failed_molecule.append(smiles_variants[0])
    print('{}({}) is transformed failed!'.format(failed_molecule, len(failed_molecule)))
    return dataset
def built_data_and_save_for_contrastive_splited(
        origin_path='G:/加密/Dataset/AttentionFP/ClinTox.csv',
        save_path='G:/加密/Dataset/AttentionFP/ClinTox.npy'):
    """Encode a pre-split task CSV with augmented SMILES columns and save it.

    :param origin_path: CSV with the columns ``smiles``, ``aug_smiles_0`` ..
        ``aug_smiles_3``, ``group`` and one column per task.
    :param save_path: target path handed to ``np.save``.
    :return: None. Saved rows are smiles, token indices (one list per SMILES
        variant), labels, masks, groups.
    :raises ValueError: from unpacking when no molecule could be converted.
    """
    # Missing labels are replaced by the filler 123456 used for masking.
    data_origin = pd.read_csv(origin_path).fillna(123456)
    smiles_list = ['smiles', 'aug_smiles_0', 'aug_smiles_1', 'aug_smiles_2', 'aug_smiles_3']
    non_label_columns = ['smiles', 'group'] + smiles_list
    labels_list = [x for x in data_origin.columns if x not in non_label_columns]
    data_set = contrastive_aug_build_dataset(dataset_smiles=data_origin, labels_list=labels_list,
                                             smiles_name_list=smiles_list)
    smiles, labels, mask, split_index, token_idx, = map(list, zip(*data_set))
    dataset_list = [smiles, token_idx, labels, mask, split_index]
    # Columns mix strings and nested lists of different shapes; recent NumPy
    # versions need an explicit object dtype for such ragged input.
    dataset_np = np.array(dataset_list, dtype=object)
    np.save(save_path, dataset_np)
    print('Molecules graph is saved!')
def build_maccs_pretrain_contrastive_data_and_save(smiles_list, output_smiles_path, global_feature='MACCS'):
    """Build contrastive pre-training data from groups of equivalent SMILES and save it.

    :param smiles_list: sequence of SMILES groups; each group lists the
        SMILES variants of one molecule, the first being the reference.
    :param output_smiles_path: target path handed to ``np.save``.
    :param global_feature: global feature name passed to
        ``construct_input_from_smiles``.
    :return: None. The saved object array holds the token indices of all
        variants plus the global labels, atom labels and atom masks of the
        reference SMILES. A molecule is skipped when any variant fails.
    """
    tokens_idx_all_list = []
    global_label_list = []
    atom_labels_list = []
    atom_mask_list = []
    total = len(smiles_list)
    for i, smiles_one_mol in enumerate(smiles_list):
        # Encode every variant once and keep the full result, so the
        # reference SMILES does not have to be encoded a second time.
        encoded = [construct_input_from_smiles(smiles, global_feature=global_feature)
                   for smiles in smiles_one_mol]
        tokens_idx_list = [item[0] for item in encoded]
        if 0 not in tokens_idx_list:
            _, global_labels, reference_atom_labels, atom_mask = encoded[0]
            tokens_idx_all_list.append(tokens_idx_list)
            global_label_list.append(global_labels)
            atom_labels_list.append(reference_atom_labels)
            atom_mask_list.append(atom_mask)
            print('{}/{} is transformed!'.format(i+1, total))
        else:
            print('{} is transformed failed!'.format(smiles_one_mol[0]))
    pretrain_data_list = [tokens_idx_all_list, global_label_list, atom_labels_list, atom_mask_list]
    pretrain_data_np = np.array(pretrain_data_list, dtype=object)
    np.save(output_smiles_path, pretrain_data_np)
def build_pretrain_chirality_R_S_contrastive_data_and_save(smiles_list, global_all_label_list, output_smiles_path, global_feature='MACCS'):
    """Build contrastive pre-training data with externally supplied global labels and save it.

    :param smiles_list: sequence of SMILES groups; each group lists the
        SMILES variants of one molecule, the first being the reference.
    :param global_all_label_list: global labels, indexed like ``smiles_list``.
    :param output_smiles_path: target path handed to ``np.save``.
    :param global_feature: global feature name passed to
        ``construct_input_from_smiles`` (its global labels are not stored).
    :return: None. The saved object array holds the token indices of all
        variants, the supplied global labels, and the atom labels and atom
        masks of the reference SMILES. A molecule is skipped when any
        variant fails.
    """
    tokens_idx_all_list = []
    global_label_list = []
    atom_labels_list = []
    atom_mask_list = []
    total = len(smiles_list)
    for i, smiles_one_mol in enumerate(smiles_list):
        # Encode every variant once and keep the full result, so the
        # reference SMILES does not have to be encoded a second time.
        encoded = [construct_input_from_smiles(smiles, global_feature=global_feature)
                   for smiles in smiles_one_mol]
        tokens_idx_list = [item[0] for item in encoded]
        if 0 not in tokens_idx_list:
            _, _, reference_atom_labels, atom_mask = encoded[0]
            tokens_idx_all_list.append(tokens_idx_list)
            global_label_list.append(global_all_label_list[i])
            atom_labels_list.append(reference_atom_labels)
            atom_mask_list.append(atom_mask)
            print('{}/{} is transformed!'.format(i+1, total))
        else:
            print('{} is transformed failed!'.format(smiles_one_mol[0]))
    pretrain_data_list = [tokens_idx_all_list, global_label_list, atom_labels_list, atom_mask_list]
    pretrain_data_np = np.array(pretrain_data_list, dtype=object)
    np.save(output_smiles_path, pretrain_data_np)
def _load_pretrain_shards(pretrain_data_path, suffix_template, shard_count):
    """Load shards 1..shard_count and return their four columns as flat lists."""
    tokens_idx_list = []
    global_labels_list = []
    atom_labels_list = []
    atom_mask_list = []
    for shard_no in range(1, shard_count + 1):
        shard_path = pretrain_data_path + suffix_template.format(shard_no)
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load
        # shard files from a trusted source.
        pretrain_data = np.load(shard_path, allow_pickle=True)
        # extend() appends in place; rebuilding the lists with "+" for every
        # shard copied all previously loaded records again.
        tokens_idx_list.extend(pretrain_data[0])
        global_labels_list.extend(pretrain_data[1])
        atom_labels_list.extend(pretrain_data[2])
        atom_mask_list.extend(pretrain_data[3])
        print(shard_path + ' is loaded')
    return tokens_idx_list, global_labels_list, atom_labels_list, atom_mask_list


def load_data_for_pretrain(pretrain_data_path='./data/CHEMBL_wash_500_pretrain', shard_count=80):
    """Load the pre-training shards ``<path>_1.npy`` .. ``<path>_<shard_count>.npy``.

    :param pretrain_data_path: common path prefix of the shard files.
    :param shard_count: number of shard files to read (numbered from 1).
    :return: list of ``[tokens_idx, global_labels, atom_labels, atom_mask]``
        records, one per molecule, in shard order.
    """
    columns = _load_pretrain_shards(pretrain_data_path, '_{}.npy', shard_count)
    # The four columns of a shard are expected to have equal length.
    return [list(record) for record in zip(*columns)]


def load_data_for_contrastive_aug_pretrain(pretrain_data_path='./data/CHEMBL_wash_500_pretrain', shard_count=80):
    """Load the contrastive shards ``<path>_contrastive_1.npy`` .. ``<path>_contrastive_<shard_count>.npy``.

    :param pretrain_data_path: common path prefix of the shard files.
    :param shard_count: number of shard files to read (numbered from 1).
    :return: list of ``[tokens_idx, global_labels, atom_labels, atom_mask]``
        records, one per molecule, in shard order.
    """
    columns = _load_pretrain_shards(pretrain_data_path, '_contrastive_{}.npy', shard_count)
    return [list(record) for record in zip(*columns)]


def load_data_for_pretrain_rdkit_des(pretrain_data_path='./data/CHEMBL_wash_500_pretrain', shard_count=80):
    """Load pre-training shards and normalise their descriptor labels column-wise.

    Descriptor columns that contain a missing value after normalisation are
    dropped.

    :param pretrain_data_path: common path prefix of the shard files
        (``<path>_<n>.npy``).
    :param shard_count: number of shard files to read (numbered from 1).
    :return: ``(records, global_labels_dim)`` where every record is
        ``[tokens_idx, normalised_global_labels, atom_labels, atom_mask]``
        and ``global_labels_dim`` is the number of remaining descriptor columns.
    """
    tokens_idx_list, global_labels_list, atom_labels_list, atom_mask_list = _load_pretrain_shards(
        pretrain_data_path, '_{}.npy', shard_count)
    global_labels_pd = pd.DataFrame(global_labels_list)
    # NOTE(review): the denominator measures the spread around x.min(), not
    # around x.mean(), so this is not a standard z-score. Left unchanged
    # because existing models may depend on these values - confirm intent.
    global_labels_normal = global_labels_pd.apply(lambda x: (x - x.mean()) / math.sqrt(sum((x - x.min()) ** 2 / len(x))))
    global_labels_normal_final = global_labels_normal.dropna(axis=1, how='any')
    normalised_rows = global_labels_normal_final.values.tolist()
    pretrain_data_final = [
        [tokens_idx, normalised_rows[i], atom_labels_list[i], atom_mask_list[i]]
        for i, tokens_idx in enumerate(tokens_idx_list)
    ]
    # Column count taken from the frame shape; indexing row 1 for it failed
    # whenever only a single record was loaded.
    global_labels_dim = global_labels_normal_final.shape[1]
    return pretrain_data_final, global_labels_dim


def load_data_for_augmentation_pretrain(pretrain_data_path='./data/CHEMBL_wash_500_pretrain', shard_count=40):
    """Load the augmentation pre-training shards ``<path>_1.npy`` .. ``<path>_<shard_count>.npy``.

    :param pretrain_data_path: common path prefix of the shard files.
    :param shard_count: number of shard files to read (numbered from 1).
    :return: list of ``[tokens_idx, global_labels, atom_labels, atom_mask]``
        records, one per molecule, in shard order.
    """
    columns = _load_pretrain_shards(pretrain_data_path, '_{}.npy', shard_count)
    return [list(record) for record in zip(*columns)]
def load_data_for_splited(data_path='example.npy'):
    """Load a saved task dataset and partition it by its stored split labels.

    :param data_path: ``.npy`` file whose rows are smiles, token indices,
        labels, masks and group names.
    :return: ``(train_set, val_set, test_set, task_number)``. Each set is a
        list of ``[smiles, tokens_idx, labels, mask]`` records; group
        ``'training'`` goes to the training set, ``'val'`` to the validation
        set and every other group to the test set.
    :raises IndexError: when the file contains no molecule.
    """
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
    data = np.load(data_path, allow_pickle=True)
    smiles_list, tokens_idx_list, labels_list, mask_list, group_list = (
        data[0], data[1], data[2], data[3], data[4])
    train_set = []
    val_set = []
    test_set = []
    # Number of tasks read from the first molecule; reading the second one
    # failed for datasets that hold a single molecule.
    task_number = len(labels_list[0])
    for i, group in enumerate(group_list):
        molecule = [smiles_list[i], tokens_idx_list[i], labels_list[i], mask_list[i]]
        if group == 'training':
            train_set.append(molecule)
        elif group == 'val':
            val_set.append(molecule)
        else:
            test_set.append(molecule)
    print('Training set: {}, Validation set: {}, Test set: {}, task number: {}'.format(
        len(train_set), len(val_set), len(test_set), task_number))
    return train_set, val_set, test_set, task_number
def load_data_for_random_splited(data_path='example.npy', shuffle=True):
    """Load a saved task dataset and partition it, optionally with shuffled split labels.

    With ``shuffle`` the stored group names are permuted among the molecules,
    which keeps the split sizes but assigns molecules to splits at random.

    :param data_path: ``.npy`` file whose rows are smiles, token indices,
        labels, masks and group names.
    :param shuffle: permute the group names before partitioning.
    :return: ``(train_set, val_set, test_set, task_number)``. Each set is a
        list of ``[smiles, tokens_idx, labels, mask]`` records; group
        ``'training'`` goes to the training set, ``'val'`` to the validation
        set and every other group to the test set.
    :raises IndexError: when the file contains no molecule.
    """
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
    data = np.load(data_path, allow_pickle=True)
    smiles_list, tokens_idx_list, labels_list, mask_list, group_list = (
        data[0], data[1], data[2], data[3], data[4])
    if shuffle:
        random.shuffle(group_list)
        print(group_list)
    train_set = []
    val_set = []
    test_set = []
    # Number of tasks read from the first molecule; reading the second one
    # failed for datasets that hold a single molecule.
    task_number = len(labels_list[0])
    for i, group in enumerate(group_list):
        molecule = [smiles_list[i], tokens_idx_list[i], labels_list[i], mask_list[i]]
        if group == 'training':
            train_set.append(molecule)
        elif group == 'val':
            val_set.append(molecule)
        else:
            test_set.append(molecule)
    print('Training set: {}, Validation set: {}, Test set: {}, task number: {}'.format(
        len(train_set), len(val_set), len(test_set), task_number))
    return train_set, val_set, test_set, task_number
def task_dataset_analyze(data_path='example.npy', max_len=201, pad_idx=0,
                         other_atom_idx=45, other_token_idx=46):
    """Print per-split token statistics for a pre-built task dataset.

    The ``.npy`` file is read with ``allow_pickle=True``; entry 1 holds the
    token-index sequence of every molecule and entry 4 the matching group
    name. For each of the training, validation and test splits one line is
    printed with the molecule count, the number of pad tokens (and their
    percentage of ``mol count * max_len``), the number of "other atom" tokens
    and the number of "other token" tokens.

    Args:
        data_path: Path of the ``.npy`` file to load.
        max_len: Token count per molecule used as the denominator of the pad
            percentage (presumably the padded sequence length -- confirm).
        pad_idx: Token index counted as padding.
        other_atom_idx: Token index counted as "other atom".
        other_token_idx: Token index counted as "other token".

    Returns:
        None. The statistics are written to standard output only.
    """
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # dataset files that come from a trusted source.
    data = np.load(data_path, allow_pickle=True)
    tokens_idx_list = data[1]
    group_list = data[4]

    report_order = ('Training', 'Validation', 'Test')
    stats = {}
    for name in report_order:
        stats[name] = {'mol': 0, 'pad': 0, 'other_atom': 0, 'other_token': 0}

    for i, group in enumerate(group_list):
        tokens_idx_np = np.array(tokens_idx_list[i])
        if group == 'training':
            split = stats['Training']
        elif group == 'val':
            split = stats['Validation']
        else:
            # Every group value other than 'training'/'val' is treated as test.
            split = stats['Test']
        split['mol'] += 1
        split['pad'] += int(np.count_nonzero(tokens_idx_np == pad_idx))
        split['other_atom'] += int(np.count_nonzero(tokens_idx_np == other_atom_idx))
        split['other_token'] += int(np.count_nonzero(tokens_idx_np == other_token_idx))

    for name in report_order:
        split = stats[name]
        total_tokens = split['mol'] * max_len
        # An empty split (e.g. a dataset without 'val' rows) would otherwise
        # divide by zero; report 0.0% for it instead.
        if total_tokens:
            pad_percent = round(split['pad'] / total_tokens * 100, 2)
        else:
            pad_percent = 0.0
        print('{} set, mol count: {}, pad count: {} {}%, other atom count: {}, other token count: {}'.format(
            name, split['mol'], split['pad'], pad_percent, split['other_atom'], split['other_token']))
--------------------------------------------------------------------------------
/data/ADMETlab_data/CYP2C9-sub_canonical.csv:
--------------------------------------------------------------------------------
1 | smiles,group,CYP2C9-sub
2 | COCc1c(C(=O)OC(C)C)ncc2[nH]c3ccc(OCc4ccccc4)cc3c12,training,0
3 | CC(=O)Nc1nnc(S(N)(=O)=O)s1,training,0
4 | CC(C)=O,training,0
5 | CC[C@@H]1[C@@H]2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@@H]([C@H]2[C@H]5O)N3[C@@H]1O,training,0
6 | CCC(=O)N(c1ccccc1)C1(COC)CCN(CCn2nnn(CC)c2=O)CC1,training,0
7 | CN(C)CCc1c[nH]c2ccc(CS(=O)(=O)N3CCCC3)cc12,training,0
8 | CCCN(CCC)C(=O)Cc1c(-c2ccc(Cl)cc2)nc2ccc(Cl)cn12,training,0
9 | Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2,training,0
10 | Nc1c(Br)cc(Br)cc1CNC1CCC(O)CC1,training,0
11 | CC[C@]1(c2ccc(N)cc2)CCC(=O)NC1=O,training,0
12 | CNn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc21,training,0
13 | CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,training,0
14 | O=C1c2ccccc2C(=O)c2ccccc21,training,0
15 | CCC[C@H]1C(=O)N2C(N(C)C)=Nc3ccc(C)cc3N2C1=O,training,0
16 | CN1CCc2cccc3c2[C@@H]1Cc1ccc(O)c(O)c1-3,training,0
17 | C[C@@H](O[C@H]1OCCN(Cc2n[nH]c(=O)[nH]2)[C@H]1c1ccc(F)cc1)c1cc(C(F)(F)F)cc(C(F)(F)F)c1,training,0
18 | CCN(CC)CCCN(c1ccccc1)C1Cc2ccccc2C1,training,0
19 | C[C@H]1CCN(C(=O)[C@H](CCCN=C(N)N)NS(=O)(=O)c2ccc3c(c2)C[C@@H](C)CN3)[C@@H](C(=O)O)C1,training,0
20 | O=C1CCc2ccc(OCCCCN3CCN(c4cccc(Cl)c4Cl)CC3)cc2N1,training,0
21 | C[C@@H]1CC[C@H]2[C@@H](C)C(=O)O[C@@H]3O[C@@]4(C)CC[C@@H]1[C@]32OO4,training,0
22 | C[C@@H]1CC[C@H]2[C@@H](C)[C@@H](OC(=O)CCC(=O)O)O[C@@H]3O[C@@]4(C)CC[C@@H]1[C@@]23OO4,training,0
23 | COC(=O)N[C@H](C(=O)N[C@@H](Cc1ccccc1)[C@H](O)CN(Cc1ccc(-c2ccccn2)cc1)NC(=O)[C@H](NC(=O)OC)C(C)(C)C)C(C)(C)C,training,0
24 | O=C1C(=O)c2ccccc2C(O)=C1C1CCC(c2ccc(Cl)cc2)CC1,training,0
25 | CNS(=O)(=O)Cc1ccc2[nH]cc(CCCN3CCN(c4ncncc4OC)CC3)c2c1,training,0
26 | CC[C@]1(C)C[C@@H](OC(=O)CSc2n[nH]c(N)n2)[C@]2(C)[C@@H](C)CC[C@]3(CCC(=O)[C@H]32)[C@@H](C)[C@@H]1O,training,0
27 | CN1CCC(=C2c3ccccc3CCc3cccnc32)CC1,training,0
28 | CN1CCC[C@H](n2nc(Cc3ccc(Cl)cc3)c3ccccc3c2=O)CC1,training,0
29 | CN1CCN(CCCCN2C(=O)CN(/N=C\c3ccc(-c4ccc(Cl)cc4)o3)C2=O)CC1,training,0
30 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)CN(C)[C@H](C)[C@@H](O)[C@]1(C)O,training,0
31 | COC(=O)C1=C(C)NC(C)=C(C(=O)O[C@H]2CCN(Cc3ccccc3)C2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0
32 | C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(Cl)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,training,0
33 | COC(=O)C1=C(C)NC(C)=C(C(=O)O[C@@H]2CCCN(Cc3ccccc3)C2)[C@@H]1c1cccc([N+](=O)[O-])c1,training,0
34 | C[C@@H](Cc1ccccc1)N(C)Cc1ccccc1,training,0
35 | CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,training,0
36 | CC(C)COC[C@@H](CN(Cc1ccccc1)c1ccccc1)N1CCCC1,training,0
37 | C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,training,0
38 | CC(C)NC[C@H](O)COc1ccc(CCOCC2CC2)cc1,training,0
39 | CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(=O)O,training,0
40 | c1ccc(-c2ccc([C@@H](c3ccccc3)n3ccnc3)cc2)cc1,training,0
41 | O[C@@](CCN1CCCCC1)(c1ccccc1)[C@@H]1C[C@@H]2C=C[C@H]1C2,training,0
42 | CCc1oc2ccccc2c1C(=O)c1cc(Br)c(O)c(Br)c1,training,0
43 | C[C@]12C=CC(=O)C=C1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12,training,0
44 | CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,0
45 | COc1cc(Br)c2oc(C3CCNCC3)cc2c1,training,0
46 | O=C1CN=C(c2ccccn2)c2cc(Br)ccc2N1,training,0
47 | CC(C)C[C@H]1C(=O)N2CCC[C@H]2[C@]2(O)O[C@](NC(=O)[C@@H]3C=C4c5cccc6[nH]c(Br)c(c56)C[C@H]4N(C)C3)(C(C)C)C(=O)N12,training,0
48 | O=C(CCCN1CCC(O)(c2ccc(Br)cc2)CC1)c1ccc(F)cc1,training,0
49 | Nc1nc(=O)c(Br)c(-c2ccccc2)[nH]1,training,0
50 | Cc1nnc2n1-c1sc(Br)cc1C(c1ccccc1Cl)=NC2,training,0
51 | CC(C)(C)N1CCC(c2ccccc2)(c2ccccc2)CC1,training,0
52 | CO[C@]12CC[C@@]3(C[C@@H]1[C@](C)(O)C(C)(C)C)[C@H]1Cc4ccc(O)c5c4[C@@]3(CCN1CC1CC1)[C@@H]2O5,training,0
53 | O=C1CC2(CCCC2)CC(=O)N1CCCCN1CCN(c2ncccn2)CC1,training,0
54 | CS(=O)(=O)OCCCCOS(C)(=O)=O,training,0
55 | CC(C)c1nc(COC(N)=O)n(Cc2ccncc2)c1Sc1cc(Cl)cc(Cl)c1,training,0
56 | CCOC(=O)n1ccn(C)c1=S,training,0
57 | CCC[C@@](C)(COC(N)=O)COC(=O)NC(C)C,training,0
58 | CC(C)(C)NC[C@H](O)COc1cccc2c1CCC(=O)N2,training,0
59 | COCc1c(C(C)C)nc(C(C)C)c(/C=C\[C@@H](O)C[C@@H](O)CC(=O)O)c1-c1ccc(F)cc1,training,0
60 | O=C(O)COCCN1CCN([C@H](c2ccccc2)c2ccc(Cl)cc2)CC1,training,0
61 | C[C@H]1O[C@]2(CS1)CN1CCC2CC1,training,0
62 | CCN(CC)CCC[C@H](C)Nc1ccnc2cc(Cl)ccc12,training,0
63 | CN(C)CC[C@H](c1ccc(Cl)cc1)c1ccccn1,training,0
64 | OC(O)C(Cl)(Cl)Cl,training,0
65 | C=C1CC[C@H](O)C/C1=C/C=C1\CCC[C@@]2(C)[C@H]1CC[C@@H]2[C@H](C)CCCC(C)C,training,0
66 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC/C=C/c2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0
67 | O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O,training,0
68 | CC(C)(Oc1ccc([C@@H]2CC2(Cl)Cl)cc1)C(=O)O,training,0
69 | CNC1=Nc2ccc(Cl)cc2C(c2ccccc2)=[N+]([O-])C1,training,0
70 | CCC[C@@H]1C[C@H](C(=O)N[C@H]([C@H](C)Cl)[C@H]2O[C@H](SC)[C@H](O)[C@@H](O)[C@H]2O)N(C)C1,training,0
71 | CC(C)/N=c1/cc2n(-c3ccc(Cl)cc3)c3ccccc3nc-2cc1Nc1ccc(Cl)cc1,training,0
72 | Cc1ncsc1CCCl,training,0
73 | OCCN1CCN(CC/C=C2/c3ccccc3Sc3ccc(Cl)cc32)CC1,training,0
74 | Clc1ccccc1C(c1ccccc1)(c1ccccc1)n1ccnc1,training,0
75 | COC(=O)[C@H]1[C@@H](OC(=O)c2ccccc2)C[C@@H]2CC[C@H]1N2C,training,0
76 | COc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0
77 | COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1[C@@H](NC(C)=O)CC2,training,0
78 | C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,training,0
79 | CN1C(=O)CC[C@H]1c1cccnc1,training,0
80 | CN(C)CCC=C1c2ccccc2C=Cc2ccccc21,training,0
81 | CC(=O)[C@@]1(O)CC[C@H]2[C@@H]3C=C(Cl)C4=CC(=O)[C@@H]5C[C@@H]5[C@]4(C)[C@H]3CC[C@@]21C,training,0
82 | CCN(CC)CCS(=O)(=O)[C@@H]1CCN2C(=O)c3coc(n3)CC(=O)C[C@H](O)C=C(C)C=CCNC(=O)C=C[C@@H](C)[C@@H](C(C)C)OC(=O)[C@@H]12,training,0
83 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(C)=O)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1,training,0
84 | O=C(O)c1ccc(OCCn2ccnc2)cc1,training,0
85 | CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N(CC(=O)O)C1Cc2ccccc2C1,training,0
86 | CNCCCN1c2ccccc2CCc2ccccc21,training,0
87 | C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,training,0
88 | Cc1cccc([C@H](C)c2c[nH]cn2)c1C,training,0
89 | ClC1=C(Cl)[C@]2(Cl)[C@@H]3[C@@H]4C[C@H]([C@@H]3[C@@]1(Cl)C2(Cl)Cl)[C@H]1O[C@@H]41,training,0
90 | CC/C(=C(\CC)c1ccc(O)cc1)c1ccc(O)cc1,training,0
91 | C[C@H]1O[C@@H](O[C@H]2[C@@H](O)C[C@H](O[C@H]3[C@@H](O)C[C@H](O[C@H]4CC[C@]5(C)[C@H]6CC[C@]7(C)[C@@H](C8=CC(=O)OC8)CC[C@]7(O)[C@@H]6CC[C@@H]5C4)O[C@@H]3C)O[C@@H]2C)C[C@H](O)[C@@H]1O,training,0
92 | C[C@H]1O[C@@H](O[C@H]2[C@@H](O)C[C@H](O[C@H]3[C@@H](O)C[C@H](O[C@H]4CC[C@]5(C)[C@H]6C[C@@H](O)[C@]7(C)[C@@H](C8=CC(=O)OC8)CC[C@]7(O)[C@@H]6CC[C@@H]5C4)O[C@@H]3C)O[C@@H]2C)C[C@H](O)[C@@H]1O,training,0
93 | NNc1nnc(NN)c2ccccc12,training,0
94 | COc1ccc(-c2cc(=O)c3c(O)cc(O[C@@H]4O[C@H](CO[C@@H]5O[C@@H](C)[C@H](O)[C@@H](O)[C@H]5O)[C@@H](O)[C@H](O)[C@H]4O)cc3o2)cc1O,training,0
95 | CCC(C)(C)NC[C@H](O)COc1ccccc1C(=O)CCc1ccccc1,training,0
96 | CCN(CC)C(=S)SSC(=S)N(CC)CC,training,0
97 | CS(C)=O,training,0
98 | CN(CCOc1ccc(NS(C)(=O)=O)cc1)CCc1ccc(NS(C)(=O)=O)cc1,training,0
99 | O=c1[nH]c2ccccc2n1CCCN1CCC(n2c(=O)[nH]c3cc(Cl)ccc32)CC1,training,0
100 | COc1cc2c(cc1OC)C(=O)[C@H](CC1CCN(Cc3ccccc3)CC1)C2,training,0
101 | COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)C[C@@](O)(C(=O)CO)C[C@@H]3O[C@H]1C[C@H](N)[C@H](O)[C@H](C)O1,training,0
102 | C[C@H]1c2cccc(O)c2C(=O)C2=C(O)[C@]3(O)C(=O)C(C(N)=O)=C(O)[C@@H](N(C)C)[C@@H]3[C@@H](O)[C@@H]21,training,0
103 | C[C@]12CC[C@H]3[C@@H](CC[C@H]4NC(=O)C=C[C@]34C)[C@@H]1CC[C@@H]2C(=O)Nc1cc(C(F)(F)F)ccc1C(F)(F)F,training,0
104 | CC(C)(C)c1ccc(C(=O)CCCN2CCC(OC(c3ccccc3)c3ccccc3)CC2)cc1,training,0
105 | NC(N)=Nc1nc(CSCC/N=C/NS(=O)(=O)c2ccc(Br)cc2)cs1,training,0
106 | CNC(=O)c1cccc(NCC(=O)NCCc2ccc(OC)c(OC)c2)c1,training,0
107 | Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1,training,0
108 | CC1=C(C(=O)OCCN(Cc2ccccc2)c2ccccc2)[C@H](c2cccc([N+](=O)[O-])c2)C(P2(=O)OCC(C)(C)CO2)=C(C)N1,training,0
109 | CCOCn1c(Cc2ccccc2)c(C(C)C)c(=O)[nH]c1=O,training,0
110 | COc1ccc(C(=O)Nc2ccccc2CC[C@H]2CCCCN2C)cc1,training,0
111 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNCC3)nc21,training,0
112 | NC1=NC[C@@H]2c3ccccc3Cc3ccccc3N12,training,0
113 | COC(=O)[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@@]23O[C@@H]2C[C@@]2(C)[C@@H](CC[C@@]24CCC(=O)O4)[C@H]13,training,0
114 | CCCCc1ncc(/C=C(/Cc2cccs2)C(=O)O)n1Cc1ccc(C(=O)O)cc1,training,0
115 | C[C@H](CO)NC(=O)[C@@H]1C=C2c3cccc4[nH]cc(c34)C[C@H]2N(C)C1,training,0
116 | CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4C(=O)[C@H](Cc4ccccc4)N3C2=O)C=C2c3cccc4[nH]cc(c34)C[C@H]21,training,0
117 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,0
118 | CC[C@]1(C)CC(=O)NC1=O,training,0
119 | CCN1C(=O)N[C@H](c2ccccc2)C1=O,training,0
120 | CCOc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0
121 | CCOC(=O)c1cncn1[C@H](C)c1ccccc1,training,0
122 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3C(=C)C[C@@]21CC,training,0
123 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](C)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,0
124 | COc1ccc(C(C)C)cc1CN[C@H]1C2CCN(CC2)[C@H]1C(c1ccccc1)c1ccccc1,training,0
125 | NC(N)=Nc1nc(CSCC/C(N)=N\S(N)(=O)=O)cs1,training,0
126 | CC(C)OC(=O)C(C)(C)Oc1ccc(C(=O)c2ccc(Cl)cc2)cc1,training,0
127 | CC(C)(C(=O)O)c1ccc([C@@H](O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0
128 | CC(C)(C)NC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4NC(=O)C=C[C@]4(C)[C@H]3CC[C@]12C,training,0
129 | O=C(NC[C@H]1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F,training,0
130 | Cn1cc(S(C)=O)c(=O)c2ccc(F)cc21,training,0
131 | Fc1cccc2c1O[C@H]1CNC[C@@H]1O2,training,0
132 | CN1CCN(C2=Nc3cc(F)ccc3Cc3ccccc32)CC1,training,0
133 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,0
134 | O=c1[nH]cc(F)c(=O)[nH]1,training,0
135 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@](C)(F)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,0
136 | CCC(=O)O[C@]1(C(=O)SCF)[C@H](C)C[C@H]2[C@@H]3C[C@H](F)C4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@@]21C,training,0
137 | CN[C@@H]1CCc2[nH]c3ccc(C(N)=O)cc3c2C1,training,0
138 | Cc1nc2c([nH]1)c(=O)n(C)c(=O)n2Cc1ccco1,training,0
139 | COc1ccc2c3c1O[C@H]1C[C@@H](O)C=C[C@@]31CCN(C)C2,training,0
140 | COc1ccc(CCN(C)CCC[C@@](C#N)(c2cc(OC)c(OC)c(OC)c2)C(C)C)cc1OC,training,0
141 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CC[C@H]4C[C@](C)(O)CC[C@]4(C)[C@H]3CC[C@]12C,training,0
142 | COc1c(N2CCN[C@@H](C)C2)c(F)cc2c(=O)c(C(=O)O)cn(C3CC3)c12,training,0
143 | CC1(C)CC(=O)N(CCCCN2CCN(c3ncccn3)CC2)C(=O)C1,training,0
144 | CN1[C@H]2CCC[C@@H]1CC(NC(=O)c1nn(C)c3ccccc13)C2,training,0
145 | Cc1c(F)c(N2CCN[C@H](C)C2)cc2c1c(=O)c(C(=O)O)cn2C1CC1,training,0
146 | COC1=CC(=O)C[C@@H](C)[C@]12Oc1c(Cl)c(OC)cc(OC)c1C2=O,training,0
147 | NNc1nncc2ccccc12,training,0
148 | CN1CCCN([C@H](c2ccccc2)c2ccc(Cl)cc2)CC1,training,0
149 | CCN(CCO)CCC[C@H](C)Nc1ccnc2cc(Cl)ccc12,training,0
150 | CC[C@H]1CN2CC[C@H]1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12,training,0
151 | COc1ccc2c3c1O[C@H]1C(=O)CC[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0
152 | OCCOCCN1CCN([C@H](c2ccccc2)c2ccc(Cl)cc2)CC1,training,0
153 | COc1cc(C(C)=O)ccc1OCCCN1CCC(c2noc3cc(F)ccc23)CC1,training,0
154 | O=C(NC1CCN(CCc2c[nH]c3ccccc23)CC1)c1ccccc1,training,0
155 | CCc1c2c(nc3ccc(OC(=O)N4CCC(N5CCCCC5)CC4)cc13)-c1cc3c(c(=O)n1C2)COC(=O)[C@]3(O)CC,training,0
156 | O=C(O)CCCC/C=C(\c1ccccc1)c1cccnc1,training,0
157 | Clc1ccc([C@@H](Cn2ccnc2)OCc2c(Cl)cccc2Cl)c(Cl)c1,training,0
158 | FC(F)O[C@@H](Cl)C(F)(F)F,training,0
159 | NNC(=O)c1ccncc1,training,0
160 | O=[N+]([O-])O[C@H]1CO[C@H]2[C@@H]1OC[C@H]2O[N+](=O)[O-],training,0
161 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc2nonc12,training,0
162 | CO[C@@H]1[C@@H](O[C@@H]2O[C@H](C)[C@@H](O[C@H]3C[C@@](C)(O)[C@@H](OC(=O)CC(C)C)[C@H](C)O3)[C@H](N(C)C)[C@H]2O)[C@@H](CC=O)C[C@@H](C)[C@@H](O)C=CC=CC[C@@H](C)OC(=O)C[C@H]1OC(C)=O,training,0
163 | CC(=O)N1CCN(c2ccc(OC[C@H]3CO[C@](Cn4ccnc4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1,training,0
164 | C[C@@H](C(=O)O)c1cccc(C(=O)c2ccccc2)c1,training,0
165 | C[C@@H](CCc1ccccc1)NC[C@H](O)c1ccc(O)c(C(N)=O)c1,training,0
166 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)(C)CN(C)CCC(c2ccccc2)c2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0
167 | N#Cc1ccc(C(c2ccc(C#N)cc2)n2cncn2)cc1,training,0
168 | CN(C)c1ccc([C@H]2C[C@@]3(C)[C@@H](CC[C@@]3(O)/C=C/CO)[C@@H]3CCC4=CC(=O)CCC4=C32)cc1,training,0
169 | CC(=O)NC[C@H]1CN(c2ccc(N3CCOCC3)c(F)c2)C(=O)O1,training,0
170 | N[C@@H](Cc1cc(I)c(Oc2ccc(O)c(I)c2)c(I)c1)C(=O)O,training,0
171 | CCN(CC)C(=O)N[C@H]1C=C2c3cccc4[nH]cc(c34)C[C@H]2N(C)C1,training,0
172 | c1ccc2cc(COC3CCNCC3)ccc2c1,training,0
173 | CN1[C@H](C[C@H](O)c2ccccc2)CCC[C@@H]1CC(=O)c1ccccc1,training,0
174 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN[C@H](C)C3)c(F)c21,training,0
175 | O=NN(CCCl)C(=O)NC1CCCCC1,training,0
176 | Cc1cccc(C)c1OCC(=O)N[C@@H](Cc1ccccc1)[C@@H](O)C[C@H](Cc1ccccc1)NC(=O)[C@H](C(C)C)N1CCCNC1=O,training,0
177 | CCCCN(CCCC)C[C@@H](O)c1cc(Cl)cc2c1-c1ccc(Cl)cc1/C2=C\c1ccc(Cl)cc1,training,0
178 | CCOC(=O)C[C@@H](SP(=S)(OC)OC)C(=O)OCC,training,0
179 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN2CCN(C(c3ccccc3)c3ccccc3)CC2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0
180 | CNCCCC12CCC(c3ccccc31)c1ccccc12,training,0
181 | COC(=O)Nc1nc2cc(C(=O)c3ccccc3)ccc2[nH]1,training,0
182 | CN1CCN=C(c2ccccc2)c2cc(Cl)ccc21,training,0
183 | CN(C)CC(Oc1ccccc1)Oc1ccccc1,training,0
184 | O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@H]1CCCCN1,training,0
185 | CCOC(=O)C1(c2ccccc2)CCN(C)CC1,training,0
186 | COC(=O)[C@H](c1ccccc1)[C@H]1CCCCN1,training,0
187 | CC[C@]1(c2ccccc2)C(=O)NC(=O)N(C)C1=O,training,0
188 | C[C@H]1C[C@H]2[C@@H]3CC[C@](O)(C(=O)CO)[C@@]3(C)C[C@H](O)[C@@H]2[C@@]2(C)C=CC(=O)C=C12,training,0
189 | c1ccc2c(c1)Sc1ccccc1N2C[C@H]1CN2CCC1CC2,training,0
190 | Cc1ccccc1-n1c(C)nc2ccccc2c1=O,training,0
191 | CNC(=O)O/N=C(\C)SC,training,0
192 | COc1c2occc2cc2ccc(=O)oc12,training,0
193 | CC(C)(C(=O)c1cccnc1)c1cccnc1,training,0
194 | CC[C@H](C)C(=O)O[C@H]1CCC=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0
195 | C[C@@H]1CO[C@]2(c3ccccc3Cl)c3cc(Cl)ccc3NC(=O)CN12,training,0
196 | COCC(=O)O[C@]1(CCN(C)CCCc2nc3ccccc3[nH]2)CCc2cc(F)ccc2[C@@H]1C(C)C,training,0
197 | Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1,training,0
198 | CO/N=C\C1=CCCN(C)C1,training,0
199 | Cc1cc(-c2ccccc2)nnc1NCCN1CCOCC1,training,0
200 | Nc1cc(N2CCCCC2)nc(N)[n+]1[O-],training,0
201 | CN(c1nccc(=O)[nH]1)C1CCN(c2nc3ccccc3n2Cc2ccc(F)cc2)CC1,training,0
202 | CCc1c(C)[nH]c2c1C(=O)[C@@H](CN1CCOCC1)CC2,training,0
203 | CCOC(=O)Nc1ccc2c(c1)N(C(=O)CCN1CCOCC1)c1ccccc1S2,training,0
204 | CCOc1cc(N)c(Cl)cc1C(=O)NC[C@@H]1CN(Cc2ccc(F)cc2)CCO1,training,0
205 | CCOc1ccc2ccccc2c1C(=O)N[C@@H]1C(=O)N2[C@@H](C(=O)O)C(C)(C)S[C@H]12,training,0
206 | CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21,training,0
207 | Cc1cccc(C)c1NC(=O)CN1CCCC1=O,training,0
208 | O=[N+]([O-])c1cncn1CCN1CCOCC1,training,0
209 | O=[N+]([O-])OCC(CO[N+](=O)[O-])O[N+](=O)[O-],training,0
210 | Fc1ccc(Cn2c(NC3CCNCC3)nc3ccccc32)cc1,training,0
211 | COc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)NCC[C@]314,training,0
212 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21C,training,0
213 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21CC,training,0
214 | CNCCC=C1c2ccccc2CCc2ccccc21,training,0
215 | Cc1cc2c(s1)Nc1ccccc1N=C2N1CCN(C)CC1,training,0
216 | CN(C)CC/C=C1\c2ccccc2COc2ccc(CC(=O)O)cc21,training,0
217 | Cc1c(-c2cnccn2)ssc1=S,training,0
218 | CN(C)c1ccc([C@H]2C[C@@]3(C)[C@@H](CC[C@]3(O)CCCO)[C@@H]3CCC4=CC(=O)CCC4=C32)cc1,training,0
219 | OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1,training,0
220 | NC(=O)N1c2ccccc2CC(=O)c2ccccc21,training,0
221 | Clc1ccc(CO/N=C(\Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1,training,0
222 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc2c1OCO2,training,0
223 | COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC,training,0
224 | CCC(=O)NS(=O)(=O)c1ccc(-c2c(-c3ccccc3)noc2C)cc1,training,0
225 | Fc1ccc([C@@H]2CCNC[C@H]2COc2ccc3c(c2)OCO3)cc1,training,0
226 | N=C(N)c1ccc(OCCCCCOc2ccc(C(=N)N)cc2)cc1,training,0
227 | CC(C)=CCN1CC[C@]2(C)c3cc(O)ccc3C[C@H]1[C@H]2C,training,0
228 | CCC[C@@H](C)C1(CC)C(=O)NC(=O)NC1=O,training,0
229 | CCCN1C[C@H](CSC)C[C@@H]2c3cccc4[nH]cc(c34)C[C@H]21,training,0
230 | O=C1[C@H]2CCCC[C@H]2C(=O)N1CCCCN1CCN(c2nsc3ccccc23)CC1,training,0
231 | c1ccc(C2(N3CCCCC3)CCCCC2)cc1,training,0
232 | CN1C(=O)C[C@@H](c2ccccc2)C1=O,training,0
233 | CC[C@@H]1C(=O)OC[C@@H]1Cc1cncn1C,training,0
234 | Cc1cccc(C)c1NC(=O)CC12CCCN1CCC2,training,0
235 | COc1ccc(-c2nc3cc(C4=NNC(=O)C[C@H]4C)ccc3[nH]2)cc1,training,0
236 | O=c1[nH]c2ccccc2n1C1CCN(CCCC(c2ccc(F)cc2)c2ccc(F)cc2)CC1,training,0
237 | C[C@H](N/C(=N\C#N)Nc1ccncc1)C(C)(C)C,training,0
238 | CCn1cc(C(=O)O)c(=O)c2cnc(N3CCNCC3)nc21,training,0
239 | C#CC[C@]12CCC(=O)C=C1CC[C@H]1[C@@H]3CCC(=O)[C@@]3(C)CC[C@@H]12,training,0
240 | CCCN[C@H]1CCc2nc(N)sc2C1,training,0
241 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC/C=C/c2ccccc2)[C@H]1c1cccc([N+](=O)[O-])c1,training,0
242 | C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43C)[C@@H]1CCC2=O,training,0
243 | C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,training,0
244 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,training,0
245 | CCC1(c2ccccc2)C(=O)NCNC1=O,training,0
246 | CCCO,training,0
247 | CN(C)C(=O)OC1=CC=[C-][N+](C)(Br)C1,training,0
248 | COc1ccc(CN(CCN(C)C)c2ccccn2)cc1,training,0
249 | CCCN1CCC[C@@H]2Cc3nc(N)ncc3C[C@H]21,training,0
250 | C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@H](O)c1ccnc2ccc(OC)cc12,training,0
251 | COc1ccccc1OC[C@@H](O)CN1CCN(CC(=O)Nc2c(C)cccc2C)CC1,training,0
252 | O=C(N[C@H](Cc1cc(=O)[nH]c2ccccc12)C(=O)O)c1ccc(Cl)cc1,training,0
253 | CCOc1ccccc1O[C@H](c1ccccc1)[C@H]1CNCCO1,training,0
254 | C[C@@](Cc1ccccc1)(NC(=O)CN)c1ccccc1,training,0
255 | COC(=O)[C@H]1[C@H]2C[C@@H]3c4[nH]c5cc(OC)ccc5c4CCN3C[C@H]2C[C@@H](OC(=O)c2cc(OC)c(OC)c(OC)c2)[C@@H]1OC,training,0
256 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(c3C2=O)C2=NC3(CCN(CC(C)C)CC3)NC2=C(NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C)C4=O,training,0
257 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(cc(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@H]1C,training,0
258 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N\N5CCN(C6CCCC6)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@H]1C,training,0
259 | Nc1nc2ccc(OC(F)(F)F)cc2s1,training,0
260 | Cc1nc2n(c(=O)c1CCN1CCC(c3noc4cc(F)ccc34)CC1)CCCC2,training,0
261 | CC[C@@]1(c2ccncc2)CCC(=O)NC1=O,training,0
262 | CCCC(=O)O[C@H]1[C@H](C)O[C@@H](O[C@@H]2[C@@H](C)O[C@@H](O[C@@H]3[C@@H](OC)[C@H](O)CC(=O)O[C@H](C)CC=CC=C[C@H](O)[C@H](C)C[C@@H]3CC=O)[C@H](O)[C@H]2N(C)C)C[C@@]1(C)OC(=O)CC,training,0
263 | CCCN(CCC)CCc1cccc2c1CC(=O)N2,training,0
264 | CCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,0
265 | O=C(CO)NCCCOc1cccc(CN2CCCCC2)c1,training,0
266 | OCc1cc([C@H](O)CNCCCCCCOCCCCc2ccccc2)ccc1O,training,0
267 | C=CCC1([C@@H](C)CCC)C(=O)NC(=O)NC1=O,training,0
268 | O=C1NCCN1CCN1CCC(c2cn(-c3ccc(F)cc3)c3ccc(Cl)cc23)CC1,training,0
269 | FCOC(C(F)(F)F)C(F)(F)F,training,0
270 | C1CCN2C[C@H]3C[C@H](CN4CCCC[C@H]34)[C@@H]2C1,training,0
271 | CC(=O)S[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@H]2CC[C@@]3(C)[C@@H](CC[C@@]34CCC(=O)O4)[C@@H]21,training,0
272 | CC(C)(C)[C@@H](O)/C=C\c1ccc2c(c1)OCO2,training,0
273 | CCC(=O)N(c1ccccc1)C1(COC)CCN(CCc2cccs2)CC1,training,0
274 | Clc1ccc(CS[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)cc1,training,0
275 | Cc1onc(NS(=O)(=O)c2ccc(N)cc2)c1C,training,0
276 | O=C(O)c1cc(/N=N\c2ccc(S(=O)(=O)Nc3ccccn3)cc2)ccc1O,training,0
277 | COc1cc(OC)nc(NS(=O)(=O)c2ccc(N)cc2)n1,training,0
278 | COc1ncnc(NS(=O)(=O)c2ccc(N)cc2)c1OC,training,0
279 | Cc1ccnc(NS(=O)(=O)c2ccc(N)cc2)n1,training,0
280 | Nc1ccc(S(=O)(=O)Nc2ccccn2)cc1,training,0
281 | CC1=C(CC(=O)O)c2cc(F)ccc2/C1=C\c1ccc(S(C)=O)cc1,training,0
282 | CCN1CCC[C@@H]1CNC(=O)c1cc(S(N)(=O)=O)ccc1OC,training,0
283 | Nc1c2c(nc3ccccc13)CCCC2,training,0
284 | C=CC[C@@H]1C=C(C)C[C@H](C)C[C@H](OC)[C@@H]2O[C@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC,training,0
285 | Cc1nc(C)c2c(n1)N(Cc1ccc(-c3ccccc3-c3nnn[nH]3)cc1)C(=O)CC2,training,0
286 | CCCCOC(=O)C(=O)Nc1cccc(-c2nnn[nH]2)c1,training,0
287 | CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2ccccc2C(=O)O)cc1,training,0
288 | CC(C)(C)c1ccc([C@H](O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,0
289 | CCN(CC)C(=O)N[C@H]1C[C@@H]2c3cccc4[nH]cc(c34)C[C@H]2N(C)C1,training,0
290 | C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)C=C[C@@]43C)[C@@H]1CCC(=O)O2,training,0
291 | CN(C)[C@@H]1C(O)=C(C(N)=O)C(=O)[C@@]2(O)C(O)=C3C(=O)c4c(O)cccc4[C@@](C)(O)[C@H]3C[C@@H]12,training,0
292 | CN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1,training,0
293 | c1ccc2[nH]c(-c3cscn3)nc2c1,training,0
294 | S=P(N1CC1)(N1CC1)N1CC1,training,0
295 | Cc1ccsc1C(=CCCN1CCC[C@@H](C(=O)O)C1)c1sccc1C,training,0
296 | CC(C)(C)NC[C@H](O)COc1nsnc1N1CCOCC1,training,0
297 | Clc1ccc([C@@H](Cn2ccnc2)OCc2ccsc2Cl)c(Cl)c1,training,0
298 | CCC[C@@]1(CCc2ccccc2)CC(=O)C([C@H](CC)c2cccc(NS(=O)(=O)c3ccc(C(F)(F)F)cn3)c2)=C(O)O1,training,0
299 | Cc1ccc(C(=O)c2cc(O)c(O)c([N+](=O)[O-])c2)cc1,training,0
300 | Cc1ccc(C(=O)[C@H](C)CN2CCCCC2)cc1,training,0
301 | CC1(C)O[C@@H]2CO[C@@]3(COS(N)(=O)=O)OC(C)(C)O[C@H]3[C@@H]2O1,training,0
302 | CCN(CC)c1cc(C)nc2ncnn12,training,0
303 | O=c1n(CCCN2CCN(c3cccc(Cl)c3)CC2)nc2ccccn12,training,0
304 | ClC=C(Cl)Cl,training,0
305 | O=C(CCCN1CCC(O)(c2cccc(C(F)(F)F)c2)CC1)c1ccc(F)cc1,training,0
306 | COc1cc(NCc2ccc3nc(N)nc(N)c3c2C)cc(OC)c1OC,training,0
307 | C[C@H](CN(C)C)CN1c2ccccc2CCc2ccccc21,training,0
308 | COc1ccc(CN2CCNCC2)c(OC)c1OC,training,0
309 | CN(C)CCN(Cc1ccccc1)c1ccccn1,training,0
310 | CO[C@H]1C[C@H](O[C@@H]2[C@@H](C)C(=O)O[C@H](C)[C@H](C)[C@H](OC(C)=O)[C@@H](C)C(=O)[C@@]3(CO3)C[C@H](C)[C@H](O[C@@H]3O[C@H](C)C[C@H](N(C)C)[C@H]3OC(C)=O)[C@H]2C)O[C@@H](C)[C@@H]1OC(C)=O,training,0
311 | O=C(OC1C[C-]2CC[C-](C1)[N+2]21(Cl)[C-2]CC[C-2]1)C(O)(c1ccccc1)c1ccccc1,training,0
312 | Fc1ccc(C(OCCN2CCN(CCCc3ccccc3)CC2)c2ccc(F)cc2)cc1,training,0
313 | COc1ccc(C(=O)N2CCN(c3ccc4c(c3)CCC(=O)N4)CC2)cc1OC,training,0
314 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C)[C@H]3[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,0
315 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C)[C@H]3[C@@](O)(C(N)=O)[C@H](O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,0
316 | CCC1=C[C@@H]2CN(C1)Cc1c([nH]c3ccccc13)[C@@](C(=O)OC)(c1cc3c(cc1OC)N(C)[C@H]1[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]4(CC)C=CCN5CC[C@]31[C@@H]54)C2,training,0
317 | Cn1nnc2ccc([C@H](c3ccc(Cl)cc3)n3cncn3)cc21,training,0
318 | COC(=O)[C@H]1[C@@H](O)CC[C@H]2CN3CCc4c([nH]c5ccccc45)[C@@H]3C[C@@H]21,training,0
319 | CCN(C(C)=O)c1cccc(C2=[N+]3[N-]CC(C#N)=C3NC=C2)c1,training,0
320 | CN1[C@H]2CC[C@@H]1CC(NC(=O)c1cc(Cl)cc3c1OC(C)(C)C3)C2,training,0
321 | O=C1Cc2cc(CCN3CCN(c4nsc5ccccc45)CC3)c(Cl)cc2N1,training,0
322 | CN(C)CCc1c[nH]c2ccc(C[C@H]3COC(=O)N3)cc12,training,0
323 | NS(=O)(=O)Cc1noc2ccccc12,training,0
324 | Nc1nc2cc(Cl)ccc2o1,training,0
325 | CCc1cccc2cc([C@H](O)CNC(C)(C)C)oc12,training,1
326 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@H]1CC=C(C=O)C[C@H]21,training,1
327 | COCC[C@@H]1C[C@@H]2CN3CCc4c([nH]c5ccccc45)[C@@](C(=O)OC)(C2)[C@@H]13,training,1
328 | CCCc1ccc2ccccc2n1,training,1
329 | COC(=O)[C@H](c1ccccc1Cl)N1CCC2=C(CC(=O)S2)C1,training,1
330 | CC(C)(C)C1CCC(C2=C(O)C(=O)c3ccccc3C2=O)CC1,training,1
331 | CCOc1ccc2nc3ccc(=O)cc-3oc2c1,training,1
332 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1[C@@H](O)C=C(C)C[C@@H]21,training,1
333 | C[C@@H](C#Cc1ccc(Cc2ccccc2)s1)N(O)C(N)=O,training,1
334 | Cc1[nH]cnc1CN1CCc2c(c3ccccc3n2C)C1=O,training,1
335 | CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,training,1
336 | Clc1ccc2c(c1)C(N1CCNCC1)=Nc1ccccc1O2,training,1
337 | CC(C)CN(C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,training,1
338 | CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,training,1
339 | CNCC[C@H](Oc1ccccc1C)c1ccccc1,training,1
340 | COc1ccc(-c2coc3cc(O)cc(O)c3c2=O)cc1,training,1
341 | O=C(O)C/C=C\C[C@@H]1[C@@H](NS(=O)(=O)c2ccccc2)[C@H]2CC[C@@H]1C2,training,1
342 | CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1
343 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)(C)C)cc2)nc(-c2ncccn2)nc1OCCO,training,1
344 | COc1ccc2c(C(=O)c3cc(OC)c(OC)c(OC)c3)c[nH]c2c1,training,1
345 | CCOc1nc2cccc(C(=O)O)c2n1Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1,training,1
346 | NC(=O)N1c2ccccc2C=Cc2ccccc21,training,1
347 | COc1ccccc1OCCNC[C@H](O)COc1cccc2[nH]c3ccccc3c12,training,1
348 | CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl,training,1
349 | C(=C/c1ccccc1)\CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,1
350 | CN1C(=O)CC(=O)N(c2ccccc2)c2cc(Cl)ccc21,training,1
351 | CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,training,1
352 | COC(=O)[C@@H](c1ccccc1Cl)N1CCc2sccc2C1,training,1
353 | CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,training,1
354 | NC(=O)c1c(Cl)c(-c2cccnc2)n2c1CCCC2,training,1
355 | Cc1ccc(S(=O)(=O)Nc2ccnn2-c2ccccc2)cc1,training,1
356 | Oc1ccc(OC(F)(F)F)cc1CN[C@@H]1CCCN[C@H]1c1ccccc1,training,1
357 | O=P1(N(CCCl)CCCl)NCCCO1,training,1
358 | Nc1ccc(S(=O)(=O)c2ccc(N)cc2)cc1,training,1
359 | COC(=O)c1cc(OC)c2c(c1-c1c(C(=O)OC)cc(OC)c3c1OCO3)OCO2,training,1
360 | CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,training,1
361 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@H]1CCC(C)=C[C@H]21,training,1
362 | CCN(CC)C(=O)SC,training,1
363 | COc1ccc2c(c1)[C@]13CCCC[C@@H]1[C@H](C2)N(C)CC3,training,1
364 | C=CCSSCC=C,training,1
365 | c1ccc2c(c1)ccc1cc3c(ccc4ccccc43)cc12,training,1
366 | c1ccc2c(c1)cc1ccc3cccc4c5ccccc5c2c1c34,training,1
367 | O=c1oc2ccccc2c(O)c1Cc1c(O)c2ccccc2oc1=O,training,1
368 | COc1ccc([C@@H]2Sc3ccccc3N(CCN(C)C)C(=O)[C@@H]2OC(C)=O)cc1,training,1
369 | Cc1c2ccccc2c(C)c2c1ccc1ccccc12,training,1
370 | O=C(OC1C[C@@H]2CC3C[C@H](C1)N2CC3=O)c1c[nH]c2ccccc12,training,1
371 | CCN[C@H]1C[C@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,1
372 | COc1cc2c(CCN3CCN(c4cccc(Cl)c4C)CC3)nn(Cc3c[nH]cn3)c2cc1OC,training,1
373 | CCCCc1nc(Cl)c(C=O)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1,training,1
374 | CN1CCC[C@@H]1C[C@@H]1CNc2ccc(CCS(=O)(=O)c3ccccc3)cc21,training,1
375 | Cc1c2ccncc2c(C)c2c1[nH]c1ccccc12,training,1
376 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CCC2=O,training,1
377 | CCc1cccc2c3c([nH]c12)[C@@](CC)(CC(=O)O)OCC3,training,1
378 | Cc1ccc(-c2ncc(Cl)cc2-c2ccc(S(C)(=O)=O)cc2)cn1,training,1
379 | CNCC[C@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1
380 | C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1,training,1
381 | CC(C)n1c(/C=C/[C@H](O)C[C@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,training,1
382 | COc1ccc(-c2coc3cc(O)ccc3c2=O)cc1,training,1
383 | CCC1=C(C)CN(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCC(C)CC3)cc2)C1=O,training,1
384 | Cc1cnc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)cn1,training,1
385 | COc1ccc(Cl)cc1C(=O)NCCc1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1,training,1
386 | O=S(Cc1cc(OCC2CC2)ccn1)c1nc2cc(F)ccc2[nH]1,training,1
387 | FC(F)(F)[C@H](Cl)Br,training,1
388 | COc1ccc2c(c1)[nH]c1c(C)nccc12,training,1
389 | CN1C(=O)[C@@](C)(C2=CCCCC2)C(=O)N=C1O,training,1
390 | O=P1(NCCCl)OCCCN1CCCl,training,1
391 | CN(C)CCCN1c2ccccc2CCc2ccccc21,training,1
392 | CC(C)(C)NC(=O)[C@@H]1CN(Cc2cccnc2)CCN1C[C@@H](O)C[C@@H](Cc1ccccc1)C(=O)N[C@H]1c2ccccc2C[C@H]1O,training,1
393 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1,training,1
394 | COc1ccc2c(c1)c(CC(=O)NCCc1ccccc1)c(C)n2C(=O)c1ccc(Cl)cc1,training,1
395 | CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1,training,1
396 | CN[C@@]1(c2ccccc2Cl)CCCCC1=O,training,1
397 | CCC(=O)C1(c2cccc(O)c2)CCN(C)CC1,training,1
398 | COc1ccc(N2CCc3c(NCCO)nc4c(OC(F)(F)F)cccc4c32)c(C)c1,training,1
399 | CCCCCCCCCCCC(=O)O,training,1
400 | CC1(C)Cc2c(-c3ccccc3)c(-c3ccc(Cl)cc3)c(CC(=O)OC[C@@H]3O[C@@H](O)[C@@H](O)[C@H](O)[C@@H]3O)n2C1,training,1
401 | CCCCC/C=C/C/C=C/CCCCCCCC(=O)O,training,1
402 | CCOC(=O)N1CCC(=C2c3ccc(Cl)cc3CCc3cccnc32)CC1,training,1
403 | CN1C(C(=O)Nc2ccccn2)=C(O)c2sc(Cl)cc2S1(=O)=O,training,1
404 | CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1,training,1
405 | Cc1ccccc1C(=O)N1CC/C(=N\OS(=O)(=O)O)c2ccc(Cl)cc21,training,1
406 | CCN[C@@H](C)Cc1ccc2c(c1)OCO2,training,1
407 | Cc1cnc(NC(=O)C2=C(O)c3ccccc3S(=O)(=O)N2C)s1,training,1
408 | CC[C@]1(c2ccccc2)NC(=O)N(C)C1=O,training,1
409 | CN1CCN2c3ccccc3Cc3ccccc3[C@@H]2C1,training,1
410 | CN1CCN2c3ncccc3Cc3ccccc3[C@H]2C1,training,1
411 | CN1CCN2c3ccccc3Cc3cccnc3[C@@H]2C1,training,1
412 | CN1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5,training,1
413 | COc1ccc(OC(=O)N(CC(=O)O)Cc2ccc(OCCc3nc(-c4ccccc4)oc3C)cc2)cc1,training,1
414 | CCCCCN(CCCCC)N=O,training,1
415 | CC(C)C1CCC(C(=O)N[C@H](Cc2ccccc2)C(=O)O)CC1,training,1
416 | CCN(CC)N=O,training,1
417 | CCCN(CCC)CCc1ccc(OC)c(OCCc2ccccc2)c1,training,1
418 | Cc1c(O)cccc1C(=O)N[C@@H](CSc1ccccc1)[C@H](O)CN1C[C@H]2CCCC[C@H]2C[C@H]1C(=O)NC(C)(C)C,training,1
419 | Cc1ccnc2c1NC(=O)c1cccnc1N2C1CC1,training,1
420 | CN1CCC[C@H]1c1cccnc1,training,1
421 | CCCCN(C)N=O,training,1
422 | CCN(C)N=O,training,1
423 | Cc1nccn1C[C@H]1CCc2c(c3ccccc3n2C)C1=O,training,1
424 | CN1CCN(CCCN2c3ccccc3Sc3ccccc32)CC1,training,1
425 | OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1,training,1
426 | c1ccc2c(c1)ccc1ccccc12,training,1
427 | CCC1(c2ccccc2)C(=O)NC(=O)NC1=O,training,1
428 | CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O,training,1
429 | O=C1NC(=O)C(c2ccccc2)(c2ccccc2)N1,training,1
430 | CCOP(=S)(OCC)SCSCC,training,1
431 | CN1C(C(=O)Nc2ccccn2)=C(O)c2ccccc2S1(=O)=O,training,1
432 | O=C(C1CCCCC1)N1CC(=O)N2CCc3ccccc3[C@H]2C1,training,1
433 | Fc1ccccc1C1=NCC(=S)N(CC(F)(F)F)c2ccc(Cl)cc21,training,1
434 | OCCOCCN1CCN(C2=Nc3ccccc3Sc3ccccc32)CC1,training,1
435 | O=C1C=C2CN([C@@H](C(=O)C3CC3)c3ccccc3F)CC[C@H]2S1,training,1
436 | COCCCOc1ccnc(CSc2nc3ccccc3[nH]2)c1C,training,1
437 | CCOc1cc(CC(=O)N[C@@H](CC(C)C)c2ccccc2N2CCCCC2)ccc1C(=O)O,training,1
438 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(/C=N\N5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@H]1C,training,1
439 | O=C(O)CCC/C=C\C[C@@H]1[C@@H](NS(=O)(=O)c2ccccc2)[C@H]2CC[C@@H]1C2,training,1
440 | Cc1ccsc1-c1ccc([C@@H](C)C(=O)O)cc1,training,1
441 | C=CCc1ccc2c(c1)OCO2,training,1
442 | CC1=C(C)C(=O)C([C@@H](CCCCCC(=O)O)c2ccccc2)=C(C)C1=O,training,1
443 | CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc21,training,1
444 | C[C@@H]1O[C@@]2(CS1)CN1CCC2CC1,training,1
445 | Nc1ccc(S(=O)(=O)Nc2ncccn2)cc1,training,1
446 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)no1,training,1
447 | O=C1C(CCS(=O)c2ccccc2)C(=O)N(c2ccccc2)N1c1ccccc1,training,1
448 | C[C@@H](C(=O)O)c1ccc(C(=O)c2cccs2)cc1,training,1
449 | O=c1[nH]c(=O)n([C@@H]2CCCO2)cc1F,training,1
450 | CN1/C(=C(/O)Nc2ccccn2)C(=O)c2sccc2S1(=O)=O,training,1
451 | O=C(O)CC[C@@H]1[C@@H](NS(=O)(=O)c2ccccc2)[C@H]2CC[C@@H]1C2,training,1
452 | O=C1CC[C@H](N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1
453 | O=C(Cn1c(=O)sc2ccc(Cl)cc21)N1CCN(CCO)CC1,training,1
454 | O=C(O)COc1ccc(C(=O)c2cccs2)c(Cl)c1Cl,training,1
455 | CNCC[C@@H](Oc1ccccc1C)c1ccccc1,training,1
456 | CN(C)CCOc1ccc(/C(=C(\CCCl)c2ccccc2)c2ccccc2)cc1,training,1
457 | COc1cccc(OC)c1-c1ccc(C[C@H](NC(=O)c2c(Cl)cccc2Cl)C(=O)O)cc1,training,1
458 | COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,training,1
459 | Cc1c(C)c2c(c(C)c1O)CC[C@](C)(COc1ccc(C[C@@H]3SC(=O)NC3=O)cc1)O2,training,1
460 | CN1[C@H]2CC[C@@H]1CC(OC(=O)c1c[nH]c3ccccc13)C2,training,1
461 | Cc1onc(-c2ccccc2)c1-c1ccc(S(N)(=O)=O)cc1,training,1
462 | CCCC(CCC)C(=O)O,training,1
463 | CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1)[C@H](C(=O)O)C(C)C,training,1
464 | COc1ccc([C@@H](CN(C)C)C2(O)CCCCC2)cc1,training,1
465 | COC[C@@H](c1ccc(C(F)(F)F)cc1)N1CCN(C2(C)CCN(C(=O)c3c(C)ncnc3C)CC2)C[C@@H]1C,training,1
466 | O=c1ccc2nc3ccc(OCOCc4ccccc4)cc3oc-2c1,training,1
467 | C[C@@H](c1ncncc1F)[C@](O)(Cn1cncn1)c1ccc(F)cc1F,training,1
468 | CC(=O)C[C@H](c1ccccc1)c1c(O)oc2ccccc2c1=O,training,1
469 | CC(=O)C[C@@H](c1ccccc1)c1c(O)oc2ccccc2c1=O,training,1
470 | COc1cc(C(=O)NS(=O)(=O)c2ccccc2C)ccc1Cc1cn(C)c2ccc(NC(=O)OC3CCCC3)cc12,training,1
471 | C[C@@H](C(=O)O)c1ccc2c(c1)CC(=O)c1ccccc1S2,training,1
472 | Cc1cn([C@H]2C[C@H](N=[N+][N-])[C@@H](CO)O2)c(=O)[nH]c1=O,training,1
473 | C[C@@H](c1cc2ccccc2s1)N(O)C(N)=O,training,1
474 | Cc1ccc(-c2nc3ccc(C)cn3c2CC(=O)N(C)C)cc1,training,1
475 | CC[C@@]1(c2ccccc2)NC(=O)N(C)C1=O,training,1
476 | O=C1CC[C@@H](N2C(=O)c3ccccc3C2=O)C(=O)N1,training,1
477 | COc1ccc(CCN(C)CCC[C@@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1
478 | CN1C(=O)[C@](C)(C2=CCCCC2)C(=O)N=C1O,training,1
479 | CC(=O)[C@@H]1CC[C@@H]2[C@H]3CCC4=CC(=O)CC[C@@]4(C)[C@@H]3CC[C@]12C,training,1
480 | CN(C)CC[C@@H](c1ccc(Br)cc1)c1ccccn1,training,1
481 | FC(F)(F)[C@@H](Cl)Br,training,1
482 | C(=Cc1ccccc1)CN1CCN(C(c2ccccc2)c2ccccc2)CC1,training,1
483 | CN1CC[C@]23c4c5ccc(O)c4O[C@H]2C(=O)CC[C@H]3[C@H]1C5,training,1
484 | CN(C)CCO[C@@H](c1ccc(Cl)cc1)c1ccccn1,training,1
485 | CN(C)CCC=C1c2ccccc2COc2ccccc21,training,1
486 | CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21,training,1
487 | Nc1ccn([C@H]2CC[C@@H](CO)O2)c(=O)n1,training,1
488 | CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1
489 | CO[C@H]1C=CO[C@@]2(C)Oc3c(C)c(O)c4c(O)c(c(C=NN5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=C[C@H](C)[C@H](O)[C@@H](C)[C@@H](O)[C@@H](C)[C@H](OC(C)=O)[C@@H]1C,training,1
490 | Cc1ccc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)nc1,training,1
491 | CN[C@]1(c2ccccc2Cl)CCCCC1=O,training,1
492 | CCCCC[C@H](O)C=C[C@H]1[C@H](O)C[C@@H]2OC(=CCCCC(=O)O)C[C@@H]21,training,1
493 | C[C@H](C(=O)O)c1ccc(C(=O)c2cccs2)cc1,training,1
494 | CCc1cccc2c3c([nH]c12)[C@](CC)(CC(=O)O)OCC3,training,1
495 | CN1CCN(C(=O)O[C@@H]2c3nccnc3C(=O)N2c2ccc(Cl)cn2)CC1,training,1
496 | COc1ccc(C[C@H](C)NC[C@@H](O)c2ccc(O)c(NC=O)c2)cc1,training,1
497 | CC(C)(C)c1ccc([C@@H](O)CCCN2CCC(C(O)(c3ccccc3)c3ccccc3)CC2)cc1,training,1
498 | CS(=O)(=O)Nc1ccc([N+](=O)[O-])cc1Oc1ccccc1,training,1
499 | Fc1ccc(C(c2ccc(F)cc2)N2CCN(CC=Cc3ccccc3)CC2)cc1,training,1
500 | CNCC[C@@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1,training,1
501 | CC(=O)[C@]1(O)Cc2c(O)c3c(c(O)c2[C@@H](O[C@H]2C[C@H](N)[C@H](O)[C@H](C)O2)C1)C(=O)c1ccccc1C3=O,training,1
502 | CN1C(C(=O)Nc2ccccn2)=C(O)c2sccc2S1(=O)=O,training,1
503 | O=C(NCCN1CCOCC1)c1ccc(Cl)cc1,training,1
504 | COc1ccccc1OCCNC[C@@H](O)COc1cccc2[nH]c3ccccc3c12,training,1
505 | COc1cc(N)c(Cl)cc1C(=O)N[C@@H]1CCN(CCCOc2ccc(F)cc2)C[C@H]1OC,training,1
506 | CN1C2CCC1CC(OC(=O)c1c[nH]c3ccccc13)C2,training,1
507 | CC(C)n1c(C=C[C@H](O)C[C@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,training,1
508 | CC1=C(C)C(=O)C([C@H](CCCCCC(=O)O)c2ccccc2)=C(C)C1=O,training,1
509 | COC(=O)[C@H](c1ccccc1Cl)N1CCc2sccc2C1,training,1
510 | COc1cc2c(cc1O)CCN[C@]21CS[C@@H]2c3c(OC(C)=O)c(C)c4c(c3[C@H](COC1=O)N1[C@@H](O)[C@@H]3Cc5cc(C)c(OC)c(O)c5[C@H]([C@H]21)N3C)OCO4,training,1
511 | CCN[C@@H]1C[C@@H](C)S(=O)(=O)c2sc(S(N)(=O)=O)cc21,training,1
512 | CC(C)C[C@H](NC(=O)[C@@H](Cc1ccccc1)NC(=O)c1cnccn1)B(O)O,training,1
513 | CCOC(=O)CN[C@@H](C(=O)N1CC[C@H]1C(=O)NCc1ccc(C=NNO)cc1)C1CCCCC1,training,1
514 | CC(C)CN(C[C@@H](OP(=O)(O)O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,training,1
515 | CC(C)c1nc(N(C)S(C)(=O)=O)nc(-c2ccc(F)cc2)c1C=C[C@H](O)C[C@@H](O)CC(=O)O,training,1
516 | CC(C)(C#N)c1cc(Cn2cncn2)cc(C(C)(C)C#N)c1,training,0
517 | COc1ccc(CCN2CCC(Nc3nc4ccccc4n3Cc3ccc(F)cc3)CC2)cc1,training,0
518 | CC1=CC(=O)C=C2CC[C@H]3[C@@H]4CCC(=O)[C@@]4(C)CC[C@@H]3[C@@]12C,training,0
519 | CC(C)c1cc(C(C)C)c(CC(=O)NS(=O)(=O)Oc2c(C(C)C)cccc2C(C)C)c(C(C)C)c1,training,0
520 | OC(c1ccccc1)(c1ccccc1)C1CCNCC1,training,0
521 | CC[C@@H](c1ccc(O)c(F)c1)[C@H](C)c1ccc(O)c(F)c1,training,0
522 | COc1cc(OC)c(C(=O)CCCN2CCCC2)c(OC)c1,training,0
523 | CCCCN1CCCC[C@H]1C(=O)Nc1c(C)cccc1C,training,0
524 | C[C@H](CS)C(=O)N1CCC[C@H]1C(=O)O,training,0
525 | C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](CC[C@@]21C)[C@@]1(C)CC[C@@H](O)C[C@@H]1C[C@H]3O,training,0
526 | O=C1CCc2cc(OCCCCc3nnnn3C3CCCCC3)ccc2N1,training,0
527 | CN1CCC[C@@H]1CCO[C@](C)(c1ccccc1)c1ccc(Cl)cc1,training,0
528 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=Cc5oncc5C[C@]4(C)[C@H]3CC[C@@]21C,training,0
529 | O=C1CN(/N=C\c2ccc(-c3ccc([N+](=O)[O-])cc3)o2)C(=O)N1,training,0
530 | COc1ccc2c3c1O[C@H]1[C@@H](O)CC[C@H]4[C@@H](C2)N(C)CC[C@]314,training,0
531 | CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4C(=O)[C@H](Cc4ccccc4)N3C2=O)C[C@@H]2c3cccc4[nH]cc(c34)C[C@H]21,training,0
532 | CCOCCn1c(N2CCCN(C)CC2)nc2ccccc21,training,0
533 | CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[C@H]1C(=O)O,training,0
534 | N#Cc1ccc([C@H]2CCCc3cncn32)cc1,training,0
535 | CCC(=O)N(c1ccccc1)C1CCN(CCc2ccccc2)CC1,training,0
536 | Cc1onc(-c2c(F)cccc2Cl)c1C(=O)N[C@@H]1C(=O)N2[C@@H](C(=O)O)C(C)(C)S[C@H]12,training,0
537 | N=C(N)NC[C@@H]1COc2ccccc2O1,training,0
538 | CC(C)Cn1cnc2c(N)nc3ccccc3c21,training,0
539 | CC[C@H](C)n1ncn(-c2ccc(N3CCN(c4ccc(OC[C@H]5CO[C@](Cn6cncn6)(c6ccc(Cl)cc6Cl)O5)cc4)CC3)cc2)c1=O,training,0
540 | Clc1cccc([C@H](c2ccc3nc[nH]c3c2)n2ccnc2)c1,training,0
541 | COC1=CC(=O)O[C@H]1[C@H](O)c1ccccc1Cl,training,0
542 | CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0
543 | O=C1c2c(O)ccc(O)c2C(=O)c2c(NCCNCCO)ccc(NCCNCCO)c21,training,0
544 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CCOc1ccccc1,training,0
545 | O=C(N[C@H]1CCS[C@H]2CCC[C@@H](C(=O)O)N2C1=O)[C@@H](S)Cc1ccccc1,training,0
546 | CC(C)NC[C@@H]1CCc2cc(CO)c([N+](=O)[O-])cc2N1,training,0
547 | C[C@H](CN(C)C)CN1c2ccccc2S(=O)(=O)c2ccccc21,training,0
548 | CC(C)(C)NC[C@H](O)COc1ccccc1C1CCCC1,training,0
549 | C1CCC(C(C[C@H]2CCCCN2)C2CCCCC2)CC1,training,0
550 | C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,training,0
551 | COc1cc(N[C@@H](C)CCCN)c2ncccc2c1,training,0
552 | CCC(=O)O[C@](Cc1ccccc1)(c1ccccc1)[C@H](C)CN(C)C,training,0
553 | CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,training,0
554 | CCN(CC)CCC[C@@H](C)Nc1c2ccc(Cl)cc2nc2ccc(OC)cc12,training,0
555 | CCN1CCC[C@H]1CNC(=O)c1c(OC)ccc(Br)c1OC,training,0
556 | CN(C(=O)c1c(O)n(C)c2ccccc2c1=O)c1ccccc1,training,0
557 | CC(C)C[C@H](N(C)C)C1(c2ccc(Cl)cc2)CCC1,training,0
558 | Nc1ccc(S(=O)(=O)Nc2ccnn2-c2ccccc2)cc1,training,0
559 | CCOc1ccccc1OCCN[C@H](C)Cc1ccc(OC)c(S(N)(=O)=O)c1,training,0
560 | O=C1Nc2c(O)cc(Cl)cc2[C@@](C#CC2CC2)(C(F)(F)F)O1,training,1
561 | C[C@H](NC(N)=O)c1cc2ccccc2s1,training,1
562 | C[C@@H](C#Cc1ccc(Cc2ccccc2)s1)NC(N)=O,training,1
563 | CCCSc1ccc2nc(NC(=O)OC)[nH]c2c1,training,1
564 | CN(C)c1cn(C)n(-c2ccccc2)c1=O,training,1
565 | CN(C)CCC=C1c2ccccc2CCc2ccccc21,training,1
566 | ClC(Cl)Cl,training,1
567 | COc1cc(N)c(Cl)cc1C(=O)N[C@H]1CCN(CCCOc2ccc(F)cc2)C[C@H]1OC,training,1
568 | O=C(OCc1ccccc1)c1ccccc1-c1c2ccc(=O)cc-2oc2cc(OCc3ccccc3)ccc12,training,1
569 | CN(C)CC/C=C1/c2ccccc2COc2ccccc21,training,1
570 | CC/C=C\C/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)O,training,1
571 | COC(OC)[C@@]1(C)Oc2ccc(N)cc2[C@H](N(Cc2nnn(C)n2)c2ccc(Cl)cc2)[C@H]1O,training,1
572 | CC(C)(O)c1ccccc1CC[C@@H](SCC1(CC(=O)O)CC1)c1cccc(/C=C/c2ccc3ccc(Cl)cc3n2)c1,training,1
573 | CN(C)N=O,training,1
574 | CN(CCOc1ccc(C[C@@H]2SC(=O)NC2=O)cc1)c1ccccn1,training,1
575 | CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12,training,1
576 | O=C1C(CCSc2ccccc2)C(=O)N(c2ccccc2)N1c1ccccc1,training,1
577 | CC/C(=C(\c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,training,1
578 | Cc1cccc(Nc2ccncc2S(=O)(=O)NC(=O)NC(C)C)c1,training,1
579 | CN1C(=O)OC(C)(C)C1=O,training,1
580 | CCNc1ncn(Cc2ccc(OC)c(OC3CCCC3)c2)c2nc(C(C)C)nc1-2,training,1
581 | CN1CCN(C(=O)O[C@H]2c3nccnc3C(=O)N2c2ccc(Cl)cn2)CC1,training,1
582 | O=C(O)CCc1nc(-c2ccccc2)c(-c2ccccc2)o1,training,1
583 | Nc1nc(NC2CC2)c2ncn([C@H]3C=C[C@@H](CO)C3)c2n1,training,0
584 | C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43C)[C@@H]1CC=C2c1cccnc1,training,0
585 | CN(C)Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1)=NC2,training,0
586 | CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CC[C@@H](O)C[C@@H](O)CC(=O)O,training,0
587 | CCC[C@H]1O[C@@H]2C[C@H]3[C@@H]4CCC5=CC(=O)C=C[C@]5(C)[C@H]4[C@@H](O)C[C@]3(C)[C@]2(C(=O)CO)O1,training,0
588 | c1ccc(C2(c3ccccc3)C[C@H]2C2=NCCN2)cc1,training,0
589 | CN(C)CCC[C@@]1(c2ccc(F)cc2)OCc2cc(C#N)ccc21,training,0
590 | CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(OC)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,training,0
591 | N=C(N)N1CCc2ccccc2C1,training,0
592 | Clc1ccc2c(c1)CCc1cccnc1C2=C1CCNCC1,training,0
593 | CCN(CC)C(=O)N1CCN(C)CC1,training,0
594 | CN1CCN(c2cc3c(cc2F)c(=O)c(C(=O)O)cn3-c2ccc(F)cc2)CC1,training,0
595 | C[C@]12CC[C@H]3[C@H]([C@@H]1[C@@H]1C[C@@H]1[C@@]21CCC(=O)O1)[C@H]1C[C@H]1C1=CC(=O)CC[C@@]13C,training,0
596 | FC(F)OC(F)(F)[C@@H](F)Cl,training,0
597 | C=C1C[C@@H]2[C@H](CC[C@]3(C)C(=O)CC[C@@H]23)[C@@]2(C)C=CC(=O)C=C12,training,0
598 | C#C[C@]1(O)C=C[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21CC,training,0
599 | C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23,training,0
600 | CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@@H](c3ccc(N(C)C)cc3)C[C@@]21C,training,0
601 | CCN(CC)C(=O)[C@]1(c2ccccc2)C[C@@H]1CN,training,0
602 | CC1(C)NC(=O)N(c2ccc([N+](=O)[O-])c(C(F)(F)F)c2)C1=O,training,0
603 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCNCC3)cc21,training,0
604 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc21,training,0
605 | CCCNC[C@H](O)COc1ccccc1C(=O)CCc1ccccc1,training,0
606 | O=C(c1ccc(OCCN2CCCCC2)cc1)c1c(-c2ccc(O)cc2)sc2cc(O)ccc12,training,0
607 | CN/C(=C\[N+](=O)[O-])NCCSCc1ccc(CN(C)C)o1,training,0
608 | CCOCc1nc2c(N)nc3ccccc3c2n1CC(C)(C)O,training,0
609 | Cc1cncc(CN2CCC(=C3c4ccc(Cl)cc4CCc4cccnc43)CC2)c1,training,0
610 | CC(C)(C)NC(=O)[C@@H]1C[C@@H]2CCCC[C@@H]2CN1C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)[C@H](CC(N)=O)NC(=O)c1ccc2ccccc2n1,training,0
611 | CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@H]21,training,0
612 | CN1CC(=O)N2[C@H](Cc3c([nH]c4ccccc34)[C@H]2c2ccc3c(c2)OCO3)C1=O,training,0
613 | CC(C)(C)c1cc(C[C@H]2SCNC2=O)cc(C(C)(C)C)c1O,training,0
614 | COc1cc([C@@H]2c3cc4c(cc3[C@@H](O[C@@H]3O[C@@H]5CO[C@@H](c6cccs6)O[C@H]5[C@H](O)[C@H]3O)[C@H]3COC(=O)[C@H]23)OCO4)cc(OC)c1O,training,0
615 | C[C@H](Cn1cnc2c(N)ncnc21)OCP(=O)(O)O,training,0
616 | COc1ccccc1Oc1c(NS(=O)(=O)c2ccc(C(C)C)cn2)nc(-c2ccnc(-c3nn[nH]n3)c2)nc1OCCO,training,0
617 | CN1CCN(CC/C=C2\c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1,training,0
618 | C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)C3=CC[C@]2(C)[C@H]1C(=O)CN1CCN(c2cc(N3CCCC3)nc(N3CCCC3)n2)CC1,training,0
619 | N[C@@H]1C[C@H]1c1ccccc1,training,0
620 | Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2,training,0
621 | Cc1ccc(/C(=C/CN2CCCC2)c2ccccn2)cc1,training,0
622 | O=P1(N(CCCl)CCCl)OCCCN1CCCl,training,0
623 | CC[C@]1(O)C[C@H]2CN(CCc3c([nH]c4ccccc34)[C@@](C(=O)OC)(c3cc4c(cc3OC)N(C=O)[C@H]3[C@@](O)(C(=O)OC)[C@H](OC(C)=O)[C@]5(CC)C=CCN6CC[C@]43[C@@H]65)C2)C1,training,0
624 | COc1ccc2c(c1)Oc1cc(O)ccc1[C@@]21OC(=O)c2ccccc21,training,1
625 | Cc1cc(=O)n(-c2ccccc2)n1C,training,1
626 | c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34,training,1
627 | O=c1cc(C(F)(F)F)c2ccc(OCc3ccccc3)cc2o1,training,1
628 | C=CC1CO1,training,1
629 | Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,training,1
630 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CCCC[C@@H]4[C@H]3C(=C)C[C@@]21CC,training,1
631 | CC(C)N(CC[C@@](C(N)=O)(c1ccccc1)c1ccccn1)C(C)C,training,1
632 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CC,training,1
633 | Fc1ccc(C(c2ccc(F)cc2)N2CCN(C/C=C/c3ccccc3)CC2)cc1,training,1
634 | Cc1ccc(S(=O)(=O)NC(=O)NN2C[C@H]3CCC[C@H]3C2)cc1,training,1
635 | O=C(CCCN1CCC(O)(c2ccc(Cl)cc2)CC1)c1ccc(F)cc1,training,1
636 | CN1CC[C@@]23c4c5ccc(O)c4O[C@@H]2C(=O)CC[C@@H]3[C@@H]1C5,training,1
637 | CC1(C)Cc2c(-c3ccccc3)c(-c3ccc(Cl)cc3)c(CC(=O)O)n2C1,training,1
638 | Cc1ncc2n1-c1ccc(Cl)cc1C(c1ccccc1F)=NC2,training,1
639 | CSc1nc(-c2ccnc(N[C@H](C)c3ccccc3)c2)c(-c2ccc(F)cc2)[nH]1,training,1
640 | CCCN(CCC)N=O,training,1
641 | CN(CCCC(=O)c1cccnc1)N=O,training,1
642 | C[C@H]1Cc2c(Cl)cc(C(=O)N[C@@H](Cc3ccccc3)C(=O)O)c(O)c2C(=O)O1,training,1
643 | CN1CCCN=C1/C=C\c1cccs1,training,1
644 | C=C[C@H]1CN2CC[C@H]1C[C@@H]2[C@@H](O)c1ccnc2ccc(OC)cc12,training,1
645 | CC(C)c1nc(CN(C)C(=O)N[C@H](C(=O)N[C@@H](Cc2ccccc2)C[C@H](O)[C@H](Cc2ccccc2)NC(=O)OCc2cncs2)C(C)C)cs1,training,1
646 | CC(C)c1nc(N(C)S(C)(=O)=O)nc(-c2ccc(F)cc2)c1CC[C@@H](O)C[C@@H](O)CC(=O)O,training,1
647 | O=c1c2ccccc2nc2n1CCc1c-2[nH]c2ccccc12,training,1
648 | CN(C)S(=O)(=O)CCNC(=O)N(CCCl)N=O,training,1
649 | COc1ccc(CCN(C)CCC[C@](C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,training,1
650 | CC(C)C1CCC(C(=O)N[C@@H](Cc2ccccc2)C(=O)O)CC1,training,1
651 | CN/C(=N\C#N)NCCSCc1nc[nH]c1C,test,0
652 | Cn1ccnc1S,test,0
653 | CCS(=O)(=O)CCn1c([N+](=O)[O-])cnc1C,test,0
654 | Cc1ncc([N+](=O)[O-])n1CCO,test,1
655 | CC(=O)Nc1ccccc1,test,0
656 | C=CCc1ccccc1OC[C@@H](O)CNC(C)C,test,0
657 | Cc1cc(N(C)C)ccc1C[C@H](C)N,test,0
658 | C[C@H](N)Cc1ccccc1,test,0
659 | Nc1ccccc1,test,0
660 | Cc1ccc(Cl)c(OC[C@@H](O)CNC(C)(C)C)c1,test,0
661 | CC(C)(Oc1ccc(Cl)cc1)C(=O)O,test,0
662 | CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1,test,0
663 | Cc1cc(Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O,test,0
664 | CCN[C@H](C)Cc1cccc(C(F)(F)F)c1,test,0
665 | Cc1ccc(C)c(OCCCC(C)(C)C(=O)O)c1,test,0
666 | CN[C@@H](C)Cc1ccccc1,test,0
667 | COCCc1ccc(OC[C@@H](O)CNC(C)C)cc1,test,0
668 | C=CCOc1ccccc1OC[C@@H](O)CNC(C)C,test,0
669 | O=C(O)c1ccccc1O,test,0
670 | Nc1ccc(S(N)(=O)=O)cc1,test,0
671 | Cc1cccc(C)c1NC(=O)[C@H](C)N,test,0
672 | CC(=O)Nc1ccc(O)cc1,test,1
673 | CC(C)/N=C(N)\N=C(/N)Nc1ccc(Cl)cc1,test,1
674 | C=CCc1ccc(O)c(OC)c1,test,1
675 | CC(C)Cc1ccc([C@H](C)C(=O)O)cc1,test,1
676 | CC(C)c1cccc(C(C)C)c1O,test,1
677 | C#CCN(C)[C@H](C)Cc1ccccc1,test,1
678 | CC(C)NC[C@H](O)COc1ccc(COCCOC(C)C)cc1,test,0
679 | C[C@@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1,test,0
680 | CCN(CC)CCNC(=O)c1ccc(N)cc1,test,0
681 | Cc1ccccc1,test,0
682 | CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,test,1
683 | c1ccccc1,test,0
684 | CCCCCCCN(CC)CCC[C@@H](O)c1ccc(NS(C)(=O)=O)cc1,test,0
685 | COc1cc(CNC(=O)CCCCC=CC(C)C)ccc1O,test,1
686 | CC1=C(/C=C/C(C)=C\C=C\C(C)=C/C(=O)O)C(C)(C)CCC1,test,1
687 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=O)C(C)(C)CCC1,test,1
688 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/CO)C(C)(C)CCC1,test,1
689 | CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC(=O)O,test,1
690 | CC1=C(/C=C\C(C)=C\C=C/C(C)=C\C(=O)O)C(C)(C)CCC1,test,0
691 | NCCc1c[nH]c2ccc(O)cc12,test,1
692 | COc1ccc2[nH]cc(CCNC(C)=O)c2c1,test,1
693 | CC(C)NC[C@H](O)COc1cccc2ccccc12,test,0
694 | COc1ccc2cc([C@H](C)C(=O)O)ccc2c1,test,1
695 | CN(C/C=C/C#CC(C)(C)C)Cc1cccc2ccccc12,test,1
696 | O=c1ccc2ccccc2o1,test,0
697 | CCCCOc1ccc2c(C(F)(F)F)cc(=O)oc2c1,test,1
698 | CCCCNCc1cc(=O)oc2cc(OC)ccc12,test,1
699 | C[C@@H](O)CCCCn1c(=O)c2c(ncn2C)n(C)c1=O,test,0
700 | CC(=O)CCCCn1c(=O)c2c(ncn2C)n(C)c1=O,test,0
701 | Cn1cnc2c1c(=O)[nH]c(=O)n2C,test,0
702 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)=O)[C@@H]1c1ccccc1[N+](=O)[O-],test,0
703 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc(Cl)c1Cl,test,0
704 | CCCOCCOC(=O)C1=C(C)NC(C)=C(C(=O)OCCOCCC)C1c1cccc([N+](=O)[O-])c1,test,0
705 | COC(=O)C1=C(C#N)NC(C)=C(C(=O)OC(C)C)[C@H]1c1cccc([N+](=O)[O-])c1,test,0
706 | CCOC(=O)C1=C(COCCN)NC(C)=C(C(=O)OC)[C@@H]1c1ccccc1Cl,test,0
707 | O=C(O)c1ccccc1Nc1cccc(C(F)(F)F)c1,test,0
708 | Cc1cccc(Nc2ccccc2C(=O)O)c1C,test,1
709 | Cc1ccc(O)c([C@@H](CCN(C(C)C)C(C)C)c2ccccc2)c1,test,1
710 | COc1ccc([C@@H](c2ccc(O)c(O)c2)C(Cl)(Cl)Cl)cc1,test,1
711 | COc1ccc([C@@H](c2ccc(O)cc2)C(Cl)(Cl)Cl)cc1,test,1
712 | CCC(=O)C(C[C@H](C)N(C)C)(c1ccccc1)c1ccccc1,test,1
713 | CN(C)CCCCCN1c2ccccc2Sc2ccc(C(F)(F)F)cc21,test,1
714 | CN(C)CCCN1c2ccccc2Sc2ccccc21,test,1
715 | C[C@H](CN(C)C)CN1c2ccccc2Sc2ccc(C#N)cc21,test,1
716 | O=c1c(O)c(-c2ccccc2)oc2cc(O)cc(O)c12,test,1
717 | COc1ccc(-c2oc3cc(O)cc(O)c3c(=O)c2O)cc1,test,1
718 | COc1ccc2c(c1)CC[C@@H]1[C@@H]2CC[C@]2(C)[C@H]1CC[C@H]2O,test,1
719 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCc4cc(O)ccc4[C@H]3CC[C@@]21C,test,1
720 | O=C1CN=C(c2ccccc2)c2cc(Cl)ccc2N1,test,0
721 | CN1C(=O)CN=C(c2ccccc2F)c2cc([N+](=O)[O-])ccc21,test,1
722 | CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc21,test,1
723 | CCN(CC)CCN1C(=O)CN=C(c2ccccc2F)c2cc(Cl)ccc21,test,0
724 | C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2C(=O)CO,test,0
725 | C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,test,1
726 | COCCCOc1ccnc(CS(=O)c2nc3ccccc3[nH]2)c1C,test,0
727 | O=S(Cc1ccccn1)c1nc2ccccc2[nH]1,test,0
728 | COc1ccc2[nH]c(S(=O)Cc3ncc(C)c(OC)c3C)nc2c1,test,1
729 | CC(=O)C[C@@H](c1ccc([N+](=O)[O-])cc1)c1c(O)c2ccccc2oc1=O,test,1
730 | CC[C@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,test,1
731 | CC(=O)C[C@H](c1ccc([N+](=O)[O-])cc1)c1c(O)c2ccccc2oc1=O,test,1
732 | CN(C)/N=N\c1[nH]cnc1C(N)=O,val,0
733 | NCCc1c[nH]cn1,val,1
734 | CCCC(=O)Nc1ccc(OC[C@@H](O)CNC(C)C)c(C(C)=O)c1,val,0
735 | CC(C)(C)NC[C@@H](O)c1ccc(O)c(CO)c1,val,0
736 | CC(C)(C)NC[C@H](O)COc1ccccc1C#N,val,0
737 | C[C@H](N)C(=O)c1ccccc1,val,0
738 | CCN(CC)C(=O)/C(C#N)=C\c1cc(O)c(O)c([N+](=O)[O-])c1,val,0
739 | CNC[C@H](O)c1ccc(O)c(O)c1,val,0
740 | CCc1ccccc1,val,0
741 | NC(=O)OCC(COC(N)=O)c1ccccc1,val,0
742 | COCCCC/C(=N/OCCN)c1ccc(C(F)(F)F)cc1,val,0
743 | N=C(N)N/N=C\c1c(Cl)cccc1Cl,val,0
744 | CCN(CC)CCNC(=O)c1cc(Cl)c(N)cc1OC,val,0
745 | C#CCN(C)Cc1ccccc1,val,0
746 | NC(N)=N/C(N)=N/CCc1ccccc1,val,0
747 | Oc1ccccc1,val,0
748 | CCCN(CCC)S(=O)(=O)c1ccc(C(=O)O)cc1,val,0
749 | CC(C)Cc1ccc([C@@H](C)C(=O)O)cc1,val,1
750 | COc1cc(CNC(=O)CCCC/C=C/C(C)C)ccc1O,val,1
751 | CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,val,1
752 | COc1ccc(OC(F)(F)F)cc1CN,val,1
753 | Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1,val,1
754 | CCN(CC)CC(=O)Nc1c(C)cccc1C,val,1
755 | C=CCc1ccc(OC)c(OC)c1,val,1
756 | CCOc1ccc(NC(C)=O)cc1,val,1
757 | NCCc1ccc(O)c(O)c1,val,1
758 | C[C@H](NC(C)(C)C)C(=O)c1cccc(Cl)c1,val,1
759 | COc1ccc([N+](=O)[O-])cc1,val,1
760 | C=Cc1ccccc1,val,1
761 | CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,val,1
762 | O=C(N[C@H](CO)[C@H](O)c1ccc([N+](=O)[O-])cc1)C(Cl)Cl,val,0
763 | Cc1cccc(C)c1OC[C@@H](C)N,val,0
764 | CC(=O)Oc1ccccc1C(=O)O,val,1
765 | CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C(=O)O)C(C)(C)CCC1,val,1
766 | C=C(C)[C@@H]1CC=C(C)CC1,val,1
767 | CC(C)NC[C@H](O)COc1cccc2[nH]ccc12,val,0
768 | CCCCC/N=C(\N)N/N=C\c1c[nH]c2ccc(CO)cc12,val,0
769 | COc1ccc2[nH]cc(CCN(C(C)C)C(C)C)c2c1,val,1
770 | CNC(=O)Oc1cccc2ccccc12,val,1
771 | CN(CC=CC#CC(C)(C)C)Cc1cccc2ccccc12,val,1
772 | CCOc1ccc2cc(C#N)c(=O)oc2c1,val,1
773 | COc1ccc2c(C(F)(F)F)cc(=O)oc2c1,val,1
774 | CCOc1ccc2ccc(=O)oc2c1,val,1
775 | CCOc1ccc2c(C(F)(F)F)cc(=O)oc2c1,val,1
776 | Cn1c(=O)[nH]c2ncn(C)c2c1=O,val,0
777 | Cn1c(=O)c2c(ncn2C)n(C)c1=O,val,1
778 | Cn1c(=O)c2[nH]cnc2n(C)c1=O,val,1
779 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OCC)C1c1ccccc1/C=C\C(=O)OC(C)(C)C,val,0
780 | COC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1ccccc1[N+](=O)[O-],val,0
781 | COC(=O)C1=C(C)NC(C)=C(C(=O)OCC(C)C)[C@H]1c1ccccc1[N+](=O)[O-],val,0
782 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)[C@@H]1c1cccc([N+](=O)[O-])c1,val,0
783 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)[C@@H]1c1cccc([N+](=O)[O-])c1,val,0
784 | O=C(O)Cc1cc(O)ccc1Nc1c(Cl)cccc1Cl,val,1
785 | O=C(O)COC(=O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1
786 | O=C(O)Cc1ccccc1Nc1c(Cl)cccc1Cl,val,1
787 | Cc1ccc(Nc2c(F)cccc2Cl)c(CC(=O)O)c1,val,1
788 | OCCc1ccccc1Nc1c(Cl)cccc1Cl,val,1
789 | NC(=O)CS(=O)C(c1ccccc1)c1ccccc1,val,0
790 | CCCC(C(=O)OCCN(CC)CC)(c1ccccc1)c1ccccc1,val,0
791 | COc1ccc(C(c2ccc(OC)cc2)C(Cl)(Cl)Cl)cc1,val,1
792 | Cc1ccc(O)c([C@H](CCN(C(C)C)C(C)C)c2ccccc2)c1,val,1
793 | Cc1ccccc1[C@H](OCCN(C)C)c1ccccc1,val,0
794 | C[C@H](CN1c2ccccc2Sc2ccccc21)N(C)C,val,0
795 | CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc21,val,1
796 | O=c1c(O)c(-c2ccc(O)c(O)c2)oc2cc(O)cc(O)c12,val,0
797 | COc1ccc(-c2cc(=O)c3c(O)c(OC)c(O)cc3o2)cc1OC,val,1
798 | COc1ccc(-c2oc3cc(O)cc(O)c3c(=O)c2O)cc1O,val,1
799 | C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@@H]2O,val,1
800 | C#C[C@]1(O)CC[C@H]2[C@@H]3CCc4cc(OC)ccc4[C@H]3CC[C@@]21C,val,1
801 | C[C@@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CC[C@H]2O,val,1
802 | O=C1CN=C(c2ccccc2Cl)c2cc([N+](=O)[O-])ccc2N1,val,0
803 | O=C1CN=C(c2ccccc2F)c2cc(Cl)ccc2N1CC(F)(F)F,val,1
804 | CN1C(=O)[C@H](O)N=C(c2ccccc2)c2cc(Cl)ccc21,val,1
805 | C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@]2(O)C(=O)CO,val,0
806 | CC(=O)[C@@]1(O)CC[C@H]2[C@@H]3C[C@H](C)C4=CC(=O)CC[C@]4(C)[C@H]3CC[C@@]21C,val,0
807 | CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,val,1
808 | CCC(=O)O[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,val,1
809 | COc1ccnc(CS(=O)c2nc3ccc(OC(F)F)cc3[nH]2)c1OC,val,0
810 | Cc1c(OCC(F)(F)F)ccnc1CS(=O)c1nc2ccccc2[nH]1,val,1
811 | CC(=O)C[C@@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,val,1
812 | CC(=O)C[C@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,val,1
813 |
--------------------------------------------------------------------------------