├── RNN-model ├── model.h5 └── model.json ├── utils ├── fpscores.pkl.gz ├── __pycache__ │ ├── AtomInfo.cpython-37.pyc │ ├── filter.cpython-37.pyc │ ├── sascorer.cpython-37.pyc │ ├── SDF2xyzV2.cpython-37.pyc │ ├── load_model.cpython-37.pyc │ ├── make_smile.cpython-37.pyc │ ├── rdock_test_MP.cpython-37.pyc │ └── add_node_type_zinc.cpython-37.pyc ├── AtomInfo.py ├── load_model.py ├── SDF2xyzV2.py ├── rdock_test_MP.py ├── make_smile.py ├── sascorer.py ├── bonds_dict.txt ├── add_node_type_zinc.py ├── filter.py └── atoms_dict.txt ├── train_RNN ├── make_smile.pyc ├── __pycache__ │ ├── make_smile.cpython-36.pyc │ └── make_smile.cpython-37.pyc ├── train_RNN.yaml ├── make_smile.py └── train_RNN.py ├── example_ligand_design ├── cavity.as ├── setting.yaml └── cavity.prm ├── LICENSE ├── README.md └── sbmolgen.py /RNN-model/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/RNN-model/model.h5 -------------------------------------------------------------------------------- /utils/fpscores.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/fpscores.pkl.gz -------------------------------------------------------------------------------- /train_RNN/make_smile.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/train_RNN/make_smile.pyc -------------------------------------------------------------------------------- /example_ligand_design/cavity.as: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/example_ligand_design/cavity.as -------------------------------------------------------------------------------- /utils/__pycache__/AtomInfo.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/AtomInfo.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/filter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/filter.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/sascorer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/sascorer.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/SDF2xyzV2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/SDF2xyzV2.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/load_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/load_model.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/make_smile.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/make_smile.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/rdock_test_MP.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/rdock_test_MP.cpython-37.pyc -------------------------------------------------------------------------------- /train_RNN/__pycache__/make_smile.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/train_RNN/__pycache__/make_smile.cpython-36.pyc -------------------------------------------------------------------------------- /train_RNN/__pycache__/make_smile.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/train_RNN/__pycache__/make_smile.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/add_node_type_zinc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clinfo/SBMolGen/HEAD/utils/__pycache__/add_node_type_zinc.cpython-37.pyc -------------------------------------------------------------------------------- /train_RNN/train_RNN.yaml: -------------------------------------------------------------------------------- 1 | dataset: ../data/250k_rndm_zinc_drugs_clean.smi 2 | output_json: ../RNN-model/model.json 3 | output_weight: ../RNN-model/model.h5 4 | 5 | dropout_rate: 0.2 6 | rec_dropout_rate: 0.2 7 | learning_rate: 0.01 8 | epoch: 100 9 | batch_size: 512 10 | validation_split: 0.1 11 | units: 256 12 | -------------------------------------------------------------------------------- /example_ligand_design/setting.yaml: -------------------------------------------------------------------------------- 1 | trial: 1 2 | c_val: 1.0 3 | loop_num_nodeExpansion: 1000 4 | target: 'CDK2' 5 | target_path: /home/mabiao/apps/SBMolGen/example_ligand_design 6 | hours: 1 7 | score_target: 'SCORE.INTER' 8 | docking_num: 10 9 | base_rdock_score: -20 10 | sa_threshold: 3.5 11 | # rule5 1: weigth < 500 logp <5 donor < 5 acceptor < 10, 2: weigth < 300 logp <3 donor < 3 acceptor < 3 rotbonds <3 12 | rule5: 1 13 | radical_check: True 14 | simulation_num: 3 15 | hashimoto_filter: True 16 | model_name: model 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 clinfo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /example_ligand_design/cavity.prm: -------------------------------------------------------------------------------- 1 | RBT_PARAMETER_FILE_V1.00 2 | TITLE 1h00_protein.pdb 3 | 4 | RECEPTOR_FILE ./receptor.mol2 5 | RECEPTOR_FLEX 3.0 6 | 7 | ################################################################## 8 | ### CAVITY DEFINITION: REFERENCE LIGAND METHOD 9 | ################################################################## 10 | SECTION MAPPER 11 | SITE_MAPPER RbtLigandSiteMapper 12 | REF_MOL 1h00_lig_FAP.sdf 13 | RADIUS 6.0 14 | SMALL_SPHERE 1.0 15 | MIN_VOLUME 100 16 | MAX_CAVITIES 1 17 | VOL_INCR 0.0 18 | GRIDSTEP 0.5 19 | END_SECTION 20 | 21 | ################################################################ 22 | # CAVITY DEFINITION: TWO SPHERES METHOD 23 | ################################################################ 24 | #SECTION MAPPER 25 | # SITE_MAPPER RbtSphereSiteMapper 26 | ##HETATM 2815 O HOH 756 37.266 -20.992 -4.910 0.90 24.86 1CSE2940 27 | # CENTER (7.185,8.250,22.649) 28 | # RADIUS 15.0 29 | # SMALL_SPHERE 1.5 30 | # LARGE_SPHERE 6.0 31 | # MAX_CAVITIES 1 32 | #END_SECTION 33 | 34 | ################################# 35 | #CAVITY RESTRAINT PENALTY 36 | ################################# 37 | SECTION CAVITY 38 | SCORING_FUNCTION RbtCavityGridSF 39 | WEIGHT 1.0 40 | END_SECTION 41 | 42 | ################################# 43 | ## PHARMACOPHORIC RESTRAINTS 44 | ################################# 45 | #SECTION PHARMA 46 | # SCORING_FUNCTION RbtPharmaSF 47 | # WEIGHT 1.0 48 | # CONSTRAINTS_FILE pharma_cdk2.const 49 | # OPTIONAL_FILE optional.const 50 | # NOPT 3 51 | # WRITE_ERRORS TRUE 52 | #END_SECTION 53 | 54 | -------------------------------------------------------------------------------- /RNN-model/model.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "keras_version": "1.1.1", "config": [{"class_name": "Embedding", "config": {"trainable": true, "name": "embedding_1", "activity_regularizer": null, "W_constraint": null, "init": "uniform", "input_dtype": "int32", "mask_zero": false, "input_dim": 64, "batch_input_shape": [null, 82], "W_regularizer": null, "dropout": 0.0, "output_dim": 64, "input_length": 82}}, {"class_name": "GRU", "config": {"U_regularizer": null, "name": "gru_1", "inner_activation": "hard_sigmoid", "go_backwards": false, "output_dim": 256, "trainable": true, "unroll": false, "consume_less": "cpu", "stateful": false, "init": "glorot_uniform", "inner_init": "orthogonal", "dropout_U": 0.0, "dropout_W": 0.0, "input_dim": 64, "return_sequences": true, "b_regularizer": null, "W_regularizer": null, "activation": "sigmoid", "input_length": null}}, {"class_name": "GRU", "config": {"U_regularizer": null, "name": "gru_2", "inner_activation": "hard_sigmoid", "go_backwards": false, "output_dim": 256, "trainable": true, "unroll": false, "consume_less": "cpu", "stateful": false, "init": "glorot_uniform", "inner_init": "orthogonal", "dropout_U": 0.0, "dropout_W": 0.0, "input_dim": 256, "return_sequences": true, "b_regularizer": null, "W_regularizer": null, "activation": "sigmoid", "input_length": null}}, {"class_name": "TimeDistributed", "config": {"layer": {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_1", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "softmax", "output_dim": 64}}, "trainable": true, "name": "timedistributed_1"}}]} -------------------------------------------------------------------------------- /utils/AtomInfo.py: -------------------------------------------------------------------------------- 1 | #!/sw/bin/python3.4 2 | import sys, math 3 | 4 | def AtomicWeight(Element): 5 | 6 | AtomicWeight = {'H': 1.008,'He': 4.003, \ 7 | 'Li': 6.938, 'Be': 9.012, 'B': 10.81, 'C': 12.01, 'N': 14.01, 'O': 16.00, 'F': 19.00, 'Ne': 20.18, \ 8 | 'Na': 22.99, 'Mg': 24.80, 'Al':26.98, 'Si':28.08, 'P': 30.97, 'S': 32.06, 'Cl': 35.45, 'Ar': 39.95, \ 9 | 'K': 39.10, 'Ca': 40.08, 'Sc': 44.96, 'Ti': 47.87, 'V': 50.94, 'Cr': 52.00, 'Mn': 54.94,'Fe': 55.85, 'Co': 58.93,\ 10 | 'Ni': 58.69, 'Cu': 63.55, 'Zn': 65.38,'Ga': 69.72, 'Ge': 72.63, 'As': 74.92,'Se': 78.97, 'Br': 79.90, 'Kr': 83.80,\ 11 | 'Rb': 85.47, 'Sr': 87.62, 'Y': 88.91, 'Zr': 91.22, 'Nb': 92.21, 'Mo': 95.95, 'Tc': 99, 'Ru': 101.07, 'Rh': 102.91,\ 12 | 'Pd': 106.42, 'Ag': 107.87, 'Cd': 112.41, 'In': 114.82, 'Sn': 118.71, 'Sb': 121.76, 'Te': 127.60, 'I': 126.9, 'Xe': 131.29} 13 | 14 | if(Element in AtomicWeight): 15 | return AtomicWeight[Element] 16 | else: 17 | print ("We don't have the information about %-s!" % (Element)) 18 | exit() 19 | 20 | 21 | def AtomicNumElec(Element): 22 | 23 | AtomicNumElec = {'H': 1,'He': 2, \ 24 | 'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'Ne': 10, \ 25 | 'Na': 11, 'Mg': 12, 'Al': 13, 'Si': 14, 'P': 15, 'S': 16, 'Cl': 17, 'Ar': 18, \ 26 | 'K': 19, 'Ca': 20, 'Sc': 21, 'Ti': 22, 'V': 23, 'Cr': 24, 'Mn': 25,'Fe': 26, 'Co': 27,\ 27 | 'Ni': 28, 'Cu': 29, 'Zn': 30,'Ga': 31, 'Ge': 32, 'As': 33,'Se': 34, 'Br': 35, 'Kr': 36,\ 28 | 'Rb': 37, 'Sr': 38, 'Y': 39, 'Zr': 40, 'Nb': 41, 'Mo': 42, 'Tc': 43, 'Ru': 44, 'Rh': 45,\ 29 | 'Pd': 46, 'Ag': 47, 'Cd': 48, 'In': 49, 'Sn': 50, 'Sb': 51, 'Te': 52, 'I': 53, 'Xe': 54} 30 | 31 | if(Element in AtomicNumElec): 32 | return AtomicNumElec[Element] 33 | else: 34 | print ("We don't have the information about %-s!" % (Element)) 35 | exit() 36 | 37 | 38 | #var = input() 39 | #print (AtomicWeight(var)) 40 | 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Structure-based de novo Molecular Generator (SBMolGen) 2 | Supporting Information for the paper _"[Structure-Based de Novo Molecular Generator Combined with Artificial Intelligence and Docking Simulations](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00679)"_. 3 | 4 | In this study, we developed a new deep learning-based molecular generator, SBMolGen, that integrates a recurrent neural network, a Monte Carlo tree search, and docking simulations. The results of an evaluation using four target proteins (two kinases and two G protein-coupled receptors) showed that the generated molecules had a better binding affinity score (docking score) than the known active compounds, and they possessed a broader chemical space distribution. SBMolGen not only generates novel binding active molecules but also presents 3D docking poses with target proteins, which will be useful in subsequent drug design. 5 | 6 | SBMolGen uses and modifies some [ChemTS](https://github.com/tsudalab/ChemTS) features. For more information on ChemTS, please see the [paper of ChemTS](https://doi.org/10.1080/14686996.2017.1401424). 7 | 8 | ## Requirements 9 | 1. [Python](https://www.anaconda.com/download/)>=3.7 10 | 2. [Keras](https://github.com/fchollet/keras) (version 2.0.5) If you installed the newest version of keras, some errors will show up. Please change it back to keras 2.0.5 by pip install keras==2.0.5. 11 | 3. tensoflow (version 1.15.2, ver>=2.0 occurred error.) 12 | 4. [rdkit](https://anaconda.org/rdkit/rdkit) 13 | 5. [rDock](http://rdock.sourceforge.net/installation/) 14 | 15 | ## How to use 16 | 17 | 1. Get SBMolGen. 18 | 19 | ``` 20 | git clone https://github.com/clinfo/SBMolGen.git 21 | cd SBMolGen 22 | ``` 23 | Set the system path, here is the example for bash. 24 | ``` 25 | export SBMolGen_PATH=/Path to SBMolGen/SBMolGen 26 | export PATH=${SBMolGen_PATH}:${PATH} 27 | export RBT_ROOT=/Path to rDock 28 | export LD_LIBRARY_PATH=${RBT_ROOT}/lib:${LD_LIBRARY_PATH} 29 | ``` 30 | 31 | 2. Train the RNN model. 32 | 33 | ``` 34 | cd train_RNN 35 | python train_RNN.py train_RNN.yaml 36 | ``` 37 | 3. Make a setting file for molecule generate. 38 | 39 | A sample of setting file. 40 | 41 | setting.yaml 42 | ``` 43 | c_val: 1.0 44 | loop_num_nodeExpansion: 1000 45 | target: 'CDK2' 46 | target_path: /home/apps/SBMolGen/example_ligand_design 47 | hours: 1 48 | score_target: 'SCORE.INTER' 49 | docking_num: 10 50 | base_rdock_score: -20 51 | sa_threshold: 3.5 52 | # rule5 1: weigth < 500 logp <5 donor < 5 acceptor < 10, 2: weigth < 300 logp <3 donor < 3 acceptor < 3 rotbonds <3 53 | rule5: 1 54 | radical_check: True 55 | simulation_num: 3 56 | hashimoto_filter: True 57 | model_name: model 58 | ``` 59 | 4. Prepare the target file. 60 | 61 | Refer to the [rDock Tutorials](http://rdock.sourceforge.net/docking-in-3-steps/) for instructions on preparing the required files for docking. 62 | 63 | 5. Molecule generate. 64 | 65 | ``` 66 | cd example_ligand_design 67 | python ${SBMolGen_PATH}/sbmolgen.py setting.yaml 68 | ``` 69 | 70 | 71 | ## License 72 | This package is distributed under the MIT License. 73 | -------------------------------------------------------------------------------- /utils/load_model.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import operator 4 | import numpy as np 5 | import nltk 6 | import h5py 7 | import os 8 | from datetime import datetime 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Activation,TimeDistributed 11 | from keras.layers import LSTM,GRU 12 | from keras.layers.embeddings import Embedding 13 | from keras.optimizers import RMSprop, Adam 14 | from keras.utils.data_utils import get_file 15 | from keras.layers import Dropout 16 | import numpy as np 17 | import random 18 | import sys 19 | from keras.utils.np_utils import to_categorical 20 | from keras.preprocessing import sequence 21 | from keras.models import model_from_json 22 | from make_smile import zinc_data_with_bracket, zinc_processed_with_bracket 23 | 24 | import os 25 | import tensorflow as tf 26 | os.environ["CUDA_VISIBLE_DEVICES"]= "1" #"0,1,2,3," 27 | from keras import backend as K 28 | config = tf.ConfigProto() 29 | config.gpu_options.allow_growth = True 30 | sess = tf.Session(config = config) 31 | K.set_session(sess) 32 | 33 | def prepare_data(smiles,all_smile): 34 | all_smile_index=[] 35 | for i in range(len(all_smile)): 36 | smile_index=[] 37 | for j in range(len(all_smile[i])): 38 | smile_index.append(smiles.index(all_smile[i][j])) 39 | all_smile_index.append(smile_index) 40 | X_train=all_smile_index 41 | y_train=[] 42 | for i in range(len(X_train)): 43 | 44 | x1=X_train[i] 45 | x2=x1[1:len(x1)] 46 | x2.append(0) 47 | y_train.append(x2) 48 | 49 | return X_train,y_train 50 | 51 | 52 | def loaded_model(filename): 53 | json_file = open(filename + '.json', 'r') 54 | #json_file = open('/home/terayama/molecule-design/ChemTS/train_RNN/model_enamine_GRU256_epoch10.json', 'r') 55 | #json_file = open('/home/terayama/molecule-design/ChemTS/RNN-model/model.json', 'r') 56 | 57 | #json_file = open('/Users/yang/LSTM-chemical-project/protein-ligand/model.json', 'r') 58 | loaded_model_json = json_file.read() 59 | json_file.close() 60 | loaded_model = model_from_json(loaded_model_json) 61 | 62 | # load weights into new model 63 | #loaded_model.load_weights('/Users/yang/LSTM-chemical-project/protein-ligand/model.h5') 64 | 65 | loaded_model.load_weights(filename+'.h5') 66 | #loaded_model.load_weights('/home/terayama/molecule-design/ChemTS/train_RNN/model_enamine_GRU256_epoch10.h5') 67 | #loaded_model.load_weights('/home/terayama/molecule-design/ChemTS/RNN-model/model.h5') 68 | print("Loaded model from disk") 69 | 70 | 71 | return loaded_model 72 | 73 | def loaded_activity_model(): 74 | #smile_old=zinc_data_with_bracket_original() 75 | #valcabulary,smile=zinc_processed_with_bracket(smile_old) 76 | 77 | #json_file = open('/Users/yang/LSTM-chemical-project/new-version-rnn/model/model.json', 'r') 78 | json_file = open('/Users/yang/LSTM-chemical-project/protein-ligand/ppara_model.json', 'r') 79 | loaded_model_json = json_file.read() 80 | json_file.close() 81 | loaded_model = model_from_json(loaded_model_json) 82 | 83 | # load weights into new model 84 | loaded_model.load_weights('/Users/yang/LSTM-chemical-project/protein-ligand/ppara_model.hdf5') 85 | #loaded_model.load_weights('/Users/yang/LSTM-chemical-project/new-version-rnn/model/model.h5') 86 | print("Loaded model from disk") 87 | 88 | return loaded_model 89 | 90 | 91 | -------------------------------------------------------------------------------- /utils/SDF2xyzV2.py: -------------------------------------------------------------------------------- 1 | #!/sw/bin/python3.6 2 | import sys, math 3 | from numpy import * 4 | import AtomInfo 5 | 6 | def Read_sdf(infilename): 7 | 8 | ifile = open(infilename, 'r') #open file for reading 9 | 10 | count = 0 11 | 12 | X = [] 13 | Y = [] 14 | Z = [] 15 | element_symbol = [] 16 | 17 | 18 | Bond_pair1 = [] 19 | Bond_pair2 = [] 20 | Bond_type = [] 21 | 22 | TotalCharge = 0 23 | CHG_atom = [] 24 | CHG = [] 25 | 26 | for line in ifile: 27 | if count == 0: 28 | Header1 = line 29 | 30 | count += 1 31 | continue 32 | 33 | if count == 1: 34 | Header2 = line 35 | 36 | count += 1 37 | continue 38 | 39 | if count == 2: 40 | Header3 = line 41 | count += 1 42 | 43 | continue 44 | 45 | if count == 3: 46 | a = line.split() 47 | N = int(a[0]) 48 | N_Bond = int(a[1]) 49 | 50 | count += 1 51 | continue 52 | 53 | if 3 < count <= N+4: 54 | i_atom = line.split() 55 | if len(i_atom) != 0: 56 | X.append(float(i_atom[0])) 57 | Y.append(float(i_atom[1])) 58 | Z.append(float(i_atom[2])) 59 | element_symbol.append(i_atom[3]) 60 | 61 | count += 1 62 | continue 63 | 64 | if N+4 < count <= N+N_Bond+3 : 65 | bond_info = line.split() 66 | print (bond_info) 67 | bond_info = line.split() 68 | Bond_pair1.append(int(bond_info[0])) 69 | Bond_pair2.append(int(bond_info[1])) 70 | Bond_type.append(int(bond_info[2])) 71 | 72 | count +=1 73 | continue 74 | 75 | if count > N+N_Bond+3: 76 | mol_info = line.split() 77 | print (mol_info) 78 | if (mol_info[0] == "M"): 79 | if (mol_info[1] == "END"): 80 | break 81 | if (mol_info[1] == "CHG"): 82 | Num_CHGInfo = int(mol_info[2]) 83 | for k in range(Num_CHGInfo): 84 | CHG_atom.append(int(mol_info[3+2*k])) 85 | CHG.append(int(mol_info[4+2*k])) 86 | TotalCharge += int(mol_info[4+2*k]) 87 | else: 88 | print("The sdf file is invalid!") 89 | sys.exit() 90 | 91 | count +=1 92 | 93 | #Copy to array of numpy########################### 94 | 95 | Mol_atom = [] 96 | Mol_CartX = zeros(N) 97 | Mol_CartY = zeros(N) 98 | Mol_CartZ = zeros(N) 99 | 100 | CHG_atom = array(CHG_atom) 101 | CHG = array(CHG) 102 | 103 | for j in range(N): 104 | Mol_CartX[j] = X[j] 105 | Mol_CartY[j] = Y[j] 106 | Mol_CartZ[j] = Z[j] 107 | Mol_atom.append(element_symbol[j]) 108 | 109 | #del element_symbol[N:TotalStep] 110 | 111 | del element_symbol[:] 112 | del X[:] 113 | del Y[:] 114 | del Z[:] 115 | 116 | ################################################## 117 | 118 | print (Mol_atom) 119 | print (N) 120 | print (len(Mol_CartX)) 121 | 122 | print('Reading a sdf file has finished') 123 | 124 | ###Calculating the total number of electrons################# 125 | TotalNum_electron = 0 126 | 127 | for j in range(N): 128 | if (len(CHG_atom) != 0): 129 | Judge = CHG_atom-j 130 | if (any(Judge) == 0): 131 | TotalNum_electron += AtomInfo.AtomicNumElec(Mol_atom[j])-CHG[where(Judge == 0)] 132 | else: 133 | TotalNum_electron += AtomInfo.AtomicNumElec(Mol_atom[j]) 134 | else: 135 | TotalNum_electron += AtomInfo.AtomicNumElec(Mol_atom[j]) 136 | 137 | print('Total number of electron: %7d ' % (TotalNum_electron)) 138 | 139 | if (TotalNum_electron%2==0): 140 | print ("This system is a closed shell!") 141 | SpinMulti = 1 142 | else: 143 | print ("This system is a open shell!") 144 | SpinMulti = 2 145 | 146 | ############################################################# 147 | 148 | return Mol_atom, Mol_CartX, Mol_CartY, Mol_CartZ,TotalCharge, SpinMulti 149 | 150 | """ 151 | usage ='Usage; %s infile' % sys.argv[0] 152 | 153 | try: 154 | infilename = sys.argv[1] 155 | except: 156 | print (usage); sys.exit() 157 | 158 | Mol_atom, X, Y, Z, TotalCharge, SpinMulti = Read_sdf(infilename) 159 | 160 | ofile = open('Test_FromSDF.xyz','w') 161 | 162 | ofile.write('%5d \n' % len(Mol_atom)) 163 | ofile.write('%5d %5d \n' % (TotalCharge, SpinMulti)) 164 | 165 | for j in range(len(Mol_atom)): 166 | ofile.write('%-4s % 10.5f % 10.5f % 10.5f \n' 167 | % (Mol_atom[j],X[j], Y[j], Z[j])) 168 | 169 | """ 170 | 171 | -------------------------------------------------------------------------------- /utils/rdock_test_MP.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | from rdkit.Chem import AllChem 3 | import subprocess 4 | #import threading 5 | from multiprocessing import Process 6 | import os 7 | import shutil 8 | import time 9 | 10 | def docking_calculation(cmd): 11 | proc = subprocess.call( cmd , shell=True) 12 | print('end in thread') 13 | 14 | #----parameter & preparation 15 | def rdock_score(compound, score_type, target_dir, docking_num = 10): 16 | 17 | # check RBT_ROOT setting: 18 | RBT_ROOT=os.environ.get('RBT_ROOT') 19 | if os.getenv('RBT_ROOT') == None: 20 | print("The RBT_ROOT has not defined, please set it before use it!") 21 | exit(0) 22 | else: 23 | RBT_ROOT=os.getenv('RBT_ROOT') 24 | print('RBT_ROOT is: ', RBT_ROOT) 25 | 26 | input_smiles = str(compound) 27 | 28 | num_cpus = docking_num # total number of docking 29 | score_name = score_type # SCORE or SCORE.INTER ## edit by Biao Ma at 20190920 30 | 31 | target_dir = target_dir 32 | sdf_file = 'tmp_comp.sdf' 33 | docking_result_file = 'rdock_out_' 34 | min_score = 10**10 35 | min_score_inter = 10**10 36 | min_score_id = 0 37 | min_score_inter_id = 0 38 | 39 | path=os.getcwd() 40 | os.chdir(path) 41 | 42 | #----Translation from SMILES to sdf 43 | fw = Chem.SDWriter(sdf_file) 44 | m1 = Chem.MolFromSmiles(input_smiles) 45 | try: 46 | if m1!= None: 47 | m = Chem.AddHs(m1) 48 | 49 | cid = AllChem.EmbedMolecule(m) 50 | #fw.write(m) 51 | 52 | opt = AllChem.UFFOptimizeMolecule(m,maxIters=200) 53 | print('3D structure optimization by AllChem.UFFOptimizeMolecule(m,maxIters=200) :', opt) 54 | 55 | fw.write(m) 56 | fw.close() 57 | 58 | #----rdock calculation 59 | #cmd = '/home/terayama/rdock/fiexed_side_chain/build/exe/rbdock -r cavity.prm -p /home/terayama/rdock/fiexed_side_chain/data/scripts/dock.prm -i ' + sdf_file + ' -o ' + docking_result_file + ' -T 0 -n' + str(num_docking_unit) 60 | #proc = subprocess.call( cmd , shell=True) 61 | #proc = subprocess.popen(cmd, shell=True) 62 | #r1 = p1.wait() 63 | #print('r1', p1) 64 | 65 | start_time = time.time() 66 | processes = [] 67 | for i in range(num_cpus): 68 | #cmd = '/home/terayama/rdock/fiexed_side_chain/build/exe/rbdock -r cavity.prm -p /home/terayama/rdock/fiexed_side_chain/data/scripts/dock.prm -i ' + sdf_file + ' -o ' + docking_result_file + str(i) + ' -T 0 -s '+str(i)+' -n 1' 69 | #cmd = 'rbdock -r cavity.prm -p /home/mki/SideEffectPrediction/08_rDock/prm/dock.prm -i ' + sdf_file + ' -o ' + docking_result_file + str(i) + ' -T 0 -s '+str(i)+' -n 1' 70 | cmd = RBT_ROOT + '/bin/rbdock -allH -r '+ target_dir + '/cavity.prm -p '+ RBT_ROOT + '/data/scripts/dock.prm -i ' + sdf_file + ' -o ' + docking_result_file + str(i) + ' -T 0 -s '+str(i)+' -n 1' 71 | print('cmd', cmd) 72 | #t = threading.Thread(target=docking_calculation, args=(cmd,)) 73 | #threads.append(t) 74 | #t.start() 75 | t = Process(target=docking_calculation, args=(cmd,)) 76 | processes.append(t) 77 | for p in processes: 78 | p.start() 79 | 80 | for p in processes: 81 | p.join() 82 | print('end') 83 | end_time = time.time() 84 | #print('time used:%s' % end_time - start_time) 85 | print('docking time_used', end_time - start_time) 86 | 87 | 88 | for i in range(num_cpus): 89 | #----find the minimum score of rdock from multiple docking results 90 | f = open(docking_result_file+str(i)+'.sd') 91 | lines = f.readlines() 92 | f.close() 93 | 94 | line_count = 0 95 | score_line = -1 96 | score_inter_line = -1 97 | for line in lines: 98 | v_list = line.split() 99 | if line_count == score_line: 100 | print(v_list[0]) 101 | if float(v_list[0]) < min_score: 102 | min_score = float(v_list[0]) 103 | min_score_id = i 104 | 105 | if line_count == score_inter_line: 106 | print(v_list[0]) 107 | if float(v_list[0]) < min_score_inter: 108 | min_score_inter = float(v_list[0]) 109 | min_score_inter_id = i 110 | 111 | if len(v_list) <= 1: 112 | line_count += 1 113 | continue 114 | 115 | if v_list[1] == '': 116 | score_line = line_count + 1 117 | if v_list[1] == '': 118 | score_inter_line = line_count + 1 119 | 120 | line_count += 1 121 | 122 | 123 | print('minimum rdock score', min_score, 'score_inter', min_score_inter) 124 | except: 125 | print('error') 126 | min_score=10**10 127 | min_score_inter = 10**10 128 | 129 | os.chdir(path) 130 | #os.mkdir('out') # creat the directory for rdock best output for new compound. 131 | best_docking_id = min_score_id if score_type == 'SCORE' else min_score_inter_id 132 | return [min_score, min_score_inter, best_docking_id] 133 | 134 | 135 | #rdock_score('Oc1ccc(NC(=O)C)cc1', 2, '', docking_num = 1, protein_target = 'kith') 136 | -------------------------------------------------------------------------------- /train_RNN/make_smile.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | import itertools 4 | import operator 5 | import numpy as np 6 | import nltk 7 | import os 8 | from rdkit import Chem 9 | from rdkit.Chem import Draw 10 | from IPython import display 11 | #import matplotlib.pyplot as plt 12 | from rdkit.Chem import Descriptors 13 | 14 | 15 | def zinc_data_with_bracket(): 16 | 17 | sen_space=[] 18 | #f = open('/Users/yang/smiles.csv', 'rb') 19 | #f = open('/Users/yang/LSTM-chemical-project/smile_trainning.csv', 'rb') 20 | f = open('/home/yang/LSTM-chemical-project/data/250k_rndm_zinc_drugs_clean.smi', 'rb') 21 | 22 | reader = csv.reader(f) 23 | for row in reader: 24 | #word_space[row].append(reader[row]) 25 | #print word_sapce 26 | sen_space.append(row) 27 | #print sen_space 28 | f.close() 29 | 30 | word1=sen_space[0] 31 | word_space=list(word1[0]) 32 | end="\n" 33 | 34 | zinc_processed=[] 35 | organic_smile=[] 36 | t=0 37 | for i in range(len(sen_space)): 38 | word1=sen_space[i] 39 | m = Chem.MolFromSmiles(word1[0]) 40 | Chem.Kekulize(m) 41 | s=Chem.MolToSmiles(m,kekuleSmiles=True) 42 | zinc_processed.append(s) 43 | #word_space=list(word1[0]) 44 | #print len(zinc_processed) 45 | 46 | while t 8: nMacrocycles+=1 71 | 72 | sizePenalty = nAtoms**1.005 - nAtoms 73 | stereoPenalty = math.log10(nChiralCenters+1) 74 | spiroPenalty = math.log10(nSpiro+1) 75 | bridgePenalty = math.log10(nBridgeheads+1) 76 | macrocyclePenalty = 0. 77 | # --------------------------------------- 78 | # This differs from the paper, which defines: 79 | # macrocyclePenalty = math.log10(nMacrocycles+1) 80 | # This form generates better results when 2 or more macrocycles are present 81 | if nMacrocycles > 0: macrocyclePenalty = math.log10(2) 82 | 83 | score2 = 0. -sizePenalty -stereoPenalty -spiroPenalty -bridgePenalty -macrocyclePenalty 84 | 85 | # correction for the fingerprint density 86 | # not in the original publication, added in version 1.1 87 | # to make highly symmetrical molecules easier to synthetise 88 | score3 = 0. 89 | if nAtoms > len(fps): 90 | score3 = math.log(float(nAtoms) / len(fps)) * .5 91 | 92 | sascore = score1 + score2 + score3 93 | 94 | # need to transform "raw" value into scale between 1 and 10 95 | min = -4.0 96 | max = 2.5 97 | sascore = 11. - (sascore - min + 1) / (max - min) * 9. 98 | # smooth the 10-end 99 | if sascore > 8.: sascore = 8. + math.log(sascore+1.-9.) 100 | if sascore > 10.: sascore = 10.0 101 | elif sascore < 1.: sascore = 1.0 102 | 103 | return sascore 104 | 105 | 106 | def processMols(mols): 107 | print('smiles\tName\tsa_score') 108 | for i,m in enumerate(mols): 109 | if m is None: 110 | continue 111 | 112 | s = calculateScore(m) 113 | 114 | smiles = Chem.MolToSmiles(m) 115 | print(smiles+"\t"+m.GetProp('_Name') + "\t%3f"%s) 116 | 117 | 118 | if __name__=='__main__': 119 | import sys,time 120 | 121 | t1=time.time() 122 | readFragmentScores("fpscores") 123 | t2=time.time() 124 | 125 | suppl = Chem.SmilesMolSupplier(sys.argv[1]) 126 | t3=time.time() 127 | processMols(suppl) 128 | t4=time.time() 129 | 130 | print('Reading took %.2f seconds. Calculating took %.2f seconds'%((t2-t1),(t4-t3)), file=sys.stderr) 131 | 132 | 133 | # 134 | # Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc. 135 | # All rights reserved. 136 | # 137 | # Redistribution and use in source and binary forms, with or without 138 | # modification, are permitted provided that the following conditions are 139 | # met: 140 | # 141 | # * Redistributions of source code must retain the above copyright 142 | # notice, this list of conditions and the following disclaimer. 143 | # * Redistributions in binary form must reproduce the above 144 | # copyright notice, this list of conditions and the following 145 | # disclaimer in the documentation and/or other materials provided 146 | # with the distribution. 147 | # * Neither the name of Novartis Institutes for BioMedical Research Inc. 148 | # nor the names of its contributors may be used to endorse or promote 149 | # products derived from this software without specific prior written permission. 150 | # 151 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 152 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 153 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 154 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 155 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 156 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 157 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 158 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 159 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 160 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 161 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 162 | # 163 | -------------------------------------------------------------------------------- /utils/bonds_dict.txt: -------------------------------------------------------------------------------- 1 | ES_Index_Bond Frequency BondIs 2 | 6_8 170415 1 3 | 6_10 1273 1 4 | 6_12 187433 1 5 | 6_14 833 1 6 | 6_15 64778 1 7 | 6_16 183041 1 8 | 6_18 192262 1 9 | 6_23 15264 1 10 | 6_27 757 1 11 | 6_29 117264 1 12 | 6_31 36052 1 13 | 6_32 6610 1 14 | 6_35 223121 1 15 | 6_49 8277 1 16 | 6_51 748 1 17 | 6_52 15621 1 18 | 7_10 9314 1 19 | 7_13 44 0 20 | 7_15 3959 1 21 | 7_27 14 0 22 | 8_6 170415 1 23 | 8_8 1310807 1 24 | 8_10 32616 1 25 | 8_12 849593 1 26 | 8_14 10055 1 27 | 8_15 215273 1 28 | 8_16 483397 1 29 | 8_18 184198 1 30 | 8_20 15403 1 31 | 8_23 215804 1 32 | 8_27 12794 1 33 | 8_29 769017 1 34 | 8_30 165 1 35 | 8_31 95020 1 36 | 8_32 8153 1 37 | 8_33 40523 1 38 | 8_35 272495 1 39 | 8_37 3004 1 40 | 8_47 2117 1 41 | 8_49 43073 1 42 | 8_51 2083 1 43 | 8_52 12271 1 44 | 9_14 3493 1 45 | 10_6 1273 1 46 | 10_7 9314 1 47 | 10_8 32616 1 48 | 10_10 47655 1 49 | 10_12 16023 1 50 | 10_13 83 1 51 | 10_14 672 1 52 | 10_15 88653 1 53 | 10_16 71652 1 54 | 10_18 5917 1 55 | 10_20 29 0 56 | 10_22 53 1 57 | 10_23 2707 1 58 | 10_27 19346 1 59 | 10_29 3998 1 60 | 10_30 490 1 61 | 10_31 592 1 62 | 10_32 13 0 63 | 10_33 25 0 64 | 10_34 5023 1 65 | 10_35 1056 1 66 | 10_37 45 0 67 | 10_47 1 0 68 | 10_48 27 0 69 | 10_49 574 1 70 | 10_51 40 0 71 | 10_52 1128 1 72 | 10_53 84 1 73 | 10_69 134 1 74 | 10_74 136 1 75 | 11_11 3049536 1 76 | 11_15 18995 1 77 | 11_16 4172658 1 78 | 11_17 580818 1 79 | 11_24 40605 1 80 | 11_28 332759 1 81 | 11_31 128507 1 82 | 11_36 17051 1 83 | 11_50 38547 1 84 | 12_6 187433 1 85 | 12_8 849593 1 86 | 12_10 16023 1 87 | 12_12 199622 1 88 | 12_14 4415 1 89 | 12_15 208364 1 90 | 12_16 179303 1 91 | 12_18 85384 1 92 | 12_20 21375 1 93 | 12_23 160946 1 94 | 12_27 7058 1 95 | 12_29 135290 1 96 | 12_30 207 1 97 | 12_31 32570 1 98 | 12_32 1212 1 99 | 12_33 86746 1 100 | 12_35 123777 1 101 | 12_37 7086 1 102 | 12_47 617 1 103 | 12_49 13296 1 104 | 12_51 279 1 105 | 12_52 3696 1 106 | 13_7 44 0 107 | 13_10 83 1 108 | 13_13 3 0 109 | 13_15 24 0 110 | 13_27 554 1 111 | 13_34 36 0 112 | 13_48 512 1 113 | 14_6 833 1 114 | 14_8 10055 1 115 | 14_9 3493 1 116 | 14_10 672 1 117 | 14_12 4415 1 118 | 14_14 9224 1 119 | 14_15 6570 1 120 | 14_16 40711 1 121 | 14_18 3387 1 122 | 14_20 1 0 123 | 14_23 709 1 124 | 14_25 47171 1 125 | 14_27 815 1 126 | 14_29 218 1 127 | 14_31 35 0 128 | 14_33 1 0 129 | 14_35 28 0 130 | 14_49 198 1 131 | 14_52 4 0 132 | 14_53 3 0 133 | 14_69 3 0 134 | 14_74 72 1 135 | 15_6 64778 1 136 | 15_7 3959 1 137 | 15_8 215273 1 138 | 15_10 88653 1 139 | 15_11 18995 1 140 | 15_12 208364 1 141 | 15_13 24 0 142 | 15_14 6570 1 143 | 15_15 120120 1 144 | 15_16 402200 1 145 | 15_17 43165 1 146 | 15_18 51595 1 147 | 15_20 50123 1 148 | 15_22 19492 1 149 | 15_23 439101 1 150 | 15_24 33787 1 151 | 15_26 7 0 152 | 15_27 76137 1 153 | 15_28 4653 1 154 | 15_29 222018 1 155 | 15_30 918 1 156 | 15_31 80539 1 157 | 15_32 26 0 158 | 15_33 99968 1 159 | 15_34 921315 1 160 | 15_35 125884 1 161 | 15_36 11135 1 162 | 15_37 563 1 163 | 15_47 2835 1 164 | 15_48 20103 1 165 | 15_49 23542 1 166 | 15_50 3970 1 167 | 15_51 73 1 168 | 15_52 843 1 169 | 15_53 1717 1 170 | 15_69 1168 1 171 | 15_74 160 1 172 | 16_6 183041 1 173 | 16_8 483397 1 174 | 16_10 71652 1 175 | 16_11 4172658 1 176 | 16_12 179303 1 177 | 16_14 40711 1 178 | 16_15 402200 1 179 | 16_16 1013057 1 180 | 16_17 240907 1 181 | 16_18 112928 1 182 | 16_20 58086 1 183 | 16_23 308718 1 184 | 16_24 63233 1 185 | 16_26 13 0 186 | 16_27 14832 1 187 | 16_28 635753 1 188 | 16_29 143564 1 189 | 16_30 37005 1 190 | 16_31 180057 1 191 | 16_32 291 1 192 | 16_33 96842 1 193 | 16_35 396564 1 194 | 16_36 70848 1 195 | 16_37 128699 1 196 | 16_47 2078 1 197 | 16_49 46122 1 198 | 16_50 97332 1 199 | 16_51 2672 1 200 | 16_52 76295 1 201 | 16_53 203401 1 202 | 16_69 36869 1 203 | 16_74 6502 1 204 | 17_11 580818 1 205 | 17_15 43165 1 206 | 17_16 240907 1 207 | 17_17 349879 1 208 | 17_24 71072 1 209 | 17_28 208321 1 210 | 17_31 129720 1 211 | 17_36 29896 1 212 | 17_50 30281 1 213 | 18_6 192262 1 214 | 18_8 184198 1 215 | 18_10 5917 1 216 | 18_12 85384 1 217 | 18_14 3387 1 218 | 18_15 51595 1 219 | 18_16 112928 1 220 | 18_18 15765 1 221 | 18_20 2420 1 222 | 18_23 17363 1 223 | 18_27 3129 1 224 | 18_29 12953 1 225 | 18_30 45 0 226 | 18_31 1549 1 227 | 18_32 42 0 228 | 18_33 20120 1 229 | 18_35 44016 1 230 | 18_37 190141 1 231 | 18_47 56 1 232 | 18_49 3955 1 233 | 18_51 150 1 234 | 18_52 2235 1 235 | 20_8 15403 1 236 | 20_10 29 0 237 | 20_12 21375 1 238 | 20_14 1 0 239 | 20_15 50123 1 240 | 20_16 58086 1 241 | 20_18 2420 1 242 | 20_23 1391 1 243 | 20_27 197 1 244 | 20_29 386 1 245 | 20_31 627 1 246 | 20_32 3 0 247 | 20_35 67 1 248 | 20_49 21 0 249 | 20_51 24 0 250 | 20_52 9682 1 251 | 22_10 53 1 252 | 22_15 19492 1 253 | 22_27 6 0 254 | 22_51 2 0 255 | 23_6 15264 1 256 | 23_8 215804 1 257 | 23_10 2707 1 258 | 23_12 160946 1 259 | 23_14 709 1 260 | 23_15 439101 1 261 | 23_16 308718 1 262 | 23_18 17363 1 263 | 23_20 1391 1 264 | 23_23 3469 1 265 | 23_27 17570 1 266 | 23_29 4412 1 267 | 23_30 246 1 268 | 23_31 1354 1 269 | 23_32 2 0 270 | 23_33 10769 1 271 | 23_35 1397 1 272 | 23_47 1 0 273 | 23_49 102 1 274 | 23_51 103 1 275 | 23_52 37273 1 276 | 23_53 3 0 277 | 24_11 40605 1 278 | 24_15 33787 1 279 | 24_16 63233 1 280 | 24_17 71072 1 281 | 24_24 192 1 282 | 24_28 23612 1 283 | 24_31 1379 1 284 | 24_36 42 0 285 | 24_50 162 1 286 | 25_14 47171 1 287 | 26_15 7 0 288 | 26_16 13 0 289 | 27_6 757 1 290 | 27_7 14 0 291 | 27_8 12794 1 292 | 27_10 19346 1 293 | 27_12 7058 1 294 | 27_13 554 1 295 | 27_14 815 1 296 | 27_15 76137 1 297 | 27_16 14832 1 298 | 27_18 3129 1 299 | 27_20 197 1 300 | 27_22 6 0 301 | 27_23 17570 1 302 | 27_27 4089 1 303 | 27_29 9270 1 304 | 27_30 544 1 305 | 27_31 1357 1 306 | 27_32 3 0 307 | 27_33 3804 1 308 | 27_34 620 1 309 | 27_35 10189 1 310 | 27_48 15 0 311 | 27_49 45 0 312 | 27_51 163 1 313 | 27_52 2355 1 314 | 28_11 332759 1 315 | 28_15 4653 1 316 | 28_16 635753 1 317 | 28_17 208321 1 318 | 28_24 23612 1 319 | 28_28 51906 1 320 | 28_31 78504 1 321 | 28_36 18132 1 322 | 28_50 4760 1 323 | 29_6 117264 1 324 | 29_8 769017 1 325 | 29_10 3998 1 326 | 29_12 135290 1 327 | 29_14 218 1 328 | 29_15 222018 1 329 | 29_16 143564 1 330 | 29_18 12953 1 331 | 29_20 386 1 332 | 29_23 4412 1 333 | 29_27 9270 1 334 | 29_29 1159 1 335 | 29_30 26 0 336 | 29_31 375 1 337 | 29_32 4 0 338 | 29_33 2255 1 339 | 29_35 2073 1 340 | 29_47 4 0 341 | 29_49 634 1 342 | 29_51 103 1 343 | 29_52 24949 1 344 | 29_53 102 1 345 | 29_69 3 0 346 | 29_74 3 0 347 | 30_8 165 1 348 | 30_10 490 1 349 | 30_12 207 1 350 | 30_15 918 1 351 | 30_16 37005 1 352 | 30_18 45 0 353 | 30_23 246 1 354 | 30_27 544 1 355 | 30_29 26 0 356 | 30_31 3 0 357 | 30_34 81390 1 358 | 30_35 1045 1 359 | 31_6 36052 1 360 | 31_8 95020 1 361 | 31_10 592 1 362 | 31_11 128507 1 363 | 31_12 32570 1 364 | 31_14 35 0 365 | 31_15 80539 1 366 | 31_16 180057 1 367 | 31_17 129720 1 368 | 31_18 1549 1 369 | 31_20 627 1 370 | 31_23 1354 1 371 | 31_24 1379 1 372 | 31_27 1357 1 373 | 31_28 78504 1 374 | 31_29 375 1 375 | 31_30 3 0 376 | 31_31 1311 1 377 | 31_33 993 1 378 | 31_34 4600 1 379 | 31_35 513 1 380 | 31_36 1292 1 381 | 31_37 1 0 382 | 31_49 40 0 383 | 31_50 1156 1 384 | 31_51 3 0 385 | 31_52 2266 1 386 | 31_53 1 0 387 | 31_69 1 0 388 | 32_6 6610 1 389 | 32_8 8153 1 390 | 32_10 13 0 391 | 32_12 1212 1 392 | 32_15 26 0 393 | 32_16 291 1 394 | 32_18 42 0 395 | 32_20 3 0 396 | 32_23 2 0 397 | 32_27 3 0 398 | 32_29 4 0 399 | 32_34 258 1 400 | 32_35 7 0 401 | 33_8 40523 1 402 | 33_10 25 0 403 | 33_12 86746 1 404 | 33_14 1 0 405 | 33_15 99968 1 406 | 33_16 96842 1 407 | 33_18 20120 1 408 | 33_23 10769 1 409 | 33_27 3804 1 410 | 33_29 2255 1 411 | 33_31 993 1 412 | 33_35 30 0 413 | 33_49 3 0 414 | 33_51 8 0 415 | 33_52 2095 1 416 | 34_10 5023 1 417 | 34_13 36 0 418 | 34_15 921315 1 419 | 34_27 620 1 420 | 34_30 81390 1 421 | 34_31 4600 1 422 | 34_32 258 1 423 | 34_51 3219 1 424 | 34_52 194404 1 425 | 35_6 223121 1 426 | 35_8 272495 1 427 | 35_10 1056 1 428 | 35_12 123777 1 429 | 35_14 28 0 430 | 35_15 125884 1 431 | 35_16 396564 1 432 | 35_18 44016 1 433 | 35_20 67 1 434 | 35_23 1397 1 435 | 35_27 10189 1 436 | 35_29 2073 1 437 | 35_30 1045 1 438 | 35_31 513 1 439 | 35_32 7 0 440 | 35_33 30 0 441 | 35_35 1446 1 442 | 35_37 2 0 443 | 35_49 97 1 444 | 35_51 126 1 445 | 35_52 3644 1 446 | 35_53 2 0 447 | 36_11 17051 1 448 | 36_15 11135 1 449 | 36_16 70848 1 450 | 36_17 29896 1 451 | 36_24 42 0 452 | 36_28 18132 1 453 | 36_31 1292 1 454 | 36_36 2 0 455 | 37_8 3004 1 456 | 37_10 45 0 457 | 37_12 7086 1 458 | 37_15 563 1 459 | 37_16 128699 1 460 | 37_18 190141 1 461 | 37_31 1 0 462 | 37_35 2 0 463 | 47_8 2117 1 464 | 47_10 1 0 465 | 47_12 617 1 466 | 47_15 2835 1 467 | 47_16 2078 1 468 | 47_18 56 1 469 | 47_23 1 0 470 | 47_29 4 0 471 | 47_49 1 0 472 | 48_10 27 0 473 | 48_13 512 1 474 | 48_15 20103 1 475 | 48_27 15 0 476 | 49_6 8277 1 477 | 49_8 43073 1 478 | 49_10 574 1 479 | 49_12 13296 1 480 | 49_14 198 1 481 | 49_15 23542 1 482 | 49_16 46122 1 483 | 49_18 3955 1 484 | 49_20 21 0 485 | 49_23 102 1 486 | 49_27 45 0 487 | 49_29 634 1 488 | 49_31 40 0 489 | 49_33 3 0 490 | 49_35 97 1 491 | 49_47 1 0 492 | 49_49 1566 1 493 | 49_51 32 0 494 | 49_52 39 0 495 | 49_53 1 0 496 | 49_74 1 0 497 | 50_11 38547 1 498 | 50_15 3970 1 499 | 50_16 97332 1 500 | 50_17 30281 1 501 | 50_24 162 1 502 | 50_28 4760 1 503 | 50_31 1156 1 504 | 50_50 117 1 505 | 51_6 748 1 506 | 51_8 2083 1 507 | 51_10 40 0 508 | 51_12 279 1 509 | 51_15 73 1 510 | 51_16 2672 1 511 | 51_18 150 1 512 | 51_20 24 0 513 | 51_22 2 0 514 | 51_23 103 1 515 | 51_27 163 1 516 | 51_29 103 1 517 | 51_31 3 0 518 | 51_33 8 0 519 | 51_34 3219 1 520 | 51_35 126 1 521 | 51_49 32 0 522 | 52_6 15621 1 523 | 52_8 12271 1 524 | 52_10 1128 1 525 | 52_12 3696 1 526 | 52_14 4 0 527 | 52_15 843 1 528 | 52_16 76295 1 529 | 52_18 2235 1 530 | 52_20 9682 1 531 | 52_23 37273 1 532 | 52_27 2355 1 533 | 52_29 24949 1 534 | 52_31 2266 1 535 | 52_33 2095 1 536 | 52_34 194404 1 537 | 52_35 3644 1 538 | 52_49 39 0 539 | 52_52 1 0 540 | 53_10 84 1 541 | 53_14 3 0 542 | 53_15 1717 1 543 | 53_16 203401 1 544 | 53_23 3 0 545 | 53_29 102 1 546 | 53_31 1 0 547 | 53_35 2 0 548 | 53_49 1 0 549 | 69_10 134 1 550 | 69_14 3 0 551 | 69_15 1168 1 552 | 69_16 36869 1 553 | 69_29 3 0 554 | 69_31 1 0 555 | 74_10 136 1 556 | 74_14 72 1 557 | 74_15 160 1 558 | 74_16 6502 1 559 | 74_29 3 0 560 | 74_49 1 0 561 | -------------------------------------------------------------------------------- /utils/add_node_type_zinc.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE 2 | from math import * 3 | import random 4 | import numpy as np 5 | from copy import deepcopy 6 | #from types import IntType, ListType, TupleType, StringTypes 7 | import itertools 8 | import time 9 | import math 10 | import argparse 11 | import subprocess 12 | from load_model import loaded_model 13 | from keras.preprocessing import sequence 14 | from rdkit import Chem 15 | from rdkit.Chem import Draw 16 | from rdkit.Chem import Descriptors 17 | import sys 18 | from rdkit.Chem import AllChem 19 | from rdkit.Chem import MolFromSmiles, MolToSmiles 20 | from rdkit.Chem import rdMolDescriptors 21 | from make_smile import zinc_data_with_bracket_original, zinc_processed_with_bracket 22 | #from rdock_test import rdock_score 23 | from rdock_test_MP import rdock_score 24 | import sascorer 25 | import pickle 26 | import gzip 27 | import networkx as nx 28 | from rdkit.Chem import rdmolops 29 | import SDF2xyzV2 30 | from filter import HashimotoFilter, Neutralizer 31 | import shutil,os 32 | 33 | #import tensorflow as tf 34 | #os.environ["CUDA_VISIBLE_DEVICES"]="0" #"0,1,2,3," 35 | #from keras import backend as K 36 | #config = tf.ConfigProto() 37 | #config.gpu_options.allow_growth = True 38 | #sess = tf.Session(config = config) 39 | #K.set_session(sess) 40 | 41 | 42 | smiles_max_len = 82 #MW250:60, MW300:70 43 | 44 | def expanded_node(model,state,val,loop_num): 45 | 46 | all_nodes=[] 47 | 48 | end="\n" 49 | 50 | #position=[] 51 | position=[] 52 | position.extend(state) 53 | total_generated=[] 54 | new_compound=[] 55 | get_int_old=[] 56 | for j in range(len(position)): 57 | get_int_old.append(val.index(position[j])) 58 | 59 | get_int=get_int_old 60 | 61 | x=np.reshape(get_int,(1,len(get_int))) 62 | #x_pad= sequence.pad_sequences(x, maxlen=82, dtype='int32', 63 | # padding='post', truncating='pre', value=0.) 64 | x_pad= sequence.pad_sequences(x, maxlen=smiles_max_len, dtype='int32', 65 | padding='post', truncating='pre', value=0.) 66 | 67 | for i in range(loop_num): 68 | predictions=model.predict(x_pad) 69 | #print "shape of RNN",predictions.shape 70 | preds=np.asarray(predictions[0][len(get_int)-1]).astype('float64') 71 | preds = np.log(preds) / 1.0 72 | preds = np.exp(preds) / np.sum(np.exp(preds)) 73 | next_probas = np.random.multinomial(1, preds, 1) 74 | next_int=np.argmax(next_probas) 75 | #get_int.append(next_int) 76 | all_nodes.append(next_int) 77 | 78 | print(all_nodes) 79 | all_nodes=list(set(all_nodes)) 80 | 81 | print(all_nodes) 82 | 83 | 84 | 85 | 86 | 87 | #total_generated.append(get_int) 88 | #all_posible.extend(total_generated) 89 | 90 | 91 | 92 | 93 | 94 | 95 | return all_nodes 96 | 97 | 98 | def node_to_add(all_nodes,val): 99 | added_nodes=[] 100 | for i in range(len(all_nodes)): 101 | added_nodes.append(val[all_nodes[i]]) 102 | 103 | print(added_nodes) 104 | 105 | return added_nodes 106 | 107 | 108 | 109 | def chem_kn_simulation(model,state,val,added_nodes): 110 | all_posible=[] 111 | 112 | end="\n" 113 | #val2=['C', '(', ')', 'c', '1', '2', 'o', '=', 'O', 'N', '3', 'F', '[C@@H]', 'n', '-', '#', 'S', 'Cl', '[O-]', '[C@H]', '[NH+]', '[C@]', 's', 'Br', '/', '[nH]', '[NH3+]', '4', '[NH2+]', '[C@@]', '[N+]', '[nH+]', '\\', '[S@]', '5', '[N-]', '[n+]', '[S@@]', '[S-]', '6', '7', 'I', '[n-]', 'P', '[OH+]', '[NH-]', '[P@@H]', '[P@@]', '[PH2]', '[P@]', '[P+]', '[S+]', '[o+]', '[CH2-]', '[CH-]', '[SH+]', '[O+]', '[s+]', '[PH+]', '[PH]', '8', '[S@@+]'] 114 | for i in range(len(added_nodes)): 115 | #position=[] 116 | position=[] 117 | position.extend(state) 118 | position.append(added_nodes[i]) 119 | #print state 120 | #print position 121 | #print len(val2) 122 | total_generated=[] 123 | new_compound=[] 124 | get_int_old=[] 125 | for j in range(len(position)): 126 | get_int_old.append(val.index(position[j])) 127 | 128 | get_int=get_int_old 129 | 130 | x=np.reshape(get_int,(1,len(get_int))) 131 | x_pad= sequence.pad_sequences(x, maxlen=smiles_max_len, dtype='int32', 132 | padding='post', truncating='pre', value=0.) 133 | while not get_int[-1] == val.index(end): 134 | predictions=model.predict(x_pad) 135 | #print "shape of RNN",predictions.shape 136 | preds=np.asarray(predictions[0][len(get_int)-1]).astype('float64') 137 | preds = np.log(preds) / 1.0 138 | preds = np.exp(preds) / np.sum(np.exp(preds)) 139 | next_probas = np.random.multinomial(1, preds, 1) 140 | #print predictions[0][len(get_int)-1] 141 | #print "next probas",next_probas 142 | #next_int=np.argmax(predictions[0][len(get_int)-1]) 143 | next_int=np.argmax(next_probas) 144 | a=predictions[0][len(get_int)-1] 145 | next_int_test=sorted(range(len(a)), key=lambda i: a[i])[-10:] 146 | get_int.append(next_int) 147 | x=np.reshape(get_int,(1,len(get_int))) 148 | x_pad = sequence.pad_sequences(x, maxlen=smiles_max_len, dtype='int32', 149 | padding='post', truncating='pre', value=0.) 150 | if len(get_int)>smiles_max_len: 151 | break 152 | total_generated.append(get_int) 153 | all_posible.extend(total_generated) 154 | 155 | 156 | return all_posible 157 | 158 | 159 | 160 | def predict_smile(all_posible,val): 161 | 162 | 163 | new_compound=[] 164 | for i in range(len(all_posible)): 165 | total_generated=all_posible[i] 166 | 167 | generate_smile=[] 168 | 169 | for j in range(len(total_generated)-1): 170 | generate_smile.append(val[total_generated[j]]) 171 | generate_smile.remove("&") 172 | new_compound.append(generate_smile) 173 | 174 | return new_compound 175 | 176 | 177 | def make_input_smile(generate_smile): 178 | new_compound=[] 179 | for i in range(len(generate_smile)): 180 | middle=[] 181 | for j in range(len(generate_smile[i])): 182 | middle.append(generate_smile[i][j]) 183 | com=''.join(middle) 184 | new_compound.append(com) 185 | #print new_compound 186 | #print len(new_compound) 187 | 188 | return new_compound 189 | 190 | 191 | 192 | def check_node_type(new_compound, score_type, generated_dict, sa_threshold = 10, rule = 0, radical = False, docking_num = 10, target_dir = 'cdk2', hashimoto_filter=False, dict_id=1, trial = 1): 193 | node_index=[] 194 | valid_compound=[] 195 | all_smile=[] 196 | distance=[] 197 | 198 | score=[] 199 | f_list = open('list_docking_pose_%s.txt' % trial, 'a') 200 | for i in range(len(new_compound)): 201 | #check dictionary 202 | print('check dictionary', 'comp:', new_compound[i], 'check:', new_compound[i] in generated_dict) 203 | if new_compound[i] in generated_dict: 204 | node_index.append(i) 205 | valid_compound.append(new_compound[i]) 206 | score.append(generated_dict[new_compound[i]]) 207 | print('duplication!!') 208 | continue 209 | 210 | ko = Chem.MolFromSmiles(new_compound[i]) 211 | if ko!=None: 212 | # check hashimoto_filter 213 | if hashimoto_filter: 214 | hashifilter = HashimotoFilter() 215 | hf,_ = hashifilter.filter([new_compound[i]]) 216 | print('hashimoto filter check is', hf) 217 | if hf[0] == 0: 218 | continue 219 | 220 | #check SA_score 221 | SA_score = -sascorer.calculateScore(MolFromSmiles(new_compound[i])) 222 | 223 | 224 | #if new_compound[i][-1] == '\n': 225 | # continue 226 | 227 | print('SA_score', SA_score) 228 | if sa_threshold < -SA_score: 229 | #node_index.append(i) 230 | #valid_compound.append(new_compound[i]) 231 | #score.append([10**10, 10**10]) 232 | continue 233 | 234 | #check radical 235 | if radical: 236 | #koh = Chem.AddHs(ko) ## get ValueError: Sanitization error: Explicit valence for atom # 3 C, 6, is 237 | try: 238 | koh = Chem.AddHs(ko) 239 | except ValueError: 240 | continue 241 | 242 | fw = Chem.SDWriter('radical_check.sdf') 243 | try: 244 | fw.write(koh) 245 | fw.close() 246 | except ValueError: 247 | continue 248 | Mol_atom, Mol_CartX, Mol_CartY, Mol_CartZ,TotalCharge, SpinMulti = SDF2xyzV2.Read_sdf('radical_check.sdf') 249 | print('radical check', SpinMulti) 250 | if SpinMulti == 2: #2:open 251 | continue 252 | 253 | #check Rule of Five 254 | weight = round(rdMolDescriptors._CalcMolWt(ko), 2) 255 | logp = Descriptors.MolLogP(ko) 256 | donor = rdMolDescriptors.CalcNumLipinskiHBD(ko) 257 | acceptor = rdMolDescriptors.CalcNumLipinskiHBA(ko) 258 | rotbonds = rdMolDescriptors.CalcNumRotatableBonds(ko) 259 | if rule == 1: 260 | if weight > 500 or logp > 5 or donor > 5 or acceptor > 10: 261 | #node_index.append(i) 262 | #valid_compound.append(new_compound[i]) 263 | #score.append([10**10, 10**10]) 264 | continue 265 | if rule == 2: 266 | if weight > 300 or logp > 3 or donor > 3 or acceptor > 3 or rotbonds > 3: 267 | #node_index.append(i) 268 | #valid_compound.append(new_compound[i]) 269 | #score.append([10**10, 10**10]) 270 | continue 271 | 272 | cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(MolFromSmiles(new_compound[i])))) 273 | if len(cycle_list) == 0: 274 | cycle_length =0 275 | else: 276 | cycle_length = max([ len(j) for j in cycle_list ]) 277 | if cycle_length <= 6: 278 | cycle_length = 0 279 | if cycle_length==0: 280 | m=rdock_score(new_compound[i], score_type, target_dir, docking_num=docking_num) 281 | if m[0]<10**10: 282 | node_index.append(i) 283 | valid_compound.append(new_compound[i]) 284 | score.append(m[:2]) 285 | #add dictionary 286 | generated_dict[new_compound[i]] = m[:2] 287 | 288 | #copy best docking result 289 | best_docking_id = m[2] 290 | docking_result_file = 'rdock_out_' 291 | compound_id = i 292 | # creat the directory for best docking pose. 293 | out_dir = 'mol_3D_pose_trial'+ str(trial) 294 | if not os.path.isdir(out_dir): 295 | os.mkdir(out_dir) 296 | f_list.write('pose_'+str(dict_id)+'_'+str(compound_id)+'_'+str(best_docking_id)+','+new_compound[i]) 297 | f_list.write('\n') 298 | shutil.copyfile(docking_result_file+str(best_docking_id)+'.sd', out_dir + '/pose_'+ str(dict_id)+'_'+str(compound_id)+'_'+str(best_docking_id)+'.sd') 299 | f_list.close() 300 | return node_index,score,valid_compound, generated_dict 301 | -------------------------------------------------------------------------------- /train_RNN/train_RNN.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import itertools 3 | import operator 4 | import numpy as np 5 | import nltk 6 | import os 7 | import sys 8 | from datetime import datetime 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Activation,TimeDistributed 11 | from keras.layers import LSTM,GRU 12 | from keras.layers.embeddings import Embedding 13 | from keras.optimizers import RMSprop, Adam 14 | from keras.utils.data_utils import get_file 15 | from keras.layers import Dropout 16 | import numpy as np 17 | import random 18 | import sys 19 | from keras.utils.np_utils import to_categorical 20 | from keras.preprocessing import sequence 21 | from keras.models import model_from_json 22 | from keras.callbacks import EarlyStopping, ModelCheckpoint 23 | #from make_smile import ziprocess_organic,process_zinc_data 24 | from make_smile import zinc_data_with_bracket_original,zinc_processed_with_bracket 25 | 26 | from keras.layers import Conv1D, MaxPooling1D 27 | #from combine_bond_atom import organic, process_organic,bond_atom 28 | 29 | import yaml 30 | 31 | import matplotlib as mpl 32 | mpl.use('Agg') ## fix the "Invalid DISPLAY variable" Error 33 | import matplotlib.pyplot as plt 34 | 35 | def load_data(): 36 | 37 | sen_space=[] 38 | #f = open('/Users/yang/smiles.csv', 'rb') 39 | f = open(dataset, 'rb') 40 | reader = csv.reader(f) 41 | for row in reader: 42 | #word_space[row].append(reader[row]) 43 | #print word_sapce 44 | sen_space.append(row) 45 | #print sen_space 46 | f.close() 47 | 48 | element_table=["Cu","Ti","Zr","Ga","Ge","As","Se","Br","Si","Zn","Cl","Be","Ca","Na","Sr","Ir","Li","Rb","Cs","Fr","Be","Mg", 49 | "Ca","Sr","Ba","Ra","Sc","La","Ac","Ti","Zr","Nb","Ta","Db","Cr","Mo","Sg","Mn","Tc","Re","Bh","Fe","Ru","Os","Hs","Co","Rh", 50 | "Ir","Mt","Ni","Pd","Pt","Ds","Cu","Ag","Au","Rg","Zn","Cd","Hg","Cn","Al","Ga","In","Tl","Nh","Si","Ge","Sn","Pb","Fl", 51 | "As","Sb","Bi","Mc","Se","Te","Po","Lv","Cl","Br","At","Ts","He","Ne","Ar","Kr","Xe","Rn","Og"] 52 | #print sen_space 53 | word1=sen_space[0] 54 | word_space=list(word1[0]) 55 | end="\n" 56 | #start="st" 57 | #word_space.insert(0,end) 58 | word_space.append(end) 59 | #print word_space 60 | #print len(sen_space) 61 | all_smile=[] 62 | #print word_space 63 | #for i in range(len(all_smile)): 64 | 65 | for i in range(len(sen_space)): 66 | word1=sen_space[i] 67 | word_space=list(word1[0]) 68 | word=[] 69 | #word_space.insert(0,end) 70 | j=0 71 | while j= 70: 145 | re= -1.0 146 | while node != None: 147 | node.Update(re) 148 | node = node.parentNode 149 | continue 150 | if node.position == '\n': 151 | re = -1.0 152 | while node != None: 153 | node.Update(re) 154 | node = node.parentNode 155 | continue 156 | 157 | """------------------------------------------------------------------""" 158 | 159 | """expansion step""" 160 | expanded=expanded_node(model,state.position,val,loop_num_nodeExpansion) 161 | 162 | new_compound = [] 163 | nodeadded = [] 164 | for n in range(simulation_num): 165 | nodeadded_tmp = node_to_add(expanded, val) 166 | all_posible=chem_kn_simulation(model,state.position,val,nodeadded_tmp) 167 | generate_smile=predict_smile(all_posible,val) 168 | new_compound_tmp = make_input_smile(generate_smile) 169 | nodeadded.extend(nodeadded_tmp) 170 | new_compound.extend(new_compound_tmp) 171 | print('nodeadded', nodeadded) 172 | print('new compound', new_compound) 173 | print('generated_dict', generated_dict) 174 | print('dict_id', dict_id) 175 | for comp in new_compound: 176 | print('lastcomp', comp[-1], ' ... ',comp[-1] == '\n') 177 | node_index,rdock_score,valid_smile,generated_dict = check_node_type(new_compound, score_type, generated_dict, sa_threshold = sa_threshold, rule = rule5, radical = radical_check, docking_num = docking_num, target_dir = target_dir, hashimoto_filter = hashimoto_filter, dict_id = dict_id, trial = trial) 178 | valid_compound.extend(valid_smile) 179 | score_distribution.extend(rdock_score) 180 | 181 | print('node', node_index, 'rdock_score', rdock_score, 'valid', valid_smile) 182 | #out_f = open(output_dir, 'a') 183 | #out_f.write(str(valid_smile) + ', '+ str(rdock_score)+', '+str(min_score)+', '+str(len(state.position))) 184 | out_f.write(str(valid_smile) + ', '+ str(rdock_score)+', '+str(min_score)+', '+str(len(state.position))+', '+str(time.time()-start_time)) 185 | out_f.write('\n') 186 | out_f.flush() 187 | #out_f.close() 188 | dict_id += 1 189 | 190 | if len(node_index)==0: 191 | re=-1.0 192 | while node != None: 193 | node.Update(re) 194 | node = node.parentNode 195 | else: 196 | re_list = [] 197 | #atom_list = [nodeadded[m] for m in node_index] 198 | atom_checked = [] 199 | for i in range(len(node_index)): 200 | m=node_index[i] 201 | atom = nodeadded[m] 202 | 203 | if atom not in atom_checked: 204 | node.Addnode(atom, state) 205 | node_pool.append(node.childNodes[len(atom_checked)]) 206 | depth.append(len(state.position)) 207 | atom_checked.append(atom) 208 | else: 209 | node_pool.append(node.childNodes[atom_checked.index(atom)]) 210 | 211 | #node.Addnode(nodeadded[m],state) 212 | #node.Addnode(nodeadded[m],state) 213 | #print valid_smile[i], 'node m', m, 'nodeadded[m]', nodeadded[m], 'node.childNodes[i]', node.childNodes[i] 214 | for child in node.childNodes: 215 | print(child.position) 216 | print('\n') 217 | #node_pool.append(node.childNodes[i]) 218 | #depth.append(len(state.position)) 219 | 220 | score_index = 0 if score_type == 'SCORE' else 1 221 | 222 | print("current minmum score",min_score) 223 | if rdock_score[i][score_index]<=min_score: 224 | min_score_distribution.append(rdock_score[i][score_index]) 225 | min_score=rdock_score[i][score_index] 226 | else: 227 | min_score_distribution.append(min_score) 228 | """simulation""" 229 | 230 | if atom == '\n': 231 | re = -1 232 | else: 233 | #re=(- (rdock_score[i][score_index] + 20)*0.1)/(1+abs(rdock_score[i][score_index] + 20)*0.1) 234 | re=(- (rdock_score[i][score_index] - base_rdock_score)*0.1)/(1+abs(rdock_score[i][score_index] -base_rdock_score)*0.1) 235 | #### pj16 reward fuction: 236 | #base_rdock_score = -20 237 | #reward = (np.tanh(0.1*(abs(rdock_score[max_index])+base_rdock_score)) + 1)/2 238 | re_list.append(re) 239 | print('atom', atom, 're_list', re_list) 240 | #re=(- (rdock_score[i]/100))/(1+abs(rdock_score[i]/100)) 241 | """backpropation step""" 242 | 243 | for i in range(len(node_pool)): 244 | 245 | node=node_pool[i] 246 | while node != None: 247 | node.Update(re_list[i]) 248 | node = node.parentNode 249 | 250 | for child in node_pool: 251 | print(child.position, child.wins, child.visits) 252 | 253 | 254 | out_f.close() 255 | 256 | """check if found the desired compound""" 257 | 258 | #print "all valid compounds:",valid_compound 259 | #print "all active compounds:",desired_compound 260 | print("rdock_score",score_distribution) 261 | print("num valid_compound:",len(valid_compound)) 262 | print("valid compounds",valid_compound) 263 | print("depth",depth) 264 | print("min_score",min_score_distribution) 265 | 266 | return valid_compound 267 | 268 | 269 | def UCTchemical(): 270 | one_search_start_time=time.time() 271 | time_out=one_search_start_time+60*10 272 | state = chemical() 273 | best = MCTS(root = state,verbose = False) 274 | 275 | return best 276 | 277 | 278 | if __name__ == "__main__": 279 | # set parameter 280 | argvs = sys.argv 281 | 282 | """read yaml file for configuration""" 283 | f = open(str(argvs[1]), "r+") 284 | conf = yaml.load(f, Loader=yaml.SafeLoader) 285 | f.close() 286 | 287 | trial = conf.get('trial', 1) 288 | c_val = conf.get('c_val', 1.0) 289 | loop_num_nodeExpansion = conf.get('loop_num_nodeExpansion', 1000) 290 | target = conf.get('target', 'CDK2') 291 | target_dir = conf.get('target_path', './') 292 | hours = conf.get('hours', 1) 293 | score_type = conf.get('score_type', 'SCORE.INTER') # or 294 | docking_num = conf.get('docking_num', 10) 295 | sa_threshold = conf.get('sa_threshold', 3.5) #if SA > sa_threshold, score = 0. Default sa_threshold = 10 296 | #RO5: if a compound does not satisfy rule of 5, score = 0. 297 | rule5 = conf.get('rule5', 1) #0:none, 1: rule of 5, 2: rule of 3 298 | radical_check = conf.get('radical_check', True) 299 | simulation_num = conf.get('simulation_num', 3) 300 | hashimoto_filter = conf.get('hashimoto_filter', True) # or False, use/not use hashimoto filter 301 | base_rdock_score = conf.get('base_rdock_score', -20) 302 | model_name = conf.get('model_name', 'model') 303 | 304 | print('========== display configuration ==========') 305 | print('trial num is: ', trial) 306 | print('c_val: ', c_val) 307 | print('loop_num_nodeExpansion: ', loop_num_nodeExpansion) 308 | print('target: ', target) 309 | print('target_dir: ',target_dir) 310 | print('max run time: ',hours) 311 | print('score_type: ', score_type) 312 | print('docking_num: ',docking_num) 313 | print('sa_threshold: ',sa_threshold) 314 | print('model_name: ', model_name) 315 | print('base_rdock_score: ', base_rdock_score) 316 | print('simulation_num: ',simulation_num) 317 | print('hashimoto_filter: ', hashimoto_filter) 318 | """----------------------------------------------------------------------""" 319 | 320 | output_dir = 'result_'+target+'_C'+str(c_val)+'_trial'+str(trial)+'.txt' 321 | 322 | smile_old=zinc_data_with_bracket_original(SBMolGen_PATH + '/data/250k_rndm_zinc_drugs_clean.smi') 323 | val,smile=zinc_processed_with_bracket(smile_old) 324 | print('val is ', val) 325 | 326 | out_f = open(output_dir, 'w') 327 | out_f.write('#valid_smile, rdock_score, min_score, depth, used_time') 328 | out_f.write('\n') 329 | out_f.close() 330 | 331 | model=loaded_model(SBMolGen_PATH + '/RNN-model/'+ model_name) #WM300 not tested 332 | valid_compound=UCTchemical() 333 | -------------------------------------------------------------------------------- /utils/filter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from rdkit import Chem 4 | from rdkit.Chem import AllChem 5 | import pandas as pd 6 | from os import path 7 | 8 | 9 | class HashimotoFilter: 10 | 11 | neutralizer = None 12 | evaluater = None 13 | 14 | def __init__(self): 15 | self.neutralizer = Neutralizer() 16 | self.evaluater = Evaluater() 17 | 18 | def filter(self, smiles): 19 | ''' 20 | input list of str smiles 21 | return list of int 1: OK, 0: NG 22 | ''' 23 | results = [] 24 | mols = [] # debug用 25 | for smi in smiles: 26 | result = 0 27 | try: 28 | mol = Chem.MolFromSmiles(smi) # SMILES正当性チェック 29 | mol1 = self.neutralizer.NeutraliseCharges(mol) 30 | 31 | # 一度SMILESに直さないと、Evaluateで何故かエラーになる 32 | neutralized = mol1.GetProp('neutralized') 33 | mol1 = Chem.MolFromSmiles(Chem.MolToSmiles(mol1)) 34 | mol1.SetProp('neutralized', neutralized) 35 | 36 | mol2 = self.evaluater.Evaluate(mol1) 37 | if mol2 and int(mol2.GetProp('ErrorIs')) == 0: 38 | result = 1 39 | except: 40 | mol2 = mol 41 | results.append(result) 42 | mols.append(mol2) 43 | return (results, mols) 44 | 45 | 46 | class Neutralizer: 47 | 48 | reactions = None 49 | 50 | def __init__(self): 51 | patts= ( 52 | # Imidazoles 53 | ('[n+;H]','n'), 54 | # Amines 55 | ('[N+;!H0]','N'), 56 | # Carboxylic acids and alcohols 57 | ('[$([O-]);!$([O-][#7])]','O'), 58 | # Thiols 59 | ('[S-;X1]','S'), 60 | # Sulfonamides 61 | ('[$([N-;X2]S(=O)=O)]','N'), 62 | # Enamines 63 | ('[$([N-;X2][C,N]=C)]','N'), 64 | # Tetrazoles 65 | ('[n-]','[nH]'), 66 | # Sulfoxides 67 | ('[$([S-]=O)]','S'), 68 | # Amides 69 | ('[$([N-]C=O)]','N'), 70 | ) 71 | self.reactions = [(Chem.MolFromSmarts(x),Chem.MolFromSmiles(y,False)) for x,y in patts] 72 | 73 | 74 | def NeutraliseCharges(self, mol, reactions=None): 75 | replaced = False 76 | for i,(reactant, product) in enumerate(self.reactions): 77 | while mol.HasSubstructMatch(reactant): 78 | replaced = True 79 | rms = AllChem.ReplaceSubstructs(mol, reactant, product) 80 | mol = rms[0] 81 | mol.SetProp('neutralized', str(replaced)) 82 | return mol 83 | #if replaced: 84 | # return True 85 | #else: 86 | # return False 87 | 88 | class Evaluater: 89 | 90 | a_dict = {} 91 | b_dict = {} 92 | esPatterns = None 93 | 94 | _rawD = [ 95 | ('sLi', '[LiD1]-*'), 96 | ('ssBe', '[BeD2](-*)-*'), 97 | ('ssssBe', '[BeD4](-*)(-*)(-*)-*'), 98 | ('ssBH', '[BD2H](-*)-*'), 99 | ('sssB', '[BD3](-*)(-*)-*'), 100 | ('ssssB', '[BD4](-*)(-*)(-*)-*'), 101 | ('sCH3', '[CD1H3]-*'), 102 | ('dCH2', '[CD1H2]=*'), 103 | ('ssCH2', '[CD2H2](-*)-*'), 104 | ('tCH', '[CD1H]#*'), 105 | ('dsCH', '[CD2H](=*)-*'), 106 | ('aaCH', '[C,c;D2H](:*):*'), 107 | ('sssCH', '[CD3H](-*)(-*)-*'), 108 | ('ddC', '[CD2H0](=*)=*'), 109 | ('tsC', '[CD2H0](#*)-*'), 110 | #('dssC', '[CD3H0](=*)(-*)-*'), 111 | ('dssC', '[C,c;D3H0](=*)(~*)~*'), 112 | ('aasC', '[C,c;D3H0](:*)(:*)-*'), 113 | ('aaaC', '[C,c;D3H0](:*)(:*):*'), 114 | ('ssssC', '[CD4H0](-*)(-*)(-*)-*'), 115 | ('sNH3', '[ND1H3]-*'), 116 | ('sNH2', '[ND1H2]-*'), 117 | ('ssNH2', '[ND2H2](-*)-*'), 118 | ('dNH', '[ND1H]=*'), 119 | ('ssNH', '[ND2H](-*)-*'), 120 | ('aaNH', '[N,nD2H](:*):*'), 121 | ('tN', '[ND1H0]#*'), 122 | ('sssNH', '[ND3H](-*)(-*)-*'), 123 | ('dsN', '[ND2H0](=*)-*'), 124 | ('aaN', '[N,nD2H0](:*):*'), 125 | ('sssN', '[ND3H0](-*)(-*)-*'), 126 | ('ddsN', '[ND3H0](~[OD1H0])(~[OD1H0])-,:*'), # mod 127 | ('aasN', '[N,nD3H0](:*)(:*)-,:*'), # mod 128 | ('ssssN', '[ND4H0](-*)(-*)(-*)-*'), 129 | ('sOH', '[OD1H]-*'), 130 | #('dO', '[OD1H0]=*'), 131 | ('dO', '[$([OD1H0]=*),$([OD1H0-]-[*+])]'), 132 | ('ssO', '[OD2H0](-*)-*'), 133 | ('aaO', '[O,oD2H0](:*):*'), 134 | ('sF', '[FD1]-*'), 135 | ('sSiH3', '[SiD1H3]-*'), 136 | ('ssSiH2', '[SiD2H2](-*)-*'), 137 | ('sssSiH', '[SiD3H1](-*)(-*)-*'), 138 | ('ssssSi', '[SiD4H0](-*)(-*)(-*)-*'), 139 | ('sPH2', '[PD1H2]-*'), 140 | ('ssPH', '[PD2H1](-*)-*'), 141 | ('sssP', '[PD3H0](-*)(-*)-*'), 142 | ('dsssP', '[PD4H0](=*)(-*)(-*)-*'), 143 | ('sssssP', '[PD5H0](-*)(-*)(-*)(-*)-*'), 144 | ('sSH', '[SD1H1]-*'), 145 | ('dS', '[SD1H0]=*'), 146 | ('ssS', '[SD2H0](-*)-*'), 147 | ('aaS', '[S,sD2H0](:*):*'), 148 | ('dssS', '[SD3H0](=*)(-*)-*'), 149 | ('ddssS', '[SD4H0](~[OD1H0])(~[OD1H0])(-*)-*'), # mod 150 | ('sCl', '[ClD1]-*'), 151 | ('sGeH3', '[GeD1H3](-*)'), 152 | ('ssGeH2', '[GeD2H2](-*)-*'), 153 | ('sssGeH', '[GeD3H1](-*)(-*)-*'), 154 | ('ssssGe', '[GeD4H0](-*)(-*)(-*)-*'), 155 | ('sAsH2', '[AsD1H2]-*'), 156 | ('ssAsH', '[AsD2H1](-*)-*'), 157 | ('sssAs', '[AsD3H0](-*)(-*)-*'), 158 | ('sssdAs', '[AsD4H0](=*)(-*)(-*)-*'), 159 | ('sssssAs', '[AsD5H0](-*)(-*)(-*)(-*)-*'), 160 | ('sSeH', '[SeD1H1]-*'), 161 | ('dSe', '[SeD1H0]=*'), 162 | ('ssSe', '[SeD2H0](-*)-*'), 163 | ('aaSe', '[SeD2H0](:*):*'), 164 | ('dssSe', '[SeD3H0](=*)(-*)-*'), 165 | ('ddssSe', '[SeD4H0](=*)(=*)(-*)-*'), 166 | ('sBr', '[BrD1]-*'), 167 | ('sSnH3', '[SnD1H3]-*'), 168 | ('ssSnH2', '[SnD2H2](-*)-*'), 169 | ('sssSnH', '[SnD3H1](-*)(-*)-*'), 170 | ('ssssSn', '[SnD4H0](-*)(-*)(-*)-*'), 171 | ('sI', '[ID1]-*'), 172 | ('sPbH3', '[PbD1H3]-*'), 173 | ('ssPbH2', '[PbD2H2](-*)-*'), 174 | ('sssPbH', '[PbD3H1](-*)(-*)-*'), 175 | ('ssssPb', '[PbD4H0](-*)(-*)(-*)-*'), 176 | ] 177 | 178 | dict_aEstate ={'sLi':0,'ssBe':1,'ssssBe':2,'ssBH':3,'sssB':4,'ssssB':5,'sCH3':6,'dCH2':7,'ssCH2':8,'tCH':9,'dsCH':10,'aaCH':11,'sssCH':12,'ddC':13,'tsC':14,'dssC':15,'aasC':16,'aaaC':17,'ssssC':18,'sNH3':19,'sNH2':20,'ssNH2':21,'dNH':22,'ssNH':23,'aaNH':24,'tN':25,'sssNH':26,'dsN':27,'aaN':28,'sssN':29,'ddsN':30,'aasN':31,'ssssN':32,'sOH':33,'dO':34,'ssO':35,'aaO':36,'sF':37,'sSiH3':38,'ssSiH2':39,'sssSiH':40,'ssssSi':41,'sPH2':42,'ssPH':43,'sssP':44,'dsssP':45,'sssssP':46,'sSH':47,'dS':48,'ssS':49,'aaS':50,'dssS':51,'ddssS':52,'sCl':53,'sGeH3':54,'ssGeH2':55,'sssGeH':56,'ssssGe':57,'sAsH2':58,'ssAsH':59,'sssAs':60,'sssdAs':61,'sssssAs':62,'sSeH':63,'dSe':64,'ssSe':65,'aaSe':66,'dssSe':67,'ddssSe':68,'sBr':69,'sSnH3':70,'ssSnH2':71,'sssSnH':72,'ssssSn':73,'sI':74,'sPbH3':75,'ssPbH2':76,'sssPbH':77,'ssssPb':78} 179 | dict_atEstate ={'sLi':-1,'ssBe':-1,'ssssBe':-1,'ssBH':-1,'sssB':-1,'ssssB':-1,'sCH3':1,'dCH2':2,'ssCH2':1,'tCH':3,'dsCH':2,'aaCH':2,'sssCH':1,'ddC':4,'tsC':3,'dssC':2,'aasC':2,'aaaC':2,'ssssC':1,'sNH3':5,'sNH2':5,'ssNH2':5,'dNH':7,'ssNH':5,'aaNH':8,'tN':10,'sssNH':5,'dsN':7,'aaN':8,'sssN':5,'ddsN':9,'aasN':8,'ssssN':6,'sOH':11,'dO':12,'ssO':11,'aaO':13,'sF':14,'sSiH3':-1,'ssSiH2':-1,'sssSiH':-1,'ssssSi':-1,'sPH2':15,'ssPH':15,'sssP':15,'dsssP':15,'sssssP':15,'sSH':16,'dS':19,'ssS':16,'aaS':17,'dssS':18,'ddssS':18,'sCl':14,'sGeH3':-1,'ssGeH2':-1,'sssGeH':-1,'ssssGe':-1,'sAsH2':-1,'ssAsH':-1,'sssAs':-1,'sssdAs':-1,'sssssAs':-1,'sSeH':-1,'dSe':-1,'ssSe':-1,'aaSe':-1,'dssSe':-1,'ddssSe':-1,'sBr':20,'sSnH3':-1,'ssSnH2':-1,'sssSnH':-1,'ssssSn':-1,'sI':21,'sPbH3':-1,'ssPbH2':-1,'sssPbH':-1,'ssssPb':-1} 180 | 181 | def __init__(self): 182 | 183 | current_dir = path.dirname(path.abspath(__file__)) 184 | dfb = pd.read_csv(current_dir + '/bonds_dict.txt', delimiter='\t') 185 | for i, f in dfb.iterrows(): 186 | #print(f['Frequency']) 187 | if f['BondIs'] == 1: 188 | self.b_dict[f['ES_Index_Bond']]=f['BondIs'] 189 | 190 | dfa = pd.read_csv(current_dir + '/atoms_dict.txt', delimiter='\t') 191 | for i, f in dfa.iterrows(): 192 | #print(f['Frequency']) 193 | if f['AtomIs'] == 1: 194 | self.a_dict[f['ES_Index_AtomBond']]=f['AtomIs'] 195 | 196 | rawV = self._rawD 197 | 198 | esPatterns = [None] * len(rawV) 199 | for i, (name, sma) in enumerate(rawV): 200 | patt = Chem.MolFromSmarts(sma) 201 | if patt is None: 202 | sys.stderr.write('WARNING: problems with pattern %s (name: %s), skipped.\n' % (sma, name)) 203 | else: 204 | esPatterns[i] = name, patt 205 | self.esPatterns = esPatterns 206 | 207 | 208 | def Evaluate(self, mol): 209 | self.Det_UnknownAtoms(mol) 210 | self.Det_InvalidBonds(mol) 211 | self.Det_InvalidAtoms(mol) 212 | self.Det_FailMol(mol) 213 | return mol 214 | 215 | def TypeAtoms(self, mol): 216 | """ assigns each atom in a molecule to an EState type 217 | **Returns:** 218 | list of tuples (atoms can possibly match multiple patterns) with atom types 219 | """ 220 | nAtoms = mol.GetNumAtoms() 221 | res = [None] * nAtoms 222 | for name, patt in self.esPatterns: 223 | matches = mol.GetSubstructMatches(patt, uniquify=0) 224 | for match in matches: 225 | idx = match[0] 226 | if res[idx] is None: 227 | res[idx] = [name] 228 | elif name not in res[idx]: 229 | res[idx].append(name) 230 | for i, v in enumerate(res): 231 | if v is not None: 232 | res[i] = tuple(v) 233 | else: 234 | res[i] = () 235 | return res 236 | 237 | def aEstateMol(self, mol): 238 | aE_atoms = self.TypeAtoms(mol) 239 | aE_key=[] 240 | for aE_atom in aE_atoms: 241 | if aE_atom != (): 242 | a=list(aE_atom) 243 | if a[0] in self.dict_aEstate: 244 | aE_key.append(self.dict_aEstate[a[0]]) 245 | else: 246 | aE_key.append(-1) 247 | else: 248 | aE_key.append(-1) 249 | return aE_key 250 | 251 | def atEstateMol(self, mol): 252 | aE_atoms = self.TypeAtoms(mol) 253 | atE_key=[] 254 | for aE_atom in aE_atoms: 255 | if aE_atom != (): 256 | c=list(aE_atom) 257 | if c[0] in self.dict_atEstate: 258 | atE_key.append(self.dict_atEstate[c[0]]) 259 | else: 260 | atE_key.append(-2) 261 | else: 262 | atE_key.append(-2) 263 | return atE_key 264 | 265 | def Det_UnknownAtoms(self, mol): 266 | ctrue=0 267 | cfalse=0 268 | a_list=[] 269 | aE_list=self.aEstateMol(mol) 270 | a_string='' 271 | #print(aE_list) 272 | for atom in mol.GetAtoms(): 273 | idx1=atom.GetIdx() 274 | key1=aE_list[idx1] 275 | if key1 == -1: 276 | cfalse += 1 277 | a_list.append(idx1) 278 | else: 279 | ctrue += 1 280 | #print(cfalse) 281 | #print(ctrue) 282 | #print(a_list) 283 | if len(a_list)>0: 284 | aa=map(str,a_list) 285 | #aa=list(map(str,a_list)) 286 | a_string=';'.join(aa) 287 | #print(a_string) 288 | mol.SetProp('UnknownAtoms', a_string) 289 | if cfalse > 0: 290 | mol.SetProp('UnknownAtomIs', '1') 291 | else: 292 | mol.SetProp('UnknownAtomIs', '0') 293 | 294 | def Det_InvalidBonds(self, mol): 295 | aE_list=self.aEstateMol(mol) 296 | bonds=mol.GetBonds() 297 | a_string='' 298 | invalid_atoms=[] 299 | #print(len(invalid_atoms)) 300 | ctrue=0 301 | cfalse=0 302 | for bond in bonds: 303 | query_bE=None 304 | idx1=bond.GetBeginAtomIdx() 305 | idx2=bond.GetEndAtomIdx() 306 | key1=aE_list[idx1] 307 | key2=aE_list[idx2] 308 | query_bE=str(key1)+'_'+str(key2) 309 | if not query_bE in self.b_dict: 310 | cfalse += 1 311 | invalid_atoms.append(idx1) 312 | invalid_atoms.append(idx2) 313 | else: 314 | ctrue += 1 315 | 316 | if len(invalid_atoms)>0: 317 | a_list=list(set(invalid_atoms)) 318 | a_list.sort() 319 | #aa=list(map(str,a_list)) 320 | aa=map(str,a_list) 321 | #print(aa) 322 | a_string=';'.join(aa) 323 | #print(a_string) 324 | 325 | mol.SetProp('InvalidBonds', a_string) 326 | if cfalse > 0: 327 | mol.SetProp('InvalidBondIs', '1') 328 | else: 329 | mol.SetProp('InvalidBondIs', '0') 330 | 331 | def Det_InvalidAtoms(self, mol): 332 | aE_list=self.aEstateMol(mol) 333 | atE_list=self.atEstateMol(mol) 334 | a_string='' 335 | invalid_atoms=[] 336 | #print(len(invalid_atoms)) 337 | ctrue=0 338 | cfalse=0 339 | for atom in mol.GetAtoms(): 340 | query_aE=None 341 | idx1=atom.GetIdx() 342 | key1=aE_list[idx1] 343 | b=[] 344 | for nbr in atom.GetNeighbors(): 345 | idx2=nbr.GetIdx() 346 | key2=atE_list[idx2] 347 | b.append(key2) 348 | b.sort() 349 | b=list(map(str,b)) 350 | #print(b) 351 | b='_'.join(b) 352 | query_aE=str(key1)+':'+str(b) 353 | #print(query_aE) 354 | if not query_aE in self.a_dict: 355 | cfalse += 1 356 | invalid_atoms.append(idx1) 357 | else: 358 | ctrue += 1 359 | 360 | if len(invalid_atoms)>0: 361 | a_list=list(set(invalid_atoms)) 362 | a_list.sort() 363 | aa=map(str,a_list) 364 | #aa=list(map(str,a_list)) 365 | #print(aa) 366 | a_string=';'.join(aa) 367 | #print(a_string) 368 | #print(cfalse) 369 | #print(ctrue) 370 | 371 | mol.SetProp('InvalidAtoms', a_string) 372 | if cfalse > 0: 373 | mol.SetProp('InvalidAtomIs', '1') 374 | else: 375 | mol.SetProp('InvalidAtomIs', '0') 376 | 377 | def Det_FailMol(self, mol): 378 | c=0 379 | atoms=[] 380 | atoms_string='' 381 | if mol.HasProp('UnknownAtomIs'): 382 | if int(mol.GetProp('UnknownAtomIs')) != 0 : 383 | c += 1 384 | if mol.HasProp('UnknownAtomIs'): 385 | if mol.GetProp('UnknownAtoms') is not '': 386 | a1=mol.GetProp('UnknownAtoms') 387 | for idx in a1.split(';'): 388 | atoms.append(idx) 389 | if mol.HasProp('InvalidBondIs'): 390 | if int(mol.GetProp('InvalidBondIs')) != 0 : 391 | c += 1 392 | if mol.HasProp('InvalidBonds'): 393 | if mol.GetProp('InvalidBonds') is not '': 394 | a2=mol.GetProp('InvalidBonds') 395 | for idx in a2.split(';'): 396 | atoms.append(idx) 397 | if mol.HasProp('InvalidAtomIs'): 398 | if int(mol.GetProp('InvalidAtomIs')) != 0 : 399 | c += 1 400 | if mol.HasProp('InvalidAtoms'): 401 | if mol.GetProp('InvalidAtoms') is not '': 402 | a3=mol.GetProp('InvalidAtoms') 403 | for idx in a3.split(';'): 404 | atoms.append(idx) 405 | if c == 0: 406 | mol.SetProp('ErrorAtoms', '') 407 | mol.SetProp('ErrorIs', '0') 408 | else: 409 | atoms=set(atoms) 410 | atoms=list(map(int,atoms)) 411 | atoms.sort() 412 | atoms_string=';'.join(map(str,atoms)) 413 | mol.SetProp('ErrorAtoms',atoms_string) 414 | mol.SetProp('ErrorIs', '1') 415 | 416 | -------------------------------------------------------------------------------- /utils/atoms_dict.txt: -------------------------------------------------------------------------------- 1 | ES_Index_AtomBond Frequency AtomIs 2 | 11:2_2 5097921 1 3 | 8:1_1 1124920 1 4 | 34:2 899054 1 5 | 8:1_5 809993 1 6 | 16:1_2_2 728211 1 7 | 16:2_2_2 613313 1 8 | 6:1 529698 1 9 | 28:2_2 498868 1 10 | 16:2_2_11 447506 1 11 | 35:1_2 434807 1 12 | 11:2_8 356517 1 13 | 8:1_2 347603 1 14 | 16:2_2_5 323432 1 15 | 16:2_2_14 309177 1 16 | 17:2_2_2 278765 1 17 | 17:2_2_8 274650 1 18 | 23:1_2 267650 1 19 | 15:1_5_12 249133 1 20 | 6:2 239073 1 21 | 6:11 217927 1 22 | 8:1_11 216391 1 23 | 15:2_5_12 204073 1 24 | 23:2_2 201671 1 25 | 16:2_2_8 198470 1 26 | 53:2 197493 1 27 | 29:1_1_2 195713 1 28 | 37:1 194615 1 29 | 33:2 194219 1 30 | 34:18 192282 1 31 | 29:1_1_1 167556 1 32 | 8:2_5 160233 1 33 | 12:1_1_1 153406 1 34 | 28:2_8 148502 1 35 | 33:1 141796 1 36 | 12:1_2_5 140738 1 37 | 12:1_1_5 136082 1 38 | 12:1_1_11 131981 1 39 | 6:5 129544 1 40 | 16:1_2_8 128580 1 41 | 37:2 127905 1 42 | 31:1_2_2 124042 1 43 | 20:2 105588 1 44 | 16:2_5_8 100690 1 45 | 12:1_1_2 98589 1 46 | 15:1_11_12 97492 1 47 | 35:1_1 96942 1 48 | 24:2_2 93345 1 49 | 10:2_2 90703 1 50 | 50:2_2 80001 1 51 | 34:9 75970 1 52 | 15:2_11_12 71368 1 53 | 16:2_2_18 69829 1 54 | 11:8_8 69336 1 55 | 8:2_11 62435 1 56 | 23:1_1 58412 1 57 | 15:2_8_12 57524 1 58 | 15:2_2_12 57014 1 59 | 17:2_8_8 55966 1 60 | 29:1_2_2 55840 1 61 | 36:2_2 53383 1 62 | 8:1_8 51419 1 63 | 10:1_2 50450 1 64 | 52:2_5_12_12 49926 1 65 | 16:5_8_8 49381 1 66 | 16:2_8_8 46001 1 67 | 18:2_14_14_14 45418 1 68 | 25:3 45146 1 69 | 15:5_5_12 44186 1 70 | 31:2_2_2 41671 1 71 | 18:1_1_1_2 39611 1 72 | 8:2_8 39518 1 73 | 8:2_2 39027 1 74 | 20:1 38410 1 75 | 31:1_2_8 37497 1 76 | 31:2_2_8 37186 1 77 | 69:2 36452 1 78 | 49:1_2 36186 1 79 | 30:2_12_12 36088 1 80 | 14:2_10 35849 1 81 | 6:8 35799 1 82 | 16:1_8_8 35517 1 83 | 35:2_2 35388 1 84 | 11:2_17 35310 1 85 | 16:2_2_3 33888 1 86 | 15:1_2_2 33861 1 87 | 16:2_2_20 32817 1 88 | 12:1_2_11 32499 1 89 | 18:1_1_1_1 32333 1 90 | 15:1_2_12 31202 1 91 | 16:2_2_9 30725 1 92 | 15:5_11_12 30418 1 93 | 16:2_8_11 29483 1 94 | 17:2_2_13 26782 1 95 | 28:8_8 26096 1 96 | 27:2_5 24451 1 97 | 16:2_2_17 24339 1 98 | 16:2_2_13 23794 1 99 | 8:1_16 23257 1 100 | 16:2_2_16 22578 1 101 | 18:1_1_1_11 22477 1 102 | 23:2_18 22176 1 103 | 16:5_8_17 20828 1 104 | 17:2_2_17 20797 1 105 | 27:1_2 20707 1 106 | 15:8_8_12 20159 1 107 | 24:2_8 19260 1 108 | 22:2 18754 1 109 | 28:2_13 18616 1 110 | 15:1_1_2 18558 1 111 | 8:2_16 18331 1 112 | 48:2 17909 1 113 | 15:5_5_7 17782 1 114 | 12:1_2_2 17504 1 115 | 15:1_2_7 17436 1 116 | 29:1_1_18 17193 1 117 | 27:2_2 16579 1 118 | 36:2_8 16527 1 119 | 16:1_2_17 15800 1 120 | 6:18 15748 1 121 | 10:2_7 15461 1 122 | 23:2_7 15411 1 123 | 15:2_2_2 15349 1 124 | 11:2_13 15318 1 125 | 18:1_1_2_11 14566 1 126 | 12:1_1_8 14393 1 127 | 15:2_5_7 14380 1 128 | 49:2_2 14176 1 129 | 16:1_2_13 13988 1 130 | 52:1_5_12_12 13832 1 131 | 49:1_1 13536 1 132 | 23:1_18 13352 1 133 | 7:2 13232 1 134 | 16:2_8_13 13212 1 135 | 33:5 13169 1 136 | 12:2_2_5 13055 1 137 | 16:8_8_16 12932 1 138 | 27:2_11 12765 1 139 | 18:1_1_1_5 12250 1 140 | 23:2_11 11915 1 141 | 52:1_2_12_12 11179 1 142 | 12:1_8_11 11171 1 143 | 16:2_2_7 11138 1 144 | 15:2_2_7 11052 1 145 | 18:1_1_2_5 11002 1 146 | 8:11_11 10811 1 147 | 15:2_2_5 10746 1 148 | 14:2_3 10693 1 149 | 23:2_5 10586 1 150 | 14:1_3 10307 1 151 | 8:1_7 10161 1 152 | 16:2_8_17 10090 1 153 | 16:2_8_14 9902 1 154 | 15:1_1_12 9587 1 155 | 20:18 9419 1 156 | 8:1_18 9295 1 157 | 15:5_5_19 9168 1 158 | 18:1_1_2_2 8656 1 159 | 15:1_2_5 8302 1 160 | 29:2_2_2 8197 1 161 | 35:1_7 7990 1 162 | 6:16 7984 1 163 | 15:2_12_13 7739 1 164 | 14:1_10 7521 1 165 | 18:1_14_14_14 7513 1 166 | 16:1_8_13 7262 1 167 | 12:2_2_2 6876 1 168 | 15:2_2_11 6729 1 169 | 12:1_11_11 6671 1 170 | 74:2 6578 1 171 | 15:5_7_16 6531 1 172 | 8:1_6 6487 1 173 | 29:1_2_7 6325 1 174 | 15:2_2_16 6246 1 175 | 6:6 6155 1 176 | 17:2_8_17 6133 1 177 | 29:1_2_18 6090 1 178 | 16:1_8_17 5947 1 179 | 16:2_2_21 5945 1 180 | 16:8_8_11 5930 1 181 | 27:2_7 5841 1 182 | 16:2_8_16 5678 1 183 | 18:11_14_14_14 5311 1 184 | 47:2 4654 1 185 | 52:2_2_12_12 4549 1 186 | 15:1_5_7 4543 1 187 | 34:8 4435 1 188 | 18:1_2_2_11 4389 1 189 | 12:2_2_11 4371 1 190 | 16:2_5_17 4311 1 191 | 50:2_8 4225 1 192 | 8:2_18 4140 1 193 | 15:1_1_7 4055 1 194 | 28:2_17 3957 1 195 | 10:2_5 3769 1 196 | 12:1_2_8 3753 1 197 | 33:7 3724 1 198 | 12:1_2_16 3693 1 199 | 16:2_3_8 3606 1 200 | 18:1_1_11_11 3471 1 201 | 9:3 3464 1 202 | 15:5_16_19 3396 1 203 | 32:1_1_1_1 3396 1 204 | 15:1_2_11 3232 1 205 | 8:3_5 3193 1 206 | 31:2_2_12 3186 1 207 | 24:8_8 3163 1 208 | 12:1_1_16 3116 1 209 | 52:1_1_12_12 3012 1 210 | 8:1_3 3007 1 211 | 16:5_8_13 3004 1 212 | 17:8_8_8 2882 1 213 | 15:2_2_3 2878 1 214 | 35:1_5 2793 1 215 | 29:1_2_5 2791 1 216 | 15:5_12_16 2730 1 217 | 47:1 2716 1 218 | 29:1_2_11 2663 1 219 | 16:2_8_18 2639 1 220 | 12:1_5_11 2567 1 221 | 12:1_1_7 2552 1 222 | 35:1_11 2547 1 223 | 8:5_5 2512 1 224 | 16:2_17_18 2496 1 225 | 8:1_14 2459 1 226 | 18:1_2_2_5 2411 1 227 | 52:5_5_12_12 2391 1 228 | 27:2_18 2341 1 229 | 12:1_2_7 2308 1 230 | 11:8_17 2305 1 231 | 17:8_8_17 2293 1 232 | 35:2_18 2215 1 233 | 12:1_1_18 2207 1 234 | 16:2_8_9 2205 1 235 | 16:2_14_17 2204 1 236 | 12:1_3_5 2175 1 237 | 12:2_5_16 2146 1 238 | 52:2_11_12_12 2138 1 239 | 31:2_2_18 2101 1 240 | 16:8_16_17 2060 1 241 | 16:2_5_13 2050 1 242 | 31:2_2_5 2021 1 243 | 12:1_1_14 2005 1 244 | 52:2_7_12_12 1999 1 245 | 52:2_8_12_12 1952 1 246 | 15:8_12_13 1939 1 247 | 33:18 1909 1 248 | 10:2_12 1900 1 249 | 8:2_7 1853 1 250 | 49:1_16 1853 1 251 | 15:8_8_19 1850 1 252 | 12:1_5_16 1848 1 253 | 15:2_7_12 1844 1 254 | 8:3_11 1799 1 255 | 36:8_8 1764 1 256 | 18:1_1_14_14 1754 1 257 | 15:1_2_3 1706 1 258 | 20:5 1703 1 259 | 29:2_2_5 1634 1 260 | 8:8_11 1608 1 261 | 10:5_12 1605 1 262 | 12:2_2_8 1605 1 263 | 16:8_13_16 1601 1 264 | 29:1_1_5 1591 1 265 | 51:1_2_12 1588 1 266 | 10:1_7 1530 1 267 | 52:5_11_12_12 1520 1 268 | 15:2_7_11 1500 1 269 | 16:8_8_14 1493 1 270 | 15:7_8_17 1445 1 271 | 16:2_8_20 1411 1 272 | 10:1_12 1372 1 273 | 15:5_8_12 1365 1 274 | 31:1_8_8 1354 1 275 | 17:2_8_13 1352 1 276 | 8:2_6 1341 1 277 | 31:2_2_11 1320 1 278 | 15:2_11_11 1312 1 279 | 16:3_8_8 1284 1 280 | 16:8_8_8 1280 1 281 | 15:2_5_5 1269 1 282 | 15:2_5_11 1264 1 283 | 18:1_2_2_2 1254 1 284 | 11:8_13 1241 1 285 | 18:1_1_5_11 1216 1 286 | 27:2_8 1214 1 287 | 15:1_7_12 1205 1 288 | 15:2_2_14 1177 1 289 | 35:1_18 1175 1 290 | 31:2_8_8 1161 1 291 | 31:2_2_7 1159 1 292 | 35:2_7 1142 1 293 | 52:1_11_12_12 1131 1 294 | 10:5_7 1124 1 295 | 16:2_9_13 1108 1 296 | 10:2_18 1099 1 297 | 8:5_11 1092 1 298 | 29:2_2_7 1084 1 299 | 12:1_5_5 1076 1 300 | 49:2_16 1057 1 301 | 16:8_8_18 1048 1 302 | 12:2_5_5 1042 1 303 | 18:1_1_1_16 1034 1 304 | 8:5_16 1013 1 305 | 29:1_1_7 1008 1 306 | 15:2_2_8 1006 1 307 | 12:2_11_11 990 1 308 | 33:8 986 1 309 | 12:2_5_7 972 1 310 | 12:2_5_11 954 1 311 | 15:1_8_12 949 1 312 | 30:11_12_12 938 1 313 | 35:1_9 932 1 314 | 23:2_8 931 1 315 | 12:11_14_14 922 1 316 | 18:1_2_11_11 914 1 317 | 16:2_16_17 893 1 318 | 15:5_7_7 885 1 319 | 14:5_10 870 1 320 | 18:2_2_2_5 866 1 321 | 18:1_1_1_8 865 1 322 | 18:2_2_2_11 863 1 323 | 15:1_2_16 855 1 324 | 18:1_1_2_18 847 1 325 | 23:1_5 832 1 326 | 15:2_7_16 806 1 327 | 15:2_5_19 804 1 328 | 18:14_14_14_18 798 1 329 | 23:1_7 790 1 330 | 16:2_3_17 788 1 331 | 15:2_7_8 784 1 332 | 10:2_11 783 1 333 | 18:1_1_2_3 781 1 334 | 15:1_12_16 778 1 335 | 6:3 771 1 336 | 50:8_8 771 1 337 | 18:1_2_5_11 762 1 338 | 15:2_5_16 760 1 339 | 18:1_2_14_14 756 1 340 | 31:2_12_13 748 1 341 | 12:1_1_6 746 1 342 | 8:5_8 743 1 343 | 16:2_7_8 734 1 344 | 12:2_2_16 728 1 345 | 29:2_2_11 727 1 346 | 6:7 720 1 347 | 8:3_8 719 1 348 | 28:8_17 715 1 349 | 15:5_7_11 714 1 350 | 51:1_1_12 712 1 351 | 15:8_12_17 705 1 352 | 14:7_10 699 1 353 | 10:2_3 695 1 354 | 23:2_3 694 1 355 | 16:8_17_18 692 1 356 | 15:2_2_20 686 1 357 | 27:5_7 665 1 358 | 8:2_3 663 1 359 | 27:2_3 661 1 360 | 12:1_2_18 649 1 361 | 12:1_1_3 638 1 362 | 18:1_1_3_11 630 1 363 | 12:1_14_14 626 1 364 | 16:8_8_17 625 1 365 | 18:1_1_2_7 603 1 366 | 18:1_1_5_7 602 1 367 | 29:1_1_11 586 1 368 | 16:2_9_17 583 1 369 | 15:1_7_16 580 1 370 | 16:2_11_13 579 1 371 | 20:8 568 1 372 | 34:7 567 1 373 | 10:2_8 545 1 374 | 10:2_16 542 1 375 | 15:5_11_19 541 1 376 | 16:2_17_20 540 1 377 | 18:1_1_1_7 533 1 378 | 16:8_11_17 532 1 379 | 16:7_8_17 529 1 380 | 12:1_11_16 523 1 381 | 15:1_5_19 520 1 382 | 15:8_11_12 506 1 383 | 15:1_2_9 501 1 384 | 31:2_8_12 500 1 385 | 15:2_2_17 493 1 386 | 12:2_14_14 487 1 387 | 13:7_19 485 1 388 | 48:4 485 1 389 | 15:1_2_14 480 1 390 | 15:11_11_12 480 1 391 | 31:2_2_17 480 1 392 | 27:1_7 475 1 393 | 18:1_1_2_16 472 1 394 | 18:2_2_5_11 461 1 395 | 23:5_18 447 1 396 | 29:2_2_18 447 1 397 | 15:2_12_16 439 1 398 | 31:1_2_17 436 1 399 | 16:8_8_9 433 1 400 | 18:1_1_1_18 431 1 401 | 12:2_2_7 422 1 402 | 15:1_7_11 422 1 403 | 15:7_8_8 422 1 404 | 18:1_2_2_7 420 1 405 | 30:1_12_12 417 1 406 | 15:5_7_12 413 1 407 | 16:7_8_8 413 1 408 | 12:1_2_3 408 1 409 | 18:2_2_2_2 407 1 410 | 12:1_8_16 406 1 411 | 18:1_1_1_14 401 1 412 | 18:1_1_3_5 398 1 413 | 35:1_8 395 1 414 | 12:1_5_18 392 1 415 | 15:2_8_19 385 1 416 | 35:2_5 385 1 417 | 51:2_2_12 376 1 418 | 18:2_2_11_11 372 1 419 | 10:2_9 369 1 420 | 16:8_11_13 369 1 421 | 15:2_3_3 361 1 422 | 18:1_1_1_3 357 1 423 | 8:2_14 351 1 424 | 15:1_2_8 351 1 425 | 8:3_16 342 1 426 | 12:2_8_11 342 1 427 | 52:11_11_12_12 326 1 428 | 31:1_2_13 320 1 429 | 15:8_13_19 312 1 430 | 27:5_12 312 1 431 | 49:2_5 306 1 432 | 18:2_2_2_7 303 1 433 | 15:1_2_18 294 1 434 | 30:7_12_12 290 1 435 | 50:2_17 289 1 436 | 15:2_2_18 288 1 437 | 27:2_9 288 1 438 | 15:8_17_19 287 1 439 | 52:1_8_12_12 283 1 440 | 12:2_3_5 279 1 441 | 15:2_7_13 278 1 442 | 18:1_1_5_5 277 1 443 | 29:1_7_18 277 1 444 | 27:2_4 271 1 445 | 16:2_2_6 270 1 446 | 18:1_2_5_16 270 1 447 | 32:1_1_1_2 269 1 448 | 23:1_11 267 1 449 | 18:2_2_5_5 265 1 450 | 34:6 260 1 451 | 27:1_4 254 1 452 | 23:7_18 248 1 453 | 30:5_12_12 247 1 454 | 32:1_1_1_12 244 1 455 | 18:1_2_3_11 243 1 456 | 18:1_1_2_14 242 1 457 | 12:1_2_6 239 1 458 | 16:2_13_20 231 1 459 | 18:1_11_14_14 226 1 460 | 16:8_8_20 224 1 461 | 27:2_12 221 1 462 | 23:2_9 220 1 463 | 18:1_2_2_3 218 1 464 | 15:2_8_17 213 1 465 | 12:2_2_3 211 1 466 | 31:2_8_18 206 1 467 | 18:14_14_14_16 205 1 468 | 18:2_2_2_16 202 1 469 | 16:2_3_13 195 1 470 | 15:3_5_12 194 1 471 | 12:1_5_7 192 1 472 | 14:10_16 192 1 473 | 29:1_2_16 192 1 474 | 18:2_2_14_14 191 1 475 | 14:3_3 189 1 476 | 16:8_14_17 189 1 477 | 18:2_2_5_16 186 1 478 | 15:2_3_7 185 1 479 | 23:1_8 185 1 480 | 31:2_2_13 179 1 481 | 15:5_8_19 175 1 482 | 20:7 172 1 483 | 15:7_11_16 171 1 484 | 16:2_8_21 171 1 485 | 18:1_1_5_16 171 1 486 | 16:2_11_17 168 1 487 | 18:1_1_16_16 168 1 488 | 12:1_3_11 166 1 489 | 16:2_7_17 166 1 490 | 52:5_7_12_12 165 1 491 | 29:1_1_8 164 1 492 | 16:2_13_18 162 1 493 | 24:2_17 161 1 494 | 12:1_1_9 159 1 495 | 15:2_14_14 158 1 496 | 29:1_2_8 155 1 497 | 12:2_8_16 152 1 498 | 8:1_9 148 1 499 | 18:1_2_2_16 146 1 500 | 35:5_18 145 1 501 | 35:7_18 143 1 502 | 31:2_8_11 140 1 503 | 12:2_2_18 139 1 504 | 12:2_5_18 139 1 505 | 28:8_13 133 1 506 | 10:2_20 132 1 507 | 12:2_7_11 132 1 508 | 15:2_7_14 132 1 509 | 18:1_1_2_8 132 1 510 | 10:2_21 131 1 511 | 18:1_1_8_11 130 1 512 | 12:8_14_14 129 1 513 | 18:1_2_3_5 128 1 514 | 18:2_2_11_16 128 1 515 | 18:11_11_14_14 128 1 516 | 12:1_11_18 127 1 517 | 20:11 125 1 518 | 15:1_7_20 124 1 519 | 12:1_2_14 122 1 520 | 15:7_11_12 120 1 521 | 29:1_5_18 120 1 522 | 16:2_13_14 119 1 523 | 16:2_13_16 119 1 524 | 16:3_8_17 118 1 525 | 12:5_5_16 117 1 526 | 15:7_16_16 117 1 527 | 49:1_5 117 1 528 | 15:2_12_17 116 1 529 | 16:8_13_18 114 1 530 | 49:1_3 114 1 531 | 10:2_14 113 1 532 | 15:1_2_20 111 1 533 | 29:1_11_18 109 1 534 | 16:8_8_21 108 1 535 | 12:2_3_11 105 1 536 | 15:2_16_16 104 1 537 | 31:2_5_8 104 1 538 | 51:2_5_12 103 1 539 | 15:5_7_19 102 1 540 | 15:2_7_7 99 1 541 | 8:8_16 95 1 542 | 27:7_7 95 1 543 | 10:7_7 93 1 544 | 29:1_1_3 93 1 545 | 18:1_14_14_18 92 1 546 | 17:2_17_17 91 1 547 | 15:7_7_16 90 1 548 | 51:5_11_12 90 1 549 | 53:5 89 1 550 | 49:16_16 88 1 551 | 8:3_6 87 1 552 | 15:2_17_19 87 1 553 | 18:1_2_2_8 86 1 554 | 49:2_3 86 1 555 | 52:1_7_12_12 86 1 556 | 12:1_5_8 82 1 557 | 23:2_16 81 1 558 | 29:2_18_18 81 1 559 | 18:1_2_5_5 80 1 560 | 8:5_18 79 1 561 | 29:1_1_16 79 1 562 | 18:1_1_7_11 76 1 563 | 18:1_2_7_7 76 1 564 | 23:8_18 75 1 565 | 23:18_18 75 1 566 | 18:1_1_3_8 72 1 567 | 8:5_7 71 1 568 | 8:8_18 71 1 569 | 18:1_2_11_16 71 1 570 | 52:5_8_12_12 71 1 571 | 15:2_2_9 70 1 572 | 13:2_2 69 1 573 | 15:2_20_20 67 1 574 | 29:2_5_16 67 1 575 | 14:3_21 66 1 576 | 29:2_2_16 66 1 577 | 49:5_5 66 1 578 | 74:3 66 1 579 | 8:11_16 65 1 580 | 18:2_3_16_16 64 1 581 | 35:2_8 64 1 582 | 15:1_7_7 63 1 583 | 12:5_11_11 62 1 584 | 23:11_18 62 1 585 | 12:2_16_16 61 1 586 | 29:1_3_5 61 1 587 | 29:2_5_18 61 1 588 | 12:1_16_16 60 1 589 | 16:8_8_13 60 1 590 | 12:5_5_5 59 1 591 | 15:12_13_13 57 1 592 | 15:16_16_19 56 1 593 | 24:2_13 55 1 594 | 12:1_7_11 54 1 595 | 15:12_13_17 54 1 596 | 15:2_3_8 53 1 597 | 23:5_5 52 1 598 | 15:1_2_21 51 1 599 | 15:2_3_18 51 1 600 | 18:2_2_2_8 51 1 601 | 29:2_2_8 51 1 602 | 51:7_7_12 51 1 603 | 10:1_4 50 1 604 | 15:11_12_16 50 1 605 | 27:18_18 50 1 606 | 12:2_5_8 49 0 607 | 18:1_2_16_18 49 0 608 | 18:2_2_3_11 48 0 609 | 15:3_11_12 47 0 610 | 18:1_2_16_16 47 0 611 | 49:5_11 47 0 612 | 15:7_8_16 46 0 613 | 18:1_2_2_18 46 0 614 | 51:2_2_7 46 0 615 | 8:11_14 45 0 616 | 12:2_7_7 45 0 617 | 29:1_16_18 45 0 618 | 10:11_12 44 0 619 | 7:4 43 0 620 | 15:2_21_21 43 0 621 | 16:7_8_13 43 0 622 | 31:2_7_17 43 0 623 | 10:7_11 42 0 624 | 15:5_7_8 42 0 625 | 15:2_7_9 41 0 626 | 15:5_12_18 41 0 627 | 8:16_18 40 0 628 | 15:1_7_14 40 0 629 | 16:2_17_21 40 0 630 | 29:1_2_3 40 0 631 | 14:3_5 39 0 632 | 18:1_2_8_11 39 0 633 | 15:2_2_19 38 0 634 | 15:2_7_17 38 0 635 | 17:8_8_13 38 0 636 | 18:1_2_11_14 38 0 637 | 12:2_16_18 37 0 638 | 15:1_7_8 37 0 639 | 15:2_9_16 37 0 640 | 18:1_5_5_11 37 0 641 | 23:5_7 37 0 642 | 13:7_12 36 0 643 | 23:1_16 36 0 644 | 34:4 36 0 645 | 12:2_2_14 35 0 646 | 12:2_11_16 35 0 647 | 15:1_7_18 35 0 648 | 8:16_16 34 0 649 | 14:3_8 34 0 650 | 18:1_1_7_16 34 0 651 | 18:1_14_14_16 34 0 652 | 49:2_18 34 0 653 | 12:5_5_11 33 0 654 | 29:1_2_14 33 0 655 | 31:2_2_3 33 0 656 | 31:2_2_16 32 0 657 | 8:11_18 31 0 658 | 10:8_12 30 0 659 | 12:1_11_14 30 0 660 | 18:1_2_2_14 30 0 661 | 35:2_11 30 0 662 | 49:1_7 30 0 663 | 8:8_8 29 0 664 | 12:1_3_8 29 0 665 | 12:1_7_16 29 0 666 | 12:1_8_18 29 0 667 | 15:8_16_19 29 0 668 | 31:2_7_8 29 0 669 | 49:5_16 29 0 670 | 18:1_2_3_16 28 0 671 | 18:2_2_16_16 28 0 672 | 8:6_11 27 0 673 | 10:5_19 27 0 674 | 15:7_16_18 27 0 675 | 18:1_1_3_3 27 0 676 | 18:2_2_7_7 27 0 677 | 50:8_17 27 0 678 | 12:1_2_9 26 0 679 | 15:2_3_12 26 0 680 | 16:8_17_20 26 0 681 | 35:2_16 26 0 682 | 49:2_8 26 0 683 | 51:1_12_16 26 0 684 | 12:14_14_16 25 0 685 | 12:14_14_18 25 0 686 | 15:11_16_19 25 0 687 | 49:1_18 25 0 688 | 12:2_2_6 24 0 689 | 35:1_16 24 0 690 | 12:1_5_6 23 0 691 | 15:2_2_13 23 0 692 | 15:2_11_16 23 0 693 | 16:2_13_21 23 0 694 | 18:1_1_1_9 23 0 695 | 18:1_1_7_7 23 0 696 | 33:11 23 0 697 | 35:2_3 23 0 698 | 12:2_7_16 22 0 699 | 14:3_11 22 0 700 | 15:2_5_14 22 0 701 | 18:1_2_14_18 22 0 702 | 27:12_16 22 0 703 | 29:2_5_5 22 0 704 | 29:2_7_16 22 0 705 | 8:3_18 21 0 706 | 12:1_7_7 21 0 707 | 12:5_5_7 21 0 708 | 15:7_8_13 21 0 709 | 8:3_3 20 0 710 | 8:18_18 20 0 711 | 12:1_6_11 20 0 712 | 15:1_16_19 19 0 713 | 15:2_2_21 19 0 714 | 16:5_17_17 19 0 715 | 18:1_1_11_16 19 0 716 | 18:2_2_2_14 19 0 717 | 29:1_5_5 19 0 718 | 10:2_4 18 0 719 | 18:2_14_14_16 18 0 720 | 20:16 18 0 721 | 29:2_11_18 18 0 722 | 18:2_14_14_18 17 0 723 | 52:7_11_12_12 17 0 724 | 8:5_6 16 0 725 | 12:1_9_11 16 0 726 | 12:5_7_11 16 0 727 | 18:1_1_1_6 16 0 728 | 27:2_16 16 0 729 | 35:8_18 16 0 730 | 10:7_8 15 0 731 | 10:7_16 15 0 732 | 12:5_14_14 15 0 733 | 15:7_7_19 15 0 734 | 29:1_18_18 15 0 735 | 48:7 15 0 736 | 52:2_12_12_16 15 0 737 | 52:7_7_12_12 15 0 738 | 12:11_16_16 14 0 739 | 15:2_14_18 14 0 740 | 15:2_16_19 14 0 741 | 15:7_7_12 14 0 742 | 17:2_13_17 14 0 743 | 18:1_1_11_14 14 0 744 | 18:1_2_7_11 14 0 745 | 18:1_5_11_11 14 0 746 | 29:1_14_14 14 0 747 | 29:2_5_7 14 0 748 | 51:1_5_12 14 0 749 | 7:7 13 0 750 | 15:1_3_7 13 0 751 | 15:2_5_18 13 0 752 | 15:7_13_17 13 0 753 | 18:1_2_11_18 13 0 754 | 18:1_11_11_11 13 0 755 | 51:11_11_12 13 0 756 | 8:3_14 12 0 757 | 10:2_6 12 0 758 | 12:1_3_3 12 0 759 | 15:2_16_18 12 0 760 | 15:11_11_19 12 0 761 | 18:2_11_14_14 12 0 762 | 23:1_3 12 0 763 | 27:1_19 12 0 764 | 29:1_1_14 12 0 765 | 29:2_7_18 12 0 766 | 8:7_7 11 0 767 | 10:4_8 11 0 768 | 15:3_3_7 11 0 769 | 15:8_11_19 11 0 770 | 29:2_2_14 11 0 771 | 32:1_1_2_2 11 0 772 | 52:11_12_12_16 11 0 773 | 15:1_2_4 10 0 774 | 15:3_12_16 10 0 775 | 18:1_1_6_11 10 0 776 | 18:1_1_8_14 10 0 777 | 32:1_1_2_12 10 0 778 | 49:1_8 10 0 779 | 8:6_8 9 0 780 | 12:2_3_7 9 0 781 | 12:2_11_18 9 0 782 | 12:5_16_16 9 0 783 | 15:2_3_5 9 0 784 | 15:2_7_18 9 0 785 | 16:3_8_13 9 0 786 | 18:1_1_2_9 9 0 787 | 18:1_1_14_18 9 0 788 | 23:5_16 9 0 789 | 29:1_1_9 9 0 790 | 29:1_5_16 9 0 791 | 49:2_7 9 0 792 | 8:2_9 8 0 793 | 12:2_18_18 8 0 794 | 13:2_4 8 0 795 | 14:3_16 8 0 796 | 15:1_1_4 8 0 797 | 15:3_5_19 8 0 798 | 15:7_12_16 8 0 799 | 18:1_1_5_8 8 0 800 | 18:1_11_11_16 8 0 801 | 18:2_11_11_11 8 0 802 | 23:9_18 8 0 803 | 32:1_1_1_5 8 0 804 | 51:2_7_12 8 0 805 | 51:2_11_12 8 0 806 | 12:1_3_7 7 0 807 | 15:1_3_12 7 0 808 | 15:1_7_19 7 0 809 | 15:2_5_8 7 0 810 | 15:2_14_20 7 0 811 | 18:1_1_2_6 7 0 812 | 18:1_2_5_7 7 0 813 | 18:5_14_14_14 7 0 814 | 27:8_12 7 0 815 | 29:1_5_7 7 0 816 | 29:1_7_11 7 0 817 | 29:2_16_18 7 0 818 | 51:5_5_12 7 0 819 | 8:14_18 6 0 820 | 12:5_7_16 6 0 821 | 12:11_11_11 6 0 822 | 15:1_2_19 6 0 823 | 15:1_6_12 6 0 824 | 15:5_7_18 6 0 825 | 16:2_6_8 6 0 826 | 18:1_1_3_16 6 0 827 | 18:2_2_2_3 6 0 828 | 18:8_14_14_14 6 0 829 | 29:1_2_9 6 0 830 | 29:1_7_7 6 0 831 | 35:2_9 6 0 832 | 51:1_2_7 6 0 833 | 12:1_3_16 5 0 834 | 14:3_10 5 0 835 | 15:2_2_6 5 0 836 | 15:7_11_11 5 0 837 | 16:2_7_13 5 0 838 | 16:8_9_17 5 0 839 | 18:1_2_2_9 5 0 840 | 18:2_2_5_18 5 0 841 | 18:2_2_7_11 5 0 842 | 18:2_2_7_16 5 0 843 | 18:2_5_5_16 5 0 844 | 22:7 5 0 845 | 23:5_11 5 0 846 | 35:5_5 5 0 847 | 47:5 5 0 848 | 49:2_11 5 0 849 | 51:1_11_12 5 0 850 | 52:1_12_12_16 5 0 851 | 8:3_7 4 0 852 | 8:6_16 4 0 853 | 12:2_2_9 4 0 854 | 12:2_7_8 4 0 855 | 12:2_11_14 4 0 856 | 12:5_7_7 4 0 857 | 12:7_16_16 4 0 858 | 14:3_18 4 0 859 | 15:1_1_19 4 0 860 | 15:2_8_13 4 0 861 | 15:2_11_19 4 0 862 | 15:2_14_16 4 0 863 | 15:2_16_20 4 0 864 | 15:3_7_16 4 0 865 | 16:8_17_21 4 0 866 | 18:1_2_7_16 4 0 867 | 18:1_2_8_8 4 0 868 | 18:1_3_3_11 4 0 869 | 18:1_3_8_11 4 0 870 | 18:2_2_8_14 4 0 871 | 18:2_2_11_18 4 0 872 | 18:2_3_3_5 4 0 873 | 27:1_12 4 0 874 | 29:1_2_6 4 0 875 | 29:1_11_11 4 0 876 | 29:2_2_3 4 0 877 | 29:5_18_18 4 0 878 | 32:1_1_1_11 4 0 879 | 32:1_2_2_12 4 0 880 | 35:1_6 4 0 881 | 36:2_13 4 0 882 | 50:17_17 4 0 883 | 52:3_5_12_12 4 0 884 | 8:5_14 3 0 885 | 8:6_18 3 0 886 | 8:9_16 3 0 887 | 8:14_16 3 0 888 | 10:3_12 3 0 889 | 12:1_5_9 3 0 890 | 12:1_16_18 3 0 891 | 12:2_14_18 3 0 892 | 12:3_3_11 3 0 893 | 12:3_11_11 3 0 894 | 14:3_14 3 0 895 | 14:10_11 3 0 896 | 15:2_3_11 3 0 897 | 15:2_3_16 3 0 898 | 15:2_8_11 3 0 899 | 15:2_13_19 3 0 900 | 15:6_11_12 3 0 901 | 15:7_7_7 3 0 902 | 15:7_7_11 3 0 903 | 15:11_12_18 3 0 904 | 17:8_17_17 3 0 905 | 18:1_1_5_18 3 0 906 | 18:1_1_9_9 3 0 907 | 18:1_1_11_18 3 0 908 | 18:1_2_3_3 3 0 909 | 18:1_2_5_14 3 0 910 | 18:1_2_6_11 3 0 911 | 18:1_2_18_18 3 0 912 | 18:1_5_14_14 3 0 913 | 18:2_2_3_3 3 0 914 | 18:2_2_8_11 3 0 915 | 20:6 3 0 916 | 23:3_18 3 0 917 | 27:2_6 3 0 918 | 27:2_14 3 0 919 | 27:2_19 3 0 920 | 27:2_20 3 0 921 | 27:7_8 3 0 922 | 29:1_1_20 3 0 923 | 29:1_16_16 3 0 924 | 29:2_7_11 3 0 925 | 30:8_12_12 3 0 926 | 31:2_8_16 3 0 927 | 31:5_8_8 3 0 928 | 32:1_1_2_7 3 0 929 | 33:16 3 0 930 | 53:3 3 0 931 | 53:7 3 0 932 | 69:5 3 0 933 | 69:7 3 0 934 | 74:5 3 0 935 | 8:6_14 2 0 936 | 8:7_18 2 0 937 | 8:9_18 2 0 938 | 10:4_16 2 0 939 | 10:6_7 2 0 940 | 12:1_5_14 2 0 941 | 12:1_7_8 2 0 942 | 12:1_7_9 2 0 943 | 12:2_3_8 2 0 944 | 12:2_3_14 2 0 945 | 12:2_6_11 2 0 946 | 12:2_8_8 2 0 947 | 12:2_8_14 2 0 948 | 12:2_8_18 2 0 949 | 12:3_3_5 2 0 950 | 12:5_5_8 2 0 951 | 12:5_7_8 2 0 952 | 12:7_7_7 2 0 953 | 12:7_8_16 2 0 954 | 12:8_11_11 2 0 955 | 13:2_7 2 0 956 | 13:7_7 2 0 957 | 14:3_20 2 0 958 | 15:1_2_6 2 0 959 | 15:1_11_19 2 0 960 | 15:2_2_4 2 0 961 | 15:2_5_20 2 0 962 | 15:2_6_11 2 0 963 | 15:2_8_8 2 0 964 | 15:2_8_16 2 0 965 | 15:2_9_9 2 0 966 | 15:2_11_14 2 0 967 | 15:12_16_16 2 0 968 | 16:6_8_17 2 0 969 | 18:1_1_5_6 2 0 970 | 18:1_1_7_8 2 0 971 | 18:1_3_11_11 2 0 972 | 18:2_2_2_18 2 0 973 | 18:2_2_5_7 2 0 974 | 18:2_5_5_5 2 0 975 | 18:2_5_11_11 2 0 976 | 18:2_5_11_16 2 0 977 | 18:2_5_14_14 2 0 978 | 18:2_8_14_14 2 0 979 | 18:3_14_14_14 2 0 980 | 18:5_5_11_11 2 0 981 | 22:18 2 0 982 | 23:1_6 2 0 983 | 23:2_14 2 0 984 | 23:5_9 2 0 985 | 23:7_11 2 0 986 | 29:2_5_11 2 0 987 | 31:2_8_9 2 0 988 | 31:2_13_18 2 0 989 | 31:8_8_18 2 0 990 | 32:1_2_11_12 2 0 991 | 35:1_14 2 0 992 | 35:2_6 2 0 993 | 35:5_7 2 0 994 | 37:11 2 0 995 | 49:1_11 2 0 996 | 49:16_18 2 0 997 | 51:2_12_16 2 0 998 | 51:7_11_11 2 0 999 | 52:2_12_12_18 2 0 1000 | 69:3 2 0 1001 | 8:7_8 1 0 1002 | 8:7_11 1 0 1003 | 8:7_16 1 0 1004 | 8:8_14 1 0 1005 | 10:3_7 1 0 1006 | 10:4_5 1 0 1007 | 10:4_20 1 0 1008 | 10:7_12 1 0 1009 | 12:1_3_18 1 0 1010 | 12:1_6_16 1 0 1011 | 12:1_8_14 1 0 1012 | 12:1_14_16 1 0 1013 | 12:1_18_18 1 0 1014 | 12:2_3_3 1 0 1015 | 12:2_3_18 1 0 1016 | 12:2_5_6 1 0 1017 | 12:2_5_14 1 0 1018 | 12:3_3_7 1 0 1019 | 12:3_5_16 1 0 1020 | 12:3_7_7 1 0 1021 | 12:3_8_14 1 0 1022 | 12:3_16_16 1 0 1023 | 12:5_5_9 1 0 1024 | 12:5_7_14 1 0 1025 | 12:5_11_16 1 0 1026 | 12:7_7_16 1 0 1027 | 14:3_7 1 0 1028 | 15:1_6_7 1 0 1029 | 15:1_12_18 1 0 1030 | 15:2_3_14 1 0 1031 | 15:2_4_7 1 0 1032 | 15:2_4_13 1 0 1033 | 15:2_4_18 1 0 1034 | 15:2_5_9 1 0 1035 | 15:2_7_20 1 0 1036 | 15:2_9_11 1 0 1037 | 15:2_9_14 1 0 1038 | 15:2_9_20 1 0 1039 | 15:2_17_17 1 0 1040 | 15:2_18_18 1 0 1041 | 15:3_7_11 1 0 1042 | 15:3_8_12 1 0 1043 | 15:5_6_7 1 0 1044 | 15:5_6_12 1 0 1045 | 15:6_7_12 1 0 1046 | 15:7_7_8 1 0 1047 | 15:7_12_18 1 0 1048 | 15:7_13_13 1 0 1049 | 15:7_16_19 1 0 1050 | 15:8_12_16 1 0 1051 | 15:12_17_17 1 0 1052 | 15:17_17_19 1 0 1053 | 16:2_6_17 1 0 1054 | 18:1_1_3_6 1 0 1055 | 18:1_1_8_8 1 0 1056 | 18:1_1_9_11 1 0 1057 | 18:1_2_2_6 1 0 1058 | 18:1_2_3_8 1 0 1059 | 18:1_2_3_14 1 0 1060 | 18:1_2_3_18 1 0 1061 | 18:1_2_8_16 1 0 1062 | 18:1_3_16_16 1 0 1063 | 18:1_5_5_5 1 0 1064 | 18:1_5_5_8 1 0 1065 | 18:1_5_8_11 1 0 1066 | 18:1_8_11_11 1 0 1067 | 18:1_8_14_14 1 0 1068 | 18:2_2_3_5 1 0 1069 | 18:2_2_8_8 1 0 1070 | 18:2_2_11_14 1 0 1071 | 18:2_3_14_14 1 0 1072 | 18:2_7_7_8 1 0 1073 | 18:2_11_11_16 1 0 1074 | 18:3_3_16_16 1 0 1075 | 18:3_16_16_16 1 0 1076 | 18:5_5_5_8 1 0 1077 | 18:5_5_5_11 1 0 1078 | 18:5_5_8_16 1 0 1079 | 18:5_5_16_16 1 0 1080 | 18:11_11_11_11 1 0 1081 | 20:3 1 0 1082 | 23:5_8 1 0 1083 | 23:14_18 1 0 1084 | 27:1_18 1 0 1085 | 27:3_7 1 0 1086 | 27:3_18 1 0 1087 | 27:4_16 1 0 1088 | 27:4_18 1 0 1089 | 27:7_11 1 0 1090 | 27:7_18 1 0 1091 | 29:1_1_21 1 0 1092 | 29:1_5_11 1 0 1093 | 29:1_8_18 1 0 1094 | 29:2_3_11 1 0 1095 | 29:2_21_21 1 0 1096 | 29:5_5_11 1 0 1097 | 29:5_7_11 1 0 1098 | 29:9_9_18 1 0 1099 | 29:14_14_18 1 0 1100 | 31:2_2_9 1 0 1101 | 31:2_2_14 1 0 1102 | 31:2_2_20 1 0 1103 | 31:2_3_8 1 0 1104 | 31:2_5_13 1 0 1105 | 31:2_8_14 1 0 1106 | 31:2_11_13 1 0 1107 | 31:2_12_17 1 0 1108 | 31:2_16_17 1 0 1109 | 32:1_1_2_5 1 0 1110 | 33:3 1 0 1111 | 35:1_3 1 0 1112 | 35:2_14 1 0 1113 | 35:7_16 1 0 1114 | 37:8 1 0 1115 | 47:16 1 0 1116 | 49:2_14 1 0 1117 | 49:2_21 1 0 1118 | 49:5_7 1 0 1119 | 51:1_2_2 1 0 1120 | 51:1_5_7 1 0 1121 | 51:1_8_12 1 0 1122 | 51:2_2_2 1 0 1123 | 51:2_8_12 1 0 1124 | 51:5_12_16 1 0 1125 | 52:5_12_12_16 1 0 1126 | 52:8_8_12_12 1 0 1127 | 52:8_11_12_12 1 0 1128 | 53:8 1 0 1129 | 53:11 1 0 1130 | 53:16 1 0 1131 | 69:8 1 0 1132 | 74:16 1 0 1133 | --------------------------------------------------------------------------------