├── .idea ├── .gitignore ├── libraries │ └── R_User_Library.xml ├── misc.xml ├── modules.xml ├── other.xml ├── tutorials.iml └── vcs.xml ├── 2D_conformation_sampling ├── Network_Interpretation.png └── alternative_2D_projections.png ├── Chemception_2D └── chemception2D.py ├── Chemical_Shift_back_calculation_from_MD ├── README.md └── calc_shifts_from_xtc.pl ├── Electrostatic_Potential_Globular_Protein ├── 3K5C-BACE.pdb ├── 3K5C-BACE_1.mol ├── BACE_pocket_EP2.png └── README.md ├── LICENSE ├── Morphing_Conformations └── README.md ├── Multilayer_Perceptron_Keras.tar.gz ├── Multilayer_Perceptron_Keras ├── .ipynb_checkpoints │ ├── MLP_Keras-checkpoint.ipynb │ └── qsar-rdkit-cdk2-checkpoint.ipynb ├── MLP_Keras.ipynb └── data │ ├── cdk1.sdf │ ├── cdk1_pK.dat │ ├── cdk2.sdf │ ├── cdk2_large.sdf │ ├── cdk2_pK.dat │ └── logBB.sdf ├── README.md ├── ROC_curves ├── .Rhistory ├── README.Rmd ├── README.html ├── README.md ├── data │ ├── .Rhistory │ ├── ROC_curves.png │ ├── SF1.scores │ ├── SF2.scores │ ├── SF3.scores │ ├── SF4.scores │ ├── SF5.scores │ └── activities └── function_definitions.r ├── compare_atomic_properties ├── README.md ├── data │ ├── compound_stereo1_ion1_tau1.COSMO_PM6.mol2 │ └── compound_stereo1_ion1_tau1.COSMO_PM6.sdf ├── mean_pose.png ├── pose1.png ├── pose2.marked.png ├── pose2.png └── std_pose.png ├── create_alternative_protonations ├── README.md ├── images │ └── 1a30_all_protonations.png └── protonate_receptor.py ├── dockprep ├── README.md ├── dockprep.py └── example_files │ ├── 3K5C-BACE.pdb │ ├── 3K5C-BACE_1.mol │ ├── 3K5C-BACE_4.mol │ ├── 3K5C-BACE_5.mol │ ├── 3K5C-BACE_6.mol │ ├── 3K5C-BACE_7.mol │ └── 3K5C-BACE_8.mol ├── mod_frcmod └── mod_frcmod.py ├── show_ligand_interactions ├── README.md ├── find_ligands_interacting_with_residueset.py ├── image_gallery │ ├── BACE_104_liginter.jpg │ ├── CatS_335_liginter.jpg │ └── Thrombin_2zc9_liginter.jpg └── show_ligand_interactions.py ├── visualize_ECFP_fragments ├── fragment_molecules.py └── visualize_ECFP_fragments.ipynb └── visualize_ligand_properties └── README.md /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /.idea/libraries/R_User_Library.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/other.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | -------------------------------------------------------------------------------- /.idea/tutorials.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | 14 | 16 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /2D_conformation_sampling/Network_Interpretation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/2D_conformation_sampling/Network_Interpretation.png -------------------------------------------------------------------------------- /2D_conformation_sampling/alternative_2D_projections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/2D_conformation_sampling/alternative_2D_projections.png -------------------------------------------------------------------------------- /Chemception_2D/chemception2D.py: -------------------------------------------------------------------------------- 1 | import rdkit 2 | from rdkit import Chem 3 | from rdkit.Chem import AllChem 4 | import pandas as pd 5 | import numpy as np 6 | import matplotlib 7 | matplotlib.use('Qt4Agg') # temporary solution to avoid "ImportError: No module named PyQt5" which is mainly for Python 3 8 | import matplotlib.pyplot as plt 9 | print "RDKit: %s"%rdkit.__version__ 10 | 11 | 12 | import keras 13 | from keras.models import Sequential, Model 14 | from keras.layers import Conv2D, MaxPooling2D, Input, GlobalMaxPooling2D 15 | from keras.layers.core import Dense, Dropout, Activation, Flatten 16 | from keras.optimizers import Adam 17 | from keras.preprocessing.image import ImageDataGenerator 18 | from keras.callbacks import ReduceLROnPlateau 19 | print("Keras: %s"%keras.__version__) 20 | 21 | 22 | data = pd.read_hdf("Sutherland.h5","table") 23 | data["mol"] = data["smiles"].apply(Chem.MolFromSmiles) 24 | 25 | 26 | def chemcepterize_mol(mol, embed=20.0, res=0.5): 27 | dims = int(embed*2/res) 28 | cmol = Chem.Mol(mol.ToBinary()) 29 | cmol.ComputeGasteigerCharges() 30 | AllChem.Compute2DCoords(cmol) 31 | coords = cmol.GetConformer(0).GetPositions() 32 | vect = np.zeros((dims,dims,4)) 33 | #Bonds first 34 | for i,bond in enumerate(mol.GetBonds()): 35 | bondorder = bond.GetBondTypeAsDouble() 36 | bidx = bond.GetBeginAtomIdx() 37 | eidx = bond.GetEndAtomIdx() 38 | bcoords = coords[bidx] 39 | ecoords = coords[eidx] 40 | frac = np.linspace(0,1,int(1/res*2)) # 41 | for f in frac: 42 | c = (f*bcoords + (1-f)*ecoords) 43 | idx = int(round((c[0] + embed)/res)) 44 | idy = int(round((c[1]+ embed)/res)) 45 | #Save in the vector first channel 46 | vect[ idx , idy ,0] = bondorder 47 | #Atom Layers 48 | for i,atom in enumerate(cmol.GetAtoms()): 49 | idx = int(round((coords[i][0] + embed)/res)) 50 | idy = int(round((coords[i][1]+ embed)/res)) 51 | #Atomic number 52 | vect[ idx , idy, 1] = atom.GetAtomicNum() 53 | #Gasteiger Charges 54 | charge = atom.GetProp("_GasteigerCharge") 55 | vect[ idx , idy, 3] = charge 56 | #Hybridization 57 | hyptype = atom.GetHybridization().real 58 | vect[ idx , idy, 2] = hyptype 59 | return vect 60 | 61 | 62 | # To better understand what the code has done, lets try to “chemcepterize” a molecule and show it as an image. 63 | # The embedding and the resolution are set lower than they will be for the final dataset. Matplotlib only supports 64 | # RGB, so only the first three channels are used. 65 | mol = data["mol"][0] 66 | v = chemcepterize_mol(mol, embed=10, res=0.2) 67 | print(v.shape) 68 | plt.imshow(v[:,:,:3]) 69 | 70 | 71 | # Next step is to “chemcepterize” the entire collection of RDKit molecules and add a new column with the “images” to the dataframe 72 | def vectorize(mol): 73 | return chemcepterize_mol(mol, embed=12) 74 | data["molimage"] = data["mol"].apply(vectorize) 75 | 76 | # The dataset already had a split value indicating if it should be train or test set. The shape of the final numpy arrays are 77 | # (samples, height, width, channels) 78 | X_train = np.array(list(data["molimage"][data["split"]==1])) 79 | X_test = np.array(list(data["molimage"][data["split"]==0])) 80 | print(X_train.shape) 81 | print(X_test.shape) 82 | 83 | 84 | # We also need to the prepare the values to predict. Here it is the IC50 for some DHFR inhibitors. The data is converted to log space and 85 | # the robust scaler from scikit-learn is used to scale the data to somewhat between -1 and 1 (neural networks like this range and it makes 86 | # training somewhat easier). 87 | assay = "PC_uM_value" 88 | y_train = data[assay][data["split"]==1].values.reshape(-1,1) 89 | y_test = data[assay][data["split"]==0].values.reshape(-1,1) 90 | from sklearn.preprocessing import RobustScaler 91 | rbs = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(5.0, 95.0), copy=True) 92 | y_train_s = rbs.fit_transform(np.log(y_train)) 93 | y_test_s = rbs.transform(np.log(y_test)) 94 | h = plt.hist(y_train_s, bins=20) 95 | 96 | 97 | input_shape = X_train.shape[1:] 98 | print input_shape 99 | 100 | 101 | def Inception0(input): 102 | tower_1 = Conv2D(16, (1, 1), padding='same', activation='relu')(input) 103 | tower_1 = Conv2D(16, (3, 3), padding='same', activation='relu')(tower_1) 104 | tower_2 = Conv2D(16, (1, 1), padding='same', activation='relu')(input) 105 | tower_2 = Conv2D(16, (5, 5), padding='same', activation='relu')(tower_2) 106 | tower_3 = Conv2D(16, (1, 1), padding='same', activation='relu')(input) 107 | output = keras.layers.concatenate([tower_1, tower_2, tower_3], axis=-1) 108 | return output 109 | 110 | 111 | def Inception(input): 112 | tower_1 = Conv2D(16, (1, 1), padding='same', activation='relu')(input) 113 | tower_1 = Conv2D(16, (3, 3), padding='same', activation='relu')(tower_1) 114 | tower_2 = Conv2D(16, (1, 1), padding='same', activation='relu')(input) 115 | tower_2 = Conv2D(16, (5, 5), padding='same', activation='relu')(tower_2) 116 | tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input) 117 | tower_3 = Conv2D(16, (1, 1), padding='same', activation='relu')(tower_3) 118 | output = keras.layers.concatenate([tower_1, tower_2, tower_3], axis=-1) 119 | return output 120 | 121 | 122 | input_img = Input(shape=input_shape) 123 | x = Inception0(input_img) 124 | x = Inception(x) 125 | x = Inception(x) 126 | od=int(x.shape[1]) 127 | x = MaxPooling2D(pool_size=(od,od), strides=(1,1))(x) 128 | x = Flatten()(x) 129 | x = Dense(100, activation='relu')(x) 130 | output = Dense(1, activation='linear')(x) 131 | model = Model(inputs=input_img, outputs=output) 132 | print model.summary() 133 | 134 | 135 | # For the optimization I use the Adam optimizer and the mean absolute error as a loss function. 136 | optimizer = Adam(lr=0.00025) 137 | model.compile(loss="mae", optimizer=optimizer) 138 | 139 | 140 | # The next part is crucial to avoid overfitting. Here the ImageDataGenerator object is used to perform random rotations and flips 141 | # of the images before the training as a way of augmenting the training dataset. By doing this, the network will learn how to handle 142 | # rotations and seeing the features in different orientations will help the model generalize better. Not including this will lead to 143 | # completely overfit models. We have not encoded stereochemical information in the images, otherwise the flipping should be done by 144 | # other means. The training set is concatenated to 50 times the length to have some sensible size epochs. 145 | 146 | from image import ImageDataGenerator 147 | generator = ImageDataGenerator(rotation_range=180, 148 | width_shift_range=0.1,height_shift_range=0.1, 149 | fill_mode="constant",cval = 0, 150 | horizontal_flip=True, vertical_flip=True,data_format='channels_last', 151 | ) 152 | #Concatenate for longer epochs 153 | Xt = np.concatenate([X_train]*50, axis=0) 154 | yt = np.concatenate([y_train_s]*50, axis=0) 155 | batch_size=128 156 | g = generator.flow(Xt, yt, batch_size=batch_size, shuffle=True) 157 | steps_per_epoch = 10000/batch_size 158 | 159 | 160 | 161 | # Now for the interesting part: Training. To lower the learning rate once the validation loss starts to plateau off I use 162 | # the ReduceLROnPlateau callback avaible as part of Keras. 163 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,patience=10, min_lr=1e-6, verbose=1) 164 | history = model.fit_generator(g, 165 | steps_per_epoch=len(Xt)//batch_size, 166 | epochs=150, 167 | validation_data=(X_test,y_test_s), 168 | callbacks=[reduce_lr]) 169 | 170 | # Models can be saved and loaded. The history objects history dictionary is pickled. 171 | name = "Chemception_std_notebook_demo" 172 | model.save("%s.h5"%name) 173 | hist = history.history 174 | import pickle 175 | pickle.dump(hist, file("%s_history.pickle"%name,"w")) 176 | #from keras.model import load_model 177 | #model = load_model("%s.h5"%name) 178 | 179 | 180 | # The convergence of the training can be judged from a plot of the learning process. Somewhat unusual, when there's 181 | # no regularization: The validation loss drops before the loss. The validation set is not augmented and thus consists of 182 | # some “perfect” pictures, whereas maybe it may take the network some longer to deal with all the rotations, which also 183 | # introduces some pixel artifacts due to the low resolution. 184 | for label in ['val_loss','loss']: 185 | plt.plot(hist[label], label = label) 186 | plt.legend() 187 | plt.yscale("log") 188 | plt.xlabel("Epochs") 189 | plt.ylabel("Loss/lr") 190 | 191 | # Plotting and Evaluating the Performance 192 | y_pred_t = rbs.inverse_transform(model.predict(X_train)) 193 | y_pred = rbs.inverse_transform(model.predict(X_test)) 194 | plt.scatter(np.log(y_train), y_pred_t, label="Train") 195 | plt.scatter(np.log(y_test), y_pred, label="Test") 196 | plt.xlabel("log(PC_uM)") 197 | plt.ylabel("predicted") 198 | plt.plot([-10,6],[-10,6]) 199 | plt.legend() 200 | 201 | corr2 = np.corrcoef(np.log(y_test).reshape(1,-1), y_pred.reshape(1,-1))[0][1]**2 202 | rmse = np.mean((np.log(y_test) - y_pred)**2)**0.5 203 | print("R2 : %0.2F"%corr2) 204 | print("RMSE : %0.2F"%rmse) 205 | 206 | 207 | # Visualizing the Layers 208 | # It can be interesting to try and understand how the model "sees" the molecules. For this I’ll take an example molecule 209 | # and plot some of the outputs from the different layers. I’ve taken the compound with the lowest IC50, number 143 in the dataset. 210 | molnum = 143 211 | molimage = np.array(list(data["molimage"][molnum:molnum+1])) 212 | mol = data["mol"][molnum] 213 | 214 | # The molecule looks like this 215 | from rdkit.Chem import Draw 216 | Draw.MolToImage(mol) 217 | 218 | # And has this “chemcepterized” image as shown below 219 | plt.imshow(molimage[0,:,:,:3]) 220 | 221 | # The first example is the third layer, which is the 1,1 convolution which feeds the 3,3 convolutional layer in tower 2. 222 | layer1_model = Model(inputs=model.input, 223 | outputs=model.layers[2].output) 224 | kernels1 = layer1_model.predict(molimage)[0] 225 | def plot_kernels(kernels): 226 | fig, axes = plt.subplots(2,3, figsize=(12,8)) 227 | for i,ax in enumerate(axes.flatten()): 228 | ax.matshow(kernels[:,:,i]) 229 | ax.set_title("Kernel %s"%i) 230 | plot_kernels(kernels1) 231 | 232 | # Lets go deeper... 233 | for layer in [7,13,15,19,20]: 234 | print("Layer %i"%layer) 235 | plot_kernels(Model(inputs=model.input,outputs=model.layers[layer].output).predict(molimage)[0]) 236 | plt.show() 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | -------------------------------------------------------------------------------- /Chemical_Shift_back_calculation_from_MD/README.md: -------------------------------------------------------------------------------- 1 | Requirements: 2 | * Perl 3 | * GROMACS Tools 4 | * Sparta+ 5 | 6 | Usage: calc_shifts_from_xtc.pl 7 | 8 | Input: as described above. You can find the start and end time of your trajectory by doing: gmx check -f . 9 | 10 | Output: 'SHIFTS' file contains chemical shifts of all frames, 'consensus_chemical_shifts.tab' file contains the average chemical shifts of each atom. 11 | 12 | -------------------------------------------------------------------------------- /Chemical_Shift_back_calculation_from_MD/calc_shifts_from_xtc.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | (@ARGV == 5) or die "Usage: calc_shifts_from_xtc.pl \n 4 | Input: as described above. You can find the start and end time of your trajectory by doing: gmx check -f .\n 5 | Output: 'SHIFTS' file contains chemical shifts of all frames, 'consensus_chemical_shifts.tab' file contains the average chemical shifts of each atom.\n"; 6 | 7 | $pdbfile=$ARGV[0]; 8 | $trj=$ARGV[1]; 9 | $start=$ARGV[2]; 10 | $end=$ARGV[3]; 11 | $timestep=$ARGV[4]; 12 | 13 | # 14 | # How many shifts we will be collecting ? 15 | # 16 | 17 | `echo 0 | gmx trjconv -s $pdbfile -f $trj -dump 1 -o ps0.pdb`; 18 | `sparta+ -in ps0.pdb > /dev/null 2>&1`; 19 | `/bin/rm -rf ps*.pdb`; 20 | 21 | # Find the line where the stucture description and CS predictions start 22 | open ( IN, "pred.tab" ) or die "Can not open pred.tab. Usage: calc_shifts \n"; 23 | while ( $line = ) 24 | { 25 | if ( $line =~ /^FORMAT/ ) 26 | { 27 | last; 28 | } 29 | } 30 | 31 | $line = ; 32 | $tot = 0; # the total number of atoms with CS prediction 33 | while ( $line = ) 34 | { 35 | $ids[ $tot ] = substr( $line, 0, 14 ); 36 | $tot++; 37 | } 38 | 39 | close( IN ); 40 | 41 | `/bin/rm -rf *.tab`; 42 | 43 | if ( $tot < 1 ) 44 | { 45 | print "Too few atoms for calculating shifts. Something is wrong. Bye.\n"; 46 | exit; 47 | } 48 | 49 | 50 | print "Will be collecting data for $tot atoms. Starting ...\n"; 51 | 52 | 53 | # 54 | # Will do it in sets of 400 structures ... 55 | # 56 | 57 | $first = int($start); 58 | 59 | `/bin/rm SHIFTS`; 60 | 61 | while( $first < int($end) ) { 62 | 63 | $last = $first + 400 * $timestep -1; 64 | printf("Now processing set starting at picosecond %8d and ending at picosecond %8d\n", $first, $last ); 65 | 66 | #for ($ps = $first; $ps <= $last; $ps += $timestep) { # not all ps are present in the trajectory (timestep for saving coordinates in MD was 2ps) 67 | # #print "DEBUG: echo 0 | gmx trjconv -dump $ps -s $pdbfile -f $trj -o ps${ps}.pdb > /dev/null 2>&1"; 68 | # `echo 0 | gmx trjconv -dump $ps -s $pdbfile -f $trj -o ps${ps}.pdb > /dev/null 2>&1`; 69 | #} 70 | 71 | # real 17m26.892s 72 | # user 17m7.694s 73 | # sys 0m15.413s 74 | `echo 0 | gmx trjconv -b $first -e $last -sep -s $pdbfile -f $trj -o ps.pdb > /dev/null 2>&1`; 75 | 76 | `sparta+ -in ps*.pdb > /dev/null 2>&1`; 77 | 78 | `/bin/rm -rf ps*.pdb *_struct.tab`; 79 | 80 | @files = glob("ps*.tab"); 81 | 82 | 83 | if ( @files == 0 ) 84 | { 85 | last; 86 | } 87 | 88 | 89 | @files = map {s/_pred\.tab//g; $_; } @files; 90 | @files = map {s/^ps//g; $_; } @files; 91 | my @ordered_files = sort {$a <=> $b} @files; 92 | 93 | 94 | foreach $file ( @ordered_files ) 95 | { 96 | #print "DEBUG: appending shifts from file ps${file}_pred.tab \n"; 97 | `tail -$tot ps${file}_pred.tab | awk '{printf "%8.3f ", \$5}' >> SHIFTS`; # append all chemical shifts of the current frame to a single line of SHIFTS file 98 | `echo >> SHIFTS`; # change line 99 | } 100 | 101 | `/bin/rm -rf ps*.tab`; # remove the frames that have been processed 102 | 103 | $first += 400 * $timestep; 104 | 105 | } 106 | 107 | print "\n\n"; 108 | 109 | # 110 | # Calculate means + sigmas using SHIFTS file 111 | # 112 | open ( IN, "SHIFTS" ) or die "Can not open SHIFTS ??? How did this happen ???\n"; 113 | open ( OUT, '>>consensus_chemical_shifts.tab'); 114 | 115 | for ( $i=0 ; $i < $tot ; $i++ ) 116 | { 117 | $mean= 0.0; 118 | $nof_lines = 0; 119 | $std = 0.0; 120 | while ( $line = ) 121 | { 122 | @data = split( ' ', $line ); 123 | 124 | $nof_lines++; 125 | $delta = $data[ $i ] - $mean; 126 | $mean += $delta / $nof_lines; 127 | $std += $delta * ($data[ $i ] - $mean); 128 | } 129 | 130 | printf "%s %8.4f %8.4f\n", $ids[ $i ], $mean, sqrt( $std / ($nof_lines -1)); 131 | printf OUT "%s %8.4f %8.4f\n", $ids[ $i ], $mean, sqrt( $std / ($nof_lines -1)); 132 | seek( IN, 0, 0 ); 133 | } 134 | 135 | close( IN ); 136 | close( OUT ); 137 | 138 | print "\nAll done.\n\n"; 139 | -------------------------------------------------------------------------------- /Electrostatic_Potential_Globular_Protein/3K5C-BACE_1.mol: -------------------------------------------------------------------------------- 1 | REMARK score -90.21 2 | LCcorina 10041815583D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 87 88 0 0 0 0 999 V2000 5 | 25.9314 5.1125 17.3673 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 26.3958 5.7692 18.5087 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 27.6294 6.4115 18.5006 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 26.6868 5.1203 16.1800 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 27.9071 5.7803 16.1600 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 28.3567 6.4294 17.3141 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 28.7339 5.7745 14.9144 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 28.2672 5.9839 13.7999 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 30.0161 5.5308 15.1565 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | 31.1655 5.7229 14.2694 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 32.3013 6.5543 14.9475 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 31.7701 7.7059 15.6807 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 32.5417 8.1089 16.8492 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 31.7382 8.9416 17.8567 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 31.4978 8.5096 19.2845 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 30.6304 9.5192 20.0787 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 29.4018 8.8984 20.7758 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 28.4435 8.3191 19.8155 N 0 0 0 0 0 0 0 0 0 0 0 0 23 | 28.2301 6.9783 19.7605 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 28.6153 6.2012 20.6343 O 0 0 0 0 0 0 0 0 0 0 0 0 25 | 31.6361 8.9012 14.7208 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 27.5563 9.2855 19.1471 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 26.1995 9.3897 19.8666 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 24.7362 4.4553 17.3963 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | 24.1183 4.4752 16.0315 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 22.5931 4.3157 16.1531 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 31.6697 4.3250 13.9202 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 31.9299 3.5877 15.1249 O 0 0 0 0 0 0 0 0 0 0 0 0 33 | 30.5990 3.5391 13.1173 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 31.2898 2.7953 11.9565 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 30.2806 1.9129 11.2208 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 31.8599 3.7873 10.9327 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 31.1890 4.7139 10.4842 O 0 0 0 0 0 0 0 0 0 0 0 0 38 | 33.0930 3.5508 10.5117 N 0 0 0 0 0 0 0 0 0 0 0 0 39 | 33.6869 4.3522 9.4692 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 34.4311 3.4314 8.5130 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 35.0024 4.1969 7.2880 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 36.2636 4.9971 7.6826 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 25.7958 5.7810 19.4067 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | 26.3148 4.6169 15.2999 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | 29.2986 6.9557 17.2660 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | 30.1652 5.1352 16.1720 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | 30.8093 6.2738 13.3865 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | 32.8490 5.9165 15.6569 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | 32.9940 6.9252 14.1777 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | 30.8225 7.3438 16.1060 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | 32.9033 7.2138 17.3764 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | 33.4002 8.7179 16.5295 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | 32.2194 9.9220 17.9885 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 30.7141 9.0833 17.4812 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 30.9772 7.5406 19.2916 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 32.4617 8.4118 19.8053 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 31.2407 9.9882 20.8646 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 30.2527 10.2947 19.3962 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 29.7308 8.0964 21.4529 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 28.8772 9.6737 21.3534 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 30.9754 8.6277 13.8849 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 32.6285 9.1711 14.3307 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 31.2086 9.7589 15.2607 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 28.0295 10.2785 19.1472 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 27.3787 8.9640 18.1102 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 25.7077 8.4058 19.8661 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 25.5624 10.1184 19.3440 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 26.3589 9.7195 20.9038 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 24.3476 5.4318 15.5392 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 24.5257 3.6475 15.4324 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 22.1522 4.2293 15.1491 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 22.1721 5.1941 16.6641 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 22.3643 3.4095 16.7332 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 32.6216 4.4157 13.3765 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 32.9038 3.6816 15.6277 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 30.1026 2.8145 13.7796 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 29.8526 4.2409 12.7169 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 32.0982 2.1846 12.3849 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 30.7877 1.3786 10.4039 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 29.4784 2.5408 10.8059 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 29.8497 1.1844 11.9235 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 33.6657 2.7302 10.9684 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 32.8970 4.8943 8.9284 H 0 0 0 0 0 0 0 0 0 0 0 0 84 | 34.3873 5.0738 9.9152 H 0 0 0 0 0 0 0 0 0 0 0 0 85 | 35.2706 2.9557 9.0410 H 0 0 0 0 0 0 0 0 0 0 0 0 86 | 33.7447 2.6560 8.1419 H 0 0 0 0 0 0 0 0 0 0 0 0 87 | 35.2699 3.4787 6.4991 H 0 0 0 0 0 0 0 0 0 0 0 0 88 | 34.2437 4.8957 6.9058 H 0 0 0 0 0 0 0 0 0 0 0 0 89 | 36.0008 5.7418 8.4482 H 0 0 0 0 0 0 0 0 0 0 0 0 90 | 36.6653 5.5090 6.7957 H 0 0 0 0 0 0 0 0 0 0 0 0 91 | 37.0228 4.3100 8.0846 H 0 0 0 0 0 0 0 0 0 0 0 0 92 | 1 2 2 0 0 0 0 93 | 1 4 1 0 0 0 0 94 | 1 24 1 0 0 0 0 95 | 2 3 1 0 0 0 0 96 | 2 39 1 0 0 0 0 97 | 3 6 2 0 0 0 0 98 | 3 19 1 0 0 0 0 99 | 4 5 2 0 0 0 0 100 | 4 40 1 0 0 0 0 101 | 5 6 1 0 0 0 0 102 | 5 7 1 0 0 0 0 103 | 6 41 1 0 0 0 0 104 | 7 8 2 0 0 0 0 105 | 7 9 1 0 0 0 0 106 | 9 10 1 0 0 0 0 107 | 9 42 1 0 0 0 0 108 | 10 11 1 0 0 0 0 109 | 10 27 1 0 0 0 0 110 | 10 43 1 0 0 0 0 111 | 11 12 1 0 0 0 0 112 | 11 44 1 0 0 0 0 113 | 11 45 1 0 0 0 0 114 | 12 13 1 0 0 0 0 115 | 12 21 1 0 0 0 0 116 | 12 46 1 0 0 0 0 117 | 13 14 1 0 0 0 0 118 | 13 47 1 0 0 0 0 119 | 13 48 1 0 0 0 0 120 | 14 15 1 0 0 0 0 121 | 14 49 1 0 0 0 0 122 | 14 50 1 0 0 0 0 123 | 15 16 1 0 0 0 0 124 | 15 51 1 0 0 0 0 125 | 15 52 1 0 0 0 0 126 | 16 17 1 0 0 0 0 127 | 16 53 1 0 0 0 0 128 | 16 54 1 0 0 0 0 129 | 17 18 1 0 0 0 0 130 | 17 55 1 0 0 0 0 131 | 17 56 1 0 0 0 0 132 | 18 19 1 0 0 0 0 133 | 18 22 1 0 0 0 0 134 | 19 20 2 0 0 0 0 135 | 21 57 1 0 0 0 0 136 | 21 58 1 0 0 0 0 137 | 21 59 1 0 0 0 0 138 | 22 23 1 0 0 0 0 139 | 22 60 1 0 0 0 0 140 | 22 61 1 0 0 0 0 141 | 23 62 1 0 0 0 0 142 | 23 63 1 0 0 0 0 143 | 23 64 1 0 0 0 0 144 | 24 25 1 0 0 0 0 145 | 25 26 1 0 0 0 0 146 | 25 65 1 0 0 0 0 147 | 25 66 1 0 0 0 0 148 | 26 67 1 0 0 0 0 149 | 26 68 1 0 0 0 0 150 | 26 69 1 0 0 0 0 151 | 27 28 1 0 0 0 0 152 | 27 29 1 0 0 0 0 153 | 27 70 1 0 0 0 0 154 | 28 71 1 0 0 0 0 155 | 29 30 1 0 0 0 0 156 | 29 72 1 0 0 0 0 157 | 29 73 1 0 0 0 0 158 | 30 31 1 0 0 0 0 159 | 30 32 1 0 0 0 0 160 | 30 74 1 0 0 0 0 161 | 31 75 1 0 0 0 0 162 | 31 76 1 0 0 0 0 163 | 31 77 1 0 0 0 0 164 | 32 33 2 0 0 0 0 165 | 32 34 1 0 0 0 0 166 | 34 35 1 0 0 0 0 167 | 34 78 1 0 0 0 0 168 | 35 36 1 0 0 0 0 169 | 35 79 1 0 0 0 0 170 | 35 80 1 0 0 0 0 171 | 36 37 1 0 0 0 0 172 | 36 81 1 0 0 0 0 173 | 36 82 1 0 0 0 0 174 | 37 38 1 0 0 0 0 175 | 37 83 1 0 0 0 0 176 | 37 84 1 0 0 0 0 177 | 38 85 1 0 0 0 0 178 | 38 86 1 0 0 0 0 179 | 38 87 1 0 0 0 0 180 | M END 181 | $$$$ 182 | -------------------------------------------------------------------------------- /Electrostatic_Potential_Globular_Protein/BACE_pocket_EP2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/Electrostatic_Potential_Globular_Protein/BACE_pocket_EP2.png -------------------------------------------------------------------------------- /Electrostatic_Potential_Globular_Protein/README.md: -------------------------------------------------------------------------------- 1 | # CALCULATE AND VISUALIZE THE ELECTROSTATIC POTENTIAL OF A GLOBULAR PROTEIN 2 | 3 | #### NOTE: I strongly recommend installing the latest versions of PDB2PQR and APBS from [https://github.com/Electrostatics/apbs-pdb2pqr.git](https://github.com/Electrostatics/apbs-pdb2pqr.git). 4 | 5 | In this example we will calculate the electrostatic potential (**EP**) of BACE protease (beta-secretase 1), one of the targets of [D3R Grand Challenge 2018](https://drugdesigndata.org/about/grand-challenge-4). 6 | All compounds in the BACE free energy set of D3R Challenge 2018 had a charge of +2 at the assay pH value of 4.5. Therefore we shall calculate the electrostatic potential at this pH. 7 | ``` 8 | pH=4.5 9 | pdbID=3K5C-BACE 10 | pdb2pqr.py --ff=CHARMM --apbs-input --with-ph=$pH --chain ${pdbID}.pdb ${pdbID}.pH${pH}.pqr 11 | ``` 12 | this will create an input file for APBS named '3K5C-BACE.pH4.5.in' 13 | 14 | Launch APBS 15 | ``` 16 | apbs ${pdbID}.pH${pH}.in --output-file=${pdbID}.pH${pH}_APBS.log 17 | ``` 18 | this will create a density file with the electrostatic potential named '3K5C-BACE.pH4.5.pqr.dx' 19 | 20 | 21 | To view the EP on the protein surface load the receptor pqr file `3K5C-BACE.pH4.5.pqr` on [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/) and display the surface by **Actions->Surface->Show**. If that fails try to adjust the vdw raddi by: 22 | ``` 23 | vdwdefine +.05 24 | ``` 25 | or to add hydrogens with the command 26 | ``` 27 | addh 28 | ``` 29 | and then select **Surface/Binding Analysis->Electrostatic Surface Coloring**, and under **potential file** select the .dx file. I will leave the default values, namely the surface will be colored with a color scale from -10 eV to 10 eV, with red representing negative charge, white neutral and blue positive. The charge value shown is that on the solvent 30 | accessible surface of the protein, namely 1.4 Å far from the surface. 31 | 32 | Now we will try to focus on the binding pocket. Load the ligand file `3K5C-BACE_1.mol` and use the **Tools->Depiction->Per-Model Clipping** utility to make tomographies of the EP map while mentaining the cartoon representation of the protein. Select as **model** the `MSMS main surface...` and experiment with **Enable clipping** and **Adjust clipping with mouse as below** options both activated. Use the middle and right mouse buttons to rotate and shift the clipping place, respectively. 33 | Optionally, you can create a nice cartoon representation of the protein and ligand: 34 | ``` 35 | ribspline cardinal smoothing both stiffness 0.8 36 | ribrepr edged 37 | set silhouette 38 | set silhouette_width 2 39 | set dcstart 0.3 40 | background solid white 41 | ``` 42 | In the case of BACE, the binding pocket is covered by a a loop ("beta turn") therefore it is better to hide the ribbons for clarity. 43 | ``` 44 | ~ribbon 45 | ``` 46 | Now we are ready to write an image file. Go to **File->Image**, select the image name and type and save it! 47 | 48 | ![EP at the binding pocket of BACE.](BACE_pocket_EP2.png) 49 | 50 | 51 | Once your're done, you can save your session at **File->Save Session as**. When you reopen it with Chimera you might get an error saying: 52 | ``` 53 | "Could not restore surface color on surface model with id 0.1 because volume used in coloring was not restored." 54 | ``` 55 | To fix it simply go to **Surface/Binding Analysis->Electrostatic Surface Coloring** and click on **color** button. 56 | 57 | 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Thomas Evangelidis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Morphing_Conformations/README.md: -------------------------------------------------------------------------------- 1 | Morphing protein-ligand conformations with UCSF Chimera 2 | ========================== 3 | 4 | In this tutorial we will do simple morphing between 2 protein-ligand complex conformations, but using the same steps 5 | we can use even more structures. 6 | 7 | ```python 8 | open data/state1.pdb 9 | open data/state2.pdb 10 | del :Na+ 11 | del :WAT 12 | ribspline cardinal smoothing both stiffness 0.8 13 | ribrepr edged 14 | set silhouette 15 | set silhouette_width 2 16 | set dcstart 0.3 17 | # under Tools->Depiction->Ribbon Style Editor, set Coil width and height to 0.15. The rest look fine. 18 | sel ~:LIG 19 | namesel rec 20 | sel :LIG 21 | namesel lig 22 | color pink rec 23 | color green lig 24 | zonesel lig 3.5 rec 25 | namesel pocket 26 | ~display rec 27 | display pocket 28 | background solid white 29 | 30 | # then Tools->Structure Comparison->Morph Conformations, add the two models, increase the number of intermediate conformations (e.g. 60), 31 | # do NOT select minimize, click create. Alternatively use the following command lines: 32 | morph start #0 name holo2apo frames 60 33 | morph interpolate #1 name holo2apo 34 | morph movie name holo2apo nogui true minimize false 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /Multilayer_Perceptron_Keras.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/Multilayer_Perceptron_Keras.tar.gz -------------------------------------------------------------------------------- /Multilayer_Perceptron_Keras/.ipynb_checkpoints/MLP_Keras-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#Tutorial 1: Multi-Layer Perceptron with Keras" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "##Objectives:" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "source": [ 23 | "In this tutorial you will learn how to construct a simple Multi-Layer Perceptron model with Keras. Specifically you will learn to:\n", 24 | "* Create and add layers including weight initialization and activation.\n", 25 | "* Compile models including optimization method, loss function and metrics.\n", 26 | "* Fit models include epochs and batch size\n", 27 | "* Model predictions.\n", 28 | "* Summarize the model." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stderr", 38 | "output_type": "stream", 39 | "text": [ 40 | "/home/thomas/Programs/Anaconda2/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 41 | " from ._conv import register_converters as _register_converters\n", 42 | "Using TensorFlow backend.\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "import numpy as np\n", 48 | "from keras.models import Sequential\n", 49 | "from keras.layers import Dense\n", 50 | "from keras.wrappers.scikit_learn import KerasRegressor\n", 51 | "from sklearn.model_selection import cross_val_score, KFold, train_test_split\n", 52 | "from sklearn.preprocessing import StandardScaler\n", 53 | "from sklearn.pipeline import Pipeline\n", 54 | "from rdkit import Chem, DataStructs\n", 55 | "from rdkit.Chem import AllChem, Descriptors" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "#### Reading molecules and activity from SDF" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 2, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "fname = \"data/cdk2.sdf\"\n", 72 | "\n", 73 | "mols = []\n", 74 | "y = []\n", 75 | "for mol in Chem.SDMolSupplier(fname):\n", 76 | " if mol is not None:\n", 77 | " mols.append(mol)\n", 78 | " y.append(float(mol.GetProp(\"pIC50\")))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "#### Calculate descriptors (fingerprints) and convert them into numpy array" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 3, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# generate binary Morgan fingerprint with radius 2\n", 95 | "fp = [AllChem.GetMorganFingerprintAsBitVect(m, 2) for m in mols]" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "def rdkit_numpy_convert(fp):\n", 105 | " output = []\n", 106 | " for f in fp:\n", 107 | " arr = np.zeros((1,))\n", 108 | " DataStructs.ConvertToNumpyArray(f, arr)\n", 109 | " output.append(arr)\n", 110 | " return np.asarray(output)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 5, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "x = rdkit_numpy_convert(fp)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# fix random seed for reproducibility\n", 129 | "seed = 2019\n", 130 | "np.random.seed(seed)\n", 131 | "\n", 132 | "# randomly select 20% of compounds as test set\n", 133 | "x_tr, x_ts, y_tr, y_ts = train_test_split(x, y, test_size=0.20, random_state=seed)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "# molecules for training = 348, # of features = 2048\n", 146 | "\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "mol_num, feat_num = x_tr.shape\n", 152 | "print(\"# molecules for training = %i, # of features = %i\\n\" % (mol_num, feat_num))" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "We can create Keras models and evaluate them with scikit-learn by using handy wrapper objects provided by the Keras library. This is desirable, because scikit-learn excels at evaluating models and will allow us to use powerful data preparation and model evaluation schemes with very few lines of code.\n", 160 | "\n", 161 | "The Keras wrappers require a function as an argument. This function that we must define is responsible for creating the neural network model to be evaluated.\n", 162 | "\n", 163 | "Below we define the function to create a simple MLP regressor that has a single fully connected hidden layer with the same number of neurons as input attributes (13). The network uses the rectifier activation function for the hidden layer. No activation function is used for the output layer because it is a regression problem and we are interested in predicting numerical values directly without transform.\n", 164 | "\n", 165 | "The efficient ADAM optimization algorithm is used and a mean squared error loss function is optimized. This will be the same metric that we will use to evaluate the performance of the model. It is a desirable metric because by taking the square root gives us an error value we can directly understand in the context of the problem (kcal)." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 8, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "\n", 175 | "# define the first MLP regressor model\n", 176 | "def MLP_model1(sample_num, feat_num):\n", 177 | "\t# create model\n", 178 | "\tmodel = Sequential()\n", 179 | "\tmodel.add(Dense(sample_num, input_dim=feat_num, kernel_initializer='normal', activation='relu'))\n", 180 | "\tmodel.add(Dense(1, kernel_initializer='normal'))\n", 181 | "\t# Compile model\n", 182 | "\tmodel.compile(loss='mean_squared_error', optimizer='adam')\n", 183 | "\treturn model\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "The Keras wrapper object for use in scikit-learn as a regression estimator is called KerasRegressor. We create an instance and pass it both the name of the function to create the neural network model as well as some parameters to pass along to the fit() function of the model later, such as the number of epochs and batch size.\n", 191 | "\n", 192 | "We also initialize the random number generator with a constant random seed, a process we will repeat for each model evaluated in this tutorial. This is an attempt to ensure we compare models consistently." 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 9, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "# evaluate model with standardized dataset\n", 202 | "estimator = KerasRegressor(build_fn=MLP_model1, sample_num=mol_num, feat_num=feat_num, epochs=10, batch_size=2, verbose=0)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "The final step is to evaluate this baseline model. We will use 10-fold cross validation to evaluate the model." 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 14, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "ename": "ValueError", 219 | "evalue": "scoring value should either be a callable, string or None. {'tau': , 'MSE': 'mean_squared_error', 'r2': 'r2'} was passed", 220 | "output_type": "error", 221 | "traceback": [ 222 | "\u001b[0;31m\u001b[0m", 223 | "\u001b[0;31mValueError\u001b[0mTraceback (most recent call last)", 224 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mkfold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscorer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkfold\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m#print(\"Results: %.2f (%.2f) MSE\" % (results.mean(), results.std()))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 225 | "\u001b[0;32m/home/thomas/Programs/Anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;31m# To ensure multimetric format is not supported\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_scoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,\n", 226 | "\u001b[0;32m/home/thomas/Programs/Anaconda2/lib/python2.7/site-packages/sklearn/metrics/scorer.pyc\u001b[0m in \u001b[0;36mcheck_scoring\u001b[0;34m(estimator, scoring, allow_none)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 304\u001b[0m raise ValueError(\"scoring value should either be a callable, string or\"\n\u001b[0;32m--> 305\u001b[0;31m \" None. %r was passed\" % scoring)\n\u001b[0m\u001b[1;32m 306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 227 | "\u001b[0;31mValueError\u001b[0m: scoring value should either be a callable, string or None. {'tau': , 'MSE': 'mean_squared_error', 'r2': 'r2'} was passed" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "def kendalls_tau(estimator, X, y):\n", 233 | " from scipy.stats import kendalltau, pearsonr\n", 234 | " preds = estimator.predict(X)\n", 235 | " t = kendalltau(preds, y)[0]\n", 236 | " return t\n", 237 | "\n", 238 | "scorer = {'r2':'r2', 'MSE':'mean_squared_error'}\n", 239 | " \n", 240 | "\n", 241 | "kfold = KFold(n_splits=2, random_state=seed)\n", 242 | "results = cross_val_score(estimator, x, y, scoring=scorer, cv=kfold)\n", 243 | "print results\n", 244 | "#print(\"Results: %.2f (%.2f) MSE\" % (results.mean(), results.std()))" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "Running this code gives us an estimate of the model’s performance on the problem for unseen data. The result reports the mean squared error including the average and standard deviation (average variance) across all 10 folds of the cross validation evaluation.\n", 252 | "\n" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "kernelspec": { 265 | "display_name": "Python [default]", 266 | "language": "python", 267 | "name": "python2" 268 | }, 269 | "language_info": { 270 | "codemirror_mode": { 271 | "name": "ipython", 272 | "version": 2 273 | }, 274 | "file_extension": ".py", 275 | "mimetype": "text/x-python", 276 | "name": "python", 277 | "nbconvert_exporter": "python", 278 | "pygments_lexer": "ipython2", 279 | "version": "2.7.15" 280 | } 281 | }, 282 | "nbformat": 4, 283 | "nbformat_minor": 1 284 | } 285 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | I will gradually open source and publish various scripts and tutorials from my personal archive about MD setup and analysis, protein-ligand docking, machine learning and lots of other interesting things related to Computational Chemistry and Drug Design. 2 | 3 | Follow me on [twitter](https://twitter.com/tevangelidis) or on [LinkedIn](https://www.linkedin.com/in/thomas-evangelidis-495b45125/) to get regular updates. 4 | -------------------------------------------------------------------------------- /ROC_curves/.Rhistory: -------------------------------------------------------------------------------- 1 | "data/SF3.scores", 2 | "data/SF4.scores", 3 | "data/SF5.scores" 4 | ) 5 | x = read.table(RESULTS_FILES[1], header = TRUE) 6 | valid_molnames <- as.vector(x[1]) 7 | for (i in 2:length(RESULTS_FILES)) { 8 | y = read.table(RESULTS_FILES[i], header = TRUE) 9 | print(head(y[1])) 10 | valid_molnames <- intersect(valid_molnames, as.vector(y[1])) 11 | } 12 | valid_molnames 13 | x = read.table(RESULTS_FILES[1], header = TRUE) 14 | valid_molnames <- x[1] 15 | i=2 16 | y = read.table(RESULTS_FILES[i], header = TRUE) 17 | valid_molnames <- intersect(valid_molnames, y[1]) 18 | valid_molnames 19 | x = read.table(RESULTS_FILES[1], header = TRUE) 20 | valid_molnames <- x[1] 21 | #for (i in 2:length(RESULTS_FILES)) { 22 | i=2 23 | y = read.table(RESULTS_FILES[i], header = TRUE) 24 | valid_molnames <- intersect(valid_molnames, y[1]) 25 | #} 26 | i=3 27 | y = read.table(RESULTS_FILES[i], header = TRUE) 28 | valid_molnames <- intersect(valid_molnames, y[1]) 29 | i=4 30 | y = read.table(RESULTS_FILES[i], header = TRUE) 31 | valid_molnames <- intersect(valid_molnames, y[1]) 32 | i=5 33 | y = read.table(RESULTS_FILES[i], header = TRUE) 34 | valid_molnames <- intersect(valid_molnames, y[1]) 35 | valid_molnames 36 | x = read.table(RESULTS_FILES[1], header = TRUE) 37 | valid_molnames <- x[1] 38 | #for (i in 2:length(RESULTS_FILES)) { 39 | i=2 40 | y = read.table(RESULTS_FILES[i], header = TRUE) 41 | valid_molnames <- intersect(valid_molnames, y[1]) 42 | valid_molnames 43 | x = read.table(RESULTS_FILES[1], header = TRUE) 44 | valid_molnames <- x[1] 45 | #for (i in 2:length(RESULTS_FILES)) { 46 | i=2 47 | y = read.table(RESULTS_FILES[i], header = TRUE) 48 | valid_molnames <- intersect(valid_molnames, y[1]) 49 | valid_molnames 50 | y 51 | valid_molnames <- c() 52 | for (i in 1:length(RESULTS_FILES)) { 53 | x = read.table(RESULTS_FILES[i], header = TRUE) 54 | valid_molnames <- c(valid_molnames, x[1]) 55 | } 56 | valid_molnames 57 | length(valid_molnames) 58 | valid_molnames <- c() 59 | for (i in 1:length(RESULTS_FILES)) { 60 | x = read.table(RESULTS_FILES[i], header = TRUE) 61 | valid_molnames <- c(valid_molnames, as.vector(x[1])) 62 | } 63 | length(valid_molnames) 64 | valid_molnames[1] 65 | valid_molnames[2] 66 | c(c(), c(1,2,3)) 67 | v <- c(c(), c(1,2,3)) 68 | v 69 | valid_molnames <- vector() 70 | for (i in 1:length(RESULTS_FILES)) { 71 | x = read.table(RESULTS_FILES[i], header = TRUE) 72 | valid_molnames <- c(valid_molnames, as.vector(x[1])) 73 | } 74 | valid_molnames[2] 75 | valid_molnames <- vector() 76 | for (i in 1:length(RESULTS_FILES)) { 77 | x = read.table(RESULTS_FILES[i], header = TRUE) 78 | valid_molnames <- c(valid_molnames, as.vector(x[1])) 79 | } 80 | valid_molnames 81 | length(valid_molnames) 82 | valid_molnames <- vector() 83 | for (i in 1:length(RESULTS_FILES)) { 84 | x = read.table(RESULTS_FILES[i], header = TRUE) 85 | colnames(x)[1] = "molname" 86 | colnames(x)[2] = "score" 87 | valid_molnames <- c(valid_molnames, as.vector(x$molname)) 88 | } 89 | valid_molnames 90 | x = read.table(RESULTS_FILES[1], header = TRUE) 91 | colnames(x)[1] = "molname" 92 | valid_molnames <- x$molname 93 | for (i in 2:length(RESULTS_FILES)) { 94 | x = read.table(RESULTS_FILES[i], header = TRUE) 95 | colnames(x)[1] = "molname" 96 | valid_molnames <- intersect(valid_molnames, x$molname) 97 | } 98 | valid_molnames 99 | length(valid_molnames) 100 | x = read.table(RESULTS_FILES[1], header = TRUE) 101 | colnames(x)[1] = "molname" 102 | valid_molnames <- x$molname 103 | vvalid_molnames 104 | length(valid_molnames) 105 | x = read.table(RESULTS_FILES[1], header = TRUE) 106 | colnames(x)[1] = "molname" 107 | valid_molnames <- x$molname 108 | length(valid_molnames) 109 | x = read.table(RESULTS_FILES[1], header = TRUE) 110 | RESULTS_FILES = c("data/SF1.scores", 111 | "data/SF2.scores", 112 | "data/SF3.scores", 113 | "data/SF4.scores", 114 | "data/SF5.scores" 115 | ) 116 | x = read.table(RESULTS_FILES[1], header = TRUE) 117 | colnames(x)[1] = "molname" 118 | valid_molnames <- x$molname 119 | length(valid_molnames) 120 | x = read.table(RESULTS_FILES[1], header = TRUE) 121 | colnames(x)[1] = "molname" 122 | valid_molnames <- unique(sort(x$molname)) 123 | length(valid_molnames) 124 | x = read.table(RESULTS_FILES[1], header = TRUE) 125 | colnames(x)[1] = "molname" 126 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames 127 | for (i in 2:length(RESULTS_FILES)) { 128 | x = read.table(RESULTS_FILES[i], header = TRUE) 129 | colnames(x)[1] = "molname" 130 | valid_molnames <- intersect(valid_molnames, x$molname) 131 | } 132 | length(valid_molnames) 133 | RESULTS_FILES = c("data/SF1.scores", 134 | "data/SF2.scores", 135 | "data/SF3.scores", 136 | "data/SF4.scores", 137 | "data/SF5.scores" 138 | ) 139 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions 140 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier 141 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers. 142 | library("ROCR") 143 | library("hash") 144 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly 145 | common_molnames <- function(RESULTS_FILES) { 146 | x = read.table(RESULTS_FILES[1], header = TRUE) 147 | colnames(x)[1] = "molname" 148 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames 149 | for (i in 2:length(RESULTS_FILES)) { 150 | x = read.table(RESULTS_FILES[i], header = TRUE) 151 | colnames(x)[1] = "molname" 152 | valid_molnames <- intersect(valid_molnames, x$molname) 153 | } 154 | } 155 | ## ---- read_scores 156 | read_scores <- function(RESULTS_FILE, ACTIVITIE_FILE, valid_molnames) { 157 | " 158 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will 159 | be considered. 160 | " 161 | x = read.table(RESULTS_FILE, header = TRUE) 162 | colnames(x)[2] = "score" 163 | # ignore the other columns 164 | score_dict = hash() 165 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] } 166 | a = read.table(ACTIVITIES_FILE) 167 | colnames(a)[1] = "molname" 168 | colnames(a)[2] = "label" 169 | label_dict = hash() 170 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] } 171 | scores <- rep(0, length(valid_molnames)) 172 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents 173 | i = 1 174 | for (molname in valid_molnames) { 175 | scores[i] <- score_dict[[molname]] 176 | labels[i] <- label_dict[[molname]] 177 | i <- i+1 178 | } 179 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better 180 | return(pred) 181 | } 182 | RESULTS_FILES = c("data/SF1.scores", 183 | "data/SF2.scores", 184 | "data/SF3.scores", 185 | "data/SF4.scores", 186 | "data/SF5.scores" 187 | ) 188 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions 189 | ACTIVITIES_FILE = "data/activities" 190 | NAMES <- c("Scoring Function 1", "Sscoring Function 2", "Scoring Function 3", "Scoring Function 4", 191 | "Scoring Function 5") 192 | par(cex.main=2.0, cex.lab=1.5) ; # <== CHANGE ME 193 | library("ROCR") 194 | library("hash") 195 | COLORS <- rainbow(3*length(RESULTS_FILES)) 196 | COLORS <- COLORS[seq(3, length(COLORS), 3)] 197 | for (i in 1:length(RESULTS_FILES)) { 198 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 199 | perf = performance(pred, 'tpr', 'fpr') 200 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 201 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 202 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3) 203 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3) 204 | } 205 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier 206 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers. 207 | library("ROCR") 208 | library("hash") 209 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly 210 | common_molnames <- function(RESULTS_FILES) { 211 | x = read.table(RESULTS_FILES[1], header = TRUE) 212 | colnames(x)[1] = "molname" 213 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames 214 | for (i in 2:length(RESULTS_FILES)) { 215 | x = read.table(RESULTS_FILES[i], header = TRUE) 216 | colnames(x)[1] = "molname" 217 | valid_molnames <- intersect(valid_molnames, x$molname) 218 | } 219 | } 220 | ## ---- read_scores 221 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) { 222 | " 223 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will 224 | be considered. 225 | " 226 | x = read.table(RESULTS_FILE, header = TRUE) 227 | colnames(x)[2] = "score" 228 | # ignore the other columns 229 | score_dict = hash() 230 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] } 231 | a = read.table(ACTIVITIES_FILE) 232 | colnames(a)[1] = "molname" 233 | colnames(a)[2] = "label" 234 | label_dict = hash() 235 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] } 236 | scores <- rep(0, length(valid_molnames)) 237 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents 238 | i = 1 239 | for (molname in valid_molnames) { 240 | scores[i] <- score_dict[[molname]] 241 | labels[i] <- label_dict[[molname]] 242 | i <- i+1 243 | } 244 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better 245 | return(pred) 246 | } 247 | for (i in 1:length(RESULTS_FILES)) { 248 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 249 | perf = performance(pred, 'tpr', 'fpr') 250 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 251 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 252 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3) 253 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3) 254 | } 255 | i 256 | RESULTS_FILES[i] 257 | ACTIVITIES_FILE 258 | getwd(\) 259 | getwd() 260 | valid_molnames 261 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier 262 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers. 263 | library("ROCR") 264 | library("hash") 265 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly 266 | common_molnames <- function(RESULTS_FILES) { 267 | x = read.table(RESULTS_FILES[1], header = TRUE) 268 | colnames(x)[1] = "molname" 269 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames 270 | for (i in 2:length(RESULTS_FILES)) { 271 | x = read.table(RESULTS_FILES[i], header = TRUE) 272 | colnames(x)[1] = "molname" 273 | valid_molnames <- intersect(valid_molnames, x$molname) 274 | } 275 | return(valid_molnames) 276 | } 277 | ## ---- read_scores 278 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) { 279 | " 280 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will 281 | be considered. 282 | " 283 | x = read.table(RESULTS_FILE, header = TRUE) 284 | colnames(x)[2] = "score" 285 | # ignore the other columns 286 | score_dict = hash() 287 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] } 288 | a = read.table(ACTIVITIES_FILE) 289 | colnames(a)[1] = "molname" 290 | colnames(a)[2] = "label" 291 | label_dict = hash() 292 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] } 293 | scores <- rep(0, length(valid_molnames)) 294 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents 295 | i = 1 296 | for (molname in valid_molnames) { 297 | scores[i] <- score_dict[[molname]] 298 | labels[i] <- label_dict[[molname]] 299 | i <- i+1 300 | } 301 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better 302 | return(pred) 303 | } 304 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions 305 | valid_molnames 306 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 307 | valid_molnames 308 | ACTIVITIES_FILE 309 | RESULTS_FILE= RESULTS_FILES[i] 310 | ACTIVITIES_FILE 311 | valid_molnames 312 | x = read.table(RESULTS_FILE, header = TRUE) 313 | colnames(x)[2] = "score" 314 | # ignore the other columns 315 | score_dict = hash() 316 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] } 317 | score_dict 318 | a = read.table(ACTIVITIES_FILE) 319 | colnames(a)[1] = "molname" 320 | colnames(a)[2] = "label" 321 | label_dict = hash() 322 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] } 323 | scores <- rep(0, length(valid_molnames)) 324 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents 325 | i = 1 326 | scores 327 | labels 328 | for (molname in valid_molnames) { 329 | scores[i] <- score_dict[[molname]] 330 | labels[i] <- label_dict[[molname]] 331 | i <- i+1 332 | } 333 | scores 334 | labels 335 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier 336 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers. 337 | library("ROCR") 338 | library("hash") 339 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly 340 | common_molnames <- function(RESULTS_FILES) { 341 | x = read.table(RESULTS_FILES[1], header = TRUE) 342 | colnames(x)[1] = "molname" 343 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames 344 | for (i in 2:length(RESULTS_FILES)) { 345 | x = read.table(RESULTS_FILES[i], header = TRUE) 346 | colnames(x)[1] = "molname" 347 | valid_molnames <- intersect(valid_molnames, x$molname) 348 | } 349 | return(valid_molnames) 350 | } 351 | ## ---- read_scores 352 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) { 353 | " 354 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will 355 | be considered. 356 | " 357 | x = read.table(RESULTS_FILE, header = TRUE) 358 | colnames(x)[2] = "score" 359 | # ignore the other columns 360 | score_dict = hash() 361 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] } 362 | a = read.table(ACTIVITIES_FILE) 363 | colnames(a)[1] = "molname" 364 | colnames(a)[2] = "label" 365 | label_dict = hash() 366 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] } 367 | scores <- rep(0, length(valid_molnames)) 368 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents 369 | i = 1 370 | for (molname in valid_molnames) { 371 | scores[i] <- score_dict[[molname]] 372 | labels[i] <- label_dict[[molname]] 373 | i <- i+1 374 | } 375 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better 376 | return(pred) 377 | } 378 | RESULTS_FILES = c("data/SF1.scores", 379 | "data/SF2.scores", 380 | "data/SF3.scores", 381 | "data/SF4.scores", 382 | "data/SF5.scores" 383 | ) 384 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions 385 | length(valid_molnames) 386 | ACTIVITIES_FILE = "data/activities" 387 | NAMES <- c("Scoring Function 1", "Sscoring Function 2", "Scoring Function 3", "Scoring Function 4", 388 | "Scoring Function 5") 389 | par(cex.main=2.0, cex.lab=1.5) ; # <== CHANGE ME 390 | library("ROCR") 391 | library("hash") 392 | COLORS <- rainbow(3*length(RESULTS_FILES)) 393 | COLORS <- COLORS[seq(3, length(COLORS), 3)] 394 | for (i in 1:length(RESULTS_FILES)) { 395 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 396 | perf = performance(pred, 'tpr', 'fpr') 397 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 398 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 399 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3) 400 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3) 401 | } 402 | AUCs = rep(0, length(RESULTS_FILES)) 403 | LNAMES = rep(0, length(RESULTS_FILES)) 404 | for (i in 1:length(RESULTS_FILES)) { 405 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILES[i]) 406 | auc = performance(pred, measure='auc') 407 | AUCs[i] = sprintf("%.3f", auc@y.values) 408 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 409 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 410 | } 411 | AUCs = rep(0, length(RESULTS_FILES)) 412 | LNAMES = rep(0, length(RESULTS_FILES)) 413 | for (i in 1:length(RESULTS_FILES)) { 414 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE) 415 | auc = performance(pred, measure='auc') 416 | AUCs[i] = sprintf("%.3f", auc@y.values) 417 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 418 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 419 | } 420 | AUCs = rep(0, length(RESULTS_FILES)) 421 | LNAMES = rep(0, length(RESULTS_FILES)) 422 | for (i in 1:length(RESULTS_FILES)) { 423 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 424 | auc = performance(pred, measure='auc') 425 | AUCs[i] = sprintf("%.3f", auc@y.values) 426 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 427 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 428 | } 429 | abline(a=0, b=1, lty=2, lwd=3, col="black") 430 | for (i in 1:length(RESULTS_FILES)) { 431 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 432 | perf = performance(pred, 'tpr', 'fpr') 433 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 434 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 435 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3) 436 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3) 437 | } 438 | AUCs = rep(0, length(RESULTS_FILES)) 439 | LNAMES = rep(0, length(RESULTS_FILES)) 440 | for (i in 1:length(RESULTS_FILES)) { 441 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 442 | auc = performance(pred, measure='auc') 443 | AUCs[i] = sprintf("%.3f", auc@y.values) 444 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 445 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 446 | } 447 | abline(a=0, b=1, lty=2, lwd=3, col="black") 448 | pred 449 | for (i in 1:length(RESULTS_FILES)) { 450 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 451 | perf = performance(pred, 'tpr', 'fpr') 452 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 453 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 454 | } 455 | plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") 456 | library("ROCR") 457 | library("hash") 458 | for (i in 1:length(RESULTS_FILES)) { 459 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 460 | perf = performance(pred, 'tpr', 'fpr') 461 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 462 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 463 | } 464 | AUCs = rep(0, length(RESULTS_FILES)) 465 | LNAMES = rep(0, length(RESULTS_FILES)) 466 | for (i in 1:length(RESULTS_FILES)) { 467 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 468 | auc = performance(pred, measure='auc') 469 | AUCs[i] = sprintf("%.3f", auc@y.values) 470 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 471 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 472 | } 473 | abline(a=0, b=1, lty=2, lwd=3, col="black") 474 | for (i in 1:length(RESULTS_FILES)) { 475 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 476 | perf = performance(pred, 'tpr', 'fpr') 477 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 478 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 479 | } 480 | AUCs = rep(0, length(RESULTS_FILES)) 481 | LNAMES = rep(0, length(RESULTS_FILES)) 482 | for (i in 1:length(RESULTS_FILES)) { 483 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 484 | auc = performance(pred, measure='auc') 485 | AUCs[i] = sprintf("%.3f", auc@y.values) 486 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 487 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 488 | } 489 | legend(0.62, 0.28, legend=LNAMES, 490 | col=COLORS, lty=1, cex=0.8) 491 | COLORS <- rainbow(3*length(RESULTS_FILES)) 492 | COLORS <- COLORS[seq(3, length(COLORS), 3)] 493 | for (i in 1:length(RESULTS_FILES)) { 494 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 495 | perf = performance(pred, 'tpr', 'fpr') 496 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") } 497 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 498 | } 499 | AUCs = rep(0, length(RESULTS_FILES)) 500 | LNAMES = rep(0, length(RESULTS_FILES)) 501 | for (i in 1:length(RESULTS_FILES)) { 502 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 503 | auc = performance(pred, measure='auc') 504 | AUCs[i] = sprintf("%.3f", auc@y.values) 505 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 506 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 507 | } 508 | abline(a=0, b=1, lty=2, lwd=3, col="black") 509 | # Add legend 510 | legend(0.62, 0.28, legend=LNAMES, 511 | col=COLORS, lty=1, cex=0.8) 512 | knitr::read_chunk("function_definitions.r") 513 | -------------------------------------------------------------------------------- /ROC_curves/README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Create ROC curves in R" 3 | author: "Thomas Evangelidis" 4 | date: "20/10/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE) 10 | ``` 11 | 12 | Paste below all your score files. The first two columns must be the molname and the score (**the lowest score the better, not the opposite!**). The code will ignore the rest of the columns, if they exist. 13 | 14 | ```{r scores} 15 | RESULTS_FILES = c("data/SF1.scores", 16 | "data/SF2.scores", 17 | "data/SF3.scores", 18 | "data/SF4.scores", 19 | "data/SF5.scores" 20 | ) 21 | 22 | ``` 23 | 24 | Paste below the file with molecular bioactivities of all molecules. First column must be the molname and the second its bioactivity (1 or 0). 25 | 26 | ```{r activities} 27 | ACTIVITIES_FILE = "data/activities" 28 | ``` 29 | 30 | Write the names that will be shown in the legend. 31 | ```{r names} 32 | NAMES <- c("Scoring Function 1", "Sscoring Function 2", "Scoring Function 3", "Scoring Function 4", 33 | "Scoring Function 5") 34 | ``` 35 | 36 | Define (i) a function to find the molnames that were scored successfully by all given scoring functions, and (ii) a function to read a score file and a file with the bioactivity of each molecule (0 or 1). 37 | 38 | 40 | ```{r,cache = FALSE, echo=FALSE} 41 | knitr::read_chunk("function_definitions.r") 42 | ``` 43 | 44 | 45 | ```{r common_molnames} 46 | ``` 47 | ```{r count_actives_inactives} 48 | ``` 49 | ```{r read_scores} 50 | ``` 51 | 52 | Load the required libraries and create ROC curve plots. Adjust the font sizes (first line below). 53 | 54 | ```{r plot} 55 | par(cex.main=2.0, cex.lab=1.5) ; # <== CHANGE ME 56 | 57 | library("ROCR") 58 | library("hash") 59 | 60 | COLORS <- rainbow(3*length(RESULTS_FILES)) 61 | COLORS <- COLORS[seq(3, length(COLORS), 3)] 62 | 63 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions; only these will be used for plotting 64 | # Count and print the number of actives and inactives that are common in all score files 65 | num <- count_actives_inactives(ACTIVITIES_FILE, valid_molnames) 66 | for (i in 1:length(RESULTS_FILES)) { 67 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 68 | perf = performance(pred, 'tpr', 'fpr') 69 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], 70 | main=paste("ROC Curves (", num$actives, "actives,", num$inactives, "inactives)")) } 71 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) } 72 | } 73 | 74 | AUCs = rep(0, length(RESULTS_FILES)) 75 | LNAMES = rep(0, length(RESULTS_FILES)) 76 | for (i in 1:length(RESULTS_FILES)) { 77 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames) 78 | auc = performance(pred, measure='auc') 79 | AUCs[i] = sprintf("%.3f", auc@y.values) 80 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")") 81 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values)) 82 | } 83 | 84 | abline(a=0, b=1, lty=2, lwd=3, col="black") 85 | # Add legend 86 | legend(0.62, 0.28, legend=LNAMES, 87 | col=COLORS, lty=1, cex=0.8) 88 | ``` 89 | 90 | 91 | The number in parenthesis after each name in the legend is the area under the ROC curve. 92 | 93 | **IMPORTANT: You will need to adjust the last line `legend(...)` to have a legend with the right names at the right position.** 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /ROC_curves/README.md: -------------------------------------------------------------------------------- 1 | [Click here for the ROC curves tutorial](http://htmlpreview.github.io/?https://github.com/tevang/tutorials/blob/master/ROC_curves/README.html) 2 | 3 | ![Alt text](data/ROC_curves.png "ROC curves plot") -------------------------------------------------------------------------------- /ROC_curves/data/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/ROC_curves/data/.Rhistory -------------------------------------------------------------------------------- /ROC_curves/data/ROC_curves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/ROC_curves/data/ROC_curves.png -------------------------------------------------------------------------------- /ROC_curves/function_definitions.r: -------------------------------------------------------------------------------- 1 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier 2 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers. 3 | 4 | library("ROCR") 5 | library("hash") 6 | 7 | 8 | ## ---- common_molnames 9 | common_molnames <- function(RESULTS_FILES) { 10 | " 11 | A function to find the molnames that are common in all score files in order to compare the scoring 12 | functions properly. 13 | " 14 | x = read.table(RESULTS_FILES[1], header = TRUE) 15 | colnames(x)[1] = "molname" ; # add a column header and operate on x$molname, otherwise 'inersect' fails! 16 | x$molname <- tolower(x$molname) 17 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames 18 | for (i in 2:length(RESULTS_FILES)) { 19 | x = read.table(RESULTS_FILES[i], header = TRUE) 20 | colnames(x)[1] = "molname" 21 | x$molname <- tolower(x$molname) 22 | valid_molnames <- intersect(valid_molnames, x$molname) 23 | } 24 | return(valid_molnames) 25 | } 26 | 27 | ## ---- count_actives_inactives 28 | count_actives_inactives <- function(ACTIVITIES_FILE, valid_molnames) { 29 | a = read.table(ACTIVITIES_FILE) 30 | colnames(a)[1] = "molname" 31 | colnames(a)[2] = "label" 32 | a$molname <- tolower(a$molname) 33 | actives <- a$molname[a$label==1] 34 | inactives <- a$molname[a$label==0] 35 | active_num <- length(actives[actives %in% valid_molnames]) 36 | inactive_num <- length(inactives[actives %in% valid_molnames]) 37 | paste("The molecules that have been scored by all scoring functions consist of", active_num, "actives", 38 | " and", inactive_num, "inactives.") 39 | num <- list(actives=active_num, inactives=inactive_num) 40 | return(num) 41 | } 42 | 43 | ## ---- read_scores 44 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) { 45 | " 46 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will 47 | be considered. 48 | " 49 | x = read.table(RESULTS_FILE, header = TRUE) 50 | colnames(x)[1] = "molname" 51 | colnames(x)[2] = "score" 52 | x$molname <- tolower(x$molname) 53 | # ignore the other columns 54 | score_dict = hash() 55 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] } 56 | 57 | a = read.table(ACTIVITIES_FILE) 58 | colnames(a)[1] = "molname" 59 | colnames(a)[2] = "label" 60 | a$molname <- tolower(a$molname) 61 | label_dict = hash() 62 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] } 63 | scores <- rep(0, length(valid_molnames)) 64 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents 65 | i = 1 66 | for (molname in valid_molnames) { 67 | scores[i] <- score_dict[[molname]] 68 | labels[i] <- label_dict[[molname]] 69 | i <- i+1 70 | } 71 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better 72 | return(pred) 73 | } 74 | -------------------------------------------------------------------------------- /compare_atomic_properties/README.md: -------------------------------------------------------------------------------- 1 | # Compare partial charges between multiple ligand conformations. 2 | #### the same code can be adapted to visualize other atomic properties, like lipophilicity, refraction, donor/acceptor of H-bond, electronegativity, van der Waals attraction/repulsion 3 | In this tutorial we will exploit the new RDKit's drawing code to visualize the differences in charge distribution induced by 4 | conformational changes of a molecule. To be fair, the conformations in this example come from docking with Glide (100 docking poses) 5 | and the partial charges were calculated individually for each pose on the PM6 semi-empirical QM level of theory, upon some subtle 6 | geometry optimization. 7 | 8 | 9 | #### 1. Set global variables 10 | ```python 11 | mol2_file = "data/compound_stereo1_ion1_tau1.COSMO_PM6.mol2" # multi-molecule MOL2 file with PM6 partial charges 12 | ``` 13 | #### 2. Define the necessary functions. 14 | 15 | ```python 16 | import os 17 | import io 18 | from PIL import Image 19 | import numpy as np 20 | from lib.modlib.pybel import Outputfile, readfile 21 | from rdkit import Chem 22 | from rdkit.Chem import Draw 23 | from rdkit.Chem.Draw import SimilarityMaps 24 | from rdkit.Chem.rdDepictor import Compute2DCoords 25 | 26 | def show_png(data): 27 | bio = io.BytesIO(data) 28 | img = Image.open(bio) 29 | img = img.resize((2000, 2000), Image.ANTIALIAS) 30 | return img 31 | 32 | def mol2_to_sdf(mol2_file, sdf_file=None): 33 | """ 34 | Method to convert a multi-mol2 file to sdf format with an extra property that stores the partial charges. 35 | It invokes PyBel's mol2 file loader because RDKit's mol2 file loader that not read the partial charges. 36 | """ 37 | 38 | if sdf_file == None: 39 | sdf_file = os.path.splitext(mol2_file)[0] + ".sdf" 40 | largeSDfile = Outputfile("sdf", sdf_file, overwrite=True) 41 | for mymol in readfile("mol2", mol2_file): 42 | 43 | # Add the Molecular (Free) Energy in a new property field in the sdf file 44 | if 'Comment' in mymol.data.keys() and "Energy:" in mymol.data['Comment']: 45 | mymol.data["molecular energy"] = float(mymol.data['Comment'].split()[1]) 46 | del mymol.data['Comment'] # if you keep this the energy will be writen under the molname in the sdf 47 | # Add the Partial Charges of the atoms separated by ',' in a new property field in the sdf file 48 | charges = [str(a.partialcharge) for a in mymol.atoms] 49 | if len(set(charges)) > 1: 50 | mymol.data["partial charge"] = ",".join(charges) 51 | 52 | # Write this molecules with the extra property fields into the sdf file 53 | largeSDfile.write(mymol) 54 | 55 | largeSDfile.close() 56 | 57 | def load_sdf_with_charges(sdf_file): 58 | """ 59 | This method reads in an sdf file with partial charges as an extra property, and returns an RDKit mol object with 60 | an extra atomic property named "partial charge". 61 | """ 62 | # NOTE: for simplicity, because every conformer has different atomic charges and the RDKit MOL object does not 63 | # store varying atomic property values for each conformer, I will save each conformer into a separate 64 | # RDKit MOL object. 65 | mol_list = [] # list of RDKit MOL objects (conformers) of the same molecule but with different atomic charges 66 | suppl = Chem.SDMolSupplier(sdf_file, removeHs=False, sanitize=False) 67 | for mol in suppl: 68 | if 'partial charge' in mol.GetPropNames(): 69 | formal_charge = int(np.sum( [float(c) for c in mol.GetProp('partial charge').split(',')] ).round()) 70 | for atom, charge in zip(mol.GetAtoms(), mol.GetProp('partial charge').split(',')): 71 | # By default the Atom object does not have a property for its partial charge, therefore add one 72 | atom.SetDoubleProp('partial charge', float(charge)) # property value must be string 73 | atom.SetFormalCharge(formal_charge) 74 | mol_list.append(mol) 75 | return mol_list 76 | 77 | sdf_file = os.path.splitext(mol2_file)[0] + ".sdf" 78 | mol2_to_sdf(mol2_file=mol2_file, sdf_file=sdf_file) 79 | # Load the multi-molecule sdf file with partial charges 80 | mol_list = load_sdf_with_charges(sdf_file) 81 | 82 | ``` 83 | 84 | #### 3. First we will compare the partial charges of two distinct conformers. 85 | I will pick up two docking poses with noticeable differences in charge distribution. 86 | 87 | 88 | First conformer: 89 | 90 | ```python 91 | mol1 = mol_list[52] 92 | Compute2DCoords(mol1) # add 2D coordinates for better 2D image depiction 93 | charges1 = [a.GetDoubleProp("partial charge") for a in mol1.GetAtoms()] 94 | d = Chem.Draw.MolDraw2DCairo(400, 400) 95 | Chem.Draw.SimilarityMaps.GetSimilarityMapFromWeights(mol1, charges1, draw2d=d) 96 | d.FinishDrawing() 97 | img = show_png(d.GetDrawingText()) 98 | img.save("pose1.png", quality=95) 99 | ``` 100 | 101 | Second conformer: 102 | 103 | ```python 104 | mol2 = mol_list[61] 105 | Compute2DCoords(mol2) # add 2D coordinates for better 2D image depiction 106 | charges2 = [a.GetDoubleProp("partial charge") for a in mol2.GetAtoms()] 107 | d = Chem.Draw.MolDraw2DCairo(400, 400) 108 | # But draw the charges on the 1st conformer to be able to compare it visually 109 | Chem.Draw.SimilarityMaps.GetSimilarityMapFromWeights(mol1, charges2, draw2d=d) 110 | d.FinishDrawing() 111 | img = show_png(d.GetDrawingText()) 112 | img.save("pose2.png") 113 | ``` 114 | 115 | Conformer 1 | Conformer 2 116 | :-------------------------:|:-------------------------: 117 | ![](pose1.png) | ![](pose2.marked.png) 118 | 119 | The red arrows on conformer 2 show where the differences are locate. Mainly at the aromatic carbons and less at the polarized oxygen of the methyl phenyl ether. 120 | 121 | #### 4. Now lets visualize the average (unweighted) partial charges of each atom. 122 | 123 | ```python 124 | all_pose_charges = [] 125 | for mol in mol_list: 126 | all_pose_charges.append( [a.GetDoubleProp("partial charge") for a in mol.GetAtoms()] ) 127 | all_pose_charges = np.array(all_pose_charges) 128 | mean_pose_charges = all_pose_charges.mean(axis=0) 129 | std_pose_charges = all_pose_charges.std(axis=0) 130 | 131 | d = Draw.MolDraw2DCairo(400, 400) 132 | SimilarityMaps.GetSimilarityMapFromWeights(mol1,list(mean_pose_charges),draw2d=d) 133 | d.FinishDrawing() 134 | img = show_png(d.GetDrawingText()) 135 | img.save("mean_pose.png") 136 | ``` 137 | 138 | ![](mean_pose.png) 139 | 140 | 141 | #### 5. Finally we can see in which atoms are located most of the differences in the partial charge value by plotting the standard deviations. 142 | 143 | ```python 144 | d = Draw.MolDraw2DCairo(400, 400) 145 | SimilarityMaps.GetSimilarityMapFromWeights(mol1,list(std_pose_charges),draw2d=d) 146 | d.FinishDrawing() 147 | img = show_png(d.GetDrawingText()) 148 | img.save("std_pose.png") 149 | ``` 150 | 151 | ![](std_pose.png) 152 | 153 | Notice that in all 100 docking poses, charge differences at the aromatic rings are not noticeable, unlike the two docking pose that we compared before. -------------------------------------------------------------------------------- /compare_atomic_properties/mean_pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/mean_pose.png -------------------------------------------------------------------------------- /compare_atomic_properties/pose1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/pose1.png -------------------------------------------------------------------------------- /compare_atomic_properties/pose2.marked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/pose2.marked.png -------------------------------------------------------------------------------- /compare_atomic_properties/pose2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/pose2.png -------------------------------------------------------------------------------- /compare_atomic_properties/std_pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/std_pose.png -------------------------------------------------------------------------------- /create_alternative_protonations/README.md: -------------------------------------------------------------------------------- 1 | # CREATE ALTERNATIVE PROTONATION STATES OF A RECEPTOR 2 | 3 | This is a Python script to create all alternative protonation state combinations of a protein given a ligand and a specified radius. It could be useful in case you have a receptor but you are not sure about the protonation states of some residues in the binding site and you want to do docking, MD, or any other structure-based drug design method using all of alternative receptor protonations. 4 | 5 | The script must be executed with [PyChimera](https://pychimera.readthedocs.io/en/latest/), a Python wrappen for [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/), which searches for protonatable standard residues (ASP, GLU, HIS) around the ligand, finds all possible combinations of protonation states, and writes a pdb file for each combination. Since the number of combinations of more than 6 protonatable residues becomes very large, the user can fix some residues to a give protonated/unprotonated state. See the tutorial below. You can also get the example usage by typing `protonate_receptor.py -h`. 6 | 7 | ## INSTALLATION 8 | ``` 9 | conda create -n pychimera -c insilichem pychimera # this will create a virtual environment for PyChimera and modify your ~/.bashrc thus you have to source it again 10 | source ~/.bashrc 11 | conda activate pychimera 12 | pip install pychimera 13 | pip install --upgrade numpy # upgrade numpy to match the version that pychimera needs 14 | ``` 15 | Finally, add the path to `protonate_receptor.py` to your `PATH` variable and create a symbolic link, like `sudo ln -s [full path to protonate_receptor.py] /usr/local/bin/`. 16 | 17 | ## TUTORIAL 18 | 19 | Download the pdb structure of HIV-1 protease complexed with a tripeptide inhibitor from [PDB](https://www.rcsb.org/structure/1A30). Save the receptor coordinates to file "1a30_receptor.pdb" and the ligand coordinates to a file "1a30_ligand.sdf". Then, list all protonatable residues within 4 Angstroms from the ligand. 20 | 21 | ``` 22 | source ~/.bashrc # necessary only if you haven't sourced the lated modified version 23 | conda activate pychimera 24 | pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -list 25 | ``` 26 | 27 | You should get that the protonatable residues are: **ASP_25.B ASP_25.A ASP_29.A ASP_30.A**. We want to keep **ASP_29.A ASP_30.A** fixed to the unprotonated state and create alternative protonations for all the rest (namely the catalytic dyad **ASP_25.B and ASP_25.A**). 28 | 29 | `pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -fix ASP_29.A -fix ASP_30.A` 30 | 31 | The script will generate 4 files in the current directory, from which we are interested only in the following 3: 32 | ``` 33 | 1a30_protein_ASP25A_ASP25B.pdb 34 | 1a30_protein_ASH25A_ASP25B.pdb 35 | 1a30_protein_ASP25A_ASH25B.pdb 36 | ``` 37 | which correspond to the structures shown in the figure below. 38 | ![3 alternative protonation states of the catalytic dyad in the HIV-1 protease.](images/1a30_all_protonations.png) 39 | 40 | The doubly protonated catalytic dyad does not exist. In general, the acid driven catalysis requires that one of the members of the ASP dyad is ionized in order to 41 | activate the water molecule for the nucleophilic attack, while the second member needs to be protonated in order to enhance the electrophilic nature of the substrate 42 | carbonyl group. 43 | 44 | To deactivate the `pychimera` conda virtual environment and switch to your default: 45 | `source deactivate pychimera` 46 | -------------------------------------------------------------------------------- /create_alternative_protonations/images/1a30_all_protonations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/create_alternative_protonations/images/1a30_all_protonations.png -------------------------------------------------------------------------------- /create_alternative_protonations/protonate_receptor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__="Thomas Evangelidis" 4 | __email__="tevang3@gmail.com" 5 | 6 | 7 | 8 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 9 | from itertools import combinations, permutations 10 | import sys, gc, os 11 | from operator import itemgetter 12 | from ete3 import Tree 13 | 14 | 15 | ## Parse command line arguments 16 | def cmdlineparse(): 17 | parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=""" 18 | DESCRIPTION: 19 | 20 | This is a Python script to create all alternative protonation state combinations of a protein given a ligand and a specified radius. 21 | It could be useful in case you have a receptor but you are not sure about the protonation states of some residues in the binding site 22 | and you want to do docking, MD, or any other structure-based drug design method using all of alternative receptor protonations. 23 | 24 | The script must be executed with PyChimera, a Python wrappen of UCSF Chimera, which searches for protonatable standard residues (ASP, GLU, HIS) 25 | around the ligand, finds all possible combinations of protonation states, and writes a pdb file for each combination. Since the number 26 | of combinations of more than 6 protonatable residues becomes very large, the user can fix some residues to a give protonated/unprotonated state. 27 | See the examples below. You can also get the same info by typing `protonate_receptor.py -h`. 28 | 29 | TODO: add optional support for LYS and CYS protonated forms. 30 | https://www.cgl.ucsf.edu/chimera/docs/ContributedSoftware/addh/addh.html 31 | 32 | """, 33 | epilog=""" 34 | ### EXAMPLE 1: list all protonatable residues within 4 Angstroms from the ligand. 35 | pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -list 36 | 37 | ### EXAMPLE 2: keep ASP_29.A ASP_30.A fixed to the unprotonated state and create alternative protonations for all the rest. 38 | pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -fix ASP_29.A -fix ASP_30.A 39 | 40 | ### EXAMPLE 3: protonate all protein residues. 41 | pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf 42 | 43 | """) 44 | parser.add_argument("-list", dest="LIST_PROTONATABLE", required=False, default=False, action='store_true', 45 | help="List the protonatable residues within the binding site and exit.") 46 | parser.add_argument("-rec", dest="RECEPTOR", required=False, default=None, type=str, 47 | help="pdb file with the apo form of the receptor.") 48 | parser.add_argument("-lig", dest="LIGAND", required=False, default=None, type=str, 49 | help="sdf or mol2 file with optimized ligand structure from which to find the binding site residues.") 50 | parser.add_argument("-r", dest="RADIUS", required=False, default=8.0, type=float, 51 | help="The distance around the ligand within which residues will be protonated. Use '-r 0' if you " 52 | "want to protonate the whole protein. Default: %(default)s.") 53 | # parser.add_argument("-p", dest="PYTHONPATH", required=False, default=None, type=str, 54 | # help="the PYTHONPATH environment variable.") 55 | parser.add_argument("-fix", dest="FIXED_STATES", required=False, default=[], type=str, action='append', 56 | help="the residue to fixed to one state. E.g. '-fix ASP_30.A GLH_24.B' will NOT produce any structure with" 57 | "ASH_30.A or GLU_24.B. This is useful when you have >4 protonatable residues within the binding site" 58 | " and you want to reduce the number of combinations.") 59 | parser.add_argument("-flipcooh", dest="FLIP_COOH", required=False, default=False, action='store_true', 60 | help="Flip the carboxylic group of ASH and GLH by 180 degrees. This will produced an order of magnitude " 61 | "more combinations, therefore use with caution. For example, if you do an energy minimization " 62 | "before scoring, it will hopefully suffice (hopefully it will flip the COOH if needed). If not, then " 63 | "use this option.") 64 | parser.add_argument("-dockprep", dest="DOCKPREP", required=False, default=False, action='store_true', 65 | help="Prepare the receptor for docking or MD. Namely:" 66 | "1) delete water molecules, " 67 | "2) repaire truncated sidechains, " 68 | "3) add hydrogens, " 69 | "4) assign partial charges (protein amberSB14, ligand AM1). " 70 | "Also for each protonation state combination write a pdb file that contains the protein+ligand and the first 2 lines" 71 | " will be comments that state the net charge of the receptor and the ligand, respectively.") 72 | 73 | args = parser.parse_args() 74 | return args 75 | 76 | ########################################################## FUNCTION DEFINITIONS #################################################### 77 | 78 | def write_protonated_structure(protonations): 79 | 80 | global residues, args 81 | 82 | id2state = {} 83 | pdb = args.RECEPTOR.replace(".pdb", "") 84 | for rstate in protonations: 85 | state, resid = rstate.split('_') 86 | id2state[resid] = state 87 | if args.FLIP_COOH: 88 | state = state.replace("1", "a").replace("2", "b") 89 | pdb += "_%s%s" % (state , resid.replace(".","")) 90 | pdb += ".pdb" 91 | # Alter the protonation states 92 | ASH_GLH_rstates = [] 93 | for r in residues: 94 | try: 95 | r.type = id2state[str(r.id)][:3] # works for both ASH and ASH1 96 | if id2state[str(r.id)][:3] in ["ASH", "GLH"]: 97 | ASH_GLH_rstates.append((str(r.id), id2state[str(r.id)])) 98 | except KeyError: 99 | continue 100 | 101 | if args.FLIP_COOH: 102 | for resid, state in ASH_GLH_rstates: 103 | if state == 'GLH1': # rename the atoms, because by default proton goes to O[DE]2 but we want it in O[DE]1 104 | rc("setattr a name XX :%s@OE1" % resid) 105 | rc("setattr a name OE1 :%s@OE2" % resid) 106 | rc("setattr a name OE2 :%s@XX" % resid) 107 | if state == 'ASH1': # rename the atoms, because by default proton goes to O[DE]2 but we want it in O[DE]1 108 | rc("setattr a name XX :%s@OD1" % resid) 109 | rc("setattr a name OD1 :%s@OD2" % resid) 110 | rc("setattr a name OD2 :%s@XX" % resid) 111 | 112 | # Write the structure 113 | rc("del H") 114 | rc("addh") 115 | if args.DOCKPREP: # prepend net charges to the pdb file 116 | pdb = pdb.replace(".pdb", "_complex.pdb") 117 | rc("combine #0,1 name complex modelId 2") 118 | rc("write format pdb #2 %s" % pdb) 119 | rc("delete #2") 120 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 121 | rc("addcharge std spec #0") # re-add ff14SB charges to the protonated receptor only (the ligand protonation did not change) 122 | rec_charge = estimateFormalCharge(models[0].atoms) 123 | lig_charge = estimateFormalCharge(models[1].atoms) 124 | # Neutralize system 125 | net_charge = rec_charge+lig_charge 126 | if net_charge < 0: 127 | initiateAddions(q, "Na+", "neutralize", chimera.replyobj.status) 128 | elif net_charge > 0: 129 | initiateAddions(models, "Cl-", "neutralize", chimera.replyobj.status) 130 | with open(pdb, "r+") as f: 131 | s = f.read() 132 | f.seek(0) 133 | f.write("# receptor net charge = %i\n# ligand net charge = %i\n" % (-lig_charge, lig_charge)) # after system neutralization 134 | f.write(s) 135 | else: 136 | rc("write format pdb #0 %s" % pdb) 137 | 138 | if args.FLIP_COOH: # restore the original O[DE] names 139 | for resid, state in ASH_GLH_rstates: 140 | if state == 'GLH1': # rename the atoms, because by default proton goes to O[DE]2 but we want it in O[DE]1 141 | rc("setattr a name XX :%s@OE1" % resid) 142 | rc("setattr a name OE1 :%s@OE2" % resid) 143 | rc("setattr a name OE2 :%s@XX" % resid) 144 | if state == 'ASH1': # rename the atoms, because by default proton goes to O[DE]2 but we want it in O[DE]1 145 | rc("setattr a name XX :%s@OD1" % resid) 146 | rc("setattr a name OD1 :%s@OD2" % resid) 147 | rc("setattr a name OD2 :%s@XX" % resid) 148 | 149 | def populate_leaves(Peptide_Tree, resid, residue_states): 150 | """ 151 | FUNCTION that adds new branches to the leaves of the Tree. 152 | ARGUMENTS: 153 | Peptide_Tree: The Tree structure with connectivities 154 | RETURNS: 155 | (Peptide_Tree, BOOLEAN): A tuple with elements the input Tree structure with new branches (if applicable), and a BOOLEAN value which is True if the function added 156 | new leaves to the Tree, or False otherwise 157 | """ 158 | 159 | number_of_new_leaves = 0 160 | leaves = list(Peptide_Tree.iter_leaves()) 161 | for leaf in leaves: 162 | try: 163 | for state in residue_states[resid]: 164 | new_child = leaf.add_child(name=resid) # add a new brach to the current TOCSY add index (leaf) with length the respective probability 165 | new_child.add_features(state=state) 166 | number_of_new_leaves += 1 167 | # print "DEBUG: adding connection: ",name,"-->",NOESYaaindex 168 | except(KeyError, IndexError): 169 | continue 170 | 171 | # print Peptide_Tree.get_ascii(show_internal=True, compact=False, attributes=["name", "dist", "occupancy", "numOfResonances"]) 172 | # print Peptide_Tree.get_ascii(show_internal=True, compact=False) 173 | if number_of_new_leaves > 0: 174 | return (Peptide_Tree, True) 175 | else: 176 | return (Peptide_Tree, False) 177 | 178 | 179 | def build_Protonation_Tree(peptide, residue_states): 180 | 181 | print("Building Protonation Trees from peptide %s" % list(peptide)) 182 | expand_tree = True 183 | Peptide_Tree = Tree() 184 | Root = Peptide_Tree.get_tree_root() 185 | Root.add_feature("name", "root") 186 | Root.add_feature("state", "delete") 187 | level = 0 188 | sys.stdout.write("Expanding tree from level ") 189 | while level < len(peptide): 190 | sys.stdout.write(str(level) + " ") 191 | sys.stdout.flush() 192 | Peptide_Tree, expand_tree = populate_leaves(Peptide_Tree, peptide[level], residue_states) 193 | level += 1 194 | # Print the Tree 195 | # print Peptide_Tree.get_ascii(show_internal=True, compact=False) 196 | # print Peptide_Tree.get_ascii(show_internal=True, compact=False, attributes=["name", "dist", "occupancy", "numOfResonances"]) 197 | 198 | print("\nSaving protonations from Tree...") 199 | 200 | all_protonations_set = set() 201 | for leaf in Peptide_Tree.iter_leaves(): 202 | protonations = [] 203 | resid, chain = leaf.name.split(".") 204 | protonations.append((leaf.state, resid, chain)) 205 | for ancestor in leaf.get_ancestors()[:-1]: # skip the root 206 | resid, chain = ancestor.name.split(".") 207 | protonations.append((ancestor.state, resid, chain)) 208 | protonations.sort(key=itemgetter(2, 1)) # sort by chain and resid to avoid permutations of the same combination 209 | protonations = tuple(["%s_%s.%s" % (t[0], t[1], t[2]) for t in protonations]) 210 | all_protonations_set.add(protonations) 211 | del protonations 212 | del ancestor 213 | del leaf 214 | # Peptide_Tree = None 215 | del Peptide_Tree 216 | gc.collect() 217 | return all_protonations_set 218 | 219 | ###################################################################################################################################### 220 | 221 | 222 | if __name__ == "__main__": 223 | args = cmdlineparse() 224 | 225 | from chimera import runCommand as rc 226 | from chimera.selection import currentResidues 227 | 228 | 229 | resname_states_dict = { 230 | "GLU": ["GLU", "GLH"], 231 | "ASP": ["ASP", "ASH"] 232 | } 233 | 234 | if args.FLIP_COOH: 235 | resname_states_dict["GLU"] = ["GLU", "GLH1", "GLH2"] 236 | resname_states_dict["ASP"] = ["ASP", "ASH1", "ASH2"] 237 | 238 | rc("open %s" % args.RECEPTOR) # load the receptor 239 | rc("open %s" % args.LIGAND) # load the ligand 240 | 241 | if args.DOCKPREP: 242 | import chimera 243 | from Addions import initiateAddions 244 | from DockPrep import prep 245 | from AddCharge import estimateFormalCharge 246 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 247 | print("Preparing receptor for docking and calculating ligand AM1 charges (may be slow).") 248 | prep(models, nogui=True, method='am1') 249 | 250 | # Select the residues to be protonated 251 | if args.RADIUS > 0: 252 | rc("sel #1 z<%f & ~ #1" % args.RADIUS) 253 | elif args.RADIUS == 0: 254 | rc("sel #0") 255 | residues = currentResidues() # get the residue of the pocket 256 | residue_states = {} 257 | protonatable_resids = [] 258 | protonatable_resnames = [] 259 | for r in residues: 260 | if r.type in ["GLU", "GLH"]: 261 | states = resname_states_dict["GLU"] 262 | protonatable_resids.append(str(r.id)) 263 | protonatable_resnames.append(r.type) 264 | elif r.type in ["ASP", "ASH"]: 265 | states = resname_states_dict["ASP"] 266 | protonatable_resids.append(str(r.id)) 267 | protonatable_resnames.append(r.type) 268 | elif r.type in ["HIS", "HIE", "HID", "HIP"]: 269 | states = ["HIE", "HID", "HIP"] 270 | protonatable_resids.append(str(r.id)) 271 | protonatable_resnames.append(r.type) 272 | else: 273 | states = [r.type] 274 | residue_states[str(r.id)] = states 275 | 276 | if args.LIST_PROTONATABLE: 277 | protonatable_rstates = ["%s_%s" % (name,id) for name,id in zip(protonatable_resnames, protonatable_resids)] 278 | print("\n~~~ The protonatable residues within %.3f Angstroms from the ligand are: %s\n" % (args.RADIUS, " ".join(protonatable_rstates))) 279 | sys.exit(0) 280 | 281 | 282 | for rstate in args.FIXED_STATES: 283 | state, resid = rstate.split('_') 284 | residue_states[resid] = [state] 285 | try: 286 | protonatable_resids.remove(resid) 287 | print("Fixed resid %s to %s state." % (resid, state)) 288 | except ValueError: 289 | print("Warning: residue %s is not within the specified distance from the ligand or is not a valid residue, " \ 290 | "therefore it will be ignored." % rstate) 291 | 292 | all_protonations = set() 293 | for peptide in permutations(protonatable_resids, len(protonatable_resids)): 294 | all_protonations = all_protonations.union(build_Protonation_Tree(peptide, residue_states)) 295 | 296 | # Finally create and write the protonated structures 297 | all_protonations = list(all_protonations) 298 | all_protonations.sort(key=lambda x: x.count) 299 | for protonations in all_protonations: 300 | print("Writing structure with the following protonation states: ", protonations) 301 | write_protonated_structure(protonations) 302 | -------------------------------------------------------------------------------- /dockprep/README.md: -------------------------------------------------------------------------------- 1 | This is a Python script to prepare a receptor-ligand complex for scoring (e.g. for PM6/COSMO scoring) using [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/). It takes as input either (i) the protein-ligand complex in one pdb file, or (ii) the receptor in a pdb file and the ligand in a separate file of any format (pdb, mol, mol2, sdf). The output is always a pdb file of the protein-ligand complex in which the residue name of the ligand is "LIG". 2 | 3 | If you encounter problems with the input pdb file then try correcting it using one of the following programs: 4 | 1) pdb4amber from AmberTools (https://github.com/Amber-MD/pdb4amber) 5 | 2) pdbfixer (https://github.com/pandegroup/pdbfixer) 6 | Sometimes you may need to rename atoms manually in Chimera (e.g. the N- and C-terminal caps), but this is out of the scope of this script. 7 | 8 | 9 | For a full list of options run `dockprep.py -h`. 10 | 11 | 12 | You can launch the script either using [PyChimera](https://pychimera.readthedocs.io/en/latest/) (to install it read this [tutorial](https://github.com/tevang/tutorials/tree/master/create_alternative_protonations)) 13 | ```bash 14 | pychimera $(which dockprep.py) -rec example_files/3K5C-BACE.pdb -lig $mol -cmethod gas -neut 15 | ``` 16 | or the vanilla UCSF Chimera executable. 17 | ```bash 18 | chimera --nogui --nostatus --script "$(which dockprep.py) -rec example_files/3K5C-BACE.pdb -lig example_files/3K5C-BACE_1.mol -cmethod gas -neut" 19 | ``` 20 | To prepare multiple protein-ligand complexes with dockprep.py **in parallel** on the Unix Shel, first create a file with all the individual commands like this: 21 | ```bash 22 | for mol in $(ls example_files/*mol) 23 | do 24 | echo "chimera --nogui --nostatus --script \"$(which dockprep.py) -rec example_files/3K5C-BACE.pdb -lig $mol -cmethod gas -neut\"" 25 | done > commands.txt 26 | ``` 27 | 28 | Then launch it using [GNU parallel script](https://www.gnu.org/software/parallel/) 29 | ``` bash 30 | parallel -j3 < commands.txt 31 | ``` 32 | 33 | Likewise, if you have the protein-ligand complex already saved in one file, you can you do all the above like this: 34 | ```bash 35 | pychimera $(which dockprep.py) -complex complex.pdb -cmethod gas -neut 36 | ``` -------------------------------------------------------------------------------- /dockprep/dockprep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__="Thomas Evangelidis" 4 | __email__="tevang3@gmail.com" 5 | 6 | 7 | 8 | import os, sys, traceback 9 | import random 10 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 11 | from collections import defaultdict 12 | from operator import itemgetter 13 | 14 | 15 | def cmdlineparse(): 16 | parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=""" 17 | DESCRIPTION: 18 | 19 | This is a Python script to prepare the receptor-ligand complex for scoring. If you encounter problems with the input pdb file 20 | then try correcting it using: 21 | 1) pdb4amber from AmberTools (https://github.com/Amber-MD/pdb4amber) 22 | 2) pdbfixer (https://github.com/pandegroup/pdbfixer) 23 | 24 | TODO: add optional support for LYS and CYS protonated forms. 25 | https://www.cgl.ucsf.edu/chimera/docs/ContributedSoftware/addh/addh.html 26 | 27 | """, 28 | epilog=""" 29 | ### EXAMPLE 1: 30 | pychimera $(which dockprep.py) -complex 3K5C-BACE_150_complex.pdb -cmethod gas -neut -lignetcharge -2 31 | 32 | 33 | """) 34 | parser.add_argument("-complex", dest="COMPLEX", required=False, default=None, type=str, 35 | help="pdb file with the holo form of the receptor.") 36 | parser.add_argument("--charge-method", dest="CHARGE_METHOD", required=False, default='gas', type=str, 37 | choices=['gas', 'am1'], 38 | help="Method to calculate charges fo the ligand. Default: %(default)s") 39 | parser.add_argument("-neut", dest="NEUTRALIZE", required=False, default=False, action='store_true', 40 | help="Neutralize the system by adding counter ions.") 41 | parser.add_argument("-stripions", dest="STRIP_IONS", required=False, default=False, action='store_true', 42 | help="Strip out all ions.") 43 | parser.add_argument("-keepchains", dest="KEEP_CHAINIDS", required=False, default=False, action='store_true', 44 | help="Keep the original chain IDs. Default is False, ligand and protein will be chain A for homology modeling.") 45 | parser.add_argument("-keephydrogens", dest="KEEP_PROTEIN_HYDROGENS", required=False, default=False, action='store_true', 46 | help="Keep the protein's hydrogens (default is to strip them).") 47 | parser.add_argument("-rec", dest="RECEPTOR", required=False, default=None, type=str, 48 | help="Instead of -complex give the pdb file with the apo form of the receptor.") 49 | parser.add_argument("-lig", dest="LIGAND", required=False, default=None, type=str, 50 | help="Instead of -complex give an sdf or mol2 file with optimized ligand structure from which to find the " 51 | "binding site residues.") 52 | parser.add_argument("-o", dest="OUT_PDB", required=False, default=None, type=str, 53 | help="Output PDB file name of the prepared protein-ligand complex.") 54 | parser.add_argument("-lignetcharge", dest="LIG_NET_CHARGE", required=False, default=None, type=int, 55 | help="Optionaly (but RECOMMENDED) give the net charge of the ligand, otherwise it will be estimated by Chimera.") 56 | parser.add_argument("-recnetcharge", dest="REC_NET_CHARGE", required=False, default=None, type=int, 57 | help="Optionaly (but RECOMMENDED) give the net charge of the receptor, otherwise it will be estimated by Chimera.") 58 | args = parser.parse_args() 59 | return args 60 | 61 | 62 | ################################################### FUNCTION DEFINITIONS ######################################### 63 | 64 | def standardize_terminal_protein_residues(receptor_pdb, molID="#0"): 65 | """ 66 | To prevent errors like 'ValueError: Cannot determine GAFF type for :11.A@HD14 (etc.)' raised by 67 | initiateAddions(), originating from termini capped by Shrodinger's Maestro or incomplete or missing protein 68 | residues, mutate the terminal residues to their original aa type (applies to every protein chain). 69 | """ 70 | print("Standardizing protein's terminal residues.") 71 | rc("sel %s & protein" % molID) 72 | chaindID_resids = defaultdict(list) 73 | for r in currentResidues(): 74 | chaindID_resids[r.id.chainId].append((r.id.position, r.type)) 75 | for chainID in chaindID_resids.keys(): 76 | chaindID_resids[chainID].sort(key=itemgetter(0)) 77 | # NOTE: the side-chain mutations (swapaa) were not necessary in the proteins tested so far. 78 | # rc("swapaa %s %s:%i.%s" % 79 | # (chaindID_resids[chainID][0][1], molID, chaindID_resids[chainID][0][0], str(chainID))) # N-term 80 | rc("del element.H & %s:%i.%s" % 81 | (molID, chaindID_resids[chainID][0][0], str(chainID))) 82 | # rc("swapaa %s %s:%i.%s" % 83 | # (chaindID_resids[chainID][-1][1], molID, chaindID_resids[chainID][-1][0], str(chainID))) # C-term 84 | rc("del element.H & %s:%i.%s" % 85 | (molID, chaindID_resids[chainID][-1][0], str(chainID))) 86 | # Only if you write and load the PDB then Chimera will reset the valence of the N-terminal amide and 87 | # thus will not again the H1,H2,H3 which cause the error. 88 | rc("write format pdb #0 " + receptor_pdb.replace(".pdb", "_tmp.pdb")) 89 | rc("del #0") 90 | rc("open %s %s" % (molID, receptor_pdb.replace(".pdb", "_tmp.pdb"))) 91 | os.remove(receptor_pdb.replace(".pdb", "_tmp.pdb")) 92 | 93 | ################################################################################################################## 94 | 95 | if __name__ == "__main__": 96 | 97 | try: 98 | 99 | args = cmdlineparse() 100 | import chimera 101 | from chimera import runCommand as rc 102 | from Addions import initiateAddions 103 | from DockPrep import prep 104 | import AddH 105 | from AddCharge import estimateFormalCharge, addNonstandardResCharges 106 | from chimera.selection import currentResidues 107 | 108 | 109 | if args.KEEP_PROTEIN_HYDROGENS: 110 | addHFunc = AddH.simpleAddHydrogens 111 | # NOTE: addHFunc=None yields unrealistic net charges! 112 | else: 113 | addHFunc = AddH.hbondAddHydrogens 114 | # NOTE: the default option addHFunc=AddH.hbondAddHydrogens raised an Error in Carbonic 115 | # Unhydrase with the Zn+2 ion. However, is works better for some proteins, where AddH.simpleAddHydrogens 116 | # leads to net_charge prediction of the order of 120... 117 | 118 | if args.COMPLEX: 119 | rc("open %s" % args.COMPLEX) # load the protein-ligand complex 120 | if args.KEEP_PROTEIN_HYDROGENS: 121 | rc("delete element.H") 122 | if args.REC_NET_CHARGE == None: 123 | standardize_terminal_protein_residues(args.RECEPTOR, "#0") # TODO: UNTESTED 124 | if args.STRIP_IONS: 125 | rc("delete ions") 126 | rc("split #0 ligands") 127 | rc("sel #0.2") # select the ligand 128 | ligres = currentResidues()[0] 129 | ligres.type = 'LIG' # change the resname of the ligand to 'LIG' 130 | rc("combine #0.1 modelId 1") # create a new molecule containing just the receptor 131 | rc("combine #0.2 modelId 2") # create a new molecule containing just the ligand 132 | rc("del #0") 133 | if args.REC_NET_CHARGE != None: 134 | rec_charge = args.REC_NET_CHARGE 135 | else: 136 | # We will estimate the receptor's net charge. For this we need to DockPrep the receptor (is fast). 137 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 138 | # For a full list of DockPrep options, look into file Chimera-alpha_py2.7/share/DockPrep/__init__.py 139 | prep([models[0]], nogui=True, method=args.CHARGE_METHOD, addHFunc=addHFunc) 140 | rec_charge = estimateFormalCharge(models[0].atoms) # DockPred does not assign charges to receptor atoms, only to ligand atoms 141 | print("Receptor's net charge =", rec_charge) 142 | # Now that we calculated the charges of the protein and the ligand, we just need the complex 143 | rc("combine #1,2 modelId 3") # create a new molecule containing the protein-ligand complex 144 | rc("del #1-2") 145 | pdb = args.COMPLEX.replace(".pdb", "_prep.pdb") 146 | elif args.RECEPTOR and args.LIGAND: 147 | rc("open %s" % args.RECEPTOR) # load the receptor 148 | if args.REC_NET_CHARGE == None: 149 | standardize_terminal_protein_residues(args.RECEPTOR,"#0") # read function's definition to understand why is here 150 | rc("open %s" % args.LIGAND) # load the ligand 151 | if args.STRIP_IONS: 152 | rc("delete ions") 153 | if args.REC_NET_CHARGE != None: 154 | rec_charge = args.REC_NET_CHARGE 155 | else: 156 | # We will estimate the receptor's net charge. For this we need to DockPrep the receptor (is fast). 157 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 158 | # For a full list of DockPrep options, look into file Chimera-alpha_py2.7/share/DockPrep/__init__.py 159 | prep([models[0]], nogui=True, method=args.CHARGE_METHOD, addHFunc=addHFunc) 160 | rec_charge = estimateFormalCharge(models[0].atoms) # DockPred does not assign charges to receptor atoms, only to ligand atoms 161 | print("Receptor's net charge =", rec_charge) 162 | rc("sel #1") # select the ligand 163 | ligres = currentResidues()[0] 164 | ligres.type = 'LIG' # change the resname of the ligand to 'LIG' 165 | rc("combine #0,1 modelId 2") # create a new molecule containing the protein-ligand complex 166 | rc("combine #2 modelId 3") # create a new molecule containing the protein-ligand complex 167 | rc("del #0-2") 168 | pdb = os.path.splitext(os.path.basename(args.RECEPTOR))[0] + "_" + os.path.splitext(os.path.basename(args.LIGAND))[0] + "_prep.pdb" 169 | elif args.RECEPTOR: 170 | rc("open %s" % args.RECEPTOR) # load the receptor 171 | if args.KEEP_PROTEIN_HYDROGENS: 172 | rc("delete element.H") 173 | if args.REC_NET_CHARGE == None: 174 | standardize_terminal_protein_residues(args.RECEPTOR,"#0") # TODO: UNTESTED 175 | if args.STRIP_IONS: 176 | rc("delete ions") 177 | if args.REC_NET_CHARGE != None: 178 | rec_charge = args.REC_NET_CHARGE 179 | else: 180 | # We will estimate the receptor's net charge. For this we need to DockPrep the receptor (is fast). 181 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 182 | # For a full list of DockPrep options, look into file Chimera-alpha_py2.7/share/DockPrep/__init__.py 183 | prep([models[0]], nogui=True, method=args.CHARGE_METHOD, addHFunc=addHFunc) 184 | rec_charge = estimateFormalCharge(models[0].atoms) # DockPred does not assign charges to receptor atoms, only to ligand atoms 185 | print("Receptor's net charge =", rec_charge) 186 | pdb = os.path.splitext(os.path.basename(args.RECEPTOR))[0] + "_prep.pdb" 187 | 188 | print("Preparing receptor for docking and calculating ligand '%s' charges (may be slow)." % args.CHARGE_METHOD) 189 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) # actually only one model is left 190 | if args.LIGAND and args.LIG_NET_CHARGE != None: # args.LIG_NET_CHARGE doesn't work if 0 191 | net_charge = args.LIG_NET_CHARGE + rec_charge 192 | elif args.LIGAND and args.LIG_NET_CHARGE == None: # not args.LIG_NET_CHARGE doesn't work if 0 193 | # Add partial charges again for initiateAddions() to function. 194 | prep(models, nogui=True, method=args.CHARGE_METHOD, addHFunc=AddH.simpleAddHydrogens) 195 | # NOTE: the default option addHFunc=AddH.hbondAddHydrogens raised an Error in Carbonic Unhydrase with the Zn+2 ion. 196 | net_charge = estimateFormalCharge(models[0].atoms) 197 | elif not args.LIGAND and args.COMPLEX: 198 | # Add partial charges again for initiateAddions() to function. 199 | prep(models, nogui=True, method=args.CHARGE_METHOD, addHFunc=AddH.simpleAddHydrogens) 200 | # NOTE: the default option addHFunc=AddH.hbondAddHydrogens raised an Error in Carbonic Unhydrase with the Zn+2 ion. 201 | net_charge = estimateFormalCharge(models[0].atoms) 202 | elif not args.LIGAND and args.RECEPTOR: 203 | net_charge = rec_charge 204 | 205 | # Neutralize system 206 | if args.NEUTRALIZE: 207 | if net_charge < 0: 208 | initiateAddions(models, "Na+", str(abs(net_charge)), chimera.replyobj.status) 209 | elif net_charge > 0: 210 | initiateAddions(models, "Cl-", str(net_charge), chimera.replyobj.status) 211 | 212 | if net_charge != 0: 213 | # change the resids of the ions, which by default they are all 1 214 | rc("sel ~ions") 215 | existing_resids = [r.id.position for r in currentResidues()] 216 | start = max(existing_resids) + 2 217 | rc("resrenumber %i ions" % start) # renumber the resids of the added ions 218 | 219 | if args.COMPLEX or args.LIGAND: 220 | # change the resid of the ligand 221 | rc('sel #3 & ~ #3:LIG') 222 | existing_resids = [r.id.position for r in currentResidues()] 223 | start = max(existing_resids) + 2 224 | rc("resrenumber %i #3:LIG" % start) 225 | rc("combine #3 modelId 4") # create a new molecule to split it into receptor and ligand 226 | rc("split #4 atoms ~#4:LIG") 227 | rc("combine #4.1 modelId 5") # create a new molecule containing just the receptor 228 | rc("combine #4.2 modelId 6") # create a new molecule containing just the ligand 229 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 230 | # for m in models: print(len(m.atoms), estimateFormalCharge(m.atoms) # DEBUGGING 231 | if args.LIG_NET_CHARGE != None: # not args.LIG_NET_CHARGE doesn't work if 0 232 | lig_charge = args.LIG_NET_CHARGE 233 | else: 234 | lig_charge = estimateFormalCharge(models[3].atoms) 235 | rc("del #4-6") 236 | 237 | # Finally, write the complex pdb file with headers 238 | if args.KEEP_CHAINIDS == False: 239 | rc("changechains B A all") # <== OPTIONAL (ligand and protein will be chain A for homology modeling) 240 | if args.OUT_PDB: 241 | pdb = args.OUT_PDB 242 | if args.COMPLEX or args.LIGAND: 243 | rc("write format pdb #3 %s" % pdb) 244 | else: 245 | rc("write format pdb #0 %s" % pdb) 246 | with open(pdb, "r+") as f: 247 | s = f.read() 248 | f.seek(0) 249 | if args.COMPLEX or args.LIGAND: 250 | f.write("HEADER receptor net charge = %i\nHEADER ligand net charge = %i\n" % (rec_charge, lig_charge)) # after system neutralization 251 | else: 252 | f.write("HEADER receptor net charge = %i\n" % (rec_charge)) # after system neutralization 253 | f.write(s) 254 | 255 | except: 256 | type, value, tb = sys.exc_info() 257 | lines = traceback.format_exception(type, value, tb) 258 | print((''.join(lines))) 259 | raise -------------------------------------------------------------------------------- /dockprep/example_files/3K5C-BACE_1.mol: -------------------------------------------------------------------------------- 1 | REMARK score -90.21 2 | LCcorina 10041815583D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 87 88 0 0 0 0 999 V2000 5 | 25.9314 5.1125 17.3673 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 26.3958 5.7692 18.5087 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 27.6294 6.4115 18.5006 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 26.6868 5.1203 16.1800 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 27.9071 5.7803 16.1600 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 28.3567 6.4294 17.3141 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 28.7339 5.7745 14.9144 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 28.2672 5.9839 13.7999 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 30.0161 5.5308 15.1565 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | 31.1655 5.7229 14.2694 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 32.3013 6.5543 14.9475 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 31.7701 7.7059 15.6807 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 32.5417 8.1089 16.8492 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 31.7382 8.9416 17.8567 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 31.4978 8.5096 19.2845 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 30.6304 9.5192 20.0787 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 29.4018 8.8984 20.7758 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 28.4435 8.3191 19.8155 N 0 0 0 0 0 0 0 0 0 0 0 0 23 | 28.2301 6.9783 19.7605 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 28.6153 6.2012 20.6343 O 0 0 0 0 0 0 0 0 0 0 0 0 25 | 31.6361 8.9012 14.7208 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 27.5563 9.2855 19.1471 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 26.1995 9.3897 19.8666 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 24.7362 4.4553 17.3963 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | 24.1183 4.4752 16.0315 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 22.5931 4.3157 16.1531 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 31.6697 4.3250 13.9202 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 31.9299 3.5877 15.1249 O 0 0 0 0 0 0 0 0 0 0 0 0 33 | 30.5990 3.5391 13.1173 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 31.2898 2.7953 11.9565 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 30.2806 1.9129 11.2208 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 31.8599 3.7873 10.9327 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 31.1890 4.7139 10.4842 O 0 0 0 0 0 0 0 0 0 0 0 0 38 | 33.0930 3.5508 10.5117 N 0 0 0 0 0 0 0 0 0 0 0 0 39 | 33.6869 4.3522 9.4692 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 34.4311 3.4314 8.5130 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 35.0024 4.1969 7.2880 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 36.2636 4.9971 7.6826 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 25.7958 5.7810 19.4067 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | 26.3148 4.6169 15.2999 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | 29.2986 6.9557 17.2660 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | 30.1652 5.1352 16.1720 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | 30.8093 6.2738 13.3865 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | 32.8490 5.9165 15.6569 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | 32.9940 6.9252 14.1777 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | 30.8225 7.3438 16.1060 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | 32.9033 7.2138 17.3764 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | 33.4002 8.7179 16.5295 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | 32.2194 9.9220 17.9885 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 30.7141 9.0833 17.4812 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 30.9772 7.5406 19.2916 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 32.4617 8.4118 19.8053 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 31.2407 9.9882 20.8646 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 30.2527 10.2947 19.3962 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 29.7308 8.0964 21.4529 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 28.8772 9.6737 21.3534 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 30.9754 8.6277 13.8849 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 32.6285 9.1711 14.3307 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 31.2086 9.7589 15.2607 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 28.0295 10.2785 19.1472 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 27.3787 8.9640 18.1102 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 25.7077 8.4058 19.8661 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 25.5624 10.1184 19.3440 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 26.3589 9.7195 20.9038 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 24.3476 5.4318 15.5392 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 24.5257 3.6475 15.4324 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 22.1522 4.2293 15.1491 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 22.1721 5.1941 16.6641 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 22.3643 3.4095 16.7332 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 32.6216 4.4157 13.3765 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 32.9038 3.6816 15.6277 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 30.1026 2.8145 13.7796 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 29.8526 4.2409 12.7169 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 32.0982 2.1846 12.3849 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 30.7877 1.3786 10.4039 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 29.4784 2.5408 10.8059 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 29.8497 1.1844 11.9235 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 33.6657 2.7302 10.9684 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 32.8970 4.8943 8.9284 H 0 0 0 0 0 0 0 0 0 0 0 0 84 | 34.3873 5.0738 9.9152 H 0 0 0 0 0 0 0 0 0 0 0 0 85 | 35.2706 2.9557 9.0410 H 0 0 0 0 0 0 0 0 0 0 0 0 86 | 33.7447 2.6560 8.1419 H 0 0 0 0 0 0 0 0 0 0 0 0 87 | 35.2699 3.4787 6.4991 H 0 0 0 0 0 0 0 0 0 0 0 0 88 | 34.2437 4.8957 6.9058 H 0 0 0 0 0 0 0 0 0 0 0 0 89 | 36.0008 5.7418 8.4482 H 0 0 0 0 0 0 0 0 0 0 0 0 90 | 36.6653 5.5090 6.7957 H 0 0 0 0 0 0 0 0 0 0 0 0 91 | 37.0228 4.3100 8.0846 H 0 0 0 0 0 0 0 0 0 0 0 0 92 | 1 2 2 0 0 0 0 93 | 1 4 1 0 0 0 0 94 | 1 24 1 0 0 0 0 95 | 2 3 1 0 0 0 0 96 | 2 39 1 0 0 0 0 97 | 3 6 2 0 0 0 0 98 | 3 19 1 0 0 0 0 99 | 4 5 2 0 0 0 0 100 | 4 40 1 0 0 0 0 101 | 5 6 1 0 0 0 0 102 | 5 7 1 0 0 0 0 103 | 6 41 1 0 0 0 0 104 | 7 8 2 0 0 0 0 105 | 7 9 1 0 0 0 0 106 | 9 10 1 0 0 0 0 107 | 9 42 1 0 0 0 0 108 | 10 11 1 0 0 0 0 109 | 10 27 1 0 0 0 0 110 | 10 43 1 0 0 0 0 111 | 11 12 1 0 0 0 0 112 | 11 44 1 0 0 0 0 113 | 11 45 1 0 0 0 0 114 | 12 13 1 0 0 0 0 115 | 12 21 1 0 0 0 0 116 | 12 46 1 0 0 0 0 117 | 13 14 1 0 0 0 0 118 | 13 47 1 0 0 0 0 119 | 13 48 1 0 0 0 0 120 | 14 15 1 0 0 0 0 121 | 14 49 1 0 0 0 0 122 | 14 50 1 0 0 0 0 123 | 15 16 1 0 0 0 0 124 | 15 51 1 0 0 0 0 125 | 15 52 1 0 0 0 0 126 | 16 17 1 0 0 0 0 127 | 16 53 1 0 0 0 0 128 | 16 54 1 0 0 0 0 129 | 17 18 1 0 0 0 0 130 | 17 55 1 0 0 0 0 131 | 17 56 1 0 0 0 0 132 | 18 19 1 0 0 0 0 133 | 18 22 1 0 0 0 0 134 | 19 20 2 0 0 0 0 135 | 21 57 1 0 0 0 0 136 | 21 58 1 0 0 0 0 137 | 21 59 1 0 0 0 0 138 | 22 23 1 0 0 0 0 139 | 22 60 1 0 0 0 0 140 | 22 61 1 0 0 0 0 141 | 23 62 1 0 0 0 0 142 | 23 63 1 0 0 0 0 143 | 23 64 1 0 0 0 0 144 | 24 25 1 0 0 0 0 145 | 25 26 1 0 0 0 0 146 | 25 65 1 0 0 0 0 147 | 25 66 1 0 0 0 0 148 | 26 67 1 0 0 0 0 149 | 26 68 1 0 0 0 0 150 | 26 69 1 0 0 0 0 151 | 27 28 1 0 0 0 0 152 | 27 29 1 0 0 0 0 153 | 27 70 1 0 0 0 0 154 | 28 71 1 0 0 0 0 155 | 29 30 1 0 0 0 0 156 | 29 72 1 0 0 0 0 157 | 29 73 1 0 0 0 0 158 | 30 31 1 0 0 0 0 159 | 30 32 1 0 0 0 0 160 | 30 74 1 0 0 0 0 161 | 31 75 1 0 0 0 0 162 | 31 76 1 0 0 0 0 163 | 31 77 1 0 0 0 0 164 | 32 33 2 0 0 0 0 165 | 32 34 1 0 0 0 0 166 | 34 35 1 0 0 0 0 167 | 34 78 1 0 0 0 0 168 | 35 36 1 0 0 0 0 169 | 35 79 1 0 0 0 0 170 | 35 80 1 0 0 0 0 171 | 36 37 1 0 0 0 0 172 | 36 81 1 0 0 0 0 173 | 36 82 1 0 0 0 0 174 | 37 38 1 0 0 0 0 175 | 37 83 1 0 0 0 0 176 | 37 84 1 0 0 0 0 177 | 38 85 1 0 0 0 0 178 | 38 86 1 0 0 0 0 179 | 38 87 1 0 0 0 0 180 | M END 181 | $$$$ 182 | -------------------------------------------------------------------------------- /dockprep/example_files/3K5C-BACE_4.mol: -------------------------------------------------------------------------------- 1 | REMARK score -96.38 2 | LCcorina 10041815553D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 90 92 0 0 0 0 999 V2000 5 | 31.5509 4.5957 14.3313 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 32.7807 4.7940 13.6214 O 0 0 0 0 0 0 0 0 0 0 0 0 7 | 30.6497 3.7570 13.3907 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 31.2278 2.4219 13.1292 N 0 3 0 0 0 0 0 0 0 0 0 0 9 | 30.5584 1.7491 12.0021 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 30.8676 0.2413 11.9094 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 29.4667 0.7100 12.3551 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 30.5108 2.4645 10.6461 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 31.7085 2.6465 9.9184 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 31.7030 3.3047 8.6594 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 30.4694 3.7778 8.1475 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 29.2648 3.6033 8.8686 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 29.2854 2.9457 10.1183 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 33.0142 3.4801 7.9025 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 33.3417 2.1948 7.1492 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 32.8769 4.6287 6.9086 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 34.1337 3.7914 8.8885 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 27.4020 6.6138 17.1724 N 0 0 0 0 0 0 0 0 0 0 0 0 23 | 27.4209 5.4691 16.2173 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 28.4932 5.6289 15.1339 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 29.7488 5.7284 15.5740 N 0 0 0 0 0 0 0 0 0 0 0 0 26 | 30.9009 5.9373 14.6885 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 31.9152 6.8807 15.3686 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 31.2207 8.2337 15.8118 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 32.3550 9.0923 16.4012 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 31.8626 10.3916 17.0956 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 30.8932 10.1571 18.2658 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 31.5282 9.2143 19.3473 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 30.7066 9.3458 20.6753 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 29.1764 9.1329 20.4634 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 28.8836 7.9727 19.5313 O 0 0 0 0 0 0 0 0 0 0 0 0 36 | 27.4627 7.5107 19.5111 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 27.3265 6.3585 18.5102 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 27.1593 5.2242 18.9600 O 0 0 0 0 0 0 0 0 0 0 0 0 39 | 30.5498 8.9660 14.5629 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 28.1796 5.7332 13.9498 O 0 0 0 0 0 0 0 0 0 0 0 0 41 | 26.0452 5.2304 15.5522 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 27.4572 7.9830 16.5957 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 31.7856 4.0836 15.2760 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | 33.7064 5.0105 14.1747 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | 29.6613 3.6221 13.8541 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | 30.5358 4.2798 12.4296 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | 32.2955 2.5260 12.8853 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | 31.1507 -0.0122 10.8771 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | 31.6974 -0.0053 12.5881 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | 29.1494 0.8516 13.3988 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | 28.6027 0.8447 11.6878 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | 32.6438 2.2833 10.3181 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | 30.4383 4.2808 7.1923 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 28.3344 3.9723 8.4629 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 28.3683 2.8092 10.6721 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 34.2511 2.3435 6.5484 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 32.5031 1.9342 6.4867 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 33.5072 1.3798 7.8692 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 32.2552 4.3071 6.0601 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 33.8732 4.9191 6.5439 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 32.4033 5.4886 7.4051 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 33.7622 4.4840 9.6581 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 34.9753 4.2551 8.3532 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 34.4713 2.8597 9.3659 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 27.6802 4.6115 16.8556 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 29.8982 5.6458 16.6607 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 30.5112 6.3992 13.7693 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 32.3319 6.3896 16.2603 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 32.7279 7.1139 14.6649 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 30.4363 8.0695 16.5653 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 32.9038 8.5078 17.1543 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 33.0431 9.3927 15.5974 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 32.7258 10.9414 17.4990 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 31.3360 11.0216 16.3637 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 30.6642 11.1177 18.7505 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 29.9636 9.7068 17.8873 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 31.4961 8.1701 19.0030 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 32.5718 9.5062 19.5362 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 31.0537 8.5921 21.3974 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 30.8504 10.3514 21.0975 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 28.6994 8.9133 21.4301 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 28.7349 10.0452 20.0358 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 27.1750 7.1657 20.5151 H 0 0 0 0 0 0 0 0 0 0 0 0 84 | 26.8106 8.3434 19.2088 H 0 0 0 0 0 0 0 0 0 0 0 0 85 | 29.6961 8.3720 14.2047 H 0 0 0 0 0 0 0 0 0 0 0 0 86 | 31.2904 9.0672 13.7560 H 0 0 0 0 0 0 0 0 0 0 0 0 87 | 30.2004 9.9637 14.8673 H 0 0 0 0 0 0 0 0 0 0 0 0 88 | 26.1118 4.3695 14.8707 H 0 0 0 0 0 0 0 0 0 0 0 0 89 | 25.7523 6.1258 14.9842 H 0 0 0 0 0 0 0 0 0 0 0 0 90 | 25.2931 5.0263 16.3285 H 0 0 0 0 0 0 0 0 0 0 0 0 91 | 28.2099 8.5775 17.1341 H 0 0 0 0 0 0 0 0 0 0 0 0 92 | 26.4721 8.4624 16.6945 H 0 0 0 0 0 0 0 0 0 0 0 0 93 | 27.7302 7.9221 15.5319 H 0 0 0 0 0 0 0 0 0 0 0 0 94 | 31.1322 1.8524 13.9578 H 0 0 0 0 0 0 0 0 0 0 0 0 95 | 1 2 1 0 0 0 0 96 | 1 3 1 0 0 0 0 97 | 1 22 1 0 0 0 0 98 | 1 39 1 0 0 0 0 99 | 2 40 1 0 0 0 0 100 | 3 4 1 0 0 0 0 101 | 3 41 1 0 0 0 0 102 | 3 42 1 0 0 0 0 103 | 4 5 1 0 0 0 0 104 | 4 43 1 0 0 0 0 105 | 4 90 1 0 0 0 0 106 | 5 6 1 0 0 0 0 107 | 5 7 1 0 0 0 0 108 | 5 8 1 0 0 0 0 109 | 6 7 1 0 0 0 0 110 | 6 44 1 0 0 0 0 111 | 6 45 1 0 0 0 0 112 | 7 46 1 0 0 0 0 113 | 7 47 1 0 0 0 0 114 | 8 9 2 0 0 0 0 115 | 8 13 1 0 0 0 0 116 | 9 10 1 0 0 0 0 117 | 9 48 1 0 0 0 0 118 | 10 11 2 0 0 0 0 119 | 10 14 1 0 0 0 0 120 | 11 12 1 0 0 0 0 121 | 11 49 1 0 0 0 0 122 | 12 13 2 0 0 0 0 123 | 12 50 1 0 0 0 0 124 | 13 51 1 0 0 0 0 125 | 14 15 1 0 0 0 0 126 | 14 16 1 0 0 0 0 127 | 14 17 1 0 0 0 0 128 | 15 52 1 0 0 0 0 129 | 15 53 1 0 0 0 0 130 | 15 54 1 0 0 0 0 131 | 16 55 1 0 0 0 0 132 | 16 56 1 0 0 0 0 133 | 16 57 1 0 0 0 0 134 | 17 58 1 0 0 0 0 135 | 17 59 1 0 0 0 0 136 | 17 60 1 0 0 0 0 137 | 18 19 1 0 0 0 0 138 | 18 33 1 0 0 0 0 139 | 18 38 1 0 0 0 0 140 | 19 20 1 0 0 0 0 141 | 19 37 1 0 0 0 0 142 | 19 61 1 0 0 0 0 143 | 20 21 1 0 0 0 0 144 | 20 36 2 0 0 0 0 145 | 21 22 1 0 0 0 0 146 | 21 62 1 0 0 0 0 147 | 22 23 1 0 0 0 0 148 | 22 63 1 0 0 0 0 149 | 23 24 1 0 0 0 0 150 | 23 64 1 0 0 0 0 151 | 23 65 1 0 0 0 0 152 | 24 25 1 0 0 0 0 153 | 24 35 1 0 0 0 0 154 | 24 66 1 0 0 0 0 155 | 25 26 1 0 0 0 0 156 | 25 67 1 0 0 0 0 157 | 25 68 1 0 0 0 0 158 | 26 27 1 0 0 0 0 159 | 26 69 1 0 0 0 0 160 | 26 70 1 0 0 0 0 161 | 27 28 1 0 0 0 0 162 | 27 71 1 0 0 0 0 163 | 27 72 1 0 0 0 0 164 | 28 29 1 0 0 0 0 165 | 28 73 1 0 0 0 0 166 | 28 74 1 0 0 0 0 167 | 29 30 1 0 0 0 0 168 | 29 75 1 0 0 0 0 169 | 29 76 1 0 0 0 0 170 | 30 31 1 0 0 0 0 171 | 30 77 1 0 0 0 0 172 | 30 78 1 0 0 0 0 173 | 31 32 1 0 0 0 0 174 | 32 33 1 0 0 0 0 175 | 32 79 1 0 0 0 0 176 | 32 80 1 0 0 0 0 177 | 33 34 2 0 0 0 0 178 | 35 81 1 0 0 0 0 179 | 35 82 1 0 0 0 0 180 | 35 83 1 0 0 0 0 181 | 37 84 1 0 0 0 0 182 | 37 85 1 0 0 0 0 183 | 37 86 1 0 0 0 0 184 | 38 87 1 0 0 0 0 185 | 38 88 1 0 0 0 0 186 | 38 89 1 0 0 0 0 187 | M CHG 1 4 1 188 | M END 189 | $$$$ 190 | -------------------------------------------------------------------------------- /dockprep/example_files/3K5C-BACE_5.mol: -------------------------------------------------------------------------------- 1 | REMARK score -91.46 2 | LCcorina 10041815563D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 80 81 0 0 0 0 999 V2000 5 | 33.9967 5.3581 9.1859 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 32.8799 5.6936 8.2105 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 32.0881 4.4469 7.8151 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 31.4016 3.8418 8.9881 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 31.7937 2.8679 9.8802 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 30.7061 2.7299 10.7620 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 29.7002 3.5512 10.4594 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | 30.1457 4.2167 9.3799 N 0 0 0 0 0 0 0 0 0 0 0 0 13 | 31.5630 4.6126 14.3519 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 32.7624 4.8321 13.5411 O 0 0 0 0 0 0 0 0 0 0 0 0 15 | 30.5960 3.8175 13.4791 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 31.1683 2.4998 13.1023 N 0 3 0 0 0 0 0 0 0 0 0 0 17 | 30.5638 1.7848 11.9697 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 27.4233 6.6411 17.1690 N 0 0 0 0 0 0 0 0 0 0 0 0 19 | 27.4267 5.5062 16.2045 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 28.5014 5.6828 15.1314 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 29.7568 5.7719 15.5819 N 0 0 0 0 0 0 0 0 0 0 0 0 22 | 30.9180 5.9608 14.7021 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 31.9295 6.8994 15.3838 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 31.2379 8.2566 15.8178 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 32.3716 9.1128 16.4117 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 31.8799 10.4167 17.0980 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 30.9012 10.1906 18.2619 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 31.5243 9.2490 19.3514 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 30.6938 9.3887 20.6730 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 29.1641 9.1817 20.4508 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 28.8733 8.0075 19.5340 O 0 0 0 0 0 0 0 0 0 0 0 0 32 | 27.4498 7.5562 19.5029 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 27.3118 6.3962 18.5036 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 27.1061 5.2673 18.9530 O 0 0 0 0 0 0 0 0 0 0 0 0 35 | 30.5782 8.9874 14.5619 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 28.1941 5.7979 13.9463 O 0 0 0 0 0 0 0 0 0 0 0 0 37 | 26.0514 5.2752 15.5310 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 27.5762 8.0104 16.6242 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 33.5650 4.9465 10.1102 H 0 0 0 0 0 0 0 0 0 0 0 0 40 | 34.5646 6.2703 9.4210 H 0 0 0 0 0 0 0 0 0 0 0 0 41 | 34.6686 4.6145 8.7323 H 0 0 0 0 0 0 0 0 0 0 0 0 42 | 32.1891 6.4114 8.6769 H 0 0 0 0 0 0 0 0 0 0 0 0 43 | 33.3092 6.1368 7.2998 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | 31.3274 4.7174 7.0680 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | 32.7718 3.6985 7.3878 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | 32.6116 2.3061 10.0035 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | 28.7849 3.5478 11.0695 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | 31.8474 4.0800 15.2713 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | 33.7108 5.1083 14.0254 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | 29.6595 3.6475 14.0305 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | 30.3829 4.3822 12.5596 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | 32.2268 2.6240 12.8302 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | 31.1005 0.8389 11.8048 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 29.5074 1.5730 12.1915 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 27.6713 4.6373 16.8333 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 29.8978 5.6966 16.6703 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 30.5394 6.4181 13.7760 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 32.3383 6.4094 16.2798 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 32.7479 7.1266 14.6847 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 30.4482 8.0983 16.5668 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 32.9129 8.5289 17.1707 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 33.0665 9.4074 15.6115 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 32.7425 10.9643 17.5055 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 31.3613 11.0464 16.3600 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 30.6729 11.1539 18.7414 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 29.9725 9.7428 17.8784 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 31.4903 8.2037 19.0106 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 32.5678 9.5370 19.5468 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 31.0323 8.6360 21.4002 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 30.8388 10.3951 21.0926 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 28.6774 8.9801 21.4165 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 28.7321 10.0897 20.0050 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 27.1504 7.2181 20.5059 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 26.8069 8.3922 19.1906 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 29.7472 8.3775 14.1778 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 31.3343 9.1153 13.7732 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 30.1984 9.9729 14.8691 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 26.1184 4.4176 14.8453 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 25.7639 6.1744 14.9666 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 25.2953 5.0693 16.3029 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 27.9578 7.9546 15.5940 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 28.2844 8.5761 17.2474 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 26.5999 8.5171 16.6266 H 0 0 0 0 0 0 0 0 0 0 0 0 84 | 31.1229 1.8972 13.9116 H 0 0 0 0 0 0 0 0 0 0 0 0 85 | 1 2 1 0 0 0 0 86 | 1 35 1 0 0 0 0 87 | 1 36 1 0 0 0 0 88 | 1 37 1 0 0 0 0 89 | 2 3 1 0 0 0 0 90 | 2 38 1 0 0 0 0 91 | 2 39 1 0 0 0 0 92 | 3 40 1 0 0 0 0 93 | 3 41 1 0 0 0 0 94 | 3 4 1 0 0 0 0 95 | 4 8 2 0 0 0 0 96 | 4 5 1 0 0 0 0 97 | 5 6 2 0 0 0 0 98 | 5 42 1 0 0 0 0 99 | 6 13 1 0 0 0 0 100 | 6 7 1 0 0 0 0 101 | 7 43 1 0 0 0 0 102 | 7 8 1 0 0 0 0 103 | 9 10 1 0 0 0 0 104 | 9 11 1 0 0 0 0 105 | 9 18 1 0 0 0 0 106 | 9 44 1 0 0 0 0 107 | 10 45 1 0 0 0 0 108 | 11 12 1 0 0 0 0 109 | 11 46 1 0 0 0 0 110 | 11 47 1 0 0 0 0 111 | 12 13 1 0 0 0 0 112 | 12 48 1 0 0 0 0 113 | 12 80 1 0 0 0 0 114 | 13 49 1 0 0 0 0 115 | 13 50 1 0 0 0 0 116 | 14 15 1 0 0 0 0 117 | 14 29 1 0 0 0 0 118 | 14 34 1 0 0 0 0 119 | 15 16 1 0 0 0 0 120 | 15 33 1 0 0 0 0 121 | 15 51 1 0 0 0 0 122 | 16 17 1 0 0 0 0 123 | 16 32 2 0 0 0 0 124 | 17 18 1 0 0 0 0 125 | 17 52 1 0 0 0 0 126 | 18 19 1 0 0 0 0 127 | 18 53 1 0 0 0 0 128 | 19 20 1 0 0 0 0 129 | 19 54 1 0 0 0 0 130 | 19 55 1 0 0 0 0 131 | 20 21 1 0 0 0 0 132 | 20 31 1 0 0 0 0 133 | 20 56 1 0 0 0 0 134 | 21 22 1 0 0 0 0 135 | 21 57 1 0 0 0 0 136 | 21 58 1 0 0 0 0 137 | 22 23 1 0 0 0 0 138 | 22 59 1 0 0 0 0 139 | 22 60 1 0 0 0 0 140 | 23 24 1 0 0 0 0 141 | 23 61 1 0 0 0 0 142 | 23 62 1 0 0 0 0 143 | 24 25 1 0 0 0 0 144 | 24 63 1 0 0 0 0 145 | 24 64 1 0 0 0 0 146 | 25 26 1 0 0 0 0 147 | 25 65 1 0 0 0 0 148 | 25 66 1 0 0 0 0 149 | 26 27 1 0 0 0 0 150 | 26 67 1 0 0 0 0 151 | 26 68 1 0 0 0 0 152 | 27 28 1 0 0 0 0 153 | 28 29 1 0 0 0 0 154 | 28 69 1 0 0 0 0 155 | 28 70 1 0 0 0 0 156 | 29 30 2 0 0 0 0 157 | 31 71 1 0 0 0 0 158 | 31 72 1 0 0 0 0 159 | 31 73 1 0 0 0 0 160 | 33 74 1 0 0 0 0 161 | 33 75 1 0 0 0 0 162 | 33 76 1 0 0 0 0 163 | 34 77 1 0 0 0 0 164 | 34 78 1 0 0 0 0 165 | 34 79 1 0 0 0 0 166 | M CHG 1 12 1 167 | M END 168 | $$$$ 169 | -------------------------------------------------------------------------------- /dockprep/example_files/3K5C-BACE_6.mol: -------------------------------------------------------------------------------- 1 | REMARK score -93.89 2 | LCcorina 10041815563D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 83 85 0 0 0 0 999 V2000 5 | 26.0389 5.0980 17.1262 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 26.4167 5.7876 18.2802 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 27.6637 6.4053 18.3411 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 26.8569 5.1146 15.9818 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 28.0575 5.8109 16.0184 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 28.4163 6.4994 17.1827 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 28.9594 5.7943 14.8234 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 28.5541 5.9358 13.6745 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 30.2347 5.6239 15.1551 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | 31.4131 5.8430 14.3108 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 31.9673 4.4540 13.9943 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 31.0226 3.6874 13.0758 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 31.4347 2.3120 12.8649 N 0 3 0 0 0 0 0 0 0 0 0 0 18 | 30.7489 1.5429 11.8351 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 30.5484 2.3291 10.5546 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 31.6454 2.8088 9.8772 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 31.5162 3.5349 8.7105 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 32.7653 4.0494 8.0830 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 33.3879 5.1291 8.9235 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 33.7428 2.9263 7.7978 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 30.2393 3.8019 8.2119 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 29.1289 3.3147 8.9032 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 29.2486 2.5630 10.0638 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 33.2451 4.5721 13.3243 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | 32.4767 6.7299 15.0334 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 31.8682 7.8722 15.7172 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 32.5590 8.3203 16.9183 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 31.6736 9.1391 17.8655 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 31.3707 8.8351 19.1153 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 30.4658 9.8158 19.8470 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 29.3443 9.0273 20.5325 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 28.5003 8.3110 19.5637 N 0 0 0 0 0 0 0 0 0 0 0 0 37 | 28.2427 6.9808 19.6075 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 28.5371 6.2545 20.5547 O 0 0 0 0 0 0 0 0 0 0 0 0 39 | 27.5523 9.1761 18.8879 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 26.2955 9.0524 19.7015 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 31.7453 9.0484 14.7312 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 25.1112 4.5454 17.1067 H 0 0 0 0 0 0 0 0 0 0 0 0 43 | 25.7427 5.8409 19.1224 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | 26.5481 4.5900 15.0897 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | 29.3034 7.1152 17.1636 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | 30.3578 5.2786 16.1922 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | 31.0927 6.3688 13.3992 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | 32.1306 3.9068 14.9344 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | 30.0149 3.6720 13.5166 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | 30.9855 4.1810 12.0934 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | 32.4969 2.2870 12.5803 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | 31.3396 0.6474 11.5918 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | 29.7577 1.2394 12.2030 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 32.6390 2.6201 10.2561 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 32.4802 4.5027 7.1222 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 34.2879 5.5123 8.4203 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 33.6642 4.7162 9.9050 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 32.6671 5.9488 9.0602 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 33.1976 2.0583 7.3985 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 34.2552 2.6410 8.7284 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 34.4847 3.2646 7.0595 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 30.1230 4.3772 7.3053 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 28.1333 3.5190 8.5382 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 28.3934 2.1624 10.5878 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 34.1167 4.9662 13.8675 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 33.0096 6.1262 15.7827 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 33.1951 7.1156 14.2951 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 30.9131 7.4810 16.0978 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 32.9216 7.4469 17.4801 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 33.4123 8.9543 16.6356 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 32.1109 10.1389 18.0037 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 30.6664 9.2359 17.4340 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 30.8506 7.8661 19.1412 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 32.2932 8.7721 19.7112 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 31.0518 10.3644 20.5990 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 30.0369 10.5272 19.1260 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 29.7826 8.2873 21.2183 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 28.7039 9.7188 21.0998 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 27.9381 10.2062 18.8783 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 27.4106 8.8280 17.8541 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 25.5821 9.8315 19.3948 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 26.5357 9.1744 20.7680 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 25.8485 8.0608 19.5373 H 0 0 0 0 0 0 0 0 0 0 0 0 84 | 31.2285 9.8861 15.2222 H 0 0 0 0 0 0 0 0 0 0 0 0 85 | 31.1704 8.7291 13.8494 H 0 0 0 0 0 0 0 0 0 0 0 0 86 | 32.7493 9.3701 14.4174 H 0 0 0 0 0 0 0 0 0 0 0 0 87 | 31.2901 1.7394 13.7929 H 0 0 0 0 0 0 0 0 0 0 0 0 88 | 1 2 2 0 0 0 0 89 | 1 4 1 0 0 0 0 90 | 1 38 1 0 0 0 0 91 | 2 3 1 0 0 0 0 92 | 2 39 1 0 0 0 0 93 | 3 6 2 0 0 0 0 94 | 3 33 1 0 0 0 0 95 | 4 5 2 0 0 0 0 96 | 4 40 1 0 0 0 0 97 | 5 6 1 0 0 0 0 98 | 5 7 1 0 0 0 0 99 | 6 41 1 0 0 0 0 100 | 7 8 2 0 0 0 0 101 | 7 9 1 0 0 0 0 102 | 9 10 1 0 0 0 0 103 | 9 42 1 0 0 0 0 104 | 10 11 1 0 0 0 0 105 | 10 25 1 0 0 0 0 106 | 10 43 1 0 0 0 0 107 | 11 12 1 0 0 0 0 108 | 11 24 1 0 0 0 0 109 | 11 44 1 0 0 0 0 110 | 12 13 1 0 0 0 0 111 | 12 45 1 0 0 0 0 112 | 12 46 1 0 0 0 0 113 | 13 14 1 0 0 0 0 114 | 13 47 1 0 0 0 0 115 | 13 83 1 0 0 0 0 116 | 14 15 1 0 0 0 0 117 | 14 48 1 0 0 0 0 118 | 14 49 1 0 0 0 0 119 | 15 16 2 0 0 0 0 120 | 15 23 1 0 0 0 0 121 | 16 17 1 0 0 0 0 122 | 16 50 1 0 0 0 0 123 | 17 18 1 0 0 0 0 124 | 17 21 2 0 0 0 0 125 | 18 19 1 0 0 0 0 126 | 18 20 1 0 0 0 0 127 | 18 51 1 0 0 0 0 128 | 19 52 1 0 0 0 0 129 | 19 53 1 0 0 0 0 130 | 19 54 1 0 0 0 0 131 | 20 55 1 0 0 0 0 132 | 20 56 1 0 0 0 0 133 | 20 57 1 0 0 0 0 134 | 21 22 1 0 0 0 0 135 | 21 58 1 0 0 0 0 136 | 22 23 2 0 0 0 0 137 | 22 59 1 0 0 0 0 138 | 23 60 1 0 0 0 0 139 | 24 61 1 0 0 0 0 140 | 25 26 1 0 0 0 0 141 | 25 62 1 0 0 0 0 142 | 25 63 1 0 0 0 0 143 | 26 27 1 0 0 0 0 144 | 26 37 1 0 0 0 0 145 | 26 64 1 0 0 0 0 146 | 27 28 1 0 0 0 0 147 | 27 65 1 0 0 0 0 148 | 27 66 1 0 0 0 0 149 | 28 29 1 0 0 0 0 150 | 28 67 1 0 0 0 0 151 | 28 68 1 0 0 0 0 152 | 29 30 1 0 0 0 0 153 | 29 69 1 0 0 0 0 154 | 29 70 1 0 0 0 0 155 | 30 31 1 0 0 0 0 156 | 30 71 1 0 0 0 0 157 | 30 72 1 0 0 0 0 158 | 31 32 1 0 0 0 0 159 | 31 73 1 0 0 0 0 160 | 31 74 1 0 0 0 0 161 | 32 33 1 0 0 0 0 162 | 32 35 1 0 0 0 0 163 | 33 34 2 0 0 0 0 164 | 35 36 1 0 0 0 0 165 | 35 75 1 0 0 0 0 166 | 35 76 1 0 0 0 0 167 | 36 77 1 0 0 0 0 168 | 36 78 1 0 0 0 0 169 | 36 79 1 0 0 0 0 170 | 37 80 1 0 0 0 0 171 | 37 81 1 0 0 0 0 172 | 37 82 1 0 0 0 0 173 | M CHG 1 13 1 174 | M END 175 | $$$$ 176 | -------------------------------------------------------------------------------- /dockprep/example_files/3K5C-BACE_7.mol: -------------------------------------------------------------------------------- 1 | REMARK score -107.15 2 | LCcorina 10041815563D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 99103 0 0 0 0 999 V2000 5 | 26.0120 5.2247 17.1304 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 26.4129 5.9437 18.2587 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 27.6961 6.5208 18.2724 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 26.8310 5.1715 15.9875 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 28.0498 5.8372 15.9965 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 28.4355 6.5464 17.1389 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 28.9508 5.7579 14.8061 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 28.5574 5.9069 13.6550 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 30.2184 5.5301 15.1407 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | 31.4096 5.6841 14.2991 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 31.9172 4.2659 14.0350 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 30.9735 3.5150 13.1028 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 31.3860 2.1434 12.8687 N 0 3 0 0 0 0 0 0 0 0 0 0 18 | 30.6596 1.3719 11.8689 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 30.5068 2.1138 10.5559 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 31.6263 2.5934 9.9164 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 31.5403 3.2802 8.7221 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 32.8087 3.7992 8.1385 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 33.4712 4.7776 9.0678 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 33.7429 2.6686 7.7549 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 30.2843 3.5066 8.1551 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 29.1509 3.0195 8.8081 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 29.2282 2.3068 9.9966 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 33.2208 4.3157 13.4072 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | 32.5026 6.5602 14.9902 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 31.9326 7.7469 15.6303 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 32.6375 8.2156 16.8151 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 31.7796 9.0978 17.7305 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 31.4661 8.8513 18.9890 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 30.5944 9.8881 19.6831 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 29.4393 9.1701 20.3750 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 28.6442 8.4414 19.3750 N 0 0 0 0 0 0 0 0 0 0 0 0 37 | 28.4523 7.0902 19.4246 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 28.9044 6.3541 20.2996 O 0 0 0 0 0 0 0 0 0 0 0 0 39 | 27.6632 9.3181 18.7237 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 28.2853 10.2769 17.6950 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 31.8501 8.8892 14.6012 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 24.7301 4.5174 17.1301 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 23.9180 4.4312 16.0725 N 0 0 0 0 0 0 0 0 0 0 0 0 44 | 22.7447 3.6459 16.4769 C 0 0 0 0 0 0 0 0 0 0 0 0 45 | 22.9121 3.3004 17.7579 C 0 0 0 0 0 0 0 0 0 0 0 0 46 | 24.1981 3.8101 18.3419 O 0 0 0 0 0 0 0 0 0 0 0 0 47 | 25.6256 11.5147 21.8412 C 0 0 0 0 0 0 0 0 0 0 0 0 48 | 25.7133 10.1144 21.9110 C 0 0 0 0 0 0 0 0 0 0 0 0 49 | 26.3816 9.4017 20.9027 C 0 0 0 0 0 0 0 0 0 0 0 0 50 | 26.9631 10.0885 19.8247 C 0 0 0 0 0 0 0 0 0 0 0 0 51 | 26.8755 11.4892 19.7559 C 0 0 0 0 0 0 0 0 0 0 0 0 52 | 26.2072 12.2019 20.7641 C 0 0 0 0 0 0 0 0 0 0 0 0 53 | 25.7423 6.0481 19.0989 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 26.5092 4.6177 15.1178 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 29.3474 7.1241 17.1019 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 30.3206 5.1890 16.1815 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 31.1102 6.1931 13.3711 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 32.0264 3.7360 14.9928 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 29.9654 3.4918 13.5423 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 30.9372 4.0252 12.1289 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 32.4343 2.1275 12.5358 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 31.1988 0.4349 11.6658 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 29.6520 1.1410 12.2448 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 32.6037 2.4360 10.3479 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 32.5420 4.3444 7.2212 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 34.2745 5.3053 8.5327 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 33.8960 4.2375 9.9268 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 32.7278 5.5061 9.4239 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 34.6386 3.0821 7.2683 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 33.2291 1.9883 7.0598 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 34.0389 2.1149 8.6581 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 30.2014 4.0509 7.2260 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 28.1704 3.1929 8.3899 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 28.3561 1.9062 10.4919 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 34.0860 4.7016 13.9661 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 33.0143 5.9667 15.7621 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 33.2342 6.8937 14.2393 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 30.9647 7.4024 16.0234 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 32.9707 7.3519 17.4092 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 33.5115 8.8102 16.5106 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 32.2501 10.0868 17.8326 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 30.7764 9.2124 17.2942 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 30.9134 7.9023 19.0501 H 0 0 0 0 0 0 0 0 0 0 0 0 84 | 32.3855 8.7794 19.5886 H 0 0 0 0 0 0 0 0 0 0 0 0 85 | 31.1947 10.4335 20.4261 H 0 0 0 0 0 0 0 0 0 0 0 0 86 | 30.2023 10.5957 18.9377 H 0 0 0 0 0 0 0 0 0 0 0 0 87 | 29.8390 8.4596 21.1136 H 0 0 0 0 0 0 0 0 0 0 0 0 88 | 28.8011 9.9076 20.8836 H 0 0 0 0 0 0 0 0 0 0 0 0 89 | 26.9721 8.6658 18.1698 H 0 0 0 0 0 0 0 0 0 0 0 0 90 | 27.4928 10.8850 17.2344 H 0 0 0 0 0 0 0 0 0 0 0 0 91 | 28.8003 9.6952 16.9163 H 0 0 0 0 0 0 0 0 0 0 0 0 92 | 29.0077 10.9362 18.1983 H 0 0 0 0 0 0 0 0 0 0 0 0 93 | 31.3941 9.7734 15.0706 H 0 0 0 0 0 0 0 0 0 0 0 0 94 | 31.2352 8.5694 13.7469 H 0 0 0 0 0 0 0 0 0 0 0 0 95 | 32.8619 9.1408 14.2507 H 0 0 0 0 0 0 0 0 0 0 0 0 96 | 21.9539 3.4522 15.7674 H 0 0 0 0 0 0 0 0 0 0 0 0 97 | 22.1588 2.7129 18.2616 H 0 0 0 0 0 0 0 0 0 0 0 0 98 | 25.1107 12.0628 22.6164 H 0 0 0 0 0 0 0 0 0 0 0 0 99 | 25.2666 9.5860 22.7403 H 0 0 0 0 0 0 0 0 0 0 0 0 100 | 26.4484 8.3251 20.9561 H 0 0 0 0 0 0 0 0 0 0 0 0 101 | 27.3224 12.0181 18.9271 H 0 0 0 0 0 0 0 0 0 0 0 0 102 | 26.1405 13.2785 20.7111 H 0 0 0 0 0 0 0 0 0 0 0 0 103 | 31.2885 1.5671 13.8005 H 0 0 0 0 0 0 0 0 0 0 0 0 104 | 1 2 2 0 0 0 0 105 | 1 4 1 0 0 0 0 106 | 1 38 1 0 0 0 0 107 | 2 3 1 0 0 0 0 108 | 2 49 1 0 0 0 0 109 | 3 6 2 0 0 0 0 110 | 3 33 1 0 0 0 0 111 | 4 5 2 0 0 0 0 112 | 4 50 1 0 0 0 0 113 | 5 6 1 0 0 0 0 114 | 5 7 1 0 0 0 0 115 | 6 51 1 0 0 0 0 116 | 7 8 2 0 0 0 0 117 | 7 9 1 0 0 0 0 118 | 9 10 1 0 0 0 0 119 | 9 52 1 0 0 0 0 120 | 10 11 1 0 0 0 0 121 | 10 25 1 0 0 0 0 122 | 10 53 1 0 0 0 0 123 | 11 12 1 0 0 0 0 124 | 11 24 1 0 0 0 0 125 | 11 54 1 0 0 0 0 126 | 12 13 1 0 0 0 0 127 | 12 55 1 0 0 0 0 128 | 12 56 1 0 0 0 0 129 | 13 14 1 0 0 0 0 130 | 13 57 1 0 0 0 0 131 | 13 99 1 0 0 0 0 132 | 14 15 1 0 0 0 0 133 | 14 58 1 0 0 0 0 134 | 14 59 1 0 0 0 0 135 | 15 16 2 0 0 0 0 136 | 15 23 1 0 0 0 0 137 | 16 17 1 0 0 0 0 138 | 16 60 1 0 0 0 0 139 | 17 18 1 0 0 0 0 140 | 17 21 2 0 0 0 0 141 | 18 19 1 0 0 0 0 142 | 18 20 1 0 0 0 0 143 | 18 61 1 0 0 0 0 144 | 19 62 1 0 0 0 0 145 | 19 63 1 0 0 0 0 146 | 19 64 1 0 0 0 0 147 | 20 65 1 0 0 0 0 148 | 20 66 1 0 0 0 0 149 | 20 67 1 0 0 0 0 150 | 21 22 1 0 0 0 0 151 | 21 68 1 0 0 0 0 152 | 22 23 2 0 0 0 0 153 | 22 69 1 0 0 0 0 154 | 23 70 1 0 0 0 0 155 | 24 71 1 0 0 0 0 156 | 25 26 1 0 0 0 0 157 | 25 72 1 0 0 0 0 158 | 25 73 1 0 0 0 0 159 | 26 27 1 0 0 0 0 160 | 26 37 1 0 0 0 0 161 | 26 74 1 0 0 0 0 162 | 27 28 1 0 0 0 0 163 | 27 75 1 0 0 0 0 164 | 27 76 1 0 0 0 0 165 | 28 29 1 0 0 0 0 166 | 28 77 1 0 0 0 0 167 | 28 78 1 0 0 0 0 168 | 29 30 1 0 0 0 0 169 | 29 79 1 0 0 0 0 170 | 29 80 1 0 0 0 0 171 | 30 31 1 0 0 0 0 172 | 30 81 1 0 0 0 0 173 | 30 82 1 0 0 0 0 174 | 31 32 1 0 0 0 0 175 | 31 83 1 0 0 0 0 176 | 31 84 1 0 0 0 0 177 | 32 33 1 0 0 0 0 178 | 32 35 1 0 0 0 0 179 | 33 34 2 0 0 0 0 180 | 35 36 1 0 0 0 0 181 | 35 46 1 0 0 0 0 182 | 35 85 1 0 0 0 0 183 | 36 86 1 0 0 0 0 184 | 36 87 1 0 0 0 0 185 | 36 88 1 0 0 0 0 186 | 37 89 1 0 0 0 0 187 | 37 90 1 0 0 0 0 188 | 37 91 1 0 0 0 0 189 | 38 39 2 0 0 0 0 190 | 38 42 1 0 0 0 0 191 | 39 40 1 0 0 0 0 192 | 40 41 2 0 0 0 0 193 | 40 92 1 0 0 0 0 194 | 41 42 1 0 0 0 0 195 | 41 93 1 0 0 0 0 196 | 43 44 2 0 0 0 0 197 | 43 48 1 0 0 0 0 198 | 43 94 1 0 0 0 0 199 | 44 45 1 0 0 0 0 200 | 44 95 1 0 0 0 0 201 | 45 46 2 0 0 0 0 202 | 45 96 1 0 0 0 0 203 | 46 47 1 0 0 0 0 204 | 47 48 2 0 0 0 0 205 | 47 97 1 0 0 0 0 206 | 48 98 1 0 0 0 0 207 | M CHG 1 13 1 208 | M END 209 | $$$$ 210 | -------------------------------------------------------------------------------- /dockprep/example_files/3K5C-BACE_8.mol: -------------------------------------------------------------------------------- 1 | REMARK score -94.61 2 | LCcorina 10041815573D 1 1.00000 0.00000 0 3 | CORINA 4.00 0026 26.04.2017 4 | 78 80 0 0 0 0 999 V2000 5 | 30.0817 5.8744 15.5689 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | 31.0768 5.9668 14.4764 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 32.1282 7.0611 14.7985 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 31.5804 8.4575 15.1801 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 31.4819 8.6552 16.7044 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 30.8977 10.0119 17.1396 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 30.3663 9.9857 18.4589 O 0 0 0 0 0 0 0 0 0 0 0 0 12 | 31.2336 9.5764 19.5124 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 30.4137 9.5114 20.8109 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 29.1215 8.6674 20.7001 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 29.3758 7.1579 20.5765 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 28.1065 6.4709 20.3694 N 0 0 0 0 0 0 0 0 0 0 0 0 17 | 27.6384 6.1507 19.1255 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 28.4391 6.1913 17.9486 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 27.8916 5.8384 16.6906 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 26.5329 5.4414 16.5987 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 25.7215 5.3957 17.7612 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 26.2842 5.7500 19.0174 N 0 0 0 0 0 0 0 0 0 0 0 0 23 | 24.0377 4.9021 17.6423 Cl 0 0 0 0 0 0 0 0 0 0 0 0 24 | 28.7397 5.8986 15.4288 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 28.2121 5.9479 14.3204 O 0 0 0 0 0 0 0 0 0 0 0 0 26 | 31.7640 4.5859 14.2251 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 33.0118 4.7540 13.5394 O 0 0 0 0 0 0 0 0 0 0 0 0 28 | 30.8867 3.7097 13.2974 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 31.4764 2.3700 13.0934 N 0 3 0 0 0 0 0 0 0 0 0 0 30 | 30.8680 1.6731 11.9487 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 30.7216 2.4319 10.6224 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 31.8759 2.9487 9.9924 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 31.7791 3.6572 8.7646 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 30.4985 3.8370 8.1839 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 29.3368 3.3259 8.8080 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 29.4486 2.6228 10.0282 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 33.0598 4.2031 8.1051 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 33.6116 5.4207 8.8972 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 34.1645 3.1498 7.8111 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 32.5051 9.5454 14.6039 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 30.4174 5.7753 16.6117 H 0 0 0 0 0 0 0 0 0 0 0 0 42 | 30.5054 6.2581 13.5827 H 0 0 0 0 0 0 0 0 0 0 0 0 43 | 32.7444 6.7380 15.6505 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | 32.7714 7.2213 13.9207 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | 30.5662 8.5392 14.7619 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | 30.8338 7.8770 17.1339 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | 32.4854 8.5831 17.1491 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | 31.6873 10.7772 17.1106 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | 30.0849 10.2975 16.4557 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | 31.6518 8.5861 19.2792 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | 32.0516 10.3043 19.6176 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | 31.0250 9.0643 21.6086 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | 30.1125 10.5274 21.1057 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | 28.5044 8.8221 21.5975 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | 28.5564 8.9774 19.8087 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | 30.0416 6.9671 19.7219 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | 29.8481 6.7889 21.4988 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | 27.4857 6.1903 21.2331 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | 29.4742 6.4932 18.0099 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 26.1125 5.1725 15.6409 H 0 0 0 0 0 0 0 0 0 0 0 0 61 | 31.9885 4.1091 15.1905 H 0 0 0 0 0 0 0 0 0 0 0 0 62 | 33.9118 5.0479 14.0994 H 0 0 0 0 0 0 0 0 0 0 0 0 63 | 29.8900 3.5847 13.7459 H 0 0 0 0 0 0 0 0 0 0 0 0 64 | 30.7888 4.1979 12.3166 H 0 0 0 0 0 0 0 0 0 0 0 0 65 | 32.5553 2.4694 12.9033 H 0 0 0 0 0 0 0 0 0 0 0 0 66 | 31.4674 0.7851 11.6994 H 0 0 0 0 0 0 0 0 0 0 0 0 67 | 29.8451 1.3636 12.2094 H 0 0 0 0 0 0 0 0 0 0 0 0 68 | 32.8467 2.8072 10.4440 H 0 0 0 0 0 0 0 0 0 0 0 0 69 | 30.3990 4.3712 7.2506 H 0 0 0 0 0 0 0 0 0 0 0 0 70 | 28.3692 3.4738 8.3518 H 0 0 0 0 0 0 0 0 0 0 0 0 71 | 28.5642 2.2306 10.5085 H 0 0 0 0 0 0 0 0 0 0 0 0 72 | 32.7357 4.5323 7.1068 H 0 0 0 0 0 0 0 0 0 0 0 0 73 | 34.4989 5.8200 8.3843 H 0 0 0 0 0 0 0 0 0 0 0 0 74 | 33.8875 5.1025 9.9134 H 0 0 0 0 0 0 0 0 0 0 0 0 75 | 32.8388 6.2014 8.9546 H 0 0 0 0 0 0 0 0 0 0 0 0 76 | 34.6070 2.8083 8.7584 H 0 0 0 0 0 0 0 0 0 0 0 0 77 | 34.9460 3.6035 7.1838 H 0 0 0 0 0 0 0 0 0 0 0 0 78 | 33.7222 2.2923 7.2828 H 0 0 0 0 0 0 0 0 0 0 0 0 79 | 32.0900 10.5381 14.8324 H 0 0 0 0 0 0 0 0 0 0 0 0 80 | 32.5815 9.4219 13.5135 H 0 0 0 0 0 0 0 0 0 0 0 0 81 | 33.5045 9.4533 15.0543 H 0 0 0 0 0 0 0 0 0 0 0 0 82 | 31.3176 1.7585 13.9939 H 0 0 0 0 0 0 0 0 0 0 0 0 83 | 1 2 1 0 0 0 0 84 | 1 20 1 0 0 0 0 85 | 1 37 1 0 0 0 0 86 | 2 3 1 0 0 0 0 87 | 2 22 1 0 0 0 0 88 | 2 38 1 0 0 0 0 89 | 3 4 1 0 0 0 0 90 | 3 39 1 0 0 0 0 91 | 3 40 1 0 0 0 0 92 | 4 5 1 0 0 0 0 93 | 4 36 1 0 0 0 0 94 | 4 41 1 0 0 0 0 95 | 5 6 1 0 0 0 0 96 | 5 42 1 0 0 0 0 97 | 5 43 1 0 0 0 0 98 | 6 7 1 0 0 0 0 99 | 6 44 1 0 0 0 0 100 | 6 45 1 0 0 0 0 101 | 7 8 1 0 0 0 0 102 | 8 9 1 0 0 0 0 103 | 8 46 1 0 0 0 0 104 | 8 47 1 0 0 0 0 105 | 9 10 1 0 0 0 0 106 | 9 48 1 0 0 0 0 107 | 9 49 1 0 0 0 0 108 | 10 11 1 0 0 0 0 109 | 10 50 1 0 0 0 0 110 | 10 51 1 0 0 0 0 111 | 11 12 1 0 0 0 0 112 | 11 52 1 0 0 0 0 113 | 11 53 1 0 0 0 0 114 | 12 13 1 0 0 0 0 115 | 12 54 1 0 0 0 0 116 | 13 14 2 0 0 0 0 117 | 13 18 1 0 0 0 0 118 | 14 15 1 0 0 0 0 119 | 14 55 1 0 0 0 0 120 | 15 16 2 0 0 0 0 121 | 15 20 1 0 0 0 0 122 | 16 17 1 0 0 0 0 123 | 16 56 1 0 0 0 0 124 | 17 18 2 0 0 0 0 125 | 17 19 1 0 0 0 0 126 | 20 21 2 0 0 0 0 127 | 22 23 1 0 0 0 0 128 | 22 24 1 0 0 0 0 129 | 22 57 1 0 0 0 0 130 | 23 58 1 0 0 0 0 131 | 24 25 1 0 0 0 0 132 | 24 59 1 0 0 0 0 133 | 24 60 1 0 0 0 0 134 | 25 26 1 0 0 0 0 135 | 25 61 1 0 0 0 0 136 | 25 78 1 0 0 0 0 137 | 26 27 1 0 0 0 0 138 | 26 62 1 0 0 0 0 139 | 26 63 1 0 0 0 0 140 | 27 28 2 0 0 0 0 141 | 27 32 1 0 0 0 0 142 | 28 29 1 0 0 0 0 143 | 28 64 1 0 0 0 0 144 | 29 30 2 0 0 0 0 145 | 29 33 1 0 0 0 0 146 | 30 31 1 0 0 0 0 147 | 30 65 1 0 0 0 0 148 | 31 32 2 0 0 0 0 149 | 31 66 1 0 0 0 0 150 | 32 67 1 0 0 0 0 151 | 33 34 1 0 0 0 0 152 | 33 35 1 0 0 0 0 153 | 33 68 1 0 0 0 0 154 | 34 69 1 0 0 0 0 155 | 34 70 1 0 0 0 0 156 | 34 71 1 0 0 0 0 157 | 35 72 1 0 0 0 0 158 | 35 73 1 0 0 0 0 159 | 35 74 1 0 0 0 0 160 | 36 75 1 0 0 0 0 161 | 36 76 1 0 0 0 0 162 | 36 77 1 0 0 0 0 163 | M CHG 1 25 1 164 | M END 165 | $$$$ 166 | -------------------------------------------------------------------------------- /mod_frcmod/mod_frcmod.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os, shutil 3 | import traceback, sys, re 4 | from subprocess import call 5 | 6 | import pandas as pd 7 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 8 | from collections import defaultdict 9 | from collections import OrderedDict 10 | import parmed as pmd 11 | from collections import defaultdict 12 | import numpy as np 13 | 14 | class tree(OrderedDict): 15 | def __missing__(self, key): 16 | self[key] = type(self)() 17 | return self[key] 18 | 19 | ## Parse command line arguments 20 | def cmdlineparse(): 21 | parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=""" 22 | DESCRIPTION: 23 | This script reads an optimized ligand structure, measures the bond bonds, dihedrals and bond lengths and writes them to 24 | a provided frcmod file by replacing the existing ones. 25 | 26 | Some examples input files: 27 | * correct ligand frcmod file: 28 | /home2/thomas/Documents/Consensus_Scoring_Project/D3R_2018/BACE/MD_FEset/BACE_from_3dv5_apo/BACE68_frcmod.ligand 29 | * wrong ligand frcmod file: 30 | /home2/thomas/Documents/Consensus_Scoring_Project/D3R_2018/BACE/MD_FEset/BACE_from_3dv5_apo/BACE68_wrong/frcmod.ligand 31 | * optimized ligand geometry file: 32 | /home2/thomas/Documents/Consensus_Scoring_Project/D3R_2018/BACE/MD_FEset/BACE_from_3dv5_apo/ligands/bcc/bace68.bcc.mol2 33 | 34 | 35 | 36 | """, 37 | epilog=""" 38 | EXAMPLE: 39 | 40 | mod_frcmod.py -ligfile bace68.bcc.mol2 -frcmod BACE68_frcmod.ligand -ofrcmod BACE68_frcmod.ligand_corrected 41 | 42 | """) 43 | parser.add_argument("-ligfile", dest="LIGFILE", required=False, default=None, type=str, 44 | help="sdf or mol2 file with optimized ligand structure from which to measure the equilibrium " 45 | "bond angles, dihedrals and bond lengths.") 46 | parser.add_argument("-frcmod", dest="FRCMOD", required=True, default=None, 47 | help="the frcmod parameter file of the ligand.") 48 | parser.add_argument("-ofrcmod", dest="OUT_FRCMOD", required=False, default=None, 49 | help="the name of the modified frcmod parameter file of the ligand, namely the output.") 50 | parser.add_argument("-ff", dest="FF", required=False, default="gaff2", 51 | help="the ligand force field.") 52 | parser.add_argument("-verbose", dest="VERBOSE", required=False, default=False, action='store_true', 53 | help="Print more details.") 54 | 55 | args = parser.parse_args() 56 | return args 57 | 58 | 59 | #################################################### FUNCTION DEFINITIONS ################################################ 60 | 61 | 62 | # THE FOLLOWING CODE IS USELESS SINCE PARMED CAN READ AND WRITE FRCMOD FILES 63 | # ##~~~~~~~~~~~~~~~~~~~~`` DataFrames to store the force field parameters ``~~~~~~~~~~~~~~~~~~## 64 | # mass_cols = ["KNDSYM", "AMASS", "ATPOL", "comment"] 65 | # # NOTE: by defining the dtype you will be able to retrieve the value of a column by simply doing row[colname] 66 | # mass_df = pd.DataFrame([], columns=mass_cols) 67 | # mass_format = "%2s %-6.3f%13.3f\t%s\n" # (A2,2X,F10.2x,f10.2) 68 | # mass_pattern = "^([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 4 groups 69 | # 70 | # bond_cols = ["IBT", "JBT", "RK", "REQ", "comment"] 71 | # bond_df = pd.DataFrame([], columns=bond_cols) 72 | # bond_format = "%2s-%2s%8.2f%8.3f\t%s\n" # A2,1X,A2,2F10.2 73 | # bond_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 5 groups 74 | # 75 | # angl_cols = ["ITT" , "JTT" , "KTT" , "TK" , "TEQ", "comment"] 76 | # angl_df = pd.DataFrame([], columns=angl_cols) 77 | # angl_format = "%2s-%2s-%2s%9.3f%12.3f\t%s\n" # A2,1X,A2,1X,A2,2F10.2 78 | # angl_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 6 groups 79 | # 80 | # dihe_cols = ["IPT" , "JPT" , "KPT" , "LPT" , "IDIVF" , "PK" , "PHASE" , "PN", "comment"] 81 | # dihe_df = pd.DataFrame([], columns=dihe_cols) 82 | # dihe_format = "%2s-%2s-%2s-%2s%4i%9.3f%14.3f%16.3f\t%s\n" # A2,1X,A2,1X,A2,1X,A2,I4,3F15.2 83 | # dihe_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9]+)\s+([0-9.-]+)\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 9 groups 84 | # 85 | # impr_cols = ["IPT" , "JPT" , "KPT" , "LPT" , "PK" , "PHASE" , "PN", "comment"] 86 | # impr_df = pd.DataFrame([], columns=impr_cols) 87 | # impr_format = "%2s-%2s-%2s-%2s%12.1f%15.1f%12.1f\t%s\n" # A2,1X,A2,1X,A2,1X,A2,I4,3F15.2 88 | # impr_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 8 groups 89 | # 90 | # # H-BOND 10-12 POTENTIAL PARAMETERS 91 | # hbon_cols = ["KT1" , "KT2" , "A" , "B", "comment"] 92 | # hbon_df = pd.DataFrame([], columns=hbon_cols) 93 | # hbon_df = hbon_df.astype({"KT1": 'str', "KT2": 'str', "A": 'float', "B": 'float', "comment": 'str'}) 94 | # hbon_format = "" # 2X,A2,2X,A2,2x,5F10.2,I2 95 | # 96 | # # ONLY IF KINDNB .EQ. 'RE' ??? 97 | # nonb_cols = ["LTYNB" , "R" , "EDEP", "comment"] 98 | # nonb_df = pd.DataFrame([], columns=nonb_cols) 99 | # nonb_format = "%4s%16.4f%8.4f\t%s\n" # A2,1X,A2,1X,A2,1X,A2,I4,3F15.2 100 | # nonb_pattern = "^\s*([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 4 groups 101 | # 102 | # # Put all dataframes together into a dict 103 | # forcefield = {"MASS": mass_df, 104 | # "BOND": bond_df, 105 | # "ANGLE": angl_df, 106 | # "DIHE": dihe_df, 107 | # "IMPROPER": impr_df, 108 | # "NONBON": nonb_df} 109 | # fields = ["MASS", "BOND", "ANGLE", "DIHE", "IMPROPER", "NONBON"] 110 | # columns = [mass_cols, bond_cols, angl_cols, dihe_cols, impr_cols, nonb_cols] 111 | # patterns = [mass_pattern, bond_pattern, angl_pattern, dihe_pattern, impr_pattern, nonb_pattern] 112 | # formats = [mass_format, bond_format, angl_format, dihe_format, impr_format, nonb_format] 113 | # 114 | # def update_forcefield_dtypes(): 115 | # 116 | # global forcefield 117 | # 118 | # forcefield["MASS"] = forcefield["MASS"].astype({'KNDSYM': 'str', 'AMASS': 'float', 'ATPOL': 'float', "comment": 'str'}) 119 | # forcefield["BOND"] = forcefield["BOND"].astype({"IBT": 'str', "JBT": 'str', "RK": 'float', "REQ": 'float', "comment": 'str'}) 120 | # forcefield["ANGLE"] = forcefield["ANGLE"].astype({"ITT": 'str', "JTT": 'str', "KTT": 'str', "TK": 'float', "TEQ": 'float', "comment": 'str'}) 121 | # forcefield["DIHE"] = forcefield["DIHE"].astype({"IPT": str, "JPT": 'str', "KPT": 'str', "LPT": 'str', "IDIVF": 'int', "PK": 'float', 122 | # "PHASE": 'float', "PN": 'float', "comment": 'str'}) 123 | # forcefield["IMPROPER"] = forcefield["IMPROPER"].astype({"IPT": 'str', "JPT": 'str', "KPT": 'str', "LPT": 'str', 124 | # "PK": 'float', "PHASE": 'float', "PN": 'float', "comment": 'str'}) 125 | # forcefield["NONBON"] = forcefield["NONBON"].astype({"LTYNB": 'str', "R": 'float', "EDEP": 'float', "comment": 'str'}) 126 | # 127 | # def load_frcmod(fname): 128 | # """ 129 | # For the format of frcmod file look at: 130 | # http://ambermd.org/formats.html#frcmod 131 | # 132 | # :param fname: 133 | # :return: 134 | # """ 135 | # global forcefield 136 | # with open(fname, 'r') as f: 137 | # contents = f.readlines() 138 | # 139 | # starts = [contents.index(f+"\n") for f in fields] 140 | # ends = [s-1 for s in starts[1:]] 141 | # ends.append(len(contents)-1) 142 | # for i in range(len(fields)): 143 | # field, cols, start, end, pattern = fields[i], columns[i], starts[i], ends[i], patterns[i] 144 | # for line in contents[start+1:end+1]: 145 | # m = re.search(pattern, line) 146 | # if not m: 147 | # continue 148 | # row_dict = {c:w for w,c in zip(m.groups(), cols)} 149 | # forcefield[field] = forcefield[field].append(row_dict, ignore_index=True) # save this line to the dataframe 150 | # update_forcefield_dtypes() 151 | # 152 | # def write_frcmod(outfname): 153 | # 154 | # global forcefield, args 155 | # 156 | # out = open(outfname, 'w') 157 | # for i in range(len(fields)): 158 | # field, format = fields[i], formats[i] 159 | # out.write(field + "\n") 160 | # for i, row in forcefield[field].iterrows(): 161 | # out.write(format % tuple(row.values)) 162 | # out.write("\n") 163 | 164 | 165 | def run_commandline(commandline, logname="log", append=False, return_out=False, error_keywords=[], skip_fail=False, 166 | verbose=True): 167 | """ 168 | FUNCTION to run a single command on the UNIX shell. The worker will only receive an index from network. 169 | """ 170 | if append: 171 | fout = open(logname, 'a') 172 | else: 173 | fout = open(logname, 'w') 174 | if verbose: 175 | print("Running commandline:", commandline) 176 | return_code = call(commandline, stdout=fout, stderr=fout, shell=True, executable='/bin/bash') 177 | 178 | if (return_code != 0): 179 | print("ERROR, THE FOLLOWING COMMAND FAILED TO RUN:", "FAIL") 180 | print(commandline) 181 | print("return_code=", return_code) 182 | fout.close() 183 | print("Output:") 184 | with open(logname, 'r') as f: 185 | contents = f.readlines() 186 | for line in contents: 187 | print(line) 188 | if not skip_fail: 189 | raise Exception() 190 | fout.close() 191 | 192 | if len(error_keywords) > 0: 193 | with open(logname, 'r') as f: 194 | contents = f.readlines() 195 | for line in contents: 196 | for word in error_keywords: 197 | if word in line: 198 | print("ERROR, THE FOLLOWING COMMAND FAILED TO RUN:") 199 | print(commandline) 200 | print("COMMAND OUTPUT:") 201 | for line in contents: 202 | print(line) 203 | raise Exception() 204 | 205 | if return_out: 206 | with open(logname, 'r') as f: 207 | contents = f.readlines() 208 | return contents 209 | 210 | def create_prmtop(frcmod, ligfile): 211 | 212 | if os.path.exists("tmp/"): 213 | shutil.rmtree("tmp/") 214 | os.mkdir("tmp/") 215 | run_commandline("ln -s %s %s/frcmod.ligand" % (os.path.abspath(frcmod), os.path.abspath("tmp/"))) 216 | 217 | # convert with antechamber to mol2 with GAFF2 atom types 218 | # NOTE: -at gaff2 writes some unknown atom types that are not in the frcmod file (e.g. nh->nu, n->ns, n3->n7). 219 | run_commandline("antechamber -i %s -fi %s -o tmp/ligand.gaff2.mol2 -fo mol2 -rn LIG -at gaff2 -dr n" 220 | % (ligfile, ligfile.split('.')[-1])) 221 | 222 | ligand_leap = """ 223 | source leaprc.gaff2 224 | loadAmberParams tmp/frcmod.ligand 225 | LIG = loadMol2 tmp/ligand.gaff2.mol2 226 | saveAmberParm LIG tmp/ligand.prmtop tmp/ligand.inpcrd 227 | quit 228 | """ 229 | 230 | with open("tmp/ligand_leap.in", 'w') as f: 231 | f.write(ligand_leap) 232 | leap_out = run_commandline("tleap -s -f tmp/ligand_leap.in", return_out=True, error_keywords=['FATAL:']) 233 | 234 | 235 | def write_corrected_frcmod(ligfile, frcmod, out_frcmod, verbose=False): 236 | """ 237 | This method takes the equilibrium bond lengths and angles from the ligfile and writes a new 238 | frcmod file with corrected GAFF2 ligand parameters for MD. 239 | 240 | :param ligfile: mol2 or sdf file with optimized ligand geometry from where to copy bond lengths and angles. 241 | :param frcmod: the frcmod file that needs corrections. 242 | :param out_frcmod: the name of the output frcmod file that carries the corrections. 243 | :return: 244 | """ 245 | global args 246 | 247 | # create the prmtop and inpcrd file within a 'tmp/' folder 248 | create_prmtop(frcmod, ligfile) 249 | 250 | # load them to PARMED 251 | mol = pmd.load_file("tmp/ligand.prmtop", xyz="tmp/ligand.inpcrd", structure=True) 252 | bond_dict = defaultdict(list) 253 | for bond in mol.bonds: 254 | # print("%s-%s XXX %f" % (bond.atom1.type, bond.atom2.type, bond.measure())) 255 | bond_dict["%s-%s" % (bond.atom1.type, bond.atom2.type)].append(bond.measure()) 256 | bond_dict["%s-%s" % (bond.atom2.type, bond.atom1.type)].append(bond.measure()) # add the reverse bond, too 257 | 258 | if verbose: 259 | print("\nBond = mean value += stdev, min-max") 260 | for bondname, distlist in bond_dict.items(): 261 | print("%s = %f +- %f, %f" % (bondname, np.mean(distlist), np.std(distlist), np.ptp(distlist))) 262 | 263 | angle_dict = defaultdict(list) 264 | for angle in mol.angles: 265 | angle_dict["%s-%s-%s" % (angle.atom1.type, angle.atom2.type, angle.atom3.type)].append(angle.measure()) 266 | angle_dict["%s-%s-%s" % (angle.atom3.type, angle.atom2.type, angle.atom1.type)].append(angle.measure()) # add the reverse angle, too 267 | 268 | if verbose: 269 | print("\nAngle = mean value += stdev, min-max") 270 | for anglename, anglelist in angle_dict.items(): 271 | print("%s = %f +- %f, %f" % (anglename, np.mean(anglelist), np.std(anglelist), np.ptp(anglelist))) 272 | 273 | # par = pmd.load_file(frcmod) 274 | 275 | for bond in mol.bonds: 276 | bondname = "%s-%s" % (bond.atom1.type, bond.atom2.type) 277 | assert bondname in bond_dict.keys(), "ERROR: bond %s does not exist in the mol2 file with " \ 278 | "the optimized geometry!" % bondname 279 | idx = bond.type.idx 280 | bond.type.req = round(np.mean(bond_dict[bondname]), 3) # replace with the mean bond value 281 | mol.bond_types[idx].req = round(np.mean(bond_dict[bondname]), 3) # replace with the mean bond value 282 | 283 | for angle in mol.angles: 284 | anglename = "%s-%s-%s" % (angle.atom1.type, angle.atom2.type, angle.atom3.type) 285 | assert anglename in angle_dict.keys(), "ERROR: angle %s does not exist in the mol2 file with " \ 286 | "the optimized geometry!" % anglename 287 | idx = angle.type.idx 288 | angle.type.theteq = round(np.mean(angle_dict[anglename]), 3) # replace with the mean angle value 289 | mol.angle_types[idx].theteq = round(np.mean(angle_dict[anglename]), 3) # replace with the mean angle value 290 | 291 | # par.write('edited_'+frcmod, title="Created by mod_frcmod.py script.", style='frcmod') 292 | pmd.tools.writeFrcmod(mol, out_frcmod).execute() 293 | 294 | # clean intermediate files 295 | shutil.rmtree("tmp/") 296 | 297 | 298 | ################################################### END OF FUNCTION DEFINITIONS ########################################## 299 | 300 | if __name__ == "__main__": 301 | 302 | try: 303 | args = cmdlineparse() 304 | if args.OUT_FRCMOD == None: 305 | args.OUT_FRCMOD = "mod_%s" % args.FRCMOD 306 | write_corrected_frcmod(args.LIGFILE, args.FRCMOD, args.OUT_FRCMOD) 307 | 308 | except: 309 | type, value, tb = sys.exc_info() 310 | lines = traceback.format_exception(type, value, tb) 311 | print(''.join(lines)) 312 | raise -------------------------------------------------------------------------------- /show_ligand_interactions/README.md: -------------------------------------------------------------------------------- 1 | # A new PyMOL command to visualize receptor-ligand interactions and create publication-quality images. 2 | 3 | This script is also within the [Pymol-script-repo](https://pymolwiki.org/index.php/Git_install_scripts), which I strongly recommend to install. In that case you just need to import it into PyMOL: 4 | ``` 5 | import show_ligand_interactions 6 | ``` 7 | Otherwise you can download it from here and load it on PyMOL every time you launch it. 8 | ``` 9 | run 0: 54 | contacts_mdict[receptor_pdb][ligpdb] = (cont399, cont402, cont403, cont404) 55 | cmd.delete("cont*") 56 | cmd.delete(ligmol) 57 | cmd.delete(receptor) 58 | 59 | print("\nCONTACT RESULTS FOR INHIBITOR %s:" % inhibitor) 60 | print("receptor_pdb\tligand_pdb\tcontact_399\tcontact_402\tcontact_403\tcontact_404\n") 61 | struct_files = "" 62 | for receptor_pdb in contacts_mdict.keys(): 63 | struct_files += " " + receptor_pdb 64 | for ligpdb in contacts_mdict[receptor_pdb].keys(): 65 | struct_files += " " + ligpdb 66 | print(receptor_pdb, ligpdb, contacts_mdict[receptor_pdb][ligpdb]) 67 | print("To load the poses:") 68 | print("pymol " + struct_files) 69 | 70 | 71 | -------------------------------------------------------------------------------- /show_ligand_interactions/image_gallery/BACE_104_liginter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/show_ligand_interactions/image_gallery/BACE_104_liginter.jpg -------------------------------------------------------------------------------- /show_ligand_interactions/image_gallery/CatS_335_liginter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/show_ligand_interactions/image_gallery/CatS_335_liginter.jpg -------------------------------------------------------------------------------- /show_ligand_interactions/image_gallery/Thrombin_2zc9_liginter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/show_ligand_interactions/image_gallery/Thrombin_2zc9_liginter.jpg -------------------------------------------------------------------------------- /show_ligand_interactions/show_ligand_interactions.py: -------------------------------------------------------------------------------- 1 | #show_ligand_interactions v.1.0 2 | # author: Thomas Evangelidis, 2019 3 | # License: BSD-2-Clause 4 | 5 | from pymol import cmd, util 6 | import show_bumps 7 | 8 | def show_ligand_interactions(recsel="not hetatm", ligsel="hetatm", cutoff=5): 9 | """ 10 | DESCRIPTION 11 | 12 | Visualize interactions between receptor and ligand. 13 | 14 | ARGUMENTS 15 | 16 | recsel = string: atom selection of the receptor {default: "not hetatm"} 17 | 18 | ligsel = string: atom selections of the ligand {default: "hetatm"} 19 | 20 | cutoff = float: show as sticks all receptor residues within this distance from the ligand {default: 5.0} 21 | """ 22 | cmd.select('ligand', ligsel) 23 | cmd.select('receptor', recsel) 24 | 25 | cmd.bg_color('white') 26 | cmd.show_as('cartoon') 27 | cmd.show_as('sticks', 'hetatm or ligand') 28 | cmd.show_as('nonbonded', "resn HOH+T3P+WAT within %s of ligand" % cutoff) 29 | cmd.set('cartoon_transparency', 0.2) 30 | cmd.spectrum(selection=recsel+" or "+ligsel,byres=1) 31 | util.cbag('not name C*') 32 | cmd.set('cartoon_fancy_helices', 1); 33 | cmd.show("sticks", "(hydro)"); 34 | cmd.select("pocket", "byres (receptor within %s of ligand)" % cutoff); 35 | cmd.show("sticks", "pocket") 36 | cmd.hide('(h. and (e. c extend 1))') 37 | cmd.set('h_bond_max_angle', 30) 38 | cmd.set('h_bond_cutoff_center', 3.6) 39 | cmd.set('h_bond_cutoff_edge', 3.2) 40 | cmd.dist('ligand_Hbonds', 'ligand', 'receptor', 3.5, mode=2) 41 | cmd.set('dash_radius', 0.15) 42 | # now set the label options 43 | cmd.set('label_size', 20) 44 | cmd.set('label_position', [0,0,10]) 45 | cmd.orient("ligand") 46 | 47 | cmd.extend('show_ligand_interactions', show_ligand_interactions) 48 | -------------------------------------------------------------------------------- /visualize_ECFP_fragments/fragment_molecules.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__="Thomas Evangelidis" 4 | __email__="tevang3@gmail.com" 5 | 6 | SMILES="CCC1=CC=C(C=C1)C=C2C(=O)NC(=S)S2" # 10058-F4 c-Myc inhibitor 7 | SMILES='CC(C)c5cc(CNC[C@@H](O)[C@@H]4C[C@H](C)CCCCCN([C@H](C)c1ccccc1)C(=O)c2cc(cc(c2)C3=NC=CO3)C(=O)N4)ccc5' # macrocycle 8 | 9 | from argparse import ArgumentParser, RawDescriptionHelpFormatter 10 | from rdkit.Chem.Draw.IPythonConsole import * 11 | from rdkit.Chem.Draw import MolToFile 12 | from rdkit import Chem 13 | from rdkit.Chem import AllChem 14 | from rdkit.Chem.rdmolfiles import MolFromMol2File 15 | import os, shutil 16 | 17 | ## Parse command line arguments 18 | def cmdlineparse(): 19 | parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=""" 20 | DESCRIPTION: 21 | 22 | This is a Python script to create fragments as in ECFP fingerprints. 23 | 24 | """, 25 | epilog=""" 26 | ### EXAMPLE 1: 27 | 28 | """) 29 | parser.add_argument("-smiles", dest="SMILES", required=False, default=None, 30 | help="The molecule to be fragmented in SMILES format.") 31 | parser.add_argument("-mol2", dest="MOL2", required=False, default=None, 32 | help="The molecule to be fragmented in MOL2 format.") 33 | parser.add_argument("-outfolder", dest="OUT_FOLDER", required=False, default="fragments", type=str, 34 | help="The folder name which will be created (or erased if it already exists) where the " 35 | "PNG images of the fragments will be saved.") 36 | parser.add_argument("-fpradius", dest="FP_RADIUS", required=False, default=2, type=int, 37 | help="The ECFP radius parameter value (distance in number of bonds). Default: %(default)s") 38 | 39 | args = parser.parse_args() 40 | return args 41 | 42 | if __name__ == "__main__": 43 | args = cmdlineparse() 44 | if args.SMILES: 45 | mol = Chem.MolFromSmiles(args.SMILES) 46 | if args.MOL2: 47 | mol = MolFromMol2File(args.MOL2, sanitize=False, removeHs=False) 48 | if os.path.exists(args.OUT_FOLDER): 49 | shutil.rmtree(args.OUT_FOLDER) 50 | os.mkdir(args.OUT_FOLDER) 51 | 52 | MolToFile(mol, "original_molecule.png") 53 | shutil.move("original_molecule.png", args.OUT_FOLDER + "/original_molecule.png") 54 | bi = {} 55 | fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=args.FP_RADIUS, bitInfo=bi) 56 | for k in bi.keys(): 57 | mfp2_svg = DrawMorganBit(mol, k, bi) 58 | mfp2_svg.save(fp="%s/%i_frag.png" % (args.OUT_FOLDER, k), format="PNG") 59 | 60 | # TODO: show all fragments in one figure 61 | # https://stackoverflow.com/questions/37365824/pandas-ipython-notebook-include-and-display-an-image-in-a-dataframe 62 | -------------------------------------------------------------------------------- /visualize_ECFP_fragments/visualize_ECFP_fragments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "heading", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": {} 8 | }, 9 | "level": 1, 10 | "source": [ 11 | "Create and Visualize Fragments like in ECFP Fingerprints" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "pycharm": {} 18 | }, 19 | "source": "ECFP (extended connectivity fingerprints) aka circular fingerprints, are built by applying the Morgan algorithm to a set of user-supplied atom invariants. In this tutorial we will generate fragments of a macrocyclic and a non-macrocyclic molecule of similar size and compare them.\n\n When generating Morgan fingerprints, the radius of the fingerprint(must also be provided :" 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 29, 24 | "metadata": { 25 | "pycharm": {} 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "from rdkit import Chem, DataStructs\n", 30 | "from rdkit.Chem import AllChem, Draw, rdDepictor #, rdCoordGen # rdCoordGen requires Python 3.7\n", 31 | "from IPython.display import display, HTML\n", 32 | "import pandas as pd\n", 33 | "import os\n", 34 | "\n", 35 | "# The first canonical SMILES corresponds to macrocyclic compound BACE_149 from D3R GC2018\n", 36 | "# The following two SMILES correspond to non-macrocyclic compounds\n", 37 | "smiles \u003d [\u0027CCCCNC(\u003dO)[C@H](C)C[C@H](O)[C@@H]1C[C@H](C)CCCCCCC[C@H](NC(\u003dO)OC(C)(C)C)C(\u003dO)N[C@@H](C)C(\u003dO)N1\u0027,\n", 38 | " \u0027CCCCNC(\u003dO)C(C)CC(C(CC1CCCCC1)NC(\u003dO)C(C(C)C)NC(\u003dO)CNC(\u003dO)OC(C)(C)C)O\u0027,\n", 39 | " \u0027CC(C)C(C(\u003dO)NC(C(C)C)C(\u003dO)OC)NC(\u003dO)CCC(C(CC1CCCCC1)NC(\u003dO)C(C)NC(\u003dO)C(C)N)O\u0027\n", 40 | " ]\n", 41 | "ids \u003d [\u0027BACE_149\u0027, \u0027mol2\u0027, \u0027mol3\u0027]\n", 42 | "df \u003d pd.DataFrame({\u0027mol\u0027: [Chem.MolFromSmiles(x) for x in smiles]}, index\u003dids)\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "pycharm": {} 49 | }, 50 | "source": [ 51 | "Lets visualize these 3 molecules." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 30, 57 | "metadata": { 58 | "pycharm": {} 59 | }, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "/home2/thomas/Documents/tutorials/Multilayer_Perceptron_Keras\n" 66 | ] 67 | }, 68 | { 69 | "ename": "IOError", 70 | "evalue": "[Errno 2] No such file or directory: \u0027images/gridmol.png\u0027", 71 | "traceback": [ 72 | "\u001b[0;31m\u001b[0m", 73 | "\u001b[0;31mIOError\u001b[0mTraceback (most recent call last)", 74 | "\u001b[0;32m\u003cipython-input-30-febc8a8e8909\u003e\u001b[0m in \u001b[0;36m\u003cmodule\u003e\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# display(img) # try it again in Python 3.7\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetcwd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---\u003e 17\u001b[0;31m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"images/gridmol.png\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# save the image to a file for the time being\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 75 | "\u001b[0;32m/home/thomas/.local/lib/python2.7/site-packages/PIL/Image.pyc\u001b[0m in \u001b[0;36msave\u001b[0;34m(self, fp, format, **params)\u001b[0m\n\u001b[1;32m 2002\u001b[0m \u001b[0;31m# Open also for reading (\"+\"), because TIFF save_all\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2003\u001b[0m \u001b[0;31m# writer needs to go back and edit the written data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-\u003e 2004\u001b[0;31m \u001b[0mfp\u001b[0m \u001b[0;34m\u003d\u001b[0m \u001b[0mbuiltins\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"w+b\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2006\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 76 | "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: \u0027images/gridmol.png\u0027" 77 | ], 78 | "output_type": "error" 79 | } 80 | ], 81 | "source": "mols \u003d []\nfor mol in df[\u0027mol\u0027].values:\n mol \u003d Chem.Mol(mol)\n rdDepictor.Compute2DCoords\n # rdCoordGen.AddCoords(mol) # requires Python 3.7\n # rescale(mol, f\u003d1.4) # AddCoords seems to produced coordinates that are hard to display, so rescale them\n mols.append(mol)\nlegends \u003d df[\u0027mol\u0027].keys()\nimg \u003d Draw.MolsToGridImage(mols, \n molsPerRow\u003dlen(legends),\n subImgSize\u003d(300, 300),\n legends\u003dlegends,\n useSVG\u003dFalse, # set to True in Python 3.7\n )\n# display(img) # try it again in Python 3.7\nprint(os.getcwd())\nimg.save(\"images/gridmol.png\") # save the image to a file for the time being" 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "pycharm": {} 88 | }, 89 | "outputs": [], 90 | "source": "fp1 \u003d AllChem.GetMorganFingerprint(m1,radius\u003d3)\nfp2 \u003d AllChem.GetMorganFingerprint(m2,radius\u003d3)\nfp3 \u003d AllChem.GetMorganFingerprint(m3,radius\u003d3)\nprint(\"The ECFP fingeprint(similarity between m1 and m2 is %f\" % DataStructs.DiceSimilarity(fp1,fp2))\nprint(\"The ECFP fingeprint(similarity between m2 and m3 is %f\" % DataStructs.DiceSimilarity(fp2,fp3))\nprint(\"The ECFP fingeprint(similarity between m1 and m3 is %f\" % DataStructs.DiceSimilarity(fp1,fp3))" 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "pycharm": {} 96 | }, 97 | "source": "Morgan fingerprints, like atom pairs and topological torsions, use counts bm1y default, but it’s also possible to calculate them as bit vectors:" 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": { 103 | "pycharm": {} 104 | }, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "The ECFP fingeprint similarity between m1 and m2 is 0.480000\nThe ECFP fingeprint similarity between m2 and m3 is 0.555556\nThe ECFP fingeprint similarity between m1 and m3 is 0.248521\n" 111 | ] 112 | } 113 | ], 114 | "source": "fp1 \u003d AllChem.GetMorganFingerprintAsBitVect(m1,radius\u003d3,nBits\u003d4096)\nfp2 \u003d AllChem.GetMorganFingerprintAsBitVect(m2,radius\u003d3,nBits\u003d4096)\nfp3 \u003d AllChem.GetMorganFingerprintAsBitVect(m3,radius\u003d3,nBits\u003d4096)\nprint(\"The ECFP fingeprint(similarity between m1 and m2 is %f\" % DataStructs.DiceSimilarity(fp1,fp2))\nprint(\"The ECFP fingeprint(similarity between m2 and m3 is %f\" % DataStructs.DiceSimilarity(fp2,fp3))\nprint(\"The ECFP fingeprint(similarity between m1 and m3 is %f\" % DataStructs.DiceSimilarity(fp1,fp3))" 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "pycharm": {} 120 | }, 121 | "source": [ 122 | "As you can see, the similarity changes slightly if you express them as bit vectors and can change further if you increase the nBits parameter which controls bit collisions.\n", 123 | "\n", 124 | "When comparing the ECFP/FCFP fingerprints and the Morgan fingerprints generated by the RDKit, remember that the 4 in ECFP4 corresponds to the diameter of the atom environments considered, while the Morgan fingerprints take a radius parameter. So the examples above, with radius\u003d2, are roughly equivalent to ECFP4 and FCFP4." 125 | ] 126 | }, 127 | { 128 | "cell_type": "heading", 129 | "metadata": { 130 | "pycharm": {} 131 | }, 132 | "level": 2, 133 | "source": [ 134 | "Explaining bits from Morgan Fingerprints." 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 2", 141 | "language": "python", 142 | "name": "python2" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 2 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython2", 154 | "version": "2.7.6" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 0 159 | } -------------------------------------------------------------------------------- /visualize_ligand_properties/README.md: -------------------------------------------------------------------------------- 1 | # UCSF Chimera Tutorial: visualize ligand properties (charges, bond lengths & angles, etc.) 2 | 3 | Load the two ligand files into Chimera and label atom names and charges 4 | ``` 5 | labelopt info "%(name)s %(charge)+.3f" 6 | label 7 | ``` 8 | or if you want the atom type as well 9 | ``` 10 | labelopt info "%(name)s(%(idatmType)s) %(charge)+.3f" 11 | label 12 | ``` 13 | or to show only one molecule and label by atom type 14 | ``` 15 | ~display #1 16 | labelopt info "%(idatmType)s" 17 | label 18 | ``` 19 | to change the label font size go Favorites->Preferences->"Category:"Background. 20 | to change the background color (althouhg black is good to display labels) 21 | ``` 22 | background solid white 23 | ``` 24 | change representation (only the "wire" works well with .mol2 files) 25 | ``` 26 | represent wire 27 | ``` 28 | play with scale command to zoom in the image as much as you want 29 | ``` 30 | scale 1.4 31 | ``` 32 | 33 | 34 | You can label the bonds with their length with this Python: 35 | 36 | ```python 37 | from chimera import openModels, Molecule 38 | for mol in openModels.list(modelTypes=[Molecule]): 39 | for b in mol.bonds: 40 | b.label = "%.2f" % b.length() 41 | ``` 42 | 43 | You can label atoms, bonds, and residues in this fashion, but there is no provision to label bond angles per se. If you meant torsion angles and wanted to add that to the bond label, you can get the floating-point dihedral value of four atoms with this code: 44 | 45 | ```python 46 | import chimera 47 | dihed_val = chimera.dihedral(a1.coord(), a2.coord(), a3.coord(), a4.coord()) 48 | ``` 49 | --------------------------------------------------------------------------------