├── .idea
├── .gitignore
├── libraries
│ └── R_User_Library.xml
├── misc.xml
├── modules.xml
├── other.xml
├── tutorials.iml
└── vcs.xml
├── 2D_conformation_sampling
├── Network_Interpretation.png
└── alternative_2D_projections.png
├── Chemception_2D
└── chemception2D.py
├── Chemical_Shift_back_calculation_from_MD
├── README.md
└── calc_shifts_from_xtc.pl
├── Electrostatic_Potential_Globular_Protein
├── 3K5C-BACE.pdb
├── 3K5C-BACE_1.mol
├── BACE_pocket_EP2.png
└── README.md
├── LICENSE
├── Morphing_Conformations
└── README.md
├── Multilayer_Perceptron_Keras.tar.gz
├── Multilayer_Perceptron_Keras
├── .ipynb_checkpoints
│ ├── MLP_Keras-checkpoint.ipynb
│ └── qsar-rdkit-cdk2-checkpoint.ipynb
├── MLP_Keras.ipynb
└── data
│ ├── cdk1.sdf
│ ├── cdk1_pK.dat
│ ├── cdk2.sdf
│ ├── cdk2_large.sdf
│ ├── cdk2_pK.dat
│ └── logBB.sdf
├── README.md
├── ROC_curves
├── .Rhistory
├── README.Rmd
├── README.html
├── README.md
├── data
│ ├── .Rhistory
│ ├── ROC_curves.png
│ ├── SF1.scores
│ ├── SF2.scores
│ ├── SF3.scores
│ ├── SF4.scores
│ ├── SF5.scores
│ └── activities
└── function_definitions.r
├── compare_atomic_properties
├── README.md
├── data
│ ├── compound_stereo1_ion1_tau1.COSMO_PM6.mol2
│ └── compound_stereo1_ion1_tau1.COSMO_PM6.sdf
├── mean_pose.png
├── pose1.png
├── pose2.marked.png
├── pose2.png
└── std_pose.png
├── create_alternative_protonations
├── README.md
├── images
│ └── 1a30_all_protonations.png
└── protonate_receptor.py
├── dockprep
├── README.md
├── dockprep.py
└── example_files
│ ├── 3K5C-BACE.pdb
│ ├── 3K5C-BACE_1.mol
│ ├── 3K5C-BACE_4.mol
│ ├── 3K5C-BACE_5.mol
│ ├── 3K5C-BACE_6.mol
│ ├── 3K5C-BACE_7.mol
│ └── 3K5C-BACE_8.mol
├── mod_frcmod
└── mod_frcmod.py
├── show_ligand_interactions
├── README.md
├── find_ligands_interacting_with_residueset.py
├── image_gallery
│ ├── BACE_104_liginter.jpg
│ ├── CatS_335_liginter.jpg
│ └── Thrombin_2zc9_liginter.jpg
└── show_ligand_interactions.py
├── visualize_ECFP_fragments
├── fragment_molecules.py
└── visualize_ECFP_fragments.ipynb
└── visualize_ligand_properties
└── README.md
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
--------------------------------------------------------------------------------
/.idea/libraries/R_User_Library.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/other.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/tutorials.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/2D_conformation_sampling/Network_Interpretation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/2D_conformation_sampling/Network_Interpretation.png
--------------------------------------------------------------------------------
/2D_conformation_sampling/alternative_2D_projections.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/2D_conformation_sampling/alternative_2D_projections.png
--------------------------------------------------------------------------------
/Chemception_2D/chemception2D.py:
--------------------------------------------------------------------------------
1 | import rdkit
2 | from rdkit import Chem
3 | from rdkit.Chem import AllChem
4 | import pandas as pd
5 | import numpy as np
6 | import matplotlib
7 | matplotlib.use('Qt4Agg') # temporary solution to avoid "ImportError: No module named PyQt5" which is mainly for Python 3
8 | import matplotlib.pyplot as plt
# Parenthesised single-argument form works on both Python 2 and 3 and matches
# the print(...) calls used in the rest of this script.
print("RDKit: %s"%rdkit.__version__)
10 |
11 |
12 | import keras
13 | from keras.models import Sequential, Model
14 | from keras.layers import Conv2D, MaxPooling2D, Input, GlobalMaxPooling2D
15 | from keras.layers.core import Dense, Dropout, Activation, Flatten
16 | from keras.optimizers import Adam
17 | from keras.preprocessing.image import ImageDataGenerator
18 | from keras.callbacks import ReduceLROnPlateau
19 | print("Keras: %s"%keras.__version__)
20 |
21 |
22 | data = pd.read_hdf("Sutherland.h5","table")
23 | data["mol"] = data["smiles"].apply(Chem.MolFromSmiles)
24 |
25 |
def chemcepterize_mol(mol, embed=20.0, res=0.5):
    """Encode a molecule as a square, 4-channel "Chemception" image.

    The molecule is copied; Gasteiger charges and 2D coordinates are computed
    on the copy, and the 2D layout is rasterised onto a regular grid that
    covers the coordinate window [-embed, +embed] in both directions.

    Channels of the returned array:
        0 -- bond order, drawn at sample points along every bond
        1 -- atomic number at each atom position
        2 -- hybridization (integer value of the RDKit enum)
        3 -- Gasteiger partial charge

    Args:
        mol: RDKit molecule. It is not modified; all work is done on a copy.
        embed: half-width of the drawing window, in 2D-coordinate units.
        res: grid spacing, in the same units.

    Returns:
        numpy array of shape (dims, dims, 4) with dims = int(embed*2/res).

    Raises:
        IndexError: if any atom or bond sample falls outside the window.
            Previously, coordinates below -embed produced negative indices
            that silently wrapped around to the opposite edge of the image.
    """
    dims = int(embed*2/res)
    cmol = Chem.Mol(mol.ToBinary())
    cmol.ComputeGasteigerCharges()
    AllChem.Compute2DCoords(cmol)
    coords = cmol.GetConformer(0).GetPositions()
    vect = np.zeros((dims, dims, 4))

    def to_pixel(point):
        # Map a 2D coordinate to grid indices and refuse anything off-grid.
        idx = int(round((point[0] + embed)/res))
        idy = int(round((point[1] + embed)/res))
        if not (0 <= idx < dims and 0 <= idy < dims):
            raise IndexError(
                "2D coordinate (%.3f, %.3f) lies outside the +/-%s embedding "
                "window; increase 'embed'" % (point[0], point[1], embed))
        return idx, idy

    # Bonds first. The interpolation fractions do not depend on the bond,
    # so they are computed once.
    frac = np.linspace(0, 1, int(1/res*2))
    for bond in cmol.GetBonds():
        bondorder = bond.GetBondTypeAsDouble()
        bcoords = coords[bond.GetBeginAtomIdx()]
        ecoords = coords[bond.GetEndAtomIdx()]
        for f in frac:
            idx, idy = to_pixel(f*bcoords + (1-f)*ecoords)
            # Save in the vector first channel
            vect[idx, idy, 0] = bondorder

    # Atom layers
    for i, atom in enumerate(cmol.GetAtoms()):
        idx, idy = to_pixel(coords[i])
        # Atomic number
        vect[idx, idy, 1] = atom.GetAtomicNum()
        # Gasteiger charge: GetProp returns a string, so convert explicitly
        vect[idx, idy, 3] = float(atom.GetProp("_GasteigerCharge"))
        # Hybridization
        vect[idx, idy, 2] = atom.GetHybridization().real
    return vect
60 |
61 |
62 | # To better understand what the code has done, lets try to “chemcepterize” a molecule and show it as an image.
63 | # The embedding and the resolution are set lower than they will be for the final dataset. Matplotlib only supports
64 | # RGB, so only the first three channels are used.
65 | mol = data["mol"][0]
66 | v = chemcepterize_mol(mol, embed=10, res=0.2)
67 | print(v.shape)
68 | plt.imshow(v[:,:,:3])
69 |
70 |
71 | # Next step is to “chemcepterize” the entire collection of RDKit molecules and add a new column with the “images” to the dataframe
def vectorize(mol):
    """Return the Chemception image of `mol` using an embedding of 12."""
    image = chemcepterize_mol(mol, embed=12)
    return image
74 | data["molimage"] = data["mol"].apply(vectorize)
75 |
76 | # The dataset already had a split value indicating if it should be train or test set. The shape of the final numpy arrays are
77 | # (samples, height, width, channels)
78 | X_train = np.array(list(data["molimage"][data["split"]==1]))
79 | X_test = np.array(list(data["molimage"][data["split"]==0]))
80 | print(X_train.shape)
81 | print(X_test.shape)
82 |
83 |
84 | # We also need to prepare the values to predict. Here it is the IC50 for some DHFR inhibitors. The data is converted to log space and
85 | # the robust scaler from scikit-learn is used to scale the data to somewhat between -1 and 1 (neural networks like this range and it makes
86 | # training somewhat easier).
87 | assay = "PC_uM_value"
88 | y_train = data[assay][data["split"]==1].values.reshape(-1,1)
89 | y_test = data[assay][data["split"]==0].values.reshape(-1,1)
90 | from sklearn.preprocessing import RobustScaler
91 | rbs = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(5.0, 95.0), copy=True)
92 | y_train_s = rbs.fit_transform(np.log(y_train))
93 | y_test_s = rbs.transform(np.log(y_test))
94 | h = plt.hist(y_train_s, bins=20)
95 |
96 |
# Per-sample shape of the training array (first axis dropped); used below as
# the network's input shape.
input_shape = X_train.shape[1:]
# Parenthesised form works on both Python 2 and 3 and matches the other
# print(...) calls in this script.
print(input_shape)
99 |
100 |
def Inception0(input):
    """First inception block: three parallel branches fed directly by `input`.

    Every convolution has 16 filters, 'same' padding and ReLU activation.
    The branches are 1x1 -> 3x3, 1x1 -> 5x5 and a single 1x1; their outputs
    are concatenated along the last axis.
    """
    conv_opts = dict(padding='same', activation='relu')
    # Layers are created in the same order as before so that layer indices
    # in the assembled model do not change.
    branch_3x3 = Conv2D(16, (1, 1), **conv_opts)(input)
    branch_3x3 = Conv2D(16, (3, 3), **conv_opts)(branch_3x3)
    branch_5x5 = Conv2D(16, (1, 1), **conv_opts)(input)
    branch_5x5 = Conv2D(16, (5, 5), **conv_opts)(branch_5x5)
    branch_1x1 = Conv2D(16, (1, 1), **conv_opts)(input)
    return keras.layers.concatenate([branch_3x3, branch_5x5, branch_1x1], axis=-1)
109 |
110 |
def Inception(input):
    """Inception block with a pooling branch.

    Every convolution has 16 filters, 'same' padding and ReLU activation.
    The branches are 1x1 -> 3x3, 1x1 -> 5x5 and a 3x3 max-pool (stride 1,
    'same' padding) followed by a 1x1; their outputs are concatenated along
    the last axis.
    """
    conv_opts = dict(padding='same', activation='relu')
    # Layers are created in the same order as before so that layer indices
    # in the assembled model do not change.
    branch_3x3 = Conv2D(16, (1, 1), **conv_opts)(input)
    branch_3x3 = Conv2D(16, (3, 3), **conv_opts)(branch_3x3)
    branch_5x5 = Conv2D(16, (1, 1), **conv_opts)(input)
    branch_5x5 = Conv2D(16, (5, 5), **conv_opts)(branch_5x5)
    branch_pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input)
    branch_pool = Conv2D(16, (1, 1), **conv_opts)(branch_pool)
    return keras.layers.concatenate([branch_3x3, branch_5x5, branch_pool], axis=-1)
120 |
121 |
# Assemble the network: three inception blocks, one max-pool whose window is
# as large as the feature map's first spatial dimension, then a small dense
# regression head with a single linear output.
input_img = Input(shape=input_shape)
x = Inception0(input_img)
x = Inception(x)
x = Inception(x)
od = int(x.shape[1])
x = MaxPooling2D(pool_size=(od,od), strides=(1,1))(x)
x = Flatten()(x)
x = Dense(100, activation='relu')(x)
output = Dense(1, activation='linear')(x)
model = Model(inputs=input_img, outputs=output)
# summary() prints the table itself and returns None, so wrapping it in a
# print statement only added a stray "None" (and was Python 2-only syntax).
model.summary()
133 |
134 |
135 | # For the optimization I use the Adam optimizer and the mean absolute error as a loss function.
136 | optimizer = Adam(lr=0.00025)
137 | model.compile(loss="mae", optimizer=optimizer)
138 |
139 |
140 | # The next part is crucial to avoid overfitting. Here the ImageDataGenerator object is used to perform random rotations and flips
141 | # of the images before the training as a way of augmenting the training dataset. By doing this, the network will learn how to handle
142 | # rotations and seeing the features in different orientations will help the model generalize better. Not including this will lead to
143 | # completely overfit models. We have not encoded stereochemical information in the images, otherwise the flipping should be done by
144 | # other means. The training set is concatenated to 50 times the length to have some sensible size epochs.
145 |
# NOTE(review): this rebinds ImageDataGenerator, which was already imported
# from keras.preprocessing.image at the top of the script, to whatever a
# module named `image` on the import path provides -- presumably a local,
# patched copy; confirm that it is shipped with this script.
from image import ImageDataGenerator
# Random rotations, shifts and flips; pixels exposed by a transform are
# filled with the constant 0.
generator = ImageDataGenerator(rotation_range=180,
                               width_shift_range=0.1,height_shift_range=0.1,
                               fill_mode="constant",cval = 0,
                               horizontal_flip=True, vertical_flip=True,data_format='channels_last',
                               )
#Concatenate for longer epochs
Xt = np.concatenate([X_train]*50, axis=0)
yt = np.concatenate([y_train_s]*50, axis=0)
batch_size=128
g = generator.flow(Xt, yt, batch_size=batch_size, shuffle=True)
# NOTE(review): steps_per_epoch is not used by the fit_generator call further
# down, which passes len(Xt)//batch_size instead.
steps_per_epoch = 10000/batch_size
158 |
159 |
160 |
161 | # Now for the interesting part: Training. To lower the learning rate once the validation loss starts to plateau off I use
162 | # the ReduceLROnPlateau callback available as part of Keras.
163 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,patience=10, min_lr=1e-6, verbose=1)
164 | history = model.fit_generator(g,
165 | steps_per_epoch=len(Xt)//batch_size,
166 | epochs=150,
167 | validation_data=(X_test,y_test_s),
168 | callbacks=[reduce_lr])
169 |
# Models can be saved and loaded. The history object's `history` dictionary
# is pickled next to the model file.
name = "Chemception_std_notebook_demo"
model.save("%s.h5"%name)
hist = history.history
import pickle
# open() instead of the Python 2-only file() builtin; binary mode is what
# pickle expects, and the with-block guarantees the file is flushed and closed.
with open("%s_history.pickle"%name, "wb") as history_file:
    pickle.dump(hist, history_file)
#from keras.models import load_model
#model = load_model("%s.h5"%name)
178 |
179 |
180 | # The convergence of the training can be judged from a plot of the learning process. Somewhat unusual, when there's
181 | # no regularization: The validation loss drops before the loss. The validation set is not augmented and thus consists of
182 | # some “perfect” pictures, whereas maybe it may take the network some longer to deal with all the rotations, which also
183 | # introduces some pixel artifacts due to the low resolution.
184 | for label in ['val_loss','loss']:
185 | plt.plot(hist[label], label = label)
186 | plt.legend()
187 | plt.yscale("log")
188 | plt.xlabel("Epochs")
189 | plt.ylabel("Loss/lr")
190 |
191 | # Plotting and Evaluating the Performance
192 | y_pred_t = rbs.inverse_transform(model.predict(X_train))
193 | y_pred = rbs.inverse_transform(model.predict(X_test))
194 | plt.scatter(np.log(y_train), y_pred_t, label="Train")
195 | plt.scatter(np.log(y_test), y_pred, label="Test")
196 | plt.xlabel("log(PC_uM)")
197 | plt.ylabel("predicted")
198 | plt.plot([-10,6],[-10,6])
199 | plt.legend()
200 |
201 | corr2 = np.corrcoef(np.log(y_test).reshape(1,-1), y_pred.reshape(1,-1))[0][1]**2
202 | rmse = np.mean((np.log(y_test) - y_pred)**2)**0.5
203 | print("R2 : %0.2F"%corr2)
204 | print("RMSE : %0.2F"%rmse)
205 |
206 |
207 | # Visualizing the Layers
208 | # It can be interesting to try and understand how the model "sees" the molecules. For this I’ll take an example molecule
209 | # and plot some of the outputs from the different layers. I’ve taken the compound with the lowest IC50, number 143 in the dataset.
210 | molnum = 143
211 | molimage = np.array(list(data["molimage"][molnum:molnum+1]))
212 | mol = data["mol"][molnum]
213 |
214 | # The molecule looks like this
215 | from rdkit.Chem import Draw
216 | Draw.MolToImage(mol)
217 |
218 | # And has this “chemcepterized” image as shown below
219 | plt.imshow(molimage[0,:,:,:3])
220 |
221 | # The first example is the third layer, which is the 1,1 convolution which feeds the 3,3 convolutional layer in tower 2.
222 | layer1_model = Model(inputs=model.input,
223 | outputs=model.layers[2].output)
224 | kernels1 = layer1_model.predict(molimage)[0]
def plot_kernels(kernels):
    """Show the first six channels of `kernels` on a 2x3 grid of subplots.

    `kernels` is indexed as kernels[:, :, channel], one panel per channel.
    """
    _, grid = plt.subplots(2, 3, figsize=(12, 8))
    for channel, panel in enumerate(grid.flatten()):
        panel.matshow(kernels[:, :, channel])
        panel.set_title("Kernel %s"%channel)
230 | plot_kernels(kernels1)
231 |
232 | # Lets go deeper...
233 | for layer in [7,13,15,19,20]:
234 | print("Layer %i"%layer)
235 | plot_kernels(Model(inputs=model.input,outputs=model.layers[layer].output).predict(molimage)[0])
236 | plt.show()
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
--------------------------------------------------------------------------------
/Chemical_Shift_back_calculation_from_MD/README.md:
--------------------------------------------------------------------------------
1 | Requirements:
2 | * Perl
3 | * GROMACS Tools
4 | * Sparta+
5 |
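6 | Usage: `calc_shifts_from_xtc.pl <pdb file> <trajectory file> <start time> <end time> <timestep>`
7 |
8 | Input: as described above. Times are in picoseconds. You can find the start and end time of your trajectory by doing: `gmx check -f <trajectory file>`.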
9 |
10 | Output: 'SHIFTS' file contains chemical shifts of all frames, 'consensus_chemical_shifts.tab' file contains the average chemical shifts of each atom.
11 |
12 |
--------------------------------------------------------------------------------
/Chemical_Shift_back_calculation_from_MD/calc_shifts_from_xtc.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -w
2 |
# Exactly five arguments are required. The usage text names them again: the
# angle-bracket placeholders had been lost, leaving a message that did not
# say what to pass.
(@ARGV == 5) or die "Usage: calc_shifts_from_xtc.pl <pdb file> <trajectory file> <start time> <end time> <timestep>\n
Input: as described above. You can find the start and end time of your trajectory by doing: gmx check -f <trajectory file>.\n
Output: 'SHIFTS' file contains chemical shifts of all frames, 'consensus_chemical_shifts.tab' file contains the average chemical shifts of each atom.\n";

# Structure file, trajectory, first and last time to process, and the
# spacing between saved frames.
($pdbfile, $trj, $start, $end, $timestep) = @ARGV;
12 |
#
# How many shifts will we be collecting?
#

# Dump a single frame and run SPARTA+ on it, only to learn which atoms get
# a chemical-shift prediction.
`echo 0 | gmx trjconv -s $pdbfile -f $trj -dump 1 -o ps0.pdb`;
`sparta+ -in ps0.pdb > /dev/null 2>&1`;
`/bin/rm -rf ps*.pdb`;

# Find the line where the structure description and CS predictions start
open ( IN, "pred.tab" ) or die "Can not open pred.tab. Usage: calc_shifts \n";
# The <IN> readline operators below had been lost, which left the
# assignments empty and the script unparsable.
while ( $line = <IN> )
{
    if ( $line =~ /^FORMAT/ )
    {
        last;
    }
}

$line = <IN>;   # skip the line that follows the FORMAT header
$tot = 0; # the total number of atoms with CS prediction
while ( $line = <IN> )
{
    # The first 14 characters of each prediction line identify the atom.
    $ids[ $tot ] = substr( $line, 0, 14 );
    $tot++;
}

close( IN );

# NOTE: this removes every *.tab file in the working directory.
`/bin/rm -rf *.tab`;

if ( $tot < 1 )
{
    print "Too few atoms for calculating shifts. Something is wrong. Bye.\n";
    exit;
}


print "Will be collecting data for $tot atoms. Starting ...\n";
51 |
52 |
#
# Will do it in sets of 400 structures ...
# Each pass covers a time window of 400 * $timestep, extracts the frames in
# it, runs SPARTA+ on them and appends one line per frame to SHIFTS.
#

$first = int($start);

# Start from a clean SHIFTS file: the loop below only ever appends to it.
`/bin/rm SHIFTS`;

while( $first < int($end) ) {

    $last = $first + 400 * $timestep -1;
    printf("Now processing set starting at picosecond %8d and ending at picosecond %8d\n", $first, $last );

    #for ($ps = $first; $ps <= $last; $ps += $timestep) { # not all ps are present in the trajectory (timestep for saving coordinates in MD was 2ps)
    # #print "DEBUG: echo 0 | gmx trjconv -dump $ps -s $pdbfile -f $trj -o ps${ps}.pdb > /dev/null 2>&1";
    # `echo 0 | gmx trjconv -dump $ps -s $pdbfile -f $trj -o ps${ps}.pdb > /dev/null 2>&1`;
    #}

    # real 17m26.892s
    # user 17m7.694s
    # sys 0m15.413s
    # Write every frame of the current window to its own ps*.pdb file.
    `echo 0 | gmx trjconv -b $first -e $last -sep -s $pdbfile -f $trj -o ps.pdb > /dev/null 2>&1`;

    `sparta+ -in ps*.pdb > /dev/null 2>&1`;

    # Only the ps*_pred.tab outputs are needed from here on.
    `/bin/rm -rf ps*.pdb *_struct.tab`;

    @files = glob("ps*.tab");


    # No prediction files for this window: stop processing.
    if ( @files == 0 )
    {
        last;
    }


    # Reduce each file name to its bare frame number so the frames can be
    # sorted numerically rather than lexically.
    @files = map {s/_pred\.tab//g; $_; } @files;
    @files = map {s/^ps//g; $_; } @files;
    my @ordered_files = sort {$a <=> $b} @files;


    foreach $file ( @ordered_files )
    {
        #print "DEBUG: appending shifts from file ps${file}_pred.tab \n";
        `tail -$tot ps${file}_pred.tab | awk '{printf "%8.3f ", \$5}' >> SHIFTS`; # append all chemical shifts of the current frame to a single line of SHIFTS file
        `echo >> SHIFTS`; # change line
    }

    `/bin/rm -rf ps*.tab`; # remove the frames that have been processed

    $first += 400 * $timestep;

}
106 |
107 | print "\n\n";
108 |
#
# Calculate means + sigmas using SHIFTS file
# (one line per frame, one column per atom)
#
open ( IN, "SHIFTS" ) or die "Can not open SHIFTS ??? How did this happen ???\n";
open ( OUT, '>>consensus_chemical_shifts.tab') or die "Can not open consensus_chemical_shifts.tab for writing: $!\n";

for ( $i=0 ; $i < $tot ; $i++ )
{
    # Welford's running mean / sum of squared deviations for column $i.
    $mean = 0.0;
    $nof_lines = 0;
    $std = 0.0;
    # The <IN> readline operator had been lost here, leaving an empty
    # assignment that does not parse.
    while ( $line = <IN> )
    {
        @data = split( ' ', $line );

        $nof_lines++;
        $delta = $data[ $i ] - $mean;
        $mean += $delta / $nof_lines;
        $std += $delta * ($data[ $i ] - $mean);
    }

    # The sample standard deviation needs at least two frames; report 0
    # instead of dying with a division by zero when there is only one.
    $sigma = ( $nof_lines > 1 ) ? sqrt( $std / ($nof_lines - 1) ) : 0.0;

    printf "%s %8.4f %8.4f\n", $ids[ $i ], $mean, $sigma;
    printf OUT "%s %8.4f %8.4f\n", $ids[ $i ], $mean, $sigma;

    # Rewind so the next atom's column can be read from the top of the file.
    seek( IN, 0, 0 );
}

close( IN );
close( OUT );

print "\nAll done.\n\n";
139 |
--------------------------------------------------------------------------------
/Electrostatic_Potential_Globular_Protein/3K5C-BACE_1.mol:
--------------------------------------------------------------------------------
1 | REMARK score -90.21
2 | LCcorina 10041815583D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 87 88 0 0 0 0 999 V2000
5 | 25.9314 5.1125 17.3673 C 0 0 0 0 0 0 0 0 0 0 0 0
6 | 26.3958 5.7692 18.5087 C 0 0 0 0 0 0 0 0 0 0 0 0
7 | 27.6294 6.4115 18.5006 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 26.6868 5.1203 16.1800 C 0 0 0 0 0 0 0 0 0 0 0 0
9 | 27.9071 5.7803 16.1600 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 28.3567 6.4294 17.3141 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 28.7339 5.7745 14.9144 C 0 0 0 0 0 0 0 0 0 0 0 0
12 | 28.2672 5.9839 13.7999 O 0 0 0 0 0 0 0 0 0 0 0 0
13 | 30.0161 5.5308 15.1565 N 0 0 0 0 0 0 0 0 0 0 0 0
14 | 31.1655 5.7229 14.2694 C 0 0 0 0 0 0 0 0 0 0 0 0
15 | 32.3013 6.5543 14.9475 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 31.7701 7.7059 15.6807 C 0 0 0 0 0 0 0 0 0 0 0 0
17 | 32.5417 8.1089 16.8492 C 0 0 0 0 0 0 0 0 0 0 0 0
18 | 31.7382 8.9416 17.8567 C 0 0 0 0 0 0 0 0 0 0 0 0
19 | 31.4978 8.5096 19.2845 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 30.6304 9.5192 20.0787 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 29.4018 8.8984 20.7758 C 0 0 0 0 0 0 0 0 0 0 0 0
22 | 28.4435 8.3191 19.8155 N 0 0 0 0 0 0 0 0 0 0 0 0
23 | 28.2301 6.9783 19.7605 C 0 0 0 0 0 0 0 0 0 0 0 0
24 | 28.6153 6.2012 20.6343 O 0 0 0 0 0 0 0 0 0 0 0 0
25 | 31.6361 8.9012 14.7208 C 0 0 0 0 0 0 0 0 0 0 0 0
26 | 27.5563 9.2855 19.1471 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 26.1995 9.3897 19.8666 C 0 0 0 0 0 0 0 0 0 0 0 0
28 | 24.7362 4.4553 17.3963 O 0 0 0 0 0 0 0 0 0 0 0 0
29 | 24.1183 4.4752 16.0315 C 0 0 0 0 0 0 0 0 0 0 0 0
30 | 22.5931 4.3157 16.1531 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 31.6697 4.3250 13.9202 C 0 0 0 0 0 0 0 0 0 0 0 0
32 | 31.9299 3.5877 15.1249 O 0 0 0 0 0 0 0 0 0 0 0 0
33 | 30.5990 3.5391 13.1173 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 31.2898 2.7953 11.9565 C 0 0 0 0 0 0 0 0 0 0 0 0
35 | 30.2806 1.9129 11.2208 C 0 0 0 0 0 0 0 0 0 0 0 0
36 | 31.8599 3.7873 10.9327 C 0 0 0 0 0 0 0 0 0 0 0 0
37 | 31.1890 4.7139 10.4842 O 0 0 0 0 0 0 0 0 0 0 0 0
38 | 33.0930 3.5508 10.5117 N 0 0 0 0 0 0 0 0 0 0 0 0
39 | 33.6869 4.3522 9.4692 C 0 0 0 0 0 0 0 0 0 0 0 0
40 | 34.4311 3.4314 8.5130 C 0 0 0 0 0 0 0 0 0 0 0 0
41 | 35.0024 4.1969 7.2880 C 0 0 0 0 0 0 0 0 0 0 0 0
42 | 36.2636 4.9971 7.6826 C 0 0 0 0 0 0 0 0 0 0 0 0
43 | 25.7958 5.7810 19.4067 H 0 0 0 0 0 0 0 0 0 0 0 0
44 | 26.3148 4.6169 15.2999 H 0 0 0 0 0 0 0 0 0 0 0 0
45 | 29.2986 6.9557 17.2660 H 0 0 0 0 0 0 0 0 0 0 0 0
46 | 30.1652 5.1352 16.1720 H 0 0 0 0 0 0 0 0 0 0 0 0
47 | 30.8093 6.2738 13.3865 H 0 0 0 0 0 0 0 0 0 0 0 0
48 | 32.8490 5.9165 15.6569 H 0 0 0 0 0 0 0 0 0 0 0 0
49 | 32.9940 6.9252 14.1777 H 0 0 0 0 0 0 0 0 0 0 0 0
50 | 30.8225 7.3438 16.1060 H 0 0 0 0 0 0 0 0 0 0 0 0
51 | 32.9033 7.2138 17.3764 H 0 0 0 0 0 0 0 0 0 0 0 0
52 | 33.4002 8.7179 16.5295 H 0 0 0 0 0 0 0 0 0 0 0 0
53 | 32.2194 9.9220 17.9885 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 30.7141 9.0833 17.4812 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 30.9772 7.5406 19.2916 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 32.4617 8.4118 19.8053 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 31.2407 9.9882 20.8646 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 30.2527 10.2947 19.3962 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 29.7308 8.0964 21.4529 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 28.8772 9.6737 21.3534 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 30.9754 8.6277 13.8849 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 32.6285 9.1711 14.3307 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 31.2086 9.7589 15.2607 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 28.0295 10.2785 19.1472 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 27.3787 8.9640 18.1102 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 25.7077 8.4058 19.8661 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 25.5624 10.1184 19.3440 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 26.3589 9.7195 20.9038 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 24.3476 5.4318 15.5392 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 24.5257 3.6475 15.4324 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 22.1522 4.2293 15.1491 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 22.1721 5.1941 16.6641 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 22.3643 3.4095 16.7332 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 32.6216 4.4157 13.3765 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 32.9038 3.6816 15.6277 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 30.1026 2.8145 13.7796 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 29.8526 4.2409 12.7169 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 32.0982 2.1846 12.3849 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 30.7877 1.3786 10.4039 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 29.4784 2.5408 10.8059 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 29.8497 1.1844 11.9235 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 33.6657 2.7302 10.9684 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 32.8970 4.8943 8.9284 H 0 0 0 0 0 0 0 0 0 0 0 0
84 | 34.3873 5.0738 9.9152 H 0 0 0 0 0 0 0 0 0 0 0 0
85 | 35.2706 2.9557 9.0410 H 0 0 0 0 0 0 0 0 0 0 0 0
86 | 33.7447 2.6560 8.1419 H 0 0 0 0 0 0 0 0 0 0 0 0
87 | 35.2699 3.4787 6.4991 H 0 0 0 0 0 0 0 0 0 0 0 0
88 | 34.2437 4.8957 6.9058 H 0 0 0 0 0 0 0 0 0 0 0 0
89 | 36.0008 5.7418 8.4482 H 0 0 0 0 0 0 0 0 0 0 0 0
90 | 36.6653 5.5090 6.7957 H 0 0 0 0 0 0 0 0 0 0 0 0
91 | 37.0228 4.3100 8.0846 H 0 0 0 0 0 0 0 0 0 0 0 0
92 | 1 2 2 0 0 0 0
93 | 1 4 1 0 0 0 0
94 | 1 24 1 0 0 0 0
95 | 2 3 1 0 0 0 0
96 | 2 39 1 0 0 0 0
97 | 3 6 2 0 0 0 0
98 | 3 19 1 0 0 0 0
99 | 4 5 2 0 0 0 0
100 | 4 40 1 0 0 0 0
101 | 5 6 1 0 0 0 0
102 | 5 7 1 0 0 0 0
103 | 6 41 1 0 0 0 0
104 | 7 8 2 0 0 0 0
105 | 7 9 1 0 0 0 0
106 | 9 10 1 0 0 0 0
107 | 9 42 1 0 0 0 0
108 | 10 11 1 0 0 0 0
109 | 10 27 1 0 0 0 0
110 | 10 43 1 0 0 0 0
111 | 11 12 1 0 0 0 0
112 | 11 44 1 0 0 0 0
113 | 11 45 1 0 0 0 0
114 | 12 13 1 0 0 0 0
115 | 12 21 1 0 0 0 0
116 | 12 46 1 0 0 0 0
117 | 13 14 1 0 0 0 0
118 | 13 47 1 0 0 0 0
119 | 13 48 1 0 0 0 0
120 | 14 15 1 0 0 0 0
121 | 14 49 1 0 0 0 0
122 | 14 50 1 0 0 0 0
123 | 15 16 1 0 0 0 0
124 | 15 51 1 0 0 0 0
125 | 15 52 1 0 0 0 0
126 | 16 17 1 0 0 0 0
127 | 16 53 1 0 0 0 0
128 | 16 54 1 0 0 0 0
129 | 17 18 1 0 0 0 0
130 | 17 55 1 0 0 0 0
131 | 17 56 1 0 0 0 0
132 | 18 19 1 0 0 0 0
133 | 18 22 1 0 0 0 0
134 | 19 20 2 0 0 0 0
135 | 21 57 1 0 0 0 0
136 | 21 58 1 0 0 0 0
137 | 21 59 1 0 0 0 0
138 | 22 23 1 0 0 0 0
139 | 22 60 1 0 0 0 0
140 | 22 61 1 0 0 0 0
141 | 23 62 1 0 0 0 0
142 | 23 63 1 0 0 0 0
143 | 23 64 1 0 0 0 0
144 | 24 25 1 0 0 0 0
145 | 25 26 1 0 0 0 0
146 | 25 65 1 0 0 0 0
147 | 25 66 1 0 0 0 0
148 | 26 67 1 0 0 0 0
149 | 26 68 1 0 0 0 0
150 | 26 69 1 0 0 0 0
151 | 27 28 1 0 0 0 0
152 | 27 29 1 0 0 0 0
153 | 27 70 1 0 0 0 0
154 | 28 71 1 0 0 0 0
155 | 29 30 1 0 0 0 0
156 | 29 72 1 0 0 0 0
157 | 29 73 1 0 0 0 0
158 | 30 31 1 0 0 0 0
159 | 30 32 1 0 0 0 0
160 | 30 74 1 0 0 0 0
161 | 31 75 1 0 0 0 0
162 | 31 76 1 0 0 0 0
163 | 31 77 1 0 0 0 0
164 | 32 33 2 0 0 0 0
165 | 32 34 1 0 0 0 0
166 | 34 35 1 0 0 0 0
167 | 34 78 1 0 0 0 0
168 | 35 36 1 0 0 0 0
169 | 35 79 1 0 0 0 0
170 | 35 80 1 0 0 0 0
171 | 36 37 1 0 0 0 0
172 | 36 81 1 0 0 0 0
173 | 36 82 1 0 0 0 0
174 | 37 38 1 0 0 0 0
175 | 37 83 1 0 0 0 0
176 | 37 84 1 0 0 0 0
177 | 38 85 1 0 0 0 0
178 | 38 86 1 0 0 0 0
179 | 38 87 1 0 0 0 0
180 | M END
181 | $$$$
182 |
--------------------------------------------------------------------------------
/Electrostatic_Potential_Globular_Protein/BACE_pocket_EP2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/Electrostatic_Potential_Globular_Protein/BACE_pocket_EP2.png
--------------------------------------------------------------------------------
/Electrostatic_Potential_Globular_Protein/README.md:
--------------------------------------------------------------------------------
1 | # CALCULATE AND VISUALIZE THE ELECTROSTATIC POTENTIAL OF A GLOBULAR PROTEIN
2 |
3 | #### NOTE: I strongly recommend installing the latest versions of PDB2PQR and APBS from [https://github.com/Electrostatics/apbs-pdb2pqr.git](https://github.com/Electrostatics/apbs-pdb2pqr.git).
4 |
5 | In this example we will calculate the electrostatic potential (**EP**) of BACE protease (beta-secretase 1), one of the targets of [D3R Grand Challenge 2018](https://drugdesigndata.org/about/grand-challenge-4).
6 | All compounds in the BACE free energy set of D3R Challenge 2018 had a charge of +2 at the assay pH value of 4.5. Therefore we shall calculate the electrostatic potential at this pH.
7 | ```
8 | pH=4.5
9 | pdbID=3K5C-BACE
10 | pdb2pqr.py --ff=CHARMM --apbs-input --with-ph=$pH --chain ${pdbID}.pdb ${pdbID}.pH${pH}.pqr
11 | ```
12 | this will create an input file for APBS named '3K5C-BACE.pH4.5.in'
13 |
14 | Launch APBS
15 | ```
16 | apbs ${pdbID}.pH${pH}.in --output-file=${pdbID}.pH${pH}_APBS.log
17 | ```
18 | this will create a density file with the electrostatic potential named '3K5C-BACE.pH4.5.pqr.dx'
19 |
20 |
21 | To view the EP on the protein surface, load the receptor pqr file `3K5C-BACE.pH4.5.pqr` in [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/) and display the surface by **Actions->Surface->Show**. If that fails, try to adjust the vdW radii by:
22 | ```
23 | vdwdefine +.05
24 | ```
25 | or to add hydrogens with the command
26 | ```
27 | addh
28 | ```
29 | and then select **Surface/Binding Analysis->Electrostatic Surface Coloring**, and under **potential file** select the .dx file. I will leave the default values, namely the surface will be colored with a color scale from -10 eV to 10 eV, with red representing negative charge, white neutral and blue positive. The charge value shown is that on the solvent
30 | accessible surface of the protein, namely 1.4 Å far from the surface.
31 |
32 | Now we will try to focus on the binding pocket. Load the ligand file `3K5C-BACE_1.mol` and use the **Tools->Depiction->Per-Model Clipping** utility to make tomographies of the EP map while maintaining the cartoon representation of the protein. Select as **model** the `MSMS main surface...` and experiment with **Enable clipping** and **Adjust clipping with mouse as below** options both activated. Use the middle and right mouse buttons to rotate and shift the clipping plane, respectively.
33 | Optionally, you can create a nice cartoon representation of the protein and ligand:
34 | ```
35 | ribspline cardinal smoothing both stiffness 0.8
36 | ribrepr edged
37 | set silhouette
38 | set silhouette_width 2
39 | set dcstart 0.3
40 | background solid white
41 | ```
42 | In the case of BACE, the binding pocket is covered by a loop ("beta turn"); therefore it is better to hide the ribbons for clarity.
43 | ```
44 | ~ribbon
45 | ```
46 | Now we are ready to write an image file. Go to **File->Image**, select the image name and type and save it!
47 |
48 | ![Electrostatic potential in the BACE binding pocket](BACE_pocket_EP2.png)
49 |
50 |
51 | Once you're done, you can save your session via **File->Save Session as**. When you reopen it with Chimera you might get an error saying:
52 | ```
53 | "Could not restore surface color on surface model with id 0.1 because volume used in coloring was not restored."
54 | ```
55 | To fix it simply go to **Surface/Binding Analysis->Electrostatic Surface Coloring** and click on **color** button.
56 |
57 |
58 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Thomas Evangelidis
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Morphing_Conformations/README.md:
--------------------------------------------------------------------------------
1 | Morphing protein-ligand conformations with UCSF Chimera
2 | ==========================
3 |
4 | In this tutorial we will do a simple morph between two conformations of a protein-ligand complex; the same steps
5 | can be used to morph through more than two structures.
6 |
7 | ```python
8 | open data/state1.pdb
9 | open data/state2.pdb
10 | del :Na+
11 | del :WAT
12 | ribspline cardinal smoothing both stiffness 0.8
13 | ribrepr edged
14 | set silhouette
15 | set silhouette_width 2
16 | set dcstart 0.3
17 | # under Tools->Depiction->Ribbon Style Editor, set Coil width and height to 0.15. The rest look fine.
18 | sel ~:LIG
19 | namesel rec
20 | sel :LIG
21 | namesel lig
22 | color pink rec
23 | color green lig
24 | zonesel lig 3.5 rec
25 | namesel pocket
26 | ~display rec
27 | display pocket
28 | background solid white
29 |
30 | # then Tools->Structure Comparison->Morph Conformations, add the two models, increase the number of intermediate conformations (e.g. 60),
31 | # do NOT select minimize, click create. Alternatively use the following command lines:
32 | morph start #0 name holo2apo frames 60
33 | morph interpolate #1 name holo2apo
34 | morph movie name holo2apo nogui true minimize false
35 |
36 | ```
37 |
38 |
--------------------------------------------------------------------------------
/Multilayer_Perceptron_Keras.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/Multilayer_Perceptron_Keras.tar.gz
--------------------------------------------------------------------------------
/Multilayer_Perceptron_Keras/.ipynb_checkpoints/MLP_Keras-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Tutorial 1: Multi-Layer Perceptron with Keras"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Objectives:"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "collapsed": true
21 | },
22 | "source": [
23 | "In this tutorial you will learn how to construct a simple Multi-Layer Perceptron model with Keras. Specifically you will learn to:\n",
24 | "* Create and add layers including weight initialization and activation.\n",
25 | "* Compile models including optimization method, loss function and metrics.\n",
26 | "* Fit models, including epochs and batch size.\n",
27 | "* Model predictions.\n",
28 | "* Summarize the model."
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stderr",
38 | "output_type": "stream",
39 | "text": [
40 | "/home/thomas/Programs/Anaconda2/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
41 | " from ._conv import register_converters as _register_converters\n",
42 | "Using TensorFlow backend.\n"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "import numpy as np\n",
48 | "from keras.models import Sequential\n",
49 | "from keras.layers import Dense\n",
50 | "from keras.wrappers.scikit_learn import KerasRegressor\n",
51 | "from sklearn.model_selection import cross_val_score, KFold, train_test_split\n",
52 | "from sklearn.preprocessing import StandardScaler\n",
53 | "from sklearn.pipeline import Pipeline\n",
54 | "from rdkit import Chem, DataStructs\n",
55 | "from rdkit.Chem import AllChem, Descriptors"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "#### Reading molecules and activity from SDF"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 2,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "fname = \"data/cdk2.sdf\"\n",
72 | "\n",
73 | "mols = []\n",
74 | "y = []\n",
75 | "for mol in Chem.SDMolSupplier(fname):\n",
76 | " if mol is not None:\n",
77 | " mols.append(mol)\n",
78 | " y.append(float(mol.GetProp(\"pIC50\")))"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "#### Calculate descriptors (fingerprints) and convert them into numpy array"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 3,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "# generate binary Morgan fingerprint with radius 2\n",
95 | "fp = [AllChem.GetMorganFingerprintAsBitVect(m, 2) for m in mols]"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 4,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "def rdkit_numpy_convert(fp):\n",
105 | " output = []\n",
106 | " for f in fp:\n",
107 | " arr = np.zeros((1,))\n",
108 | " DataStructs.ConvertToNumpyArray(f, arr)\n",
109 | " output.append(arr)\n",
110 | " return np.asarray(output)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 5,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "x = rdkit_numpy_convert(fp)"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 6,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "# fix random seed for reproducibility\n",
129 | "seed = 2019\n",
130 | "np.random.seed(seed)\n",
131 | "\n",
132 | "# randomly select 20% of compounds as test set\n",
133 | "x_tr, x_ts, y_tr, y_ts = train_test_split(x, y, test_size=0.20, random_state=seed)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 7,
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "name": "stdout",
143 | "output_type": "stream",
144 | "text": [
145 | "# molecules for training = 348, # of features = 2048\n",
146 | "\n"
147 | ]
148 | }
149 | ],
150 | "source": [
151 | "mol_num, feat_num = x_tr.shape\n",
152 | "print(\"# molecules for training = %i, # of features = %i\\n\" % (mol_num, feat_num))"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "We can create Keras models and evaluate them with scikit-learn by using handy wrapper objects provided by the Keras library. This is desirable, because scikit-learn excels at evaluating models and will allow us to use powerful data preparation and model evaluation schemes with very few lines of code.\n",
160 | "\n",
161 | "The Keras wrappers require a function as an argument. This function that we must define is responsible for creating the neural network model to be evaluated.\n",
162 | "\n",
163 | "Below we define the function to create a simple MLP regressor that has a single fully connected hidden layer whose number of neurons equals the number of training molecules (`sample_num`, 348 here) and which takes the 2048 fingerprint bits (`feat_num`) as input. The network uses the rectifier activation function for the hidden layer. No activation function is used for the output layer because it is a regression problem and we are interested in predicting numerical values directly without transform.\n",
164 | "\n",
165 | "The efficient ADAM optimization algorithm is used and a mean squared error loss function is optimized. This will be the same metric that we will use to evaluate the performance of the model. It is a desirable metric because taking its square root gives us an error value we can directly understand in the context of the problem (pIC50 units)."
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 8,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "\n",
175 | "# define the first MLP regressor model\n",
176 | "def MLP_model1(sample_num, feat_num):\n",
177 | "\t# create model\n",
178 | "\tmodel = Sequential()\n",
179 | "\tmodel.add(Dense(sample_num, input_dim=feat_num, kernel_initializer='normal', activation='relu'))\n",
180 | "\tmodel.add(Dense(1, kernel_initializer='normal'))\n",
181 | "\t# Compile model\n",
182 | "\tmodel.compile(loss='mean_squared_error', optimizer='adam')\n",
183 | "\treturn model\n"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {},
189 | "source": [
190 | "The Keras wrapper object for use in scikit-learn as a regression estimator is called KerasRegressor. We create an instance and pass it both the name of the function to create the neural network model as well as some parameters to pass along to the fit() function of the model later, such as the number of epochs and batch size.\n",
191 | "\n",
192 | "We also initialize the random number generator with a constant random seed, a process we will repeat for each model evaluated in this tutorial. This is an attempt to ensure we compare models consistently."
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 9,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "# evaluate model with standardized dataset\n",
202 | "estimator = KerasRegressor(build_fn=MLP_model1, sample_num=mol_num, feat_num=feat_num, epochs=10, batch_size=2, verbose=0)"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {},
208 | "source": [
209 | "The final step is to evaluate this baseline model. We will use k-fold cross validation to evaluate the model (the cell below uses `n_splits=2`; a larger value such as 10 gives a more reliable estimate)."
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 14,
215 | "metadata": {},
216 | "outputs": [
217 | {
218 | "ename": "ValueError",
219 | "evalue": "scoring value should either be a callable, string or None. {'tau': , 'MSE': 'mean_squared_error', 'r2': 'r2'} was passed",
220 | "output_type": "error",
221 | "traceback": [
222 | "\u001b[0;31m\u001b[0m",
223 | "\u001b[0;31mValueError\u001b[0mTraceback (most recent call last)",
224 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mkfold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscorer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkfold\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m#print(\"Results: %.2f (%.2f) MSE\" % (results.mean(), results.std()))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
225 | "\u001b[0;32m/home/thomas/Programs/Anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;31m# To ensure multimetric format is not supported\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_scoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,\n",
226 | "\u001b[0;32m/home/thomas/Programs/Anaconda2/lib/python2.7/site-packages/sklearn/metrics/scorer.pyc\u001b[0m in \u001b[0;36mcheck_scoring\u001b[0;34m(estimator, scoring, allow_none)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 304\u001b[0m raise ValueError(\"scoring value should either be a callable, string or\"\n\u001b[0;32m--> 305\u001b[0;31m \" None. %r was passed\" % scoring)\n\u001b[0m\u001b[1;32m 306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
227 | "\u001b[0;31mValueError\u001b[0m: scoring value should either be a callable, string or None. {'tau': , 'MSE': 'mean_squared_error', 'r2': 'r2'} was passed"
228 | ]
229 | }
230 | ],
231 | "source": [
232 | "def kendalls_tau(estimator, X, y):\n",
233 | " from scipy.stats import kendalltau, pearsonr\n",
234 | " preds = estimator.predict(X)\n",
235 | " t = kendalltau(preds, y)[0]\n",
236 | " return t\n",
237 | "\n",
238 | "scorer = {'r2':'r2', 'MSE':'mean_squared_error'}\n",
239 | " \n",
240 | "\n",
241 | "kfold = KFold(n_splits=2, random_state=seed)\n",
242 | "results = cross_val_score(estimator, x, y, scoring=scorer, cv=kfold)\n",
243 | "print results\n",
244 | "#print(\"Results: %.2f (%.2f) MSE\" % (results.mean(), results.std()))"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "Running this code gives us an estimate of the model’s performance on the problem for unseen data. The result reports the mean squared error including the average and standard deviation (average variance) across all folds of the cross validation evaluation.\n",
252 | "\n"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": []
261 | }
262 | ],
263 | "metadata": {
264 | "kernelspec": {
265 | "display_name": "Python [default]",
266 | "language": "python",
267 | "name": "python2"
268 | },
269 | "language_info": {
270 | "codemirror_mode": {
271 | "name": "ipython",
272 | "version": 2
273 | },
274 | "file_extension": ".py",
275 | "mimetype": "text/x-python",
276 | "name": "python",
277 | "nbconvert_exporter": "python",
278 | "pygments_lexer": "ipython2",
279 | "version": "2.7.15"
280 | }
281 | },
282 | "nbformat": 4,
283 | "nbformat_minor": 1
284 | }
285 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | I will gradually open source and publish various scripts and tutorials from my personal archive about MD setup and analysis, protein-ligand docking, machine learning and lots of other interesting things related to Computational Chemistry and Drug Design.
2 |
3 | Follow me on [twitter](https://twitter.com/tevangelidis) or on [LinkedIn](https://www.linkedin.com/in/thomas-evangelidis-495b45125/) to get regular updates.
4 |
--------------------------------------------------------------------------------
/ROC_curves/.Rhistory:
--------------------------------------------------------------------------------
1 | "data/SF3.scores",
2 | "data/SF4.scores",
3 | "data/SF5.scores"
4 | )
5 | x = read.table(RESULTS_FILES[1], header = TRUE)
6 | valid_molnames <- as.vector(x[1])
7 | for (i in 2:length(RESULTS_FILES)) {
8 | y = read.table(RESULTS_FILES[i], header = TRUE)
9 | print(head(y[1]))
10 | valid_molnames <- intersect(valid_molnames, as.vector(y[1]))
11 | }
12 | valid_molnames
13 | x = read.table(RESULTS_FILES[1], header = TRUE)
14 | valid_molnames <- x[1]
15 | i=2
16 | y = read.table(RESULTS_FILES[i], header = TRUE)
17 | valid_molnames <- intersect(valid_molnames, y[1])
18 | valid_molnames
19 | x = read.table(RESULTS_FILES[1], header = TRUE)
20 | valid_molnames <- x[1]
21 | #for (i in 2:length(RESULTS_FILES)) {
22 | i=2
23 | y = read.table(RESULTS_FILES[i], header = TRUE)
24 | valid_molnames <- intersect(valid_molnames, y[1])
25 | #}
26 | i=3
27 | y = read.table(RESULTS_FILES[i], header = TRUE)
28 | valid_molnames <- intersect(valid_molnames, y[1])
29 | i=4
30 | y = read.table(RESULTS_FILES[i], header = TRUE)
31 | valid_molnames <- intersect(valid_molnames, y[1])
32 | i=5
33 | y = read.table(RESULTS_FILES[i], header = TRUE)
34 | valid_molnames <- intersect(valid_molnames, y[1])
35 | valid_molnames
36 | x = read.table(RESULTS_FILES[1], header = TRUE)
37 | valid_molnames <- x[1]
38 | #for (i in 2:length(RESULTS_FILES)) {
39 | i=2
40 | y = read.table(RESULTS_FILES[i], header = TRUE)
41 | valid_molnames <- intersect(valid_molnames, y[1])
42 | valid_molnames
43 | x = read.table(RESULTS_FILES[1], header = TRUE)
44 | valid_molnames <- x[1]
45 | #for (i in 2:length(RESULTS_FILES)) {
46 | i=2
47 | y = read.table(RESULTS_FILES[i], header = TRUE)
48 | valid_molnames <- intersect(valid_molnames, y[1])
49 | valid_molnames
50 | y
51 | valid_molnames <- c()
52 | for (i in 1:length(RESULTS_FILES)) {
53 | x = read.table(RESULTS_FILES[i], header = TRUE)
54 | valid_molnames <- c(valid_molnames, x[1])
55 | }
56 | valid_molnames
57 | length(valid_molnames)
58 | valid_molnames <- c()
59 | for (i in 1:length(RESULTS_FILES)) {
60 | x = read.table(RESULTS_FILES[i], header = TRUE)
61 | valid_molnames <- c(valid_molnames, as.vector(x[1]))
62 | }
63 | length(valid_molnames)
64 | valid_molnames[1]
65 | valid_molnames[2]
66 | c(c(), c(1,2,3))
67 | v <- c(c(), c(1,2,3))
68 | v
69 | valid_molnames <- vector()
70 | for (i in 1:length(RESULTS_FILES)) {
71 | x = read.table(RESULTS_FILES[i], header = TRUE)
72 | valid_molnames <- c(valid_molnames, as.vector(x[1]))
73 | }
74 | valid_molnames[2]
75 | valid_molnames <- vector()
76 | for (i in 1:length(RESULTS_FILES)) {
77 | x = read.table(RESULTS_FILES[i], header = TRUE)
78 | valid_molnames <- c(valid_molnames, as.vector(x[1]))
79 | }
80 | valid_molnames
81 | length(valid_molnames)
82 | valid_molnames <- vector()
83 | for (i in 1:length(RESULTS_FILES)) {
84 | x = read.table(RESULTS_FILES[i], header = TRUE)
85 | colnames(x)[1] = "molname"
86 | colnames(x)[2] = "score"
87 | valid_molnames <- c(valid_molnames, as.vector(x$molname))
88 | }
89 | valid_molnames
90 | x = read.table(RESULTS_FILES[1], header = TRUE)
91 | colnames(x)[1] = "molname"
92 | valid_molnames <- x$molname
93 | for (i in 2:length(RESULTS_FILES)) {
94 | x = read.table(RESULTS_FILES[i], header = TRUE)
95 | colnames(x)[1] = "molname"
96 | valid_molnames <- intersect(valid_molnames, x$molname)
97 | }
98 | valid_molnames
99 | length(valid_molnames)
100 | x = read.table(RESULTS_FILES[1], header = TRUE)
101 | colnames(x)[1] = "molname"
102 | valid_molnames <- x$molname
103 | vvalid_molnames
104 | length(valid_molnames)
105 | x = read.table(RESULTS_FILES[1], header = TRUE)
106 | colnames(x)[1] = "molname"
107 | valid_molnames <- x$molname
108 | length(valid_molnames)
109 | x = read.table(RESULTS_FILES[1], header = TRUE)
110 | RESULTS_FILES = c("data/SF1.scores",
111 | "data/SF2.scores",
112 | "data/SF3.scores",
113 | "data/SF4.scores",
114 | "data/SF5.scores"
115 | )
116 | x = read.table(RESULTS_FILES[1], header = TRUE)
117 | colnames(x)[1] = "molname"
118 | valid_molnames <- x$molname
119 | length(valid_molnames)
120 | x = read.table(RESULTS_FILES[1], header = TRUE)
121 | colnames(x)[1] = "molname"
122 | valid_molnames <- unique(sort(x$molname))
123 | length(valid_molnames)
124 | x = read.table(RESULTS_FILES[1], header = TRUE)
125 | colnames(x)[1] = "molname"
126 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames
127 | for (i in 2:length(RESULTS_FILES)) {
128 | x = read.table(RESULTS_FILES[i], header = TRUE)
129 | colnames(x)[1] = "molname"
130 | valid_molnames <- intersect(valid_molnames, x$molname)
131 | }
132 | length(valid_molnames)
133 | RESULTS_FILES = c("data/SF1.scores",
134 | "data/SF2.scores",
135 | "data/SF3.scores",
136 | "data/SF4.scores",
137 | "data/SF5.scores"
138 | )
139 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions
140 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier
141 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers.
142 | library("ROCR")
143 | library("hash")
144 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly
145 | common_molnames <- function(RESULTS_FILES) {
146 | x = read.table(RESULTS_FILES[1], header = TRUE)
147 | colnames(x)[1] = "molname"
148 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames
149 | for (i in 2:length(RESULTS_FILES)) {
150 | x = read.table(RESULTS_FILES[i], header = TRUE)
151 | colnames(x)[1] = "molname"
152 | valid_molnames <- intersect(valid_molnames, x$molname)
153 | }
154 | }
155 | ## ---- read_scores
156 | read_scores <- function(RESULTS_FILE, ACTIVITIE_FILE, valid_molnames) {
157 | "
158 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will
159 | be considered.
160 | "
161 | x = read.table(RESULTS_FILE, header = TRUE)
162 | colnames(x)[2] = "score"
163 | # ignore the other columns
164 | score_dict = hash()
165 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] }
166 | a = read.table(ACTIVITIES_FILE)
167 | colnames(a)[1] = "molname"
168 | colnames(a)[2] = "label"
169 | label_dict = hash()
170 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] }
171 | scores <- rep(0, length(valid_molnames))
172 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents
173 | i = 1
174 | for (molname in valid_molnames) {
175 | scores[i] <- score_dict[[molname]]
176 | labels[i] <- label_dict[[molname]]
177 | i <- i+1
178 | }
179 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better
180 | return(pred)
181 | }
182 | RESULTS_FILES = c("data/SF1.scores",
183 | "data/SF2.scores",
184 | "data/SF3.scores",
185 | "data/SF4.scores",
186 | "data/SF5.scores"
187 | )
188 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions
189 | ACTIVITIES_FILE = "data/activities"
190 | NAMES <- c("Scoring Function 1", "Sscoring Function 2", "Scoring Function 3", "Scoring Function 4",
191 | "Scoring Function 5")
192 | par(cex.main=2.0, cex.lab=1.5) ; # <== CHANGE ME
193 | library("ROCR")
194 | library("hash")
195 | COLORS <- rainbow(3*length(RESULTS_FILES))
196 | COLORS <- COLORS[seq(3, length(COLORS), 3)]
197 | for (i in 1:length(RESULTS_FILES)) {
198 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
199 | perf = performance(pred, 'tpr', 'fpr')
200 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
201 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
202 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3)
203 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3)
204 | }
205 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier
206 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers.
207 | library("ROCR")
208 | library("hash")
209 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly
210 | common_molnames <- function(RESULTS_FILES) {
211 | x = read.table(RESULTS_FILES[1], header = TRUE)
212 | colnames(x)[1] = "molname"
213 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames
214 | for (i in 2:length(RESULTS_FILES)) {
215 | x = read.table(RESULTS_FILES[i], header = TRUE)
216 | colnames(x)[1] = "molname"
217 | valid_molnames <- intersect(valid_molnames, x$molname)
218 | }
219 | }
220 | ## ---- read_scores
221 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) {
222 | "
223 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will
224 | be considered.
225 | "
226 | x = read.table(RESULTS_FILE, header = TRUE)
227 | colnames(x)[2] = "score"
228 | # ignore the other columns
229 | score_dict = hash()
230 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] }
231 | a = read.table(ACTIVITIES_FILE)
232 | colnames(a)[1] = "molname"
233 | colnames(a)[2] = "label"
234 | label_dict = hash()
235 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] }
236 | scores <- rep(0, length(valid_molnames))
237 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents
238 | i = 1
239 | for (molname in valid_molnames) {
240 | scores[i] <- score_dict[[molname]]
241 | labels[i] <- label_dict[[molname]]
242 | i <- i+1
243 | }
244 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better
245 | return(pred)
246 | }
247 | for (i in 1:length(RESULTS_FILES)) {
248 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
249 | perf = performance(pred, 'tpr', 'fpr')
250 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
251 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
252 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3)
253 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3)
254 | }
255 | i
256 | RESULTS_FILES[i]
257 | ACTIVITIES_FILE
258 | getwd(\)
259 | getwd()
260 | valid_molnames
261 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier
262 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers.
263 | library("ROCR")
264 | library("hash")
265 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly
266 | common_molnames <- function(RESULTS_FILES) {
267 | x = read.table(RESULTS_FILES[1], header = TRUE)
268 | colnames(x)[1] = "molname"
269 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames
270 | for (i in 2:length(RESULTS_FILES)) {
271 | x = read.table(RESULTS_FILES[i], header = TRUE)
272 | colnames(x)[1] = "molname"
273 | valid_molnames <- intersect(valid_molnames, x$molname)
274 | }
275 | return(valid_molnames)
276 | }
277 | ## ---- read_scores
278 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) {
279 | "
280 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will
281 | be considered.
282 | "
283 | x = read.table(RESULTS_FILE, header = TRUE)
284 | colnames(x)[2] = "score"
285 | # ignore the other columns
286 | score_dict = hash()
287 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] }
288 | a = read.table(ACTIVITIES_FILE)
289 | colnames(a)[1] = "molname"
290 | colnames(a)[2] = "label"
291 | label_dict = hash()
292 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] }
293 | scores <- rep(0, length(valid_molnames))
294 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents
295 | i = 1
296 | for (molname in valid_molnames) {
297 | scores[i] <- score_dict[[molname]]
298 | labels[i] <- label_dict[[molname]]
299 | i <- i+1
300 | }
301 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better
302 | return(pred)
303 | }
304 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions
305 | valid_molnames
306 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
307 | valid_molnames
308 | ACTIVITIES_FILE
309 | RESULTS_FILE= RESULTS_FILES[i]
310 | ACTIVITIES_FILE
311 | valid_molnames
312 | x = read.table(RESULTS_FILE, header = TRUE)
313 | colnames(x)[2] = "score"
314 | # ignore the other columns
315 | score_dict = hash()
316 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] }
317 | score_dict
318 | a = read.table(ACTIVITIES_FILE)
319 | colnames(a)[1] = "molname"
320 | colnames(a)[2] = "label"
321 | label_dict = hash()
322 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] }
323 | scores <- rep(0, length(valid_molnames))
324 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents
325 | i = 1
326 | scores
327 | labels
328 | for (molname in valid_molnames) {
329 | scores[i] <- score_dict[[molname]]
330 | labels[i] <- label_dict[[molname]]
331 | i <- i+1
332 | }
333 | scores
334 | labels
335 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier
336 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers.
337 | library("ROCR")
338 | library("hash")
339 | ## ---- find the molnames that are common in all score files in order to compare the scoring functions correctly
340 | common_molnames <- function(RESULTS_FILES) {
341 | x = read.table(RESULTS_FILES[1], header = TRUE)
342 | colnames(x)[1] = "molname"
343 | valid_molnames <- unique(sort(x$molname)) ; # unique molnames
344 | for (i in 2:length(RESULTS_FILES)) {
345 | x = read.table(RESULTS_FILES[i], header = TRUE)
346 | colnames(x)[1] = "molname"
347 | valid_molnames <- intersect(valid_molnames, x$molname)
348 | }
349 | return(valid_molnames)
350 | }
351 | ## ---- read_scores
352 | read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) {
353 | "
354 | The valid_molnames list ensures that only the molnames that were scored by all scoring functions will
355 | be considered.
356 | "
357 | x = read.table(RESULTS_FILE, header = TRUE)
358 | colnames(x)[2] = "score"
359 | # ignore the other columns
360 | score_dict = hash()
361 | for (i in seq(1, nrow(x))) { score_dict[x[i,1]] <- x[i,2] }
362 | a = read.table(ACTIVITIES_FILE)
363 | colnames(a)[1] = "molname"
364 | colnames(a)[2] = "label"
365 | label_dict = hash()
366 | for (i in seq(1, nrow(a))) { label_dict[a[i,1]] <- a[i,2] }
367 | scores <- rep(0, length(valid_molnames))
368 | labels <- rep("", length(valid_molnames)) ; # initialize labels to a list of size(scores) but without contents
369 | i = 1
370 | for (molname in valid_molnames) {
371 | scores[i] <- score_dict[[molname]]
372 | labels[i] <- label_dict[[molname]]
373 | i <- i+1
374 | }
375 | pred = prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the highest the score the better
376 | return(pred)
377 | }
378 | RESULTS_FILES = c("data/SF1.scores",
379 | "data/SF2.scores",
380 | "data/SF3.scores",
381 | "data/SF4.scores",
382 | "data/SF5.scores"
383 | )
384 | valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions
385 | length(valid_molnames)
386 | ACTIVITIES_FILE = "data/activities"
387 | NAMES <- c("Scoring Function 1", "Sscoring Function 2", "Scoring Function 3", "Scoring Function 4",
388 | "Scoring Function 5")
389 | par(cex.main=2.0, cex.lab=1.5) ; # <== CHANGE ME
390 | library("ROCR")
391 | library("hash")
392 | COLORS <- rainbow(3*length(RESULTS_FILES))
393 | COLORS <- COLORS[seq(3, length(COLORS), 3)]
394 | for (i in 1:length(RESULTS_FILES)) {
395 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
396 | perf = performance(pred, 'tpr', 'fpr')
397 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
398 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
399 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3)
400 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3)
401 | }
402 | AUCs = rep(0, length(RESULTS_FILES))
403 | LNAMES = rep(0, length(RESULTS_FILES))
404 | for (i in 1:length(RESULTS_FILES)) {
405 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILES[i])
406 | auc = performance(pred, measure='auc')
407 | AUCs[i] = sprintf("%.3f", auc@y.values)
408 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
409 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
410 | }
411 | AUCs = rep(0, length(RESULTS_FILES))
412 | LNAMES = rep(0, length(RESULTS_FILES))
413 | for (i in 1:length(RESULTS_FILES)) {
414 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE)
415 | auc = performance(pred, measure='auc')
416 | AUCs[i] = sprintf("%.3f", auc@y.values)
417 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
418 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
419 | }
420 | AUCs = rep(0, length(RESULTS_FILES))
421 | LNAMES = rep(0, length(RESULTS_FILES))
422 | for (i in 1:length(RESULTS_FILES)) {
423 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
424 | auc = performance(pred, measure='auc')
425 | AUCs[i] = sprintf("%.3f", auc@y.values)
426 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
427 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
428 | }
429 | abline(a=0, b=1, lty=2, lwd=3, col="black")
430 | for (i in 1:length(RESULTS_FILES)) {
431 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
432 | perf = performance(pred, 'tpr', 'fpr')
433 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
434 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
435 | # plot(perf2, add = TRUE, colorize = TRUE, lwd=3)
436 | # plot(perf3, add = TRUE, colorize = TRUE, lwd=3)
437 | }
438 | AUCs = rep(0, length(RESULTS_FILES))
439 | LNAMES = rep(0, length(RESULTS_FILES))
440 | for (i in 1:length(RESULTS_FILES)) {
441 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
442 | auc = performance(pred, measure='auc')
443 | AUCs[i] = sprintf("%.3f", auc@y.values)
444 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
445 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
446 | }
447 | abline(a=0, b=1, lty=2, lwd=3, col="black")
448 | pred
449 | for (i in 1:length(RESULTS_FILES)) {
450 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
451 | perf = performance(pred, 'tpr', 'fpr')
452 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
453 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
454 | }
455 | plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves")
456 | library("ROCR")
457 | library("hash")
458 | for (i in 1:length(RESULTS_FILES)) {
459 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
460 | perf = performance(pred, 'tpr', 'fpr')
461 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
462 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
463 | }
464 | AUCs = rep(0, length(RESULTS_FILES))
465 | LNAMES = rep(0, length(RESULTS_FILES))
466 | for (i in 1:length(RESULTS_FILES)) {
467 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
468 | auc = performance(pred, measure='auc')
469 | AUCs[i] = sprintf("%.3f", auc@y.values)
470 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
471 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
472 | }
473 | abline(a=0, b=1, lty=2, lwd=3, col="black")
474 | for (i in 1:length(RESULTS_FILES)) {
475 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
476 | perf = performance(pred, 'tpr', 'fpr')
477 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
478 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
479 | }
480 | AUCs = rep(0, length(RESULTS_FILES))
481 | LNAMES = rep(0, length(RESULTS_FILES))
482 | for (i in 1:length(RESULTS_FILES)) {
483 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
484 | auc = performance(pred, measure='auc')
485 | AUCs[i] = sprintf("%.3f", auc@y.values)
486 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
487 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
488 | }
489 | legend(0.62, 0.28, legend=LNAMES,
490 | col=COLORS, lty=1, cex=0.8)
491 | COLORS <- rainbow(3*length(RESULTS_FILES))
492 | COLORS <- COLORS[seq(3, length(COLORS), 3)]
493 | for (i in 1:length(RESULTS_FILES)) {
494 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
495 | perf = performance(pred, 'tpr', 'fpr')
496 | if (i==1) { plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i], main="ROC Curves") }
497 | else { plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i]) }
498 | }
499 | AUCs = rep(0, length(RESULTS_FILES))
500 | LNAMES = rep(0, length(RESULTS_FILES))
501 | for (i in 1:length(RESULTS_FILES)) {
502 | pred = read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)
503 | auc = performance(pred, measure='auc')
504 | AUCs[i] = sprintf("%.3f", auc@y.values)
505 | LNAMES[i] = paste(NAMES[i], "(", AUCs[i],")")
506 | # print(paste(NAMES[i], "AUC-ROC=", auc@y.values))
507 | }
508 | abline(a=0, b=1, lty=2, lwd=3, col="black")
509 | # Add legend
510 | legend(0.62, 0.28, legend=LNAMES,
511 | col=COLORS, lty=1, cex=0.8)
512 | knitr::read_chunk("function_definitions.r")
513 |
--------------------------------------------------------------------------------
/ROC_curves/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Create ROC curves in R"
3 | author: "Thomas Evangelidis"
4 | date: "20/10/2019"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE)
10 | ```
11 |
12 | Paste below all your score files. The first two columns must be the molname and the score (**the lower the score the better, not the opposite!**). The code will ignore the rest of the columns, if they exist.
13 |
14 | ```{r scores}
15 | RESULTS_FILES = c("data/SF1.scores",
16 | "data/SF2.scores",
17 | "data/SF3.scores",
18 | "data/SF4.scores",
19 | "data/SF5.scores"
20 | )
21 |
22 | ```
23 |
24 | Paste below the file with molecular bioactivities of all molecules. First column must be the molname and the second its bioactivity (1 or 0).
25 |
26 | ```{r activities}
27 | ACTIVITIES_FILE = "data/activities"
28 | ```
29 |
30 | Write the names that will be shown in the legend.
31 | ```{r names}
# Legend labels, one per entry of RESULTS_FILES and in the same order.
NAMES <- c("Scoring Function 1", "Scoring Function 2", "Scoring Function 3", "Scoring Function 4",
           "Scoring Function 5")
34 | ```
35 |
36 | Define (i) a function to find the molnames that were scored successfully by all given scoring functions, and (ii) a function to read a score file and a file with the bioactivity of each molecule (0 or 1).
37 |
38 |
40 | ```{r,cache = FALSE, echo=FALSE}
41 | knitr::read_chunk("function_definitions.r")
42 | ```
43 |
44 |
45 | ```{r common_molnames}
46 | ```
47 | ```{r count_actives_inactives}
48 | ```
49 | ```{r read_scores}
50 | ```
51 |
52 | Load the required libraries and create ROC curve plots. Adjust the font sizes (first line below).
53 |
54 | ```{r plot}
par(cex.main=2.0, cex.lab=1.5) ; # <== CHANGE ME

library("ROCR")
library("hash")

# Take every third colour of an oversampled rainbow palette: one colour per score file.
COLORS <- rainbow(3*length(RESULTS_FILES))
COLORS <- COLORS[seq(3, length(COLORS), 3)]

valid_molnames <- common_molnames(RESULTS_FILES) ; # molnames with scores by all scoring functions; only these will be used for plotting
# Count the actives and inactives that are common in all score files (shown in the plot title)
num <- count_actives_inactives(ACTIVITIES_FILE, valid_molnames)

# Build the prediction object of each score file once and derive both the ROC curve
# and the AUC from it, so that every score file is parsed a single time.
AUCs <- character(length(RESULTS_FILES))
LNAMES <- character(length(RESULTS_FILES))
for (i in seq_along(RESULTS_FILES)) {
  pred <- read_scores(RESULTS_FILES[i], ACTIVITIES_FILE, valid_molnames)

  perf <- performance(pred, 'tpr', 'fpr')
  if (i == 1) {
    # the first curve creates the plot, the rest are drawn on top of it
    plot(perf, add = FALSE, colorize = FALSE, lwd=3, col=COLORS[i],
         main=paste("ROC Curves (", num$actives, "actives,", num$inactives, "inactives)"))
  } else {
    plot(perf, add = TRUE, colorize = FALSE, lwd=3, col=COLORS[i])
  }

  auc <- performance(pred, measure='auc')
  # y.values is a list; only one prediction run is passed in, so take its first element
  AUCs[i] <- sprintf("%.3f", auc@y.values[[1]])
  LNAMES[i] <- paste(NAMES[i], "(", AUCs[i],")")
}

# Diagonal = performance of a random classifier
abline(a=0, b=1, lty=2, lwd=3, col="black")
# Add legend
legend(0.62, 0.28, legend=LNAMES,
       col=COLORS, lty=1, cex=0.8)
88 | ```
89 |
90 |
91 | The number in parentheses after each name in the legend is the area under the ROC curve.
92 |
93 | **IMPORTANT: You will need to adjust the last line `legend(...)` to have a legend with the right names at the right position.**
94 |
95 |
96 |
97 |
--------------------------------------------------------------------------------
/ROC_curves/README.md:
--------------------------------------------------------------------------------
1 | [Click here for the ROC curves tutorial](http://htmlpreview.github.io/?https://github.com/tevang/tutorials/blob/master/ROC_curves/README.html)
2 |
3 | 
--------------------------------------------------------------------------------
/ROC_curves/data/.Rhistory:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/ROC_curves/data/.Rhistory
--------------------------------------------------------------------------------
/ROC_curves/data/ROC_curves.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/ROC_curves/data/ROC_curves.png
--------------------------------------------------------------------------------
/ROC_curves/function_definitions.r:
--------------------------------------------------------------------------------
1 | # You could also have several functions in a single R file and still document them separately. Simply put an identifier
2 | # starting with ## ---- before each function definition and then create empty chunks referring to each one of the identifiers.
3 |
4 | library("ROCR")
5 | library("hash")
6 |
7 |
8 | ## ---- common_molnames
common_molnames <- function(RESULTS_FILES) {
  # Find the molnames that are common in all score files, in order to compare the
  # scoring functions properly.
  #
  # RESULTS_FILES: character vector with the paths of the score files. Each file must
  #                have a header line and the molname in its first column.
  # Returns a character vector with the lower-case molnames present in every file.
  if (length(RESULTS_FILES) == 0) {
    stop("common_molnames: at least one score file is required.")
  }

  read_molnames <- function(results_file) {
    x <- read.table(results_file, header = TRUE)
    colnames(x)[1] <- "molname" ; # name the column and operate on x$molname, otherwise 'intersect' fails!
    tolower(x$molname)
  }

  valid_molnames <- unique(sort(read_molnames(RESULTS_FILES[1]))) ; # unique molnames
  # RESULTS_FILES[-1] is empty when a single file was given, so the loop is skipped;
  # '2:length(RESULTS_FILES)' would count downwards (2, 1) and read a non-existent file.
  for (results_file in RESULTS_FILES[-1]) {
    valid_molnames <- intersect(valid_molnames, read_molnames(results_file))
  }
  return(valid_molnames)
}
26 |
27 | ## ---- count_actives_inactives
count_actives_inactives <- function(ACTIVITIES_FILE, valid_molnames) {
  # Count how many of the molecules in valid_molnames are active and how many inactive.
  #
  # ACTIVITIES_FILE: path of a file without header; 1st column is the molname and the
  #                  2nd column its label (1 for active, 0 for inactive).
  # valid_molnames:  character vector with the lower-case molnames to be counted.
  # Returns a list with the elements 'actives' and 'inactives'.
  a <- read.table(ACTIVITIES_FILE)
  colnames(a)[1] <- "molname"
  colnames(a)[2] <- "label"
  a$molname <- tolower(a$molname)

  is_valid <- a$molname %in% valid_molnames
  active_num <- sum(is_valid & a$label == 1, na.rm = TRUE)
  # The inactives must be filtered by their own molnames, not with the mask of the actives.
  inactive_num <- sum(is_valid & a$label == 0, na.rm = TRUE)

  # A bare paste() inside a function is silently discarded; emit the summary explicitly.
  message("The molecules that have been scored by all scoring functions consist of ", active_num,
          " actives and ", inactive_num, " inactives.")
  num <- list(actives=active_num, inactives=inactive_num)
  return(num)
}
42 |
43 | ## ---- read_scores
read_scores <- function(RESULTS_FILE, ACTIVITIES_FILE, valid_molnames) {
  # Read a score file and the activities file, and build the ROCR prediction object
  # for the molecules in valid_molnames.
  #
  # RESULTS_FILE:    path of a score file with a header line; 1st column is the molname,
  #                  2nd column the score (the lower the better). Other columns are ignored.
  # ACTIVITIES_FILE: path of a file without header; 1st column is the molname, 2nd column
  #                  its label.
  # valid_molnames:  lower-case molnames to keep. It ensures that only the molnames that
  #                  were scored by all scoring functions will be considered.
  # Returns the object created by ROCR's prediction().

  # Value of each wanted key; when a key occurs more than once its last row wins.
  lookup_last <- function(keys, values, wanted, what, path) {
    pos <- match(wanted, rev(keys)) ; # position counted from the end of 'keys'
    if (anyNA(pos)) {
      stop("No ", what, " found in '", path, "' for: ",
           paste(wanted[is.na(pos)], collapse = ", "))
    }
    values[length(keys) + 1L - pos]
  }

  x <- read.table(RESULTS_FILE, header = TRUE)
  # only the first two columns (molname, score) are used
  scores <- lookup_last(tolower(x[[1]]), x[[2]], valid_molnames, "score", RESULTS_FILE)

  a <- read.table(ACTIVITIES_FILE)
  labels <- as.character(lookup_last(tolower(a[[1]]), a[[2]], valid_molnames,
                                     "activity label", ACTIVITIES_FILE))

  pred <- prediction(-1*scores, labels) ;# IMPORTANT: -1* because the function prediction() assumes that the higher the score the better
  return(pred)
}
74 |
--------------------------------------------------------------------------------
/compare_atomic_properties/README.md:
--------------------------------------------------------------------------------
1 | # Compare partial charges between multiple ligand conformations.
2 | #### the same code can be adapted to visualize other atomic properties, like lipophilicity, refraction, donor/acceptor of H-bond, electronegativity, van der Waals attraction/repulsion
3 | In this tutorial we will exploit the new RDKit's drawing code to visualize the differences in charge distribution induced by
4 | conformational changes of a molecule. To be fair, the conformations in this example come from docking with Glide (100 docking poses)
5 | and the partial charges were calculated individually for each pose on the PM6 semi-empirical QM level of theory, upon some subtle
6 | geometry optimization.
7 |
8 |
9 | #### 1. Set global variables
10 | ```python
11 | mol2_file = "data/compound_stereo1_ion1_tau1.COSMO_PM6.mol2" # multi-molecule MOL2 file with PM6 partial charges
12 | ```
13 | #### 2. Define the necessary functions.
14 |
15 | ```python
16 | import os
17 | import io
18 | from PIL import Image
19 | import numpy as np
20 | from lib.modlib.pybel import Outputfile, readfile
21 | from rdkit import Chem
22 | from rdkit.Chem import Draw
23 | from rdkit.Chem.Draw import SimilarityMaps
24 | from rdkit.Chem.rdDepictor import Compute2DCoords
25 |
def show_png(data):
    """
    Method to decode PNG bytes (e.g. the output of MolDraw2DCairo.GetDrawingText()) and return them as a PIL image
    that has been resized to 2000x2000 pixels.
    """
    bio = io.BytesIO(data)
    img = Image.open(bio)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same resampling filter under its current name.
    img = img.resize((2000, 2000), Image.LANCZOS)
    return img
31 |
def mol2_to_sdf(mol2_file, sdf_file=None):
    """
    Method to convert a multi-mol2 file to sdf format with an extra property that stores the partial charges.
    It invokes PyBel's mol2 file loader because RDKit's mol2 file loader does not read the partial charges.

    If sdf_file is not given, the output is written next to mol2_file with the extension replaced by ".sdf".
    An existing output file is overwritten.
    """

    if sdf_file is None:
        sdf_file = os.path.splitext(mol2_file)[0] + ".sdf"
    largeSDfile = Outputfile("sdf", sdf_file, overwrite=True)
    try:
        for mymol in readfile("mol2", mol2_file):

            # Add the Molecular (Free) Energy in a new property field in the sdf file
            if 'Comment' in mymol.data.keys() and "Energy:" in mymol.data['Comment']:
                # the energy value is taken from the second whitespace-separated token of the comment
                mymol.data["molecular energy"] = float(mymol.data['Comment'].split()[1])
                del mymol.data['Comment']  # if you keep this the energy will be written under the molname in the sdf
            # Add the Partial Charges of the atoms separated by ',' in a new property field in the sdf file
            charges = [str(a.partialcharge) for a in mymol.atoms]
            if len(set(charges)) > 1:  # identical values on all atoms are not stored
                mymol.data["partial charge"] = ",".join(charges)

            # Write this molecule with the extra property fields into the sdf file
            largeSDfile.write(mymol)
    finally:
        # close the output file even if reading or writing a molecule fails
        largeSDfile.close()
56 |
def load_sdf_with_charges(sdf_file):
    """
    This method reads in an sdf file with partial charges as an extra property, and returns a list of RDKit mol
    objects, each with an extra atomic property named "partial charge". Records that RDKit cannot parse are skipped.
    """
    # NOTE: for simplicity, because every conformer has different atomic charges and the RDKit MOL object does not
    #       store varying atomic property values for each conformer, I will save each conformer into a separate
    #       RDKit MOL object.
    mol_list = []   # list of RDKit MOL objects (conformers) of the same molecule but with different atomic charges
    suppl = Chem.SDMolSupplier(sdf_file, removeHs=False, sanitize=False)
    for mol in suppl:
        if mol is None:     # the supplier yields None for records it failed to parse
            continue
        if 'partial charge' in mol.GetPropNames():
            # parse the comma-separated charges once and reuse them below
            charges = [float(c) for c in mol.GetProp('partial charge').split(',')]
            formal_charge = int(np.sum(charges).round())
            for atom, charge in zip(mol.GetAtoms(), charges):
                # By default the Atom object does not have a property for its partial charge, therefore add one
                atom.SetDoubleProp('partial charge', charge)
                # NOTE(review): every atom receives the rounded total charge of the molecule - confirm this is intended
                atom.SetFormalCharge(formal_charge)
        mol_list.append(mol)
    return mol_list
76 |
77 | sdf_file = os.path.splitext(mol2_file)[0] + ".sdf"
78 | mol2_to_sdf(mol2_file=mol2_file, sdf_file=sdf_file)
79 | # Load the multi-molecule sdf file with partial charges
80 | mol_list = load_sdf_with_charges(sdf_file)
81 |
82 | ```
83 |
84 | #### 3. First we will compare the partial charges of two distinct conformers.
85 | I will pick up two docking poses with noticeable differences in charge distribution.
86 |
87 |
88 | First conformer:
89 |
90 | ```python
91 | mol1 = mol_list[52]
92 | Compute2DCoords(mol1) # add 2D coordinates for better 2D image depiction
93 | charges1 = [a.GetDoubleProp("partial charge") for a in mol1.GetAtoms()]
94 | d = Chem.Draw.MolDraw2DCairo(400, 400)
95 | Chem.Draw.SimilarityMaps.GetSimilarityMapFromWeights(mol1, charges1, draw2d=d)
96 | d.FinishDrawing()
97 | img = show_png(d.GetDrawingText())
98 | img.save("pose1.png", quality=95)
99 | ```
100 |
101 | Second conformer:
102 |
103 | ```python
104 | mol2 = mol_list[61]
105 | Compute2DCoords(mol2) # add 2D coordinates for better 2D image depiction
106 | charges2 = [a.GetDoubleProp("partial charge") for a in mol2.GetAtoms()]
107 | d = Chem.Draw.MolDraw2DCairo(400, 400)
108 | # But draw the charges on the 1st conformer to be able to compare it visually
109 | Chem.Draw.SimilarityMaps.GetSimilarityMapFromWeights(mol1, charges2, draw2d=d)
110 | d.FinishDrawing()
111 | img = show_png(d.GetDrawingText())
112 | img.save("pose2.png")
113 | ```
114 |
115 | Conformer 1 | Conformer 2
116 | :-------------------------:|:-------------------------:
117 |  | 
118 |
119 | The red arrows on conformer 2 show where the differences are located: mainly at the aromatic carbons and, to a lesser extent, at the polarized oxygen of the methyl phenyl ether.
120 |
121 | #### 4. Now let's visualize the average (unweighted) partial charges of each atom.
122 |
123 | ```python
124 | all_pose_charges = []
125 | for mol in mol_list:
126 | all_pose_charges.append( [a.GetDoubleProp("partial charge") for a in mol.GetAtoms()] )
127 | all_pose_charges = np.array(all_pose_charges)
128 | mean_pose_charges = all_pose_charges.mean(axis=0)
129 | std_pose_charges = all_pose_charges.std(axis=0)
130 |
131 | d = Draw.MolDraw2DCairo(400, 400)
132 | SimilarityMaps.GetSimilarityMapFromWeights(mol1,list(mean_pose_charges),draw2d=d)
133 | d.FinishDrawing()
134 | img = show_png(d.GetDrawingText())
135 | img.save("mean_pose.png")
136 | ```
137 |
138 | 
139 |
140 |
141 | #### 5. Finally, we can see on which atoms most of the differences in the partial charge value are located by plotting the standard deviations.
142 |
143 | ```python
144 | d = Draw.MolDraw2DCairo(400, 400)
145 | SimilarityMaps.GetSimilarityMapFromWeights(mol1,list(std_pose_charges),draw2d=d)
146 | d.FinishDrawing()
147 | img = show_png(d.GetDrawingText())
148 | img.save("std_pose.png")
149 | ```
150 |
151 | 
152 |
153 | Notice that across all 100 docking poses the charge differences at the aromatic rings are not noticeable, unlike in the two docking poses that we compared before.
--------------------------------------------------------------------------------
/compare_atomic_properties/mean_pose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/mean_pose.png
--------------------------------------------------------------------------------
/compare_atomic_properties/pose1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/pose1.png
--------------------------------------------------------------------------------
/compare_atomic_properties/pose2.marked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/pose2.marked.png
--------------------------------------------------------------------------------
/compare_atomic_properties/pose2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/pose2.png
--------------------------------------------------------------------------------
/compare_atomic_properties/std_pose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/compare_atomic_properties/std_pose.png
--------------------------------------------------------------------------------
/create_alternative_protonations/README.md:
--------------------------------------------------------------------------------
1 | # CREATE ALTERNATIVE PROTONATION STATES OF A RECEPTOR
2 |
3 | This is a Python script to create all alternative protonation state combinations of a protein given a ligand and a specified radius. It could be useful in case you have a receptor but you are not sure about the protonation states of some residues in the binding site and you want to do docking, MD, or any other structure-based drug design method using all of the alternative receptor protonations.
4 |
5 | The script must be executed with [PyChimera](https://pychimera.readthedocs.io/en/latest/), a Python wrapper for [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/), which searches for protonatable standard residues (ASP, GLU, HIS) around the ligand, finds all possible combinations of protonation states, and writes a pdb file for each combination. Since the number of combinations of more than 6 protonatable residues becomes very large, the user can fix some residues to a given protonated/unprotonated state. See the tutorial below. You can also get the example usage by typing `protonate_receptor.py -h`.
6 |
7 | ## INSTALLATION
8 | ```
9 | conda create -n pychimera -c insilichem pychimera # this will create a virtual environment for PyChimera and modify your ~/.bashrc thus you have to source it again
10 | source ~/.bashrc
11 | conda activate pychimera
12 | pip install pychimera
13 | pip install --upgrade numpy # upgrade numpy to match the version that pychimera needs
14 | ```
15 | Finally, add the path to `protonate_receptor.py` to your `PATH` variable and create a symbolic link, like `sudo ln -s [full path to protonate_receptor.py] /usr/local/bin/`.
16 |
17 | ## TUTORIAL
18 |
19 | Download the pdb structure of HIV-1 protease complexed with a tripeptide inhibitor from [PDB](https://www.rcsb.org/structure/1A30). Save the receptor coordinates to a file "1a30_protein.pdb" and the ligand coordinates to a file "1a30_ligand.sdf". Then, list all protonatable residues within 4 Angstroms from the ligand.
20 |
21 | ```
22 | source ~/.bashrc # necessary only if you haven't sourced the latest modified version
23 | conda activate pychimera
24 | pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -list
25 | ```
26 |
27 | You should get that the protonatable residues are: **ASP_25.B ASP_25.A ASP_29.A ASP_30.A**. We want to keep **ASP_29.A ASP_30.A** fixed to the unprotonated state and create alternative protonations for all the rest (namely the catalytic dyad **ASP_25.B and ASP_25.A**).
28 |
29 | `pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -fix ASP_29.A -fix ASP_30.A`
30 |
31 | The script will generate 4 files in the current directory, from which we are interested only in the following 3:
32 | ```
33 | 1a30_protein_ASP25A_ASP25B.pdb
34 | 1a30_protein_ASH25A_ASP25B.pdb
35 | 1a30_protein_ASP25A_ASH25B.pdb
36 | ```
37 | which correspond to the structures shown in the figure below.
38 | 
39 |
40 | The doubly protonated catalytic dyad does not exist. In general, the acid driven catalysis requires that one of the members of the ASP dyad is ionized in order to
41 | activate the water molecule for the nucleophilic attack, while the second member needs to be protonated in order to enhance the electrophilic nature of the substrate
42 | carbonyl group.
43 |
44 | To deactivate the `pychimera` conda virtual environment and switch to your default:
45 | `conda deactivate`
46 |
--------------------------------------------------------------------------------
/create_alternative_protonations/images/1a30_all_protonations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/create_alternative_protonations/images/1a30_all_protonations.png
--------------------------------------------------------------------------------
/create_alternative_protonations/protonate_receptor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | __author__="Thomas Evangelidis"
4 | __email__="tevang3@gmail.com"
5 |
6 |
7 |
8 | from argparse import ArgumentParser, RawDescriptionHelpFormatter
9 | from itertools import combinations, permutations
10 | import sys, gc, os
11 | from operator import itemgetter
12 | from ete3 import Tree
13 |
14 |
15 | ## Parse command line arguments
def cmdlineparse():
    """
        FUNCTION that builds the command line parser of this script and parses sys.argv.
        RETURNS:
        args:   the argparse Namespace with the attributes LIST_PROTONATABLE, RECEPTOR, LIGAND, RADIUS,
                FIXED_STATES, FLIP_COOH and DOCKPREP.
    """
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description="""
DESCRIPTION:

This is a Python script to create all alternative protonation state combinations of a protein given a ligand and a specified radius.
It could be useful in case you have a receptor but you are not sure about the protonation states of some residues in the binding site
and you want to do docking, MD, or any other structure-based drug design method using all of the alternative receptor protonations.

The script must be executed with PyChimera, a Python wrapper of UCSF Chimera, which searches for protonatable standard residues (ASP, GLU, HIS)
around the ligand, finds all possible combinations of protonation states, and writes a pdb file for each combination. Since the number
of combinations of more than 6 protonatable residues becomes very large, the user can fix some residues to a given protonated/unprotonated state.
See the examples below. You can also get the same info by typing `protonate_receptor.py -h`.

TODO: add optional support for LYS and CYS protonated forms.
https://www.cgl.ucsf.edu/chimera/docs/ContributedSoftware/addh/addh.html

    """,
                            epilog="""
### EXAMPLE 1: list all protonatable residues within 4 Angstroms from the ligand.
pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -list

### EXAMPLE 2: keep ASP_29.A ASP_30.A fixed to the unprotonated state and create alternative protonations for all the rest.
pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 4.0 -fix ASP_29.A -fix ASP_30.A

### EXAMPLE 3: protonate all protein residues.
pychimera $(which protonate_receptor.py) -rec 1a30_protein.pdb -lig 1a30_ligand.sdf -r 0

    """)
    parser.add_argument("-list", dest="LIST_PROTONATABLE", required=False, default=False, action='store_true',
                        help="List the protonatable residues within the binding site and exit.")
    parser.add_argument("-rec", dest="RECEPTOR", required=False, default=None, type=str,
                        help="pdb file with the apo form of the receptor.")
    parser.add_argument("-lig", dest="LIGAND", required=False, default=None, type=str,
                        help="sdf or mol2 file with optimized ligand structure from which to find the binding site residues.")
    parser.add_argument("-r", dest="RADIUS", required=False, default=8.0, type=float,
                        help="The distance around the ligand within which residues will be protonated. Use '-r 0' if you "
                             "want to protonate the whole protein. Default: %(default)s.")
    # parser.add_argument("-p", dest="PYTHONPATH", required=False, default=None, type=str,
    #                     help="the PYTHONPATH environment variable.")
    # action='append' collects one residue per occurrence of the flag, hence '-fix X -fix Y' in the help text
    parser.add_argument("-fix", dest="FIXED_STATES", required=False, default=[], type=str, action='append',
                        help="A residue to fix to one state; repeat the option for every residue to be fixed. "
                             "E.g. '-fix ASP_30.A -fix GLH_24.B' will NOT produce any structure with "
                             "ASH_30.A or GLU_24.B. This is useful when you have >4 protonatable residues within the binding site"
                             " and you want to reduce the number of combinations.")
    parser.add_argument("-flipcooh", dest="FLIP_COOH", required=False, default=False, action='store_true',
                        help="Flip the carboxylic group of ASH and GLH by 180 degrees. This will produce an order of magnitude "
                             "more combinations, therefore use with caution. For example, if you do an energy minimization "
                             "before scoring, it will hopefully suffice (hopefully it will flip the COOH if needed). If not, then "
                             "use this option.")
    parser.add_argument("-dockprep", dest="DOCKPREP", required=False, default=False, action='store_true',
                        help="Prepare the receptor for docking or MD. Namely: "
                             "1) delete water molecules, "
                             "2) repair truncated sidechains, "
                             "3) add hydrogens, "
                             "4) assign partial charges (protein amberSB14, ligand AM1). "
                             "Also for each protonation state combination write a pdb file that contains the protein+ligand and the first 2 lines"
                             " will be comments that state the net charge of the receptor and the ligand, respectively.")

    args = parser.parse_args()
    return args
75 |
76 | ########################################################## FUNCTION DEFINITIONS ####################################################
77 |
def _swap_carboxyl_oxygens(ASH_GLH_rstates):
    """
        FUNCTION that swaps the names of the two carboxyl oxygens of every residue in state 'GLH1' (OE1 <-> OE2) or
        'ASH1' (OD1 <-> OD2). Calling it a second time restores the original names.
        ARGUMENTS:
        ASH_GLH_rstates:    list of (resid, state) tuples of the protonated ASP/GLU residues.
    """
    for resid, state in ASH_GLH_rstates:
        # rename the atoms, because by default proton goes to O[DE]2 but we want it in O[DE]1
        if state == 'GLH1':
            first, second = "OE1", "OE2"
        elif state == 'ASH1':
            first, second = "OD1", "OD2"
        else:
            continue
        # three-step swap through the temporary atom name XX
        rc("setattr a name XX :%s@%s" % (resid, first))
        rc("setattr a name %s :%s@%s" % (first, resid, second))
        rc("setattr a name %s :%s@XX" % (second, resid))


def write_protonated_structure(protonations):
    """
        FUNCTION that applies one combination of protonation states to the global 'residues', re-adds the hydrogens
        and writes the resulting structure into a pdb file named after the receptor file and the applied states.
        ARGUMENTS:
        protonations:   iterable of strings of the form '<state>_<resid>' (presumably like 'ASH_25.A'; the part
                        after '_' must match str(r.id) of the residue).
    """

    global residues, args

    id2state = {}
    pdb = args.RECEPTOR.replace(".pdb", "")
    for rstate in protonations:
        state, resid = rstate.split('_')
        id2state[resid] = state
        if args.FLIP_COOH:
            # in the file name the flipped variants 1/2 are written as a/b
            state = state.replace("1", "a").replace("2", "b")
        pdb += "_%s%s" % (state, resid.replace(".", ""))
    pdb += ".pdb"

    # Alter the protonation states
    ASH_GLH_rstates = []
    for r in residues:
        state = id2state.get(str(r.id))
        if state is None:   # this residue is not part of the current combination
            continue
        r.type = state[:3]  # works for both ASH and ASH1
        if state[:3] in ("ASH", "GLH"):
            ASH_GLH_rstates.append((str(r.id), state))

    if args.FLIP_COOH:
        _swap_carboxyl_oxygens(ASH_GLH_rstates)

    # Write the structure
    rc("del H")
    rc("addh")
    if args.DOCKPREP:   # prepend net charges to the pdb file
        pdb = pdb.replace(".pdb", "_complex.pdb")
        rc("combine #0,1 name complex modelId 2")
        rc("write format pdb #2 %s" % pdb)
        rc("delete #2")
        models = chimera.openModels.list(modelTypes=[chimera.Molecule])
        rc("addcharge std spec #0")  # re-add ff14SB charges to the protonated receptor only (the ligand protonation did not change)
        rec_charge = estimateFormalCharge(models[0].atoms)
        lig_charge = estimateFormalCharge(models[1].atoms)
        # Neutralize system
        net_charge = rec_charge+lig_charge
        if net_charge < 0:
            # pass the same 'models' list as the Cl- branch; the name 'q' that was used here is not defined in this function
            initiateAddions(models, "Na+", "neutralize", chimera.replyobj.status)
        elif net_charge > 0:
            initiateAddions(models, "Cl-", "neutralize", chimera.replyobj.status)
        with open(pdb, "r+") as f:
            s = f.read()
            f.seek(0)
            # NOTE(review): the receptor charge is reported as -lig_charge (i.e. the value after neutralization),
            # not as rec_charge - confirm this is what downstream tools expect.
            f.write("# receptor net charge = %i\n# ligand net charge = %i\n" % (-lig_charge, lig_charge))   # after system neutralization
            f.write(s)
    else:
        rc("write format pdb #0 %s" % pdb)

    if args.FLIP_COOH:  # restore the original O[DE] names
        _swap_carboxyl_oxygens(ASH_GLH_rstates)
148 |
def populate_leaves(Peptide_Tree, resid, residue_states):
    """
    FUNCTION that grows the Tree by one level: under every current leaf it adds one child per
    state listed for the given residue.
    ARGUMENTS:
    Peptide_Tree:       The Tree structure to be extended (modified in place).
    resid:              The residue id; used as the name of every new child and as the key
                        into residue_states.
    residue_states:     Mapping from residue id to the sequence of its possible states.
    RETURNS:
    (Peptide_Tree, BOOLEAN):    The same Tree object and a BOOLEAN which is True if at least one
                                new leaf was added, or False otherwise (e.g. resid is not a key
                                of residue_states).
    """
    added = 0
    # Take a snapshot of the leaves first, because children are attached while looping.
    for tip in list(Peptide_Tree.iter_leaves()):
        try:
            for protonation in residue_states[resid]:
                # each child is named after the residue and carries the state as a feature
                tip.add_child(name=resid).add_features(state=protonation)
                added += 1
        except (KeyError, IndexError):
            continue

    return (Peptide_Tree, added > 0)
177 |
178 |
def build_Protonation_Tree(peptide, residue_states):
    """
    FUNCTION that enumerates every combination of protonation states of the given residues.

    A Tree is grown one level per residue of 'peptide' (see populate_leaves()); every
    root-to-leaf path is then one combination.
    ARGUMENTS:
    peptide:            Sequence of residue ids of the form '<resid>.<chain>'.
    residue_states:     Mapping from residue id to the sequence of its possible states.
    RETURNS:
    all_protonations_set:   A set of tuples of '<state>_<resid>.<chain>' strings, each tuple
                            sorted by chain and resid so that the same combination is stored
                            only once. The set is empty if no level could be added to the Tree.
    """
    print("Building Protonation Trees from peptide %s" % list(peptide))
    Peptide_Tree = Tree()
    Root = Peptide_Tree.get_tree_root()
    Root.add_feature("name", "root")
    Root.add_feature("state", "delete")
    sys.stdout.write("Expanding tree from level ")
    for level, resid in enumerate(peptide):
        sys.stdout.write(str(level) + " ")
        sys.stdout.flush()
        Peptide_Tree, _ = populate_leaves(Peptide_Tree, resid, residue_states)

    print("\nSaving protonations from Tree...")

    all_protonations_set = set()
    for leaf in Peptide_Tree.iter_leaves():
        if leaf is Root:
            # Nothing was added to the Tree; the root's name ('root') has no '<resid>.<chain>'
            # form and carries no protonation state.
            continue
        protonations = []
        # The leaf plus all its ancestors except the last one (the root) form one combination.
        # The original 'del ancestor' raised NameError for a single-residue peptide, where the
        # leaf has no ancestor other than the root.
        for node in [leaf] + list(leaf.get_ancestors()[:-1]):
            resid, chain = node.name.split(".")
            protonations.append((node.state, resid, chain))
        protonations.sort(key=itemgetter(2, 1))  # sort by chain and resid to avoid permutations of the same combination
        all_protonations_set.add(tuple("%s_%s.%s" % t for t in protonations))

    # Drop every local reference to the Tree before collecting (presumably the nodes form
    # reference cycles, which is why the original forced a collection here).
    Peptide_Tree = Root = leaf = node = None
    gc.collect()
    return all_protonations_set
218 |
219 | ######################################################################################################################################
220 |
221 |
if __name__ == "__main__":
    # Driver: load receptor (#0) and ligand (#1), collect the residues to be protonated, enumerate
    # all combinations of their protonation states and write one structure per combination.
    args = cmdlineparse()

    from chimera import runCommand as rc
    from chimera.selection import currentResidues

    # Possible states per residue type; with FLIP_COOH the protonated form is split into two
    # variants (proton on either carboxyl oxygen).
    resname_states_dict = {
        "GLU": ["GLU", "GLH"],
        "ASP": ["ASP", "ASH"]
    }

    if args.FLIP_COOH:
        resname_states_dict["GLU"] = ["GLU", "GLH1", "GLH2"]
        resname_states_dict["ASP"] = ["ASP", "ASH1", "ASH2"]

    rc("open %s" % args.RECEPTOR)  # load the receptor
    rc("open %s" % args.LIGAND)  # load the ligand

    if args.DOCKPREP:
        import chimera
        from Addions import initiateAddions
        from DockPrep import prep
        from AddCharge import estimateFormalCharge
        models = chimera.openModels.list(modelTypes=[chimera.Molecule])
        print("Preparing receptor for docking and calculating ligand AM1 charges (may be slow).")
        prep(models, nogui=True, method='am1')

    # Select the residues to be protonated
    # (a negative RADIUS matches neither branch and leaves the current selection untouched)
    if args.RADIUS > 0:
        rc("sel #1 z<%f & ~ #1" % args.RADIUS)
    elif args.RADIUS == 0:
        rc("sel #0")
    residues = currentResidues()  # get the residue of the pocket
    residue_states = {}
    protonatable_resids = []
    protonatable_resnames = []
    for r in residues:
        if r.type in ["GLU", "GLH"]:
            states = resname_states_dict["GLU"]
        elif r.type in ["ASP", "ASH"]:
            states = resname_states_dict["ASP"]
        elif r.type in ["HIS", "HIE", "HID", "HIP"]:
            states = ["HIE", "HID", "HIP"]
        else:
            states = None
        if states is None:
            states = [r.type]   # not protonatable: keeps its only state
        else:
            protonatable_resids.append(str(r.id))
            protonatable_resnames.append(r.type)
        residue_states[str(r.id)] = states

    if args.LIST_PROTONATABLE:
        protonatable_rstates = ["%s_%s" % (resname, resid)
                                for resname, resid in zip(protonatable_resnames, protonatable_resids)]
        print("\n~~~ The protonatable residues within %.3f Angstroms from the ligand are: %s\n" % (args.RADIUS, " ".join(protonatable_rstates)))
        sys.exit(0)

    # NOTE(review): a fixed residue is removed from the enumeration, so it never appears in the
    # combinations handed to write_protonated_structure() and its type is left as loaded --
    # confirm that the requested fixed state does not need to be applied explicitly.
    for rstate in args.FIXED_STATES:
        state, resid = rstate.split('_')
        residue_states[resid] = [state]
        try:
            protonatable_resids.remove(resid)
            print("Fixed resid %s to %s state." % (resid, state))
        except ValueError:
            print("Warning: residue %s is not within the specified distance from the ligand or is not a valid residue, " \
                  "therefore it will be ignored." % rstate)

    # Every ordering of the residues yields the same set, because build_Protonation_Tree() sorts
    # each combination by chain and resid. One ordering is therefore enough; looping over all
    # permutations only multiplied the work by n!.
    if protonatable_resids:
        all_protonations = build_Protonation_Tree(tuple(protonatable_resids), residue_states)
    else:
        all_protonations = set()
        print("No protonatable residues left to enumerate; nothing to write.")

    # Finally create and write the protonated structures
    # (plain sort of the tuples: the original key 'x.count' returned bound methods, which cannot
    # be ordered)
    all_protonations = sorted(all_protonations)
    for protonations in all_protonations:
        print("Writing structure with the following protonation states: ", protonations)
        write_protonated_structure(protonations)
302 |
--------------------------------------------------------------------------------
/dockprep/README.md:
--------------------------------------------------------------------------------
1 | This is a Python script to prepare a receptor-ligand complex for scoring (e.g. for PM6/COSMO scoring) using [UCSF Chimera](https://www.cgl.ucsf.edu/chimera/). It takes as input either (i) the protein-ligand complex in one pdb file, or (ii) the receptor in a pdb file and the ligand in a separate file of any format (pdb, mol, mol2, sdf). The output is always a pdb file of the protein-ligand complex in which the residue name of the ligand is "LIG".
2 |
3 | If you encounter problems with the input pdb file then try correcting it using one of the following programs:
4 | 1) pdb4amber from AmberTools (https://github.com/Amber-MD/pdb4amber)
5 | 2) pdbfixer (https://github.com/pandegroup/pdbfixer)
6 | Sometimes you may need to rename atoms manually in Chimera (e.g. the N- and C-terminal caps), but this is out of the scope of this script.
7 |
8 |
9 | For a full list of options run `dockprep.py -h`.
10 |
11 |
12 | You can launch the script either using [PyChimera](https://pychimera.readthedocs.io/en/latest/) (to install it read this [tutorial](https://github.com/tevang/tutorials/tree/master/create_alternative_protonations))
13 | ```bash
14 | pychimera $(which dockprep.py) -rec example_files/3K5C-BACE.pdb -lig example_files/3K5C-BACE_1.mol -cmethod gas -neut
15 | ```
16 | or the vanilla UCSF Chimera executable.
17 | ```bash
18 | chimera --nogui --nostatus --script "$(which dockprep.py) -rec example_files/3K5C-BACE.pdb -lig example_files/3K5C-BACE_1.mol -cmethod gas -neut"
19 | ```
20 | To prepare multiple protein-ligand complexes with dockprep.py **in parallel** on the Unix shell, first create a file with all the individual commands like this:
21 | ```bash
22 | for mol in $(ls example_files/*mol)
23 | do
24 | echo "chimera --nogui --nostatus --script \"$(which dockprep.py) -rec example_files/3K5C-BACE.pdb -lig $mol -cmethod gas -neut\""
25 | done > commands.txt
26 | ```
27 |
28 | Then launch it using [GNU parallel script](https://www.gnu.org/software/parallel/)
29 | ``` bash
30 | parallel -j3 < commands.txt
31 | ```
32 |
33 | Likewise, if you have the protein-ligand complex already saved in one file, you can do all the above like this:
34 | ```bash
35 | pychimera $(which dockprep.py) -complex complex.pdb -cmethod gas -neut
36 | ```
--------------------------------------------------------------------------------
/dockprep/dockprep.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | __author__="Thomas Evangelidis"
4 | __email__="tevang3@gmail.com"
5 |
6 |
7 |
8 | import os, sys, traceback
9 | import random
10 | from argparse import ArgumentParser, RawDescriptionHelpFormatter
11 | from collections import defaultdict
12 | from operator import itemgetter
13 |
14 |
def cmdlineparse():
    """
    Build the command line parser of dockprep.py and parse sys.argv.

    The charge method can be given either as '--charge-method' or as '-cmethod', the spelling
    used by the usage examples. Exits with a usage error if neither -complex nor -rec is given,
    since the script has nothing to load in that case.

    RETURNS:
    args:   The argparse namespace; attribute names are the 'dest' values defined below.
    """
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description="""
DESCRIPTION:

This is a Python script to prepare the receptor-ligand complex for scoring. If you encounter problems with the input pdb file
then try correcting it using:
1) pdb4amber from AmberTools (https://github.com/Amber-MD/pdb4amber)
2) pdbfixer (https://github.com/pandegroup/pdbfixer)

TODO: add optional support for LYS and CYS protonated forms.
https://www.cgl.ucsf.edu/chimera/docs/ContributedSoftware/addh/addh.html

    """,
                            epilog="""
### EXAMPLE 1:
pychimera $(which dockprep.py) -complex 3K5C-BACE_150_complex.pdb -cmethod gas -neut -lignetcharge -2


""")
    parser.add_argument("-complex", dest="COMPLEX", required=False, default=None, type=str,
                        help="pdb file with the holo form of the receptor.")
    # '-cmethod' is registered as an alias because the epilog above uses that spelling
    parser.add_argument("-cmethod", "--charge-method", dest="CHARGE_METHOD", required=False, default='gas', type=str,
                        choices=['gas', 'am1'],
                        help="Method to calculate charges fo the ligand. Default: %(default)s")
    parser.add_argument("-neut", dest="NEUTRALIZE", required=False, default=False, action='store_true',
                        help="Neutralize the system by adding counter ions.")
    parser.add_argument("-stripions", dest="STRIP_IONS", required=False, default=False, action='store_true',
                        help="Strip out all ions.")
    parser.add_argument("-keepchains", dest="KEEP_CHAINIDS", required=False, default=False, action='store_true',
                        help="Keep the original chain IDs. Default is False, ligand and protein will be chain A for homology modeling.")
    parser.add_argument("-keephydrogens", dest="KEEP_PROTEIN_HYDROGENS", required=False, default=False, action='store_true',
                        help="Keep the protein's hydrogens (default is to strip them).")
    parser.add_argument("-rec", dest="RECEPTOR", required=False, default=None, type=str,
                        help="Instead of -complex give the pdb file with the apo form of the receptor.")
    parser.add_argument("-lig", dest="LIGAND", required=False, default=None, type=str,
                        help="Instead of -complex give an sdf or mol2 file with optimized ligand structure from which to find the "
                             "binding site residues.")
    parser.add_argument("-o", dest="OUT_PDB", required=False, default=None, type=str,
                        help="Output PDB file name of the prepared protein-ligand complex.")
    parser.add_argument("-lignetcharge", dest="LIG_NET_CHARGE", required=False, default=None, type=int,
                        help="Optionaly (but RECOMMENDED) give the net charge of the ligand, otherwise it will be estimated by Chimera.")
    parser.add_argument("-recnetcharge", dest="REC_NET_CHARGE", required=False, default=None, type=int,
                        help="Optionaly (but RECOMMENDED) give the net charge of the receptor, otherwise it will be estimated by Chimera.")
    args = parser.parse_args()
    if not (args.COMPLEX or args.RECEPTOR):
        # without either input the main script would fail later on undefined variables
        parser.error("either -complex or -rec (optionally together with -lig) must be given.")
    return args
60 |
61 |
62 | ################################################### FUNCTION DEFINITIONS #########################################
63 |
def standardize_terminal_protein_residues(receptor_pdb, molID="#0"):
    """
    To prevent errors like 'ValueError: Cannot determine GAFF type for :11.A@HD14 (etc.)' raised by
    initiateAddions(), originating from termini capped by Shrodinger's Maestro or incomplete or missing protein
    residues, strip the hydrogens of the first and last residue of every protein chain and reload
    the model through a temporary pdb file.

    ARGUMENTS:
    receptor_pdb:   Path of the loaded structure file; only used to derive the name of the
                    temporary file ('<path without extension>_tmp.pdb'), which is removed again.
    molID:          Chimera model specifier of the structure (default '#0').
    """
    print("Standardizing protein's terminal residues.")
    rc("sel %s & protein" % molID)
    chain_residues = defaultdict(list)
    for r in currentResidues():
        chain_residues[r.id.chainId].append((r.id.position, r.type))
    for chainID, members in chain_residues.items():
        members.sort(key=itemgetter(0))
        # NOTE: the side-chain mutations (swapaa) of the termini to their original aa type were
        # not necessary in the proteins tested so far, therefore only the hydrogens are deleted.
        for position in (members[0][0], members[-1][0]):    # N-term, C-term
            rc("del element.H & %s:%i.%s" % (molID, position, str(chainID)))

    # Only if you write and reload the PDB will Chimera reset the valence of the N-terminal amide
    # and thus not add again the H1,H2,H3 which cause the error.
    # The temporary name is built with splitext: str.replace(".pdb", ...) left names without
    # '.pdb' unchanged, so the input file itself was overwritten and then deleted.
    tmp_pdb = os.path.splitext(receptor_pdb)[0] + "_tmp.pdb"
    rc("write format pdb %s %s" % (molID, tmp_pdb))
    try:
        rc("del %s" % molID)
        rc("open %s %s" % (molID, tmp_pdb))
    finally:
        os.remove(tmp_pdb)  # do not leave the temporary file behind, even if reloading failed
92 |
93 | ##################################################################################################################
94 |
if __name__ == "__main__":
    # Driver: load the input (-complex, -rec + -lig, or -rec alone), obtain the net charges,
    # optionally neutralize with counter ions, and write the prepared pdb with charge headers.

    try:

        args = cmdlineparse()
        import chimera
        from chimera import runCommand as rc
        from Addions import initiateAddions
        from DockPrep import prep
        import AddH
        from AddCharge import estimateFormalCharge, addNonstandardResCharges
        from chimera.selection import currentResidues

        if args.KEEP_PROTEIN_HYDROGENS:
            addHFunc = AddH.simpleAddHydrogens
            # NOTE: addHFunc=None yields unrealistic net charges!
        else:
            addHFunc = AddH.hbondAddHydrogens
            # NOTE: the default option addHFunc=AddH.hbondAddHydrogens raised an Error in Carbonic
            # Unhydrase with the Zn+2 ion. However, is works better for some proteins, where AddH.simpleAddHydrogens
            # leads to net_charge prediction of the order of 120...

        def estimate_receptor_charge():
            """DockPrep the first open molecule (the receptor; is fast) and return its estimated net charge."""
            open_models = chimera.openModels.list(modelTypes=[chimera.Molecule])
            # For a full list of DockPrep options, look into file Chimera-alpha_py2.7/share/DockPrep/__init__.py
            prep([open_models[0]], nogui=True, method=args.CHARGE_METHOD, addHFunc=addHFunc)
            # DockPred does not assign charges to receptor atoms, only to ligand atoms
            charge = estimateFormalCharge(open_models[0].atoms)
            print("Receptor's net charge =", charge)
            return charge

        # NOTE(review): in the -complex and -rec branches the hydrogens are deleted when
        # -keephydrogens IS given, which looks inverted relative to the option's help text;
        # left unchanged pending confirmation.
        if args.COMPLEX:
            rc("open %s" % args.COMPLEX)  # load the protein-ligand complex
            if args.KEEP_PROTEIN_HYDROGENS:
                rc("delete element.H")
            if args.REC_NET_CHARGE is None:
                # FIX: args.RECEPTOR is None in this branch; the loaded file is args.COMPLEX
                standardize_terminal_protein_residues(args.COMPLEX, "#0")  # TODO: UNTESTED
            if args.STRIP_IONS:
                rc("delete ions")
            rc("split #0 ligands")
            rc("sel #0.2")  # select the ligand
            ligres = currentResidues()[0]
            ligres.type = 'LIG'  # change the resname of the ligand to 'LIG'
            rc("combine #0.1 modelId 1")  # create a new molecule containing just the receptor
            rc("combine #0.2 modelId 2")  # create a new molecule containing just the ligand
            rc("del #0")
            if args.REC_NET_CHARGE is not None:
                rec_charge = args.REC_NET_CHARGE
            else:
                rec_charge = estimate_receptor_charge()
            # Now that we calculated the charges of the protein and the ligand, we just need the complex
            rc("combine #1,2 modelId 3")  # create a new molecule containing the protein-ligand complex
            rc("del #1-2")
            # splitext instead of replace(".pdb", ...): an input name without '.pdb' must not
            # become the output name
            pdb = os.path.splitext(args.COMPLEX)[0] + "_prep.pdb"
        elif args.RECEPTOR and args.LIGAND:
            rc("open %s" % args.RECEPTOR)  # load the receptor
            if args.REC_NET_CHARGE is None:
                standardize_terminal_protein_residues(args.RECEPTOR, "#0")  # read function's definition to understand why is here
            rc("open %s" % args.LIGAND)  # load the ligand
            if args.STRIP_IONS:
                rc("delete ions")
            if args.REC_NET_CHARGE is not None:
                rec_charge = args.REC_NET_CHARGE
            else:
                rec_charge = estimate_receptor_charge()
            rc("sel #1")  # select the ligand
            ligres = currentResidues()[0]
            ligres.type = 'LIG'  # change the resname of the ligand to 'LIG'
            rc("combine #0,1 modelId 2")  # create a new molecule containing the protein-ligand complex
            rc("combine #2 modelId 3")  # copy it to model #3, which the rest of the script works on
            rc("del #0-2")
            pdb = os.path.splitext(os.path.basename(args.RECEPTOR))[0] + "_" + os.path.splitext(os.path.basename(args.LIGAND))[0] + "_prep.pdb"
        elif args.RECEPTOR:
            rc("open %s" % args.RECEPTOR)  # load the receptor
            if args.KEEP_PROTEIN_HYDROGENS:
                rc("delete element.H")
            if args.REC_NET_CHARGE is None:
                standardize_terminal_protein_residues(args.RECEPTOR, "#0")  # TODO: UNTESTED
            if args.STRIP_IONS:
                rc("delete ions")
            if args.REC_NET_CHARGE is not None:
                rec_charge = args.REC_NET_CHARGE
            else:
                rec_charge = estimate_receptor_charge()
            pdb = os.path.splitext(os.path.basename(args.RECEPTOR))[0] + "_prep.pdb"
        else:
            # nothing was loaded; stop here instead of failing later on undefined variables
            raise SystemExit("ERROR: either -complex or -rec (optionally together with -lig) must be given.")

        print("Preparing receptor for docking and calculating ligand '%s' charges (may be slow)." % args.CHARGE_METHOD)
        models = chimera.openModels.list(modelTypes=[chimera.Molecule])  # actually only one model is left
        # 'is None' / 'is not None' are used throughout because a given net charge may be 0
        if args.LIGAND and args.LIG_NET_CHARGE is not None:
            net_charge = args.LIG_NET_CHARGE + rec_charge
        elif args.LIGAND or args.COMPLEX:
            # Add partial charges again for initiateAddions() to function.
            prep(models, nogui=True, method=args.CHARGE_METHOD, addHFunc=AddH.simpleAddHydrogens)
            # NOTE: the default option addHFunc=AddH.hbondAddHydrogens raised an Error in Carbonic Unhydrase with the Zn+2 ion.
            net_charge = estimateFormalCharge(models[0].atoms)
        else:   # receptor only
            net_charge = rec_charge

        # Neutralize system
        if args.NEUTRALIZE:
            if net_charge < 0:
                initiateAddions(models, "Na+", str(abs(net_charge)), chimera.replyobj.status)
            elif net_charge > 0:
                initiateAddions(models, "Cl-", str(net_charge), chimera.replyobj.status)

            # Kept under -neut because it only concerns the counter ions added just above.
            if net_charge != 0:
                # change the resids of the ions, which by default they are all 1
                rc("sel ~ions")
                existing_resids = [r.id.position for r in currentResidues()]
                start = max(existing_resids) + 2
                rc("resrenumber %i ions" % start)  # renumber the resids of the added ions

        if args.COMPLEX or args.LIGAND:
            # change the resid of the ligand
            rc('sel #3 & ~ #3:LIG')
            existing_resids = [r.id.position for r in currentResidues()]
            start = max(existing_resids) + 2
            rc("resrenumber %i #3:LIG" % start)
            rc("combine #3 modelId 4")  # create a new molecule to split it into receptor and ligand
            rc("split #4 atoms ~#4:LIG")
            rc("combine #4.1 modelId 5")  # create a new molecule containing just the receptor
            rc("combine #4.2 modelId 6")  # create a new molecule containing just the ligand
            models = chimera.openModels.list(modelTypes=[chimera.Molecule])
            if args.LIG_NET_CHARGE is not None:
                lig_charge = args.LIG_NET_CHARGE
            else:
                # NOTE(review): index 3 is presumably the ligand-only copy -- verify against the
                # order in which Chimera lists the models #3, #4.1, #4.2, #5, #6.
                lig_charge = estimateFormalCharge(models[3].atoms)
            rc("del #4-6")

        # Finally, write the complex pdb file with headers
        if not args.KEEP_CHAINIDS:
            rc("changechains B A all")  # <== OPTIONAL (ligand and protein will be chain A for homology modeling)
        if args.OUT_PDB:
            pdb = args.OUT_PDB
        has_ligand = bool(args.COMPLEX or args.LIGAND)
        if has_ligand:
            rc("write format pdb #3 %s" % pdb)
        else:
            rc("write format pdb #0 %s" % pdb)
        # Prepend the charge header(s) to the file Chimera has just written
        with open(pdb, "r+") as f:
            s = f.read()
            f.seek(0)
            if has_ligand:
                f.write("HEADER receptor net charge = %i\nHEADER ligand net charge = %i\n" % (rec_charge, lig_charge))  # after system neutralization
            else:
                f.write("HEADER receptor net charge = %i\n" % (rec_charge))  # after system neutralization
            f.write(s)

    except Exception:
        # Print the traceback explicitly to stdout before re-raising. SystemExit (e.g. from -h
        # or a usage error) is not an Exception and passes through without a traceback.
        print(traceback.format_exc())
        raise
--------------------------------------------------------------------------------
/dockprep/example_files/3K5C-BACE_1.mol:
--------------------------------------------------------------------------------
1 | REMARK score -90.21
2 | LCcorina 10041815583D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 87 88 0 0 0 0 999 V2000
5 | 25.9314 5.1125 17.3673 C 0 0 0 0 0 0 0 0 0 0 0 0
6 | 26.3958 5.7692 18.5087 C 0 0 0 0 0 0 0 0 0 0 0 0
7 | 27.6294 6.4115 18.5006 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 26.6868 5.1203 16.1800 C 0 0 0 0 0 0 0 0 0 0 0 0
9 | 27.9071 5.7803 16.1600 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 28.3567 6.4294 17.3141 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 28.7339 5.7745 14.9144 C 0 0 0 0 0 0 0 0 0 0 0 0
12 | 28.2672 5.9839 13.7999 O 0 0 0 0 0 0 0 0 0 0 0 0
13 | 30.0161 5.5308 15.1565 N 0 0 0 0 0 0 0 0 0 0 0 0
14 | 31.1655 5.7229 14.2694 C 0 0 0 0 0 0 0 0 0 0 0 0
15 | 32.3013 6.5543 14.9475 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 31.7701 7.7059 15.6807 C 0 0 0 0 0 0 0 0 0 0 0 0
17 | 32.5417 8.1089 16.8492 C 0 0 0 0 0 0 0 0 0 0 0 0
18 | 31.7382 8.9416 17.8567 C 0 0 0 0 0 0 0 0 0 0 0 0
19 | 31.4978 8.5096 19.2845 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 30.6304 9.5192 20.0787 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 29.4018 8.8984 20.7758 C 0 0 0 0 0 0 0 0 0 0 0 0
22 | 28.4435 8.3191 19.8155 N 0 0 0 0 0 0 0 0 0 0 0 0
23 | 28.2301 6.9783 19.7605 C 0 0 0 0 0 0 0 0 0 0 0 0
24 | 28.6153 6.2012 20.6343 O 0 0 0 0 0 0 0 0 0 0 0 0
25 | 31.6361 8.9012 14.7208 C 0 0 0 0 0 0 0 0 0 0 0 0
26 | 27.5563 9.2855 19.1471 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 26.1995 9.3897 19.8666 C 0 0 0 0 0 0 0 0 0 0 0 0
28 | 24.7362 4.4553 17.3963 O 0 0 0 0 0 0 0 0 0 0 0 0
29 | 24.1183 4.4752 16.0315 C 0 0 0 0 0 0 0 0 0 0 0 0
30 | 22.5931 4.3157 16.1531 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 31.6697 4.3250 13.9202 C 0 0 0 0 0 0 0 0 0 0 0 0
32 | 31.9299 3.5877 15.1249 O 0 0 0 0 0 0 0 0 0 0 0 0
33 | 30.5990 3.5391 13.1173 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 31.2898 2.7953 11.9565 C 0 0 0 0 0 0 0 0 0 0 0 0
35 | 30.2806 1.9129 11.2208 C 0 0 0 0 0 0 0 0 0 0 0 0
36 | 31.8599 3.7873 10.9327 C 0 0 0 0 0 0 0 0 0 0 0 0
37 | 31.1890 4.7139 10.4842 O 0 0 0 0 0 0 0 0 0 0 0 0
38 | 33.0930 3.5508 10.5117 N 0 0 0 0 0 0 0 0 0 0 0 0
39 | 33.6869 4.3522 9.4692 C 0 0 0 0 0 0 0 0 0 0 0 0
40 | 34.4311 3.4314 8.5130 C 0 0 0 0 0 0 0 0 0 0 0 0
41 | 35.0024 4.1969 7.2880 C 0 0 0 0 0 0 0 0 0 0 0 0
42 | 36.2636 4.9971 7.6826 C 0 0 0 0 0 0 0 0 0 0 0 0
43 | 25.7958 5.7810 19.4067 H 0 0 0 0 0 0 0 0 0 0 0 0
44 | 26.3148 4.6169 15.2999 H 0 0 0 0 0 0 0 0 0 0 0 0
45 | 29.2986 6.9557 17.2660 H 0 0 0 0 0 0 0 0 0 0 0 0
46 | 30.1652 5.1352 16.1720 H 0 0 0 0 0 0 0 0 0 0 0 0
47 | 30.8093 6.2738 13.3865 H 0 0 0 0 0 0 0 0 0 0 0 0
48 | 32.8490 5.9165 15.6569 H 0 0 0 0 0 0 0 0 0 0 0 0
49 | 32.9940 6.9252 14.1777 H 0 0 0 0 0 0 0 0 0 0 0 0
50 | 30.8225 7.3438 16.1060 H 0 0 0 0 0 0 0 0 0 0 0 0
51 | 32.9033 7.2138 17.3764 H 0 0 0 0 0 0 0 0 0 0 0 0
52 | 33.4002 8.7179 16.5295 H 0 0 0 0 0 0 0 0 0 0 0 0
53 | 32.2194 9.9220 17.9885 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 30.7141 9.0833 17.4812 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 30.9772 7.5406 19.2916 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 32.4617 8.4118 19.8053 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 31.2407 9.9882 20.8646 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 30.2527 10.2947 19.3962 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 29.7308 8.0964 21.4529 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 28.8772 9.6737 21.3534 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 30.9754 8.6277 13.8849 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 32.6285 9.1711 14.3307 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 31.2086 9.7589 15.2607 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 28.0295 10.2785 19.1472 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 27.3787 8.9640 18.1102 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 25.7077 8.4058 19.8661 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 25.5624 10.1184 19.3440 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 26.3589 9.7195 20.9038 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 24.3476 5.4318 15.5392 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 24.5257 3.6475 15.4324 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 22.1522 4.2293 15.1491 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 22.1721 5.1941 16.6641 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 22.3643 3.4095 16.7332 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 32.6216 4.4157 13.3765 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 32.9038 3.6816 15.6277 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 30.1026 2.8145 13.7796 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 29.8526 4.2409 12.7169 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 32.0982 2.1846 12.3849 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 30.7877 1.3786 10.4039 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 29.4784 2.5408 10.8059 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 29.8497 1.1844 11.9235 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 33.6657 2.7302 10.9684 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 32.8970 4.8943 8.9284 H 0 0 0 0 0 0 0 0 0 0 0 0
84 | 34.3873 5.0738 9.9152 H 0 0 0 0 0 0 0 0 0 0 0 0
85 | 35.2706 2.9557 9.0410 H 0 0 0 0 0 0 0 0 0 0 0 0
86 | 33.7447 2.6560 8.1419 H 0 0 0 0 0 0 0 0 0 0 0 0
87 | 35.2699 3.4787 6.4991 H 0 0 0 0 0 0 0 0 0 0 0 0
88 | 34.2437 4.8957 6.9058 H 0 0 0 0 0 0 0 0 0 0 0 0
89 | 36.0008 5.7418 8.4482 H 0 0 0 0 0 0 0 0 0 0 0 0
90 | 36.6653 5.5090 6.7957 H 0 0 0 0 0 0 0 0 0 0 0 0
91 | 37.0228 4.3100 8.0846 H 0 0 0 0 0 0 0 0 0 0 0 0
92 | 1 2 2 0 0 0 0
93 | 1 4 1 0 0 0 0
94 | 1 24 1 0 0 0 0
95 | 2 3 1 0 0 0 0
96 | 2 39 1 0 0 0 0
97 | 3 6 2 0 0 0 0
98 | 3 19 1 0 0 0 0
99 | 4 5 2 0 0 0 0
100 | 4 40 1 0 0 0 0
101 | 5 6 1 0 0 0 0
102 | 5 7 1 0 0 0 0
103 | 6 41 1 0 0 0 0
104 | 7 8 2 0 0 0 0
105 | 7 9 1 0 0 0 0
106 | 9 10 1 0 0 0 0
107 | 9 42 1 0 0 0 0
108 | 10 11 1 0 0 0 0
109 | 10 27 1 0 0 0 0
110 | 10 43 1 0 0 0 0
111 | 11 12 1 0 0 0 0
112 | 11 44 1 0 0 0 0
113 | 11 45 1 0 0 0 0
114 | 12 13 1 0 0 0 0
115 | 12 21 1 0 0 0 0
116 | 12 46 1 0 0 0 0
117 | 13 14 1 0 0 0 0
118 | 13 47 1 0 0 0 0
119 | 13 48 1 0 0 0 0
120 | 14 15 1 0 0 0 0
121 | 14 49 1 0 0 0 0
122 | 14 50 1 0 0 0 0
123 | 15 16 1 0 0 0 0
124 | 15 51 1 0 0 0 0
125 | 15 52 1 0 0 0 0
126 | 16 17 1 0 0 0 0
127 | 16 53 1 0 0 0 0
128 | 16 54 1 0 0 0 0
129 | 17 18 1 0 0 0 0
130 | 17 55 1 0 0 0 0
131 | 17 56 1 0 0 0 0
132 | 18 19 1 0 0 0 0
133 | 18 22 1 0 0 0 0
134 | 19 20 2 0 0 0 0
135 | 21 57 1 0 0 0 0
136 | 21 58 1 0 0 0 0
137 | 21 59 1 0 0 0 0
138 | 22 23 1 0 0 0 0
139 | 22 60 1 0 0 0 0
140 | 22 61 1 0 0 0 0
141 | 23 62 1 0 0 0 0
142 | 23 63 1 0 0 0 0
143 | 23 64 1 0 0 0 0
144 | 24 25 1 0 0 0 0
145 | 25 26 1 0 0 0 0
146 | 25 65 1 0 0 0 0
147 | 25 66 1 0 0 0 0
148 | 26 67 1 0 0 0 0
149 | 26 68 1 0 0 0 0
150 | 26 69 1 0 0 0 0
151 | 27 28 1 0 0 0 0
152 | 27 29 1 0 0 0 0
153 | 27 70 1 0 0 0 0
154 | 28 71 1 0 0 0 0
155 | 29 30 1 0 0 0 0
156 | 29 72 1 0 0 0 0
157 | 29 73 1 0 0 0 0
158 | 30 31 1 0 0 0 0
159 | 30 32 1 0 0 0 0
160 | 30 74 1 0 0 0 0
161 | 31 75 1 0 0 0 0
162 | 31 76 1 0 0 0 0
163 | 31 77 1 0 0 0 0
164 | 32 33 2 0 0 0 0
165 | 32 34 1 0 0 0 0
166 | 34 35 1 0 0 0 0
167 | 34 78 1 0 0 0 0
168 | 35 36 1 0 0 0 0
169 | 35 79 1 0 0 0 0
170 | 35 80 1 0 0 0 0
171 | 36 37 1 0 0 0 0
172 | 36 81 1 0 0 0 0
173 | 36 82 1 0 0 0 0
174 | 37 38 1 0 0 0 0
175 | 37 83 1 0 0 0 0
176 | 37 84 1 0 0 0 0
177 | 38 85 1 0 0 0 0
178 | 38 86 1 0 0 0 0
179 | 38 87 1 0 0 0 0
180 | M END
181 | $$$$
182 |
--------------------------------------------------------------------------------
/dockprep/example_files/3K5C-BACE_4.mol:
--------------------------------------------------------------------------------
1 | REMARK score -96.38
2 | LCcorina 10041815553D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 90 92 0 0 0 0 999 V2000
5 | 31.5509 4.5957 14.3313 C 0 0 0 0 0 0 0 0 0 0 0 0
6 | 32.7807 4.7940 13.6214 O 0 0 0 0 0 0 0 0 0 0 0 0
7 | 30.6497 3.7570 13.3907 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 31.2278 2.4219 13.1292 N 0 3 0 0 0 0 0 0 0 0 0 0
9 | 30.5584 1.7491 12.0021 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 30.8676 0.2413 11.9094 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 29.4667 0.7100 12.3551 C 0 0 0 0 0 0 0 0 0 0 0 0
12 | 30.5108 2.4645 10.6461 C 0 0 0 0 0 0 0 0 0 0 0 0
13 | 31.7085 2.6465 9.9184 C 0 0 0 0 0 0 0 0 0 0 0 0
14 | 31.7030 3.3047 8.6594 C 0 0 0 0 0 0 0 0 0 0 0 0
15 | 30.4694 3.7778 8.1475 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 29.2648 3.6033 8.8686 C 0 0 0 0 0 0 0 0 0 0 0 0
17 | 29.2854 2.9457 10.1183 C 0 0 0 0 0 0 0 0 0 0 0 0
18 | 33.0142 3.4801 7.9025 C 0 0 0 0 0 0 0 0 0 0 0 0
19 | 33.3417 2.1948 7.1492 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 32.8769 4.6287 6.9086 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 34.1337 3.7914 8.8885 C 0 0 0 0 0 0 0 0 0 0 0 0
22 | 27.4020 6.6138 17.1724 N 0 0 0 0 0 0 0 0 0 0 0 0
23 | 27.4209 5.4691 16.2173 C 0 0 0 0 0 0 0 0 0 0 0 0
24 | 28.4932 5.6289 15.1339 C 0 0 0 0 0 0 0 0 0 0 0 0
25 | 29.7488 5.7284 15.5740 N 0 0 0 0 0 0 0 0 0 0 0 0
26 | 30.9009 5.9373 14.6885 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 31.9152 6.8807 15.3686 C 0 0 0 0 0 0 0 0 0 0 0 0
28 | 31.2207 8.2337 15.8118 C 0 0 0 0 0 0 0 0 0 0 0 0
29 | 32.3550 9.0923 16.4012 C 0 0 0 0 0 0 0 0 0 0 0 0
30 | 31.8626 10.3916 17.0956 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 30.8932 10.1571 18.2658 C 0 0 0 0 0 0 0 0 0 0 0 0
32 | 31.5282 9.2143 19.3473 C 0 0 0 0 0 0 0 0 0 0 0 0
33 | 30.7066 9.3458 20.6753 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 29.1764 9.1329 20.4634 C 0 0 0 0 0 0 0 0 0 0 0 0
35 | 28.8836 7.9727 19.5313 O 0 0 0 0 0 0 0 0 0 0 0 0
36 | 27.4627 7.5107 19.5111 C 0 0 0 0 0 0 0 0 0 0 0 0
37 | 27.3265 6.3585 18.5102 C 0 0 0 0 0 0 0 0 0 0 0 0
38 | 27.1593 5.2242 18.9600 O 0 0 0 0 0 0 0 0 0 0 0 0
39 | 30.5498 8.9660 14.5629 C 0 0 0 0 0 0 0 0 0 0 0 0
40 | 28.1796 5.7332 13.9498 O 0 0 0 0 0 0 0 0 0 0 0 0
41 | 26.0452 5.2304 15.5522 C 0 0 0 0 0 0 0 0 0 0 0 0
42 | 27.4572 7.9830 16.5957 C 0 0 0 0 0 0 0 0 0 0 0 0
43 | 31.7856 4.0836 15.2760 H 0 0 0 0 0 0 0 0 0 0 0 0
44 | 33.7064 5.0105 14.1747 H 0 0 0 0 0 0 0 0 0 0 0 0
45 | 29.6613 3.6221 13.8541 H 0 0 0 0 0 0 0 0 0 0 0 0
46 | 30.5358 4.2798 12.4296 H 0 0 0 0 0 0 0 0 0 0 0 0
47 | 32.2955 2.5260 12.8853 H 0 0 0 0 0 0 0 0 0 0 0 0
48 | 31.1507 -0.0122 10.8771 H 0 0 0 0 0 0 0 0 0 0 0 0
49 | 31.6974 -0.0053 12.5881 H 0 0 0 0 0 0 0 0 0 0 0 0
50 | 29.1494 0.8516 13.3988 H 0 0 0 0 0 0 0 0 0 0 0 0
51 | 28.6027 0.8447 11.6878 H 0 0 0 0 0 0 0 0 0 0 0 0
52 | 32.6438 2.2833 10.3181 H 0 0 0 0 0 0 0 0 0 0 0 0
53 | 30.4383 4.2808 7.1923 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 28.3344 3.9723 8.4629 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 28.3683 2.8092 10.6721 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 34.2511 2.3435 6.5484 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 32.5031 1.9342 6.4867 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 33.5072 1.3798 7.8692 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 32.2552 4.3071 6.0601 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 33.8732 4.9191 6.5439 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 32.4033 5.4886 7.4051 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 33.7622 4.4840 9.6581 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 34.9753 4.2551 8.3532 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 34.4713 2.8597 9.3659 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 27.6802 4.6115 16.8556 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 29.8982 5.6458 16.6607 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 30.5112 6.3992 13.7693 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 32.3319 6.3896 16.2603 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 32.7279 7.1139 14.6649 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 30.4363 8.0695 16.5653 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 32.9038 8.5078 17.1543 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 33.0431 9.3927 15.5974 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 32.7258 10.9414 17.4990 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 31.3360 11.0216 16.3637 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 30.6642 11.1177 18.7505 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 29.9636 9.7068 17.8873 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 31.4961 8.1701 19.0030 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 32.5718 9.5062 19.5362 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 31.0537 8.5921 21.3974 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 30.8504 10.3514 21.0975 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 28.6994 8.9133 21.4301 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 28.7349 10.0452 20.0358 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 27.1750 7.1657 20.5151 H 0 0 0 0 0 0 0 0 0 0 0 0
84 | 26.8106 8.3434 19.2088 H 0 0 0 0 0 0 0 0 0 0 0 0
85 | 29.6961 8.3720 14.2047 H 0 0 0 0 0 0 0 0 0 0 0 0
86 | 31.2904 9.0672 13.7560 H 0 0 0 0 0 0 0 0 0 0 0 0
87 | 30.2004 9.9637 14.8673 H 0 0 0 0 0 0 0 0 0 0 0 0
88 | 26.1118 4.3695 14.8707 H 0 0 0 0 0 0 0 0 0 0 0 0
89 | 25.7523 6.1258 14.9842 H 0 0 0 0 0 0 0 0 0 0 0 0
90 | 25.2931 5.0263 16.3285 H 0 0 0 0 0 0 0 0 0 0 0 0
91 | 28.2099 8.5775 17.1341 H 0 0 0 0 0 0 0 0 0 0 0 0
92 | 26.4721 8.4624 16.6945 H 0 0 0 0 0 0 0 0 0 0 0 0
93 | 27.7302 7.9221 15.5319 H 0 0 0 0 0 0 0 0 0 0 0 0
94 | 31.1322 1.8524 13.9578 H 0 0 0 0 0 0 0 0 0 0 0 0
95 | 1 2 1 0 0 0 0
96 | 1 3 1 0 0 0 0
97 | 1 22 1 0 0 0 0
98 | 1 39 1 0 0 0 0
99 | 2 40 1 0 0 0 0
100 | 3 4 1 0 0 0 0
101 | 3 41 1 0 0 0 0
102 | 3 42 1 0 0 0 0
103 | 4 5 1 0 0 0 0
104 | 4 43 1 0 0 0 0
105 | 4 90 1 0 0 0 0
106 | 5 6 1 0 0 0 0
107 | 5 7 1 0 0 0 0
108 | 5 8 1 0 0 0 0
109 | 6 7 1 0 0 0 0
110 | 6 44 1 0 0 0 0
111 | 6 45 1 0 0 0 0
112 | 7 46 1 0 0 0 0
113 | 7 47 1 0 0 0 0
114 | 8 9 2 0 0 0 0
115 | 8 13 1 0 0 0 0
116 | 9 10 1 0 0 0 0
117 | 9 48 1 0 0 0 0
118 | 10 11 2 0 0 0 0
119 | 10 14 1 0 0 0 0
120 | 11 12 1 0 0 0 0
121 | 11 49 1 0 0 0 0
122 | 12 13 2 0 0 0 0
123 | 12 50 1 0 0 0 0
124 | 13 51 1 0 0 0 0
125 | 14 15 1 0 0 0 0
126 | 14 16 1 0 0 0 0
127 | 14 17 1 0 0 0 0
128 | 15 52 1 0 0 0 0
129 | 15 53 1 0 0 0 0
130 | 15 54 1 0 0 0 0
131 | 16 55 1 0 0 0 0
132 | 16 56 1 0 0 0 0
133 | 16 57 1 0 0 0 0
134 | 17 58 1 0 0 0 0
135 | 17 59 1 0 0 0 0
136 | 17 60 1 0 0 0 0
137 | 18 19 1 0 0 0 0
138 | 18 33 1 0 0 0 0
139 | 18 38 1 0 0 0 0
140 | 19 20 1 0 0 0 0
141 | 19 37 1 0 0 0 0
142 | 19 61 1 0 0 0 0
143 | 20 21 1 0 0 0 0
144 | 20 36 2 0 0 0 0
145 | 21 22 1 0 0 0 0
146 | 21 62 1 0 0 0 0
147 | 22 23 1 0 0 0 0
148 | 22 63 1 0 0 0 0
149 | 23 24 1 0 0 0 0
150 | 23 64 1 0 0 0 0
151 | 23 65 1 0 0 0 0
152 | 24 25 1 0 0 0 0
153 | 24 35 1 0 0 0 0
154 | 24 66 1 0 0 0 0
155 | 25 26 1 0 0 0 0
156 | 25 67 1 0 0 0 0
157 | 25 68 1 0 0 0 0
158 | 26 27 1 0 0 0 0
159 | 26 69 1 0 0 0 0
160 | 26 70 1 0 0 0 0
161 | 27 28 1 0 0 0 0
162 | 27 71 1 0 0 0 0
163 | 27 72 1 0 0 0 0
164 | 28 29 1 0 0 0 0
165 | 28 73 1 0 0 0 0
166 | 28 74 1 0 0 0 0
167 | 29 30 1 0 0 0 0
168 | 29 75 1 0 0 0 0
169 | 29 76 1 0 0 0 0
170 | 30 31 1 0 0 0 0
171 | 30 77 1 0 0 0 0
172 | 30 78 1 0 0 0 0
173 | 31 32 1 0 0 0 0
174 | 32 33 1 0 0 0 0
175 | 32 79 1 0 0 0 0
176 | 32 80 1 0 0 0 0
177 | 33 34 2 0 0 0 0
178 | 35 81 1 0 0 0 0
179 | 35 82 1 0 0 0 0
180 | 35 83 1 0 0 0 0
181 | 37 84 1 0 0 0 0
182 | 37 85 1 0 0 0 0
183 | 37 86 1 0 0 0 0
184 | 38 87 1 0 0 0 0
185 | 38 88 1 0 0 0 0
186 | 38 89 1 0 0 0 0
187 | M CHG 1 4 1
188 | M END
189 | $$$$
190 |
--------------------------------------------------------------------------------
/dockprep/example_files/3K5C-BACE_5.mol:
--------------------------------------------------------------------------------
1 | REMARK score -91.46
2 | LCcorina 10041815563D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 80 81 0 0 0 0 999 V2000
5 | 33.9967 5.3581 9.1859 C 0 0 0 0 0 0 0 0 0 0 0 0
6 | 32.8799 5.6936 8.2105 C 0 0 0 0 0 0 0 0 0 0 0 0
7 | 32.0881 4.4469 7.8151 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 31.4016 3.8418 8.9881 C 0 0 0 0 0 0 0 0 0 0 0 0
9 | 31.7937 2.8679 9.8802 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 30.7061 2.7299 10.7620 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 29.7002 3.5512 10.4594 N 0 0 0 0 0 0 0 0 0 0 0 0
12 | 30.1457 4.2167 9.3799 N 0 0 0 0 0 0 0 0 0 0 0 0
13 | 31.5630 4.6126 14.3519 C 0 0 0 0 0 0 0 0 0 0 0 0
14 | 32.7624 4.8321 13.5411 O 0 0 0 0 0 0 0 0 0 0 0 0
15 | 30.5960 3.8175 13.4791 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 31.1683 2.4998 13.1023 N 0 3 0 0 0 0 0 0 0 0 0 0
17 | 30.5638 1.7848 11.9697 C 0 0 0 0 0 0 0 0 0 0 0 0
18 | 27.4233 6.6411 17.1690 N 0 0 0 0 0 0 0 0 0 0 0 0
19 | 27.4267 5.5062 16.2045 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 28.5014 5.6828 15.1314 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 29.7568 5.7719 15.5819 N 0 0 0 0 0 0 0 0 0 0 0 0
22 | 30.9180 5.9608 14.7021 C 0 0 0 0 0 0 0 0 0 0 0 0
23 | 31.9295 6.8994 15.3838 C 0 0 0 0 0 0 0 0 0 0 0 0
24 | 31.2379 8.2566 15.8178 C 0 0 0 0 0 0 0 0 0 0 0 0
25 | 32.3716 9.1128 16.4117 C 0 0 0 0 0 0 0 0 0 0 0 0
26 | 31.8799 10.4167 17.0980 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 30.9012 10.1906 18.2619 C 0 0 0 0 0 0 0 0 0 0 0 0
28 | 31.5243 9.2490 19.3514 C 0 0 0 0 0 0 0 0 0 0 0 0
29 | 30.6938 9.3887 20.6730 C 0 0 0 0 0 0 0 0 0 0 0 0
30 | 29.1641 9.1817 20.4508 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 28.8733 8.0075 19.5340 O 0 0 0 0 0 0 0 0 0 0 0 0
32 | 27.4498 7.5562 19.5029 C 0 0 0 0 0 0 0 0 0 0 0 0
33 | 27.3118 6.3962 18.5036 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 27.1061 5.2673 18.9530 O 0 0 0 0 0 0 0 0 0 0 0 0
35 | 30.5782 8.9874 14.5619 C 0 0 0 0 0 0 0 0 0 0 0 0
36 | 28.1941 5.7979 13.9463 O 0 0 0 0 0 0 0 0 0 0 0 0
37 | 26.0514 5.2752 15.5310 C 0 0 0 0 0 0 0 0 0 0 0 0
38 | 27.5762 8.0104 16.6242 C 0 0 0 0 0 0 0 0 0 0 0 0
39 | 33.5650 4.9465 10.1102 H 0 0 0 0 0 0 0 0 0 0 0 0
40 | 34.5646 6.2703 9.4210 H 0 0 0 0 0 0 0 0 0 0 0 0
41 | 34.6686 4.6145 8.7323 H 0 0 0 0 0 0 0 0 0 0 0 0
42 | 32.1891 6.4114 8.6769 H 0 0 0 0 0 0 0 0 0 0 0 0
43 | 33.3092 6.1368 7.2998 H 0 0 0 0 0 0 0 0 0 0 0 0
44 | 31.3274 4.7174 7.0680 H 0 0 0 0 0 0 0 0 0 0 0 0
45 | 32.7718 3.6985 7.3878 H 0 0 0 0 0 0 0 0 0 0 0 0
46 | 32.6116 2.3061 10.0035 H 0 0 0 0 0 0 0 0 0 0 0 0
47 | 28.7849 3.5478 11.0695 H 0 0 0 0 0 0 0 0 0 0 0 0
48 | 31.8474 4.0800 15.2713 H 0 0 0 0 0 0 0 0 0 0 0 0
49 | 33.7108 5.1083 14.0254 H 0 0 0 0 0 0 0 0 0 0 0 0
50 | 29.6595 3.6475 14.0305 H 0 0 0 0 0 0 0 0 0 0 0 0
51 | 30.3829 4.3822 12.5596 H 0 0 0 0 0 0 0 0 0 0 0 0
52 | 32.2268 2.6240 12.8302 H 0 0 0 0 0 0 0 0 0 0 0 0
53 | 31.1005 0.8389 11.8048 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 29.5074 1.5730 12.1915 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 27.6713 4.6373 16.8333 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 29.8978 5.6966 16.6703 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 30.5394 6.4181 13.7760 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 32.3383 6.4094 16.2798 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 32.7479 7.1266 14.6847 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 30.4482 8.0983 16.5668 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 32.9129 8.5289 17.1707 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 33.0665 9.4074 15.6115 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 32.7425 10.9643 17.5055 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 31.3613 11.0464 16.3600 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 30.6729 11.1539 18.7414 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 29.9725 9.7428 17.8784 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 31.4903 8.2037 19.0106 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 32.5678 9.5370 19.5468 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 31.0323 8.6360 21.4002 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 30.8388 10.3951 21.0926 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 28.6774 8.9801 21.4165 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 28.7321 10.0897 20.0050 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 27.1504 7.2181 20.5059 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 26.8069 8.3922 19.1906 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 29.7472 8.3775 14.1778 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 31.3343 9.1153 13.7732 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 30.1984 9.9729 14.8691 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 26.1184 4.4176 14.8453 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 25.7639 6.1744 14.9666 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 25.2953 5.0693 16.3029 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 27.9578 7.9546 15.5940 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 28.2844 8.5761 17.2474 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 26.5999 8.5171 16.6266 H 0 0 0 0 0 0 0 0 0 0 0 0
84 | 31.1229 1.8972 13.9116 H 0 0 0 0 0 0 0 0 0 0 0 0
85 | 1 2 1 0 0 0 0
86 | 1 35 1 0 0 0 0
87 | 1 36 1 0 0 0 0
88 | 1 37 1 0 0 0 0
89 | 2 3 1 0 0 0 0
90 | 2 38 1 0 0 0 0
91 | 2 39 1 0 0 0 0
92 | 3 40 1 0 0 0 0
93 | 3 41 1 0 0 0 0
94 | 3 4 1 0 0 0 0
95 | 4 8 2 0 0 0 0
96 | 4 5 1 0 0 0 0
97 | 5 6 2 0 0 0 0
98 | 5 42 1 0 0 0 0
99 | 6 13 1 0 0 0 0
100 | 6 7 1 0 0 0 0
101 | 7 43 1 0 0 0 0
102 | 7 8 1 0 0 0 0
103 | 9 10 1 0 0 0 0
104 | 9 11 1 0 0 0 0
105 | 9 18 1 0 0 0 0
106 | 9 44 1 0 0 0 0
107 | 10 45 1 0 0 0 0
108 | 11 12 1 0 0 0 0
109 | 11 46 1 0 0 0 0
110 | 11 47 1 0 0 0 0
111 | 12 13 1 0 0 0 0
112 | 12 48 1 0 0 0 0
113 | 12 80 1 0 0 0 0
114 | 13 49 1 0 0 0 0
115 | 13 50 1 0 0 0 0
116 | 14 15 1 0 0 0 0
117 | 14 29 1 0 0 0 0
118 | 14 34 1 0 0 0 0
119 | 15 16 1 0 0 0 0
120 | 15 33 1 0 0 0 0
121 | 15 51 1 0 0 0 0
122 | 16 17 1 0 0 0 0
123 | 16 32 2 0 0 0 0
124 | 17 18 1 0 0 0 0
125 | 17 52 1 0 0 0 0
126 | 18 19 1 0 0 0 0
127 | 18 53 1 0 0 0 0
128 | 19 20 1 0 0 0 0
129 | 19 54 1 0 0 0 0
130 | 19 55 1 0 0 0 0
131 | 20 21 1 0 0 0 0
132 | 20 31 1 0 0 0 0
133 | 20 56 1 0 0 0 0
134 | 21 22 1 0 0 0 0
135 | 21 57 1 0 0 0 0
136 | 21 58 1 0 0 0 0
137 | 22 23 1 0 0 0 0
138 | 22 59 1 0 0 0 0
139 | 22 60 1 0 0 0 0
140 | 23 24 1 0 0 0 0
141 | 23 61 1 0 0 0 0
142 | 23 62 1 0 0 0 0
143 | 24 25 1 0 0 0 0
144 | 24 63 1 0 0 0 0
145 | 24 64 1 0 0 0 0
146 | 25 26 1 0 0 0 0
147 | 25 65 1 0 0 0 0
148 | 25 66 1 0 0 0 0
149 | 26 27 1 0 0 0 0
150 | 26 67 1 0 0 0 0
151 | 26 68 1 0 0 0 0
152 | 27 28 1 0 0 0 0
153 | 28 29 1 0 0 0 0
154 | 28 69 1 0 0 0 0
155 | 28 70 1 0 0 0 0
156 | 29 30 2 0 0 0 0
157 | 31 71 1 0 0 0 0
158 | 31 72 1 0 0 0 0
159 | 31 73 1 0 0 0 0
160 | 33 74 1 0 0 0 0
161 | 33 75 1 0 0 0 0
162 | 33 76 1 0 0 0 0
163 | 34 77 1 0 0 0 0
164 | 34 78 1 0 0 0 0
165 | 34 79 1 0 0 0 0
166 | M CHG 1 12 1
167 | M END
168 | $$$$
169 |
--------------------------------------------------------------------------------
/dockprep/example_files/3K5C-BACE_6.mol:
--------------------------------------------------------------------------------
1 | REMARK score -93.89
2 | LCcorina 10041815563D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 83 85 0 0 0 0 999 V2000
5 | 26.0389 5.0980 17.1262 C 0 0 0 0 0 0 0 0 0 0 0 0
6 | 26.4167 5.7876 18.2802 C 0 0 0 0 0 0 0 0 0 0 0 0
7 | 27.6637 6.4053 18.3411 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 26.8569 5.1146 15.9818 C 0 0 0 0 0 0 0 0 0 0 0 0
9 | 28.0575 5.8109 16.0184 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 28.4163 6.4994 17.1827 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 28.9594 5.7943 14.8234 C 0 0 0 0 0 0 0 0 0 0 0 0
12 | 28.5541 5.9358 13.6745 O 0 0 0 0 0 0 0 0 0 0 0 0
13 | 30.2347 5.6239 15.1551 N 0 0 0 0 0 0 0 0 0 0 0 0
14 | 31.4131 5.8430 14.3108 C 0 0 0 0 0 0 0 0 0 0 0 0
15 | 31.9673 4.4540 13.9943 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 31.0226 3.6874 13.0758 C 0 0 0 0 0 0 0 0 0 0 0 0
17 | 31.4347 2.3120 12.8649 N 0 3 0 0 0 0 0 0 0 0 0 0
18 | 30.7489 1.5429 11.8351 C 0 0 0 0 0 0 0 0 0 0 0 0
19 | 30.5484 2.3291 10.5546 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 31.6454 2.8088 9.8772 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 31.5162 3.5349 8.7105 C 0 0 0 0 0 0 0 0 0 0 0 0
22 | 32.7653 4.0494 8.0830 C 0 0 0 0 0 0 0 0 0 0 0 0
23 | 33.3879 5.1291 8.9235 C 0 0 0 0 0 0 0 0 0 0 0 0
24 | 33.7428 2.9263 7.7978 C 0 0 0 0 0 0 0 0 0 0 0 0
25 | 30.2393 3.8019 8.2119 C 0 0 0 0 0 0 0 0 0 0 0 0
26 | 29.1289 3.3147 8.9032 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 29.2486 2.5630 10.0638 C 0 0 0 0 0 0 0 0 0 0 0 0
28 | 33.2451 4.5721 13.3243 O 0 0 0 0 0 0 0 0 0 0 0 0
29 | 32.4767 6.7299 15.0334 C 0 0 0 0 0 0 0 0 0 0 0 0
30 | 31.8682 7.8722 15.7172 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 32.5590 8.3203 16.9183 C 0 0 0 0 0 0 0 0 0 0 0 0
32 | 31.6736 9.1391 17.8655 C 0 0 0 0 0 0 0 0 0 0 0 0
33 | 31.3707 8.8351 19.1153 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 30.4658 9.8158 19.8470 C 0 0 0 0 0 0 0 0 0 0 0 0
35 | 29.3443 9.0273 20.5325 C 0 0 0 0 0 0 0 0 0 0 0 0
36 | 28.5003 8.3110 19.5637 N 0 0 0 0 0 0 0 0 0 0 0 0
37 | 28.2427 6.9808 19.6075 C 0 0 0 0 0 0 0 0 0 0 0 0
38 | 28.5371 6.2545 20.5547 O 0 0 0 0 0 0 0 0 0 0 0 0
39 | 27.5523 9.1761 18.8879 C 0 0 0 0 0 0 0 0 0 0 0 0
40 | 26.2955 9.0524 19.7015 C 0 0 0 0 0 0 0 0 0 0 0 0
41 | 31.7453 9.0484 14.7312 C 0 0 0 0 0 0 0 0 0 0 0 0
42 | 25.1112 4.5454 17.1067 H 0 0 0 0 0 0 0 0 0 0 0 0
43 | 25.7427 5.8409 19.1224 H 0 0 0 0 0 0 0 0 0 0 0 0
44 | 26.5481 4.5900 15.0897 H 0 0 0 0 0 0 0 0 0 0 0 0
45 | 29.3034 7.1152 17.1636 H 0 0 0 0 0 0 0 0 0 0 0 0
46 | 30.3578 5.2786 16.1922 H 0 0 0 0 0 0 0 0 0 0 0 0
47 | 31.0927 6.3688 13.3992 H 0 0 0 0 0 0 0 0 0 0 0 0
48 | 32.1306 3.9068 14.9344 H 0 0 0 0 0 0 0 0 0 0 0 0
49 | 30.0149 3.6720 13.5166 H 0 0 0 0 0 0 0 0 0 0 0 0
50 | 30.9855 4.1810 12.0934 H 0 0 0 0 0 0 0 0 0 0 0 0
51 | 32.4969 2.2870 12.5803 H 0 0 0 0 0 0 0 0 0 0 0 0
52 | 31.3396 0.6474 11.5918 H 0 0 0 0 0 0 0 0 0 0 0 0
53 | 29.7577 1.2394 12.2030 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 32.6390 2.6201 10.2561 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 32.4802 4.5027 7.1222 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 34.2879 5.5123 8.4203 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 33.6642 4.7162 9.9050 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 32.6671 5.9488 9.0602 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 33.1976 2.0583 7.3985 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 34.2552 2.6410 8.7284 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 34.4847 3.2646 7.0595 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 30.1230 4.3772 7.3053 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 28.1333 3.5190 8.5382 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 28.3934 2.1624 10.5878 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 34.1167 4.9662 13.8675 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 33.0096 6.1262 15.7827 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 33.1951 7.1156 14.2951 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 30.9131 7.4810 16.0978 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 32.9216 7.4469 17.4801 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 33.4123 8.9543 16.6356 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 32.1109 10.1389 18.0037 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 30.6664 9.2359 17.4340 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 30.8506 7.8661 19.1412 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 32.2932 8.7721 19.7112 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 31.0518 10.3644 20.5990 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 30.0369 10.5272 19.1260 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 29.7826 8.2873 21.2183 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 28.7039 9.7188 21.0998 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 27.9381 10.2062 18.8783 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 27.4106 8.8280 17.8541 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 25.5821 9.8315 19.3948 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 26.5357 9.1744 20.7680 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 25.8485 8.0608 19.5373 H 0 0 0 0 0 0 0 0 0 0 0 0
84 | 31.2285 9.8861 15.2222 H 0 0 0 0 0 0 0 0 0 0 0 0
85 | 31.1704 8.7291 13.8494 H 0 0 0 0 0 0 0 0 0 0 0 0
86 | 32.7493 9.3701 14.4174 H 0 0 0 0 0 0 0 0 0 0 0 0
87 | 31.2901 1.7394 13.7929 H 0 0 0 0 0 0 0 0 0 0 0 0
88 | 1 2 2 0 0 0 0
89 | 1 4 1 0 0 0 0
90 | 1 38 1 0 0 0 0
91 | 2 3 1 0 0 0 0
92 | 2 39 1 0 0 0 0
93 | 3 6 2 0 0 0 0
94 | 3 33 1 0 0 0 0
95 | 4 5 2 0 0 0 0
96 | 4 40 1 0 0 0 0
97 | 5 6 1 0 0 0 0
98 | 5 7 1 0 0 0 0
99 | 6 41 1 0 0 0 0
100 | 7 8 2 0 0 0 0
101 | 7 9 1 0 0 0 0
102 | 9 10 1 0 0 0 0
103 | 9 42 1 0 0 0 0
104 | 10 11 1 0 0 0 0
105 | 10 25 1 0 0 0 0
106 | 10 43 1 0 0 0 0
107 | 11 12 1 0 0 0 0
108 | 11 24 1 0 0 0 0
109 | 11 44 1 0 0 0 0
110 | 12 13 1 0 0 0 0
111 | 12 45 1 0 0 0 0
112 | 12 46 1 0 0 0 0
113 | 13 14 1 0 0 0 0
114 | 13 47 1 0 0 0 0
115 | 13 83 1 0 0 0 0
116 | 14 15 1 0 0 0 0
117 | 14 48 1 0 0 0 0
118 | 14 49 1 0 0 0 0
119 | 15 16 2 0 0 0 0
120 | 15 23 1 0 0 0 0
121 | 16 17 1 0 0 0 0
122 | 16 50 1 0 0 0 0
123 | 17 18 1 0 0 0 0
124 | 17 21 2 0 0 0 0
125 | 18 19 1 0 0 0 0
126 | 18 20 1 0 0 0 0
127 | 18 51 1 0 0 0 0
128 | 19 52 1 0 0 0 0
129 | 19 53 1 0 0 0 0
130 | 19 54 1 0 0 0 0
131 | 20 55 1 0 0 0 0
132 | 20 56 1 0 0 0 0
133 | 20 57 1 0 0 0 0
134 | 21 22 1 0 0 0 0
135 | 21 58 1 0 0 0 0
136 | 22 23 2 0 0 0 0
137 | 22 59 1 0 0 0 0
138 | 23 60 1 0 0 0 0
139 | 24 61 1 0 0 0 0
140 | 25 26 1 0 0 0 0
141 | 25 62 1 0 0 0 0
142 | 25 63 1 0 0 0 0
143 | 26 27 1 0 0 0 0
144 | 26 37 1 0 0 0 0
145 | 26 64 1 0 0 0 0
146 | 27 28 1 0 0 0 0
147 | 27 65 1 0 0 0 0
148 | 27 66 1 0 0 0 0
149 | 28 29 1 0 0 0 0
150 | 28 67 1 0 0 0 0
151 | 28 68 1 0 0 0 0
152 | 29 30 1 0 0 0 0
153 | 29 69 1 0 0 0 0
154 | 29 70 1 0 0 0 0
155 | 30 31 1 0 0 0 0
156 | 30 71 1 0 0 0 0
157 | 30 72 1 0 0 0 0
158 | 31 32 1 0 0 0 0
159 | 31 73 1 0 0 0 0
160 | 31 74 1 0 0 0 0
161 | 32 33 1 0 0 0 0
162 | 32 35 1 0 0 0 0
163 | 33 34 2 0 0 0 0
164 | 35 36 1 0 0 0 0
165 | 35 75 1 0 0 0 0
166 | 35 76 1 0 0 0 0
167 | 36 77 1 0 0 0 0
168 | 36 78 1 0 0 0 0
169 | 36 79 1 0 0 0 0
170 | 37 80 1 0 0 0 0
171 | 37 81 1 0 0 0 0
172 | 37 82 1 0 0 0 0
173 | M CHG 1 13 1
174 | M END
175 | $$$$
176 |
--------------------------------------------------------------------------------
/dockprep/example_files/3K5C-BACE_7.mol:
--------------------------------------------------------------------------------
1 | REMARK score -107.15
2 | LCcorina 10041815563D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 99103 0 0 0 0 999 V2000
5 | 26.0120 5.2247 17.1304 C 0 0 0 0 0 0 0 0 0 0 0 0
6 | 26.4129 5.9437 18.2587 C 0 0 0 0 0 0 0 0 0 0 0 0
7 | 27.6961 6.5208 18.2724 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 26.8310 5.1715 15.9875 C 0 0 0 0 0 0 0 0 0 0 0 0
9 | 28.0498 5.8372 15.9965 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 28.4355 6.5464 17.1389 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 28.9508 5.7579 14.8061 C 0 0 0 0 0 0 0 0 0 0 0 0
12 | 28.5574 5.9069 13.6550 O 0 0 0 0 0 0 0 0 0 0 0 0
13 | 30.2184 5.5301 15.1407 N 0 0 0 0 0 0 0 0 0 0 0 0
14 | 31.4096 5.6841 14.2991 C 0 0 0 0 0 0 0 0 0 0 0 0
15 | 31.9172 4.2659 14.0350 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 30.9735 3.5150 13.1028 C 0 0 0 0 0 0 0 0 0 0 0 0
17 | 31.3860 2.1434 12.8687 N 0 3 0 0 0 0 0 0 0 0 0 0
18 | 30.6596 1.3719 11.8689 C 0 0 0 0 0 0 0 0 0 0 0 0
19 | 30.5068 2.1138 10.5559 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 31.6263 2.5934 9.9164 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 31.5403 3.2802 8.7221 C 0 0 0 0 0 0 0 0 0 0 0 0
22 | 32.8087 3.7992 8.1385 C 0 0 0 0 0 0 0 0 0 0 0 0
23 | 33.4712 4.7776 9.0678 C 0 0 0 0 0 0 0 0 0 0 0 0
24 | 33.7429 2.6686 7.7549 C 0 0 0 0 0 0 0 0 0 0 0 0
25 | 30.2843 3.5066 8.1551 C 0 0 0 0 0 0 0 0 0 0 0 0
26 | 29.1509 3.0195 8.8081 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 29.2282 2.3068 9.9966 C 0 0 0 0 0 0 0 0 0 0 0 0
28 | 33.2208 4.3157 13.4072 O 0 0 0 0 0 0 0 0 0 0 0 0
29 | 32.5026 6.5602 14.9902 C 0 0 0 0 0 0 0 0 0 0 0 0
30 | 31.9326 7.7469 15.6303 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 32.6375 8.2156 16.8151 C 0 0 0 0 0 0 0 0 0 0 0 0
32 | 31.7796 9.0978 17.7305 C 0 0 0 0 0 0 0 0 0 0 0 0
33 | 31.4661 8.8513 18.9890 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 30.5944 9.8881 19.6831 C 0 0 0 0 0 0 0 0 0 0 0 0
35 | 29.4393 9.1701 20.3750 C 0 0 0 0 0 0 0 0 0 0 0 0
36 | 28.6442 8.4414 19.3750 N 0 0 0 0 0 0 0 0 0 0 0 0
37 | 28.4523 7.0902 19.4246 C 0 0 0 0 0 0 0 0 0 0 0 0
38 | 28.9044 6.3541 20.2996 O 0 0 0 0 0 0 0 0 0 0 0 0
39 | 27.6632 9.3181 18.7237 C 0 0 0 0 0 0 0 0 0 0 0 0
40 | 28.2853 10.2769 17.6950 C 0 0 0 0 0 0 0 0 0 0 0 0
41 | 31.8501 8.8892 14.6012 C 0 0 0 0 0 0 0 0 0 0 0 0
42 | 24.7301 4.5174 17.1301 C 0 0 0 0 0 0 0 0 0 0 0 0
43 | 23.9180 4.4312 16.0725 N 0 0 0 0 0 0 0 0 0 0 0 0
44 | 22.7447 3.6459 16.4769 C 0 0 0 0 0 0 0 0 0 0 0 0
45 | 22.9121 3.3004 17.7579 C 0 0 0 0 0 0 0 0 0 0 0 0
46 | 24.1981 3.8101 18.3419 O 0 0 0 0 0 0 0 0 0 0 0 0
47 | 25.6256 11.5147 21.8412 C 0 0 0 0 0 0 0 0 0 0 0 0
48 | 25.7133 10.1144 21.9110 C 0 0 0 0 0 0 0 0 0 0 0 0
49 | 26.3816 9.4017 20.9027 C 0 0 0 0 0 0 0 0 0 0 0 0
50 | 26.9631 10.0885 19.8247 C 0 0 0 0 0 0 0 0 0 0 0 0
51 | 26.8755 11.4892 19.7559 C 0 0 0 0 0 0 0 0 0 0 0 0
52 | 26.2072 12.2019 20.7641 C 0 0 0 0 0 0 0 0 0 0 0 0
53 | 25.7423 6.0481 19.0989 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 26.5092 4.6177 15.1178 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 29.3474 7.1241 17.1019 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 30.3206 5.1890 16.1815 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 31.1102 6.1931 13.3711 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 32.0264 3.7360 14.9928 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 29.9654 3.4918 13.5423 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 30.9372 4.0252 12.1289 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 32.4343 2.1275 12.5358 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 31.1988 0.4349 11.6658 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 29.6520 1.1410 12.2448 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 32.6037 2.4360 10.3479 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 32.5420 4.3444 7.2212 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 34.2745 5.3053 8.5327 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 33.8960 4.2375 9.9268 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 32.7278 5.5061 9.4239 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 34.6386 3.0821 7.2683 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 33.2291 1.9883 7.0598 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 34.0389 2.1149 8.6581 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 30.2014 4.0509 7.2260 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 28.1704 3.1929 8.3899 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 28.3561 1.9062 10.4919 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 34.0860 4.7016 13.9661 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 33.0143 5.9667 15.7621 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 33.2342 6.8937 14.2393 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 30.9647 7.4024 16.0234 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 32.9707 7.3519 17.4092 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 33.5115 8.8102 16.5106 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 32.2501 10.0868 17.8326 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 30.7764 9.2124 17.2942 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 30.9134 7.9023 19.0501 H 0 0 0 0 0 0 0 0 0 0 0 0
84 | 32.3855 8.7794 19.5886 H 0 0 0 0 0 0 0 0 0 0 0 0
85 | 31.1947 10.4335 20.4261 H 0 0 0 0 0 0 0 0 0 0 0 0
86 | 30.2023 10.5957 18.9377 H 0 0 0 0 0 0 0 0 0 0 0 0
87 | 29.8390 8.4596 21.1136 H 0 0 0 0 0 0 0 0 0 0 0 0
88 | 28.8011 9.9076 20.8836 H 0 0 0 0 0 0 0 0 0 0 0 0
89 | 26.9721 8.6658 18.1698 H 0 0 0 0 0 0 0 0 0 0 0 0
90 | 27.4928 10.8850 17.2344 H 0 0 0 0 0 0 0 0 0 0 0 0
91 | 28.8003 9.6952 16.9163 H 0 0 0 0 0 0 0 0 0 0 0 0
92 | 29.0077 10.9362 18.1983 H 0 0 0 0 0 0 0 0 0 0 0 0
93 | 31.3941 9.7734 15.0706 H 0 0 0 0 0 0 0 0 0 0 0 0
94 | 31.2352 8.5694 13.7469 H 0 0 0 0 0 0 0 0 0 0 0 0
95 | 32.8619 9.1408 14.2507 H 0 0 0 0 0 0 0 0 0 0 0 0
96 | 21.9539 3.4522 15.7674 H 0 0 0 0 0 0 0 0 0 0 0 0
97 | 22.1588 2.7129 18.2616 H 0 0 0 0 0 0 0 0 0 0 0 0
98 | 25.1107 12.0628 22.6164 H 0 0 0 0 0 0 0 0 0 0 0 0
99 | 25.2666 9.5860 22.7403 H 0 0 0 0 0 0 0 0 0 0 0 0
100 | 26.4484 8.3251 20.9561 H 0 0 0 0 0 0 0 0 0 0 0 0
101 | 27.3224 12.0181 18.9271 H 0 0 0 0 0 0 0 0 0 0 0 0
102 | 26.1405 13.2785 20.7111 H 0 0 0 0 0 0 0 0 0 0 0 0
103 | 31.2885 1.5671 13.8005 H 0 0 0 0 0 0 0 0 0 0 0 0
104 | 1 2 2 0 0 0 0
105 | 1 4 1 0 0 0 0
106 | 1 38 1 0 0 0 0
107 | 2 3 1 0 0 0 0
108 | 2 49 1 0 0 0 0
109 | 3 6 2 0 0 0 0
110 | 3 33 1 0 0 0 0
111 | 4 5 2 0 0 0 0
112 | 4 50 1 0 0 0 0
113 | 5 6 1 0 0 0 0
114 | 5 7 1 0 0 0 0
115 | 6 51 1 0 0 0 0
116 | 7 8 2 0 0 0 0
117 | 7 9 1 0 0 0 0
118 | 9 10 1 0 0 0 0
119 | 9 52 1 0 0 0 0
120 | 10 11 1 0 0 0 0
121 | 10 25 1 0 0 0 0
122 | 10 53 1 0 0 0 0
123 | 11 12 1 0 0 0 0
124 | 11 24 1 0 0 0 0
125 | 11 54 1 0 0 0 0
126 | 12 13 1 0 0 0 0
127 | 12 55 1 0 0 0 0
128 | 12 56 1 0 0 0 0
129 | 13 14 1 0 0 0 0
130 | 13 57 1 0 0 0 0
131 | 13 99 1 0 0 0 0
132 | 14 15 1 0 0 0 0
133 | 14 58 1 0 0 0 0
134 | 14 59 1 0 0 0 0
135 | 15 16 2 0 0 0 0
136 | 15 23 1 0 0 0 0
137 | 16 17 1 0 0 0 0
138 | 16 60 1 0 0 0 0
139 | 17 18 1 0 0 0 0
140 | 17 21 2 0 0 0 0
141 | 18 19 1 0 0 0 0
142 | 18 20 1 0 0 0 0
143 | 18 61 1 0 0 0 0
144 | 19 62 1 0 0 0 0
145 | 19 63 1 0 0 0 0
146 | 19 64 1 0 0 0 0
147 | 20 65 1 0 0 0 0
148 | 20 66 1 0 0 0 0
149 | 20 67 1 0 0 0 0
150 | 21 22 1 0 0 0 0
151 | 21 68 1 0 0 0 0
152 | 22 23 2 0 0 0 0
153 | 22 69 1 0 0 0 0
154 | 23 70 1 0 0 0 0
155 | 24 71 1 0 0 0 0
156 | 25 26 1 0 0 0 0
157 | 25 72 1 0 0 0 0
158 | 25 73 1 0 0 0 0
159 | 26 27 1 0 0 0 0
160 | 26 37 1 0 0 0 0
161 | 26 74 1 0 0 0 0
162 | 27 28 1 0 0 0 0
163 | 27 75 1 0 0 0 0
164 | 27 76 1 0 0 0 0
165 | 28 29 1 0 0 0 0
166 | 28 77 1 0 0 0 0
167 | 28 78 1 0 0 0 0
168 | 29 30 1 0 0 0 0
169 | 29 79 1 0 0 0 0
170 | 29 80 1 0 0 0 0
171 | 30 31 1 0 0 0 0
172 | 30 81 1 0 0 0 0
173 | 30 82 1 0 0 0 0
174 | 31 32 1 0 0 0 0
175 | 31 83 1 0 0 0 0
176 | 31 84 1 0 0 0 0
177 | 32 33 1 0 0 0 0
178 | 32 35 1 0 0 0 0
179 | 33 34 2 0 0 0 0
180 | 35 36 1 0 0 0 0
181 | 35 46 1 0 0 0 0
182 | 35 85 1 0 0 0 0
183 | 36 86 1 0 0 0 0
184 | 36 87 1 0 0 0 0
185 | 36 88 1 0 0 0 0
186 | 37 89 1 0 0 0 0
187 | 37 90 1 0 0 0 0
188 | 37 91 1 0 0 0 0
189 | 38 39 2 0 0 0 0
190 | 38 42 1 0 0 0 0
191 | 39 40 1 0 0 0 0
192 | 40 41 2 0 0 0 0
193 | 40 92 1 0 0 0 0
194 | 41 42 1 0 0 0 0
195 | 41 93 1 0 0 0 0
196 | 43 44 2 0 0 0 0
197 | 43 48 1 0 0 0 0
198 | 43 94 1 0 0 0 0
199 | 44 45 1 0 0 0 0
200 | 44 95 1 0 0 0 0
201 | 45 46 2 0 0 0 0
202 | 45 96 1 0 0 0 0
203 | 46 47 1 0 0 0 0
204 | 47 48 2 0 0 0 0
205 | 47 97 1 0 0 0 0
206 | 48 98 1 0 0 0 0
207 | M CHG 1 13 1
208 | M END
209 | $$$$
210 |
--------------------------------------------------------------------------------
/dockprep/example_files/3K5C-BACE_8.mol:
--------------------------------------------------------------------------------
1 | REMARK score -94.61
2 | LCcorina 10041815573D 1 1.00000 0.00000 0
3 | CORINA 4.00 0026 26.04.2017
4 | 78 80 0 0 0 0 999 V2000
5 | 30.0817 5.8744 15.5689 N 0 0 0 0 0 0 0 0 0 0 0 0
6 | 31.0768 5.9668 14.4764 C 0 0 0 0 0 0 0 0 0 0 0 0
7 | 32.1282 7.0611 14.7985 C 0 0 0 0 0 0 0 0 0 0 0 0
8 | 31.5804 8.4575 15.1801 C 0 0 0 0 0 0 0 0 0 0 0 0
9 | 31.4819 8.6552 16.7044 C 0 0 0 0 0 0 0 0 0 0 0 0
10 | 30.8977 10.0119 17.1396 C 0 0 0 0 0 0 0 0 0 0 0 0
11 | 30.3663 9.9857 18.4589 O 0 0 0 0 0 0 0 0 0 0 0 0
12 | 31.2336 9.5764 19.5124 C 0 0 0 0 0 0 0 0 0 0 0 0
13 | 30.4137 9.5114 20.8109 C 0 0 0 0 0 0 0 0 0 0 0 0
14 | 29.1215 8.6674 20.7001 C 0 0 0 0 0 0 0 0 0 0 0 0
15 | 29.3758 7.1579 20.5765 C 0 0 0 0 0 0 0 0 0 0 0 0
16 | 28.1065 6.4709 20.3694 N 0 0 0 0 0 0 0 0 0 0 0 0
17 | 27.6384 6.1507 19.1255 C 0 0 0 0 0 0 0 0 0 0 0 0
18 | 28.4391 6.1913 17.9486 C 0 0 0 0 0 0 0 0 0 0 0 0
19 | 27.8916 5.8384 16.6906 C 0 0 0 0 0 0 0 0 0 0 0 0
20 | 26.5329 5.4414 16.5987 C 0 0 0 0 0 0 0 0 0 0 0 0
21 | 25.7215 5.3957 17.7612 C 0 0 0 0 0 0 0 0 0 0 0 0
22 | 26.2842 5.7500 19.0174 N 0 0 0 0 0 0 0 0 0 0 0 0
23 | 24.0377 4.9021 17.6423 Cl 0 0 0 0 0 0 0 0 0 0 0 0
24 | 28.7397 5.8986 15.4288 C 0 0 0 0 0 0 0 0 0 0 0 0
25 | 28.2121 5.9479 14.3204 O 0 0 0 0 0 0 0 0 0 0 0 0
26 | 31.7640 4.5859 14.2251 C 0 0 0 0 0 0 0 0 0 0 0 0
27 | 33.0118 4.7540 13.5394 O 0 0 0 0 0 0 0 0 0 0 0 0
28 | 30.8867 3.7097 13.2974 C 0 0 0 0 0 0 0 0 0 0 0 0
29 | 31.4764 2.3700 13.0934 N 0 3 0 0 0 0 0 0 0 0 0 0
30 | 30.8680 1.6731 11.9487 C 0 0 0 0 0 0 0 0 0 0 0 0
31 | 30.7216 2.4319 10.6224 C 0 0 0 0 0 0 0 0 0 0 0 0
32 | 31.8759 2.9487 9.9924 C 0 0 0 0 0 0 0 0 0 0 0 0
33 | 31.7791 3.6572 8.7646 C 0 0 0 0 0 0 0 0 0 0 0 0
34 | 30.4985 3.8370 8.1839 C 0 0 0 0 0 0 0 0 0 0 0 0
35 | 29.3368 3.3259 8.8080 C 0 0 0 0 0 0 0 0 0 0 0 0
36 | 29.4486 2.6228 10.0282 C 0 0 0 0 0 0 0 0 0 0 0 0
37 | 33.0598 4.2031 8.1051 C 0 0 0 0 0 0 0 0 0 0 0 0
38 | 33.6116 5.4207 8.8972 C 0 0 0 0 0 0 0 0 0 0 0 0
39 | 34.1645 3.1498 7.8111 C 0 0 0 0 0 0 0 0 0 0 0 0
40 | 32.5051 9.5454 14.6039 C 0 0 0 0 0 0 0 0 0 0 0 0
41 | 30.4174 5.7753 16.6117 H 0 0 0 0 0 0 0 0 0 0 0 0
42 | 30.5054 6.2581 13.5827 H 0 0 0 0 0 0 0 0 0 0 0 0
43 | 32.7444 6.7380 15.6505 H 0 0 0 0 0 0 0 0 0 0 0 0
44 | 32.7714 7.2213 13.9207 H 0 0 0 0 0 0 0 0 0 0 0 0
45 | 30.5662 8.5392 14.7619 H 0 0 0 0 0 0 0 0 0 0 0 0
46 | 30.8338 7.8770 17.1339 H 0 0 0 0 0 0 0 0 0 0 0 0
47 | 32.4854 8.5831 17.1491 H 0 0 0 0 0 0 0 0 0 0 0 0
48 | 31.6873 10.7772 17.1106 H 0 0 0 0 0 0 0 0 0 0 0 0
49 | 30.0849 10.2975 16.4557 H 0 0 0 0 0 0 0 0 0 0 0 0
50 | 31.6518 8.5861 19.2792 H 0 0 0 0 0 0 0 0 0 0 0 0
51 | 32.0516 10.3043 19.6176 H 0 0 0 0 0 0 0 0 0 0 0 0
52 | 31.0250 9.0643 21.6086 H 0 0 0 0 0 0 0 0 0 0 0 0
53 | 30.1125 10.5274 21.1057 H 0 0 0 0 0 0 0 0 0 0 0 0
54 | 28.5044 8.8221 21.5975 H 0 0 0 0 0 0 0 0 0 0 0 0
55 | 28.5564 8.9774 19.8087 H 0 0 0 0 0 0 0 0 0 0 0 0
56 | 30.0416 6.9671 19.7219 H 0 0 0 0 0 0 0 0 0 0 0 0
57 | 29.8481 6.7889 21.4988 H 0 0 0 0 0 0 0 0 0 0 0 0
58 | 27.4857 6.1903 21.2331 H 0 0 0 0 0 0 0 0 0 0 0 0
59 | 29.4742 6.4932 18.0099 H 0 0 0 0 0 0 0 0 0 0 0 0
60 | 26.1125 5.1725 15.6409 H 0 0 0 0 0 0 0 0 0 0 0 0
61 | 31.9885 4.1091 15.1905 H 0 0 0 0 0 0 0 0 0 0 0 0
62 | 33.9118 5.0479 14.0994 H 0 0 0 0 0 0 0 0 0 0 0 0
63 | 29.8900 3.5847 13.7459 H 0 0 0 0 0 0 0 0 0 0 0 0
64 | 30.7888 4.1979 12.3166 H 0 0 0 0 0 0 0 0 0 0 0 0
65 | 32.5553 2.4694 12.9033 H 0 0 0 0 0 0 0 0 0 0 0 0
66 | 31.4674 0.7851 11.6994 H 0 0 0 0 0 0 0 0 0 0 0 0
67 | 29.8451 1.3636 12.2094 H 0 0 0 0 0 0 0 0 0 0 0 0
68 | 32.8467 2.8072 10.4440 H 0 0 0 0 0 0 0 0 0 0 0 0
69 | 30.3990 4.3712 7.2506 H 0 0 0 0 0 0 0 0 0 0 0 0
70 | 28.3692 3.4738 8.3518 H 0 0 0 0 0 0 0 0 0 0 0 0
71 | 28.5642 2.2306 10.5085 H 0 0 0 0 0 0 0 0 0 0 0 0
72 | 32.7357 4.5323 7.1068 H 0 0 0 0 0 0 0 0 0 0 0 0
73 | 34.4989 5.8200 8.3843 H 0 0 0 0 0 0 0 0 0 0 0 0
74 | 33.8875 5.1025 9.9134 H 0 0 0 0 0 0 0 0 0 0 0 0
75 | 32.8388 6.2014 8.9546 H 0 0 0 0 0 0 0 0 0 0 0 0
76 | 34.6070 2.8083 8.7584 H 0 0 0 0 0 0 0 0 0 0 0 0
77 | 34.9460 3.6035 7.1838 H 0 0 0 0 0 0 0 0 0 0 0 0
78 | 33.7222 2.2923 7.2828 H 0 0 0 0 0 0 0 0 0 0 0 0
79 | 32.0900 10.5381 14.8324 H 0 0 0 0 0 0 0 0 0 0 0 0
80 | 32.5815 9.4219 13.5135 H 0 0 0 0 0 0 0 0 0 0 0 0
81 | 33.5045 9.4533 15.0543 H 0 0 0 0 0 0 0 0 0 0 0 0
82 | 31.3176 1.7585 13.9939 H 0 0 0 0 0 0 0 0 0 0 0 0
83 | 1 2 1 0 0 0 0
84 | 1 20 1 0 0 0 0
85 | 1 37 1 0 0 0 0
86 | 2 3 1 0 0 0 0
87 | 2 22 1 0 0 0 0
88 | 2 38 1 0 0 0 0
89 | 3 4 1 0 0 0 0
90 | 3 39 1 0 0 0 0
91 | 3 40 1 0 0 0 0
92 | 4 5 1 0 0 0 0
93 | 4 36 1 0 0 0 0
94 | 4 41 1 0 0 0 0
95 | 5 6 1 0 0 0 0
96 | 5 42 1 0 0 0 0
97 | 5 43 1 0 0 0 0
98 | 6 7 1 0 0 0 0
99 | 6 44 1 0 0 0 0
100 | 6 45 1 0 0 0 0
101 | 7 8 1 0 0 0 0
102 | 8 9 1 0 0 0 0
103 | 8 46 1 0 0 0 0
104 | 8 47 1 0 0 0 0
105 | 9 10 1 0 0 0 0
106 | 9 48 1 0 0 0 0
107 | 9 49 1 0 0 0 0
108 | 10 11 1 0 0 0 0
109 | 10 50 1 0 0 0 0
110 | 10 51 1 0 0 0 0
111 | 11 12 1 0 0 0 0
112 | 11 52 1 0 0 0 0
113 | 11 53 1 0 0 0 0
114 | 12 13 1 0 0 0 0
115 | 12 54 1 0 0 0 0
116 | 13 14 2 0 0 0 0
117 | 13 18 1 0 0 0 0
118 | 14 15 1 0 0 0 0
119 | 14 55 1 0 0 0 0
120 | 15 16 2 0 0 0 0
121 | 15 20 1 0 0 0 0
122 | 16 17 1 0 0 0 0
123 | 16 56 1 0 0 0 0
124 | 17 18 2 0 0 0 0
125 | 17 19 1 0 0 0 0
126 | 20 21 2 0 0 0 0
127 | 22 23 1 0 0 0 0
128 | 22 24 1 0 0 0 0
129 | 22 57 1 0 0 0 0
130 | 23 58 1 0 0 0 0
131 | 24 25 1 0 0 0 0
132 | 24 59 1 0 0 0 0
133 | 24 60 1 0 0 0 0
134 | 25 26 1 0 0 0 0
135 | 25 61 1 0 0 0 0
136 | 25 78 1 0 0 0 0
137 | 26 27 1 0 0 0 0
138 | 26 62 1 0 0 0 0
139 | 26 63 1 0 0 0 0
140 | 27 28 2 0 0 0 0
141 | 27 32 1 0 0 0 0
142 | 28 29 1 0 0 0 0
143 | 28 64 1 0 0 0 0
144 | 29 30 2 0 0 0 0
145 | 29 33 1 0 0 0 0
146 | 30 31 1 0 0 0 0
147 | 30 65 1 0 0 0 0
148 | 31 32 2 0 0 0 0
149 | 31 66 1 0 0 0 0
150 | 32 67 1 0 0 0 0
151 | 33 34 1 0 0 0 0
152 | 33 35 1 0 0 0 0
153 | 33 68 1 0 0 0 0
154 | 34 69 1 0 0 0 0
155 | 34 70 1 0 0 0 0
156 | 34 71 1 0 0 0 0
157 | 35 72 1 0 0 0 0
158 | 35 73 1 0 0 0 0
159 | 35 74 1 0 0 0 0
160 | 36 75 1 0 0 0 0
161 | 36 76 1 0 0 0 0
162 | 36 77 1 0 0 0 0
163 | M CHG 1 25 1
164 | M END
165 | $$$$
166 |
--------------------------------------------------------------------------------
/mod_frcmod/mod_frcmod.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os, shutil
3 | import traceback, sys, re
4 | from subprocess import call
5 |
6 | import pandas as pd
7 | from argparse import ArgumentParser, RawDescriptionHelpFormatter
8 | from collections import defaultdict
9 | from collections import OrderedDict
10 | import parmed as pmd
11 | from collections import defaultdict
12 | import numpy as np
13 |
class tree(OrderedDict):
    """Insertion-ordered dictionary that creates nested levels on demand.

    Looking up an absent key stores a new, empty instance of the same class
    under that key and returns it, so ``t["a"]["b"] = 1`` works without
    creating ``t["a"]`` first.
    """

    def __missing__(self, key):
        # Invoked by dict.__getitem__ when `key` is absent. The new node is
        # stored before it is returned, so later lookups yield the same object.
        node = type(self)()
        self[key] = node
        return node
18 |
19 | ## Parse command line arguments
def cmdlineparse():
    """
    Build the command line parser of mod_frcmod.py and parse sys.argv.

    '-ligfile' is mandatory because the script cannot measure anything without
    the optimized ligand geometry; leaving it out is reported by argparse
    instead of failing later inside the workflow.

    :return: the argparse namespace with attributes LIGFILE, FRCMOD, OUT_FRCMOD, FF and VERBOSE.
    """
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description="""
DESCRIPTION:
This script reads an optimized ligand structure, measures the equilibrium bond lengths and bond angles and writes them
to a provided frcmod file by replacing the existing ones.

Some example input files:
* correct ligand frcmod file:
/home2/thomas/Documents/Consensus_Scoring_Project/D3R_2018/BACE/MD_FEset/BACE_from_3dv5_apo/BACE68_frcmod.ligand
* wrong ligand frcmod file:
/home2/thomas/Documents/Consensus_Scoring_Project/D3R_2018/BACE/MD_FEset/BACE_from_3dv5_apo/BACE68_wrong/frcmod.ligand
* optimized ligand geometry file:
/home2/thomas/Documents/Consensus_Scoring_Project/D3R_2018/BACE/MD_FEset/BACE_from_3dv5_apo/ligands/bcc/bace68.bcc.mol2



""",
                            epilog="""
EXAMPLE:

mod_frcmod.py -ligfile bace68.bcc.mol2 -frcmod BACE68_frcmod.ligand -ofrcmod BACE68_frcmod.ligand_corrected

""")
    parser.add_argument("-ligfile", dest="LIGFILE", required=True, default=None, type=str,
                        help="sdf or mol2 file with optimized ligand structure from which to measure the equilibrium "
                             "bond lengths and bond angles.")
    parser.add_argument("-frcmod", dest="FRCMOD", required=True, default=None,
                        help="the frcmod parameter file of the ligand.")
    parser.add_argument("-ofrcmod", dest="OUT_FRCMOD", required=False, default=None,
                        help="the name of the modified frcmod parameter file of the ligand, namely the output.")
    parser.add_argument("-ff", dest="FF", required=False, default="gaff2",
                        help="the ligand force field.")
    parser.add_argument("-verbose", dest="VERBOSE", required=False, default=False, action='store_true',
                        help="Print more details.")

    args = parser.parse_args()
    return args
57 |
58 |
59 | #################################################### FUNCTION DEFINITIONS ################################################
60 |
61 |
62 | # THE FOLLOWING CODE IS USELESS SINCE PARMED CAN READ AND WRITE FRCMOD FILES
63 | # ##~~~~~~~~~~~~~~~~~~~~`` DataFrames to store the force field parameters ``~~~~~~~~~~~~~~~~~~##
64 | # mass_cols = ["KNDSYM", "AMASS", "ATPOL", "comment"]
65 | # # NOTE: by defining the dtype you will be able to retrieve the value of a column by simply doing row[colname]
66 | # mass_df = pd.DataFrame([], columns=mass_cols)
67 | # mass_format = "%2s %-6.3f%13.3f\t%s\n" # (A2,2X,F10.2x,f10.2)
68 | # mass_pattern = "^([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 4 groups
69 | #
70 | # bond_cols = ["IBT", "JBT", "RK", "REQ", "comment"]
71 | # bond_df = pd.DataFrame([], columns=bond_cols)
72 | # bond_format = "%2s-%2s%8.2f%8.3f\t%s\n" # A2,1X,A2,2F10.2
73 | # bond_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 5 groups
74 | #
75 | # angl_cols = ["ITT" , "JTT" , "KTT" , "TK" , "TEQ", "comment"]
76 | # angl_df = pd.DataFrame([], columns=angl_cols)
77 | # angl_format = "%2s-%2s-%2s%9.3f%12.3f\t%s\n" # A2,1X,A2,1X,A2,2F10.2
78 | # angl_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 6 groups
79 | #
80 | # dihe_cols = ["IPT" , "JPT" , "KPT" , "LPT" , "IDIVF" , "PK" , "PHASE" , "PN", "comment"]
81 | # dihe_df = pd.DataFrame([], columns=dihe_cols)
82 | # dihe_format = "%2s-%2s-%2s-%2s%4i%9.3f%14.3f%16.3f\t%s\n" # A2,1X,A2,1X,A2,1X,A2,I4,3F15.2
83 | # dihe_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9]+)\s+([0-9.-]+)\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 9 groups
84 | #
85 | # impr_cols = ["IPT" , "JPT" , "KPT" , "LPT" , "PK" , "PHASE" , "PN", "comment"]
86 | # impr_df = pd.DataFrame([], columns=impr_cols)
87 | # impr_format = "%2s-%2s-%2s-%2s%12.1f%15.1f%12.1f\t%s\n" # A2,1X,A2,1X,A2,1X,A2,I4,3F15.2
88 | # impr_pattern = "^([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})-([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 8 groups
89 | #
90 | # # H-BOND 10-12 POTENTIAL PARAMETERS
91 | # hbon_cols = ["KT1" , "KT2" , "A" , "B", "comment"]
92 | # hbon_df = pd.DataFrame([], columns=hbon_cols)
93 | # hbon_df = hbon_df.astype({"KT1": 'str', "KT2": 'str', "A": 'float', "B": 'float', "comment": 'str'})
94 | # hbon_format = "" # 2X,A2,2X,A2,2x,5F10.2,I2
95 | #
96 | # # ONLY IF KINDNB .EQ. 'RE' ???
97 | # nonb_cols = ["LTYNB" , "R" , "EDEP", "comment"]
98 | # nonb_df = pd.DataFrame([], columns=nonb_cols)
99 | # nonb_format = "%4s%16.4f%8.4f\t%s\n" # A2,1X,A2,1X,A2,1X,A2,I4,3F15.2
100 | # nonb_pattern = "^\s*([ 0-9a-z]{2})\s+([0-9.-]+)\s+([0-9.-]+)[\s$]+(.*)" # 4 groups
101 | #
102 | # # Put all dataframes together into a dict
103 | # forcefield = {"MASS": mass_df,
104 | # "BOND": bond_df,
105 | # "ANGLE": angl_df,
106 | # "DIHE": dihe_df,
107 | # "IMPROPER": impr_df,
108 | # "NONBON": nonb_df}
109 | # fields = ["MASS", "BOND", "ANGLE", "DIHE", "IMPROPER", "NONBON"]
110 | # columns = [mass_cols, bond_cols, angl_cols, dihe_cols, impr_cols, nonb_cols]
111 | # patterns = [mass_pattern, bond_pattern, angl_pattern, dihe_pattern, impr_pattern, nonb_pattern]
112 | # formats = [mass_format, bond_format, angl_format, dihe_format, impr_format, nonb_format]
113 | #
114 | # def update_forcefield_dtypes():
115 | #
116 | # global forcefield
117 | #
118 | # forcefield["MASS"] = forcefield["MASS"].astype({'KNDSYM': 'str', 'AMASS': 'float', 'ATPOL': 'float', "comment": 'str'})
119 | # forcefield["BOND"] = forcefield["BOND"].astype({"IBT": 'str', "JBT": 'str', "RK": 'float', "REQ": 'float', "comment": 'str'})
120 | # forcefield["ANGLE"] = forcefield["ANGLE"].astype({"ITT": 'str', "JTT": 'str', "KTT": 'str', "TK": 'float', "TEQ": 'float', "comment": 'str'})
121 | # forcefield["DIHE"] = forcefield["DIHE"].astype({"IPT": str, "JPT": 'str', "KPT": 'str', "LPT": 'str', "IDIVF": 'int', "PK": 'float',
122 | # "PHASE": 'float', "PN": 'float', "comment": 'str'})
123 | # forcefield["IMPROPER"] = forcefield["IMPROPER"].astype({"IPT": 'str', "JPT": 'str', "KPT": 'str', "LPT": 'str',
124 | # "PK": 'float', "PHASE": 'float', "PN": 'float', "comment": 'str'})
125 | # forcefield["NONBON"] = forcefield["NONBON"].astype({"LTYNB": 'str', "R": 'float', "EDEP": 'float', "comment": 'str'})
126 | #
127 | # def load_frcmod(fname):
128 | # """
129 | # For the format of frcmod file look at:
130 | # http://ambermd.org/formats.html#frcmod
131 | #
132 | # :param fname:
133 | # :return:
134 | # """
135 | # global forcefield
136 | # with open(fname, 'r') as f:
137 | # contents = f.readlines()
138 | #
139 | # starts = [contents.index(f+"\n") for f in fields]
140 | # ends = [s-1 for s in starts[1:]]
141 | # ends.append(len(contents)-1)
142 | # for i in range(len(fields)):
143 | # field, cols, start, end, pattern = fields[i], columns[i], starts[i], ends[i], patterns[i]
144 | # for line in contents[start+1:end+1]:
145 | # m = re.search(pattern, line)
146 | # if not m:
147 | # continue
148 | # row_dict = {c:w for w,c in zip(m.groups(), cols)}
149 | # forcefield[field] = forcefield[field].append(row_dict, ignore_index=True) # save this line to the dataframe
150 | # update_forcefield_dtypes()
151 | #
152 | # def write_frcmod(outfname):
153 | #
154 | # global forcefield, args
155 | #
156 | # out = open(outfname, 'w')
157 | # for i in range(len(fields)):
158 | # field, format = fields[i], formats[i]
159 | # out.write(field + "\n")
160 | # for i, row in forcefield[field].iterrows():
161 | # out.write(format % tuple(row.values))
162 | # out.write("\n")
163 |
164 |
def run_commandline(commandline, logname="log", append=False, return_out=False, error_keywords=None, skip_fail=False,
                    verbose=True):
    """
    Run a single command through /bin/bash, sending its stdout and stderr to a log file.

    :param commandline: the command string; it is interpreted by bash (shell=True), so it must come from a
                        trusted source.
    :param logname: the file that receives the combined stdout and stderr of the command.
    :param append: if True the log file is appended to, otherwise it is overwritten.
    :param return_out: if True the contents of the log file are returned as a list of lines.
    :param error_keywords: strings that mark a failed run when they occur in any line of the log file, even if
                           the return code was 0. None (the default) disables this check.
    :param skip_fail: if True a non-zero return code is reported on stdout but does not raise.
    :param verbose: if True the command is printed before it is executed.
    :return: the list of log lines if return_out is True, otherwise None.
    :raises RuntimeError: if the return code is non-zero (and skip_fail is False) or if an error keyword
                          is found in the log file.
    """
    # None instead of a mutable [] default; an empty sequence means "no keyword check".
    if error_keywords is None:
        error_keywords = ()

    # The context manager guarantees the log is flushed and closed before it is read back.
    with open(logname, 'a' if append else 'w') as fout:
        if verbose:
            print("Running commandline:", commandline)
        return_code = call(commandline, stdout=fout, stderr=fout, shell=True, executable='/bin/bash')

    if return_code != 0:
        print("ERROR, THE FOLLOWING COMMAND FAILED TO RUN:", "FAIL")
        print(commandline)
        print("return_code=", return_code)
        print("Output:")
        with open(logname, 'r') as f:
            for log_line in f.readlines():
                print(log_line)
        if not skip_fail:
            raise RuntimeError("Command failed with return code %s: %s" % (return_code, commandline))

    if len(error_keywords) > 0:
        with open(logname, 'r') as f:
            contents = f.readlines()
        if any(word in log_line for log_line in contents for word in error_keywords):
            print("ERROR, THE FOLLOWING COMMAND FAILED TO RUN:")
            print(commandline)
            print("COMMAND OUTPUT:")
            for log_line in contents:
                print(log_line)
            raise RuntimeError("Error keyword found in the output of command: %s" % commandline)

    if return_out:
        with open(logname, 'r') as f:
            contents = f.readlines()
        return contents
209 |
def create_prmtop(frcmod, ligfile):
    """
    Create 'tmp/ligand.prmtop' and 'tmp/ligand.inpcrd' for the ligand with antechamber and tleap.

    The folder 'tmp/' in the current working directory is deleted if it exists and created again, so any
    previous content is lost.

    :param frcmod: the frcmod file of the ligand; it is linked as 'tmp/frcmod.ligand' and loaded by tleap.
    :param ligfile: the ligand structure file; its extension (e.g. 'mol2' or 'sdf') is passed to antechamber
                    as the input format.
    :raises ValueError: if ligfile has no file extension from which to derive the input format.
    """
    if os.path.exists("tmp/"):
        shutil.rmtree("tmp/")
    os.mkdir("tmp/")
    # Create the link directly instead of through a shell 'ln -s', which breaks on paths with spaces.
    os.symlink(os.path.abspath(frcmod), os.path.join(os.path.abspath("tmp/"), "frcmod.ligand"))

    # splitext looks only at the file name, so a dot in a directory name is not mistaken for the extension.
    file_format = os.path.splitext(ligfile)[1].lstrip('.')
    if not file_format:
        raise ValueError("Cannot determine the format of ligand file '%s': it has no extension." % ligfile)

    # convert with antechamber to mol2 with GAFF2 atom types
    # NOTE: -at gaff2 writes some unknown atom types that are not in the frcmod file (e.g. nh->nu, n->ns, n3->n7).
    run_commandline("antechamber -i %s -fi %s -o tmp/ligand.gaff2.mol2 -fo mol2 -rn LIG -at gaff2 -dr n"
                    % (ligfile, file_format))

    ligand_leap = """
source leaprc.gaff2
loadAmberParams tmp/frcmod.ligand
LIG = loadMol2 tmp/ligand.gaff2.mol2
saveAmberParm LIG tmp/ligand.prmtop tmp/ligand.inpcrd
quit
"""

    with open("tmp/ligand_leap.in", 'w') as f:
        f.write(ligand_leap)
    # tleap can exit with status 0 after a fatal error, hence the additional keyword check on its output.
    run_commandline("tleap -s -f tmp/ligand_leap.in", error_keywords=['FATAL:'])
233 |
234 |
def write_corrected_frcmod(ligfile, frcmod, out_frcmod, verbose=False):
    """
    This method takes the equilibrium bond lengths and angles from the ligfile and writes a new
    frcmod file with corrected GAFF2 ligand parameters for MD.

    Measurements are grouped by atom-type name (e.g. 'ca-ha', 'c3-n-c'); every bond/angle type receives the
    mean of all measured values that share its atom-type name, rounded to 3 decimals.

    :param ligfile: mol2 or sdf file with optimized ligand geometry from where to copy bond lengths and angles.
    :param frcmod: the frcmod file that needs corrections.
    :param out_frcmod: the name of the output frcmod file that carries the corrections.
    :param verbose: if True print mean, standard deviation and value range of every bond and angle type.
    :return:
    """
    # create the prmtop and inpcrd file within a 'tmp/' folder
    create_prmtop(frcmod, ligfile)

    # load them to PARMED
    mol = pmd.load_file("tmp/ligand.prmtop", xyz="tmp/ligand.inpcrd", structure=True)

    bond_dict = defaultdict(list)
    for bond in mol.bonds:
        length = bond.measure()
        bond_dict["%s-%s" % (bond.atom1.type, bond.atom2.type)].append(length)
        bond_dict["%s-%s" % (bond.atom2.type, bond.atom1.type)].append(length)  # add the reverse bond, too

    if verbose:
        print("\nBond = mean value +- stdev, range (max-min)")
        for bondname, distlist in bond_dict.items():
            print("%s = %f +- %f, %f" % (bondname, np.mean(distlist), np.std(distlist), np.ptp(distlist)))

    angle_dict = defaultdict(list)
    for angle in mol.angles:
        value = angle.measure()
        angle_dict["%s-%s-%s" % (angle.atom1.type, angle.atom2.type, angle.atom3.type)].append(value)
        angle_dict["%s-%s-%s" % (angle.atom3.type, angle.atom2.type, angle.atom1.type)].append(value)  # add the reverse angle, too

    if verbose:
        print("\nAngle = mean value +- stdev, range (max-min)")
        for anglename, anglelist in angle_dict.items():
            print("%s = %f +- %f, %f" % (anglename, np.mean(anglelist), np.std(anglelist), np.ptp(anglelist)))

    # Compute every mean once instead of once per bond/angle instance.
    bond_means = dict((name, round(np.mean(values), 3)) for name, values in bond_dict.items())
    angle_means = dict((name, round(np.mean(values), 3)) for name, values in angle_dict.items())

    # Both dictionaries were filled from this same structure, so every name looked up below is present.
    for bond in mol.bonds:
        bondname = "%s-%s" % (bond.atom1.type, bond.atom2.type)
        idx = bond.type.idx
        bond.type.req = bond_means[bondname]  # replace with the mean bond value
        mol.bond_types[idx].req = bond_means[bondname]  # replace with the mean bond value

    for angle in mol.angles:
        anglename = "%s-%s-%s" % (angle.atom1.type, angle.atom2.type, angle.atom3.type)
        idx = angle.type.idx
        angle.type.theteq = angle_means[anglename]  # replace with the mean angle value
        mol.angle_types[idx].theteq = angle_means[anglename]  # replace with the mean angle value

    pmd.tools.writeFrcmod(mol, out_frcmod).execute()

    # clean intermediate files (only reached on success; after a failure 'tmp/' is left for inspection)
    shutil.rmtree("tmp/")
296 |
297 |
298 | ################################################### END OF FUNCTION DEFINITIONS ##########################################
299 |
if __name__ == "__main__":

    try:
        args = cmdlineparse()
        if args.OUT_FRCMOD is None:
            # Prefix only the file name, so that an input given with a directory part
            # (e.g. 'dir/frcmod.ligand') yields 'dir/mod_frcmod.ligand' and not 'mod_dir/frcmod.ligand'.
            frcmod_dir, frcmod_name = os.path.split(args.FRCMOD)
            args.OUT_FRCMOD = os.path.join(frcmod_dir, "mod_%s" % frcmod_name)
        # forward -verbose, which was parsed but never used
        write_corrected_frcmod(args.LIGFILE, args.FRCMOD, args.OUT_FRCMOD, verbose=args.VERBOSE)

    except Exception:
        # Print the traceback on stdout as well, then let the error propagate.
        # 'except Exception' leaves SystemExit (argparse -h / usage errors) and KeyboardInterrupt alone.
        print(traceback.format_exc())
        raise
--------------------------------------------------------------------------------
/show_ligand_interactions/README.md:
--------------------------------------------------------------------------------
1 | # A new PyMOL command to visualize receptor-ligand interactions and create publication-quality images.
2 |
3 | This script is also included in the [Pymol-script-repo](https://pymolwiki.org/index.php/Git_install_scripts), which I strongly recommend installing. In that case you just need to import it into PyMOL:
4 | ```
5 | import show_ligand_interactions
6 | ```
7 | Otherwise you can download it from here and load it into PyMOL every time you launch it.
8 | ```
9 | run 0:
54 | contacts_mdict[receptor_pdb][ligpdb] = (cont399, cont402, cont403, cont404)
55 | cmd.delete("cont*")
56 | cmd.delete(ligmol)
57 | cmd.delete(receptor)
58 |
59 | print("\nCONTACT RESULTS FOR INHIBITOR %s:" % inhibitor)
60 | print("receptor_pdb\tligand_pdb\tcontact_399\tcontact_402\tcontact_403\tcontact_404\n")
61 | struct_files = ""
62 | for receptor_pdb in contacts_mdict.keys():
63 | struct_files += " " + receptor_pdb
64 | for ligpdb in contacts_mdict[receptor_pdb].keys():
65 | struct_files += " " + ligpdb
66 | print(receptor_pdb, ligpdb, contacts_mdict[receptor_pdb][ligpdb])
67 | print("To load the poses:")
68 | print("pymol " + struct_files)
69 |
70 |
71 |
--------------------------------------------------------------------------------
/show_ligand_interactions/image_gallery/BACE_104_liginter.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/show_ligand_interactions/image_gallery/BACE_104_liginter.jpg
--------------------------------------------------------------------------------
/show_ligand_interactions/image_gallery/CatS_335_liginter.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/show_ligand_interactions/image_gallery/CatS_335_liginter.jpg
--------------------------------------------------------------------------------
/show_ligand_interactions/image_gallery/Thrombin_2zc9_liginter.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tevang/tutorials/ace2b0967e2a64cf625c8e6ba4cc9f44b38abf85/show_ligand_interactions/image_gallery/Thrombin_2zc9_liginter.jpg
--------------------------------------------------------------------------------
/show_ligand_interactions/show_ligand_interactions.py:
--------------------------------------------------------------------------------
1 | #show_ligand_interactions v.1.0
2 | # author: Thomas Evangelidis, 2019
3 | # License: BSD-2-Clause
4 |
5 | from pymol import cmd, util
6 | import show_bumps
7 |
def show_ligand_interactions(recsel="not hetatm", ligsel="hetatm", cutoff=5):
    """
DESCRIPTION

    Visualize interactions between receptor and ligand.

ARGUMENTS

    recsel = string: atom selection of the receptor {default: "not hetatm"}

    ligsel = string: atom selections of the ligand {default: "hetatm"}

    cutoff = float: show as sticks all receptor residues within this distance from the ligand {default: 5.0}
    """
    # Named selections that the remaining commands refer to.
    cmd.select('ligand', ligsel)
    cmd.select('receptor', recsel)

    cmd.bg_color('white')
    cmd.show_as('cartoon')
    cmd.show_as('sticks', 'hetatm or ligand')
    cmd.show_as('nonbonded', "resn HOH+T3P+WAT within %s of ligand" % cutoff)
    cmd.set('cartoon_transparency', 0.2)
    # Parenthesize both user selections so that operators inside them cannot
    # combine with the joining 'or' in an unintended way.
    cmd.spectrum(selection="(%s) or (%s)" % (recsel, ligsel), byres=1)
    util.cbag('not name C*')
    cmd.set('cartoon_fancy_helices', 1)
    cmd.show("sticks", "(hydro)")
    cmd.select("pocket", "byres (receptor within %s of ligand)" % cutoff)
    cmd.show("sticks", "pocket")
    cmd.hide('(h. and (e. c extend 1))')
    cmd.set('h_bond_max_angle', 30)
    cmd.set('h_bond_cutoff_center', 3.6)
    cmd.set('h_bond_cutoff_edge', 3.2)
    cmd.dist('ligand_Hbonds', 'ligand', 'receptor', 3.5, mode=2)
    cmd.set('dash_radius', 0.15)
    # now set the label options
    cmd.set('label_size', 20)
    cmd.set('label_position', [0, 0, 10])
    cmd.orient("ligand")
46 |
47 | cmd.extend('show_ligand_interactions', show_ligand_interactions)
48 |
--------------------------------------------------------------------------------
/visualize_ECFP_fragments/fragment_molecules.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | __author__="Thomas Evangelidis"
4 | __email__="tevang3@gmail.com"
5 |
6 | SMILES="CCC1=CC=C(C=C1)C=C2C(=O)NC(=S)S2" # 10058-F4 c-Myc inhibitor
7 | SMILES='CC(C)c5cc(CNC[C@@H](O)[C@@H]4C[C@H](C)CCCCCN([C@H](C)c1ccccc1)C(=O)c2cc(cc(c2)C3=NC=CO3)C(=O)N4)ccc5' # macrocycle
8 |
9 | from argparse import ArgumentParser, RawDescriptionHelpFormatter
10 | from rdkit.Chem.Draw.IPythonConsole import *
11 | from rdkit.Chem.Draw import MolToFile
12 | from rdkit import Chem
13 | from rdkit.Chem import AllChem
14 | from rdkit.Chem.rdmolfiles import MolFromMol2File
15 | import os, shutil
16 |
17 | ## Parse command line arguments
def cmdlineparse():
    """
    Define the command line options of fragment_molecules.py and parse sys.argv.

    :return: the argparse namespace with attributes SMILES, MOL2, OUT_FOLDER and FP_RADIUS.
    """
    description_text = """
DESCRIPTION:

This is a Python script to create fragments as in ECFP fingerprints.

"""
    epilog_text = """
### EXAMPLE 1:

"""
    cli = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                         description=description_text,
                         epilog=epilog_text)

    # One (flag, keyword arguments) entry per option, registered in this order.
    option_table = (
        ("-smiles", dict(dest="SMILES", required=False, default=None,
                         help="The molecule to be fragmented in SMILES format.")),
        ("-mol2", dict(dest="MOL2", required=False, default=None,
                       help="The molecule to be fragmented in MOL2 format.")),
        ("-outfolder", dict(dest="OUT_FOLDER", required=False, default="fragments", type=str,
                            help="The folder name which will be created (or erased if it already exists) where the "
                                 "PNG images of the fragments will be saved.")),
        ("-fpradius", dict(dest="FP_RADIUS", required=False, default=2, type=int,
                           help="The ECFP radius parameter value (distance in number of bonds). Default: %(default)s")),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
41 |
if __name__ == "__main__":
    args = cmdlineparse()

    # Load the molecule; when both options are given the MOL2 file takes precedence.
    mol = None
    if args.SMILES:
        mol = Chem.MolFromSmiles(args.SMILES)
    if args.MOL2:
        mol = MolFromMol2File(args.MOL2, sanitize=False, removeHs=False)
    # Stop before touching the output folder if no input was given or the input could not be parsed.
    if mol is None:
        raise SystemExit("ERROR: no molecule could be loaded; provide a valid -smiles string or -mol2 file.")

    # Start from an empty output folder.
    if os.path.exists(args.OUT_FOLDER):
        shutil.rmtree(args.OUT_FOLDER)
    os.mkdir(args.OUT_FOLDER)

    # Write the depiction straight into the output folder instead of the working directory.
    MolToFile(mol, os.path.join(args.OUT_FOLDER, "original_molecule.png"))

    # bitInfo is filled by RDKit; its keys are the ids of the bits that are set.
    bi = {}
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=args.FP_RADIUS, bitInfo=bi)
    for bit_id in bi:
        fragment_img = DrawMorganBit(mol, bit_id, bi)
        fragment_img.save(fp="%s/%i_frag.png" % (args.OUT_FOLDER, bit_id), format="PNG")

    # TODO: show all fragments in one figure
    # https://stackoverflow.com/questions/37365824/pandas-ipython-notebook-include-and-display-an-image-in-a-dataframe
59 |
60 | # TODO: show all fragments in one figure
61 | # https://stackoverflow.com/questions/37365824/pandas-ipython-notebook-include-and-display-an-image-in-a-dataframe
62 |
--------------------------------------------------------------------------------
/visualize_ECFP_fragments/visualize_ECFP_fragments.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "heading",
5 | "metadata": {
6 | "collapsed": true,
7 | "pycharm": {}
8 | },
9 | "level": 1,
10 | "source": [
11 | "Create and Visualize Fragments like in ECFP Fingerprints"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "pycharm": {}
18 | },
19 | "source": "ECFP (extended connectivity fingerprints), aka circular fingerprints, are built by applying the Morgan algorithm to a set of user-supplied atom invariants. In this tutorial we will generate fragments of a macrocyclic and a non-macrocyclic molecule of similar size and compare them.\n\n When generating Morgan fingerprints, the radius of the fingerprint must also be provided:"
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 29,
24 | "metadata": {
25 | "pycharm": {}
26 | },
27 | "outputs": [],
28 | "source": [
29 | "from rdkit import Chem, DataStructs\n",
30 | "from rdkit.Chem import AllChem, Draw, rdDepictor #, rdCoordGen # rdCoordGen requires Python 3.7\n",
31 | "from IPython.display import display, HTML\n",
32 | "import pandas as pd\n",
33 | "import os\n",
34 | "\n",
35 | "# The first canonical SMILES corresponds to macrocyclic compound BACE_149 from D3R GC2018\n",
36 | "# The following two SMILES correspond to non-macrocyclic compounds\n",
37 | "smiles \u003d [\u0027CCCCNC(\u003dO)[C@H](C)C[C@H](O)[C@@H]1C[C@H](C)CCCCCCC[C@H](NC(\u003dO)OC(C)(C)C)C(\u003dO)N[C@@H](C)C(\u003dO)N1\u0027,\n",
38 | " \u0027CCCCNC(\u003dO)C(C)CC(C(CC1CCCCC1)NC(\u003dO)C(C(C)C)NC(\u003dO)CNC(\u003dO)OC(C)(C)C)O\u0027,\n",
39 | " \u0027CC(C)C(C(\u003dO)NC(C(C)C)C(\u003dO)OC)NC(\u003dO)CCC(C(CC1CCCCC1)NC(\u003dO)C(C)NC(\u003dO)C(C)N)O\u0027\n",
40 | " ]\n",
41 | "ids \u003d [\u0027BACE_149\u0027, \u0027mol2\u0027, \u0027mol3\u0027]\n",
42 | "df \u003d pd.DataFrame({\u0027mol\u0027: [Chem.MolFromSmiles(x) for x in smiles]}, index\u003dids)\n"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {
48 | "pycharm": {}
49 | },
50 | "source": [
51 | "Let's visualize these 3 molecules."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 30,
57 | "metadata": {
58 | "pycharm": {}
59 | },
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "/home2/thomas/Documents/tutorials/Multilayer_Perceptron_Keras\n"
66 | ]
67 | },
68 | {
69 | "ename": "IOError",
70 | "evalue": "[Errno 2] No such file or directory: \u0027images/gridmol.png\u0027",
71 | "traceback": [
72 | "\u001b[0;31m\u001b[0m",
73 | "\u001b[0;31mIOError\u001b[0mTraceback (most recent call last)",
74 | "\u001b[0;32m\u003cipython-input-30-febc8a8e8909\u003e\u001b[0m in \u001b[0;36m\u003cmodule\u003e\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# display(img) # try it again in Python 3.7\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetcwd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---\u003e 17\u001b[0;31m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"images/gridmol.png\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# save the image to a file for the time being\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
75 | "\u001b[0;32m/home/thomas/.local/lib/python2.7/site-packages/PIL/Image.pyc\u001b[0m in \u001b[0;36msave\u001b[0;34m(self, fp, format, **params)\u001b[0m\n\u001b[1;32m 2002\u001b[0m \u001b[0;31m# Open also for reading (\"+\"), because TIFF save_all\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2003\u001b[0m \u001b[0;31m# writer needs to go back and edit the written data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-\u003e 2004\u001b[0;31m \u001b[0mfp\u001b[0m \u001b[0;34m\u003d\u001b[0m \u001b[0mbuiltins\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"w+b\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2005\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2006\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
76 | "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: \u0027images/gridmol.png\u0027"
77 | ],
78 | "output_type": "error"
79 | }
80 | ],
81 | "source": "mols \u003d []\nfor mol in df[\u0027mol\u0027].values:\n mol \u003d Chem.Mol(mol)\n rdDepictor.Compute2DCoords\n # rdCoordGen.AddCoords(mol) # requires Python 3.7\n # rescale(mol, f\u003d1.4) # AddCoords seems to produced coordinates that are hard to display, so rescale them\n mols.append(mol)\nlegends \u003d df[\u0027mol\u0027].keys()\nimg \u003d Draw.MolsToGridImage(mols, \n molsPerRow\u003dlen(legends),\n subImgSize\u003d(300, 300),\n legends\u003dlegends,\n useSVG\u003dFalse, # set to True in Python 3.7\n )\n# display(img) # try it again in Python 3.7\nprint(os.getcwd())\nimg.save(\"images/gridmol.png\") # save the image to a file for the time being"
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "pycharm": {}
88 | },
89 | "outputs": [],
90 | "source": "fp1 \u003d AllChem.GetMorganFingerprint(m1,radius\u003d3)\nfp2 \u003d AllChem.GetMorganFingerprint(m2,radius\u003d3)\nfp3 \u003d AllChem.GetMorganFingerprint(m3,radius\u003d3)\nprint(\"The ECFP fingeprint(similarity between m1 and m2 is %f\" % DataStructs.DiceSimilarity(fp1,fp2))\nprint(\"The ECFP fingeprint(similarity between m2 and m3 is %f\" % DataStructs.DiceSimilarity(fp2,fp3))\nprint(\"The ECFP fingeprint(similarity between m1 and m3 is %f\" % DataStructs.DiceSimilarity(fp1,fp3))"
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {
95 | "pycharm": {}
96 | },
97 | "source": "Morgan fingerprints, like atom pairs and topological torsions, use counts by default, but it’s also possible to calculate them as bit vectors:"
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 4,
102 | "metadata": {
103 | "pycharm": {}
104 | },
105 | "outputs": [
106 | {
107 | "name": "stdout",
108 | "output_type": "stream",
109 | "text": [
110 | "The ECFP fingeprint similarity between m1 and m2 is 0.480000\nThe ECFP fingeprint similarity between m2 and m3 is 0.555556\nThe ECFP fingeprint similarity between m1 and m3 is 0.248521\n"
111 | ]
112 | }
113 | ],
114 | "source": "fp1 \u003d AllChem.GetMorganFingerprintAsBitVect(m1,radius\u003d3,nBits\u003d4096)\nfp2 \u003d AllChem.GetMorganFingerprintAsBitVect(m2,radius\u003d3,nBits\u003d4096)\nfp3 \u003d AllChem.GetMorganFingerprintAsBitVect(m3,radius\u003d3,nBits\u003d4096)\nprint(\"The ECFP fingeprint(similarity between m1 and m2 is %f\" % DataStructs.DiceSimilarity(fp1,fp2))\nprint(\"The ECFP fingeprint(similarity between m2 and m3 is %f\" % DataStructs.DiceSimilarity(fp2,fp3))\nprint(\"The ECFP fingeprint(similarity between m1 and m3 is %f\" % DataStructs.DiceSimilarity(fp1,fp3))"
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {
119 | "pycharm": {}
120 | },
121 | "source": [
122 | "As you can see, the similarity changes slightly if you express them as bit vectors and can change further if you increase the nBits parameter which controls bit collisions.\n",
123 | "\n",
124 | "When comparing the ECFP/FCFP fingerprints and the Morgan fingerprints generated by the RDKit, remember that the 4 in ECFP4 corresponds to the diameter of the atom environments considered, while the Morgan fingerprints take a radius parameter. So the examples above, with radius\u003d3, are roughly equivalent to ECFP6 and FCFP6."
125 | ]
126 | },
127 | {
128 | "cell_type": "heading",
129 | "metadata": {
130 | "pycharm": {}
131 | },
132 | "level": 2,
133 | "source": [
134 | "Explaining bits from Morgan Fingerprints."
135 | ]
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python 2",
141 | "language": "python",
142 | "name": "python2"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 2
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython2",
154 | "version": "2.7.6"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 0
159 | }
--------------------------------------------------------------------------------
/visualize_ligand_properties/README.md:
--------------------------------------------------------------------------------
1 | # UCSF Chimera Tutorial: visualize ligand properties (charges, bond lengths & angles, etc.)
2 |
3 | Load the two ligand files into Chimera and label atom names and charges
4 | ```
5 | labelopt info "%(name)s %(charge)+.3f"
6 | label
7 | ```
8 | or if you want the atom type as well
9 | ```
10 | labelopt info "%(name)s(%(idatmType)s) %(charge)+.3f"
11 | label
12 | ```
13 | or to show only one molecule and label by atom type
14 | ```
15 | ~display #1
16 | labelopt info "%(idatmType)s"
17 | label
18 | ```
19 | To change the label font size, go to Favorites->Preferences and set "Category" to Background.
20 | To change the background color (although black is good for displaying labels):
21 | ```
22 | background solid white
23 | ```
24 | change representation (only the "wire" works well with .mol2 files)
25 | ```
26 | represent wire
27 | ```
28 | Play with the scale command to zoom in on the image as much as you want:
29 | ```
30 | scale 1.4
31 | ```
32 |
33 |
34 | You can label the bonds with their lengths using this Python code:
35 |
36 | ```python
37 | from chimera import openModels, Molecule
38 | for mol in openModels.list(modelTypes=[Molecule]):
39 | for b in mol.bonds:
40 | b.label = "%.2f" % b.length()
41 | ```
42 |
43 | You can label atoms, bonds, and residues in this fashion, but there is no provision to label bond angles per se. If you want torsion angles instead and would like to add them to the bond label, you can get the floating-point dihedral value defined by four atoms (here `a1` to `a4`) with this code:
44 |
45 | ```python
46 | import chimera
47 | dihed_val = chimera.dihedral(a1.coord(), a2.coord(), a3.coord(), a4.coord())
48 | ```
49 |
--------------------------------------------------------------------------------