├── .gitignore ├── Docking ├── GUI │ ├── README.md │ ├── __init__.py │ ├── fast_top_hit_search.py │ ├── generate_images.py │ ├── overloaded_final_extraction.py │ ├── process_gui_data.py │ ├── run_search.sh │ ├── slurm_job_manager.py │ └── update_gui.sh ├── ML │ ├── DDCallbacks.py │ ├── DDMetrics.py │ ├── DDModel.py │ ├── DDModelExceptions.py │ ├── Models.py │ ├── Parser.py │ ├── Tokenizer.py │ ├── __init__.py │ ├── data_generator.py │ ├── lasso_regularizer.py │ ├── load_data.py │ ├── model_tuner.py │ ├── transformer_layers.py │ ├── tuner_config.txt │ └── utils.py ├── ProgressiveDocking │ ├── Extract_labels.py │ ├── Extracting_morgan.py │ ├── Extracting_smiles.py │ ├── Prediction_morgan_1024.py │ ├── __init__.py │ ├── activation_script.sh │ ├── autodock_gpu_ad.sh │ ├── check_phase.py │ ├── deactivation_script.sh │ ├── final_extraction.py │ ├── final_extraction.sh │ ├── hyperparameter_result_evaluation.py │ ├── jobid_writer.py │ ├── molecular_file_count_updated.py │ ├── optimize_models.py │ ├── phase_1.sh │ ├── phase_2.sh │ ├── phase_3.sh │ ├── phase_3_concluding_combination.sh │ ├── phase_4.sh │ ├── phase_5.sh │ ├── phase_a.sh │ ├── phase_changer.py │ ├── prepare_ligands_ad.sh │ ├── progressive_docking.py │ ├── reset.py │ ├── reset1.sh │ ├── reset2.sh │ ├── reset3.sh │ ├── reset4.sh │ ├── reset5.sh │ ├── sampling.py │ ├── sanity_check.py │ ├── settings.json │ ├── setup_slurm_specifications.py │ ├── simple_job_models.py │ ├── simple_job_predictions.py │ ├── slurm_file_manager.py │ ├── split_chunks.sh │ ├── split_sdf.py │ ├── util_functions.py │ └── venv_sanity_check.py └── __init__.py ├── GUI ├── README.md ├── package-lock.json ├── package.json ├── public │ ├── css │ │ └── oldSchool.css │ ├── img │ │ ├── Indicator_light_g.svg │ │ ├── Indicator_light_r.svg │ │ ├── Indicator_light_y.svg │ │ ├── close_button.svg │ │ ├── download_icon.svg │ │ ├── left_switch.svg │ │ ├── loading_svg.svg │ │ ├── loading_svg_ripple.svg │ │ ├── reload_icon.svg │ │ └── right_switch.svg │ ├── js │ │ ├── bundle.js │ │ └── bundle.js.map │ └── webp-img │ │ ├── 00b42403057e60520cb497d92556b982.png │ │ ├── 0638bec8443dd6e3385084884ed644a2.png │ │ ├── 1f0710a4a9c764c4801a6b0bbd1f6744.png │ │ ├── 2a9beabef112cd5d9b57edafe04ecd82.png │ │ ├── 8a9e1648449beda9d58190f1bc4a749e.png │ │ ├── c2e5bc8f9058ad350eed2e2559c63174.png │ │ ├── d2d023bf09910fa13a8d59977bad92dd.png │ │ └── d3698da8e2bda9d79e1bb514e2d600fa.png ├── server.py ├── src │ ├── __init__.py │ ├── backend │ │ ├── DataHistory.py │ │ ├── EmailBot.py │ │ ├── EventHandler.py │ │ ├── __init__.py │ │ ├── auto_ssh.py │ │ ├── backend.py │ │ ├── backend_exceptions.py │ │ ├── backend_sanity_check.py │ │ └── cluster_commands.py │ ├── index.js │ ├── login.js │ ├── mainPagejs │ │ ├── basics.js │ │ ├── models.js │ │ ├── progress.js │ │ ├── startarun.js │ │ └── topScoring.js │ └── test.js ├── templates │ ├── login.html │ ├── mainPage.html │ └── test.html └── webpack.config.js ├── LICENSE ├── README.md ├── __init__.py ├── installation ├── DeepDockingLocal.yml ├── fix_sh.sh ├── install-linux.sh ├── install-windows.bat ├── install.py └── welcome_message.txt ├── preparation_scripts ├── README.md ├── compute_morgan_fp.sh ├── morgan_fp.py └── prepare_receptor.sh └── util ├── ProgressBar.py ├── __init__.py ├── __pycache__ ├── ProgressBar.cpython-36.pyc ├── ProgressBar.cpython-38.pyc ├── __init__.cpython-36.pyc └── __init__.cpython-38.pyc └── figures ├── DDGUI-DD.png ├── Monitor.png ├── Progress.png ├── login_screen.png ├── models_full.png ├── new_project_info.png ├── progress_full.png ├── 
start_a_run_full.png └── top_scoring_full.png /.gitignore: -------------------------------------------------------------------------------- 1 | Scratch 2 | GUI/node_modules 3 | /venv 4 | *.code-workspace 5 | *.pyc 6 | db.json 7 | installation/installation_information.json 8 | installation/*.out 9 | activation_script.sh 10 | GUI/src/backend/projects/*.json 11 | __pycache__ 12 | GUI/src/backend/__pycache__/ 13 | GUI/__pycache__/ 14 | util/__pycache__/__init__.cpython-36.pyc 15 | util/__pycache__/__init__.cpython-37.pyc 16 | util/__pycache__/ProgressBar.cpython-36.pyc 17 | util/__pycache__/ProgressBar.cpython-37.pyc 18 | -------------------------------------------------------------------------------- /Docking/GUI/README.md: -------------------------------------------------------------------------------- 1 | # GUI 2 | 3 | While the GUI is running, these scripts will generate information for display and save it as a pickle file. 4 | -------------------------------------------------------------------------------- /Docking/GUI/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/Docking/GUI/__init__.py -------------------------------------------------------------------------------- /Docking/GUI/fast_top_hit_search.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import pandas as pd 4 | import os 5 | try: 6 | import __builtin__ 7 | except ImportError: 8 | # Python 3 9 | import builtins as __builtin__ 10 | 11 | # For debugging purposes only: 12 | def print(*args, **kwargs): 13 | __builtin__.print('\t fast top hit search: ', end="") 14 | return __builtin__.print(*args, **kwargs) 15 | 16 | 17 | def find_top_n_predicted_molecules(file_path): 18 | # Search through the predicted morgan files and find the top hits 19 | n = search_size 20 | n = min(100, n) # Cap the number of molecules to 100 21 | 22 | # Read the CSV and extract the top hits 23 | df = pd.read_csv(file_path, names=['id', "score", ]) 24 | top_n = df.nlargest(n, "score") 25 | 26 | # return a series of the top n predictions as a value in a dictionary where the key is the file it was found in 27 | return os.path.basename(file_path), top_n 28 | 29 | 30 | def find_matching_smiles(smile_database_path, file_path, search_dict, itr): 31 | # Grab the targets we are looking for 32 | targets = search_dict[file_path]['id'].tolist() 33 | print("Debug: This process is searching for", targets, "in file", file_path) 34 | 35 | # Read the smile file corresponding to the predictions 36 | smile_file = os.path.join(smile_database_path, os.path.basename(file_path)) 37 | df = pd.read_csv(smile_file, delimiter=" ", index_col=1) 38 | 39 | # Loop through the targets and check if it is found in the file 40 | with open(itr + "/top_hits.csv", "a") as top_hits: 41 | for target in targets: 42 | if target in df.index: 43 | print("Found target:", target) 44 | found_smile = df.loc[target, 'smiles'] 45 | # Write to the top_hits.csv file as: smile,id,score 46 | top_hits.write(found_smile + "," + target + "\n") 47 | 48 | 49 | if __name__ == '__main__': 50 | import argparse 51 | args = argparse.ArgumentParser() 52 | args.add_argument("-sdb", "--smile_database", required=True, type=str) 53 | args.add_argument("-pdb", "--predicted_database", required=True, type=str) 54 | args.add_argument("-tp", "--total_processors", 
required=True, type=int) 55 | args.add_argument("-n", required=True, type=int) 56 | info = args.parse_args() 57 | 58 | # Get the search size for each process 59 | prediction_files = [os.path.join(info.predicted_database, f) for f in os.listdir(info.predicted_database) if 'smile' in f] 60 | num_prediction_files = len(prediction_files) 61 | search_size = round(info.n/num_prediction_files) 62 | num_processes = min([info.total_processors, num_prediction_files]) 63 | 64 | # Get the file path 65 | itr_path = str(info.predicted_database).replace("/morgan_1024_predictions", "") 66 | 67 | # Make sure we have the prediction files 68 | assert os.path.exists(info.predicted_database), print("Phase 5 Incomplete...") 69 | with open(itr_path + "/top_hits.csv", "w") as init_top_hits: 70 | init_top_hits.write("smile,id\n") 71 | 72 | print("Starting search...") 73 | print("We have the following arguments passed:") 74 | print(" - Number of files to search:", num_prediction_files) 75 | print(" - Number of molecules to find:", info.n) 76 | print(" - Search size:", search_size) 77 | print(" - Number of processes:", num_processes) 78 | print(" - Smile database:", info.smile_database) 79 | print(" - Predicted database:", info.predicted_database) 80 | 81 | print("Finding top predictions") 82 | # Search for the top predicted hits 83 | with closing(Pool(num_processes)) as pool: 84 | predicted = pool.map(find_top_n_predicted_molecules, prediction_files) 85 | print(" - Done") 86 | 87 | # Arrange all of the top predictions into a dictionary indexed by their file name 88 | search = {} 89 | print("Finding top smiles") 90 | for top_list in predicted: 91 | # {os.path.basename(file_path): top_n} 92 | file_name, predictions = top_list 93 | search[file_name] = predictions 94 | 95 | # Generate the args for the multiprocessing 96 | mp_args = [] 97 | for key in search.keys(): 98 | mp_args.append((info.smile_database, key, search, itr_path)) 99 | 100 | # Start searching for the top hits from the smile database 101 | with closing(Pool(num_processes)) as pool: 102 | pool.starmap(find_matching_smiles, mp_args) 103 | print(" - Done") 104 | 105 | -------------------------------------------------------------------------------- /Docking/GUI/generate_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def generate_model_image(model_path): 5 | model_name = os.path.basename(model_path) 6 | iteration = os.path.basename(model_path.replace("/all_models/" + model_name, "")) 7 | 8 | # Get the project name -> .../project_name/iteration_n/all_models/model_name -> [..., project_name, ., ., .] 9 | project_name = model_path.split("/")[-4] 10 | file_name = "GUI/images/models/{}_{}_{}.png".format(project_name, model_name, iteration) 11 | 12 | # check if the image already exists... 
if it does then skip generating a new one 13 | if not os.path.exists(file_name): 14 | import tensorflow as tf 15 | from tensorflow.keras.models import load_model 16 | tf.keras.utils.plot_model( 17 | load_model(model_path), 18 | to_file=file_name, 19 | show_shapes=True, 20 | show_layer_names=True) 21 | 22 | # Grab the hyperparameter info 23 | from ML.Parser import Parser 24 | try: 25 | info = Parser.parse_ddss(model_path + ".ddss") 26 | except FileNotFoundError: 27 | info = {} 28 | print(file_name + "&&&" + str(info)) 29 | 30 | 31 | def generate_molecule_image(path, limit=25): 32 | from rdkit.Chem.Scaffolds import MurckoScaffold 33 | from rdkit.Chem import MolFromSmiles 34 | from rdkit.Chem.Draw import MolToImage 35 | from PIL import ImageDraw 36 | 37 | if os.path.exists(path): 38 | # Read the hits file 39 | smiles = [] 40 | ids = [] 41 | with open(path, 'r') as top_hits: 42 | for line_number, line in enumerate(top_hits.readlines()): 43 | if line_number >= limit: 44 | break 45 | smiles.append(line.split(" ")[0]) 46 | ids.append(line.split(" ")[1]) 47 | 48 | # Generate scaffold 49 | for smile, mid in zip(smiles, ids): 50 | mol = MurckoScaffold.GetScaffoldForMol(MolFromSmiles(smile)) 51 | image = MolToImage(mol) 52 | 53 | # Add text to the image 54 | draw = ImageDraw.Draw(image) 55 | draw.text((5, 5), mid, fill="black", align="right") 56 | image.save("GUI/images/molecules/{}.png".format(smile)) 57 | else: 58 | return 59 | 60 | 61 | if __name__ == '__main__': 62 | import argparse 63 | import sys 64 | sys.path.append(".") 65 | 66 | parser = argparse.ArgumentParser() 67 | parser.add_argument("--image_of", '-imof') 68 | parser.add_argument("--path_to_model") 69 | parser.add_argument("--path_to_molecules") 70 | args = parser.parse_args() 71 | 72 | try: 73 | os.mkdir("GUI/images") 74 | os.mkdir("GUI/images/molecules") 75 | os.mkdir("GUI/images/models") 76 | except OSError: 77 | pass 78 | 79 | if args.image_of == 'model': 80 | generate_model_image(args.path_to_model) 81 | elif args.image_of in {"molec", "molecule"}: 82 | generate_molecule_image(args.path_to_molecules) -------------------------------------------------------------------------------- /Docking/GUI/overloaded_final_extraction.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import multiprocessing 4 | import pandas as pd 5 | import argparse 6 | import random 7 | import glob 8 | import sys 9 | import os 10 | 11 | 12 | def merge_on_smiles(pred_file): 13 | print("Merging " + os.path.basename(pred_file) + "...", end=" ") 14 | 15 | # Read the predictions 16 | pred = pd.read_csv(pred_file, names=["id", "score"], index_col=0) 17 | pred.drop_duplicates() 18 | 19 | # Read the smiles 20 | smile_file = os.path.join(args.smile_dir, os.path.basename(pred_file)) 21 | smi = pd.read_csv(smile_file, delimiter=" ", names=["smile", "id"], index_col=1) 22 | smi = smi.drop_duplicates() 23 | 24 | # Merge on the IDs and sort by the score 25 | merged = pd.merge(pred, smi, how="inner", on=["id"]) 26 | merged.sort_values(by="score", ascending=False, inplace=True) 27 | 28 | # Save to a csv as (mean_score)_(base_name).csv 29 | size = len(merged) 30 | file_name = "extracted_smiles/" + str(size) + "_" + os.path.basename(pred_file) + ".csv" 31 | merged.to_csv(file_name) 32 | print("Done") 33 | 34 | return file_name 35 | 36 | 37 | def kinda_merge_sort(f): 38 | # Unpack 39 | n, f1, f2 = f 40 | if n is None: 41 | print("Merging", f1, "with", f2, " - Non 
Terminal") 42 | else: 43 | print("Merging", f1, "with", f2, " - Terminal") 44 | 45 | # Combine f1 and f2 then sort the dataframe 46 | combined = pd.concat([pd.read_csv(f1, index_col=0), 47 | pd.read_csv(f2, index_col=0)]) 48 | combined.sort_values(by="score", ascending=False, inplace=True) 49 | 50 | # Remove the two files 51 | os.remove(f1) 52 | os.remove(f2) 53 | 54 | # If it is the final merge then get the top_n and save to csv 55 | if n is not None and n.lower() != "all": 56 | # If n != "all", then we should not take all of the top hits... 57 | combined = combined.head(int(n)) 58 | # We will finalize our extraction by separating our combined dataframe into two new ones 59 | finalize(combined) 60 | return "" 61 | elif n is not None and n.lower() == "all": 62 | # We will finalize our extraction by separating our combined dataframe into two new ones 63 | finalize(combined) 64 | return "" 65 | else: 66 | # If it is not the final merge iteration, merge as usual 67 | # Generate a random key 68 | size = len(combined) 69 | key = str(size) + "-" 70 | for _ in range(30): 71 | key += str(random.randint(0, 9)) 72 | 73 | f12 = "extracted_smiles/" + key + ".csv" 74 | combined.to_csv(f12) 75 | return f12 76 | 77 | 78 | def finalize(combined): 79 | print("Finished... Saving") 80 | # Rearrange the smiles 81 | smiles = combined.drop('score', 1) 82 | smiles = smiles[["smile"]] 83 | print("Here is the smiles:") 84 | print(smiles.head()) 85 | smiles.to_csv("smiles.csv", sep=" ") 86 | 87 | # Rearrange for id,score 88 | combined.drop("smile", 1, inplace=True) 89 | combined.to_csv("id_score.csv") 90 | print("Here are the ids and scores") 91 | print(combined.head()) 92 | 93 | 94 | if __name__ == '__main__': 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument("-smile_dir", required=True) 97 | parser.add_argument("-morgan_dir", required=True) 98 | parser.add_argument("-processors", required=True) 99 | parser.add_argument("-mols_to_dock", required=False, default="all") 100 | 101 | args = parser.parse_args() 102 | predictions = [] 103 | 104 | for file in glob.glob(args.morgan_dir + "/*"): 105 | if "smile" in os.path.basename(file): 106 | predictions.append(file) 107 | 108 | print("Morgan Dir: " + args.morgan_dir) 109 | print("Smile Dir: " + args.smile_dir) 110 | print("Number Of Files: ", len(predictions)) 111 | # Sort the predictions 112 | # Our name looks like -> smile_all_N.txt and we want N so we get: 113 | # smile_all_N.txt -> ["smile_all_N", "txt"] -> "smile_all_N" -> ["smile", "all", "N"] -> N 114 | predictions.sort(key=lambda x: int(x.split(".")[0].split("_")[-1])) 115 | 116 | # combine the files 117 | print("Finding smiles...") 118 | print("Number of CPUs: " + str(multiprocessing.cpu_count())) 119 | num_jobs = min(len(predictions), int(args.processors)) 120 | 121 | # Try to create a directory for the smile CSVs 122 | try: 123 | print("Created 'extracted_smiles' Directory") 124 | os.mkdir("extracted_smiles/") 125 | with closing(Pool(num_jobs)) as pool: 126 | file_paths = pool.map(merge_on_smiles, predictions) 127 | except IOError: 128 | print("The 'extracted_smiles' Directory Exists... 
Skipping initial merge.") 129 | file_paths = ["extracted_smiles/" + f for f in os.listdir("extracted_smiles/")] 130 | 131 | # combine all files in the list and sort the values 132 | print("Merging Complete - Concatenating all files...") 133 | 134 | # Run this mapping until we have only a single file left 135 | # We merge each file in parallel and sort them 136 | merging_iteration = 0 137 | num_files = len(os.listdir("extracted_smiles/")) 138 | is_final_iteration = False 139 | while num_files > 1: 140 | # Check if final iteration or if this merge is the final merge 141 | top_n = None if num_files != 2 else args.mols_to_dock 142 | merging_iteration += 1 143 | print("Merging Iteration:", merging_iteration) 144 | print("Files Remaining:", num_files) 145 | print("Percent Complete:", round(1 / num_files, 3) * 100, "%") 146 | 147 | # Create the arguments to run the merge 148 | merging_args = [] 149 | for i in range(len(file_paths) - 1, -1, -2): 150 | if i - 1 >= 0: 151 | merging_args.append((top_n, 152 | file_paths[i], 153 | file_paths[i - 1])) 154 | 155 | # Remove the file paths from the list since they have been combined 156 | file_paths.remove(file_paths[i]) 157 | file_paths.remove(file_paths[i - 1]) 158 | 159 | # Run the jobs and gather all of the file path 160 | num_jobs = min(len(merging_args), int(args.processors)) 161 | with closing(Pool(num_jobs)) as pool: 162 | file_paths += pool.map(kinda_merge_sort, merging_args) 163 | 164 | # Update the number of files 165 | num_files = len(os.listdir("extracted_smiles/")) 166 | 167 | with open("final_phase.info", "w") as info: 168 | info.write("Finished") -------------------------------------------------------------------------------- /Docking/GUI/run_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -n 1 3 | #SBATCH --job-name=smile_searching 4 | #SBATCH --cpus-per-task=25 5 | #SBATCH --output=slurm-%x.%j.out 6 | #SBATCH --error=slurm-%x.%j.err 7 | 8 | # Read input 9 | project_path=$1 10 | n_cpus=$2 11 | iteration=$3 12 | n=$4 13 | 14 | echo Args: 15 | echo Iteration: $iteration 16 | echo Total CPUs: $n_cpus 17 | echo Project Path: $project_path 18 | echo Project Name: $(basename "$project_path") 19 | echo Num Mols: $n 20 | 21 | # Set constant 22 | smile_directory=`sed -n '5p' $project_path/logs.txt` 23 | 24 | cd .. 
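# --- annotation (not part of the original run_search.sh) ---
# Hedged usage sketch: this script reads four positional arguments
# (project_path, n_cpus, iteration, n) and requests 25 CPUs per task above,
# so a submission presumably looks like (all values below are placeholders):
#   sbatch run_search.sh /path/to/project 25 3 1000
# The `sed -n '5p' $project_path/logs.txt` line assumes the SMILES library
# directory is stored on line 5 of the project's logs.txt; that reading is
# inferred from the variable name, not stated elsewhere in this section.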
25 | # This should activate the conda environment 26 | source ~/.bashrc 27 | source activation_script.sh 28 | 29 | cd GUI 30 | python fast_top_hit_search.py -sdb $smile_directory -pdb $project_path/iteration_$iteration/morgan_1024_predictions -tp $n_cpus -n $n 31 | -------------------------------------------------------------------------------- /Docking/GUI/update_gui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 "$2"/process_gui_data.py --project_path "$1" --pickle_path "$2" --current_user "$3" 3 | -------------------------------------------------------------------------------- /Docking/ML/DDCallbacks.py: -------------------------------------------------------------------------------- 1 | """ 2 | James Gleave 3 | v1.1.0 4 | """ 5 | 6 | from tensorflow.keras.callbacks import Callback 7 | import pandas as pd 8 | import time 9 | import os 10 | 11 | 12 | class DDLogger(Callback): 13 | """ 14 | Logs the important data regarding model training 15 | """ 16 | 17 | def __init__(self, log_path, 18 | max_time=36000, 19 | max_epochs=500, 20 | monitoring='val_loss', ): 21 | super(Callback, self).__init__() 22 | # Params 23 | self.max_time = max_time 24 | self.max_epochs = max_epochs 25 | self.monitoring = monitoring 26 | 27 | # Stats 28 | self.epoch_start_time = 0 29 | self.current_epoch = 0 30 | 31 | # File 32 | self.log_path = log_path 33 | self.model_history = {} 34 | 35 | def on_train_begin(self, logs={}): 36 | self.epoch_start_time = time.time() 37 | 38 | def on_epoch_begin(self, epoch, logs=None): 39 | self.epoch_start_time = time.time() 40 | 41 | def on_epoch_end(self, epoch, logs={}): 42 | # Store the data 43 | current_time = time.time() 44 | epoch_duration = current_time - self.epoch_start_time 45 | logs['time_per_epoch'] = epoch_duration 46 | self.model_history["epoch_" + str(epoch + 1)] = logs 47 | 48 | # Estimate time to completion 49 | estimate, elapsed, (s, p, x) = self.estimate_training_time() 50 | logs['estimate_time'] = estimate 51 | logs['time_elapsed'] = elapsed 52 | self.model_history["epoch_" + str(epoch + 1)] = logs 53 | 54 | # Save the data to a csv 55 | df = pd.DataFrame(self.model_history) 56 | df.to_csv(self.log_path) 57 | 58 | print("Time taken calculating callbacks:", time.time()-current_time) 59 | 60 | def estimate_training_time(self): 61 | max_allotted_time = self.max_time 62 | max_allotted_epochs = self.max_epochs 63 | 64 | # Grab the info about the model 65 | model_loss = [] 66 | time_per_epoch = [] 67 | for epoch in self.model_history: 68 | model_loss.append(self.model_history[epoch]['val_loss']) 69 | time_per_epoch.append(self.model_history[epoch]['time_per_epoch']) 70 | 71 | time_elapsed = sum(time_per_epoch) 72 | average_time_per_epoch = sum(time_per_epoch) / len(time_per_epoch) 73 | current_epoch = len(time_per_epoch) 74 | 75 | # Find out if the model is approaching an early stop 76 | epochs_until_early_stop = 10 77 | stopping_vector = [] 78 | prev_loss = model_loss[0] 79 | for loss in model_loss: 80 | improved = loss < prev_loss 81 | stopping_vector.append(improved) 82 | if improved: 83 | prev_loss = loss 84 | 85 | # Check how close we are to an early stop 86 | longest_failure = 0 87 | for improved in stopping_vector: 88 | if not improved: 89 | longest_failure += 1 90 | else: 91 | longest_failure = 0 92 | 93 | max_time = max_allotted_epochs * average_time_per_epoch if max_allotted_epochs * average_time_per_epoch < max_allotted_time else max_allotted_time 94 | time_if_early_stop = 
(epochs_until_early_stop - longest_failure) * average_time_per_epoch 95 | 96 | # Estimate a completion time 97 | loss_drops = stopping_vector.count(True) 98 | loss_gains = len(stopping_vector) - loss_drops 99 | try: 100 | gain_drop_ratio = loss_gains / loss_drops 101 | except ZeroDivisionError: 102 | gain_drop_ratio = 0 103 | 104 | # Created a function to estimate training time 105 | power = 1 - (gain_drop_ratio ** 3 / 5) 106 | time_estimate = (max_time ** power) / (1 + longest_failure) 107 | 108 | # Smooth out the estimate 109 | if current_epoch > 1: 110 | last = self.model_history['epoch_{}'.format(current_epoch - 1)]['estimate_time'] 111 | time_estimate = (time_estimate + last) / 2 112 | 113 | # If the time estimate surpasses the max time then just show the max time 114 | time_for_remaining_epochs = (self.max_epochs - current_epoch) * average_time_per_epoch 115 | if time_for_remaining_epochs < time_estimate: 116 | time_estimate = time_for_remaining_epochs 117 | 118 | return time_estimate, time_elapsed, (longest_failure, gain_drop_ratio, max_time) 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /Docking/ML/DDMetrics.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.keras import backend as K 5 | 6 | 7 | def recall(y_true, y_pred): 8 | true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) 9 | possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) 10 | recall_keras = true_positives / (possible_positives + K.epsilon()) 11 | return recall_keras 12 | 13 | 14 | def precision(y_true, y_pred): 15 | true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) 16 | predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) 17 | precision_keras = true_positives / (predicted_positives + K.epsilon()) 18 | return precision_keras 19 | 20 | 21 | def specificity(y_true, y_pred): 22 | tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1))) 23 | fp = K.sum(K.round(K.clip((1 - y_true) * y_pred, 0, 1))) 24 | return tn / (tn + fp + K.epsilon()) 25 | 26 | 27 | def negative_predictive_value(y_true, y_pred): 28 | tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1))) 29 | fn = K.sum(K.round(K.clip(y_true * (1 - y_pred), 0, 1))) 30 | return tn / (tn + fn + K.epsilon()) 31 | 32 | 33 | def f1(y_true, y_pred): 34 | p = precision(y_true, y_pred) 35 | r = recall(y_true, y_pred) 36 | return 2 * ((p * r) / (p + r + K.epsilon())) 37 | 38 | 39 | def fbeta(y_true, y_pred, beta=2): 40 | y_pred = K.clip(y_pred, 0, 1) 41 | 42 | tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)), axis=1) 43 | fp = K.sum(K.round(K.clip(y_pred - y_true, 0, 1)), axis=1) 44 | fn = K.sum(K.round(K.clip(y_true - y_pred, 0, 1)), axis=1) 45 | 46 | p = tp / (tp + fp + K.epsilon()) 47 | r = tp / (tp + fn + K.epsilon()) 48 | 49 | num = (1 + beta ** 2) * (p * r) 50 | den = (beta ** 2 * p + r + K.epsilon()) 51 | return K.mean(num / den) 52 | 53 | 54 | def matthews_correlation_coefficient(y_true, y_pred): 55 | tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) 56 | tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1))) 57 | fp = K.sum(K.round(K.clip((1 - y_true) * y_pred, 0, 1))) 58 | fn = K.sum(K.round(K.clip(y_true * (1 - y_pred), 0, 1))) 59 | 60 | num = tp * tn - fp * fn 61 | den = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) 62 | return num / K.sqrt(den + K.epsilon()) 63 | 64 | 65 | def equal_error_rate(y_true, y_pred): 66 | n_imp = 
tf.count_nonzero(tf.equal(y_true, 0), dtype=tf.float32) + tf.constant(K.epsilon()) 67 | n_gen = tf.count_nonzero(tf.equal(y_true, 1), dtype=tf.float32) + tf.constant(K.epsilon()) 68 | 69 | scores_imp = tf.boolean_mask(y_pred, tf.equal(y_true, 0)) 70 | scores_gen = tf.boolean_mask(y_pred, tf.equal(y_true, 1)) 71 | 72 | loop_vars = (tf.constant(0.0), tf.constant(1.0), tf.constant(0.0)) 73 | cond = lambda t, fpr, fnr: tf.greater_equal(fpr, fnr) 74 | body = lambda t, fpr, fnr: ( 75 | t + 0.001, 76 | tf.divide(tf.count_nonzero(tf.greater_equal(scores_imp, t), dtype=tf.float32), n_imp), 77 | tf.divide(tf.count_nonzero(tf.less(scores_gen, t), dtype=tf.float32), n_gen) 78 | ) 79 | t, fpr, fnr = tf.while_loop(cond, body, loop_vars, back_prop=False) 80 | eer = (fpr + fnr) / 2 81 | 82 | return eer 83 | 84 | 85 | def get_metric(name): 86 | metrics = {"recall": tf.keras.metrics.Recall(), 87 | "precision": tf.keras.metrics.Precision(), 88 | "specificity": specificity, 89 | "negative_predictive_value": negative_predictive_value, 90 | "f1": f1, 91 | "fbeta": fbeta, 92 | "equal_error_rate": equal_error_rate, 93 | "matthews_correlation_coefficient": matthews_correlation_coefficient} 94 | keys = list(metrics.keys()) 95 | assert name in keys, print("Cannot find metric " + name, ". Available metrics are {}".format(keys)) 96 | return metrics[name] 97 | 98 | 99 | class DDMetrics: 100 | def __init__(self, model): 101 | self.model = model 102 | self.params = model.count_params() 103 | 104 | @staticmethod 105 | def scaled_performance(y_true, y_pred): 106 | p = precision(y_true, y_pred) 107 | f = f1(y_true, y_pred) 108 | return ((p*p) + (f*f))/2 109 | 110 | def relative_scaled_performance(self, y_true, y_pred): 111 | params = self.params / 1_000_000 112 | sp = self.scaled_performance(y_true, y_pred) 113 | return sp/(1.03 ** params) 114 | 115 | def relative_precision(self, y_true, y_pred): 116 | p = precision(y_true, y_pred) 117 | params = self.params / 1_000_000 118 | return p/params 119 | -------------------------------------------------------------------------------- /Docking/ML/DDModelExceptions.py: -------------------------------------------------------------------------------- 1 | class Error(Exception): 2 | """Base class for other exceptions""" 3 | pass 4 | 5 | 6 | class IncorrectModelModeError(Error): 7 | """Exception raised for errors in the model mode. 8 | 9 | Attributes: 10 | mode -- input mode which caused the error 11 | message -- explanation of the error 12 | """ 13 | def __init__(self, mode, available_modes, message="Incorrect model mode. 
Use one of the following modes:"): 14 | self.mode = mode 15 | self.message = message 16 | self.available_modes = available_modes 17 | 18 | def __str__(self): 19 | mode_string = "\n\n" 20 | for mode in self.available_modes: 21 | mode_string += " " + mode + "\n" 22 | 23 | return f'{self.mode} -> {self.message}' + mode_string 24 | -------------------------------------------------------------------------------- /Docking/ML/Parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Version 1.1.2 3 | """ 4 | import pandas as pd 5 | import numpy as np 6 | 7 | 8 | class Parser: 9 | 10 | @staticmethod 11 | def parse_ddss(path): 12 | 13 | architecture = {} 14 | hyperparameters = {} 15 | history = {} 16 | time = {} 17 | info = {'time': time, 'history': history, 'hyperparameters': hyperparameters, 'architecture': architecture} 18 | 19 | with open(path, 'r') as ddss_file: 20 | lines = ddss_file.readlines() 21 | lines.remove('\n') 22 | 23 | for i, line in enumerate(lines): 24 | line = line.strip('\n') 25 | 26 | # Get the model name 27 | if 'Model mode' in line: 28 | info['name'] = line.split()[-1] 29 | 30 | # Get the model timings 31 | if 'training_time' in line: 32 | split_line = line.split() 33 | time['training_time'] = float(split_line[-1]) 34 | if 'prediction_time' in line: 35 | split_line = line.split() 36 | time['prediction_time'] = float(split_line[-1]) 37 | 38 | # Get the history stats 39 | if 'History Stats' in line: 40 | # Grab everything under the history set 41 | for sub_line in lines[i + 1:]: # search the sub lines under history 42 | if '-' not in sub_line or 'Model has not been trained yet' in sub_line: 43 | break 44 | else: # Split up the lines and stores the values 45 | split_line = sub_line.split()[1:] 46 | history_key = split_line[0].replace(":", "") 47 | 48 | value = [] 49 | for v in split_line[1:]: 50 | value.append(float(v.strip(",").strip('[').strip(']'))) 51 | 52 | # If the list has one value, it should be closed to a scalar 53 | if len(value) == 1: 54 | history[history_key] = value[0] 55 | else: 56 | history[history_key] = value 57 | 58 | # Get the history stats 59 | if 'Hyperparameter Stats' in line: 60 | # search the sub lines under history 61 | for sub_line in lines[i + 1:]: 62 | if '-' not in sub_line or 'Model has not been trained yet' in sub_line: 63 | break 64 | else: 65 | sub_line = sub_line.strip(" - ").strip("\n").strip(" ").split(":") 66 | key = sub_line[0].strip(" ") 67 | value = sub_line[1].strip(" ") 68 | 69 | if '[' in value: 70 | value_list = [] 71 | for char in value: 72 | if char.isnumeric(): 73 | value_list.append(int(char)) 74 | value = value_list 75 | else: 76 | try: 77 | value = float(value) 78 | except ValueError: 79 | # If this value error occurs, it is because it has found the non-decimal 80 | # hyperparameters 81 | value = value 82 | 83 | hyperparameters[key] = value 84 | 85 | if 'total_params' in line or 'trainable_params' in line or 'total_params' in line: 86 | if "Cannot be determined" not in line: 87 | sub_line = line.strip(" - ").strip("\n").strip(" ").split(":") 88 | architecture[sub_line[0]] = int(sub_line[1].replace(",", "")) 89 | 90 | return info 91 | 92 | @staticmethod 93 | def ddss_to_csv(path): 94 | info = Parser.parse_ddss(path) 95 | df = pd.DataFrame() 96 | for key in info.keys(): 97 | print(info[key]) 98 | -------------------------------------------------------------------------------- /Docking/ML/Tokenizer.py: -------------------------------------------------------------------------------- 1 | 
from tensorflow.keras.preprocessing.text import Tokenizer 2 | from tensorflow.keras.preprocessing.sequence import pad_sequences 3 | import numpy as np 4 | 5 | 6 | class DDTokenizer: 7 | def __init__(self, num_words, oov_token=''): 8 | self.tokenizer = Tokenizer(num_words=num_words, 9 | oov_token=oov_token, 10 | filters='!"#$%&*+,-./:;<>?\\^_`{|}~\t\n', 11 | char_level=True, 12 | lower=False) 13 | self.has_trained = False 14 | 15 | self.pad_type = 'post' 16 | self.trunc_type = 'post' 17 | 18 | # The encoded data 19 | self.word_index = {} 20 | 21 | def fit(self, train_data): 22 | # Get max training sequence length 23 | print("Training Tokenizer...") 24 | self.tokenizer.fit_on_texts(train_data) 25 | self.has_trained = True 26 | print("Done training...") 27 | 28 | # Get our training data word index 29 | self.word_index = self.tokenizer.word_index 30 | 31 | def encode(self, data, use_padding=True, padding_size=None, normalize=False): 32 | # Encode training data sentences into sequences 33 | train_sequences = self.tokenizer.texts_to_sequences(data) 34 | 35 | # Get max training sequence length if there is none passed 36 | if padding_size is None: 37 | maxlen = max([len(x) for x in train_sequences]) 38 | else: 39 | maxlen = padding_size 40 | 41 | if use_padding: 42 | train_sequences = pad_sequences(train_sequences, padding=self.pad_type, 43 | truncating=self.trunc_type, maxlen=maxlen) 44 | 45 | if normalize: 46 | train_sequences = np.multiply(1/len(self.tokenizer.word_index), train_sequences) 47 | 48 | return train_sequences 49 | 50 | def pad(self, data, padding_size=None): 51 | # Get max training sequence length if there is none passed 52 | if padding_size is None: 53 | padding_size = max([len(x) for x in data]) 54 | 55 | padded_sequence = pad_sequences(data, padding=self.pad_type, 56 | truncating=self.trunc_type, maxlen=padding_size) 57 | 58 | return padded_sequence 59 | 60 | def decode(self, array): 61 | assert self.has_trained, "Train this tokenizer before decoding a string." 
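        # --- annotation (not part of the original Tokenizer.py): hedged usage sketch ---
        # A typical round trip with DDTokenizer, based only on the methods defined in
        # this class (the SMILES strings below are illustrative placeholders):
        #   tok = DDTokenizer(num_words=100)
        #   tok.fit(["CCO", "c1ccccc1"])                  # character-level fit_on_texts
        #   seqs = tok.encode(["CCO"], padding_size=32)   # padded integer sequences
        #   texts = tok.decode(seqs)                      # space-joined characters back out
        # decode() itself simply defers to Keras' sequences_to_texts, as on the next line.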
62 | return self.tokenizer.sequences_to_texts(array) 63 | 64 | def test(self, string): 65 | encoded = list(self.encode(string)[0]) 66 | decoded = self.decode(self.encode(string)) 67 | 68 | print("\nEncoding:") 69 | print("{original} -> {encoded}".format(original=string[0], encoded=encoded)) 70 | print("\nDecoding:") 71 | print("{original} -> {encoded}".format(original=encoded, encoded=decoded[0].replace(" ", ""))) 72 | 73 | def get_info(self): 74 | return self.tokenizer.index_word 75 | 76 | -------------------------------------------------------------------------------- /Docking/ML/__init__.py: -------------------------------------------------------------------------------- 1 | # from .DDModel import DDModel 2 | # from .Models import Models 3 | # import ML.Parser 4 | -------------------------------------------------------------------------------- /Docking/ML/data_generator.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy as np 3 | 4 | 5 | class DDGenerator: 6 | """ 7 | A data generator 8 | """ 9 | def __init__(self, train_features, train_labels, val_features, val_labels, batch_size, cutoff): 10 | self.cutoff = cutoff 11 | self.batch_size = batch_size 12 | 13 | # The path to the training set and labels 14 | self.train_labels_csv = train_labels 15 | self.train_features_csv = train_features 16 | 17 | # The path to the validation set and labels 18 | self.val_features_csv = val_features 19 | self.val_labels_csv = val_labels 20 | 21 | def train_flow(self): 22 | with open(self.train_features_csv, "r") as csv1, open(self.train_labels_csv, "r") as csv2: 23 | reader1 = csv.reader(csv1) 24 | reader2 = csv.reader(csv2) 25 | for row1, row2 in zip(reader1, reader2): 26 | try: 27 | fp = [None] * self.batch_size 28 | score = [None] * self.batch_size 29 | for i in range(self.batch_size): 30 | morgan = np.array(self.decompress_morgan(row1[1:])) 31 | label = float(row2[0]) > self.cutoff 32 | fp[i] = morgan 33 | score[i] = label 34 | yield np.array(fp).reshape((self.batch_size, 1024)), np.array(score) 35 | except ValueError: 36 | yield 37 | 38 | def val_flow(self): 39 | with open(self.val_features_csv, "r") as csv1, open(self.val_labels_csv, "r") as csv2: 40 | reader1 = csv.reader(csv1) 41 | reader2 = csv.reader(csv2) 42 | for row1, row2 in zip(reader1, reader2): 43 | try: 44 | fp = [None] * self.batch_size 45 | score = [None] * self.batch_size 46 | for i in range(self.batch_size): 47 | morgan = np.array(self.decompress_morgan(row1[1:])) 48 | label = float(row2[0]) > self.cutoff 49 | fp[i] = morgan 50 | score[i] = label 51 | yield np.array(fp).reshape((self.batch_size, 1024)), np.array(score) 52 | except ValueError: 53 | yield 54 | 55 | # Decompress a morgan fingerprint from the dataset 56 | def decompress_morgan(self, mol_info): 57 | # ID_labels is a dataframe containing the zincIDs and their corresponding scores. 58 | morgan = np.zeros(1024, dtype=int) 59 | 60 | # "Decompressing" the information from the file about where the 1s are on the 1024 bit vector. 
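        # (annotation, not in the original file) Worked example of this decompression:
        # if the trailing fields of a row are ['3', '17', '1020'], the loop below
        # produces a length-1024 vector of zeros with ones at indices 3, 17 and 1020.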
61 | # array of indexes of the binary 1s in the 1024 bit vector representing the morgan fingerprint 62 | bit_indices = mol_info 63 | for elem in bit_indices: 64 | morgan[int(elem)] = 1 65 | 66 | return morgan 67 | 68 | 69 | def keras_generator_test(f="", lbl=""): 70 | from tensorflow.keras.layers import Dense 71 | from tensorflow.keras.models import Input, Model 72 | 73 | inputs = Input(shape=[1024]) 74 | x = inputs 75 | x = Dense(10000, activation='relu')(x) 76 | output = Dense(1, activation='sigmoid')(x) 77 | 78 | model = Model(inputs=inputs, outputs=output) 79 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 80 | model.summary() 81 | 82 | generator = DDGenerator(train_features=f, train_labels=lbl, val_features=f, val_labels=lbl, cutoff=-10, 83 | batch_size=32) 84 | model.fit_generator(generator=generator.train_flow(), validation_data=generator.val_flow(), steps_per_epoch=100, 85 | validation_steps=100) 86 | 87 | 88 | -------------------------------------------------------------------------------- /Docking/ML/lasso_regularizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.keras import backend as K 3 | from tensorflow.keras.regularizers import Regularizer 4 | 5 | 6 | class Lasso(Regularizer): 7 | """Regularizer for L21 regularization. 8 | # Arguments 9 | C: Float; L21 regularization factor. 10 | """ 11 | 12 | def __init__(self, C=0.): 13 | self.C = K.cast_to_floatx(C) 14 | 15 | def __call__(self, x): 16 | const_coeff = np.sqrt(K.int_shape(x)[1]) 17 | return self.C*const_coeff*K.sum(K.sqrt(K.sum(K.square(x), axis=1))) 18 | 19 | def get_config(self): 20 | return {'C': float(self.C)} -------------------------------------------------------------------------------- /Docking/ML/load_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.utils import shuffle 3 | 4 | 5 | def load(path_train, path_test): 6 | loaded_train = shuffle(pd.read_pickle(path_train)) 7 | loaded_test = shuffle(pd.read_pickle(path_test)) 8 | 9 | train_x = loaded_train.morgan_fingerprint 10 | train_y = loaded_train.docking_score 11 | 12 | test_x = loaded_test.morgan_fingerprint 13 | test_y = loaded_test.docking_score 14 | 15 | data = DataContainer(train_x, train_y, test_x, test_y) 16 | return data 17 | 18 | 19 | def load_data(): 20 | """These function will load up the data like as done in phase 4 and 5 """ 21 | pass 22 | 23 | 24 | class DataContainer: 25 | def __init__(self, train_x, train_y, test_x, test_y): 26 | self.train_x = train_x 27 | self.train_y = train_y 28 | self.test_x = test_x 29 | self.test_y = test_y 30 | 31 | def __repr__(self): 32 | print("Features:", self.train_x.name, "-> Lables:", self.train_y.name) 33 | print("Train Size:", len(self.train_x)) 34 | print("Test Size:", len(self.test_x)) 35 | print("Hit/Miss ratio:", sum([1 if x else 0 for x in self.train_y])/len(self.train_y) * 100, "%") 36 | return "" 37 | 38 | def __call__(self, *args, **kwargs): 39 | return self.train_x, self.train_y, self.test_x, self.test_y 40 | -------------------------------------------------------------------------------- /Docking/ML/model_tuner.py: -------------------------------------------------------------------------------- 1 | from Docking.ML.DDModel import DDModel 2 | import numpy as np 3 | import tensorflow as tf 4 | import kerastuner as kt 5 | from Docking.ML.utils import * 6 | import Docking.ML.load_data 7 | import IPython 8 | 9 | 10 
| class ClearTrainingOutput(tf.keras.callbacks.Callback): 11 | def on_train_end(*args, **kwargs): 12 | IPython.display.clear_output(wait=True) 13 | 14 | 15 | if __name__ == '__main__': 16 | # Load config 17 | print("Loading Config...") 18 | config = read_tuner_config('tuner_config.txt') 19 | # For hyper band tuner 20 | train_path, test_path = config['training_path'], config['testing_path'] 21 | directory = config['directory'] 22 | project_name = config['project_name'] 23 | objective = config['objective'] 24 | max_trials = config['max_trials'] 25 | max_epochs = config['max_epochs'] 26 | factor = config['factor'] 27 | hyperband_iterations = config['hyperband_iterations'] 28 | direction = config['direction'] 29 | 30 | # For search 31 | steps_per_epoch = config['steps_per_epoch'] 32 | validation_steps = config['validation_steps'] 33 | epochs = config['epochs'] 34 | batch_size = config['batch_size'] 35 | 36 | print("Loading Dataset...") 37 | data = Docking.ML.load_data.load(train_path, test_path) 38 | train_x, train_y, test_x, test_y = data() 39 | train_x, train_y = train_x.tolist(), train_y.tolist() 40 | test_x, test_y = test_x.tolist(), test_y.tolist() 41 | tr_x = np.array(train_x) 42 | tr_y = np.array(train_y) 43 | 44 | tx = np.array(test_x) 45 | ty = np.array(test_y) 46 | 47 | tuner = kt.BayesianOptimization(DDModel.build_tuner_model, 48 | objective=kt.Objective(objective, direction), 49 | project_name=project_name, 50 | directory=directory, 51 | max_trials=max_trials) 52 | 53 | tuner.search_space_summary() 54 | tuner.search(tr_x, tr_y, 55 | validation_data=(tx, ty), epochs=epochs, batch_size=batch_size, 56 | class_weight={0: 2, 1: 1}, 57 | callbacks=[tf.keras.callbacks.EarlyStopping(monitor=objective, 58 | min_delta=0, 59 | patience=3, 60 | verbose=0, 61 | mode=direction)]) 62 | 63 | print("Done...") 64 | 65 | # Show a summary of the search 66 | tuner.results_summary() 67 | 68 | # Retrieve the best 3 models. 69 | 70 | # 1 71 | best_hyperparameters = tuner.get_best_hyperparameters(1)[0] 72 | best_model = tuner.get_best_models(num_models=1)[0] 73 | print("Saving best model...") 74 | 75 | model_location = config['model_location'] + "/" 76 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 77 | model.save(model_location + project_name + "_1st_" + objective + "_" + direction, json=True) 78 | 79 | # 2 80 | best_hyperparameters = tuner.get_best_hyperparameters(2)[1] 81 | best_model = tuner.get_best_models(num_models=2)[1] 82 | print("Saving best model...") 83 | 84 | model_location = config['model_location'] + "/" 85 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 86 | model.save(model_location + project_name + "_2nd_" + objective + "_" + direction, json=True) 87 | 88 | # 3 89 | best_hyperparameters = tuner.get_best_hyperparameters(3)[2] 90 | best_model = tuner.get_best_models(num_models=3)[2] 91 | print("Saving best model...") 92 | 93 | model_location = config['model_location'] + "/" 94 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 95 | model.save(model_location + project_name + "_3rd_" + objective + "_" + direction, json=True) 96 | 97 | print("Saved!") 98 | -------------------------------------------------------------------------------- /Docking/ML/transformer_layers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Version 1.0.0 3 | 4 | Attempts at making a transformer model. 5 | This needs to be redone in the future! 
6 | """ 7 | 8 | import tensorflow as tf 9 | from tensorflow import keras 10 | from tensorflow.keras import layers 11 | 12 | 13 | class MultiHeadSelfAttention(layers.Layer): 14 | def __init__(self, embed_dim, num_heads=8, **kwargs): 15 | super(MultiHeadSelfAttention, self).__init__() 16 | self.embed_dim = embed_dim 17 | self.num_heads = num_heads 18 | if embed_dim % num_heads != 0: 19 | raise ValueError( 20 | f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}" 21 | ) 22 | self.projection_dim = embed_dim // num_heads 23 | self.query_dense = layers.Dense(embed_dim) 24 | self.key_dense = layers.Dense(embed_dim) 25 | self.value_dense = layers.Dense(embed_dim) 26 | self.combine_heads = layers.Dense(embed_dim) 27 | 28 | def get_config(self): 29 | config = super().get_config().copy() 30 | config.update({ 31 | 'embed_dim': self.embed_dim, 32 | 'num_heads': self.num_heads, 33 | }) 34 | return config 35 | 36 | def attention(self, query, key, value): 37 | score = tf.matmul(query, key, transpose_b=True) 38 | dim_key = tf.cast(tf.shape(key)[-1], tf.float32) 39 | scaled_score = score / tf.math.sqrt(dim_key) 40 | weights = tf.nn.softmax(scaled_score, axis=-1) 41 | output = tf.matmul(weights, value) 42 | return output, weights 43 | 44 | def separate_heads(self, x, batch_size): 45 | x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim)) 46 | return tf.transpose(x, perm=[0, 2, 1, 3]) 47 | 48 | def call(self, inputs): 49 | # x.shape = [batch_size, seq_len, embedding_dim] 50 | batch_size = tf.shape(inputs)[0] 51 | query = self.query_dense(inputs) # (batch_size, seq_len, embed_dim) 52 | key = self.key_dense(inputs) # (batch_size, seq_len, embed_dim) 53 | value = self.value_dense(inputs) # (batch_size, seq_len, embed_dim) 54 | query = self.separate_heads( 55 | query, batch_size 56 | ) # (batch_size, num_heads, seq_len, projection_dim) 57 | key = self.separate_heads( 58 | key, batch_size 59 | ) # (batch_size, num_heads, seq_len, projection_dim) 60 | value = self.separate_heads( 61 | value, batch_size 62 | ) # (batch_size, num_heads, seq_len, projection_dim) 63 | attention, weights = self.attention(query, key, value) 64 | attention = tf.transpose( 65 | attention, perm=[0, 2, 1, 3] 66 | ) # (batch_size, seq_len, num_heads, projection_dim) 67 | concat_attention = tf.reshape( 68 | attention, (batch_size, -1, self.embed_dim) 69 | ) # (batch_size, seq_len, embed_dim) 70 | output = self.combine_heads( 71 | concat_attention 72 | ) # (batch_size, seq_len, embed_dim) 73 | return output 74 | 75 | 76 | class TransformerBlock(layers.Layer): 77 | def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs): 78 | super(TransformerBlock, self).__init__() 79 | self.att = MultiHeadSelfAttention(embed_dim, num_heads) 80 | self.ffn = keras.Sequential( 81 | [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),] 82 | ) 83 | self.layernorm1 = layers.LayerNormalization(epsilon=1e-6) 84 | self.layernorm2 = layers.LayerNormalization(epsilon=1e-6) 85 | self.dropout1 = layers.Dropout(rate) 86 | self.dropout2 = layers.Dropout(rate) 87 | 88 | self.embed_dim = embed_dim 89 | self.num_heads = num_heads 90 | self.ff_dim = ff_dim 91 | self.rate = rate 92 | 93 | def get_config(self): 94 | config = super().get_config().copy() 95 | config.update({ 96 | 'embed_dim': self.embed_dim, 97 | 'num_heads': self.num_heads, 98 | 'ff_dim': self.ff_dim, 99 | 'rate': self.rate 100 | }) 101 | return config 102 | 103 | def call(self, inputs, training): 104 | attn_output = self.att(inputs) 
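        # (annotation, not in the original file) Shape note, following the comment
        # convention used in MultiHeadSelfAttention.call above: attn_output is
        # (batch_size, seq_len, embed_dim), the same shape as `inputs`, which is what
        # makes the residual additions inside the two LayerNormalization calls below valid.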
105 | attn_output = self.dropout1(attn_output, training=training) 106 | out1 = self.layernorm1(inputs + attn_output) 107 | ffn_output = self.ffn(out1) 108 | ffn_output = self.dropout2(ffn_output, training=training) 109 | return self.layernorm2(out1 + ffn_output) 110 | 111 | 112 | class TokenAndPositionEmbedding(layers.Layer): 113 | def __init__(self, maxlen, vocab_size, embed_dim, **kwargs): 114 | super(TokenAndPositionEmbedding, self).__init__() 115 | 116 | self.maxlen = maxlen 117 | self.vocab_size = vocab_size 118 | self.embed_dim = embed_dim 119 | 120 | self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim) 121 | self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim) 122 | 123 | def get_config(self): 124 | config = super().get_config().copy() 125 | config.update({ 126 | 'maxlen': self.maxlen, 127 | 'vocab_size': self.vocab_size, 128 | 'embed_dim': self.vocab_size, 129 | }) 130 | return config 131 | 132 | def call(self, x): 133 | maxlen = tf.shape(x)[-1] 134 | positions = tf.range(start=0, limit=maxlen, delta=1) 135 | positions = self.pos_emb(positions) 136 | x = self.token_emb(x) 137 | return x + positions 138 | -------------------------------------------------------------------------------- /Docking/ML/tuner_config.txt: -------------------------------------------------------------------------------- 1 | <> 2 | <> 3 | training_path= 4 | testing_path= 5 | 6 | <> 7 | directory= 8 | 9 | <> 10 | model_location= 11 | 12 | <> 13 | project_name= 14 | 15 | <> 16 | objective=val_loss 17 | 18 | <> 19 | direction=min 20 | 21 | <> 22 | max_trials=10 23 | 24 | <> 25 | max_epochs=15 26 | 27 | <> 28 | factor=3 29 | 30 | <> 31 | hyperband_iterations=3 32 | 33 | 34 | <> 35 | steps_per_epoch=1000 36 | validation_steps=100 37 | epochs=10 38 | batch_size=256 -------------------------------------------------------------------------------- /Docking/ML/utils.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.client import device_lib 2 | 3 | 4 | def get_available_gpus(): 5 | local_device_protos = device_lib.list_local_devices() 6 | return [x.name for x in local_device_protos if x.device_type == 'GPU'] 7 | 8 | 9 | def read_tuner_config(path): 10 | with open(path) as config: 11 | values = {} 12 | comment = "<<" 13 | for line in config.readlines(): 14 | line = line.strip(" ").strip("\n") 15 | if comment not in line and len(line) > 1: 16 | split = line.split("=") 17 | if len(split) > 1: 18 | key = split[0] 19 | value = split[1] 20 | else: 21 | key = split[0] 22 | value = "" 23 | 24 | try: 25 | value = int(value) 26 | except ValueError: 27 | pass 28 | values[key] = value 29 | return values 30 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/Extract_labels.py: -------------------------------------------------------------------------------- 1 | import builtins as __builtin__ 2 | 3 | # For debugging purposes only: 4 | def print(*args, **kwargs): 5 | __builtin__.print('\t extract_L: ', end="") 6 | return __builtin__.print(*args, **kwargs) 7 | 8 | 9 | import glob 10 | from multiprocessing import Pool 11 | from contextlib import closing 12 | import gzip 13 | import os 14 | import argparse 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('-if','--is_final',required=True) 18 | parser.add_argument('-n_it','--iteration_no',required=True) 19 | parser.add_argument('-protein','--protein',required=True) 20 | 
parser.add_argument('-file_path','--file_path',required=True) 21 | parser.add_argument('-t_pos','--tot_process',required=True) 22 | parser.add_argument('-sof','--software',required=True) 23 | 24 | io_args = parser.parse_args() 25 | 26 | is_final = io_args.is_final 27 | n_it = int(io_args.iteration_no) 28 | protein = io_args.protein 29 | file_path = io_args.file_path 30 | tot_process = int(io_args.tot_process) 31 | sof = io_args.software 32 | 33 | if is_final == 'False' or is_final == 'false': 34 | is_final = False 35 | elif is_final == 'True' or is_final == 'true': 36 | is_final = True 37 | else: 38 | raise TypeError('-if parameter must be a boolean (true/false)') 39 | 40 | if sof == 'GLIDE': 41 | key_word = 'r_i_docking_score' 42 | elif sof == 'OEDDOCKING': 43 | key_word = 'FRED Chemgauss4 score' 44 | elif sof == "AUTODOCK_GPU": 45 | key_word = 'ADSCOR' 46 | else: 47 | raise ValueError('Unknown docking software, check line 7 logs.txt and try again.') 48 | 49 | #mol_key = 'ZINC' 50 | print(key_word) 51 | 52 | 53 | def get_scores(ref): 54 | scores = [] 55 | for line in ref: # Looping through the molecules 56 | zinc_id = line.rstrip() 57 | line = ref.readline() 58 | # '$$$' signifies end of molecule info 59 | while line != '' and line[:4] != '$$$$': # Looping through its information and saving scores 60 | 61 | tmp = line.rstrip().split('<')[-1] 62 | 63 | if key_word == tmp[:-1]: 64 | tmpp = float(ref.readline().rstrip()) 65 | if tmpp > 50 or tmpp < -50: 66 | print(zinc_id, tmpp) 67 | else: 68 | scores.append([zinc_id, tmpp]) 69 | 70 | line = ref.readline() 71 | return scores 72 | 73 | 74 | def extract_glide_score(filen): 75 | scores = [] 76 | try: 77 | # Opening the GNU compressed file 78 | with gzip.open(filen, 'rt') as ref: 79 | scores = get_scores(ref) 80 | 81 | except Exception as e: 82 | print('Handled exception: ', e) 83 | # file is already decompressed 84 | with open(filen, 'r') as ref: 85 | scores = get_scores(ref) 86 | 87 | if 'test' in os.path.basename(filen): 88 | new_name = 'testing' 89 | elif 'valid' in os.path.basename(filen): 90 | new_name = 'validation' 91 | elif 'train' in os.path.basename(filen): 92 | new_name = 'training' 93 | else: 94 | print("FAIL! Could not generate new training set. 
Exiting...") 95 | exit() 96 | 97 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/' + new_name + '_' + 'labels.txt', 'w') as ref: 98 | ref.write('r_i_docking_score'+','+'ZINC_ID'+'\n') 99 | for z_id,gc in scores: 100 | ref.write(str(gc)+','+z_id+'\n') 101 | 102 | 103 | if __name__ == '__main__': 104 | files = [] 105 | iter_path = file_path+'/'+protein+'/iteration_'+str(n_it) 106 | 107 | # Checking to see if the labels have already been extracted: 108 | sets = ["training", "testing", "validation"] 109 | files_labels = glob.glob(iter_path+"/*_labels.txt") 110 | foundAll = True 111 | for s in sets: 112 | found = False 113 | print(s) 114 | for f in files_labels: 115 | set_name = f.split('/')[-1].split("_labels.txt")[0] 116 | if set_name == s: 117 | found = True 118 | print('Found') 119 | break 120 | if not found: 121 | foundAll = False 122 | print('Labels not yet extracted -> Not Found') 123 | break 124 | if foundAll: 125 | print('Labels have already been extracted...') 126 | print('Remove "*_labels.text" files in \"'+ iter_path +'\" to re-extract') 127 | exit(0) 128 | 129 | # Checking to see if this is the final iteration to use the right folder 130 | if is_final: 131 | path = file_path+'/'+protein+'/after_iteration/docked/*.sdf*' 132 | else: 133 | path = iter_path+'/docked/*.sdf*' 134 | path_labels = iter_path+'/*labels*' 135 | 136 | for f in glob.glob(path): 137 | files.append(f) 138 | 139 | print("num files in", path, ":", len(files)) 140 | print("Files to extract from:", [os.path.basename(f) for f in files]) 141 | if len(files) == 0: 142 | print('NO FILES IN: ', path) 143 | print('CANCEL JOB...') 144 | exit(1) 145 | 146 | # Parallel running of the extract_glide_score() with each file path of the files array 147 | with closing(Pool(len(files))) as pool: 148 | pool.map(extract_glide_score, files) 149 | 150 | if not is_final: 151 | # renaming from f1_f2_f3 to f3_labels.txt 152 | for f in glob.glob(path_labels): 153 | try: 154 | print(f) 155 | print(iter_path+'/'+f.split('/')[-1].split('_')[2]+'_'+'labels.txt') 156 | os.rename(f, iter_path+'/'+f.split('/')[-1].split('_')[2]+'_'+'labels.txt') 157 | except IndexError: 158 | print("Handled error on renaming", f) # Occurs if it is already correctly named. 
(deprecated use) 159 | 160 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/Extracting_morgan.py: -------------------------------------------------------------------------------- 1 | # Reads the ids found in sampling and finds the corresponding morgan fingerprint 2 | import argparse 3 | import glob 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('-pt', '--protein_name', required=True) 7 | parser.add_argument('-fp', '--file_path', required=True) 8 | parser.add_argument('-it', '--n_iteration', required=True) 9 | parser.add_argument('-md', '--morgan_directory', required=True) 10 | parser.add_argument('-t_pos', '--tot_process', required=True) 11 | 12 | io_args = parser.parse_args() 13 | 14 | import os 15 | from multiprocessing import Pool 16 | import time 17 | from contextlib import closing 18 | import numpy as np 19 | 20 | protein = io_args.protein_name 21 | file_path = io_args.file_path 22 | n_it = int(io_args.n_iteration) 23 | morgan_directory = io_args.morgan_directory 24 | tot_process = int(io_args.tot_process) 25 | 26 | 27 | def extract_morgan(file_name): 28 | train = {} 29 | test = {} 30 | valid = {} 31 | with open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/train_set.txt", 'r') as ref: 32 | for line in ref: 33 | train[line.rstrip()] = 0 34 | with open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/valid_set.txt", 'r') as ref: 35 | for line in ref: 36 | valid[line.rstrip()] = 0 37 | with open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/test_set.txt", 'r') as ref: 38 | for line in ref: 39 | test[line.rstrip()] = 0 40 | 41 | # for file_name in file_names: 42 | ref1 = open( 43 | file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + 'train_' + file_name.split('/')[-1], 'w') 44 | ref2 = open( 45 | file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + 'valid_' + file_name.split('/')[-1], 'w') 46 | ref3 = open(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + 'test_' + file_name.split('/')[-1], 47 | 'w') 48 | 49 | with open(file_name, 'r') as ref: 50 | flag = 0 51 | for line in ref: 52 | tmpp = line.strip().split(',')[0] 53 | if tmpp in train.keys(): 54 | train[tmpp] += 1 55 | fn = 1 56 | if train[tmpp] == 1: flag = 1 57 | elif tmpp in valid.keys(): 58 | valid[tmpp] += 1 59 | fn = 2 60 | if valid[tmpp] == 1: flag = 1 61 | elif tmpp in test.keys(): 62 | test[tmpp] += 1 63 | fn = 3 64 | if test[tmpp] == 1: flag = 1 65 | if flag == 1: 66 | if fn == 1: 67 | ref1.write(line) 68 | if fn == 2: 69 | ref2.write(line) 70 | if fn == 3: 71 | ref3.write(line) 72 | flag = 0 73 | 74 | 75 | def alternate_concat(files): 76 | to_return = [] 77 | with open(files, 'r') as ref: 78 | for line in ref: 79 | to_return.append(line) 80 | return to_return 81 | 82 | 83 | def delete_all(files): 84 | os.remove(files) 85 | 86 | 87 | def morgan_duplicacy(f_name): 88 | flag = 0 89 | mol_list = {} 90 | ref1 = open(f_name[:-4] + '_updated.csv', 'a') 91 | with open(f_name, 'r') as ref: 92 | for line in ref: 93 | tmpp = line.strip().split(',')[0] 94 | if tmpp not in mol_list: 95 | mol_list[tmpp] = 1 96 | flag = 1 97 | if flag == 1: 98 | ref1.write(line) 99 | flag = 0 100 | os.remove(f_name) 101 | 102 | 103 | if __name__ == '__main__': 104 | try: 105 | os.mkdir(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan') 106 | except: 107 | pass 108 | 109 | files = [] 110 | for f in glob.glob(morgan_directory + "/*.txt"): 111 | files.append(f) 112 | 113 | t = 
time.time() 114 | with closing(Pool(np.min([tot_process, len(files)]))) as pool: 115 | pool.map(extract_morgan, files) 116 | print(time.time() - t) 117 | 118 | all_to_delete = [] 119 | for type_to in ['train', 'valid', 'test']: 120 | t = time.time() 121 | files = [] 122 | for f in glob.glob(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + type_to + '*'): 123 | files.append(f) 124 | all_to_delete.append(f) 125 | print(len(files)) 126 | if len(files) == 0: 127 | print("Error in address above") 128 | break 129 | with closing(Pool(np.min([tot_process, len(files)]))) as pool: 130 | to_print = pool.map(alternate_concat, files) 131 | with open(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + type_to + '_morgan_1024.csv', 132 | 'w') as ref: 133 | for file_data in to_print: 134 | for line in file_data: 135 | ref.write(line) 136 | to_print = [] 137 | print(type_to, time.time() - t) 138 | 139 | f_names = [] 140 | for f in glob.glob(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/*morgan*'): 141 | f_names.append(f) 142 | 143 | t = time.time() 144 | with closing(Pool(np.min([tot_process, len(f_names)]))) as pool: 145 | pool.map(morgan_duplicacy, f_names) 146 | print(time.time() - t) 147 | 148 | with closing(Pool(np.min([tot_process, len(all_to_delete)]))) as pool: 149 | pool.map(delete_all, all_to_delete) 150 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/Prediction_morgan_1024.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import time 5 | import warnings 6 | import numpy as np 7 | import pandas as pd 8 | from ML.DDModel import DDModel 9 | 10 | try: 11 | import __builtin__ 12 | except ImportError: 13 | # Python 3 14 | import builtins as __builtin__ 15 | 16 | # For debugging purposes only: 17 | def print(*args, **kwargs): 18 | __builtin__.print('\t sampling: ', end="") 19 | return __builtin__.print(*args, **kwargs) 20 | 21 | warnings.filterwarnings('ignore') 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('-fn','--fn', required=True) 25 | parser.add_argument('-protein', '--protein', required=True) 26 | parser.add_argument('-it', '--it', required=True) 27 | parser.add_argument('-file_path', '--file_path', required=True) 28 | parser.add_argument('-mdd', '--morgan_directory', required=True) 29 | 30 | io_args = parser.parse_args() 31 | fn = io_args.fn 32 | protein = str(io_args.protein) 33 | it = int(io_args.it) 34 | file_path = io_args.file_path 35 | mdd = io_args.morgan_directory 36 | 37 | # This debug feature will allow for speedy testing 38 | DEBUG=False 39 | def prediction_morgan(fname, models, thresh): # TODO: improve runtime with parallelization across multiple nodes 40 | print("Starting Predictions...") 41 | t = time.time() 42 | per_time = 1000000 43 | n_features = 1024 44 | z_id = [] 45 | X_set = np.zeros([per_time, n_features]) 46 | total_passed = 0 47 | 48 | print("We are predicting from the file", fname, "located in", mdd) 49 | with open(mdd+'/'+fname,'r') as ref: 50 | no = 0 51 | for line in ref: 52 | tmp = line.rstrip().split(',') 53 | on_bit_vector = tmp[1:] 54 | z_id.append(tmp[0]) 55 | for elem in on_bit_vector: 56 | X_set[no,int(elem)] = 1 57 | no+=1 58 | if no == per_time: 59 | X_set = X_set[:no, :] 60 | pred = [] 61 | print("We are currently running line", line) 62 | print("(1) Predicting... 
Time elapsed:", time.time() - t, "seconds.") 63 | for model in models: 64 | pred.append(model.predict(X_set)) 65 | 66 | with open(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions/'+fname, 'a') as ref: 67 | for j in range(len(pred[0])): 68 | is_pass = 0 69 | for i,thr in enumerate(thresh): 70 | if float(pred[i][j])>thr: 71 | is_pass += 1 72 | if is_pass >= 1: 73 | total_passed += 1 74 | line = z_id[j]+','+str(float(pred[i][j]))+'\n' 75 | ref.write(line) 76 | X_set = np.zeros([per_time,n_features]) 77 | z_id = [] 78 | no = 0 79 | 80 | # With debug, we will only predict on 'per_time' molecules 81 | if DEBUG: 82 | break 83 | 84 | if no != 0: 85 | X_set = X_set[:no,:] 86 | pred = [] 87 | print("We are currently running line", line) 88 | print("(2) Predicting... Time elapsed:", time.time() - t, "seconds.") 89 | for model in models: 90 | pred.append(model.predict(X_set)) 91 | with open(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions/'+fname, 'a') as ref: 92 | for j in range(len(pred[0])): 93 | is_pass = 0 94 | for i,thr in enumerate(thresh): 95 | if float(pred[i][j])>thr: 96 | is_pass+=1 97 | if is_pass>=1: 98 | total_passed+=1 99 | line = z_id[j]+','+str(float(pred[i][j]))+'\n' 100 | ref.write(line) 101 | print("Prediction time:", time.time() - t) 102 | return total_passed 103 | 104 | 105 | try: 106 | os.mkdir(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions') 107 | except OSError: 108 | print(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions', "already exists") 109 | 110 | thresholds = pd.read_csv(file_path+'/iteration_'+str(it)+'/best_models/thresholds.txt', header=None) 111 | thresholds.columns = ['model_no', 'thresh', 'cutoff'] 112 | 113 | tr = [] 114 | models = [] 115 | for f in glob.glob(file_path+'/iteration_'+str(it)+'/best_models/model_*'): 116 | if "." 
not in f: # skipping over the .ddss & .csv files 117 | mn = int(f.split('/')[-1].split('_')[1]) 118 | tr.append(thresholds[thresholds.model_no == mn].thresh.iloc[0]) 119 | models.append(DDModel.load(file_path+'/iteration_'+str(it)+'/best_models/model_'+str(mn))) 120 | 121 | print("Number of models to predict:", len(models)) 122 | t = time.time() 123 | returned = prediction_morgan(fn, models, tr) 124 | print(time.time()-t) 125 | 126 | with open(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions/passed_file_ct.txt','a') as ref: 127 | ref.write(fn+','+str(returned)+'\n') 128 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/Docking/ProgressiveDocking/__init__.py -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/activation_script.sh: -------------------------------------------------------------------------------- 1 | echo Activating virtual environment 2 | source ~/.bashrc 3 | conda activate DeepDockingRemote -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/autodock_gpu_ad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=autodock 3 | #SBATCH --gres=gpu:1 4 | #SBATCH --mem-per-gpu=0 5 | #SBATCH --output=slurm-phase_3-%x.%j.out 6 | #SBATCH --error=slurm-phase_3-%x.%j.err 7 | 8 | start=`date +%s` 9 | 10 | wg=$1 #WORK-GROUP 11 | sa=$2 #SEARCH ALGORITHM 12 | fl=$3 #FLD FILE 13 | lg=$4 #LIGAND FOLDER 14 | lt=$5 #LIST FILE 15 | ne=$6 #NUMBER OF ENERGY EVALUATIONS 16 | nr=$7 #NUMBER OF RUNS 17 | 18 | ad_path=$8 19 | scripts=$9 20 | 21 | echo Partition: $SLURM_JOB_PARTITION 22 | 23 | # This should activate the conda environment 24 | source ~/.bashrc 25 | source $scripts/activation_script.sh 26 | 27 | rm -f list.txt *dlg *xml init* 28 | echo "$fl">>$lt 29 | for i in $lg'/'*pdbqt 30 | do 31 | echo $i>>$lt 32 | tmp=$(awk -F'/' '{print $NF}'<<<$i) 33 | tmp=$(cut -d'.' -f1<<<$tmp) 34 | echo $tmp>>$lt 35 | done 36 | wait 37 | 38 | $ad_path'/'autodock_gpu_"$wg"wi -lsmet $sa -filelist $lt -nrun $nr -nev $ne 39 | wait $! 
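# A minimal illustration of how the AutoDock-GPU call above expands, assuming the
# sample values that phase_3.sh elsewhere in this repo passes in (wg=64, sa=sw,
# nev=5000000, nr=10); the install path is a placeholder:
#
#   /path/to/AutoDock-GPU/bin/autodock_gpu_64wi -lsmet sw -filelist list.txt -nrun 10 -nev 5000000
#
# list.txt starts with the receptor .fld map file and then alternates each ligand
# .pdbqt path with its base name, exactly as built by the loop above.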
40 | 41 | #EXTRACT SINGLE BEST POSES 42 | dlg_fold=$(pwd) #FOLDER WITH ALL DLG FILES FROM AUTODOCK 43 | mode=lc #ANALYSIS MODE, LARGEST CLUSTER (lc) or BEST BINDING ENERGY (be) 44 | out_fold=$dlg_fold'/'results #OUTPUT FOLDER 45 | out_file=$(echo $dlg_fold | rev | cut -d'/' -f 1 | rev) #OUTPUT SDF FILE (NO EXTENSION) 46 | 47 | rm -r $out_fold 48 | mkdir $out_fold 49 | mkdir $out_fold/pdbqt 50 | 51 | for i in $dlg_fold/*dlg 52 | do 53 | name=$(grep -m 1 'Name' $i|awk '{print $5}') 54 | if [ "$mode" == "be" ]; then 55 | run=$(grep -m 1 'RANKING' $i|awk '{print $3}') 56 | score=$(grep -m 1 'RANKING' $i|awk '{print $4}') 57 | elif [ "$mode" == "lc" ]; then 58 | score=$(grep '#' $i|awk '$9>a {a=$9; b=$3} END {print b}') 59 | run=$(grep '#' $i|awk '$9>a {a=$9; b=$5} END {print b}') 60 | fi 61 | echo "ADSCOR $score">>$out_fold/pdbqt/$name 62 | awk -v p="DOCKED: MODEL $run" '$0~p{f=1} f{print} f&&/DOCKED: ENDMDL/{exit}' $i|cut -c9-|sed '/USER/d;/REMARK/d;/MODEL/d;/TORSDOF/d'>>$out_fold/pdbqt/$name 63 | done 64 | 65 | find $dlg_fold -name '*dlg' -delete 66 | find $dlg_fold -name '*xml' -delete 67 | 68 | cd $out_fold/pdbqt 69 | mkdir ../sdf 70 | obabel -ipdbqt * -osdf -m 71 | cat *sdf>>../sdf/res_$out_file'.'sdf 72 | cd .. 73 | rm -r pdbqt 74 | 75 | end=`date +%s` 76 | echo $((end-start)) 77 | echo finished 78 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/check_phase.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('-pf','--phase_file',required=True) 8 | parser.add_argument('-itr','--iteration_directory',required=True) 9 | io_args = parser.parse_args() 10 | 11 | pf = io_args.phase_file 12 | itr = io_args.iteration_directory 13 | 14 | print(pf,itr) 15 | 16 | if os.path.isfile(itr+'/'+pf)==False: 17 | with open(itr+'/'+pf,'w') as ref: 18 | ref.write(pf.split('.')[0]+'\n') 19 | 20 | while 1 == 1: 21 | with open(itr+'/'+pf,'r') as ref: 22 | name = ref.readline().strip() 23 | if name == 'finished': 24 | sys.exit() 25 | else: 26 | time.sleep(60) 27 | 28 | 29 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/deactivation_script.sh: -------------------------------------------------------------------------------- 1 | echo Deactivating virtual environment 2 | source ~/.bashrc 3 | conda deactivate -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/final_extraction.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import multiprocessing 4 | import pandas as pd 5 | import argparse 6 | import glob 7 | import os 8 | 9 | 10 | def merge_on_smiles(pred_file): 11 | print("Merging " + os.path.basename(pred_file) + "...") 12 | 13 | # Read the predictions 14 | pred = pd.read_csv(pred_file, names=["id", "score"]) 15 | pred.drop_duplicates() 16 | 17 | # Read the smiles 18 | smile_file = os.path.join(args.smile_dir, os.path.basename(pred_file)) 19 | smi = pd.read_csv(smile_file, delimiter=" ", names=["smile", "id"]) 20 | smi = smi.drop_duplicates() 21 | return pd.merge(pred, smi, how="inner", on=["id"]).set_index("id") 22 | 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument("-smile_dir", required=True) 27 | 
parser.add_argument("-morgan_dir", required=True) 28 | parser.add_argument("-processors", required=True) 29 | parser.add_argument("-mols_to_dock", required=False) 30 | 31 | args = parser.parse_args() 32 | predictions = [] 33 | 34 | print("Morgan Dir: " + args.morgan_dir) 35 | print("Smile Dir: " + args.smile_dir) 36 | for file in glob.glob(args.morgan_dir + "/*"): 37 | if "smile" in os.path.basename(file): 38 | print(" - " + os.path.basename(file)) 39 | predictions.append(file) 40 | 41 | try: 42 | # combine the files 43 | print("Finding smiles...") 44 | print("Number of CPUs: " + str(multiprocessing.cpu_count())) 45 | num_jobs = min(len(predictions), int(args.processors)) 46 | with closing(Pool(num_jobs)) as pool: 47 | combined = pool.map(merge_on_smiles, predictions) 48 | except Exception as e: 49 | print("While performing the final extraction, we encountered the following exception:", e) 50 | print("This is likely due to memory issues with multiprocessing and pickling...") 51 | print("We will try again with overloaded_final_extraction.py which is slower but can handle more data.") 52 | with open("final_phase.info", "w") as info: 53 | info.write("Failed") 54 | exit() 55 | 56 | # combine all dataframes 57 | print("Combining " + str(len(combined)) + "dataframes...") 58 | base = pd.concat(combined) 59 | combined = None 60 | 61 | print("Done combining... Sorting!") 62 | base = base.sort_values(by="score", ascending=False) 63 | 64 | print("Resetting Index...") 65 | base.reset_index(inplace=True) 66 | 67 | print("Finished Sorting... Here is the base:") 68 | print(base.head()) 69 | 70 | # Check if we want all of the mols 71 | if args.mols_to_dock == "All": 72 | args.mols_to_dock = None 73 | 74 | if args.mols_to_dock is not None: 75 | mtd = int(args.mols_to_dock) 76 | print("Molecules to dock:", mtd) 77 | print("Total molecules:", len(base)) 78 | 79 | if len(base) <= mtd: 80 | print("Our total molecules are less or equal than the number of molecules to dock -> saving all molecules") 81 | else: 82 | print(f"Our total molecules are more than the number of molecules to dock -> saving {mtd} molecules") 83 | base = base.head(mtd) 84 | 85 | print("Saving") 86 | # Rearrange the smiles 87 | smiles = base.drop('score', 1) 88 | smiles = smiles[["smile", "id"]] 89 | print("Here is the smiles:") 90 | print(smiles.head()) 91 | smiles.to_csv("smiles.csv", sep=" ") 92 | 93 | # Rearrange for id,score 94 | base.drop("smile", 1, inplace=True) 95 | base.to_csv("id_score.csv") 96 | print("Here are the ids and scores") 97 | print(base.head()) 98 | 99 | with open("final_phase.info", "w") as info: 100 | info.write("Finished") 101 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/final_extraction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=24 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_f 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | # Read input 10 | project_path=$1 11 | n_cpus=$2 12 | iteration=$3 13 | scripts=$4 14 | mol_to_dock=$5 15 | 16 | echo Project Path: $project_path 17 | echo Project Name: $(basename "$project_path") 18 | echo Num. CPU: $n_cpus 19 | echo Iteration: $iteration 20 | echo Script Path: $scripts 21 | echo Final Mol. 
To Dock: $mol_to_dock 22 | 23 | # Set constant 24 | smile_directory=`sed -n '5p' $project_path/logs.txt` 25 | 26 | # This should activate the conda environment 27 | source ~/.bashrc 28 | source activation_script.sh 29 | 30 | # cd into the final iteration and run the search 31 | cd $project_path/iteration_$iteration 32 | echo Running >| final_phase.info # created in phase_a 33 | echo Smile Dir: $smile_directory 34 | python -u $scripts/final_extraction.py -smile_dir $smile_directory -morgan_dir $project_path/iteration_$iteration/morgan_1024_predictions/ -processors $n_cpus -mols_to_dock $mol_to_dock 35 | 36 | # If the above final extraction failed, we try another slower version 37 | if grep -Fxq "Failed" final_phase.info 38 | then 39 | echo Running >| final_phase.info 40 | python -u $scripts/GUI/overloaded_final_extraction.py -smile_dir $smile_directory -morgan_dir $project_path/iteration_$iteration/morgan_1024_predictions/ -processors $n_cpus -mols_to_dock $mol_to_dock 41 | fi 42 | 43 | # Clean up the slurm files 44 | echo cleaning slurm files 45 | cd $scripts 46 | python3 $scripts/slurm_file_manager.py --phase 0 --iteration $iteration --project_path $project_path 47 | echo Done 48 | 49 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/jobid_writer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('-file_path', '--file_path', required=True) 6 | # adding parameter for where to save all the data to: 7 | parser.add_argument('-save', '--save_path', required=False, default=None) 8 | parser.add_argument('-n_it', '--iteration_no', required=True) 9 | parser.add_argument('-jid', '--job_id', required=True) # SLURM_JOB_NAME 10 | parser.add_argument('-jn', '--job_name', required=True) # SLURM_JOB_NAME.sh 11 | 12 | io_args = parser.parse_args() 13 | n_it = int(io_args.iteration_no) 14 | job_id = io_args.job_id 15 | job_name = io_args.job_name 16 | 17 | DATA_PATH = io_args.file_path # Now == file_path/protein 18 | SAVE_PATH = io_args.save_path 19 | # if no save path is provided we just save it in the same location as the data 20 | if SAVE_PATH is None: SAVE_PATH = DATA_PATH 21 | 22 | if n_it != -1: # creating the job directory 23 | try: 24 | os.mkdir(SAVE_PATH + '/iteration_' + str(n_it)) 25 | except OSError: # file already exists 26 | pass 27 | with open(SAVE_PATH + '/iteration_' + str(n_it) + '/' + job_name, 'w') as ref: 28 | ref.write(job_id + '\n') 29 | 30 | else: # When n_it == -1 we create a seperate directory (for jobs that occur after an iteration) 31 | try: 32 | os.mkdir(SAVE_PATH + '/after_iteration') 33 | except OSError: 34 | pass 35 | with open(SAVE_PATH + '/after_iteration' + '/' + job_name, 'w') as ref: 36 | ref.write(job_id + '\n') 37 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/molecular_file_count_updated.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import pandas as pd 4 | import numpy as np 5 | import argparse 6 | import glob 7 | import time 8 | import os 9 | 10 | try: 11 | import __builtin__ 12 | except ImportError: 13 | # Python 3 14 | import builtins as __builtin__ 15 | 16 | # For debugging purposes only: 17 | def print(*args, **kwargs): 18 | __builtin__.print('\t molecular_file_count_updated: ', end="") 19 | 
return __builtin__.print(*args, **kwargs) 20 | 21 | def write_mol_count_list(file_name, mol_count_list): 22 | with open(file_name,'w') as ref: 23 | for ct,file_name in mol_count_list: 24 | ref.write(str(ct)+","+file_name.split('/')[-1]) 25 | ref.write("\n") 26 | 27 | 28 | def molecule_count(file_name): 29 | temp = 0 30 | with open(file_name,'r') as ref: 31 | ref.readline() 32 | for line in ref: 33 | temp+=1 34 | return temp, file_name 35 | 36 | 37 | if __name__=='__main__': 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument('-pt','--protein_name',required=True) 40 | parser.add_argument('-it','--n_iteration',required=True) 41 | parser.add_argument('-cdd','--data_directory',required=True) 42 | parser.add_argument('-cpd','--project_directory',required=True) 43 | parser.add_argument('-t_pos','--tot_process',required=True) 44 | parser.add_argument('-t_samp','--tot_sampling',required=True) 45 | io_args = parser.parse_args() 46 | 47 | protein = io_args.protein_name 48 | n_it = int(io_args.n_iteration) 49 | data_directory = io_args.data_directory 50 | project_directory = io_args.project_directory 51 | tot_process = int(io_args.tot_process) 52 | Total_sampling = int(io_args.tot_sampling) 53 | 54 | print("Parsed Args:") 55 | print(" - Iteration:", n_it) 56 | print(" - Data Directory:", data_directory) 57 | print(" - Num process nodes:", tot_process) 58 | print(" - Total Sampling:", Total_sampling) 59 | 60 | # Creating Mol_ct_file.csv if not already created 61 | if not os.path.exists(project_directory + "/Mol_ct_file.csv"): 62 | files = [] 63 | # saving the files: 64 | for f in glob.glob(data_directory+'/*.txt'): 65 | files.append(f) 66 | print("Number Of Files:", len(files)) 67 | 68 | t=time.time() 69 | print("Reading Files...") 70 | # Counting num of molecules in each file 71 | with closing(Pool(np.min([tot_process,len(files)]))) as pool: 72 | mol_count = pool.map(molecule_count, files) 73 | print("Done Reading Files - Time Taken", time.time()-t) 74 | 75 | print("Saving File Count...") # as a Mol_ct_file.csv 76 | try: 77 | write_mol_count_list(project_directory + "/Mol_ct_file.csv", mol_count) 78 | except PermissionError: 79 | print("Mol_ct_file.csv already created by other user") 80 | 81 | # Creating Mol_ct_file_updated.csv if not already created (project specific) 82 | if not os.path.exists(project_directory + "/Mol_ct_file_updated.csv"): 83 | mol_ct = pd.read_csv(project_directory+'/Mol_ct_file.csv',header=None) 84 | mol_ct.columns = ['Number_of_Molecules','file_name'] 85 | 86 | Total_mols_available = np.sum(mol_ct.Number_of_Molecules) 87 | mol_ct['Sample_for_million'] = [int(Total_sampling/Total_mols_available*elem) for elem in mol_ct.Number_of_Molecules] 88 | 89 | mol_ct.to_csv(project_directory+'/Mol_ct_file_updated.csv',sep=',',index=False) 90 | print("Done - Time Taken", time.time()-t) -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/optimize_models.py: -------------------------------------------------------------------------------- 1 | import IPython 2 | import kerastuner as kt 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | try: 7 | import Docking.ML.load_data 8 | from Docking.ML.DDModel import DDModel 9 | from Docking.ML.Models import TunerModel 10 | from Docking.ML.utils import * 11 | except: 12 | import ML.load_data 13 | from ML.DDModel import DDModel 14 | from ML.Models import TunerModel 15 | from ML.utils import * 16 | 17 | 18 | class ClearTrainingOutput(tf.keras.callbacks.Callback): 19 | def 
on_train_end(*args, **kwargs): 20 | IPython.display.clear_output(wait=True) 21 | 22 | 23 | class Config: 24 | def __init__(self, config): 25 | self.directory = config['directory'] 26 | self.project_name = config['project_name'] 27 | self.objective = config['objective'] 28 | self.max_trials = config['max_trials'] 29 | self.max_epochs = config['max_epochs'] 30 | self.factor = config['factor'] 31 | self.hyperband_iterations = config['hyperband_iterations'] 32 | self.direction = config['direction'] 33 | 34 | # For search 35 | self.steps_per_epoch = config['steps_per_epoch'] 36 | self.validation_steps = config['validation_steps'] 37 | self.epochs = config['epochs'] 38 | self.batch_size = config['batch_size'] 39 | self.model_location = config['model_location'] 40 | 41 | 42 | def optimize(technique): 43 | # Load config 44 | print("Loading Config...") 45 | config_file = read_tuner_config('../ML/tuner_config.txt') 46 | tuner_config = Config(config_file) 47 | print("Loading Dataset...") 48 | train_path, test_path = config_file['training_path'], config_file['testing_path'] 49 | data = ML.load_data.load(train_path, test_path) 50 | train_x, train_y, test_x, test_y = data() 51 | train_x, train_y = train_x.tolist(), train_y.tolist() 52 | test_x, test_y = test_x.tolist(), test_y.tolist() 53 | 54 | tr_x = np.array(train_x) 55 | tr_y = np.array(train_y) 56 | tx = np.array(test_x) 57 | ty = np.array(test_y) 58 | 59 | if technique == 'bayesian': 60 | return run_bayesian(tr_x, tr_y, tx, ty, tuner_config, {0: 1, 1: 1}) 61 | 62 | 63 | def run_bayesian(tr_x, tr_y, tx, ty, config: Config, class_weights): 64 | tuner_model = TunerModel(tr_x.shape[1:]) 65 | tuner = kt.BayesianOptimization(tuner_model.build_tuner_model, 66 | objective=kt.Objective(config.objective, config.direction), 67 | project_name=config.project_name, 68 | directory=config.directory, 69 | max_trials=config.max_trials, overwrite=True) 70 | 71 | tuner.search_space_summary() 72 | tuner.search(tr_x, tr_y, 73 | validation_data=(tx, ty), 74 | epochs=config.epochs, 75 | batch_size=config.batch_size, 76 | class_weight=class_weights, 77 | callbacks=[tf.keras.callbacks.EarlyStopping(monitor=config.objective, min_delta=0, patience=3, 78 | verbose=0, mode=config.direction)]) 79 | # Show a summary of the search 80 | tuner.results_summary() 81 | 82 | # Retrieve the best model. 83 | print("Saving the top model...") 84 | best_hyperparameters = tuner.get_best_hyperparameters(1)[0] 85 | print("Top hyperparameters:", best_hyperparameters) 86 | 87 | best_model = tuner.hypermodel.build(best_hyperparameters) 88 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 89 | 90 | for key in best_hyperparameters.values: 91 | print(key, "->", best_hyperparameters[key]) 92 | 93 | return model 94 | 95 | 96 | def run_sklearn(tr_x, tr_y, config: Config, build_model_func): 97 | """ 98 | Runs the bayesian optimization algorithm on an sklearn model. 
99 | """ 100 | from sklearn import metrics, model_selection, ensemble 101 | 102 | # Create the tuner 103 | tuner = kt.tuners.Sklearn( 104 | oracle=kt.oracles.BayesianOptimization(objective=kt.Objective('score', 'max'), 105 | max_trials=config.max_trials), 106 | hypermodel=build_model_func, 107 | scoring=metrics.make_scorer(metrics.precision_score), 108 | cv=model_selection.StratifiedKFold(5), 109 | directory=config.directory, 110 | project_name=config.project_name) 111 | 112 | # Run the search 113 | tuner.search(tr_x, tr_y) 114 | 115 | # Return the best model 116 | return build_model_func(tuner.get_best_hyperparameters(num_trials=1)[0], return_light_model=True) 117 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=24 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_1 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | # ARGS Passed: 10 | iteration=$1 11 | t_cpu=$2 12 | project_path=$3 13 | project_name=$4 14 | mol_to_dock=$5 # Replace with sample size (training set) 15 | local_path=$6 16 | 17 | echo Partition: $SLURM_JOB_PARTITION 18 | echo Args: 19 | echo Iteration: $iteration 20 | echo Total CPUs: $t_cpu 21 | echo Project Path: $project_path 22 | echo Project Name: $project_name 23 | echo Mols To Dock: $mol_to_dock 24 | echo Scripts: $local_path 25 | 26 | # This should activate the conda environment 27 | source ~/.bashrc 28 | source $local_path/activation_script.sh 29 | 30 | 31 | # Set constants 32 | file_path=`sed -n '1p' $project_path/$project_name/logs.txt` 33 | protein=`sed -n '2p' $project_path/$project_name/logs.txt` 34 | n_mol=`sed -n '9p' $project_path/$project_name/logs.txt` 35 | morgan_directory=`sed -n '4p' $project_path/$project_name/logs.txt` 36 | smile_directory=`sed -n '5p' $project_path/$project_name/logs.txt` 37 | sdf_directory=`sed -n '6p' $project_path/$project_name/logs.txt` 38 | 39 | # Set the to be docked 40 | pr_it=$(($1-1)) 41 | # On the first iteration we want to triple the amount we dock so that we can create testing and validation sets 42 | if [ $1 == 1 ] 43 | then 44 | to_d=$((n_mol+n_mol+mol_to_dock)) #n_mol is our test/valid size 45 | else 46 | to_d=$mol_to_dock 47 | fi 48 | echo To Dock: $to_d 49 | 50 | # set the total CPUs 51 | if [ $t_cpu == 64 ];then t_cpu=48;fi 52 | echo Total CPU: $t_cpu 53 | 54 | python jobid_writer.py -file_path $file_path/$protein -n_it $1 -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh 55 | if [ $1 == 1 ];then pred_directory=$morgan_directory;else pred_directory=$file_path/$protein/iteration_$pr_it/morgan_1024_predictions;fi 56 | 57 | python molecular_file_count_updated.py -pt $protein -it $1 -cdd $pred_directory -cpd $file_path/$protein -t_pos $t_cpu -t_samp $to_d 58 | python sampling.py -pt $protein -fp $file_path -it $1 -dd $pred_directory -t_pos $t_cpu -tr_sz $mol_to_dock -vl_sz $n_mol 59 | python sanity_check.py -pt $protein -fp $file_path -it $1 60 | python Extracting_morgan.py -pt $protein -fp $file_path -it $1 -md $morgan_directory -t_pos $t_cpu 61 | python Extracting_smiles.py -pt $protein -fp $file_path -it $1 -fn 0 -smd $smile_directory -sd $sdf_directory -t_pos $t_cpu -if False 62 | 63 | python phase_changer.py -pf phase_1.sh -itr $file_path/$protein/iteration_$1 64 | echo python phase_changer.py -pf phase_1.sh -itr $file_path/$protein/iteration_$1 65 | 66 | 67 
| # Clean up the slurm files 68 | echo cleaning slurm files 69 | python slurm_file_manager.py --phase 1 --iteration $iteration --project_path $project_path/$project_name 70 | 71 | # This extracts the ZINC ids by random sampling and creates the train/valid/test datasets 72 | # - If a smile is found on line l of file f, then its morgan fingerprint is on line l of the equivalent morgan file. 73 | 74 | # how to run phase_1.sh: 75 | # sbatch phase_1.sh iteration t_cpu project_path project_name mol_to_dock 76 | # - Note: mol_to_dock should match the value recorded in the logs file; this is not enforced, so a mismatch can go unnoticed. 77 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=1 3 | #SBATCH --mem=0 # memory per node 4 | #SBATCH --job-name=phase_2 5 | #SBATCH --output=slurm-%x.%j.out 6 | #SBATCH --error=slurm-%x.%j.err 7 | 8 | # Args 9 | extension=$1 # .smi 10 | chunk_n_lines=$2 # 1000 11 | script_path=$3 # path to scripts 12 | project_path=$4 # path to project 13 | iteration=$5 14 | project_name=$(basename "$project_path") 15 | 16 | echo Partition: $SLURM_JOB_PARTITION 17 | echo Args: 18 | echo Extension: $extension 19 | echo Chunk Size: $chunk_n_lines 20 | echo Project Path: $project_path 21 | echo Project Name: $project_name 22 | echo Iteration: $iteration 23 | 24 | slurm_args_no_cpu=$(sed -n '1p' ${script_path}/slurm_args/${project_name}_slurm_args.txt) 25 | 26 | # This should activate the conda environment 27 | source ~/.bashrc 28 | source $script_path/activation_script.sh 29 | 30 | python $script_path/jobid_writer.py -file_path $project_path -n_it $iteration -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh 31 | 32 | # For some reason, running this with the conda environment activated causes an error. 33 | # We must deactivate it before running! 34 | source ~/.bashrc 35 | source $script_path/deactivation_script.sh 36 | 37 | # Move into the project 38 | cd $project_path/iteration_$iteration 39 | 40 | # Start running the chunking 41 | echo Starting Phase 2 42 | echo Chunking Train, Test, and Valid Sets...
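# As an illustration only (extension and chunk size taken from the sample invocation
# at the bottom of this script, and $slurm_args_no_cpu assumed to expand to just a
# partition flag), the first of the three submissions below comes out roughly as:
#
#   sbatch --partition=<cpu_partition> $script_path/split_chunks.sh smile/train_smiles_final_updated.smi .smi train 1000 $script_path $project_name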
43 | sbatch $slurm_args_no_cpu $script_path/split_chunks.sh smile/train_smiles_final_updated.smi $extension train $chunk_n_lines $script_path $project_name 44 | sbatch $slurm_args_no_cpu $script_path/split_chunks.sh smile/test_smiles_final_updated.smi $extension test $chunk_n_lines $script_path $project_name 45 | sbatch $slurm_args_no_cpu $script_path/split_chunks.sh smile/valid_smiles_final_updated.smi $extension valid $chunk_n_lines $script_path $project_name 46 | 47 | # This should activate the conda environment 48 | source ~/.bashrc 49 | source $script_path/activation_script.sh 50 | 51 | # wait for completion 52 | echo Finished Chunking 53 | wait 54 | python $script_path/phase_changer.py -pf phase_2.sh -itr $project_path/iteration_$iteration 55 | echo Phase 2 Finished 56 | 57 | # Clean up the slurm files 58 | echo cleaning slurm files 59 | cd $script_path 60 | python slurm_file_manager.py --phase 2 --iteration $iteration --project_path $project_path 61 | 62 | 63 | # sbatch phase_2.sh .smi 1000 /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDocking /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDockingProjects/full_run_test_james 1 64 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_3 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | # ARGS 10 | PATH_FLD=$1 11 | num_energy_evaluations=$2 12 | num_runs=$3 13 | path_to_auto_dock_gpu=$4 14 | project_path=$5 15 | iteration=$6 16 | scripts=$7 17 | project_name=$(basename "$project_path") 18 | 19 | 20 | echo Partition: $SLURM_JOB_PARTITION 21 | echo Args: 22 | echo FLD Path: $PATH_FLD 23 | echo Energy Evaluations: $num_energy_evaluations 24 | echo Num Runs: $num_runs 25 | echo Path To Autodock: $path_to_auto_dock_gpu 26 | echo Project Path: $project_path 27 | echo Project Name: $project_name 28 | echo Iteration: $iteration 29 | echo Scripts: $scripts 30 | 31 | # getting slurm args for gpu req scripts (with cpus-per-task and gpu_partition) 32 | slurm_args_g=$(sed -n '4p' ${scripts}/slurm_args/${project_name}_slurm_args.txt) 33 | 34 | # This should activate the conda environment 35 | source ~/.bashrc 36 | source $scripts/activation_script.sh 37 | 38 | #path_to_auto_dock_gpu=/groups/cherkasvgrp/autodock/scottlegrand/AutoDock-GPU.relicensing/bin 39 | python jobid_writer.py -file_path $project_path -n_it $iteration -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh --save_path $project_path 40 | 41 | # Run phase 3 42 | cd $project_path/iteration_$iteration 43 | echo Running Phase 3 44 | mkdir res 45 | for i in $(ls -d chunks_smi/*); do fld=$(echo $i | rev | cut -d'/' -f 1 | rev); mkdir res/$fld; cd res/$fld; sbatch $slurm_args_g $scripts/autodock_gpu_ad.sh 64 sw $PATH_FLD ../../$i'/'$fld'_'pdbqt list.txt $num_energy_evaluations $num_runs $path_to_auto_dock_gpu $scripts; cd ../../;done 46 | 47 | cd $scripts 48 | python phase_changer.py -pf phase_3.sh -itr $project_path/iteration_$iteration 49 | 50 | # Clean up the slurm files 51 | echo cleaning slurm files 52 | python slurm_file_manager.py --phase 3 --iteration $iteration --project_path $project_path 53 | echo Done 54 | 55 | #sbatch phase_3.sh 
/groups/cherkasvgrp/share/progressive_docking/development/AD_GPU/autodock_grid/x77_grid.maps.fld 5000000 10 /groups/cherkasvgrp/autodock/scottlegrand/AutoDock-GPU.relicensing/bin /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDockingProjects/full_run_test_james 1 /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDocking 56 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_3_concluding_combination.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd $1 4 | mkdir docked 5 | echo Transfering docked training 6 | cat res/*train*/results/sdf/*sdf >> docked/train_docked.sdf 7 | 8 | echo Transfering docked validation 9 | cat res/*valid*/results/sdf/*sdf >> docked/valid_docked.sdf 10 | 11 | echo Transfering docked testing 12 | cat res/*test*/results/sdf/*sdf >> docked/test_docked.sdf 13 | 14 | echo Done -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=3 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_4 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | ###************************************************************************ 10 | ### Planned Changes: 11 | ### > Reducing the number of passed parameters 12 | ### |- How we currently run phase 4: "sbatch phase_4.sh current_itr n_cpu project_path project_name final_itr? total_itr" 13 | ### |- How we want to run phase 4: "sbatch phase_4.sh current_itr n_cpu project_path/project_name final_itr? 
total_itr path_to_deep_docking_source" 14 | ### > Get rid of smile_directory and sdf_directory for they are unused 15 | ###************************************************************************ 16 | 17 | echo Partition: $SLURM_JOB_PARTITION 18 | echo "Passed Parameters:" 19 | echo "Current Iteration: $1" 20 | echo "Number of CPUs: $2" 21 | echo "Project Path: $3" 22 | echo Project Name: $(basename "$3") 23 | echo "Final Iteration: $4" 24 | echo "Total Iterations: $5" 25 | echo "Path To Deep Docking Source Scripts: $6" 26 | echo "Percent First Mol: $7" 27 | echo "Percent Last Mol: $8" 28 | 29 | # Reading the log file 30 | file_path=`sed -n '1p' $3/logs.txt` 31 | project_name=`sed -n '2p' $3/logs.txt` 32 | morgan_directory=`sed -n '4p' $3/logs.txt` 33 | num_hyperparameters=`sed -n '8p' $3/logs.txt` # number of hyperparameters 34 | docking_software=`sed -n '7p' $3/logs.txt` # The docking software used 35 | 36 | # The number of molecules to train on: 37 | num_molec=`sed -n '9p' $3/logs.txt` 38 | 39 | local_path=$6 # Should be the path to the deep docking source scripts 40 | save_path=$3 41 | 42 | # getting slurm args for gpu req scripts (with cpus-per-task and gpu_partition) 43 | slurm_args_g=$(sed -n '4p' ${local_path}/slurm_args/${project_name}_slurm_args.txt) 44 | 45 | # This should activate the conda environment 46 | source ~/.bashrc 47 | source $local_path/activation_script.sh 48 | 49 | echo "writing jobs" 50 | python jobid_writer.py -file_path $file_path/$project_name -n_it $1 -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh --save_path $save_path 51 | 52 | t_pos=$2 # total number of processors available 53 | echo "Extracting labels" 54 | python Extract_labels.py -if False -n_it $1 -protein $project_name -file_path $file_path -t_pos $t_pos -sof $docking_software 55 | 56 | if [ $? != 0 ]; then 57 | echo "Extract_labels failed... terminating" 58 | exit 59 | fi 60 | 61 | echo "Creating simple jobs" 62 | python simple_job_models.py -n_it $1 -time 00-04:00 -file_path $file_path/$project_name -nhp $num_hyperparameters -titr $5 -n_mol $num_molec --save_path $save_path --percent_first_mols $7 --percent_last_mols $8 63 | 64 | # Executes all the files that were created in the simple_jobs directory 65 | echo "Running simple jobs" 66 | cd $save_path/iteration_$1/simple_job 67 | 68 | # For some reason, running this with the conda environment activated causes an error. 69 | # We must deactivate it before running! 70 | source ~/.bashrc 71 | source $local_path/deactivation_script.sh 72 | for f in *;do sbatch $slurm_args_g $f;done 73 | 74 | echo "running phase_changer" 75 | source ~/.bashrc 76 | source $local_path/activation_script.sh 77 | python $local_path/phase_changer.py -pf phase_4.sh -itr $file_path/$project_name/iteration_$1 78 | 79 | # Clean up the slurm files 80 | echo cleaning slurm files 81 | cd $local_path 82 | python slurm_file_manager.py --phase 4 --iteration $1 --project_path $3 83 | 84 | echo "Done..." 
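# Example submission (argument order read from the echo block at the top of this
# script; every value below is a placeholder):
# sbatch phase_4.sh <current_iteration> <n_cpus> <project_path> <final_iteration?> <total_iterations> <path_to_DD_scripts> <percent_first_mols> <percent_last_mols>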
85 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --ntasks=1 3 | #SBATCH --cpus-per-task=1 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --mem=0 # memory per node 6 | #SBATCH --job-name=phase_5 7 | #SBATCH --output=slurm-%x.%j.out 8 | #SBATCH --error=slurm-%x.%j.err 9 | 10 | ### This will be replacing the old phase 5 script 11 | ### (make sure to decrease all other parameters after 3 when doing so) 12 | 13 | ###************************************************************************ 14 | ### Planned Changes: 15 | ### > Reducing the number of passed parameters to 1 16 | ### |- How we currently run phase 5: "sbatch phase_5.sh iteration path_to_project project_name" 17 | ### |- How we want to run phase 5: "sbatch phase_5.sh iteration path_to_project/project_name progressive_docking_path" 18 | ### > Get rid of smile_directory and sdf_directory for they are unused 19 | ###************************************************************************ 20 | 21 | ### Scan through the logs.txt file 22 | file_path=`sed -n '1p' $2/logs.txt` 23 | protein=`sed -n '2p' $2/logs.txt` # name of project folder 24 | morgan_directory=`sed -n '4p' $2/logs.txt` 25 | num_molec=`sed -n '9p' $2/logs.txt` 26 | 27 | progressive_docking_path=$3 28 | save_path=$2 29 | project_name=$(basename "$2") 30 | 31 | echo Partition: $SLURM_JOB_PARTITION 32 | echo "Passed Parameters:" 33 | echo "Current Iteration: $1" 34 | echo "Project Path: $2" 35 | echo Project Name: $project_name 36 | echo "Scripts: $3" 37 | echo "Number of CPUs: $4" 38 | 39 | # This should activate the conda environment 40 | source ~/.bashrc 41 | source $progressive_docking_path/activation_script.sh 42 | 43 | # getting slurm args for gpu req scripts (with cpus-per-task and gpu_partition) 44 | slurm_args_g=$(sed -n '4p' ${progressive_docking_path}/slurm_args/${project_name}_slurm_args.txt) 45 | 46 | 47 | python jobid_writer.py -file_path $file_path/$protein -n_it $1 -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh --save_path $save_path 48 | 49 | echo "Starting Evaluation" 50 | python -u hyperparameter_result_evaluation.py -n_it $1 --data_path $file_path/$protein -n_mol $num_molec --save_path $save_path 51 | echo "Creating simple_job_predictions" 52 | python simple_job_predictions.py -protein $protein -file_path $file_path -n_it $1 -mdd $morgan_directory --save_path $save_path 53 | 54 | # For some reason, running this with the conda environment activated causes an error. 55 | # We must deactivate it before running! 56 | source ~/.bashrc 57 | source $progressive_docking_path/deactivation_script.sh 58 | cd $save_path/iteration_$1/simple_job_predictions/ 59 | echo "running simple_jobs" 60 | for f in *;do sbatch $slurm_args_g $f; done 61 | 62 | echo "waiting for event phase change" 63 | source ~/.bashrc 64 | source $progressive_docking_path/activation_script.sh 65 | python $progressive_docking_path/phase_changer.py -pf phase_5.sh -itr $file_path/$protein/iteration_$1 66 | 67 | # Now we grab the top hits 68 | source ~/.bashrc 69 | source $progressive_docking_path/deactivation_script.sh 70 | 71 | echo Phase 5 is finished. Now searching for top predicted molecules. 72 | cd $progressive_docking_path/GUI 73 | sbatch run_search.sh $2 $4 $1 1000 #TODO: slurm args for this? 
74 | 75 | # Clean up the slurm files 76 | echo Cleaning slurm files 77 | cd $progressive_docking_path 78 | python slurm_file_manager.py --phase 5 --iteration $1 --project_path $2 --script_path $3 79 | 80 | echo All finished. -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_changer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('-pf', '--phase_file', required=True) 8 | parser.add_argument('-itr', '--iteration_directory', required=True) 9 | io_args = parser.parse_args() 10 | pf = io_args.phase_file 11 | itr_dir = io_args.iteration_directory 12 | 13 | print("Phase Changer:") 14 | print(" - Monitoring: {}".format(pf)) 15 | print(" - Project: {}".format(itr_dir.split("/")[-2] + "/" + itr_dir.split("/")[-1])) 16 | 17 | if pf == 'phase_1.sh' or pf == 'phase_a.sh': 18 | # Change the phase_1.sh file. 19 | with open(itr_dir + '/' + pf, 'w') as ref: 20 | ref.write('finished\n') 21 | print(" - Finished... File Updated") 22 | 23 | elif pf == 'phase_2.sh': 24 | 25 | # Check to see if the slurm jobs are done 26 | while True: 27 | try: 28 | # Check every slurm file in chunk_smi 29 | finished_jobs = 0 30 | running_jobs = glob.glob(itr_dir + '/chunk*/*/slurm*.out') 31 | for running in running_jobs: 32 | # open the out file 33 | with open(running) as file: 34 | # check if it is finished 35 | lines = file.readlines() 36 | if len(lines) > 0 and "finished" in lines[-1]: 37 | finished_jobs += 1 38 | 39 | # if they are all finished, break the loop and finish phase 2 40 | if len(running_jobs) == finished_jobs and len(running_jobs) > 0: 41 | break 42 | else: 43 | time.sleep(30) 44 | except OSError: 45 | time.sleep(30) 46 | 47 | # update the phase file 48 | with open(itr_dir + '/' + pf, 'w') as ref: 49 | ref.write('finished\n') 50 | 51 | elif pf == 'phase_3.sh': 52 | # Check to see if the slurm jobs are done 53 | while True: 54 | try: 55 | # Check every slurm file in res 56 | finished_jobs = 0 57 | running_jobs = glob.glob(itr_dir + '/res*/*/slurm*.out') 58 | for running in running_jobs: 59 | # open the out file 60 | with open(running) as file: 61 | # check if it is finished 62 | lines = file.readlines() 63 | if len(lines) > 0 and "finished" in lines[-1]: 64 | finished_jobs += 1 65 | 66 | # if they are all finished, break the loop and finish phase 2 67 | if len(running_jobs) == finished_jobs and len(running_jobs) > 0: 68 | break 69 | else: 70 | time.sleep(60) 71 | except IOError: 72 | time.sleep(60) 73 | 74 | # Perform the final phase 3 operation 75 | print("Wrapping up phase 3...") 76 | os.system("bash phase_3_concluding_combination.sh " + itr_dir) 77 | 78 | # update the phase file 79 | with open(itr_dir + '/' + pf, 'w') as ref: 80 | ref.write('finished\n') 81 | 82 | elif pf == 'phase_4.sh': 83 | while True: 84 | t_jobs = len(glob.glob(itr_dir + '/simple_job/*.sh')) 85 | t_done = len(glob.glob(itr_dir + '/simple_job/*.out')) 86 | print("total jobs:", t_jobs, "total jobs done:", t_done) 87 | if t_done != t_jobs: 88 | time.sleep(60) 89 | else: 90 | jobids = [] 91 | for f in glob.glob(itr_dir + '/simple_job/*.out'): 92 | tmp = f.split(".")[-2] # slurm-phase_4.786716.out -> ['slurm-phase_4', 786716, out] -> 786716 93 | jobids.append(len(os.popen("squeue | grep " + tmp).read()) == 0) # empty string -> job complete 94 | 95 | print("\t{}/{}".format(jobids.count(True), len(jobids))) 96 | 97 
| if jobids.count(True) == len(jobids): # if num jobs completed == num total jobs 98 | with open(itr_dir + '/' + pf, 'w') as ref: 99 | ref.write('finished\n') 100 | break 101 | else: 102 | time.sleep(60) 103 | 104 | elif pf == 'phase_5.sh': 105 | while 1 == 1: 106 | t_jobs = len(glob.glob(itr_dir + '/simple_job_predictions/*.sh')) 107 | t_done = len(glob.glob(itr_dir + '/simple_job_predictions/*.out')) 108 | if t_done != t_jobs: 109 | time.sleep(60) 110 | else: 111 | jobids = [] 112 | for f in glob.glob(itr_dir + '/simple_job_predictions/*.out'): 113 | tmp = f.split(".")[-2] # slurm-phase_5.786716.out -> ['slurm-phase_4', 786716, out] -> 786716 114 | jobids.append(len(os.popen("squeue | grep " + tmp).read()) == 0) # empty string -> job complete 115 | 116 | print("\t{}/{}".format(jobids.count(True), len(jobids))) 117 | 118 | if jobids.count(True) == len(jobids): # if num jobs completed == num total jobs 119 | with open(itr_dir + '/' + pf, 'w') as ref: 120 | ref.write('finished\n') 121 | break 122 | else: 123 | time.sleep(60) 124 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/prepare_ligands_ad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=prepare 3 | #SBATCH --time=04:00:00 4 | #SBATCH --output=slurm-phase_2-%x.%j.out 5 | #SBATCH --error=slurm-phase_2-%x.%j.err 6 | 7 | script_path=$1 8 | 9 | # This should activate the conda environment 10 | source ~/.bashrc 11 | source $script_path/activation_script.sh 12 | 13 | start=`date +%s` 14 | 15 | name=$(pwd| rev | cut -d'/' -f 1 | rev) 16 | fld=$name'_'pdbqt 17 | 18 | # Uncomment the next three lines if you have openeye and want to do tautomer generation on the fly (instead of preparing the library beforehand); add also #SBATCH --cpus-per-task=20 at the top as openeye uses MPI 19 | # $openeye tautomers -in $name'.'smi -out $name'_'h.smi -maxtoreturn 1 -warts false 20 | # wait $! 21 | # mv $name'_'h.smi $name'.'smi 22 | 23 | # obabel takes a lot longer than openeye, but both of the following lines work for 3d conformer generation 24 | # $openeye oeomega classic -in $name'.'smi -out $name'.'sdf -strictstereo false -maxconfs 1 -mpi_np 20 -log $name'.'log -prefix $name 25 | obabel -ismi $name'.'smi -O $name'.'sdf --gen3d --fast 26 | wait $! 27 | 28 | rm -r $fld 29 | mkdir $fld 30 | cp $name'.'sdf $fld'/' 31 | cd $fld 32 | python $script_path'/'split_sdf.py $name'.'sdf 33 | rm $name'.'sdf 34 | obabel -isdf *sdf -opdbqt -m 35 | wait $! 36 | rm *sdf 37 | 38 | end=`date +%s` 39 | echo $((end-start)) 40 | echo finished 41 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is used for resetting phases and cancelling jobs. It will remove the slurm files and/or cancel jobs that are 3 | associated with the passed project name and username. 
4 | """ 5 | 6 | import argparse 7 | import glob 8 | import csv 9 | import os 10 | 11 | 12 | def judge(slurm_files, project_name, phase_job, remove_slurms=True, test=False): 13 | """ Judges whether or not a file should be removed or jobs should be cancelled""" 14 | 15 | if remove_slurms: 16 | # Look at every file 17 | for slurm_file in slurm_files: 18 | # Read file 19 | with open(slurm_file, "r") as file: 20 | # Look at each line of file 21 | for line in file: 22 | # If the project name is in the header 23 | if "Project Name:" in line and project_name in line: 24 | if not test: 25 | # Get the job ID to cancel 26 | job_id = slurm_file.split(".")[1] 27 | os.system("scancel " + str(job_id)) 28 | 29 | # Remove the files 30 | os.remove(slurm_file) 31 | os.remove(slurm_file.replace("out", "err")) 32 | 33 | print("Judged", os.path.basename(slurm_file)) 34 | break 35 | 36 | # Reads phase_jobs.csv and cancels each job 37 | print("Cancelling Jobs...") 38 | 39 | # Get the ids 40 | ids = [] 41 | with open(phase_job, 'r') as file: 42 | # Read csv 43 | reader = [row for row in csv.reader(file)] 44 | # get the job id index 45 | index = reader[0].index("job_id") 46 | # get the index of the job_ids 47 | rows = reader[1:] 48 | # get the ids 49 | for row in rows: 50 | ids.append(row[index]) 51 | 52 | for jid in ids: 53 | print("Cancelling Job", jid) 54 | if not test: 55 | os.system("scancel " + str(jid)) 56 | 57 | 58 | if __name__ == "__main__": 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument("--project_name", type=str) 61 | parser.add_argument("--username", type=str) 62 | parser.add_argument("--scripts", type=str) 63 | parser.add_argument("--remove_slurms", type=bool, default=True, required=False) 64 | parser.add_argument("--test", type=bool, default=False, required=False) 65 | 66 | args = parser.parse_args() 67 | 68 | # Grab all of the slurm files 69 | files = glob.glob("slurm-*.out") + glob.glob("GUI/slurm-*.out") 70 | files = [args.scripts + "/" + f for f in files] 71 | pj = args.scripts + f"/GUI/Users/{args.username}/" + args.username + "_phase_jobs.csv" 72 | judge(files, project_name=args.project_name, phase_job=pj, remove_slurms=args.remove_slurms, test=args.test) 73 | 74 | 75 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset1.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 1 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | 9 | rm -r * 10 | 11 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset2.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 2 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | rm -r chunks_smi slurm-* -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset3.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 3 on $(basename $1)... 
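# Usage sketch, inferred from how the arguments are consumed below and by reset.py
# (not confirmed by a caller shown in this repo):
#   bash reset3.sh <iteration_directory> <project_name> <username> <scripts_path>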
2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | 9 | rm -r res 10 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset4.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 4 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | 9 | rm -r all_models hyperparameter* model_no.txt morgan_1024_predictions simple_job* 10 | rm -r best_model* 11 | rm testing* validation* training* 12 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset5.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 5 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | cd $1 7 | rm -r simple_job_predictions/ morgan_1024_predictions/ 8 | echo Done -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/sampling.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | from multiprocessing import Pool 3 | import pandas as pd 4 | import numpy as np 5 | import argparse 6 | import glob 7 | import time 8 | import os 9 | 10 | try: 11 | import __builtin__ 12 | except ImportError: 13 | # Python 3 14 | import builtins as __builtin__ 15 | 16 | # For debugging purposes only: 17 | def print(*args, **kwargs): 18 | __builtin__.print('\t sampling: ', end="") 19 | return __builtin__.print(*args, **kwargs) 20 | 21 | 22 | def train_valid_test(file_name): 23 | sampling_start_time = time.time() 24 | f_name = file_name.split('/')[-1] 25 | mol_ct = pd.read_csv(PROJECT_PATH+"/Mol_ct_file_updated.csv", index_col=1) 26 | if n_it == 1: 27 | to_sample = int(mol_ct.loc[f_name].Sample_for_million/(rt_sz+2)) 28 | else: 29 | to_sample = int(mol_ct.loc[f_name].Sample_for_million/3) 30 | 31 | total_len = int(mol_ct.loc[f_name].Number_of_Molecules) 32 | shuffle_array = np.linspace(0, total_len-1, total_len) 33 | np.random.shuffle(shuffle_array) 34 | 35 | if n_it == 1: 36 | train_ind = shuffle_array[:int(rt_sz*to_sample)] 37 | valid_ind = shuffle_array[int(to_sample*rt_sz):int(to_sample*(rt_sz+1))] 38 | test_ind = shuffle_array[int(to_sample*(rt_sz+1)):int(to_sample*(rt_sz+2))] 39 | else: 40 | train_ind = shuffle_array[:to_sample] 41 | valid_ind = shuffle_array[to_sample:to_sample*2] 42 | test_ind = shuffle_array[to_sample*2:to_sample*3] 43 | 44 | train_ind_dict = {} 45 | valid_ind_dict = {} 46 | test_ind_dict = {} 47 | 48 | train_set = open(PROJECT_PATH + "/iteration_" + str(n_it) + "/train_set.txt", 'a') 49 | test_set = open(PROJECT_PATH + "/iteration_" + str(n_it) + "/test_set.txt", 'a') 50 | valid_set = open(PROJECT_PATH + "/iteration_" + str(n_it) + "/valid_set.txt", 'a') 51 | # smiles = open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/smile_locations.csv", 'a') 52 | 53 | for i in train_ind: 54 | train_ind_dict[i] = 1 55 | for j in valid_ind: 56 | valid_ind_dict[j] = 1 57 | for k in test_ind: 58 | test_ind_dict[k] = 1 59 | 60 | # Opens the file and write the test, train, and valid files 61 | with 
open(file_name, 'r') as ref: 62 | for ind, line in enumerate(ref): 63 | molecule_id = line.strip().split(',')[0] 64 | if ind == 1: 65 | print("molecule_id:", molecule_id) 66 | 67 | # now we write to the train, test, and validation sets 68 | # we also add to the 69 | if ind in train_ind_dict.keys(): 70 | train_set.write(molecule_id + '\n') 71 | 72 | # Grabs the file number 73 | # The file is actually "smile_all_n.txt" but I only save n 74 | # smile_location = f_name.split("_")[-1].split(".")[0] 75 | # smiles.write("{set},{file_number}\n".format(set="trn", file_number=smile_location)) 76 | elif ind in valid_ind_dict.keys(): 77 | valid_set.write(molecule_id + '\n') 78 | 79 | # Grabs the file number 80 | # The file is actually "smile_all_n.txt" but I only save n 81 | # smile_location = f_name.split("_")[-1].split(".")[0] 82 | # smiles.write("{set},{file_number}\n".format(set="vld", file_number=smile_location)) 83 | elif ind in test_ind_dict.keys(): 84 | test_set.write(molecule_id + '\n') 85 | 86 | # Grabs the file number 87 | # The file is actually "smile_all_n.txt" but I only save n 88 | # smile_location = f_name.split("_")[-1].split(".")[0] 89 | # smiles.write("{set},{file_number}\n".format(set="tst", file_number=smile_location)) 90 | 91 | train_set.close() 92 | valid_set.close() 93 | test_set.close() 94 | # smiles.close() 95 | print("Process finished sampling in " + str(time.time()-sampling_start_time)) 96 | 97 | if __name__ == '__main__': 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument('-pt', '--protein_name',required=True) 100 | parser.add_argument('-fp', '--file_path',required=True) 101 | parser.add_argument('-it', '--n_iteration',required=True) 102 | parser.add_argument('-dd', '--data_directory',required=True) 103 | parser.add_argument('-t_pos', '--tot_process',required=True) 104 | parser.add_argument('-tr_sz', '--train_size',required=True) 105 | parser.add_argument('-vl_sz', '--val_size',required=True) 106 | io_args = parser.parse_args() 107 | 108 | protein = io_args.protein_name 109 | file_path = io_args.file_path 110 | n_it = int(io_args.n_iteration) 111 | data_directory = io_args.data_directory 112 | tot_process = int(io_args.tot_process) 113 | tr_sz = int(io_args.train_size) 114 | vl_sz = int(io_args.val_size) 115 | rt_sz = tr_sz/vl_sz 116 | 117 | PROJECT_PATH = file_path + '/' + protein 118 | 119 | print("Parsed Args:") 120 | print(" - Iteration:", n_it) 121 | print(" - Data Directory:", data_directory) 122 | print(" - Project Directory:", PROJECT_PATH) 123 | print(" - Training Size:", tr_sz) 124 | print(" - Validation Size:", vl_sz) 125 | print(" - tot_process: ", tot_process) 126 | 127 | try: 128 | os.mkdir(PROJECT_PATH+"/iteration_"+str(n_it)) 129 | except OSError: 130 | pass 131 | 132 | f_names = [] 133 | # Getting all the morgan_1024_predictions/smile_all_##.txt files 134 | for f in glob.glob(data_directory+'/smile*_all_*.txt'): 135 | f_names.append(f) 136 | 137 | print("num_f_names: ", len(f_names)) 138 | 139 | t = time.time() 140 | print("Starting Processes...") 141 | with closing(Pool(np.min([tot_process, len(f_names)]))) as pool: 142 | pool.map(train_valid_test, f_names) 143 | 144 | print("Compressing smile file...") 145 | # old_file_size = os.path.getsize(file_path + '/' + protein + "/iteration_" + str(n_it) + "/smile_locations.csv") 146 | # 147 | # new_file_size = os.path.getsize(file_path + '/' + protein + "/iteration_" + str(n_it) + "/smile_locations.csv") 148 | # print(" - Uncompressed file size: {}\n" 149 | # " - Compressed file size: {}\n" 150 | # 
" - Ratio: {}".format(old_file_size, new_file_size, (old_file_size/new_file_size) * 100)) 151 | print("Sampling Complete - Total Time Taken:", time.time()-t) 152 | 153 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/sanity_check.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('-pt','--protein_name',required=True) 6 | parser.add_argument('-fp','--file_path',required=True) 7 | parser.add_argument('-it','--n_iteration',required=True) 8 | 9 | io_args = parser.parse_args() 10 | import time 11 | 12 | protein = io_args.protein_name 13 | file_path = io_args.file_path 14 | n_it = int(io_args.n_iteration) 15 | 16 | old_dict = {} 17 | for i in range(1,n_it): 18 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(i)+'/training_labels*')[-1]) as ref: 19 | ref.readline() 20 | for line in ref: 21 | tmpp = line.strip().split(',')[-1] 22 | old_dict[tmpp] = 1 23 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(i)+'/validation_labels*')[-1]) as ref: 24 | ref.readline() 25 | for line in ref: 26 | tmpp = line.strip().split(',')[-1] 27 | old_dict[tmpp] = 1 28 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(i)+'/testing_labels*')[-1]) as ref: 29 | ref.readline() 30 | for line in ref: 31 | tmpp = line.strip().split(',')[-1] 32 | old_dict[tmpp] = 1 33 | 34 | t=time.time() 35 | new_train = {} 36 | new_valid = {} 37 | new_test = {} 38 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(n_it)+'/train_set*')[-1]) as ref: 39 | for line in ref: 40 | tmpp = line.strip().split(',')[0] 41 | new_train[tmpp] = 1 42 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(n_it)+'/valid_set*')[-1]) as ref: 43 | for line in ref: 44 | tmpp = line.strip().split(',')[0] 45 | new_valid[tmpp] = 1 46 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(n_it)+'/test_set*')[-1]) as ref: 47 | for line in ref: 48 | tmpp = line.strip().split(',')[0] 49 | new_test[tmpp] = 1 50 | print(time.time()-t) 51 | 52 | t=time.time() 53 | for keys in new_train.keys(): 54 | if keys in new_valid.keys(): 55 | new_valid.pop(keys) 56 | if keys in new_test.keys(): 57 | new_test.pop(keys) 58 | for keys in new_valid.keys(): 59 | if keys in new_test.keys(): 60 | new_test.pop(keys) 61 | print(time.time()-t) 62 | 63 | for keys in old_dict.keys(): 64 | if keys in new_train.keys(): 65 | new_train.pop(keys) 66 | if keys in new_valid.keys(): 67 | new_valid.pop(keys) 68 | if keys in new_test.keys(): 69 | new_test.pop(keys) 70 | 71 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/train_set.txt','w') as ref: 72 | for keys in new_train.keys(): 73 | ref.write(keys+'\n') 74 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/valid_set.txt','w') as ref: 75 | for keys in new_valid.keys(): 76 | ref.write(keys+'\n') 77 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/test_set.txt','w') as ref: 78 | for keys in new_test.keys(): 79 | ref.write(keys+'\n') 80 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "progressive_docking": { 3 | "metrics": ["accuracy", "recall", "precision"], 4 | "epochs": 500, 5 | "early_stopping_monitor": "val_loss", 6 | "early_stopping_min_delta": 0, 7 | "early_stopping_patience": 10, 8 | 
"early_stopping_mode": "auto", 9 | "time_limit": 36000 10 | } 11 | } -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/setup_slurm_specifications.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script will be called when a new project is created or a new project is loaded. 3 | It will adjust the number of cpus in each slurm script according to the passed n_cpu argument. 4 | It will also change the partition for the slurm scrips. 5 | 6 | v1.0.1 7 | """ 8 | 9 | import os 10 | 11 | def save_slurm_arg(project_name, path, n_cpu, cpu_partition, gpu_partition, custom_headers=None): 12 | # this saves all the slurm arguments as a single line so that it can 13 | # be called on as arguments to sbatch submissions 14 | try: 15 | os.mkdir("slurm_args") 16 | except: # folder already exists. 17 | pass 18 | 19 | with open(f"./slurm_args/{project_name}_slurm_args.txt", "w") as f: 20 | # "#SBATCH h1#SBATCH h2...#SBATCH hn" --> "h1 h2 ... hn" 21 | slurm_args = " ".join(custom_headers.split("#SBATCH")).strip() if custom_headers is not None else "" 22 | slurm_args_cpart = slurm_args 23 | slurm_args_cpart += " --partition=" + cpu_partition if cpu_partition is not None and "partition" not in slurm_args else "" 24 | f.write(slurm_args_cpart + "\n") # 1: write without cpu arg for non-gpu scripts 25 | 26 | slurm_args_cpart += " --cpus-per-task="+str(n_cpu) if n_cpu is not None and "cpus-per-task" not in slurm_args else "" 27 | f.write(slurm_args_cpart + "\n") # 2: write with cpu arg for non-gpu scripts 28 | 29 | slurm_args_gpart = slurm_args 30 | slurm_args_gpart += " --partition=" + gpu_partition if gpu_partition is not None and "partition" not in slurm_args else "" 31 | f.write(slurm_args_gpart + "\n") # 3: write without cpu arg for gpu req scripts 32 | 33 | slurm_args_gpart += " --cpus-per-task="+str(n_cpu) if n_cpu is not None and "cpus-per-task" not in slurm_args else "" 34 | f.write(slurm_args_gpart + "\n") # 4: write with cpu arg for gpu req scripts 35 | 36 | 37 | if __name__ == "__main__": 38 | import argparse 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument("--path", type=str, required=True) 41 | parser.add_argument("--n_cpu", type=int, required=True) 42 | parser.add_argument("--cpu_partition", type=str, required=True) 43 | parser.add_argument("--gpu_partition", type=str, required=True) 44 | parser.add_argument("--custom_headers", type=str, required=True) 45 | parser.add_argument("--project_name", type=str, required=True) 46 | 47 | args = parser.parse_args() 48 | 49 | # Set to None if none were passed 50 | if args.cpu_partition == "": 51 | args.cpu_partition = None 52 | 53 | if args.gpu_partition == "": 54 | args.gpu_partition = None 55 | 56 | if args.custom_headers == "": 57 | args.custom_headers = None 58 | 59 | save_slurm_arg(project_name=args.project_name, path=args.path, n_cpu=args.n_cpu, 60 | cpu_partition=args.cpu_partition, gpu_partition=args.gpu_partition, 61 | custom_headers=args.custom_headers) 62 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/simple_job_predictions.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('-protein', '--protein', required=True) 7 | parser.add_argument('-file_path', '--file_path', required=True) 8 | parser.add_argument('-n_it', 
'--n_it', required=True) 9 | parser.add_argument('-mdd', '--morgan_directory', required=True) 10 | 11 | # adding parameter for where to save all the data to: 12 | parser.add_argument('-save', '--save_path', required=False, default=None) 13 | 14 | io_args = parser.parse_args() 15 | 16 | protein = io_args.protein 17 | n_it = int(io_args.n_it) 18 | mdd = io_args.morgan_directory 19 | 20 | DATA_PATH = io_args.file_path # Now == file_path/protein 21 | SAVE_PATH = io_args.save_path 22 | 23 | 24 | # if no save path is provided we just save it in the same location as the data 25 | if SAVE_PATH is None: SAVE_PATH = DATA_PATH 26 | add = mdd 27 | 28 | try: 29 | os.mkdir(SAVE_PATH + '/iteration_' + str(n_it) + '/simple_job_predictions') 30 | except OSError: 31 | pass 32 | 33 | for f in glob.glob(SAVE_PATH + '/iteration_' + str(n_it) + '/simple_job_predictions/*'): 34 | os.remove(f) 35 | 36 | time = '0-10:30' 37 | 38 | # temp = [] 39 | part_files = [] 40 | 41 | for i, f in enumerate(glob.glob(add + '/*.txt')): 42 | part_files.append(f) 43 | 44 | ct = 1 45 | for f in part_files: 46 | with open(SAVE_PATH + '/iteration_' + str(n_it) + '/simple_job_predictions/simple_job_' + str(ct) + '.sh', 47 | 'w') as ref: 48 | ref.write('#!/bin/bash\n') 49 | ref.write('#SBATCH --ntasks=1\n') 50 | ref.write('#SBATCH --gres=gpu:1\n') 51 | ref.write('#SBATCH --cpus-per-task=1\n') 52 | ref.write('#SBATCH --job-name=phase_5\n') 53 | ref.write('#SBATCH --mem=0 # memory per node\n') 54 | ref.write('#SBATCH --time=' + time + ' # time (DD-HH:MM)\n') 55 | ref.write("#SBATCH --output=slurm-phase_5-%x.%j.out\n") 56 | ref.write("#SBATCH --error=slurm-phase_5-%x.%j.err\n") 57 | ref.write('\n') 58 | ref.write("echo Partition: $SLURM_JOB_PARTITION \n") 59 | 60 | cwd = os.getcwd() 61 | ref.write('cd {}\n'.format(cwd)) 62 | ref.write('source ~/.bashrc\n') 63 | ref.write('source activation_script.sh\n') 64 | ref.write('python -u ' + 'Prediction_morgan_1024.py' + ' ' + '-fn' + ' ' + f.split('/')[ 65 | -1] + ' ' + '-protein' + ' ' + protein + ' ' + '-it' + ' ' + str(n_it) + ' ' + '-mdd' + ' ' + str( 66 | mdd) + ' ' + '-file_path' + ' ' + SAVE_PATH + '\n') 67 | ref.write("\n echo complete") 68 | 69 | ct += 1 70 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/slurm_file_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--phase", type=int) 8 | parser.add_argument("--iteration", type=int) 9 | parser.add_argument("--project_path", type=str) 10 | parser.add_argument("--script_path", type=str, required=False) 11 | 12 | args = parser.parse_args() 13 | 14 | # Make a directory for all of the slurm files 15 | try: 16 | os.mkdir("slurm_out_files") 17 | except OSError: 18 | pass 19 | 20 | # Create a directory for the project 21 | project_name = os.path.basename(args.project_path) 22 | try: 23 | os.mkdir("slurm_out_files/{}".format(project_name)) 24 | except OSError: 25 | pass 26 | 27 | # Set the phase to "a" if 0 is passed 28 | if args.phase == 0: 29 | args.phase = "final" 30 | 31 | # Create a phase directory 32 | phase_dir = "slurm_out_files/{}/slurm_itr_{}_phase_{}".format(project_name, args.iteration, args.phase) 33 | project_path = args.project_path 34 | 35 | print("Target Project:", project_name) 36 | print("Iteration:", args.iteration) 37 | print("Phase:", args.phase) 38 | 39 | 40 | def make_the_move(f_err, 
f_out): 41 | # Moves slurm files that belong to the project "project_name" 42 | 43 | # move all slurm files 44 | for err, out in zip(f_err, f_out): 45 | 46 | # Read the file and make sure it belongs to the desired project 47 | with open(out, "r") as out_file: 48 | for line in out_file: 49 | # Move file if the project path is embedded within its own path (meaning it is in the project dir) 50 | # OR 51 | # If the out file has the line Project Name: project_name then move it 52 | if project_path in out or "Project Name:" in line and project_name in line: 53 | # Move the error files 54 | try: 55 | shutil.move(err, "{phase_dir}/{err}".format(phase_dir=phase_dir, err=os.path.basename(err))) 56 | except IOError: 57 | print("Error on:", err) 58 | pass 59 | 60 | # Move the out files 61 | try: 62 | shutil.move(out, "{phase_dir}/{out}".format(phase_dir=phase_dir, out=os.path.basename(out))) 63 | except IOError: 64 | print("Error on:", out) 65 | pass 66 | 67 | break 68 | 69 | 70 | # Running after phase 1 71 | if args.phase == 1: 72 | # Create a directory for all of the slurm files 73 | try: 74 | os.mkdir(phase_dir) 75 | except OSError: 76 | print("The directory {} already exists.".format(phase_dir)) 77 | 78 | # Gather the slurm files 79 | slurm_err = glob.glob("slurm-phase_1*.err") 80 | slurm_out = glob.glob("slurm-phase_1*.out") 81 | 82 | make_the_move(slurm_err, slurm_out) 83 | 84 | elif args.phase == 2: 85 | # Create a directory for all of the slurm files 86 | try: 87 | os.mkdir(phase_dir) 88 | except OSError: 89 | print("The directory {} already exists.".format(phase_dir)) 90 | 91 | # Gather the slurm files in the script dir 92 | slurm_err = glob.glob("slurm-phase_2*.err") 93 | slurm_out = glob.glob("slurm-phase_2*.out") 94 | 95 | # Gather the project dir 96 | slurm_err += glob.glob("{}/iteration_{}/slurm-phase_2*.err".format(args.project_path, args.iteration)) 97 | slurm_out += glob.glob("{}/iteration_{}/slurm-phase_2*.out".format(args.project_path, args.iteration)) 98 | 99 | # Grab the slurm files in the chunks 100 | slurm_err += glob.glob("{}/iteration_{}/chunk*/*/slurm*.err".format(args.project_path, args.iteration)) 101 | slurm_out += glob.glob("{}/iteration_{}/chunk*/*/slurm*.out".format(args.project_path, args.iteration)) 102 | 103 | make_the_move(slurm_err, slurm_out) 104 | 105 | elif args.phase == 3: 106 | # Create a directory for all of the slurm files 107 | try: 108 | os.mkdir(phase_dir) 109 | except OSError: 110 | print("The directory {} already exists.".format(phase_dir)) 111 | 112 | # Gather the slurm files in the script dir 113 | slurm_err = glob.glob("slurm-phase_3*.err") 114 | slurm_out = glob.glob("slurm-phase_3*.out") 115 | 116 | # Gather the project dir 117 | slurm_err += glob.glob("{}/iteration_{}/slurm-phase_3*.err".format(args.project_path, args.iteration)) 118 | slurm_out += glob.glob("{}/iteration_{}/slurm-phase_3*.out".format(args.project_path, args.iteration)) 119 | 120 | # Grab the slurm files in the res 121 | slurm_err += glob.glob("{}/iteration_{}/res/*/slurm-phase_3*.err".format(args.project_path, args.iteration)) 122 | slurm_out += glob.glob("{}/iteration_{}/res/*/slurm-phase_3*.out".format(args.project_path, args.iteration)) 123 | 124 | make_the_move(slurm_err, slurm_out) 125 | 126 | 127 | elif args.phase == 4: 128 | # Create a directory for all of the slurm files 129 | try: 130 | os.mkdir(phase_dir) 131 | except OSError: 132 | print("The directory {} already exists.".format(phase_dir)) 133 | 134 | # Gather the slurm files in the script dir 135 | slurm_err = 
glob.glob("slurm-phase_4*.err") 136 | slurm_out = glob.glob("slurm-phase_4*.out") 137 | 138 | # Grab the slurm files in the simple_jobs 139 | slurm_err += glob.glob("{}/iteration_{}/simple*/slurm-phase_4*.err".format(args.project_path, args.iteration)) 140 | slurm_out += glob.glob("{}/iteration_{}/simple*/slurm-phase_4*.out".format(args.project_path, args.iteration)) 141 | 142 | make_the_move(slurm_err, slurm_out) 143 | 144 | 145 | elif args.phase == 5: 146 | # Create a directory for all of the slurm files 147 | try: 148 | os.mkdir(phase_dir) 149 | except OSError: 150 | print("The directory {} already exists.".format(phase_dir)) 151 | 152 | # Gather the slurm files in the script dir 153 | slurm_err = glob.glob("slurm-phase_5*.err") 154 | slurm_out = glob.glob("slurm-phase_5*.out") 155 | 156 | # Grab the slurm files in the simple_jobs 157 | slurm_err += glob.glob("{}/iteration_{}/simple*predictions/slurm-phase_5*.err".format(args.project_path, args.iteration)) 158 | slurm_out += glob.glob("{}/iteration_{}/simple*predictions/slurm-phase_5*.out".format(args.project_path, args.iteration)) 159 | 160 | # Grab the slurm files in the GUI dir from smile searching 161 | slurm_err += glob.glob("GUI/slurm-*.err".format(args.script_path, args.iteration)) 162 | slurm_out += glob.glob("GUI/slurm-*.out".format(args.script_path, args.iteration)) 163 | 164 | make_the_move(slurm_err, slurm_out) 165 | 166 | elif args.phase == "final": 167 | # This means everything is finished 168 | # Create a directory for all of the slurm files 169 | try: 170 | os.mkdir(phase_dir) 171 | except OSError: 172 | print("The directory {} already exists.".format(phase_dir)) 173 | 174 | # Gather the slurm files in the script dir 175 | slurm_err = glob.glob("slurm-phase_*.err") 176 | slurm_out = glob.glob("slurm-phase_*.out") 177 | 178 | # Grab the files in the GUI directory 179 | slurm_err += glob.glob("GUI/slurm-*.err".format(args.script_path)) 180 | slurm_out += glob.glob("GUI/slurm-*.out".format(args.script_path)) 181 | 182 | make_the_move(slurm_err, slurm_out) 183 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/split_chunks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=24 3 | #SBATCH --ntasks=1 4 | #SBATCH --job-name=split_chunks 5 | #SBATCH --output=slurm-phase_2-%x.%j.out 6 | #SBATCH --error=slurm-phase_2-%x.%j.err 7 | 8 | input=$1 9 | extension=$2 10 | output=$3 11 | chunk_n_lines=$4 12 | script_path=$5 13 | project_name=$6 14 | 15 | slurm_args_no_cpu=$(sed -n '1p' ${script_path}/slurm_args/${project_name}_slurm_args.txt) 16 | 17 | echo "Working..." 18 | 19 | mkdir -p chunks_smi 20 | 21 | split -a 4 -d -l $chunk_n_lines --additional-suffix=${extension} ${input} chunks_smi/${output}_set_part 22 | 23 | cd chunks_smi 24 | 25 | for x in ./$output*${extension}; do 26 | mkdir "${x%.*}" && mv "$x" "${x%.*}" 27 | done 28 | 29 | # Start preparing ligands 30 | cd .. 31 | return=$PWD 32 | echo Preparing Ligands 33 | for i in $(ls -d chunks_smi/$output*); do cd $i; sbatch $slurm_args_no_cpu $script_path/prepare_ligands_ad.sh $script_path; cd $return; done 34 | echo "Done!" 
-------------------------------------------------------------------------------- /Docking/ProgressiveDocking/split_sdf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from openbabel import pybel 3 | # Write every molecule in the input SDF to its own <title>.sdf file 4 | for mol in pybel.readfile("sdf", sys.argv[1]): 5 | mol.write("sdf", "%s.sdf" % mol.title, overwrite=True) 6 | 
-------------------------------------------------------------------------------- /Docking/ProgressiveDocking/util_functions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def lerp(a, b, c): 4 | """ 5 | Linearly interpolates between a and b at point c, where lerp(a, b, 0) = a and lerp(a, b, 1) = b. 6 | """ 7 | assert 0 <= c <= 1, "c must be between 0 and 1" 8 | return (b*c) + ((1-c) * a) 9 | 10 | 11 | def seconds_to_datetime(seconds): 12 | """ 13 | Converts a duration in seconds into zero-padded hour, minute, and second strings for a 00:00:00 display. 14 | 15 | :param seconds: duration in seconds 16 | :return: (h, m, s) as zero-padded strings 17 | """ 18 | m, s = divmod(seconds, 60) 19 | h, m = divmod(m, 60) 20 | h, m, s = int(h), int(m), int(s) 21 | 22 | if h < 0: 23 | h = m = s = 0 24 | 25 | if h < 10: 26 | h = "0" + str(h) 27 | else: 28 | h = str(h) 29 | 30 | if m < 10: 31 | m = "0" + str(m) 32 | else: 33 | m = str(m) 34 | 35 | if s < 10: 36 | s = "0" + str(s) 37 | else: 38 | s = str(s) 39 | return h, m, s 40 | 41 | 42 | def datetime_string_to_seconds(dt): 43 | """ 44 | Converts a datetime string like "00-04:00" ("DD-HH:MM") into seconds. 45 | datetime_string_to_seconds("00-04:00") = 14400 46 | :param dt: a "DD-HH:MM" time string 47 | :return: the total number of seconds 48 | """ 49 | days = int(dt[0:2]) 50 | hours = int(dt[3:5]) 51 | minutes = int(dt[6:8]) 52 | seconds = 60 * minutes + 60*60 * hours + 24*60*60 * days # a day is 86400 seconds 53 | return seconds 54 | 
-------------------------------------------------------------------------------- /Docking/ProgressiveDocking/venv_sanity_check.py: -------------------------------------------------------------------------------- 1 | import tensorflow 2 | import numpy 3 | import pandas 4 | with open("test_check.txt", 'w') as file: # only reached if the imports above resolved 5 | file.write("Success!") 6 | 
-------------------------------------------------------------------------------- /Docking/__init__.py: -------------------------------------------------------------------------------- 1 | # from Docking import GUI 2 | # from Docking import ProgressiveDocking 3 | # from Docking import ML 4 | # from Docking.ML import DDModel, DDMetrics 5 | import GUI 6 | import ML 7 | import ProgressiveDocking 8 | 
-------------------------------------------------------------------------------- /GUI/README.md: -------------------------------------------------------------------------------- 1 | # DD_GUI 2 | 3 | GUI for Deep Docking. 4 | 5 | ## Requirements: 6 | * Node Package Manager (NPM) 7 | * Python backend dependencies, provided by the `DeepDockingLocal` conda environment created during installation: 8 | * e.g. Flask (serves `server.py`), Paramiko and Pillow (used by the SSH backend in `src/backend/auto_ssh.py`) 9 | 10 | ## Installation: 11 | Make sure you have NPM and Node.js installed, along with the Python requirements above. 12 | To set up the JavaScript dependencies, run `npm install` from this directory. 13 | 14 | ## How to run: 15 | Open a terminal, navigate to this directory, and run one of the following to host the server locally: 16 | `npm run start-lin` (Linux/macOS) or `npm run start-win` (Windows). 17 | Or just run the `server.py` file directly. A typical local session is sketched below.
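The sketch assumes you start from the repository root and that the Python environment created during installation is active; the script names come from `package.json`, so adjust the rest to your setup:

```bash
cd GUI
npm install        # fetch the JavaScript dependencies listed in package.json
npm run build      # (re)bundle the front end with webpack if the JS sources changed
npm run start-lin  # on Linux/macOS; use "npm run start-win" on Windows
```

The `start-lin`/`start-win` scripts only set `FLASK_APP=server.py` and call `flask run`, so Flask must be available in whichever environment you launch them from.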
-------------------------------------------------------------------------------- /GUI/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dd_gui", 3 | "version": "2.0.0", 4 | "description": "Deep Docking GUI", 5 | "main": "server.js", 6 | "dependencies": { 7 | "chai": "^4.2.0", 8 | "chart.js": "^2.9.4", 9 | "jquery": "^3.5.1", 10 | "kekule": "^0.9.3", 11 | "lodash": "^4.17.20", 12 | "mocha": "^8.2.1", 13 | "react": "^17.0.1", 14 | "react-dom": "^17.0.1", 15 | "react-router-dom": "^5.2.0", 16 | "style-loader": "^2.0.0" 17 | }, 18 | "devDependencies": { 19 | "@babel/core": "^7.12.9", 20 | "@babel/preset-env": "^7.12.7", 21 | "babel": "^6.23.0", 22 | "babel-core": "^6.26.3", 23 | "babel-loader": "^8.2.1", 24 | "babel-preset-es2015": "^6.24.1", 25 | "babel-preset-react": "^6.24.1", 26 | "babel-preset-stage-2": "^6.24.1", 27 | "body-parser": "^1.19.0", 28 | "cors": "^2.8.5", 29 | "css-loader": "^5.0.1", 30 | "errorhandler": "^1.5.1", 31 | "express": "^4.17.1", 32 | "file-loader": "^6.2.0", 33 | "morgan": "^1.10.0", 34 | "url-loader": "^4.1.1", 35 | "webpack": "^4.44.1", 36 | "webpack-cli": "^4.2.0" 37 | }, 38 | "scripts": { 39 | "test": "mocha", 40 | "start-lin": "export FLASK_APP=server.py && export FLASK_ENV=local_host && flask run", 41 | "start-win": "conda activate DeepDockingLocal && set FLASK_APP=server.py && set FLASK_ENV=local_host && flask run", 42 | "start-dev": "conda activate DeepDockingLocal && set FLASK_APP=server.py && set FLASK_ENV=development && set FLASK_DEBUG=1 && flask run", 43 | "build": "webpack --progress --config webpack.config.js", 44 | "dev-build": "webpack -d --progress --config webpack.config.js" 45 | }, 46 | "repository": { 47 | "type": "git", 48 | "url": "git+https://github.com/jamesgleave/Deep-Docking.git" 49 | }, 50 | "author": "Jean Charle Yaacoub and James Gleave", 51 | "license": "ISC", 52 | "bugs": { 53 | "url": "https://github.com/jamesgleave/Deep-Docking/issues" 54 | }, 55 | "homepage": "https://github.com/jamesgleave/Deep-Docking" 56 | } 57 | -------------------------------------------------------------------------------- /GUI/public/img/Indicator_light_g.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /GUI/public/img/Indicator_light_r.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /GUI/public/img/Indicator_light_y.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /GUI/public/img/close_button.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /GUI/public/img/download_icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /GUI/public/img/left_switch.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /GUI/public/img/loading_svg.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /GUI/public/img/loading_svg_ripple.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /GUI/public/img/reload_icon.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GUI/public/img/right_switch.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /GUI/public/webp-img/00b42403057e60520cb497d92556b982.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAK9JREFUWIXtlUEKwzAMBEelD/PT9LTNy9RLFUzTQNK0MgUP+CKINawjGyZfJCLC3UNSHP3mfrXhsiwASMLMAHB3JEVrza7sf0jA3QN4u86m8ZFAImmVkbTWUuRnEntCfa1UYk+MZzLDJPI4+nRGsEnhVtnd3Te1EoFuIpBETklF71WAlzui/F/oL6whI5lTwKhJ6I+hF7j0GJ3BzCyjt3y1KgUAWmuV7SaTyWQy+RMeiwquZStdcE4AAAAASUVORK5CYII=" -------------------------------------------------------------------------------- /GUI/public/webp-img/0638bec8443dd6e3385084884ed644a2.png: -------------------------------------------------------------------------------- 1 | export default 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAUIAAAA0CAMAAADIf8HGAAAC1lBMVEUAAAD///////////////8AAAD///8RERH////8/Pz////+/v7///8tLS0JCQn///8fHx/////z8/NaWlr+/v4AAAD6+voeHh4yMjL+/v7+/v7////+/v4kJCRhYWH+/v7o6Oj7+/sAAADT09Ps7OwAKJaiogCiAAAiIiK2tra3t7f29vYgICCnp6cFBQXBwcEjIyP8/Pz8/Pz8/PwAAAD39/cvLy8jIyPt7e0AAAAAAAAJCQn5+fn5+fkjIyMAAAAAAACxAACxsQD7+/sFMKAiIiLFxcX+/v5ycnIjIyP5+fmMjIz9/f0zMzPQ0NABAQEKCgoAAAD///91dXWRkZH+/v40NDQAAAA/Pz9VVVUCAgImJiYgICAAAAC8vADp6ekFBQUAAAAAAAAHBwcqKioaGho6OjoAAAAANbi/AAAAAAAAAAAVFRVoaGgAAABRUVEAAAAAAAAAAAAwMDAAAAAeHh4KCgoAAAAAAAAAAAAAAAAMDAwAAAAAAAAAAAAEBAQAAAAfHx8AAAAAAAAODg4AAABeXl4AAAAAAAAAAAADAwPGxgAAAABycnIAAAAAAAAhISEFBQUAAAAAAAAAOMEAAADIAAACAgLJyQAAAAAAAAAKCgoAAAAAAAAAAAAAPcsAAAAAAADQ0AAAAAAAAABwcHAAAAAAAAAAAAAnJyfOAAA/Pz8REREAAAAAP88AAAAlJSXLygEAAABjY2MAAAAJCQPt7QAZGRSYmAIAAAAAAP8iIiIpKSksLCwwMDAxMTEzMzM6Ojo/Pz9JSQhDQ0PvAAAATO0ATO5KSkpMTEz/AAAAU/9UVFT/Dg4QXv/tHCRcXFwVYf9kZGT/KSm9STJsbGy+TjdxcXHAUz51dXUisUzEX0uEhITJaljPe2ulpQTVjX6esOzOyAX/mZn/tLTry8T/x8fx3Nf//wD/2Nj25+Tr8v/v9P/68/H8+Pj3+v//+/v///+Dl5zLAAAAuXRSTlMAAQIDBAQFBQYHCgsMDg4PEBERERMWFxkZHB0gISIiIyMkJCkrLCwsLC0uLi8yMjIzNDU2PT9AQEBBQktMUFBVWVtbW11gYmNlZmZpa2xtcXN4eHl5enp8fH6AgoKDhIWHiIqKjI2NjpCQkJGSlJWVmJmamp2dnp+goaOlpqeoqaqrra6vsLGztLW4ubq7vL6/wcHDxMTFxsjKzM3Nzs/P0NHR0tTV1tja2t3d3d7m6Orr7vH5+vv7/i6BvvAAAAsVSURBVHja7duNW1PXHQfwmxcjVHzBRhGIpjNV6xpFbYvWGauu0rSplCiLUMCpE/AFisoEFUUiVgRtfME4gksYRBFR7KpVyrIGZme7xrpbZ/fSdp2rUgq6bvIf7Jz7/pbce0NaZOv3ecy9CUnkfnJ+55x7ckG++uIbRGa6fiMzSOQzZf1xkPVTIvy2/SB/ByG3kl70+SeyDdGuru+IUKHSgKgUvB883/D+dZD3G56PDF39RZAtYRLek2+Idn0mD5HNopAuGD0+UadLHB/NE7xwnciFyBjObW9vPz8mTMIwDFFgKAuR8doRhpQUwwghLtYGjzr+F7B1HI2P1uBRElXcAPBu3r8PDRt4tRznC8Mw4+q55HALOQxDjFAOIqkUM1Yzfc/bb++Zrhkbw26LihHxahxtBOMHmsS2MzCz9TosCVrsSch6rIq//BLefrCeK/juT+k7E30wE0Mezsk98HZ/Jrxl8sEt9SSfL6KGKGEoGZEsS3OB2XyoufmQGexEMw0VMbajBsCjNhy1MXA1ukYY75tE2gq0WDs8Du1u3ryJlfJxnmAMwjhyo9HIPvzOCewXGM+3j6LuhEsIDT+/FwahVET8ZTHmfWdOnnTX17tPnjxz1BzDEmxrqTGo1YaaljaGoUbnYudUItYMG7BG+J9+jLAhhCDiM44cPtLIOnw/xzB74dmFcgl9R+bCzYuMB+99IccQZRh+1iWZcGyWy3uq2eV0uppPeV3rxjI6SFuLy+WtMRhqvC5Xi43qKqP0y3/CynJ9FEV4/X7/H3mEUJDZF/qMw5HhbMIJfj/L8OzIdcUihD4iDFAMkUmIfCPHkEUopSHiL1NZ7E7H4Vqns/awo9ZuUVPvp4y3O2tra+traurBxmmPJ0YNRWxucxsrzbmxCrKQScLjXEFGX/iEQCtEJnT6GfdGFiPJ50aGnhcKEQJENqEsQ5RtKI6IN8IfWuyHHdX2oiJ7teOw3TKdboYqU7EDxOmEt8UmFTkgJ9RWc1KbAOXXf0ATMocTQjCGHpsF+sIJnaxKzvwQZJno1JpTyAKtUJYhl1AUEWtShpLq6gOHtqWbTOnbDh2ori4x0DPEKFMeiZRniqK7wsq9WFJfgEkFe5U6DTWp+Vc/HJIvTAkpyB+ROXW8/1HAWCiXkN8XyjNE+YahEbGWluR1VO49UPSMUvlM0YG9lQ5vkkqJMAwrsTAEAeEuPE8+AuaEjzwJdzFCbGp9ExuSGVNrUpDqCwHeE/zfni04aT+8uTRJHmGwSDYUIgyJiHkkvV5RXl6VZ0AQQ15VeXnF60kamlBtsO/GYjeoGYSleDA3/B5OCAzxEzwhQaov9PnixA7F/OmnZgQ5jd2GHJElRqohKmwYfHDGCvnxwnWlO3eVZEyfnlGya2fpusLH6UJWG8oqca3KMtpQo9uMhyCkdvFlhoYG1jIDrGDyX9gZKKFUQzSoYVeI4WT0vJRtpVt37iku3rNza+m2lHmj6eHEUFJOaG0uLzGoKMJNeAhCalcwy2Pof4NHKNEwKGEwRPxlWkvB5tc273Y4doNNgUXLmNSU7YA8GRnwdkcZOanR6NbiIQip3W8vAyeUZoiGMBREJKbWtvxNG/Pz167Nz9+4aaONObVOXw10MkymDLBZnT6CaoU5eAhCavfhjhTDkIRCiMQJnsWWm521es2a1VnZuTYL8wRPn/JqToYpKsqUkfNqil5BzQvTVmIhCMFeWoIaGRKGX0skDGLIQyTqVZeSnppqy8mxpaamp+iUCMswE85mokyZtCA4O7GkWWH02KRGb7WmWWKJn05ddbCu7uCqqQ+r4d++HhghF5Hs8xLnJDxmXrnS/FjCnEQle7FLtwCbD0Yt0DFWcFSzwYz6Zat1Blzs0s+wWl+eTQw1S5qu9IJcaVryUBr++x8ihqgEQ9YMhyKJVo6el5o6b7QyWiVlyRVRgsaXmGatgid+jiprWiIx717yVm/vnfbLt3t73xqahpIImQ2RiRKr18cq5Xx9QlYzo4ynNoEW+E5Hxztg0xROLScPtiEqzZBGZJEolQpZvwxWzXjIMl4Fq/g2TnhlldgbzPLN4gr6WYa8k5iJoivdAzWUSkghRvojPtiLE96G2zpRQe75bbK/szOZte7CMWSvbAWLyHc0IQ1R6YY4YqQJ6yDdnY6OO3DbxD5y7rHPQhYyWiGUS+5kC8LXxXEIJSwrxIk9JZQhKscQIkaasAkbjTs6etmE+OJWHKcNLkRmsSo4
udPPrmOE6877KHwCH06ceEMNYSiPECB+K4Xc+957vZxCjhMSZLZBf6c/2c8V5LVCSYRSaj24ISrXcIBiwxZZra+MYjyADSe9d7A6Zg0ncSEFsRr2+zu54zG/jn2IKFCclO4yqOF3TThzC1h9eJbxgNRJDVdQoB8UGpEBznBxoDgpq7DBDFG5hkLTPYlzw8lrXqOydib5KJxa374sOrUGDrz5TKf4nFBwWJI9Ioc0jAChKna8pPWCcc1eDx33ZMpQ0gkeX1BSlmKCSyM2x/7LV6KEooa8d1Bqc/fFSzF8tnQrM8/Rtbyqrqmp7iFdZmAb/lPAcMCESm12W0uZJMPJaxh5GhmSETBE5RqScgpKEFy90Fw2nu4Ps35N5UXW/zWfWcjPIf8rhmESqrX4+oJSm3UKXLVQWxhLnywXv0sli03YeGi+y7P2FY9n0Y4hS8g3ROUa4oLjC2zwwiwg6IFrVytmMxa8iruo2DiEP0dc7kVPN7rH5biHLCHPMCxCIOhy2WKVQNAFr1pgCSJ5v6OSwSOsFyCctuEEyIZp0hbMFArFw2WIyjXEx+DaykrXMq02ywmvWmALInm/pyKJcHHrtQDItdbFEn59sESp17IfqrAOriEq1xAjLATXKpQfW5Z1DF60wBFEij6kkskn9ABCz7gcui9c3BogImaogF/MvHnx4gzyTOcyxMtubzcOqmE4hIj6qRXbt2/f4XCA2+1cQaToIyqZ4q1w2olAd8+tQOBWT3egdVrIJe9EBSB0ud1JxCNll8+DU5RzV6/SF8QNhiEq1xAhDMHFClvgFQsr5nC/Oin6mIqEVrjhWuBuf9+tG3393YFrG4KvnygNZTMg4TGnkyAcdX5f+4+Q05dACgezPwyPEBri13dYeIJI4Z+oSGiFJ0AFd/f39PT33AgETrCeP4ZBGG0+6kpSKpQzwPhPEGZnL/TZuadzRsQYqdM5qYaoXEOEMoTXdAgIIoV/plIojfBGdz8mGGhlrlGN8Y0hCRVaW72jKikpyZxbVVVFEJ6diJz97UTuosJS3nIqZ/ULm+7PPeKLgGH3gAihYU6OkCBifoOKWbyQIWGATwjb4BjERxZxgWtvRUWj1+s5XFFR8APsQRu4qtrGmXkasUUFIxJi5RYSAkCfLwLt8JPuEITBDRGGoaBg8ARthXcf9PWArpBRyOAAgSAZvb16B50V+B9QwT/PmXD1HH9hZilHkG0ICI9I+0pKqmH4hMBwtixBQLhrmNe9aH6je3K+mzmcPOi7davvwV3GcMI+QNVLmeT1dpuz0udhU+tlf8CzLGQr5C8XRq4VkoaoXEPmIo3M04SZjZ5fnfLUn/F4XC2e+eSkpjXQ3Q0nNX09jEmNj+0YtSD9Z/n5L8Hvn016/HPD/8Ip86P9In0hb90xUn0hYTgwQtlZCSZ1eH6ZPyzk1JrbeBT61Kw1BiV9djfpEr699PGkwRmRScMQhMEMB/blExXGg9JO8Eb/eIWBbvWPni4kh394mf//feAyQ6voMoPqKfpSO+SvIHB7Gu6YvyeUGsX3BEMg/wW9bXSroB+3mgAAAABJRU5ErkJggg==" -------------------------------------------------------------------------------- /GUI/public/webp-img/1f0710a4a9c764c4801a6b0bbd1f6744.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAKBJREFUWIXtlDEOAyEMBE2Uf2N+trxs0wCCQLqwSeGRTuLuCg9ry2ZBEARBEPwbAAiAqnrPg4Cq9g5JmhnbUZLC49OPWqui/ipQShlneSvm+E3YhmUIAYyb55xv114FUkqpSRDAeJfTdoBsD2z0WVAuo42egmoXHHH3TYITUgkAJNnbQncf364LAWAXOT3urknkXaSn0JO4LjBL/HQ4v80LekuprjId4lUAAAAASUVORK5CYII=" -------------------------------------------------------------------------------- /GUI/public/webp-img/2a9beabef112cd5d9b57edafe04ecd82.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAALVJREFUWIXtlVEOwyAMQ51tB2tvlt7MnMz7AQZbpX2sSyctT0IICckB7AAkSZIkSfJvqNLWl2jxbdsiJV8LACCS59xAKQUAQDJS9oG7C4BQrQAAtyhxSTKzvm5eCH0Cd5/ms5hMGKtcUzD2gWtkAWbmy7JgXVd7v/tg6rVPpz8cSSI5iUhSi9+e+EcxlKRSSm8sLWYke9ttcZMEG3N4BGNj2Rvu/l3Ht5+NpMZinp8hjD0P/Dx3JrWfhQN9eBkAAAAASUVORK5CYII=" -------------------------------------------------------------------------------- /GUI/public/webp-img/c2e5bc8f9058ad350eed2e2559c63174.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAJ1JREFUWIXtlEEOAyEMA51qH8bT8jT/zHvYRs2iHroHglRlJA4ggR0TAJoHkJS7S5K2iQMQALm7SIrkWkOz8DyWJhLCUSXJW/WZZeL58KWCmah0S7O9RSPueqL6Kr3Xt0V3r9L/GIguJwngSqK0B+Lu8yhvwvmnKxUH7ilseQXZQFX8R56YmUX0ZmblBgBgjPHTxkioymjTNE3T/C8nhFi5FLM7xWoAAAAASUVORK5CYII=" 
-------------------------------------------------------------------------------- /GUI/public/webp-img/d3698da8e2bda9d79e1bb514e2d600fa.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAHVJREFUWIXtlcEKgDAMQzvx/395ngSRRVOWToS80w6lCW3DIswP6JXNt69NtKTwW32anRQuY7SCq3gD7x6FJu/Nl01jJK6qhaAbYJAc5EwMJRNgDcjjlzWwFGa0shg+TQAJSGOJdsuISO4i+xfIhI0xxhhzcgA3hhkG91FoQQAAAABJRU5ErkJggg==" -------------------------------------------------------------------------------- /GUI/src/__init__.py: -------------------------------------------------------------------------------- 1 | from src.backend.auto_ssh import SSH 2 | from src.backend.backend import Backend 3 | -------------------------------------------------------------------------------- /GUI/src/backend/DataHistory.py: -------------------------------------------------------------------------------- 1 | """ 2 | James Gleave 3 | v1.2.0 4 | 5 | A class used to store training history 6 | 7 | # Usage... 8 | # hist = DataHistory(data) 9 | # hist.averages -> {'iteration_1': {'loss': [0.7453892707824707, ...], 'sparse_categorical_accuracy': [...], ...}, ...} 10 | # hist.current_phase -> integer from 0 to 5 11 | # hist.plots -> {'iteration_1': [{'loss': [...], 'sparse_categorical_accuracy': [...], ...}, {...}, ...]} 12 | # hist.molecules_remaining -> {'iteration_1': {'true': X, 'estimate': Y, 'error': Z}, 'iteration_2': ...} 13 | # hist.current_iteration -> returns the current iteration 14 | """ 15 | 16 | 17 | class DataHistory: 18 | def __init__(self, data): 19 | self.history = data 20 | 21 | # Load the iterations and sort the list 22 | self.iterations = list(data.keys()) 23 | try: 24 | self.iterations.sort(key=lambda x: int(x.split("_")[1])) 25 | except KeyError: 26 | self.iterations.sort() 27 | 28 | self.plots = self.__generate_plot_data() 29 | self.averages = self.__calc_averages() 30 | 31 | self.current_iteration = self.__get_current_iteration() # TODO: handle error when no project is loaded... 32 | self.current_phase = data[self.current_iteration]['itr']['current_phase'] 33 | self.current_phase_eta = data[self.current_iteration]['itr']['phase_eta'] 34 | self.molecules_remaining = dict.fromkeys(self.iterations) 35 | self.is_idle = data[self.current_iteration]['itr']['is_idle'] 36 | 37 | # Store the crash report (hopefully no crashes) 38 | self.crash_report = self.history[self.current_iteration]['itr']['crash_report'] 39 | 40 | # The percent complete for the iteration 41 | self.itr_percent = self.history[self.current_iteration]['itr']['itr_percent'] 42 | self.full_percent = self.history[self.current_iteration]['itr']['full_percent'] 43 | 44 | # Are we running the final phase? 
45 | self.final_phase = self.history[self.current_iteration]['itr']['final_phase'] 46 | 47 | # The number of jobs pending and running 48 | self.pending = self.history[self.current_iteration]['itr']['pending_info'] 49 | 50 | try: 51 | for key in self.molecules_remaining: 52 | self.molecules_remaining[key] = data[key]['itr']['molecules_remaining'] 53 | except TypeError: 54 | pass 55 | 56 | def get_model(self, iteration, model_number, averages=False): 57 | if averages: 58 | return self.averages[iteration] 59 | else: 60 | return self.history[self.iterations[iteration]]['models'][model_number] 61 | 62 | def __get_current_iteration(self): 63 | for key in self.history: 64 | try: 65 | if self.history[key]['itr']['in_progress']: 66 | return key 67 | except TypeError: 68 | pass 69 | return 'iteration_1' 70 | 71 | def __calc_averages(self): 72 | averages = {} 73 | try: 74 | keys = list(self.plots['iteration_1'][0].keys()) 75 | except (IndexError, KeyError): 76 | return averages 77 | for itr in self.plots: 78 | averages[itr] = {} 79 | for metric in keys: 80 | metric_list = [] 81 | for model in self.plots[itr]: 82 | metric_list.append(model[metric]) 83 | averages[itr][metric] = self.__average_cols(metric_list) 84 | return averages 85 | 86 | def __generate_plot_data(self): 87 | # Loop through iterations... 88 | plots = {} 89 | for iteration in self.history: 90 | plots[iteration] = [] # Create a list to store models from each iteration 91 | try: 92 | # Loop through models... 93 | for model_number, model in enumerate(self.history[iteration]['models']): 94 | # Create a dict of lists to store the plot values. 95 | # If the model has not finished an epoch yet, then break 96 | try: 97 | model_data = {} 98 | for key in model["epoch_1"]: 99 | # Reformat the keys to make them easy to display dynamically 100 | model_data[self.reformat(key)] = [] 101 | except KeyError: 102 | break 103 | 104 | # Loop through epochs... 
105 | for epoch in model: 106 | # Loop through each metric 107 | for metric in model[epoch]: 108 | model_data[self.reformat(metric)].append(model[epoch][metric]) 109 | plots[iteration].append(model_data) 110 | except TypeError: 111 | pass 112 | return plots 113 | 114 | def reformat(self, key): 115 | new_key = "" 116 | if "_" in key: 117 | tokens = [s.capitalize() for s in key.replace("val", "Validation").split("_")] 118 | for token in tokens: 119 | new_key += token + " " 120 | if new_key[-1] == " ": 121 | new_key = new_key[0:-1] 122 | else: 123 | new_key = key.capitalize() 124 | 125 | # Simplify words 126 | if "Sparse Categorical " in new_key: 127 | new_key = new_key.replace("Sparse Categorical ", "") 128 | 129 | if "acc" == key or "val_acc" == key: 130 | new_key = new_key.replace("Acc", "Accuracy") 131 | 132 | return new_key 133 | 134 | def __repr__(self): 135 | print("Iterations:", self.iterations) 136 | print("Current Iteration:", self.current_iteration) 137 | print("Current Phase:", self.current_phase) 138 | print("Molecules Remaining:", self.molecules_remaining) 139 | print("Iteration Percent:", self.itr_percent) 140 | print("Full Percent:", self.full_percent) 141 | 142 | return "" 143 | 144 | @staticmethod 145 | def __average_cols(arr): 146 | if len(arr) == 0: 147 | return [] 148 | 149 | average = [] 150 | max_len = max([len(length) for length in arr]) 151 | for i in range(max_len): 152 | col_average = [] 153 | for row in arr: 154 | if i < len(row): 155 | col_average.append(row[i]) 156 | average.append(sum(col_average)/len(col_average)) 157 | return average 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /GUI/src/backend/EmailBot.py: -------------------------------------------------------------------------------- 1 | import smtplib 2 | from email.mime.text import MIMEText 3 | from email.mime.multipart import MIMEMultipart 4 | 5 | 6 | class EmailNotificationSettings: 7 | PHASE_CHANGE_UPDATE = False 8 | ITERATION_CHANGE_UPDATE = True 9 | FINAL_PHASE_START_UPDATE = False 10 | FINAL_PHASE_FINISH_UPDATE = False 11 | PROJECT_START_UPDATE = False 12 | PROJECT_FINISH_UPDATE = False 13 | 14 | EMAIL_NOTIFICATIONS = False 15 | 16 | @staticmethod 17 | def ChangeSettings(phase_change, 18 | itr_change, 19 | final_phase_start, 20 | final_phase_end, 21 | project_start, 22 | project_finish, 23 | email_notifications): 24 | 25 | EmailNotificationSettings.PHASE_CHANGE_UPDATE = phase_change and email_notifications 26 | EmailNotificationSettings.ITERATION_CHANGE_UPDATE = itr_change and email_notifications 27 | EmailNotificationSettings.FINAL_PHASE_START_UPDATE = final_phase_start and email_notifications 28 | EmailNotificationSettings.FINAL_PHASE_FINISH_UPDATE = final_phase_end and email_notifications 29 | EmailNotificationSettings.PROJECT_START_UPDATE = project_start and email_notifications 30 | EmailNotificationSettings.PROJECT_FINISH_UPDATE = project_finish and email_notifications 31 | EmailNotificationSettings.EMAIL_NOTIFICATIONS = email_notifications and email_notifications 32 | 33 | @staticmethod 34 | def toString(): 35 | return f"PHASE_CHANGE_UPDATE: {EmailNotificationSettings.PHASE_CHANGE_UPDATE}, " \ 36 | f"ITERATION_CHANGE_UPDATE: {EmailNotificationSettings.ITERATION_CHANGE_UPDATE}, " \ 37 | f"FINAL_PHASE_START_UPDATE: {EmailNotificationSettings.FINAL_PHASE_START_UPDATE}, " \ 38 | f"FINAL_PHASE_FINISH_UPDATE: {EmailNotificationSettings.FINAL_PHASE_FINISH_UPDATE}, " \ 39 | f"PROJECT_START_UPDATE: 
{EmailNotificationSettings.PROJECT_START_UPDATE}, " \ 40 | f"PROJECT_FINISH_UPDATE: {EmailNotificationSettings.PROJECT_FINISH_UPDATE}, " \ 41 | f"EMAIL_NOTIFICATIONS: {EmailNotificationSettings.EMAIL_NOTIFICATIONS}" 42 | 43 | 44 | class EmailBot: 45 | def __init__(self, address, password): 46 | self.s = smtplib.SMTP(host='smtp.gmail.com', port=587) 47 | self.s.starttls() 48 | self.s.login(address, password) 49 | 50 | self.address = address 51 | self.password = password 52 | 53 | def send_message(self, recipient, subject, message): 54 | msg = MIMEMultipart() # create a message 55 | 56 | # setup the parameters of the message 57 | msg['From'] = self.address 58 | msg['To'] = recipient 59 | msg['Subject'] = subject 60 | 61 | # add in the message body 62 | msg.attach(MIMEText(message, 'plain')) 63 | 64 | # send the message via the server set up earlier. 65 | self.s.send_message(msg) 66 | 67 | def send_iteration_change_update(self, user, to, project_name, new_iteration): 68 | prev_itr = new_iteration - 1 69 | message = f"Hello {user},\n\nyour Deep Docking project {project_name} has completed iteration {prev_itr} " \ 70 | f"and is now on iteration {new_iteration}. For further details, visit 'web-address'. \n" \ 71 | f"If you would like to stop receiving updates, log into 'web-address' and turn off email notifications." \ 72 | f"\n\nCheers,\nDeepDockingBot" 73 | 74 | self.send_message(to, "Iteration Complete", message) 75 | 76 | def send_project_started_update(self, user, to, project_name): 77 | message = f"Hello {user},\n\nyour Deep Docking project {project_name} has begun. " \ 78 | f"For further details, visit 'web-address'. \n" \ 79 | f"If you would like to stop receiving updates, " \ 80 | f"log into 'web-address' and turn off email notifications." \ 81 | f"\n\nCheers,\nDeepDockingBot" 82 | 83 | self.send_message(to, "Project Started", message) 84 | 85 | def send_queue_position_update(self, user, to, project_name, queue_position): 86 | pass 87 | 88 | def send_project_finished_update(self, user, to, project_name): 89 | pass 90 | 91 | @staticmethod 92 | def get_user_pw(): 93 | return open("/Users/martingleave/Documents/DeepDocking/DeepDockingGUI/GUI/src/backend/email.txt").read().split(" ") 94 | 95 | @staticmethod 96 | def get_user_email(): 97 | import json 98 | 99 | # load up the data we have from installation 100 | with open('src/backend/db.json') as user_db: 101 | db = user_db.read() 102 | database = json.loads(db) 103 | return database["email"] 104 | -------------------------------------------------------------------------------- /GUI/src/backend/EventHandler.py: -------------------------------------------------------------------------------- 1 | """ 2 | The event handler for the backend. 3 | This class handles all callbacks from withing the backend loop. 
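Each On* hook below is invoked by the Backend as a run progresses (start, phase change,
iteration change, final phase, completion, and error conditions). The email-related hooks
consult EmailNotificationSettings and use EmailBot with the stored credentials; at the
moment only the iteration-change notification actually sends mail.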
4 | """ 5 | 6 | from .EmailBot import * 7 | 8 | 9 | class EventHandler: 10 | @staticmethod 11 | def OnBackendStart(backend): 12 | print("Event Handled Backend Start") 13 | 14 | @staticmethod 15 | def OnProjectRunPhase(backend): 16 | print("Event Handled Run Phase") 17 | if EmailNotificationSettings.PROJECT_START_UPDATE: 18 | address, pw = EmailBot.get_user_pw() 19 | bot = EmailBot(address=address, password=pw) 20 | user_email = EmailBot.get_user_email() 21 | 22 | # Return if we have no email entered 23 | if user_email == "NA": 24 | return 25 | 26 | @staticmethod 27 | def OnPhaseChange(backend): 28 | print("Event Handled Phase Change") 29 | if EmailNotificationSettings.PHASE_CHANGE_UPDATE: 30 | address, pw = EmailBot.get_user_pw() 31 | bot = EmailBot(address=address, password=pw) 32 | user_email = EmailBot.get_user_email() 33 | 34 | # Return if we have no email entered 35 | if user_email == "NA": 36 | return 37 | 38 | @staticmethod 39 | def OnIterationChange(backend): 40 | print("Event Handled Iteration Change") 41 | try: 42 | if EmailNotificationSettings.ITERATION_CHANGE_UPDATE: 43 | address, pw = EmailBot.get_user_pw() 44 | bot = EmailBot(address=address, password=pw) 45 | user_email = EmailBot.get_user_email() 46 | 47 | # Return if we have no email entered 48 | if user_email == "NA" and backend.loaded_project_information['specifications']['iteration'] > 1: 49 | return 50 | 51 | bot.send_iteration_change_update(backend.user_data["username"], 52 | user_email, 53 | backend.loaded_project_name, 54 | backend.loaded_project_information['specifications']['iteration']) 55 | except FileNotFoundError: 56 | print("Email notifications not implemented yet...") 57 | 58 | @staticmethod 59 | def OnFinalPhaseStart(backend): 60 | print("Event Handled Final Phase Start") 61 | if EmailNotificationSettings.FINAL_PHASE_START_UPDATE: 62 | pass 63 | 64 | @staticmethod 65 | def OnFinalPhaseEnd(backend): 66 | print("Event Handled Final Phase End") 67 | if EmailNotificationSettings.FINAL_PHASE_FINISH_UPDATE: 68 | pass 69 | 70 | @staticmethod 71 | def OnProjectFinished(backend): 72 | print("Event Handled Project Finished") 73 | if EmailNotificationSettings.PROJECT_FINISH_UPDATE: 74 | pass 75 | 76 | @staticmethod 77 | def OnDataReadError(backend): 78 | print("Event Handled Data Read Error") 79 | 80 | @staticmethod 81 | def OnErrorDetected(backend): 82 | print("Event Handled Error Detected") 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /GUI/src/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_ssh import SSH 2 | from .cluster_commands import * 3 | from .DataHistory import DataHistory 4 | from .backend import Backend 5 | -------------------------------------------------------------------------------- /GUI/src/backend/auto_ssh.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import paramiko 3 | import json 4 | 5 | 6 | class SSH: 7 | 8 | """ This class will automatically ssh into the host cluster. """ 9 | 10 | def __init__(self, host=None): 11 | if host is None: 12 | try: 13 | json_str = open('src/backend/db.json').read() # TODO: Sibling files not recognizing each other when called from another file path. 14 | db_dict = json.loads(json_str) 15 | host = db_dict['ip'] 16 | except FileNotFoundError as e: 17 | print(e.__traceback__, "'db.json' not found! 
Please run the installation first before running GUI.") 18 | raise e 19 | 20 | # The information that will allow for ssh 21 | self.host = host 22 | self.user = "" 23 | self.pwrd = "" 24 | self.ssh = None 25 | 26 | def command(self, command): 27 | # Check if there is a connection 28 | assert self.ssh is not None, "Connect before using a command" 29 | 30 | # Send the command 31 | stdin, stdout, stderr = self.ssh.exec_command(command) 32 | 33 | return stdout 34 | 35 | def connect(self, username, password): 36 | # Set the credentials 37 | self.user = username 38 | self.pwrd = password 39 | 40 | # Connect to ssh and set our ssh object 41 | ssh = paramiko.SSHClient() 42 | ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 43 | ssh.connect(self.host, username=self.user, password=self.pwrd) 44 | self.ssh = ssh 45 | 46 | def download(self, remote_path, local_path): 47 | ftp_client = self.ssh.open_sftp() 48 | ftp_client.get(remote_path, local_path) 49 | ftp_client.close() 50 | 51 | def read(self, remote_path): 52 | return self.ssh.open_sftp().file(remote_path) 53 | 54 | def get_image(self, remote_path, transparent=False): 55 | im = Image.open(self.read(remote_path)) 56 | if transparent: 57 | im = im.convert("RGBA") 58 | datas = im.getdata() 59 | new_data = [] 60 | for item in datas: 61 | if item[0] == 255 and item[1] == 255 and item[2] == 255: # make pure-white pixels fully transparent 62 | new_data.append((255, 255, 255, 0)) 63 | else: 64 | new_data.append(item) 65 | 66 | im.putdata(new_data) 67 | return im 68 | 69 | def __repr__(self): 70 | message = self.user + "\n" + self.host + "\n" 71 | return message 
-------------------------------------------------------------------------------- /GUI/src/backend/backend_exceptions.py: -------------------------------------------------------------------------------- 1 | class NullProjectException(Exception): 2 | def __init__(self): 3 | pass 4 | 5 | def __str__(self): 6 | message = "No project is loaded into the backend. Load a project before starting the backend." 7 | return message 8 | 9 | 10 | class NullDBError(Exception): 11 | def __init__(self): 12 | pass 13 | 14 | def __str__(self): 15 | message = "Cannot locate stored user data. Please reinstall DeepDocking." 16 | return message 17 | 
-------------------------------------------------------------------------------- /GUI/src/backend/backend_sanity_check.py: -------------------------------------------------------------------------------- 1 | from auto_ssh import SSH 2 | from backend import Backend 3 | import time 4 | import json 5 | 6 | 7 | def check_backend(): 8 | json_str = open('src/backend/db.json').read() # TODO: Sibling files not recognizing each other when called from another file path.
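# db.json is written during installation; this check only needs the cluster "ip" field
# (the same file also stores the notification "email" read by EmailBot.get_user_email()).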
9 | db_dict = json.loads(json_str) 10 | ip = db_dict['ip'] 11 | 12 | user = input("cluster username: ") 13 | password = input("cluster password: ") 14 | ssh_connection = SSH(host=ip) 15 | ssh_connection.connect(username=user, password=password) 16 | backend = Backend(ssh=ssh_connection) 17 | return backend 18 | 19 | 20 | def check_load_project(): 21 | b = check_backend() 22 | b.load_project(input("Project Name: ")) 23 | return b 24 | 25 | 26 | def check_run_phase_5(): 27 | b = check_backend() 28 | b.load_project(path_to_project=input("Path to project: ")) 29 | b.run_phase(5, debug=True) 30 | 31 | 32 | def check_run_phase_4(): 33 | b = check_backend() 34 | b.load_project(path_to_project=input("Path to project: ")) 35 | b.run_phase(4, debug=True) 36 | 37 | 38 | def check_venv(): 39 | b = check_backend() 40 | b.send_command("python venv_sanity_check.py", debug=True) 41 | b.send_command("conda list >> test_check.txt", debug=True) 42 | 43 | 44 | def check_backend_functionality(): 45 | b = check_backend() 46 | p = input("Project Name: ") 47 | b.load_project(project_name=p) 48 | b.start() 49 | 50 | while b.status() == "fetching": 51 | pass 52 | 53 | hist = b.pull() 54 | print(hist.current_phase_eta) 55 | 56 | 57 | def check_run_phase(): 58 | b = check_backend() 59 | b.load_project(path_to_project=input("Path to project: ")) 60 | b.start() 61 | while b.status() == "fetching": 62 | pass 63 | 64 | while True: 65 | phase = int(input("Which phase to run? ")) 66 | if input("Debug? y or n ") == "n": 67 | b.run_phase(phase, False) 68 | 69 | 70 | def check_model_image(): 71 | b = check_load_project() 72 | b.get_model_image(1, 3) 73 | 74 | 75 | def check_final_phase(): 76 | b = check_backend() 77 | project_name = input("Project Name: ") 78 | b.load_project(project_name) 79 | b.start() 80 | while b.status() == "fetching": 81 | pass 82 | b.run_phase(phase=-1, debug=True) 83 | 84 | 85 | def check_read_final(): 86 | b = check_backend() 87 | project_name = input("Project Name: ") 88 | b.load_project(project_name) 89 | b.start() 90 | while b.status() == "fetching": 91 | pass 92 | 93 | print(b.get_final_phase_results()) 94 | 95 | 96 | def check_except(): 97 | b = check_backend() 98 | project_name = input("Project Name: ") 99 | b.load_project(project_name) 100 | b.start() 101 | while b.status() == "fetching": 102 | pass 103 | 104 | print(b.core.model_data.keys()) 105 | print(b.core.model_data["iteration_1"]['itr']['crash_report']) 106 | 107 | 108 | def check_update_specs(): 109 | b = check_backend() 110 | project_name = input("Project Name: ") 111 | b.load_project(project_name) 112 | specs = {"num_cpu": 24} 113 | b.update_specifications(specs) 114 | 115 | 116 | def check_itr_percent(): 117 | b = check_backend() 118 | project_name = input("Project Name: ") 119 | b.load_project(project_name) 120 | print(b.pull()) 121 | -------------------------------------------------------------------------------- /GUI/src/index.js: -------------------------------------------------------------------------------- 1 | // Place imports here: 2 | import {$, jQuery} from 'jquery'; //TODO: figure out why webpack doesnt recognize this 3 | import Chart from 'chart.js'; 4 | 5 | // var Kekule = require('kekule').Kekule; -------------------------------------------------------------------------------- /GUI/src/login.js: -------------------------------------------------------------------------------- 1 | function toggleLoadingScreen(turn_on){ 2 | if (turn_on) { 3 | // console.log("Displaying loading screen..."); 4 | 
document.getElementById("loading").style.visibility = "visible"; 5 | 6 | }else{ 7 | // console.log("Closing loading screen."); 8 | document.getElementById("loading").style.visibility = "hidden"; 9 | } 10 | } 11 | 12 | // gets the username and password and attempts to setup the ssh connection 13 | function setupConnection(){ 14 | toggleLoadingScreen(true); 15 | 16 | // Setting up the connection 17 | let user = document.getElementById("username").value; 18 | let pwd = document.getElementById("pwd").value; 19 | console.log("setting up connection..."); 20 | // console.log("\t"+user+"\n\t"+ pwd); 21 | 22 | // async post request: 23 | $.ajax({ 24 | type: "POST", 25 | url: "/sshConnect", 26 | dataType: 'json', 27 | contentType: 'application/json', 28 | data: JSON.stringify({'user': user, 'pwd': pwd}), 29 | success: function(data, status, settings){ 30 | toggleLoadingScreen(false); 31 | console.log("successful login..."); 32 | console.log(data, status, settings) 33 | }, 34 | error: function(res, opt, err){ // Handling errors (2 possible -> no VPN or Invalid creds) 35 | console.log(res, opt, err); 36 | if (res.status === 401){ 37 | // Creating the error display box if it doesnt exist 38 | if (res.responseText === "creds"){ 39 | let errorElm = document.getElementById("errorText") 40 | if (! errorElm){ // if it doesnt exist we must first create it 41 | errorElm = document.createElement("p") 42 | errorElm.id = "errorText" 43 | 44 | let formElm = document.getElementById("cred-form"); 45 | formElm.insertBefore(errorElm, formElm.firstChild); 46 | } 47 | errorElm.textContent = "Incorrect credentials!" 48 | 49 | } else if (res.responseText === "vpn"){ 50 | let errorElm = document.getElementById("errorText") 51 | if (! errorElm){ // if it doesn't exist we must first create it 52 | errorElm = document.createElement("p") 53 | errorElm.id = "errorText" 54 | 55 | let formElm = document.getElementById("cred-form"); 56 | formElm.insertBefore(errorElm, formElm.firstChild); 57 | } 58 | errorElm.textContent = "Is your VPN on?" 
59 | } 60 | } else{ 61 | alert("Something wrong...") 62 | } 63 | toggleLoadingScreen(false); 64 | } 65 | }).done(function(response) { 66 | console.log(response); 67 | window.location = "/main" // redirects user to the main page 68 | }); 69 | 70 | } 71 | 72 | document.getElementById('login-btn').addEventListener('click', setupConnection); 73 | 74 | document.querySelectorAll('input').forEach( el => { 75 | el.addEventListener('keydown', e => { 76 | console.log(e.key); 77 | if(e.key === 'Enter') { 78 | let nextEl = el.nextElementSibling; 79 | if(nextEl.nodeName === 'INPUT') { 80 | nextEl.focus(); 81 | }else if (nextEl.nodeName === 'BUTTON') { 82 | nextEl.focus(); 83 | } else { 84 | alert("done"); 85 | } 86 | } 87 | }) 88 | }); -------------------------------------------------------------------------------- /GUI/src/mainPagejs/basics.js: -------------------------------------------------------------------------------- 1 | // Basic functions required by the entire html page 2 | function togglePopup(elemID,turn_on){ 3 | if (turn_on) { 4 | document.getElementById(elemID).style.visibility = "visible"; 5 | }else{ 6 | document.getElementById(elemID).style.visibility = "hidden"; 7 | } 8 | } 9 | 10 | function toggleLoadingScreen(turn_on){ 11 | togglePopup('loading', turn_on); 12 | } 13 | 14 | function switchTab(evt, tabname, activetab) { 15 | // Declare all variables 16 | var i, tabcontent, tablinks; 17 | 18 | // Get all elements with class="tabcontent" and hide them 19 | tabcontent = document.getElementsByClassName("tabcontent"); 20 | for (i = 0; i < tabcontent.length; i++) { 21 | tabcontent[i].style.display = "none"; 22 | } 23 | 24 | // Get all elements with class="tablinks" and remove the class "active" 25 | if (!activetab){ // Won't change the tab activation 26 | tablinks = document.getElementsByClassName("tablinks"); 27 | for (i = 0; i < tablinks.length; i++) { 28 | tablinks[i].className = tablinks[i].className.replace(" active", ""); 29 | } 30 | } 31 | 32 | // Show the current tab, and add an "active" class to the button that opened the tab 33 | document.getElementById(tabname).style.display = "block"; 34 | document.getElementById(tabname+'Btn').className += " active"; 35 | } 36 | 37 | function flash(elmID, filters){ 38 | var elm = document.getElementById(elmID); 39 | filter1 = filters? filters[0]: 'brightness(100%)'; 40 | filter2 = filters? filters[1]: 'brightness(50%)'; 41 | 42 | if (elm.style.filter === filter1){ 43 | elm.style.filter = filter2; 44 | 45 | } else { 46 | elm.style.filter = filter1; 47 | } 48 | }; 49 | 50 | function destroyChart(id){ 51 | Chart.helpers.each(Chart.instances, function(instance){ 52 | if (instance.chart.canvas.id === id){ 53 | instance.destroy(); 54 | } 55 | }); 56 | }; 57 | 58 | // Pan and zoom functionality for images: 59 | var img_ele = null, 60 | x_img_start = 0, 61 | y_img_start = 0, 62 | starting_L = 0, 63 | starting_T = 0; 64 | 65 | function zoom(zoomincrement, img_id) { 66 | img_ele = document.getElementById(img_id); 67 | var pre_width = img_ele.getBoundingClientRect().width, pre_height = img_ele.getBoundingClientRect().height; 68 | img_ele.style.width = (pre_width * zoomincrement) + 'px'; 69 | img_ele.style.height = (pre_height * zoomincrement) + 'px'; 70 | img_ele = null; 71 | } 72 | 73 | function start_drag(e) { 74 | img_ele = this; 75 | 76 | starting_L = parseInt(img_ele.style.left.split('px')[0]); 77 | starting_T = parseInt(img_ele.style.top.split('px')[0]); 78 | starting_L = (starting_L) ? 
starting_L : 0; // if the value is auto the conditional will be false (NaN) 79 | starting_T = (starting_T) ? starting_T : 0; // sets it to zero if auto. 80 | 81 | x_img_start = e.clientX; 82 | y_img_start = e.clientY; 83 | } 84 | 85 | function while_drag(e) { 86 | e.preventDefault(); 87 | var delta_x = e.clientX - x_img_start; 88 | var delta_y = e.clientY - y_img_start; 89 | 90 | if (img_ele !== null) { 91 | // calculating amount to move image by 92 | img_ele.style.left = delta_x + starting_L + 'px'; 93 | img_ele.style.top = delta_y + starting_T + 'px'; 94 | } 95 | } 96 | 97 | function stop_drag() { 98 | img_ele = null; 99 | } 100 | 101 | 102 | function resetPanandZoomVals(){ 103 | img_ele = null; 104 | x_img_start = 0; 105 | y_img_start = 0; 106 | } 107 | 108 | function addPanAndZoom(img_id){ 109 | var element = document.getElementById(img_id); 110 | 111 | // Adds pan and zoom functionality to the element 112 | element.addEventListener("wheel", function(e){ 113 | e.preventDefault(); 114 | zoom(1 - (e.deltaY/300)* 0.1, img_id); 115 | }); 116 | 117 | element.addEventListener('mousedown', start_drag); 118 | element.addEventListener('mousemove', while_drag); 119 | element.parentElement.addEventListener('mouseup', stop_drag); 120 | } 121 | 122 | function resetImagePos(img_id){ 123 | var element = document.getElementById(img_id); 124 | element.style.left = 'auto'; 125 | element.style.top = 'auto'; 126 | 127 | element.style.width = 'auto'; 128 | element.style.height = 'auto'; 129 | } 130 | 131 | function deleteProject(name){ 132 | name = (name) ? name : document.querySelector('#curr_project_name').textContent.split(':')[1].trim(); 133 | // console.log('deleting project: ', name); 134 | var args = 'project_name=' + encodeURIComponent(name); 135 | toggleLoadingScreen(true); 136 | 137 | $.ajax({ 138 | type: "POST", 139 | url: `/deleteProject?${args}`, 140 | dataType: 'json', 141 | success: function (data, status, settings) { 142 | console.log('project deleted', data); 143 | }, 144 | error: function (res, opt, err) { 145 | alert('Error\n' + res.status + ': ' + err); 146 | } 147 | }).done(function (response) { 148 | toggleLoadingScreen(false); 149 | }); 150 | } 151 | 152 | var UPDATE_RATE = null; 153 | var UPDATE_CALLBACKS = {}; // Saves the callbacks for all the tabs opened 154 | var UPDATE_ID; // the ID for the async update loop 155 | 156 | function clientUpdateLoop(){ 157 | // This function is a loop that runs in the background that retrieves updates from the server as they come in 158 | // and displays that data to the client depending on which tab they are on. 159 | 160 | // Checking which tab is active: 161 | var active = document.querySelector("body > div.tabs.disable-select > Button.active").id; 162 | var activeTab = active.substring(0, active.length-3); 163 | 164 | // console.log("active tab:", activeTab); 165 | // Not running for the top scoring tab because that would just be annoying when viewing molecules. 166 | // Also not really needed for the start a run page... 167 | if (activeTab !== "topScoring" && activeTab !== "startR"){ 168 | // Running the appropriate callback 169 | var callbackfn = UPDATE_CALLBACKS[activeTab]; 170 | if (callbackfn) callbackfn(); 171 | } 172 | } 173 | 174 | function resetUpdateLoop(){ 175 | // Used for when we already have the update rate and 176 | // want to restart the loop to prevent "double loading" of a tab.
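// (Tabs register their refresh callback in UPDATE_CALLBACKS when their tab button is clicked; topScoring.js below sets UPDATE_CALLBACKS["topScoring"] = bootTopScoringTab, for example. clientUpdateLoop() then invokes the entry for whichever tab is currently active.)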
177 | if (UPDATE_ID){ // Clearing any previous update loop 178 | clearInterval(UPDATE_ID); 179 | UPDATE_ID = null; 180 | } 181 | UPDATE_ID = setInterval(clientUpdateLoop, UPDATE_RATE); 182 | // console.log("update loop reset!"); 183 | } 184 | 185 | function startUpdateLoop(){ 186 | $.ajax({ 187 | type: "GET", 188 | url: "/getBasics", 189 | dataType: 'json', 190 | success: function (data, status, settings) { 191 | UPDATE_RATE = data.update_rate_ms; 192 | // console.log("update rate (ms):", UPDATE_RATE); 193 | if (UPDATE_RATE){ // IF NOT UNDEF 194 | UPDATE_ID = setInterval(clientUpdateLoop, UPDATE_RATE); 195 | } 196 | }, 197 | error: function (res, opt, err) { 198 | alert("Error!") 199 | console.log(res, opt, err); 200 | } 201 | }); 202 | } 203 | 204 | function startup(){ 205 | startUpdateLoop(); 206 | // Changing the background color: 207 | document.getElementsByTagName("html")[0].style.background = "#5A6E59"; 208 | } 209 | 210 | startup(); -------------------------------------------------------------------------------- /GUI/src/mainPagejs/topScoring.js: -------------------------------------------------------------------------------- 1 | var selected_smile = null; //keeps track of which smile is selected to highlight it. 2 | 3 | // Displays the selected smile 4 | function displaySelectedSmile(e){ 5 | if (selected_smile) selected_smile.className = ''; // clearing the last element 6 | 7 | selected_smile = e.currentTarget; // the clicked <a>; e.path is non-standard and unavailable outside Chromium 8 | selected_smile.className = 'disabled'; 9 | 10 | const new_smile = selected_smile.innerText; 11 | 12 | // requesting image: 13 | displayScaffold(new_smile); 14 | } 15 | 16 | // Displays the scaffold from request (if none is provided, we assume the most common scaffold) 17 | function displayScaffold(smile) { 18 | toggleLoadingScreen(true); 19 | 20 | var new_text = "Most Common Murcko Scaffold"; 21 | 22 | if (smile) new_text = smile; 23 | else if (selected_smile) selected_smile.className = ''; // Clearing the previously selected smile 24 | 25 | // Changing the title to match 26 | document.querySelector('#murckov-scaffold > div > h2').innerHTML = new_text; 27 | 28 | $.ajax({ 29 | type: "POST", 30 | url: "/topScoring", 31 | dataType: 'text', 32 | contentType: 'application/json', 33 | data: JSON.stringify({"smile": String(smile), "image":"true"}), 34 | beforeSend: function (xhr) { 35 | xhr.overrideMimeType('text/plain; charset=x-user-defined'); 36 | }, 37 | success: function (data, status, settings) { 38 | if(data.length < 1){ 39 | alert("The image doesn't exist"); 40 | $("#scaffoldImage").attr("src", "data:image/png;base64,"); 41 | return 42 | } 43 | var binary = ""; 44 | var responseText = data; 45 | var responseTextLen = responseText.length; 46 | 47 | for ( var i = 0; i < responseTextLen; i++ ) { 48 | binary += String.fromCharCode(responseText.charCodeAt(i) & 255) 49 | } 50 | $("#scaffoldImage").attr("src", "data:image/jpeg;base64,"+btoa(binary)); 51 | }, 52 | error: function (res, opt, err) { 53 | alert("Error in retrieving Murcko Scaffold") 54 | console.log(res,opt,err); 55 | } 56 | }).done(function (response) { 57 | resetImagePos('scaffoldImage'); 58 | toggleLoadingScreen(false); 59 | }); 60 | } 61 | 62 | // adds SMILES to the list 63 | function fillTopScoringList(compounds){ 64 | var list = document.querySelector('#top-scoring-list > ul'); 65 | 66 | // clearing data first: 67 | while (list.firstChild) 68 | list.removeChild(list.lastChild); 69 | 70 | // Adding each of them to the list: 71 | for (var comp in compounds){ 72 | var li = document.createElement('li'); 73 | var a =
document.createElement('a'); 74 | a.textContent = compounds[comp]; 75 | 76 | // Connecting them to an appropriate callback: 77 | a.onclick = displaySelectedSmile; 78 | 79 | li.appendChild(a); 80 | list.appendChild(li); 81 | } 82 | }; 83 | 84 | // Downloads a text file containing some specific text 85 | function download(filename, text) { 86 | var element = document.createElement('a'); 87 | element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); 88 | element.setAttribute('download', filename); 89 | 90 | element.style.display = 'none'; 91 | document.body.appendChild(element); 92 | 93 | element.click(); 94 | 95 | document.body.removeChild(element); 96 | } 97 | 98 | // Download button -> downloads list of all molecules 99 | document.querySelector("#download-list > img").onclick = function () { 100 | download('top-scoring-smiles.txt', 101 | document.querySelector("#top-scoring-list > ul").innerText); 102 | }; 103 | 104 | // Reload Button -> refreshes the image to be the most common scaffold 105 | document.querySelector('#reload-Murcko > img').onclick = function () { 106 | displayScaffold(); 107 | } 108 | 109 | function bootTopScoringTab(){ 110 | // request to get list of all molecules: 111 | $.ajax({ 112 | type: "POST", 113 | url: "/topScoring", 114 | contentType: 'application/json', 115 | data: JSON.stringify({"smile":"undefined", "image":"false"}), 116 | success: function (data, status, settings) { 117 | fillTopScoringList(data.top_hits); 118 | }, 119 | error: function (res, opt, err) { 120 | alert("Error: top scoring tab failure"); 121 | console.log(res, opt, err); 122 | } 123 | }).done(function (response) { 124 | // request to get the Most common Murcko scaffold 125 | displayScaffold(); 126 | }); 127 | } 128 | 129 | // Tab button 130 | document.getElementById("topScoringBtn").onclick = function() { 131 | addPanAndZoom('scaffoldImage'); 132 | toggleLoadingScreen(true); 133 | bootTopScoringTab(); 134 | switchTab(event, 'topScoring'); 135 | UPDATE_CALLBACKS["topScoring"] = bootTopScoringTab; // not used but left here for future possible use (replace with another function) 136 | resetUpdateLoop(); 137 | }; 138 | -------------------------------------------------------------------------------- /GUI/src/test.js: -------------------------------------------------------------------------------- 1 | // Create a simple CO2 molecule 2 | var mol = new Kekule.Molecule(); 3 | var atomC = mol.appendAtom('C'); 4 | var atomO1 = mol.appendAtom('O'); 5 | var atomO2 = mol.appendAtom('O'); 6 | mol.appendBond([atomC, atomO1], 2); 7 | mol.appendBond([atomC, atomO2], 2); 8 | 9 | // Get formula 10 | var formula = mol.calcFormula(); 11 | console.log('Formula: ', formula.getText()); 12 | 13 | // Output SMILES (IO module should be loaded in web application) 14 | var smiles = Kekule.IO.saveFormatData(mol, 'smi'); 15 | console.log('SMILES: ', smiles); 16 | 17 | // Output MOL2k (IO module should be loaded in web application) 18 | var mol2k = Kekule.IO.saveFormatData(mol, 'mol'); 19 | console.log('MOL 2000: \n', mol2k); 20 | -------------------------------------------------------------------------------- /GUI/templates/login.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Deep Docking GUI 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
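The credential form on this login page posts to /sshConnect (see login.js above), and the client tells bad credentials apart from an unreachable cluster by the body of the 401 response. The real handler belongs to the GUI's Flask backend and is not reproduced in this section; the snippet below is only a minimal sketch of that contract, assuming Flask and paramiko (both listed in installation/DeepDockingLocal.yml) and a placeholder cluster hostname.

import socket
import paramiko
from flask import Flask, request, jsonify

app = Flask(__name__)
CLUSTER_HOST = "cluster.example.org"  # placeholder; the real host depends on the user's setup

@app.route("/sshConnect", methods=["POST"])
def ssh_connect():
    creds = request.get_json()
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(CLUSTER_HOST, username=creds["user"], password=creds["pwd"], timeout=10)
    except paramiko.AuthenticationException:
        return "creds", 401   # login.js shows "Incorrect credentials!"
    except (paramiko.SSHException, socket.timeout, socket.error):
        return "vpn", 401     # login.js asks "Is your VPN on?"
    return jsonify({"status": "connected"})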
Deep Docking GUI
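The displayScaffold() handler in topScoring.js above asks /topScoring for an image, reads the response as raw bytes ('text/plain; charset=x-user-defined'), masks each character code with & 255, and rebuilds a base64 data URL with btoa(). The route that produces those bytes lives in the GUI's Flask backend and is not reproduced in this section; the snippet below is only a minimal sketch of the image branch of such a route, assuming Flask and RDKit (both listed in installation/DeepDockingLocal.yml) and a handler name chosen here for illustration.

import io
from flask import Flask, request, Response
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Scaffolds import MurckoScaffold

app = Flask(__name__)

@app.route("/topScoring", methods=["POST"])
def top_scoring_image():
    # Only the {"image": "true"} branch is sketched; the list request would
    # instead return JSON carrying a "top_hits" array for fillTopScoringList().
    payload = request.get_json()
    mol = Chem.MolFromSmiles(payload["smile"])
    if mol is None:
        # An empty body triggers the client's "The image doesn't exist" alert.
        return "", 200
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    buffer = io.BytesIO()
    Draw.MolToImage(scaffold, size=(400, 400)).save(buffer, format="PNG")
    # Raw image bytes; the JavaScript above turns them into a data URL.
    return Response(buffer.getvalue(), mimetype="image/png")

Sending the raw bytes keeps the route simple; the client-side & 255 masking undoes the x-user-defined charset widening before btoa() re-encodes the bytes.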
-------------------------------------------------------------------------------- /GUI/templates/test.html: --------------------------------------------------------------------------------
14 | 15 | 92 | -------------------------------------------------------------------------------- /GUI/webpack.config.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const webpack = require('webpack'); // eslint-disable-line no-unused-vars 4 | 5 | module.exports = { 6 | mode: 'development', //TODO: change to production when done 7 | entry: './src/index.js', 8 | output: { 9 | path: __dirname, 10 | filename: './public/js/bundle.js', 11 | }, 12 | context: __dirname, 13 | devtool: 'source-map', 14 | resolve: { 15 | extensions: ['.js', '.jsx', '.css'], 16 | alias: { 17 | jquery: "../node_modules/jquery/src/jquery" 18 | }, 19 | modules: ['node_modules'] 20 | }, 21 | module: { 22 | rules: [ 23 | { 24 | test: /\.js$/, 25 | exclude: /(node_modules)/, 26 | use: { 27 | loader: 'babel-loader', 28 | options: { 29 | presets: ['@babel/preset-env'] 30 | } 31 | } 32 | } 33 | ,{ 34 | test: /\.css$/i, 35 | use: ['style-loader', 'css-loader'] 36 | },{ 37 | // Now we apply rule for images 38 | test: /\.(png|jpe?g|gif|svg)$/, 39 | use: [ 40 | { 41 | // Using file-loader for these files 42 | loader: "file-loader", 43 | 44 | // In options we can set different things like format 45 | // and directory to save 46 | options: { 47 | outputPath: './public/webp-img' 48 | } 49 | } 50 | ] 51 | },{ 52 | // Apply rule for fonts files 53 | test: /\.(woff|woff2|ttf|otf|eot)$/, 54 | use: [ 55 | { 56 | // Using file-loader too 57 | loader: "file-loader", 58 | options: { 59 | outputPath: './public/fonts' 60 | } 61 | } 62 | ] 63 | },{ 64 | test: /\.(png|gif|cur|jpg)$/, 65 | loader: 'url-loader', 66 | query: { limit: 8192 } 67 | } 68 | ] 69 | } 70 | }; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 James Gleave 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /installation/DeepDockingLocal.yml: -------------------------------------------------------------------------------- 1 | name: DeepDockingLocal 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - python=3.6 8 | - paramiko 9 | - ipython 10 | - rdkit 11 | - flask 12 | - flask-cors 13 | - python-dotenv 14 | -------------------------------------------------------------------------------- /installation/fix_sh.sh: -------------------------------------------------------------------------------- 1 | # from https://stackoverflow.com/questions/800030/remove-carriage-return-in-unix 2 | grep -r --color=never --include="*.sh" $'\r' # looks for \r in file 3 | sed -i 's/\r$//g' # fixes the file -------------------------------------------------------------------------------- /installation/install-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create the local env 4 | python3 install.py --phase install_local 5 | # Activate the env 6 | conda activate DeepDockingLocal 2> conda.out 7 | # Install remote files and create remote env 8 | python3 install.py --phase install_remote 9 | -------------------------------------------------------------------------------- /installation/install-windows.bat: -------------------------------------------------------------------------------- 1 | python install.py --phase install_local 2 | call conda activate DeepDockingLocal > conda.out 3 | python install.py --phase install_remote -------------------------------------------------------------------------------- /installation/welcome_message.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ########################################################################### 6 | 7 | oooooooooo. 8 | `888' `Y8b 9 | 888 888 .ooooo. .ooooo. oo.ooooo. 10 | 888 888 d88' `88b d88' `88b 888' `88b 11 | 888 888 888ooo888 888ooo888 888 888 12 | 888 d88' 888 .o 888 .o 888 888 13 | o888bood8P' `Y8bod8P' `Y8bod8P' 888bod8P' 14 | 888 15 | oooooooooo. ooooo o8o 16 | `888' `Y8b `888 `"' 17 | 888 888 .ooooo. .ooooo. 888 oooo oooo ooo. .oo. .oooooooo 18 | 888 888 d88' `88b d88' `"Y8 888 .8P' `888 `888P"Y88b 888' `88b 19 | 888 888 888 888 888 888888. 888 888 888 888 888 20 | 888 d88' 888 888 888 .o8 888 `88b. 888 888 888 `88bod8P' 21 | o888bood8P' `Y8bod8P' `Y8bod8P' o888o o888o o888o o888o o888o `8oooooo. 22 | d" YD 23 | "Y88888P' 24 | 25 | ########################################################################### 26 | 27 | -------------------------------------------------------------------------------- /preparation_scripts/README.md: -------------------------------------------------------------------------------- 1 | Scripts to prepare chemical libraries and receptor structures. 
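morgan_fp.py below writes each fingerprint as a sparse CSV row: the ZINC ID followed by the indices of the Morgan bits that are set (nbits=1024, radius=2). A minimal sketch of turning such a row back into a dense bit vector; the helper name and example row are hypothetical.

import numpy as np

NBITS = 1024  # matches the nbits value hard-coded in morgan_fp.py below

def decode_fingerprint_row(row):
    # Row format written by morgan_fp.py: "ZINC_ID,idx1,idx2,..."
    fields = row.rstrip().split(',')
    zinc_id, on_bits = fields[0], fields[1:]
    fingerprint = np.zeros(NBITS, dtype=np.int8)
    fingerprint[[int(i) for i in on_bits]] = 1
    return zinc_id, fingerprint

# zinc_id, fp = decode_fingerprint_row("ZINC000000000001,1,20,614,733")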
2 | -------------------------------------------------------------------------------- /preparation_scripts/compute_morgan_fp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --partition=normal 3 | #SBATCH --ntasks=1 4 | #SBATCH --nodes=1 5 | #SBATCH --job-name=calculate_morgan_fing 6 | 7 | source ~/.bashrc 8 | conda activate $4 9 | 10 | start=`date +%s` 11 | 12 | python -u morgan_fp.py -sfp $1 -fn $2 -tp $3 13 | 14 | end=`date +%s` 15 | runtime=$((end-start)) 16 | echo $runtime 17 | -------------------------------------------------------------------------------- /preparation_scripts/morgan_fp.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import time 3 | import numpy as np 4 | import pickle 5 | from contextlib import closing 6 | from multiprocessing import Pool 7 | import multiprocessing 8 | from rdkit.Chem import AllChem 9 | from rdkit import DataStructs 10 | from rdkit import Chem 11 | from functools import partial 12 | import argparse 13 | import os 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('-sfp','--smile_folder_path',help='name of the folder with prepared smiles',required=True) 17 | parser.add_argument('-fn','--folder_name',help='name of morgan fingerprint folder',required=True) 18 | parser.add_argument('-tp','--tot_process',help='number of cores',required=True) 19 | 20 | io_args = parser.parse_args() 21 | sfp = io_args.smile_folder_path 22 | fn = io_args.folder_name 23 | t_pos = int(io_args.tot_process) 24 | 25 | def morgan_fingp(fname): 26 | nbits=1024 27 | radius=2 28 | fsplit = fname.split('/')[-1] 29 | ref2 = open(fn+'/'+fsplit,'a') 30 | with open(fname,'r') as ref: 31 | for line in ref: 32 | smile,zin_id = line.rstrip().split() 33 | arg = np.zeros((1,)) 34 | try: 35 | DataStructs.ConvertToNumpyArray(AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(smile),radius,nBits=nbits,useChirality=True),arg) 36 | 37 | ref2.write((',').join([zin_id]+[str(elem) for elem in np.where(arg==1)[0]])) 38 | ref2.write('\n') 39 | except: 40 | print(line) 41 | pass 42 | 43 | files = [] 44 | for f in glob.glob(sfp+'/*.txt'): 45 | files.append(f) 46 | 47 | try: 48 | os.mkdir(fn) 49 | except: 50 | pass 51 | 52 | t_f = len(files) 53 | t = time.time() 54 | with closing(Pool(np.min([multiprocessing.cpu_count(),t_pos]))) as pool: 55 | pool.map(morgan_fingp,files) 56 | print(time.time()-t) 57 | -------------------------------------------------------------------------------- /preparation_scripts/prepare_receptor.sh: -------------------------------------------------------------------------------- 1 | receptor_f=$1 2 | grid_points=$2 3 | grid_center=$3 4 | 5 | if [ "$1" == "-h" ]; then 6 | echo " 7 | 8 | PREPARE AUTODOCK DOCKING MAPS 9 | 10 | Usage: (ba)sh `basename $0` receptor grid_points grid_center path_adt 11 | 12 | - receptor = receptor pdb file, prepared 13 | - grid_points = 'x,y,z' format, point size of docking box (real size n_points*0.375 A) 14 | - grid_center = 'x,y,z' format, coordinates of docking box center 15 | - path_adt = path to autodock tools folder with python scripts (prepare_receptor4.py, etc etc..) 16 | " 17 | exit 0 18 | fi 19 | 20 | if [ "$1" != "-h" ] && [ $# -lt 4 ]; then 21 | echo "Not all the arguments were supplied; type 'sh prepare_receptor.sh -h' for help" 22 | exit 0 23 | fi 24 | 25 | receptor=$(echo $receptor_f|cut -d'.' 
-f1) 26 | 27 | IFS=',' read -r -a dim<<<$grid_points 28 | x_points="${dim[0]}" 29 | y_points="${dim[1]}" 30 | z_points="${dim[2]}" 31 | 32 | 33 | IFS=',' read -r -a crd<<<$grid_center 34 | x_crd="${crd[0]}" 35 | y_crd="${crd[1]}" 36 | z_crd="${crd[2]}" 37 | 38 | python $4/prepare_receptor4.py -r $receptor_f -U nphs_lps_waters_nonstdres 39 | wait 40 | 41 | python $4/prepare_gpf4.py -r $receptor'.'pdbqt -o grid_1_$receptor'.'gpf -p ligand_types='P,SA,S,Cl,Ca,Mn,Fe,Zn,Br,I' -p npts=$grid_points -p gridcenter=$grid_center 42 | wait 43 | 44 | python $4/prepare_gpf4.py -r $receptor'.'pdbqt -o grid_2_$receptor'.'gpf -p ligand_types='H,HD,HS,C,A,N,NA,NS,OA,OS,F,Mg' -p npts=$grid_points -p gridcenter=$grid_center 45 | wait 46 | 47 | autogrid4 -p grid_1_$receptor'.'gpf -l grid_1_$receptor'.'log 48 | autogrid4 -p grid_2_$receptor'.'gpf -l grid_2_$receptor'.'log 49 | 50 | rm *fld 51 | 52 | echo "# AVS field file 53 | # 54 | # AutoDock Atomic Affinity and Electrostatic Grids 55 | # 56 | # Created by autogrid4. 57 | # 58 | #SPACING 0.375 59 | #NELEMENTS $x_points $y_points $z_points 60 | #CENTER $x_crd $y_crd $z_crd 61 | #MACROMOLECULE $receptor.pdbqt 62 | #GRID_PARAMETER_FILE grid_$receptor.gpf 63 | # 64 | ndim=3 # number of dimensions in the field 65 | dim1=$(($x_points+1)) # number of x-elements 66 | dim2=$(($y_points+1)) # number of y-elements 67 | dim3=$(($z_points+1)) # number of z-elements 68 | nspace=3 # number of physical coordinates per point 69 | veclen=24 # number of affinity values at each point 70 | data=float # data type (byte, integer, float, double) 71 | field=uniform # field type (uniform, rectilinear, irregular) 72 | coord 1 file=$receptor.maps.xyz filetype=ascii offset=0 73 | coord 2 file=$receptor.maps.xyz filetype=ascii offset=2 74 | coord 3 file=$receptor.maps.xyz filetype=ascii offset=4 75 | label=H-affinity # component label for variable 1 76 | label=HD-affinity # component label for variable 2 77 | label=HS-affinity # component label for variable 3 78 | label=C-affinity # component label for variable 4 79 | label=A-affinity # component label for variable 5 80 | label=N-affinity # component label for variable 6 81 | label=NA-affinity # component label for variable 7 82 | label=NS-affinity # component label for variable 8 83 | label=OA-affinity # component label for variable 9 84 | label=OS-affinity # component label for variable 10 85 | label=F-affinity # component label for variable 11 86 | label=Mg-affinity # component label for variable 12 87 | label=P-affinity # component label for variable 13 88 | label=SA-affinity # component label for variable 14 89 | label=S-affinity # component label for variable 15 90 | label=Cl-affinity # component label for variable 16 91 | label=Ca-affinity # component label for variable 17 92 | label=Mn-affinity # component label for variable 18 93 | label=Fe-affinity # component label for variable 19 94 | label=Zn-affinity # component label for variable 20 95 | label=Br-affinity # component label for variable 21 96 | label=I-affinity # component label for variable 22 97 | label=Electrostatics # component label for variable 22 98 | label=Desolvation # component label for variable 23 99 | # 100 | # location of affinity grid files and how to read them 101 | # 102 | variable 1 file=$receptor.H.map filetype=ascii skip=6 103 | variable 2 file=$receptor.HD.map filetype=ascii skip=6 104 | variable 3 file=$receptor.HS.map filetype=ascii skip=6 105 | variable 4 file=$receptor.C.map filetype=ascii skip=6 106 | variable 5 file=$receptor.A.map filetype=ascii 
skip=6 107 | variable 6 file=$receptor.N.map filetype=ascii skip=6 108 | variable 7 file=$receptor.NA.map filetype=ascii skip=6 109 | variable 8 file=$receptor.NS.map filetype=ascii skip=6 110 | variable 9 file=$receptor.OA.map filetype=ascii skip=6 111 | variable 10 file=$receptor.OS.map filetype=ascii skip=6 112 | variable 11 file=$receptor.F.map filetype=ascii skip=6 113 | variable 12 file=$receptor.Mg.map filetype=ascii skip=6 114 | variable 13 file=$receptor.P.map filetype=ascii skip=6 115 | variable 14 file=$receptor.SA.map filetype=ascii skip=6 116 | variable 15 file=$receptor.S.map filetype=ascii skip=6 117 | variable 16 file=$receptor.Cl.map filetype=ascii skip=6 118 | variable 17 file=$receptor.Ca.map filetype=ascii skip=6 119 | variable 18 file=$receptor.Mn.map filetype=ascii skip=6 120 | variable 19 file=$receptor.Fe.map filetype=ascii skip=6 121 | variable 20 file=$receptor.Zn.map filetype=ascii skip=6 122 | variable 21 file=$receptor.Br.map filetype=ascii skip=6 123 | variable 22 file=$receptor.I.map filetype=ascii skip=6 124 | variable 23 file=$receptor.e.map filetype=ascii skip=6 125 | variable 24 file=$receptor.d.map filetype=ascii skip=6">>$receptor.maps.fld 126 | 127 | rm -r ad_grids 128 | mkdir ad_grids 129 | mv *map* ad_grids/ 130 | -------------------------------------------------------------------------------- /util/ProgressBar.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import re 4 | 5 | 6 | class ProgressBar(object): 7 | DEFAULT = 'Progress: %(bar)s %(percent)3d%%' 8 | FULL = '%(bar)s %(current)d/%(total)d (%(percent)3d%%) %(remaining)d to go' 9 | 10 | def __init__(self, total, width=40, fmt=DEFAULT, symbol='=', 11 | output=sys.stderr): 12 | assert len(symbol) == 1 13 | 14 | self.total = total 15 | self.width = width 16 | self.symbol = symbol 17 | self.output = output 18 | self.fmt = re.sub(r'(?P%\(.+?\))d', r'\g%dd' % len(str(total)), fmt) 19 | self.current = 0 20 | 21 | def __call__(self): 22 | percent = self.current / float(self.total) 23 | size = int(self.width * percent) 24 | remaining = self.total - self.current 25 | bar = '[' + self.symbol * size + ' ' * (self.width - size) + ']' 26 | 27 | args = { 28 | 'total': self.total, 29 | 'bar': bar, 30 | 'current': self.current, 31 | 'percent': percent * 100, 32 | 'remaining': remaining 33 | } 34 | print('\r' + self.fmt % args, file=self.output, end='') 35 | 36 | def done(self): 37 | self.current = self.total 38 | self() 39 | print('', file=self.output) 40 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | from .ProgressBar import ProgressBar 2 | -------------------------------------------------------------------------------- /util/__pycache__/ProgressBar.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/ProgressBar.cpython-36.pyc -------------------------------------------------------------------------------- /util/__pycache__/ProgressBar.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/ProgressBar.cpython-38.pyc 
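util/ProgressBar.py above implements a small terminal progress bar that redraws itself in place on stderr. A minimal usage sketch (the total of 250 work units is arbitrary):

from util import ProgressBar  # re-exported by util/__init__.py

bar = ProgressBar(total=250, fmt=ProgressBar.FULL)
for _ in range(250):
    # ... one unit of work ...
    bar.current += 1
    bar()       # redraw the bar in place
bar.done()      # jump to 100% and emit the trailing newline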
-------------------------------------------------------------------------------- /util/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /util/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /util/figures/DDGUI-DD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/DDGUI-DD.png -------------------------------------------------------------------------------- /util/figures/Monitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/Monitor.png -------------------------------------------------------------------------------- /util/figures/Progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/Progress.png -------------------------------------------------------------------------------- /util/figures/login_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/login_screen.png -------------------------------------------------------------------------------- /util/figures/models_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/models_full.png -------------------------------------------------------------------------------- /util/figures/new_project_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/new_project_info.png -------------------------------------------------------------------------------- /util/figures/progress_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/progress_full.png -------------------------------------------------------------------------------- /util/figures/start_a_run_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/start_a_run_full.png -------------------------------------------------------------------------------- /util/figures/top_scoring_full.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/top_scoring_full.png --------------------------------------------------------------------------------