├── .gitignore ├── Docking ├── GUI │ ├── README.md │ ├── __init__.py │ ├── fast_top_hit_search.py │ ├── generate_images.py │ ├── overloaded_final_extraction.py │ ├── process_gui_data.py │ ├── run_search.sh │ ├── slurm_job_manager.py │ └── update_gui.sh ├── ML │ ├── DDCallbacks.py │ ├── DDMetrics.py │ ├── DDModel.py │ ├── DDModelExceptions.py │ ├── Models.py │ ├── Parser.py │ ├── Tokenizer.py │ ├── __init__.py │ ├── data_generator.py │ ├── lasso_regularizer.py │ ├── load_data.py │ ├── model_tuner.py │ ├── transformer_layers.py │ ├── tuner_config.txt │ └── utils.py ├── ProgressiveDocking │ ├── Extract_labels.py │ ├── Extracting_morgan.py │ ├── Extracting_smiles.py │ ├── Prediction_morgan_1024.py │ ├── __init__.py │ ├── activation_script.sh │ ├── autodock_gpu_ad.sh │ ├── check_phase.py │ ├── deactivation_script.sh │ ├── final_extraction.py │ ├── final_extraction.sh │ ├── hyperparameter_result_evaluation.py │ ├── jobid_writer.py │ ├── molecular_file_count_updated.py │ ├── optimize_models.py │ ├── phase_1.sh │ ├── phase_2.sh │ ├── phase_3.sh │ ├── phase_3_concluding_combination.sh │ ├── phase_4.sh │ ├── phase_5.sh │ ├── phase_a.sh │ ├── phase_changer.py │ ├── prepare_ligands_ad.sh │ ├── progressive_docking.py │ ├── reset.py │ ├── reset1.sh │ ├── reset2.sh │ ├── reset3.sh │ ├── reset4.sh │ ├── reset5.sh │ ├── sampling.py │ ├── sanity_check.py │ ├── settings.json │ ├── setup_slurm_specifications.py │ ├── simple_job_models.py │ ├── simple_job_predictions.py │ ├── slurm_file_manager.py │ ├── split_chunks.sh │ ├── split_sdf.py │ ├── util_functions.py │ └── venv_sanity_check.py └── __init__.py ├── GUI ├── README.md ├── package-lock.json ├── package.json ├── public │ ├── css │ │ └── oldSchool.css │ ├── img │ │ ├── Indicator_light_g.svg │ │ ├── Indicator_light_r.svg │ │ ├── Indicator_light_y.svg │ │ ├── close_button.svg │ │ ├── download_icon.svg │ │ ├── left_switch.svg │ │ ├── loading_svg.svg │ │ ├── loading_svg_ripple.svg │ │ ├── reload_icon.svg │ │ └── right_switch.svg │ ├── js │ │ ├── bundle.js │ │ └── bundle.js.map │ └── webp-img │ │ ├── 00b42403057e60520cb497d92556b982.png │ │ ├── 0638bec8443dd6e3385084884ed644a2.png │ │ ├── 1f0710a4a9c764c4801a6b0bbd1f6744.png │ │ ├── 2a9beabef112cd5d9b57edafe04ecd82.png │ │ ├── 8a9e1648449beda9d58190f1bc4a749e.png │ │ ├── c2e5bc8f9058ad350eed2e2559c63174.png │ │ ├── d2d023bf09910fa13a8d59977bad92dd.png │ │ └── d3698da8e2bda9d79e1bb514e2d600fa.png ├── server.py ├── src │ ├── __init__.py │ ├── backend │ │ ├── DataHistory.py │ │ ├── EmailBot.py │ │ ├── EventHandler.py │ │ ├── __init__.py │ │ ├── auto_ssh.py │ │ ├── backend.py │ │ ├── backend_exceptions.py │ │ ├── backend_sanity_check.py │ │ └── cluster_commands.py │ ├── index.js │ ├── login.js │ ├── mainPagejs │ │ ├── basics.js │ │ ├── models.js │ │ ├── progress.js │ │ ├── startarun.js │ │ └── topScoring.js │ └── test.js ├── templates │ ├── login.html │ ├── mainPage.html │ └── test.html └── webpack.config.js ├── LICENSE ├── README.md ├── __init__.py ├── installation ├── DeepDockingLocal.yml ├── fix_sh.sh ├── install-linux.sh ├── install-windows.bat ├── install.py └── welcome_message.txt ├── preparation_scripts ├── README.md ├── compute_morgan_fp.sh ├── morgan_fp.py └── prepare_receptor.sh └── util ├── ProgressBar.py ├── __init__.py ├── __pycache__ ├── ProgressBar.cpython-36.pyc ├── ProgressBar.cpython-38.pyc ├── __init__.cpython-36.pyc └── __init__.cpython-38.pyc └── figures ├── DDGUI-DD.png ├── Monitor.png ├── Progress.png ├── login_screen.png ├── models_full.png ├── new_project_info.png ├── progress_full.png ├── 
start_a_run_full.png └── top_scoring_full.png /.gitignore: -------------------------------------------------------------------------------- 1 | Scratch 2 | GUI/node_modules 3 | /venv 4 | *.code-workspace 5 | *.pyc 6 | db.json 7 | installation/installation_information.json 8 | installation/*.out 9 | activation_script.sh 10 | GUI/src/backend/projects/*.json 11 | __pycache__ 12 | GUI/src/backend/__pycache__/ 13 | GUI/__pycache__/ 14 | util/__pycache__/__init__.cpython-36.pyc 15 | util/__pycache__/__init__.cpython-37.pyc 16 | util/__pycache__/ProgressBar.cpython-36.pyc 17 | util/__pycache__/ProgressBar.cpython-37.pyc 18 | -------------------------------------------------------------------------------- /Docking/GUI/README.md: -------------------------------------------------------------------------------- 1 | # GUI 2 | 3 | While the GUI is running, these scripts will generate information for display and save it as a pickle file. 4 | -------------------------------------------------------------------------------- /Docking/GUI/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/Docking/GUI/__init__.py -------------------------------------------------------------------------------- /Docking/GUI/fast_top_hit_search.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import pandas as pd 4 | import os 5 | try: 6 | import __builtin__ 7 | except ImportError: 8 | # Python 3 9 | import builtins as __builtin__ 10 | 11 | # For debugging purposes only: 12 | def print(*args, **kwargs): 13 | __builtin__.print('\t fast top hit search: ', end="") 14 | return __builtin__.print(*args, **kwargs) 15 | 16 | 17 | def find_top_n_predicted_molecules(file_path): 18 | # Search through the predicted morgan files and find the top hits 19 | n = search_size 20 | n = min(100, n) # Cap the number of molecules to 100 21 | 22 | # Read the CSV and extract the top hits 23 | df = pd.read_csv(file_path, names=['id', "score", ]) 24 | top_n = df.nlargest(n, "score") 25 | 26 | # return a series of the top n predictions as a value in a dictionary where the key is the file it was found in 27 | return os.path.basename(file_path), top_n 28 | 29 | 30 | def find_matching_smiles(smile_database_path, file_path, search_dict, itr): 31 | # Grab the targets we are looking for 32 | targets = search_dict[file_path]['id'].tolist() 33 | print("Debug: This process is searching for", targets, "in file", file_path) 34 | 35 | # Read the smile file corresponding to the predictions 36 | smile_file = os.path.join(smile_database_path, os.path.basename(file_path)) 37 | df = pd.read_csv(smile_file, delimiter=" ", index_col=1) 38 | 39 | # Loop through the targets and check if it is found in the file 40 | with open(itr + "/top_hits.csv", "a") as top_hits: 41 | for target in targets: 42 | if target in df.index: 43 | print("Found target:", target) 44 | found_smile = df.loc[target, 'smiles'] 45 | # Write to the top_hits.csv file as: smile,id,score 46 | top_hits.write(found_smile + "," + target + "\n") 47 | 48 | 49 | if __name__ == '__main__': 50 | import argparse 51 | args = argparse.ArgumentParser() 52 | args.add_argument("-sdb", "--smile_database", required=True, type=str) 53 | args.add_argument("-pdb", "--predicted_database", required=True, type=str) 54 | args.add_argument("-tp", "--total_processors", 
required=True, type=int) 55 | args.add_argument("-n", required=True, type=int) 56 | info = args.parse_args() 57 | 58 | # Get the search size for each process 59 | prediction_files = [os.path.join(info.predicted_database, f) for f in os.listdir(info.predicted_database) if 'smile' in f] 60 | num_prediction_files = len(prediction_files) 61 | search_size = round(info.n/num_prediction_files) 62 | num_processes = min([info.total_processors, num_prediction_files]) 63 | 64 | # Get the file path 65 | itr_path = str(info.predicted_database).replace("/morgan_1024_predictions", "") 66 | 67 | # Make sure we have the prediction files 68 | assert os.path.exists(info.predicted_database), print("Phase 5 Incomplete...") 69 | with open(itr_path + "/top_hits.csv", "w") as init_top_hits: 70 | init_top_hits.write("smile,id\n") 71 | 72 | print("Starting search...") 73 | print("We have the following arguments passed:") 74 | print(" - Number of files to search:", num_prediction_files) 75 | print(" - Number of molecules to find:", info.n) 76 | print(" - Search size:", search_size) 77 | print(" - Number of processes:", num_processes) 78 | print(" - Smile database:", info.smile_database) 79 | print(" - Predicted database:", info.predicted_database) 80 | 81 | print("Finding top predictions") 82 | # Search for the top predicted hits 83 | with closing(Pool(num_processes)) as pool: 84 | predicted = pool.map(find_top_n_predicted_molecules, prediction_files) 85 | print(" - Done") 86 | 87 | # Arrange all of the top predictions into a dictionary indexed by their file name 88 | search = {} 89 | print("Finding top smiles") 90 | for top_list in predicted: 91 | # {os.path.basename(file_path): top_n} 92 | file_name, predictions = top_list 93 | search[file_name] = predictions 94 | 95 | # Generate the args for the multiprocessing 96 | mp_args = [] 97 | for key in search.keys(): 98 | mp_args.append((info.smile_database, key, search, itr_path)) 99 | 100 | # Start searching for the top hits from the smile database 101 | with closing(Pool(num_processes)) as pool: 102 | pool.starmap(find_matching_smiles, mp_args) 103 | print(" - Done") 104 | 105 | -------------------------------------------------------------------------------- /Docking/GUI/generate_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def generate_model_image(model_path): 5 | model_name = os.path.basename(model_path) 6 | iteration = os.path.basename(model_path.replace("/all_models/" + model_name, "")) 7 | 8 | # Get the project name -> .../project_name/iteration_n/all_models/model_name -> [..., project_name, ., ., .] 9 | project_name = model_path.split("/")[-4] 10 | file_name = "GUI/images/models/{}_{}_{}.png".format(project_name, model_name, iteration) 11 | 12 | # check if the image already exists... 
if it does then skip generating a new one 13 | if not os.path.exists(file_name): 14 | import tensorflow as tf 15 | from tensorflow.keras.models import load_model 16 | tf.keras.utils.plot_model( 17 | load_model(model_path), 18 | to_file=file_name, 19 | show_shapes=True, 20 | show_layer_names=True) 21 | 22 | # Grab the hyperparameter info 23 | from ML.Parser import Parser 24 | try: 25 | info = Parser.parse_ddss(model_path + ".ddss") 26 | except FileNotFoundError: 27 | info = {} 28 | print(file_name + "&&&" + str(info)) 29 | 30 | 31 | def generate_molecule_image(path, limit=25): 32 | from rdkit.Chem.Scaffolds import MurckoScaffold 33 | from rdkit.Chem import MolFromSmiles 34 | from rdkit.Chem.Draw import MolToImage 35 | from PIL import ImageDraw 36 | 37 | if os.path.exists(path): 38 | # Read the hits file 39 | smiles = [] 40 | ids = [] 41 | with open(path, 'r') as top_hits: 42 | for line_number, line in enumerate(top_hits.readlines()): 43 | if line_number >= limit: 44 | break 45 | smiles.append(line.split(" ")[0]) 46 | ids.append(line.split(" ")[1]) 47 | 48 | # Generate scaffold 49 | for smile, mid in zip(smiles, ids): 50 | mol = MurckoScaffold.GetScaffoldForMol(MolFromSmiles(smile)) 51 | image = MolToImage(mol) 52 | 53 | # Add text to the image 54 | draw = ImageDraw.Draw(image) 55 | draw.text((5, 5), mid, fill="black", align="right") 56 | image.save("GUI/images/molecules/{}.png".format(smile)) 57 | else: 58 | return 59 | 60 | 61 | if __name__ == '__main__': 62 | import argparse 63 | import sys 64 | sys.path.append(".") 65 | 66 | parser = argparse.ArgumentParser() 67 | parser.add_argument("--image_of", '-imof') 68 | parser.add_argument("--path_to_model") 69 | parser.add_argument("--path_to_molecules") 70 | args = parser.parse_args() 71 | 72 | try: 73 | os.mkdir("GUI/images") 74 | os.mkdir("GUI/images/molecules") 75 | os.mkdir("GUI/images/models") 76 | except OSError: 77 | pass 78 | 79 | if args.image_of == 'model': 80 | generate_model_image(args.path_to_model) 81 | elif args.image_of in {"molec", "molecule"}: 82 | generate_molecule_image(args.path_to_molecules) -------------------------------------------------------------------------------- /Docking/GUI/overloaded_final_extraction.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import multiprocessing 4 | import pandas as pd 5 | import argparse 6 | import random 7 | import glob 8 | import sys 9 | import os 10 | 11 | 12 | def merge_on_smiles(pred_file): 13 | print("Merging " + os.path.basename(pred_file) + "...", end=" ") 14 | 15 | # Read the predictions 16 | pred = pd.read_csv(pred_file, names=["id", "score"], index_col=0) 17 | pred.drop_duplicates() 18 | 19 | # Read the smiles 20 | smile_file = os.path.join(args.smile_dir, os.path.basename(pred_file)) 21 | smi = pd.read_csv(smile_file, delimiter=" ", names=["smile", "id"], index_col=1) 22 | smi = smi.drop_duplicates() 23 | 24 | # Merge on the IDs and sort by the score 25 | merged = pd.merge(pred, smi, how="inner", on=["id"]) 26 | merged.sort_values(by="score", ascending=False, inplace=True) 27 | 28 | # Save to a csv as (mean_score)_(base_name).csv 29 | size = len(merged) 30 | file_name = "extracted_smiles/" + str(size) + "_" + os.path.basename(pred_file) + ".csv" 31 | merged.to_csv(file_name) 32 | print("Done") 33 | 34 | return file_name 35 | 36 | 37 | def kinda_merge_sort(f): 38 | # Unpack 39 | n, f1, f2 = f 40 | if n is None: 41 | print("Merging", f1, "with", f2, " - Non 
Terminal") 42 | else: 43 | print("Merging", f1, "with", f2, " - Terminal") 44 | 45 | # Combine f1 and f2 then sort the dataframe 46 | combined = pd.concat([pd.read_csv(f1, index_col=0), 47 | pd.read_csv(f2, index_col=0)]) 48 | combined.sort_values(by="score", ascending=False, inplace=True) 49 | 50 | # Remove the two files 51 | os.remove(f1) 52 | os.remove(f2) 53 | 54 | # If it is the final merge then get the top_n and save to csv 55 | if n is not None and n.lower() != "all": 56 | # If n != "all", then we should not take all of the top hits... 57 | combined = combined.head(int(n)) 58 | # We will finalize our extraction by separating our combined dataframe into two new ones 59 | finalize(combined) 60 | return "" 61 | elif n is not None and n.lower() == "all": 62 | # We will finalize our extraction by separating our combined dataframe into two new ones 63 | finalize(combined) 64 | return "" 65 | else: 66 | # If it is not the final merge iteration, merge as usual 67 | # Generate a random key 68 | size = len(combined) 69 | key = str(size) + "-" 70 | for _ in range(30): 71 | key += str(random.randint(0, 9)) 72 | 73 | f12 = "extracted_smiles/" + key + ".csv" 74 | combined.to_csv(f12) 75 | return f12 76 | 77 | 78 | def finalize(combined): 79 | print("Finished... Saving") 80 | # Rearrange the smiles 81 | smiles = combined.drop('score', 1) 82 | smiles = smiles[["smile"]] 83 | print("Here is the smiles:") 84 | print(smiles.head()) 85 | smiles.to_csv("smiles.csv", sep=" ") 86 | 87 | # Rearrange for id,score 88 | combined.drop("smile", 1, inplace=True) 89 | combined.to_csv("id_score.csv") 90 | print("Here are the ids and scores") 91 | print(combined.head()) 92 | 93 | 94 | if __name__ == '__main__': 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument("-smile_dir", required=True) 97 | parser.add_argument("-morgan_dir", required=True) 98 | parser.add_argument("-processors", required=True) 99 | parser.add_argument("-mols_to_dock", required=False, default="all") 100 | 101 | args = parser.parse_args() 102 | predictions = [] 103 | 104 | for file in glob.glob(args.morgan_dir + "/*"): 105 | if "smile" in os.path.basename(file): 106 | predictions.append(file) 107 | 108 | print("Morgan Dir: " + args.morgan_dir) 109 | print("Smile Dir: " + args.smile_dir) 110 | print("Number Of Files: ", len(predictions)) 111 | # Sort the predictions 112 | # Our name looks like -> smile_all_N.txt and we want N so we get: 113 | # smile_all_N.txt -> ["smile_all_N", "txt"] -> "smile_all_N" -> ["smile", "all", "N"] -> N 114 | predictions.sort(key=lambda x: int(x.split(".")[0].split("_")[-1])) 115 | 116 | # combine the files 117 | print("Finding smiles...") 118 | print("Number of CPUs: " + str(multiprocessing.cpu_count())) 119 | num_jobs = min(len(predictions), int(args.processors)) 120 | 121 | # Try to create a directory for the smile CSVs 122 | try: 123 | print("Created 'extracted_smiles' Directory") 124 | os.mkdir("extracted_smiles/") 125 | with closing(Pool(num_jobs)) as pool: 126 | file_paths = pool.map(merge_on_smiles, predictions) 127 | except IOError: 128 | print("The 'extracted_smiles' Directory Exists... 
Skipping initial merge.") 129 | file_paths = ["extracted_smiles/" + f for f in os.listdir("extracted_smiles/")] 130 | 131 | # combine all files in the list and sort the values 132 | print("Merging Complete - Concatenating all files...") 133 | 134 | # Run this mapping until we have only a single file left 135 | # We merge each file in parallel and sort them 136 | merging_iteration = 0 137 | num_files = len(os.listdir("extracted_smiles/")) 138 | is_final_iteration = False 139 | while num_files > 1: 140 | # Check if final iteration or if this merge is the final merge 141 | top_n = None if num_files != 2 else args.mols_to_dock 142 | merging_iteration += 1 143 | print("Merging Iteration:", merging_iteration) 144 | print("Files Remaining:", num_files) 145 | print("Percent Complete:", round(1 / num_files, 3) * 100, "%") 146 | 147 | # Create the arguments to run the merge 148 | merging_args = [] 149 | for i in range(len(file_paths) - 1, -1, -2): 150 | if i - 1 >= 0: 151 | merging_args.append((top_n, 152 | file_paths[i], 153 | file_paths[i - 1])) 154 | 155 | # Remove the file paths from the list since they have been combined 156 | file_paths.remove(file_paths[i]) 157 | file_paths.remove(file_paths[i - 1]) 158 | 159 | # Run the jobs and gather all of the file path 160 | num_jobs = min(len(merging_args), int(args.processors)) 161 | with closing(Pool(num_jobs)) as pool: 162 | file_paths += pool.map(kinda_merge_sort, merging_args) 163 | 164 | # Update the number of files 165 | num_files = len(os.listdir("extracted_smiles/")) 166 | 167 | with open("final_phase.info", "w") as info: 168 | info.write("Finished") -------------------------------------------------------------------------------- /Docking/GUI/run_search.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -n 1 3 | #SBATCH --job-name=smile_searching 4 | #SBATCH --cpus-per-task=25 5 | #SBATCH --output=slurm-%x.%j.out 6 | #SBATCH --error=slurm-%x.%j.err 7 | 8 | # Read input 9 | project_path=$1 10 | n_cpus=$2 11 | iteration=$3 12 | n=$4 13 | 14 | echo Args: 15 | echo Iteration: $iteration 16 | echo Total CPUs: $n_cpus 17 | echo Project Path: $project_path 18 | echo Project Name: $(basename "$project_path") 19 | echo Num Mols: $n 20 | 21 | # Set constant 22 | smile_directory=`sed -n '5p' $project_path/logs.txt` 23 | 24 | cd .. 
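# --- annotation (not part of the original run_search.sh) ---
# Hedged usage sketch: this script reads four positional arguments
# (project_path, n_cpus, iteration, n) and requests 25 CPUs per task above,
# so a submission presumably looks like (all values below are placeholders):
#   sbatch run_search.sh /path/to/project 25 3 1000
# The `sed -n '5p' $project_path/logs.txt` line assumes the SMILES library
# directory is stored on line 5 of the project's logs.txt; that reading is
# inferred from the variable name, not stated elsewhere in this section.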
25 | # This should activate the conda environment 26 | source ~/.bashrc 27 | source activation_script.sh 28 | 29 | cd GUI 30 | python fast_top_hit_search.py -sdb $smile_directory -pdb $project_path/iteration_$iteration/morgan_1024_predictions -tp $n_cpus -n $n 31 | -------------------------------------------------------------------------------- /Docking/GUI/update_gui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 "$2"/process_gui_data.py --project_path "$1" --pickle_path "$2" --current_user "$3" 3 | -------------------------------------------------------------------------------- /Docking/ML/DDCallbacks.py: -------------------------------------------------------------------------------- 1 | """ 2 | James Gleave 3 | v1.1.0 4 | """ 5 | 6 | from tensorflow.keras.callbacks import Callback 7 | import pandas as pd 8 | import time 9 | import os 10 | 11 | 12 | class DDLogger(Callback): 13 | """ 14 | Logs the important data regarding model training 15 | """ 16 | 17 | def __init__(self, log_path, 18 | max_time=36000, 19 | max_epochs=500, 20 | monitoring='val_loss', ): 21 | super(Callback, self).__init__() 22 | # Params 23 | self.max_time = max_time 24 | self.max_epochs = max_epochs 25 | self.monitoring = monitoring 26 | 27 | # Stats 28 | self.epoch_start_time = 0 29 | self.current_epoch = 0 30 | 31 | # File 32 | self.log_path = log_path 33 | self.model_history = {} 34 | 35 | def on_train_begin(self, logs={}): 36 | self.epoch_start_time = time.time() 37 | 38 | def on_epoch_begin(self, epoch, logs=None): 39 | self.epoch_start_time = time.time() 40 | 41 | def on_epoch_end(self, epoch, logs={}): 42 | # Store the data 43 | current_time = time.time() 44 | epoch_duration = current_time - self.epoch_start_time 45 | logs['time_per_epoch'] = epoch_duration 46 | self.model_history["epoch_" + str(epoch + 1)] = logs 47 | 48 | # Estimate time to completion 49 | estimate, elapsed, (s, p, x) = self.estimate_training_time() 50 | logs['estimate_time'] = estimate 51 | logs['time_elapsed'] = elapsed 52 | self.model_history["epoch_" + str(epoch + 1)] = logs 53 | 54 | # Save the data to a csv 55 | df = pd.DataFrame(self.model_history) 56 | df.to_csv(self.log_path) 57 | 58 | print("Time taken calculating callbacks:", time.time()-current_time) 59 | 60 | def estimate_training_time(self): 61 | max_allotted_time = self.max_time 62 | max_allotted_epochs = self.max_epochs 63 | 64 | # Grab the info about the model 65 | model_loss = [] 66 | time_per_epoch = [] 67 | for epoch in self.model_history: 68 | model_loss.append(self.model_history[epoch]['val_loss']) 69 | time_per_epoch.append(self.model_history[epoch]['time_per_epoch']) 70 | 71 | time_elapsed = sum(time_per_epoch) 72 | average_time_per_epoch = sum(time_per_epoch) / len(time_per_epoch) 73 | current_epoch = len(time_per_epoch) 74 | 75 | # Find out if the model is approaching an early stop 76 | epochs_until_early_stop = 10 77 | stopping_vector = [] 78 | prev_loss = model_loss[0] 79 | for loss in model_loss: 80 | improved = loss < prev_loss 81 | stopping_vector.append(improved) 82 | if improved: 83 | prev_loss = loss 84 | 85 | # Check how close we are to an early stop 86 | longest_failure = 0 87 | for improved in stopping_vector: 88 | if not improved: 89 | longest_failure += 1 90 | else: 91 | longest_failure = 0 92 | 93 | max_time = max_allotted_epochs * average_time_per_epoch if max_allotted_epochs * average_time_per_epoch < max_allotted_time else max_allotted_time 94 | time_if_early_stop = 
(epochs_until_early_stop - longest_failure) * average_time_per_epoch 95 | 96 | # Estimate a completion time 97 | loss_drops = stopping_vector.count(True) 98 | loss_gains = len(stopping_vector) - loss_drops 99 | try: 100 | gain_drop_ratio = loss_gains / loss_drops 101 | except ZeroDivisionError: 102 | gain_drop_ratio = 0 103 | 104 | # Created a function to estimate training time 105 | power = 1 - (gain_drop_ratio ** 3 / 5) 106 | time_estimate = (max_time ** power) / (1 + longest_failure) 107 | 108 | # Smooth out the estimate 109 | if current_epoch > 1: 110 | last = self.model_history['epoch_{}'.format(current_epoch - 1)]['estimate_time'] 111 | time_estimate = (time_estimate + last) / 2 112 | 113 | # If the time estimate surpasses the max time then just show the max time 114 | time_for_remaining_epochs = (self.max_epochs - current_epoch) * average_time_per_epoch 115 | if time_for_remaining_epochs < time_estimate: 116 | time_estimate = time_for_remaining_epochs 117 | 118 | return time_estimate, time_elapsed, (longest_failure, gain_drop_ratio, max_time) 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /Docking/ML/DDMetrics.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.keras import backend as K 5 | 6 | 7 | def recall(y_true, y_pred): 8 | true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) 9 | possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) 10 | recall_keras = true_positives / (possible_positives + K.epsilon()) 11 | return recall_keras 12 | 13 | 14 | def precision(y_true, y_pred): 15 | true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) 16 | predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) 17 | precision_keras = true_positives / (predicted_positives + K.epsilon()) 18 | return precision_keras 19 | 20 | 21 | def specificity(y_true, y_pred): 22 | tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1))) 23 | fp = K.sum(K.round(K.clip((1 - y_true) * y_pred, 0, 1))) 24 | return tn / (tn + fp + K.epsilon()) 25 | 26 | 27 | def negative_predictive_value(y_true, y_pred): 28 | tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1))) 29 | fn = K.sum(K.round(K.clip(y_true * (1 - y_pred), 0, 1))) 30 | return tn / (tn + fn + K.epsilon()) 31 | 32 | 33 | def f1(y_true, y_pred): 34 | p = precision(y_true, y_pred) 35 | r = recall(y_true, y_pred) 36 | return 2 * ((p * r) / (p + r + K.epsilon())) 37 | 38 | 39 | def fbeta(y_true, y_pred, beta=2): 40 | y_pred = K.clip(y_pred, 0, 1) 41 | 42 | tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)), axis=1) 43 | fp = K.sum(K.round(K.clip(y_pred - y_true, 0, 1)), axis=1) 44 | fn = K.sum(K.round(K.clip(y_true - y_pred, 0, 1)), axis=1) 45 | 46 | p = tp / (tp + fp + K.epsilon()) 47 | r = tp / (tp + fn + K.epsilon()) 48 | 49 | num = (1 + beta ** 2) * (p * r) 50 | den = (beta ** 2 * p + r + K.epsilon()) 51 | return K.mean(num / den) 52 | 53 | 54 | def matthews_correlation_coefficient(y_true, y_pred): 55 | tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) 56 | tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1))) 57 | fp = K.sum(K.round(K.clip((1 - y_true) * y_pred, 0, 1))) 58 | fn = K.sum(K.round(K.clip(y_true * (1 - y_pred), 0, 1))) 59 | 60 | num = tp * tn - fp * fn 61 | den = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) 62 | return num / K.sqrt(den + K.epsilon()) 63 | 64 | 65 | def equal_error_rate(y_true, y_pred): 66 | n_imp = 
tf.count_nonzero(tf.equal(y_true, 0), dtype=tf.float32) + tf.constant(K.epsilon()) 67 | n_gen = tf.count_nonzero(tf.equal(y_true, 1), dtype=tf.float32) + tf.constant(K.epsilon()) 68 | 69 | scores_imp = tf.boolean_mask(y_pred, tf.equal(y_true, 0)) 70 | scores_gen = tf.boolean_mask(y_pred, tf.equal(y_true, 1)) 71 | 72 | loop_vars = (tf.constant(0.0), tf.constant(1.0), tf.constant(0.0)) 73 | cond = lambda t, fpr, fnr: tf.greater_equal(fpr, fnr) 74 | body = lambda t, fpr, fnr: ( 75 | t + 0.001, 76 | tf.divide(tf.count_nonzero(tf.greater_equal(scores_imp, t), dtype=tf.float32), n_imp), 77 | tf.divide(tf.count_nonzero(tf.less(scores_gen, t), dtype=tf.float32), n_gen) 78 | ) 79 | t, fpr, fnr = tf.while_loop(cond, body, loop_vars, back_prop=False) 80 | eer = (fpr + fnr) / 2 81 | 82 | return eer 83 | 84 | 85 | def get_metric(name): 86 | metrics = {"recall": tf.keras.metrics.Recall(), 87 | "precision": tf.keras.metrics.Precision(), 88 | "specificity": specificity, 89 | "negative_predictive_value": negative_predictive_value, 90 | "f1": f1, 91 | "fbeta": fbeta, 92 | "equal_error_rate": equal_error_rate, 93 | "matthews_correlation_coefficient": matthews_correlation_coefficient} 94 | keys = list(metrics.keys()) 95 | assert name in keys, print("Cannot find metric " + name, ". Available metrics are {}".format(keys)) 96 | return metrics[name] 97 | 98 | 99 | class DDMetrics: 100 | def __init__(self, model): 101 | self.model = model 102 | self.params = model.count_params() 103 | 104 | @staticmethod 105 | def scaled_performance(y_true, y_pred): 106 | p = precision(y_true, y_pred) 107 | f = f1(y_true, y_pred) 108 | return ((p*p) + (f*f))/2 109 | 110 | def relative_scaled_performance(self, y_true, y_pred): 111 | params = self.params / 1_000_000 112 | sp = self.scaled_performance(y_true, y_pred) 113 | return sp/(1.03 ** params) 114 | 115 | def relative_precision(self, y_true, y_pred): 116 | p = precision(y_true, y_pred) 117 | params = self.params / 1_000_000 118 | return p/params 119 | -------------------------------------------------------------------------------- /Docking/ML/DDModelExceptions.py: -------------------------------------------------------------------------------- 1 | class Error(Exception): 2 | """Base class for other exceptions""" 3 | pass 4 | 5 | 6 | class IncorrectModelModeError(Error): 7 | """Exception raised for errors in the model mode. 8 | 9 | Attributes: 10 | mode -- input mode which caused the error 11 | message -- explanation of the error 12 | """ 13 | def __init__(self, mode, available_modes, message="Incorrect model mode. 
Use one of the following modes:"): 14 | self.mode = mode 15 | self.message = message 16 | self.available_modes = available_modes 17 | 18 | def __str__(self): 19 | mode_string = "\n\n" 20 | for mode in self.available_modes: 21 | mode_string += " " + mode + "\n" 22 | 23 | return f'{self.mode} -> {self.message}' + mode_string 24 | -------------------------------------------------------------------------------- /Docking/ML/Parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Version 1.1.2 3 | """ 4 | import pandas as pd 5 | import numpy as np 6 | 7 | 8 | class Parser: 9 | 10 | @staticmethod 11 | def parse_ddss(path): 12 | 13 | architecture = {} 14 | hyperparameters = {} 15 | history = {} 16 | time = {} 17 | info = {'time': time, 'history': history, 'hyperparameters': hyperparameters, 'architecture': architecture} 18 | 19 | with open(path, 'r') as ddss_file: 20 | lines = ddss_file.readlines() 21 | lines.remove('\n') 22 | 23 | for i, line in enumerate(lines): 24 | line = line.strip('\n') 25 | 26 | # Get the model name 27 | if 'Model mode' in line: 28 | info['name'] = line.split()[-1] 29 | 30 | # Get the model timings 31 | if 'training_time' in line: 32 | split_line = line.split() 33 | time['training_time'] = float(split_line[-1]) 34 | if 'prediction_time' in line: 35 | split_line = line.split() 36 | time['prediction_time'] = float(split_line[-1]) 37 | 38 | # Get the history stats 39 | if 'History Stats' in line: 40 | # Grab everything under the history set 41 | for sub_line in lines[i + 1:]: # search the sub lines under history 42 | if '-' not in sub_line or 'Model has not been trained yet' in sub_line: 43 | break 44 | else: # Split up the lines and stores the values 45 | split_line = sub_line.split()[1:] 46 | history_key = split_line[0].replace(":", "") 47 | 48 | value = [] 49 | for v in split_line[1:]: 50 | value.append(float(v.strip(",").strip('[').strip(']'))) 51 | 52 | # If the list has one value, it should be closed to a scalar 53 | if len(value) == 1: 54 | history[history_key] = value[0] 55 | else: 56 | history[history_key] = value 57 | 58 | # Get the history stats 59 | if 'Hyperparameter Stats' in line: 60 | # search the sub lines under history 61 | for sub_line in lines[i + 1:]: 62 | if '-' not in sub_line or 'Model has not been trained yet' in sub_line: 63 | break 64 | else: 65 | sub_line = sub_line.strip(" - ").strip("\n").strip(" ").split(":") 66 | key = sub_line[0].strip(" ") 67 | value = sub_line[1].strip(" ") 68 | 69 | if '[' in value: 70 | value_list = [] 71 | for char in value: 72 | if char.isnumeric(): 73 | value_list.append(int(char)) 74 | value = value_list 75 | else: 76 | try: 77 | value = float(value) 78 | except ValueError: 79 | # If this value error occurs, it is because it has found the non-decimal 80 | # hyperparameters 81 | value = value 82 | 83 | hyperparameters[key] = value 84 | 85 | if 'total_params' in line or 'trainable_params' in line or 'total_params' in line: 86 | if "Cannot be determined" not in line: 87 | sub_line = line.strip(" - ").strip("\n").strip(" ").split(":") 88 | architecture[sub_line[0]] = int(sub_line[1].replace(",", "")) 89 | 90 | return info 91 | 92 | @staticmethod 93 | def ddss_to_csv(path): 94 | info = Parser.parse_ddss(path) 95 | df = pd.DataFrame() 96 | for key in info.keys(): 97 | print(info[key]) 98 | -------------------------------------------------------------------------------- /Docking/ML/Tokenizer.py: -------------------------------------------------------------------------------- 1 | 
from tensorflow.keras.preprocessing.text import Tokenizer 2 | from tensorflow.keras.preprocessing.sequence import pad_sequences 3 | import numpy as np 4 | 5 | 6 | class DDTokenizer: 7 | def __init__(self, num_words, oov_token=''): 8 | self.tokenizer = Tokenizer(num_words=num_words, 9 | oov_token=oov_token, 10 | filters='!"#$%&*+,-./:;<>?\\^_`{|}~\t\n', 11 | char_level=True, 12 | lower=False) 13 | self.has_trained = False 14 | 15 | self.pad_type = 'post' 16 | self.trunc_type = 'post' 17 | 18 | # The encoded data 19 | self.word_index = {} 20 | 21 | def fit(self, train_data): 22 | # Get max training sequence length 23 | print("Training Tokenizer...") 24 | self.tokenizer.fit_on_texts(train_data) 25 | self.has_trained = True 26 | print("Done training...") 27 | 28 | # Get our training data word index 29 | self.word_index = self.tokenizer.word_index 30 | 31 | def encode(self, data, use_padding=True, padding_size=None, normalize=False): 32 | # Encode training data sentences into sequences 33 | train_sequences = self.tokenizer.texts_to_sequences(data) 34 | 35 | # Get max training sequence length if there is none passed 36 | if padding_size is None: 37 | maxlen = max([len(x) for x in train_sequences]) 38 | else: 39 | maxlen = padding_size 40 | 41 | if use_padding: 42 | train_sequences = pad_sequences(train_sequences, padding=self.pad_type, 43 | truncating=self.trunc_type, maxlen=maxlen) 44 | 45 | if normalize: 46 | train_sequences = np.multiply(1/len(self.tokenizer.word_index), train_sequences) 47 | 48 | return train_sequences 49 | 50 | def pad(self, data, padding_size=None): 51 | # Get max training sequence length if there is none passed 52 | if padding_size is None: 53 | padding_size = max([len(x) for x in data]) 54 | 55 | padded_sequence = pad_sequences(data, padding=self.pad_type, 56 | truncating=self.trunc_type, maxlen=padding_size) 57 | 58 | return padded_sequence 59 | 60 | def decode(self, array): 61 | assert self.has_trained, "Train this tokenizer before decoding a string." 
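        # --- annotation (not part of the original Tokenizer.py): hedged usage sketch ---
        # A typical round trip with DDTokenizer, based only on the methods defined in
        # this class (the SMILES strings below are illustrative placeholders):
        #   tok = DDTokenizer(num_words=100)
        #   tok.fit(["CCO", "c1ccccc1"])                  # character-level fit_on_texts
        #   seqs = tok.encode(["CCO"], padding_size=32)   # padded integer sequences
        #   texts = tok.decode(seqs)                      # space-joined characters back out
        # decode() itself simply defers to Keras' sequences_to_texts, as on the next line.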
62 | return self.tokenizer.sequences_to_texts(array) 63 | 64 | def test(self, string): 65 | encoded = list(self.encode(string)[0]) 66 | decoded = self.decode(self.encode(string)) 67 | 68 | print("\nEncoding:") 69 | print("{original} -> {encoded}".format(original=string[0], encoded=encoded)) 70 | print("\nDecoding:") 71 | print("{original} -> {encoded}".format(original=encoded, encoded=decoded[0].replace(" ", ""))) 72 | 73 | def get_info(self): 74 | return self.tokenizer.index_word 75 | 76 | -------------------------------------------------------------------------------- /Docking/ML/__init__.py: -------------------------------------------------------------------------------- 1 | # from .DDModel import DDModel 2 | # from .Models import Models 3 | # import ML.Parser 4 | -------------------------------------------------------------------------------- /Docking/ML/data_generator.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy as np 3 | 4 | 5 | class DDGenerator: 6 | """ 7 | A data generator 8 | """ 9 | def __init__(self, train_features, train_labels, val_features, val_labels, batch_size, cutoff): 10 | self.cutoff = cutoff 11 | self.batch_size = batch_size 12 | 13 | # The path to the training set and labels 14 | self.train_labels_csv = train_labels 15 | self.train_features_csv = train_features 16 | 17 | # The path to the validation set and labels 18 | self.val_features_csv = val_features 19 | self.val_labels_csv = val_labels 20 | 21 | def train_flow(self): 22 | with open(self.train_features_csv, "r") as csv1, open(self.train_labels_csv, "r") as csv2: 23 | reader1 = csv.reader(csv1) 24 | reader2 = csv.reader(csv2) 25 | for row1, row2 in zip(reader1, reader2): 26 | try: 27 | fp = [None] * self.batch_size 28 | score = [None] * self.batch_size 29 | for i in range(self.batch_size): 30 | morgan = np.array(self.decompress_morgan(row1[1:])) 31 | label = float(row2[0]) > self.cutoff 32 | fp[i] = morgan 33 | score[i] = label 34 | yield np.array(fp).reshape((self.batch_size, 1024)), np.array(score) 35 | except ValueError: 36 | yield 37 | 38 | def val_flow(self): 39 | with open(self.val_features_csv, "r") as csv1, open(self.val_labels_csv, "r") as csv2: 40 | reader1 = csv.reader(csv1) 41 | reader2 = csv.reader(csv2) 42 | for row1, row2 in zip(reader1, reader2): 43 | try: 44 | fp = [None] * self.batch_size 45 | score = [None] * self.batch_size 46 | for i in range(self.batch_size): 47 | morgan = np.array(self.decompress_morgan(row1[1:])) 48 | label = float(row2[0]) > self.cutoff 49 | fp[i] = morgan 50 | score[i] = label 51 | yield np.array(fp).reshape((self.batch_size, 1024)), np.array(score) 52 | except ValueError: 53 | yield 54 | 55 | # Decompress a morgan fingerprint from the dataset 56 | def decompress_morgan(self, mol_info): 57 | # ID_labels is a dataframe containing the zincIDs and their corresponding scores. 58 | morgan = np.zeros(1024, dtype=int) 59 | 60 | # "Decompressing" the information from the file about where the 1s are on the 1024 bit vector. 
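        # (annotation, not in the original file) Worked example of this decompression:
        # if the trailing fields of a row are ['3', '17', '1020'], the loop below
        # produces a length-1024 vector of zeros with ones at indices 3, 17 and 1020.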
61 | # array of indexes of the binary 1s in the 1024 bit vector representing the morgan fingerprint 62 | bit_indices = mol_info 63 | for elem in bit_indices: 64 | morgan[int(elem)] = 1 65 | 66 | return morgan 67 | 68 | 69 | def keras_generator_test(f="", lbl=""): 70 | from tensorflow.keras.layers import Dense 71 | from tensorflow.keras.models import Input, Model 72 | 73 | inputs = Input(shape=[1024]) 74 | x = inputs 75 | x = Dense(10000, activation='relu')(x) 76 | output = Dense(1, activation='sigmoid')(x) 77 | 78 | model = Model(inputs=inputs, outputs=output) 79 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 80 | model.summary() 81 | 82 | generator = DDGenerator(train_features=f, train_labels=lbl, val_features=f, val_labels=lbl, cutoff=-10, 83 | batch_size=32) 84 | model.fit_generator(generator=generator.train_flow(), validation_data=generator.val_flow(), steps_per_epoch=100, 85 | validation_steps=100) 86 | 87 | 88 | -------------------------------------------------------------------------------- /Docking/ML/lasso_regularizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.keras import backend as K 3 | from tensorflow.keras.regularizers import Regularizer 4 | 5 | 6 | class Lasso(Regularizer): 7 | """Regularizer for L21 regularization. 8 | # Arguments 9 | C: Float; L21 regularization factor. 10 | """ 11 | 12 | def __init__(self, C=0.): 13 | self.C = K.cast_to_floatx(C) 14 | 15 | def __call__(self, x): 16 | const_coeff = np.sqrt(K.int_shape(x)[1]) 17 | return self.C*const_coeff*K.sum(K.sqrt(K.sum(K.square(x), axis=1))) 18 | 19 | def get_config(self): 20 | return {'C': float(self.C)} -------------------------------------------------------------------------------- /Docking/ML/load_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.utils import shuffle 3 | 4 | 5 | def load(path_train, path_test): 6 | loaded_train = shuffle(pd.read_pickle(path_train)) 7 | loaded_test = shuffle(pd.read_pickle(path_test)) 8 | 9 | train_x = loaded_train.morgan_fingerprint 10 | train_y = loaded_train.docking_score 11 | 12 | test_x = loaded_test.morgan_fingerprint 13 | test_y = loaded_test.docking_score 14 | 15 | data = DataContainer(train_x, train_y, test_x, test_y) 16 | return data 17 | 18 | 19 | def load_data(): 20 | """These function will load up the data like as done in phase 4 and 5 """ 21 | pass 22 | 23 | 24 | class DataContainer: 25 | def __init__(self, train_x, train_y, test_x, test_y): 26 | self.train_x = train_x 27 | self.train_y = train_y 28 | self.test_x = test_x 29 | self.test_y = test_y 30 | 31 | def __repr__(self): 32 | print("Features:", self.train_x.name, "-> Lables:", self.train_y.name) 33 | print("Train Size:", len(self.train_x)) 34 | print("Test Size:", len(self.test_x)) 35 | print("Hit/Miss ratio:", sum([1 if x else 0 for x in self.train_y])/len(self.train_y) * 100, "%") 36 | return "" 37 | 38 | def __call__(self, *args, **kwargs): 39 | return self.train_x, self.train_y, self.test_x, self.test_y 40 | -------------------------------------------------------------------------------- /Docking/ML/model_tuner.py: -------------------------------------------------------------------------------- 1 | from Docking.ML.DDModel import DDModel 2 | import numpy as np 3 | import tensorflow as tf 4 | import kerastuner as kt 5 | from Docking.ML.utils import * 6 | import Docking.ML.load_data 7 | import IPython 8 | 9 | 10 
| class ClearTrainingOutput(tf.keras.callbacks.Callback): 11 | def on_train_end(*args, **kwargs): 12 | IPython.display.clear_output(wait=True) 13 | 14 | 15 | if __name__ == '__main__': 16 | # Load config 17 | print("Loading Config...") 18 | config = read_tuner_config('tuner_config.txt') 19 | # For hyper band tuner 20 | train_path, test_path = config['training_path'], config['testing_path'] 21 | directory = config['directory'] 22 | project_name = config['project_name'] 23 | objective = config['objective'] 24 | max_trials = config['max_trials'] 25 | max_epochs = config['max_epochs'] 26 | factor = config['factor'] 27 | hyperband_iterations = config['hyperband_iterations'] 28 | direction = config['direction'] 29 | 30 | # For search 31 | steps_per_epoch = config['steps_per_epoch'] 32 | validation_steps = config['validation_steps'] 33 | epochs = config['epochs'] 34 | batch_size = config['batch_size'] 35 | 36 | print("Loading Dataset...") 37 | data = Docking.ML.load_data.load(train_path, test_path) 38 | train_x, train_y, test_x, test_y = data() 39 | train_x, train_y = train_x.tolist(), train_y.tolist() 40 | test_x, test_y = test_x.tolist(), test_y.tolist() 41 | tr_x = np.array(train_x) 42 | tr_y = np.array(train_y) 43 | 44 | tx = np.array(test_x) 45 | ty = np.array(test_y) 46 | 47 | tuner = kt.BayesianOptimization(DDModel.build_tuner_model, 48 | objective=kt.Objective(objective, direction), 49 | project_name=project_name, 50 | directory=directory, 51 | max_trials=max_trials) 52 | 53 | tuner.search_space_summary() 54 | tuner.search(tr_x, tr_y, 55 | validation_data=(tx, ty), epochs=epochs, batch_size=batch_size, 56 | class_weight={0: 2, 1: 1}, 57 | callbacks=[tf.keras.callbacks.EarlyStopping(monitor=objective, 58 | min_delta=0, 59 | patience=3, 60 | verbose=0, 61 | mode=direction)]) 62 | 63 | print("Done...") 64 | 65 | # Show a summary of the search 66 | tuner.results_summary() 67 | 68 | # Retrieve the best 3 models. 69 | 70 | # 1 71 | best_hyperparameters = tuner.get_best_hyperparameters(1)[0] 72 | best_model = tuner.get_best_models(num_models=1)[0] 73 | print("Saving best model...") 74 | 75 | model_location = config['model_location'] + "/" 76 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 77 | model.save(model_location + project_name + "_1st_" + objective + "_" + direction, json=True) 78 | 79 | # 2 80 | best_hyperparameters = tuner.get_best_hyperparameters(2)[1] 81 | best_model = tuner.get_best_models(num_models=2)[1] 82 | print("Saving best model...") 83 | 84 | model_location = config['model_location'] + "/" 85 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 86 | model.save(model_location + project_name + "_2nd_" + objective + "_" + direction, json=True) 87 | 88 | # 3 89 | best_hyperparameters = tuner.get_best_hyperparameters(3)[2] 90 | best_model = tuner.get_best_models(num_models=3)[2] 91 | print("Saving best model...") 92 | 93 | model_location = config['model_location'] + "/" 94 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 95 | model.save(model_location + project_name + "_3rd_" + objective + "_" + direction, json=True) 96 | 97 | print("Saved!") 98 | -------------------------------------------------------------------------------- /Docking/ML/transformer_layers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Version 1.0.0 3 | 4 | Attempts at making a transformer model. 5 | This needs to be redone in the future! 
6 | """ 7 | 8 | import tensorflow as tf 9 | from tensorflow import keras 10 | from tensorflow.keras import layers 11 | 12 | 13 | class MultiHeadSelfAttention(layers.Layer): 14 | def __init__(self, embed_dim, num_heads=8, **kwargs): 15 | super(MultiHeadSelfAttention, self).__init__() 16 | self.embed_dim = embed_dim 17 | self.num_heads = num_heads 18 | if embed_dim % num_heads != 0: 19 | raise ValueError( 20 | f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}" 21 | ) 22 | self.projection_dim = embed_dim // num_heads 23 | self.query_dense = layers.Dense(embed_dim) 24 | self.key_dense = layers.Dense(embed_dim) 25 | self.value_dense = layers.Dense(embed_dim) 26 | self.combine_heads = layers.Dense(embed_dim) 27 | 28 | def get_config(self): 29 | config = super().get_config().copy() 30 | config.update({ 31 | 'embed_dim': self.embed_dim, 32 | 'num_heads': self.num_heads, 33 | }) 34 | return config 35 | 36 | def attention(self, query, key, value): 37 | score = tf.matmul(query, key, transpose_b=True) 38 | dim_key = tf.cast(tf.shape(key)[-1], tf.float32) 39 | scaled_score = score / tf.math.sqrt(dim_key) 40 | weights = tf.nn.softmax(scaled_score, axis=-1) 41 | output = tf.matmul(weights, value) 42 | return output, weights 43 | 44 | def separate_heads(self, x, batch_size): 45 | x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim)) 46 | return tf.transpose(x, perm=[0, 2, 1, 3]) 47 | 48 | def call(self, inputs): 49 | # x.shape = [batch_size, seq_len, embedding_dim] 50 | batch_size = tf.shape(inputs)[0] 51 | query = self.query_dense(inputs) # (batch_size, seq_len, embed_dim) 52 | key = self.key_dense(inputs) # (batch_size, seq_len, embed_dim) 53 | value = self.value_dense(inputs) # (batch_size, seq_len, embed_dim) 54 | query = self.separate_heads( 55 | query, batch_size 56 | ) # (batch_size, num_heads, seq_len, projection_dim) 57 | key = self.separate_heads( 58 | key, batch_size 59 | ) # (batch_size, num_heads, seq_len, projection_dim) 60 | value = self.separate_heads( 61 | value, batch_size 62 | ) # (batch_size, num_heads, seq_len, projection_dim) 63 | attention, weights = self.attention(query, key, value) 64 | attention = tf.transpose( 65 | attention, perm=[0, 2, 1, 3] 66 | ) # (batch_size, seq_len, num_heads, projection_dim) 67 | concat_attention = tf.reshape( 68 | attention, (batch_size, -1, self.embed_dim) 69 | ) # (batch_size, seq_len, embed_dim) 70 | output = self.combine_heads( 71 | concat_attention 72 | ) # (batch_size, seq_len, embed_dim) 73 | return output 74 | 75 | 76 | class TransformerBlock(layers.Layer): 77 | def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs): 78 | super(TransformerBlock, self).__init__() 79 | self.att = MultiHeadSelfAttention(embed_dim, num_heads) 80 | self.ffn = keras.Sequential( 81 | [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),] 82 | ) 83 | self.layernorm1 = layers.LayerNormalization(epsilon=1e-6) 84 | self.layernorm2 = layers.LayerNormalization(epsilon=1e-6) 85 | self.dropout1 = layers.Dropout(rate) 86 | self.dropout2 = layers.Dropout(rate) 87 | 88 | self.embed_dim = embed_dim 89 | self.num_heads = num_heads 90 | self.ff_dim = ff_dim 91 | self.rate = rate 92 | 93 | def get_config(self): 94 | config = super().get_config().copy() 95 | config.update({ 96 | 'embed_dim': self.embed_dim, 97 | 'num_heads': self.num_heads, 98 | 'ff_dim': self.ff_dim, 99 | 'rate': self.rate 100 | }) 101 | return config 102 | 103 | def call(self, inputs, training): 104 | attn_output = self.att(inputs) 
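        # (annotation, not in the original file) Shape note, following the comment
        # convention used in MultiHeadSelfAttention.call above: attn_output is
        # (batch_size, seq_len, embed_dim), the same shape as `inputs`, which is what
        # makes the residual additions inside the two LayerNormalization calls below valid.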
105 | attn_output = self.dropout1(attn_output, training=training) 106 | out1 = self.layernorm1(inputs + attn_output) 107 | ffn_output = self.ffn(out1) 108 | ffn_output = self.dropout2(ffn_output, training=training) 109 | return self.layernorm2(out1 + ffn_output) 110 | 111 | 112 | class TokenAndPositionEmbedding(layers.Layer): 113 | def __init__(self, maxlen, vocab_size, embed_dim, **kwargs): 114 | super(TokenAndPositionEmbedding, self).__init__() 115 | 116 | self.maxlen = maxlen 117 | self.vocab_size = vocab_size 118 | self.embed_dim = embed_dim 119 | 120 | self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim) 121 | self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim) 122 | 123 | def get_config(self): 124 | config = super().get_config().copy() 125 | config.update({ 126 | 'maxlen': self.maxlen, 127 | 'vocab_size': self.vocab_size, 128 | 'embed_dim': self.vocab_size, 129 | }) 130 | return config 131 | 132 | def call(self, x): 133 | maxlen = tf.shape(x)[-1] 134 | positions = tf.range(start=0, limit=maxlen, delta=1) 135 | positions = self.pos_emb(positions) 136 | x = self.token_emb(x) 137 | return x + positions 138 | -------------------------------------------------------------------------------- /Docking/ML/tuner_config.txt: -------------------------------------------------------------------------------- 1 | <> 2 | <> 3 | training_path= 4 | testing_path= 5 | 6 | <> 7 | directory= 8 | 9 | <> 10 | model_location= 11 | 12 | <> 13 | project_name= 14 | 15 | <> 16 | objective=val_loss 17 | 18 | <> 19 | direction=min 20 | 21 | <> 22 | max_trials=10 23 | 24 | <> 25 | max_epochs=15 26 | 27 | <> 28 | factor=3 29 | 30 | <> 31 | hyperband_iterations=3 32 | 33 | 34 | <> 35 | steps_per_epoch=1000 36 | validation_steps=100 37 | epochs=10 38 | batch_size=256 -------------------------------------------------------------------------------- /Docking/ML/utils.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.client import device_lib 2 | 3 | 4 | def get_available_gpus(): 5 | local_device_protos = device_lib.list_local_devices() 6 | return [x.name for x in local_device_protos if x.device_type == 'GPU'] 7 | 8 | 9 | def read_tuner_config(path): 10 | with open(path) as config: 11 | values = {} 12 | comment = "<<" 13 | for line in config.readlines(): 14 | line = line.strip(" ").strip("\n") 15 | if comment not in line and len(line) > 1: 16 | split = line.split("=") 17 | if len(split) > 1: 18 | key = split[0] 19 | value = split[1] 20 | else: 21 | key = split[0] 22 | value = "" 23 | 24 | try: 25 | value = int(value) 26 | except ValueError: 27 | pass 28 | values[key] = value 29 | return values 30 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/Extract_labels.py: -------------------------------------------------------------------------------- 1 | import builtins as __builtin__ 2 | 3 | # For debugging purposes only: 4 | def print(*args, **kwargs): 5 | __builtin__.print('\t extract_L: ', end="") 6 | return __builtin__.print(*args, **kwargs) 7 | 8 | 9 | import glob 10 | from multiprocessing import Pool 11 | from contextlib import closing 12 | import gzip 13 | import os 14 | import argparse 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('-if','--is_final',required=True) 18 | parser.add_argument('-n_it','--iteration_no',required=True) 19 | parser.add_argument('-protein','--protein',required=True) 20 | 
parser.add_argument('-file_path','--file_path',required=True) 21 | parser.add_argument('-t_pos','--tot_process',required=True) 22 | parser.add_argument('-sof','--software',required=True) 23 | 24 | io_args = parser.parse_args() 25 | 26 | is_final = io_args.is_final 27 | n_it = int(io_args.iteration_no) 28 | protein = io_args.protein 29 | file_path = io_args.file_path 30 | tot_process = int(io_args.tot_process) 31 | sof = io_args.software 32 | 33 | if is_final == 'False' or is_final == 'false': 34 | is_final = False 35 | elif is_final == 'True' or is_final == 'true': 36 | is_final = True 37 | else: 38 | raise TypeError('-if parameter must be a boolean (true/false)') 39 | 40 | if sof == 'GLIDE': 41 | key_word = 'r_i_docking_score' 42 | elif sof == 'OEDDOCKING': 43 | key_word = 'FRED Chemgauss4 score' 44 | elif sof == "AUTODOCK_GPU": 45 | key_word = 'ADSCOR' 46 | else: 47 | raise ValueError('Unknown docking software, check line 7 logs.txt and try again.') 48 | 49 | #mol_key = 'ZINC' 50 | print(key_word) 51 | 52 | 53 | def get_scores(ref): 54 | scores = [] 55 | for line in ref: # Looping through the molecules 56 | zinc_id = line.rstrip() 57 | line = ref.readline() 58 | # '$$$' signifies end of molecule info 59 | while line != '' and line[:4] != '$$$$': # Looping through its information and saving scores 60 | 61 | tmp = line.rstrip().split('<')[-1] 62 | 63 | if key_word == tmp[:-1]: 64 | tmpp = float(ref.readline().rstrip()) 65 | if tmpp > 50 or tmpp < -50: 66 | print(zinc_id, tmpp) 67 | else: 68 | scores.append([zinc_id, tmpp]) 69 | 70 | line = ref.readline() 71 | return scores 72 | 73 | 74 | def extract_glide_score(filen): 75 | scores = [] 76 | try: 77 | # Opening the GNU compressed file 78 | with gzip.open(filen, 'rt') as ref: 79 | scores = get_scores(ref) 80 | 81 | except Exception as e: 82 | print('Handled exception: ', e) 83 | # file is already decompressed 84 | with open(filen, 'r') as ref: 85 | scores = get_scores(ref) 86 | 87 | if 'test' in os.path.basename(filen): 88 | new_name = 'testing' 89 | elif 'valid' in os.path.basename(filen): 90 | new_name = 'validation' 91 | elif 'train' in os.path.basename(filen): 92 | new_name = 'training' 93 | else: 94 | print("FAIL! Could not generate new training set. 
Exiting...") 95 | exit() 96 | 97 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/' + new_name + '_' + 'labels.txt', 'w') as ref: 98 | ref.write('r_i_docking_score'+','+'ZINC_ID'+'\n') 99 | for z_id,gc in scores: 100 | ref.write(str(gc)+','+z_id+'\n') 101 | 102 | 103 | if __name__ == '__main__': 104 | files = [] 105 | iter_path = file_path+'/'+protein+'/iteration_'+str(n_it) 106 | 107 | # Checking to see if the labels have already been extracted: 108 | sets = ["training", "testing", "validation"] 109 | files_labels = glob.glob(iter_path+"/*_labels.txt") 110 | foundAll = True 111 | for s in sets: 112 | found = False 113 | print(s) 114 | for f in files_labels: 115 | set_name = f.split('/')[-1].split("_labels.txt")[0] 116 | if set_name == s: 117 | found = True 118 | print('Found') 119 | break 120 | if not found: 121 | foundAll = False 122 | print('Labels not yet extracted -> Not Found') 123 | break 124 | if foundAll: 125 | print('Labels have already been extracted...') 126 | print('Remove "*_labels.text" files in \"'+ iter_path +'\" to re-extract') 127 | exit(0) 128 | 129 | # Checking to see if this is the final iteration to use the right folder 130 | if is_final: 131 | path = file_path+'/'+protein+'/after_iteration/docked/*.sdf*' 132 | else: 133 | path = iter_path+'/docked/*.sdf*' 134 | path_labels = iter_path+'/*labels*' 135 | 136 | for f in glob.glob(path): 137 | files.append(f) 138 | 139 | print("num files in", path, ":", len(files)) 140 | print("Files to extract from:", [os.path.basename(f) for f in files]) 141 | if len(files) == 0: 142 | print('NO FILES IN: ', path) 143 | print('CANCEL JOB...') 144 | exit(1) 145 | 146 | # Parallel running of the extract_glide_score() with each file path of the files array 147 | with closing(Pool(len(files))) as pool: 148 | pool.map(extract_glide_score, files) 149 | 150 | if not is_final: 151 | # renaming from f1_f2_f3 to f3_labels.txt 152 | for f in glob.glob(path_labels): 153 | try: 154 | print(f) 155 | print(iter_path+'/'+f.split('/')[-1].split('_')[2]+'_'+'labels.txt') 156 | os.rename(f, iter_path+'/'+f.split('/')[-1].split('_')[2]+'_'+'labels.txt') 157 | except IndexError: 158 | print("Handled error on renaming", f) # Occurs if it is already correctly named. 
(deprecated use) 159 | 160 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/Extracting_morgan.py: -------------------------------------------------------------------------------- 1 | # Reads the ids found in sampling and finds the corresponding morgan fingerprint 2 | import argparse 3 | import glob 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('-pt', '--protein_name', required=True) 7 | parser.add_argument('-fp', '--file_path', required=True) 8 | parser.add_argument('-it', '--n_iteration', required=True) 9 | parser.add_argument('-md', '--morgan_directory', required=True) 10 | parser.add_argument('-t_pos', '--tot_process', required=True) 11 | 12 | io_args = parser.parse_args() 13 | 14 | import os 15 | from multiprocessing import Pool 16 | import time 17 | from contextlib import closing 18 | import numpy as np 19 | 20 | protein = io_args.protein_name 21 | file_path = io_args.file_path 22 | n_it = int(io_args.n_iteration) 23 | morgan_directory = io_args.morgan_directory 24 | tot_process = int(io_args.tot_process) 25 | 26 | 27 | def extract_morgan(file_name): 28 | train = {} 29 | test = {} 30 | valid = {} 31 | with open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/train_set.txt", 'r') as ref: 32 | for line in ref: 33 | train[line.rstrip()] = 0 34 | with open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/valid_set.txt", 'r') as ref: 35 | for line in ref: 36 | valid[line.rstrip()] = 0 37 | with open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/test_set.txt", 'r') as ref: 38 | for line in ref: 39 | test[line.rstrip()] = 0 40 | 41 | # for file_name in file_names: 42 | ref1 = open( 43 | file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + 'train_' + file_name.split('/')[-1], 'w') 44 | ref2 = open( 45 | file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + 'valid_' + file_name.split('/')[-1], 'w') 46 | ref3 = open(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + 'test_' + file_name.split('/')[-1], 47 | 'w') 48 | 49 | with open(file_name, 'r') as ref: 50 | flag = 0 51 | for line in ref: 52 | tmpp = line.strip().split(',')[0] 53 | if tmpp in train.keys(): 54 | train[tmpp] += 1 55 | fn = 1 56 | if train[tmpp] == 1: flag = 1 57 | elif tmpp in valid.keys(): 58 | valid[tmpp] += 1 59 | fn = 2 60 | if valid[tmpp] == 1: flag = 1 61 | elif tmpp in test.keys(): 62 | test[tmpp] += 1 63 | fn = 3 64 | if test[tmpp] == 1: flag = 1 65 | if flag == 1: 66 | if fn == 1: 67 | ref1.write(line) 68 | if fn == 2: 69 | ref2.write(line) 70 | if fn == 3: 71 | ref3.write(line) 72 | flag = 0 73 | 74 | 75 | def alternate_concat(files): 76 | to_return = [] 77 | with open(files, 'r') as ref: 78 | for line in ref: 79 | to_return.append(line) 80 | return to_return 81 | 82 | 83 | def delete_all(files): 84 | os.remove(files) 85 | 86 | 87 | def morgan_duplicacy(f_name): 88 | flag = 0 89 | mol_list = {} 90 | ref1 = open(f_name[:-4] + '_updated.csv', 'a') 91 | with open(f_name, 'r') as ref: 92 | for line in ref: 93 | tmpp = line.strip().split(',')[0] 94 | if tmpp not in mol_list: 95 | mol_list[tmpp] = 1 96 | flag = 1 97 | if flag == 1: 98 | ref1.write(line) 99 | flag = 0 100 | os.remove(f_name) 101 | 102 | 103 | if __name__ == '__main__': 104 | try: 105 | os.mkdir(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan') 106 | except: 107 | pass 108 | 109 | files = [] 110 | for f in glob.glob(morgan_directory + "/*.txt"): 111 | files.append(f) 112 | 113 | t = 
time.time() 114 | with closing(Pool(np.min([tot_process, len(files)]))) as pool: 115 | pool.map(extract_morgan, files) 116 | print(time.time() - t) 117 | 118 | all_to_delete = [] 119 | for type_to in ['train', 'valid', 'test']: 120 | t = time.time() 121 | files = [] 122 | for f in glob.glob(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + type_to + '*'): 123 | files.append(f) 124 | all_to_delete.append(f) 125 | print(len(files)) 126 | if len(files) == 0: 127 | print("Error in address above") 128 | break 129 | with closing(Pool(np.min([tot_process, len(files)]))) as pool: 130 | to_print = pool.map(alternate_concat, files) 131 | with open(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/' + type_to + '_morgan_1024.csv', 132 | 'w') as ref: 133 | for file_data in to_print: 134 | for line in file_data: 135 | ref.write(line) 136 | to_print = [] 137 | print(type_to, time.time() - t) 138 | 139 | f_names = [] 140 | for f in glob.glob(file_path + '/' + protein + '/iteration_' + str(n_it) + '/morgan/*morgan*'): 141 | f_names.append(f) 142 | 143 | t = time.time() 144 | with closing(Pool(np.min([tot_process, len(f_names)]))) as pool: 145 | pool.map(morgan_duplicacy, f_names) 146 | print(time.time() - t) 147 | 148 | with closing(Pool(np.min([tot_process, len(all_to_delete)]))) as pool: 149 | pool.map(delete_all, all_to_delete) 150 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/Prediction_morgan_1024.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import time 5 | import warnings 6 | import numpy as np 7 | import pandas as pd 8 | from ML.DDModel import DDModel 9 | 10 | try: 11 | import __builtin__ 12 | except ImportError: 13 | # Python 3 14 | import builtins as __builtin__ 15 | 16 | # For debugging purposes only: 17 | def print(*args, **kwargs): 18 | __builtin__.print('\t sampling: ', end="") 19 | return __builtin__.print(*args, **kwargs) 20 | 21 | warnings.filterwarnings('ignore') 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('-fn','--fn', required=True) 25 | parser.add_argument('-protein', '--protein', required=True) 26 | parser.add_argument('-it', '--it', required=True) 27 | parser.add_argument('-file_path', '--file_path', required=True) 28 | parser.add_argument('-mdd', '--morgan_directory', required=True) 29 | 30 | io_args = parser.parse_args() 31 | fn = io_args.fn 32 | protein = str(io_args.protein) 33 | it = int(io_args.it) 34 | file_path = io_args.file_path 35 | mdd = io_args.morgan_directory 36 | 37 | # This debug feature will allow for speedy testing 38 | DEBUG=False 39 | def prediction_morgan(fname, models, thresh): # TODO: improve runtime with parallelization across multiple nodes 40 | print("Starting Predictions...") 41 | t = time.time() 42 | per_time = 1000000 43 | n_features = 1024 44 | z_id = [] 45 | X_set = np.zeros([per_time, n_features]) 46 | total_passed = 0 47 | 48 | print("We are predicting from the file", fname, "located in", mdd) 49 | with open(mdd+'/'+fname,'r') as ref: 50 | no = 0 51 | for line in ref: 52 | tmp = line.rstrip().split(',') 53 | on_bit_vector = tmp[1:] 54 | z_id.append(tmp[0]) 55 | for elem in on_bit_vector: 56 | X_set[no,int(elem)] = 1 57 | no+=1 58 | if no == per_time: 59 | X_set = X_set[:no, :] 60 | pred = [] 61 | print("We are currently running line", line) 62 | print("(1) Predicting... 
Time elapsed:", time.time() - t, "seconds.") 63 | for model in models: 64 | pred.append(model.predict(X_set)) 65 | 66 | with open(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions/'+fname, 'a') as ref: 67 | for j in range(len(pred[0])): 68 | is_pass = 0 69 | for i,thr in enumerate(thresh): 70 | if float(pred[i][j])>thr: 71 | is_pass += 1 72 | if is_pass >= 1: 73 | total_passed += 1 74 | line = z_id[j]+','+str(float(pred[i][j]))+'\n' 75 | ref.write(line) 76 | X_set = np.zeros([per_time,n_features]) 77 | z_id = [] 78 | no = 0 79 | 80 | # With debug, we will only predict on 'per_time' molecules 81 | if DEBUG: 82 | break 83 | 84 | if no != 0: 85 | X_set = X_set[:no,:] 86 | pred = [] 87 | print("We are currently running line", line) 88 | print("(2) Predicting... Time elapsed:", time.time() - t, "seconds.") 89 | for model in models: 90 | pred.append(model.predict(X_set)) 91 | with open(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions/'+fname, 'a') as ref: 92 | for j in range(len(pred[0])): 93 | is_pass = 0 94 | for i,thr in enumerate(thresh): 95 | if float(pred[i][j])>thr: 96 | is_pass+=1 97 | if is_pass>=1: 98 | total_passed+=1 99 | line = z_id[j]+','+str(float(pred[i][j]))+'\n' 100 | ref.write(line) 101 | print("Prediction time:", time.time() - t) 102 | return total_passed 103 | 104 | 105 | try: 106 | os.mkdir(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions') 107 | except OSError: 108 | print(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions', "already exists") 109 | 110 | thresholds = pd.read_csv(file_path+'/iteration_'+str(it)+'/best_models/thresholds.txt', header=None) 111 | thresholds.columns = ['model_no', 'thresh', 'cutoff'] 112 | 113 | tr = [] 114 | models = [] 115 | for f in glob.glob(file_path+'/iteration_'+str(it)+'/best_models/model_*'): 116 | if "." 
not in f: # skipping over the .ddss & .csv files 117 | mn = int(f.split('/')[-1].split('_')[1]) 118 | tr.append(thresholds[thresholds.model_no == mn].thresh.iloc[0]) 119 | models.append(DDModel.load(file_path+'/iteration_'+str(it)+'/best_models/model_'+str(mn))) 120 | 121 | print("Number of models to predict:", len(models)) 122 | t = time.time() 123 | returned = prediction_morgan(fn, models, tr) 124 | print(time.time()-t) 125 | 126 | with open(file_path+'/iteration_'+str(it)+'/morgan_1024_predictions/passed_file_ct.txt','a') as ref: 127 | ref.write(fn+','+str(returned)+'\n') 128 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/Docking/ProgressiveDocking/__init__.py -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/activation_script.sh: -------------------------------------------------------------------------------- 1 | echo Activating virtual environment 2 | source ~/.bashrc 3 | conda activate DeepDockingRemote -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/autodock_gpu_ad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=autodock 3 | #SBATCH --gres=gpu:1 4 | #SBATCH --mem-per-gpu=0 5 | #SBATCH --output=slurm-phase_3-%x.%j.out 6 | #SBATCH --error=slurm-phase_3-%x.%j.err 7 | 8 | start=`date +%s` 9 | 10 | wg=$1 #WORK-GROUP 11 | sa=$2 #SEARCH ALGORITHM 12 | fl=$3 #FLD FILE 13 | lg=$4 #LIGAND FOLDER 14 | lt=$5 #LIST FILE 15 | ne=$6 #NUMBER OF ENERGY EVALUATIONS 16 | nr=$7 #NUMBER OF RUNS 17 | 18 | ad_path=$8 19 | scripts=$9 20 | 21 | echo Partition: $SLURM_JOB_PARTITION 22 | 23 | # This should activate the conda environment 24 | source ~/.bashrc 25 | source $scripts/activation_script.sh 26 | 27 | rm -f list.txt *dlg *xml init* 28 | echo "$fl">>$lt 29 | for i in $lg'/'*pdbqt 30 | do 31 | echo $i>>$lt 32 | tmp=$(awk -F'/' '{print $NF}'<<<$i) 33 | tmp=$(cut -d'.' -f1<<<$tmp) 34 | echo $tmp>>$lt 35 | done 36 | wait 37 | 38 | $ad_path'/'autodock_gpu_"$wg"wi -lsmet $sa -filelist $lt -nrun $nr -nev $ne 39 | wait $! 
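# A minimal illustration of how the AutoDock-GPU call above expands, assuming the
# sample values that phase_3.sh elsewhere in this repo passes in (wg=64, sa=sw,
# nev=5000000, nr=10); the install path is a placeholder:
#
#   /path/to/AutoDock-GPU/bin/autodock_gpu_64wi -lsmet sw -filelist list.txt -nrun 10 -nev 5000000
#
# list.txt starts with the receptor .fld map file and then alternates each ligand
# .pdbqt path with its base name, exactly as built by the loop above.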
40 | 41 | #EXTRACT SINGLE BEST POSES 42 | dlg_fold=$(pwd) #FOLDER WITH ALL DLG FILES FROM AUTODOCK 43 | mode=lc #ANALYSIS MODE, LARGEST CLUSTER (lc) or BEST BINDING ENERGY (be) 44 | out_fold=$dlg_fold'/'results #OUTPUT FOLDER 45 | out_file=$(echo $dlg_fold | rev | cut -d'/' -f 1 | rev) #OUTPUT SDF FILE (NO EXTENSION) 46 | 47 | rm -r $out_fold 48 | mkdir $out_fold 49 | mkdir $out_fold/pdbqt 50 | 51 | for i in $dlg_fold/*dlg 52 | do 53 | name=$(grep -m 1 'Name' $i|awk '{print $5}') 54 | if [ "$mode" == "be" ]; then 55 | run=$(grep -m 1 'RANKING' $i|awk '{print $3}') 56 | score=$(grep -m 1 'RANKING' $i|awk '{print $4}') 57 | elif [ "$mode" == "lc" ]; then 58 | score=$(grep '#' $i|awk '$9>a {a=$9; b=$3} END {print b}') 59 | run=$(grep '#' $i|awk '$9>a {a=$9; b=$5} END {print b}') 60 | fi 61 | echo "ADSCOR $score">>$out_fold/pdbqt/$name 62 | awk -v p="DOCKED: MODEL $run" '$0~p{f=1} f{print} f&&/DOCKED: ENDMDL/{exit}' $i|cut -c9-|sed '/USER/d;/REMARK/d;/MODEL/d;/TORSDOF/d'>>$out_fold/pdbqt/$name 63 | done 64 | 65 | find $dlg_fold -name '*dlg' -delete 66 | find $dlg_fold -name '*xml' -delete 67 | 68 | cd $out_fold/pdbqt 69 | mkdir ../sdf 70 | obabel -ipdbqt * -osdf -m 71 | cat *sdf>>../sdf/res_$out_file'.'sdf 72 | cd .. 73 | rm -r pdbqt 74 | 75 | end=`date +%s` 76 | echo $((end-start)) 77 | echo finished 78 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/check_phase.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('-pf','--phase_file',required=True) 8 | parser.add_argument('-itr','--iteration_directory',required=True) 9 | io_args = parser.parse_args() 10 | 11 | pf = io_args.phase_file 12 | itr = io_args.iteration_directory 13 | 14 | print(pf,itr) 15 | 16 | if os.path.isfile(itr+'/'+pf)==False: 17 | with open(itr+'/'+pf,'w') as ref: 18 | ref.write(pf.split('.')[0]+'\n') 19 | 20 | while 1 == 1: 21 | with open(itr+'/'+pf,'r') as ref: 22 | name = ref.readline().strip() 23 | if name == 'finished': 24 | sys.exit() 25 | else: 26 | time.sleep(60) 27 | 28 | 29 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/deactivation_script.sh: -------------------------------------------------------------------------------- 1 | echo Deactivating virtual environment 2 | source ~/.bashrc 3 | conda deactivate -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/final_extraction.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import multiprocessing 4 | import pandas as pd 5 | import argparse 6 | import glob 7 | import os 8 | 9 | 10 | def merge_on_smiles(pred_file): 11 | print("Merging " + os.path.basename(pred_file) + "...") 12 | 13 | # Read the predictions 14 | pred = pd.read_csv(pred_file, names=["id", "score"]) 15 | pred.drop_duplicates() 16 | 17 | # Read the smiles 18 | smile_file = os.path.join(args.smile_dir, os.path.basename(pred_file)) 19 | smi = pd.read_csv(smile_file, delimiter=" ", names=["smile", "id"]) 20 | smi = smi.drop_duplicates() 21 | return pd.merge(pred, smi, how="inner", on=["id"]).set_index("id") 22 | 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument("-smile_dir", required=True) 27 | 
parser.add_argument("-morgan_dir", required=True) 28 | parser.add_argument("-processors", required=True) 29 | parser.add_argument("-mols_to_dock", required=False) 30 | 31 | args = parser.parse_args() 32 | predictions = [] 33 | 34 | print("Morgan Dir: " + args.morgan_dir) 35 | print("Smile Dir: " + args.smile_dir) 36 | for file in glob.glob(args.morgan_dir + "/*"): 37 | if "smile" in os.path.basename(file): 38 | print(" - " + os.path.basename(file)) 39 | predictions.append(file) 40 | 41 | try: 42 | # combine the files 43 | print("Finding smiles...") 44 | print("Number of CPUs: " + str(multiprocessing.cpu_count())) 45 | num_jobs = min(len(predictions), int(args.processors)) 46 | with closing(Pool(num_jobs)) as pool: 47 | combined = pool.map(merge_on_smiles, predictions) 48 | except Exception as e: 49 | print("While performing the final extraction, we encountered the following exception:", e) 50 | print("This is likely due to memory issues with multiprocessing and pickling...") 51 | print("We will try again with overloaded_final_extraction.py which is slower but can handle more data.") 52 | with open("final_phase.info", "w") as info: 53 | info.write("Failed") 54 | exit() 55 | 56 | # combine all dataframes 57 | print("Combining " + str(len(combined)) + "dataframes...") 58 | base = pd.concat(combined) 59 | combined = None 60 | 61 | print("Done combining... Sorting!") 62 | base = base.sort_values(by="score", ascending=False) 63 | 64 | print("Resetting Index...") 65 | base.reset_index(inplace=True) 66 | 67 | print("Finished Sorting... Here is the base:") 68 | print(base.head()) 69 | 70 | # Check if we want all of the mols 71 | if args.mols_to_dock == "All": 72 | args.mols_to_dock = None 73 | 74 | if args.mols_to_dock is not None: 75 | mtd = int(args.mols_to_dock) 76 | print("Molecules to dock:", mtd) 77 | print("Total molecules:", len(base)) 78 | 79 | if len(base) <= mtd: 80 | print("Our total molecules are less or equal than the number of molecules to dock -> saving all molecules") 81 | else: 82 | print(f"Our total molecules are more than the number of molecules to dock -> saving {mtd} molecules") 83 | base = base.head(mtd) 84 | 85 | print("Saving") 86 | # Rearrange the smiles 87 | smiles = base.drop('score', 1) 88 | smiles = smiles[["smile", "id"]] 89 | print("Here is the smiles:") 90 | print(smiles.head()) 91 | smiles.to_csv("smiles.csv", sep=" ") 92 | 93 | # Rearrange for id,score 94 | base.drop("smile", 1, inplace=True) 95 | base.to_csv("id_score.csv") 96 | print("Here are the ids and scores") 97 | print(base.head()) 98 | 99 | with open("final_phase.info", "w") as info: 100 | info.write("Finished") 101 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/final_extraction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=24 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_f 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | # Read input 10 | project_path=$1 11 | n_cpus=$2 12 | iteration=$3 13 | scripts=$4 14 | mol_to_dock=$5 15 | 16 | echo Project Path: $project_path 17 | echo Project Name: $(basename "$project_path") 18 | echo Num. CPU: $n_cpus 19 | echo Iteration: $iteration 20 | echo Script Path: $scripts 21 | echo Final Mol. 
To Dock: $mol_to_dock 22 | 23 | # Set constant 24 | smile_directory=`sed -n '5p' $project_path/logs.txt` 25 | 26 | # This should activate the conda environment 27 | source ~/.bashrc 28 | source activation_script.sh 29 | 30 | # cd into the final iteration and run the search 31 | cd $project_path/iteration_$iteration 32 | echo Running >| final_phase.info # created in phase_a 33 | echo Smile Dir: $smile_directory 34 | python -u $scripts/final_extraction.py -smile_dir $smile_directory -morgan_dir $project_path/iteration_$iteration/morgan_1024_predictions/ -processors $n_cpus -mols_to_dock $mol_to_dock 35 | 36 | # If the above final extraction failed, we try another slower version 37 | if grep -Fxq "Failed" final_phase.info 38 | then 39 | echo Running >| final_phase.info 40 | python -u $scripts/GUI/overloaded_final_extraction.py -smile_dir $smile_directory -morgan_dir $project_path/iteration_$iteration/morgan_1024_predictions/ -processors $n_cpus -mols_to_dock $mol_to_dock 41 | fi 42 | 43 | # Clean up the slurm files 44 | echo cleaning slurm files 45 | cd $scripts 46 | python3 $scripts/slurm_file_manager.py --phase 0 --iteration $iteration --project_path $project_path 47 | echo Done 48 | 49 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/jobid_writer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('-file_path', '--file_path', required=True) 6 | # adding parameter for where to save all the data to: 7 | parser.add_argument('-save', '--save_path', required=False, default=None) 8 | parser.add_argument('-n_it', '--iteration_no', required=True) 9 | parser.add_argument('-jid', '--job_id', required=True) # SLURM_JOB_NAME 10 | parser.add_argument('-jn', '--job_name', required=True) # SLURM_JOB_NAME.sh 11 | 12 | io_args = parser.parse_args() 13 | n_it = int(io_args.iteration_no) 14 | job_id = io_args.job_id 15 | job_name = io_args.job_name 16 | 17 | DATA_PATH = io_args.file_path # Now == file_path/protein 18 | SAVE_PATH = io_args.save_path 19 | # if no save path is provided we just save it in the same location as the data 20 | if SAVE_PATH is None: SAVE_PATH = DATA_PATH 21 | 22 | if n_it != -1: # creating the job directory 23 | try: 24 | os.mkdir(SAVE_PATH + '/iteration_' + str(n_it)) 25 | except OSError: # file already exists 26 | pass 27 | with open(SAVE_PATH + '/iteration_' + str(n_it) + '/' + job_name, 'w') as ref: 28 | ref.write(job_id + '\n') 29 | 30 | else: # When n_it == -1 we create a seperate directory (for jobs that occur after an iteration) 31 | try: 32 | os.mkdir(SAVE_PATH + '/after_iteration') 33 | except OSError: 34 | pass 35 | with open(SAVE_PATH + '/after_iteration' + '/' + job_name, 'w') as ref: 36 | ref.write(job_id + '\n') 37 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/molecular_file_count_updated.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from contextlib import closing 3 | import pandas as pd 4 | import numpy as np 5 | import argparse 6 | import glob 7 | import time 8 | import os 9 | 10 | try: 11 | import __builtin__ 12 | except ImportError: 13 | # Python 3 14 | import builtins as __builtin__ 15 | 16 | # For debugging purposes only: 17 | def print(*args, **kwargs): 18 | __builtin__.print('\t molecular_file_count_updated: ', end="") 19 | 
return __builtin__.print(*args, **kwargs) 20 | 21 | def write_mol_count_list(file_name, mol_count_list): 22 | with open(file_name,'w') as ref: 23 | for ct,file_name in mol_count_list: 24 | ref.write(str(ct)+","+file_name.split('/')[-1]) 25 | ref.write("\n") 26 | 27 | 28 | def molecule_count(file_name): 29 | temp = 0 30 | with open(file_name,'r') as ref: 31 | ref.readline() 32 | for line in ref: 33 | temp+=1 34 | return temp, file_name 35 | 36 | 37 | if __name__=='__main__': 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument('-pt','--protein_name',required=True) 40 | parser.add_argument('-it','--n_iteration',required=True) 41 | parser.add_argument('-cdd','--data_directory',required=True) 42 | parser.add_argument('-cpd','--project_directory',required=True) 43 | parser.add_argument('-t_pos','--tot_process',required=True) 44 | parser.add_argument('-t_samp','--tot_sampling',required=True) 45 | io_args = parser.parse_args() 46 | 47 | protein = io_args.protein_name 48 | n_it = int(io_args.n_iteration) 49 | data_directory = io_args.data_directory 50 | project_directory = io_args.project_directory 51 | tot_process = int(io_args.tot_process) 52 | Total_sampling = int(io_args.tot_sampling) 53 | 54 | print("Parsed Args:") 55 | print(" - Iteration:", n_it) 56 | print(" - Data Directory:", data_directory) 57 | print(" - Num process nodes:", tot_process) 58 | print(" - Total Sampling:", Total_sampling) 59 | 60 | # Creating Mol_ct_file.csv if not already created 61 | if not os.path.exists(project_directory + "/Mol_ct_file.csv"): 62 | files = [] 63 | # saving the files: 64 | for f in glob.glob(data_directory+'/*.txt'): 65 | files.append(f) 66 | print("Number Of Files:", len(files)) 67 | 68 | t=time.time() 69 | print("Reading Files...") 70 | # Counting num of molecules in each file 71 | with closing(Pool(np.min([tot_process,len(files)]))) as pool: 72 | mol_count = pool.map(molecule_count, files) 73 | print("Done Reading Files - Time Taken", time.time()-t) 74 | 75 | print("Saving File Count...") # as a Mol_ct_file.csv 76 | try: 77 | write_mol_count_list(project_directory + "/Mol_ct_file.csv", mol_count) 78 | except PermissionError: 79 | print("Mol_ct_file.csv already created by other user") 80 | 81 | # Creating Mol_ct_file_updated.csv if not already created (project specific) 82 | if not os.path.exists(project_directory + "/Mol_ct_file_updated.csv"): 83 | mol_ct = pd.read_csv(project_directory+'/Mol_ct_file.csv',header=None) 84 | mol_ct.columns = ['Number_of_Molecules','file_name'] 85 | 86 | Total_mols_available = np.sum(mol_ct.Number_of_Molecules) 87 | mol_ct['Sample_for_million'] = [int(Total_sampling/Total_mols_available*elem) for elem in mol_ct.Number_of_Molecules] 88 | 89 | mol_ct.to_csv(project_directory+'/Mol_ct_file_updated.csv',sep=',',index=False) 90 | print("Done - Time Taken", time.time()-t) -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/optimize_models.py: -------------------------------------------------------------------------------- 1 | import IPython 2 | import kerastuner as kt 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | try: 7 | import Docking.ML.load_data 8 | from Docking.ML.DDModel import DDModel 9 | from Docking.ML.Models import TunerModel 10 | from Docking.ML.utils import * 11 | except: 12 | import ML.load_data 13 | from ML.DDModel import DDModel 14 | from ML.Models import TunerModel 15 | from ML.utils import * 16 | 17 | 18 | class ClearTrainingOutput(tf.keras.callbacks.Callback): 19 | def 
on_train_end(*args, **kwargs): 20 | IPython.display.clear_output(wait=True) 21 | 22 | 23 | class Config: 24 | def __init__(self, config): 25 | self.directory = config['directory'] 26 | self.project_name = config['project_name'] 27 | self.objective = config['objective'] 28 | self.max_trials = config['max_trials'] 29 | self.max_epochs = config['max_epochs'] 30 | self.factor = config['factor'] 31 | self.hyperband_iterations = config['hyperband_iterations'] 32 | self.direction = config['direction'] 33 | 34 | # For search 35 | self.steps_per_epoch = config['steps_per_epoch'] 36 | self.validation_steps = config['validation_steps'] 37 | self.epochs = config['epochs'] 38 | self.batch_size = config['batch_size'] 39 | self.model_location = config['model_location'] 40 | 41 | 42 | def optimize(technique): 43 | # Load config 44 | print("Loading Config...") 45 | config_file = read_tuner_config('../ML/tuner_config.txt') 46 | tuner_config = Config(config_file) 47 | print("Loading Dataset...") 48 | train_path, test_path = config_file['training_path'], config_file['testing_path'] 49 | data = ML.load_data.load(train_path, test_path) 50 | train_x, train_y, test_x, test_y = data() 51 | train_x, train_y = train_x.tolist(), train_y.tolist() 52 | test_x, test_y = test_x.tolist(), test_y.tolist() 53 | 54 | tr_x = np.array(train_x) 55 | tr_y = np.array(train_y) 56 | tx = np.array(test_x) 57 | ty = np.array(test_y) 58 | 59 | if technique == 'bayesian': 60 | return run_bayesian(tr_x, tr_y, tx, ty, tuner_config, {0: 1, 1: 1}) 61 | 62 | 63 | def run_bayesian(tr_x, tr_y, tx, ty, config: Config, class_weights): 64 | tuner_model = TunerModel(tr_x.shape[1:]) 65 | tuner = kt.BayesianOptimization(tuner_model.build_tuner_model, 66 | objective=kt.Objective(config.objective, config.direction), 67 | project_name=config.project_name, 68 | directory=config.directory, 69 | max_trials=config.max_trials, overwrite=True) 70 | 71 | tuner.search_space_summary() 72 | tuner.search(tr_x, tr_y, 73 | validation_data=(tx, ty), 74 | epochs=config.epochs, 75 | batch_size=config.batch_size, 76 | class_weight=class_weights, 77 | callbacks=[tf.keras.callbacks.EarlyStopping(monitor=config.objective, min_delta=0, patience=3, 78 | verbose=0, mode=config.direction)]) 79 | # Show a summary of the search 80 | tuner.results_summary() 81 | 82 | # Retrieve the best model. 83 | print("Saving the top model...") 84 | best_hyperparameters = tuner.get_best_hyperparameters(1)[0] 85 | print("Top hyperparameters:", best_hyperparameters) 86 | 87 | best_model = tuner.hypermodel.build(best_hyperparameters) 88 | model = DDModel.load(best_model, kt_hyperparameters=best_hyperparameters) 89 | 90 | for key in best_hyperparameters.values: 91 | print(key, "->", best_hyperparameters[key]) 92 | 93 | return model 94 | 95 | 96 | def run_sklearn(tr_x, tr_y, config: Config, build_model_func): 97 | """ 98 | Runs the bayesian optimization algorithm on an sklearn model. 
99 | """ 100 | from sklearn import metrics, model_selection, ensemble 101 | 102 | # Create the tuner 103 | tuner = kt.tuners.Sklearn( 104 | oracle=kt.oracles.BayesianOptimization(objective=kt.Objective('score', 'max'), 105 | max_trials=config.max_trials), 106 | hypermodel=build_model_func, 107 | scoring=metrics.make_scorer(metrics.precision_score), 108 | cv=model_selection.StratifiedKFold(5), 109 | directory=config.directory, 110 | project_name=config.project_name) 111 | 112 | # Run the search 113 | tuner.search(tr_x, tr_y) 114 | 115 | # Return the best model 116 | return build_model_func(tuner.get_best_hyperparameters(num_trials=1)[0], return_light_model=True) 117 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=24 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_1 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | # ARGS Passed: 10 | iteration=$1 11 | t_cpu=$2 12 | project_path=$3 13 | project_name=$4 14 | mol_to_dock=$5 # Replace with sample size (training set) 15 | local_path=$6 16 | 17 | echo Partition: $SLURM_JOB_PARTITION 18 | echo Args: 19 | echo Iteration: $iteration 20 | echo Total CPUs: $t_cpu 21 | echo Project Path: $project_path 22 | echo Project Name: $project_name 23 | echo Mols To Dock: $mol_to_dock 24 | echo Scripts: $local_path 25 | 26 | # This should activate the conda environment 27 | source ~/.bashrc 28 | source $local_path/activation_script.sh 29 | 30 | 31 | # Set constants 32 | file_path=`sed -n '1p' $project_path/$project_name/logs.txt` 33 | protein=`sed -n '2p' $project_path/$project_name/logs.txt` 34 | n_mol=`sed -n '9p' $project_path/$project_name/logs.txt` 35 | morgan_directory=`sed -n '4p' $project_path/$project_name/logs.txt` 36 | smile_directory=`sed -n '5p' $project_path/$project_name/logs.txt` 37 | sdf_directory=`sed -n '6p' $project_path/$project_name/logs.txt` 38 | 39 | # Set the to be docked 40 | pr_it=$(($1-1)) 41 | # On the first iteration we want to triple the amount we dock so that we can create testing and validation sets 42 | if [ $1 == 1 ] 43 | then 44 | to_d=$((n_mol+n_mol+mol_to_dock)) #n_mol is our test/valid size 45 | else 46 | to_d=$mol_to_dock 47 | fi 48 | echo To Dock: $to_d 49 | 50 | # set the total CPUs 51 | if [ $t_cpu == 64 ];then t_cpu=48;fi 52 | echo Total CPU: $t_cpu 53 | 54 | python jobid_writer.py -file_path $file_path/$protein -n_it $1 -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh 55 | if [ $1 == 1 ];then pred_directory=$morgan_directory;else pred_directory=$file_path/$protein/iteration_$pr_it/morgan_1024_predictions;fi 56 | 57 | python molecular_file_count_updated.py -pt $protein -it $1 -cdd $pred_directory -cpd $file_path/$protein -t_pos $t_cpu -t_samp $to_d 58 | python sampling.py -pt $protein -fp $file_path -it $1 -dd $pred_directory -t_pos $t_cpu -tr_sz $mol_to_dock -vl_sz $n_mol 59 | python sanity_check.py -pt $protein -fp $file_path -it $1 60 | python Extracting_morgan.py -pt $protein -fp $file_path -it $1 -md $morgan_directory -t_pos $t_cpu 61 | python Extracting_smiles.py -pt $protein -fp $file_path -it $1 -fn 0 -smd $smile_directory -sd $sdf_directory -t_pos $t_cpu -if False 62 | 63 | python phase_changer.py -pf phase_1.sh -itr $file_path/$protein/iteration_$1 64 | echo python phase_changer.py -pf phase_1.sh -itr $file_path/$protein/iteration_$1 65 | 66 | 67 
| # Clean up the slurm files 68 | echo cleaning slurm files 69 | python slurm_file_manager.py --phase 1 --iteration $iteration --project_path $project_path/$project_name 70 | 71 | # This extracts the ZINC ids by random sampling and creates the train/valid/test datasets 72 | # - If a smile is found on line l of file f, then its morgan fingerprint is on line l of the equivalent morgan file. 73 | 74 | # how to run phase_1.sh: 75 | # sbatch phase_1.sh iteration t_cpu project_path project_name mol_to_dock 76 | # - Note: mol_to_dock should match the value recorded in the logs file; this is not enforced, so a mismatch can go unnoticed. 77 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=1 3 | #SBATCH --mem=0 # memory per node 4 | #SBATCH --job-name=phase_2 5 | #SBATCH --output=slurm-%x.%j.out 6 | #SBATCH --error=slurm-%x.%j.err 7 | 8 | # Args 9 | extension=$1 # .smi 10 | chunk_n_lines=$2 # 1000 11 | script_path=$3 # path to scripts 12 | project_path=$4 # path to project 13 | iteration=$5 14 | project_name=$(basename "$project_path") 15 | 16 | echo Partition: $SLURM_JOB_PARTITION 17 | echo Args: 18 | echo Extension: $extension 19 | echo Chunk Size: $chunk_n_lines 20 | echo Project Path: $project_path 21 | echo Project Name: $project_name 22 | echo Iteration: $iteration 23 | 24 | slurm_args_no_cpu=$(sed -n '1p' ${script_path}/slurm_args/${project_name}_slurm_args.txt) 25 | 26 | # This should activate the conda environment 27 | source ~/.bashrc 28 | source $script_path/activation_script.sh 29 | 30 | python $script_path/jobid_writer.py -file_path $project_path -n_it $iteration -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh 31 | 32 | # For some reason, running this with the conda environment activated causes an error. 33 | # We must deactivate it before running! 34 | source ~/.bashrc 35 | source $script_path/deactivation_script.sh 36 | 37 | # Move into the project 38 | cd $project_path/iteration_$iteration 39 | 40 | # Start running the chunking 41 | echo Starting Phase 2 42 | echo Chunking Train, Test, and Valid Sets...
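# As an illustration only (extension and chunk size taken from the sample invocation
# at the bottom of this script, and $slurm_args_no_cpu assumed to expand to just a
# partition flag), the first of the three submissions below comes out roughly as:
#
#   sbatch --partition=<cpu_partition> $script_path/split_chunks.sh smile/train_smiles_final_updated.smi .smi train 1000 $script_path $project_name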
43 | sbatch $slurm_args_no_cpu $script_path/split_chunks.sh smile/train_smiles_final_updated.smi $extension train $chunk_n_lines $script_path $project_name 44 | sbatch $slurm_args_no_cpu $script_path/split_chunks.sh smile/test_smiles_final_updated.smi $extension test $chunk_n_lines $script_path $project_name 45 | sbatch $slurm_args_no_cpu $script_path/split_chunks.sh smile/valid_smiles_final_updated.smi $extension valid $chunk_n_lines $script_path $project_name 46 | 47 | # This should activate the conda environment 48 | source ~/.bashrc 49 | source $script_path/activation_script.sh 50 | 51 | # wait for completion 52 | echo Finished Chunking 53 | wait 54 | python $script_path/phase_changer.py -pf phase_2.sh -itr $project_path/iteration_$iteration 55 | echo Phase 2 Finished 56 | 57 | # Clean up the slurm files 58 | echo cleaning slurm files 59 | cd $script_path 60 | python slurm_file_manager.py --phase 2 --iteration $iteration --project_path $project_path 61 | 62 | 63 | # sbatch phase_2.sh .smi 1000 /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDocking /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDockingProjects/full_run_test_james 1 64 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=1 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_3 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | # ARGS 10 | PATH_FLD=$1 11 | num_energy_evaluations=$2 12 | num_runs=$3 13 | path_to_auto_dock_gpu=$4 14 | project_path=$5 15 | iteration=$6 16 | scripts=$7 17 | project_name=$(basename "$project_path") 18 | 19 | 20 | echo Partition: $SLURM_JOB_PARTITION 21 | echo Args: 22 | echo FLD Path: $PATH_FLD 23 | echo Energy Evaluations: $num_energy_evaluations 24 | echo Num Runs: $num_runs 25 | echo Path To Autodock: $path_to_auto_dock_gpu 26 | echo Project Path: $project_path 27 | echo Project Name: $project_name 28 | echo Iteration: $iteration 29 | echo Scripts: $scripts 30 | 31 | # getting slurm args for gpu req scripts (with cpus-per-task and gpu_partition) 32 | slurm_args_g=$(sed -n '4p' ${scripts}/slurm_args/${project_name}_slurm_args.txt) 33 | 34 | # This should activate the conda environment 35 | source ~/.bashrc 36 | source $scripts/activation_script.sh 37 | 38 | #path_to_auto_dock_gpu=/groups/cherkasvgrp/autodock/scottlegrand/AutoDock-GPU.relicensing/bin 39 | python jobid_writer.py -file_path $project_path -n_it $iteration -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh --save_path $project_path 40 | 41 | # Run phase 3 42 | cd $project_path/iteration_$iteration 43 | echo Running Phase 3 44 | mkdir res 45 | for i in $(ls -d chunks_smi/*); do fld=$(echo $i | rev | cut -d'/' -f 1 | rev); mkdir res/$fld; cd res/$fld; sbatch $slurm_args_g $scripts/autodock_gpu_ad.sh 64 sw $PATH_FLD ../../$i'/'$fld'_'pdbqt list.txt $num_energy_evaluations $num_runs $path_to_auto_dock_gpu $scripts; cd ../../;done 46 | 47 | cd $scripts 48 | python phase_changer.py -pf phase_3.sh -itr $project_path/iteration_$iteration 49 | 50 | # Clean up the slurm files 51 | echo cleaning slurm files 52 | python slurm_file_manager.py --phase 3 --iteration $iteration --project_path $project_path 53 | echo Done 54 | 55 | #sbatch phase_3.sh 
/groups/cherkasvgrp/share/progressive_docking/development/AD_GPU/autodock_grid/x77_grid.maps.fld 5000000 10 /groups/cherkasvgrp/autodock/scottlegrand/AutoDock-GPU.relicensing/bin /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDockingProjects/full_run_test_james 1 /groups/cherkasvgrp/share/progressive_docking/development/pd_python_pose_v2/test_DD_installation/DeepDocking 56 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_3_concluding_combination.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd $1 4 | mkdir docked 5 | echo Transfering docked training 6 | cat res/*train*/results/sdf/*sdf >> docked/train_docked.sdf 7 | 8 | echo Transfering docked validation 9 | cat res/*valid*/results/sdf/*sdf >> docked/valid_docked.sdf 10 | 11 | echo Transfering docked testing 12 | cat res/*test*/results/sdf/*sdf >> docked/test_docked.sdf 13 | 14 | echo Done -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=3 3 | #SBATCH --ntasks=1 4 | #SBATCH --mem=0 # memory per node 5 | #SBATCH --job-name=phase_4 6 | #SBATCH --output=slurm-%x.%j.out 7 | #SBATCH --error=slurm-%x.%j.err 8 | 9 | ###************************************************************************ 10 | ### Planned Changes: 11 | ### > Reducing the number of passed parameters 12 | ### |- How we currently run phase 4: "sbatch phase_4.sh current_itr n_cpu project_path project_name final_itr? total_itr" 13 | ### |- How we want to run phase 4: "sbatch phase_4.sh current_itr n_cpu project_path/project_name final_itr? 
total_itr path_to_deep_docking_source" 14 | ### > Get rid of smile_directory and sdf_directory for they are unused 15 | ###************************************************************************ 16 | 17 | echo Partition: $SLURM_JOB_PARTITION 18 | echo "Passed Parameters:" 19 | echo "Current Iteration: $1" 20 | echo "Number of CPUs: $2" 21 | echo "Project Path: $3" 22 | echo Project Name: $(basename "$3") 23 | echo "Final Iteration: $4" 24 | echo "Total Iterations: $5" 25 | echo "Path To Deep Docking Source Scripts: $6" 26 | echo "Percent First Mol: $7" 27 | echo "Percent Last Mol: $8" 28 | 29 | # Reading the log file 30 | file_path=`sed -n '1p' $3/logs.txt` 31 | project_name=`sed -n '2p' $3/logs.txt` 32 | morgan_directory=`sed -n '4p' $3/logs.txt` 33 | num_hyperparameters=`sed -n '8p' $3/logs.txt` # number of hyperparameters 34 | docking_software=`sed -n '7p' $3/logs.txt` # The docking software used 35 | 36 | # The number of molecules to train on: 37 | num_molec=`sed -n '9p' $3/logs.txt` 38 | 39 | local_path=$6 # Should be the path to the deep docking source scripts 40 | save_path=$3 41 | 42 | # getting slurm args for gpu req scripts (with cpus-per-task and gpu_partition) 43 | slurm_args_g=$(sed -n '4p' ${local_path}/slurm_args/${project_name}_slurm_args.txt) 44 | 45 | # This should activate the conda environment 46 | source ~/.bashrc 47 | source $local_path/activation_script.sh 48 | 49 | echo "writing jobs" 50 | python jobid_writer.py -file_path $file_path/$project_name -n_it $1 -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh --save_path $save_path 51 | 52 | t_pos=$2 # total number of processors available 53 | echo "Extracting labels" 54 | python Extract_labels.py -if False -n_it $1 -protein $project_name -file_path $file_path -t_pos $t_pos -sof $docking_software 55 | 56 | if [ $? != 0 ]; then 57 | echo "Extract_labels failed... terminating" 58 | exit 59 | fi 60 | 61 | echo "Creating simple jobs" 62 | python simple_job_models.py -n_it $1 -time 00-04:00 -file_path $file_path/$project_name -nhp $num_hyperparameters -titr $5 -n_mol $num_molec --save_path $save_path --percent_first_mols $7 --percent_last_mols $8 63 | 64 | # Executes all the files that were created in the simple_jobs directory 65 | echo "Running simple jobs" 66 | cd $save_path/iteration_$1/simple_job 67 | 68 | # For some reason, running this with the conda environment activated causes an error. 69 | # We must deactivate it before running! 70 | source ~/.bashrc 71 | source $local_path/deactivation_script.sh 72 | for f in *;do sbatch $slurm_args_g $f;done 73 | 74 | echo "running phase_changer" 75 | source ~/.bashrc 76 | source $local_path/activation_script.sh 77 | python $local_path/phase_changer.py -pf phase_4.sh -itr $file_path/$project_name/iteration_$1 78 | 79 | # Clean up the slurm files 80 | echo cleaning slurm files 81 | cd $local_path 82 | python slurm_file_manager.py --phase 4 --iteration $1 --project_path $3 83 | 84 | echo "Done..." 
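# Example submission (argument order read from the echo block at the top of this
# script; every value below is a placeholder):
# sbatch phase_4.sh <current_iteration> <n_cpus> <project_path> <final_iteration?> <total_iterations> <path_to_DD_scripts> <percent_first_mols> <percent_last_mols>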
85 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --ntasks=1 3 | #SBATCH --cpus-per-task=1 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --mem=0 # memory per node 6 | #SBATCH --job-name=phase_5 7 | #SBATCH --output=slurm-%x.%j.out 8 | #SBATCH --error=slurm-%x.%j.err 9 | 10 | ### This will be replacing the old phase 5 script 11 | ### (make sure to decrease all other parameters after 3 when doing so) 12 | 13 | ###************************************************************************ 14 | ### Planned Changes: 15 | ### > Reducing the number of passed parameters to 1 16 | ### |- How we currently run phase 5: "sbatch phase_5.sh iteration path_to_project project_name" 17 | ### |- How we want to run phase 5: "sbatch phase_5.sh iteration path_to_project/project_name progressive_docking_path" 18 | ### > Get rid of smile_directory and sdf_directory for they are unused 19 | ###************************************************************************ 20 | 21 | ### Scan through the logs.txt file 22 | file_path=`sed -n '1p' $2/logs.txt` 23 | protein=`sed -n '2p' $2/logs.txt` # name of project folder 24 | morgan_directory=`sed -n '4p' $2/logs.txt` 25 | num_molec=`sed -n '9p' $2/logs.txt` 26 | 27 | progressive_docking_path=$3 28 | save_path=$2 29 | project_name=$(basename "$2") 30 | 31 | echo Partition: $SLURM_JOB_PARTITION 32 | echo "Passed Parameters:" 33 | echo "Current Iteration: $1" 34 | echo "Project Path: $2" 35 | echo Project Name: $project_name 36 | echo "Scripts: $3" 37 | echo "Number of CPUs: $4" 38 | 39 | # This should activate the conda environment 40 | source ~/.bashrc 41 | source $progressive_docking_path/activation_script.sh 42 | 43 | # getting slurm args for gpu req scripts (with cpus-per-task and gpu_partition) 44 | slurm_args_g=$(sed -n '4p' ${progressive_docking_path}/slurm_args/${project_name}_slurm_args.txt) 45 | 46 | 47 | python jobid_writer.py -file_path $file_path/$protein -n_it $1 -jid $SLURM_JOB_NAME -jn $SLURM_JOB_NAME.sh --save_path $save_path 48 | 49 | echo "Starting Evaluation" 50 | python -u hyperparameter_result_evaluation.py -n_it $1 --data_path $file_path/$protein -n_mol $num_molec --save_path $save_path 51 | echo "Creating simple_job_predictions" 52 | python simple_job_predictions.py -protein $protein -file_path $file_path -n_it $1 -mdd $morgan_directory --save_path $save_path 53 | 54 | # For some reason, running this with the conda environment activated causes an error. 55 | # We must deactivate it before running! 56 | source ~/.bashrc 57 | source $progressive_docking_path/deactivation_script.sh 58 | cd $save_path/iteration_$1/simple_job_predictions/ 59 | echo "running simple_jobs" 60 | for f in *;do sbatch $slurm_args_g $f; done 61 | 62 | echo "waiting for event phase change" 63 | source ~/.bashrc 64 | source $progressive_docking_path/activation_script.sh 65 | python $progressive_docking_path/phase_changer.py -pf phase_5.sh -itr $file_path/$protein/iteration_$1 66 | 67 | # Now we grab the top hits 68 | source ~/.bashrc 69 | source $progressive_docking_path/deactivation_script.sh 70 | 71 | echo Phase 5 is finished. Now searching for top predicted molecules. 72 | cd $progressive_docking_path/GUI 73 | sbatch run_search.sh $2 $4 $1 1000 #TODO: slurm args for this? 
74 | 75 | # Clean up the slurm files 76 | echo Cleaning slurm files 77 | cd $progressive_docking_path 78 | python slurm_file_manager.py --phase 5 --iteration $1 --project_path $2 --script_path $3 79 | 80 | echo All finished. -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/phase_changer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('-pf', '--phase_file', required=True) 8 | parser.add_argument('-itr', '--iteration_directory', required=True) 9 | io_args = parser.parse_args() 10 | pf = io_args.phase_file 11 | itr_dir = io_args.iteration_directory 12 | 13 | print("Phase Changer:") 14 | print(" - Monitoring: {}".format(pf)) 15 | print(" - Project: {}".format(itr_dir.split("/")[-2] + "/" + itr_dir.split("/")[-1])) 16 | 17 | if pf == 'phase_1.sh' or pf == 'phase_a.sh': 18 | # Change the phase_1.sh file. 19 | with open(itr_dir + '/' + pf, 'w') as ref: 20 | ref.write('finished\n') 21 | print(" - Finished... File Updated") 22 | 23 | elif pf == 'phase_2.sh': 24 | 25 | # Check to see if the slurm jobs are done 26 | while True: 27 | try: 28 | # Check every slurm file in chunk_smi 29 | finished_jobs = 0 30 | running_jobs = glob.glob(itr_dir + '/chunk*/*/slurm*.out') 31 | for running in running_jobs: 32 | # open the out file 33 | with open(running) as file: 34 | # check if it is finished 35 | lines = file.readlines() 36 | if len(lines) > 0 and "finished" in lines[-1]: 37 | finished_jobs += 1 38 | 39 | # if they are all finished, break the loop and finish phase 2 40 | if len(running_jobs) == finished_jobs and len(running_jobs) > 0: 41 | break 42 | else: 43 | time.sleep(30) 44 | except OSError: 45 | time.sleep(30) 46 | 47 | # update the phase file 48 | with open(itr_dir + '/' + pf, 'w') as ref: 49 | ref.write('finished\n') 50 | 51 | elif pf == 'phase_3.sh': 52 | # Check to see if the slurm jobs are done 53 | while True: 54 | try: 55 | # Check every slurm file in res 56 | finished_jobs = 0 57 | running_jobs = glob.glob(itr_dir + '/res*/*/slurm*.out') 58 | for running in running_jobs: 59 | # open the out file 60 | with open(running) as file: 61 | # check if it is finished 62 | lines = file.readlines() 63 | if len(lines) > 0 and "finished" in lines[-1]: 64 | finished_jobs += 1 65 | 66 | # if they are all finished, break the loop and finish phase 2 67 | if len(running_jobs) == finished_jobs and len(running_jobs) > 0: 68 | break 69 | else: 70 | time.sleep(60) 71 | except IOError: 72 | time.sleep(60) 73 | 74 | # Perform the final phase 3 operation 75 | print("Wrapping up phase 3...") 76 | os.system("bash phase_3_concluding_combination.sh " + itr_dir) 77 | 78 | # update the phase file 79 | with open(itr_dir + '/' + pf, 'w') as ref: 80 | ref.write('finished\n') 81 | 82 | elif pf == 'phase_4.sh': 83 | while True: 84 | t_jobs = len(glob.glob(itr_dir + '/simple_job/*.sh')) 85 | t_done = len(glob.glob(itr_dir + '/simple_job/*.out')) 86 | print("total jobs:", t_jobs, "total jobs done:", t_done) 87 | if t_done != t_jobs: 88 | time.sleep(60) 89 | else: 90 | jobids = [] 91 | for f in glob.glob(itr_dir + '/simple_job/*.out'): 92 | tmp = f.split(".")[-2] # slurm-phase_4.786716.out -> ['slurm-phase_4', 786716, out] -> 786716 93 | jobids.append(len(os.popen("squeue | grep " + tmp).read()) == 0) # empty string -> job complete 94 | 95 | print("\t{}/{}".format(jobids.count(True), len(jobids))) 96 | 97 
| if jobids.count(True) == len(jobids): # if num jobs completed == num total jobs 98 | with open(itr_dir + '/' + pf, 'w') as ref: 99 | ref.write('finished\n') 100 | break 101 | else: 102 | time.sleep(60) 103 | 104 | elif pf == 'phase_5.sh': 105 | while 1 == 1: 106 | t_jobs = len(glob.glob(itr_dir + '/simple_job_predictions/*.sh')) 107 | t_done = len(glob.glob(itr_dir + '/simple_job_predictions/*.out')) 108 | if t_done != t_jobs: 109 | time.sleep(60) 110 | else: 111 | jobids = [] 112 | for f in glob.glob(itr_dir + '/simple_job_predictions/*.out'): 113 | tmp = f.split(".")[-2] # slurm-phase_5.786716.out -> ['slurm-phase_4', 786716, out] -> 786716 114 | jobids.append(len(os.popen("squeue | grep " + tmp).read()) == 0) # empty string -> job complete 115 | 116 | print("\t{}/{}".format(jobids.count(True), len(jobids))) 117 | 118 | if jobids.count(True) == len(jobids): # if num jobs completed == num total jobs 119 | with open(itr_dir + '/' + pf, 'w') as ref: 120 | ref.write('finished\n') 121 | break 122 | else: 123 | time.sleep(60) 124 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/prepare_ligands_ad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=prepare 3 | #SBATCH --time=04:00:00 4 | #SBATCH --output=slurm-phase_2-%x.%j.out 5 | #SBATCH --error=slurm-phase_2-%x.%j.err 6 | 7 | script_path=$1 8 | 9 | # This should activate the conda environment 10 | source ~/.bashrc 11 | source $script_path/activation_script.sh 12 | 13 | start=`date +%s` 14 | 15 | name=$(pwd| rev | cut -d'/' -f 1 | rev) 16 | fld=$name'_'pdbqt 17 | 18 | # Uncomment the next three lines if you have openeye and want to do tautomer generation on the fly (instead of preparing the library beforehand); add also #SBATCH --cpus-per-task=20 at the top as openeye uses MPI 19 | # $openeye tautomers -in $name'.'smi -out $name'_'h.smi -maxtoreturn 1 -warts false 20 | # wait $! 21 | # mv $name'_'h.smi $name'.'smi 22 | 23 | # obabel takes a lot longer than openeye, but both of the following lines work for 3d conformer generation 24 | # $openeye oeomega classic -in $name'.'smi -out $name'.'sdf -strictstereo false -maxconfs 1 -mpi_np 20 -log $name'.'log -prefix $name 25 | obabel -ismi $name'.'smi -O $name'.'sdf --gen3d --fast 26 | wait $! 27 | 28 | rm -r $fld 29 | mkdir $fld 30 | cp $name'.'sdf $fld'/' 31 | cd $fld 32 | python $script_path'/'split_sdf.py $name'.'sdf 33 | rm $name'.'sdf 34 | obabel -isdf *sdf -opdbqt -m 35 | wait $! 36 | rm *sdf 37 | 38 | end=`date +%s` 39 | echo $((end-start)) 40 | echo finished 41 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is used for resetting phases and cancelling jobs. It will remove the slurm files and/or cancel jobs that are 3 | associated with the passed project name and username. 
4 | """ 5 | 6 | import argparse 7 | import glob 8 | import csv 9 | import os 10 | 11 | 12 | def judge(slurm_files, project_name, phase_job, remove_slurms=True, test=False): 13 | """ Judges whether or not a file should be removed or jobs should be cancelled""" 14 | 15 | if remove_slurms: 16 | # Look at every file 17 | for slurm_file in slurm_files: 18 | # Read file 19 | with open(slurm_file, "r") as file: 20 | # Look at each line of file 21 | for line in file: 22 | # If the project name is in the header 23 | if "Project Name:" in line and project_name in line: 24 | if not test: 25 | # Get the job ID to cancel 26 | job_id = slurm_file.split(".")[1] 27 | os.system("scancel " + str(job_id)) 28 | 29 | # Remove the files 30 | os.remove(slurm_file) 31 | os.remove(slurm_file.replace("out", "err")) 32 | 33 | print("Judged", os.path.basename(slurm_file)) 34 | break 35 | 36 | # Reads phase_jobs.csv and cancels each job 37 | print("Cancelling Jobs...") 38 | 39 | # Get the ids 40 | ids = [] 41 | with open(phase_job, 'r') as file: 42 | # Read csv 43 | reader = [row for row in csv.reader(file)] 44 | # get the job id index 45 | index = reader[0].index("job_id") 46 | # get the index of the job_ids 47 | rows = reader[1:] 48 | # get the ids 49 | for row in rows: 50 | ids.append(row[index]) 51 | 52 | for jid in ids: 53 | print("Cancelling Job", jid) 54 | if not test: 55 | os.system("scancel " + str(jid)) 56 | 57 | 58 | if __name__ == "__main__": 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument("--project_name", type=str) 61 | parser.add_argument("--username", type=str) 62 | parser.add_argument("--scripts", type=str) 63 | parser.add_argument("--remove_slurms", type=bool, default=True, required=False) 64 | parser.add_argument("--test", type=bool, default=False, required=False) 65 | 66 | args = parser.parse_args() 67 | 68 | # Grab all of the slurm files 69 | files = glob.glob("slurm-*.out") + glob.glob("GUI/slurm-*.out") 70 | files = [args.scripts + "/" + f for f in files] 71 | pj = args.scripts + f"/GUI/Users/{args.username}/" + args.username + "_phase_jobs.csv" 72 | judge(files, project_name=args.project_name, phase_job=pj, remove_slurms=args.remove_slurms, test=args.test) 73 | 74 | 75 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset1.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 1 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | 9 | rm -r * 10 | 11 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset2.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 2 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | rm -r chunks_smi slurm-* -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset3.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 3 on $(basename $1)... 
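# Usage sketch, inferred from how the arguments are consumed below and by reset.py
# (not confirmed by a caller shown in this repo):
#   bash reset3.sh <iteration_directory> <project_name> <username> <scripts_path>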
2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | 9 | rm -r res 10 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset4.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 4 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | # Move into the project 7 | cd $1 8 | 9 | rm -r all_models hyperparameter* model_no.txt morgan_1024_predictions simple_job* 10 | rm -r best_model* 11 | rm testing* validation* training* 12 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/reset5.sh: -------------------------------------------------------------------------------- 1 | echo Resetting Phase 5 on $(basename $1)... 2 | 3 | # Remove the slurm files associated with the project 4 | python3 reset.py --project_name "$2" --username "$3" --scripts "$4" 5 | 6 | cd $1 7 | rm -r simple_job_predictions/ morgan_1024_predictions/ 8 | echo Done -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/sampling.py: -------------------------------------------------------------------------------- 1 | from contextlib import closing 2 | from multiprocessing import Pool 3 | import pandas as pd 4 | import numpy as np 5 | import argparse 6 | import glob 7 | import time 8 | import os 9 | 10 | try: 11 | import __builtin__ 12 | except ImportError: 13 | # Python 3 14 | import builtins as __builtin__ 15 | 16 | # For debugging purposes only: 17 | def print(*args, **kwargs): 18 | __builtin__.print('\t sampling: ', end="") 19 | return __builtin__.print(*args, **kwargs) 20 | 21 | 22 | def train_valid_test(file_name): 23 | sampling_start_time = time.time() 24 | f_name = file_name.split('/')[-1] 25 | mol_ct = pd.read_csv(PROJECT_PATH+"/Mol_ct_file_updated.csv", index_col=1) 26 | if n_it == 1: 27 | to_sample = int(mol_ct.loc[f_name].Sample_for_million/(rt_sz+2)) 28 | else: 29 | to_sample = int(mol_ct.loc[f_name].Sample_for_million/3) 30 | 31 | total_len = int(mol_ct.loc[f_name].Number_of_Molecules) 32 | shuffle_array = np.linspace(0, total_len-1, total_len) 33 | np.random.shuffle(shuffle_array) 34 | 35 | if n_it == 1: 36 | train_ind = shuffle_array[:int(rt_sz*to_sample)] 37 | valid_ind = shuffle_array[int(to_sample*rt_sz):int(to_sample*(rt_sz+1))] 38 | test_ind = shuffle_array[int(to_sample*(rt_sz+1)):int(to_sample*(rt_sz+2))] 39 | else: 40 | train_ind = shuffle_array[:to_sample] 41 | valid_ind = shuffle_array[to_sample:to_sample*2] 42 | test_ind = shuffle_array[to_sample*2:to_sample*3] 43 | 44 | train_ind_dict = {} 45 | valid_ind_dict = {} 46 | test_ind_dict = {} 47 | 48 | train_set = open(PROJECT_PATH + "/iteration_" + str(n_it) + "/train_set.txt", 'a') 49 | test_set = open(PROJECT_PATH + "/iteration_" + str(n_it) + "/test_set.txt", 'a') 50 | valid_set = open(PROJECT_PATH + "/iteration_" + str(n_it) + "/valid_set.txt", 'a') 51 | # smiles = open(file_path + '/' + protein + "/iteration_" + str(n_it) + "/smile_locations.csv", 'a') 52 | 53 | for i in train_ind: 54 | train_ind_dict[i] = 1 55 | for j in valid_ind: 56 | valid_ind_dict[j] = 1 57 | for k in test_ind: 58 | test_ind_dict[k] = 1 59 | 60 | # Opens the file and write the test, train, and valid files 61 | with 
open(file_name, 'r') as ref: 62 | for ind, line in enumerate(ref): 63 | molecule_id = line.strip().split(',')[0] 64 | if ind == 1: 65 | print("molecule_id:", molecule_id) 66 | 67 | # now we write to the train, test, and validation sets 68 | # we also add to the 69 | if ind in train_ind_dict.keys(): 70 | train_set.write(molecule_id + '\n') 71 | 72 | # Grabs the file number 73 | # The file is actually "smile_all_n.txt" but I only save n 74 | # smile_location = f_name.split("_")[-1].split(".")[0] 75 | # smiles.write("{set},{file_number}\n".format(set="trn", file_number=smile_location)) 76 | elif ind in valid_ind_dict.keys(): 77 | valid_set.write(molecule_id + '\n') 78 | 79 | # Grabs the file number 80 | # The file is actually "smile_all_n.txt" but I only save n 81 | # smile_location = f_name.split("_")[-1].split(".")[0] 82 | # smiles.write("{set},{file_number}\n".format(set="vld", file_number=smile_location)) 83 | elif ind in test_ind_dict.keys(): 84 | test_set.write(molecule_id + '\n') 85 | 86 | # Grabs the file number 87 | # The file is actually "smile_all_n.txt" but I only save n 88 | # smile_location = f_name.split("_")[-1].split(".")[0] 89 | # smiles.write("{set},{file_number}\n".format(set="tst", file_number=smile_location)) 90 | 91 | train_set.close() 92 | valid_set.close() 93 | test_set.close() 94 | # smiles.close() 95 | print("Process finished sampling in " + str(time.time()-sampling_start_time)) 96 | 97 | if __name__ == '__main__': 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument('-pt', '--protein_name',required=True) 100 | parser.add_argument('-fp', '--file_path',required=True) 101 | parser.add_argument('-it', '--n_iteration',required=True) 102 | parser.add_argument('-dd', '--data_directory',required=True) 103 | parser.add_argument('-t_pos', '--tot_process',required=True) 104 | parser.add_argument('-tr_sz', '--train_size',required=True) 105 | parser.add_argument('-vl_sz', '--val_size',required=True) 106 | io_args = parser.parse_args() 107 | 108 | protein = io_args.protein_name 109 | file_path = io_args.file_path 110 | n_it = int(io_args.n_iteration) 111 | data_directory = io_args.data_directory 112 | tot_process = int(io_args.tot_process) 113 | tr_sz = int(io_args.train_size) 114 | vl_sz = int(io_args.val_size) 115 | rt_sz = tr_sz/vl_sz 116 | 117 | PROJECT_PATH = file_path + '/' + protein 118 | 119 | print("Parsed Args:") 120 | print(" - Iteration:", n_it) 121 | print(" - Data Directory:", data_directory) 122 | print(" - Project Directory:", PROJECT_PATH) 123 | print(" - Training Size:", tr_sz) 124 | print(" - Validation Size:", vl_sz) 125 | print(" - tot_process: ", tot_process) 126 | 127 | try: 128 | os.mkdir(PROJECT_PATH+"/iteration_"+str(n_it)) 129 | except OSError: 130 | pass 131 | 132 | f_names = [] 133 | # Getting all the morgan_1024_predictions/smile_all_##.txt files 134 | for f in glob.glob(data_directory+'/smile*_all_*.txt'): 135 | f_names.append(f) 136 | 137 | print("num_f_names: ", len(f_names)) 138 | 139 | t = time.time() 140 | print("Starting Processes...") 141 | with closing(Pool(np.min([tot_process, len(f_names)]))) as pool: 142 | pool.map(train_valid_test, f_names) 143 | 144 | print("Compressing smile file...") 145 | # old_file_size = os.path.getsize(file_path + '/' + protein + "/iteration_" + str(n_it) + "/smile_locations.csv") 146 | # 147 | # new_file_size = os.path.getsize(file_path + '/' + protein + "/iteration_" + str(n_it) + "/smile_locations.csv") 148 | # print(" - Uncompressed file size: {}\n" 149 | # " - Compressed file size: {}\n" 150 | # 
" - Ratio: {}".format(old_file_size, new_file_size, (old_file_size/new_file_size) * 100)) 151 | print("Sampling Complete - Total Time Taken:", time.time()-t) 152 | 153 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/sanity_check.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('-pt','--protein_name',required=True) 6 | parser.add_argument('-fp','--file_path',required=True) 7 | parser.add_argument('-it','--n_iteration',required=True) 8 | 9 | io_args = parser.parse_args() 10 | import time 11 | 12 | protein = io_args.protein_name 13 | file_path = io_args.file_path 14 | n_it = int(io_args.n_iteration) 15 | 16 | old_dict = {} 17 | for i in range(1,n_it): 18 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(i)+'/training_labels*')[-1]) as ref: 19 | ref.readline() 20 | for line in ref: 21 | tmpp = line.strip().split(',')[-1] 22 | old_dict[tmpp] = 1 23 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(i)+'/validation_labels*')[-1]) as ref: 24 | ref.readline() 25 | for line in ref: 26 | tmpp = line.strip().split(',')[-1] 27 | old_dict[tmpp] = 1 28 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(i)+'/testing_labels*')[-1]) as ref: 29 | ref.readline() 30 | for line in ref: 31 | tmpp = line.strip().split(',')[-1] 32 | old_dict[tmpp] = 1 33 | 34 | t=time.time() 35 | new_train = {} 36 | new_valid = {} 37 | new_test = {} 38 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(n_it)+'/train_set*')[-1]) as ref: 39 | for line in ref: 40 | tmpp = line.strip().split(',')[0] 41 | new_train[tmpp] = 1 42 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(n_it)+'/valid_set*')[-1]) as ref: 43 | for line in ref: 44 | tmpp = line.strip().split(',')[0] 45 | new_valid[tmpp] = 1 46 | with open(glob.glob(file_path+'/'+protein+'/iteration_'+str(n_it)+'/test_set*')[-1]) as ref: 47 | for line in ref: 48 | tmpp = line.strip().split(',')[0] 49 | new_test[tmpp] = 1 50 | print(time.time()-t) 51 | 52 | t=time.time() 53 | for keys in new_train.keys(): 54 | if keys in new_valid.keys(): 55 | new_valid.pop(keys) 56 | if keys in new_test.keys(): 57 | new_test.pop(keys) 58 | for keys in new_valid.keys(): 59 | if keys in new_test.keys(): 60 | new_test.pop(keys) 61 | print(time.time()-t) 62 | 63 | for keys in old_dict.keys(): 64 | if keys in new_train.keys(): 65 | new_train.pop(keys) 66 | if keys in new_valid.keys(): 67 | new_valid.pop(keys) 68 | if keys in new_test.keys(): 69 | new_test.pop(keys) 70 | 71 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/train_set.txt','w') as ref: 72 | for keys in new_train.keys(): 73 | ref.write(keys+'\n') 74 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/valid_set.txt','w') as ref: 75 | for keys in new_valid.keys(): 76 | ref.write(keys+'\n') 77 | with open(file_path+'/'+protein+'/iteration_'+str(n_it)+'/test_set.txt','w') as ref: 78 | for keys in new_test.keys(): 79 | ref.write(keys+'\n') 80 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "progressive_docking": { 3 | "metrics": ["accuracy", "recall", "precision"], 4 | "epochs": 500, 5 | "early_stopping_monitor": "val_loss", 6 | "early_stopping_min_delta": 0, 7 | "early_stopping_patience": 10, 8 | 
"early_stopping_mode": "auto", 9 | "time_limit": 36000 10 | } 11 | } -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/setup_slurm_specifications.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script will be called when a new project is created or a new project is loaded. 3 | It will adjust the number of cpus in each slurm script according to the passed n_cpu argument. 4 | It will also change the partition for the slurm scrips. 5 | 6 | v1.0.1 7 | """ 8 | 9 | import os 10 | 11 | def save_slurm_arg(project_name, path, n_cpu, cpu_partition, gpu_partition, custom_headers=None): 12 | # this saves all the slurm arguments as a single line so that it can 13 | # be called on as arguments to sbatch submissions 14 | try: 15 | os.mkdir("slurm_args") 16 | except: # folder already exists. 17 | pass 18 | 19 | with open(f"./slurm_args/{project_name}_slurm_args.txt", "w") as f: 20 | # "#SBATCH h1#SBATCH h2...#SBATCH hn" --> "h1 h2 ... hn" 21 | slurm_args = " ".join(custom_headers.split("#SBATCH")).strip() if custom_headers is not None else "" 22 | slurm_args_cpart = slurm_args 23 | slurm_args_cpart += " --partition=" + cpu_partition if cpu_partition is not None and "partition" not in slurm_args else "" 24 | f.write(slurm_args_cpart + "\n") # 1: write without cpu arg for non-gpu scripts 25 | 26 | slurm_args_cpart += " --cpus-per-task="+str(n_cpu) if n_cpu is not None and "cpus-per-task" not in slurm_args else "" 27 | f.write(slurm_args_cpart + "\n") # 2: write with cpu arg for non-gpu scripts 28 | 29 | slurm_args_gpart = slurm_args 30 | slurm_args_gpart += " --partition=" + gpu_partition if gpu_partition is not None and "partition" not in slurm_args else "" 31 | f.write(slurm_args_gpart + "\n") # 3: write without cpu arg for gpu req scripts 32 | 33 | slurm_args_gpart += " --cpus-per-task="+str(n_cpu) if n_cpu is not None and "cpus-per-task" not in slurm_args else "" 34 | f.write(slurm_args_gpart + "\n") # 4: write with cpu arg for gpu req scripts 35 | 36 | 37 | if __name__ == "__main__": 38 | import argparse 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument("--path", type=str, required=True) 41 | parser.add_argument("--n_cpu", type=int, required=True) 42 | parser.add_argument("--cpu_partition", type=str, required=True) 43 | parser.add_argument("--gpu_partition", type=str, required=True) 44 | parser.add_argument("--custom_headers", type=str, required=True) 45 | parser.add_argument("--project_name", type=str, required=True) 46 | 47 | args = parser.parse_args() 48 | 49 | # Set to None if none were passed 50 | if args.cpu_partition == "": 51 | args.cpu_partition = None 52 | 53 | if args.gpu_partition == "": 54 | args.gpu_partition = None 55 | 56 | if args.custom_headers == "": 57 | args.custom_headers = None 58 | 59 | save_slurm_arg(project_name=args.project_name, path=args.path, n_cpu=args.n_cpu, 60 | cpu_partition=args.cpu_partition, gpu_partition=args.gpu_partition, 61 | custom_headers=args.custom_headers) 62 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/simple_job_predictions.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('-protein', '--protein', required=True) 7 | parser.add_argument('-file_path', '--file_path', required=True) 8 | parser.add_argument('-n_it', 
'--n_it', required=True) 9 | parser.add_argument('-mdd', '--morgan_directory', required=True) 10 | 11 | # adding parameter for where to save all the data to: 12 | parser.add_argument('-save', '--save_path', required=False, default=None) 13 | 14 | io_args = parser.parse_args() 15 | 16 | protein = io_args.protein 17 | n_it = int(io_args.n_it) 18 | mdd = io_args.morgan_directory 19 | 20 | DATA_PATH = io_args.file_path # Now == file_path/protein 21 | SAVE_PATH = io_args.save_path 22 | 23 | 24 | # if no save path is provided we just save it in the same location as the data 25 | if SAVE_PATH is None: SAVE_PATH = DATA_PATH 26 | add = mdd 27 | 28 | try: 29 | os.mkdir(SAVE_PATH + '/iteration_' + str(n_it) + '/simple_job_predictions') 30 | except OSError: 31 | pass 32 | 33 | for f in glob.glob(SAVE_PATH + '/iteration_' + str(n_it) + '/simple_job_predictions/*'): 34 | os.remove(f) 35 | 36 | time = '0-10:30' 37 | 38 | # temp = [] 39 | part_files = [] 40 | 41 | for i, f in enumerate(glob.glob(add + '/*.txt')): 42 | part_files.append(f) 43 | 44 | ct = 1 45 | for f in part_files: 46 | with open(SAVE_PATH + '/iteration_' + str(n_it) + '/simple_job_predictions/simple_job_' + str(ct) + '.sh', 47 | 'w') as ref: 48 | ref.write('#!/bin/bash\n') 49 | ref.write('#SBATCH --ntasks=1\n') 50 | ref.write('#SBATCH --gres=gpu:1\n') 51 | ref.write('#SBATCH --cpus-per-task=1\n') 52 | ref.write('#SBATCH --job-name=phase_5\n') 53 | ref.write('#SBATCH --mem=0 # memory per node\n') 54 | ref.write('#SBATCH --time=' + time + ' # time (DD-HH:MM)\n') 55 | ref.write("#SBATCH --output=slurm-phase_5-%x.%j.out\n") 56 | ref.write("#SBATCH --error=slurm-phase_5-%x.%j.err\n") 57 | ref.write('\n') 58 | ref.write("echo Partition: $SLURM_JOB_PARTITION \n") 59 | 60 | cwd = os.getcwd() 61 | ref.write('cd {}\n'.format(cwd)) 62 | ref.write('source ~/.bashrc\n') 63 | ref.write('source activation_script.sh\n') 64 | ref.write('python -u ' + 'Prediction_morgan_1024.py' + ' ' + '-fn' + ' ' + f.split('/')[ 65 | -1] + ' ' + '-protein' + ' ' + protein + ' ' + '-it' + ' ' + str(n_it) + ' ' + '-mdd' + ' ' + str( 66 | mdd) + ' ' + '-file_path' + ' ' + SAVE_PATH + '\n') 67 | ref.write("\n echo complete") 68 | 69 | ct += 1 70 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/slurm_file_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--phase", type=int) 8 | parser.add_argument("--iteration", type=int) 9 | parser.add_argument("--project_path", type=str) 10 | parser.add_argument("--script_path", type=str, required=False) 11 | 12 | args = parser.parse_args() 13 | 14 | # Make a directory for all of the slurm files 15 | try: 16 | os.mkdir("slurm_out_files") 17 | except OSError: 18 | pass 19 | 20 | # Create a directory for the project 21 | project_name = os.path.basename(args.project_path) 22 | try: 23 | os.mkdir("slurm_out_files/{}".format(project_name)) 24 | except OSError: 25 | pass 26 | 27 | # Set the phase to "a" if 0 is passed 28 | if args.phase == 0: 29 | args.phase = "final" 30 | 31 | # Create a phase directory 32 | phase_dir = "slurm_out_files/{}/slurm_itr_{}_phase_{}".format(project_name, args.iteration, args.phase) 33 | project_path = args.project_path 34 | 35 | print("Target Project:", project_name) 36 | print("Iteration:", args.iteration) 37 | print("Phase:", args.phase) 38 | 39 | 40 | def make_the_move(f_err, 
f_out): 41 | # Moves slurm files that belong to the project "project_name" 42 | 43 | # move all slurm files 44 | for err, out in zip(f_err, f_out): 45 | 46 | # Read the file and make sure it belongs to the desired project 47 | with open(out, "r") as out_file: 48 | for line in out_file: 49 | # Move file if the project path is embedded within its own path (meaning it is in the project dir) 50 | # OR 51 | # If the out file has the line Project Name: project_name then move it 52 | if project_path in out or "Project Name:" in line and project_name in line: 53 | # Move the error files 54 | try: 55 | shutil.move(err, "{phase_dir}/{err}".format(phase_dir=phase_dir, err=os.path.basename(err))) 56 | except IOError: 57 | print("Error on:", err) 58 | pass 59 | 60 | # Move the out files 61 | try: 62 | shutil.move(out, "{phase_dir}/{out}".format(phase_dir=phase_dir, out=os.path.basename(out))) 63 | except IOError: 64 | print("Error on:", out) 65 | pass 66 | 67 | break 68 | 69 | 70 | # Running after phase 1 71 | if args.phase == 1: 72 | # Create a directory for all of the slurm files 73 | try: 74 | os.mkdir(phase_dir) 75 | except OSError: 76 | print("The directory {} already exists.".format(phase_dir)) 77 | 78 | # Gather the slurm files 79 | slurm_err = glob.glob("slurm-phase_1*.err") 80 | slurm_out = glob.glob("slurm-phase_1*.out") 81 | 82 | make_the_move(slurm_err, slurm_out) 83 | 84 | elif args.phase == 2: 85 | # Create a directory for all of the slurm files 86 | try: 87 | os.mkdir(phase_dir) 88 | except OSError: 89 | print("The directory {} already exists.".format(phase_dir)) 90 | 91 | # Gather the slurm files in the script dir 92 | slurm_err = glob.glob("slurm-phase_2*.err") 93 | slurm_out = glob.glob("slurm-phase_2*.out") 94 | 95 | # Gather the project dir 96 | slurm_err += glob.glob("{}/iteration_{}/slurm-phase_2*.err".format(args.project_path, args.iteration)) 97 | slurm_out += glob.glob("{}/iteration_{}/slurm-phase_2*.out".format(args.project_path, args.iteration)) 98 | 99 | # Grab the slurm files in the chunks 100 | slurm_err += glob.glob("{}/iteration_{}/chunk*/*/slurm*.err".format(args.project_path, args.iteration)) 101 | slurm_out += glob.glob("{}/iteration_{}/chunk*/*/slurm*.out".format(args.project_path, args.iteration)) 102 | 103 | make_the_move(slurm_err, slurm_out) 104 | 105 | elif args.phase == 3: 106 | # Create a directory for all of the slurm files 107 | try: 108 | os.mkdir(phase_dir) 109 | except OSError: 110 | print("The directory {} already exists.".format(phase_dir)) 111 | 112 | # Gather the slurm files in the script dir 113 | slurm_err = glob.glob("slurm-phase_3*.err") 114 | slurm_out = glob.glob("slurm-phase_3*.out") 115 | 116 | # Gather the project dir 117 | slurm_err += glob.glob("{}/iteration_{}/slurm-phase_3*.err".format(args.project_path, args.iteration)) 118 | slurm_out += glob.glob("{}/iteration_{}/slurm-phase_3*.out".format(args.project_path, args.iteration)) 119 | 120 | # Grab the slurm files in the res 121 | slurm_err += glob.glob("{}/iteration_{}/res/*/slurm-phase_3*.err".format(args.project_path, args.iteration)) 122 | slurm_out += glob.glob("{}/iteration_{}/res/*/slurm-phase_3*.out".format(args.project_path, args.iteration)) 123 | 124 | make_the_move(slurm_err, slurm_out) 125 | 126 | 127 | elif args.phase == 4: 128 | # Create a directory for all of the slurm files 129 | try: 130 | os.mkdir(phase_dir) 131 | except OSError: 132 | print("The directory {} already exists.".format(phase_dir)) 133 | 134 | # Gather the slurm files in the script dir 135 | slurm_err = 
glob.glob("slurm-phase_4*.err") 136 | slurm_out = glob.glob("slurm-phase_4*.out") 137 | 138 | # Grab the slurm files in the simple_jobs 139 | slurm_err += glob.glob("{}/iteration_{}/simple*/slurm-phase_4*.err".format(args.project_path, args.iteration)) 140 | slurm_out += glob.glob("{}/iteration_{}/simple*/slurm-phase_4*.out".format(args.project_path, args.iteration)) 141 | 142 | make_the_move(slurm_err, slurm_out) 143 | 144 | 145 | elif args.phase == 5: 146 | # Create a directory for all of the slurm files 147 | try: 148 | os.mkdir(phase_dir) 149 | except OSError: 150 | print("The directory {} already exists.".format(phase_dir)) 151 | 152 | # Gather the slurm files in the script dir 153 | slurm_err = glob.glob("slurm-phase_5*.err") 154 | slurm_out = glob.glob("slurm-phase_5*.out") 155 | 156 | # Grab the slurm files in the simple_jobs 157 | slurm_err += glob.glob("{}/iteration_{}/simple*predictions/slurm-phase_5*.err".format(args.project_path, args.iteration)) 158 | slurm_out += glob.glob("{}/iteration_{}/simple*predictions/slurm-phase_5*.out".format(args.project_path, args.iteration)) 159 | 160 | # Grab the slurm files in the GUI dir from smile searching 161 | slurm_err += glob.glob("GUI/slurm-*.err".format(args.script_path, args.iteration)) 162 | slurm_out += glob.glob("GUI/slurm-*.out".format(args.script_path, args.iteration)) 163 | 164 | make_the_move(slurm_err, slurm_out) 165 | 166 | elif args.phase == "final": 167 | # This means everything is finished 168 | # Create a directory for all of the slurm files 169 | try: 170 | os.mkdir(phase_dir) 171 | except OSError: 172 | print("The directory {} already exists.".format(phase_dir)) 173 | 174 | # Gather the slurm files in the script dir 175 | slurm_err = glob.glob("slurm-phase_*.err") 176 | slurm_out = glob.glob("slurm-phase_*.out") 177 | 178 | # Grab the files in the GUI directory 179 | slurm_err += glob.glob("GUI/slurm-*.err".format(args.script_path)) 180 | slurm_out += glob.glob("GUI/slurm-*.out".format(args.script_path)) 181 | 182 | make_the_move(slurm_err, slurm_out) 183 | -------------------------------------------------------------------------------- /Docking/ProgressiveDocking/split_chunks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --cpus-per-task=24 3 | #SBATCH --ntasks=1 4 | #SBATCH --job-name=split_chunks 5 | #SBATCH --output=slurm-phase_2-%x.%j.out 6 | #SBATCH --error=slurm-phase_2-%x.%j.err 7 | 8 | input=$1 9 | extension=$2 10 | output=$3 11 | chunk_n_lines=$4 12 | script_path=$5 13 | project_name=$6 14 | 15 | slurm_args_no_cpu=$(sed -n '1p' ${script_path}/slurm_args/${project_name}_slurm_args.txt) 16 | 17 | echo "Working..." 18 | 19 | mkdir -p chunks_smi 20 | 21 | split -a 4 -d -l $chunk_n_lines --additional-suffix=${extension} ${input} chunks_smi/${output}_set_part 22 | 23 | cd chunks_smi 24 | 25 | for x in ./$output*${extension}; do 26 | mkdir "${x%.*}" && mv "$x" "${x%.*}" 27 | done 28 | 29 | # Start preparing ligands 30 | cd .. 31 | return=$PWD 32 | echo Preparing Ligands 33 | for i in $(ls -d chunks_smi/$output*); do cd $i; sbatch $slurm_args_no_cpu $script_path/prepare_ligands_ad.sh $script_path; cd $return; done 34 | echo "Done!" 
-------------------------------------------------------------------------------- /Docking/ProgressiveDocking/split_sdf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from openbabel import pybel 3 | # Write every molecule in the input SDF to its own <title>.sdf file 4 | for mol in pybel.readfile("sdf", sys.argv[1]): 5 | mol.write("sdf", "%s.sdf" % mol.title, overwrite=True) 6 | 
-------------------------------------------------------------------------------- /Docking/ProgressiveDocking/util_functions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def lerp(a, b, c): 4 | """ 5 | Linearly interpolates between a and b at point c, where lerp(a, b, 0) = a and lerp(a, b, 1) = b. 6 | """ 7 | assert 0 <= c <= 1, "c must be between 0 and 1" 8 | return (b*c) + ((1-c) * a) 9 | 10 | 11 | def seconds_to_datetime(seconds): 12 | """ 13 | Converts a duration in seconds into zero-padded hour, minute, and second strings for a 00:00:00 display. 14 | 15 | :param seconds: duration in seconds 16 | :return: (h, m, s) as zero-padded strings 17 | """ 18 | m, s = divmod(seconds, 60) 19 | h, m = divmod(m, 60) 20 | h, m, s = int(h), int(m), int(s) 21 | 22 | if h < 0: 23 | h = m = s = 0 24 | 25 | if h < 10: 26 | h = "0" + str(h) 27 | else: 28 | h = str(h) 29 | 30 | if m < 10: 31 | m = "0" + str(m) 32 | else: 33 | m = str(m) 34 | 35 | if s < 10: 36 | s = "0" + str(s) 37 | else: 38 | s = str(s) 39 | return h, m, s 40 | 41 | 42 | def datetime_string_to_seconds(dt): 43 | """ 44 | Converts a datetime string like "00-04:00" ("DD-HH:MM") into seconds. 45 | datetime_string_to_seconds("00-04:00") = 14400 46 | :param dt: a "DD-HH:MM" time string 47 | :return: the total number of seconds 48 | """ 49 | days = int(dt[0:2]) 50 | hours = int(dt[3:5]) 51 | minutes = int(dt[6:8]) 52 | seconds = 60 * minutes + 60*60 * hours + 24*60*60 * days # a day is 86400 seconds 53 | return seconds 54 | 
-------------------------------------------------------------------------------- /Docking/ProgressiveDocking/venv_sanity_check.py: -------------------------------------------------------------------------------- 1 | import tensorflow 2 | import numpy 3 | import pandas 4 | with open("test_check.txt", 'w') as file: # only reached if the imports above resolved 5 | file.write("Success!") 6 | 
-------------------------------------------------------------------------------- /Docking/__init__.py: -------------------------------------------------------------------------------- 1 | # from Docking import GUI 2 | # from Docking import ProgressiveDocking 3 | # from Docking import ML 4 | # from Docking.ML import DDModel, DDMetrics 5 | import GUI 6 | import ML 7 | import ProgressiveDocking 8 | 
-------------------------------------------------------------------------------- /GUI/README.md: -------------------------------------------------------------------------------- 1 | # DD_GUI 2 | 3 | GUI for Deep Docking. 4 | 5 | ## Requirements: 6 | * Node Package Manager (NPM) 7 | * Python backend dependencies, provided by the `DeepDockingLocal` conda environment created during installation: 8 | * e.g. Flask (serves `server.py`), Paramiko and Pillow (used by the SSH backend in `src/backend/auto_ssh.py`) 9 | 10 | ## Installation: 11 | Make sure you have NPM and Node.js installed, along with the Python requirements above. 12 | To set up the JavaScript dependencies, run `npm install` from this directory. 13 | 14 | ## How to run: 15 | Open a terminal, navigate to this directory, and run one of the following to host the server locally: 16 | `npm run start-lin` (Linux/macOS) or `npm run start-win` (Windows). 17 | Or just run the `server.py` file directly. A typical local session is sketched below.
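The sketch assumes you start from the repository root and that the Python environment created during installation is active; the script names come from `package.json`, so adjust the rest to your setup:

```bash
cd GUI
npm install        # fetch the JavaScript dependencies listed in package.json
npm run build      # (re)bundle the front end with webpack if the JS sources changed
npm run start-lin  # on Linux/macOS; use "npm run start-win" on Windows
```

The `start-lin`/`start-win` scripts only set `FLASK_APP=server.py` and call `flask run`, so Flask must be available in whichever environment you launch them from.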
-------------------------------------------------------------------------------- /GUI/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dd_gui", 3 | "version": "2.0.0", 4 | "description": "Deep Docking GUI", 5 | "main": "server.js", 6 | "dependencies": { 7 | "chai": "^4.2.0", 8 | "chart.js": "^2.9.4", 9 | "jquery": "^3.5.1", 10 | "kekule": "^0.9.3", 11 | "lodash": "^4.17.20", 12 | "mocha": "^8.2.1", 13 | "react": "^17.0.1", 14 | "react-dom": "^17.0.1", 15 | "react-router-dom": "^5.2.0", 16 | "style-loader": "^2.0.0" 17 | }, 18 | "devDependencies": { 19 | "@babel/core": "^7.12.9", 20 | "@babel/preset-env": "^7.12.7", 21 | "babel": "^6.23.0", 22 | "babel-core": "^6.26.3", 23 | "babel-loader": "^8.2.1", 24 | "babel-preset-es2015": "^6.24.1", 25 | "babel-preset-react": "^6.24.1", 26 | "babel-preset-stage-2": "^6.24.1", 27 | "body-parser": "^1.19.0", 28 | "cors": "^2.8.5", 29 | "css-loader": "^5.0.1", 30 | "errorhandler": "^1.5.1", 31 | "express": "^4.17.1", 32 | "file-loader": "^6.2.0", 33 | "morgan": "^1.10.0", 34 | "url-loader": "^4.1.1", 35 | "webpack": "^4.44.1", 36 | "webpack-cli": "^4.2.0" 37 | }, 38 | "scripts": { 39 | "test": "mocha", 40 | "start-lin": "export FLASK_APP=server.py && export FLASK_ENV=local_host && flask run", 41 | "start-win": "conda activate DeepDockingLocal && set FLASK_APP=server.py && set FLASK_ENV=local_host && flask run", 42 | "start-dev": "conda activate DeepDockingLocal && set FLASK_APP=server.py && set FLASK_ENV=development && set FLASK_DEBUG=1 && flask run", 43 | "build": "webpack --progress --config webpack.config.js", 44 | "dev-build": "webpack -d --progress --config webpack.config.js" 45 | }, 46 | "repository": { 47 | "type": "git", 48 | "url": "git+https://github.com/jamesgleave/Deep-Docking.git" 49 | }, 50 | "author": "Jean Charle Yaacoub and James Gleave", 51 | "license": "ISC", 52 | "bugs": { 53 | "url": "https://github.com/jamesgleave/Deep-Docking/issues" 54 | }, 55 | "homepage": "https://github.com/jamesgleave/Deep-Docking" 56 | } 57 | -------------------------------------------------------------------------------- /GUI/public/img/Indicator_light_g.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /GUI/public/img/Indicator_light_r.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /GUI/public/img/Indicator_light_y.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /GUI/public/img/close_button.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /GUI/public/img/download_icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /GUI/public/img/left_switch.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /GUI/public/img/loading_svg.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /GUI/public/img/loading_svg_ripple.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /GUI/public/img/reload_icon.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GUI/public/img/right_switch.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /GUI/public/webp-img/00b42403057e60520cb497d92556b982.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAK9JREFUWIXtlUEKwzAMBEelD/PT9LTNy9RLFUzTQNK0MgUP+CKINawjGyZfJCLC3UNSHP3mfrXhsiwASMLMAHB3JEVrza7sf0jA3QN4u86m8ZFAImmVkbTWUuRnEntCfa1UYk+MZzLDJPI4+nRGsEnhVtnd3Te1EoFuIpBETklF71WAlzui/F/oL6whI5lTwKhJ6I+hF7j0GJ3BzCyjt3y1KgUAWmuV7SaTyWQy+RMeiwquZStdcE4AAAAASUVORK5CYII=" -------------------------------------------------------------------------------- /GUI/public/webp-img/0638bec8443dd6e3385084884ed644a2.png: -------------------------------------------------------------------------------- 1 | export default 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAUIAAAA0CAMAAADIf8HGAAAC1lBMVEUAAAD///////////////8AAAD///8RERH////8/Pz////+/v7///8tLS0JCQn///8fHx/////z8/NaWlr+/v4AAAD6+voeHh4yMjL+/v7+/v7////+/v4kJCRhYWH+/v7o6Oj7+/sAAADT09Ps7OwAKJaiogCiAAAiIiK2tra3t7f29vYgICCnp6cFBQXBwcEjIyP8/Pz8/Pz8/PwAAAD39/cvLy8jIyPt7e0AAAAAAAAJCQn5+fn5+fkjIyMAAAAAAACxAACxsQD7+/sFMKAiIiLFxcX+/v5ycnIjIyP5+fmMjIz9/f0zMzPQ0NABAQEKCgoAAAD///91dXWRkZH+/v40NDQAAAA/Pz9VVVUCAgImJiYgICAAAAC8vADp6ekFBQUAAAAAAAAHBwcqKioaGho6OjoAAAAANbi/AAAAAAAAAAAVFRVoaGgAAABRUVEAAAAAAAAAAAAwMDAAAAAeHh4KCgoAAAAAAAAAAAAAAAAMDAwAAAAAAAAAAAAEBAQAAAAfHx8AAAAAAAAODg4AAABeXl4AAAAAAAAAAAADAwPGxgAAAABycnIAAAAAAAAhISEFBQUAAAAAAAAAOMEAAADIAAACAgLJyQAAAAAAAAAKCgoAAAAAAAAAAAAAPcsAAAAAAADQ0AAAAAAAAABwcHAAAAAAAAAAAAAnJyfOAAA/Pz8REREAAAAAP88AAAAlJSXLygEAAABjY2MAAAAJCQPt7QAZGRSYmAIAAAAAAP8iIiIpKSksLCwwMDAxMTEzMzM6Ojo/Pz9JSQhDQ0PvAAAATO0ATO5KSkpMTEz/AAAAU/9UVFT/Dg4QXv/tHCRcXFwVYf9kZGT/KSm9STJsbGy+TjdxcXHAUz51dXUisUzEX0uEhITJaljPe2ulpQTVjX6esOzOyAX/mZn/tLTry8T/x8fx3Nf//wD/2Nj25+Tr8v/v9P/68/H8+Pj3+v//+/v///+Dl5zLAAAAuXRSTlMAAQIDBAQFBQYHCgsMDg4PEBERERMWFxkZHB0gISIiIyMkJCkrLCwsLC0uLi8yMjIzNDU2PT9AQEBBQktMUFBVWVtbW11gYmNlZmZpa2xtcXN4eHl5enp8fH6AgoKDhIWHiIqKjI2NjpCQkJGSlJWVmJmamp2dnp+goaOlpqeoqaqrra6vsLGztLW4ubq7vL6/wcHDxMTFxsjKzM3Nzs/P0NHR0tTV1tja2t3d3d7m6Orr7vH5+vv7/i6BvvAAAAsVSURBVHja7duNW1PXHQfwmxcjVHzBRhGIpjNV6xpFbYvWGauu0rSplCiLUMCpE/AFisoEFUUiVgRtfME4gksYRBFR7KpVyrIGZme7xrpbZ/fSdp2rUgq6bvIf7Jz7/pbce0NaZOv3ecy9CUnkfnJ+55x7ckG++uIbRGa6fiMzSOQzZf1xkPVTIvy2/SB/ByG3kl70+SeyDdGuru+IUKHSgKgUvB883/D+dZD3G56PDF39RZAtYRLek2+Idn0mD5HNopAuGD0+UadLHB/NE7xwnciFyBjObW9vPz8mTMIwDFFgKAuR8doRhpQUwwghLtYGjzr+F7B1HI2P1uBRElXcAPBu3r8PDRt4tRznC8Mw4+q55HALOQxDjFAOIqkUM1Yzfc/bb++Zrhkbw26LihHxahxtBOMHmsS2MzCz9TosCVrsSch6rIq//BLefrCeK/juT+k7E30wE0Mezsk98HZ/Jrxl8sEt9SSfL6KGKGEoGZEsS3OB2XyoufmQGexEMw0VMbajBsCjNhy1MXA1ukYY75tE2gq0WDs8Du1u3ryJlfJxnmAMwjhyo9HIPvzOCewXGM+3j6LuhEsIDT+/FwahVET8ZTHmfWdOnnTX17tPnjxz1BzDEmxrqTGo1YaaljaGoUbnYudUItYMG7BG+J9+jLAhhCDiM44cPtLIOnw/xzB74dmFcgl9R+bCzYuMB+99IccQZRh+1iWZcGyWy3uq2eV0uppPeV3rxjI6SFuLy+WtMRhqvC5Xi43qKqP0y3/CynJ9FEV4/X7/H3mEUJDZF/qMw5HhbMIJfj/L8OzIdcUihD4iDFAMkUmIfCPHkEUopSHiL1NZ7E7H4Vqns/awo9ZuUVPvp4y3O2tra+traurBxmmPJ0YNRWxucxsrzbmxCrKQScLjXEFGX/iEQCtEJnT6GfdGFiPJ50aGnhcKEQJENqEsQ5RtKI6IN8IfWuyHHdX2oiJ7teOw3TKdboYqU7EDxOmEt8UmFTkgJ9RWc1KbAOXXf0ATMocTQjCGHpsF+sIJnaxKzvwQZJno1JpTyAKtUJYhl1AUEWtShpLq6gOHtqWbTOnbDh2ori4x0DPEKFMeiZRniqK7wsq9WFJfgEkFe5U6DTWp+Vc/HJIvTAkpyB+ROXW8/1HAWCiXkN8XyjNE+YahEbGWluR1VO49UPSMUvlM0YG9lQ5vkkqJMAwrsTAEAeEuPE8+AuaEjzwJdzFCbGp9ExuSGVNrUpDqCwHeE/zfni04aT+8uTRJHmGwSDYUIgyJiHkkvV5RXl6VZ0AQQ15VeXnF60kamlBtsO/GYjeoGYSleDA3/B5OCAzxEzwhQaov9PnixA7F/OmnZgQ5jd2GHJElRqohKmwYfHDGCvnxwnWlO3eVZEyfnlGya2fpusLH6UJWG8oqca3KMtpQo9uMhyCkdvFlhoYG1jIDrGDyX9gZKKFUQzSoYVeI4WT0vJRtpVt37iku3rNza+m2lHmj6eHEUFJOaG0uLzGoKMJNeAhCalcwy2Pof4NHKNEwKGEwRPxlWkvB5tc273Y4doNNgUXLmNSU7YA8GRnwdkcZOanR6NbiIQip3W8vAyeUZoiGMBREJKbWtvxNG/Pz167Nz9+4aaONObVOXw10MkymDLBZnT6CaoU5eAhCavfhjhTDkIRCiMQJnsWWm521es2a1VnZuTYL8wRPn/JqToYpKsqUkfNqil5BzQvTVmIhCMFeWoIaGRKGX0skDGLIQyTqVZeSnppqy8mxpaamp+iUCMswE85mokyZtCA4O7GkWWH02KRGb7WmWWKJn05ddbCu7uCqqQ+r4d++HhghF5Hs8xLnJDxmXrnS/FjCnEQle7FLtwCbD0Yt0DFWcFSzwYz6Zat1Blzs0s+wWl+eTQw1S5qu9IJcaVryUBr++x8ihqgEQ9YMhyKJVo6el5o6b7QyWiVlyRVRgsaXmGatgid+jiprWiIx717yVm/vnfbLt3t73xqahpIImQ2RiRKr18cq5Xx9QlYzo4ynNoEW+E5Hxztg0xROLScPtiEqzZBGZJEolQpZvwxWzXjIMl4Fq/g2TnhlldgbzPLN4gr6WYa8k5iJoivdAzWUSkghRvojPtiLE96G2zpRQe75bbK/szOZte7CMWSvbAWLyHc0IQ1R6YY4YqQJ6yDdnY6OO3DbxD5y7rHPQhYyWiGUS+5kC8LXxXEIJSwrxIk9JZQhKscQIkaasAkbjTs6etmE+OJWHKcNLkRmsSo4
udPPrmOE6877KHwCH06ceEMNYSiPECB+K4Xc+957vZxCjhMSZLZBf6c/2c8V5LVCSYRSaj24ISrXcIBiwxZZra+MYjyADSe9d7A6Zg0ncSEFsRr2+zu54zG/jn2IKFCclO4yqOF3TThzC1h9eJbxgNRJDVdQoB8UGpEBznBxoDgpq7DBDFG5hkLTPYlzw8lrXqOydib5KJxa374sOrUGDrz5TKf4nFBwWJI9Ioc0jAChKna8pPWCcc1eDx33ZMpQ0gkeX1BSlmKCSyM2x/7LV6KEooa8d1Bqc/fFSzF8tnQrM8/Rtbyqrqmp7iFdZmAb/lPAcMCESm12W0uZJMPJaxh5GhmSETBE5RqScgpKEFy90Fw2nu4Ps35N5UXW/zWfWcjPIf8rhmESqrX4+oJSm3UKXLVQWxhLnywXv0sli03YeGi+y7P2FY9n0Y4hS8g3ROUa4oLjC2zwwiwg6IFrVytmMxa8iruo2DiEP0dc7kVPN7rH5biHLCHPMCxCIOhy2WKVQNAFr1pgCSJ5v6OSwSOsFyCctuEEyIZp0hbMFArFw2WIyjXEx+DaykrXMq02ywmvWmALInm/pyKJcHHrtQDItdbFEn59sESp17IfqrAOriEq1xAjLATXKpQfW5Z1DF60wBFEij6kkskn9ABCz7gcui9c3BogImaogF/MvHnx4gzyTOcyxMtubzcOqmE4hIj6qRXbt2/f4XCA2+1cQaToIyqZ4q1w2olAd8+tQOBWT3egdVrIJe9EBSB0ud1JxCNll8+DU5RzV6/SF8QNhiEq1xAhDMHFClvgFQsr5nC/Oin6mIqEVrjhWuBuf9+tG3393YFrG4KvnygNZTMg4TGnkyAcdX5f+4+Q05dACgezPwyPEBri13dYeIJI4Z+oSGiFJ0AFd/f39PT33AgETrCeP4ZBGG0+6kpSKpQzwPhPEGZnL/TZuadzRsQYqdM5qYaoXEOEMoTXdAgIIoV/plIojfBGdz8mGGhlrlGN8Y0hCRVaW72jKikpyZxbVVVFEJ6diJz97UTuosJS3nIqZ/ULm+7PPeKLgGH3gAihYU6OkCBifoOKWbyQIWGATwjb4BjERxZxgWtvRUWj1+s5XFFR8APsQRu4qtrGmXkasUUFIxJi5RYSAkCfLwLt8JPuEITBDRGGoaBg8ARthXcf9PWArpBRyOAAgSAZvb16B50V+B9QwT/PmXD1HH9hZilHkG0ICI9I+0pKqmH4hMBwtixBQLhrmNe9aH6je3K+mzmcPOi7davvwV3GcMI+QNVLmeT1dpuz0udhU+tlf8CzLGQr5C8XRq4VkoaoXEPmIo3M04SZjZ5fnfLUn/F4XC2e+eSkpjXQ3Q0nNX09jEmNj+0YtSD9Z/n5L8Hvn016/HPD/8Ip86P9In0hb90xUn0hYTgwQtlZCSZ1eH6ZPyzk1JrbeBT61Kw1BiV9djfpEr699PGkwRmRScMQhMEMB/blExXGg9JO8Eb/eIWBbvWPni4kh394mf//feAyQ6voMoPqKfpSO+SvIHB7Gu6YvyeUGsX3BEMg/wW9bXSroB+3mgAAAABJRU5ErkJggg==" -------------------------------------------------------------------------------- /GUI/public/webp-img/1f0710a4a9c764c4801a6b0bbd1f6744.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAKBJREFUWIXtlDEOAyEMBE2Uf2N+trxs0wCCQLqwSeGRTuLuCg9ry2ZBEARBEPwbAAiAqnrPg4Cq9g5JmhnbUZLC49OPWqui/ipQShlneSvm+E3YhmUIAYyb55xv114FUkqpSRDAeJfTdoBsD2z0WVAuo42egmoXHHH3TYITUgkAJNnbQncf364LAWAXOT3urknkXaSn0JO4LjBL/HQ4v80LekuprjId4lUAAAAASUVORK5CYII=" -------------------------------------------------------------------------------- /GUI/public/webp-img/2a9beabef112cd5d9b57edafe04ecd82.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAALVJREFUWIXtlVEOwyAMQ51tB2tvlt7MnMz7AQZbpX2sSyctT0IICckB7AAkSZIkSfJvqNLWl2jxbdsiJV8LACCS59xAKQUAQDJS9oG7C4BQrQAAtyhxSTKzvm5eCH0Cd5/ms5hMGKtcUzD2gWtkAWbmy7JgXVd7v/tg6rVPpz8cSSI5iUhSi9+e+EcxlKRSSm8sLWYke9ttcZMEG3N4BGNj2Rvu/l3Ht5+NpMZinp8hjD0P/Dx3JrWfhQN9eBkAAAAASUVORK5CYII=" -------------------------------------------------------------------------------- /GUI/public/webp-img/c2e5bc8f9058ad350eed2e2559c63174.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAJ1JREFUWIXtlEEOAyEMA51qH8bT8jT/zHvYRs2iHroHglRlJA4ggR0TAJoHkJS7S5K2iQMQALm7SIrkWkOz8DyWJhLCUSXJW/WZZeL58KWCmah0S7O9RSPueqL6Kr3Xt0V3r9L/GIguJwngSqK0B+Lu8yhvwvmnKxUH7ilseQXZQFX8R56YmUX0ZmblBgBgjPHTxkioymjTNE3T/C8nhFi5FLM7xWoAAAAASUVORK5CYII=" 
-------------------------------------------------------------------------------- /GUI/public/webp-img/d3698da8e2bda9d79e1bb514e2d600fa.png: -------------------------------------------------------------------------------- 1 | export default "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAACGFjVEwAAAABAAAAALQt6aAAAAAaZmNUTAAAAAAAAAAgAAAAIAAAAAAAAAAAAKcD6AEAxIjTvQAAAHVJREFUWIXtlcEKgDAMQzvx/395ngSRRVOWToS80w6lCW3DIswP6JXNt69NtKTwW32anRQuY7SCq3gD7x6FJu/Nl01jJK6qhaAbYJAc5EwMJRNgDcjjlzWwFGa0shg+TQAJSGOJdsuISO4i+xfIhI0xxhhzcgA3hhkG91FoQQAAAABJRU5ErkJggg==" -------------------------------------------------------------------------------- /GUI/src/__init__.py: -------------------------------------------------------------------------------- 1 | from src.backend.auto_ssh import SSH 2 | from src.backend.backend import Backend 3 | -------------------------------------------------------------------------------- /GUI/src/backend/DataHistory.py: -------------------------------------------------------------------------------- 1 | """ 2 | James Gleave 3 | v1.2.0 4 | 5 | A class used to store training history 6 | 7 | # Usage... 8 | # hist = DataHistory(data) 9 | # hist.averages -> {'iteration_1': {'loss': [0.7453892707824707, ...], 'sparse_categorical_accuracy': [...], ...}, ...} 10 | # hist.current_phase -> integer from 0 to 5 11 | # hist.plots -> {'iteration_1': [{'loss': [...], 'sparse_categorical_accuracy': [...], ...}, {...}, ...]} 12 | # hist.molecules_remaining -> {'iteration_1': {'true': X, 'estimate': Y, 'error': Z}, 'iteration_2': ...} 13 | # hist.current_iteration -> returns the current iteration 14 | """ 15 | 16 | 17 | class DataHistory: 18 | def __init__(self, data): 19 | self.history = data 20 | 21 | # Load the iterations and sort the list 22 | self.iterations = list(data.keys()) 23 | try: 24 | self.iterations.sort(key=lambda x: int(x.split("_")[1])) 25 | except KeyError: 26 | self.iterations.sort() 27 | 28 | self.plots = self.__generate_plot_data() 29 | self.averages = self.__calc_averages() 30 | 31 | self.current_iteration = self.__get_current_iteration() # TODO: handle error when no project is loaded... 32 | self.current_phase = data[self.current_iteration]['itr']['current_phase'] 33 | self.current_phase_eta = data[self.current_iteration]['itr']['phase_eta'] 34 | self.molecules_remaining = dict.fromkeys(self.iterations) 35 | self.is_idle = data[self.current_iteration]['itr']['is_idle'] 36 | 37 | # Store the crash report (hopefully no crashes) 38 | self.crash_report = self.history[self.current_iteration]['itr']['crash_report'] 39 | 40 | # The percent complete for the iteration 41 | self.itr_percent = self.history[self.current_iteration]['itr']['itr_percent'] 42 | self.full_percent = self.history[self.current_iteration]['itr']['full_percent'] 43 | 44 | # Are we running the final phase? 
45 | self.final_phase = self.history[self.current_iteration]['itr']['final_phase'] 46 | 47 | # The number of jobs pending and running 48 | self.pending = self.history[self.current_iteration]['itr']['pending_info'] 49 | 50 | try: 51 | for key in self.molecules_remaining: 52 | self.molecules_remaining[key] = data[key]['itr']['molecules_remaining'] 53 | except TypeError: 54 | pass 55 | 56 | def get_model(self, iteration, model_number, averages=False): 57 | if averages: 58 | return self.averages[iteration] 59 | else: 60 | return self.history[self.iterations[iteration]]['models'][model_number] 61 | 62 | def __get_current_iteration(self): 63 | for key in self.history: 64 | try: 65 | if self.history[key]['itr']['in_progress']: 66 | return key 67 | except TypeError: 68 | pass 69 | return 'iteration_1' 70 | 71 | def __calc_averages(self): 72 | averages = {} 73 | try: 74 | keys = list(self.plots['iteration_1'][0].keys()) 75 | except (IndexError, KeyError): 76 | return averages 77 | for itr in self.plots: 78 | averages[itr] = {} 79 | for metric in keys: 80 | metric_list = [] 81 | for model in self.plots[itr]: 82 | metric_list.append(model[metric]) 83 | averages[itr][metric] = self.__average_cols(metric_list) 84 | return averages 85 | 86 | def __generate_plot_data(self): 87 | # Loop through iterations... 88 | plots = {} 89 | for iteration in self.history: 90 | plots[iteration] = [] # Create a list to store models from each iteration 91 | try: 92 | # Loop through models... 93 | for model_number, model in enumerate(self.history[iteration]['models']): 94 | # Create a dict of lists to store the plot values. 95 | # If the model has not finished an epoch yet, then break 96 | try: 97 | model_data = {} 98 | for key in model["epoch_1"]: 99 | # Reformat the keys to make them easy to display dynamically 100 | model_data[self.reformat(key)] = [] 101 | except KeyError: 102 | break 103 | 104 | # Loop through epochs... 
105 | for epoch in model: 106 | # Loop through each metric 107 | for metric in model[epoch]: 108 | model_data[self.reformat(metric)].append(model[epoch][metric]) 109 | plots[iteration].append(model_data) 110 | except TypeError: 111 | pass 112 | return plots 113 | 114 | def reformat(self, key): 115 | new_key = "" 116 | if "_" in key: 117 | tokens = [s.capitalize() for s in key.replace("val", "Validation").split("_")] 118 | for token in tokens: 119 | new_key += token + " " 120 | if new_key[-1] == " ": 121 | new_key = new_key[0:-1] 122 | else: 123 | new_key = key.capitalize() 124 | 125 | # Simplify words 126 | if "Sparse Categorical " in new_key: 127 | new_key = new_key.replace("Sparse Categorical ", "") 128 | 129 | if "acc" == key or "val_acc" == key: 130 | new_key = new_key.replace("Acc", "Accuracy") 131 | 132 | return new_key 133 | 134 | def __repr__(self): 135 | print("Iterations:", self.iterations) 136 | print("Current Iteration:", self.current_iteration) 137 | print("Current Phase:", self.current_phase) 138 | print("Molecules Remaining:", self.molecules_remaining) 139 | print("Iteration Percent:", self.itr_percent) 140 | print("Full Percent:", self.full_percent) 141 | 142 | return "" 143 | 144 | @staticmethod 145 | def __average_cols(arr): 146 | if len(arr) == 0: 147 | return [] 148 | 149 | average = [] 150 | max_len = max([len(length) for length in arr]) 151 | for i in range(max_len): 152 | col_average = [] 153 | for row in arr: 154 | if i < len(row): 155 | col_average.append(row[i]) 156 | average.append(sum(col_average)/len(col_average)) 157 | return average 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /GUI/src/backend/EmailBot.py: -------------------------------------------------------------------------------- 1 | import smtplib 2 | from email.mime.text import MIMEText 3 | from email.mime.multipart import MIMEMultipart 4 | 5 | 6 | class EmailNotificationSettings: 7 | PHASE_CHANGE_UPDATE = False 8 | ITERATION_CHANGE_UPDATE = True 9 | FINAL_PHASE_START_UPDATE = False 10 | FINAL_PHASE_FINISH_UPDATE = False 11 | PROJECT_START_UPDATE = False 12 | PROJECT_FINISH_UPDATE = False 13 | 14 | EMAIL_NOTIFICATIONS = False 15 | 16 | @staticmethod 17 | def ChangeSettings(phase_change, 18 | itr_change, 19 | final_phase_start, 20 | final_phase_end, 21 | project_start, 22 | project_finish, 23 | email_notifications): 24 | 25 | EmailNotificationSettings.PHASE_CHANGE_UPDATE = phase_change and email_notifications 26 | EmailNotificationSettings.ITERATION_CHANGE_UPDATE = itr_change and email_notifications 27 | EmailNotificationSettings.FINAL_PHASE_START_UPDATE = final_phase_start and email_notifications 28 | EmailNotificationSettings.FINAL_PHASE_FINISH_UPDATE = final_phase_end and email_notifications 29 | EmailNotificationSettings.PROJECT_START_UPDATE = project_start and email_notifications 30 | EmailNotificationSettings.PROJECT_FINISH_UPDATE = project_finish and email_notifications 31 | EmailNotificationSettings.EMAIL_NOTIFICATIONS = email_notifications and email_notifications 32 | 33 | @staticmethod 34 | def toString(): 35 | return f"PHASE_CHANGE_UPDATE: {EmailNotificationSettings.PHASE_CHANGE_UPDATE}, " \ 36 | f"ITERATION_CHANGE_UPDATE: {EmailNotificationSettings.ITERATION_CHANGE_UPDATE}, " \ 37 | f"FINAL_PHASE_START_UPDATE: {EmailNotificationSettings.FINAL_PHASE_START_UPDATE}, " \ 38 | f"FINAL_PHASE_FINISH_UPDATE: {EmailNotificationSettings.FINAL_PHASE_FINISH_UPDATE}, " \ 39 | f"PROJECT_START_UPDATE: 
{EmailNotificationSettings.PROJECT_START_UPDATE}, " \ 40 | f"PROJECT_FINISH_UPDATE: {EmailNotificationSettings.PROJECT_FINISH_UPDATE}, " \ 41 | f"EMAIL_NOTIFICATIONS: {EmailNotificationSettings.EMAIL_NOTIFICATIONS}" 42 | 43 | 44 | class EmailBot: 45 | def __init__(self, address, password): 46 | self.s = smtplib.SMTP(host='smtp.gmail.com', port=587) 47 | self.s.starttls() 48 | self.s.login(address, password) 49 | 50 | self.address = address 51 | self.password = password 52 | 53 | def send_message(self, recipient, subject, message): 54 | msg = MIMEMultipart() # create a message 55 | 56 | # setup the parameters of the message 57 | msg['From'] = self.address 58 | msg['To'] = recipient 59 | msg['Subject'] = subject 60 | 61 | # add in the message body 62 | msg.attach(MIMEText(message, 'plain')) 63 | 64 | # send the message via the server set up earlier. 65 | self.s.send_message(msg) 66 | 67 | def send_iteration_change_update(self, user, to, project_name, new_iteration): 68 | prev_itr = new_iteration - 1 69 | message = f"Hello {user},\n\nyour Deep Docking project {project_name} has completed iteration {prev_itr} " \ 70 | f"and is now on iteration {new_iteration}. For further details, visit 'web-address'. \n" \ 71 | f"If you would like to stop receiving updates, log into 'web-address' and turn off email notifications." \ 72 | f"\n\nCheers,\nDeepDockingBot" 73 | 74 | self.send_message(to, "Iteration Complete", message) 75 | 76 | def send_project_started_update(self, user, to, project_name): 77 | message = f"Hello {user},\n\nyour Deep Docking project {project_name} has begun. " \ 78 | f"For further details, visit 'web-address'. \n" \ 79 | f"If you would like to stop receiving updates, " \ 80 | f"log into 'web-address' and turn off email notifications." \ 81 | f"\n\nCheers,\nDeepDockingBot" 82 | 83 | self.send_message(to, "Project Started", message) 84 | 85 | def send_queue_position_update(self, user, to, project_name, queue_position): 86 | pass 87 | 88 | def send_project_finished_update(self, user, to, project_name): 89 | pass 90 | 91 | @staticmethod 92 | def get_user_pw(): 93 | return open("/Users/martingleave/Documents/DeepDocking/DeepDockingGUI/GUI/src/backend/email.txt").read().split(" ") 94 | 95 | @staticmethod 96 | def get_user_email(): 97 | import json 98 | 99 | # load up the data we have from installation 100 | with open('src/backend/db.json') as user_db: 101 | db = user_db.read() 102 | database = json.loads(db) 103 | return database["email"] 104 | -------------------------------------------------------------------------------- /GUI/src/backend/EventHandler.py: -------------------------------------------------------------------------------- 1 | """ 2 | The event handler for the backend. 3 | This class handles all callbacks from withing the backend loop. 
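Each On* hook below is invoked by the Backend as a run progresses (start, phase change,
iteration change, final phase, completion, and error conditions). The email-related hooks
consult EmailNotificationSettings and use EmailBot with the stored credentials; at the
moment only the iteration-change notification actually sends mail.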
4 | """ 5 | 6 | from .EmailBot import * 7 | 8 | 9 | class EventHandler: 10 | @staticmethod 11 | def OnBackendStart(backend): 12 | print("Event Handled Backend Start") 13 | 14 | @staticmethod 15 | def OnProjectRunPhase(backend): 16 | print("Event Handled Run Phase") 17 | if EmailNotificationSettings.PROJECT_START_UPDATE: 18 | address, pw = EmailBot.get_user_pw() 19 | bot = EmailBot(address=address, password=pw) 20 | user_email = EmailBot.get_user_email() 21 | 22 | # Return if we have no email entered 23 | if user_email == "NA": 24 | return 25 | 26 | @staticmethod 27 | def OnPhaseChange(backend): 28 | print("Event Handled Phase Change") 29 | if EmailNotificationSettings.PHASE_CHANGE_UPDATE: 30 | address, pw = EmailBot.get_user_pw() 31 | bot = EmailBot(address=address, password=pw) 32 | user_email = EmailBot.get_user_email() 33 | 34 | # Return if we have no email entered 35 | if user_email == "NA": 36 | return 37 | 38 | @staticmethod 39 | def OnIterationChange(backend): 40 | print("Event Handled Iteration Change") 41 | try: 42 | if EmailNotificationSettings.ITERATION_CHANGE_UPDATE: 43 | address, pw = EmailBot.get_user_pw() 44 | bot = EmailBot(address=address, password=pw) 45 | user_email = EmailBot.get_user_email() 46 | 47 | # Return if we have no email entered 48 | if user_email == "NA" and backend.loaded_project_information['specifications']['iteration'] > 1: 49 | return 50 | 51 | bot.send_iteration_change_update(backend.user_data["username"], 52 | user_email, 53 | backend.loaded_project_name, 54 | backend.loaded_project_information['specifications']['iteration']) 55 | except FileNotFoundError: 56 | print("Email notifications not implemented yet...") 57 | 58 | @staticmethod 59 | def OnFinalPhaseStart(backend): 60 | print("Event Handled Final Phase Start") 61 | if EmailNotificationSettings.FINAL_PHASE_START_UPDATE: 62 | pass 63 | 64 | @staticmethod 65 | def OnFinalPhaseEnd(backend): 66 | print("Event Handled Final Phase End") 67 | if EmailNotificationSettings.FINAL_PHASE_FINISH_UPDATE: 68 | pass 69 | 70 | @staticmethod 71 | def OnProjectFinished(backend): 72 | print("Event Handled Project Finished") 73 | if EmailNotificationSettings.PROJECT_FINISH_UPDATE: 74 | pass 75 | 76 | @staticmethod 77 | def OnDataReadError(backend): 78 | print("Event Handled Data Read Error") 79 | 80 | @staticmethod 81 | def OnErrorDetected(backend): 82 | print("Event Handled Error Detected") 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /GUI/src/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_ssh import SSH 2 | from .cluster_commands import * 3 | from .DataHistory import DataHistory 4 | from .backend import Backend 5 | -------------------------------------------------------------------------------- /GUI/src/backend/auto_ssh.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import paramiko 3 | import json 4 | 5 | 6 | class SSH: 7 | 8 | """ This class will automatically ssh into the host cluster. """ 9 | 10 | def __init__(self, host=None): 11 | if host is None: 12 | try: 13 | json_str = open('src/backend/db.json').read() # TODO: Sibling files not recognizing each other when called from another file path. 14 | db_dict = json.loads(json_str) 15 | host = db_dict['ip'] 16 | except FileNotFoundError as e: 17 | print(e.__traceback__, "'db.json' not found! 
Please run the installation first before running GUI.") 18 | raise e 19 | 20 | # The information that will allow for ssh 21 | self.host = host 22 | self.user = "" 23 | self.pwrd = "" 24 | self.ssh = None 25 | 26 | def command(self, command): 27 | # Check if there is a connection 28 | assert self.ssh is not None, "Connect before using a command" 29 | 30 | # Send the command 31 | stdin, stdout, stderr = self.ssh.exec_command(command) 32 | 33 | return stdout 34 | 35 | def connect(self, username, password): 36 | # Set the credentials 37 | self.user = username 38 | self.pwrd = password 39 | 40 | # Connect to ssh and set our ssh object 41 | ssh = paramiko.SSHClient() 42 | ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 43 | ssh.connect(self.host, username=self.user, password=self.pwrd) 44 | self.ssh = ssh 45 | 46 | def download(self, remote_path, local_path): 47 | ftp_client = self.ssh.open_sftp() 48 | ftp_client.get(remote_path, local_path) 49 | ftp_client.close() 50 | 51 | def read(self, remote_path): 52 | return self.ssh.open_sftp().file(remote_path) 53 | 54 | def get_image(self, remote_path, transparent=False): 55 | im = Image.open(self.read(remote_path)) 56 | if transparent: 57 | im = im.convert("RGBA") 58 | datas = im.getdata() 59 | new_data = [] 60 | for item in datas: 61 | if item[0] == 255 and item[1] == 255 and item[2] == 255: # make pure-white pixels fully transparent 62 | new_data.append((255, 255, 255, 0)) 63 | else: 64 | new_data.append(item) 65 | 66 | im.putdata(new_data) 67 | return im 68 | 69 | def __repr__(self): 70 | message = self.user + "\n" + self.host + "\n" 71 | return message 
-------------------------------------------------------------------------------- /GUI/src/backend/backend_exceptions.py: -------------------------------------------------------------------------------- 1 | class NullProjectException(Exception): 2 | def __init__(self): 3 | pass 4 | 5 | def __str__(self): 6 | message = "No project is loaded into the backend. Load a project before starting the backend." 7 | return message 8 | 9 | 10 | class NullDBError(Exception): 11 | def __init__(self): 12 | pass 13 | 14 | def __str__(self): 15 | message = "Cannot locate stored user data. Please reinstall DeepDocking." 16 | return message 17 | 
-------------------------------------------------------------------------------- /GUI/src/backend/backend_sanity_check.py: -------------------------------------------------------------------------------- 1 | from auto_ssh import SSH 2 | from backend import Backend 3 | import time 4 | import json 5 | 6 | 7 | def check_backend(): 8 | json_str = open('src/backend/db.json').read() # TODO: Sibling files not recognizing each other when called from another file path.
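# db.json is written during installation; this check only needs the cluster "ip" field
# (the same file also stores the notification "email" read by EmailBot.get_user_email()).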
9 | db_dict = json.loads(json_str) 10 | ip = db_dict['ip'] 11 | 12 | user = input("cluster username: ") 13 | password = input("cluster password: ") 14 | ssh_connection = SSH(host=ip) 15 | ssh_connection.connect(username=user, password=password) 16 | backend = Backend(ssh=ssh_connection) 17 | return backend 18 | 19 | 20 | def check_load_project(): 21 | b = check_backend() 22 | b.load_project(input("Project Name: ")) 23 | return b 24 | 25 | 26 | def check_run_phase_5(): 27 | b = check_backend() 28 | b.load_project(path_to_project=input("Path to project: ")) 29 | b.run_phase(5, debug=True) 30 | 31 | 32 | def check_run_phase_4(): 33 | b = check_backend() 34 | b.load_project(path_to_project=input("Path to project: ")) 35 | b.run_phase(4, debug=True) 36 | 37 | 38 | def check_venv(): 39 | b = check_backend() 40 | b.send_command("python venv_sanity_check.py", debug=True) 41 | b.send_command("conda list >> test_check.txt", debug=True) 42 | 43 | 44 | def check_backend_functionality(): 45 | b = check_backend() 46 | p = input("Project Name: ") 47 | b.load_project(project_name=p) 48 | b.start() 49 | 50 | while b.status() == "fetching": 51 | pass 52 | 53 | hist = b.pull() 54 | print(hist.current_phase_eta) 55 | 56 | 57 | def check_run_phase(): 58 | b = check_backend() 59 | b.load_project(path_to_project=input("Path to project: ")) 60 | b.start() 61 | while b.status() == "fetching": 62 | pass 63 | 64 | while True: 65 | phase = int(input("Which phase to run? ")) 66 | if input("Debug? y or n ") == "n": 67 | b.run_phase(phase, False) 68 | 69 | 70 | def check_model_image(): 71 | b = check_load_project() 72 | b.get_model_image(1, 3) 73 | 74 | 75 | def check_final_phase(): 76 | b = check_backend() 77 | project_name = input("Project Name: ") 78 | b.load_project(project_name) 79 | b.start() 80 | while b.status() == "fetching": 81 | pass 82 | b.run_phase(phase=-1, debug=True) 83 | 84 | 85 | def check_read_final(): 86 | b = check_backend() 87 | project_name = input("Project Name: ") 88 | b.load_project(project_name) 89 | b.start() 90 | while b.status() == "fetching": 91 | pass 92 | 93 | print(b.get_final_phase_results()) 94 | 95 | 96 | def check_except(): 97 | b = check_backend() 98 | project_name = input("Project Name: ") 99 | b.load_project(project_name) 100 | b.start() 101 | while b.status() == "fetching": 102 | pass 103 | 104 | print(b.core.model_data.keys()) 105 | print(b.core.model_data["iteration_1"]['itr']['crash_report']) 106 | 107 | 108 | def check_update_specs(): 109 | b = check_backend() 110 | project_name = input("Project Name: ") 111 | b.load_project(project_name) 112 | specs = {"num_cpu": 24} 113 | b.update_specifications(specs) 114 | 115 | 116 | def check_itr_percent(): 117 | b = check_backend() 118 | project_name = input("Project Name: ") 119 | b.load_project(project_name) 120 | print(b.pull()) 121 | -------------------------------------------------------------------------------- /GUI/src/index.js: -------------------------------------------------------------------------------- 1 | // Place imports here: 2 | import {$, jQuery} from 'jquery'; //TODO: figure out why webpack doesnt recognize this 3 | import Chart from 'chart.js'; 4 | 5 | // var Kekule = require('kekule').Kekule; -------------------------------------------------------------------------------- /GUI/src/login.js: -------------------------------------------------------------------------------- 1 | function toggleLoadingScreen(turn_on){ 2 | if (turn_on) { 3 | // console.log("Displaying loading screen..."); 4 | 
document.getElementById("loading").style.visibility = "visible"; 5 | 6 | }else{ 7 | // console.log("Closing loading screen."); 8 | document.getElementById("loading").style.visibility = "hidden"; 9 | } 10 | } 11 | 12 | // gets the username and password and attempts to setup the ssh connection 13 | function setupConnection(){ 14 | toggleLoadingScreen(true); 15 | 16 | // Setting up the connection 17 | let user = document.getElementById("username").value; 18 | let pwd = document.getElementById("pwd").value; 19 | console.log("setting up connection..."); 20 | // console.log("\t"+user+"\n\t"+ pwd); 21 | 22 | // async post request: 23 | $.ajax({ 24 | type: "POST", 25 | url: "/sshConnect", 26 | dataType: 'json', 27 | contentType: 'application/json', 28 | data: JSON.stringify({'user': user, 'pwd': pwd}), 29 | success: function(data, status, settings){ 30 | toggleLoadingScreen(false); 31 | console.log("successful login..."); 32 | console.log(data, status, settings) 33 | }, 34 | error: function(res, opt, err){ // Handling errors (2 possible -> no VPN or Invalid creds) 35 | console.log(res, opt, err); 36 | if (res.status === 401){ 37 | // Creating the error display box if it doesnt exist 38 | if (res.responseText === "creds"){ 39 | let errorElm = document.getElementById("errorText") 40 | if (! errorElm){ // if it doesnt exist we must first create it 41 | errorElm = document.createElement("p") 42 | errorElm.id = "errorText" 43 | 44 | let formElm = document.getElementById("cred-form"); 45 | formElm.insertBefore(errorElm, formElm.firstChild); 46 | } 47 | errorElm.textContent = "Incorrect credentials!" 48 | 49 | } else if (res.responseText === "vpn"){ 50 | let errorElm = document.getElementById("errorText") 51 | if (! errorElm){ // if it doesn't exist we must first create it 52 | errorElm = document.createElement("p") 53 | errorElm.id = "errorText" 54 | 55 | let formElm = document.getElementById("cred-form"); 56 | formElm.insertBefore(errorElm, formElm.firstChild); 57 | } 58 | errorElm.textContent = "Is your VPN on?" 
59 | } 60 | } else{ 61 | alert("Something wrong...") 62 | } 63 | toggleLoadingScreen(false); 64 | } 65 | }).done(function(response) { 66 | console.log(response); 67 | window.location = "/main" // redirects user to the main page 68 | }); 69 | 70 | } 71 | 72 | document.getElementById('login-btn').addEventListener('click', setupConnection); 73 | 74 | document.querySelectorAll('input').forEach( el => { 75 | el.addEventListener('keydown', e => { 76 | console.log(e.key); 77 | if(e.key === 'Enter') { 78 | let nextEl = el.nextElementSibling; 79 | if(nextEl.nodeName === 'INPUT') { 80 | nextEl.focus(); 81 | }else if (nextEl.nodeName === 'BUTTON') { 82 | nextEl.focus(); 83 | } else { 84 | alert("done"); 85 | } 86 | } 87 | }) 88 | }); -------------------------------------------------------------------------------- /GUI/src/mainPagejs/basics.js: -------------------------------------------------------------------------------- 1 | // Basic functions required by the entire html page 2 | function togglePopup(elemID,turn_on){ 3 | if (turn_on) { 4 | document.getElementById(elemID).style.visibility = "visible"; 5 | }else{ 6 | document.getElementById(elemID).style.visibility = "hidden"; 7 | } 8 | } 9 | 10 | function toggleLoadingScreen(turn_on){ 11 | togglePopup('loading', turn_on); 12 | } 13 | 14 | function switchTab(evt, tabname, activetab) { 15 | // Declare all variables 16 | var i, tabcontent, tablinks; 17 | 18 | // Get all elements with class="tabcontent" and hide them 19 | tabcontent = document.getElementsByClassName("tabcontent"); 20 | for (i = 0; i < tabcontent.length; i++) { 21 | tabcontent[i].style.display = "none"; 22 | } 23 | 24 | // Get all elements with class="tablinks" and remove the class "active" 25 | if (!activetab){ // Won't change the tab activation 26 | tablinks = document.getElementsByClassName("tablinks"); 27 | for (i = 0; i < tablinks.length; i++) { 28 | tablinks[i].className = tablinks[i].className.replace(" active", ""); 29 | } 30 | } 31 | 32 | // Show the current tab, and add an "active" class to the button that opened the tab 33 | document.getElementById(tabname).style.display = "block"; 34 | document.getElementById(tabname+'Btn').className += " active"; 35 | } 36 | 37 | function flash(elmID, filters){ 38 | var elm = document.getElementById(elmID); 39 | filter1 = filters? filters[0]: 'brightness(100%)'; 40 | filter2 = filters? filters[1]: 'brightness(50%)'; 41 | 42 | if (elm.style.filter === filter1){ 43 | elm.style.filter = filter2; 44 | 45 | } else { 46 | elm.style.filter = filter1; 47 | } 48 | }; 49 | 50 | function destroyChart(id){ 51 | Chart.helpers.each(Chart.instances, function(instance){ 52 | if (instance.chart.canvas.id === id){ 53 | instance.destroy(); 54 | } 55 | }); 56 | }; 57 | 58 | // Pan and zoom functionality for images: 59 | var img_ele = null, 60 | x_img_start = 0, 61 | y_img_start = 0, 62 | starting_L = 0, 63 | starting_T = 0; 64 | 65 | function zoom(zoomincrement, img_id) { 66 | img_ele = document.getElementById(img_id); 67 | var pre_width = img_ele.getBoundingClientRect().width, pre_height = img_ele.getBoundingClientRect().height; 68 | img_ele.style.width = (pre_width * zoomincrement) + 'px'; 69 | img_ele.style.height = (pre_height * zoomincrement) + 'px'; 70 | img_ele = null; 71 | } 72 | 73 | function start_drag(e) { 74 | img_ele = this; 75 | 76 | starting_L = parseInt(img_ele.style.left.split('px')[0]); 77 | starting_T = parseInt(img_ele.style.top.split('px')[0]); 78 | starting_L = (starting_L) ? 
starting_L : 0; // if the value is auto the conditional will be false (NaN) 79 | starting_T = (starting_T) ? starting_T : 0; // sets it to zero if auto. 80 | 81 | x_img_start = e.clientX; 82 | y_img_start = e.clientY; 83 | } 84 | 85 | function while_drag(e) { 86 | e.preventDefault(); 87 | var delta_x = e.clientX - x_img_start; 88 | var delta_y = e.clientY - y_img_start; 89 | 90 | if (img_ele !== null) { 91 | // calculating amount to move image by 92 | img_ele.style.left = delta_x + starting_L + 'px'; 93 | img_ele.style.top = delta_y + starting_T + 'px'; 94 | } 95 | } 96 | 97 | function stop_drag() { 98 | img_ele = null; 99 | } 100 | 101 | 102 | function resetPanandZoomVals(){ 103 | img_ele = null; 104 | x_img_start = 0; 105 | y_img_start = 0; 106 | } 107 | 108 | function addPanAndZoom(img_id){ 109 | var element = document.getElementById(img_id); 110 | 111 | // Adds pan and zoom functionality to the element 112 | element.addEventListener("wheel", function(e){ 113 | e.preventDefault(); 114 | zoom(1 - (e.deltaY/300)* 0.1, img_id); 115 | }); 116 | 117 | element.addEventListener('mousedown', start_drag); 118 | element.addEventListener('mousemove', while_drag); 119 | element.parentElement.addEventListener('mouseup', stop_drag); 120 | } 121 | 122 | function resetImagePos(img_id){ 123 | var element = document.getElementById(img_id); 124 | element.style.left = 'auto'; 125 | element.style.top = 'auto'; 126 | 127 | element.style.width = 'auto'; 128 | element.style.height = 'auto'; 129 | } 130 | 131 | function deleteProject(name){ 132 | name = (name) ? name : document.querySelector('#curr_project_name').textContent.split(':')[1].trim(); 133 | // console.log('deleting project: ', name); 134 | var args = 'project_name=' + encodeURIComponent(name); 135 | toggleLoadingScreen(true); 136 | 137 | $.ajax({ 138 | type: "POST", 139 | url: `/deleteProject?${args}`, 140 | dataType: 'json', 141 | success: function (data, status, settings) { 142 | console.log('project deleted', data); 143 | }, 144 | error: function (res, opt, err) { 145 | alert('Error\n' + res.status + ': ' + err); 146 | } 147 | }).done(function (response) { 148 | toggleLoadingScreen(false); 149 | }); 150 | } 151 | 152 | var UPDATE_RATE = null; 153 | var UPDATE_CALLBACKS = {}; // Saves the callbacks for all the tabs opened 154 | var UPDATE_ID; // the ID for the async update loop 155 | 156 | function clientUpdateLoop(){ 157 | // This function is a loop that runs in the background that retrieves updates from the server as they come in 158 | // and displays that data to the client depending on which tab they are on. 159 | 160 | // Checking which tab is active: 161 | var active = document.querySelector("body > div.tabs.disable-select > Button.active").id; 162 | var activeTab = active.substring(0, active.length-3); 163 | 164 | // console.log("active tab:", activeTab); 165 | // Not running for the top scoring tab because that would just be annoying when viewing molecules. 166 | // Also not really needed for the start a run page... 167 | if (activeTab !== "topScoring" && activeTab !== "startR"){ 168 | // Running the appropriate callback 169 | var callbackfn = UPDATE_CALLBACKS[activeTab]; 170 | if (callbackfn) callbackfn(); 171 | } 172 | } 173 | 174 | function resetUpdateLoop(){ 175 | // Used for when we already have the update rate and 176 | // want to restart the loop to prevent "double loading" of a tab.
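// (Tabs register their refresh callback in UPDATE_CALLBACKS when their tab button is clicked; topScoring.js below sets UPDATE_CALLBACKS["topScoring"] = bootTopScoringTab, for example. clientUpdateLoop() then invokes the entry for whichever tab is currently active.)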
177 | if (UPDATE_ID){ // Clearing any previous update loop 178 | clearInterval(UPDATE_ID); 179 | UPDATE_ID = null; 180 | } 181 | UPDATE_ID = setInterval(clientUpdateLoop, UPDATE_RATE); 182 | // console.log("update loop reset!"); 183 | } 184 | 185 | function startUpdateLoop(){ 186 | $.ajax({ 187 | type: "GET", 188 | url: "/getBasics", 189 | dataType: 'json', 190 | success: function (data, status, settings) { 191 | UPDATE_RATE = data.update_rate_ms; 192 | // console.log("update rate (ms):", UPDATE_RATE); 193 | if (UPDATE_RATE){ // IF NOT UNDEF 194 | UPDATE_ID = setInterval(clientUpdateLoop, UPDATE_RATE); 195 | } 196 | }, 197 | error: function (res, opt, err) { 198 | alert("Error!") 199 | console.log(res, opt, err); 200 | } 201 | }); 202 | } 203 | 204 | function startup(){ 205 | startUpdateLoop(); 206 | // Changing the background color: 207 | document.getElementsByTagName("html")[0].style.background = "#5A6E59"; 208 | } 209 | 210 | startup(); -------------------------------------------------------------------------------- /GUI/src/mainPagejs/topScoring.js: -------------------------------------------------------------------------------- 1 | var selected_smile = null; //keeps track of which smile is selected to highlight it. 2 | 3 | // Displays the selected smile 4 | function displaySelectedSmile(e){ 5 | if (selected_smile) selected_smile.className = ''; // clearing the last element 6 | 7 | selected_smile = e.currentTarget; // the clicked <a>; e.path is non-standard and unavailable outside Chromium 8 | selected_smile.className = 'disabled'; 9 | 10 | const new_smile = selected_smile.innerText; 11 | 12 | // requesting image: 13 | displayScaffold(new_smile); 14 | } 15 | 16 | // Displays the scaffold from request (if none is provided, we assume the most common scaffold) 17 | function displayScaffold(smile) { 18 | toggleLoadingScreen(true); 19 | 20 | var new_text = "Most Common Murcko Scaffold"; 21 | 22 | if (smile) new_text = smile; 23 | else if (selected_smile) selected_smile.className = ''; // Clearing the previously selected smile 24 | 25 | // Changing the title to match 26 | document.querySelector('#murckov-scaffold > div > h2').innerHTML = new_text; 27 | 28 | $.ajax({ 29 | type: "POST", 30 | url: "/topScoring", 31 | dataType: 'text', 32 | contentType: 'application/json', 33 | data: JSON.stringify({"smile": String(smile), "image":"true"}), 34 | beforeSend: function (xhr) { 35 | xhr.overrideMimeType('text/plain; charset=x-user-defined'); 36 | }, 37 | success: function (data, status, settings) { 38 | if(data.length < 1){ 39 | alert("The image doesn't exist"); 40 | $("#scaffoldImage").attr("src", "data:image/png;base64,"); 41 | return 42 | } 43 | var binary = ""; 44 | var responseText = data; 45 | var responseTextLen = responseText.length; 46 | 47 | for ( var i = 0; i < responseTextLen; i++ ) { 48 | binary += String.fromCharCode(responseText.charCodeAt(i) & 255) 49 | } 50 | $("#scaffoldImage").attr("src", "data:image/jpeg;base64,"+btoa(binary)); 51 | }, 52 | error: function (res, opt, err) { 53 | alert("Error in retrieving Murcko Scaffold") 54 | console.log(res,opt,err); 55 | } 56 | }).done(function (response) { 57 | resetImagePos('scaffoldImage'); 58 | toggleLoadingScreen(false); 59 | }); 60 | } 61 | 62 | // adds SMILES to the list 63 | function fillTopScoringList(compounds){ 64 | var list = document.querySelector('#top-scoring-list > ul'); 65 | 66 | // clearing data first: 67 | while (list.firstChild) 68 | list.removeChild(list.lastChild); 69 | 70 | // Adding each of them to the list: 71 | for (var comp in compounds){ 72 | var li = document.createElement('li'); 73 | var a =
document.createElement('a'); 74 | a.textContent = compounds[comp]; 75 | 76 | // Connecting them to an appropriate callback: 77 | a.onclick = displaySelectedSmile; 78 | 79 | li.appendChild(a); 80 | list.appendChild(li); 81 | } 82 | }; 83 | 84 | // Downloads a text file containing some specific text 85 | function download(filename, text) { 86 | var element = document.createElement('a'); 87 | element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); 88 | element.setAttribute('download', filename); 89 | 90 | element.style.display = 'none'; 91 | document.body.appendChild(element); 92 | 93 | element.click(); 94 | 95 | document.body.removeChild(element); 96 | } 97 | 98 | // Download button -> downloads list of all molecules 99 | document.querySelector("#download-list > img").onclick = function () { 100 | download('top-scoring-smiles.txt', 101 | document.querySelector("#top-scoring-list > ul").innerText); 102 | }; 103 | 104 | // Reload Button -> refreshes the image to be the most common scaffold 105 | document.querySelector('#reload-Murcko > img').onclick = function () { 106 | displayScaffold(); 107 | } 108 | 109 | function bootTopScoringTab(){ 110 | // request to get list of all molecules: 111 | $.ajax({ 112 | type: "POST", 113 | url: "/topScoring", 114 | contentType: 'application/json', 115 | data: JSON.stringify({"smile":"undefined", "image":"false"}), 116 | success: function (data, status, settings) { 117 | fillTopScoringList(data.top_hits); 118 | }, 119 | error: function (res, opt, err) { 120 | alert("Error: top scoring tab failure"); 121 | console.log(res, opt, err); 122 | } 123 | }).done(function (response) { 124 | // request to get the Most common Murcko scaffold 125 | displayScaffold(); 126 | }); 127 | } 128 | 129 | // Tab button 130 | document.getElementById("topScoringBtn").onclick = function() { 131 | addPanAndZoom('scaffoldImage'); 132 | toggleLoadingScreen(true); 133 | bootTopScoringTab(); 134 | switchTab(event, 'topScoring'); 135 | UPDATE_CALLBACKS["topScoring"] = bootTopScoringTab; // not used but left here for future possible use (replace with another function) 136 | resetUpdateLoop(); 137 | }; 138 | -------------------------------------------------------------------------------- /GUI/src/test.js: -------------------------------------------------------------------------------- 1 | // Create a simple CO2 molecule 2 | var mol = new Kekule.Molecule(); 3 | var atomC = mol.appendAtom('C'); 4 | var atomO1 = mol.appendAtom('O'); 5 | var atomO2 = mol.appendAtom('O'); 6 | mol.appendBond([atomC, atomO1], 2); 7 | mol.appendBond([atomC, atomO2], 2); 8 | 9 | // Get formula 10 | var formula = mol.calcFormula(); 11 | console.log('Formula: ', formula.getText()); 12 | 13 | // Output SMILES (IO module should be loaded in web application) 14 | var smiles = Kekule.IO.saveFormatData(mol, 'smi'); 15 | console.log('SMILES: ', smiles); 16 | 17 | // Output MOL2k (IO module should be loaded in web application) 18 | var mol2k = Kekule.IO.saveFormatData(mol, 'mol'); 19 | console.log('MOL 2000: \n', mol2k); 20 | -------------------------------------------------------------------------------- /GUI/templates/login.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Deep Docking GUI 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
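The credential form on this login page posts to /sshConnect (see login.js above), and the client tells bad credentials apart from an unreachable cluster by the body of the 401 response. The real handler belongs to the GUI's Flask backend and is not reproduced in this section; the snippet below is only a minimal sketch of that contract, assuming Flask and paramiko (both listed in installation/DeepDockingLocal.yml) and a placeholder cluster hostname.

import socket
import paramiko
from flask import Flask, request, jsonify

app = Flask(__name__)
CLUSTER_HOST = "cluster.example.org"  # placeholder; the real host depends on the user's setup

@app.route("/sshConnect", methods=["POST"])
def ssh_connect():
    creds = request.get_json()
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(CLUSTER_HOST, username=creds["user"], password=creds["pwd"], timeout=10)
    except paramiko.AuthenticationException:
        return "creds", 401   # login.js shows "Incorrect credentials!"
    except (paramiko.SSHException, socket.timeout, socket.error):
        return "vpn", 401     # login.js asks "Is your VPN on?"
    return jsonify({"status": "connected"})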
Deep Docking GUI
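The displayScaffold() handler in topScoring.js above asks /topScoring for an image, reads the response as raw bytes ('text/plain; charset=x-user-defined'), masks each character code with & 255, and rebuilds a base64 data URL with btoa(). The route that produces those bytes lives in the GUI's Flask backend and is not reproduced in this section; the snippet below is only a minimal sketch of the image branch of such a route, assuming Flask and RDKit (both listed in installation/DeepDockingLocal.yml) and a handler name chosen here for illustration.

import io
from flask import Flask, request, Response
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Scaffolds import MurckoScaffold

app = Flask(__name__)

@app.route("/topScoring", methods=["POST"])
def top_scoring_image():
    # Only the {"image": "true"} branch is sketched; the list request would
    # instead return JSON carrying a "top_hits" array for fillTopScoringList().
    payload = request.get_json()
    mol = Chem.MolFromSmiles(payload["smile"])
    if mol is None:
        # An empty body triggers the client's "The image doesn't exist" alert.
        return "", 200
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    buffer = io.BytesIO()
    Draw.MolToImage(scaffold, size=(400, 400)).save(buffer, format="PNG")
    # Raw image bytes; the JavaScript above turns them into a data URL.
    return Response(buffer.getvalue(), mimetype="image/png")

Sending the raw bytes keeps the route simple; the client-side & 255 masking undoes the x-user-defined charset widening before btoa() re-encodes the bytes.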
-------------------------------------------------------------------------------- /GUI/templates/test.html: --------------------------------------------------------------------------------
14 | 15 | 92 | -------------------------------------------------------------------------------- /GUI/webpack.config.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const webpack = require('webpack'); // eslint-disable-line no-unused-vars 4 | 5 | module.exports = { 6 | mode: 'development', //TODO: change to production when done 7 | entry: './src/index.js', 8 | output: { 9 | path: __dirname, 10 | filename: './public/js/bundle.js', 11 | }, 12 | context: __dirname, 13 | devtool: 'source-map', 14 | resolve: { 15 | extensions: ['.js', '.jsx', '.css'], 16 | alias: { 17 | jquery: "../node_modules/jquery/src/jquery" 18 | }, 19 | modules: ['node_modules'] 20 | }, 21 | module: { 22 | rules: [ 23 | { 24 | test: /\.js$/, 25 | exclude: /(node_modules)/, 26 | use: { 27 | loader: 'babel-loader', 28 | options: { 29 | presets: ['@babel/preset-env'] 30 | } 31 | } 32 | } 33 | ,{ 34 | test: /\.css$/i, 35 | use: ['style-loader', 'css-loader'] 36 | },{ 37 | // Now we apply rule for images 38 | test: /\.(png|jpe?g|gif|svg)$/, 39 | use: [ 40 | { 41 | // Using file-loader for these files 42 | loader: "file-loader", 43 | 44 | // In options we can set different things like format 45 | // and directory to save 46 | options: { 47 | outputPath: './public/webp-img' 48 | } 49 | } 50 | ] 51 | },{ 52 | // Apply rule for fonts files 53 | test: /\.(woff|woff2|ttf|otf|eot)$/, 54 | use: [ 55 | { 56 | // Using file-loader too 57 | loader: "file-loader", 58 | options: { 59 | outputPath: './public/fonts' 60 | } 61 | } 62 | ] 63 | },{ 64 | test: /\.(png|gif|cur|jpg)$/, 65 | loader: 'url-loader', 66 | query: { limit: 8192 } 67 | } 68 | ] 69 | } 70 | }; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 James Gleave 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /installation/DeepDockingLocal.yml: -------------------------------------------------------------------------------- 1 | name: DeepDockingLocal 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - python=3.6 8 | - paramiko 9 | - ipython 10 | - rdkit 11 | - flask 12 | - flask-cors 13 | - python-dotenv 14 | -------------------------------------------------------------------------------- /installation/fix_sh.sh: -------------------------------------------------------------------------------- 1 | # from https://stackoverflow.com/questions/800030/remove-carriage-return-in-unix 2 | grep -r --color=never --include="*.sh" $'\r' # looks for \r in file 3 | sed -i 's/\r$//g' # fixes the file -------------------------------------------------------------------------------- /installation/install-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create the local env 4 | python3 install.py --phase install_local 5 | # Activate the env 6 | conda activate DeepDockingLocal 2> conda.out 7 | # Install remote files and create remote env 8 | python3 install.py --phase install_remote 9 | -------------------------------------------------------------------------------- /installation/install-windows.bat: -------------------------------------------------------------------------------- 1 | python install.py --phase install_local 2 | call conda activate DeepDockingLocal > conda.out 3 | python install.py --phase install_remote -------------------------------------------------------------------------------- /installation/welcome_message.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ########################################################################### 6 | 7 | oooooooooo. 8 | `888' `Y8b 9 | 888 888 .ooooo. .ooooo. oo.ooooo. 10 | 888 888 d88' `88b d88' `88b 888' `88b 11 | 888 888 888ooo888 888ooo888 888 888 12 | 888 d88' 888 .o 888 .o 888 888 13 | o888bood8P' `Y8bod8P' `Y8bod8P' 888bod8P' 14 | 888 15 | oooooooooo. ooooo o8o 16 | `888' `Y8b `888 `"' 17 | 888 888 .ooooo. .ooooo. 888 oooo oooo ooo. .oo. .oooooooo 18 | 888 888 d88' `88b d88' `"Y8 888 .8P' `888 `888P"Y88b 888' `88b 19 | 888 888 888 888 888 888888. 888 888 888 888 888 20 | 888 d88' 888 888 888 .o8 888 `88b. 888 888 888 `88bod8P' 21 | o888bood8P' `Y8bod8P' `Y8bod8P' o888o o888o o888o o888o o888o `8oooooo. 22 | d" YD 23 | "Y88888P' 24 | 25 | ########################################################################### 26 | 27 | -------------------------------------------------------------------------------- /preparation_scripts/README.md: -------------------------------------------------------------------------------- 1 | Scripts to prepare chemical libraries and receptor structures. 
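morgan_fp.py below writes each fingerprint as a sparse CSV row: the ZINC ID followed by the indices of the Morgan bits that are set (nbits=1024, radius=2). A minimal sketch of turning such a row back into a dense bit vector; the helper name and example row are hypothetical.

import numpy as np

NBITS = 1024  # matches the nbits value hard-coded in morgan_fp.py below

def decode_fingerprint_row(row):
    # Row format written by morgan_fp.py: "ZINC_ID,idx1,idx2,..."
    fields = row.rstrip().split(',')
    zinc_id, on_bits = fields[0], fields[1:]
    fingerprint = np.zeros(NBITS, dtype=np.int8)
    fingerprint[[int(i) for i in on_bits]] = 1
    return zinc_id, fingerprint

# zinc_id, fp = decode_fingerprint_row("ZINC000000000001,1,20,614,733")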
2 | -------------------------------------------------------------------------------- /preparation_scripts/compute_morgan_fp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --partition=normal 3 | #SBATCH --ntasks=1 4 | #SBATCH --nodes=1 5 | #SBATCH --job-name=calculate_morgan_fing 6 | 7 | source ~/.bashrc 8 | conda activate $4 9 | 10 | start=`date +%s` 11 | 12 | python -u morgan_fp.py -sfp $1 -fn $2 -tp $3 13 | 14 | end=`date +%s` 15 | runtime=$((end-start)) 16 | echo $runtime 17 | -------------------------------------------------------------------------------- /preparation_scripts/morgan_fp.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import time 3 | import numpy as np 4 | import pickle 5 | from contextlib import closing 6 | from multiprocessing import Pool 7 | import multiprocessing 8 | from rdkit.Chem import AllChem 9 | from rdkit import DataStructs 10 | from rdkit import Chem 11 | from functools import partial 12 | import argparse 13 | import os 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('-sfp','--smile_folder_path',help='name of the folder with prepared smiles',required=True) 17 | parser.add_argument('-fn','--folder_name',help='name of morgan fingerprint folder',required=True) 18 | parser.add_argument('-tp','--tot_process',help='number of cores',required=True) 19 | 20 | io_args = parser.parse_args() 21 | sfp = io_args.smile_folder_path 22 | fn = io_args.folder_name 23 | t_pos = int(io_args.tot_process) 24 | 25 | def morgan_fingp(fname): 26 | nbits=1024 27 | radius=2 28 | fsplit = fname.split('/')[-1] 29 | ref2 = open(fn+'/'+fsplit,'a') 30 | with open(fname,'r') as ref: 31 | for line in ref: 32 | smile,zin_id = line.rstrip().split() 33 | arg = np.zeros((1,)) 34 | try: 35 | DataStructs.ConvertToNumpyArray(AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(smile),radius,nBits=nbits,useChirality=True),arg) 36 | 37 | ref2.write((',').join([zin_id]+[str(elem) for elem in np.where(arg==1)[0]])) 38 | ref2.write('\n') 39 | except: 40 | print(line) 41 | pass 42 | 43 | files = [] 44 | for f in glob.glob(sfp+'/*.txt'): 45 | files.append(f) 46 | 47 | try: 48 | os.mkdir(fn) 49 | except: 50 | pass 51 | 52 | t_f = len(files) 53 | t = time.time() 54 | with closing(Pool(np.min([multiprocessing.cpu_count(),t_pos]))) as pool: 55 | pool.map(morgan_fingp,files) 56 | print(time.time()-t) 57 | -------------------------------------------------------------------------------- /preparation_scripts/prepare_receptor.sh: -------------------------------------------------------------------------------- 1 | receptor_f=$1 2 | grid_points=$2 3 | grid_center=$3 4 | 5 | if [ "$1" == "-h" ]; then 6 | echo " 7 | 8 | PREPARE AUTODOCK DOCKING MAPS 9 | 10 | Usage: (ba)sh `basename $0` receptor grid_points grid_center path_adt 11 | 12 | - receptor = receptor pdb file, prepared 13 | - grid_points = 'x,y,z' format, point size of docking box (real size n_points*0.375 A) 14 | - grid_center = 'x,y,z' format, coordinates of docking box center 15 | - path_adt = path to autodock tools folder with python scripts (prepare_receptor4.py, etc etc..) 16 | " 17 | exit 0 18 | fi 19 | 20 | if [ "$1" != "-h" ] && [ $# -lt 4 ]; then 21 | echo "Not all the arguments were supplied; type 'sh prepare_receptor.sh -h' for help" 22 | exit 0 23 | fi 24 | 25 | receptor=$(echo $receptor_f|cut -d'.' 
-f1) 26 | 27 | IFS=',' read -r -a dim<<<$grid_points 28 | x_points="${dim[0]}" 29 | y_points="${dim[1]}" 30 | z_points="${dim[2]}" 31 | 32 | 33 | IFS=',' read -r -a crd<<<$grid_center 34 | x_crd="${crd[0]}" 35 | y_crd="${crd[1]}" 36 | z_crd="${crd[2]}" 37 | 38 | python $4/prepare_receptor4.py -r $receptor_f -U nphs_lps_waters_nonstdres 39 | wait 40 | 41 | python $4/prepare_gpf4.py -r $receptor'.'pdbqt -o grid_1_$receptor'.'gpf -p ligand_types='P,SA,S,Cl,Ca,Mn,Fe,Zn,Br,I' -p npts=$grid_points -p gridcenter=$grid_center 42 | wait 43 | 44 | python $4/prepare_gpf4.py -r $receptor'.'pdbqt -o grid_2_$receptor'.'gpf -p ligand_types='H,HD,HS,C,A,N,NA,NS,OA,OS,F,Mg' -p npts=$grid_points -p gridcenter=$grid_center 45 | wait 46 | 47 | autogrid4 -p grid_1_$receptor'.'gpf -l grid_1_$receptor'.'log 48 | autogrid4 -p grid_2_$receptor'.'gpf -l grid_2_$receptor'.'log 49 | 50 | rm *fld 51 | 52 | echo "# AVS field file 53 | # 54 | # AutoDock Atomic Affinity and Electrostatic Grids 55 | # 56 | # Created by autogrid4. 57 | # 58 | #SPACING 0.375 59 | #NELEMENTS $x_points $y_points $z_points 60 | #CENTER $x_crd $y_crd $z_crd 61 | #MACROMOLECULE $receptor.pdbqt 62 | #GRID_PARAMETER_FILE grid_$receptor.gpf 63 | # 64 | ndim=3 # number of dimensions in the field 65 | dim1=$(($x_points+1)) # number of x-elements 66 | dim2=$(($y_points+1)) # number of y-elements 67 | dim3=$(($z_points+1)) # number of z-elements 68 | nspace=3 # number of physical coordinates per point 69 | veclen=24 # number of affinity values at each point 70 | data=float # data type (byte, integer, float, double) 71 | field=uniform # field type (uniform, rectilinear, irregular) 72 | coord 1 file=$receptor.maps.xyz filetype=ascii offset=0 73 | coord 2 file=$receptor.maps.xyz filetype=ascii offset=2 74 | coord 3 file=$receptor.maps.xyz filetype=ascii offset=4 75 | label=H-affinity # component label for variable 1 76 | label=HD-affinity # component label for variable 2 77 | label=HS-affinity # component label for variable 3 78 | label=C-affinity # component label for variable 4 79 | label=A-affinity # component label for variable 5 80 | label=N-affinity # component label for variable 6 81 | label=NA-affinity # component label for variable 7 82 | label=NS-affinity # component label for variable 8 83 | label=OA-affinity # component label for variable 9 84 | label=OS-affinity # component label for variable 10 85 | label=F-affinity # component label for variable 11 86 | label=Mg-affinity # component label for variable 12 87 | label=P-affinity # component label for variable 13 88 | label=SA-affinity # component label for variable 14 89 | label=S-affinity # component label for variable 15 90 | label=Cl-affinity # component label for variable 16 91 | label=Ca-affinity # component label for variable 17 92 | label=Mn-affinity # component label for variable 18 93 | label=Fe-affinity # component label for variable 19 94 | label=Zn-affinity # component label for variable 20 95 | label=Br-affinity # component label for variable 21 96 | label=I-affinity # component label for variable 22 97 | label=Electrostatics # component label for variable 22 98 | label=Desolvation # component label for variable 23 99 | # 100 | # location of affinity grid files and how to read them 101 | # 102 | variable 1 file=$receptor.H.map filetype=ascii skip=6 103 | variable 2 file=$receptor.HD.map filetype=ascii skip=6 104 | variable 3 file=$receptor.HS.map filetype=ascii skip=6 105 | variable 4 file=$receptor.C.map filetype=ascii skip=6 106 | variable 5 file=$receptor.A.map filetype=ascii 
skip=6 107 | variable 6 file=$receptor.N.map filetype=ascii skip=6 108 | variable 7 file=$receptor.NA.map filetype=ascii skip=6 109 | variable 8 file=$receptor.NS.map filetype=ascii skip=6 110 | variable 9 file=$receptor.OA.map filetype=ascii skip=6 111 | variable 10 file=$receptor.OS.map filetype=ascii skip=6 112 | variable 11 file=$receptor.F.map filetype=ascii skip=6 113 | variable 12 file=$receptor.Mg.map filetype=ascii skip=6 114 | variable 13 file=$receptor.P.map filetype=ascii skip=6 115 | variable 14 file=$receptor.SA.map filetype=ascii skip=6 116 | variable 15 file=$receptor.S.map filetype=ascii skip=6 117 | variable 16 file=$receptor.Cl.map filetype=ascii skip=6 118 | variable 17 file=$receptor.Ca.map filetype=ascii skip=6 119 | variable 18 file=$receptor.Mn.map filetype=ascii skip=6 120 | variable 19 file=$receptor.Fe.map filetype=ascii skip=6 121 | variable 20 file=$receptor.Zn.map filetype=ascii skip=6 122 | variable 21 file=$receptor.Br.map filetype=ascii skip=6 123 | variable 22 file=$receptor.I.map filetype=ascii skip=6 124 | variable 23 file=$receptor.e.map filetype=ascii skip=6 125 | variable 24 file=$receptor.d.map filetype=ascii skip=6">>$receptor.maps.fld 126 | 127 | rm -r ad_grids 128 | mkdir ad_grids 129 | mv *map* ad_grids/ 130 | -------------------------------------------------------------------------------- /util/ProgressBar.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import re 4 | 5 | 6 | class ProgressBar(object): 7 | DEFAULT = 'Progress: %(bar)s %(percent)3d%%' 8 | FULL = '%(bar)s %(current)d/%(total)d (%(percent)3d%%) %(remaining)d to go' 9 | 10 | def __init__(self, total, width=40, fmt=DEFAULT, symbol='=', 11 | output=sys.stderr): 12 | assert len(symbol) == 1 13 | 14 | self.total = total 15 | self.width = width 16 | self.symbol = symbol 17 | self.output = output 18 | self.fmt = re.sub(r'(?P%\(.+?\))d', r'\g%dd' % len(str(total)), fmt) 19 | self.current = 0 20 | 21 | def __call__(self): 22 | percent = self.current / float(self.total) 23 | size = int(self.width * percent) 24 | remaining = self.total - self.current 25 | bar = '[' + self.symbol * size + ' ' * (self.width - size) + ']' 26 | 27 | args = { 28 | 'total': self.total, 29 | 'bar': bar, 30 | 'current': self.current, 31 | 'percent': percent * 100, 32 | 'remaining': remaining 33 | } 34 | print('\r' + self.fmt % args, file=self.output, end='') 35 | 36 | def done(self): 37 | self.current = self.total 38 | self() 39 | print('', file=self.output) 40 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | from .ProgressBar import ProgressBar 2 | -------------------------------------------------------------------------------- /util/__pycache__/ProgressBar.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/ProgressBar.cpython-36.pyc -------------------------------------------------------------------------------- /util/__pycache__/ProgressBar.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/ProgressBar.cpython-38.pyc 
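util/ProgressBar.py above implements a small terminal progress bar that redraws itself in place on stderr. A minimal usage sketch (the total of 250 work units is arbitrary):

from util import ProgressBar  # re-exported by util/__init__.py

bar = ProgressBar(total=250, fmt=ProgressBar.FULL)
for _ in range(250):
    # ... one unit of work ...
    bar.current += 1
    bar()       # redraw the bar in place
bar.done()      # jump to 100% and emit the trailing newline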
-------------------------------------------------------------------------------- /util/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /util/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /util/figures/DDGUI-DD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/DDGUI-DD.png -------------------------------------------------------------------------------- /util/figures/Monitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/Monitor.png -------------------------------------------------------------------------------- /util/figures/Progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/Progress.png -------------------------------------------------------------------------------- /util/figures/login_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/login_screen.png -------------------------------------------------------------------------------- /util/figures/models_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/models_full.png -------------------------------------------------------------------------------- /util/figures/new_project_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/new_project_info.png -------------------------------------------------------------------------------- /util/figures/progress_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/progress_full.png -------------------------------------------------------------------------------- /util/figures/start_a_run_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/start_a_run_full.png -------------------------------------------------------------------------------- /util/figures/top_scoring_full.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jamesgleave/DeepDockingGUI/cdd947b794ae52db2bee124516c4455f2a7ef0b5/util/figures/top_scoring_full.png --------------------------------------------------------------------------------