├── Abstract.png ├── LICENSE ├── README.md ├── RUSH.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt └── top_level.txt ├── RUSH ├── data │ ├── DL_data │ │ ├── aha006.sdf │ │ ├── aha006_fragments.txt │ │ ├── aha006_phenyl.sdf │ │ ├── aha006_phenylether.sdf │ │ ├── lre001.sdf │ │ ├── lre001_fragments.txt │ │ ├── lre001_pyrimidine.sdf │ │ ├── lre001_thiophene.sdf │ │ ├── pim447.sdf │ │ ├── pim447_cyclohexyl.sdf │ │ ├── pim447_fluorophenyl.sdf │ │ ├── pim447_fragments.txt │ │ ├── sr3737.sdf │ │ ├── sr3737_fluorophenyl.sdf │ │ ├── sr3737_fragments.txt │ │ └── sr3737_trimethoxyphenyl.sdf │ ├── PDB_structures │ │ ├── aha006.sdf │ │ ├── lre001.sdf │ │ ├── pim447.sdf │ │ └── sr3737.sdf │ ├── PIM1_CHEMBL2147_ligands.csv │ ├── custom_MCS.py │ └── input_mols.ipynb ├── linkers.csv ├── notebooks │ ├── DL_pim447.ipynb │ ├── LI_RL_pim447.ipynb │ ├── LI_SF_pim447.ipynb │ ├── RE_RL_pim447.ipynb │ └── RE_TL_pim447.ipynb ├── scoring_plugins │ ├── REINVENT3.2 │ │ └── reinvent_scoring │ │ │ ├── __init__.py │ │ │ └── scoring │ │ │ ├── .vscode │ │ │ └── settings.json │ │ │ ├── __init__.py │ │ │ ├── component_parameters.py │ │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── example.config.json │ │ │ └── test_config.json │ │ │ ├── diversity_filters │ │ │ ├── __init__.py │ │ │ ├── curriculum_learning │ │ │ │ ├── __init__.py │ │ │ │ ├── base_diversity_filter.py │ │ │ │ ├── column_names_enum.py │ │ │ │ ├── diversity_filter.py │ │ │ │ ├── diversity_filter_memory.py │ │ │ │ ├── diversity_filter_parameters.py │ │ │ │ ├── identical_murcko_scaffold.py │ │ │ │ ├── identical_topological_scaffold.py │ │ │ │ ├── loggable_data_dto.py │ │ │ │ ├── memory_record_dto.py │ │ │ │ ├── no_filter.py │ │ │ │ ├── no_filter_with_penalty.py │ │ │ │ ├── scaffold_similarity.py │ │ │ │ └── update_diversity_filter_dto.py │ │ │ ├── lib_invent │ │ │ │ ├── __init__.py │ │ │ │ ├── base_diversity_filter.py │ │ │ │ ├── diversity_filter.py │ │ │ │ ├── diversity_filter_memory.py │ │ │ │ ├── 
diversity_filter_parameters.py │ │ │ │ ├── identical_murcko_scaffold.py │ │ │ │ ├── no_filter.py │ │ │ │ └── no_filter_with_penalty.py │ │ │ └── reinvent_core │ │ │ │ ├── __init__.py │ │ │ │ ├── base_diversity_filter.py │ │ │ │ ├── diversity_filter.py │ │ │ │ ├── diversity_filter_memory.py │ │ │ │ ├── diversity_filter_parameters.py │ │ │ │ ├── identical_murcko_scaffold.py │ │ │ │ ├── identical_topological_scaffold.py │ │ │ │ ├── no_scaffold_filter.py │ │ │ │ └── scaffold_similarity.py │ │ │ ├── enums │ │ │ ├── __init__.py │ │ │ ├── component_specific_parameters_enum.py │ │ │ ├── container_type_enum.py │ │ │ ├── descriptor_types_enum.py │ │ │ ├── diversity_filter_enum.py │ │ │ ├── environmental_variables_enum.py │ │ │ ├── logging_mode_enum.py │ │ │ ├── rocs_input_file_types_enum.py │ │ │ ├── rocs_similarity_measures_enum.py │ │ │ ├── rocs_specific_parameters_enum.py │ │ │ ├── scoring_function_component_enum.py │ │ │ ├── scoring_function_enum.py │ │ │ ├── transformation_parameters_enum.py │ │ │ └── transformation_type_enum.py │ │ │ ├── function │ │ │ ├── __init__.py │ │ │ ├── base_scoring_function.py │ │ │ ├── custom_product.py │ │ │ └── custom_sum.py │ │ │ ├── predictive_model │ │ │ ├── __init__.py │ │ │ ├── base_model_container.py │ │ │ ├── model_container.py │ │ │ ├── optuna_container.py │ │ │ └── scikit_model_container.py │ │ │ ├── score_components │ │ │ ├── __init__.py │ │ │ ├── aizynth │ │ │ │ ├── __init__.py │ │ │ │ └── building_block_availability_component.py │ │ │ ├── base_score_component.py │ │ │ ├── console_invoked │ │ │ │ ├── __init__.py │ │ │ │ ├── base_console_invoked_component.py │ │ │ │ └── icolos.py │ │ │ ├── link_invent │ │ │ │ ├── __init__.py │ │ │ │ ├── base_link_invent_component.py │ │ │ │ ├── linker_effective_length.py │ │ │ │ ├── linker_graph_length.py │ │ │ │ ├── linker_length_ratio.py │ │ │ │ ├── linker_mol_weight.py │ │ │ │ ├── linker_num_aliphatic_rings.py │ │ │ │ ├── linker_num_aromatic_rings.py │ │ │ │ ├── linker_num_hba.py │ │ │ │ ├── 
linker_num_hbd.py │ │ │ │ ├── linker_num_rings.py │ │ │ │ ├── linker_num_sp2_atoms.py │ │ │ │ ├── linker_num_sp3_atoms.py │ │ │ │ ├── linker_num_sp_atoms.py │ │ │ │ └── linker_ratio_rotatable_bonds.py │ │ │ ├── physchem │ │ │ │ ├── __init__.py │ │ │ │ ├── base_physchem_component.py │ │ │ │ ├── graph_length.py │ │ │ │ ├── hba.py │ │ │ │ ├── hbd.py │ │ │ │ ├── mol_weight.py │ │ │ │ ├── num_aliphatic_rings.py │ │ │ │ ├── num_aromatic_rings.py │ │ │ │ ├── num_rings.py │ │ │ │ ├── number_of_stereo_centers.py │ │ │ │ ├── rot_bonds.py │ │ │ │ ├── slogp.py │ │ │ │ └── tpsa.py │ │ │ ├── pip │ │ │ │ ├── __init__.py │ │ │ │ ├── base_pip_model_batching_component.py │ │ │ │ ├── base_pip_model_component.py │ │ │ │ ├── base_rest_component.py │ │ │ │ ├── pip_log_prediction_component.py │ │ │ │ ├── pip_prediction_component.py │ │ │ │ ├── qptuna_pip_model_component.py │ │ │ │ ├── ratpk_pip.py │ │ │ │ └── string_pip_prediction_component.py │ │ │ ├── rest │ │ │ │ ├── __init__.py │ │ │ │ └── general_rest_component.py │ │ │ ├── rocs │ │ │ │ ├── __init__.py │ │ │ │ ├── base_rocs_component.py │ │ │ │ ├── default_values.py │ │ │ │ ├── oefuncs.py │ │ │ │ ├── oehelper.py │ │ │ │ ├── parallel_rocs_similarity.py │ │ │ │ └── rocs_similarity.py │ │ │ ├── scaffold_hopping │ │ │ │ ├── RuSH.py │ │ │ │ ├── __init__.py │ │ │ │ └── obtain_scaffold.py │ │ │ ├── score_component_factory.py │ │ │ ├── standard │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_alerts_component.py │ │ │ │ ├── jaccard_distance.py │ │ │ │ ├── matching_substructure.py │ │ │ │ ├── predictive_property_component.py │ │ │ │ ├── qed_score.py │ │ │ │ ├── selectivity_component.py │ │ │ │ └── tanimoto_similarity.py │ │ │ ├── structural │ │ │ │ ├── __init__.py │ │ │ │ ├── azdock.py │ │ │ │ ├── base_structural_component.py │ │ │ │ └── dockstream.py │ │ │ └── synthetic_accessibility │ │ │ │ ├── __init__.py │ │ │ │ ├── fpscores.pkl.gz │ │ │ │ ├── sas_component.py │ │ │ │ └── sascorer.py │ │ │ ├── score_summary.py │ │ │ ├── 
score_transformations.py │ │ │ ├── scoring_function_factory.py │ │ │ ├── scoring_function_parameters.py │ │ │ └── utils.py │ └── REINVENT4 │ │ └── reinvent_plugins │ │ ├── components │ │ └── comp_RuSHscore.py │ │ └── decorators.py ├── scripts │ ├── RuSH.py │ ├── calc_SC_RDKit.py │ └── scaffoldfinder.py └── using_scaffoldfinder.ipynb ├── init_setup.py ├── rush.yml └── setup.py /Abstract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molML/RUSH/8cbfac695e001bfc2a05b4aaaa58e11f367b79c0/Abstract.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Luke Rossen 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /RUSH.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: RUSH 3 | Version: 0.1 4 | -------------------------------------------------------------------------------- /RUSH.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | RUSH.egg-info/PKG-INFO 3 | RUSH.egg-info/SOURCES.txt 4 | RUSH.egg-info/dependency_links.txt 5 | RUSH.egg-info/top_level.txt -------------------------------------------------------------------------------- /RUSH.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /RUSH.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/aha006.sdf: -------------------------------------------------------------------------------- 1 | AHA006 2 | PyMOL2.5 3D 0 3 | 4 | 41 45 0 0 0 0 0 0 0 0999 V2000 5 | 11.2880 23.4910 5.8630 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | 11.7520 21.9940 6.4090 S 0 0 0 0 0 0 0 0 0 0 0 0 7 | 13.3390 22.0510 6.1430 N 0 0 0 0 0 0 0 0 0 0 0 0 8 | 13.7180 21.8820 4.7150 C 0 0 0 0 0 0 0 
0 0 0 0 0 9 | 13.8660 23.2220 4.0190 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 12.9640 24.3140 4.5500 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 11.4700 24.0310 4.5340 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 10.0970 23.9500 6.4360 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 9.7590 25.1350 7.2730 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 8.5260 25.7840 7.1410 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 8.2300 26.8970 7.9290 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 9.1850 27.3780 8.8690 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 10.4310 26.7390 9.0090 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 10.7160 25.6130 8.2060 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 11.1950 20.9190 5.6210 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | 14.7620 20.0210 3.5210 O 0 0 0 0 0 0 0 0 0 0 0 0 21 | 15.7580 19.1500 3.2110 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 16.6850 18.7520 4.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 17.7110 17.8480 3.8820 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 17.8040 17.3340 2.5690 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 16.8800 17.7260 1.5850 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 15.8660 18.6320 1.9120 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 14.1990 22.6960 7.1600 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 15.1850 21.8560 7.9250 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 14.7480 20.9690 8.9400 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 15.6870 20.1700 9.6510 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 17.0610 20.2600 9.3460 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 17.4950 21.1470 8.3360 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 16.5580 21.9380 7.6310 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 15.1850 23.6960 4.1710 O 0 0 0 0 0 0 0 0 0 0 0 0 35 | 13.2820 25.4920 3.8480 O 0 0 0 0 0 0 0 0 0 0 0 0 36 | 10.8120 23.1390 3.4660 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 9.3880 23.2350 3.7130 O 0 0 0 0 0 0 0 0 0 0 0 0 38 | 8.6380 24.0960 2.9990 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 9.1860 25.2970 2.5060 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 8.4120 26.2030 1.7670 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 7.0650 25.9140 1.5160 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 6.5070 24.7220 2.0070 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.2990 23.8160 2.7450 C 0 0 0 0 0 0 0 0 0 0 0 0 44 | 11.5630 21.9660 7.8210 O 0 0 0 0 0 0 0 0 0 0 0 0 45 | 14.9800 21.0170 4.5480 C 0 0 0 0 0 0 0 0 0 0 0 0 46 
| 1 2 1 0 0 0 0 47 | 1 7 1 0 0 0 0 48 | 1 8 1 0 0 0 0 49 | 2 3 1 0 0 0 0 50 | 2 15 2 0 0 0 0 51 | 2 40 2 0 0 0 0 52 | 3 4 1 0 0 0 0 53 | 3 23 1 0 0 0 0 54 | 4 5 1 0 0 0 0 55 | 4 41 1 0 0 0 0 56 | 5 6 1 0 0 0 0 57 | 5 30 1 0 0 0 0 58 | 6 7 1 0 0 0 0 59 | 6 31 1 0 0 0 0 60 | 7 32 1 0 0 0 0 61 | 8 9 1 0 0 0 0 62 | 9 10 4 0 0 0 0 63 | 9 14 4 0 0 0 0 64 | 10 11 4 0 0 0 0 65 | 11 12 4 0 0 0 0 66 | 12 13 4 0 0 0 0 67 | 13 14 4 0 0 0 0 68 | 16 17 1 0 0 0 0 69 | 16 41 1 0 0 0 0 70 | 17 18 4 0 0 0 0 71 | 17 22 4 0 0 0 0 72 | 18 19 4 0 0 0 0 73 | 19 20 4 0 0 0 0 74 | 20 21 4 0 0 0 0 75 | 21 22 4 0 0 0 0 76 | 23 24 1 0 0 0 0 77 | 24 25 4 0 0 0 0 78 | 24 29 4 0 0 0 0 79 | 25 26 4 0 0 0 0 80 | 26 27 4 0 0 0 0 81 | 27 28 4 0 0 0 0 82 | 28 29 4 0 0 0 0 83 | 32 33 1 0 0 0 0 84 | 33 34 1 0 0 0 0 85 | 34 35 4 0 0 0 0 86 | 34 39 4 0 0 0 0 87 | 35 36 4 0 0 0 0 88 | 36 37 4 0 0 0 0 89 | 37 38 4 0 0 0 0 90 | 38 39 4 0 0 0 0 91 | M END 92 | $$$$ 93 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/aha006_fragments.txt: -------------------------------------------------------------------------------- 1 | *Oc1ccccc1.*c1ccccc1 8.033096583366587 1.6459199064314043 -------------------------------------------------------------------------------- /RUSH/data/DL_data/aha006_phenyl.sdf: -------------------------------------------------------------------------------- 1 | AHA006 2 | PyMOL2.5 3D 0 3 | 4 | 6 6 0 0 0 0 0 0 0 0999 V2000 5 | 9.7590 25.1350 7.2730 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 8.5260 25.7840 7.1410 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 8.2300 26.8970 7.9290 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 9.1850 27.3780 8.8690 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 10.4310 26.7390 9.0090 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 10.7160 25.6130 8.2060 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 1 2 4 0 0 0 0 12 | 1 6 4 0 0 0 0 13 | 2 3 4 0 0 0 0 14 | 3 4 4 0 0 0 0 15 | 4 5 4 0 0 0 0 16 | 5 6 4 0 0 0 0 17 | M END 18 | $$$$ 19 | 
-------------------------------------------------------------------------------- /RUSH/data/DL_data/aha006_phenylether.sdf: -------------------------------------------------------------------------------- 1 | AHA006 2 | PyMOL2.5 3D 0 3 | 4 | 7 7 0 0 0 0 0 0 0 0999 V2000 5 | 14.7620 20.0210 3.5210 O 0 0 0 0 0 0 0 0 0 0 0 0 6 | 15.7580 19.1500 3.2110 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 16.6850 18.7520 4.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 17.7110 17.8480 3.8820 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 17.8040 17.3340 2.5690 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 16.8800 17.7260 1.5850 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 15.8660 18.6320 1.9120 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1 2 1 0 0 0 0 13 | 2 3 4 0 0 0 0 14 | 2 7 4 0 0 0 0 15 | 3 4 4 0 0 0 0 16 | 4 5 4 0 0 0 0 17 | 5 6 4 0 0 0 0 18 | 6 7 4 0 0 0 0 19 | M END 20 | $$$$ 21 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/lre001.sdf: -------------------------------------------------------------------------------- 1 | LRE1 2 | PyMOL2.5 3D 0 3 | 4 | 18 20 0 0 0 0 0 0 0 0999 V2000 5 | 13.0630 26.5770 -0.9170 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 13.6610 25.9090 -2.2010 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 14.5810 26.4670 -2.9900 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 14.8640 25.5960 -4.0130 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 14.1510 24.4780 -4.1160 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 12.1820 28.6200 -1.8670 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 11.6800 29.8890 -1.1200 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 13.1670 29.4970 -1.2320 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 9.5780 23.5120 -1.2230 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | 11.8870 27.3540 -1.2210 N 0 0 0 0 0 0 0 0 0 0 0 0 15 | 13.1010 24.4330 -2.7710 S 0 0 0 0 0 0 0 0 0 0 0 0 16 | 6.9640 27.2280 -2.9960 Cl 0 0 0 0 0 0 0 0 0 0 0 0 17 | 8.4800 25.3500 -1.9960 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 9.5630 24.7990 -1.4440 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 10.6290 25.4680 -1.1020 N 0 0 0 0 0 0 0 0 0 0 0 0 20 | 10.7390 26.7500 -1.4190 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 9.6280 27.3530 -2.0350 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 8.4930 
26.6390 -2.3120 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 1 2 1 0 0 0 0 24 | 1 10 1 0 0 0 0 25 | 2 3 4 0 0 0 0 26 | 2 11 4 0 0 0 0 27 | 3 4 4 0 0 0 0 28 | 4 5 4 0 0 0 0 29 | 5 11 4 0 0 0 0 30 | 6 7 1 0 0 0 0 31 | 6 8 1 0 0 0 0 32 | 6 10 1 0 0 0 0 33 | 7 8 1 0 0 0 0 34 | 9 14 1 0 0 0 0 35 | 10 16 1 0 0 0 0 36 | 12 18 1 0 0 0 0 37 | 13 14 4 0 0 0 0 38 | 13 18 4 0 0 0 0 39 | 14 15 4 0 0 0 0 40 | 15 16 4 0 0 0 0 41 | 16 17 4 0 0 0 0 42 | 17 18 4 0 0 0 0 43 | M END 44 | $$$$ 45 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/lre001_fragments.txt: -------------------------------------------------------------------------------- 1 | *c1cc(Cl)nc(N)n1.*c1cccs1 3.1258485895501416 1.4150239454307496 -------------------------------------------------------------------------------- /RUSH/data/DL_data/lre001_pyrimidine.sdf: -------------------------------------------------------------------------------- 1 | LRE1 2 | PyMOL2.5 3D 0 3 | 4 | 8 8 0 0 0 0 0 0 0 0999 V2000 5 | 9.5780 23.5120 -1.2230 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | 6.9640 27.2280 -2.9960 Cl 0 0 0 0 0 0 0 0 0 0 0 0 7 | 8.4800 25.3500 -1.9960 N 0 0 0 0 0 0 0 0 0 0 0 0 8 | 9.5630 24.7990 -1.4440 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 10.6290 25.4680 -1.1020 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | 10.7390 26.7500 -1.4190 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 9.6280 27.3530 -2.0350 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 8.4930 26.6390 -2.3120 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1 4 1 0 0 0 0 14 | 2 8 1 0 0 0 0 15 | 3 4 4 0 0 0 0 16 | 3 8 4 0 0 0 0 17 | 4 5 4 0 0 0 0 18 | 5 6 4 0 0 0 0 19 | 6 7 4 0 0 0 0 20 | 7 8 4 0 0 0 0 21 | M END 22 | $$$$ 23 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/lre001_thiophene.sdf: -------------------------------------------------------------------------------- 1 | LRE1 2 | PyMOL2.5 3D 0 3 | 4 | 5 5 0 0 0 0 0 0 0 0999 V2000 5 | 13.6610 25.9090 -2.2010 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 14.5810 26.4670 -2.9900 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 
14.8640 25.5960 -4.0130 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 14.1510 24.4780 -4.1160 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 13.1010 24.4330 -2.7710 S 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1 2 4 0 0 0 0 11 | 1 5 4 0 0 0 0 12 | 2 3 4 0 0 0 0 13 | 3 4 4 0 0 0 0 14 | 4 5 4 0 0 0 0 15 | M END 16 | $$$$ 17 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/pim447.sdf: -------------------------------------------------------------------------------- 1 | PIM447 2 | PyMOL2.5 3D 0 3 | 4 | 55 58 0 0 0 0 0 0 0 0999 V2000 5 | -40.5840 -5.4540 5.8860 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | -41.0440 -0.5240 5.2440 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -40.5010 -2.8530 -2.0760 O 0 0 0 0 0 0 0 0 0 0 0 0 8 | -40.9660 -1.8740 4.5420 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -36.7290 -3.2430 -0.2500 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | -40.8620 -3.0170 5.5460 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -40.3140 -2.4310 0.1650 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | -40.7410 -4.4010 4.8800 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -42.9350 -2.0310 0.3670 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | -39.5270 -4.3670 3.9490 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -39.7190 -3.2860 2.8850 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -39.7900 -1.9210 3.5720 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -38.6900 -3.3130 1.7630 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | -37.3800 -3.7140 2.0020 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | -36.4530 -3.6660 0.9820 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -37.9850 -2.8640 -0.4850 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -39.0090 -2.8770 0.4690 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | -40.9940 -2.5110 -1.0020 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | -42.4570 -2.2000 -0.8760 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | -43.2690 -2.2150 -2.0050 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | -44.6380 -2.1050 -1.8270 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | -45.1360 -1.9630 -0.5540 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | -44.2610 -1.9200 0.5320 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | -44.7350 -1.7710 1.9400 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | -45.6500 -0.7960 2.3210 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | -46.1530 -0.6720 3.5920 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | -45.7270 -1.5680 
4.5600 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | -44.8000 -2.5500 4.2510 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | -44.3300 -2.6200 2.9600 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | -46.4730 -1.8840 -0.3750 F 0 0 0 0 0 0 0 0 0 0 0 0 35 | -46.0540 0.0830 1.3780 F 0 0 0 0 0 0 0 0 0 0 0 0 36 | -43.3970 -3.5570 2.6630 F 0 0 0 0 0 0 0 0 0 0 0 0 37 | -41.8870 -1.9970 3.9780 H 0 0 0 0 0 0 0 0 0 0 0 0 38 | -41.6400 -4.6340 4.3120 H 0 0 0 0 0 0 0 0 0 0 0 0 39 | -40.7120 -3.4420 2.4680 H 0 0 0 0 0 0 0 0 0 0 0 0 40 | -41.8320 -0.4870 5.9940 H 0 0 0 0 0 0 0 0 0 0 0 0 41 | -40.1080 -0.2940 5.7520 H 0 0 0 0 0 0 0 0 0 0 0 0 42 | -41.2260 0.2870 4.5410 H 0 0 0 0 0 0 0 0 0 0 0 0 43 | -40.0080 -2.8340 6.1960 H 0 0 0 0 0 0 0 0 0 0 0 0 44 | -41.7250 -3.0030 6.2110 H 0 0 0 0 0 0 0 0 0 0 0 0 45 | -39.6660 -5.4790 6.3190 H 0 0 0 0 0 0 0 0 0 0 0 0 46 | -41.3080 -5.4470 6.5950 H 0 0 0 0 0 0 0 0 0 0 0 0 47 | -38.6310 -4.1750 4.5360 H 0 0 0 0 0 0 0 0 0 0 0 0 48 | -39.3470 -5.3320 3.4750 H 0 0 0 0 0 0 0 0 0 0 0 0 49 | -39.8790 -1.1220 2.8380 H 0 0 0 0 0 0 0 0 0 0 0 0 50 | -38.8660 -1.7220 4.1120 H 0 0 0 0 0 0 0 0 0 0 0 0 51 | -37.0450 -4.0510 2.9820 H 0 0 0 0 0 0 0 0 0 0 0 0 52 | -35.4380 -4.0280 1.1300 H 0 0 0 0 0 0 0 0 0 0 0 0 53 | -38.1510 -2.5110 -1.5010 H 0 0 0 0 0 0 0 0 0 0 0 0 54 | -40.7530 -1.9130 0.9180 H 0 0 0 0 0 0 0 0 0 0 0 0 55 | -42.8590 -2.2960 -3.0100 H 0 0 0 0 0 0 0 0 0 0 0 0 56 | -45.3160 -2.1640 -2.6740 H 0 0 0 0 0 0 0 0 0 0 0 0 57 | -46.8660 0.1130 3.8370 H 0 0 0 0 0 0 0 0 0 0 0 0 58 | -46.1300 -1.5080 5.5700 H 0 0 0 0 0 0 0 0 0 0 0 0 59 | -44.4590 -3.2480 5.0130 H 0 0 0 0 0 0 0 0 0 0 0 0 60 | 1 8 1 0 0 0 0 61 | 1 41 1 0 0 0 0 62 | 1 42 1 0 0 0 0 63 | 2 4 1 0 0 0 0 64 | 2 36 1 0 0 0 0 65 | 2 37 1 0 0 0 0 66 | 2 38 1 0 0 0 0 67 | 3 18 2 0 0 0 0 68 | 4 6 1 0 0 0 0 69 | 4 12 1 0 0 0 0 70 | 4 33 1 0 0 0 0 71 | 5 15 4 0 0 0 0 72 | 5 16 4 0 0 0 0 73 | 6 8 1 0 0 0 0 74 | 6 39 1 0 0 0 0 75 | 6 40 1 0 0 0 0 76 | 7 17 1 0 0 0 0 77 | 7 18 1 0 0 0 0 78 | 7 50 1 0 0 0 0 79 | 8 10 1 0 0 0 0 80 | 8 34 1 0 0 0 0 81 | 9 19 
4 0 0 0 0 82 | 9 23 4 0 0 0 0 83 | 10 11 1 0 0 0 0 84 | 10 43 1 0 0 0 0 85 | 10 44 1 0 0 0 0 86 | 11 12 1 0 0 0 0 87 | 11 13 1 0 0 0 0 88 | 11 35 1 0 0 0 0 89 | 12 45 1 0 0 0 0 90 | 12 46 1 0 0 0 0 91 | 13 14 4 0 0 0 0 92 | 13 17 4 0 0 0 0 93 | 14 15 4 0 0 0 0 94 | 14 47 1 0 0 0 0 95 | 15 48 1 0 0 0 0 96 | 16 17 4 0 0 0 0 97 | 16 49 1 0 0 0 0 98 | 18 19 1 0 0 0 0 99 | 19 20 4 0 0 0 0 100 | 20 21 4 0 0 0 0 101 | 20 51 1 0 0 0 0 102 | 21 22 4 0 0 0 0 103 | 21 52 1 0 0 0 0 104 | 22 23 4 0 0 0 0 105 | 22 30 1 0 0 0 0 106 | 23 24 1 0 0 0 0 107 | 24 25 4 0 0 0 0 108 | 24 29 4 0 0 0 0 109 | 25 26 4 0 0 0 0 110 | 25 31 1 0 0 0 0 111 | 26 27 4 0 0 0 0 112 | 26 53 1 0 0 0 0 113 | 27 28 4 0 0 0 0 114 | 27 54 1 0 0 0 0 115 | 28 29 4 0 0 0 0 116 | 28 55 1 0 0 0 0 117 | 29 32 1 0 0 0 0 118 | M END 119 | $$$$ 120 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/pim447_cyclohexyl.sdf: -------------------------------------------------------------------------------- 1 | PIM447 2 | PyMOL2.5 3D 0 3 | 4 | 14 15 0 0 0 0 0 0 0 0999 V2000 5 | -40.5840 -5.4540 5.8860 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | -41.0440 -0.5240 5.2440 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -40.9660 -1.8740 4.5420 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -36.7290 -3.2430 -0.2500 N 0 0 0 0 0 0 0 0 0 0 0 0 9 | -40.8620 -3.0170 5.5460 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -40.7410 -4.4010 4.8800 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -39.5270 -4.3670 3.9490 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -39.7190 -3.2860 2.8850 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -39.7900 -1.9210 3.5720 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -38.6900 -3.3130 1.7630 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -37.3800 -3.7140 2.0020 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -36.4530 -3.6660 0.9820 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -37.9850 -2.8640 -0.4850 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | -39.0090 -2.8770 0.4690 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 1 6 1 0 0 0 0 20 | 2 3 1 0 0 0 0 21 | 3 5 1 0 0 0 0 22 | 3 9 1 0 0 0 0 23 | 4 12 4 0 0 0 0 24 | 4 13 4 0 0 0 0 25 | 5 6 1 0 0 0 0 26 | 6 7 1 0 
0 0 0 27 | 7 8 1 0 0 0 0 28 | 8 9 1 0 0 0 0 29 | 8 10 1 0 0 0 0 30 | 10 11 4 0 0 0 0 31 | 10 14 4 0 0 0 0 32 | 11 12 4 0 0 0 0 33 | 13 14 4 0 0 0 0 34 | M END 35 | $$$$ 36 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/pim447_fluorophenyl.sdf: -------------------------------------------------------------------------------- 1 | PIM447 2 | PyMOL2.5 3D 0 3 | 4 | 8 8 0 0 0 0 0 0 0 0999 V2000 5 | -44.7350 -1.7710 1.9400 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -45.6500 -0.7960 2.3210 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -46.1530 -0.6720 3.5920 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -45.7270 -1.5680 4.5600 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -44.8000 -2.5500 4.2510 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -44.3300 -2.6200 2.9600 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -46.0540 0.0830 1.3780 F 0 0 0 0 0 0 0 0 0 0 0 0 12 | -43.3970 -3.5570 2.6630 F 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1 2 4 0 0 0 0 14 | 1 6 4 0 0 0 0 15 | 2 3 4 0 0 0 0 16 | 2 7 1 0 0 0 0 17 | 3 4 4 0 0 0 0 18 | 4 5 4 0 0 0 0 19 | 5 6 4 0 0 0 0 20 | 6 8 1 0 0 0 0 21 | M END 22 | $$$$ 23 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/pim447_fragments.txt: -------------------------------------------------------------------------------- 1 | *c1c(F)cccc1F.*c1cnccc1C1CC(C)CC(N)C1 5.9971151223578865 1.7318860611960427 -------------------------------------------------------------------------------- /RUSH/data/DL_data/sr3737.sdf: -------------------------------------------------------------------------------- 1 | SR3737 2 | PyMOL2.5 3D 0 3 | 4 | 38 42 0 0 0 0 0 0 0 0999 V2000 5 | -38.4350 -37.7620 -17.5380 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -40.2900 -36.2520 -11.2990 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -39.7450 -39.4790 -14.3560 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -25.8270 -39.3590 -6.6540 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -26.9140 -39.4950 -7.5680 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -31.9370 -31.7160 -13.3660 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -24.9510 -38.2560 -6.7010 C 0 0 0 0 0 0 0 0 0 0 0 0 
12 | -33.1510 -32.1330 -14.0230 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -31.4150 -32.4640 -12.2790 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -27.1030 -38.5160 -8.5500 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -27.8890 -34.9570 -11.2350 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -29.1090 -34.4500 -11.5340 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -31.2930 -35.5830 -8.9320 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | -36.8610 -36.1110 -14.9630 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | -38.2400 -35.4400 -13.0210 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -33.3090 -34.0270 -12.5320 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -28.8270 -36.0060 -9.2150 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | -35.0510 -33.8380 -14.2330 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | -32.1900 -34.8750 -9.6820 N 0 0 0 0 0 0 0 0 0 0 0 0 24 | -36.0530 -34.0000 -13.3410 N 0 0 0 0 0 0 0 0 0 0 0 0 25 | -26.5760 -36.4090 -9.5350 N 0 0 0 0 0 0 0 0 0 0 0 0 26 | -35.1270 -33.9320 -15.4050 O 0 0 0 0 0 0 0 0 0 0 0 0 27 | -37.5470 -37.8430 -16.4630 O 0 0 0 0 0 0 0 0 0 0 0 0 28 | -40.3470 -36.5940 -12.6540 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | -39.8680 -38.2160 -14.9430 O 0 0 0 0 0 0 0 0 0 0 0 0 30 | -27.7630 -35.7560 -10.0170 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | -37.1160 -35.2540 -13.7750 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | -25.1330 -37.2990 -7.6430 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | -33.8070 -33.2890 -13.5690 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | -32.0680 -33.5720 -11.8420 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | -26.2300 -37.4530 -8.6090 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | -37.7770 -37.0600 -15.3330 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | -39.1920 -36.4130 -13.4160 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | -38.9540 -37.2320 -14.5800 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | -30.0920 -35.4870 -9.5420 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | -30.2270 -34.7160 -10.6870 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | -31.5100 -34.3530 -10.7610 N 0 0 0 0 0 0 0 0 0 0 0 0 42 | -24.2980 -36.2220 -7.7080 F 0 0 0 0 0 0 0 0 0 0 0 0 43 | 1 23 1 0 0 0 0 44 | 2 24 1 0 0 0 0 45 | 3 25 1 0 0 0 0 46 | 4 5 4 0 0 0 0 47 | 4 7 4 0 0 0 0 48 | 5 10 4 0 0 0 0 49 | 6 8 4 0 0 0 0 50 | 6 9 4 0 0 0 0 51 | 7 28 4 0 0 0 0 52 | 8 29 4 0 0 0 0 53 | 9 30 4 0 
0 0 0 54 | 10 31 4 0 0 0 0 55 | 11 12 4 0 0 0 0 56 | 11 26 4 0 0 0 0 57 | 12 36 4 0 0 0 0 58 | 13 19 4 0 0 0 0 59 | 13 35 4 0 0 0 0 60 | 14 27 4 0 0 0 0 61 | 14 32 4 0 0 0 0 62 | 15 27 4 0 0 0 0 63 | 15 33 4 0 0 0 0 64 | 16 29 4 0 0 0 0 65 | 16 30 4 0 0 0 0 66 | 17 26 4 0 0 0 0 67 | 17 35 4 0 0 0 0 68 | 18 20 1 0 0 0 0 69 | 18 22 2 0 0 0 0 70 | 18 29 1 0 0 0 0 71 | 19 37 4 0 0 0 0 72 | 20 27 1 0 0 0 0 73 | 21 26 1 0 0 0 0 74 | 21 31 1 0 0 0 0 75 | 23 32 1 0 0 0 0 76 | 24 33 1 0 0 0 0 77 | 25 34 1 0 0 0 0 78 | 28 31 4 0 0 0 0 79 | 28 38 1 0 0 0 0 80 | 30 37 1 0 0 0 0 81 | 32 34 4 0 0 0 0 82 | 33 34 4 0 0 0 0 83 | 35 36 4 0 0 0 0 84 | 36 37 4 0 0 0 0 85 | M END 86 | $$$$ 87 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/sr3737_fluorophenyl.sdf: -------------------------------------------------------------------------------- 1 | SR3737 2 | PyMOL2.5 3D 0 3 | 4 | 8 8 0 0 0 0 0 0 0 0999 V2000 5 | -25.8270 -39.3590 -6.6540 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -26.9140 -39.4950 -7.5680 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -24.9510 -38.2560 -6.7010 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -27.1030 -38.5160 -8.5500 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -26.5760 -36.4090 -9.5350 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | -25.1330 -37.2990 -7.6430 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -26.2300 -37.4530 -8.6090 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -24.2980 -36.2220 -7.7080 F 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1 2 4 0 0 0 0 14 | 1 3 4 0 0 0 0 15 | 2 4 4 0 0 0 0 16 | 3 6 4 0 0 0 0 17 | 4 7 4 0 0 0 0 18 | 5 7 1 0 0 0 0 19 | 6 7 4 0 0 0 0 20 | 6 8 1 0 0 0 0 21 | M END 22 | $$$$ 23 | -------------------------------------------------------------------------------- /RUSH/data/DL_data/sr3737_fragments.txt: -------------------------------------------------------------------------------- 1 | *Nc1ccccc1F.*c1cccc(C(=O)Nc2cc(OC)c(OC)c(OC)c2)c1 6.60704744165113 1.9142153584249308 -------------------------------------------------------------------------------- 
/RUSH/data/DL_data/sr3737_trimethoxyphenyl.sdf: -------------------------------------------------------------------------------- 1 | SR3737 2 | PyMOL2.5 3D 0 3 | 4 | 21 22 0 0 0 0 0 0 0 0999 V2000 5 | -38.4350 -37.7620 -17.5380 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -40.2900 -36.2520 -11.2990 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -39.7450 -39.4790 -14.3560 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -31.9370 -31.7160 -13.3660 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -33.1510 -32.1330 -14.0230 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -31.4150 -32.4640 -12.2790 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -36.8610 -36.1110 -14.9630 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -38.2400 -35.4400 -13.0210 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -33.3090 -34.0270 -12.5320 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -35.0510 -33.8380 -14.2330 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -36.0530 -34.0000 -13.3410 N 0 0 0 0 0 0 0 0 0 0 0 0 16 | -35.1270 -33.9320 -15.4050 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | -37.5470 -37.8430 -16.4630 O 0 0 0 0 0 0 0 0 0 0 0 0 18 | -40.3470 -36.5940 -12.6540 O 0 0 0 0 0 0 0 0 0 0 0 0 19 | -39.8680 -38.2160 -14.9430 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | -37.1160 -35.2540 -13.7750 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -33.8070 -33.2890 -13.5690 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | -32.0680 -33.5720 -11.8420 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | -37.7770 -37.0600 -15.3330 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | -39.1920 -36.4130 -13.4160 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | -38.9540 -37.2320 -14.5800 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 1 13 1 0 0 0 0 27 | 2 14 1 0 0 0 0 28 | 3 15 1 0 0 0 0 29 | 4 5 4 0 0 0 0 30 | 4 6 4 0 0 0 0 31 | 5 17 4 0 0 0 0 32 | 6 18 4 0 0 0 0 33 | 7 16 4 0 0 0 0 34 | 7 19 4 0 0 0 0 35 | 8 16 4 0 0 0 0 36 | 8 20 4 0 0 0 0 37 | 9 17 4 0 0 0 0 38 | 9 18 4 0 0 0 0 39 | 10 11 1 0 0 0 0 40 | 10 12 2 0 0 0 0 41 | 10 17 1 0 0 0 0 42 | 11 16 1 0 0 0 0 43 | 13 19 1 0 0 0 0 44 | 14 20 1 0 0 0 0 45 | 15 21 1 0 0 0 0 46 | 19 21 4 0 0 0 0 47 | 20 21 4 0 0 0 0 48 | M END 49 | $$$$ 50 | -------------------------------------------------------------------------------- 
/RUSH/data/PDB_structures/aha006.sdf: -------------------------------------------------------------------------------- 1 | AHA006 2 | PyMOL2.5 3D 0 3 | 4 | 41 45 0 0 0 0 0 0 0 0999 V2000 5 | 11.2880 23.4910 5.8630 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | 11.7520 21.9940 6.4090 S 0 0 0 0 0 0 0 0 0 0 0 0 7 | 13.3390 22.0510 6.1430 N 0 0 0 0 0 0 0 0 0 0 0 0 8 | 13.7180 21.8820 4.7150 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 13.8660 23.2220 4.0190 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 12.9640 24.3140 4.5500 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 11.4700 24.0310 4.5340 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 10.0970 23.9500 6.4360 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 9.7590 25.1350 7.2730 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 8.5260 25.7840 7.1410 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 8.2300 26.8970 7.9290 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 9.1850 27.3780 8.8690 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 10.4310 26.7390 9.0090 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 10.7160 25.6130 8.2060 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 11.1950 20.9190 5.6210 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | 14.7620 20.0210 3.5210 O 0 0 0 0 0 0 0 0 0 0 0 0 21 | 15.7580 19.1500 3.2110 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 16.6850 18.7520 4.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 17.7110 17.8480 3.8820 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 17.8040 17.3340 2.5690 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 16.8800 17.7260 1.5850 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 15.8660 18.6320 1.9120 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 14.1990 22.6960 7.1600 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 15.1850 21.8560 7.9250 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 14.7480 20.9690 8.9400 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 15.6870 20.1700 9.6510 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 17.0610 20.2600 9.3460 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 17.4950 21.1470 8.3360 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 16.5580 21.9380 7.6310 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 15.1850 23.6960 4.1710 O 0 0 0 0 0 0 0 0 0 0 0 0 35 | 13.2820 25.4920 3.8480 O 0 0 0 0 0 0 0 0 0 0 0 0 36 | 10.8120 23.1390 3.4660 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 9.3880 23.2350 3.7130 O 0 0 0 0 0 0 0 0 0 0 0 0 38 | 8.6380 24.0960 2.9990 C 0 0 0 0 0 0 
0 0 0 0 0 0 39 | 9.1860 25.2970 2.5060 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 8.4120 26.2030 1.7670 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 7.0650 25.9140 1.5160 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 6.5070 24.7220 2.0070 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.2990 23.8160 2.7450 C 0 0 0 0 0 0 0 0 0 0 0 0 44 | 11.5630 21.9660 7.8210 O 0 0 0 0 0 0 0 0 0 0 0 0 45 | 14.9800 21.0170 4.5480 C 0 0 0 0 0 0 0 0 0 0 0 0 46 | 1 2 1 0 0 0 0 47 | 1 7 1 0 0 0 0 48 | 1 8 1 0 0 0 0 49 | 2 3 1 0 0 0 0 50 | 2 15 2 0 0 0 0 51 | 2 40 2 0 0 0 0 52 | 3 4 1 0 0 0 0 53 | 3 23 1 0 0 0 0 54 | 4 5 1 0 0 0 0 55 | 4 41 1 0 0 0 0 56 | 5 6 1 0 0 0 0 57 | 5 30 1 0 0 0 0 58 | 6 7 1 0 0 0 0 59 | 6 31 1 0 0 0 0 60 | 7 32 1 0 0 0 0 61 | 8 9 1 0 0 0 0 62 | 9 10 4 0 0 0 0 63 | 9 14 4 0 0 0 0 64 | 10 11 4 0 0 0 0 65 | 11 12 4 0 0 0 0 66 | 12 13 4 0 0 0 0 67 | 13 14 4 0 0 0 0 68 | 16 17 1 0 0 0 0 69 | 16 41 1 0 0 0 0 70 | 17 18 4 0 0 0 0 71 | 17 22 4 0 0 0 0 72 | 18 19 4 0 0 0 0 73 | 19 20 4 0 0 0 0 74 | 20 21 4 0 0 0 0 75 | 21 22 4 0 0 0 0 76 | 23 24 1 0 0 0 0 77 | 24 25 4 0 0 0 0 78 | 24 29 4 0 0 0 0 79 | 25 26 4 0 0 0 0 80 | 26 27 4 0 0 0 0 81 | 27 28 4 0 0 0 0 82 | 28 29 4 0 0 0 0 83 | 32 33 1 0 0 0 0 84 | 33 34 1 0 0 0 0 85 | 34 35 4 0 0 0 0 86 | 34 39 4 0 0 0 0 87 | 35 36 4 0 0 0 0 88 | 36 37 4 0 0 0 0 89 | 37 38 4 0 0 0 0 90 | 38 39 4 0 0 0 0 91 | M END 92 | $$$$ 93 | -------------------------------------------------------------------------------- /RUSH/data/PDB_structures/lre001.sdf: -------------------------------------------------------------------------------- 1 | LRE1 2 | PyMOL2.5 3D 0 3 | 4 | 18 20 0 0 0 0 0 0 0 0999 V2000 5 | 13.0630 26.5770 -0.9170 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 13.6610 25.9090 -2.2010 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 14.5810 26.4670 -2.9900 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 14.8640 25.5960 -4.0130 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 14.1510 24.4780 -4.1160 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 12.1820 28.6200 -1.8670 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 11.6800 29.8890 -1.1200 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 13.1670 29.4970 
-1.2320 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 9.5780 23.5120 -1.2230 N 0 0 0 0 0 0 0 0 0 0 0 0 14 | 11.8870 27.3540 -1.2210 N 0 0 0 0 0 0 0 0 0 0 0 0 15 | 13.1010 24.4330 -2.7710 S 0 0 0 0 0 0 0 0 0 0 0 0 16 | 6.9640 27.2280 -2.9960 Cl 0 0 0 0 0 0 0 0 0 0 0 0 17 | 8.4800 25.3500 -1.9960 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 9.5630 24.7990 -1.4440 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 10.6290 25.4680 -1.1020 N 0 0 0 0 0 0 0 0 0 0 0 0 20 | 10.7390 26.7500 -1.4190 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 9.6280 27.3530 -2.0350 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 8.4930 26.6390 -2.3120 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 1 10 1 0 0 0 0 24 | 1 2 1 0 0 0 0 25 | 2 3 4 0 0 0 0 26 | 3 4 4 0 0 0 0 27 | 4 5 4 0 0 0 0 28 | 5 11 4 0 0 0 0 29 | 6 7 1 0 0 0 0 30 | 6 8 1 0 0 0 0 31 | 7 8 1 0 0 0 0 32 | 6 10 1 0 0 0 0 33 | 10 16 1 0 0 0 0 34 | 2 11 4 0 0 0 0 35 | 13 14 4 0 0 0 0 36 | 9 14 1 0 0 0 0 37 | 14 15 4 0 0 0 0 38 | 15 16 4 0 0 0 0 39 | 16 17 4 0 0 0 0 40 | 17 18 4 0 0 0 0 41 | 12 18 1 0 0 0 0 42 | 13 18 4 0 0 0 0 43 | M END 44 | $$$$ 45 | -------------------------------------------------------------------------------- /RUSH/data/PDB_structures/sr3737.sdf: -------------------------------------------------------------------------------- 1 | SR3737 2 | PyMOL2.5 3D 0 3 | 4 | 38 42 0 0 0 0 0 0 0 0999 V2000 5 | -38.4350 -37.7620 -17.5380 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -40.2900 -36.2520 -11.2990 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -39.7450 -39.4790 -14.3560 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -25.8270 -39.3590 -6.6540 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -26.9140 -39.4950 -7.5680 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -31.9370 -31.7160 -13.3660 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -24.9510 -38.2560 -6.7010 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -33.1510 -32.1330 -14.0230 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -31.4150 -32.4640 -12.2790 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -27.1030 -38.5160 -8.5500 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -27.8890 -34.9570 -11.2350 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -29.1090 -34.4500 -11.5340 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -31.2930 -35.5830 -8.9320 C 0 0 0 
0 0 0 0 0 0 0 0 0 18 | -36.8610 -36.1110 -14.9630 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | -38.2400 -35.4400 -13.0210 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -33.3090 -34.0270 -12.5320 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -28.8270 -36.0060 -9.2150 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | -35.0510 -33.8380 -14.2330 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | -32.1900 -34.8750 -9.6820 N 0 0 0 0 0 0 0 0 0 0 0 0 24 | -36.0530 -34.0000 -13.3410 N 0 0 0 0 0 0 0 0 0 0 0 0 25 | -26.5760 -36.4090 -9.5350 N 0 0 0 0 0 0 0 0 0 0 0 0 26 | -35.1270 -33.9320 -15.4050 O 0 0 0 0 0 0 0 0 0 0 0 0 27 | -37.5470 -37.8430 -16.4630 O 0 0 0 0 0 0 0 0 0 0 0 0 28 | -40.3470 -36.5940 -12.6540 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | -39.8680 -38.2160 -14.9430 O 0 0 0 0 0 0 0 0 0 0 0 0 30 | -27.7630 -35.7560 -10.0170 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | -37.1160 -35.2540 -13.7750 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | -25.1330 -37.2990 -7.6430 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | -33.8070 -33.2890 -13.5690 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | -32.0680 -33.5720 -11.8420 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | -26.2300 -37.4530 -8.6090 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | -37.7770 -37.0600 -15.3330 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | -39.1920 -36.4130 -13.4160 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | -38.9540 -37.2320 -14.5800 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | -30.0920 -35.4870 -9.5420 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | -30.2270 -34.7160 -10.6870 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | -31.5100 -34.3530 -10.7610 N 0 0 0 0 0 0 0 0 0 0 0 0 42 | -24.2980 -36.2220 -7.7080 F 0 0 0 0 0 0 0 0 0 0 0 0 43 | 4 5 4 0 0 0 0 44 | 6 8 4 0 0 0 0 45 | 4 7 4 0 0 0 0 46 | 6 9 4 0 0 0 0 47 | 5 10 4 0 0 0 0 48 | 11 12 4 0 0 0 0 49 | 12 36 4 0 0 0 0 50 | 13 19 4 0 0 0 0 51 | 13 35 4 0 0 0 0 52 | 14 27 4 0 0 0 0 53 | 14 32 4 0 0 0 0 54 | 15 33 4 0 0 0 0 55 | 16 29 4 0 0 0 0 56 | 16 30 4 0 0 0 0 57 | 18 20 1 0 0 0 0 58 | 18 22 2 0 0 0 0 59 | 18 29 1 0 0 0 0 60 | 1 23 1 0 0 0 0 61 | 2 24 1 0 0 0 0 62 | 3 25 1 0 0 0 0 63 | 11 26 4 0 0 0 0 64 | 17 26 4 0 0 0 0 65 | 21 26 1 0 0 0 0 66 | 15 27 4 0 0 0 0 67 | 20 27 1 0 0 0 0 68 | 7 28 4 0 0 0 0 69 | 
8 29 4 0 0 0 0 70 | 9 30 4 0 0 0 0 71 | 10 31 4 0 0 0 0 72 | 21 31 1 0 0 0 0 73 | 28 31 4 0 0 0 0 74 | 23 32 1 0 0 0 0 75 | 24 33 1 0 0 0 0 76 | 33 34 4 0 0 0 0 77 | 25 34 1 0 0 0 0 78 | 32 34 4 0 0 0 0 79 | 17 35 4 0 0 0 0 80 | 35 36 4 0 0 0 0 81 | 36 37 4 0 0 0 0 82 | 19 37 4 0 0 0 0 83 | 30 37 1 0 0 0 0 84 | 28 38 1 0 0 0 0 85 | M END 86 | $$$$ 87 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/__init__.py: -------------------------------------------------------------------------------- 1 | from reinvent_scoring.scoring import * 2 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.fontSize": 12 3 | } -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/__init__.py: -------------------------------------------------------------------------------- 1 | from reinvent_scoring.scoring.function import * 2 | from reinvent_scoring.scoring.enums import * 3 | from reinvent_scoring.scoring.score_components import * 4 | 5 | from reinvent_scoring.scoring.component_parameters import ComponentParameters 6 | from reinvent_scoring.scoring.score_summary import FinalSummary, ComponentSummary, LoggableComponent 7 | from reinvent_scoring.scoring.score_transformations import TransformationFactory 8 | from reinvent_scoring.scoring.scoring_function_factory import ScoringFunctionFactory 9 | from reinvent_scoring.scoring.scoring_function_parameters import ScoringFunctionParameters 10 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/component_parameters.py: 
@dataclass
class ComponentParameters:
    """Configuration record for a single scoring component."""
    # Identifier of the component kind.
    # NOTE(review): presumably matched against the scoring-function component enum — confirm.
    component_type: str
    # Label of this component instance.
    # NOTE(review): DiversityFilterMemory keys its score columns by `c.parameters.name`;
    # presumably `parameters` there is an instance of this class — confirm.
    name: str
    # NOTE(review): presumably the relative weight used when component scores are aggregated — confirm.
    weight: float
    # Optional component-specific settings; stays None when nothing is supplied.
    specific_parameters: dict = None
def read_json_file(path):
    """Read the JSON document stored at *path* and return the parsed object.

    Carriage returns and newlines are stripped from the file content before
    parsing, so the whole document is parsed as a single line.

    :param path: location of the JSON file to read
    :return: the decoded JSON value, or ``None`` when the content cannot be
        parsed (the problem is reported on stdout instead of being raised)
    """
    with open(path) as f:
        json_input = f.read().replace('\r', '').replace('\n', '')
    try:
        return json.loads(json_input)
    except (ValueError, KeyError, TypeError) as e:
        # Python f-strings interpolate with {name}; the previous "${name}"
        # form printed a stray "$" in front of the path and the error text.
        print(f"JSON format error in file {path}: \n {e}")
        # Best-effort contract kept for existing callers: report, don't raise.
        return None
"/opt/scp/services/reinvent/miniconda3/envs/AZdock/bin/python", 8 | "AZDOCK_DEBUG": true 9 | }, 10 | "DOCKSTREAM": { 11 | "DOCKSTREAM_DOCKER_SCRIPT_PATH": "/opt/scp/services/reinvent/docking/azdock/docker.py", 12 | "DOCKSTREAM_ENV_PATH": "/opt/scp/services/reinvent/miniconda3/envs/AZdock/bin/python", 13 | "DOCKSTREAM_DEBUG": true 14 | }, 15 | "ICOLOS": { 16 | "ICOLOS_EXECUTOR_PATH": "//miniconda3/envs/icolosprod/bin/icolos", 17 | "ICOLOS_DEBUG": true 18 | } 19 | }, 20 | "ENVIRONMENTAL_VARIABLES": { 21 | "PIP_URL": "https://pip.dummy.net/bapi/{}/predict", 22 | "PIP_KEY": "something-secret", 23 | "PIP_GET_RESULTS": "https://pip.dummy.net" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/__init__.py: -------------------------------------------------------------------------------- 1 | from reinvent_scoring.scoring.diversity_filters.reinvent_core import IdenticalMurckoScaffold, NoScaffoldFilter, \ 2 | ScaffoldSimilarity, IdenticalTopologicalScaffold 3 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/curriculum_learning/__init__.py: -------------------------------------------------------------------------------- 1 | from reinvent_scoring.scoring.diversity_filters.curriculum_learning.diversity_filter_memory import DiversityFilterMemory 2 | from reinvent_scoring.scoring.diversity_filters.curriculum_learning.diversity_filter_parameters import \ 3 | DiversityFilterParameters 4 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/curriculum_learning/base_diversity_filter.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from 
class BaseDiversityFilter(abc.ABC):
    """Shared state and helpers for the curriculum-learning diversity filters.

    Concrete filters implement ``update_score``; this base class owns the
    filter memory, the chemistry conversion helper and the common
    bookkeeping around them.
    """

    @abc.abstractmethod
    def __init__(self, parameters: DiversityFilterParameters):
        """Store *parameters* and create an empty memory and a conversions helper."""
        self.parameters = parameters
        self._diversity_filter_memory = DiversityFilterMemory()
        self._chemistry = Conversions()

    @abc.abstractmethod
    def update_score(self, update_dto: UpdateDiversityFilterDTO) -> np.array:
        """Return the scores for *update_dto* after applying the filter."""
        raise NotImplementedError("The method 'update_score' is not implemented!")

    def get_memory_as_dataframe(self) -> pd.DataFrame:
        """Return the filter memory as a DataFrame."""
        return self._diversity_filter_memory.get_memory()

    def set_memory_from_dataframe(self, memory: pd.DataFrame):
        """Replace the filter memory with *memory*."""
        self._diversity_filter_memory.set_memory(memory)

    def number_of_smiles_in_memory(self) -> int:
        """Return the number of distinct SMILES held in memory."""
        return self._diversity_filter_memory.number_of_smiles()

    def number_of_scaffold_in_memory(self) -> int:
        """Return the number of distinct scaffolds held in memory."""
        return self._diversity_filter_memory.number_of_scaffolds()

    def update_bucket_size(self, bucket_size: int):
        """Overwrite ``parameters.bucket_size`` with *bucket_size*."""
        self.parameters.bucket_size = bucket_size

    def _calculate_scaffold(self, smile):
        """Return the scaffold for *smile*; provided by scaffold-based subclasses."""
        raise NotImplementedError

    def _smiles_exists(self, smile):
        """Return whether *smile* is already stored in memory."""
        return self._diversity_filter_memory.smiles_exists(smile)

    def _add_to_memory(self, memory_dto: MemoryRecordDTO):
        """Hand *memory_dto* to the memory's ``update`` method."""
        self._diversity_filter_memory.update(memory_dto)

    def _penalize_score(self, scaffold, score):
        """Penalizes the score if the scaffold bucket is full

        Returns ``0.`` when memory holds more than ``parameters.bucket_size``
        entries for *scaffold*; otherwise returns *score* unchanged.
        """
        if self._diversity_filter_memory.scaffold_instances_count(scaffold) > self.parameters.bucket_size:
            score = 0.
        return score

    def _compose_loggable_data(self, dto: UpdateLoggableDataDTO):
        """Join the fields of *dto* into one ``|``-separated metadata string.

        Field order is prior likelihood, likelihood, input, output; absent
        fields are skipped together with their separator.
        """
        # The likelihood fields default to None, so test for None explicitly:
        # a truthiness test would also drop a legitimate value of 0.0 and
        # silently shift the remaining fields of the record.
        prior_likelihood = '' if dto.prior_likelihood is None else f'{dto.prior_likelihood}|'
        likelihood = '' if dto.likelihood is None else f'{dto.likelihood}|'
        # Empty strings are still treated as absent for the textual fields.
        # (Local renamed from `input` so the builtin is no longer shadowed.)
        input_part = f'{dto.input}|' if dto.input else ''
        output_part = f'{dto.output}' if dto.output else ''
        loggable_data = f'{prior_likelihood}{likelihood}{input_part}{output_part}'
        return loggable_data
class DiversityFilter:
    """Factory that builds the diversity filter named by ``parameters.name``."""

    def __new__(cls, parameters: DiversityFilterParameters) -> BaseDiversityFilter:
        """Instantiate and return the filter class registered under ``parameters.name``.

        :param parameters: filter settings; ``name`` selects the implementation
        :return: an instance of the selected filter, built from *parameters*
        :raises KeyError: if ``parameters.name`` is not a known filter name
        """
        all_filters = dict(IdenticalMurckoScaffold=IdenticalMurckoScaffold,
                           NoFilterWithPenalty=NoFilterWithPenalty,
                           IdenticalTopologicalScaffold=IdenticalTopologicalScaffold,
                           ScaffoldSimilarity=ScaffoldSimilarity,
                           NoFilter=NoFilter
                           )
        # The exception must be raised, not used as the `.get` default:
        # a KeyError *instance* returned as the fallback would then be
        # called below and fail with an unrelated
        # "TypeError: 'KeyError' object is not callable".
        try:
            div_filter = all_filters[parameters.name]
        except KeyError:
            raise KeyError(f"Invalid filter name: `{parameters.name}'") from None
        return div_filter(parameters)
float(c.total_score[dto.id]) for c in dto.components} 22 | component_scores = self._include_raw_score(dto.id, component_scores, dto.components) 23 | component_scores[self._sf_component_name.TOTAL_SCORE] = float(dto.score) 24 | if not self.smiles_exists(dto.smile): self._add_to_memory_dataframe(dto, component_scores) 25 | 26 | def _add_to_memory_dataframe(self, dto: MemoryRecordDTO, component_scores: Dict): 27 | data = [] 28 | headers = [] 29 | for name, score in component_scores.items(): 30 | headers.append(name) 31 | data.append(score) 32 | headers.append(self._column_name.STEP) 33 | data.append(dto.step) 34 | headers.append(self._column_name.SCAFFOLD) 35 | data.append(dto.scaffold) 36 | headers.append(self._column_name.SMILES) 37 | data.append(dto.smile) 38 | headers.append(self._column_name.METADATA) 39 | data.append(dto.loggable_data) 40 | new_data = pd.DataFrame([data], columns=headers) 41 | self._memory_dataframe = pd.concat([self._memory_dataframe, new_data], ignore_index=True, sort=False) 42 | 43 | def get_memory(self) -> pd.DataFrame: 44 | return self._memory_dataframe 45 | 46 | def set_memory(self, memory: pd.DataFrame): 47 | self._memory_dataframe = memory 48 | 49 | def smiles_exists(self, smiles: str): 50 | if len(self._memory_dataframe) == 0: 51 | return False 52 | return smiles in self._memory_dataframe[self._column_name.SMILES].values 53 | 54 | def scaffold_instances_count(self, scaffold: str): 55 | return (self._memory_dataframe[self._column_name.SCAFFOLD].values == scaffold).sum() 56 | 57 | def number_of_scaffolds(self): 58 | return len(set(self._memory_dataframe[self._column_name.SCAFFOLD].values)) 59 | 60 | def number_of_smiles(self): 61 | return len(set(self._memory_dataframe[self._column_name.SMILES].values)) 62 | 63 | def _include_raw_score(self, indx: int, component_scores: dict, components: List[ComponentSummary]): 64 | raw_scores = {f'raw_{c.parameters.name}': float(c.raw_score[indx]) for c in components if 65 | c.raw_score is not None} 66 
@dataclass
class DiversityFilterParameters:
    """Settings shared by the curriculum-learning diversity filters."""
    # Filter name; the DiversityFilter factory uses it to pick the filter class.
    name: str
    # Scores >= minscore are the ones the filters record in their memory.
    minscore: float = 0.4
    # Per-scaffold entry count above which BaseDiversityFilter._penalize_score zeroes the score.
    bucket_size: int = 25
    # Similarity threshold; the ScaffoldSimilarity filter describes it as the cut-off
    # for re-using an already stored, similar scaffold as the bucket.
    minsimilarity: float = 0.4
    # Factor NoFilterWithPenalty multiplies into the score of a SMILES already in memory.
    penalty_multiplier: float = 0.5
class IdenticalTopologicalScaffold(BaseDiversityFilter):
    """Penalizes compounds based on exact Topological Scaffolds previously generated."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)

    def update_score(self, dto: UpdateDiversityFilterDTO) -> np.array:
        """Return the total scores of a deep copy of ``dto.score_summary`` after filtering.

        A SMILES already in memory scores 0. Entries scoring at least
        ``minscore`` are stored in memory and then passed through the
        bucket-size penalty for their scaffold.
        """
        summary = deepcopy(dto.score_summary)
        totals = summary.total_score
        raw_smiles = summary.scored_smiles

        for idx in summary.valid_idxs:
            canonical = self._chemistry.convert_to_rdkit_smiles(raw_smiles[idx])
            generic_scaffold = self._calculate_scaffold(canonical)
            if self._smiles_exists(canonical):
                totals[idx] = 0

            if totals[idx] >= self.parameters.minscore:
                if dto.loggable_data:
                    metadata = self._compose_loggable_data(dto.loggable_data[idx])
                else:
                    metadata = ''
                record = MemoryRecordDTO(idx, dto.step, totals[idx], canonical, generic_scaffold, metadata,
                                         summary.scaffold_log)
                self._add_to_memory(record)
                totals[idx] = self._penalize_score(generic_scaffold, totals[idx])

        return totals

    def _calculate_scaffold(self, smile):
        """Return the generic Murcko scaffold SMILES of *smile*, or '' when unavailable."""
        mol = self._chemistry.smile_to_mol(smile)
        if not mol:
            return ''
        try:
            murcko = MurckoScaffold.GetScaffoldForMol(mol)
            generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
            return self._chemistry.mol_to_smiles(generic)
        except ValueError:
            return ''
@dataclass
class MemoryRecordDTO:
    """One candidate record passed to DiversityFilterMemory.update."""
    # Position of the molecule in the scored batch; the memory uses it to index
    # each component's total_score / raw_score.
    id: int
    # Value written to the "Step" column.
    step: int
    # Total score of the molecule; stored under the total-score column.
    score: float
    # SMILES written to the "SMILES" column; a SMILES already present is not stored again.
    smile: str
    # Value written to the "Scaffold" column.
    scaffold: str
    # Value written to the "Metadata" column.
    loggable_data: str
    # Per-component summaries whose scores are recorded alongside the total score.
    components: List[ComponentSummary]
class NoFilterWithPenalty(BaseDiversityFilter):
    """Penalize previously generated compounds."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)

    def update_score(self, dto: UpdateDiversityFilterDTO) -> np.array:
        """Return the total scores of a deep copy of ``dto.score_summary`` after penalizing repeats.

        The score of a SMILES already in memory is multiplied by
        ``parameters.penalty_multiplier``. Afterwards every valid entry
        scoring at least ``parameters.minscore`` is stored in memory, with
        the SMILES itself used as the scaffold.

        :param dto: score summary, step and optional per-molecule loggable data
        :return: the adjusted total scores
        """
        score_summary = deepcopy(dto.score_summary)
        scores = score_summary.total_score
        smiles = score_summary.scored_smiles

        # First pass: canonicalise every valid SMILES and penalize the ones
        # that memory already holds from earlier calls.
        for i in score_summary.valid_idxs:
            smiles[i] = self._chemistry.convert_to_rdkit_smiles(smiles[i])
            if self._smiles_exists(smiles[i]):
                scores[i] = self.parameters.penalty_multiplier * scores[i]

        # Second pass: store the sufficiently good entries.
        for i in score_summary.valid_idxs:
            if scores[i] >= self.parameters.minscore:
                # Loggable data is optional; fall back to an empty string as
                # the other diversity filters do instead of indexing into a
                # missing or empty collection.
                loggable_data = self._compose_loggable_data(dto.loggable_data[i]) if dto.loggable_data else ''
                memory_dto = MemoryRecordDTO(i, dto.step, scores[i], smiles[i], smiles[i], loggable_data,
                                             score_summary.scaffold_log)
                self._add_to_memory(memory_dto)
        return scores
this call is a no-op and the smiles' normal Murcko scaffold will be used in case 34 | # -> usage of the "murcko scaffold filter" is actually a special case, where "minsimilarity" is 1.0 35 | scaffold = self._find_similar_scaffold(scaffold) 36 | scores[i] = 0 if self._smiles_exists(smile) else scores[i] 37 | 38 | if scores[i] >= self.parameters.minscore: 39 | loggable_data = self._compose_loggable_data(dto.loggable_data[i]) if dto.loggable_data else '' 40 | memory_dto = MemoryRecordDTO(i, dto.step, scores[i], smile, scaffold, loggable_data, 41 | score_summary.scaffold_log) 42 | self._add_to_memory(memory_dto) 43 | scores[i] = self._penalize_score(scaffold, scores[i]) 44 | return scores 45 | 46 | def _calculate_scaffold(self, smile): 47 | mol = self._chemistry.smile_to_mol(smile) 48 | if mol: 49 | try: 50 | scaffold = MurckoScaffold.GetScaffoldForMol(mol) 51 | scaffold_smiles = self._chemistry.mol_to_smiles(scaffold) 52 | except ValueError: 53 | scaffold_smiles = '' 54 | else: 55 | scaffold_smiles = '' 56 | return scaffold_smiles 57 | 58 | def _find_similar_scaffold(self, scaffold): 59 | """ 60 | this function tries to find a "similar" scaffold (according to the threshold set by parameter "minsimilarity") and if at least one 61 | scaffold satisfies this criteria, it will replace the smiles' scaffold with the most similar one 62 | -> in effect, this reduces the number of scaffold buckets in the memory (the lower parameter "minsimilarity", the more 63 | pronounced the reduction) 64 | generate a "mol" scaffold from the smile and calculate an atom pair fingerprint 65 | 66 | :param scaffold: scaffold represented by a smiles string 67 | :return: closest scaffold given a certain similarity threshold 68 | """ 69 | if scaffold is not '': 70 | fp = Pairs.GetAtomPairFingerprint(Chem.MolFromSmiles(scaffold)) 71 | 72 | # make a list of the stored fingerprints for similarity calculations 73 | fps = list(self._scaffold_fingerprints.values()) 74 | 75 | # check, if a similar scaffold 
entry already exists and if so, use this one instead 76 | if len(fps) > 0: 77 | similarity_scores = DataStructs.BulkDiceSimilarity(fp, fps) 78 | closest = np.argmax(similarity_scores) 79 | if similarity_scores[closest] >= self.parameters.minsimilarity: 80 | scaffold = list(self._scaffold_fingerprints.keys())[closest] 81 | fp = self._scaffold_fingerprints[scaffold] 82 | 83 | self._scaffold_fingerprints[scaffold] = fp 84 | return scaffold 85 | 86 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/curriculum_learning/update_diversity_filter_dto.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | from reinvent_scoring.scoring.diversity_filters.curriculum_learning.loggable_data_dto import UpdateLoggableDataDTO 5 | from reinvent_scoring.scoring.score_summary import FinalSummary 6 | 7 | 8 | @dataclass 9 | class UpdateDiversityFilterDTO: 10 | score_summary: FinalSummary 11 | loggable_data: List[UpdateLoggableDataDTO] 12 | step: int = 0 13 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/lib_invent/__init__.py: -------------------------------------------------------------------------------- 1 | from reinvent_scoring.scoring.diversity_filters.lib_invent.identical_murcko_scaffold import IdenticalMurckoScaffold 2 | from reinvent_scoring.scoring.diversity_filters.lib_invent.no_filter import NoFilter 3 | from reinvent_scoring.scoring.diversity_filters.lib_invent.no_filter_with_penalty import NoFilterWithPenalty 4 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/lib_invent/base_diversity_filter.py: 
-------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from reinvent_scoring.scoring.diversity_filters.lib_invent.diversity_filter_memory import DiversityFilterMemory 8 | from reinvent_scoring.scoring.diversity_filters.lib_invent.diversity_filter_parameters import DiversityFilterParameters 9 | from reinvent_chemistry.conversions import Conversions 10 | 11 | 12 | class BaseDiversityFilter(abc.ABC): 13 | 14 | @abc.abstractmethod 15 | def __init__(self, parameters: DiversityFilterParameters): 16 | self.parameters = parameters 17 | self._chemistry = Conversions() 18 | self._diversity_filter_memory = DiversityFilterMemory() 19 | 20 | @abc.abstractmethod 21 | def update_score(self, score_summary, sampled_sequences: List, step=0) -> np.array: 22 | raise NotImplementedError("The method 'evaluate' is not implemented!") 23 | 24 | def get_memory_as_dataframe(self) -> pd.DataFrame: 25 | return self._diversity_filter_memory.get_memory() 26 | 27 | def set_memory_from_dataframe(self, memory: pd.DataFrame): 28 | self._diversity_filter_memory.set_memory(memory) 29 | 30 | def number_of_smiles_in_memory(self) -> int: 31 | return self._diversity_filter_memory.number_of_smiles() 32 | 33 | def number_of_scaffold_in_memory(self) -> int: 34 | return self._diversity_filter_memory.number_of_scaffolds() 35 | 36 | def _calculate_scaffold(self, smile): 37 | raise NotImplementedError 38 | 39 | def _smiles_exists(self, smile): 40 | return self._diversity_filter_memory.smiles_exists(smile) 41 | 42 | def _add_to_memory(self, indx: int, score, smile, scaffold, components: List, step): 43 | self._diversity_filter_memory.update(indx, score, smile, scaffold, components, step) 44 | 45 | def _penalize_score(self, scaffold, score): 46 | """Penalizes the score if the scaffold bucket is full""" 47 | if self._diversity_filter_memory.scaffold_instances_count(scaffold) > 
class DiversityFilter:
    """Factory that instantiates the diversity filter selected by `parameters.name`."""

    def __new__(cls, parameters: DiversityFilterParameters) -> BaseDiversityFilter:
        """Create the filter registered under `parameters.name`.

        :param parameters: filter configuration; `name` selects the implementation
        :return: an instance of the selected filter, constructed with `parameters`
        :raises KeyError: if `parameters.name` is not a registered filter name
        """
        all_filters = dict(NoFilter=NoFilter,
                           IdenticalMurckoScaffold=IdenticalMurckoScaffold,
                           NoFilterWithPenalty=NoFilterWithPenalty)
        try:
            div_filter = all_filters[parameters.name]
        except KeyError:
            # Raise the error instead of handing a KeyError *instance* back as the "filter class",
            # which used to surface as "TypeError: 'KeyError' object is not callable".
            raise KeyError(f"Invalid filter name: `{parameters.name}'") from None
        return div_filter(parameters)
component_scores = {c.parameters.name: float(c.total_score[indx]) for c in components} 17 | component_scores = self._include_raw_score(indx, component_scores, components) 18 | component_scores[self._sf_component_name.TOTAL_SCORE] = float(score) 19 | if not self.smiles_exists(smile): 20 | self._add_to_memory_dataframe(step, smile, scaffold, component_scores) 21 | 22 | def _add_to_memory_dataframe(self, step: int, smile: str, scaffold: str, component_scores: {} = None): 23 | data = [] 24 | headers = [] 25 | for name, score in component_scores.items(): 26 | headers.append(name) 27 | data.append(score) 28 | headers.append("Step") 29 | data.append(step) 30 | headers.append("Scaffold") 31 | data.append(scaffold) 32 | headers.append("SMILES") 33 | data.append(smile) 34 | new_data = pd.DataFrame([data], columns=headers) 35 | self._memory_dataframe = pd.concat([self._memory_dataframe, new_data], ignore_index=True, sort=False) 36 | 37 | def get_memory(self) -> pd.DataFrame: 38 | return self._memory_dataframe 39 | 40 | def set_memory(self, memory: pd.DataFrame): 41 | self._memory_dataframe = memory 42 | 43 | def smiles_exists(self, smiles: str): 44 | if len(self._memory_dataframe) == 0: 45 | return False 46 | return smiles in self._memory_dataframe['SMILES'].values 47 | 48 | def scaffold_instances_count(self, scaffold: str): 49 | return (self._memory_dataframe["Scaffold"].values == scaffold).sum() 50 | 51 | def number_of_scaffolds(self): 52 | return len(set(self._memory_dataframe["Scaffold"].values)) 53 | 54 | def number_of_smiles(self): 55 | return len(set(self._memory_dataframe["SMILES"].values)) 56 | 57 | def _include_raw_score(self, indx: int, component_scores: dict, components: List[ComponentSummary]): 58 | raw_scores = {f'raw_{c.parameters.name}': float(c.raw_score[indx]) for c in components if 59 | c.raw_score is not None} 60 | all_scores = {**component_scores, **raw_scores} 61 | return all_scores 62 | 
class IdenticalMurckoScaffold(BaseDiversityFilter):
    """Penalizes compounds based on exact Murcko Scaffolds previously generated."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)

    def update_score(self, score_summary, sampled_sequences: List, step=0) -> np.array:
        """Return the total scores of a copied summary after applying the scaffold filter.

        Works on a deep copy, so the caller's summary stays untouched. `sampled_sequences`
        is accepted for interface compatibility and is not read here.
        """
        summary = deepcopy(score_summary)
        scores, smiles = summary.total_score, summary.scored_smiles

        for idx in summary.valid_idxs:
            canonical = self._chemistry.convert_to_rdkit_smiles(smiles[idx])
            scaffold = self._calculate_scaffold(canonical)
            # compounds that are already stored in memory are scored zero
            if self._smiles_exists(canonical):
                scores[idx] = 0

            if scores[idx] >= self.parameters.minscore:
                self._add_to_memory(idx, scores[idx], canonical, scaffold, summary.scaffold_log, step)
                scores[idx] = self._penalize_score(scaffold, scores[idx])

        return scores

    #TODO: move this to reinvent chemistry
    def _calculate_scaffold(self, smile):
        """Return the non-isomeric Murcko scaffold SMILES of `smile`, or '' on failure."""
        mol = Chem.MolFromSmiles(smile)
        if not mol:
            return ''
        try:
            murcko = MurckoScaffold.GetScaffoldForMol(mol)
            return Chem.MolToSmiles(murcko, isomericSmiles=False)
        except ValueError:
            return ''
class NoFilterWithPenalty(BaseDiversityFilter):
    """Penalize repeatedly generated compounds."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)

    def update_score(self, score_summary, sampled_sequences: List, step=0) -> np.array:
        """Halve the score of compounds already in memory, then store the sufficiently good ones.

        Operates on a deep copy of `score_summary` and returns that copy's total scores.
        """
        summary = deepcopy(score_summary)
        scores, smiles = summary.total_score, summary.scored_smiles

        # First pass: canonicalise every valid SMILES and penalise the ones seen in earlier calls.
        # It has to finish before anything from this batch is written to memory.
        for idx in summary.valid_idxs:
            smiles[idx] = self._chemistry.convert_to_rdkit_smiles(smiles[idx])
            if self._smiles_exists(smiles[idx]):
                scores[idx] = 0.5*scores[idx]

        # Second pass: remember everything that reaches the minimum score.
        for idx in summary.valid_idxs:
            if scores[idx] >= self.parameters.minscore:
                sequence = sampled_sequences[idx]
                decorations = f'{sequence.input}|{sequence.output}'
                self._add_to_memory(idx, scores[idx], smiles[idx], decorations, summary.scaffold_log, step)
        return scores
import IdenticalMurckoScaffold 2 | from reinvent_scoring.scoring.diversity_filters.reinvent_core.no_scaffold_filter import NoScaffoldFilter 3 | from reinvent_scoring.scoring.diversity_filters.reinvent_core.scaffold_similarity import ScaffoldSimilarity 4 | from reinvent_scoring.scoring.diversity_filters.reinvent_core.identical_topological_scaffold import IdenticalTopologicalScaffold 5 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/diversity_filters/reinvent_core/base_diversity_filter.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import List 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from reinvent_scoring.scoring.diversity_filters.reinvent_core.diversity_filter_memory import DiversityFilterMemory 8 | from reinvent_scoring.scoring.diversity_filters.reinvent_core.diversity_filter_parameters import DiversityFilterParameters 9 | from reinvent_scoring.scoring.score_summary import FinalSummary, ComponentSummary 10 | from reinvent_chemistry.conversions import Conversions 11 | 12 | 13 | class BaseDiversityFilter(abc.ABC): 14 | 15 | @abc.abstractmethod 16 | def __init__(self, parameters: DiversityFilterParameters): 17 | self.parameters = parameters 18 | self._diversity_filter_memory = DiversityFilterMemory() 19 | self._chemistry = Conversions() 20 | 21 | @abc.abstractmethod 22 | def update_score(self, score_summary: FinalSummary, step=0) -> np.array: 23 | raise NotImplementedError("The method 'update_score' is not implemented!") 24 | 25 | def get_memory_as_dataframe(self) -> pd.DataFrame: 26 | return self._diversity_filter_memory.get_memory() 27 | 28 | def set_memory_from_dataframe(self, memory: pd.DataFrame): 29 | self._diversity_filter_memory.set_memory(memory) 30 | 31 | def number_of_smiles_in_memory(self) -> int: 32 | return self._diversity_filter_memory.number_of_smiles() 33 | 34 | def 
class DiversityFilter:
    """Factory that instantiates the diversity filter selected by `parameters.name`."""

    def __new__(cls, parameters: DiversityFilterParameters) -> BaseDiversityFilter:
        """Create the filter registered under `parameters.name`.

        :param parameters: filter configuration; `name` selects the implementation
        :return: an instance of the selected filter, constructed with `parameters`
        :raises KeyError: if `parameters.name` is not a registered filter name
        """
        all_filters = dict(IdenticalMurckoScaffold=IdenticalMurckoScaffold,
                           IdenticalTopologicalScaffold=IdenticalTopologicalScaffold,
                           ScaffoldSimilarity=ScaffoldSimilarity,
                           NoFilter=NoScaffoldFilter)
        try:
            div_filter = all_filters[parameters.name]
        except KeyError:
            # Raise the error instead of handing a KeyError *instance* back as the "filter class",
            # which used to surface as "TypeError: 'KeyError' object is not callable".
            raise KeyError(f"Invalid filter name: `{parameters.name}'") from None
        return div_filter(parameters)
@dataclass
class DiversityFilterParameters:
    """Configuration values handed to a diversity filter."""

    # name of the filter implementation to instantiate
    name: str
    # scores below this value are not added to the filter memory
    minscore: float = 0.4
    # a scaffold's score is zeroed once more entries than this are stored for it
    bucket_size: int = 25
    # similarity threshold at or above which an already known scaffold is reused
    minsimilarity: float = 0.4
class IdenticalMurckoScaffold(BaseDiversityFilter):
    """Penalizes compounds based on exact Murcko Scaffolds previously generated."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)

    def update_score(self, score_summary: FinalSummary, step=0) -> np.array:
        """Apply the scaffold filter to a deep copy of `score_summary` and return its total scores."""
        working_copy = deepcopy(score_summary)
        scores = working_copy.total_score
        smiles = working_copy.scored_smiles

        for position in working_copy.valid_idxs:
            rdkit_smiles = self._chemistry.convert_to_rdkit_smiles(smiles[position])
            bucket = self._calculate_scaffold(rdkit_smiles)
            already_stored = self._smiles_exists(rdkit_smiles)
            # a compound that is already in memory is scored zero
            scores[position] = 0 if already_stored else scores[position]

            if scores[position] >= self.parameters.minscore:
                self._add_to_memory(position, scores[position], rdkit_smiles, bucket,
                                    working_copy.scaffold_log, step)
                scores[position] = self._penalize_score(bucket, scores[position])

        return scores

    def _calculate_scaffold(self, smile):
        """Return the non-isomeric Murcko scaffold SMILES of `smile`; '' if it cannot be derived."""
        scaffold_smiles = ''
        mol = Chem.MolFromSmiles(smile)
        if mol:
            try:
                scaffold_smiles = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol), isomericSmiles=False)
            except ValueError:
                scaffold_smiles = ''
        return scaffold_smiles
class IdenticalTopologicalScaffold(BaseDiversityFilter):
    """Penalizes compounds based on exact Topological Scaffolds previously generated."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)

    def update_score(self, score_summary: FinalSummary, step=0) -> np.array:
        """Apply the topological scaffold filter to a deep copy of `score_summary`.

        :return: the total scores of the copied summary after filtering
        """
        summary = deepcopy(score_summary)
        scores, smiles = summary.total_score, summary.scored_smiles

        for idx in summary.valid_idxs:
            canonical = self._chemistry.convert_to_rdkit_smiles(smiles[idx])
            generic_scaffold = self._calculate_scaffold(canonical)
            # known compounds are scored zero
            if self._smiles_exists(canonical):
                scores[idx] = 0
            if scores[idx] >= self.parameters.minscore:
                self._add_to_memory(idx, scores[idx], canonical, generic_scaffold, summary.scaffold_log, step)
                scores[idx] = self._penalize_score(generic_scaffold, scores[idx])
        return scores

    def _calculate_scaffold(self, smile):
        """Return the generic Murcko scaffold of `smile` as non-isomeric SMILES, or '' on failure."""
        mol = Chem.MolFromSmiles(smile)
        if not mol:
            return ''
        try:
            murcko = MurckoScaffold.GetScaffoldForMol(mol)
            generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
            return Chem.MolToSmiles(generic, isomericSmiles=False)
        except ValueError:
            return ''
class ScaffoldSimilarity(BaseDiversityFilter):
    """Penalizes compounds based on atom pair Dice similarity to previously generated Murcko Scaffolds."""

    def __init__(self, parameters: DiversityFilterParameters):
        super().__init__(parameters)
        # Maps a scaffold SMILES to its atom pair fingerprint; every key is one scaffold "bucket".
        self._scaffold_fingerprints = {}

    def update_score(self, score_summary: FinalSummary, step=0) -> np.ndarray:
        """Return a copy of the total scores adjusted by the diversity filter.

        The incoming summary is deep-copied, so the caller's object is not modified.

        :param score_summary: summary holding total scores, scored SMILES and valid indices
        :param step: current step, stored alongside the memory entries
        :return: the adjusted total scores of the copied summary
        """
        score_summary = deepcopy(score_summary)
        scores = score_summary.total_score
        smiles = score_summary.scored_smiles

        for i in score_summary.valid_idxs:
            smile = self._chemistry.convert_to_rdkit_smiles(smiles[i])
            scaffold = self._calculate_scaffold(smile)

            # Check whether an already known scaffold should be used as "bucket" because it is similar
            # enough according to the "minsimilarity" threshold; if not, the compound's own Murcko
            # scaffold is kept.
            # -> the "murcko scaffold filter" is the special case where "minsimilarity" is 1.0
            scaffold = self._find_similar_scaffold(scaffold)

            # A SMILES that is already stored in memory is scored zero.
            scores[i] = 0 if self._smiles_exists(smile) else scores[i]
            if scores[i] >= self.parameters.minscore:
                self._add_to_memory(i, scores[i], smile, scaffold, score_summary.scaffold_log, step)
                scores[i] = self._penalize_score(scaffold, scores[i])
        return scores

    def _calculate_scaffold(self, smile):
        """Return the non-isomeric Murcko scaffold SMILES of `smile`, or '' when it cannot be computed."""
        mol = Chem.MolFromSmiles(smile)
        if not mol:
            return ''
        try:
            scaffold = MurckoScaffold.GetScaffoldForMol(mol)
            return Chem.MolToSmiles(scaffold, isomericSmiles=False)
        except ValueError:
            return ''

    def _find_similar_scaffold(self, scaffold):
        """
        Try to find a "similar" scaffold (according to the threshold set by parameter "minsimilarity").
        If at least one stored scaffold satisfies this criterion, the most similar one replaces the
        given scaffold.
        -> in effect, this reduces the number of scaffold buckets in the memory (the lower parameter
           "minsimilarity", the more pronounced the reduction)

        The scaffold that is returned is registered together with its atom pair fingerprint.

        :param scaffold: scaffold represented by a smiles string
        :return: closest scaffold given a certain similarity threshold
        """
        # Compare by value: an identity check against a literal only works by interning accident.
        if scaffold == '':
            return scaffold

        mol = Chem.MolFromSmiles(scaffold)
        if mol is None:
            # No fingerprint can be computed; keep the scaffold as its own, unregistered bucket.
            return scaffold
        fp = Pairs.GetAtomPairFingerprint(mol)

        # check, if a similar scaffold entry already exists and if so, use this one instead
        if self._scaffold_fingerprints:
            known_scaffolds = list(self._scaffold_fingerprints.keys())
            known_fps = list(self._scaffold_fingerprints.values())
            similarity_scores = DataStructs.BulkDiceSimilarity(fp, known_fps)
            closest = int(np.argmax(similarity_scores))
            if similarity_scores[closest] >= self.parameters.minsimilarity:
                scaffold = known_scaffolds[closest]
                fp = self._scaffold_fingerprints[scaffold]

        self._scaffold_fingerprints[scaffold] = fp
        return scaffold
from dataclasses import dataclass


@dataclass(frozen=True)
class ComponentSpecificParametersEnum:
    """Key names for entries of a scoring component's ``specific_parameters``.

    Every member is a plain, un-annotated class attribute, so ``@dataclass``
    creates no fields; ``frozen=True`` only prevents assigning attributes on
    an instance. Several members share one string value (e.g. the AZDOCK,
    DockStream and ICOLOS ``configuration_path`` / ``debug`` keys).
    """

    SCIKIT = "scikit"
    CLAB_INPUT_FILE = "clab_input_file"
    DESCRIPTOR_TYPE = "descriptor_type"
    TRANSFORMATION = "transformation"

    # structural components
    # ---------
    # AZDOCK
    AZDOCK_CONFPATH = "configuration_path"
    AZDOCK_DOCKERSCRIPTPATH = "docker_script_path"
    AZDOCK_ENVPATH = "environment_path"
    AZDOCK_DEBUG = "debug"

    # DockStream (same key strings as the AZDOCK group above)
    DOCKSTREAM_CONFPATH = "configuration_path"
    DOCKSTREAM_DOCKERSCRIPTPATH = "docker_script_path"
    DOCKSTREAM_ENVPATH = "environment_path"
    DOCKSTREAM_DEBUG = "debug"

    # ICOLOS
    ICOLOS_CONFPATH = "configuration_path"
    ICOLOS_EXECUTOR_PATH = "executor_path"
    ICOLOS_VALUES_KEY = "values_key"
    ICOLOS_DEBUG = "debug"
    #######################

    RAT_PK_PROPERTY = "rat_pk_property"
    CLAB_TOP_20_VALUE = "clab_top_20_value"
    ION_CLASS = "Ion class"  # NOTE: the only key containing a space and a capital letter
    CONTAINER_TYPE = "container_type"

    SMILES = "smiles"
    MODEL_PATH = "model_path"

    #######################
    ARTIFACT = "artifact"

    AIZYNTH_CONFIG_FILE_PATH = "aizynth_config_file_path"

    VALUE_MAPPING = "value_mapping"
from dataclasses import dataclass


# --- enums/container_type_enum.py ---
@dataclass(frozen=True)
class ContainerType:
    """Names of the available predictive-model container kinds."""

    SCIKIT_CONTAINER = "scikit_container"
    OPTUNA_CONTAINER = "optuna_container"


# --- enums/descriptor_types_enum.py ---
@dataclass(frozen=True)
class DescriptorTypesEnum:
    """Names of the molecular descriptor types."""

    ECFP = "ecfp"
    ECFP_COUNTS = "ecfp_counts"
    MACCS_KEYS = "maccs_keys"
    AVALON = "avalon"


# --- enums/diversity_filter_enum.py ---
@dataclass(frozen=True)
class DiversityFilterEnum:
    """Names of the diversity filter variants."""

    IDENTICAL_TOPOLOGICAL_SCAFFOLD = "IdenticalTopologicalScaffold"
    IDENTICAL_MURCKO_SCAFFOLD = "IdenticalMurckoScaffold"
    SCAFFOLD_SIMILARITY = "ScaffoldSimilarity"
    NO_FILTER = "NoFilter"


# --- enums/environmental_variables_enum.py ---
@dataclass(frozen=True)
class EnvironmentalVariablesEnum:
    """String constants for PIP / AiZynth settings.

    NOTE(review): presumably these are environment-variable names; their use
    is not visible in this part of the code base.
    """

    PIP_URL = "PIP_URL"
    PIP_KEY = "PIP_KEY"
    PIP_GET_RESULTS = "PIP_GET_RESULTS"
    ENVIRONMENTAL_VARIABLES = "ENVIRONMENTAL_VARIABLES"
    AIZYNTH_PREDICTION_URL = "AIZYNTH_PREDICTION_URL"
    AIZYNTH_BUILDING_BLOCKS_URL = "AIZYNTH_BUILDING_BLOCKS_URL"
    AIZYNTH_TOKEN = "AIZYNTH_TOKEN"
from dataclasses import dataclass


# --- enums/logging_mode_enum.py ---
@dataclass(frozen=True)
class LoggingModeEnum:
    """Names of the logging modes."""

    LOCAL = "local"
    REMOTE = "remote"


# --- enums/rocs_input_file_types_enum.py ---
@dataclass(frozen=True)
class ROCSInputFileTypesEnum:
    """Names of the ROCS query input kinds."""

    SHAPE_QUERY = "shape_query"
    SDF_QUERY = "sdf"


# --- enums/rocs_similarity_measures_enum.py ---
@dataclass(frozen=True)
class ROCSSimilarityMeasuresEnum:
    """Names of the ROCS similarity measures."""

    TANIMOTO = "Tanimoto"
    REF_TVERSKY = "RefTversky"
    FIT_TVERSKY = "FitTversky"
from dataclasses import dataclass


@dataclass(frozen=True)
class ScoringFunctionComponentNameEnum:
    """Names identifying the individual scoring components.

    Members are plain class attributes (no dataclass fields are generated);
    ``frozen=True`` only blocks attribute assignment on instances.
    """

    # Custom add: the component name introduced by this plugin
    RUSH = "RuSH"


    PARALLEL_ROCS_SIMILARITY = "parallel_rocs_similarity"
    SELECTIVITY = "selectivity"
    PREDICTIVE_PROPERTY = "predictive_property"
    CHEMPROP = "chemprop"
    ROCS_SIMILARITY = "rocs_similarity"
    MATCHING_SUBSTRUCTURE = "matching_substructure"
    TANIMOTO_SIMILARITY = "tanimoto_similarity"
    JACCARD_DISTANCE = "jaccard_distance"
    CUSTOM_ALERTS = "custom_alerts"
    QED_SCORE = "qed_score"
    MOLECULAR_WEIGHT = "molecular_weight"
    NUM_ROTATABLE_BONDS = "num_rotatable_bonds"
    NUM_HBD_LIPINSKI = "num_hbd_lipinski"
    NUM_HBA_LIPINSKI = "num_hba_lipinski"
    NUM_RINGS = "num_rings"
    NUM_AROMATIC_RINGS = "num_aromatic_rings"
    NUM_ALIPHATIC_RINGS = "num_aliphatic_rings"
    TPSA = "tpsa"
    SLOGP = "slogp"
    GRAPH_LENGTH = "graph_length"
    NUMBER_OF_STEREO_CENTERS = "number_of_stereo_centers"
    TOTAL_SCORE = "total_score"  # there is no actual component corresponding to this type
    REACTION_FILTERS = "reaction_filters"

    # Link invent specific
    LINKER_EFFECTIVE_LENGTH = "linker_effective_length"
    LINKER_GRAPH_LENGTH = "linker_graph_length"
    LINKER_LENGTH_RATIO = "linker_length_ratio"
    LINKER_NUM_RINGS = "linker_num_rings"
    LINKER_NUM_ALIPHATIC_RINGS = "linker_num_aliphatic_rings"
    LINKER_NUM_AROMATIC_RINGS = "linker_num_aromatic_rings"
    LINKER_NUM_SP_ATOMS = "linker_num_sp_atoms"
    LINKER_NUM_SP2_ATOMS = "linker_num_sp2_atoms"
    LINKER_NUM_SP3_ATOMS = "linker_num_sp3_atoms"
    LINKER_NUM_HBA = "linker_num_hba"
    LINKER_NUM_HBD = "linker_num_hbd"
    LINKER_MOL_WEIGHT = "linker_mol_weight"
    LINKER_RATIO_ROTATABLE_BONDS = "linker_ratio_rotatable_bonds"

    # NOTE: components below are AZ specific
    SA_SCORE = "sa_score"
    AZDOCK = "azdock"
    DOCKSTREAM = "dockstream"
    ICOLOS = "icolos"
    AZ_LOGD74_PIP = "azlogd74"
    CACO2_INTR_PIP = "caco2-intrinsic-papp"
    CACO2_EFFLUX_PIP = "caco2-efflux"
    HH_CLINT_PIP = "hh-clint"
    HLM_CLINT_PIP = "hlm-clint"
    RH_CLINT_PIP = "rh-clint"
    SOLUBILITY_DD_PIP = "solubility-dd"
    HERG_PIP = "herg"
    KPUU_PIP = "rat-kpuu-brain"
    RAT_PK_PIP = "rat-pk"
    CLAB_TOP_20 = "clab_top_20"
    RA_SCORE = "rascore"
    AIZYNTH = "aizynth"
    QPTUNA_PIP_MODEL = "optuna-multi"
    THP1_CYTOTOXICITY = "thp1-class"
    GENERAL_REST = "general_rest"
import math
from typing import List

import numpy as np

from reinvent_scoring.scoring.component_parameters import ComponentParameters
from reinvent_scoring.scoring.function.base_scoring_function import BaseScoringFunction
from reinvent_scoring.scoring.score_summary import ComponentSummary


class CustomProduct(BaseScoringFunction):
    """Combine component scores as a weighted geometric mean.

    Every non-penalty component score is raised to ``weight / sum(weights)``
    and the results are multiplied element-wise.
    """

    def __init__(self, parameters: List[ComponentParameters], parallel=False):
        super().__init__(parameters, parallel)

    def _calculate_pow(self, values, weight):
        """Return ``value ** weight`` for each value as a float32 array.

        ``math.pow`` is applied per element, so a negative value combined with
        a fractional exponent raises ``ValueError`` instead of producing NaN.
        """
        y = [math.pow(value, weight) for value in values]
        return np.array(y, dtype=np.float32)

    def _get_all_weights(self, summaries: List[ComponentSummary]) -> float:
        """Return the summed weight of all non-penalty components."""
        return sum(
            summary.parameters.weight
            for summary in summaries
            if not self._component_is_penalty(summary)
        )

    def _compute_non_penalty_components(self, summaries: List[ComponentSummary], smiles: List[str]):
        """Return one aggregated score per SMILES.

        :param summaries: component summaries; penalty components are skipped
        :param smiles: scored SMILES; only its length is used
        :return: float32 array of length ``len(smiles)``
        """
        product = np.full(len(smiles), 1, dtype=np.float32)
        all_weights = self._get_all_weights(summaries)

        if all_weights == 0:
            # No non-penalty components, or all their weights are zero: there is
            # nothing to normalise by, so return ones (same as CustomSum)
            # instead of dividing by zero below.
            return product

        for summary in summaries:
            if not self._component_is_penalty(summary):
                comp_pow = self._calculate_pow(summary.total_score, summary.parameters.weight / all_weights)
                product = product * comp_pow

        return product
from abc import ABC, abstractmethod

from typing import Dict, List


class BaseModelContainer(ABC):
    """Abstract interface for wrappers around a predictive activity model."""

    @abstractmethod
    def predict(self, molecules: List, parameters: Dict):
        """Predict activities for ``molecules``; must be overridden.

        :param molecules: molecules to score
        :param parameters: extra prediction parameters
        :raises NotImplementedError: when reached through ``super().predict``
        """
        raise NotImplementedError("'predict' method is not implemented !")
from typing import Any, Dict, List

import numpy as np
from reinvent_chemistry.conversions import Conversions
from reinvent_chemistry.descriptors import Descriptors

from reinvent_scoring.scoring.enums.container_type_enum import ContainerType
from reinvent_scoring.scoring.enums.component_specific_parameters_enum import ComponentSpecificParametersEnum
from reinvent_scoring.scoring.predictive_model.base_model_container import BaseModelContainer


# --- predictive_model/model_container.py ---
class ModelContainer:
    """Factory: ``ModelContainer(model, params)`` returns a concrete container.

    ``__new__`` never returns a ``ModelContainer`` instance. It returns a
    ``ScikitModelContainer`` when the ``container_type`` entry is missing or
    equals ``"scikit_container"``, and an ``OptunaModelContainer`` for any
    other value.
    """

    def __new__(cls, activity_model: Any, specific_parameters: Dict) -> BaseModelContainer:
        _component_specific_parameters = ComponentSpecificParametersEnum()
        _container_type = ContainerType()
        container_type = specific_parameters.get(_component_specific_parameters.CONTAINER_TYPE,
                                                 _container_type.SCIKIT_CONTAINER)
        if container_type == _container_type.SCIKIT_CONTAINER:
            # The "scikit" entry (model type) is mandatory here; KeyError if absent.
            container_instance = ScikitModelContainer(activity_model,
                                                      specific_parameters[_component_specific_parameters.SCIKIT],
                                                      specific_parameters)
        else:
            # TODO: possibly a good spot for error try/catching
            container_instance = OptunaModelContainer(activity_model)

        return container_instance


# --- predictive_model/optuna_container.py ---
class OptunaModelContainer(BaseModelContainer):
    """Container for a model exposing ``predict_from_smiles``."""

    def __init__(self, activity_model):
        """
        :type activity_model: scikit-learn object
        """
        self._activity_model = activity_model
        self._conversions = Conversions()

    def predict(self, molecules: List[Any], parameters: Dict) -> np.ndarray:
        """
        Converts the molecules to SMILES and predicts activities.

        :param molecules: molecules to score
        :param parameters: unused by this container
        :return: model predictions; an empty 1-D array for empty input
        """

        if len(molecules) == 0:
            # np.empty([]) would be a 0-d array holding one uninitialised value.
            return np.empty(0)

        smiles = [self._conversions.mol_to_smiles(mol) for mol in molecules]
        activity = self._activity_model.predict_from_smiles(smiles)

        return activity


# --- predictive_model/scikit_model_container.py ---
class ScikitModelContainer(BaseModelContainer):
    """Container for a scikit-learn style model fed with molecular descriptors."""

    def __init__(self, activity_model, model_type: str, specific_parameters: Dict):
        """
        :type activity_model: scikit-learn type of model object
        :type model_type: can be "classification" or "regression"
        :param specific_parameters: passed to ``Descriptors.load_descriptor``
        """
        self._activity_model = activity_model
        self._model_type = model_type
        self._molecules_to_descriptors = self._load_descriptor(specific_parameters)

    def predict(self, molecules: List, parameters: Dict) -> np.ndarray:
        """
        Takes as input RDKit molecules and uses a pickled scikit-learn model to predict activities.

        :param molecules: This is a list of rdkit.Chem.Mol objects
        :param parameters: Those are descriptor-specific parameters.
        :return: numpy.array with activity predictions
        """
        return self.predict_from_mols(molecules, parameters)

    def predict_from_mols(self, molecules: List, parameters: dict):
        """Compute descriptors for ``molecules`` and predict from them.

        Returns an empty 1-D array when ``molecules`` is empty.
        """
        if len(molecules) == 0:
            # np.empty([]) would be a 0-d array holding one uninitialised value.
            return np.empty(0)
        fps = self._molecules_to_descriptors(molecules, parameters)
        activity = self.predict_from_fingerprints(fps)
        return activity

    def predict_from_fingerprints(self, fps):
        """Predict from precomputed descriptors.

        ``"regression"`` models use ``predict``; every other model type uses
        column 1 of ``predict_proba``.
        """
        if self._model_type == "regression":
            activity = self._activity_model.predict(fps)
        else:
            predictions = self._activity_model.predict_proba(fps)
            activity = predictions[:, 1]

        return activity

    def _load_descriptor(self, parameters: Dict):
        """Return the descriptor callable chosen by ``Descriptors.load_descriptor``."""
        descriptors = Descriptors()
        descriptor = descriptors.load_descriptor(parameters)
        return descriptor
from typing import List

import numpy as np

from reinvent_scoring.scoring.component_parameters import ComponentParameters
from reinvent_scoring.scoring.score_components.base_score_component import BaseScoreComponent
from reinvent_scoring.scoring.score_summary import ComponentSummary
from aizynthfinder.aizynthfinder import AiZynthExpander


class BuildingBlockAvailabilityComponent(BaseScoreComponent):
    """Score molecules by one-step building block availability (AiZynth).

    The score of a molecule is the fraction of reactants found in stock.
    When several expansions (alternative reactant sets) exist, the highest
    fraction is used; with no expansion at all the score is 0.

    Uses the AiZynthFinder expansion interface:
    https://molecularai.github.io/aizynthfinder/python_interface.html#expansion-interface
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)

        key = self.component_specific_parameters.AIZYNTH_CONFIG_FILE_PATH
        self._expander = self._set_up_expander(self.parameters.specific_parameters[key])

    def calculate_score(self, molecules: List, step=-1) -> ComponentSummary:
        """Score ``molecules``; the raw and total score are the same array."""
        smiles = self._chemistry.mols_to_smiles(molecules)
        scores = self._score_smiles(smiles)
        return ComponentSummary(total_score=scores, parameters=self.parameters, raw_score=scores)

    def _score_one_smi(self, smi: str) -> float:
        """Return the best in-stock reactant fraction over all expansions of ``smi``."""
        stock = self._expander.config.stock
        expansions = self._expander.do_expansion(smi)

        fractions = []
        for expansion in expansions:
            reactants = expansion[0].reactants[0]
            if len(reactants) == 0:
                # Template not applicable or expansion failed: lowest score.
                fractions.append(0)
                continue
            hits = sum(mol in stock for mol in reactants)
            fractions.append(hits / len(reactants))

        # No expansions at all also yields the lowest score.
        return max(fractions, default=0)

    def _score_smiles(self, smiles: List[str]) -> np.ndarray:
        """Score every SMILES independently."""
        return np.array([self._score_one_smi(smi) for smi in smiles])

    def _set_up_expander(self, configfile: str) -> AiZynthExpander:
        """Create an expander and select the first policy/stock of each kind."""
        aizynth = AiZynthExpander(configfile=configfile)
        aizynth.expansion_policy.select_first()
        aizynth.filter_policy.select_first()
        aizynth.config.stock.select_first()
        return aizynth
from abc import abstractmethod
from typing import List

import numpy as np

from reinvent_scoring.scoring.component_parameters import ComponentParameters
from reinvent_scoring.scoring.score_components import BaseScoreComponent
from reinvent_scoring.scoring.score_summary import ComponentSummary


class BaseConsoleInvokedComponent(BaseScoreComponent):
    """Base for components whose score comes from ``_calculate_score`` and a
    command built by ``_create_command``; both are left to subclasses.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)

    def calculate_score_for_step(self, molecules: List, step=-1) -> ComponentSummary:
        """Score ``molecules`` and forward ``step`` to ``calculate_score``."""
        return self.calculate_score(molecules, step)

    def calculate_score(self, molecules: List, step=-1) -> ComponentSummary:
        """Convert molecules to SMILES and wrap the subclass scores in a summary."""
        smiles = self._chemistry.mols_to_smiles(molecules)
        total, raw = self._calculate_score(smiles, step)
        return ComponentSummary(total_score=total, parameters=self.parameters, raw_score=raw)

    def _get_step_string(self, step) -> str:
        """Return a double-quoted step tag: ``""`` for -1, else ``"eNNNN_"``."""
        if step == -1:
            return "\"\""
        padded = str(step).zfill(4)
        return f"\"e{padded}_\""

    @abstractmethod
    def _calculate_score(self, smiles: List[str], step) -> np.array:
        """Return the ``(score, raw_score)`` pair unpacked by ``calculate_score``."""
        raise NotImplementedError("_calculate_score method is not implemented")

    @abstractmethod
    def _create_command(self, step, input_json_path: str, output_json_path: str):
        """Build the command to run; the return shape is defined by subclasses."""
        raise NotImplementedError("_create_command method is not implemented")
from abc import abstractmethod
from typing import List

import numpy as np
from reinvent_chemistry.link_invent.linker_descriptors import LinkerDescriptors

from reinvent_scoring.scoring.component_parameters import ComponentParameters
from reinvent_scoring.scoring.score_components import BaseScoreComponent
from reinvent_scoring.scoring.score_summary import ComponentSummary


class BaseLinkInventComponent(BaseScoreComponent):
    """Base for linker-property components; subclasses supply the property."""

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self._linker_descriptor = LinkerDescriptors()

    def calculate_score(self, labeled_molecules: List, step=-1) -> ComponentSummary:
        """Score the labeled molecules; ``step`` is accepted but not used."""
        transformed, raw = self._calculate_score(labeled_molecules)
        return ComponentSummary(total_score=transformed, parameters=self.parameters, raw_score=raw)

    def _calculate_score(self, query_labeled_mols) -> np.array:
        """Return ``(transformed_scores, raw_scores)`` as float32 arrays.

        A molecule whose property computation raises ``ValueError`` gets a
        raw score of 0.0.
        """

        def property_or_zero(mol):
            try:
                return self._calculate_linker_property(mol)
            except ValueError:
                return 0.0

        raw_values = [property_or_zero(mol) for mol in query_labeled_mols]
        transformation_key = self.component_specific_parameters.TRANSFORMATION
        transform_params = self.parameters.specific_parameters.get(transformation_key, {})
        transformed_values = self._transformation_function(raw_values, transform_params)
        return np.array(transformed_values, dtype=np.float32), np.array(raw_values, dtype=np.float32)

    @abstractmethod
    def _calculate_linker_property(self, labeled_mol):
        """Return the raw linker property of one labeled molecule."""
        raise NotImplementedError("_calculate_linker_property method is not implemented")
class LinkerLengthRatio(BaseLinkInventComponent):
    """Link-invent score component backed by the ``length_ratio`` linker descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_linker_property(self, labeled_mol):
        """Return ``length_ratio`` for ``labeled_mol`` from ``self._linker_descriptor``."""
        descriptor_fn = self._linker_descriptor.length_ratio
        return descriptor_fn(labeled_mol)
class LinkerNumAromaticRings(BaseLinkInventComponent):
    """Link-invent score component backed by the ``num_aromatic_rings`` linker descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_linker_property(self, labeled_mol):
        """Return ``num_aromatic_rings`` for ``labeled_mol`` from ``self._linker_descriptor``."""
        descriptor_fn = self._linker_descriptor.num_aromatic_rings
        return descriptor_fn(labeled_mol)
class LinkerNumHBD(BaseLinkInventComponent):
    """Link-invent score component backed by the ``num_hbd`` linker descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_linker_property(self, labeled_mol):
        """Return ``num_hbd`` for ``labeled_mol`` from ``self._linker_descriptor``."""
        descriptor_fn = self._linker_descriptor.num_hbd
        return descriptor_fn(labeled_mol)
class LinkerNumSP3Atoms(BaseLinkInventComponent):
    """Link-invent score component backed by the ``num_sp3_atoms`` linker descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_linker_property(self, labeled_mol):
        """Return ``num_sp3_atoms`` for ``labeled_mol`` from ``self._linker_descriptor``."""
        descriptor_fn = self._linker_descriptor.num_sp3_atoms
        return descriptor_fn(labeled_mol)
class LinkerRatioRotatableBonds(BaseLinkInventComponent):
    """Link-invent score component backed by the ``ratio_rotatable_bonds`` linker descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_linker_property(self, labeled_mol):
        """Return ``ratio_rotatable_bonds`` for ``labeled_mol`` from ``self._linker_descriptor``."""
        descriptor_fn = self._linker_descriptor.ratio_rotatable_bonds
        return descriptor_fn(labeled_mol)
class BasePhysChemComponent(BaseScoreComponent):
    """Shared base for score components that evaluate one phys-chem descriptor.

    Subclasses provide the descriptor value through
    ``_calculate_phys_chem_property``. This class gathers the values for a
    batch, runs them through the configured transformation and packs both the
    transformed and the raw values into a ``ComponentSummary``.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        # Descriptor calculator that subclasses query in _calculate_phys_chem_property.
        self._phys_chem_descriptors = PhysChemDescriptors()

    def calculate_score(self, molecules: List, step=-1) -> ComponentSummary:
        """Score a batch of molecules.

        ``step`` is accepted for interface compatibility; it is not used here.
        """
        transformed, raw = self._calculate_score(molecules)
        return ComponentSummary(total_score=transformed, parameters=self.parameters, raw_score=raw)

    def _calculate_score(self, query_mols) -> np.array:
        """Return the pair ``(transformed_scores, raw_scores)`` as float32 arrays.

        A molecule whose descriptor calculation raises ``ValueError``
        contributes a raw value of ``0.0``.
        """
        raw_values = []
        for query_mol in query_mols:
            try:
                raw_values.append(self._calculate_phys_chem_property(query_mol))
            except ValueError:
                raw_values.append(0.0)

        # Transformation settings are optional; an empty dict is passed when absent.
        specific = self.parameters.specific_parameters
        transform_params = specific.get(self.component_specific_parameters.TRANSFORMATION, {})
        transformed_values = self._transformation_function(raw_values, transform_params)

        transformed_array = np.array(transformed_values, dtype=np.float32)
        raw_array = np.array(raw_values, dtype=np.float32)
        return transformed_array, raw_array

    @abstractmethod
    def _calculate_phys_chem_property(self, mol):
        """Return the raw descriptor value for one molecule (subclass hook)."""
        raise NotImplementedError("_calculate_phys_chem_property method is not implemented")
class GraphLength(BasePhysChemComponent):
    """Phys-chem score component backed by the ``maximum_graph_length`` descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_phys_chem_property(self, mol):
        """Return ``maximum_graph_length`` for ``mol`` from ``self._phys_chem_descriptors``."""
        descriptor_fn = self._phys_chem_descriptors.maximum_graph_length
        return descriptor_fn(mol)
class MolWeight(BasePhysChemComponent):
    """Phys-chem score component backed by the ``mol_weight`` descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_phys_chem_property(self, mol):
        """Return ``mol_weight`` for ``mol`` from ``self._phys_chem_descriptors``."""
        descriptor_fn = self._phys_chem_descriptors.mol_weight
        return descriptor_fn(mol)
class NumRings(BasePhysChemComponent):
    """Phys-chem score component backed by the ``number_of_rings`` descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_phys_chem_property(self, mol):
        """Return ``number_of_rings`` for ``mol`` from ``self._phys_chem_descriptors``."""
        descriptor_fn = self._phys_chem_descriptors.number_of_rings
        return descriptor_fn(mol)
class SlogP(BasePhysChemComponent):
    """Phys-chem score component backed by the ``slog_p`` descriptor."""

    def __init__(self, parameters: ComponentParameters):
        # No extra state: construction is delegated entirely to the base class.
        super().__init__(parameters)

    def _calculate_phys_chem_property(self, mol):
        """Return ``slog_p`` for ``mol`` from ``self._phys_chem_descriptors``."""
        descriptor_fn = self._phys_chem_descriptors.slog_p
        return descriptor_fn(mol)
class BasePiPModelBatchingComponent(BaseRESTComponent):
    """REST score component that submits a batch asynchronously and polls for the result.

    The POST request is expected to answer with HTTP 202; the result is then
    fetched with GET requests against the URL built from the ``Location``
    response header, honouring the ``retry-after`` header between polls.
    Subclasses implement ``_parse_single_compound`` to extract one value from
    one compound entry of the response.
    """

    def __init__(self, parameters: ComponentParameters):
        # Must be set before super().__init__, which calls _create_url/_create_header.
        self._environment_keys = EnvironmentalVariablesEnum()
        super().__init__(parameters)
        self._get_header = self._create_get_header()

    def _execute_request(self, request_url, data, header) -> dict:
        """POST ``data`` and return the decoded JSON of the polled result.

        Raises:
            ValueError: if the POST does not answer with status 202, or if
                polling for the result fails (see ``_get_results``).
        """
        request = requests.post(request_url, json=data, headers=header)
        if request.status_code != 202:
            raise ValueError(
                f" Status: {request.status_code} Reason: ({request.reason})."
                f"Response content: {request.content}\n"
                f"Response content: {request.text}"
            )
        response = self._get_results(request)

        return response.json()

    def _parse_response(self, response_json: dict, data_size: int) -> np.array:
        """Return a float32 array of length ``data_size`` with one value per compound.

        Every slot starts as NaN. A compound whose ``id`` or value cannot be
        parsed, or whose ``id`` falls outside the array, keeps NaN and parsing
        continues with the next compound.
        """
        compounds = response_json['jsonData']['data']
        results_raw = np.full(data_size, np.nan, dtype=np.float32)

        try:
            compound_iter = iter(compounds)
        except TypeError:
            # Payload carried no iterable compound list; leave everything NaN.
            return results_raw

        for compound in compound_iter:
            try:
                index = int(compound["id"])
                results_raw[index] = self._parse_single_compound(compound)
            except (ValueError, TypeError, KeyError, IndexError):
                # If parsing failed, keep value NaN for this compound and continue.
                continue

        return results_raw

    @abstractmethod
    def _parse_single_compound(self, compound):
        """Extract the numeric value from one compound entry (subclass hook)."""
        raise NotImplementedError("_parse_compound method is not implemented")

    def _format_data(self, smiles: List[str]) -> dict:
        """Build the request payload; each SMILES gets its list position as a string ``id``."""
        molecules = [{"molData": smi, "id": f"{i}"} for i, smi in enumerate(smiles)]
        data = {
            "jsonData": {
                "data": molecules,
                "metadata": {
                    "molFormat": "smiles"
                },
                "parameters": {

                }
            }
        }
        return data

    def _create_url(self, async_path: str) -> str:
        """Format the configured ``PIP_URL`` template with ``async_path``."""
        pip_url = self._get_enviornment_variable(self._environment_keys.PIP_URL)
        request_url = pip_url.format(async_path)
        return request_url

    def _create_get_url(self, component_name) -> str:
        """Format the configured ``PIP_GET_RESULTS`` template with ``component_name``."""
        pip_url = self._get_enviornment_variable(self._environment_keys.PIP_GET_RESULTS)
        request_url = pip_url.format(component_name)
        return request_url

    def _create_header(self) -> dict:
        """Return the headers for the asynchronous POST request."""
        pip_key = self._get_enviornment_variable(self._environment_keys.PIP_KEY)

        header = {
            'Content-Type': 'application/vnd.az.batch.v1+json', 'x-api-key': pip_key,
            'Accept': 'application/vnd.az.resultset.v1+json',
            'Prefer': 'respond-async'
        }
        return header

    def _create_get_header(self) -> dict:
        """Return the headers for the polling GET requests."""
        pip_key = self._get_enviornment_variable(self._environment_keys.PIP_KEY)
        header = {'Content-Type': 'application/json', 'x-api-key': pip_key}
        return header

    def _get_enviornment_variable(self, variable: str) -> str:
        """Look ``variable`` up in ``os.environ``, falling back to the scoring config."""
        try:
            return os.environ[variable]
        except KeyError:
            return self._retrieve_pip_key_from_config(variable)

    def _retrieve_pip_key_from_config(self, variable: str) -> str:
        """Read ``variable`` from the environmental-variables section of the scoring config.

        Raises:
            KeyError: if the section or the variable is missing.
        """
        try:
            environmental_variables = reinvent_scoring_config[self._environment_keys.ENVIRONMENTAL_VARIABLES]
            return environmental_variables[variable]
        except KeyError as ex:
            # Chain the original lookup failure so the missing key stays visible.
            raise KeyError(f"Key {variable} not found in reinvent scoring config") from ex

    def _get_results(self, response):
        """Poll the result URL until the service stops sending a positive ``retry-after``.

        Args:
            response: the response of the initial POST; its ``Location``
                header is used to build the polling URL.

        Returns:
            The last response received from the polling GET request.

        Raises:
            ValueError: on any failure while polling, including exhausting the
                300-second budget of accumulated ``retry-after`` delays. The
                underlying exception is attached as ``__cause__``.
        """
        async_location = response.headers.get('Location', None)
        url = self._create_get_url(async_location)

        time_threshold = 300
        time_delay = 1

        while time_delay > 0:
            try:
                response = requests.get(url=url, headers=self._get_header)
                time_delay = int(response.headers.get('retry-after', 0))
                time_threshold -= time_delay
                # Check the budget before sleeping so no time is wasted once it is spent.
                if time_threshold <= 0:
                    raise TimeoutError('Terminated since retrieving results took too long')
                time.sleep(time_delay)
            except Exception as ex:
                # Callers have always seen ValueError here; keep the type but preserve
                # the cause, and let KeyboardInterrupt/SystemExit propagate untouched.
                raise ValueError(
                    f" Status: {response.status_code} Reason: ({response.reason})."
                    f"Response content: {response.content}\n"
                    f"Response content: {response.text}"
                ) from ex
        return response
43 | finally: 44 | return results_raw 45 | 46 | @abstractmethod 47 | def _parse_single_compound(self, compound): 48 | raise NotImplementedError("_parse_compound method is not implemented") 49 | 50 | def _format_data(self, smiles: List[str]) -> dict: 51 | molecules = [{"molData": smi, "id": f"{i}"} for i, smi in enumerate(smiles)] 52 | data = { 53 | "jsonData": { 54 | "data": molecules, 55 | "metadata": { 56 | "molFormat": 57 | "smiles" 58 | }, 59 | "parameters": { 60 | 61 | } 62 | } 63 | } 64 | return data 65 | 66 | def _create_url(self, async_path: str) -> str: 67 | pip_url = self._get_enviornment_variable(self._environment_keys.PIP_URL) 68 | request_url = pip_url.format(async_path) 69 | return request_url 70 | 71 | def _create_header(self) -> dict: 72 | pip_key = self._get_enviornment_variable(self._environment_keys.PIP_KEY) 73 | 74 | header = { 75 | 'Content-Type': 'application/vnd.az.batch.v1+json', 'x-api-key': pip_key, 76 | 'Accept': 'application/vnd.az.resultset.v1+json' 77 | } 78 | return header 79 | 80 | def _get_enviornment_variable(self, variable: str) -> str: 81 | try: 82 | return os.environ[variable] 83 | except KeyError: 84 | return self._retrieve_pip_key_from_config(variable) 85 | 86 | def _retrieve_pip_key_from_config(self, variable: str) -> str: 87 | try: 88 | environmental_variables = reinvent_scoring_config[self._environment_keys.ENVIRONMENTAL_VARIABLES] 89 | return environmental_variables[variable] 90 | except KeyError as ex: 91 | raise KeyError(f"Key {variable} not found in reinvent scoring config") 92 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT3.2/reinvent_scoring/scoring/score_components/pip/base_rest_component.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from reinvent_scoring.scoring.component_parameters import ComponentParameters 7 | 
class BaseRESTComponent(BaseScoreComponent):
    """Template for score components whose values come from a REST service.

    Concrete classes supply the URL, the headers, the payload format, the request
    execution and the response parsing; this class wires those steps together.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        component_type = self.parameters.component_type
        self._request_url = self._create_url(component_type)
        self._request_header = self._create_header()

    def calculate_score(self, molecules: List, step=-1) -> ComponentSummary:
        """Score ``molecules`` and wrap transformed and raw values in a ComponentSummary."""
        smiles = self._chemistry.mols_to_smiles(molecules)
        transformed, raw = self._score_smiles(smiles)
        return ComponentSummary(total_score=transformed, parameters=self.parameters, raw_score=raw)

    def _score_smiles(self, smiles: List[str]) -> np.array:
        """Query the service and return the pair ``(transformed scores, raw scores)``."""
        payload = self._post_request(self._request_url, smiles, self._request_header)
        raw = self._parse_response(payload, len(smiles))
        return self._apply_score_transformation(raw), raw

    def _post_request(self, url, smiles, header):
        """Format ``smiles`` into a request body and send it to ``url``."""
        return self._execute_request(url, self._format_data(smiles), header)

    @abstractmethod
    def _format_data(self, smiles: List[str]) -> dict:
        raise NotImplementedError("_format_data method is not implemented")

    @abstractmethod
    def _execute_request(self, request_url, data, header) -> dict:
        raise NotImplementedError("_execute_request method is not implemented")

    @abstractmethod
    def _parse_response(self, response_json: dict, data_size: int) -> np.array:
        raise NotImplementedError("_parse_response method is not implemented")

    def _apply_score_transformation(self, results_raw: np.array) -> np.array:
        """Transform the non-NaN entries of ``results_raw``; NaN entries become 0.

        The result is a float32 array with the same length as ``results_raw``.
        """
        is_valid = ~np.isnan(results_raw)
        valid_values = results_raw[is_valid]
        transformation_key = self.component_specific_parameters.TRANSFORMATION
        transform_params = self.parameters.specific_parameters.get(transformation_key, {})
        transformed_values = self._transformation_function(valid_values, transform_params)

        # Start from zeros so positions that held NaN end up as 0.
        results = np.zeros(len(results_raw), dtype=np.float32)
        results[is_valid] = transformed_values
        return results

    @abstractmethod
    def _create_url(self, component_name) -> str:
        raise NotImplementedError("_create_url method is not implemented")

    @abstractmethod
    def _create_header(self) -> dict:
        raise NotImplementedError("_create_header method is not implemented")
class QptunaPiPModelComponent(PiPPredictionComponent):
    """PiP prediction component whose request also carries an ``artifact`` parameter."""

    def _format_data(self, smiles: List[str]) -> dict:
        """Build the request payload for ``smiles``.

        Each molecule gets its position in ``smiles`` (as a string) as id; the
        ``artifact`` request parameter is read from the component's specific
        parameters and is ``None`` when it is not configured.
        """
        molecules = []
        for position, smi in enumerate(smiles):
            molecules.append({"molData": smi, "id": str(position)})

        artifact_key = self.component_specific_parameters.ARTIFACT
        request_parameters = {"artifact": self.parameters.specific_parameters.get(artifact_key)}

        return {
            "jsonData": {
                "data": molecules,
                "metadata": {"molFormat": "smiles"},
                "parameters": request_parameters,
            }
        }
class GeneralRESTComponent(BaseRESTComponent):
    """REST score component for a configurable prediction server.

    Server location, predictor id/version and (optionally) request headers are read
    from the component's specific parameters.
    """

    def __init__(self, parameters: ComponentParameters):
        # These attributes are set before the parent constructor runs because it
        # calls ``_create_url`` and ``_create_header``, which read them.
        self._server_url = parameters.specific_parameters["server_url"]
        self._server_port = parameters.specific_parameters["server_port"]
        self._server_endpoint = parameters.specific_parameters["server_endpoint"]

        self._predictor_id = parameters.specific_parameters["predictor_id"]
        self._predictor_version = parameters.specific_parameters["predictor_version"]

        self._request_header = parameters.specific_parameters.get("header", self._default_header)

        super().__init__(parameters)

    @property
    def _default_header(self):
        """Headers used when the specific parameters do not provide ``"header"``."""
        return {
            'accept': 'application/json',
            'Content-Type': 'application/json',
        }

    def _execute_request(self, request_url, data, header) -> dict:
        """POST ``data`` as JSON (predictor selection goes into the query string).

        Raises:
            ValueError: if the HTTP status code is anything other than 200.
        """
        params = self._create_params()
        response = requests.post(request_url, json=data, headers=header, params=params)
        if response.status_code != 200:
            raise ValueError(
                f" Status: {response.status_code} Reason: ({response.reason})."
                f"Response content: {response.content}\n"
                f"Response content: {response.text}"
            )
        return response.json()

    def _parse_response(self, response_json: dict, data_size: int) -> np.array:
        """Turn the service response into a float32 array of length ``data_size``.

        Each entry's ``"query_id"`` is used as the index of its slot. Slots for
        compounds that are absent from ``successes_list`` or cannot be parsed stay NaN.

        Raises:
            KeyError: if the response lacks ``['output']['successes_list']``.
        """
        compounds = response_json['output']["successes_list"]
        results_raw = np.full(data_size, np.nan, dtype=np.float32)

        try:
            for compound in compounds:
                try:
                    index = int(compound["query_id"])
                    results_raw[index] = self._parse_single_compound(compound)
                except (ValueError, TypeError, KeyError, IndexError):
                    # Parsing failed (or the id is outside the batch): keep NaN for this
                    # compound and carry on with the remaining ones.
                    continue
        except TypeError:
            # ``compounds`` is not iterable (e.g. None): best effort, return all NaN.
            # Unlike a ``return`` inside ``finally`` this does not swallow
            # KeyboardInterrupt/SystemExit or unrelated programming errors.
            pass
        return results_raw

    def _parse_single_compound(self, compound):
        """Return the ``"output_value"`` of one response entry as a float."""
        return float(compound["output_value"])

    def _format_data(self, smiles: List[str]) -> List[dict]:
        """Build the request body: one entry per SMILES, identified by its position."""
        json_data = [{'input_string': smi,
                      'query_id': str(i)} for i, smi in enumerate(smiles)]
        return json_data

    def _create_url(self, component_name) -> str:
        """Compose the request URL from server url, port and endpoint.

        ``component_name`` is accepted for interface compatibility and is not used.
        """
        url = f"{self._server_url}:{self._server_port}/{self._server_endpoint}"
        return url

    def _create_header(self) -> dict:
        """Return the headers chosen in ``__init__`` (configured or default)."""
        return self._request_header

    def _create_params(self) -> dict:
        """Return the query parameters that select the predictor and input format."""
        return {
            'predictor_id': self._predictor_id,
            'predictor_version': self._predictor_version,
            'inp_fmt': 'smiles',
        }
# Default values keyed by ROCS parameter name.
ROCS_DEFAULT_VALUES = {
    "MAX_STEREO": 0,
    "MAX_CPUS": 4,
    "SAVE_ROCS_OVERLAYS": False,
    "ENUM_STEREO": False,
    "MAX_CONFS": 200,
    "EWINDOW": 10,
    "ROCS_OVERLAYS_DIR": None,
    "ROCS_OVERLAYS_PREFIX": "",
    "PROTEIN_NEG_VOL_FILE": None,
    "LIGAND_NEG_VOL_FILE": None,
    "ROCS_INPUT": None,
    "INPUT_TYPE": None,
    "CUSTOM_CFF": "",
    "SHAPE_WEIGHT": None,
    "COLOR_WEIGHT": None,
    "NEGATIVE_VOLUME": False,
}
def similarity_collection(sim_measure_type, sim_measure_enum):
    """Return the ``SIM_FUNC`` name triple registered for ``sim_measure_type``.

    The triple holds the names of the shape getter, the color getter and the
    "highest combo" predicate for the given similarity measure. ``None`` is
    returned when ``sim_measure_type`` is none of the three known measures.
    """
    measures = (
        sim_measure_enum.TANIMOTO,
        sim_measure_enum.REF_TVERSKY,
        sim_measure_enum.FIT_TVERSKY,
    )
    name_sets = (
        SIM_FUNC(shape='GetTanimoto', color='GetColorTanimoto', predicate='OEHighestTanimotoCombo'),
        SIM_FUNC(shape='GetRefTversky', color='GetRefColorTversky', predicate='OEHighestRefTverskyCombo'),
        SIM_FUNC(shape='GetFitTversky', color='GetFitColorTversky', predicate='OEHighestFitTverskyCombo'),
    )
    lookup = dict(zip(measures, name_sets))
    return lookup.get(sim_measure_type)
def get_score(mol, score, sim_func_name_set, shape_weight, color_weight, neg_prot_file, neg_lig_file):
    """Combine shape and color similarity of an overlay into one weighted score.

    Args:
        mol: molecule passed on to ``neg_vol_score`` when a protein file is given.
        score: overlay result object; the getters named by ``sim_func_name_set.shape``
            and ``sim_func_name_set.color`` are called on it.
        sim_func_name_set: object with ``shape`` and ``color`` attributes holding the
            names of the getter methods to call on ``score``.
        shape_weight: weight of the (penalised) shape score.
        color_weight: weight of the color score.
        neg_prot_file: protein file for the negative-volume penalty; ``None`` or an
            empty string disables the penalty.
        neg_lig_file: ligand file passed on to ``neg_vol_score``.

    Returns:
        Tuple ``(best_score, best_score_shape, best_score_color, neg_score)``.
    """
    neg_score = 0.0
    # Truthiness instead of ``len(...) > 0``: an unset file may be None as well as "",
    # and ``len(None)`` would raise TypeError.
    if neg_prot_file:
        neg_score = neg_vol_score(mol, neg_prot_file, neg_lig_file)

    best_score_shape = getattr(score, sim_func_name_set.shape)()
    best_score_shape = correct_shape_score(best_score_shape)
    # Only the shape part is reduced by the negative-volume penalty.
    best_score_shape = penalise_neg_volume(best_score_shape, neg_score)

    best_score_color = getattr(score, sim_func_name_set.color)()
    best_score_color = correct_color_score(best_score_color)

    # Weighted mean of the two parts.
    best_score = ((shape_weight * best_score_shape) + (
        color_weight * best_score_color)) / (shape_weight + color_weight)
    return best_score, best_score_shape, best_score_color, neg_score
def correct_color_score(score):
    """Return ``score`` unchanged unless it is 1.0 or more, in which case return 0.90."""
    return 0.90 if score >= 1.0 else score
class CustomAlerts(BaseScoreComponent):
    """Score component that gives 0 to molecules matching any alert pattern, else 1."""

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self.custom_alerts = self.parameters.specific_parameters.get(self.component_specific_parameters.SMILES, [''])

    def calculate_score(self, molecules: List) -> ComponentSummary:
        """Return a ComponentSummary holding one 0/1 score per molecule."""
        score = self._substructure_match(molecules, self.custom_alerts)
        score_summary = ComponentSummary(total_score=score, parameters=self.parameters)
        return score_summary

    def _substructure_match(self, query_mols, list_of_SMARTS):
        """Return ``0`` for every molecule that matches an alert and ``1`` otherwise.

        Args:
            query_mols: RDKit molecules to check.
            list_of_SMARTS: SMARTS strings; patterns that fail to parse are ignored.
        """
        # Compile every pattern once instead of twice per pattern per molecule.
        patterns = [pattern for pattern in map(Chem.MolFromSmarts, list_of_SMARTS) if pattern]
        return [1 - any(mol.HasSubstructMatch(pattern) for pattern in patterns)
                for mol in query_mols]
class MatchingSubstructure(BaseScoreComponent):
    """Score component giving 1.0 to molecules that match a target pattern, else 0.5."""

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self.target_smarts = self.parameters.specific_parameters.get(self.component_specific_parameters.SMILES, [])
        self._validate_inputs(self.target_smarts)

    def calculate_score(self, molecules: List) -> ComponentSummary:
        """Return a ComponentSummary holding one score per molecule."""
        score = self._substructure_match(molecules, self.target_smarts)
        score_summary = ComponentSummary(total_score=score, parameters=self.parameters)
        return score_summary

    def _substructure_match(self, query_mols, list_of_SMARTS):
        """Score each molecule: 1.0 if it matches any pattern, 0.5 if it matches none.

        With no patterns at all every molecule scores 1.0.
        """
        if len(list_of_SMARTS) == 0:
            return np.ones(len(query_mols), dtype=np.float32)

        # Compile every pattern once instead of twice per pattern per molecule.
        patterns = [pattern for pattern in map(Chem.MolFromSmarts, list_of_SMARTS) if pattern]
        match = [any(mol.HasSubstructMatch(pattern) for pattern in patterns)
                 for mol in query_mols]
        # Maps False -> 0.5 and True -> 1.0.
        return 0.5 * (1 + np.array(match))

    def _validate_inputs(self, smiles):
        """Raise IOError for the first entry that RDKit cannot parse as SMARTS."""
        for smart in smiles:
            if Chem.MolFromSmarts(smart) is None:
                raise IOError(f"Invalid smarts pattern provided as a matching substructure: {smart}")
class PredictivePropertyComponent(BaseScoreComponent):
    """Score component that predicts a property with a pickled model.

    The model is loaded from the path given in the specific parameters and wrapped
    in a ``ModelContainer``; predictions are optionally passed through a
    transformation function.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self.activity_model = self._load_model(parameters)
        self._transformation_function = self._assign_transformation(parameters.specific_parameters)

    def calculate_score(self, molecules: List, step=-1) -> ComponentSummary:
        """Return a ComponentSummary with transformed and raw predictions."""
        score, raw_score = self._predict_and_transform(molecules)
        score_summary = ComponentSummary(total_score=score, parameters=self.parameters, raw_score=raw_score)
        return score_summary

    def _predict_and_transform(self, molecules: List):
        """Return the pair ``(transformed predictions, raw predictions)``."""
        score = self.activity_model.predict(molecules, self.parameters.specific_parameters)
        transformed_score = self._apply_transformation(score, self.parameters.specific_parameters)
        return transformed_score, score

    def _load_model(self, parameters: ComponentParameters):
        """Load the model container, reporting the model path if anything goes wrong.

        Raises:
            Exception: if the model cannot be loaded; the original error is chained.
        """
        try:
            activity_model = self._load_container(parameters)
        except Exception as ex:
            # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate; ``from ex`` keeps the real cause in the traceback.
            model_path = self.parameters.specific_parameters.get(self.component_specific_parameters.MODEL_PATH, "")
            raise Exception(f"The loaded file `{model_path}` isn't a valid scikit-learn model") from ex
        return activity_model

    def _load_container(self, parameters: ComponentParameters):
        """Unpickle the model file and wrap it in a ``ModelContainer``."""
        model_path = self.parameters.specific_parameters.get(self.component_specific_parameters.MODEL_PATH, "")
        # NOTE(security): pickle.load executes arbitrary code from the file; only
        # point MODEL_PATH at model files from a trusted source.
        with open(model_path, "rb") as f:
            scikit_model = pickle.load(f)
            packaged_model = ModelContainer(scikit_model, parameters.specific_parameters)
        return packaged_model

    def _apply_transformation(self, predicted_activity, parameters: dict):
        """Transform ``predicted_activity`` if transformation parameters are configured."""
        transform_params = parameters.get(self.component_specific_parameters.TRANSFORMATION)
        if transform_params:
            activity = self._transformation_function(predicted_activity, transform_params)
        else:
            activity = predicted_activity
        return activity

    def _assign_transformation(self, specific_parameters: dict):
        """Return the transformation function selected by ``specific_parameters``.

        When no transformation is configured, a "no transformation" entry is stored
        in ``specific_parameters`` (the dict is modified in place) and used.
        """
        transformation_type = TransformationTypeEnum()
        transform_params = specific_parameters.get(self.component_specific_parameters.TRANSFORMATION)
        if not transform_params:
            # Use the freshly created default for the factory lookup too; previously
            # the stale, empty value was passed on.
            transform_params = {
                TransformationParametersEnum.TRANSFORMATION_TYPE: transformation_type.NO_TRANSFORMATION
            }
            specific_parameters[self.component_specific_parameters.TRANSFORMATION] = transform_params
        factory = TransformationFactory()
        transform_function = factory.get_transformation_function(transform_params)
        return transform_function
class TanimotoSimilarity(BaseScoreComponent):
    """Score component based on Tanimoto similarity to a set of reference SMILES."""

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self._similarity = Similarity()

        # Fingerprint settings, with their defaults when not configured.
        settings = self.parameters.specific_parameters
        self._radius = settings.get("radius", 3)
        self._use_counts = settings.get("use_counts", True)
        self._use_features = settings.get("use_features", True)

        # The reference fingerprints are computed once, with the same settings
        # that are later applied to the query molecules.
        reference_smiles = settings.get(self.component_specific_parameters.SMILES, [])
        self._ref_fingerprints = self._chemistry.smiles_to_fingerprints(
            reference_smiles,
            radius=self._radius,
            use_counts=self._use_counts,
            use_features=self._use_features,
        )

    def calculate_score(self, molecules: List) -> ComponentSummary:
        """Return a ComponentSummary with the Tanimoto score of every molecule."""
        fingerprints = self._chemistry.mols_to_fingerprints(
            molecules, self._radius, self._use_counts, self._use_features
        )
        similarity = self._similarity.calculate_tanimoto(fingerprints, self._ref_fingerprints)
        return ComponentSummary(total_score=similarity, parameters=self.parameters)
class AZdock(BaseStructuralComponent):
    """Structural score component that obtains scores by invoking AZdock on the command line."""

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self._configuration_path = self.parameters.specific_parameters[self.component_specific_parameters.AZDOCK_CONFPATH]
        self._docker_script_path = self.parameters.specific_parameters[self.component_specific_parameters.AZDOCK_DOCKERSCRIPTPATH]
        self._environment_path = self.parameters.specific_parameters[self.component_specific_parameters.AZDOCK_ENVPATH]

    def _add_debug_mode_if_selected(self, command):
        """Append ``-debug`` when it is configured or a development environment is detected."""
        debug_requested = self.parameters.specific_parameters.get(
            self.component_specific_parameters.AZDOCK_DEBUG, False
        )
        # The environment check only runs when debug mode was not requested explicitly.
        if debug_requested or _is_development_environment():
            return ' '.join([command, "-debug"])
        return command

    def _create_command(self, smiles: List[str], step):
        """Assemble the command line; all SMILES go into one quoted, ';'-separated argument."""
        concat_smiles = f'"{";".join(smiles)}"'
        arguments = [
            self._environment_path,
            self._docker_script_path,
            "-conf", self._configuration_path,
            "-output_prefix", self._get_step_string(step),
            "-smiles", concat_smiles,
            "-print_scores",
        ]
        # In debug mode AZdock's loggers print much more detailed information.
        return self._add_debug_mode_if_selected(' '.join(arguments))

    def _calculate_score(self, smiles: List[str], step) -> np.array:
        """Run the command for ``smiles`` and return ``(transformed scores, raw scores)``."""
        command = self._create_command(smiles, step)

        # One line of output is read per input SMILES.
        results = self._send_request_with_stepwize_read(command, len(smiles))

        def as_number(text):
            # Ligands can fail in AZdock (embedding or docking) even though they are
            # valid RDKit molecules; "docker.py" reports "NA" for those, because '0'
            # could be a perfectly normal value. Whatever cannot be cast to a floating
            # point number becomes '0'.
            try:
                return float(text)
            except ValueError:
                return 0

        scores = [as_number(entry) for entry in results]
        transform_params = self.parameters.specific_parameters.get(
            self.component_specific_parameters.TRANSFORMATION, {}
        )
        transformed_scores = self._transformation_function(scores, transform_params)

        return np.array(transformed_scores), np.array(scores)

    def _parse_result(self, result):
        """Return one output line as a string with surrounding whitespace removed."""
        return str(result).strip()
class BaseStructuralComponent(BaseScoreComponent):
    """Base class for scoring components that delegate scoring to an external command.

    Subclasses provide the command line (``_create_command``), the score
    computation (``_calculate_score``) and the per-line output parsing
    (``_parse_result``); this class converts molecules to SMILES, wraps the
    result in a ``ComponentSummary`` and runs the external process.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)

    def calculate_score_for_step(self, molecules: List, step=-1) -> ComponentSummary:
        """Step-aware entry point; simply forwards to ``calculate_score``."""
        return self.calculate_score(molecules, step)

    def calculate_score(self, molecules: List, step=-1) -> ComponentSummary:
        """Score ``molecules`` and return a summary with both transformed and raw scores."""
        valid_smiles = self._chemistry.mols_to_smiles(molecules)
        score, raw_score = self._calculate_score(valid_smiles, step)
        score_summary = ComponentSummary(total_score=score, parameters=self.parameters, raw_score=raw_score)
        return score_summary

    def _get_step_string(self, step) -> str:
        """Return the quoted output prefix for ``step``: ``""`` for -1, else e.g. ``"e0007_"``."""
        if step == -1:
            return "\"\""
        return "".join(["\"e", str(step).zfill(4), "_\""])

    @abstractmethod
    def _calculate_score(self, smiles: List[str], step) -> tuple:
        """Return ``(score, raw_score)`` for ``smiles``; ``calculate_score`` unpacks exactly two values."""
        raise NotImplementedError("_calculate_score method is not implemented")

    @abstractmethod
    def _create_command(self, input_file, step) -> str:
        """Return the shell command string to execute for this step."""
        raise NotImplementedError("_create_command method is not implemented")

    def _send_request_with_stepwize_read(self, command, data_size: int):
        """Run ``command`` through the shell and return ``data_size`` parsed stdout lines.

        Exactly ``data_size`` lines are read; if the process prints fewer,
        the remaining entries are whatever ``_parse_result`` makes of an empty
        string.
        """
        # stderr is discarded instead of piped: nothing ever read the pipe, so a
        # child writing a lot of diagnostics (e.g. in debug mode) could fill it,
        # block, and deadlock the readline loop below.
        with subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
                              shell=True) as proc:
            wrapped_proc_in = io.TextIOWrapper(proc.stdin, 'utf-8')
            wrapped_proc_out = io.TextIOWrapper(proc.stdout, 'utf-8')
            result = [self._parse_result(wrapped_proc_out.readline()) for _ in range(data_size)]
            wrapped_proc_in.close()
            wrapped_proc_out.close()
            # wait() reaps the process; a terminate() afterwards would be a no-op
            proc.wait()
        return result

    @abstractmethod
    def _parse_result(self, result) -> str:
        """Convert one raw stdout line into the string stored in the result list."""
        raise NotImplementedError("_parse_result method is not implemented")
class DockStream(BaseStructuralComponent):
    """Structural scoring component that scores SMILES through an external DockStream script.

    The three paths needed to build the command line (environment/interpreter,
    docker script and configuration file) are mandatory entries of the
    component-specific parameters; a missing entry raises ``KeyError``.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self._configuration_path = self.parameters.specific_parameters[self.component_specific_parameters.DOCKSTREAM_CONFPATH]
        self._docker_script_path = self.parameters.specific_parameters[self.component_specific_parameters.DOCKSTREAM_DOCKERSCRIPTPATH]
        self._environment_path = self.parameters.specific_parameters[self.component_specific_parameters.DOCKSTREAM_ENVPATH]

    def _add_debug_mode_if_selected(self, command):
        """Append ``-debug`` to ``command`` when debugging is configured or in a development environment."""
        if self.parameters.specific_parameters.get(self.component_specific_parameters.DOCKSTREAM_DEBUG, False)\
                or _is_development_environment():
            command = ' '.join([command, "-debug"])
        return command

    def _create_command(self, smiles: List[str], step):
        """Build the shell command string that docks ``smiles`` for the given ``step``."""
        # all SMILES are handed over as one double-quoted, ';'-separated argument
        concat_smiles = '"' + ';'.join(smiles) + '"'
        command = ' '.join([self._environment_path,
                            self._docker_script_path,
                            "-conf", self._configuration_path,
                            "-output_prefix", self._get_step_string(step),
                            "-smiles", concat_smiles,
                            "-print_scores"])

        # check, if DockStream is to be executed in debug mode, which will cause its loggers to print out
        # much more detailed information
        command = self._add_debug_mode_if_selected(command)
        return command

    def _calculate_score(self, smiles: List[str], step) -> tuple:
        """Dock ``smiles`` and return ``(transformed_scores, raw_scores)`` as two numpy arrays.

        One result line is read per input SMILES. Any line that cannot be
        parsed as a float is scored ``0.0``.
        """
        # create the external command
        command = self._create_command(smiles, step)

        # send the batch smiles and retrieve the result as a list of strings
        results = self._send_request_with_stepwize_read(command, len(smiles))

        # note: some ligands might have failed in DockStream (embedding or docking) although they are valid
        #       RDkit molecules -> "docker.py" will return "NA"'s for failed molecules, as '0' could be a perfectly
        #       normal value; anything that cannot be cast to a floating point number will result in '0'
        scores = []
        for score in results:
            try:
                score = float(score)
            except ValueError:
                # use a float so the raw-score array keeps a floating dtype even
                # when every molecule of the batch failed
                score = 0.0
            scores.append(score)
        transform_params = self.parameters.specific_parameters.get(
            self.component_specific_parameters.TRANSFORMATION, {}
        )
        transformed_scores = self._transformation_function(scores, transform_params)

        return np.array(transformed_scores), np.array(scores)

    def _parse_result(self, result):
        """Return one raw output line as a stripped string."""
        return str(result).strip()
class SASComponent(BaseScoreComponent):
    """Scoring component that predicts a synthetic-accessibility probability per molecule.

    A pickled classifier (loaded from the ``saz_model_path`` specific
    parameter) is applied to a descriptor built from the SA score, the exact
    molecular weight and a count fingerprint of each molecule.
    """

    def __init__(self, parameters: ComponentParameters):
        super().__init__(parameters)
        self.activity_model = self._load_model(parameters)
        self._descriptors = Descriptors()
        self.fp_parameters = dict(
            radius=3,
            size=4096,  # Descriptors class calls this parameter "size", RDKit calls it "nBits".
            use_features=False,  # RDKit has False as default, Descriptors class has True.
        )

    def calculate_score(self, molecules: List[Mol], step=-1) -> ComponentSummary:
        """Return a summary whose total score is the model prediction for ``molecules``."""
        score = self.predict_from_molecules(molecules)
        score_summary = ComponentSummary(total_score=score, parameters=self.parameters)
        return score_summary

    def predict_from_molecules(self, molecules: List[Mol]) -> np.ndarray:
        """Return column 1 of ``predict_proba`` for each molecule (empty array for no molecules)."""
        if len(molecules) == 0:
            return np.array([])

        descriptors = self._calculate_descriptors(molecules)

        # Normally, predict_proba takes a 2d array, one row per observation,
        # but a list of 1d arrays works too.
        sas_predictions = self.activity_model.predict_proba(descriptors)

        return sas_predictions[:, 1]

    def _load_model(self, parameters: ComponentParameters):
        """Load the pickled model configured under ``saz_model_path``.

        Raises:
            Exception: if the file cannot be opened or unpickled; the original
                error is attached as ``__cause__``.
        """
        # TODO: in the future should use self.component_specific_parameters.MODEL_PATH
        # model_path = self.parameters.specific_parameters.get(self.component_specific_parameters.MODEL_PATH, "")
        model_path = self.parameters.specific_parameters.get("saz_model_path", "")
        try:
            activity_model = self._load_scikit_model(model_path)
        except Exception as error:
            # `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
            # through, and chaining keeps the root cause (e.g. a missing file) visible.
            raise Exception(f"The loaded file `{model_path}` isn't a valid scikit-learn model") from error
        return activity_model

    def _load_scikit_model(self, model_path: str):
        """Unpickle and return the object stored at ``model_path``."""
        # SECURITY NOTE: pickle.load executes arbitrary code contained in the file;
        # only point `saz_model_path` at model files from a trusted source.
        with open(model_path, "rb") as f:
            scikit_model = pickle.load(f)
        return scikit_model

    def _calculate_descriptors(self, molecules: List[Mol]) -> List[np.ndarray]:
        """Return one SAS descriptor per molecule, in input order."""
        descriptors = [self._sas_descriptor(mol) for mol in molecules]
        return descriptors

    def _sas_descriptor(self, mol: Mol) -> np.ndarray:
        """Returns SAS descriptor for a molecule, to be used as input to SAS model.

        SAS descriptor consists of three parts:
            1. SA score by Ertl and Schuffenhauer (Novartis), part of RDKit, copied to this repo.
            2. Molecular weight.
            3. Morgan fingerprint, with counts (ECFP6).

        The three parts are concatenated into one 1d numpy array.
        """

        sascore = calculateScore(mol)
        molwt = ExactMolWt(mol)
        fp = self._fingerprint(mol)

        descriptor = np.concatenate([[sascore], [molwt], fp])

        return descriptor

    def _fingerprint(self, mol: Mol) -> np.ndarray:
        """Return the count fingerprint of ``mol`` computed with ``self.fp_parameters``."""
        fps = self._descriptors.molecules_to_count_fingerprints([mol], parameters=self.fp_parameters)
        return fps[0]
class ScoringFunctionFactory:
    """Factory whose "constructor" returns a configured scoring function.

    ``ScoringFunctionFactory(sf_parameters)`` does not produce a factory
    instance: ``__new__`` looks up the scoring-function class registered under
    ``sf_parameters.name`` and returns an instance of that class.
    """

    def __new__(cls, sf_parameters: ScoringFunctionParameters) -> BaseScoringFunction:
        names = ScoringFunctionNameEnum()
        # name -> scoring function class
        registry = {}
        registry[names.CUSTOM_PRODUCT] = CustomProduct
        registry[names.CUSTOM_SUM] = CustomSum
        return cls.create_scoring_function_instance(sf_parameters, registry)

    @staticmethod
    def create_scoring_function_instance(sf_parameters: ScoringFunctionParameters,
                                         scoring_function_registry: dict) -> BaseScoringFunction:
        """Instantiate the scoring function registered under ``sf_parameters.name``.

        Raises ``KeyError`` when the name is not present in the registry.
        """
        # resolve the class first so an unknown name fails before any
        # component parameters are built
        scoring_function_cls = scoring_function_registry[sf_parameters.name]

        component_parameters = []
        for raw_parameters in sf_parameters.parameters:
            component_parameters.append(ComponentParameters(**raw_parameters))

        return scoring_function_cls(component_parameters, sf_parameters.parallel)
2 | import os 3 | 4 | 5 | def _is_development_environment() -> bool: 6 | try: 7 | project_root = os.path.dirname(__file__) 8 | with open(os.path.join(project_root, '../configs/config.json'), 'r') as f: 9 | config = json.load(f) 10 | is_dev = config.get("DEVELOPMENT_ENVIRONMENT", False) 11 | return is_dev 12 | except: 13 | return False 14 | -------------------------------------------------------------------------------- /RUSH/scoring_plugins/REINVENT4/reinvent_plugins/components/comp_RuSHscore.py: -------------------------------------------------------------------------------- 1 | __all__ = ["RushSCORE"] 2 | 3 | from dataclasses import dataclass, asdict, field 4 | from typing import List, Tuple 5 | import logging 6 | 7 | import rdkit 8 | from rdkit import Chem 9 | 10 | from RUSH.scoring_plugins.REINVENT4.reinvent_plugins.decorators import ComponentResults, molcache, add_tag 11 | from RUSH.scoring_plugins.REINVENT4.reinvent_plugins.decorators import BaseParameters 12 | 13 | from RUSH.scripts.RuSH import RuSHScorer 14 | 15 | logger = logging.getLogger("reinvent") 16 | 17 | @add_tag("__parameters") 18 | @dataclass 19 | class Parameters(BaseParameters): 20 | output_dir : List[str] = "~/RUSH" 21 | # the order of these molecules should be consistent. 
@add_tag("__component")
class RuSHScore:
    """Scoring component that runs one ``RuSHScorer`` per configured endpoint."""

    def __init__(self, params: Parameters):
        # one Parameters object per endpoint
        self.endpoints = params.get_endpoints()

        # attach a scorer, built from the endpoint's own fields, to each endpoint
        for endpoint in self.endpoints:
            scorer_kwargs = asdict(endpoint)
            endpoint.RuSHScorer = RuSHScorer(**scorer_kwargs)

    @molcache
    def __call__(self, mols: List[Chem.Mol]) -> ComponentResults:
        """Score ``mols`` with every endpoint's scorer; one score entry per endpoint."""
        # each scorer receives its own shallow copy of the molecule list
        scores = [endpoint.RuSHScorer(mols.copy()) for endpoint in self.endpoints]
        return ComponentResults(scores=scores)
@dataclass
class BaseParameters:
    """
    Base class for the Parameters dataclass of a REINVENT scoring component.

    Helps with multiple endpoints and default parameters: after construction
    every field holds a list, and single-element lists are repeated so they
    match the longest field.

    Inherit and declare defaults as plain values:
    class Parameters(BaseParameters):
        var: List[bool] = True

    Then unpack into one parameter object per endpoint:
    self.endpoints = params.get_endpoints()
    for endpoint in self.endpoints:
        endpoint.object = Object(**asdict(endpoint))

    """
    do_post : bool = True

    def __post_init__(self):
        # endpoint objects are created with do_post=False and keep scalar values
        if not self.do_post:
            return

        field_names = list(self.__dataclass_fields__)

        # 1) wrap every non-list value (including do_post itself) in a list
        for attr in field_names:
            if not isinstance(getattr(self, attr), list):
                setattr(self, attr, [getattr(self, attr)])

        # 2) repeat single-element lists up to the length of the longest field
        longest = max(len(getattr(self, attr)) for attr in field_names)
        for attr in field_names:
            entries = getattr(self, attr)
            if len(entries) == 1:
                setattr(self, attr, entries * longest)

    def get_endpoints(self, ) -> List[Any]:
        """
        Split this Parameters object into a list of Parameters, one per endpoint.
        Each returned object carries the i-th element of every list field
        (non-list values are passed through unchanged) and has do_post=False.
        """
        per_field = asdict(self)
        per_field.pop('do_post', None)

        # the first remaining field determines how many endpoints exist
        endpoint_count = len(next(iter(per_field.values())))

        endpoints = []
        for idx in range(endpoint_count):
            kwargs = {}
            for key, values in per_field.items():
                kwargs[key] = values[idx] if isinstance(values, list) else values
            endpoints.append(self.__class__(do_post=False, **kwargs))
        return endpoints
def molcache(func: Callable):
    """Decorator: let a method written for RDKit molecules be called with SMILES.

    The returned wrapper takes a list of SMILES strings, converts each with
    ``Chem.MolFromSmiles`` (reusing the module-level ``cache`` for strings
    seen before) and calls ``func(self, mols)``. SMILES that cannot be
    converted are logged and passed on as a falsy molecule; they are not
    dropped, so positions stay aligned with the input.
    """

    def wrapper(self, smilies: List[str]):
        mols = []

        for smiles in smilies:
            try:
                mol = cache[smiles]
            except KeyError:
                # first time this SMILES is seen: convert and remember the
                # result (failed conversions are remembered as well)
                mol = Chem.MolFromSmiles(smiles)
                cache[smiles] = mol

            if not mol:
                logger.warning(f"{__name__}: {smiles} could not be converted")

            mols.append(mol)

        return func(self, mols)

    return wrapper
def check_and_install_package():
    """Make sure the RUSH package is installed in editable (development) mode.

    - RUSH not installed: run ``pip install -e .``.
    - RUSH installed but not editable: uninstall it, then ``pip install -e .``.
    - RUSH already editable: only print a message.

    pip is invoked through the current interpreter and must be run from the
    repository root, since the install target is ``.``. A failing pip call
    raises ``subprocess.CalledProcessError``.
    """
    # Local imports: needed by this function only.
    import json
    import os

    editable_install_cmd = [sys.executable, '-m', 'pip', 'install', '-e', '.']

    try:
        # Try to get the installed version
        dist = pkg_resources.get_distribution('RUSH')
    except pkg_resources.DistributionNotFound:
        print("RUSH not found. Installing...")
        subprocess.check_call(editable_install_cmd)
        print("Installed RUSH in editable mode.")
        return

    # Legacy editable installs leave a `RUSH.egg-link` FILE inside one of the
    # path entries. The file has to be looked up on disk: the entry strings
    # themselves are directories and never contain the text 'RUSH.egg-link'.
    RUSH_egg_link = None
    for path in pkg_resources.working_set.entries:
        candidate = os.path.join(str(path), 'RUSH.egg-link')
        if os.path.isfile(candidate):
            RUSH_egg_link = candidate
            break

    is_editable = RUSH_egg_link is not None

    # Newer installs record the editable flag in `direct_url.json` metadata
    # instead of an egg-link file (PEP 610).
    if not is_editable and dist.has_metadata('direct_url.json'):
        try:
            direct_url = json.loads(dist.get_metadata('direct_url.json'))
        except ValueError:
            direct_url = {}
        if isinstance(direct_url, dict):
            dir_info = direct_url.get('dir_info', {})
            is_editable = isinstance(dir_info, dict) and bool(dir_info.get('editable', False))

    if not is_editable:
        print("RUSH is installed but not in editable mode. Reinstalling...")
        subprocess.check_call([sys.executable, '-m', 'pip', 'uninstall', 'RUSH', '-y'])
        subprocess.check_call(editable_install_cmd)
        print("Reinstalled RUSH in editable mode.")
    else:
        print("RUSH is already installed in editable mode.")
# Packaging entry point for RUSH: packages are discovered from the repository
# root ("."), which is also declared as the package directory.
setup_options = dict(
    name="RUSH",
    version="0.1",
    packages=find_packages(where="."),
    package_dir={"": "."},
    include_package_data=True,
)

setup(**setup_options)