├── environment.yml ├── example_input_output_files ├── example_input_file.csv └── example_output_file.csv ├── license.txt ├── test_spacial_score.py ├── README.md └── spacial_score.py /environment.yml: -------------------------------------------------------------------------------- 1 | name: my_sps_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python>=3.8 7 | - rdkit=2021.09.3 8 | - numpy=1.21.5 9 | - pytest -------------------------------------------------------------------------------- /example_input_output_files/example_input_file.csv: -------------------------------------------------------------------------------- 1 | Smiles,ID 2 | C1COCC(=O)N1c1ccc(cc1)N1C[C@@H](OC1=O)CNC(=O)c1ccc(s1)Cl,1 3 | C[C@@H]1CC[C@@]23CCC(=O)[C@H]2[C@@]1([C@@H](C[C@@]([C@H]([C@@H]3C)O)(C)C=C)OC(=O)CO)C,2 4 | C[C@@H]1CCC23[C@H]1CC[C@]2([C@H]1C[C@]2(CC[C@H]([C@@H]2CC1=C3C(=O)O)C(C)C)C)C,3 5 | CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C,4 6 | O=C(OCC1=CC=CC=C1)C=P(C2=CC=CC=C2)(C3=CC=CC=C3)C4=CC=CC=C4,5 7 | BrC(C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O,6 8 | O[C@H]([C@H](C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O)/C=C(C(C)(C)C)\Br,7 9 | O[C@H]1C=C(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,8 10 | O[C@H]1C[C@@](O)(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,9 11 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,10 12 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,11 13 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,12 14 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,13 15 | O=C1C[C@]2(C(OCC3=CC=CC=C3)=O)[C@]4(CC(OCC5=CC=CC=C5)=O)[C@](O[C@@H]4OC)(C(C)(C)C)C[C@]2([H])O1,14 16 | O=C1C[C@]23C([C@]4([H])OC3=O)(CC(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,15 17 | 
O=C1C[C@]23C([C@]4([H])OC3=O)([C@](O)([H])C(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,16 18 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Waldmann Lab. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /example_input_output_files/example_output_file.csv: -------------------------------------------------------------------------------- 1 | Smiles,ID,nSPS 2 | C1COCC(=O)N1c1ccc(cc1)N1C[C@@H](OC1=O)CNC(=O)c1ccc(s1)Cl,1,19.413793103448278 3 | C[C@@H]1CC[C@@]23CCC(=O)[C@H]2[C@@]1([C@@H](C[C@@]([C@H]([C@@H]3C)O)(C)C=C)OC(=O)CO)C,2,49.7037037037037 4 | C[C@@H]1CCC23[C@H]1CC[C@]2([C@H]1C[C@]2(CC[C@H]([C@@H]2CC1=C3C(=O)O)C(C)C)C)C,3,53.48148148148148 5 | CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C,4,46.535714285714285 6 | O=C(OCC1=CC=CC=C1)C=P(C2=CC=CC=C2)(C3=CC=CC=C3)C4=CC=CC=C4,5,10.933333333333334 7 | BrC(C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O,6,12.482758620689655 8 | O[C@H]([C@H](C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O)/C=C(C(C)(C)C)\Br,7,14.27027027027027 9 | O[C@H]1C=C(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,8,21.805555555555557 10 | O[C@H]1C[C@@](O)(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,9,25.72972972972973 11 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,10,29.685714285714287 12 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,11,27.6 13 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,12,28.513513513513512 14 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,13,30.486486486486488 15 | O=C1C[C@]2(C(OCC3=CC=CC=C3)=O)[C@]4(CC(OCC5=CC=CC=C5)=O)[C@](O[C@@H]4OC)(C(C)(C)C)C[C@]2([H])O1,14,31.526315789473685 16 | O=C1C[C@]23C([C@]4([H])OC3=O)(CC(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,15,49.36363636363637 17 | O=C1C[C@]23C([C@]4([H])OC3=O)([C@](O)([H])C(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,16,51.0 18 | -------------------------------------------------------------------------------- /test_spacial_score.py: 
# Script for testing of spacial_score.py
#
# Run with `pytest` from the directory that contains spacial_score.py.

import numpy as np
from rdkit import Chem

import spacial_score as sps


valid_smiles = r"C/C=C\C1C=CCC(Br)C1C2=CC(C#C)=CC=C2"
invalid_smiles = r"abcx--a"


def test_smiles_to_mol():
    """ Test conversion of SMILES to RDKit mol """
    valid_mol = sps.smiles_to_mol(valid_smiles)
    invalid_mol = sps.smiles_to_mol(invalid_smiles)

    # Chem is imported explicitly so that this check does not rely on
    # spacial_score having loaded the rdkit.Chem submodule as a side effect.
    assert isinstance(valid_mol, Chem.Mol)
    # smiles_to_mol returns the np.nan singleton for unparsable input
    assert invalid_mol is np.nan


def create_sps_object():
    """ Create an instance of sps object based on a valid SMILES representation of a molecule """
    mol = sps.smiles_to_mol(valid_smiles)
    return sps.SpacialScore(valid_smiles, mol)


def calc_sum_score(score_type):
    """ Calculate a summed score for values in a dictionary (atom index -> score) """
    return sum(score_type.values())


def test_hybridisation_score():
    """ Test if the summed hybridisation score for a molecule is calculated properly """
    hyb_score = calc_sum_score(create_sps_object().hyb_score)
    assert hyb_score == 40


def test_stereo_score():
    """ Test if the summed stereo score for a molecule is calculated properly """
    stereo_score = calc_sum_score(create_sps_object().stereo_score)
    assert stereo_score == 23


def test_ring_score():
    """ Test if the summed ring score for a molecule is calculated properly """
    ring_score = calc_sum_score(create_sps_object().ring_score)
    assert ring_score == 24


def test_bond_score():
    """ Test if the summed bond score for a molecule is calculated properly """
    bond_score = calc_sum_score(create_sps_object().bond_score)
    assert bond_score == 88


def test_SPS_from_smiles():
    """ Test if the un-normalised SPS is calculated properly for a test molecule """
    sps_score = sps.calculate_score_from_smiles(valid_smiles)
    assert sps_score == 491


def test_nSPS_from_smiles():
    """ Test if the normalised SPS (nSPS) is calculated properly for a test molecule """
    sps_score = round(sps.calculate_score_from_smiles(valid_smiles, per_atom=True), 2)
    assert sps_score == 27.28
(Created by the Waldmann Lab at the Max Planck Institute of Molecular Physiology, Dortmund) 4 | 5 | The score is intended for assessing molecular topology of organic molecules and to improve upon the idea of the fraction of stereo and sp3 carbons. The score is described in our J. Med. Chem. paper: [Spacial Score – A Comprehensive Topological Indicator for Small Molecule Complexity](https://doi.org/10.1021/acs.jmedchem.3c00689). 6 | 7 | > [!IMPORTANT] 8 | > **The SPS indicator is now also a part of the RDKit package and available through the [rdkit.Chem.SpacialScore](https://www.rdkit.org/docs/source/rdkit.Chem.SpacialScore.html#module-rdkit.Chem.SpacialScore) module. The SPS script is actively maintained as part of the RDKit package, and thus, it is recommended to calculate the SPS through RDKit.** 9 | 10 | logo 11 | 12 | *** 13 | ### Required Python Packages 14 | The script requires [RDKit package](https://www.rdkit.org/) and [NumPy](https://numpy.org/). 15 | To install the required packages through [Conda](https://docs.conda.io/en/latest/miniconda.html), simply use the `environment.yml` file: 16 | ``` 17 | conda env create -f environment.yml 18 | ``` 19 | The script can be used after activation of the newly created conda environment: 20 | ``` 21 | conda activate my_sps_env 22 | ``` 23 | To display the options of `spacial_score.py`, type: 24 | ``` 25 | python spacial_score.py -h 26 | ``` 27 | (Please remember that `spacial_score.py` needs to be in your current directory) 28 | *** 29 | ### Using the Script Through Command Line 30 | The script can be used directly from a command line, reading either a directly provided SMILES string `(-s)` or a .csv/.tsv file `(-i)`: 31 | ``` 32 | usage: spacial_score.py [-h] [-s SMILES string] [-i filename.ext] [-o filename.csv] [-t] [-v] [-p] 33 | 34 | Script for calculating Spacial Score (SPS) or normalised SPS (nSPS) for small molecules. 
35 | The script can calculate the scores for a direct SMILES input or for a .csv or .tsv file containing a list of SMILES. 36 | nSPS is calculated by deafult. 37 | 38 | optional arguments: 39 | -h, --help show this help message and exit 40 | -s SMILES string Your input SMILES string for which to calculate the score 41 | -i filename.ext Your .csv or .tsv file containing column called "Smiles" which contains SMILES strings. Resutls will be 42 | saved in a new .csv file 43 | -o filename.csv You can specify name of the output .csv file. Not required. 44 | -t Option to calculate total SPS (no normalisation). 45 | -v Option to print verbose results, with information for each atom index. 46 | -p Option to print confirmation after processing of each SMILES string in a file. 47 | ``` 48 | 49 | To calculate nSPS directly from a SMILES string you can just type: 50 | ``` 51 | python spacial_score.py -s CC(C)CBr 52 | ``` 53 | Where CC(C)CBr is just an example of a SMILES string (on Linux you may need to use quotation marks "CC(C)CBr"). 
54 | This returns: 55 | ``` 56 | Normalisation Applied: True 57 | SMILES: CC(C)CBr 58 | Calculated nSPS: 9.6 59 | ``` 60 | 61 | To calculate the un-normalised (total) SPS you need to add option `-t`: 62 | ``` 63 | python spacial_score.py -s CC(C)CBr -t 64 | ``` 65 | This returns: 66 | ``` 67 | Normalisation Applied: False 68 | SMILES: CC(C)CBr 69 | Calculated SPS: 48 70 | ``` 71 | 72 | A more verbose output can be achieved with the option `-v`: 73 | ``` 74 | python spacial_score.py -s CC(C)CBr -v 75 | ``` 76 | The output: 77 | ``` 78 | SMILES: CC(C)CBr 79 | Atom Idx Element Hybrid Stereo Ring Neighbs 80 | ------------------------------------------------------------ 81 | 0 C 3 1 1 1 82 | 1 C 3 1 1 9 83 | 2 C 3 1 1 1 84 | 3 C 3 1 1 4 85 | 4 Br 3 1 1 1 86 | ------------------------------------------------------------ 87 | Total Spacial Score: 48 88 | Per-Atom Score: 9.6 89 | ``` 90 | 91 | To read in a .csv or .tsv file, please type: 92 | ``` 93 | python spacial_score.py -i your_input_file_name.csv -o your_output_file_name.csv 94 | ``` 95 | nSPS is calculated by default, and option `-t` can be used to calculate un-normalised SPS. 96 | Please, remember that your input file needs to contain a column named `Smiles` containing SMILES which will be used for the calculation of the scores. 97 | Examples of input and output files can be found in the folder named `example_input_output_files`. 98 | 99 | *** 100 | ### Calculate the Score with a Python Function 101 | The scores can also be calculated by using function: 102 | ``` 103 | def calculate_score_from_smiles(smiles: str, per_atom=False, verbose=False) -> float: 104 | """ Calculates the spacial score as a total SPS or size-normalised, per-atom nSPS for a molecule. 
# Version 1.0

import argparse
import csv
import math
import os
import sys

import numpy as np
import rdkit.Chem.Descriptors as Desc
from rdkit import Chem


class SpacialScore:
    """Class intended for calculating spacial score (SPS) and size-normalised SPS (nSPS) for small organic molecules.

    Parameters:
    ===========
    smiles: SMILES string the molecule was built from (only used when printing)
    mol: RDKit molecule to score
    verbose: flag to denote if the per-atom table should be printed on construction

    Attributes:
    ===========
    hyb_score, stereo_score, ring_score, bond_score: dictionaries mapping atom index to the respective factor
    chiral_idxs: indices of atoms reported as (possible) stereocentres
    doublebonds_stereo: dictionary mapping (begin atom idx, end atom idx) of each double bond to its stereo flag
    score: total SPS, i.e. the sum over all atoms of the product of the four factors
    per_atom_score: nSPS, i.e. score divided by the heavy atom count (NaN if there are no heavy atoms)
    """

    # hybridisation name (as reported by RDKit) -> h score
    _HYBRIDISATION_SCORES = {"SP": 1, "SP2": 2, "SP3": 3}
    # h score for any other hybridisation than sp, sp2 or sp3
    _OTHER_HYBRIDISATION_SCORE = 4

    def __init__(self, smiles, mol, verbose=False):
        self.smiles = smiles
        self.mol = mol
        self.verbose = verbose

        self.hyb_score = {}
        self.stereo_score = {}
        self.ring_score = {}
        self.bond_score = {}
        self.chiral_idxs = self.find_stereo_atom_idxs()
        self.doublebonds_stereo = self.find_doublebonds_stereo()
        self.score = self.calculate_spacial_score()

        # A molecule without heavy atoms (e.g. parsed from an empty SMILES string)
        # has no meaningful per-atom score; report NaN instead of dividing by zero.
        heavy_atom_count = Desc.HeavyAtomCount(self.mol)
        self.per_atom_score = self.score / heavy_atom_count if heavy_atom_count else np.nan

        if self.verbose:
            self.display_scores()

    def display_scores(self):
        """Displays the individual scores for each molecule atom"""
        column_names = ("Atom Idx", "Element", "Hybrid", "Stereo", "Ring", "Neighbs")
        separator = "-" * 60

        print("SMILES:", self.smiles)
        print("".join(name.ljust(10, " ") for name in column_names))
        print(separator)

        for atom in self.mol.GetAtoms():
            atom_idx = atom.GetIdx()
            cells = (
                atom_idx,
                atom.GetSymbol(),
                self.hyb_score[atom_idx],
                self.stereo_score[atom_idx],
                self.ring_score[atom_idx],
                self.bond_score[atom_idx],
            )
            print("".join(str(cell).ljust(10, " ") for cell in cells))

        print(separator)
        print("Total Spacial Score:", self.score)
        print("Per-Atom Score:", round(self.per_atom_score, 2), "\n")

    def find_stereo_atom_idxs(self, includeUnassigned=True):
        """Finds indices of atoms that are (pseudo)stereo/chiral centres, in respect to the attached groups
        (does not account for double bond isomers)"""
        stereo_centers = Chem.FindMolChiralCenters(
            self.mol,
            includeUnassigned=includeUnassigned,
            includeCIP=False,
            useLegacyImplementation=False,
        )
        return [atom_idx for atom_idx, _ in stereo_centers]

    def find_doublebonds_stereo(self):
        """Finds the stereo flag (E/Z/any/none) of every double bond, keyed by its two atom indices"""
        return {
            (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()): bond.GetStereo()
            for bond in self.mol.GetBonds()
            if bond.GetBondType() == Chem.BondType.DOUBLE
        }

    def calculate_spacial_score(self):
        """Calculates the total spacial score for a molecule and fills the per-atom factor dictionaries"""
        score = 0
        for atom in self.mol.GetAtoms():
            atom_idx = atom.GetIdx()
            self.hyb_score[atom_idx] = self._account_for_hybridisation(atom)
            self.stereo_score[atom_idx] = self._account_for_stereo(atom_idx)
            self.ring_score[atom_idx] = self._account_for_ring(atom)
            self.bond_score[atom_idx] = self._account_for_neighbours(atom)
            score += self._calculate_score_for_atom(atom_idx)
        return score

    def _calculate_score_for_atom(self, atom_idx):
        """Calculates the total score for a single atom in a molecule (product of the four factors)"""
        return (
            self.hyb_score[atom_idx]
            * self.stereo_score[atom_idx]
            * self.ring_score[atom_idx]
            * self.bond_score[atom_idx]
        )

    def _account_for_hybridisation(self, atom):
        """Calculates the hybridisation score for a single atom in a molecule"""
        hyb_type = str(atom.GetHybridization())
        return self._HYBRIDISATION_SCORES.get(hyb_type, self._OTHER_HYBRIDISATION_SCORE)

    def _account_for_stereo(self, atom_idx):
        """Calculates the stereo score for a single atom in a molecule"""
        if atom_idx in self.chiral_idxs:
            return 2
        # atoms of a double bond carrying any stereo flag other than "none" are promoted as well
        on_stereo_double_bond = any(
            atom_idx in bond_atom_idxs and stereo != Chem.BondStereo.STEREONONE
            for bond_atom_idxs, stereo in self.doublebonds_stereo.items()
        )
        return 2 if on_stereo_double_bond else 1

    def _account_for_ring(self, atom):
        """Calculates the ring score for a single atom in a molecule"""
        if atom.GetIsAromatic():  # aromatic rings are not promoted
            return 1
        return 2 if atom.IsInRing() else 1

    def _account_for_neighbours(self, atom):
        """Calculates the neighbour score for a single atom in a molecule
        The second power allows to account for branching in the molecular structure"""
        return atom.GetDegree() ** 2


def smiles_to_mol(smiles: str):
    """ Generate a RDKit Molecule from SMILES.

    Parameters:
    ===========
    smiles: the input string

    Returns:
    ========
    The RDKit Molecule. If the Smiles parsing failed, NAN is returned instead.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        # e.g. a non-string argument rejected by RDKit; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed
        return np.nan
    return mol if mol is not None else np.nan


def close_files(open_files: tuple):
    """Closes open files"""
    for file in open_files:
        file.close()


def _score_smiles(smiles, verbose=False):
    """Returns the pair (SPS, nSPS) for a SMILES string, or (NAN, NAN) if it cannot be parsed"""
    mol = smiles_to_mol(smiles)
    if mol is np.nan:
        return np.nan, np.nan
    sps = SpacialScore(smiles, mol, verbose)
    return sps.score, sps.per_atom_score


def calculate_score_from_smiles(smiles: str, per_atom=False, verbose=False) -> float:
    """ Calculates the spacial score as a total SPS or size-normalised, per-atom nSPS for a molecule.

    Parameters:
    ===========
    smiles: valid SMILES string
    per_atom: flag to denote if the normalised per-atom result (nSPS) should be returned
    verbose: flag to denote if the detailed scores for each atom should be printed

    Returns:
    ========
    Total or per-atom numeric spacial score for the provided molecule.
    NAN is returned if the SMILES string cannot be parsed, and for the
    per-atom score of a molecule without heavy atoms.
    """
    total, normalised = _score_smiles(smiles, verbose)
    return normalised if per_atom else total


def _report_single_smiles(smiles, total_score, verbose):
    """Prints the score calculated for one SMILES string provided on the command line"""
    result = calculate_score_from_smiles(smiles, per_atom=(not total_score), verbose=verbose)
    if not verbose:
        score_type = "SPS" if total_score else "nSPS"
        print(f"\nNormalisation Applied: {not total_score}\nSMILES: {smiles}\nCalculated {score_type}: {result}")
    # the total SPS is an int, so only floats can be NaN
    if isinstance(result, float) and math.isnan(result):
        print("\nPlease double-check your input SMILES string...")


def _process_file(filename, output_name, total_score, verbose, confirmation):
    """Reads a .csv/.tsv file and writes a .csv copy with the score column(s) added"""
    # validate everything before any file is created or truncated
    lowered_name = filename.lower()
    if lowered_name.endswith("csv"):
        dialect = "excel"
    elif lowered_name.endswith("tsv"):
        dialect = "excel-tab"
    else:
        raise ValueError(f"Unknown input file format: {filename}")

    # splitext only strips the final extension, so "./dir/my.data.csv" keeps its directory and stem
    output_filename = output_name if output_name else os.path.splitext(filename)[0] + "_SPS.csv"
    if os.path.abspath(output_filename) == os.path.abspath(filename):
        # opening the output for writing would truncate the input before it is read
        raise ValueError(f"Output file must differ from the input file: {filename}")

    score_columns = ["SPS", "nSPS"] if total_score else ["nSPS"]

    # newline="" is what the csv module expects for both reading and writing
    with open(filename, "r", newline="") as infile:
        reader = csv.DictReader(infile, dialect=dialect)
        header = list(reader.fieldnames or [])
        if "Smiles" not in header:
            raise KeyError("Please make sure that your file contains column called 'Smiles' with SMILES strings")
        # an already existing score column is overwritten in place instead of being duplicated
        fieldnames = header + [column for column in score_columns if column not in header]

        print("\nProcessing, please wait...")
        with open(output_filename, "w", newline="") as outfile:
            # short rows are padded with "" and surplus cells are dropped, so every
            # output row has exactly one value per header column
            writer = csv.DictWriter(outfile, fieldnames=fieldnames, restval="",
                                    extrasaction="ignore", lineterminator="\n")
            writer.writeheader()
            for row in reader:
                # one parse and one scoring pass per molecule, even when both scores are written
                total, normalised = _score_smiles(row["Smiles"], verbose)
                if total_score:
                    row["SPS"] = total
                row["nSPS"] = normalised
                writer.writerow(row)
                if confirmation:
                    print("Finished calculations for:", row["Smiles"])

    print(f"Finished. {output_filename} was saved.")


def process_input(smiles: str, filename: str, output_name: str, total_score: bool, verbose: bool,
                  confirmation: bool = False):
    """Processes the command line input to print out the resulting score or create a file with added results

    Parameters:
    ===========
    smiles: SMILES string to score directly; takes precedence over filename
    filename: path of a .csv or .tsv file containing a column called "Smiles"
    output_name: path of the output .csv file; defaults to the input name with "_SPS.csv" appended to its stem
    total_score: flag to denote if the total (un-normalised) SPS should be reported;
                 for files the SPS column is written in addition to nSPS
    verbose: flag to denote if the detailed scores for each atom should be printed
    confirmation: flag to denote if a message should be printed after each processed row of a file

    Raises:
    =======
    ValueError: if neither smiles nor filename is provided, if the file is not .csv/.tsv,
                or if the output file would overwrite the input file
    KeyError: if the input file has no column called "Smiles"
    """
    if smiles:  # process a directly provided SMILES string
        _report_single_smiles(smiles, total_score, verbose)
    elif filename:  # process provided file
        _process_file(filename, output_name, total_score, verbose, confirmation)
    else:
        raise ValueError("No input was provided")


def _build_parser():
    """Creates the command line argument parser"""
    parser = argparse.ArgumentParser(
        description=(
            'Script for calculating Spacial Score (SPS) or normalised SPS (nSPS) for small molecules.'
            '\nThe script can calculate the scores for a direct SMILES input or for a .csv or .tsv file containing a list of SMILES.'
            '\nnSPS is calculated by deafult.'
        ),
        usage=None, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-s', action="store",
                        metavar="SMILES string",
                        help='Your input SMILES string for which to calculate the score', default=None)
    parser.add_argument('-i', action="store",
                        help='Your .csv or .tsv file containing column called "Smiles" which contains SMILES strings. Resutls will be saved in a new .csv file',
                        metavar='filename.ext',
                        default=None)
    parser.add_argument('-o', action="store",
                        help='You can specify name of the output .csv file. Not required.',
                        metavar='filename.csv',
                        default=None)
    parser.add_argument('-t', action="store_true",
                        help='Option to calculate total SPS (no normalisation).',
                        default=False)
    parser.add_argument('-v', action="store_true",
                        help='Option to print verbose results, with information for each atom index.',
                        default=False)
    parser.add_argument('-p', action="store_true",
                        help='Option to print confirmation after processing of each SMILES string in a file.',
                        default=False)
    return parser


def main():
    """Command line entry point: parses the arguments and runs the requested calculation"""
    parser = _build_parser()

    # without any argument there is nothing to calculate; show the help instead
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    process_input(smiles=args.s, filename=args.i, output_name=args.o,
                  total_score=args.t, verbose=args.v, confirmation=args.p)


if __name__ == "__main__":
    main()