├── environment.yml
├── example_input_output_files
    ├── example_input_file.csv
    └── example_output_file.csv
├── license.txt
├── test_spacial_score.py
├── README.md
└── spacial_score.py


/environment.yml:
--------------------------------------------------------------------------------
1 | name: my_sps_env
2 | channels:
3 |   - conda-forge
4 |   - defaults
5 | dependencies:
6 |   - python>=3.8
7 |   - rdkit=2021.09.3
8 |   - numpy=1.21.5
9 |   - pytest


--------------------------------------------------------------------------------
/example_input_output_files/example_input_file.csv:
--------------------------------------------------------------------------------
 1 | Smiles,ID
 2 | C1COCC(=O)N1c1ccc(cc1)N1C[C@@H](OC1=O)CNC(=O)c1ccc(s1)Cl,1
 3 | C[C@@H]1CC[C@@]23CCC(=O)[C@H]2[C@@]1([C@@H](C[C@@]([C@H]([C@@H]3C)O)(C)C=C)OC(=O)CO)C,2
 4 | C[C@@H]1CCC23[C@H]1CC[C@]2([C@H]1C[C@]2(CC[C@H]([C@@H]2CC1=C3C(=O)O)C(C)C)C)C,3
 5 | CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C,4
 6 | O=C(OCC1=CC=CC=C1)C=P(C2=CC=CC=C2)(C3=CC=CC=C3)C4=CC=CC=C4,5
 7 | BrC(C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O,6
 8 | O[C@H]([C@H](C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O)/C=C(C(C)(C)C)\Br,7
 9 | O[C@H]1C=C(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,8
10 | O[C@H]1C[C@@](O)(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,9
11 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,10
12 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,11
13 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,12
14 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,13
15 | O=C1C[C@]2(C(OCC3=CC=CC=C3)=O)[C@]4(CC(OCC5=CC=CC=C5)=O)[C@](O[C@@H]4OC)(C(C)(C)C)C[C@]2([H])O1,14
16 | O=C1C[C@]23C([C@]4([H])OC3=O)(CC(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,15
17 | O=C1C[C@]23C([C@]4([H])OC3=O)([C@](O)([H])C(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,16
18 | 


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2023, Waldmann Lab.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/example_input_output_files/example_output_file.csv:
--------------------------------------------------------------------------------
 1 | Smiles,ID,nSPS
 2 | C1COCC(=O)N1c1ccc(cc1)N1C[C@@H](OC1=O)CNC(=O)c1ccc(s1)Cl,1,19.413793103448278
 3 | C[C@@H]1CC[C@@]23CCC(=O)[C@H]2[C@@]1([C@@H](C[C@@]([C@H]([C@@H]3C)O)(C)C=C)OC(=O)CO)C,2,49.7037037037037
 4 | C[C@@H]1CCC23[C@H]1CC[C@]2([C@H]1C[C@]2(CC[C@H]([C@@H]2CC1=C3C(=O)O)C(C)C)C)C,3,53.48148148148148
 5 | CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C,4,46.535714285714285
 6 | O=C(OCC1=CC=CC=C1)C=P(C2=CC=CC=C2)(C3=CC=CC=C3)C4=CC=CC=C4,5,10.933333333333334
 7 | BrC(C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O,6,12.482758620689655
 8 | O[C@H]([C@H](C(OCC1=CC=CC=C1)=O)/C(C(OC)OC)=C/C(OCC2=CC=CC=C2)=O)/C=C(C(C)(C)C)\Br,7,14.27027027027027
 9 | O[C@H]1C=C(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,8,21.805555555555557
10 | O[C@H]1C[C@@](O)(C(C)(C)C)[C@](CC(OCC2=CC=CC=C2)=O)(C(OC)OC)[C@H]1C(OCC3=CC=CC=C3)=O,9,25.72972972972973
11 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,10,29.685714285714287
12 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@H]1C(OCC4=CC=CC=C4)=O,11,27.6
13 | O=C1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,12,28.513513513513512
14 | O[C@H]1C[C@@](O[C@@H]2OC)(C(C)(C)C)[C@@]2(CC(OCC3=CC=CC=C3)=O)[C@]1(C#C)C(OCC4=CC=CC=C4)=O,13,30.486486486486488
15 | O=C1C[C@]2(C(OCC3=CC=CC=C3)=O)[C@]4(CC(OCC5=CC=CC=C5)=O)[C@](O[C@@H]4OC)(C(C)(C)C)C[C@]2([H])O1,14,31.526315789473685
16 | O=C1C[C@]23C([C@]4([H])OC3=O)(CC(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,15,49.36363636363637
17 | O=C1C[C@]23C([C@]4([H])OC3=O)([C@](O)([H])C(O4)=O)[C@](O)(C(C)(C)C)C[C@]2([H])O1,16,51.0
18 | 


--------------------------------------------------------------------------------
/test_spacial_score.py:
--------------------------------------------------------------------------------
 1 | # Script for testing of spacial_score.py
 2 | 
 3 | import numpy as np
 4 | import rdkit
 5 | import spacial_score as sps
 6 | 
 7 | 
 8 | valid_smiles = r"C/C=C\C1C=CCC(Br)C1C2=CC(C#C)=CC=C2"
 9 | invalid_smiles = r"abcx--a"
10 | 
11 | 
def test_smiles_to_mol():
    """Check SMILES parsing: a valid string gives an RDKit Mol, an invalid one gives NaN."""
    parsed_ok = sps.smiles_to_mol(valid_smiles)
    parsed_bad = sps.smiles_to_mol(invalid_smiles)

    # smiles_to_mol returns the np.nan object itself on failure, so identity is the right check
    assert parsed_bad is np.nan
    assert isinstance(parsed_ok, rdkit.Chem.rdchem.Mol)
19 | 
20 | 
def create_sps_object():
    """Return a SpacialScore instance built from the module-level valid SMILES string."""
    return sps.SpacialScore(valid_smiles, sps.smiles_to_mol(valid_smiles))
26 | 
27 | 
def calc_sum_score(score_type):
    """Return the sum of all per-atom values stored in the ``score_type`` dictionary."""
    return sum(score_type.values())
34 | 
35 | 
def test_hybridisation_score():
    """The per-atom hybridisation terms of the test molecule must sum to 40."""
    per_atom_terms = create_sps_object().hyb_score
    assert calc_sum_score(per_atom_terms) == 40
40 | 
41 | 
def test_stereo_score():
    """The per-atom stereo terms of the test molecule must sum to 23."""
    per_atom_terms = create_sps_object().stereo_score
    assert calc_sum_score(per_atom_terms) == 23
47 | 
48 | 
def test_ring_score():
    """The per-atom ring terms of the test molecule must sum to 24."""
    per_atom_terms = create_sps_object().ring_score
    assert calc_sum_score(per_atom_terms) == 24
53 | 
54 | 
def test_bond_score():
    """The per-atom neighbour (bond) terms of the test molecule must sum to 88."""
    per_atom_terms = create_sps_object().bond_score
    assert calc_sum_score(per_atom_terms) == 88
59 | 
60 | 
def test_SPS_from_smiles():
    """The un-normalised (total) SPS of the test molecule must equal 491."""
    total_sps = sps.calculate_score_from_smiles(valid_smiles)
    assert total_sps == 491
65 | 
66 | 
def test_nSPS_from_smiles():
    """The normalised SPS (nSPS) of the test molecule, rounded to 2 decimals, must equal 27.28."""
    normalised_sps = sps.calculate_score_from_smiles(valid_smiles, per_atom=True)
    assert round(normalised_sps, 2) == 27.28
71 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Spacial-Score
  2 | <b>A Comprehensive Topological Indicator for Small Molecule Complexity</b>
  3 | <br>(Created by the Waldmann Lab at the Max Planck Institute of Molecular Physiology, Dortmund)
  4 | 
  5 | The score is intended for assessing molecular topology of organic molecules and to improve upon the idea of the fraction of stereo and sp<sup>3</sup> carbons. The score is described in our J. Med. Chem. paper: [Spacial Score – A Comprehensive Topological Indicator for Small Molecule Complexity](https://doi.org/10.1021/acs.jmedchem.3c00689).
  6 | 
  7 | > [!IMPORTANT]
  8 | > **The SPS indicator is now also a part of the RDKit package and available through the [rdkit.Chem.SpacialScore](https://www.rdkit.org/docs/source/rdkit.Chem.SpacialScore.html#module-rdkit.Chem.SpacialScore) module. The SPS script is actively maintained as part of the RDKit package, and thus, it is recommended to calculate the SPS through RDKit.**
  9 | 
 10 | <img src="https://repository-images.githubusercontent.com/609714000/d6a66509-cce9-4318-8023-17c674cd8cf7" alt="logo" width="500" align="center"/>
 11 | 
 12 | ***
 13 | ### Required Python Packages
 14 | The script requires [RDKit package](https://www.rdkit.org/) and [NumPy](https://numpy.org/).
 15 | To install the required packages through [Conda](https://docs.conda.io/en/latest/miniconda.html), simply use the `environment.yml` file:
 16 | ```
 17 | conda env create -f environment.yml
 18 | ```
 19 | The script can be used after activation of the just created conda environment:
 20 | ```
 21 | conda activate my_sps_env
 22 | ```
 23 | To display the options of `spacial_score.py`, type:
 24 | ```
 25 | python spacial_score.py -h
 26 | ```
 27 | (Please remember that `spacial_score.py` needs to be in your current directory)
 28 | ***
 29 | ### Using the Script Through Command Line
 30 | The script can be used directly from a command line, reading either a directly provided SMILES string `(-s)` or a .csv/.tsv file `(-i)`:
 31 | ```
 32 | usage: spacial_score.py [-h] [-s SMILES string] [-i filename.ext] [-o filename.csv] [-t] [-v] [-p]
 33 | 
 34 | Script for calculating Spacial Score (SPS) or normalised SPS (nSPS) for small molecules.
 35 | The script can calculate the scores for a direct SMILES input or for a .csv or .tsv file containing a list of SMILES.
 36 | nSPS is calculated by default.
 37 | 
 38 | optional arguments:
 39 |   -h, --help        show this help message and exit
 40 |   -s SMILES string  Your input SMILES string for which to calculate the score
 41 |   -i filename.ext   Your .csv or .tsv file containing column called "Smiles" which contains SMILES strings. Results will be
 42 |                     saved in a new .csv file
 43 |   -o filename.csv   You can specify name of the output .csv file. Not required.
 44 |   -t                Option to calculate total SPS (no normalisation).
 45 |   -v                Option to print verbose results, with information for each atom index.
 46 |   -p                Option to print confirmation after processing of each SMILES string in a file.
 47 | ```
 48 | 
 49 | To calculate nSPS directly from a SMILES string you can just type:
 50 | ```
 51 | python spacial_score.py -s CC(C)CBr
 52 | ```
 53 | Where CC(C)CBr is just an example of a SMILES string (on Linux you may need to use quotation marks "CC(C)CBr").
 54 | This returns:
 55 | ```
 56 | Normalisation Applied: True
 57 | SMILES: CC(C)CBr
 58 | Calculated nSPS: 9.6
 59 | ```
 60 | 
 61 | To calculate the un-normalised (total) SPS you need to add option `-t`:
 62 | ```
 63 | python spacial_score.py -s CC(C)CBr -t
 64 | ```
 65 | This returns:
 66 | ```
 67 | Normalisation Applied: False
 68 | SMILES: CC(C)CBr
 69 | Calculated SPS: 48
 70 | ```
 71 | 
 72 | A more verbose output can be achieved with the option `-v`: 
 73 | ```
 74 | python spacial_score.py -s CC(C)CBr -v
 75 | ```
 76 | The output:
 77 | ```
 78 | SMILES: CC(C)CBr
 79 | Atom Idx  Element   Hybrid    Stereo    Ring      Neighbs
 80 | ------------------------------------------------------------
 81 | 0         C         3         1         1         1
 82 | 1         C         3         1         1         9
 83 | 2         C         3         1         1         1
 84 | 3         C         3         1         1         4
 85 | 4         Br        3         1         1         1
 86 | ------------------------------------------------------------
 87 | Total Spacial Score: 48
 88 | Per-Atom Score: 9.6
 89 | ```
 90 | 
 91 | To read in a .csv or .tsv file, please type:
 92 | ```
 93 | python spacial_score.py -i your_input_file_name.csv -o your_output_file_name.csv
 94 | ```
 95 | nSPS is calculated by default, and option `-t` can be used to calculate un-normalised SPS. 
 96 | Please, remember that your input file needs to contain a column named `Smiles` containing SMILES which will be used for the calculation of the scores.
 97 | Examples of input and output files can be found in the folder named `example_input_output_files`.
 98 | 
 99 | ***
100 | ### Calculate the Score with a Python Function
101 | The scores can also be calculated by using the following function:
102 | ```
103 | def calculate_score_from_smiles(smiles: str, per_atom=False, verbose=False) -> float:
104 |     """ Calculates the spacial score as a total SPS or size-normalised, per-atom nSPS for a molecule.
105 | 
106 |     Parameters:
107 |     ===========
108 |     smiles: valid SMILES string
109 |     per_atom: flag to denote if the normalised per-atom result (nSPS) should be returned
110 |     verbose: flag to denote if the detailed scores for each atom should be printed
111 | 
112 |     Returns:
113 |     ========
114 |     Total or per-atom numeric spacial score for the provided molecule.
115 |     """
116 | ```
117 | ***
118 | ### Testing the Script
119 | The script in `spacial_score.py` can be tested by running [pytest](https://docs.pytest.org/en/7.2.x/contents.html). In the active `my_sps_env` conda environment type:
120 | ```
121 | pytest
122 | ```
123 | Correct output will look like this:
124 | ```
125 | collected 7 items
126 | 
127 | test_spacial_score.py .......                                                                                    [100%]
128 | 
129 | ================================================== 7 passed in 0.48s ==================================================
130 | 
131 | ```
132 | 


--------------------------------------------------------------------------------
/spacial_score.py:
--------------------------------------------------------------------------------
  1 | # Version 1.0
  2 | 
  3 | from rdkit import Chem
  4 | import rdkit.Chem.Descriptors as Desc
  5 | import numpy as np
  6 | import argparse
  7 | import sys
  8 | import csv
  9 | 
 10 | 
 11 | class SpacialScore:
 12 |     """Class intended for calculating spacial score (SPS) and size-normalised SPS (nSPS) for small organic molecules"""
 13 |     def __init__(self, smiles, mol, verbose=False):
 14 |         self.smiles = smiles
 15 |         self.mol = mol
 16 |         self.verbose = verbose
 17 | 
 18 |         self.hyb_score = {}
 19 |         self.stereo_score = {}
 20 |         self.ring_score = {}
 21 |         self.bond_score = {}
 22 |         self.chiral_idxs = self.find_stereo_atom_idxs()
 23 |         self.doublebonds_stereo = self.find_doublebonds_stereo()
 24 |         self.score = self.calculate_spacial_score()
 25 |         self.per_atom_score = self.score/Desc.HeavyAtomCount(self.mol)
 26 | 
 27 |         if self.verbose:
 28 |             self.display_scores()
 29 | 
 30 | 
 31 |     def display_scores(self):
 32 |         """Displays the individual scores for each molecule atom"""
 33 | 
 34 |         print("SMILES:", self.smiles)
 35 |         print("Atom Idx".ljust(10, " "), end="")
 36 |         print("Element".ljust(10, " "), end="")
 37 |         print("Hybrid".ljust(10, " "), end="")
 38 |         print("Stereo".ljust(10, " "), end="")
 39 |         print("Ring".ljust(10, " "), end="")
 40 |         print("Neighbs".ljust(10, " "))
 41 |         print("".ljust(60, "-"))
 42 | 
 43 |         for atom in self.mol.GetAtoms():
 44 |             atom_idx = atom.GetIdx()
 45 |             print(str(atom_idx).ljust(10, " "), end="")
 46 |             print(str(Chem.rdchem.Atom.GetSymbol(atom)).ljust(10, " "), end="")
 47 |             print(str(self.hyb_score[atom_idx]).ljust(10, " "), end="")
 48 |             print(str(self.stereo_score[atom_idx]).ljust(10, " "), end="")
 49 |             print(str(self.ring_score[atom_idx]).ljust(10, " "), end="")
 50 |             print(str(self.bond_score[atom_idx]).ljust(10, " "))
 51 |        
 52 |         print("".ljust(60, "-"))
 53 |         print("Total Spacial Score:", self.score)
 54 |         print("Per-Atom Score:", self.per_atom_score.__round__(2), "\n")
 55 | 
 56 | 
 57 |     def find_stereo_atom_idxs(self, includeUnassigned=True):
 58 |         """Finds indeces of atoms that are (pseudo)stereo/chiralcentres, in repsect to the attached groups (does not account for double bond isomers)"""
 59 |         stereo_centers = Chem.FindMolChiralCenters(self.mol, includeUnassigned=includeUnassigned, includeCIP=False, useLegacyImplementation=False)
 60 |         stereo_idxs = [atom_idx for atom_idx, _ in stereo_centers]
 61 |         return stereo_idxs
 62 |     
 63 | 
 64 |     def find_doublebonds_stereo(self):
 65 |         """Finds indeces of stereo double bond atoms (E/Z)"""
 66 |         db_stereo = {}
 67 |         for bond in self.mol.GetBonds():
 68 |             if str(bond.GetBondType()) == "DOUBLE":
 69 |                 db_stereo[(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())] = bond.GetStereo()
 70 |         return db_stereo
 71 | 
 72 | 
 73 |     def calculate_spacial_score(self):
 74 |         """Calculates the total spacial score for a molecule"""
 75 |         score = 0
 76 |         for atom in self.mol.GetAtoms():
 77 |             atom_idx = atom.GetIdx()
 78 |             self.hyb_score[atom_idx] = self._account_for_hybridisation(atom)
 79 |             self.stereo_score[atom_idx] = self._account_for_stereo(atom_idx)
 80 |             self.ring_score[atom_idx] = self._account_for_ring(atom)
 81 |             self.bond_score[atom_idx] = self._account_for_neighbours(atom)
 82 |             score += self._calculate_score_for_atom(atom_idx)
 83 |         return score
 84 | 
 85 | 
 86 |     def _calculate_score_for_atom(self, atom_idx):
 87 |         """Calculates the total score for a single atom in a molecule"""
 88 |         atom_score = self.hyb_score[atom_idx] * self.stereo_score[atom_idx] * self.ring_score[atom_idx] * self.bond_score[atom_idx]
 89 |         return atom_score
 90 | 
 91 | 
 92 |     def _account_for_hybridisation(self, atom):
 93 |         """Calculates the hybridisation score for a single atom in a molecule"""
 94 |         hybridisations = {"SP": 1, "SP2": 2, "SP3": 3}
 95 |         hyb_type = str(atom.GetHybridization())
 96 | 
 97 |         if hyb_type in hybridisations.keys():
 98 |             return hybridisations[hyb_type]
 99 |         return 4  # h score for any other hybridisation than sp, sp2 or sp3 
100 |     
101 | 
102 |     def _account_for_stereo(self, atom_idx):
103 |         """Calculates the stereo score for a single atom in a molecule"""
104 |         if atom_idx in self.chiral_idxs:
105 |             return 2
106 |         for bond_atom_idxs, stereo in self.doublebonds_stereo.items():
107 |             if atom_idx in bond_atom_idxs and not(str(stereo).endswith("NONE")):
108 |                 return 2
109 |         return 1
110 | 
111 | 
112 |     def _account_for_ring(self, atom):
113 |         """Calculates the ring score for a single atom in a molecule"""
114 |         if atom.GetIsAromatic():  # aromatic rings are not promoted 
115 |             return 1
116 |         if atom.IsInRing():
117 |             return 2
118 |         return 1
119 | 
120 | 
121 |     def _account_for_neighbours(self, atom):
122 |         """Calculates the neighbour score for a single atom in a molecule
123 |         The second power allows to account for branching in the molecular structure"""
124 |         return (len(atom.GetNeighbors()))**2
125 | 
126 | 
def smiles_to_mol(smiles: str):
    """ Generate a RDKit Molecule from SMILES.

    Parameters:
    ===========
    smiles: the input string

    Returns:
    ========
    The RDKit Molecule. If the Smiles parsing failed (the parser returned None
    or raised), NAN is returned instead.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        # Deliberately broad so any parser failure maps to NaN, but no longer a
        # bare ``except:`` which also swallowed KeyboardInterrupt and SystemExit.
        return np.nan
    return mol if mol is not None else np.nan
145 | 
146 | 
def close_files(open_files: tuple):
    """Close each file object in ``open_files``, in the order given."""
    for handle in open_files:
        handle.close()
151 | 
152 | 
def process_input(smiles: str, filename: str, output_name: str, total_score: bool, verbose: bool, confirmation: bool = False):
    """Processes the command line input to print out the resulting score or create a file with added results.

    Parameters:
    ===========
    smiles: SMILES string to score directly; takes precedence over ``filename`` when both are given
    filename: path to an input file ending in "csv" or "tsv" with a column called "Smiles"
    output_name: name of the output .csv file; defaults to the input path without its extension plus "_SPS.csv"
    total_score: if True the total SPS is reported (for files: an "SPS" column is added next to "nSPS")
    verbose: passed on to the score calculation to print the per-atom table
    confirmation: if True a line is printed after each processed row of a file

    Raises:
    =======
    ValueError: if neither a SMILES string nor a file name was provided, or the file format is unknown
    KeyError: if the input file has no column called "Smiles"
    """
    import os  # local import: only needed here, to derive the default output file name

    if smiles:  # process a directly provided SMILES string
        result = calculate_score_from_smiles(smiles, per_atom=(not total_score), verbose=verbose)
        if not verbose:
            score_type = "SPS" if total_score else "nSPS"
            print(f"\nNormalisation Applied: {not total_score}\nSMILES: {smiles}\nCalculated {score_type}: {result}")
        if result is np.nan:
            print("\nPlease double-check your input SMILES string...")
        return

    if not filename:
        raise ValueError("No input was provided")

    # Validate the input format BEFORE creating the output file, so that a
    # rejected input does not leave an empty output file (and open handle) behind.
    if filename.endswith("csv"):
        dialect = "excel"
    elif filename.endswith("tsv"):
        dialect = "excel-tab"
    else:
        raise ValueError(f"Unknown input file format: {filename}")

    # splitext strips only the final extension, so directory parts containing
    # dots (e.g. "./data/input.csv") no longer truncate the derived output name.
    output_filename = output_name if output_name else os.path.splitext(filename)[0] + "_SPS.csv"

    # newline="" is what the csv module expects for files it reads and writes.
    with open(filename, "r", newline="") as infile:
        reader = csv.DictReader(infile, dialect=dialect)
        header = list(reader.fieldnames or [])  # existing headers
        if "Smiles" not in header:
            raise KeyError("Please make sure that your file contains column called 'Smiles' with SMILES strings")

        # add the nSPS column (and the SPS column when requested) unless already present
        new_columns = ["SPS", "nSPS"] if total_score else ["nSPS"]
        header.extend(column for column in new_columns if column not in header)

        print("\nProcessing, please wait...")
        with open(output_filename, "w", newline="") as outfile:
            # csv.DictWriter quotes fields containing delimiters or quotes, which
            # a plain ",".join() did not. extrasaction="ignore" drops surplus
            # cells of over-long rows; missing cells are written empty.
            writer = csv.DictWriter(outfile, fieldnames=header, extrasaction="ignore", lineterminator="\n")
            writer.writeheader()

            for row in reader:
                row_smiles = row["Smiles"]
                if total_score:
                    row["SPS"] = calculate_score_from_smiles(row_smiles, per_atom=False, verbose=verbose)
                row["nSPS"] = calculate_score_from_smiles(row_smiles, per_atom=True, verbose=verbose)
                writer.writerow(row)
                if confirmation:
                    print("Finished calculations for:", row_smiles)

    print(f"Finished. {output_filename} was saved.")
206 | 
207 | 
def calculate_score_from_smiles(smiles: str, per_atom=False, verbose=False) -> float:
    """ Compute the total SPS or the size-normalised, per-atom nSPS for a molecule given as SMILES.

    Parameters:
    ===========
    smiles: valid SMILES string
    per_atom: if True the normalised per-atom result (nSPS) is returned, otherwise the total SPS
    verbose: if True the detailed scores for each atom are printed

    Returns:
    ========
    Total or per-atom numeric spacial score for the provided molecule;
    NAN if the SMILES string could not be converted to a molecule.
    """
    molecule = smiles_to_mol(smiles)
    if molecule is np.nan:  # parsing failed, nothing to score
        return np.nan

    scorer = SpacialScore(smiles, molecule, verbose)
    return scorer.per_atom_score if per_atom else scorer.score
228 | 
229 | 
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to process_input().

    # The description is built from adjacent string literals so that no source
    # indentation leaks into the help text (the former backslash continuations
    # embedded four stray spaces before every line break).
    parser = argparse.ArgumentParser(
        description=(
            "Script for calculating Spacial Score (SPS) or normalised SPS (nSPS) for small molecules.\n"
            "The script can calculate the scores for a direct SMILES input or for a .csv or .tsv file "
            "containing a list of SMILES.\n"
            "nSPS is calculated by default."
        ),
        usage=None,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-s', action="store",
                        metavar="SMILES string",
                        help='Your input SMILES string for which to calculate the score',
                        default=None)
    parser.add_argument('-i', action="store",
                        metavar='filename.ext',
                        help='Your .csv or .tsv file containing column called "Smiles" which contains SMILES strings. '
                             'Results will be saved in a new .csv file',
                        default=None)
    parser.add_argument('-o', action="store",
                        metavar='filename.csv',
                        help='You can specify name of the output .csv file. Not required.',
                        default=None)
    parser.add_argument('-t', action="store_true",
                        help='Option to calculate total SPS (no normalisation).',
                        default=False)
    parser.add_argument('-v', action="store_true",
                        help='Option to print verbose results, with information for each atom index.',
                        default=False)
    parser.add_argument('-p', action="store_true",
                        help='Option to print confirmation after processing of each SMILES string in a file.',
                        default=False)

    # Called without any argument: show the help text instead of failing.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    ARGS = parser.parse_args()

    # Only flags given (e.g. just "-t"): report a usage error instead of letting
    # process_input() fail with a ValueError traceback.
    if not (ARGS.s or ARGS.i):
        parser.error("no input was provided: use -s for a SMILES string or -i for a .csv/.tsv file")

    process_input(smiles=ARGS.s, filename=ARGS.i, output_name=ARGS.o, total_score=ARGS.t, verbose=ARGS.v, confirmation=ARGS.p)
264 | 


--------------------------------------------------------------------------------