├── .github └── workflows │ └── ci.yml ├── .gitignore ├── README.md ├── assets ├── logo.png ├── output_one.png └── output_two.png ├── setup.py ├── tests └── test_explainer.py ├── tutorial.ipynb └── vsa_explainer ├── __init__.py ├── develop.ipynb └── explainer.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: '3.9' 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install -e .[dev] 25 | 26 | - name: Run tests 27 | run: | 28 | pytest --maxfail=1 --disable-warnings -q 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | build/ 11 | dist/ 12 | *.egg-info/ 13 | .eggs/ 14 | 15 | # Installer logs 16 | pip-log.txt 17 | 18 | # Unit test / coverage reports 19 | .coverage 20 | .coverage.* 21 | .pytest_cache/ 22 | htmlcov/ 23 | .tox/ 24 | 25 | # Jupyter Notebooks 26 | .ipynb_checkpoints/ 27 | 28 | # Virtual environments 29 | venv/ 30 | ENV/ 31 | env/ 32 | .venv/ 33 | 34 | # IDE settings 35 | .vscode/ 36 | .idea/ 37 | 38 | # Mac 39 | .DS_Store 40 | 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | vsa_explainer Logo 3 |

4 |

5 | vsa_explainer: A simple Python package to visualize and explain RDKit SlogP_VSA, SMR_VSA, PEOE_VSA, EState_VSA, and VSA_EState descriptors and their per-atom contributions 6 |

7 |
8 | 9 | [![PyPI](https://img.shields.io/pypi/v/vsa_explainer.svg)](https://pypi.org/project/vsa_explainer/) 10 | [![Python](https://img.shields.io/pypi/pyversions/vsa_explainer.svg)](https://pypi.org/project/vsa_explainer/) 11 | [![Python Tests](https://github.com/srijitseal/vsa_explainer/actions/workflows/ci.yml/badge.svg)](https://github.com/srijitseal/vsa_explainer/actions/workflows/ci.yml) 12 | [![Repo Size](https://img.shields.io/github/repo-size/srijitseal/vsa_explainer.svg)](https://github.com/srijitseal/vsa_explainer) 13 | 14 | --- 15 | 16 | ## 📌 Installation 17 | ```bash 18 | pip install vsa_explainer 19 | ``` 20 | 21 | ## 📌 Quick Usage 22 | ```python 23 | from vsa_explainer import visualize_vsa_contributions 24 | 25 | # Highlight per-atom contributions to SMR_VSA7 and EState_VSA5 26 | smiles = "C1CO[C@@H]1CN2C3=C(C=CC(=C3)C(=O)O)N=C2CN4CCC(CC4)C5=NC(=CC=C5)OCC6=C(C=C(C=C6)C#N)F" 27 | visualize_vsa_contributions(smiles, ["SMR_VSA7", "EState_VSA5"]) 28 | ``` 29 | 30 |

31 | vsa_explainer output 1 32 |

33 |

34 | vsa_explainer output 2 35 |

36 | 37 | - Draws an SVG of your molecule with atoms colored by their contribution to each selected VSA descriptor. 38 | - Displays a table reporting per-atom values, contributions, and percentage of the total. 39 | 40 | 41 | ## 📌 Support 42 | 43 | - **SMR_VSA** 44 | MOE-type descriptors using MR contributions and surface area contributions 45 | 46 | - **SlogP_VSA** 47 | MOE-type descriptors using LogP contributions and surface area contributions 48 | 49 | - **PEOE_VSA** 50 | MOE-type descriptors using partial charges and surface area contributions 51 | 52 | - **EState_VSA** 53 | MOE-type descriptors using EState indices and surface area contributions (developed at RD, not described in the CCG paper) 54 | 55 | - **VSA_EState** 56 | MOE-type descriptors using EState indices and surface area contributions (developed at RD, not described in the CCG paper) 57 | 58 | 59 | 60 | ## 📌 Contributing 61 | 1. Fork the repo 62 | 2. Create your feature branch (`git checkout -b feature/YourFeature`) 63 | 3. Commit your changes (`git commit -m "Add feature"`) 64 | 4. Push to the branch (`git push origin feature/YourFeature`) 65 | 5. Open a Pull Request 66 | 67 | ## 📌 License 68 | Released under the MIT License. See LICENSE for details. 69 | 70 | ✨ Enjoy exploring molecular surface areas with vsa_explainer! 
71 | -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srijitseal/vsa_explainer/00192d545b30754b5791ca9ca944b13b7a40c723/assets/logo.png -------------------------------------------------------------------------------- /assets/output_one.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srijitseal/vsa_explainer/00192d545b30754b5791ca9ca944b13b7a40c723/assets/output_one.png -------------------------------------------------------------------------------- /assets/output_two.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/srijitseal/vsa_explainer/00192d545b30754b5791ca9ca944b13b7a40c723/assets/output_two.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import pathlib 3 | 4 | # read the README file 5 | here = pathlib.Path(__file__).parent 6 | long_description = (here / "README.md").read_text(encoding="utf-8") 7 | 8 | setup( 9 | name="vsa_explainer", 10 | version="0.2.0", 11 | description="Visualize and explain RDKit VSA descriptor contributions", 12 | 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", # tells PyPI to render Markdown 15 | 16 | 17 | author="Srijit Seal", 18 | author_email="seal@understanding.bio", 19 | license="MIT", 20 | packages=find_packages(), # finds vsa_explainer/ 21 | 22 | install_requires=[ 23 | "numpy>=1.18", 24 | "matplotlib>=3.0", 25 | "rdkit", # see rdkit install instructions for your platform 26 | "ipython", # for IPython.display.SVG 27 | ], 28 | 29 | extras_require={ 30 | "dev": ["pytest"], 31 | }, 32 | 
tests_require=["pytest"], 33 | 34 | entry_points={ 35 | "console_scripts": [ 36 | "vsa-explain=vsa_explainer.explainer:visualize_vsa_contributions", 37 | ], 38 | }, 39 | python_requires=">=3.8", 40 | classifiers=[ 41 | "Programming Language :: Python :: 3", 42 | "License :: OSI Approved :: MIT License", 43 | ], 44 | url='https://github.com/srijitseal/vsa_explainer', 45 | ) -------------------------------------------------------------------------------- /tests/test_explainer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from vsa_explainer import ( 3 | load_crippen_data, 4 | get_vsa_bin_bounds, 5 | get_bin_bounds, 6 | visualize_vsa_contributions, 7 | ) 8 | 9 | def test_load_crippen_data(): 10 | data = load_crippen_data() 11 | assert isinstance(data, list) 12 | # each entry is a tuple of length 5 13 | assert all(len(t) == 5 for t in data) 14 | 15 | @pytest.mark.parametrize("bins,idx,expected", [ 16 | ([0.1, 0.5, 1.0], 1, (-float("inf"), 0.1)), 17 | ([0.1, 0.5, 1.0], 2, (0.1, 0.5)), 18 | ([0.1, 0.5, 1.0], 4, (1.0, float("inf"))), 19 | ]) 20 | def test_get_bin_bounds(bins, idx, expected): 21 | assert get_bin_bounds(idx, bins) == expected 22 | 23 | def test_get_vsa_bin_bounds_smrvsa8(): 24 | # SMR_VSA8 should parse from Descriptors.__doc__ 25 | lb, ub = get_vsa_bin_bounds("SMR_VSA8") 26 | assert lb < ub # sanity check 27 | 28 | def test_visualize_vsa_contributions_smoke(capsys): 29 | # just ensure it runs without crashing on a simple molecule 30 | visualize_vsa_contributions("CCO", ["SMR_VSA1"]) 31 | # it should print either contributions or “No atoms contribute…”: 32 | captured = capsys.readouterr() 33 | assert "SMR_VSA1" in captured.out 34 | -------------------------------------------------------------------------------- /tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": 
"5f0b3d65-cbdb-4242-96bc-3fe52306d68f", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "\n", 14 | "No atoms contribute to SMR_VSA7 (range 3.05 to 3.63).\n" 15 | ] 16 | }, 17 | { 18 | "data": { 19 | "image/svg+xml": [ 20 | "\n", 21 | "\n", 22 | " \n", 23 | "\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "\n", 28 | "\n", 29 | "\n", 30 | "\n", 31 | "\n", 32 | "\n", 33 | "\n", 34 | "\n", 35 | "\n", 36 | "\n", 37 | "\n", 38 | "\n", 39 | "\n", 40 | "\n", 41 | "\n", 42 | "\n", 43 | "\n", 44 | "\n", 45 | "\n", 46 | "\n", 47 | "\n", 48 | "" 49 | ], 50 | "text/plain": [ 51 | "" 52 | ] 53 | }, 54 | "metadata": {}, 55 | "output_type": "display_data" 56 | }, 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "\n", 62 | "### EState_VSA5 Contributions — Total: 19.2625\n", 63 | "Bin range: 1.1650 to 1.5400\n", 64 | "Idx Sym Value Contrib % of total\n", 65 | "----------------------------------------\n", 66 | "1 C 1.237 6.421 33.3%\n", 67 | "2 C 1.330 6.421 33.3%\n", 68 | "3 C 1.237 6.421 33.3%\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "from vsa_explainer import visualize_vsa_contributions\n", 74 | "\n", 75 | "smiles = \"C1CCCCC1O\"\n", 76 | "visualize_vsa_contributions(smiles, [\"SMR_VSA7\", \"EState_VSA5\"])\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "id": "7075cab6-1288-42b1-b1db-99a0c305900a", 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3 (ipykernel)", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.9.18" 105 | } 106 | }, 107 | "nbformat": 4, 108 | 
"nbformat_minor": 5 109 | } 110 | -------------------------------------------------------------------------------- /vsa_explainer/__init__.py: -------------------------------------------------------------------------------- 1 | # vsa_explainer/__init__.py 2 | 3 | from .explainer import ( 4 | load_crippen_data, 5 | get_vsa_bin_bounds, 6 | get_bin_bounds, 7 | visualize_vsa_contributions, 8 | ) 9 | 10 | __version__ = "0.2.0" 11 | -------------------------------------------------------------------------------- /vsa_explainer/explainer.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from rdkit import Chem 3 | from rdkit.Chem import rdMolDescriptors, Draw, AllChem, Descriptors 4 | from rdkit.Chem.Draw import rdMolDraw2D, MolToImage 5 | from rdkit.Chem.EState import EState, EState_VSA 6 | from rdkit.Chem.Lipinski import NumHDonors, NumHAcceptors 7 | from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges 8 | import numpy as np 9 | import re 10 | from collections import namedtuple 11 | from IPython.display import SVG, display 12 | 13 | def load_crippen_data(): 14 | """Load Crippen data for atom typing""" 15 | # This data is from the blog post 16 | rdkit_data = '''C1 [CH4] 0.1441 2.503 17 | C1 [CH3]C 0.1441 2.503 18 | C1 [CH2](C)C 0.1441 2.503 19 | C2 [CH](C)(C)C 0 2.433 20 | C2 [C](C)(C)(C)C 0 2.433 21 | C3 [CH3][N,O,P,S,F,Cl,Br,I] -0.2035 2.753 22 | C3 [CH2X4]([N,O,P,S,F,Cl,Br,I])[A;!#1] -0.2035 2.753 23 | C4 [CH1X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])[A;!#1] -0.2051 2.731 24 | C4 [CH0X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])([A;!#1])[A;!#1] -0.2051 2.731 25 | C5 [C]=[!C;A;!#1] -0.2783 5.007 26 | C6 [CH2]=C 0.1551 3.513 27 | C6 [CH1](=C)[A;!#1] 0.1551 3.513 28 | C6 [CH0](=C)([A;!#1])[A;!#1] 0.1551 3.513 29 | C6 [C](=C)=C 0.1551 3.513 30 | C7 [CX2]#[A;!#1] 0.0017 3.888 31 | C8 [CH3]c 0.08452 2.464 32 | C9 [CH3]a -0.1444 2.412 33 | C10 [CH2X4]a -0.0516 2.488 34 | C11 [CHX4]a 0.1193 2.582 35 | C12 
[CH0X4]a -0.0967 2.576 36 | C13 [cH0]-[A;!C;!N;!O;!S;!F;!Cl;!Br;!I;!#1] -0.5443 4.041 37 | C14 [c][#9] 0 3.257 38 | C15 [c][#17] 0.245 3.564 39 | C16 [c][#35] 0.198 3.18 40 | C17 [c][#53] 0 3.104 41 | C18 [cH] 0.1581 3.35 42 | C19 [c](:a)(:a):a 0.2955 4.346 43 | C20 [c](:a)(:a)-a 0.2713 3.904 44 | C21 [c](:a)(:a)-C 0.136 3.509 45 | C22 [c](:a)(:a)-N 0.4619 4.067 46 | C23 [c](:a)(:a)-O 0.5437 3.853 47 | C24 [c](:a)(:a)-S 0.1893 2.673 48 | C25 [c](:a)(:a)=[C,N,O] -0.8186 3.135 49 | C26 [C](=C)(a)[A;!#1] 0.264 4.305 50 | C26 [C](=C)(c)a 0.264 4.305 51 | C26 [CH1](=C)a 0.264 4.305 52 | C26 [C]=c 0.264 4.305 53 | C27 [CX4][A;!C;!N;!O;!P;!S;!F;!Cl;!Br;!I;!#1] 0.2148 2.693 54 | CS [#6] 0.08129 3.243 55 | H1 [#1][#6,#1] 0.123 1.057 56 | H2 [#1]O[CX4,c] -0.2677 1.395 57 | H2 [#1]O[!#6;!#7;!#8;!#16] -0.2677 1.395 58 | H2 [#1][!#6;!#7;!#8] -0.2677 1.395 59 | H3 [#1][#7] 0.2142 0.9627 60 | H3 [#1]O[#7] 0.2142 0.9627 61 | H4 [#1]OC=[#6,#7,O,S] 0.298 1.805 62 | H4 [#1]O[O,S] 0.298 1.805 63 | HS [#1] 0.1125 1.112 64 | N1 [NH2+0][A;!#1] -1.019 2.262 65 | N2 [NH+0]([A;!#1])[A;!#1] -0.7096 2.173 66 | N3 [NH2+0]a -1.027 2.827 67 | N4 [NH1+0]([!#1;A,a])a -0.5188 3 68 | N5 [NH+0]=[!#1;A,a] 0.08387 1.757 69 | N6 [N+0](=[!#1;A,a])[!#1;A,a] 0.1836 2.428 70 | N7 [N+0]([A;!#1])([A;!#1])[A;!#1] -0.3187 1.839 71 | N8 [N+0](a)([!#1;A,a])[A;!#1] -0.4458 2.819 72 | N8 [N+0](a)(a)a -0.4458 2.819 73 | N9 [N+0]#[A;!#1] 0.01508 1.725 74 | N10 [NH3,NH2,NH;+,+2,+3] -1.95 75 | N11 [n+0] -0.3239 2.202 76 | N12 [n;+,+2,+3] -1.119 77 | N13 [NH0;+,+2,+3]([A;!#1])([A;!#1])([A;!#1])[A;!#1] -0.3396 0.2604 78 | N13 [NH0;+,+2,+3](=[A;!#1])([A;!#1])[!#1;A,a] -0.3396 0.2604 79 | N13 [NH0;+,+2,+3](=[#6])=[#7] -0.3396 0.2604 80 | N14 [N;+,+2,+3]#[A;!#1] 0.2887 3.359 81 | N14 [N;-,-2,-3] 0.2887 3.359 82 | N14 [N;+,+2,+3](=[N;-,-2,-3])=N 0.2887 3.359 83 | NS [#7] -0.4806 2.134 84 | O1 [o] 0.1552 1.08 85 | O2 [OH,OH2] -0.2893 0.8238 86 | O3 [O]([A;!#1])[A;!#1] -0.0684 1.085 87 | O4 [O](a)[!#1;A,a] -0.4195 1.182 88 | 
O5 [O]=[#7,#8] 0.0335 3.367 89 | O5 [OX1;-,-2,-3][#7] 0.0335 3.367 90 | O6 [OX1;-,-2,-2][#16] -0.3339 0.7774 91 | O6 [O;-0]=[#16;-0] -0.3339 0.7774 92 | O12 [O-]C(=O) -1.326 93 | O7 [OX1;-,-2,-3][!#1;!N;!S] -1.189 0 94 | O8 [O]=c 0.1788 3.135 95 | O9 [O]=[CH]C -0.1526 0 96 | O9 [O]=C(C)([A;!#1]) -0.1526 0 97 | O9 [O]=[CH][N,O] -0.1526 0 98 | O9 [O]=[CH2] -0.1526 0 99 | O9 [O]=[CX2]=O -0.1526 0 100 | O10 [O]=[CH]c 0.1129 0.2215 101 | O10 [O]=C([C,c])[a;!#1] 0.1129 0.2215 102 | O10 [O]=C(c)[A;!#1] 0.1129 0.2215 103 | O11 [O]=C([!#1;!#6])[!#1;!#6] 0.4833 0.389 104 | OS [#8] -0.1188 0.6865 105 | F [#9-0] 0.4202 1.108 106 | Cl [#17-0] 0.6895 5.853 107 | Br [#35-0] 0.8456 8.927 108 | I [#53-0] 0.8857 14.02 109 | Hal [#9,#17,#35,#53;-] -2.996 110 | Hal [#53;+,+2,+3] -2.996 111 | Hal [+;#3,#11,#19,#37,#55] -2.996 112 | P [#15] 0.8612 6.92 113 | S2 [S;-,-2,-3,-4,+1,+2,+3,+5,+6] -0.0024 7.365 114 | S2 [S-0]=[N,O,P,S] -0.0024 7.365 115 | S1 [S;A] 0.6482 7.591 116 | S3 [s;a] 0.6237 6.691 117 | Me1 [#3,#11,#19,#37,#55] -0.3808 5.754 118 | Me1 [#4,#12,#20,#38,#56] -0.3808 5.754 119 | Me1 [#5,#13,#31,#49,#81] -0.3808 5.754 120 | Me1 [#14,#32,#50,#82] -0.3808 5.754 121 | Me1 [#33,#51,#83] -0.3808 5.754 122 | Me1 [#34,#52,#84] -0.3808 5.754 123 | Me2 [#21,#22,#23,#24,#25,#26,#27,#28,#29,#30] -0.0025 124 | Me2 [#39,#40,#41,#42,#43,#44,#45,#46,#47,#48] -0.0025 125 | Me2 [#72,#73,#74,#75,#76,#77,#78,#79,#80] -0.0025 ''' 126 | 127 | CrippenTuple = namedtuple('CrippenTuple', 128 | ('name', 'smarts', 'logp_contrib', 'mr_contrib', 'note')) 129 | 130 | crippenData = [] 131 | for line in rdkit_data.split('\n'): 132 | line = line.strip() 133 | if not line: 134 | continue 135 | parts = re.split(r'\s+', line) 136 | # we need at least 4 columns: name, smarts, logP, MR 137 | if len(parts) < 4: 138 | continue 139 | name, smarts = parts[0], parts[1] 140 | try: 141 | logp = float(parts[2]) 142 | except ValueError: 143 | logp = None 144 | try: 145 | mr = float(parts[3]) 146 | except ValueError: 147 
| mr = None 148 | note = parts[4] if len(parts) > 4 else "" 149 | crippenData.append(CrippenTuple(name, smarts, logp, mr, note)) 150 | 151 | return crippenData 152 | 153 | 154 | def get_vsa_bin_bounds(descriptor_name): 155 | """ 156 | Given a descriptor like "SMR_VSA3" or "SlogP_VSA1", parse its __doc__ 157 | and return (lower_bound, upper_bound) as floats. 158 | """ 159 | func = getattr(Descriptors, descriptor_name, None) 160 | if func is None or func.__doc__ is None: 161 | raise ValueError(f"No such descriptor {descriptor_name!r} or missing __doc__") 162 | doc = func.__doc__ 163 | # unified pattern: matches "a <= x < b", "a < x < b", including "-inf" 164 | m = re.search( 165 | r"""\(\s*([+-]?\d*\.?\d+|[-]?inf) # group 1: a or -inf 166 | \s*(?:<=|<)\s*x\s*(?:<|<=)\s* 167 | ([+-]?\d*\.?\d+|[-]?inf) # group 2: b or inf 168 | \s*\)""", 169 | doc, 170 | flags=re.IGNORECASE | re.VERBOSE 171 | ) 172 | if m: 173 | lb, ub = m.group(1).lower(), m.group(2).lower() 174 | lower = float("-inf") if lb in ("-inf",) else float(lb) 175 | upper = float("inf") if ub in ("inf", "+inf") else float(ub) 176 | return lower, upper 177 | 178 | # fallback: single‑sided "( x < b )" 179 | m2 = re.search(r"\(\s*x\s*<\s*([+-]?\d*\.?\d+)\s*\)", doc) 180 | if m2: 181 | return float("-inf"), float(m2.group(1)) 182 | # fallback: "( a <= x )" 183 | m3 = re.search(r"\(\s*([+-]?\d*\.?\d+)\s*<=\s*x\s*\)", doc) 184 | if m3: 185 | return float(m3.group(1)), float("inf") 186 | 187 | raise ValueError(f"Could not parse bin bounds from {doc!r}") 188 | 189 | def get_bin_bounds(idx, bins): 190 | """ 191 | idx is 1‑based. bins is a sorted list of length N. 
def get_bin_bounds(idx, bins):
    """Return the ``(lower, upper)`` bounds of the ``idx``-th bin.

    ``bins`` is a sorted sequence of N boundaries, which defines N + 1 bins
    numbered from 1:

        idx == 1        -> (-inf, bins[0])
        2 <= idx <= N   -> (bins[idx-2], bins[idx-1])
        idx == N + 1    -> (bins[-1], inf)

    An empty ``bins`` therefore defines a single bin ``(-inf, inf)``.

    Raises:
        ValueError: If ``idx`` does not select one of the N + 1 bins.
    """
    n_bounds = len(bins)
    if idx == 1:
        # With no boundaries at all, the first bin is also the last one.
        upper = bins[0] if n_bounds else float("inf")
        return float("-inf"), upper
    if 2 <= idx <= n_bounds:
        return bins[idx - 2], bins[idx - 1]
    if idx == n_bounds + 1:
        return bins[-1], float("inf")
    raise ValueError(f"Index {idx} out of range for {n_bounds}-boundary bins")


def get_peoe_charges(mol):
    """Return per-atom Gasteiger charges, used here as the PEOE charges.

    The charges are computed on a copy, so ``mol`` itself is not modified.
    A NaN charge is reported as ``0.0``.

    Args:
        mol: RDKit molecule.

    Returns:
        list: One float per atom, in atom-index order.
    """
    # Imported locally so the function is self-contained.
    import math

    from rdkit import Chem
    from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges

    # Work on a copy to avoid attaching charge properties to the caller's mol.
    mol_copy = Chem.Mol(mol)
    ComputeGasteigerCharges(mol_copy)

    charges = []
    for atom in mol_copy.GetAtoms():
        charge = atom.GetDoubleProp('_GasteigerCharge')
        # Handle NaN values that can occur with the Gasteiger calculation.
        charges.append(0.0 if math.isnan(charge) else charge)
    return charges


def get_peoe_vsa_bins():
    """Return the 13 charge boundaries used to bin PEOE_VSA descriptors.

    Combined with ``get_bin_bounds`` they define bins 1..14.
    A new list is returned on every call.
    """
    return [-0.30, -0.25, -0.20, -0.15, -0.10, -0.05, 0.00,
            0.05, 0.10, 0.15, 0.20, 0.25, 0.30]


def visualize_vsa_contributions(smiles, highlight_descriptors=None, save_path = None):
    """Show which atoms contribute to the selected VSA descriptors.

    For every requested descriptor of the SMR_VSA, SlogP_VSA, EState_VSA,
    VSA_EState or PEOE_VSA family, the atoms whose per-atom value falls in
    the descriptor's bin (``lower <= value < upper``) are highlighted in an
    SVG drawing shown via IPython ``display``, and a per-atom table
    (value, contribution, percentage of the bin total) is printed.

    Args:
        smiles: SMILES string of the molecule.
        highlight_descriptors: Descriptor name or iterable of names, e.g.
            ``["SMR_VSA7", "EState_VSA5"]``.  Defaults to
            ``["SMR_VSA8", "SlogP_VSA8", "PEOE_VSA8"]``.
        save_path: Optional path for the SVG.  With one requested descriptor
            the SVG is written to exactly this path.  With several, each
            drawing goes to ``<stem>_<descriptor><suffix>`` next to it, so
            later descriptors do not overwrite earlier ones.

    Returns:
        None.  Problems (unparsable SMILES, unknown or malformed descriptor
        names, empty bins) are reported with ``print`` and skipped.
    """
    # Imported locally so the function is self-contained.
    from pathlib import Path

    from IPython.display import SVG, display
    from rdkit import Chem
    from rdkit.Chem import AllChem, rdMolDescriptors
    from rdkit.Chem.Draw import rdMolDraw2D
    from rdkit.Chem.EState import EState, EState_VSA

    if highlight_descriptors is None:
        highlight_descriptors = ["SMR_VSA8", "SlogP_VSA8", "PEOE_VSA8"]
    elif isinstance(highlight_descriptors, str):
        # A bare name would otherwise be iterated character by character.
        highlight_descriptors = [highlight_descriptors]
    else:
        highlight_descriptors = list(highlight_descriptors)

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"Error: Could not parse SMILES '{smiles}'")
        return
    if not mol.GetNumConformers():
        AllChem.Compute2DCoords(mol)

    # precompute all per-atom values/contributions
    crippen_contribs = rdMolDescriptors._CalcCrippenContribs(mol)
    vsa_contribs = list(rdMolDescriptors._CalcLabuteASAContribs(mol)[0])
    estate_indices = EState.EStateIndices(mol)
    peoe_charges = get_peoe_charges(mol)

    # Families addressed by a numeric bin index:
    # prefix -> (bin boundaries, binned per-atom values, summed contributions)
    indexed_families = {
        "EState_VSA": (EState_VSA.estateBins, estate_indices, vsa_contribs),
        "VSA_EState": (EState_VSA.vsaBins, vsa_contribs, estate_indices),
        "PEOE_VSA": (get_peoe_vsa_bins(), peoe_charges, vsa_contribs),
    }
    several_requested = len(highlight_descriptors) > 1

    for desc in highlight_descriptors:
        # --- pick the "values" vs "contributions" pairing and the bin bounds ---
        if desc.startswith(("SMR_VSA", "SlogP_VSA")):
            try:
                lower, upper = get_vsa_bin_bounds(desc)
            except ValueError as e:
                print(e)
                continue
            prop_idx = 1 if desc.startswith("SMR") else 0
            values = [c[prop_idx] for c in crippen_contribs]
            contributions = vsa_contribs
        else:
            family = next(
                (prefix for prefix in indexed_families if desc.startswith(prefix)),
                None,
            )
            if family is None:
                print(f"Unknown descriptor '{desc}', skipping.")
                continue
            bins, values, contributions = indexed_families[family]
            try:
                # descriptors are numbered from 1
                lower, upper = get_bin_bounds(int(desc[len(family):]), bins)
            except ValueError as e:
                # Missing, non-numeric or out-of-range index: skip this
                # descriptor instead of aborting the remaining ones.
                print(f"Invalid descriptor '{desc}': {e}")
                continue

        # --- find atoms in the selected bin ---
        atoms = [i for i, val in enumerate(values) if lower <= val < upper]
        if not atoms:
            print(f"\nNo atoms contribute to {desc} (range {lower:.4f} to {upper:.4f}).")
            continue
        total = sum(contributions[i] for i in atoms)

        # --- every contributing atom gets the same green highlight ---
        highlight_colors = {i: (0.0, 0.7, 0.0) for i in atoms}

        # --- draw SVG with atom indices ---
        drawer = rdMolDraw2D.MolDraw2DSVG(500, 500)
        drawer.drawOptions().addAtomIndices = True
        rdMolDraw2D.PrepareAndDrawMolecule(
            drawer, mol,
            highlightAtoms=atoms,
            highlightAtomColors=highlight_colors,
        )
        drawer.FinishDrawing()
        svg_text = drawer.GetDrawingText()
        display(SVG(svg_text))

        if save_path:
            out_path = save_path
            if several_requested:
                base = Path(save_path)
                out_path = base.with_name(f"{base.stem}_{desc}{base.suffix}")
            with open(out_path, "w", encoding="utf-8") as f:
                f.write(svg_text)
            print(f"Saved SVG to {out_path}")

        # --- print contribution table ---
        descriptor_type = "Charge" if desc.startswith("PEOE_VSA") else "Value"
        print(f"\n### {desc} Contributions — Total: {total:.4f}")
        print(f"Bin range: {lower:.4f} to {upper:.4f}")
        print(f"{'Idx':<4s}{'Sym':<4s}{descriptor_type:>8s}{'Contrib':>12s}{'% of total':>12s}")
        print("-" * 44)
        for i in atoms:
            sym = mol.GetAtomWithIdx(i).GetSymbol()
            val = values[i]
            cst = contributions[i]
            # A bin total of 0 (contributions cancel out) has no meaningful share.
            pct = 100 * cst / total if total else 0
            print(f"{i:<4d}{sym:<4s}{val:8.3f}{cst:12.3f}{pct:12.1f}%")