├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── README.md
├── assets
├── logo.png
├── output_one.png
└── output_two.png
├── setup.py
├── tests
└── test_explainer.py
├── tutorial.ipynb
└── vsa_explainer
├── __init__.py
├── develop.ipynb
└── explainer.py
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | build:
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - uses: actions/checkout@v3
15 |
16 | - name: Set up Python
17 | uses: actions/setup-python@v4
18 | with:
19 | python-version: '3.9'
20 |
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install -e .[dev]
25 |
26 | - name: Run tests
27 | run: |
28 | pytest --maxfail=1 --disable-warnings -q
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | build/
11 | dist/
12 | *.egg-info/
13 | .eggs/
14 |
15 | # Installer logs
16 | pip-log.txt
17 |
18 | # Unit test / coverage reports
19 | .coverage
20 | .coverage.*
21 | .pytest_cache/
22 | htmlcov/
23 | .tox/
24 |
25 | # Jupyter Notebooks
26 | .ipynb_checkpoints/
27 |
28 | # Virtual environments
29 | venv/
30 | ENV/
31 | env/
32 | .venv/
33 |
34 | # IDE settings
35 | .vscode/
36 | .idea/
37 |
38 | # Mac
39 | .DS_Store
40 |
41 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | vsa_explainer: A simple Python package to visualize and explain RDKit SlogP_VSA, SMR_VSA, PEOE_VSA, EState_VSA, and VSA_EState descriptors and their per-atom contributions
6 |
7 |
8 |
9 | [PyPI version](https://pypi.org/project/vsa_explainer/)
10 | [PyPI package](https://pypi.org/project/vsa_explainer/)
11 | [CI status](https://github.com/srijitseal/vsa_explainer/actions/workflows/ci.yml)
12 | [GitHub repository](https://github.com/srijitseal/vsa_explainer)
13 |
14 | ---
15 |
16 | ## 📌 Installation
17 | ```bash
18 | pip install vsa_explainer
19 | ```
20 |
21 | ## 📌 Quick Usage
22 | ```python
23 | from vsa_explainer import visualize_vsa_contributions
24 |
25 | # Highlight per-atom contributions to SMR_VSA7 and EState_VSA5
26 | smiles = "C1CO[C@@H]1CN2C3=C(C=CC(=C3)C(=O)O)N=C2CN4CCC(CC4)C5=NC(=CC=C5)OCC6=C(C=C(C=C6)C#N)F"
27 | visualize_vsa_contributions(smiles, ["SMR_VSA7", "EState_VSA5"])
28 | ```
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | - Draws an SVG of your molecule with atoms colored by their contribution to each selected VSA descriptor.
38 | - Displays a table reporting per-atom values, contributions, and percentage of the total.
39 |
40 |
41 | ## 📌 Support
42 |
43 | - **SMR_VSA**
44 | MOE-type descriptors using MR contributions and surface area contributions
45 |
46 | - **SlogP_VSA**
47 | MOE-type descriptors using LogP contributions and surface area contributions
48 |
49 | - **PEOE_VSA**
50 | MOE-type descriptors using partial charges and surface area contributions
51 |
52 | - **EState_VSA**
53 | MOE-type descriptors that sum per-atom surface area contributions over bins of EState indices (developed at RD, not described in the CCG paper)
54 |
55 | - **VSA_EState**
56 | MOE-type descriptors that sum per-atom EState indices over bins of surface area contributions (developed at RD, not described in the CCG paper)
57 |
58 |
59 |
60 | ## 📌 Contributing
61 | 1. Fork the repo
62 | 2. Create your feature branch (`git checkout -b feature/YourFeature`)
63 | 3. Commit your changes (`git commit -m "Add feature"`)
64 | 4. Push to the branch (`git push origin feature/YourFeature`)
65 | 5. Open a Pull Request
66 |
67 | ## 📌 License
68 | Released under the MIT License. See LICENSE for details.
69 |
70 | ✨ Enjoy exploring molecular surface areas with vsa_explainer!
71 |
--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srijitseal/vsa_explainer/00192d545b30754b5791ca9ca944b13b7a40c723/assets/logo.png
--------------------------------------------------------------------------------
/assets/output_one.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srijitseal/vsa_explainer/00192d545b30754b5791ca9ca944b13b7a40c723/assets/output_one.png
--------------------------------------------------------------------------------
/assets/output_two.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/srijitseal/vsa_explainer/00192d545b30754b5791ca9ca944b13b7a40c723/assets/output_two.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | import pathlib
3 |
# Read the README that sits next to this file; it is passed to setup() below
# as the long description.
here = pathlib.Path(__file__).parent
long_description = (here / "README.md").read_text(encoding="utf-8")

setup(
    name="vsa_explainer",
    # NOTE: vsa_explainer/__init__.py hard-codes the same version string in
    # __version__; the two must be bumped together.
    version="0.2.0",
    description="Visualize and explain RDKit VSA descriptor contributions",

    long_description=long_description,
    long_description_content_type="text/markdown",  # tells PyPI to render Markdown


    author="Srijit Seal",
    author_email="seal@understanding.bio",
    license="MIT",
    packages=find_packages(),  # finds vsa_explainer/

    install_requires=[
        "numpy>=1.18",
        "matplotlib>=3.0",
        "rdkit",  # see rdkit install instructions for your platform
        "ipython",  # for IPython.display.SVG
    ],

    # `pip install -e .[dev]` (used by the CI workflow) pulls in pytest.
    extras_require={
        "dev": ["pytest"],
    },
    # NOTE(review): tests_require is deprecated in recent setuptools; the
    # "dev" extra above already provides pytest.
    tests_require=["pytest"],

    # NOTE(review): console_scripts targets are invoked with no arguments,
    # but visualize_vsa_contributions() has a required `smiles` parameter, so
    # the `vsa-explain` command presumably fails with a TypeError as wired.
    # A dedicated argument-parsing main() would be needed — confirm.
    entry_points={
        "console_scripts": [
            "vsa-explain=vsa_explainer.explainer:visualize_vsa_contributions",
        ],
    },
    python_requires=">=3.8",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
    ],
    url='https://github.com/srijitseal/vsa_explainer',
)
--------------------------------------------------------------------------------
/tests/test_explainer.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from vsa_explainer import (
3 | load_crippen_data,
4 | get_vsa_bin_bounds,
5 | get_bin_bounds,
6 | visualize_vsa_contributions,
7 | )
8 |
def test_load_crippen_data():
    """The Crippen table loads as a list of 5-field records."""
    entries = load_crippen_data()
    assert isinstance(entries, list)
    # Every record carries name, SMARTS, logP, MR and note.
    for entry in entries:
        assert len(entry) == 5
14 |
@pytest.mark.parametrize(
    ("bins", "idx", "expected"),
    [
        # first bin is open on the left
        ([0.1, 0.5, 1.0], 1, (float("-inf"), 0.1)),
        # interior bin is bounded by two neighbouring edges
        ([0.1, 0.5, 1.0], 2, (0.1, 0.5)),
        # bin past the last edge is open on the right
        ([0.1, 0.5, 1.0], 4, (1.0, float("inf"))),
    ],
)
def test_get_bin_bounds(bins, idx, expected):
    """1-based bin indices map to the expected (lower, upper) pair."""
    bounds = get_bin_bounds(idx, bins)
    assert bounds == expected
22 |
def test_get_vsa_bin_bounds_smrvsa8():
    """Bounds parsed for SMR_VSA8 form a non-empty interval."""
    # The bounds are parsed out of the descriptor's __doc__ text.
    lower, upper = get_vsa_bin_bounds("SMR_VSA8")
    assert lower < upper  # sanity check only; exact values are not pinned
27 |
def test_visualize_vsa_contributions_smoke(capsys):
    """Smoke test: a simple molecule is processed without raising."""
    visualize_vsa_contributions("CCO", ["SMR_VSA1"])
    # Whether a table or the "No atoms contribute" notice is printed, the
    # descriptor name must appear on stdout.
    stdout = capsys.readouterr().out
    assert "SMR_VSA1" in stdout
34 |
--------------------------------------------------------------------------------
/tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "5f0b3d65-cbdb-4242-96bc-3fe52306d68f",
7 | "metadata": {},
8 | "outputs": [
9 | {
10 | "name": "stdout",
11 | "output_type": "stream",
12 | "text": [
13 | "\n",
14 | "No atoms contribute to SMR_VSA7 (range 3.05 to 3.63).\n"
15 | ]
16 | },
17 | {
18 | "data": {
19 | "image/svg+xml": [
20 | ""
49 | ],
50 | "text/plain": [
51 | ""
52 | ]
53 | },
54 | "metadata": {},
55 | "output_type": "display_data"
56 | },
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "\n",
62 | "### EState_VSA5 Contributions — Total: 19.2625\n",
63 | "Bin range: 1.1650 to 1.5400\n",
64 | "Idx Sym Value Contrib % of total\n",
65 | "----------------------------------------\n",
66 | "1 C 1.237 6.421 33.3%\n",
67 | "2 C 1.330 6.421 33.3%\n",
68 | "3 C 1.237 6.421 33.3%\n"
69 | ]
70 | }
71 | ],
72 | "source": [
73 | "from vsa_explainer import visualize_vsa_contributions\n",
74 | "\n",
75 | "smiles = \"C1CCCCC1O\"\n",
76 | "visualize_vsa_contributions(smiles, [\"SMR_VSA7\", \"EState_VSA5\"])\n"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "id": "7075cab6-1288-42b1-b1db-99a0c305900a",
83 | "metadata": {},
84 | "outputs": [],
85 | "source": []
86 | }
87 | ],
88 | "metadata": {
89 | "kernelspec": {
90 | "display_name": "Python 3 (ipykernel)",
91 | "language": "python",
92 | "name": "python3"
93 | },
94 | "language_info": {
95 | "codemirror_mode": {
96 | "name": "ipython",
97 | "version": 3
98 | },
99 | "file_extension": ".py",
100 | "mimetype": "text/x-python",
101 | "name": "python",
102 | "nbconvert_exporter": "python",
103 | "pygments_lexer": "ipython3",
104 | "version": "3.9.18"
105 | }
106 | },
107 | "nbformat": 4,
108 | "nbformat_minor": 5
109 | }
110 |
--------------------------------------------------------------------------------
/vsa_explainer/__init__.py:
--------------------------------------------------------------------------------
1 | # vsa_explainer/__init__.py
2 |
3 | from .explainer import (
4 | load_crippen_data,
5 | get_vsa_bin_bounds,
6 | get_bin_bounds,
7 | visualize_vsa_contributions,
8 | )
9 |
10 | __version__ = "0.2.0"
11 |
--------------------------------------------------------------------------------
/vsa_explainer/explainer.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from rdkit import Chem
3 | from rdkit.Chem import rdMolDescriptors, Draw, AllChem, Descriptors
4 | from rdkit.Chem.Draw import rdMolDraw2D, MolToImage
5 | from rdkit.Chem.EState import EState, EState_VSA
6 | from rdkit.Chem.Lipinski import NumHDonors, NumHAcceptors
7 | from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges
8 | import numpy as np
9 | import re
10 | from collections import namedtuple
11 | from IPython.display import SVG, display
12 |
def load_crippen_data():
    """
    Parse the embedded Crippen atom-typing table.

    Returns a list of ``CrippenTuple(name, smarts, logp_contrib, mr_contrib,
    note)`` records, one per table row that has at least four
    whitespace-separated columns. Rows with fewer columns (those lacking an MR
    value) are skipped. A logP or MR column that is not a valid float is
    stored as ``None``; ``note`` is the fifth column or ``""``.
    """
    CrippenTuple = namedtuple('CrippenTuple',
                              ('name', 'smarts', 'logp_contrib', 'mr_contrib', 'note'))

    def _as_float(text):
        # Non-numeric cells become None instead of aborting the load.
        try:
            return float(text)
        except ValueError:
            return None

    # This data is from the blog post
    table = '''C1 [CH4] 0.1441 2.503
C1 [CH3]C 0.1441 2.503
C1 [CH2](C)C 0.1441 2.503
C2 [CH](C)(C)C 0 2.433
C2 [C](C)(C)(C)C 0 2.433
C3 [CH3][N,O,P,S,F,Cl,Br,I] -0.2035 2.753
C3 [CH2X4]([N,O,P,S,F,Cl,Br,I])[A;!#1] -0.2035 2.753
C4 [CH1X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])[A;!#1] -0.2051 2.731
C4 [CH0X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])([A;!#1])[A;!#1] -0.2051 2.731
C5 [C]=[!C;A;!#1] -0.2783 5.007
C6 [CH2]=C 0.1551 3.513
C6 [CH1](=C)[A;!#1] 0.1551 3.513
C6 [CH0](=C)([A;!#1])[A;!#1] 0.1551 3.513
C6 [C](=C)=C 0.1551 3.513
C7 [CX2]#[A;!#1] 0.0017 3.888
C8 [CH3]c 0.08452 2.464
C9 [CH3]a -0.1444 2.412
C10 [CH2X4]a -0.0516 2.488
C11 [CHX4]a 0.1193 2.582
C12 [CH0X4]a -0.0967 2.576
C13 [cH0]-[A;!C;!N;!O;!S;!F;!Cl;!Br;!I;!#1] -0.5443 4.041
C14 [c][#9] 0 3.257
C15 [c][#17] 0.245 3.564
C16 [c][#35] 0.198 3.18
C17 [c][#53] 0 3.104
C18 [cH] 0.1581 3.35
C19 [c](:a)(:a):a 0.2955 4.346
C20 [c](:a)(:a)-a 0.2713 3.904
C21 [c](:a)(:a)-C 0.136 3.509
C22 [c](:a)(:a)-N 0.4619 4.067
C23 [c](:a)(:a)-O 0.5437 3.853
C24 [c](:a)(:a)-S 0.1893 2.673
C25 [c](:a)(:a)=[C,N,O] -0.8186 3.135
C26 [C](=C)(a)[A;!#1] 0.264 4.305
C26 [C](=C)(c)a 0.264 4.305
C26 [CH1](=C)a 0.264 4.305
C26 [C]=c 0.264 4.305
C27 [CX4][A;!C;!N;!O;!P;!S;!F;!Cl;!Br;!I;!#1] 0.2148 2.693
CS [#6] 0.08129 3.243
H1 [#1][#6,#1] 0.123 1.057
H2 [#1]O[CX4,c] -0.2677 1.395
H2 [#1]O[!#6;!#7;!#8;!#16] -0.2677 1.395
H2 [#1][!#6;!#7;!#8] -0.2677 1.395
H3 [#1][#7] 0.2142 0.9627
H3 [#1]O[#7] 0.2142 0.9627
H4 [#1]OC=[#6,#7,O,S] 0.298 1.805
H4 [#1]O[O,S] 0.298 1.805
HS [#1] 0.1125 1.112
N1 [NH2+0][A;!#1] -1.019 2.262
N2 [NH+0]([A;!#1])[A;!#1] -0.7096 2.173
N3 [NH2+0]a -1.027 2.827
N4 [NH1+0]([!#1;A,a])a -0.5188 3
N5 [NH+0]=[!#1;A,a] 0.08387 1.757
N6 [N+0](=[!#1;A,a])[!#1;A,a] 0.1836 2.428
N7 [N+0]([A;!#1])([A;!#1])[A;!#1] -0.3187 1.839
N8 [N+0](a)([!#1;A,a])[A;!#1] -0.4458 2.819
N8 [N+0](a)(a)a -0.4458 2.819
N9 [N+0]#[A;!#1] 0.01508 1.725
N10 [NH3,NH2,NH;+,+2,+3] -1.95
N11 [n+0] -0.3239 2.202
N12 [n;+,+2,+3] -1.119
N13 [NH0;+,+2,+3]([A;!#1])([A;!#1])([A;!#1])[A;!#1] -0.3396 0.2604
N13 [NH0;+,+2,+3](=[A;!#1])([A;!#1])[!#1;A,a] -0.3396 0.2604
N13 [NH0;+,+2,+3](=[#6])=[#7] -0.3396 0.2604
N14 [N;+,+2,+3]#[A;!#1] 0.2887 3.359
N14 [N;-,-2,-3] 0.2887 3.359
N14 [N;+,+2,+3](=[N;-,-2,-3])=N 0.2887 3.359
NS [#7] -0.4806 2.134
O1 [o] 0.1552 1.08
O2 [OH,OH2] -0.2893 0.8238
O3 [O]([A;!#1])[A;!#1] -0.0684 1.085
O4 [O](a)[!#1;A,a] -0.4195 1.182
O5 [O]=[#7,#8] 0.0335 3.367
O5 [OX1;-,-2,-3][#7] 0.0335 3.367
O6 [OX1;-,-2,-2][#16] -0.3339 0.7774
O6 [O;-0]=[#16;-0] -0.3339 0.7774
O12 [O-]C(=O) -1.326
O7 [OX1;-,-2,-3][!#1;!N;!S] -1.189 0
O8 [O]=c 0.1788 3.135
O9 [O]=[CH]C -0.1526 0
O9 [O]=C(C)([A;!#1]) -0.1526 0
O9 [O]=[CH][N,O] -0.1526 0
O9 [O]=[CH2] -0.1526 0
O9 [O]=[CX2]=O -0.1526 0
O10 [O]=[CH]c 0.1129 0.2215
O10 [O]=C([C,c])[a;!#1] 0.1129 0.2215
O10 [O]=C(c)[A;!#1] 0.1129 0.2215
O11 [O]=C([!#1;!#6])[!#1;!#6] 0.4833 0.389
OS [#8] -0.1188 0.6865
F [#9-0] 0.4202 1.108
Cl [#17-0] 0.6895 5.853
Br [#35-0] 0.8456 8.927
I [#53-0] 0.8857 14.02
Hal [#9,#17,#35,#53;-] -2.996
Hal [#53;+,+2,+3] -2.996
Hal [+;#3,#11,#19,#37,#55] -2.996
P [#15] 0.8612 6.92
S2 [S;-,-2,-3,-4,+1,+2,+3,+5,+6] -0.0024 7.365
S2 [S-0]=[N,O,P,S] -0.0024 7.365
S1 [S;A] 0.6482 7.591
S3 [s;a] 0.6237 6.691
Me1 [#3,#11,#19,#37,#55] -0.3808 5.754
Me1 [#4,#12,#20,#38,#56] -0.3808 5.754
Me1 [#5,#13,#31,#49,#81] -0.3808 5.754
Me1 [#14,#32,#50,#82] -0.3808 5.754
Me1 [#33,#51,#83] -0.3808 5.754
Me1 [#34,#52,#84] -0.3808 5.754
Me2 [#21,#22,#23,#24,#25,#26,#27,#28,#29,#30] -0.0025
Me2 [#39,#40,#41,#42,#43,#44,#45,#46,#47,#48] -0.0025
Me2 [#72,#73,#74,#75,#76,#77,#78,#79,#80] -0.0025 '''

    records = []
    for raw_row in table.splitlines():
        columns = raw_row.split()
        # Blank rows and rows without all of name, SMARTS, logP and MR are
        # dropped.
        if len(columns) < 4:
            continue
        note = columns[4] if len(columns) > 4 else ""
        records.append(
            CrippenTuple(
                columns[0],
                columns[1],
                _as_float(columns[2]),
                _as_float(columns[3]),
                note,
            )
        )
    return records
152 |
153 |
def get_vsa_bin_bounds(descriptor_name):
    """
    Return the ``(lower_bound, upper_bound)`` floats of a VSA descriptor bin.

    The bounds are read from the ``__doc__`` of the attribute named
    ``descriptor_name`` (e.g. "SMR_VSA3" or "SlogP_VSA1") on ``Descriptors``.
    A two-sided range is tried first, then the one-sided forms "( x < b )" and
    "( a <= x )"; a missing side is reported as -inf / +inf.

    Raises ValueError if the descriptor does not exist, has no docstring, or
    no range can be found in the docstring.
    """
    descriptor = getattr(Descriptors, descriptor_name, None)
    doc = None if descriptor is None else descriptor.__doc__
    if doc is None:
        raise ValueError(f"No such descriptor {descriptor_name!r} or missing __doc__")

    # unified pattern: matches "a <= x < b", "a < x < b", including "-inf"
    two_sided = re.search(
        r"""\(\s*([+-]?\d*\.?\d+|[-]?inf) # group 1: a or -inf
\s*(?:<=|<)\s*x\s*(?:<|<=)\s*
([+-]?\d*\.?\d+|[-]?inf) # group 2: b or inf
\s*\)""",
        doc,
        flags=re.IGNORECASE | re.VERBOSE
    )
    if two_sided is not None:
        # float() parses "inf" / "-inf" as well as plain numbers.
        low_text = two_sided.group(1).lower()
        high_text = two_sided.group(2).lower()
        return float(low_text), float(high_text)

    # fallback: upper bound only, "( x < b )"
    upper_only = re.search(r"\(\s*x\s*<\s*([+-]?\d*\.?\d+)\s*\)", doc)
    if upper_only is not None:
        return float("-inf"), float(upper_only.group(1))

    # fallback: lower bound only, "( a <= x )"
    lower_only = re.search(r"\(\s*([+-]?\d*\.?\d+)\s*<=\s*x\s*\)", doc)
    if lower_only is not None:
        return float(lower_only.group(1)), float("inf")

    raise ValueError(f"Could not parse bin bounds from {doc!r}")
188 |
def get_bin_bounds(idx, bins):
    """
    Map a 1-based bin index onto its (lower, upper) boundary pair.

    ``bins`` is a sorted list of N boundaries, which delimit N + 1 bins:
        idx == 1        -> (-inf, bins[0])
        2 <= idx <= N   -> (bins[idx-2], bins[idx-1])
        idx == N + 1    -> (bins[-1], inf)

    Raises ValueError for any other index.
    """
    boundary_count = len(bins)

    # Leftmost bin: unbounded below.
    if idx == 1:
        return float("-inf"), bins[0]

    # Rightmost bin: unbounded above.
    if idx == boundary_count + 1:
        return bins[-1], float("inf")

    # Interior bin: enclosed by two consecutive boundaries.
    if 2 <= idx <= boundary_count:
        return bins[idx - 2], bins[idx - 1]

    raise ValueError(f"Index {idx} out of range for {boundary_count}-boundary bins")
205 |
def get_peoe_charges(mol):
    """
    Return per-atom PEOE (Partial Equalization of Orbital Electronegativity)
    charges as a list of floats, in atom order.

    Gasteiger charges are used as the approximation of PEOE charges. They are
    computed on a copy, so ``mol`` itself is left unmodified. NaN charges are
    replaced by 0.0.
    """
    # Work on a copy so the caller's molecule is untouched.
    scratch = Chem.Mol(mol)
    ComputeGasteigerCharges(scratch)

    raw_charges = (
        atom.GetDoubleProp('_GasteigerCharge') for atom in scratch.GetAtoms()
    )
    # The Gasteiger calculation can yield NaN; report those atoms as neutral.
    return [0.0 if np.isnan(q) else q for q in raw_charges]
228 |
def get_peoe_vsa_bins():
    """
    Return the partial-charge bin boundaries used for PEOE_VSA descriptors.

    The 13 boundaries run from -0.30 to 0.30 in steps of 0.05 and are
    symmetric around zero. A new list is returned on every call.
    """
    # Standard PEOE_VSA charge bins (from RDKit source): spell out the
    # positive half and mirror it for the negative half.
    positive_edges = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]
    negative_edges = [-edge for edge in reversed(positive_edges)]
    return negative_edges + [0.00] + positive_edges
236 |
def visualize_vsa_contributions(smiles, highlight_descriptors=None, save_path=None):
    """
    Analyze and visualize VSA descriptor contributions for a molecule,
    including SMR_VSA, SlogP_VSA, EState_VSA, VSA_EState, and PEOE_VSA families.

    For each requested descriptor, the atoms whose per-atom value falls inside
    the descriptor's bin (``lower <= value < upper``) are highlighted in an SVG
    drawing of the molecule, and a table of their values, contributions and
    share of the total is printed.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule. If it cannot be parsed, an error is
        printed and the function returns.
    highlight_descriptors : list of str or str, optional
        Descriptor names such as "SMR_VSA7" or "EState_VSA5". A single name
        may be given as a plain string. Defaults to
        ["SMR_VSA8", "SlogP_VSA8", "PEOE_VSA8"].
    save_path : str, optional
        If given, the SVG text is written to this path. The same path is
        reused for every descriptor, so with several descriptors the file
        ends up holding the last drawing.

    Returns
    -------
    None
        All output is displayed or printed. Unknown or malformed descriptor
        names are reported and skipped.
    """
    if highlight_descriptors is None:
        highlight_descriptors = ["SMR_VSA8", "SlogP_VSA8", "PEOE_VSA8"]
    elif isinstance(highlight_descriptors, str):
        # A bare string would otherwise be iterated character by character.
        highlight_descriptors = [highlight_descriptors]

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"Error: Could not parse SMILES '{smiles}'")
        return
    if not mol.GetNumConformers():
        AllChem.Compute2DCoords(mol)

    # precompute all per-atom values/contributions
    crippen_contribs = rdMolDescriptors._CalcCrippenContribs(mol)
    vsa_contribs = list(rdMolDescriptors._CalcLabuteASAContribs(mol)[0])
    estate_indices = EState.EStateIndices(mol)
    peoe_charges = get_peoe_charges(mol)

    # Index-binned families:
    #   prefix -> (bin boundaries, per-atom values that are binned,
    #              per-atom contributions that are summed)
    binned_families = {
        # EState_VSA*: sum VSA over EState bins
        "EState_VSA": (EState_VSA.estateBins, estate_indices, vsa_contribs),
        # VSA_EState*: sum EState over VSA bins
        "VSA_EState": (EState_VSA.vsaBins, vsa_contribs, estate_indices),
        # PEOE_VSA*: sum VSA over PEOE charge bins
        "PEOE_VSA": (get_peoe_vsa_bins(), peoe_charges, vsa_contribs),
    }

    for desc in highlight_descriptors:
        # --- pick the pairing of "values" vs "contributions" and the bin boundaries ---
        if desc.startswith("SMR_VSA") or desc.startswith("SlogP_VSA"):
            # SMR_VSA* or SlogP_VSA*: bounds come from the descriptor docstring
            try:
                lower, upper = get_vsa_bin_bounds(desc)
            except ValueError as e:
                print(e)
                continue
            prop_idx = 1 if desc.startswith("SMR") else 0
            values = [c[prop_idx] for c in crippen_contribs]
            contributions = vsa_contribs
        else:
            prefix = next((p for p in binned_families if desc.startswith(p)), None)
            if prefix is None:
                print(f"Unknown descriptor '{desc}', skipping.")
                continue
            bins, values, contributions = binned_families[prefix]
            # A non-numeric or out-of-range index is reported and skipped,
            # like bad names in the SMR/SlogP branch, instead of aborting
            # the remaining descriptors.
            try:
                idx = int(desc[len(prefix):])  # descriptors start from 1
                lower, upper = get_bin_bounds(idx, bins)
            except ValueError as e:
                print(f"Invalid descriptor '{desc}': {e}")
                continue

        # --- find atoms in the selected bin ---
        selected = [
            (i, val, contrib)
            for i, (val, contrib) in enumerate(zip(values, contributions))
            if lower <= val < upper
        ]
        if not selected:
            print(f"\nNo atoms contribute to {desc} (range {lower:.4f} to {upper:.4f}).")
            continue
        atoms = [i for i, _, _ in selected]
        total = sum(contrib for _, _, contrib in selected)

        # --- uniform green highlight for every contributing atom ---
        # No division by `total` here: it can be zero (e.g. EState
        # contributions summing to 0), which used to raise ZeroDivisionError.
        highlight_colors = {i: (0.0, 0.7, 0.0) for i in atoms}

        # --- draw SVG with atom indices ---
        drawer = rdMolDraw2D.MolDraw2DSVG(500, 500)
        drawer.drawOptions().addAtomIndices = True
        rdMolDraw2D.PrepareAndDrawMolecule(
            drawer, mol,
            highlightAtoms=atoms,
            highlightAtomColors=highlight_colors
        )
        drawer.FinishDrawing()
        svg_text = drawer.GetDrawingText()
        display(SVG(svg_text))
        if save_path:
            # NOTE(review): the same path is rewritten for each descriptor.
            with open(save_path, "w") as f:
                f.write(svg_text)
            print(f"Saved SVG to {save_path}")

        # --- print contribution table ---
        descriptor_type = "Charge" if desc.startswith("PEOE_VSA") else "Value"
        print(f"\n### {desc} Contributions — Total: {total:.4f}")
        print(f"Bin range: {lower:.4f} to {upper:.4f}")
        print(f"{'Idx':<4s}{'Sym':<4s}{descriptor_type:>8s}{'Contrib':>12s}{'% of total':>12s}")
        print("-" * 44)
        for i, val, cst in selected:
            sym = mol.GetAtomWithIdx(i).GetSymbol()
            # Guard the percentage against a zero total.
            pct = 100 * cst / total if total else 0
            print(f"{i:<4d}{sym:<4s}{val:8.3f}{cst:12.3f}{pct:12.1f}%")
346 |
--------------------------------------------------------------------------------