├── .github
    └── workflows
    │   └── pythonapp.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── examples
    ├── acetate.xyz
    ├── chiral_stereo_test.xyz
    ├── ethane.xyz
    └── propylbenzene.xyz
├── requirements.txt
├── requirements.yml
├── setup.py
├── test.py
└── xyz2mol.py


/.github/workflows/pythonapp.yml:
--------------------------------------------------------------------------------
 1 | name: Conda/Python pytest
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     steps:
11 |     - uses: actions/checkout@v4
12 |     - name: Set up Python and Conda environment
13 |       run: |
14 |         conda update -n base -c defaults conda
15 |         conda env create -f requirements.yml -p env
16 |         source $(conda info --root)/etc/profile.d/conda.sh
17 |         conda activate ./env
18 |         which pip
19 |     - name: Install dependencies
20 |       run: |
21 |         # Activate (each run step starts a new shell, so the env is re-activated)
22 |         source $(conda info --root)/etc/profile.d/conda.sh
23 |         conda activate ./env
24 |         # Pip install requirements
25 |         python -m pip install --upgrade pip
26 |         pip install -r requirements.txt
27 |     - name: Lint with flake8
28 |       run: |
29 |         # Activate
30 |         source $(conda info --root)/etc/profile.d/conda.sh
31 |         conda activate ./env
32 |         # Make sure it is installed
33 |         pip install flake8
34 |         # stop the build if there are Python syntax errors or undefined names
35 |         flake8 *.py --count --select=E9,F63,F7,F82 --show-source --statistics
36 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37 |         flake8 *.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 |     - name: Test with pytest
39 |       run: |
40 |         # Activate
41 |         source $(conda info --root)/etc/profile.d/conda.sh
42 |         conda activate ./env
43 |         # Make sure pytest is available
44 |         pip install pytest
45 |         # Test files
46 |         pytest -v test.py
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.cprof
  2 | .pytest_cache
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | env/
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # dotenv
 86 | .env
 87 | 
 88 | # virtualenv
 89 | .venv
 90 | venv/
 91 | ENV/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Jensen Group
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON=python
 2 | CONDA=conda
 3 | FLAKE=flake8
 4 | 
 5 | # Command targets are phony; `env` is the directory conda creates, so it is not
 6 | .PHONY: all setup pip test test-lint
 7 | 
 8 | all: env
 9 | 
10 | setup: env pip
11 | 
12 | env:
13 | 	${CONDA} env create -f requirements.yml -p env
14 | 
15 | pip: env
16 | 	${PYTHON} -m pip install -r requirements.txt --no-cache-dir
17 | 
18 | test:
19 | 	${PYTHON} -m pytest -v test.py
20 | 
21 | test-lint:
22 | 	@# stop the build if there are Python syntax errors or undefined names
23 | 	${FLAKE} *.py --count --select=E9,F63,F7,F82 --show-source --statistics
24 | 	@# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
25 | 	${FLAKE} *.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # xyz2mol has now been implemented in RDKit
 2 | 
 3 | ```
 4 | raw_mol = Chem.MolFromXYZFile('acetate.xyz')
 5 | mol = Chem.Mol(raw_mol)
 6 | rdDetermineBonds.DetermineBonds(mol,charge=-1)
 7 | ```
 8 | 
 9 | # Convert Cartesian coordinates to one or more molecular graphs
10 | 
11 | Given Cartesian coordinates in the form of a `.xyz` file, the code constructs a list of one or more molecular graphs. In cases where there are several possible resonance forms xyz2mol returns a list of all, otherwise just a list of one.
12 | 
13 | This code is based on the work of
14 | DOI: [10.1002/bkcs.10334](http://dx.doi.org/10.1002/bkcs.10334)
15 | 
16 |     Yeonjoon Kim and Woo Youn Kim
17 |     "Universal Structure Conversion Method for Organic Molecules:
18 |     From Atomic Connectivity to Three-Dimensional Geometry"
19 |     Bull. Korean Chem. Soc.
20 |     2015, Vol. 36, 1769-1777
21 | 
22 | ## Setup
23 | 
24 | Depends on `rdkit`, `numpy`, and `networkx`. Easiest to set up via anaconda/conda:
25 | 
26 | `conda install -c conda-forge xyz2mol`
27 | 
28 | Setup for a standalone environment is available via the `Makefile`. To set up and test, simply clone the project and run `make`.
29 | 
30 |     git clone https://github.com/jensengroup/xyz2mol
31 | 
32 | and then run the following in the `xyz2mol` folder
33 | 
34 |     make
35 |     make test
36 | 
37 | Note, it is also possible to run the code without the `networkx` dependency, but it is slower.
38 | 
39 | 
40 | ## Example usage
41 | 
42 | Read in xyz file and print out the SMILES, but don't encode the chirality.
43 | 
44 |     xyz2mol.py examples/chiral_stereo_test.xyz --ignore-chiral
45 | 
46 | Read in xyz file and print out the SDF format, save it in a file
47 | 
48 |     xyz2mol.py examples/chiral_stereo_test.xyz -o sdf > save_file.sdf
49 | 
50 | Read in xyz file with a charge and print out the SMILES
51 | 
52 |     xyz2mol.py examples/acetate.xyz --charge -1
53 | 
54 | ## Dependencies:
55 | 
56 |     rdkit # (version 2019.9.1 or later needed for huckel option)
57 |     networkx
58 | 
59 | 


--------------------------------------------------------------------------------
/examples/acetate.xyz:
--------------------------------------------------------------------------------
 1 | 7
 2 | charge=-1=
 3 | C         -4.71686        0.89919        0.05714
 4 | C         -3.24898        0.98400       -0.22830
 5 | H         -5.04167        1.74384        0.67862
 6 | H         -5.01710       -0.02205        0.56344
 7 | H         -5.21076        0.96874       -0.91208
 8 | O         -2.65909        2.05702       -0.34025
 9 | O         -2.63413       -0.18702       -0.48679
10 | 


--------------------------------------------------------------------------------
/examples/chiral_stereo_test.xyz:
--------------------------------------------------------------------------------
 1 | 15
 2 | 	Energy:      10.5637353
 3 | C         -5.48821        0.02982       -0.00852
 4 | C         -4.15445       -0.12323       -0.04208
 5 | C         -3.48273       -1.46491        0.04697
 6 | F         -3.88123       -2.11120        1.17935
 7 | C         -1.96681       -1.36452        0.07853
 8 | H         -3.78257       -2.08264       -0.80658
 9 | C         -6.18988        1.34568       -0.08727
10 | H         -6.12260       -0.84989        0.08936
11 | H         -3.51606        0.75189       -0.13305
12 | H         -5.49066        2.18549       -0.14705
13 | H         -6.81679        1.48581        0.79859
14 | H         -6.83374        1.37210       -0.97169
15 | H         -1.62796       -0.78043        0.94086
16 | H         -1.57677       -0.90140       -0.83351
17 | H         -1.52787       -2.36296        0.17627
18 | 


--------------------------------------------------------------------------------
/examples/ethane.xyz:
--------------------------------------------------------------------------------
 1 | 8
 2 | charge=0=
 3 | C         -4.58735        0.92696        0.00000
 4 | C         -3.11050        0.92696        0.00000
 5 | H         -4.93786        1.78883        0.58064
 6 | H         -4.93786       -0.00682        0.45608
 7 | H         -4.93786        0.99888       -1.03672
 8 | H         -2.75999        0.85505        1.03672
 9 | H         -2.75998        1.86075       -0.45608
10 | H         -2.75998        0.06509       -0.58064
11 | 


--------------------------------------------------------------------------------
/examples/propylbenzene.xyz:
--------------------------------------------------------------------------------
 1 | 20
 2 | 
 3 | C      -2.08081073      1.27759366      0.52999704
 4 | C      -1.36085808      0.01534835      0.13171776
 5 | C       0.12921265     -0.00145767     -0.01251015
 6 | C       0.89390756      1.16259960      0.22072207
 7 | C       2.28529729      1.14285208      0.08499036
 8 | C       2.93783862     -0.03314066     -0.28435514
 9 | C       2.20046595     -1.19345389     -0.51916959
10 | C       0.80878206     -1.18180595     -0.38553184
11 | C      -2.17184071     -1.22963114     -0.11838690
12 | H      -1.72431086      1.61348849      1.52614588
13 | H      -3.17848660      1.12721396      0.59360457
14 | H      -1.88832766      2.07143028     -0.22166901
15 | H       0.42742526      2.09446201      0.50865072
16 | H       2.85855884      2.04284076      0.26700529
17 | H       4.01510494     -0.04529350     -0.38861905
18 | H       2.70792713     -2.10563507     -0.80577565
19 | H       0.27503723     -2.10238605     -0.57663639
20 | H      -1.85660650     -2.02918702      0.58415512
21 | H      -2.02061491     -1.57122523     -1.16369726
22 | H      -3.25770147     -1.05461302      0.02936218
23 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | networkx
4 | rmsd
5 | pytest
6 | flake8
7 | 


--------------------------------------------------------------------------------
/requirements.yml:
--------------------------------------------------------------------------------
1 | name: x2m_env
2 | dependencies:
3 |   - python=3.7
4 |   - rdkit::rdkit
5 |   - pip
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | # Packages the single module xyz2mol.py and registers an `xyz2mol` console script -> xyz2mol:main
 3 | setup(
 4 |     name="xyz2mol",
 5 |     version="0.1.2",
 6 |     description="Convert Cartesian coordinates to one or more molecular graphs",
 7 |     url="https://github.com/jensengroup/xyz2mol",
 8 |     py_modules=["xyz2mol"],
 9 |     entry_points={"console_scripts": ["xyz2mol=xyz2mol:main"]},
10 | )
11 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np
  3 | import pytest
  4 | from rdkit import Chem, rdBase
  5 | from rdkit.Chem import AllChem, rdmolops
  6 | 
  7 | import xyz2mol as x2m
  8 | 
  9 | __TEST_SMILES__ = [
 10 |     'C[C-](c1ccccc1)C',
 11 |     'C[C-](C)c1ccccc1',
 12 |     'C=C([O-])CC',
 13 |     'C=C([NH3+])CC',
 14 |     'CC(=O)[O-]',
 15 |     'C[N+](=O)[O-]',
 16 |     'CS(CC)(=O)=O',
 17 |     'CS([O-])(=O)=O',
 18 |     'C=C(C)CC',
 19 |     'CC(C)CC',
 20 |     'C=C(N)CC',
 21 |     'C=C(C)C=C',
 22 |     'C#CC=C',
 23 |     'c1ccccc1',
 24 |     'c1ccccc1c1ccccc1',
 25 |     '[NH3+]CS([O-])(=O)=O',
 26 |     'CC(NC)=O',
 27 |     '[O-]c1ccccc1',
 28 |     'O=C(C=C1)C=CC1=CCC([O-])=O',
 29 |     'C#CC#C',
 30 |     'Cc1ccc(cc1)C1C=CC2C(C=CC2(C#N)C#N)=CC=1',
 31 |     # 'C[NH+]=C([O-])CC[NH+]=C([O-])C',
 32 |     # 'C[NH+]=CC=C([O-])C',
 33 |     '[C+](C)(C)CC[C-](C)(C)',
 34 |     'O=C(C=C1)C=CC1=CCC([O-])=O',
 35 |     # 'O=C([CH-]C=CC(C([O-])=O)=O)[O-]',
 36 |     '[O-]c1ccccc1',
 37 |     # 'CNC(C(C)=[NH+][CH-]CC(O)=O)=O',
 38 |     # "[CH2][CH2][CH]=[CH][CH2]",
 39 |     'Cc1ccc(cc1)C1C=CC2C(C=CC2(C#N)C#N)=CC=1',
 40 |     'CC1C=CC2C(C=CC2(C)C)=CC=1',
 41 |     'CC1=CC=C(C=CC2)C2C=C1',
 42 |     'CC1=CC=C(C2=CC=CC=C2)C=C1',
 43 |     'C1(CC2=CC=CC=C2)=CC=CC=C1',
 44 |     '[O-]c1ccccc1[O-]',
 45 |     'C[N+](=O)[O-]',
 46 |     'N#CC(C#N)=CC=C1C=CC=CC(=C1)c1ccc(cc1)[N+](=O)[O-]',
 47 |     'CNC([O-])=C([NH+]=C/CC(O)=O)C',
 48 |     # 'Cc1cn(C2CC(O)C(COP(=O)([O-])OP(=O)([O-])OC3OC(C)C([NH3+])C(O)C3O)O2)c(=O)[nH]c1=O', # works, just slow
 49 | ]
 50 | 
 51 | __TEST_FILES__ = [
 52 |     ("examples/ethane.xyz", 0, "CC"),
 53 |     ("examples/acetate.xyz", -1, "CC(=O)[O-]"),
 54 |     ("examples/chiral_stereo_test.xyz", 0, "C/C=C/[C@@H](C)F"),
 55 |     ("examples/propylbenzene.xyz", -1, "C[C-](C)c1ccccc1"),
 56 | ]
 57 | 
 58 | def get_atoms(mol):
 59 |     atoms = [a.GetAtomicNum() for a in mol.GetAtoms()]
 60 |     return atoms
 61 | 
 62 | def get_mol(smiles):
 63 |     mol = Chem.MolFromSmiles(smiles)
 64 |     Chem.Kekulize(mol, clearAromaticFlags=True)
 65 |     charge = Chem.GetFormalCharge(mol)
 66 |     mol = Chem.AddHs(mol)
 67 |     return mol
 68 | 
 69 | def generate_structure_from_smiles(smiles):
 70 | 
 71 |     # Generate a 3D structure from smiles
 72 | 
 73 |     mol = Chem.MolFromSmiles(smiles)
 74 |     mol = Chem.AddHs(mol)
 75 | 
 76 |     status = AllChem.EmbedMolecule(mol)
 77 |     status = AllChem.UFFOptimizeMolecule(mol)
 78 | 
 79 |     conformer = mol.GetConformer()
 80 |     coordinates = conformer.GetPositions()
 81 |     coordinates = np.array(coordinates)
 82 | 
 83 |     atoms = get_atoms(mol)
 84 | 
 85 |     return atoms, coordinates
 86 | 
 87 | @pytest.mark.parametrize("smiles", __TEST_SMILES__)
 88 | def test_smiles_from_adjacent_matrix(smiles):
 89 | 
 90 |     charged_fragments = True
 91 |     quick = True
 92 | 
 93 |     # Cut apart the smiles
 94 |     mol = get_mol(smiles)
 95 |     atoms = get_atoms(mol)
 96 |     charge = Chem.GetFormalCharge(mol)
 97 |     adjacent_matrix = Chem.GetAdjacencyMatrix(mol)
 98 | 
 99 |     #
100 |     mol = Chem.RemoveHs(mol)
101 |     canonical_smiles = Chem.MolToSmiles(mol)
102 | 
103 |     # Define new molecule template from atoms
104 |     new_mol = x2m.get_proto_mol(atoms)
105 | 
106 |         # reconstruct the molecule from adjacent matrix, atoms and total charge
107 |     new_mols = x2m.AC2mol(new_mol, adjacent_matrix, atoms, charge, charged_fragments, quick)
108 |     
109 |     new_mol_smiles_list = []
110 |     for new_mol in new_mols:
111 |         new_mol = Chem.RemoveHs(new_mol)
112 |         new_mol_smiles = Chem.MolToSmiles(new_mol)
113 | 
114 |         new_mol_smiles_list.append(new_mol_smiles)
115 | 
116 |     assert canonical_smiles in new_mol_smiles_list
117 | 
118 |     return
119 | 
120 | @pytest.mark.parametrize("smiles", __TEST_SMILES__)
121 | def test_smiles_from_coord_vdw(smiles):
122 | 
123 |     # The answer
124 |     mol = Chem.MolFromSmiles(smiles)
125 |     charge = Chem.GetFormalCharge(mol)
126 |     canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
127 | 
128 |     # generate forcefield coordinates
129 |     atoms, coordinates = generate_structure_from_smiles(smiles)
130 | 
131 |     # Generate molobj from atoms, charge and coordinates
132 |     mols = x2m.xyz2mol(atoms, coordinates, charge=charge)
133 | 
134 |     smiles_list = []
135 |     for mol in mols:
136 |     # For this test, remove chira. clean and canonical
137 |         Chem.Kekulize(mol)
138 |         mol = Chem.RemoveHs(mol)
139 |         Chem.RemoveStereochemistry(mol)
140 |         smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
141 | 
142 |         # Please look away. A small hack that removes the explicit hydrogens
143 |         mol = Chem.MolFromSmiles(smiles)
144 |         smiles = Chem.MolToSmiles(mol)
145 |         smiles_list.append(smiles)
146 | 
147 |     assert canonical_smiles in smiles_list
148 | 
149 |     return
150 | 
151 | 
152 | @pytest.mark.parametrize("smiles", __TEST_SMILES__)
153 | def test_smiles_from_coord_huckel(smiles):
154 | 
155 |     # The answer
156 |     mol = Chem.MolFromSmiles(smiles)
157 |     charge = Chem.GetFormalCharge(mol)
158 |     canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
159 | 
160 |     # generate forcefield coordinates
161 |     atoms, coordinates = generate_structure_from_smiles(smiles)
162 | 
163 |     # Generate molobj from atoms, charge and coordinates
164 |     mols = x2m.xyz2mol(atoms, coordinates, charge=charge, use_huckel=True)
165 | 
166 |     smiles_list = []
167 |     for mol in mols:
168 |         # For this test, remove chira. clean and canonical
169 |         Chem.Kekulize(mol)
170 |         mol = Chem.RemoveHs(mol)
171 |         Chem.RemoveStereochemistry(mol)
172 |         smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
173 | 
174 |         # Please look away. A small hack that removes the explicit hydrogens
175 |         mol = Chem.MolFromSmiles(smiles)
176 |         smiles = Chem.MolToSmiles(mol)
177 |         smiles_list.append(smiles)
178 | 
179 |     assert canonical_smiles in smiles_list
180 | 
181 |     return
182 | 
183 | 
184 | @pytest.mark.parametrize("filename, charge, answer", __TEST_FILES__)
185 | def test_smiles_from_xyz_files(filename, charge, answer):
186 | 
187 |     charged_fragments = True
188 |     quick = True
189 | 
190 |     atoms, charge_read, coordinates = x2m.read_xyz_file(filename)
191 | 
192 |     mols = x2m.xyz2mol(atoms, coordinates, charge=charge)
193 | 
194 |     smiles_list = []
195 |     for mol in mols:
196 |         mol = Chem.RemoveHs(mol)
197 | 
198 |         smiles = Chem.MolToSmiles(mol)
199 |         smiles_list.append(smiles)
200 | 
201 |     assert answer in smiles_list
202 | 
203 |     return
204 | 
205 | 
206 | if __name__ == "__main__":
207 | 
208 |     import argparse
209 |     parser = argparse.ArgumentParser()
210 |     parser.add_argument('-t', '--test-type', type=str, help="")
211 |     parser.add_argument('-s', '--smiles', help="")
212 |     args = parser.parse_args()
213 | 
214 |     for smiles in __TEST_SMILES__:
215 |         test_smiles_from_adjacent_matrix(smiles)
216 |         print(True, smiles)
217 | 
218 |     for filename, charge, answer in __TEST_FILES__:
219 |         test_smiles_from_xyz_files(filename, charge, answer)
220 |         print(True, answer)
221 | 
222 |     for smiles in __TEST_SMILES__:
223 |         test_smiles_from_coord_vdw(smiles)
224 |         print(True, smiles)
225 | 
226 |     for smiles in __TEST_SMILES__:
227 |         test_smiles_from_coord_huckel(smiles)
228 |         print(True, smiles)
229 | 


--------------------------------------------------------------------------------
/xyz2mol.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Module for generating rdkit molobj/smiles/molecular graph from free atoms
  3 | 
  4 | Implementation by Jan H. Jensen, based on the paper
  5 | 
  6 |     Yeonjoon Kim and Woo Youn Kim
  7 |     "Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity
  8 |     to Three-Dimensional Geometry"
  9 |     Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777
 10 |     DOI: 10.1002/bkcs.10334
 11 | 
 12 | """
 13 | 
 14 | import copy
 15 | import itertools
 16 | 
 17 | from rdkit.Chem import rdmolops
 18 | from rdkit.Chem import rdchem
 19 | try:
 20 |     from rdkit.Chem import rdEHTTools #requires RDKit 2019.9.1 or later
 21 | except ImportError:
 22 |     rdEHTTools = None
 23 |     
 24 | from collections import defaultdict
 25 | 
 26 | import numpy as np
 27 | import networkx as nx
 28 | 
 29 | from rdkit import Chem
 30 | from rdkit.Chem import AllChem, rdmolops
 31 | import sys
 32 | 
 33 | global __ATOM_LIST__
 34 | __ATOM_LIST__ = \
 35 |     ['h',  'he',
 36 |      'li', 'be', 'b',  'c',  'n',  'o',  'f',  'ne',
 37 |      'na', 'mg', 'al', 'si', 'p',  's',  'cl', 'ar',
 38 |      'k',  'ca', 'sc', 'ti', 'v ', 'cr', 'mn', 'fe', 'co', 'ni', 'cu',
 39 |      'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr',
 40 |      'rb', 'sr', 'y',  'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag',
 41 |      'cd', 'in', 'sn', 'sb', 'te', 'i',  'xe',
 42 |      'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy',
 43 |      'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w',  're', 'os', 'ir', 'pt',
 44 |      'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn',
 45 |      'fr', 'ra', 'ac', 'th', 'pa', 'u',  'np', 'pu']
 46 | 
 47 | 
 48 | global atomic_valence
 49 | global atomic_valence_electrons
 50 | 
 51 | atomic_valence = defaultdict(list)
 52 | atomic_valence[1] = [1]
 53 | atomic_valence[5] = [3,4]
 54 | atomic_valence[6] = [4]
 55 | atomic_valence[7] = [3,4]
 56 | atomic_valence[8] = [2,1,3]
 57 | atomic_valence[9] = [1]
 58 | atomic_valence[14] = [4]
 59 | atomic_valence[15] = [5,3] #[5,4,3]
 60 | atomic_valence[16] = [6,3,2] #[6,4,2]
 61 | atomic_valence[17] = [1]
 62 | atomic_valence[32] = [4]
 63 | atomic_valence[35] = [1]
 64 | atomic_valence[53] = [1]
 65 | 
 66 | atomic_valence_electrons = {}
 67 | atomic_valence_electrons[1] = 1
 68 | atomic_valence_electrons[5] = 3
 69 | atomic_valence_electrons[6] = 4
 70 | atomic_valence_electrons[7] = 5
 71 | atomic_valence_electrons[8] = 6
 72 | atomic_valence_electrons[9] = 7
 73 | atomic_valence_electrons[14] = 4
 74 | atomic_valence_electrons[15] = 5
 75 | atomic_valence_electrons[16] = 6
 76 | atomic_valence_electrons[17] = 7
 77 | atomic_valence_electrons[32] = 4
 78 | atomic_valence_electrons[35] = 7
 79 | atomic_valence_electrons[53] = 7
 80 | 
 81 | 
 82 | def str_atom(atom):
 83 |     """
 84 |     convert integer atom to string atom
 85 |     """
 86 |     global __ATOM_LIST__
 87 |     atom = __ATOM_LIST__[atom - 1]
 88 |     return atom
 89 | 
 90 | 
 91 | def int_atom(atom):
 92 |     """
 93 |     convert str atom to integer atom
 94 |     """
 95 |     global __ATOM_LIST__
 96 |     #print(atom)
 97 |     atom = atom.lower()
 98 |     return __ATOM_LIST__.index(atom) + 1
 99 | 
100 | 
101 | def get_UA(maxValence_list, valence_list):
102 |     """
103 |     """
104 |     UA = []
105 |     DU = []
106 |     for i, (maxValence, valence) in enumerate(zip(maxValence_list, valence_list)):
107 |         if not maxValence - valence > 0:
108 |             continue
109 |         UA.append(i)
110 |         DU.append(maxValence - valence)
111 |     return UA, DU
112 | 
113 | 
114 | def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True):
115 |     """Build a bond-order matrix from a copy of AC by adding one bond order per
116 |     pair in UA_pairs, repeating until DU no longer changes between passes."""
117 |     BO = AC.copy()
118 |     DU_save = []
119 |     # DU_save holds the DU from the previous pass; the loop stops once it equals DU
120 |     while DU_save != DU:
121 |         for i, j in UA_pairs:
122 |             BO[i, j] += 1
123 |             BO[j, i] += 1
124 |         # recompute UA/DU from the new row sums and take the first result of get_UA_pairs
125 |         BO_valence = list(BO.sum(axis=1))
126 |         DU_save = copy.copy(DU)
127 |         UA, DU = get_UA(valences, BO_valence)
128 |         UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0]
129 | 
130 |     return BO
131 | 
132 | 
133 | def valences_not_too_large(BO, valences):
134 |     """
135 |     """
136 |     number_of_bonds_list = BO.sum(axis=1)
137 |     for valence, number_of_bonds in zip(valences, number_of_bonds_list):
138 |         if number_of_bonds > valence:
139 |             return False
140 | 
141 |     return True
142 | 
143 | def charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
144 |                  allow_charged_fragments=True):
145 |     # total charge
146 |     Q = 0
147 | 
148 |     # charge fragment list
149 |     q_list = []
150 | 
151 |     if allow_charged_fragments:
152 | 
153 |         BO_valences = list(BO.sum(axis=1))
154 |         for i, atom in enumerate(atoms):
155 |             q = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i])
156 |             Q += q
157 |             if atom == 6:
158 |                 number_of_single_bonds_to_C = list(BO[i, :]).count(1)
159 |                 if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2:
160 |                     Q += 1
161 |                     q = 2
162 |                 if number_of_single_bonds_to_C == 3 and Q + 1 < charge:
163 |                     Q += 2
164 |                     q = 1
165 | 
166 |             if q != 0:
167 |                 q_list.append(q)
168 | 
169 |     return (charge == Q)
170 | 
171 | def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
172 |     allow_charged_fragments=True):
173 |     """
174 |     Sanity of bond-orders
175 | 
176 |     args:
177 |         BO -
178 |         AC -
179 |         charge -
180 |         DU - 
181 | 
182 | 
183 |     optional
184 |         allow_charges_fragments - 
185 | 
186 | 
187 |     returns:
188 |         boolean - true of molecule is OK, false if not
189 |     """
190 | 
191 |     if not valences_not_too_large(BO, valences):
192 |         return False
193 | 
194 |     check_sum = (BO - AC).sum() == sum(DU)
195 |     check_charge = charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
196 |                                 allow_charged_fragments)
197 | 
198 |     if check_charge and check_sum: 
199 |         return True
200 | 
201 |     return False
202 | 
203 | 
204 | def get_atomic_charge(atom, atomic_valence_electrons, BO_valence):
205 |     """
206 |     """
207 | 
208 |     if atom == 1:
209 |         charge = 1 - BO_valence
210 |     elif atom == 5:
211 |         charge = 3 - BO_valence
212 |     elif atom == 15 and BO_valence == 5:
213 |         charge = 0
214 |     elif atom == 16 and BO_valence == 6:
215 |         charge = 0
216 |     else:
217 |         charge = atomic_valence_electrons - 8 + BO_valence
218 | 
219 |     return charge
220 | 
221 | 
222 | def clean_charges(mol):
223 |     """
224 |     This hack should not be needed anymore, but is kept just in case
225 | 
226 |     """
227 | 
228 |     Chem.SanitizeMol(mol)
229 |     #rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
230 |     #              '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
231 |     #              '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
232 |     #              '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
233 |     #              '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
234 |     #              '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']
235 | 
236 |     rxn_smarts = ['[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>'
237 |                   '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]',
238 |                   '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>'
239 |                   '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1']
240 | 
241 |     fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False)
242 | 
243 |     for i, fragment in enumerate(fragments):
244 |         for smarts in rxn_smarts:
245 |             patt = Chem.MolFromSmarts(smarts.split(">>")[0])
246 |             while fragment.HasSubstructMatch(patt):
247 |                 rxn = AllChem.ReactionFromSmarts(smarts)
248 |                 ps = rxn.RunReactants((fragment,))
249 |                 fragment = ps[0][0]
250 |                 Chem.SanitizeMol(fragment)
251 |         if i == 0:
252 |             mol = fragment
253 |         else:
254 |             mol = Chem.CombineMols(mol, fragment)
255 | 
256 |     return mol
257 | 
258 | 
def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons,
           mol_charge, allow_charged_fragments=True,  use_atom_maps=False):
    """
    based on code written by Paolo Toscani

    Build an rdkit molecule with bonds (and charges or radicals) from a bond
    order matrix, the atom list, the valence electron table and the total
    charge.

    args:
        mol - rdkit molecule (atoms only, bonds are added here)
        BO_matrix - bond order matrix of molecule
        atoms - list of integer atom types, one per row of BO_matrix
        atomic_valence_electrons - valence electron lookup, indexed by atom type
        mol_charge - total charge of molecule

    optional:
        allow_charged_fragments - bool - assign formal charges when True,
            radical electrons when False
        use_atom_maps - bool - forwarded to the charge/radical helpers

    returns
        mol - updated rdkit molecule with bond connectivity

    raises
        RuntimeError - BO_matrix and atoms have different lengths

    """

    matrix_size = len(BO_matrix)
    atom_count = len(atoms)
    BO_valences = list(BO_matrix.sum(axis=1))

    if matrix_size != atom_count:
        raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(matrix_size, atom_count))

    editable = Chem.RWMol(mol)

    # Rounded bond orders outside 1-3 fall back to a single bond below.
    order_to_bond_type = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE
    }

    # Upper triangle only: every atom pair is visited once.
    for first in range(matrix_size):
        for second in range(first + 1, matrix_size):
            order = int(round(BO_matrix[first, second]))
            if order != 0:
                bond_type = order_to_bond_type.get(order, Chem.BondType.SINGLE)
                editable.AddBond(first, second, bond_type)

    mol = editable.GetMol()

    if not allow_charged_fragments:
        return set_atomic_radicals(mol, atoms, atomic_valence_electrons,
                                   BO_valences, use_atom_maps)

    return set_atomic_charges(mol, atoms, atomic_valence_electrons,
                              BO_valences, BO_matrix, mol_charge,
                              use_atom_maps)
321 | 
322 | 
def set_atomic_charges(mol, atoms, atomic_valence_electrons,
                       BO_valences, BO_matrix, mol_charge,
                       use_atom_maps):
    """
    Assign a formal charge to every atom of mol from its bond order valence.

    args:
        mol - rdkit molecule, modified in place and returned
        atoms - list of integer atom types
        atomic_valence_electrons - valence electron lookup, indexed by atom type
        BO_valences - summed bond orders per atom
        BO_matrix - bond order matrix of molecule
        mol_charge - total charge of molecule
        use_atom_maps - bool - also number the atoms (index + 1) as atom maps

    returns
        mol - the same molecule with formal charges set

    """
    running_charge = 0
    for idx, atomic_num in enumerate(atoms):
        rd_atom = mol.GetAtomWithIdx(idx)
        if use_atom_maps:
            rd_atom.SetAtomMapNum(idx + 1)

        formal_charge = get_atomic_charge(
            atomic_num,
            atomic_valence_electrons[atomic_num],
            BO_valences[idx])
        running_charge += formal_charge

        # Carbon (atomic number 6) gets two special cases keyed on how many
        # bond order 1 entries its row holds.
        if atomic_num == 6:
            single_bond_count = list(BO_matrix[idx, :]).count(1)
            if single_bond_count == 2 and BO_valences[idx] == 2:
                # two single bonds and nothing else: left uncharged
                running_charge += 1
                formal_charge = 0
            elif single_bond_count == 3 and running_charge + 1 < mol_charge:
                # three single bonds while the running total is still below
                # the requested charge: made a cation
                running_charge += 2
                formal_charge = 1

        if abs(formal_charge) > 0:
            rd_atom.SetFormalCharge(int(formal_charge))

    #mol = clean_charges(mol)

    return mol
350 | 
351 | 
def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences,
                        use_atom_maps):
    """
    Mark radical electrons on every atom of mol.

    The number of radical electrons = absolute atomic charge

    args:
        mol - rdkit molecule, modified in place and returned
        atoms - list of integer atom types
        atomic_valence_electrons - valence electron lookup, indexed by atom type
        BO_valences - summed bond orders per atom
        use_atom_maps - bool - also number the atoms (index + 1) as atom maps

    returns
        mol - the same molecule with radical electron counts set

    """
    for idx, atomic_num in enumerate(atoms):
        rd_atom = mol.GetAtomWithIdx(idx)
        if use_atom_maps:
            rd_atom.SetAtomMapNum(idx + 1)

        unpaired = get_atomic_charge(atomic_num,
                                     atomic_valence_electrons[atomic_num],
                                     BO_valences[idx])

        # atoms with a zero charge are left untouched
        if abs(unpaired) > 0:
            rd_atom.SetNumRadicalElectrons(abs(int(unpaired)))

    return mol
372 | 
373 | 
def get_bonds(UA, AC):
    """
    List the connected pairs among the atoms in UA.

    args:
        UA - list of atom indices
        AC - atom connectivity matrix, indexed as AC[i, j]

    returns
        bonds - list of (low, high) index tuples, one per pair of UA atoms
                whose AC entry equals 1, in the order the pairs occur in UA

    """
    return [
        tuple(sorted([first, second]))
        for position, first in enumerate(UA)
        for second in UA[position + 1:]
        if AC[first, second] == 1
    ]
386 | 
387 | 
def get_UA_pairs(UA, AC, use_graph=True):
    """
    Propose ways of pairing up the atoms in UA along existing connections.

    args:
        UA - list of atom indices
        AC - atom connectivity matrix

    optional:
        use_graph - bool - use a single networkx matching; otherwise
            enumerate bond combinations with itertools

    returns
        UA_pairs - list of pairings, each a collection of index pairs;
                   [()] when no two UA atoms are connected

    """

    bonds = get_bonds(UA, AC)

    if not bonds:
        return [()]

    if use_graph:
        graph = nx.Graph()
        graph.add_edges_from(bonds)
        return [list(nx.max_weight_matching(graph))]

    # Brute force: among all combinations of len(UA) // 2 bonds keep every
    # one that touches the largest number of distinct atoms.
    best_coverage = 0
    best_combos = [()]
    for candidate in itertools.combinations(bonds, len(UA) // 2):
        coverage = len({atom for bond in candidate for atom in bond})
        if coverage > best_coverage:
            best_coverage = coverage
            best_combos = [candidate]
        elif coverage == best_coverage:
            best_combos.append(candidate)

    return best_combos
417 | 
418 | 
def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
    """

    implemenation of algorithm shown in Figure 2

    UA: unsaturated atoms

    DU: degree of unsaturation (u matrix in Figure)

    best_BO: Bcurr in Figure

    args:
        AC - atom connectivity matrix
        atoms - list of integer atom types
        charge - total charge of molecule

    optional:
        allow_charged_fragments - bool - forwarded to the BO/charge checks
        use_graph - bool - forwarded to get_UA_pairs and get_BO

    returns
        BO - the first bond order matrix accepted by BO_is_OK, otherwise the
             best fallback found (initially a copy of AC)
        atomic_valence_electrons - the module-level valence electron table

    Prints a message and calls sys.exit() when an atom has more neighbours
    than its largest allowed valence.

    """

    global atomic_valence, atomic_valence_electrons

    AC_valence = list(AC.sum(axis=1))

    # Per atom, the allowed valences that are not smaller than its number of
    # neighbours, e.g. for CO: [[4],[2,1]]
    candidate_valences = []
    for idx, (atomic_num, neighbour_count) in enumerate(zip(atoms, AC_valence)):
        allowed = [v for v in atomic_valence[atomic_num] if v >= neighbour_count]
        if not allowed:
            print('Valence of atom',idx,'is',neighbour_count,'which bigger than allowed max',max(atomic_valence[atomic_num]),'. Stopping')
            sys.exit()
        candidate_valences.append(allowed)

    best_BO = AC.copy()

    # The product turns [[4],[2,1]] into the assignments (4,2) and (4,1)
    for valences in itertools.product(*candidate_valences):

        UA, DU_from_AC = get_UA(valences, AC_valence)

        # With no unsaturated atoms, AC itself is the candidate bond order
        # matrix; it is only checked in that case.
        if len(UA) == 0 and BO_is_OK(AC, AC, charge, DU_from_AC,
                                     atomic_valence_electrons, atoms, valences,
                                     allow_charged_fragments=allow_charged_fragments):
            return AC, atomic_valence_electrons

        for UA_pairs in get_UA_pairs(UA, AC, use_graph=use_graph):
            BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph)
            status = BO_is_OK(BO, AC, charge, DU_from_AC,
                              atomic_valence_electrons, atoms, valences,
                              allow_charged_fragments=allow_charged_fragments)
            charge_OK = charge_is_OK(BO, AC, charge, DU_from_AC,
                                     atomic_valence_electrons, atoms, valences,
                                     allow_charged_fragments=allow_charged_fragments)

            if status:
                return BO, atomic_valence_electrons

            # Otherwise remember it as fallback when it has at least as much
            # total bond order as the current best and passes both checks.
            if BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences) and charge_OK:
                best_BO = BO.copy()

    return best_BO, atomic_valence_electrons
482 | 
483 | 
def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True,
           use_graph=True, use_atom_maps=False):
    """
    Turn an atom connectivity matrix into a list of rdkit molecules, one per
    resonance form.

    args:
        mol - rdkit molecule (atoms only)
        AC - atom connectivity matrix
        atoms - list of integer atom types
        charge - total charge of molecule

    optional:
        allow_charged_fragments - bool - forwarded to AC2BO and BO2mol
        use_graph - bool - forwarded to AC2BO
        use_atom_maps - bool - forwarded to BO2mol

    returns
        mols - list of rdkit molecules; empty when the formal charge of the
               built molecule differs from charge

    """

    # convert AC matrix to bond order (BO) matrix
    BO, atomic_valence_electrons = AC2BO(
        AC,
        atoms,
        charge,
        allow_charged_fragments=allow_charged_fragments,
        use_graph=use_graph)

    # add BO connectivity and charge info to mol object
    mol = BO2mol(
        mol,
        BO,
        atoms,
        atomic_valence_electrons,
        charge,
        allow_charged_fragments=allow_charged_fragments,
        use_atom_maps=use_atom_maps)

    # If charge is not correct don't return mol
    if Chem.GetFormalCharge(mol) != charge:
        return []

    # BO2mol returns an arbitrary resonance form. Let's make the rest.
    # The supplier takes ONE flags argument (a bitwise OR of resonance
    # flags); its next positional argument is the maximum number of
    # structures. Passing the two flags as separate arguments therefore
    # dropped the anion flag and used its integer value as the structure cap.
    resonance_flags = Chem.UNCONSTRAINED_CATIONS | Chem.UNCONSTRAINED_ANIONS
    supplier = rdchem.ResonanceMolSupplier(mol, resonance_flags)

    return [resonance_form for resonance_form in supplier]
516 | 
517 | 
def get_proto_mol(atoms):
    """
    Create an rdkit molecule holding one atom per entry of atoms and no bonds.

    args:
        atoms - non-empty list of integer atom types

    returns
        mol - rdkit molecule with the atoms in the given order

    """
    # The first atom comes from a one-atom SMARTS pattern, the rest are
    # appended to an editable copy of it.
    seed = Chem.MolFromSmarts("[#{}]".format(atoms[0]))
    editable = Chem.RWMol(seed)
    for atomic_number in atoms[1:]:
        editable.AddAtom(Chem.Atom(atomic_number))

    return editable.GetMol()
530 | 
531 | 
def read_xyz_file(filename, look_for_charge=True):
    """
    Read atoms, total charge and coordinates from an xyz file.

    The first line must be an integer (the atom count), the second line is
    the title, and every following non-blank line is
    "symbol x y z" (fields after the fourth are ignored).

    args:
        filename - path of the xyz file

    optional:
        look_for_charge - bool - read the charge from a "charge=<int>" token
            in the title line; when False the charge stays 0

    returns
        atoms - list of integer atom types (symbols converted with int_atom)
        charge - total charge found in the title line, 0 if none
        xyz_coordinates - list of [x, y, z] floats, one per atom

    raises
        ValueError - malformed atom count, charge value or atom line

    """

    atomic_symbols = []
    xyz_coordinates = []
    charge = 0

    with open(filename, "r") as file:
        for line_number, line in enumerate(file):
            if line_number == 0:
                # The count is only validated; the atom lines themselves
                # determine how many atoms are returned.
                int(line)
            elif line_number == 1:
                if look_for_charge and "charge=" in line:
                    # Take only the token right after "charge=" so other
                    # text or other key=value pairs in the title are fine.
                    charge_tokens = line.split("charge=", 1)[1].split()
                    if not charge_tokens:
                        raise ValueError("no value after 'charge=' in title line")
                    charge = int(charge_tokens[0])
            else:
                fields = line.split()
                if not fields:
                    # tolerate blank lines, e.g. a trailing newline
                    continue
                atomic_symbol, x, y, z = fields[:4]
                atomic_symbols.append(atomic_symbol)
                xyz_coordinates.append([float(x), float(y), float(z)])

    atoms = [int_atom(atom) for atom in atomic_symbols]

    return atoms, charge, xyz_coordinates
557 | 
558 | 
def xyz2AC(atoms, xyz, charge, use_huckel=False):
    """

    Dispatch atoms and coordinates to one of the two atom connectivity (AC)
    back-ends.

    args:
        atoms - int atom types
        xyz - coordinates
        charge - molecule charge (only passed to the Huckel back-end)

    optional:
        use_huckel - Use Huckel method for atom connectivity

    returns
        ac - atom connectivity matrix
        mol - rdkit molecule

    """

    if not use_huckel:
        return xyz2AC_vdW(atoms, xyz)

    return xyz2AC_huckel(atoms, xyz, charge)
582 | 
583 | 
def xyz2AC_vdW(atoms, xyz):
    """
    Atom connectivity from a distance criterion (see get_AC).

    args:
        atoms - int atom types
        xyz - coordinates, indexed as xyz[atom][0..2]

    returns
        ac - atom connectivity matrix
        mol - rdkit molecule carrying the coordinates as a conformer

    """

    # Bond-free molecule with one atom per entry of atoms
    mol = get_proto_mol(atoms)

    # Attach the coordinates as a conformer
    conformer = Chem.Conformer(mol.GetNumAtoms())
    for idx in range(mol.GetNumAtoms()):
        position = (xyz[idx][0], xyz[idx][1], xyz[idx][2])
        conformer.SetAtomPosition(idx, position)
    mol.AddConformer(conformer)

    return get_AC(mol), mol
598 | 
599 | 
def get_AC(mol, covalent_factor=1.3):
    """

    Generate adjacent matrix from atoms and coordinates.

    AC is a (num_atoms, num_atoms) matrix with 1 being covalent bond and 0 is not.
    Two atoms count as bonded when their distance does not exceed the sum of
    their covalent radii, each scaled by covalent_factor.

    covalent_factor - 1.3 is an arbitrary factor

    args:
        mol - rdkit molobj with 3D conformer

    optional
        covalent_factor - increase covalent bond length threshold with factor

    returns:
        AC - adjacent matrix

    """

    distances = Chem.Get3DDistanceMatrix(mol)

    periodic_table = Chem.GetPeriodicTable()
    num_atoms = mol.GetNumAtoms()
    AC = np.zeros((num_atoms, num_atoms), dtype=int)

    # Scaled covalent radius of every atom, looked up once
    scaled_radii = [
        periodic_table.GetRcovalent(mol.GetAtomWithIdx(idx).GetAtomicNum()) * covalent_factor
        for idx in range(num_atoms)
    ]

    # Visit each pair once and fill both halves of the symmetric matrix
    for first in range(num_atoms):
        for second in range(first + 1, num_atoms):
            if distances[first, second] <= scaled_radii[first] + scaled_radii[second]:
                AC[first, second] = 1
                AC[second, first] = 1

    return AC
639 | 
640 | 
def xyz2AC_huckel(atomicNumList, xyz, charge):
    """

    Atom connectivity from the reduced overlap populations of an extended
    Huckel run.

    args
        atomicNumList - atom type list
        xyz - coordinates
        charge - molecule charge

    returns
        ac - atom connectivity
        mol - rdkit molecule

    """
    mol = get_proto_mol(atomicNumList)

    conformer = Chem.Conformer(mol.GetNumAtoms())
    for idx in range(mol.GetNumAtoms()):
        conformer.SetAtomPosition(idx, (xyz[idx][0], xyz[idx][1], xyz[idx][2]))
    mol.AddConformer(conformer)

    num_atoms = len(atomicNumList)
    AC = np.zeros((num_atoms, num_atoms)).astype(int)

    # The calculation runs on a copy whose first atom carries the whole
    # molecular charge (arbitrary choice); the returned mol stays uncharged.
    mol_huckel = Chem.Mol(mol)
    mol_huckel.GetAtomWithIdx(0).SetFormalCharge(charge)

    _passed, result = rdEHTTools.RunMol(mol_huckel)
    overlap_populations = result.GetReducedOverlapPopulationMatrix()

    # Spread the flat lower-triangular values into a square matrix
    lower_mask = np.tril(np.ones((num_atoms, num_atoms), dtype=bool))
    square = np.zeros((num_atoms, num_atoms))
    square[lower_mask] = overlap_populations

    for first in range(num_atoms):
        for second in range(first + 1, num_atoms):
            # arbitrary cutoff for a bond. May need adjustment
            if abs(square[second, first]) >= 0.15:
                AC[first, second] = 1
                AC[second, first] = 1

    return AC, mol
679 | 
680 | 
def chiral_stereo_check(mol):
    """
    Sanitize mol and derive bond stereo and chiral tags from its conformer.

    The molecule is modified in place; nothing is returned.

    args:
        mol - rdkit molecule, with embedded conformer

    """
    # The four steps run in this fixed order.
    Chem.SanitizeMol(mol)
    Chem.DetectBondStereochemistry(mol, -1)
    Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True)
    Chem.AssignAtomChiralTagsFromStructure(mol, -1)
695 | 
696 | 
def xyz2mol(atoms, coordinates, charge=0, allow_charged_fragments=True,
            use_graph=True, use_huckel=False, embed_chiral=True,
            use_atom_maps=False):
    """
    Generate rdkit molobjs from atoms, coordinates and a total charge.

    args:
        atoms - list of atom types (int)
        coordinates - Cartesian coordinates, one [x, y, z] entry per atom
        charge - total charge of the system (default: 0)

    optional:
        allow_charged_fragments - alternatively radicals are made
        use_graph - use graph (networkx)
        use_huckel - Use Huckel method for atom connectivity prediction
        embed_chiral - embed chiral information to the molecule
        use_atom_maps - forwarded to AC2mol

    returns:
        mols - list of rdkit molobjects

    """

    # Step 1: connectivity matrix plus a bond-free mol object
    AC, mol = xyz2AC(atoms, coordinates, charge, use_huckel=use_huckel)

    # Step 2: bond orders, charges and the resulting molecules
    new_mols = AC2mol(mol, AC, atoms, charge,
                      allow_charged_fragments=allow_charged_fragments,
                      use_graph=use_graph,
                      use_atom_maps=use_atom_maps)

    if not embed_chiral:
        return new_mols

    # Step 3: stereo and chirality, applied to each molecule in place
    for candidate in new_mols:
        chiral_stereo_check(candidate)

    return new_mols
736 | 
737 | 
def main():
    """
    Placeholder: does nothing and returns None.
    """
    return None
742 | 
743 | 
if __name__ == "__main__":

    # Command line front end: read an xyz file, build the molecule(s) and
    # print them as SMILES (default) or as SDF/mol blocks.

    import argparse

    parser = argparse.ArgumentParser(usage='%(prog)s [options] molecule.xyz')
    parser.add_argument('structure', metavar='structure', type=str)
    parser.add_argument('-s', '--sdf',
        action="store_true",
        help="Dump sdf file")
    parser.add_argument('--ignore-chiral',
        action="store_true",
        help="Ignore chiral centers")
    parser.add_argument('--no-charged-fragments',
        action="store_true",
        help="Allow radicals to be made")
    parser.add_argument('--no-graph',
        action="store_true",
        help="Run xyz2mol without networkx dependencies")

    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
    # otherwise van der Waals radii are used
    parser.add_argument('--use-huckel',
        action="store_true",
        help="Use Huckel method for atom connectivity")
    # Without -s or -o sdf the output is SMILES, so that is the default the
    # help text reports.
    parser.add_argument('-o', '--output-format',
        action="store",
        type=str,
        help="Output format [smiles,sdf] (default=smiles)")
    parser.add_argument('-c', '--charge',
        action="store",
        metavar="int",
        type=int,
        help="Total charge of the system")

    args = parser.parse_args()

    # read xyz file
    filename = args.structure

    # allow for charged fragments, alternatively radicals are made
    charged_fragments = not args.no_charged_fragments

    # The graph-based pairing is faster for large systems but requires
    # networkx; --no-graph selects the itertools fallback instead.
    # NOTE(review): running without networkx installed presumably also needs
    # the 'import networkx as nx' at the top of the file disabled - confirm.
    quick = not args.no_graph

    # embed stereo/chiral information unless explicitly switched off
    embed_chiral = not args.ignore_chiral

    # read atoms and coordinates. Try to find the charge
    atoms, charge, xyz_coordinates = read_xyz_file(filename)

    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
    # otherwise van der Waals radii are used
    use_huckel = args.use_huckel

    # an explicit charge from the command line overrides the one in the file
    if args.charge is not None:
        charge = args.charge

    # Get the molobjs
    mols = xyz2mol(atoms, xyz_coordinates,
        charge=charge,
        use_graph=quick,
        allow_charged_fragments=charged_fragments,
        embed_chiral=embed_chiral,
        use_huckel=use_huckel)

    # -s/--sdf was parsed but never consulted; it now selects SDF output
    # exactly like "-o sdf".
    write_sdf = args.sdf or args.output_format == "sdf"

    # Print output
    for mol in mols:
        if write_sdf:
            txt = Chem.MolToMolBlock(mol)
            print(txt)

        else:
            # Canonical hack: round-trip through SMILES before printing
            isomeric_smiles = not args.ignore_chiral
            smiles = Chem.MolToSmiles(mol, isomericSmiles=isomeric_smiles)
            m = Chem.MolFromSmiles(smiles)
            smiles = Chem.MolToSmiles(m, isomericSmiles=isomeric_smiles)
            print(smiles)
826 | 


--------------------------------------------------------------------------------