├── .github └── workflows │ └── pythonapp.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── examples ├── acetate.xyz ├── chiral_stereo_test.xyz ├── ethane.xyz └── propylbenzene.xyz ├── requirements.txt ├── requirements.yml ├── setup.py ├── test.py └── xyz2mol.py /.github/workflows/pythonapp.yml: -------------------------------------------------------------------------------- 1 | name: Conda/Python pytest 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python and Conda enviroment 13 | run: | 14 | conda update -n base -c defaults conda 15 | conda env create -f requirements.yml -p env 16 | source $(conda info --root)/etc/profile.d/conda.sh 17 | conda activate ./env 18 | which pip 19 | - name: Install dependencies 20 | run: | 21 | # Activate 22 | source $(conda info --root)/etc/profile.d/conda.sh 23 | conda activate ./env 24 | # Pip install requirments 25 | python -m pip install --upgrade pip 26 | pip install -r requirements.txt 27 | - name: Lint with flake8 28 | run: | 29 | # Activate 30 | source $(conda info --root)/etc/profile.d/conda.sh 31 | conda activate ./env 32 | # Make sure it is installed 33 | pip install flake8 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 *.py --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide 37 | flake8 *.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | # Activate 41 | source $(conda info --root)/etc/profile.d/conda.sh 42 | conda activate ./env 43 | # Make sure pytest is available 44 | pip install pytest 45 | # Test files 46 | pytest -v test.py 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.cprof 2 | .pytest_cache 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jensen Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | PYTHON=python 3 | CONDA=conda 4 | FLAKE=flake8 5 | 6 | all: env 7 | 8 | setup: env pip 9 | 10 | env: 11 | ${CONDA} env create -f requirements.yml -p env 12 | 13 | pip: env 14 | ${PYTHON} -m pip install -r requirements.txt --no-cache-dir 15 | 16 | test: 17 | ${PYTHON} -m pytest -v test.py 18 | 19 | test-lint: 20 | @# stop the build if there are Python syntax errors or undefined names 21 | ${FLAKE} *.py --count --select=E9,F63,F7,F82 --show-source --statistics 22 | @# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 23 | ${FLAKE} *.py --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xyz2mol has now been implemented in RDKit 2 | 3 | ``` 4 | raw_mol = Chem.MolFromXYZFile('acetate.xyz') 5 | mol = Chem.Mol(raw_mol) 6 | rdDetermineBonds.DetermineBonds(mol,charge=-1) 7 | ``` 8 | 9 | # Convert Cartesian coordinates to one or more molecular graphs 10 | 11 | Given Cartesian coordinates in the form of a `.xyz` file, the code constructs a list of one or more molecular graphs. 
In cases where there are several possible resonance forms xyz2mol returns a list of all, otherwise just a list of one. 12 | 13 | This code is based on the work of 14 | DOI: [10.1002/bkcs.10334](http://dx.doi.org/10.1002/bkcs.10334) 15 | 16 | Yeonjoon Kim and Woo Youn Kim 17 | "Universal Structure Conversion Method for Organic Molecules: 18 | From Atomic Connectivity to Three-Dimensional Geometry" 19 | Bull. Korean Chem. Soc. 20 | 2015, Vol. 36, 1769-1777 21 | 22 | ## Setup 23 | 24 | Depends on `rdkit`, `numpy`, and `networkx`. Easiest to set up via anaconda/conda: 25 | 26 | `conda install -c conda-forge xyz2mol` 27 | 28 | Setup for a standalone environment is available via `Makefile`. To set up and test, simply clone the project and run make. 29 | 30 | git clone https://github.com/jensengroup/xyz2mol 31 | 32 | and then run the following in the `xyz2mol` folder 33 | 34 | make 35 | make test 36 | 37 | Note, it is also possible to run the code without the `networkx` dependencies, but it is slower. 38 | 39 | 40 | ## Example usage 41 | 42 | Read in xyz file and print out the SMILES, but don't encode the chirality. 
43 | 44 | xyz2mol.py examples/chiral_stereo_test.xyz --ignore-chiral 45 | 46 | Read in xyz file and print out the SDF format, save it in a file 47 | 48 | xyz2mol.py examples/chiral_stereo_test.xyz -o sdf > save_file.sdf 49 | 50 | Read in xyz file with a charge and print out the SMILES 51 | 52 | xyz2mol.py examples/acetate.xyz --charge -1 53 | 54 | ## Dependencies: 55 | 56 | rdkit # (version 2019.9.1 or later needed for huckel option) 57 | networkx 58 | 59 | -------------------------------------------------------------------------------- /examples/acetate.xyz: -------------------------------------------------------------------------------- 1 | 7 2 | charge=-1= 3 | C -4.71686 0.89919 0.05714 4 | C -3.24898 0.98400 -0.22830 5 | H -5.04167 1.74384 0.67862 6 | H -5.01710 -0.02205 0.56344 7 | H -5.21076 0.96874 -0.91208 8 | O -2.65909 2.05702 -0.34025 9 | O -2.63413 -0.18702 -0.48679 10 | -------------------------------------------------------------------------------- /examples/chiral_stereo_test.xyz: -------------------------------------------------------------------------------- 1 | 15 2 | Energy: 10.5637353 3 | C -5.48821 0.02982 -0.00852 4 | C -4.15445 -0.12323 -0.04208 5 | C -3.48273 -1.46491 0.04697 6 | F -3.88123 -2.11120 1.17935 7 | C -1.96681 -1.36452 0.07853 8 | H -3.78257 -2.08264 -0.80658 9 | C -6.18988 1.34568 -0.08727 10 | H -6.12260 -0.84989 0.08936 11 | H -3.51606 0.75189 -0.13305 12 | H -5.49066 2.18549 -0.14705 13 | H -6.81679 1.48581 0.79859 14 | H -6.83374 1.37210 -0.97169 15 | H -1.62796 -0.78043 0.94086 16 | H -1.57677 -0.90140 -0.83351 17 | H -1.52787 -2.36296 0.17627 18 | -------------------------------------------------------------------------------- /examples/ethane.xyz: -------------------------------------------------------------------------------- 1 | 8 2 | charge=0= 3 | C -4.58735 0.92696 0.00000 4 | C -3.11050 0.92696 0.00000 5 | H -4.93786 1.78883 0.58064 6 | H -4.93786 -0.00682 0.45608 7 | H -4.93786 0.99888 -1.03672 8 | H -2.75999 
0.85505 1.03672 9 | H -2.75998 1.86075 -0.45608 10 | H -2.75998 0.06509 -0.58064 11 | -------------------------------------------------------------------------------- /examples/propylbenzene.xyz: -------------------------------------------------------------------------------- 1 | 20 2 | 3 | C -2.08081073 1.27759366 0.52999704 4 | C -1.36085808 0.01534835 0.13171776 5 | C 0.12921265 -0.00145767 -0.01251015 6 | C 0.89390756 1.16259960 0.22072207 7 | C 2.28529729 1.14285208 0.08499036 8 | C 2.93783862 -0.03314066 -0.28435514 9 | C 2.20046595 -1.19345389 -0.51916959 10 | C 0.80878206 -1.18180595 -0.38553184 11 | C -2.17184071 -1.22963114 -0.11838690 12 | H -1.72431086 1.61348849 1.52614588 13 | H -3.17848660 1.12721396 0.59360457 14 | H -1.88832766 2.07143028 -0.22166901 15 | H 0.42742526 2.09446201 0.50865072 16 | H 2.85855884 2.04284076 0.26700529 17 | H 4.01510494 -0.04529350 -0.38861905 18 | H 2.70792713 -2.10563507 -0.80577565 19 | H 0.27503723 -2.10238605 -0.57663639 20 | H -1.85660650 -2.02918702 0.58415512 21 | H -2.02061491 -1.57122523 -1.16369726 22 | H -3.25770147 -1.05461302 0.02936218 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | networkx 4 | rmsd 5 | pytest 6 | flake8 7 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | name: x2m_env 2 | dependencies: 3 | - python=3.7 4 | - rdkit::rdkit 5 | - pip 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="xyz2mol", 5 | version="0.1.2", 6 | description="Convert Cartesian coordinates to one or more molecular graphs", 7 | 
url="https://github.com/jensengroup/xyz2mol", 8 | py_modules=["xyz2mol"], 9 | entry_points={"console_scripts": ["xyz2mol=xyz2mol:main"]}, 10 | ) 11 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pytest 4 | from rdkit import Chem, rdBase 5 | from rdkit.Chem import AllChem, rdmolops 6 | 7 | import xyz2mol as x2m 8 | 9 | __TEST_SMILES__ = [ 10 | 'C[C-](c1ccccc1)C', 11 | 'C[C-](C)c1ccccc1', 12 | 'C=C([O-])CC', 13 | 'C=C([NH3+])CC', 14 | 'CC(=O)[O-]', 15 | 'C[N+](=O)[O-]', 16 | 'CS(CC)(=O)=O', 17 | 'CS([O-])(=O)=O', 18 | 'C=C(C)CC', 19 | 'CC(C)CC', 20 | 'C=C(N)CC', 21 | 'C=C(C)C=C', 22 | 'C#CC=C', 23 | 'c1ccccc1', 24 | 'c1ccccc1c1ccccc1', 25 | '[NH3+]CS([O-])(=O)=O', 26 | 'CC(NC)=O', 27 | '[O-]c1ccccc1', 28 | 'O=C(C=C1)C=CC1=CCC([O-])=O', 29 | 'C#CC#C', 30 | 'Cc1ccc(cc1)C1C=CC2C(C=CC2(C#N)C#N)=CC=1', 31 | # 'C[NH+]=C([O-])CC[NH+]=C([O-])C', 32 | # 'C[NH+]=CC=C([O-])C', 33 | '[C+](C)(C)CC[C-](C)(C)', 34 | 'O=C(C=C1)C=CC1=CCC([O-])=O', 35 | # 'O=C([CH-]C=CC(C([O-])=O)=O)[O-]', 36 | '[O-]c1ccccc1', 37 | # 'CNC(C(C)=[NH+][CH-]CC(O)=O)=O', 38 | # "[CH2][CH2][CH]=[CH][CH2]", 39 | 'Cc1ccc(cc1)C1C=CC2C(C=CC2(C#N)C#N)=CC=1', 40 | 'CC1C=CC2C(C=CC2(C)C)=CC=1', 41 | 'CC1=CC=C(C=CC2)C2C=C1', 42 | 'CC1=CC=C(C2=CC=CC=C2)C=C1', 43 | 'C1(CC2=CC=CC=C2)=CC=CC=C1', 44 | '[O-]c1ccccc1[O-]', 45 | 'C[N+](=O)[O-]', 46 | 'N#CC(C#N)=CC=C1C=CC=CC(=C1)c1ccc(cc1)[N+](=O)[O-]', 47 | 'CNC([O-])=C([NH+]=C/CC(O)=O)C', 48 | # 'Cc1cn(C2CC(O)C(COP(=O)([O-])OP(=O)([O-])OC3OC(C)C([NH3+])C(O)C3O)O2)c(=O)[nH]c1=O', # works, just slow 49 | ] 50 | 51 | __TEST_FILES__ = [ 52 | ("examples/ethane.xyz", 0, "CC"), 53 | ("examples/acetate.xyz", -1, "CC(=O)[O-]"), 54 | ("examples/chiral_stereo_test.xyz", 0, "C/C=C/[C@@H](C)F"), 55 | ("examples/propylbenzene.xyz", -1, "C[C-](C)c1ccccc1"), 56 | ] 57 | 58 | def get_atoms(mol): 59 | atoms = [a.GetAtomicNum() 
for a in mol.GetAtoms()] 60 | return atoms 61 | 62 | def get_mol(smiles): 63 | mol = Chem.MolFromSmiles(smiles) 64 | Chem.Kekulize(mol, clearAromaticFlags=True) 65 | charge = Chem.GetFormalCharge(mol) 66 | mol = Chem.AddHs(mol) 67 | return mol 68 | 69 | def generate_structure_from_smiles(smiles): 70 | 71 | # Generate a 3D structure from smiles 72 | 73 | mol = Chem.MolFromSmiles(smiles) 74 | mol = Chem.AddHs(mol) 75 | 76 | status = AllChem.EmbedMolecule(mol) 77 | status = AllChem.UFFOptimizeMolecule(mol) 78 | 79 | conformer = mol.GetConformer() 80 | coordinates = conformer.GetPositions() 81 | coordinates = np.array(coordinates) 82 | 83 | atoms = get_atoms(mol) 84 | 85 | return atoms, coordinates 86 | 87 | @pytest.mark.parametrize("smiles", __TEST_SMILES__) 88 | def test_smiles_from_adjacent_matrix(smiles): 89 | 90 | charged_fragments = True 91 | quick = True 92 | 93 | # Cut apart the smiles 94 | mol = get_mol(smiles) 95 | atoms = get_atoms(mol) 96 | charge = Chem.GetFormalCharge(mol) 97 | adjacent_matrix = Chem.GetAdjacencyMatrix(mol) 98 | 99 | # 100 | mol = Chem.RemoveHs(mol) 101 | canonical_smiles = Chem.MolToSmiles(mol) 102 | 103 | # Define new molecule template from atoms 104 | new_mol = x2m.get_proto_mol(atoms) 105 | 106 | # reconstruct the molecule from adjacent matrix, atoms and total charge 107 | new_mols = x2m.AC2mol(new_mol, adjacent_matrix, atoms, charge, charged_fragments, quick) 108 | 109 | new_mol_smiles_list = [] 110 | for new_mol in new_mols: 111 | new_mol = Chem.RemoveHs(new_mol) 112 | new_mol_smiles = Chem.MolToSmiles(new_mol) 113 | 114 | new_mol_smiles_list.append(new_mol_smiles) 115 | 116 | assert canonical_smiles in new_mol_smiles_list 117 | 118 | return 119 | 120 | @pytest.mark.parametrize("smiles", __TEST_SMILES__) 121 | def test_smiles_from_coord_vdw(smiles): 122 | 123 | # The answer 124 | mol = Chem.MolFromSmiles(smiles) 125 | charge = Chem.GetFormalCharge(mol) 126 | canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False) 127 | 128 | 
# generate forcefield coordinates 129 | atoms, coordinates = generate_structure_from_smiles(smiles) 130 | 131 | # Generate molobj from atoms, charge and coordinates 132 | mols = x2m.xyz2mol(atoms, coordinates, charge=charge) 133 | 134 | smiles_list = [] 135 | for mol in mols: 136 | # For this test, remove chira. clean and canonical 137 | Chem.Kekulize(mol) 138 | mol = Chem.RemoveHs(mol) 139 | Chem.RemoveStereochemistry(mol) 140 | smiles = Chem.MolToSmiles(mol, isomericSmiles=False) 141 | 142 | # Please look away. A small hack that removes the explicit hydrogens 143 | mol = Chem.MolFromSmiles(smiles) 144 | smiles = Chem.MolToSmiles(mol) 145 | smiles_list.append(smiles) 146 | 147 | assert canonical_smiles in smiles_list 148 | 149 | return 150 | 151 | 152 | @pytest.mark.parametrize("smiles", __TEST_SMILES__) 153 | def test_smiles_from_coord_huckel(smiles): 154 | 155 | # The answer 156 | mol = Chem.MolFromSmiles(smiles) 157 | charge = Chem.GetFormalCharge(mol) 158 | canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False) 159 | 160 | # generate forcefield coordinates 161 | atoms, coordinates = generate_structure_from_smiles(smiles) 162 | 163 | # Generate molobj from atoms, charge and coordinates 164 | mols = x2m.xyz2mol(atoms, coordinates, charge=charge, use_huckel=True) 165 | 166 | smiles_list = [] 167 | for mol in mols: 168 | # For this test, remove chira. clean and canonical 169 | Chem.Kekulize(mol) 170 | mol = Chem.RemoveHs(mol) 171 | Chem.RemoveStereochemistry(mol) 172 | smiles = Chem.MolToSmiles(mol, isomericSmiles=False) 173 | 174 | # Please look away. 
A small hack that removes the explicit hydrogens 175 | mol = Chem.MolFromSmiles(smiles) 176 | smiles = Chem.MolToSmiles(mol) 177 | smiles_list.append(smiles) 178 | 179 | assert canonical_smiles in smiles_list 180 | 181 | return 182 | 183 | 184 | @pytest.mark.parametrize("filename, charge, answer", __TEST_FILES__) 185 | def test_smiles_from_xyz_files(filename, charge, answer): 186 | 187 | charged_fragments = True 188 | quick = True 189 | 190 | atoms, charge_read, coordinates = x2m.read_xyz_file(filename) 191 | 192 | mols = x2m.xyz2mol(atoms, coordinates, charge=charge) 193 | 194 | smiles_list = [] 195 | for mol in mols: 196 | mol = Chem.RemoveHs(mol) 197 | 198 | smiles = Chem.MolToSmiles(mol) 199 | smiles_list.append(smiles) 200 | 201 | assert answer in smiles_list 202 | 203 | return 204 | 205 | 206 | if __name__ == "__main__": 207 | 208 | import argparse 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('-t', '--test-type', type=str, help="") 211 | parser.add_argument('-s', '--smiles', help="") 212 | args = parser.parse_args() 213 | 214 | for smiles in __TEST_SMILES__: 215 | test_smiles_from_adjacent_matrix(smiles) 216 | print(True, smiles) 217 | 218 | for filename, charge, answer in __TEST_FILES__: 219 | test_smiles_from_xyz_files(filename, charge, answer) 220 | print(True, answer) 221 | 222 | for smiles in __TEST_SMILES__: 223 | test_smiles_from_coord_vdw(smiles) 224 | print(True, smiles) 225 | 226 | for smiles in __TEST_SMILES__: 227 | test_smiles_from_coord_huckel(smiles) 228 | print(True, smiles) 229 | -------------------------------------------------------------------------------- /xyz2mol.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for generating rdkit molobj/smiles/molecular graph from free atoms 3 | 4 | Implementation by Jan H. 
Jensen, based on the paper 5 | 6 | Yeonjoon Kim and Woo Youn Kim 7 | "Universal Structure Conversion Method for Organic Molecules: From Atomic Connectivity 8 | to Three-Dimensional Geometry" 9 | Bull. Korean Chem. Soc. 2015, Vol. 36, 1769-1777 10 | DOI: 10.1002/bkcs.10334 11 | 12 | """ 13 | 14 | import copy 15 | import itertools 16 | 17 | from rdkit.Chem import rdmolops 18 | from rdkit.Chem import rdchem 19 | try: 20 | from rdkit.Chem import rdEHTTools #requires RDKit 2019.9.1 or later 21 | except ImportError: 22 | rdEHTTools = None 23 | 24 | from collections import defaultdict 25 | 26 | import numpy as np 27 | import networkx as nx 28 | 29 | from rdkit import Chem 30 | from rdkit.Chem import AllChem, rdmolops 31 | import sys 32 | 33 | global __ATOM_LIST__ 34 | __ATOM_LIST__ = \ 35 | ['h', 'he', 36 | 'li', 'be', 'b', 'c', 'n', 'o', 'f', 'ne', 37 | 'na', 'mg', 'al', 'si', 'p', 's', 'cl', 'ar', 38 | 'k', 'ca', 'sc', 'ti', 'v ', 'cr', 'mn', 'fe', 'co', 'ni', 'cu', 39 | 'zn', 'ga', 'ge', 'as', 'se', 'br', 'kr', 40 | 'rb', 'sr', 'y', 'zr', 'nb', 'mo', 'tc', 'ru', 'rh', 'pd', 'ag', 41 | 'cd', 'in', 'sn', 'sb', 'te', 'i', 'xe', 42 | 'cs', 'ba', 'la', 'ce', 'pr', 'nd', 'pm', 'sm', 'eu', 'gd', 'tb', 'dy', 43 | 'ho', 'er', 'tm', 'yb', 'lu', 'hf', 'ta', 'w', 're', 'os', 'ir', 'pt', 44 | 'au', 'hg', 'tl', 'pb', 'bi', 'po', 'at', 'rn', 45 | 'fr', 'ra', 'ac', 'th', 'pa', 'u', 'np', 'pu'] 46 | 47 | 48 | global atomic_valence 49 | global atomic_valence_electrons 50 | 51 | atomic_valence = defaultdict(list) 52 | atomic_valence[1] = [1] 53 | atomic_valence[5] = [3,4] 54 | atomic_valence[6] = [4] 55 | atomic_valence[7] = [3,4] 56 | atomic_valence[8] = [2,1,3] 57 | atomic_valence[9] = [1] 58 | atomic_valence[14] = [4] 59 | atomic_valence[15] = [5,3] #[5,4,3] 60 | atomic_valence[16] = [6,3,2] #[6,4,2] 61 | atomic_valence[17] = [1] 62 | atomic_valence[32] = [4] 63 | atomic_valence[35] = [1] 64 | atomic_valence[53] = [1] 65 | 66 | atomic_valence_electrons = {} 67 | 
atomic_valence_electrons[1] = 1 68 | atomic_valence_electrons[5] = 3 69 | atomic_valence_electrons[6] = 4 70 | atomic_valence_electrons[7] = 5 71 | atomic_valence_electrons[8] = 6 72 | atomic_valence_electrons[9] = 7 73 | atomic_valence_electrons[14] = 4 74 | atomic_valence_electrons[15] = 5 75 | atomic_valence_electrons[16] = 6 76 | atomic_valence_electrons[17] = 7 77 | atomic_valence_electrons[32] = 4 78 | atomic_valence_electrons[35] = 7 79 | atomic_valence_electrons[53] = 7 80 | 81 | 82 | def str_atom(atom): 83 | """ 84 | convert integer atom to string atom 85 | """ 86 | global __ATOM_LIST__ 87 | atom = __ATOM_LIST__[atom - 1] 88 | return atom 89 | 90 | 91 | def int_atom(atom): 92 | """ 93 | convert str atom to integer atom 94 | """ 95 | global __ATOM_LIST__ 96 | #print(atom) 97 | atom = atom.lower() 98 | return __ATOM_LIST__.index(atom) + 1 99 | 100 | 101 | def get_UA(maxValence_list, valence_list): 102 | """ 103 | """ 104 | UA = [] 105 | DU = [] 106 | for i, (maxValence, valence) in enumerate(zip(maxValence_list, valence_list)): 107 | if not maxValence - valence > 0: 108 | continue 109 | UA.append(i) 110 | DU.append(maxValence - valence) 111 | return UA, DU 112 | 113 | 114 | def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True): 115 | """ 116 | """ 117 | BO = AC.copy() 118 | DU_save = [] 119 | 120 | while DU_save != DU: 121 | for i, j in UA_pairs: 122 | BO[i, j] += 1 123 | BO[j, i] += 1 124 | 125 | BO_valence = list(BO.sum(axis=1)) 126 | DU_save = copy.copy(DU) 127 | UA, DU = get_UA(valences, BO_valence) 128 | UA_pairs = get_UA_pairs(UA, AC, use_graph=use_graph)[0] 129 | 130 | return BO 131 | 132 | 133 | def valences_not_too_large(BO, valences): 134 | """ 135 | """ 136 | number_of_bonds_list = BO.sum(axis=1) 137 | for valence, number_of_bonds in zip(valences, number_of_bonds_list): 138 | if number_of_bonds > valence: 139 | return False 140 | 141 | return True 142 | 143 | def charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences, 144 
| allow_charged_fragments=True): 145 | # total charge 146 | Q = 0 147 | 148 | # charge fragment list 149 | q_list = [] 150 | 151 | if allow_charged_fragments: 152 | 153 | BO_valences = list(BO.sum(axis=1)) 154 | for i, atom in enumerate(atoms): 155 | q = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i]) 156 | Q += q 157 | if atom == 6: 158 | number_of_single_bonds_to_C = list(BO[i, :]).count(1) 159 | if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2: 160 | Q += 1 161 | q = 2 162 | if number_of_single_bonds_to_C == 3 and Q + 1 < charge: 163 | Q += 2 164 | q = 1 165 | 166 | if q != 0: 167 | q_list.append(q) 168 | 169 | return (charge == Q) 170 | 171 | def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences, 172 | allow_charged_fragments=True): 173 | """ 174 | Sanity of bond-orders 175 | 176 | args: 177 | BO - 178 | AC - 179 | charge - 180 | DU - 181 | 182 | 183 | optional 184 | allow_charges_fragments - 185 | 186 | 187 | returns: 188 | boolean - true of molecule is OK, false if not 189 | """ 190 | 191 | if not valences_not_too_large(BO, valences): 192 | return False 193 | 194 | check_sum = (BO - AC).sum() == sum(DU) 195 | check_charge = charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences, 196 | allow_charged_fragments) 197 | 198 | if check_charge and check_sum: 199 | return True 200 | 201 | return False 202 | 203 | 204 | def get_atomic_charge(atom, atomic_valence_electrons, BO_valence): 205 | """ 206 | """ 207 | 208 | if atom == 1: 209 | charge = 1 - BO_valence 210 | elif atom == 5: 211 | charge = 3 - BO_valence 212 | elif atom == 15 and BO_valence == 5: 213 | charge = 0 214 | elif atom == 16 and BO_valence == 6: 215 | charge = 0 216 | else: 217 | charge = atomic_valence_electrons - 8 + BO_valence 218 | 219 | return charge 220 | 221 | 222 | def clean_charges(mol): 223 | """ 224 | This hack should not be needed anymore, but is kept just in case 225 | 226 | """ 227 | 228 | Chem.SanitizeMol(mol) 
229 | #rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]', 230 | # '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]', 231 | # '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]', 232 | # '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]', 233 | # '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]', 234 | # '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]'] 235 | 236 | rxn_smarts = ['[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][CX3-,NX3-:5][#6,#7:6]1=[#6,#7:7]>>' 237 | '[#6,#7:1]1=[#6,#7:2][#6,#7:3]=[#6,#7:4][-0,-0:5]=[#6,#7:6]1[#6-,#7-:7]', 238 | '[#6,#7:1]1=[#6,#7:2][#6,#7:3](=[#6,#7:4])[#6,#7:5]=[#6,#7:6][CX3-,NX3-:7]1>>' 239 | '[#6,#7:1]1=[#6,#7:2][#6,#7:3]([#6-,#7-:4])=[#6,#7:5][#6,#7:6]=[-0,-0:7]1'] 240 | 241 | fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False) 242 | 243 | for i, fragment in enumerate(fragments): 244 | for smarts in rxn_smarts: 245 | patt = Chem.MolFromSmarts(smarts.split(">>")[0]) 246 | while fragment.HasSubstructMatch(patt): 247 | rxn = AllChem.ReactionFromSmarts(smarts) 248 | ps = rxn.RunReactants((fragment,)) 249 | fragment = ps[0][0] 250 | Chem.SanitizeMol(fragment) 251 | if i == 0: 252 | mol = fragment 253 | else: 254 | mol = Chem.CombineMols(mol, fragment) 255 | 256 | return mol 257 | 258 | 259 | def BO2mol(mol, BO_matrix, atoms, atomic_valence_electrons, 260 | mol_charge, allow_charged_fragments=True, use_atom_maps=False): 261 | """ 262 | based on code written by Paolo Toscani 263 | 264 | From bond order, atoms, valence structure and total charge, generate an 265 | rdkit molecule. 
266 | 267 | args: 268 | mol - rdkit molecule 269 | BO_matrix - bond order matrix of molecule 270 | atoms - list of integer atomic symbols 271 | atomic_valence_electrons - 272 | mol_charge - total charge of molecule 273 | 274 | optional: 275 | allow_charged_fragments - bool - allow charged fragments 276 | 277 | returns 278 | mol - updated rdkit molecule with bond connectivity 279 | 280 | """ 281 | 282 | l = len(BO_matrix) 283 | l2 = len(atoms) 284 | BO_valences = list(BO_matrix.sum(axis=1)) 285 | 286 | if (l != l2): 287 | raise RuntimeError('sizes of adjMat ({0:d}) and Atoms {1:d} differ'.format(l, l2)) 288 | 289 | rwMol = Chem.RWMol(mol) 290 | 291 | bondTypeDict = { 292 | 1: Chem.BondType.SINGLE, 293 | 2: Chem.BondType.DOUBLE, 294 | 3: Chem.BondType.TRIPLE 295 | } 296 | 297 | for i in range(l): 298 | for j in range(i + 1, l): 299 | bo = int(round(BO_matrix[i, j])) 300 | if (bo == 0): 301 | continue 302 | bt = bondTypeDict.get(bo, Chem.BondType.SINGLE) 303 | rwMol.AddBond(i, j, bt) 304 | 305 | mol = rwMol.GetMol() 306 | 307 | if allow_charged_fragments: 308 | mol = set_atomic_charges( 309 | mol, 310 | atoms, 311 | atomic_valence_electrons, 312 | BO_valences, 313 | BO_matrix, 314 | mol_charge, 315 | use_atom_maps) 316 | else: 317 | mol = set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences, 318 | use_atom_maps) 319 | 320 | return mol 321 | 322 | 323 | def set_atomic_charges(mol, atoms, atomic_valence_electrons, 324 | BO_valences, BO_matrix, mol_charge, 325 | use_atom_maps): 326 | """ 327 | """ 328 | q = 0 329 | for i, atom in enumerate(atoms): 330 | a = mol.GetAtomWithIdx(i) 331 | if use_atom_maps: 332 | a.SetAtomMapNum(i+1) 333 | charge = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[i]) 334 | q += charge 335 | if atom == 6: 336 | number_of_single_bonds_to_C = list(BO_matrix[i, :]).count(1) 337 | if number_of_single_bonds_to_C == 2 and BO_valences[i] == 2: 338 | q += 1 339 | charge = 0 340 | if number_of_single_bonds_to_C == 3 
and q + 1 < mol_charge: 341 | q += 2 342 | charge = 1 343 | 344 | if (abs(charge) > 0): 345 | a.SetFormalCharge(int(charge)) 346 | 347 | #mol = clean_charges(mol) 348 | 349 | return mol 350 | 351 | 352 | def set_atomic_radicals(mol, atoms, atomic_valence_electrons, BO_valences, 353 | use_atom_maps): 354 | """ 355 | 356 | The number of radical electrons = absolute atomic charge 357 | 358 | """ 359 | for i, atom in enumerate(atoms): 360 | a = mol.GetAtomWithIdx(i) 361 | if use_atom_maps: 362 | a.SetAtomMapNum(i+1) 363 | charge = get_atomic_charge( 364 | atom, 365 | atomic_valence_electrons[atom], 366 | BO_valences[i]) 367 | 368 | if (abs(charge) > 0): 369 | a.SetNumRadicalElectrons(abs(int(charge))) 370 | 371 | return mol 372 | 373 | 374 | def get_bonds(UA, AC): 375 | """ 376 | 377 | """ 378 | bonds = [] 379 | 380 | for k, i in enumerate(UA): 381 | for j in UA[k + 1:]: 382 | if AC[i, j] == 1: 383 | bonds.append(tuple(sorted([i, j]))) 384 | 385 | return bonds 386 | 387 | 388 | def get_UA_pairs(UA, AC, use_graph=True): 389 | """ 390 | 391 | """ 392 | 393 | bonds = get_bonds(UA, AC) 394 | 395 | if len(bonds) == 0: 396 | return [()] 397 | 398 | if use_graph: 399 | G = nx.Graph() 400 | G.add_edges_from(bonds) 401 | UA_pairs = [list(nx.max_weight_matching(G))] 402 | return UA_pairs 403 | 404 | max_atoms_in_combo = 0 405 | UA_pairs = [()] 406 | for combo in list(itertools.combinations(bonds, int(len(UA) / 2))): 407 | flat_list = [item for sublist in combo for item in sublist] 408 | atoms_in_combo = len(set(flat_list)) 409 | if atoms_in_combo > max_atoms_in_combo: 410 | max_atoms_in_combo = atoms_in_combo 411 | UA_pairs = [combo] 412 | 413 | elif atoms_in_combo == max_atoms_in_combo: 414 | UA_pairs.append(combo) 415 | 416 | return UA_pairs 417 | 418 | 419 | def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True): 420 | """ 421 | 422 | implemenation of algorithm shown in Figure 2 423 | 424 | UA: unsaturated atoms 425 | 426 | DU: degree of unsaturation (u 
matrix in Figure) 427 | 428 | best_BO: Bcurr in Figure 429 | 430 | """ 431 | 432 | global atomic_valence 433 | global atomic_valence_electrons 434 | 435 | # make a list of valences, e.g. for CO: [[4],[2,1]] 436 | valences_list_of_lists = [] 437 | AC_valence = list(AC.sum(axis=1)) 438 | 439 | for i,(atomicNum,valence) in enumerate(zip(atoms,AC_valence)): 440 | # valence can't be smaller than number of neighbourgs 441 | possible_valence = [x for x in atomic_valence[atomicNum] if x >= valence] 442 | if not possible_valence: 443 | print('Valence of atom',i,'is',valence,'which bigger than allowed max',max(atomic_valence[atomicNum]),'. Stopping') 444 | sys.exit() 445 | valences_list_of_lists.append(possible_valence) 446 | 447 | # convert [[4],[2,1]] to [[4,2],[4,1]] 448 | valences_list = itertools.product(*valences_list_of_lists) 449 | 450 | best_BO = AC.copy() 451 | 452 | for valences in valences_list: 453 | 454 | UA, DU_from_AC = get_UA(valences, AC_valence) 455 | 456 | check_len = (len(UA) == 0) 457 | if check_len: 458 | check_bo = BO_is_OK(AC, AC, charge, DU_from_AC, 459 | atomic_valence_electrons, atoms, valences, 460 | allow_charged_fragments=allow_charged_fragments) 461 | else: 462 | check_bo = None 463 | 464 | if check_len and check_bo: 465 | return AC, atomic_valence_electrons 466 | 467 | UA_pairs_list = get_UA_pairs(UA, AC, use_graph=use_graph) 468 | for UA_pairs in UA_pairs_list: 469 | BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph) 470 | status = BO_is_OK(BO, AC, charge, DU_from_AC, 471 | atomic_valence_electrons, atoms, valences, 472 | allow_charged_fragments=allow_charged_fragments) 473 | charge_OK = charge_is_OK(BO, AC, charge, DU_from_AC, atomic_valence_electrons, atoms, valences, 474 | allow_charged_fragments=allow_charged_fragments) 475 | 476 | if status: 477 | return BO, atomic_valence_electrons 478 | elif BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences) and charge_OK: 479 | best_BO = BO.copy() 480 | 481 | 
def read_xyz_file(filename, look_for_charge=True):
    """
    Read atoms, charge and coordinates from an xyz file.

    The first line must hold an integer, the second line is the title line
    and every following non-blank line must consist of exactly four
    whitespace-separated fields: symbol, x, y, z.

    args:
        filename - path to the xyz file

    optional:
        look_for_charge - look for "charge=<int>" in the title line and use
                          that value as the charge (default: True)

    returns:
        atoms - list with int_atom(symbol) for every atom line
        charge - charge read from the title line, otherwise 0
        xyz_coordinates - list of [x, y, z] floats, one entry per atom

    raises:
        ValueError - if the first line is not an integer, if "charge=" is not
                     followed by an integer, or if an atom line does not
                     hold a symbol followed by three numbers

    """

    atomic_symbols = []
    xyz_coordinates = []
    charge = 0

    with open(filename, "r") as file:
        for line_number, line in enumerate(file):
            if line_number == 0:
                # the header must be an integer; the value itself is not used
                int(line)
            elif line_number == 1:
                if look_for_charge and "charge=" in line:
                    # Only the token right after "charge=" is the charge, so
                    # other title text (e.g. "charge=-1 mult=1") is tolerated.
                    tokens = line.split("charge=", 1)[1].split()
                    if not tokens:
                        raise ValueError(
                            "no value after 'charge=' in title line of "
                            + str(filename))
                    charge = int(tokens[0])
            elif line.strip():
                # blank lines (typically trailing newlines) are skipped
                atomic_symbol, x, y, z = line.split()
                atomic_symbols.append(atomic_symbol)
                xyz_coordinates.append([float(x), float(y), float(z)])

    atoms = [int_atom(atom) for atom in atomic_symbols]

    return atoms, charge, xyz_coordinates
def xyz2AC_huckel(atomicNumList, xyz, charge):
    """
    Atoms and coordinates to atom connectivity (AC) using the reduced
    overlap populations of an extended Huckel calculation.

    args
        atomicNumList - atom type list
        xyz - coordinates
        charge - molecule charge

    returns
        ac - atom connectivity
        mol - rdkit molecule

    raises
        RuntimeError - if rdEHTTools.RunMol reports that the calculation
                       did not succeed

    """
    mol = get_proto_mol(atomicNumList)

    conf = Chem.Conformer(mol.GetNumAtoms())
    for i in range(mol.GetNumAtoms()):
        conf.SetAtomPosition(i, (xyz[i][0], xyz[i][1], xyz[i][2]))
    mol.AddConformer(conf)

    num_atoms = len(atomicNumList)
    AC = np.zeros((num_atoms, num_atoms), dtype=int)

    # work on a copy so the formal charge is not written to the returned mol
    mol_huckel = Chem.Mol(mol)
    # mol charge arbitrarily added to 1st atom
    mol_huckel.GetAtomWithIdx(0).SetFormalCharge(charge)

    passed, result = rdEHTTools.RunMol(mol_huckel)
    if not passed:
        # Do not build a connectivity matrix from a failed calculation.
        raise RuntimeError("extended Huckel calculation failed")

    opop = result.GetReducedOverlapPopulationMatrix()
    tri = np.zeros((num_atoms, num_atoms))
    # lower triangular to square matrix
    tri[np.tril(np.ones((num_atoms, num_atoms), dtype=bool))] = opop
    for i in range(num_atoms):
        for j in range(i + 1, num_atoms):
            pair_pop = abs(tri[j, i])
            if pair_pop >= 0.15:  # arbitrary cutoff for bond. May need adjustment
                AC[i, j] = 1
                AC[j, i] = 1

    return AC, mol
if __name__ == "__main__":

    import argparse

    parser = argparse.ArgumentParser(usage='%(prog)s [options] molecule.xyz')
    parser.add_argument('structure', metavar='structure', type=str)
    parser.add_argument('-s', '--sdf',
        action="store_true",
        help="Dump sdf file")
    parser.add_argument('--ignore-chiral',
        action="store_true",
        help="Ignore chiral centers")
    parser.add_argument('--no-charged-fragments',
        action="store_true",
        help="Allow radicals to be made")
    parser.add_argument('--no-graph',
        action="store_true",
        help="Run xyz2mol without networkx dependencies")

    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
    # otherwise van der Waals radii are used
    parser.add_argument('--use-huckel',
        action="store_true",
        help="Use Huckel method for atom connectivity")
    # Without -o (and without -s) the output below is SMILES, so the help
    # text states that default.
    parser.add_argument('-o', '--output-format',
        action="store",
        type=str,
        help="Output format [smiles,sdf] (default=smiles)")
    parser.add_argument('-c', '--charge',
        action="store",
        metavar="int",
        type=int,
        help="Total charge of the system")

    args = parser.parse_args()

    # read xyz file
    filename = args.structure

    # allow for charged fragments, alternatively radicals are made
    charged_fragments = not args.no_charged_fragments

    # the graph-based pairing relies on networkx; --no-graph switches to
    # the itertools-based fallback
    quick = not args.no_graph

    # embed chirality unless told to ignore it; the same switch controls
    # whether the printed SMILES are isomeric
    embed_chiral = not args.ignore_chiral

    # -s/--sdf is a shortcut for "-o sdf"
    write_sdf = args.sdf or args.output_format == "sdf"

    # read atoms and coordinates. Try to find the charge
    atoms, charge, xyz_coordinates = read_xyz_file(filename)

    # huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
    # otherwise van der Waals radii are used
    use_huckel = args.use_huckel

    # an explicit charge from the command line overrides the one in the file
    # (argparse already converted it with type=int)
    if args.charge is not None:
        charge = args.charge

    # Get the molobjs
    mols = xyz2mol(atoms, xyz_coordinates,
        charge=charge,
        use_graph=quick,
        allow_charged_fragments=charged_fragments,
        embed_chiral=embed_chiral,
        use_huckel=use_huckel)

    # Print output
    for mol in mols:
        if write_sdf:
            txt = Chem.MolToMolBlock(mol)
            print(txt)

        else:
            # Canonical hack: round-trip through SMILES before printing
            smiles = Chem.MolToSmiles(mol, isomericSmiles=embed_chiral)
            m = Chem.MolFromSmiles(smiles)
            smiles = Chem.MolToSmiles(m, isomericSmiles=embed_chiral)
            print(smiles)