├── .github └── workflows │ ├── lint.yml │ └── test.yml ├── .gitignore ├── .travis.yml ├── Colab_notebook ├── psp_Colab_notebook.ipynb └── psp_colab_notebook.py ├── Dockerfile ├── LICENSE ├── LigParGenPSP ├── BOSS2LAMMPS.py ├── BOSSReader.py ├── Converter.py ├── CreatZmat.py ├── README ├── Vector_algebra.py ├── __init__.py ├── fepzmat.py └── mol_boss.py ├── README.md ├── documentation └── PSP_user_manual.pdf ├── psp ├── AmorphousBuilder.py ├── ChainBuilder.py ├── CrystalBuilder.py ├── MD_lib.py ├── MoleculeBuilder.py ├── PSP_lib.py ├── __init__.py ├── output_lib.py └── simulated_annealing.py ├── requirements.txt ├── setup.cfg ├── setup.py └── test ├── .DS_Store ├── AmorphousBuilder ├── amor_model.py ├── amor_model_gaff2.py ├── amor_model_opls.py ├── input_PE.csv └── input_amor.csv ├── ChainBuilder ├── chain_model.py └── input_chain.csv ├── CrystalBuilder ├── crystal_model.py └── input_chain.csv ├── MoleculeBuilder ├── circular_oligomer.csv ├── linear_oligomer.csv ├── linear_oligomer_with_endcaps.csv └── molecule_model.py ├── chain.csv └── test.py /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 4 10 | matrix: 11 | python-version: [3.7] 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 0 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | - name: pycodestyle 25 | run: | 26 | pip install pycodestyle --upgrade --quiet 27 | pycodestyle psp 28 | - name: flake8 29 | run: | 30 | pip install flake8 --upgrade --quiet 31 | flake8 --extend-ignore=F841 --count --show-source --statistics psp 32 | # exit-zero treats all errors as warnings. 33 | flake8 --extend-ignore=F841 --count --exit-zero --max-complexity=20 --statistics psp 34 | # Note: enable this when docstrings are ready 35 | # - name: pydocstyle 36 | # run: | 37 | # pip install pydocstyle --upgrade --quiet 38 | # pydocstyle --count psp 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Testing - main 2 | 3 | on: [push, pull_request] 4 | 5 | 6 | jobs: 7 | test: 8 | name: Testing (${{ matrix.python-version }}, ${{ matrix.os }}) 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | fail-fast: true 12 | matrix: 13 | os: [ 14 | "ubuntu-latest", 15 | #"macos-latest", 16 | # "windows-latest" 17 | ] 18 | python-version: ["3.7"] 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: conda-incubator/setup-miniconda@v2 22 | with: 23 | auto-update-conda: true 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | shell: bash -l {0} 27 | run: | 28 | conda install -c conda-forge rdkit openbabel==3.1.1 29 | conda install -c conda-forge tqdm 30 | conda install -c conda-forge tabulate 31 | #git clone https://github.com/polysimtools/pysimm 32 | #sudo python pysimm/complete_install.py --pysimm $PWD 33 | #source ~/.bashrc 34 | python -m pip install --upgrade pip 35 | pip install networkx 36 | pip install -e . 
37 | - name: pytest 38 | shell: bash -l {0} 39 | run: | 40 | pip install pytest pytest-cov 41 | pytest test/test.py --color=yes --cov=psp --cov-report html:coverage_reports 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | 102 | # IntelliJ environment files 103 | .idea 104 | 105 | # DS Store 106 | .DS_store 107 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.7" 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Ubuntu image as a parent image 2 | FROM ubuntu:latest 3 | 4 | # Avoid interactive dialog during package installations 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # Install necessary packages 8 | RUN apt-get update -y && \ 9 | apt-get install -y vim wget git 10 | 11 | # Set environment variables 12 | ENV CONDA_HOME=/opt/conda 13 | ENV PATH=$CONDA_HOME/bin:$PATH 14 | 15 | # Install Miniconda 16 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ 17 | bash miniconda.sh -b -p $CONDA_HOME && \ 18 | rm miniconda.sh 19 | 20 | # Set the working directory 21 | WORKDIR /opt 22 | 23 | # Create a Conda environment and install dependencies 24 | RUN /opt/conda/bin/conda create -n myenv -y python=3.8 25 | RUN /opt/conda/bin/conda init bash 26 | RUN echo "conda activate myenv" >> ~/.bashrc 27 | ENV PATH=$CONDA_HOME/envs/myenv/bin:$PATH 28 | 29 | # Install additional Python packages directly 30 | RUN conda install -n myenv -y -c anaconda scipy=1.7 pandas=1.5 'numpy<1.23.0' 31 | RUN 
/opt/conda/envs/myenv/bin/pip install rdkit 32 | RUN conda install -n myenv -y -c conda-forge openbabel 33 | RUN conda install -n myenv -y anaconda::networkx anaconda::tqdm anaconda::tabulate 34 | 35 | ## Install packmol 36 | # Clone Packmol repository 37 | RUN apt-get install -y build-essential gfortran 38 | RUN git clone https://github.com/m3g/packmol.git /opt/packmol 39 | WORKDIR /opt/packmol 40 | RUN make 41 | 42 | # Set the PACKMOL_EXEC environment variable 43 | ENV PACKMOL_EXEC=/opt/packmol/packmol 44 | 45 | ## Install pysimm 46 | WORKDIR /opt 47 | RUN git clone -b 1.1 --single-branch https://github.com/polysimtools/pysimm 48 | # Set up PYTHONPATH 49 | ENV PYTHONPATH=$PYTHONPATH:/opt/pysimm 50 | # Set up PATH 51 | ENV PATH=$PATH:/opt/pysimm/bin 52 | 53 | ## Install ambertools 54 | RUN conda install -n myenv -y -c conda-forge ambertools 55 | ENV ANTECHAMBER_EXEC=/opt/conda/envs/myenv/bin/antechamber 56 | 57 | ## Install PSP 58 | RUN git clone https://github.com/Ramprasad-Group/PSP.git 59 | WORKDIR /opt/PSP 60 | RUN /opt/conda/envs/myenv/bin/python setup.py install 61 | 62 | # Set up default Python to /opt/conda/envs/myenv/bin/python 63 | RUN echo 'export PATH=/opt/conda/envs/myenv/bin:$PATH' >> /etc/profile.d/python.sh && \ 64 | echo 'alias python=/opt/conda/envs/myenv/bin/python' >> /etc/profile.d/python.sh 65 | 66 | # Set HOME as working directory 67 | WORKDIR /root 68 | 69 | # Copy test files to /root 70 | RUN cp -r /opt/PSP/test/ /root/ 71 | 72 | # Set the default command to run your application 73 | CMD ["bash"] 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ramprasad Group, Georgia Tech, USA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LigParGenPSP/BOSS2LAMMPS.py: -------------------------------------------------------------------------------- 1 | """ 2 | SCRIPT TO WRITE LAMMPS DATA FILES LMP & INP Files 3 | FROM BOSS ZMATRIX 4 | Created on Mon Sep 30 03:31:05 2017 5 | @author: Leela S. Dodda leela.dodda@yale.edu 6 | @author: William L. 
Jorgensen Lab 7 | 8 | REQUIREMENTS: 9 | BOSS (need to set BOSSdir in bashrc and cshrc) 10 | Preferably Anaconda python with following modules 11 | pandas 12 | argparse 13 | numpy 14 | """ 15 | 16 | from LigParGenPSP.BOSSReader import bossPdbAtom2Element, ucomb 17 | from LigParGenPSP.BOSSReader import bossElement2Mass, tor_cent 18 | import pickle 19 | import pandas as pd 20 | import numpy as np 21 | 22 | 23 | def Boss2LammpsLMP(resid, num2typ2symb, Qs, bnd_df, ang_df, tor_df, molecule_data): 24 | xyz_df = molecule_data.MolData["XYZ"] 25 | max_mol_size = 50 26 | prm = open(resid + ".lmp", "w+") 27 | prm.write("LAMMPS data file Created by - (Written by Leela S. Dodda)\n\n") 28 | prm.write("%8d atoms\n" % len(Qs)) 29 | prm.write("%8d bonds\n" % len(bnd_df.KIJ)) 30 | prm.write("%8d angles\n" % len(ang_df.K)) 31 | prm.write("%8d dihedrals\n" % len(tor_df[tor_df.TY == "Proper"].index)) 32 | prm.write("%8d impropers\n \n" % len(tor_df[tor_df.TY == "Improper"].index)) 33 | prm.write("%8d atom types\n" % len(Qs)) 34 | prm.write("%8d bond types\n" % len(bnd_df.KIJ)) 35 | prm.write("%8d angle types\n" % len(ang_df.K)) 36 | prm.write("%8d dihedral types\n" % len(tor_df[tor_df.TY == "Proper"].index)) 37 | prm.write("%8d improper types\n \n" % len(tor_df[tor_df.TY == "Improper"].index)) 38 | prm.write( 39 | "%12.6f %12.6f xlo xhi\n" % (xyz_df.X.min(), xyz_df.X.min() + max_mol_size) 40 | ) 41 | prm.write( 42 | "%12.6f %12.6f ylo yhi\n" % (xyz_df.Y.min(), xyz_df.Y.min() + max_mol_size) 43 | ) 44 | prm.write( 45 | "%12.6f %12.6f zlo zhi\n" % (xyz_df.Z.min(), xyz_df.Z.min() + max_mol_size) 46 | ) 47 | # Printing Parameters for ALL BONDS/ANGLES/DIHEDRALS/IMPROPERS/Q/LJ ####### 48 | prm.write("\nMasses\n\n") 49 | for i in range(len(Qs)): 50 | prm.write("%8d %10.3f \n" % (i + 1, float(num2typ2symb[i][4]))) 51 | prm.write("\nPair Coeffs \n\n") 52 | for i in range(len(Qs)): 53 | prm.write("%8d%11.3f%11.7f \n" % (i + 1, float(Qs[i][3]), float(Qs[i][2]))) 54 | prm.write("\nBond Coeffs \n\n") 55 | for i in bnd_df.index: 56 | prm.write("%8d%11.4f%11.4f \n" % (i + 1, bnd_df.KIJ[i], bnd_df.RIJ[i])) 57 | prm.write("\nAngle Coeffs \n\n") 58 | for i in ang_df.index: 59 | prm.write("%8d%11.3f%11.3f\n" % (i + 1, ang_df.K[i], ang_df.R[i])) 60 | dihedral_df = tor_df[tor_df.TY == "Proper"] 61 | dihedral_df.index = range(len(dihedral_df.V1)) 62 | prm.write("\nDihedral Coeffs \n\n") 63 | for i, row in dihedral_df.iterrows(): 64 | prm.write( 65 | "%8d%11.3f%11.3f%11.3f%11.3f \n" % (i + 1, row.V1, row.V2, row.V3, row.V4) 66 | ) 67 | bndlist = list(bnd_df.UR) + (list(bnd_df.UR)) 68 | improper_df = tor_df[tor_df.TY == "Improper"] 69 | improper_df.index = range(len(improper_df.V2)) 70 | if len(improper_df.index) > 0: 71 | prm.write("\nImproper Coeffs \n\n") 72 | for i, row in improper_df.iterrows(): 73 | prm.write("%8d%11.3f%8d%8d \n" % (i + 1, row.V2 * 0.5, -1, 2)) 74 | # Printing EXPLICITLY ALL BONDS/ANGLES/DIHEDRALS/IMPROPERS/Q/LJ ####### 75 | prm.write("\nAtoms \n\n") 76 | for i in range(len(xyz_df.index)): 77 | prm.write( 78 | "%6d %6d %6d %10.8f %8.3f %8.5f %8.5f\n" 79 | % (i + 1, 1, i + 1, float(Qs[i][1]), xyz_df.X[i], xyz_df.Y[i], xyz_df.Z[i]) 80 | ) 81 | prm.write("\nBonds \n\n") 82 | for i in bnd_df.index: 83 | prm.write( 84 | "%6d %6d %6d %6d\n" % (i + 1, i + 1, bnd_df.cl1[i] + 1, bnd_df.cl2[i] + 1) 85 | ) 86 | prm.write("\nAngles \n\n") 87 | for i in ang_df.index: 88 | prm.write( 89 | "%6d %6d %6d %6d %6d\n" 90 | % (i + 1, i + 1, ang_df.cl1[i] + 1, ang_df.cl2[i] + 1, ang_df.cl3[i] + 1) 91 | ) 92 | 
prm.write("\nDihedrals\n\n") 93 | for i, row in dihedral_df.iterrows(): 94 | prm.write( 95 | "%6d %6d %6d %6d %6d %6d \n" 96 | % (i + 1, i + 1, row.I + 1, row.J + 1, row.K + 1, row.L + 1) 97 | ) 98 | if len(improper_df.index) > 0: 99 | prm.write("\nImpropers\n\n") 100 | for row in improper_df.iterrows(): 101 | index, dat = row 102 | ndata = tor_cent([dat.I, dat.J, dat.K, dat.L], bndlist) 103 | prm.write( 104 | "%6d %6d %6d %6d %6d %6d \n" 105 | % ( 106 | index + 1, 107 | index + 1, 108 | ndata[0] + 1, 109 | ndata[1] + 1, 110 | ndata[2] + 1, 111 | ndata[3] + 1, 112 | ) 113 | ) 114 | return None 115 | 116 | 117 | def Boss2CharmmTorsion(bnd_df, num2opls, st_no, molecule_data, num2typ2symb): 118 | dhd = [] 119 | for line in molecule_data.MolData["TORSIONS"]: 120 | dt = [float(i) for i in line] 121 | dhd.append(dt) 122 | dhd = np.array(dhd) 123 | dhd = dhd # kcal to kj conversion 124 | dhd = dhd # Klammps = Vopls 125 | dhd_df = pd.DataFrame(dhd, columns=["V1", "V2", "V3", "V4"]) 126 | ats = [] 127 | for line in molecule_data.MolData["ATOMS"][3:]: 128 | dt = [line.split()[0], line.split()[4], line.split()[6], line.split()[8]] 129 | dt = [int(d) for d in dt] 130 | ats.append(dt) 131 | for line in molecule_data.MolData["ADD_DIHED"]: 132 | dt = [int(i) for i in line] 133 | ats.append(dt) 134 | assert len(ats) == len( 135 | dhd 136 | ), "Number of Dihedral angles in Zmatrix and Out file dont match" 137 | ats = np.array(ats) - st_no 138 | for i in range(len(ats)): 139 | for j in range(len(ats[0])): 140 | if ats[i][j] < 0: 141 | ats[i][j] = 0 142 | at_df = pd.DataFrame(ats, columns=["I", "J", "K", "L"]) 143 | # final_df = pd.concat([dhd_df, at_df], axis=1, join_axes=[at_df.index]) backup 144 | final_df = pd.concat([dhd_df, at_df], axis=1) 145 | final_df = final_df.reindex(dhd_df.index) 146 | 147 | bndlist = list(bnd_df.UR) + (list(bnd_df.UR)) 148 | final_df["TY"] = [ 149 | "Proper" 150 | if ucomb( 151 | list([final_df.I[n], final_df.J[n], final_df.K[n], final_df.L[n]]), bndlist 152 | ) 153 | == 3 154 | else "Improper" 155 | for n in range(len(final_df.I)) 156 | ] 157 | final_df["TI"] = [num2typ2symb[j][2] for j in final_df.I] 158 | final_df["TJ"] = [num2typ2symb[j][2] for j in final_df.J] 159 | final_df["TK"] = [num2typ2symb[j][2] for j in final_df.K] 160 | final_df["TL"] = [num2typ2symb[j][2] for j in final_df.L] 161 | final_df["SYMB"] = [ 162 | "-".join( 163 | [ 164 | num2typ2symb[final_df.I[i]][0], 165 | num2typ2symb[final_df.J[i]][0], 166 | num2typ2symb[final_df.K[i]][0], 167 | num2typ2symb[final_df.L[i]][0], 168 | ] 169 | ) 170 | for i in final_df.index 171 | ] 172 | if len(final_df.index) > 0: 173 | final_df["NAME"] = ( 174 | final_df.TI + "-" + final_df.TJ + "-" + final_df.TK + "-" + final_df.TL 175 | ) 176 | return final_df 177 | 178 | 179 | def boss2CharmmBond(molecule_data, st_no): 180 | bdat = molecule_data.MolData["BONDS"] 181 | bdat["cl1"] = [x - st_no if not x - st_no < 0 else 0 for x in bdat["cl1"]] 182 | bdat["cl2"] = [x - st_no if not x - st_no < 0 else 0 for x in bdat["cl2"]] 183 | bnd_df = pd.DataFrame(bdat) 184 | bnd_df["UF"] = ( 185 | (bnd_df.cl1 + bnd_df.cl2) * (bnd_df.cl1 + bnd_df.cl2 + 1) * 0.5 186 | ) + bnd_df.cl2 187 | bnd_df["UR"] = ( 188 | (bnd_df.cl1 + bnd_df.cl2) * (bnd_df.cl1 + bnd_df.cl2 + 1) * 0.5 189 | ) + bnd_df.cl1 190 | hb_df = bnd_df.drop(["cl1", "cl2", "UF", "UR"], 1) 191 | hb_df = hb_df.drop_duplicates() 192 | return bnd_df 193 | 194 | 195 | def boss2CharmmAngle(anglefile, num2opls, st_no): 196 | adat = anglefile 197 | adat["cl1"] = [x - st_no if not x - st_no < 
0 else 0 for x in adat["cl1"]] 198 | adat["cl2"] = [x - st_no if not x - st_no < 0 else 0 for x in adat["cl2"]] 199 | adat["cl3"] = [x - st_no if not x - st_no < 0 else 0 for x in adat["cl3"]] 200 | ang_df = pd.DataFrame(adat) 201 | ang_df = ang_df[ang_df.K > 0] 202 | ang_df["TY"] = np.array( 203 | [ 204 | num2opls[i] + "-" + num2opls[j] + "-" + num2opls[k] 205 | for i, j, k in zip(ang_df.cl1, ang_df.cl2, ang_df.cl3) 206 | ] 207 | ) 208 | return ang_df 209 | 210 | 211 | def bossData(molecule_data): 212 | ats_file = molecule_data.MolData["ATOMS"] 213 | types = [] 214 | for i in enumerate(ats_file): 215 | types.append([i[1].split()[1], "opls_" + i[1].split()[2]]) 216 | st_no = 3 217 | Qs = molecule_data.MolData["Q_LJ"] 218 | assert len(Qs) == len(types), "Please check the at_info and Q_LJ_dat files" 219 | num2opls = {} 220 | for i in range(0, len(types)): 221 | num2opls[i] = Qs[i][0] 222 | num2typ2symb = {i: types[i] for i in range(len(Qs))} 223 | for i in range(len(Qs)): 224 | num2typ2symb[i].append( 225 | bossPdbAtom2Element(num2typ2symb[i][0]) + num2typ2symb[i][1][-3:] 226 | ) 227 | num2typ2symb[i].append(bossPdbAtom2Element(num2typ2symb[i][0])) 228 | num2typ2symb[i].append(bossElement2Mass(num2typ2symb[i][3])) 229 | num2typ2symb[i].append(Qs[i][0]) 230 | return (types, Qs, num2opls, st_no, num2typ2symb) 231 | 232 | 233 | def Boss2Lammps(resid, molecule_data): 234 | types, Qs, num2opls, st_no, num2typ2symb = bossData(molecule_data) 235 | bnd_df = boss2CharmmBond(molecule_data, st_no) 236 | ang_df = boss2CharmmAngle(molecule_data.MolData["ANGLES"], num2opls, st_no) 237 | tor_df = Boss2CharmmTorsion(bnd_df, num2opls, st_no, molecule_data, num2typ2symb) 238 | Boss2LammpsLMP(resid, num2typ2symb, Qs, bnd_df, ang_df, tor_df, molecule_data) 239 | return None 240 | 241 | 242 | def mainBOSS2LAMMPS(resid, clu=False): 243 | mol = pickle.load(open(resid + ".p", "rb")) 244 | Boss2Lammps(resid, mol) 245 | return None 246 | -------------------------------------------------------------------------------- /LigParGenPSP/BOSSReader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import numpy as np 4 | from LigParGenPSP.mol_boss import new_mol_info 5 | import pandas as pd 6 | from collections import OrderedDict 7 | 8 | from LigParGenPSP.fepzmat import BCC_file2zmat 9 | import warnings 10 | 11 | warnings.simplefilter(action='ignore', category=FutureWarning) 12 | 13 | 14 | def VerifyMolandSave(mol, charge, resname): 15 | if mol is not None: 16 | import pickle 17 | 18 | assert ( 19 | mol.MolData['TotalQ']['Reference-Solute'] == charge 20 | ), "PROPOSED CHARGE IS NOT POSSIBLE: SOLUTE MAY BE AN OPEN SHELL" 21 | pickle.dump(mol, open(resname + ".p", "wb")) 22 | else: 23 | print('Problem Detected Molecule Object Not created') 24 | return None 25 | 26 | 27 | def LinCheck(fname): 28 | imp_dat = 0 29 | zlines = open(fname, 'r').readlines() 30 | for i in range(len(zlines)): 31 | if 'Geometry Variations follow ' in zlines[i]: 32 | imp_dat = i 33 | Atypes = [] 34 | for i in zlines[1:imp_dat]: 35 | Atypes.append(i.split()[2]) 36 | Atypes = np.array(Atypes, dtype=np.int) 37 | Atypes = Atypes[Atypes < 0] 38 | Check = False 39 | if len(Atypes) > 2: 40 | Check = True 41 | return Check 42 | 43 | 44 | def mod_add_diheds(line): 45 | adihed = [int(i) for i in line.split()[0:4]] + [-1, -1] 46 | return adihed 47 | 48 | 49 | def fix_add_dihed(zmat_name): 50 | flines = open('%s.z' % zmat_name, 'r').readlines() 51 | imp_lines = [] 52 | for 
i in range(len(flines)): 53 | if 'Additional Dihedrals follow' in flines[i]: 54 | imp_lines.append(i + 1) 55 | elif 'Domain Definitions follow' in flines[i]: 56 | imp_lines.append(i) 57 | ofile = open('%s_fixed.z' % zmat_name, 'w+') 58 | for line in flines[0: imp_lines[0]]: 59 | ofile.write('%s\n' % (line.rstrip())) 60 | for line in flines[imp_lines[0]: imp_lines[1]]: 61 | m_ad = mod_add_diheds(line) 62 | ofile.write( 63 | '%4d%4d%4d%4d%4d%4d\n' 64 | % (m_ad[0], m_ad[1], m_ad[2], m_ad[3], m_ad[4], m_ad[5]) 65 | ) 66 | for line in flines[imp_lines[1]:]: 67 | ofile.write('%s\n' % line.rstrip()) 68 | ofile.close() 69 | return None 70 | 71 | 72 | def CheckForHs(atoms): 73 | atype = [line.split()[1][0] for line in atoms] 74 | ans = False 75 | if 'H' in atype: 76 | ans = True 77 | return ans 78 | 79 | 80 | def bcc_db(): 81 | ''' 82 | 19 LBCCs from 1.14*CM1A-LBCC paper 83 | ''' 84 | lbcc = { 85 | 'C#-C=': 0.0, 86 | 'C-N': 0.0, 87 | 'C-O': 0.05, 88 | 'C-OE': 0.0, 89 | 'C-OH': 0.0, 90 | 'C-OS': 0.0, 91 | 'CA-Br': 0.19, 92 | 'CA-C': 0.0, 93 | 'CA-C!': -0.0, 94 | 'CA-C=': 0.0, 95 | 'CA-CB': -0.0, 96 | 'CA-CE': 0.0, 97 | 'CA-CF': 0.0, 98 | 'CA-CK': -0.0, 99 | 'CA-CT': 0.0, 100 | 'CA-CZ': 0.0, 101 | 'CA-CZA': 0.0, 102 | 'CA-Cl': 0.0, 103 | 'CA-F': 0.13, 104 | 'CA-I': 0.0, 105 | 'CA-N3': 0.0, 106 | 'CA-NC': 0.07, 107 | 'CA-NO': -0.08, 108 | 'CA-NP': 0.06, 109 | 'CA-NS': 0.0, 110 | 'CA-OH': 0.22, 111 | 'CA-OS': -0.0, 112 | 'CA-S': -0.0, 113 | 'CA-SH': -0.0, 114 | 'CAM-CA': 0.0, 115 | 'CAM-CT': 0.0, 116 | 'CAM-N': 0.0, 117 | 'CAM-O': 0.0, 118 | 'CB-C=': -0.0, 119 | 'CB-NC': -0.0, 120 | 'CE-O': -0.0, 121 | 'CE-OE': 0.0, 122 | 'CE-OS': 0.0, 123 | 'CF-F': -0.0, 124 | 'CF-OS': -0.0, 125 | 'CK-O': -0.0, 126 | 'CM-C': 0.0, 127 | 'CM-C=': -0.0, 128 | 'CM-CT': -0.0, 129 | 'CM-Cl': -0.0, 130 | 'CP-CS': 0.0, 131 | 'CP-SA': -0.0, 132 | 'CT-Br': 0.08, 133 | 'CT-C': -0.0, 134 | 'CT-C=': 0.0, 135 | 'CT-CE': -0.0, 136 | 'CT-CF': 0.0, 137 | 'CT-CK': -0.0, 138 | 'CT-CP': 0.0, 139 | 'CT-CZ': -0.0, 140 | 'CT-CZT': -0.0, 141 | 'CT-Cl': 0.1, 142 | 'CT-F': -0.0, 143 | 'CT-I': -0.0, 144 | 'CT-N': -0.0, 145 | 'CT-N3': -0.0, 146 | 'CT-NO': 0.0, 147 | 'CT-NP': 0.04, 148 | 'CT-NS': -0.0, 149 | 'CT-NT': -0.0, 150 | 'CT-OE': -0.0, 151 | 'CT-OH': 0.1, 152 | 'CT-OS': -0.0, 153 | 'CT-S': 0.08, 154 | 'CT-SH': 0.175, 155 | 'CT-SZ': 0.0, 156 | 'CY-C': 0.0, 157 | 'CY-CE': 0.0, 158 | 'CZ-NZ': -0.0, 159 | 'CZA-NZ': 0.09, 160 | 'CZT-NZ': 0.03, 161 | 'H-N': -0.0, 162 | 'H-N3': -0.0, 163 | 'H-NP': -0.05, 164 | 'H-NS': -0.0, 165 | 'H-NT': -0.0, 166 | 'HA-CA': -0.01, 167 | 'HA-CM': 0.0, 168 | 'HA-CP': -0.0, 169 | 'HA-CS': -0.0, 170 | 'HC-C': 0.0, 171 | 'HC-C#': -0.0, 172 | 'HC-C=': -0.0, 173 | 'HC-CAM': 0.0, 174 | 'HC-CE': 0.0, 175 | 'HC-CF': -0.0, 176 | 'HC-CM': -0.0, 177 | 'HC-CT': 0.0, 178 | 'HC-CY': 0.0, 179 | 'HC-CZ': -0.0, 180 | 'HO-OH': 0.0, 181 | 'HS-SH': 0.0, 182 | 'NO-ON': -0.18, 183 | 'O-P': 0.0, 184 | 'OS-P': 0.0, 185 | 'OY-SZ': 0.06, 186 | 'U-U': 0.0, 187 | 'X-X': 0.0, 188 | } 189 | db = OrderedDict(lbcc) 190 | return db 191 | 192 | 193 | def Refine_PDB_file(fname): 194 | flines = open(fname, 'r+').readlines() 195 | pdb_lines = [] 196 | for line in flines: 197 | if ('ATOM' in line) or ('HETATM' in line): 198 | line = line.rstrip() 199 | line = line.lstrip() 200 | if 'DUM' not in line: 201 | pdb_lines.append(line) 202 | return pdb_lines 203 | 204 | 205 | def get_coos_from_pdb(pdb_dat): 206 | atoms = [] 207 | coos = [] 208 | for line in pdb_dat: 209 | atom = line.split()[2] 210 | x, y, z = line[28:56].split() 211 | atoms.append(atom) 
212 | coos.append([float(x), float(y), float(z)]) 213 | return (atoms, coos) 214 | 215 | 216 | def pairing_func(a, b): 217 | ans = (a + b) * (a + b + 1) * 0.5 218 | if a > b: 219 | ans = ans + a 220 | pans = '%6d%6d' % (b, a) 221 | else: 222 | ans = ans + b 223 | pans = '%6d%6d' % (a, b) 224 | return (int(ans), pans) 225 | 226 | 227 | def ucomb(vec, blist): 228 | res = 0 229 | for a in vec: 230 | vec.remove(a) 231 | for b in vec: 232 | ans = (a + b) * (a + b + 1) * 0.5 233 | if (ans + a in blist) or (ans + b in blist): 234 | res = res + 1 235 | return res 236 | 237 | 238 | def tor_cent(vec, blist): 239 | db = {} 240 | for a in vec: 241 | na = 0 242 | for b in vec: 243 | ans = (a + b) * (a + b + 1) * 0.5 244 | if (ans + a in blist) or (ans + b in blist): 245 | na += 1 246 | db[a] = na 247 | new_vec = list(sorted(db, key=db.__getitem__, reverse=True)) 248 | return new_vec 249 | 250 | 251 | def bossPdbAtom2Element(attype): 252 | elem = ''.join([i for i in attype[:-1] if not i.isdigit()]) 253 | return elem 254 | 255 | 256 | def bossElement2Mass(elem): 257 | symb2mass = { 258 | 'H': 1.008, 259 | 'F': 18.998403163, 260 | 'Cl': 35.45, 261 | 'Br': 79.904, 262 | 'I': 126.90447, 263 | 'O': 15.999, 264 | 'S': 32.06, 265 | 'N': 14.007, 266 | 'P': 30.973761998, 267 | 'C': 12.011, 268 | 'Si': 28.085, 269 | 'Na': 22.98976928, 270 | 'SOD': 22.98976928, 271 | 'K': 39.0983, 272 | 'Mg': 24.305, 273 | 'Ca': 40.078, 274 | 'Mn': 54.938044, 275 | 'Fe': 55.845, 276 | 'Co': 58.933194, 277 | 'Ni': 58.6934, 278 | 'Cu': 63.546, 279 | 'Zn': 65.38, 280 | } 281 | try: 282 | res = symb2mass[elem] 283 | except NameError: 284 | print("Mass for atom %s is not available \n add it to symb2mass dictionary") 285 | return res 286 | 287 | 288 | def Refine_file(fname): 289 | flines = open(fname, 'r+') 290 | lines = [] 291 | for line in flines: 292 | if line.rstrip(): 293 | line = line.rstrip() 294 | line = line.lstrip() 295 | lines.append(line) 296 | flines.close() 297 | return lines 298 | 299 | 300 | class BOSSReader(object): 301 | def __init__(self, zmatrix, outdir, optim, charge=0, lbcc=False): 302 | self.zmat = zmatrix 303 | self.outdir = outdir 304 | self.impDat = {} 305 | self.MolData = {} 306 | self.refine_data(optim, charge, lbcc) 307 | 308 | def Get_OPT(self, optim, charge): 309 | assert os.path.isfile(self.zmat), 'File named %10s does not exist' % self.zmat 310 | assert ('BOSSdir' in os.environ) and os.path.isfile( 311 | (os.environ['BOSSdir'] + '/scripts/xZCM1A') 312 | ), 'Please Make sure $BOSSdir is defined \n xZCM1A and related files are in scripts directory of BOSS' 313 | execs = { 314 | 2: os.environ['BOSSdir'] + '/scripts/xZCM1A+2 > olog', 315 | 1: os.environ['BOSSdir'] + '/scripts/xZCM1A+ > olog', 316 | 0: os.environ['BOSSdir'] + '/scripts/xZCM1A > olog', 317 | -1: os.environ['BOSSdir'] + '/scripts/xZCM1A- > olog', 318 | -2: os.environ['BOSSdir'] + '/scripts/xZCM1A-2 > olog', 319 | } 320 | # print('MOLECULE HAS A CHARGE of %d' % charge) 321 | if optim > 0: 322 | print('Optimization level requested %d' % optim) 323 | for opt_lev in range(optim): 324 | print('Performing Stage %d of Charge Generation' % (opt_lev + 1)) 325 | execfile = execs[charge] 326 | coma = execfile + ' ' + self.zmat[:-2] 327 | os.system(coma) 328 | os.system('cp sum %s' % (self.zmat)) 329 | execfile = os.environ['BOSSdir'] + '/scripts/xOPT > olog' 330 | coma = execfile + ' ' + self.zmat[:-2] 331 | os.system(coma) 332 | # os.system('cd ' + self.outdir +';/bin/cp sum %s' % (self.zmat)) 333 | os.system('/bin/cp sum %s' % (self.zmat)) 334 | 
execfile = os.environ['BOSSdir'] + '/scripts/xSPM > olog' 335 | coma = execfile + ' ' + self.zmat[:-2] 336 | os.system(coma) 337 | # os.system('cd ' + self.outdir + ';/bin/cp sum %s' % (self.zmat)) 338 | os.system('/bin/cp sum %s' % (self.zmat)) 339 | return None 340 | 341 | def get_addihed(self, data): 342 | add = [] 343 | nadd = 0 344 | for line in data: 345 | if line[0].isdigit(): 346 | add.append(line.split()[0:4]) 347 | nadd = nadd + 1 348 | return add 349 | 350 | def get_atinfo(self, data): 351 | ats = [] 352 | nat = 0 353 | for line in data: 354 | if line[0].isdigit() and float(line.split()[2]) > 1: 355 | ats.append(line) 356 | nat += 1 357 | return ats 358 | 359 | def get_charge(self, data): 360 | TotQ = {} 361 | for line in data[1:]: 362 | words = line.split() 363 | TotQ['-'.join(words[:-1])] = round(float(words[-1]), 3) 364 | return TotQ 365 | 366 | def get_tors(self, data): 367 | tors = [] 368 | ntor = 0 369 | for line in data: 370 | if 'All Solutes' in line: 371 | tors.append(line.split()[4:8]) 372 | for tor in line.split()[4:8]: 373 | if abs(float(tor)) > 0.0: 374 | ntor = ntor + 1 375 | return tors 376 | 377 | def get_QLJ(self, data): 378 | qlj = [] 379 | nqlj = 0 380 | for line in data: 381 | if 'All Solutes' in line and line[0].isalpha(): 382 | qlj.append( 383 | [line.split()[0], line.split()[2], line.split()[3], line.split()[4]] 384 | ) 385 | nqlj += 1 386 | return qlj 387 | 388 | def get_angs(self, data): 389 | angs = {'cl1': [], 'cl2': [], 'cl3': [], 'R': [], 'K': []} 390 | nang = 0 391 | for line in data: 392 | if line[0].isdigit() and float(line.split()[4]) > 0: 393 | word = line.split() 394 | angs['cl1'].append(int(word[0])) 395 | angs['cl2'].append(int(word[1])) 396 | angs['cl3'].append(int(word[2])) 397 | angs['R'].append(float(word[3])) 398 | angs['K'].append(float(word[4])) 399 | nang = nang + 1 400 | # print 'Total No of Non-zero Angles in BOSS is %d' % (nang) 401 | return angs 402 | 403 | def get_XYZ(self, data): 404 | XYZ = {'at_num': [], 'X': [], 'Y': [], 'Z': [], 'at_symb': []} 405 | for line in data: 406 | if line[0].isdigit() and len(line.split()) == 5: 407 | word = line.split() 408 | if int(word[0]) > 0: 409 | XYZ['at_num'].append(int(word[0])) 410 | XYZ['X'].append(float(word[1])) 411 | XYZ['Y'].append(float(word[2])) 412 | XYZ['Z'].append(float(word[3])) 413 | XYZ['at_symb'].append(word[4]) 414 | XYZ = pd.DataFrame(XYZ) 415 | return XYZ 416 | 417 | def get_pairs(self, data): 418 | data = data[1:] 419 | plnos = [] 420 | for i in range(0, len(data)): 421 | if 'Atom' in data[i]: 422 | plnos.append(i) 423 | plnos.append(len(data)) 424 | pair_dat = { 425 | i: ' '.join(data[plnos[i]: plnos[i + 1]]) for i in range(len(plnos) - 1) 426 | } 427 | for nu in range(len(plnos) - 1): 428 | pair_dat[nu] = list(pair_dat[nu][10:].split()) 429 | pair_dat[nu] = np.array([int(a) - 2 for a in pair_dat[nu]]) 430 | pairs = [] 431 | for k in pair_dat.keys(): 432 | for j in pair_dat[k]: 433 | pairs.append('%6d%6d%6d\n' % (k - 1, j, 1)) 434 | return pairs 435 | 436 | def get_bonds(self, data): 437 | bnds = {'cl1': [], 'cl2': [], 'RIJ': [], 'KIJ': [], 'TIJ': []} 438 | nbnd = 0 439 | for line in data: 440 | if line[0].isdigit() and float(line.split()[3]) > 0: 441 | word = line.split() 442 | bnds['cl1'].append(int(word[0])) 443 | bnds['cl2'].append(int(word[1])) 444 | bnds['RIJ'].append(float(word[2])) 445 | bnds['KIJ'].append(float(word[3])) 446 | bnds['TIJ'].append(line[-5:]) 447 | nbnd += 1 448 | return bnds 449 | 450 | def prep_lbcc(self, bond_data, qdata): 451 | db = bcc_db() 
452 | bnd_df = pd.DataFrame(bond_data) 453 | bnd_df = bnd_df[['cl1', 'cl2']] 454 | bnd_df.columns = ['I', 'J'] 455 | q_df = pd.DataFrame(columns=['TY', 'Q']) 456 | q_df.loc[0] = ['1', 0.000] 457 | q_df.loc[1] = ['2', 0.000] 458 | for i in range(len(qdata)): 459 | q_df.loc[i + 2] = [qdata[i][0], float(qdata[i][1])] 460 | bond, cha, QBC1 = new_mol_info(db, q_df, bnd_df) 461 | lbcc_qdat = [] 462 | for i in range(len(qdata)): 463 | lbcc_qdat.append( 464 | [qdata[i][0], str(cha.QBCC.values[i]), qdata[i][2], qdata[i][3]] 465 | ) 466 | bond.to_csv('LBCC_BONDS.csv', index=False) 467 | cha.to_csv('LBCC_CHARGES.csv', index=False) 468 | return np.array(cha.QBCC), lbcc_qdat 469 | 470 | def cleanup(self): 471 | # os.system('cd ' + self.outdir + ';/bin/rm sum log olog out plt.pdb') 472 | os.system('/bin/rm sum log olog out plt.pdb') 473 | 474 | def get_ImpDat(self, optim, charge): 475 | self.Get_OPT(optim, charge) 476 | odat = Refine_file('out') 477 | sdat = Refine_file('sum') 478 | MolData = {} 479 | impDat = {} 480 | MolData['PDB'] = Refine_file('plt.pdb') 481 | for nl in range(len(odat)): 482 | if 'Z-Matrix for Reference Solutes' in odat[nl]: 483 | impDat['ATMinit'] = nl 484 | elif 'Net Charge' in odat[nl]: 485 | impDat['TotalQ'] = nl 486 | elif 'OPLS Force Field Parameters' in odat[nl]: 487 | impDat['ATMfinal'] = nl 488 | impDat['NBDinit'] = nl 489 | elif 'Fourier Coefficients' in odat[nl]: 490 | impDat['TORinit'] = nl 491 | impDat['NBDfinal'] = nl 492 | elif 'Bond Stretching Parameters' in odat[nl]: 493 | impDat['TORfinal'] = nl 494 | impDat['BNDinit'] = nl 495 | elif 'Angle Bending Parameters' in odat[nl]: 496 | impDat['BNDfinal'] = nl 497 | impDat['ANGinit'] = nl 498 | elif 'Non-bonded Pairs List' in odat[nl]: 499 | impDat['ANGfinal'] = nl 500 | impDat['PAIRinit'] = nl 501 | elif 'Solute 0: X Y Z' in odat[nl]: 502 | impDat['XYZinit'] = nl 503 | elif 'Atom I Atom J RIJ' in odat[nl]: 504 | impDat['XYZfinal'] = nl 505 | elif 'Checking' in odat[nl]: 506 | impDat['PAIRfinal'] = nl 507 | # THIS PART IS READ FROM SUM FILE ### 508 | for ml in range(len(sdat)): 509 | if 'Additional Dihedrals follow' in sdat[ml]: 510 | impDat['ADDinit'] = ml 511 | elif 'Domain Definitions follow' in sdat[ml]: 512 | impDat['ADDfinal'] = ml 513 | # THIS PART IS READ FROM SUM FILE ### 514 | MolData['ATOMS'] = self.get_atinfo(odat[impDat['ATMinit']: impDat['ATMfinal']]) 515 | MolData['Q_LJ'] = self.get_QLJ(odat[impDat['NBDinit']: impDat['NBDfinal']]) 516 | MolData['BONDS'] = self.get_bonds(odat[impDat['BNDinit']: impDat['BNDfinal']]) 517 | MolData['ANGLES'] = self.get_angs(odat[impDat['ANGinit']: impDat['ANGfinal']]) 518 | MolData['TORSIONS'] = self.get_tors( 519 | odat[impDat['TORinit']: impDat['TORfinal']] 520 | ) 521 | MolData['ADD_DIHED'] = self.get_addihed( 522 | sdat[impDat['ADDinit']: impDat['ADDfinal']] 523 | ) 524 | MolData['XYZ'] = self.get_XYZ(odat[impDat['XYZinit']: impDat['XYZfinal']]) 525 | MolData['PAIRS'] = self.get_pairs( 526 | odat[impDat['PAIRinit']: impDat['PAIRfinal']] 527 | ) 528 | MolData['TotalQ'] = self.get_charge( 529 | odat[impDat['TotalQ']: impDat['TotalQ'] + 4] 530 | ) 531 | return MolData 532 | 533 | def refine_data(self, optim, charge, lbcc): 534 | if lbcc and (charge == 0): 535 | lbcc_MD = self.get_ImpDat(optim, charge) 536 | QLBCC, DATA_Q_LJ = self.prep_lbcc(lbcc_MD['BONDS'], lbcc_MD['Q_LJ']) 537 | lbcc_MD['Q_LJ'] = DATA_Q_LJ 538 | BCC_file2zmat(self.zmat, QLBCC, oname='%s_BCC.z' % self.zmat[:-2]) 539 | os.system('mv %s.z %s_NO_LBCC.z' % (self.zmat[:-2], self.zmat[:-2])) 540 | os.system('mv 
%s_BCC.z %s.z' % (self.zmat[:-2], self.zmat[:-2])) 541 | self.MolData = lbcc_MD 542 | elif lbcc and (charge != 0): 543 | print('LBCC IS SUPPORTED ONLY FOR NEUTRAL MOLECULES') 544 | else: 545 | self.MolData = self.get_ImpDat(optim, charge) 546 | return None 547 | -------------------------------------------------------------------------------- /LigParGenPSP/Converter.py: -------------------------------------------------------------------------------- 1 | from LigParGenPSP.BOSSReader import BOSSReader, CheckForHs 2 | from LigParGenPSP.BOSS2LAMMPS import mainBOSS2LAMMPS 3 | from LigParGenPSP.CreatZmat import GenMolRep 4 | import argparse 5 | import pickle 6 | import os 7 | from openbabel import openbabel as ob 8 | 9 | obConversion = ob.OBConversion() 10 | obConversion.SetInAndOutFormats("pdb", "mol") 11 | 12 | 13 | def main(): 14 | 15 | parser = argparse.ArgumentParser( 16 | prog='LigParGenPSP', 17 | formatter_class=argparse.RawDescriptionHelpFormatter, 18 | description=""" 19 | Ligand Parameter Generator Based on 20 | Jorgensen group's OPLS-AA/CM1A(-LBCC) FF 21 | Created on Mon Feb 15 15:40:05 2016 22 | @author: Leela S. Dodda leela.dodda@yale.edu 23 | @author: William L. Jorgensen Lab 24 | 25 | FF formats provided : 26 | -------------------- 27 | OpenMM .xml 28 | CHARMM/NAMD .prm & .rtf 29 | GROMACS .itp & .gro 30 | CNS/X-PLOR .param & .top 31 | Q .Q.prm & .Q.lib 32 | DESMOND .cms 33 | BOSS/MCPRO .z 34 | PDB2PQR .pqr 35 | 36 | Input Files supported : 37 | -------------------- 38 | SMILES code 39 | PDB 40 | MDL MOL Format 41 | 42 | ################################################ 43 | if using MOL file 44 | Usage: LigParGenPSP -m phenol.mol -r PHN -c 0 -o 0 45 | 46 | if using PDB file 47 | Usage: LigParGenPSP -p phenol.pdb -r PHN -c 0 -o 0 48 | 49 | if using BOSS SMILES CODE 50 | Usage: LigParGenPSP -s 'c1ccc(cc1)O' -r PHN -c 0 -o 0 51 | 52 | REQUIREMENTS: 53 | BOSS (need to set BOSSdir in bashrc and cshrc) 54 | Preferably Anaconda python with following modules 55 | pandas 56 | argparse 57 | numpy 58 | openbabel 59 | 60 | Please cite following references: 61 | 1. LigParGen web server: an automatic OPLS-AA parameter generator for organic ligands 62 | Leela S. Dodda Israel Cabeza de Vaca Julian Tirado-Rives William L. Jorgensen 63 | Nucleic Acids Research, Volume 45, Issue W1, 3 July 2017, Pages W331–W336 64 | 2. 1.14*CM1A-LBCC: Localized Bond-Charge Corrected CM1A Charges for Condensed-Phase Simulations 65 | Leela S. Dodda, Jonah Z. Vilseck, Julian Tirado-Rives , and William L. Jorgensen 66 | Department of Chemistry, Yale University, New Haven, Connecticut 06520-8107, United States 67 | J. Phys. Chem. B, 2017, 121 (15), pp 3864–3870 68 | 3. Accuracy of free energies of hydration using CM1 and CM3 atomic charges. 69 | Udier–Blagović, M., Morales De Tirado, P., Pearlman, S. A. and Jorgensen, W. L. 70 | J. Comput. Chem., 2004, 25,1322–1332. 
doi:10.1002/jcc.20059 71 | """, 72 | ) 73 | parser.add_argument("-r", "--resname", help="Residue name from PDB FILE", type=str) 74 | parser.add_argument( 75 | "-s", "--smiles", help="Paste SMILES code from CHEMSPIDER or PubChem", type=str 76 | ) 77 | parser.add_argument( 78 | "-m", "--mol", help="Submit MOL file from CHEMSPIDER or PubChem", type=str 79 | ) 80 | parser.add_argument( 81 | "-p", "--pdb", help="Submit PDB file from CHEMSPIDER or PubChem", type=str 82 | ) 83 | parser.add_argument( 84 | "-o", 85 | "--opt", 86 | help="Optimization or Single Point Calculation", 87 | type=int, 88 | choices=[0, 1, 2, 3], 89 | ) 90 | parser.add_argument( 91 | "-c", 92 | "--charge", 93 | type=int, 94 | choices=[0, -1, 1, -2, 2], 95 | help="0: Neutral <0: Anion >0: Cation ", 96 | ) 97 | parser.add_argument( 98 | "-l", 99 | "--lbcc", 100 | help="Use 1.14*CM1A-LBCC charges instead of 1.14*CM1A", 101 | action="store_true", 102 | ) 103 | parser.add_argument( 104 | "-d", "--outdir", help="PATH for output directory", type=str, default='.' 105 | ) 106 | args = parser.parse_args() 107 | 108 | convert(**vars(args)) 109 | 110 | 111 | def convert(**kwargs): 112 | 113 | # set the default values 114 | options = { 115 | 'opt': 0, 116 | 'smiles': None, 117 | 'zmat': None, 118 | 'charge': 0, 119 | 'lbcc': False, 120 | 'mol': None, 121 | 'resname': 'UNK', 122 | 'pdb': None, 123 | } 124 | 125 | # update the default values based on the arguments 126 | options.update(kwargs) 127 | 128 | # set the arguments that you would used to get from argparse 129 | opt = options['opt'] 130 | smiles = options['smiles'] 131 | # zmat = options['zmat'] 132 | charge = options['charge'] 133 | lbcc = options['lbcc'] 134 | resname = options['resname'] 135 | mol = options['mol'] 136 | pdb = options['pdb'] 137 | outdir = options['outdir'] 138 | if opt is not None: 139 | optim = opt 140 | else: 141 | optim = 0 142 | 143 | clu = False 144 | 145 | # assert (which('obabel') 146 | # is not None), "OpenBabel is Not installed or \n the executable location is not accessable" 147 | if os.path.exists(outdir + resname + '.xml'): 148 | os.system('/bin/rm ' + outdir + resname + '.*') 149 | if lbcc: 150 | if charge == 0: 151 | lbcc = True 152 | print('LBCC converter is activated') 153 | else: 154 | lbcc = False 155 | print( 156 | '1.14*CM1A-LBCC is only available for neutral molecules\n Assigning unscaled CM1A charges' 157 | ) 158 | 159 | if smiles is not None: 160 | os.chdir(outdir) 161 | smifile = open('%s.smi' % resname, 'w+') 162 | smifile.write('%s' % smiles) 163 | smifile.close() 164 | GenMolRep('%s.smi' % resname, optim, resname, charge) 165 | mol = BOSSReader('%s.z' % resname, '%s' % outdir, optim, charge, lbcc) 166 | elif mol is not None: 167 | if not os.path.exists(os.path.join(outdir, mol)): 168 | os.system('cp %s %s' % (mol, outdir)) 169 | os.chdir(outdir) 170 | GenMolRep(mol, optim, resname, charge) 171 | mol = BOSSReader('%s.z' % resname, '%s' % outdir, optim, charge, lbcc) 172 | elif pdb is not None: 173 | if not os.path.exists(os.path.join(outdir, pdb)): 174 | os.system('cp %s %s' % (pdb, outdir)) 175 | os.chdir(outdir) 176 | # Convert pdb to mol using Obabelv3 177 | mole = ob.OBMol() 178 | obConversion.ReadFile(mole, pdb) 179 | mol = pdb.replace('pdb', 'mol') 180 | obConversion.WriteFile(mole, mol) 181 | GenMolRep(mol, optim, resname, charge) 182 | mol = BOSSReader('%s.z' % resname, '%s' % outdir, optim, charge, lbcc) 183 | clu = True 184 | assert ( 185 | mol.MolData['TotalQ']['Reference-Solute'] == charge 186 | ), "PROPOSED CHARGE IS NOT 
POSSIBLE: SOLUTE MAY BE AN OPEN SHELL" 187 | assert CheckForHs( 188 | mol.MolData['ATOMS'] 189 | ), "Hydrogens are not added. Please add Hydrogens" 190 | 191 | pickle.dump(mol, open(resname + ".p", "wb")) 192 | mainBOSS2LAMMPS(resname, clu) 193 | print('DONE WITH LAMMPS') 194 | 195 | # Cleanup 196 | list_files = [ 197 | "sum", 198 | "log", 199 | "olog", 200 | "out", 201 | "optzmat", 202 | "slvzmat", 203 | "plt.pdb", 204 | "clu.pdb", 205 | "LL", 206 | "LBCC_BONDS.csv", 207 | "LBCC_CHARGES.csv ", 208 | resname + ".p", 209 | resname + ".z", 210 | resname + "_NO_LBCC.z", 211 | ] 212 | for file in list_files: 213 | if os.path.exists(file): 214 | os.remove(file) 215 | 216 | 217 | if __name__ == "__main__": 218 | 219 | main() 220 | -------------------------------------------------------------------------------- /LigParGenPSP/CreatZmat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | AutoZmat_VersionLSD: 5 | A python program to create BOSS zmatrix from any molecular input format. 6 | Need BOSS and OpenBabel executable to work 7 | Python Modeules Needed - networkx, numpy, pandas 8 | Created on Wed Jun 14 2017 9 | 10 | @author: Leela Sriram Dodda 11 | @email: leela.dodda@yale.edu 12 | """ 13 | import os 14 | import numpy as np 15 | from LigParGenPSP.Vector_algebra import ( 16 | pairing_func, 17 | angle, 18 | dihedral, 19 | tor_id, 20 | ang_id, 21 | bossElement2Num, 22 | Distance, 23 | ) 24 | import collections 25 | import networkx as nx 26 | import time 27 | 28 | 29 | def AsitIsZmat(ifile, optim, resid): 30 | iform = ifile.split('.') 31 | # CREATE A MOL FILE FROM ANY FILE 32 | if iform[1] == 'smi': 33 | os.system('obabel -i%s %s -omol %s.mol --gen3D' % (iform[1], ifile, iform[0])) 34 | else: 35 | os.system( 36 | 'obabel -i%s %s -omol %s.mol ---errorlevel 1 -b &>LL' 37 | % (iform[1], ifile, iform[0]) 38 | ) 39 | while not os.path.exists(iform[0] + '.mol'): 40 | time.sleep(1) 41 | mollines = open(iform[0] + '.mol', 'r').readlines() 42 | COOS, ATYPES, MolBonds = ReadMolFile(mollines) 43 | G_mol, mol_icords = make_graphs(ATYPES, COOS, MolBonds) 44 | print_ZMAT(ATYPES, G_mol, mol_icords, COOS, '%s.z' % resid, resid) 45 | return None 46 | 47 | 48 | def CanonicaliedZmat(ifile, optim, resid): 49 | iform = ifile.split('.') 50 | # CREATE A MOL FILE FROM ANY FILE 51 | if iform[1] == 'smi': 52 | os.system('obabel -i%s %s -omol %s.mol --gen3D' % (iform[1], ifile, iform[0])) 53 | else: 54 | os.system( 55 | 'obabel -i%s %s -omol --canonical %s.mol' % (iform[1], ifile, iform[0]) 56 | ) 57 | mollines = open(iform[0] + '.mol', 'r').readlines() 58 | COOS, ATYPES, MolBonds = ReadMolFile(mollines) 59 | G_mol, mol_icords = make_graphs(ATYPES, COOS, MolBonds) 60 | print_ZMAT(ATYPES, G_mol, mol_icords, COOS, '%s.z' % resid, resid) 61 | return None 62 | 63 | 64 | def GenMolRep(ifile, optim, resid, charge): 65 | iform = ifile.split('.') 66 | try: 67 | AsitIsZmat(ifile, optim, resid) 68 | except ZeroDivisionError: 69 | print( 70 | 'Warning!!\n 1.Cannonicalising Input MOL/PDB file\n 2.Atom ordering may change \n 3.But the Coordinates remain the same' 71 | ) 72 | CanonicaliedZmat(ifile, optim, resid) 73 | Get_OPT('%s.z' % resid, optim, charge) 74 | if os.path.exists('clu.pdb'): 75 | os.system('/bin/rm clu.pdb') 76 | if iform[1] == 'pdb': 77 | if os.environ.get('MCPROdir') is not None: 78 | os.system( 79 | '$MCPROdir/miscexec/clu -t:f=pdb %s.pdb -r %s.z -n:f=p clu.pdb -m ma' 80 | % (iform[0], resid) 81 | ) 82 | else: 83 | 
execfile = os.environ['BOSSdir'] + '/scripts/xSPM > olog' 84 | coma = execfile + ' ' + resid 85 | os.system(coma) 86 | os.system('cp plt.pdb clu.pdb') 87 | return True 88 | 89 | 90 | def Get_OPT(zmat, optim, charge): 91 | assert os.path.isfile(zmat), 'File named %10s does not exist' % zmat 92 | assert ( 93 | 'BOSSdir' in os.environ 94 | ), 'Please Make sure $BOSSdir is defined \n xZCM1A and related files are in scripts directory of BOSS' 95 | execs = { 96 | 2: os.environ['BOSSdir'] + '/scripts/xZCM1A+2 > olog', 97 | -2: os.environ['BOSSdir'] + '/scripts/xZCM1A-2 > olog', 98 | 0: os.environ['BOSSdir'] + '/scripts/xZCM1A > olog', 99 | 1: os.environ['BOSSdir'] + '/scripts/xZCM1A+ > olog', 100 | -1: os.environ['BOSSdir'] + '/scripts/xZCM1A- > olog', 101 | } 102 | print('MOLECULE HAS A CHARGE of %d' % charge) 103 | execfile = execs[charge] 104 | coma = execfile + ' ' + zmat[:-2] 105 | os.system(coma) 106 | os.system('cp sum %s' % (zmat)) 107 | execfile = os.environ['BOSSdir'] + '/scripts/xSPM > olog' 108 | coma = execfile + ' ' + zmat[:-2] 109 | os.system(coma) 110 | os.system('/bin/cp sum %s' % (zmat)) 111 | return None 112 | 113 | 114 | def ReadMolFile(mollines): 115 | [nats, nbonds] = map(int, (mollines[3][0:3], mollines[3][3:6])) 116 | cooslines = mollines[4: 4 + nats] 117 | coos = {} 118 | atypes = {} 119 | for i in range(nats): 120 | els = cooslines[i].split() 121 | coos[i + 1] = [float(e) for e in els[0:3]] 122 | atypes[i + 1] = els[3] 123 | bondlines = mollines[4 + nats: 4 + nats + nbonds] 124 | bonds = {'BI': [], 'BJ': [], 'RIJ': [], 'UID': []} 125 | for line in bondlines: 126 | [bi, bj] = map(int, [line[0:3], line[3:6]]) 127 | bonds['BI'].append(bi) 128 | bonds['BJ'].append(bj) 129 | bonds['RIJ'].append(Distance(coos[bi], coos[bj])) 130 | bonds['UID'].append(pairing_func(bi, bj)) 131 | return (coos, atypes, bonds) 132 | 133 | 134 | def make_graphs(atoms, coos, bonds): 135 | G = nx.DiGraph() 136 | # ADD NODES USING ATOM TYPES AND COORDINATES 137 | for i in coos.keys(): 138 | G.add_node(i, XYZ=coos[i], elem=atoms[i], atno=bossElement2Num(atoms[i])) 139 | for (i, j, rij) in zip(bonds['BI'], bonds['BJ'], bonds['RIJ']): 140 | G.add_edge(i, j, distance=rij) 141 | G.add_edge(j, i, distance=rij) 142 | all_ps = dict(nx.algorithms.all_pairs_shortest_path_length(G)) 143 | all_paths = [] 144 | for s in all_ps.keys(): 145 | for e in all_ps[s].keys(): 146 | # if all_ps[s][e] == 1: all_paths+=list(nx.algorithms.shortest_simple_paths(G,s,e)) 147 | # elif all_ps[s][e] == 2: all_paths+=list(nx.algorithms.shortest_simple_paths(G,s,e)) 148 | # elif all_ps[s][e] == 3: all_paths+=list(nx.algorithms.shortest_simple_paths(G,s,e)) 149 | if all_ps[s][e] == 1: 150 | all_paths += list(nx.algorithms.all_simple_paths(G, s, e, cutoff=1)) 151 | elif all_ps[s][e] == 2: 152 | all_paths += list(nx.algorithms.all_simple_paths(G, s, e, cutoff=2)) 153 | elif all_ps[s][e] == 3: 154 | all_paths += list(nx.algorithms.all_simple_paths(G, s, e, cutoff=3)) 155 | 156 | all_bonds = [p for p in all_paths if len(set(p)) == 2] 157 | new_angs = [p for p in all_paths if len(set(p)) == 3] 158 | new_tors = [p for p in all_paths if len(set(p)) == 4] 159 | dict_new_tors = {tor_id(t): t for t in new_tors} 160 | dict_new_angs = {ang_id(t): t for t in new_angs} 161 | imp_keys = [n for n in G.nodes() if G.degree(n) / 2 == 3] 162 | all_imps = {} 163 | for i in imp_keys: 164 | nei = list(G.neighbors(i)) 165 | # if G.node[i]['atno'] == 6: (backup) 166 | if G.nodes[i]['atno'] == 6: 167 | all_imps[i] = [nei[0], i, nei[1], nei[2]] 168 | MOL_ICOORDS 
= { 169 | 'BONDS': all_bonds, 170 | 'ANGLES': dict_new_angs, 171 | 'TORSIONS': dict_new_tors, 172 | 'IMPROPERS': all_imps, 173 | } 174 | return (G, MOL_ICOORDS) 175 | 176 | 177 | def Get_Add_Int(mol_icords, Z_BONDS, Z_ANGLES, Z_TORSIONS): 178 | all_bonds_mol, all_angles_mol, all_torsions_mol = ( 179 | mol_icords['BONDS'], 180 | mol_icords['ANGLES'], 181 | mol_icords['TORSIONS'], 182 | ) 183 | Z_B = { 184 | pairing_func(i[0] - 2, i[1] - 2): [i[0] - 2, i[1] - 2] for i in Z_BONDS.values() 185 | } 186 | Z_A = { 187 | ang_id([i[0] - 2, i[1] - 2, i[2] - 2]): [i[0] - 2, i[1] - 2, i[2] - 2] 188 | for i in Z_ANGLES.values() 189 | } 190 | Z_T = { 191 | tor_id([i[0] - 2, i[1] - 2, i[2] - 2, i[3] - 2]): [ 192 | i[0] - 2, 193 | i[1] - 2, 194 | i[2] - 2, 195 | i[3] - 2, 196 | ] 197 | for i in Z_TORSIONS.values() 198 | } 199 | Z_Ad_B, Z_Ad_A, Z_Ad_T = ( 200 | collections.OrderedDict(), 201 | collections.OrderedDict(), 202 | collections.OrderedDict(), 203 | ) 204 | for b_ij in all_bonds_mol: 205 | uid_b_ij = pairing_func(b_ij[0], b_ij[1]) 206 | if uid_b_ij not in list(Z_B.keys()): 207 | Z_Ad_B[uid_b_ij] = [b_ij[0] + 2, b_ij[1] + 2] 208 | for a_ij in all_angles_mol.keys(): 209 | if a_ij not in list(Z_A.keys()): 210 | Z_Ad_A[a_ij] = [i + 2 for i in all_angles_mol[a_ij]] 211 | for t_ij in all_torsions_mol.keys(): 212 | if t_ij not in list(Z_T.keys()): 213 | Z_Ad_T[t_ij] = [i + 2 for i in all_torsions_mol[t_ij]] 214 | for c in mol_icords['IMPROPERS'].values(): 215 | Z_Ad_T["-".join(list(map(str, c)))] = [i + 2 for i in c] 216 | return (Z_Ad_B, Z_Ad_A, Z_Ad_T) 217 | 218 | 219 | def print_ZMAT(atoms, G_mol, mol_icords, coos, zmat_name, resid): 220 | if not zmat_name: 221 | zmat_name = resid 222 | Z_ATOMS = {1: 'X', 2: 'X'} 223 | Z_NO = {1: -1, 2: -1} 224 | Z_BONDS = {1: (1, 0, 0.000), 2: (2, 1, 1.00), 3: (3, 2, 1.00)} 225 | Z_ANGLES = { 226 | 1: (1, 0, 0, 0.000), 227 | 2: (2, 1, 0, 0.000), 228 | 3: (3, 2, 1, 90.00), 229 | 4: (4, 3, 2, 90.0), 230 | } 231 | Z_TORSIONS = { 232 | 1: (1, 0, 0, 0, 0.00), 233 | 2: (2, 1, 0, 0, 0.00), 234 | 3: (3, 2, 1, 0, 0.00), 235 | 4: (4, 3, 2, 1, 0.00), 236 | 5: (5, 4, 3, 2, 90.0), 237 | } 238 | for i in range(1, len(atoms) + 1): 239 | Z_ATOMS[i + 2] = atoms[i] 240 | for i in range(1, len(atoms) + 1): 241 | # Z_NO[i + 2] = G_mol.node[i]['atno'] # backup 242 | Z_NO[i + 2] = G_mol.nodes[i]['atno'] 243 | n_ats = 0 244 | B_LINK = {} 245 | for i in G_mol.nodes(): 246 | if n_ats > 0: 247 | neigs = np.sort(list(G_mol.neighbors(i))) 248 | B_LINK[i] = neigs[0] 249 | Z_BONDS[i + 2] = (i + 2, neigs[0] + 2, G_mol[i][neigs[0]]['distance']) 250 | n_ats += 1 251 | n_ats = 0 252 | A_LINK = {} 253 | for i in G_mol.nodes(): 254 | if n_ats > 1: 255 | neigs = np.sort(list(G_mol.neighbors(B_LINK[i]))) 256 | A_LINK[i] = neigs[0] 257 | ang = angle(coos[i], coos[B_LINK[i]], coos[neigs[0]]) 258 | Z_ANGLES[i + 2] = (i + 2, B_LINK[i] + 2, neigs[0] + 2, ang) 259 | n_ats += 1 260 | n_ats = 0 261 | for i in G_mol.nodes(): 262 | if n_ats > 2: 263 | neigs = list(G_mol.neighbors(A_LINK[i])) 264 | neigs = np.array([j for j in neigs if j not in [i, B_LINK[i], A_LINK[i]]]) 265 | neigs = np.sort(neigs) 266 | neigs = neigs[neigs < i] 267 | if len(neigs) < 1: 268 | neigs = [ 269 | j 270 | for j in list(G_mol.neighbors(B_LINK[i])) 271 | if j not in [i, A_LINK[i]] 272 | ] 273 | if B_LINK[i] in list(mol_icords['IMPROPERS'].keys()): 274 | del mol_icords['IMPROPERS'][B_LINK[i]] 275 | [ti, tj, tk, tl] = [i, B_LINK[i], A_LINK[i], neigs[0]] 276 | dihed = dihedral(coos[ti], coos[tj], coos[tk], coos[tl]) 277 | Z_TORSIONS[i + 2] 
= (ti + 2, tj + 2, tk + 2, tl + 2, dihed) 278 | n_ats += 1 279 | Z_Ad_B, Z_Ad_A, Z_Ad_T = Get_Add_Int(mol_icords, Z_BONDS, Z_ANGLES, Z_TORSIONS) 280 | # PRINTING ACTUAL Z-MATRIX 281 | ofile = open(zmat_name, 'w+') 282 | ofile.write('BOSS Z-Matrix with LSDautozmat (written by Leela S. Dodda)\n') 283 | for i in range(1, len(atoms) + 3): 284 | ofile.write( 285 | '%4d %-3s%5d%5d%5d%12.6f%4d%12.6f%4d%12.6f%4s%5d\n' 286 | % ( 287 | i, 288 | Z_ATOMS[i], 289 | Z_NO[i], 290 | Z_NO[i], 291 | Z_BONDS[i][1], 292 | Z_BONDS[i][-1], 293 | Z_ANGLES[i][-2], 294 | Z_ANGLES[i][-1], 295 | Z_TORSIONS[i][-2], 296 | Z_TORSIONS[i][-1], 297 | resid[0:3], 298 | 1, 299 | ) 300 | ) 301 | ofile.write( 302 | ''' Geometry Variations follow (2I4,F12.6) 303 | Variable Bonds follow (I4)\n''' 304 | ) 305 | for i in range(4, len(atoms) + 3): 306 | ofile.write('%4d\n' % i) 307 | ofile.write(' Additional Bonds follow (2I4)\n') 308 | if len(Z_Ad_B) > 0: 309 | for i in Z_Ad_B.values(): 310 | ofile.write('%4d%4d\n' % (i[0], i[1])) 311 | # CREATE A FUNCTION TO DEFINE ADDITIONAL BONDS IN CASE OF RINGS 312 | ofile.write( 313 | ''' Harmonic Constraints follow (2I4,4F10.4) 314 | Variable Bond Angles follow (I4)\n''' 315 | ) 316 | for i in range(5, len(atoms) + 3): 317 | ofile.write('%4d\n' % i) 318 | ofile.write(' Additional Bond Angles follow (3I4)\n') 319 | if len(Z_Ad_A) > 0: 320 | for i in Z_Ad_A.values(): 321 | ofile.write('%4d%4d%4d\n' % (i[0], i[1], i[2])) 322 | # CREATE A FUNCTION TO DEFINE ADDITIONAL BONDS IN CASE OF RINGS 323 | ofile.write(' Variable Dihedrals follow (3I4,F12.6)\n') 324 | for i in range(6, len(atoms) + 3): 325 | ofile.write('%4d%4d%4d%12.6f\n' % (i, -1, -1, 0.000)) 326 | ofile.write(' Additional Dihedrals follow (6I4)\n') 327 | if len(Z_Ad_T) > 0: 328 | for k in Z_Ad_T.keys(): 329 | torsion = Z_Ad_T[k] 330 | ofile.write( 331 | '%4d%4d%4d%4d%4d%4d\n' 332 | % (torsion[0], torsion[1], torsion[2], torsion[3], -1, -1) 333 | ) 334 | ofile.write( 335 | ''' Domain Definitions follow (4I4) 336 | Conformational Search (2I4,2F12.6) 337 | Local Heating Residues follow (I4 or I4-I4) 338 | Final blank line 339 | ''' 340 | ) 341 | ofile.close() 342 | return None 343 | -------------------------------------------------------------------------------- /LigParGenPSP/README: -------------------------------------------------------------------------------- 1 | LigParGen scripts included with the PSP package were taken from LigParGenv2.1 (https://pypi.org/project/LigParGen/#description) and modified to make them compatible with the PSP package. 2 | 3 | Details of the original distribution: 4 | Author: Leela S. Dodda, Matthew C. Robinson 5 | License: MIT 6 | Email: leela.dodda@yale.edu,matthew.robinson@yale.edu 7 | Homepage: https://bitbucket.org/leelasd/ligpargen_2017_sep18/src/master/ 8 | 9 | We have updated the original LigParGen source code to include the following features: 10 | (1) Able to store output files in a user-defined directory. 11 | (2) Compatible with the recent versions of Open Babel (v3.1.1), NetworkX (v2.5), and pandas (v1.2.4) libraries. 12 | (3) Generate a data file for the LAMMPS package only. 13 | (4) Delete all temporary files. 
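For reference, a minimal sketch (not part of the original README) of driving the bundled converter from Python rather than the LigParGenPSP command line, based on the convert() options defined in LigParGenPSP/Converter.py. It assumes a working BOSS installation ($BOSSdir set) and Open Babel; the input file name, residue name, and output directory below are placeholders, not values from this repository.

    # Hypothetical usage sketch; requires BOSS ($BOSSdir) and Open Babel.
    from LigParGenPSP.Converter import convert

    convert(
        mol='phenol.mol',          # input MOL file (alternatively smiles=... or pdb=...)
        resname='PHN',             # residue name; the LAMMPS data file is written as PHN.lmp
        charge=0,                  # net molecular charge
        opt=0,                     # 0 = single-point charges; 1-3 = BOSS optimization stages
        lbcc=True,                 # 1.14*CM1A-LBCC charges (neutral molecules only)
        outdir='./ligpargen_out',  # existing user-defined output directory (feature 1 above)
    )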
14 | -------------------------------------------------------------------------------- /LigParGenPSP/Vector_algebra.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | 5 | def bossElement2Num(elem): 6 | symb2mass = { 7 | "H": 1, 8 | "B": 5, 9 | "C": 6, 10 | "N": 7, 11 | "O": 8, 12 | "F": 9, 13 | "Si": 14, 14 | "P": 15, 15 | "S": 16, 16 | "Cl": 17, 17 | "Br": 35, 18 | "I": 53, 19 | } 20 | try: 21 | res = symb2mass[elem] 22 | except NameError: 23 | print( 24 | "Mass for atom %s is not available \n add it to symb2mass dictionary" 25 | ) 26 | return res 27 | 28 | 29 | def pairing_func(a, b): 30 | ans = (a + b) * (a + b + 1) * 0.5 31 | if a > b: 32 | ans = ans + a 33 | else: 34 | ans = ans + b 35 | return int(ans) 36 | 37 | 38 | def Vector(x, y, z): 39 | return (x, y, z) 40 | 41 | 42 | def length(v): 43 | "Return length of a vector." 44 | sum = 0.0 45 | for c in v: 46 | sum += c * c 47 | return math.sqrt(sum) 48 | 49 | 50 | def subtract(u, v): 51 | "Return difference between two vectors." 52 | x = u[0] - v[0] 53 | y = u[1] - v[1] 54 | z = u[2] - v[2] 55 | return Vector(x, y, z) 56 | 57 | 58 | def dot(u, v): 59 | "Return dot product of two vectors." 60 | sum = 0.0 61 | for cu, cv in zip(u, v): 62 | sum += cu * cv 63 | return sum 64 | 65 | 66 | def Distance(u, v): 67 | "Return length of a vector." 68 | # print(u,v) 69 | uv = subtract(u, v) 70 | lsum = 0.0 71 | for c in uv: 72 | lsum += c * c 73 | return math.sqrt(lsum) 74 | 75 | 76 | def cross(u, v): 77 | "Return the cross product of two vectors." 78 | x = u[1] * v[2] - u[2] * v[1] 79 | y = u[2] * v[0] - u[0] * v[2] 80 | z = u[0] * v[1] - u[1] * v[0] 81 | return Vector(x, y, z) 82 | 83 | 84 | def Mol_angle(v0, v1): 85 | "Return angle [0..pi] between two vectors." 86 | cosa = round(dot(v0, v1) / length(v0) / length(v1), 3) 87 | return np.arccos(cosa) 88 | 89 | 90 | def angle(p0, p1, p2): 91 | "Return angle [0..pi] between two vectors." 92 | v0 = subtract(p0, p1) 93 | v1 = subtract(p2, p1) 94 | cosa = dot(v0, v1) / length(v0) / length(v1) 95 | # print(cosa) 96 | return 180.0 * np.arccos(round(cosa, 3)) * 7.0 / 22.0 97 | 98 | 99 | def dihedral(p0, p1, p2, p3): 100 | "Return angle [0..2*pi] formed by vertices p0-p1-p2-p3." 101 | v01 = subtract(p0, p1) 102 | v32 = subtract(p3, p2) 103 | v12 = subtract(p1, p2) 104 | v0 = cross(v12, v01) 105 | v3 = cross(v12, v32) 106 | # The cross product vectors are both normal to the axis 107 | # vector v12, so the angle between them is the dihedral 108 | # angle that we are looking for. 
However, since "angle" 109 | # only returns values between 0 and pi, we need to make 110 | # sure we get the right sign relative to the rotation axis 111 | a = Mol_angle(v0, v3) 112 | if dot(cross(v0, v3), v12) > 0: 113 | a = -a 114 | return a * 180.0 * 7.0 / 22.0 115 | 116 | 117 | def tor_id(a): 118 | bond = pairing_func(a[1], a[2]) 119 | ends = pairing_func(a[0], a[3]) 120 | return "%d-%d" % (bond, ends) 121 | 122 | 123 | def ang_id(a): 124 | bond_a = pairing_func(a[0], a[1]) 125 | bond_b = pairing_func(a[1], a[2]) 126 | return pairing_func(bond_a, bond_b) 127 | -------------------------------------------------------------------------------- /LigParGenPSP/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/LigParGenPSP/__init__.py -------------------------------------------------------------------------------- /LigParGenPSP/fepzmat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def new_func(linex, match): 5 | out = 0 6 | for word in linex.split(): 7 | if word == match: 8 | out = out + 1 9 | return out 10 | 11 | 12 | def read_coords(data): 13 | cmatrix = [] 14 | ta = [] 15 | tb = [] 16 | for i in range(0, len(data)): 17 | cmatrix.append(data[i].split()) 18 | ta = [int(cmatrix[i][2]) for i in range(0, len(data))] 19 | tb = [int(cmatrix[i][3]) for i in range(0, len(data))] 20 | ta = np.array(ta) 21 | maxa = ta.max() 22 | tb = np.array(tb) 23 | numi = 1 24 | for i in range(0, len(tb)): 25 | if tb[i] > 1: 26 | tb[i] = maxa + numi 27 | numi = numi + 1 28 | for i in range(0, len(data)): 29 | cmatrix[i][3] = str(tb[i]) 30 | outdat = [] 31 | new_coord = '' 32 | for i in range(0, len(data)): 33 | new_coord = '{:>4s} {:<3s} {:>4s} {:>4s}'.format( 34 | cmatrix[i][0], cmatrix[i][1], cmatrix[i][2], cmatrix[i][3] 35 | ) 36 | new_coord = new_coord + '{:>5s}{:>12s}{:>4s}{:>12s}'.format( 37 | cmatrix[i][4], cmatrix[i][5], cmatrix[i][6], cmatrix[i][7] 38 | ) 39 | new_coord = new_coord + '{:>4s}{:>12s}{:>9s}'.format( 40 | cmatrix[i][8], cmatrix[i][9], cmatrix[i][10] 41 | ) 42 | outdat.append(new_coord) 43 | tb = tb[tb > 0] # IMPORTANT TO AVOID THE -1 and 0 IN FINAL ATOM TYPE 44 | ta = ta[ta > 0] # IMPORTANT TO AVOID THE -1 and 0 IN FINAL ATOM TYPE 45 | return outdat, tb, ta 46 | 47 | 48 | def read_files(infile): 49 | nline = 0 50 | cline = 0 51 | oline = 0 52 | data = [] 53 | for line in infile: 54 | if line.rstrip(): 55 | data.append(line) 56 | if "Non-Bonded" in line: 57 | oline = nline 58 | elif "Variations" in line: 59 | cline = nline 60 | nline += 1 61 | return data, nline, cline, oline 62 | 63 | 64 | def rel_nbd(data, tb, QBCC=None): 65 | if QBCC is None: 66 | QBCC = np.zeros(len(data), dtype=float) 67 | nmat = [] 68 | nmat = [ndat.split() for ndat in data] 69 | ondat = [] 70 | for i in range(0, len(data)): 71 | nmat[i][0] = str(tb[i]) 72 | nmat[i][3] = '%.6f' % QBCC[i] 73 | new_nb = '{:>4s}{:>3s} {:<3s} {:>9s} {:>9s} {:>9s}'.format( 74 | nmat[i][0], nmat[i][1], nmat[i][2], nmat[i][3], nmat[i][4], nmat[i][5] 75 | ) 76 | ondat.append(new_nb) 77 | return ondat 78 | 79 | 80 | def fepZmatFromFile(filenme, QBCC=None): 81 | qfile = open(filenme) 82 | qdat, nl1, cl1, ol1 = read_files(qfile) 83 | cdat, tb, ta = read_coords(qdat[1:cl1]) 84 | ndat = rel_nbd(qdat[ol1 + 1:], tb, QBCC) 85 | qdat[ol1] = qdat[ol1].replace("AM1 CM1Ax1.14", "CM1Ax1.14TO1.14CM1A-BCC", 1) 86 | target = open(filenme[:-2] + 
'_fep.z', 'w') 87 | target.write(qdat[0]) 88 | for i in range(0, len(cdat)): 89 | target.write(cdat[i] + '\n') 90 | for i in range(cl1, nl1): 91 | target.write(qdat[i]) 92 | for i in range(0, len(ndat)): 93 | target.write(ndat[i] + '\n') 94 | target.close() 95 | return None 96 | 97 | 98 | def fepZmatFromPkl(zmat_dat, filenme, QBCC=None): 99 | qdat, nl1, cl1, ol1 = read_files(zmat_dat) 100 | cdat, tb, ta = read_coords(qdat[1:cl1]) 101 | ndat = rel_nbd(qdat[ol1 + 1:], tb, QBCC) 102 | qdat[ol1] = qdat[ol1].replace("AM1 CM1Ax1.14", "CM1Ax1.14TO1.14CM1A-BCC", 1) 103 | target = open(filenme + '_fep.z', 'w') 104 | target.write(qdat[0]) 105 | for i in range(0, len(cdat)): 106 | target.write(cdat[i] + '\n') 107 | for i in range(cl1, nl1): 108 | target.write(qdat[i]) 109 | for i in range(0, len(ndat)): 110 | target.write(ndat[i] + '\n') 111 | target.close() 112 | return None 113 | 114 | 115 | def BCC_file2zmat(zmat, QBCC, oname): 116 | qfile = open(zmat, 'r+') 117 | qdat, nl1, cl1, ol1 = read_files(qfile) 118 | cdat, tb, ta = read_coords(qdat[1:cl1]) 119 | ndat = rel_nbd(qdat[ol1 + 1:], ta, QBCC) 120 | qdat[ol1] = qdat[ol1].replace("AM1 CM1Ax1.14", "1.14CM1A-LBCC", 1) 121 | qfile.close() 122 | target = open(oname, 'w+') 123 | for i in range(0, ol1 + 1): 124 | target.write(qdat[i]) 125 | for i in range(0, len(ndat)): 126 | target.write(ndat[i] + '\n') 127 | target.close() 128 | return None 129 | -------------------------------------------------------------------------------- /LigParGenPSP/mol_boss.py: -------------------------------------------------------------------------------- 1 | # THIS IS THE HEART OF BCC CORRECTION METHODOLOGY 2 | # THIS MODULE DOES THE BCC ASSIGNMENT BY COLLECTING 3 | # BONDING INFO AND ASSIGNING BCC CORRECTIONS FOR ATOMS 4 | import numpy as np 5 | 6 | 7 | def rev_bnd(bnd): 8 | a, b = bnd.split('-') 9 | return b + '-' + a 10 | 11 | 12 | def sign_bnd(bnd, at): 13 | if bnd == rev_bnd(bnd): 14 | si = 0 15 | else: 16 | si = (-2 * bnd.split('-').index(at)) + 1 17 | return si 18 | 19 | 20 | def get_bcc_types(db, cha, bond): 21 | rtij = [] 22 | mtij = [] 23 | bond['NTIJ'] = [ 24 | str(cha.TY[i - 1]) + '-' + str(cha.TY[j - 1]) for (i, j) in zip(bond.I, bond.J) 25 | ] 26 | for i in bond.NTIJ: 27 | if i == rev_bnd(i): 28 | mtij.append('X-X') 29 | rtij.append(i) 30 | elif i in db.keys(): 31 | rtij.append(i) 32 | mtij.append(i) 33 | elif rev_bnd(i) in db.keys(): 34 | rtij.append(rev_bnd(i)) 35 | mtij.append(rev_bnd(i)) 36 | else: 37 | print('%5s not found in bonds.csv' % i) 38 | mtij.append('U-U') 39 | rtij.append('U-U') 40 | bond['TIJ'] = mtij 41 | bond['MTIJ'] = rtij 42 | bond['AI'] = [str(cha.TY[i - 1]) for i in bond.I] 43 | bond['AJ'] = [str(cha.TY[j - 1]) for j in bond.J] 44 | bond['SI'] = [sign_bnd(bnd, at) for bnd, at in zip(bond.TIJ, bond.AI)] 45 | bond['SJ'] = [sign_bnd(bnd, at) for bnd, at in zip(bond.TIJ, bond.AJ)] 46 | return bond 47 | 48 | 49 | def new_mol_info(db, cha, bond): 50 | # cha = pd.read_csv('CM1AQ', header=None, delim_whitespace=True) 51 | # cha.columns = ['TY', 'Q'] 52 | bond = get_bcc_types(db, cha, bond) 53 | MOLBtype = {} 54 | for an in cha.index: 55 | MOLBtype[an] = list(bond[bond['I'] == an + 1].TIJ) + list( 56 | bond[bond['J'] == an + 1].TIJ 57 | ) 58 | if (cha.TY[an] == 'OS') and ('C-OS' in MOLBtype[an]): 59 | print("Changing OS TO OE") 60 | cha.loc[an, 'TY'] = 'OE' 61 | bond = get_bcc_types(db, cha, bond) 62 | # Seperate Correction for Esters 63 | if (cha.TY[an] == 'C') and ('C-O' in MOLBtype[an]): 64 | if ('C-OS' in MOLBtype[an]) or ('C-OE' in 
MOLBtype[an]): 65 | print("Changing OS TO OE") 66 | cha.loc[an, 'TY'] = 'CE' 67 | bond = get_bcc_types(db, cha, bond) 68 | # Seperate Correction for Amides 69 | if (cha.TY[an] == 'C') and ('C-N' in MOLBtype[an]): 70 | print("Changing C TO CAM") 71 | cha.loc[an, 'TY'] = 'CAM' 72 | bond = get_bcc_types(db, cha, bond) 73 | # Seperate Correction for Aromatic Nitriles 74 | if (cha.TY[an] == 'CZ') and (set(['CA-CZ', 'CZ-NZ']) <= set(MOLBtype[an])): 75 | print(MOLBtype[an]) 76 | print("Changing CZ-NZ to CZA-NZ") 77 | cha.loc[an, 'TY'] = 'CZA' 78 | bond = get_bcc_types(db, cha, bond) 79 | if (cha.TY[an] == 'CZ') and (set(['CT-CZ', 'CZ-NZ']) <= set(MOLBtype[an])): 80 | print(MOLBtype[an]) 81 | print("Changing CZ-NZ to CZT-NZ") 82 | cha.loc[an, 'TY'] = 'CZT' 83 | bond = get_bcc_types(db, cha, bond) 84 | # Seperate Correction for 1,2,3 Amines 85 | if (cha.TY[an] == 'NT') and MOLBtype[an].count('H-NT') == 2: 86 | print(MOLBtype[an]) 87 | print("Changing NT to NP") 88 | cha.loc[an, 'TY'] = 'NP' 89 | bond = get_bcc_types(db, cha, bond) 90 | if (cha.TY[an] == 'NT') and MOLBtype[an].count('H-NT') == 1: 91 | print("Changing NT to NS") 92 | cha.loc[an, 'TY'] = 'NS' 93 | bond = get_bcc_types(db, cha, bond) 94 | if (cha.TY[an] == 'NT') and MOLBtype[an].count('H-NT') == 0: 95 | print("Changing NT to N3") 96 | cha.loc[an, 'TY'] = 'N3' 97 | bond = get_bcc_types(db, cha, bond) 98 | cha = get_bcc_charges(db, bond, cha) 99 | QBCC = np.array(cha.QBCC) 100 | return (bond, cha, QBCC) 101 | 102 | 103 | def get_bcc_charges(db, bond, cha): 104 | bond['IBCC'] = [sign * db[bcc] for sign, bcc in zip(bond.SI, bond.TIJ)] 105 | bond['JBCC'] = [sign * db[bcc] for sign, bcc in zip(bond.SJ, bond.TIJ)] 106 | cha['BCC'] = [ 107 | sum(bond[bond['I'] == an + 1]['IBCC']) + sum(bond[bond['J'] == an + 1]['JBCC']) 108 | for an in cha.index 109 | ] 110 | cha['QBCC'] = cha['Q'] + cha['BCC'] 111 | ars = [i for i in range(0, len(cha.TY)) if not cha['TY'][i].isdigit()] 112 | cha = cha.loc[ars] 113 | return cha 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PolymerStructurePredictor (PSP) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)][1] 2 | 3 | Three-dimensional atomic-level models of polymers are necessary prerequisites for physics-based simulation studies. Polymer structure predictor (PSP) is capable of generating a hierarchy of polymer models, ranging from oligomers to infinite chains to crystals to amorphous models, using a simplified molecular-input line-entry system (SMILES) string of the polymer repeat unit as the primary input. The output structures and accompanying force field (GAFF2/OPLS-AA) parameter files are provided for downstream DFT and MD simulations. 4 | 5 | >PSP generates models for monomers, linear and loop oligomers, infinite polymer chains, crystal, and amorphous structures using SMILES strings. 6 | 7 | ## Contributors 8 | * Harikrishna Sahu 9 | * Huan Tran 10 | * Kuan-Hsuan Shen 11 | * Joseph Montoya 12 | * Rampi Ramprasad 13 | 14 | ## License & copyright 15 | Ramprasad Group, Georgia Tech, USA\ 16 | [Ramprasad Group website](http://ramprasad.mse.gatech.edu/)\ 17 | Licensed under the [MIT License](LICENSE). 18 | 19 | ## Contact 20 | All queries regarding the usage of PSP should be addressed to: **psp-users@groups.gatech.edu**. 21 | 22 | We highly recommend interested individuals join the PSP group. 
This allows you to participate in discussions and keeps you updated on issues, bug fixes, and the latest developments.
23 |
24 | ## Reference
25 | If you use PSP, please cite:\
26 | Sahu, H.; Shen, K.-H.; Montoya, J. H.; Tran, H.; Ramprasad, R. Polymer Structure Predictor (PSP): A Python Toolkit for Predicting Atomic-Level Structural Models for a Range of Polymer Geometries, *J. Chem. Theory Comput.*, **2022**.
27 |
28 | ## Installation
29 | PSP requires the following packages to be installed in order to function properly:
30 | * [RDKit](https://www.rdkit.org/) v2020.09.1.0
31 | * [Open Babel](https://open-babel.readthedocs.io/en/latest/index.html) v3.1.1
32 | * [PACKMOL](http://leandro.iqm.unicamp.br/m3g/packmol/home.shtml) v20.2.2
33 | * [PySIMM](https://pysimm.org/) v0.2.3
34 | * [LAMMPS](https://docs.lammps.org/Manual.html)
35 | * [AmberTools21](https://ambermd.org/AmberTools.php) (optional, only needed for the `get_gaff2()` function in the AmorphousBuilder)
36 | * [LigParGen](http://zarbi.chem.yale.edu/ligpargen/) dependencies (optional, only needed for the `get_opls()` function in the AmorphousBuilder)
37 |
38 | Note that all dependencies must be installed separately and tested to ensure that they function properly. We recommend using Anaconda Python and creating a fresh conda environment for PSP (e.g., `conda create -n MY_ENV_NAME`).
39 |
40 | RDKit and Open Babel are available as conda packages and can be installed using the instructions provided at the following links: (1) [https://anaconda.org/rdkit/rdkit](https://anaconda.org/rdkit/rdkit) and (2) [https://anaconda.org/conda-forge/openbabel](https://anaconda.org/conda-forge/openbabel).
41 |
42 | Detailed instructions for installing the PACKMOL package can be found at the following URL: [http://leandro.iqm.unicamp.br/m3g/packmol/home.shtml](http://leandro.iqm.unicamp.br/m3g/packmol/home.shtml). Make sure to export the path to the PACKMOL executable as the environment variable "PACKMOL\_EXEC" in your ~/.bashrc file.
43 |
44 | LAMMPS can be installed separately or along with PySIMM. Make sure to add the PySIMM package to your PYTHONPATH and to add the PySIMM and LAMMPS command-line tools to your PATH, as described in the PySIMM documentation.
45 |
46 | AmberTools is available as a conda package and can be installed using the instructions provided at the following link: [https://ambermd.org/AmberTools.php](https://ambermd.org/AmberTools.php). Make sure to export the path to the Antechamber executable as the environment variable "ANTECHAMBER\_EXEC" in your ~/.bashrc file.
47 |
48 | After that, source your ~/.bashrc file. PSP looks up the paths to PACKMOL, PySIMM, LAMMPS, and Antechamber while performing its tasks.
49 |
50 | LigParGen and its dependencies: LigParGen requires the BOSS executable. Obtain a copy of it and set the $BOSSdir variable in bash. For more information, see [http://zarbi.chem.yale.edu/ligpargen](http://zarbi.chem.yale.edu/ligpargen) and [http://zarbi.chem.yale.edu/software.html](http://zarbi.chem.yale.edu/software.html). To make LigParGen compatible with PSP, we updated it to include the following features: (1) the ability to store the output files in a user-defined directory; and (2) compatibility with recent versions of the Open Babel (v3.1.1), NetworkX (v2.5), and pandas (v1.2.4) libraries. Note that NetworkX is not installed by the steps above; make sure it is installed as well. The updated LigParGen source code is redistributed as part of the PSP package.
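Because PSP resolves these external tools only at run time, it can save debugging time to verify the environment up front. Below is a minimal sanity-check sketch; the environment variables are the ones described above, while the script itself is purely illustrative and does not ship with PSP:

```python
import os

# Environment variables read by PSP and LigParGen (see the notes above)
for var in ("PACKMOL_EXEC", "ANTECHAMBER_EXEC", "BOSSdir"):
    print(f"{var:16s}: {os.getenv(var, 'NOT SET')}")

# Python packages that must be importable from the PSP conda environment
for mod in ("rdkit", "openbabel", "networkx", "pysimm", "tqdm", "pandas"):
    try:
        __import__(mod)
        print(f"{mod:16s}: OK")
    except ImportError:
        print(f"{mod:16s}: MISSING")
```

The PySIMM and LAMMPS command-line tools are not covered by this check; confirm separately that they are on your PATH.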
51 | 52 | Once all dependencies are installed, clone the PSP repository and install it using the *setup.py* included in the package. 53 | 54 | ```angular2 55 | python setup.py install 56 | ``` 57 | >**NOTE**: A colab notebook that demonstrates the step-by-step installation procedure and installs PSP and its dependencies has been provided. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)][1] 58 | 59 | [1]:https://colab.research.google.com/github/Ramprasad-Group/PSP/blob/master/Colab_notebook/psp_Colab_notebook.ipynb 60 | 61 | -------------------------------------------------------------------------------- /documentation/PSP_user_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/documentation/PSP_user_manual.pdf -------------------------------------------------------------------------------- /psp/AmorphousBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import psp.MD_lib as MDlib 4 | import time 5 | import os 6 | import psp.PSP_lib as bd 7 | from openbabel import openbabel as ob 8 | import glob 9 | import psp.output_lib as lib 10 | from tqdm import tqdm 11 | from LigParGenPSP import Converter 12 | import psp.MoleculeBuilder as mb 13 | import random 14 | 15 | obConversion = ob.OBConversion() 16 | 17 | 18 | class Builder: 19 | def __init__( 20 | self, 21 | Dataframe, 22 | ID_col="ID", 23 | SMILES_col="smiles", 24 | NumMole="Num", 25 | Length="Len", 26 | NumConf="NumConf", 27 | NumModel=1, 28 | LeftCap="LeftCap", 29 | RightCap="RightCap", 30 | Loop="Loop", 31 | OutFile="amor_model", 32 | OutDir="amorphous_models", 33 | OutDir_xyz="molecules", 34 | density=0.65, 35 | tol_dis=2.0, 36 | box_type="c", 37 | box_size=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 38 | incr_per=0.4, 39 | BondInfo=True, 40 | ): 41 | self.Dataframe = Dataframe 42 | self.ID_col = ID_col 43 | self.SMILES_col = SMILES_col 44 | self.NumMole = NumMole 45 | self.Length = Length 46 | self.NumConf = NumConf 47 | self.NumModel = NumModel 48 | self.LeftCap = LeftCap 49 | self.RightCap = RightCap 50 | self.Loop = Loop 51 | self.OutFile = OutFile 52 | self.OutDir = os.path.join(OutDir, "") 53 | self.OutDir_xyz = os.path.join(OutDir, OutDir_xyz, "") 54 | self.OutDir_packmol = os.path.join(OutDir, "packmol", "") 55 | self.OutDir_ligpargen = os.path.join(OutDir, "ligpargen", "") 56 | self.OutDir_pysimm = os.path.join(OutDir, "pysimm", "") 57 | self.density = density 58 | self.tol_dis = tol_dis 59 | self.box_type = box_type 60 | self.box_size = box_size 61 | self.incr_per = incr_per 62 | self.BondInfo = BondInfo 63 | 64 | def Build(self): 65 | start_1 = time.time() 66 | lib.print_psp_info() # Print PSP info 67 | lib.print_input("AmorphousBuilder", self.Dataframe) 68 | if self.box_type == "c": 69 | box_type_ = "Cubic" 70 | else: 71 | box_type_ = "Rectangular" 72 | 73 | print( 74 | "\n", 75 | "Additional information: ", 76 | "\n", 77 | "Number of models: ", 78 | self.NumModel, 79 | "\n", 80 | "Density (g/cm3): ", 81 | self.density, 82 | "\n", 83 | "Tolerance distance (angstrom): ", 84 | self.tol_dis, 85 | "\n", 86 | "Box type: ", 87 | box_type_, 88 | "\n", 89 | "Output directory: ", 90 | self.OutDir, 91 | "\n", 92 | ) 93 | 94 | # location of directory for VASP inputs (polymers) and build a directory 95 | bd.build_dir(self.OutDir) 96 | bd.build_dir(self.OutDir_xyz) 97 | 98 | # PACKMOL 99 | 
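        # Workflow overview for the steps below: MoleculeBuilder first writes
        # one PDB file per oligomer conformer into OutDir_xyz; a PACKMOL input
        # file is then generated and PACKMOL (located through the PACKMOL_EXEC
        # environment variable) packs the requested number of copies of each
        # conformer into the simulation box; the packed PDB is converted to
        # mol2 with Open Babel, and finally VASP (.vasp) and LAMMPS (.data)
        # files are written for each amorphous model.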
packmol_path = os.getenv("PACKMOL_EXEC") 100 | # packmol_path = '/home/hari/.soft/packmol/packmol' 101 | 102 | xyz_gen_pd = pd.DataFrame() 103 | for i in self.Dataframe.index: 104 | df = pd.DataFrame(self.Dataframe.loc[i]).T 105 | 106 | mol = mb.Builder( 107 | df, 108 | ID_col=self.ID_col, 109 | SMILES_col=self.SMILES_col, 110 | LeftCap=self.LeftCap[0], 111 | RightCap=self.RightCap[0], 112 | OutDir=self.OutDir_xyz, 113 | Length=[int(df[self.Length].values)], 114 | NumConf=int(df[self.NumConf].values) * self.NumModel, 115 | Loop=eval(str(df[self.Loop].values[0])), 116 | NCores=-1, 117 | Subscript=True, 118 | ) 119 | results = mol.Build() 120 | xyz_gen_pd = pd.concat([xyz_gen_pd, results]) 121 | 122 | if len(list(set(xyz_gen_pd["Result"].values))) != 1: 123 | xyz_gen_pd.to_csv("molecules.csv") 124 | print( 125 | "Couldn't generate XYZ coordinates of molecules, check 'molecules.csv'" 126 | ) 127 | 128 | XYZ_list, smi_list, NMol_list, NumConf_list = [], [], [], [] 129 | for index, row in self.Dataframe.iterrows(): 130 | # Get number of molecules for each conformer of molecules 131 | NMol_list += [int(row[self.NumMole] / row[self.NumConf])] * row[ 132 | self.NumConf 133 | ] 134 | 135 | # Get SMILES string for oligomers 136 | smiles_each = xyz_gen_pd[xyz_gen_pd['ID'] == row['ID']]['SMILES'].values[0] 137 | smi_list += smiles_each * row[self.NumConf] 138 | 139 | # Get a list of filenames for XYZ coordinates 140 | XYZ_list_ind = glob.glob(self.OutDir_xyz + str(row[self.ID_col]) + "*.pdb") 141 | XYZ_list.append(XYZ_list_ind) 142 | NumConf_list.append(int(row[self.NumConf])) 143 | 144 | # Define boundary conditions 145 | if max(self.box_size) == 0.0: # Box size is not provided 146 | NMol_type = len(NMol_list) 147 | Total_NMol = sum(NMol_list) 148 | total_vol = 0 149 | for i in range(NMol_type): 150 | molar_mass = MDlib.get_molar_mass(smi_list[i]) 151 | total_vol += MDlib.get_vol(self.density, NMol_list[i], molar_mass) 152 | self.box_size = MDlib.get_box_size( 153 | total_vol, box_type=self.box_type, incr_per=self.incr_per 154 | ) 155 | 156 | xmin, xmax, ymin, ymax, zmin, zmax = ( 157 | self.box_size[0], 158 | self.box_size[1], 159 | self.box_size[2], 160 | self.box_size[3], 161 | self.box_size[4], 162 | self.box_size[5], 163 | ) 164 | 165 | fix_dis = self.tol_dis / 2 166 | 167 | ind_mol_count = [0] * len(NumConf_list) 168 | count_model = 0 169 | for model in tqdm(range(1, self.NumModel + 1), desc='Building models ...'): 170 | if self.NumModel > 1: 171 | print("MODEL ", model) 172 | packmol_outdir_model = self.OutDir_packmol[:-1] + '_' + str(model) + "/" 173 | bd.build_dir(packmol_outdir_model) 174 | 175 | XYZ_list_ind_model = [] 176 | count_mol = 0 177 | for ind_list in XYZ_list: 178 | if len(ind_list) >= (count_model + 1) * NumConf_list[count_mol]: 179 | XYZ_list_ind_model.append( 180 | ind_list[ 181 | count_model 182 | * NumConf_list[count_mol]: (count_model + 1) 183 | * NumConf_list[count_mol] 184 | ] 185 | ) 186 | else: 187 | XYZ_list_ind_model.append( 188 | random.sample(ind_list, NumConf_list[count_mol]) 189 | ) 190 | 191 | count_mol += 1 192 | 193 | XYZ_list_model = [ 194 | item for sublist in XYZ_list_ind_model for item in sublist 195 | ] 196 | count_model += 1 197 | else: 198 | bd.build_dir(self.OutDir_packmol) 199 | 200 | packmol_outdir_model = self.OutDir_packmol 201 | XYZ_list_model = [item for sublist in XYZ_list for item in sublist] 202 | # exit() 203 | # PACKMOL input file 204 | MDlib.gen_packmol_inp( 205 | packmol_outdir_model, 206 | self.tol_dis, 207 | XYZ_list_model, 208 | 
NMol_list, 209 | xmin + fix_dis, 210 | xmax - fix_dis, 211 | ymin + fix_dis, 212 | ymax - fix_dis, 213 | zmin + fix_dis, 214 | zmax - fix_dis, 215 | ) 216 | # PACKMOL calculation 217 | command = ( 218 | packmol_path + " < " + os.path.join(packmol_outdir_model, "packmol.inp") 219 | ) 220 | errout = MDlib.run_packmol( 221 | command, os.path.join(packmol_outdir_model, "packmol.out") 222 | ) 223 | 224 | if errout is not None: 225 | print(" Error in packmol calculation") 226 | exit() 227 | elif ( 228 | os.path.exists(os.path.join(packmol_outdir_model, "packmol.pdb")) 229 | is False 230 | ): 231 | print(" Error in packmol calculation") 232 | exit() 233 | 234 | mol = ob.OBMol() 235 | obConversion = ob.OBConversion() 236 | obConversion.SetInAndOutFormats("pdb", "mol2") 237 | obConversion.ReadFile( 238 | mol, os.path.join(packmol_outdir_model, "packmol.pdb") 239 | ) 240 | obConversion.WriteFile( 241 | mol, os.path.join(packmol_outdir_model, "packmol.mol2") 242 | ) 243 | 244 | packmol_xyz = MDlib.read_mol2_xyz( 245 | os.path.join(packmol_outdir_model, "packmol.mol2") 246 | ) 247 | packmol_bond = MDlib.read_mol2_bond( 248 | os.path.join(packmol_outdir_model, "packmol.mol2") 249 | ) 250 | 251 | # Output filename 252 | if self.NumModel > 1: 253 | output_filename = self.OutFile + "_N" + str(count_model) 254 | else: 255 | output_filename = self.OutFile 256 | 257 | MDlib.gen_sys_vasp( 258 | os.path.join(self.OutDir, output_filename + ".vasp"), 259 | packmol_xyz, 260 | xmin, 261 | xmax, 262 | ymin, 263 | ymax, 264 | zmin, 265 | zmax, 266 | ) 267 | MDlib.gen_sys_data( 268 | os.path.join(self.OutDir, output_filename + ".data"), 269 | packmol_xyz, 270 | packmol_bond, 271 | xmin, 272 | xmax, 273 | ymin, 274 | ymax, 275 | zmin, 276 | zmax, 277 | self.BondInfo, 278 | ) 279 | end_1 = time.time() 280 | lib.print_out( 281 | pd.DataFrame(), "Amorphous model", np.round((end_1 - start_1) / 60, 2) 282 | ) 283 | 284 | def get_opls(self, output_fname='amor_opls.lmps', lbcc_charges=True): 285 | print("\nGenerating OPLS parameter file ...\n") 286 | system_pdb_fname = os.path.join(self.OutDir_packmol, "packmol.pdb") 287 | r = MDlib.get_coord_from_pdb(system_pdb_fname) 288 | 289 | bd.build_dir(self.OutDir_ligpargen) 290 | 291 | system_stats = { 292 | 'total_atoms': 0, 293 | 'total_bonds': 0, 294 | 'total_angles': 0, 295 | 'total_dihedrals': 0, 296 | 'total_impropers': 0, 297 | 'total_atom_types': 0, 298 | 'total_bond_types': 0, 299 | 'total_angle_types': 0, 300 | 'total_dihedral_types': 0, 301 | 'total_improper_types': 0, 302 | } 303 | dicts = [] 304 | 305 | # run LigParGen for every pdb file in the OutDir_xyz directory 306 | for index, row in self.Dataframe.iterrows(): 307 | _id = str(row[self.ID_col]) 308 | _length = row[self.Length] 309 | _num = row[self.NumMole] 310 | _conf = 1 # read in only the first conformer 311 | output_prefix = "{}_N{}_C{}".format(_id, _length, _conf) 312 | lig_output_fname = "{}.lmp".format(output_prefix) 313 | data_fname = os.path.join(self.OutDir_ligpargen, lig_output_fname) 314 | 315 | try: 316 | print("LigParGen working on {}.pdb".format(output_prefix)) 317 | Converter.convert( 318 | pdb=os.path.join(self.OutDir_xyz, output_prefix + '.pdb'), 319 | resname=output_prefix, 320 | charge=0, 321 | opt=0, 322 | lbcc=lbcc_charges, 323 | outdir='.', 324 | ) 325 | os.rename(lig_output_fname, data_fname) 326 | except BaseException: 327 | print('problem running LigParGen for {}.pdb.'.format(output_prefix)) 328 | 329 | # quickly read the headers of LigParGen generated LAMMPS 330 | # files to count total 
number of atoms/bonds/angles...etc 331 | ( 332 | natoms, 333 | nbonds, 334 | nangles, 335 | ndihedrals, 336 | nimpropers, 337 | natom_types, 338 | nbond_types, 339 | nangle_types, 340 | ndihedral_types, 341 | nimproper_types, 342 | ) = MDlib.read_lmps_header(data_fname) 343 | 344 | system_stats['total_atom_types'] += natom_types 345 | system_stats['total_bond_types'] += nbond_types 346 | system_stats['total_angle_types'] += nangle_types 347 | system_stats['total_dihedral_types'] += ndihedral_types 348 | system_stats['total_improper_types'] += nimproper_types 349 | system_stats['total_atoms'] += natoms * _num 350 | system_stats['total_bonds'] += nbonds * _num 351 | system_stats['total_angles'] += nangles * _num 352 | system_stats['total_dihedrals'] += ndihedrals * _num 353 | system_stats['total_impropers'] += nimpropers * _num 354 | 355 | # this switcher dict is to navigate through and store info for each section of a LAMMPS file 356 | switcher = { 357 | 'Masses': [], 358 | 'Pair Coeffs': [], 359 | 'Bond Coeffs': [], 360 | 'Angle Coeffs': [], 361 | 'Dihedral Coeffs': [], 362 | 'Improper Coeffs': [], 363 | 'Atoms': [], 364 | 'Bonds': [], 365 | 'Angles': [], 366 | 'Dihedrals': [], 367 | 'Impropers': [], 368 | 'Num': _num, 369 | } 370 | current_section = None 371 | 372 | # read all the info in the LigParGen generated LAMMPS file for modification 373 | with open(data_fname, 'rt') as lines: 374 | for line in lines: 375 | if any(x in line for x in switcher.keys()): 376 | current_section = line.strip() 377 | elif line == '\n' or not current_section: 378 | continue 379 | else: 380 | section_list = switcher.get( 381 | current_section, 'Invalid current section' 382 | ) 383 | section_list.append(line.split()) 384 | dicts.append(switcher) 385 | 386 | lammps_output = os.path.join(self.OutDir, output_fname) 387 | MDlib.write_lammps_ouput(lammps_output, r, self.box_size, system_stats, dicts) 388 | print("\nOPLS parameter file generated.") 389 | 390 | def get_gaff2( 391 | self, output_fname='amor_gaff2.lmps', atom_typing='pysimm', am1bcc_charges=False, swap_dict=None 392 | ): 393 | print("\nGenerating GAFF2 parameter file ...\n") 394 | system_pdb_fname = os.path.join(self.OutDir_packmol, "packmol.pdb") 395 | r = MDlib.get_coord_from_pdb(system_pdb_fname) 396 | 397 | bd.build_dir(self.OutDir_pysimm) 398 | 399 | system_stats = { 400 | 'total_atoms': 0, 401 | 'total_bonds': 0, 402 | 'total_angles': 0, 403 | 'total_dihedrals': 0, 404 | 'total_impropers': 0, 405 | 'total_atom_types': 0, 406 | 'total_bond_types': 0, 407 | 'total_angle_types': 0, 408 | 'total_dihedral_types': 0, 409 | 'total_improper_types': 0, 410 | } 411 | dicts = [] 412 | 413 | from pysimm import system, forcefield 414 | 415 | # run Pysimm for every cml (converted from pdb with Babel) file in the OutDir_xyz directory 416 | for index, row in self.Dataframe.iterrows(): 417 | _id = str(row[self.ID_col]) 418 | _length = row[self.Length] 419 | _num = row[self.NumMole] 420 | _conf = 1 # read in only the first conformer 421 | output_prefix = "{}_N{}_C{}".format(_id, _length, _conf) 422 | pdb_file = os.path.join(self.OutDir_xyz, "{}.pdb".format(output_prefix)) 423 | cml_file = os.path.join(self.OutDir_xyz, "{}.cml".format(output_prefix)) 424 | 425 | obConversion.SetInAndOutFormats("pdb", "cml") 426 | mol = ob.OBMol() 427 | obConversion.ReadFile(mol, pdb_file) 428 | obConversion.WriteFile(mol, cml_file) 429 | 430 | data_fname = os.path.join( 431 | self.OutDir_pysimm, "{}.lmp".format(output_prefix) 432 | ) 433 | 434 | try: 435 | print("Pysimm working on 
{}".format(cml_file)) 436 | s = system.read_cml(cml_file) 437 | except BaseException: 438 | print('problem reading {} for Pysimm.'.format(cml_file)) 439 | exit() 440 | 441 | f = forcefield.Gaff2() 442 | if atom_typing == 'pysimm': 443 | if am1bcc_charges: 444 | print('AM1BCC method is not available with pysimm, using gasteiger method instead') 445 | for b in s.bonds: 446 | if b.a.bonds.count == 3 and b.b.bonds.count == 3: 447 | b.order = 4 448 | s.apply_forcefield(f, charges='gasteiger') 449 | elif atom_typing == 'antechamber': 450 | mol2_file = os.path.join( 451 | self.OutDir_xyz, "{}.mol2".format(output_prefix) 452 | ) 453 | obConversion.SetInAndOutFormats("pdb", "mol2") 454 | mol = ob.OBMol() 455 | obConversion.ReadFile(mol, pdb_file) 456 | obConversion.WriteFile(mol, mol2_file) 457 | 458 | print("Antechamber working on {}".format(mol2_file)) 459 | MDlib.get_type_from_antechamber(s, mol2_file, 'gaff2', f, am1bcc_charges, swap_dict) 460 | s.pair_style = 'lj' 461 | s.apply_forcefield(f, charges=None if am1bcc_charges else 'gasteiger', skip_ptypes=True) 462 | else: 463 | print( 464 | 'Invalid atom typing option, please select pysimm or antechamber.' 465 | ) 466 | exit() 467 | s.write_lammps(data_fname) 468 | 469 | # quickly read the headers of Pysimm generated LAMMPS 470 | # files to count total number of atoms/bonds/angles...etc 471 | ( 472 | natoms, 473 | nbonds, 474 | nangles, 475 | ndihedrals, 476 | nimpropers, 477 | natom_types, 478 | nbond_types, 479 | nangle_types, 480 | ndihedral_types, 481 | nimproper_types, 482 | ) = MDlib.read_lmps_header(data_fname) 483 | 484 | system_stats['total_atom_types'] += natom_types 485 | system_stats['total_bond_types'] += nbond_types 486 | system_stats['total_angle_types'] += nangle_types 487 | system_stats['total_dihedral_types'] += ndihedral_types 488 | system_stats['total_improper_types'] += nimproper_types 489 | system_stats['total_atoms'] += natoms * _num 490 | system_stats['total_bonds'] += nbonds * _num 491 | system_stats['total_angles'] += nangles * _num 492 | system_stats['total_dihedrals'] += ndihedrals * _num 493 | system_stats['total_impropers'] += nimpropers * _num 494 | 495 | # this switcher dict is to navigate through and store info for each section of a LAMMPS file 496 | switcher = { 497 | 'Masses': [], 498 | 'Pair Coeffs': [], 499 | 'Bond Coeffs': [], 500 | 'Angle Coeffs': [], 501 | 'Dihedral Coeffs': [], 502 | 'Improper Coeffs': [], 503 | 'Atoms': [], 504 | 'Velocities': [], 505 | 'Bonds': [], 506 | 'Angles': [], 507 | 'Dihedrals': [], 508 | 'Impropers': [], 509 | 'Num': _num, 510 | } 511 | current_section = None 512 | 513 | # read all the info in the Pysimm generated LAMMPS file for modification 514 | with open(data_fname, 'rt') as lines: 515 | for line in lines: 516 | if any(x in line for x in switcher.keys()): 517 | current_section = line.strip() 518 | elif line == '\n' or not current_section: 519 | continue 520 | else: 521 | section_list = switcher.get( 522 | current_section, 'Invalid current section' 523 | ) 524 | section_list.append(line.split()) 525 | dicts.append(switcher) 526 | 527 | lammps_output = os.path.join(self.OutDir, output_fname) 528 | MDlib.write_lammps_ouput(lammps_output, r, self.box_size, system_stats, dicts) 529 | print("\nGAFF2 parameter file generated.") 530 | -------------------------------------------------------------------------------- /psp/ChainBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import 
psp.PSP_lib as bd 4 | from openbabel import openbabel as ob 5 | import os 6 | import shutil 7 | import time 8 | import multiprocessing 9 | from joblib import Parallel, delayed 10 | import psp.output_lib as lib 11 | from tqdm import tqdm 12 | 13 | obConversion = ob.OBConversion() 14 | obConversion.SetInAndOutFormats("mol", "xyz") 15 | 16 | 17 | class Builder: 18 | def __init__( 19 | self, 20 | Dataframe, 21 | NumConf=1, 22 | Length=['n'], 23 | MonomerAng='medium', 24 | DimerAng='low', 25 | Steps=20, 26 | Substeps=10, 27 | NCores=0, 28 | Method='SA', 29 | ID_col='ID', 30 | SMILES_col='smiles', 31 | IntraChainCorr=1, 32 | Tol_ChainCorr=50, 33 | Inter_Chain_Dis=12, 34 | OutDir='chains', 35 | ): 36 | self.ID_col = ID_col 37 | self.SMILES_col = SMILES_col 38 | self.OutDir = OutDir 39 | self.Dataframe = Dataframe 40 | self.NumConf = NumConf 41 | self.Length = Length 42 | self.MonomerAng = MonomerAng 43 | self.DimerAng = DimerAng 44 | self.Steps = Steps 45 | self.Substeps = Substeps 46 | self.NCores = NCores 47 | self.Method = Method 48 | self.IntraChainCorr = IntraChainCorr 49 | self.Tol_ChainCorr = Tol_ChainCorr 50 | self.Inter_Chain_Dis = Inter_Chain_Dis 51 | 52 | if self.Method not in ['SA', 'Dimer']: 53 | print("Error: please check keyword for * method ") 54 | print("SA == simulated annealing") 55 | print("Dimer == dimerization") 56 | exit() 57 | 58 | # list of molecules name and CORRECT/WRONG 59 | def BuildChain(self): 60 | start_1 = time.time() 61 | lib.print_psp_info() # Print PSP info 62 | lib.print_input("ChainBuilder", self.Dataframe) 63 | if self.NCores <= 0: 64 | ncore_print = 'All' 65 | else: 66 | ncore_print = self.NCores 67 | if self.Method != 'SA': 68 | self.Steps = 'NA' 69 | self.Substeps = 'NA' 70 | 71 | print( 72 | "\n", 73 | "Additional information: ", 74 | "\n", 75 | "Length of oligomers: ", 76 | self.Length, 77 | "\n", 78 | "Method: ", 79 | self.Method, 80 | "| Steps: ", 81 | self.Steps, 82 | "| Substeps: ", 83 | self.Substeps, 84 | "\n", 85 | "Intrachain correction: ", 86 | self.IntraChainCorr, 87 | "\n", 88 | "Tolerance for intrachain correction: ", 89 | self.Tol_ChainCorr, 90 | "\n", 91 | "Number of cores: ", 92 | ncore_print, 93 | "\n", 94 | "Output directory: ", 95 | self.OutDir, 96 | "\n", 97 | ) 98 | 99 | # Input Parameters 100 | intense = np.arange(-180, 180, 10) 101 | medium = [ 102 | 0, 103 | 30, 104 | -30, 105 | 45, 106 | -45, 107 | 60, 108 | -60, 109 | 90, 110 | 120, 111 | -120, 112 | 135, 113 | -135, 114 | 150, 115 | -150, 116 | 180, 117 | ] 118 | low = [0, 45, -45, 60, -60, 90, 120, -120, 180] 119 | 120 | # Directories 121 | # Working directory 122 | bd.build_dir('work_dir/') 123 | 124 | # location of input XYZ files 125 | xyz_in_dir = 'work_dir/xyz-in/' 126 | bd.build_dir(xyz_in_dir) 127 | 128 | xyz_tmp_dir = 'work_dir/xyz-temp/' 129 | bd.build_dir(xyz_tmp_dir) 130 | 131 | # location of directory for VASP inputs (polymers) and build a directory 132 | vasp_out_dir = os.path.join(self.OutDir, "") 133 | bd.build_dir(vasp_out_dir) 134 | 135 | list_out_xyz = 'output_CB.csv' 136 | chk_tri = [] 137 | ID = self.ID_col 138 | SMILES = self.SMILES_col 139 | df = self.Dataframe.copy() 140 | df[ID] = df[ID].apply(str) 141 | 142 | rot_angles_monomer = vars()[self.MonomerAng] 143 | rot_angles_dimer = vars()[self.DimerAng] 144 | 145 | if self.NCores == 0: 146 | self.NCores = multiprocessing.cpu_count() - 1 147 | print("\n Polymer chain building started...\n") 148 | result = Parallel(n_jobs=self.NCores)( 149 | delayed(bd.build_polymer)( 150 | unit_name, 151 | df, 152 | ID, 153 
| SMILES, 154 | xyz_in_dir, 155 | xyz_tmp_dir, 156 | vasp_out_dir, 157 | rot_angles_monomer, 158 | rot_angles_dimer, 159 | self.Steps, 160 | self.Substeps, 161 | self.NumConf, 162 | self.Length, 163 | self.Method, 164 | self.IntraChainCorr, 165 | self.Tol_ChainCorr, 166 | self.Inter_Chain_Dis, 167 | ) 168 | for unit_name in tqdm(df[ID].values, desc='Building models ...',) 169 | ) 170 | for i in result: 171 | chk_tri.append([i[0], i[1]]) # i[2] 172 | 173 | chk_tri = pd.DataFrame(chk_tri, columns=['ID', 'Result']) # Conformers 174 | chk_tri.to_csv(list_out_xyz) 175 | 176 | # Delete empty directory 177 | for index, row in chk_tri.iterrows(): 178 | if row['Result'] != 'SUCCESS': 179 | os.rmdir(vasp_out_dir + row['ID'] + '/') 180 | 181 | # Delete work directory 182 | if os.path.isdir('work_dir/'): 183 | shutil.rmtree('work_dir/') 184 | 185 | end_1 = time.time() 186 | lib.print_out(chk_tri, "Polymer chain", np.round((end_1 - start_1) / 60, 2)) 187 | return chk_tri 188 | -------------------------------------------------------------------------------- /psp/CrystalBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | from scipy.spatial.distance import cdist 5 | import time 6 | import multiprocessing 7 | from joblib import Parallel, delayed 8 | import psp.PSP_lib as bd 9 | import psp.output_lib as lib 10 | from tqdm import tqdm 11 | 12 | 13 | class Builder: 14 | def __init__( 15 | self, 16 | VaspInp_list, 17 | NSamples=5, 18 | InputRadius='auto', 19 | MinAtomicDis=2.0, 20 | OutDir='crystals', 21 | Polymer=True, 22 | Optimize=False, 23 | NumCandidate=50, 24 | NCores=0, 25 | ): 26 | self.VaspInp_list = VaspInp_list 27 | self.NSamples = NSamples 28 | self.InputRadius = InputRadius 29 | self.MinAtomicDis = MinAtomicDis 30 | self.OutDir = os.path.join(OutDir, "") 31 | self.NCores = NCores 32 | self.Polymer = Polymer 33 | self.Optimize = Optimize 34 | self.NumCandidate = NumCandidate 35 | 36 | def BuildCrystal(self): 37 | start_1 = time.time() 38 | lib.print_psp_info() # Print PSP info 39 | lib.print_input("CrystalBuilder") 40 | if self.Optimize is False: 41 | self.NumCandidate == 'All' 42 | if self.NCores <= 0: 43 | ncore_print = 'All' 44 | else: 45 | ncore_print = self.NCores 46 | 47 | print( 48 | " ----------------------------------------------- INPUT --------------------------------------------- ", 49 | "\n", 50 | "List of chain models (POSCAR): ", 51 | self.VaspInp_list, 52 | "\n", 53 | "Are they infinite polymer chains?: ", 54 | self.Polymer, 55 | "\n", 56 | "Number of samples: ", 57 | self.NSamples, 58 | "\n", 59 | "Optimize models: ", 60 | self.Optimize, 61 | "\n", 62 | "Number of models to be selected: ", 63 | self.NumCandidate, 64 | "\n", 65 | "Minimum atomic distance (angstrom): ", 66 | self.MinAtomicDis, 67 | "\n", 68 | "Number of cores: ", 69 | ncore_print, 70 | "\n", 71 | "Output directory: ", 72 | self.OutDir, 73 | "\n", 74 | ) 75 | 76 | build_dir(self.OutDir) 77 | # result = [] 78 | 79 | if self.NCores == 0: 80 | self.NCores = multiprocessing.cpu_count() - 1 81 | 82 | NCores_opt = 1 83 | NCores = self.NCores 84 | 85 | if self.Polymer is True: 86 | if isinstance(self.NSamples, int): 87 | print( 88 | ' maximum number of possible crustals for each polymer chain: ', 89 | self.NSamples * self.NSamples * self.NSamples, 90 | "\n", 91 | ) 92 | else: 93 | print( 94 | ' maximum number of possible crustals for each polymer chain: ', 95 | len(self.NSamples[0]) 96 | * len(self.NSamples[1]) 97 | * 
len(self.NSamples[2]), 98 | "\n", 99 | ) 100 | else: 101 | if isinstance(self.NSamples, int): 102 | print( 103 | ' maximum number of possible crustals for each chain: ', 104 | self.NSamples ** 8, 105 | "\n", 106 | ) 107 | else: 108 | print( 109 | ' maximum number of possible crustals for each polymer chain: ', 110 | len(self.NSamples[0]) 111 | * len(self.NSamples[1]) 112 | * len(self.NSamples[2]) 113 | * len(self.NSamples[3]) 114 | * len(self.NSamples[4]) 115 | * len(self.NSamples[5]) 116 | * len(self.NSamples[6]) 117 | * len(self.NSamples[7]), 118 | "\n", 119 | ) 120 | 121 | if self.Polymer is True: 122 | result = Parallel(n_jobs=NCores)( 123 | delayed(CrystalBuilderMainPolymer)( 124 | VaspInp, 125 | self.NSamples, 126 | self.InputRadius, 127 | self.MinAtomicDis, 128 | self.OutDir, 129 | self.Optimize, 130 | self.NumCandidate, 131 | NCores_opt, 132 | ) 133 | for VaspInp in tqdm(self.VaspInp_list, desc="Building models ...") 134 | ) 135 | else: 136 | result = Parallel(n_jobs=NCores)( 137 | delayed(CrystalBuilderMain)( 138 | VaspInp, 139 | self.NSamples, 140 | self.InputRadius, 141 | self.MinAtomicDis, 142 | self.OutDir, 143 | self.Optimize, 144 | self.NumCandidate, 145 | NCores_opt, 146 | ) 147 | for VaspInp in tqdm(self.VaspInp_list, desc="Building models ...") 148 | ) 149 | 150 | output = [] 151 | for i in result: 152 | output.append([i[0].replace('.vasp', ''), i[1], i[2]]) 153 | 154 | output = pd.DataFrame(output, columns=['ID', 'Count', 'radius']) 155 | end_1 = time.time() 156 | lib.print_out(output, "Crystal model", np.round((end_1 - start_1) / 60, 2)) 157 | return output 158 | 159 | 160 | def readvasp(inputvasp): 161 | basis_vec = [] 162 | Num_atom = [] 163 | xyz_coordinates = [] 164 | with open(inputvasp, 'r') as f: 165 | content = [line.rstrip() for line in f] 166 | file_info = content[0] 167 | for vec in content[2:5]: 168 | basis_vec.append(vec.split()) 169 | basis_vec = pd.DataFrame(basis_vec) 170 | for atoms in content[5:7]: 171 | Num_atom.append(atoms.split()) 172 | Num_atom = pd.DataFrame(Num_atom) 173 | 174 | nats = 0 175 | for nat in np.array(Num_atom.iloc[1]): 176 | nats += int(nat) 177 | 178 | # Do not read all the lines in the POSCAR generated by VASP. 
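    # POSCAR layout assumed here: line 0 is a comment, line 1 the scaling
    # factor, lines 2-4 the lattice vectors, line 5 the element symbols,
    # line 6 the per-element atom counts, and line 7 the coordinate mode
    # ('Direct' or 'Cartesian'). The nats coordinate lines therefore start
    # at line 8, which is why only content[8:8 + nats] is read below (any
    # trailing blocks, such as velocities, are ignored).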
179 | for xyz in content[8: 8 + nats]: 180 | xyz_coordinates.append(xyz.split()) 181 | 182 | # There are two modes in writing coordinated, Direct and Cartesian 183 | if str(content[7]).startswith('D'): 184 | rprim = np.array(basis_vec) 185 | xred = np.array(xyz_coordinates) 186 | xcart = np.matmul( 187 | np.transpose(rprim).astype(float), np.transpose(xred).astype(float) 188 | ) 189 | xyz_coordinates = pd.DataFrame(np.transpose(xcart)).astype(float) 190 | elif str(content[7]).startswith('C'): 191 | xyz_coordinates = pd.DataFrame(xyz_coordinates).astype(float) 192 | xyz_coordinates.columns = [1, 2, 3] 193 | return file_info, basis_vec, Num_atom, xyz_coordinates 194 | 195 | 196 | # Center of origin + peri_circle 197 | def Center_XY_r(xyz_coordinates, angle, r_cricle): 198 | xyz_copy = xyz_coordinates.copy() 199 | X_avg = xyz_copy[1].mean() 200 | Y_avg = xyz_copy[2].mean() 201 | xyz_copy[1] = xyz_copy[1] - X_avg + np.cos(np.deg2rad(angle)) * r_cricle 202 | xyz_copy[2] = xyz_copy[2] - Y_avg + np.sin(np.deg2rad(angle)) * r_cricle 203 | return xyz_copy 204 | 205 | 206 | def create_crystal_vasp( 207 | filename, 208 | first_poly, 209 | second_poly, 210 | Num_atom, 211 | basis_vec, 212 | file_info, 213 | cry_info, 214 | MinAtomicDis, 215 | Polymer=True, 216 | ): 217 | crystal_struc = pd.DataFrame() 218 | row1 = 0 219 | for col in Num_atom.columns: 220 | crystal_struc = pd.concat( 221 | [ 222 | crystal_struc, 223 | first_poly.loc[row1: row1 + int(Num_atom[col].values[1]) - 1], 224 | second_poly.loc[row1: row1 + int(Num_atom[col].values[1]) - 1], 225 | ] 226 | ) 227 | row1 += int(Num_atom[col].values[1]) 228 | 229 | Crystal_Num_atom = Num_atom.copy() 230 | Crystal_Num_atom.loc[1] = 2 * Crystal_Num_atom.loc[1].astype(int) 231 | keep_space = MinAtomicDis # in angstrom 232 | 233 | crystal_struc[1] = crystal_struc[1] - crystal_struc[1].min() + keep_space / 2 234 | crystal_struc[2] = crystal_struc[2] - crystal_struc[2].min() + keep_space / 2 235 | 236 | with open(filename, 'w') as f: 237 | f.write(file_info + ' (' + cry_info + ')\n') 238 | f.write('1.0' + '\n') 239 | a_vec = crystal_struc[1].max() - crystal_struc[1].min() + keep_space 240 | b_vec = crystal_struc[2].max() - crystal_struc[2].min() + keep_space 241 | 242 | if Polymer is True: 243 | c_vec = basis_vec.loc[2, 2] 244 | else: 245 | c_vec = crystal_struc[3].max() - crystal_struc[3].min() + keep_space 246 | 247 | f.write(' ' + str(a_vec) + ' ' + str(0.0) + ' ' + str(0.0) + '\n') 248 | f.write(' ' + str(0.0) + ' ' + str(b_vec) + ' ' + str(0.0) + '\n') 249 | f.write(' ' + str(0.0) + ' ' + str(0.0) + ' ' + str(c_vec) + '\n') 250 | 251 | f.write(Crystal_Num_atom.to_string(header=False, index=False)) 252 | f.write('\nCartesian\n') 253 | f.write(crystal_struc.to_string(header=False, index=False)) 254 | 255 | 256 | # Translation 257 | # INPUT: XYZ-coordinates and distance 258 | # OUTPUT: A new sets of XYZ-coordinates 259 | def tl(unit, dis): 260 | unit_copy = unit.copy() 261 | unit_copy[3] = unit_copy[3] + dis # Z direction 262 | return unit_copy 263 | 264 | 265 | # Distance between two points 266 | def CalDis(x1, x2, x3, y1, y2, y3): 267 | return np.sqrt((x1 - y1) ** 2 + (x2 - y2) ** 2 + (x3 - y3) ** 2) 268 | 269 | 270 | # This function try to create a directory 271 | # If it fails, the program will be terminated. 
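# (In the current implementation the OSError is caught and ignored, so an
# existing directory is simply reused rather than terminating the run.)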
272 | def build_dir(path): 273 | try: 274 | # os.mkdir(path) 275 | os.makedirs(path) 276 | except OSError: 277 | pass 278 | 279 | 280 | # Rotate on XY plane 281 | # INPUT: XYZ-coordinates and angle in Degree 282 | # OUTPUT: A new sets of XYZ-coordinates 283 | def rotateXY(xyz_coordinates, theta): # XYZ coordinates and angle 284 | unit = xyz_coordinates.copy() 285 | R_z = np.array( 286 | [ 287 | [np.cos(theta * np.pi / 180.0), -np.sin(theta * np.pi / 180.0)], 288 | [np.sin(theta * np.pi / 180.0), np.cos(theta * np.pi / 180.0)], 289 | ] 290 | ) 291 | oldXYZ = unit[[1, 2]].copy() 292 | XYZcollect = [] 293 | for eachatom in np.arange(oldXYZ.values.shape[0]): 294 | rotate_each = oldXYZ.iloc[eachatom].values.dot(R_z) 295 | XYZcollect.append(rotate_each) 296 | newXYZ = pd.DataFrame(XYZcollect) 297 | unit[[1, 2]] = newXYZ[[0, 1]] 298 | return unit 299 | 300 | 301 | # for VaspInp in VaspInp_list: 302 | def CrystalBuilderMainPolymer( 303 | VaspInp, 304 | NSamples, 305 | Input_radius, 306 | MinAtomicDis, 307 | OutDir, 308 | Optimize, 309 | NumCandidate, 310 | NCores_opt, 311 | ): 312 | file_info, basis_vec, Num_atom, xyz_coordinates = readvasp( 313 | VaspInp.replace('.vasp', '') + '.vasp' 314 | ) 315 | VaspInp = VaspInp.split('/')[-1].replace('.vasp', '') 316 | print(" Crystal model building started for", VaspInp, "...") 317 | build_dir(OutDir + VaspInp) # .split('/')[-1]) 318 | 319 | if isinstance(NSamples, int): 320 | samples = NSamples - 1 321 | tm = np.around( 322 | np.arange( 323 | 0, 324 | max(xyz_coordinates[3].values) 325 | - min(xyz_coordinates[3].values) 326 | + (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 327 | / samples, 328 | (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 329 | / samples, 330 | ), 331 | decimals=2, 332 | ) 333 | rm1 = np.around(np.arange(0, 180 + (180 / samples), 180 / samples), decimals=1) 334 | rm2 = np.around( 335 | np.arange(0, 360 + (360 / samples), 360 / samples), decimals=1 336 | ) # 0 and 180 degree creates problems 337 | 338 | # Total samples 339 | samp = [tm, rm1, rm2] 340 | 341 | # Number of digits in total number of crystal models 342 | digits = bd.len_digit_number(NSamples * NSamples * NSamples) 343 | 344 | elif isinstance(NSamples, list): 345 | if len(NSamples) == 3 and isinstance(NSamples[0], list) is True: 346 | samp = NSamples.copy() 347 | # Number of digits in total number of crystal models 348 | digits = bd.len_digit_number(len(samp[0]) * len(samp[1]) * len(samp[2])) 349 | else: 350 | print("There is an error in inputs: Check 'NSamples'") 351 | exit() 352 | else: 353 | print("There is an error in inputs: Check 'NSamples'") 354 | exit() 355 | 356 | first_poly = Center_XY_r(xyz_coordinates, 0.0, 0.0) 357 | 358 | # Calculate distance between two chains 359 | if Input_radius == 'auto': 360 | radius = ( 361 | np.sqrt( 362 | ( 363 | (first_poly[1].max() - first_poly[1].min()) 364 | * (first_poly[1].max() - first_poly[1].min()) 365 | ) 366 | + ( 367 | (first_poly[2].max() - first_poly[2].min()) 368 | * (first_poly[2].max() - first_poly[2].min()) 369 | ) 370 | ) 371 | + MinAtomicDis 372 | ) 373 | 374 | else: 375 | radius = float(Input_radius) 376 | 377 | count = 0 378 | for i in tqdm(samp[0], desc=VaspInp): 379 | for j in samp[2]: 380 | for k in samp[1]: 381 | second_poly_tl = tl(xyz_coordinates, i) 382 | second_poly_rm1 = rotateXY(second_poly_tl, j) 383 | second_poly_rm2 = Center_XY_r(second_poly_rm1, k, radius) 384 | 385 | if Input_radius == 'auto': 386 | # Calculate distance between atoms in first_unit and second_unit 
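                    # In 'auto' mode the inter-chain radius is refined
                    # iteratively below: starting from the bounding-box
                    # estimate computed above, the second chain is pushed
                    # outward (radius += 0.4 A) whenever the closest contact
                    # falls below MinAtomicDis, and pulled inward
                    # (radius -= 0.4 A) whenever it exceeds
                    # MinAtomicDis + 0.5 A, until the minimum interatomic
                    # separation lies within [MinAtomicDis, MinAtomicDis + 0.5)
                    # or the radius becomes too small to shrink further.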
387 | dist = cdist( 388 | first_poly[[1, 2, 3]].values, second_poly_rm2[[1, 2, 3]].values, 389 | ) 390 | dist[np.isnan(dist)] = 0.0 391 | dist = dist.flatten() 392 | 393 | adj_radius = radius - (min(dist) - MinAtomicDis) 394 | second_poly_rm2 = Center_XY_r(second_poly_rm1, k, adj_radius) 395 | 396 | dist = cdist( 397 | first_poly[[1, 2, 3]].values, second_poly_rm2[[1, 2, 3]].values, 398 | ) 399 | dist[np.isnan(dist)] = 0.0 400 | dist = dist.flatten() 401 | while min(dist) < MinAtomicDis or min(dist) >= MinAtomicDis + 0.5: 402 | if min(dist) < MinAtomicDis: 403 | adj_radius += 0.4 404 | second_poly_rm2 = Center_XY_r( 405 | second_poly_rm1, k, adj_radius 406 | ) 407 | dist = cdist( 408 | first_poly[[1, 2, 3]].values, 409 | second_poly_rm2[[1, 2, 3]].values, 410 | ) 411 | dist[np.isnan(dist)] = 0.0 412 | dist = dist.flatten() 413 | elif min(dist) >= MinAtomicDis + 0.5: 414 | adj_radius -= 0.4 415 | if adj_radius < 0.5: 416 | break 417 | second_poly_rm2 = Center_XY_r( 418 | second_poly_rm1, k, adj_radius 419 | ) 420 | dist = cdist( 421 | first_poly[[1, 2, 3]].values, 422 | second_poly_rm2[[1, 2, 3]].values, 423 | ) 424 | dist[np.isnan(dist)] = 0.0 425 | dist = dist.flatten() 426 | 427 | count += 1 428 | create_crystal_vasp( 429 | os.path.join( 430 | OutDir, 431 | VaspInp, 432 | 'cryst_out-' + str(count).zfill(digits) + '.vasp', 433 | ), 434 | first_poly, 435 | second_poly_rm2, 436 | Num_atom, 437 | basis_vec, 438 | file_info, 439 | 'CrystalBuilder Info:: Translation: ' 440 | + str(i) 441 | + '; ' 442 | + 'Rotation 1 ' 443 | + str(j) 444 | + '; ' 445 | + 'Rotation 2 ' 446 | + str(k), 447 | MinAtomicDis, 448 | ) 449 | print(" Crystal model building completed for", VaspInp) 450 | if Optimize is True: 451 | print(" Optimizing crystal models started for", VaspInp, "...") 452 | bd.screen_Candidates( 453 | OutDir + VaspInp, NumCandidate=NumCandidate, NCores_opt=NCores_opt 454 | ) 455 | print(" Optimizing crystal models completed for", VaspInp) 456 | return VaspInp, count, radius 457 | 458 | 459 | # for VaspInp in VaspInp_list: 460 | def CrystalBuilderMain( 461 | VaspInp, 462 | NSamples, 463 | Input_radius, 464 | MinAtomicDis, 465 | OutDir, 466 | Optimize, 467 | NumCandidate, 468 | NCores_opt, 469 | ): 470 | file_info, basis_vec, Num_atom, xyz_coordinates = readvasp( 471 | VaspInp.replace('.vasp', '') + '.vasp' 472 | ) 473 | VaspInp = VaspInp.split('/')[-1].replace('.vasp', '') 474 | print(" Crystal model building started for", VaspInp, "...") 475 | build_dir(OutDir + VaspInp) # .split('/')[-1]) 476 | 477 | if isinstance(NSamples, int): 478 | samples = NSamples - 1 479 | tm = np.around( 480 | np.arange( 481 | 0, 482 | max(xyz_coordinates[3].values) 483 | - min(xyz_coordinates[3].values) 484 | + (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 485 | / samples, 486 | (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 487 | / samples, 488 | ), 489 | decimals=2, 490 | ) 491 | rm1 = np.around(np.arange(0, 180 + (180 / samples), 180 / samples), decimals=1) 492 | rm2 = np.around( 493 | np.arange(0, 360 + (360 / samples), 360 / samples), decimals=1 494 | ) # Rotation in X and Y axes 495 | 496 | # Total samples 497 | samp = [tm, rm1, rm2, rm2, rm2, rm2, rm2, rm2] 498 | 499 | # Number of digits in total number of crystal models 500 | digits = bd.len_digit_number(NSamples ** 8) 501 | 502 | elif isinstance(NSamples, list): 503 | if len(NSamples) == 8 and isinstance(NSamples[0], list) is True: 504 | samp = NSamples.copy() 505 | 506 | # Number of digits in total number of crystal 
models 507 | digits = bd.len_digit_number( 508 | len(samp[0]) 509 | * len(samp[1]) 510 | * len(samp[2]) 511 | * len(samp[3]) 512 | * len(samp[4]) 513 | * len(samp[5]) 514 | * len(samp[6]) 515 | * len(samp[7]) 516 | ) 517 | 518 | else: 519 | print("There is an error in inputs: Check 'NSamples'") 520 | exit() 521 | else: 522 | print("There is an error in inputs: Check 'NSamples'") 523 | exit() 524 | 525 | first_poly = Center_XY_r(xyz_coordinates, 0.0, 0.0) 526 | 527 | # Calculate distance between two chains 528 | if Input_radius == 'auto': 529 | radius = ( 530 | np.sqrt( 531 | ( 532 | (first_poly[1].max() - first_poly[1].min()) 533 | * (first_poly[1].max() - first_poly[1].min()) 534 | ) 535 | + ( 536 | (first_poly[2].max() - first_poly[2].min()) 537 | * (first_poly[2].max() - first_poly[2].min()) 538 | ) 539 | ) 540 | + MinAtomicDis 541 | ) 542 | 543 | else: 544 | radius = float(Input_radius) 545 | 546 | # Number of digits in total number of crystal models 547 | # digits = bd.len_digit_number(NSamples ** 8) 548 | 549 | count = 0 550 | for i in tqdm(samp[0], desc=VaspInp + " Generating models"): # Second poly 551 | for j in samp[2]: # Second poly 552 | for k in samp[1]: # Second poly 553 | for aX in samp[3]: # Second poly 554 | for aY in samp[4]: # Second poly 555 | for bX in samp[5]: # First poly 556 | for bY in samp[6]: # First poly 557 | for bZ in samp[7]: # First poly 558 | 559 | first_poly_bX = bd.rotateXYZOrigin( 560 | first_poly, bX, 0.0, 0.0 561 | ) 562 | first_poly_bY = bd.rotateXYZOrigin( 563 | first_poly_bX, 0.0, bY, 0.0 564 | ) 565 | first_poly_moved = bd.rotateXYZOrigin( 566 | first_poly_bY, 0.0, 0.0, bZ 567 | ) 568 | 569 | second_poly_tl = tl(xyz_coordinates, i) 570 | second_poly_rm1 = rotateXY(second_poly_tl, j) 571 | second_poly_rm2_aX = bd.rotateXYZOrigin( 572 | second_poly_rm1, aX, 0.0, 0.0 573 | ) 574 | second_poly_rm2_aY = bd.rotateXYZOrigin( 575 | second_poly_rm2_aX, 0.0, aY, 0.0 576 | ) 577 | second_poly_moved = Center_XY_r( 578 | second_poly_rm2_aY, k, radius 579 | ) 580 | 581 | if Input_radius == 'auto': 582 | # Calculate distance between atoms in first_unit and second_unit 583 | dist = cdist( 584 | first_poly_moved[[1, 2, 3]].values, 585 | second_poly_moved[[1, 2, 3]].values, 586 | ) 587 | dist[np.isnan(dist)] = 0.0 588 | dist = dist.flatten() 589 | 590 | adj_radius = radius - (min(dist) - MinAtomicDis) 591 | second_poly_moved = Center_XY_r( 592 | second_poly_rm2_aY, k, adj_radius 593 | ) 594 | 595 | dist = cdist( 596 | first_poly_moved[[1, 2, 3]].values, 597 | second_poly_moved[[1, 2, 3]].values, 598 | ) 599 | dist[np.isnan(dist)] = 0.0 600 | dist = dist.flatten() 601 | while ( 602 | min(dist) < MinAtomicDis 603 | or min(dist) >= MinAtomicDis + 0.5 604 | ): 605 | if min(dist) < MinAtomicDis: 606 | adj_radius += 0.4 607 | second_poly_moved = Center_XY_r( 608 | second_poly_rm2_aY, k, adj_radius 609 | ) 610 | dist = cdist( 611 | first_poly_moved[[1, 2, 3]].values, 612 | second_poly_moved[[1, 2, 3]].values, 613 | ) 614 | dist[np.isnan(dist)] = 0.0 615 | dist = dist.flatten() 616 | elif min(dist) >= MinAtomicDis + 0.5: 617 | adj_radius -= 0.4 618 | if adj_radius < 0.5: 619 | break 620 | second_poly_moved = Center_XY_r( 621 | second_poly_rm2_aY, k, adj_radius 622 | ) 623 | dist = cdist( 624 | first_poly_moved[[1, 2, 3]].values, 625 | second_poly_moved[[1, 2, 3]].values, 626 | ) 627 | dist[np.isnan(dist)] = 0.0 628 | dist = dist.flatten() 629 | 630 | count += 1 631 | create_crystal_vasp( 632 | os.path.join( 633 | OutDir, 634 | VaspInp, 635 | 'cryst_out-' 636 | + 
str(count).zfill(digits) 637 | + '.vasp', 638 | ), 639 | first_poly_moved, 640 | second_poly_moved, 641 | Num_atom, 642 | basis_vec, 643 | file_info, 644 | 'CrystalBuilder Info:: Translation: ' 645 | + str(i) 646 | + '; ' 647 | + 'Rotation 1 ' 648 | + str(j) 649 | + '; ' 650 | + 'Rotation 2 ' 651 | + str(k), 652 | MinAtomicDis, 653 | Polymer=False, 654 | ) 655 | print(" Crystal model building completed for", VaspInp) 656 | if Optimize is True: 657 | print(" Optimizing crystal models started for", VaspInp, "...") 658 | bd.screen_Candidates( 659 | OutDir + VaspInp, NumCandidate=NumCandidate, NCores_opt=NCores_opt 660 | ) 661 | print(" Optimizing crystal models completed for", VaspInp) 662 | 663 | return VaspInp, count, radius 664 | -------------------------------------------------------------------------------- /psp/MD_lib.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from rdkit import Chem 4 | from rdkit.Chem import Descriptors 5 | from scipy.spatial.distance import cdist 6 | from random import shuffle 7 | import subprocess 8 | 9 | from itertools import takewhile, islice, dropwhile 10 | 11 | 12 | def barycenter(unit): 13 | return unit.mean() 14 | 15 | 16 | def move_barycenter(unit, xyz_shift, origin=True, barycenter=True): 17 | unit_copy = unit.copy() 18 | if origin is True: 19 | if barycenter is False: 20 | unit_copy[1] = unit_copy[1] - unit_copy.min()[1] 21 | unit_copy[2] = unit_copy[2] - unit_copy.min()[2] 22 | unit_copy[3] = unit_copy[3] - unit_copy.min()[3] 23 | else: 24 | unit_copy[1] = unit_copy[1] - unit_copy.mean()[1] 25 | unit_copy[2] = unit_copy[2] - unit_copy.mean()[2] 26 | unit_copy[3] = unit_copy[3] - unit_copy.mean()[3] 27 | else: 28 | unit_copy[1] = unit_copy[1] + xyz_shift[0] 29 | unit_copy[2] = unit_copy[2] + xyz_shift[1] 30 | unit_copy[3] = unit_copy[3] + xyz_shift[2] 31 | return unit_copy 32 | 33 | 34 | def add_mole(sys, unit): 35 | df = unit.copy() 36 | if sys.empty: 37 | df['i'] = 1 38 | else: 39 | df['i'] = max(list(sys.i.values)) + 1 40 | df['j'] = np.arange(1, len(unit.index) + 1, 1) 41 | sys = pd.concat([sys, df]) 42 | return sys.reset_index(drop=True) 43 | 44 | 45 | def get_initial_model(NMol_list, XYZ_list, tol_dis, xmin, xmax, ymin, ymax, zmin, zmax): 46 | # List index for all possible molecules 47 | all_mole_idx = [] 48 | moleSN = 1 49 | for mole in NMol_list: 50 | all_mole_idx += [moleSN] * mole 51 | moleSN += 1 52 | # Shuffle indexes of molecules in the list 53 | shuffle(all_mole_idx) 54 | 55 | # create a system at origin 56 | per_incr = [0.0, 0.1, 0.2, 0.3] 57 | for per in per_incr: 58 | print("Percent increase: ", per) 59 | x_expand = (xmax - xmin) * per 60 | y_expand = (ymax - ymin) * per 61 | z_expand = (zmax - zmin) * per 62 | xmax_ex = xmax + x_expand 63 | ymax_ex = ymax + y_expand 64 | zmax_ex = zmax + z_expand 65 | 66 | # create a DataFrame for the system 67 | sys = pd.DataFrame() 68 | count = 0 69 | success = True 70 | add_yaxis = 0.0 71 | zlayer = 1 72 | 73 | for i in all_mole_idx: 74 | unit = pd.read_csv( 75 | XYZ_list[i - 1], header=None, skiprows=2, delim_whitespace=True 76 | ) 77 | Natm = unit.shape[0] 78 | unit = move_barycenter(unit, 0, origin=True, barycenter=False) 79 | unit_mod, success, add_yaxis, zlayer = move_unit( 80 | unit, 81 | sys, 82 | tol_dis, 83 | xmin, 84 | xmax_ex, 85 | ymin, 86 | ymax_ex, 87 | zmin, 88 | zmax_ex, 89 | add_yaxis, 90 | zlayer=zlayer, 91 | Natm=Natm, 92 | ) 93 | if success is True: 94 | count += 1 95 | sys = add_mole(sys, 
unit_mod) 96 | elif success is False and per < per_incr[-1]: 97 | break 98 | else: 99 | print("Can't pack molecules within the given box size.") 100 | exit() 101 | if success is True and per > 0.0: 102 | sys[1] = sys[1] - (sys[1].max() - xmax) / 2 103 | sys[2] = sys[2] - (sys[2].max() - ymax) / 2 104 | sys[3] = sys[3] - (sys[3].max() - zmax) / 2 105 | return sys 106 | elif success is True: 107 | return sys 108 | 109 | 110 | def move_unit( 111 | unit, 112 | sys_mod, 113 | tol_dis, 114 | xmin, 115 | xmax, 116 | ymin, 117 | ymax, 118 | zmin, 119 | zmax, 120 | add_yaxis, 121 | zlayer=1, 122 | Natm=0, 123 | ): 124 | unit_mod = unit.copy() 125 | min_x_dis = unit_mod[1].max() - unit_mod[1].min() + tol_dis 126 | min_y_dis = unit_mod[2].max() - unit_mod[2].min() + tol_dis 127 | min_z_dis = unit_mod[3].max() - unit_mod[3].min() + tol_dis 128 | per = 0.0 129 | tol_dis_mod = tol_dis + per * tol_dis 130 | if sys_mod.empty is False: 131 | last_mol = sys_mod.tail(Natm) 132 | mol_xmax, mol_ymax, mol_zmax, = ( 133 | last_mol[1].max(), 134 | last_mol[2].max(), 135 | last_mol[3].max(), 136 | ) 137 | sys_xmax, sys_ymax, sys_zmax = ( 138 | sys_mod[1].max(), 139 | sys_mod[2].max(), 140 | sys_mod[3].max(), 141 | ) 142 | if ( 143 | mol_zmax > zmax - min_z_dis 144 | and mol_ymax > ymax - min_y_dis 145 | and mol_xmax > xmax - min_x_dis 146 | ): 147 | return unit_mod, False, add_yaxis, zlayer 148 | 149 | else: 150 | if mol_ymax > ymax - min_y_dis: 151 | if mol_xmax > xmax - min_x_dis: 152 | unit_mod[3] = unit_mod[3] + sys_mod[3].max() + tol_dis_mod 153 | add_yaxis = unit_mod[2].max() 154 | zlayer += 1 155 | else: 156 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 157 | unit_mod[2] = unit_mod[2] + last_mol[2].min() - 0.1 158 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 159 | 160 | add_yaxis = max(add_yaxis, unit_mod[2].max()) 161 | 162 | elif zlayer > 1: 163 | if mol_xmax > xmax - min_x_dis: 164 | if add_yaxis + min_y_dis < ymax: 165 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 166 | unit_mod[2] = unit_mod[2] + add_yaxis + tol_dis_mod 167 | else: # Add to z axis 168 | unit_mod[3] = unit_mod[3] + sys_mod[3].max() + tol_dis_mod 169 | zlayer += 1 170 | 171 | add_yaxis = unit_mod[2].max() 172 | elif sys_xmax > xmax - min_x_dis: 173 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 174 | unit_mod[2] = unit_mod[2] + last_mol[2].min() 175 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 176 | 177 | else: 178 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 179 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 180 | 181 | else: 182 | if mol_xmax > xmax - min_x_dis: 183 | unit_mod[2] = unit_mod[2] + sys_mod[2].max() + tol_dis_mod 184 | add_yaxis = unit_mod[2].max() 185 | elif sys_xmax > xmax - min_x_dis: 186 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 187 | unit_mod[2] = unit_mod[2] + last_mol[2].min() 188 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 189 | 190 | add_yaxis = max(add_yaxis, unit_mod[2].max()) 191 | else: 192 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 193 | add_yaxis = max(add_yaxis, unit_mod[2].max()) 194 | 195 | return unit_mod, True, add_yaxis, zlayer 196 | 197 | 198 | def get_vol(density, Nmol, molar_mass): 199 | return (Nmol * molar_mass * 10) / (6.02214076 * density) # in Ang 200 | 201 | 202 | def get_molar_mass(smi): 203 | return Descriptors.ExactMolWt(Chem.MolFromSmiles(smi)) 204 | 205 | 206 | def get_box_size(vol, box_type="cubic", incr_per=0.4): # c = cubic; r = rectangular 207 | axis = vol ** (1.0 / 3.0) 208 | if 
box_type == 'r': 209 | zmax = axis + axis * incr_per 210 | axis2 = np.sqrt(vol / zmax) 211 | return 0, axis2, 0, axis2, 0, zmax 212 | else: 213 | return 0, axis, 0, axis, 0, axis 214 | 215 | 216 | def eval_dis(sys_dis_arr, dis_cutoff, dis_value, a): 217 | unit1 = sys_dis_arr[sys_dis_arr[:, 3] == a][:, :-1] 218 | unit1_minX, unit1_maxX, unit1_minY, unit1_maxY, unit1_minZ, unit1_maxZ = ( 219 | np.amin(unit1[:, 0]), 220 | np.amax(unit1[:, 0]), 221 | np.amin(unit1[:, 1]), 222 | np.amax(unit1[:, 1]), 223 | np.amin(unit1[:, 2]), 224 | np.amax(unit1[:, 2]), 225 | ) 226 | 227 | unit2 = sys_dis_arr[sys_dis_arr[:, 3] != a][:, :-1] 228 | unit2 = unit2[ 229 | (unit2[:, 0] > unit1_minX - dis_cutoff) 230 | & (unit2[:, 0] < unit1_maxX + dis_cutoff) 231 | & (unit2[:, 1] > unit1_minY - dis_cutoff) 232 | & (unit2[:, 1] < unit1_maxY + dis_cutoff) 233 | & (unit2[:, 2] > unit1_minZ - dis_cutoff) 234 | & (unit2[:, 2] < unit1_maxZ + dis_cutoff) 235 | ] 236 | 237 | dist = cdist(unit1, unit2) 238 | 239 | new_arr = dist[ 240 | dist < dis_cutoff 241 | ] # If you may need to remove double counted distances (ij and ji) 242 | new_arr = dis_cutoff - new_arr 243 | 244 | dis_value = dis_value + np.sum(new_arr) 245 | 246 | sys_dis_arr = sys_dis_arr[sys_dis_arr[:, 3] != a] 247 | return sys_dis_arr, dis_value 248 | 249 | 250 | def evaluate_obj(sys, dis_cutoff, xmin, xmax, ymin, ymax, zmin, zmax): 251 | sys_dis_arr = sys[[1, 2, 3, 'i']].to_numpy() 252 | 253 | dis_value = 0 254 | # Last molecule is removed from the list 255 | list_mol = np.unique(sys_dis_arr[:, 3])[:-1].astype(int) 256 | 257 | dis_val = list( 258 | zip(*[eval_dis(sys_dis_arr, dis_cutoff, dis_value, a) for a in list_mol]) 259 | )[1] 260 | 261 | for a in list_mol: 262 | unit1 = sys_dis_arr[sys_dis_arr[:, 3] == a][:, :-1] 263 | unit1_minX, unit1_maxX, unit1_minY, unit1_maxY, unit1_minZ, unit1_maxZ = ( 264 | np.amin(unit1[:, 0]), 265 | np.amax(unit1[:, 0]), 266 | np.amin(unit1[:, 1]), 267 | np.amax(unit1[:, 1]), 268 | np.amin(unit1[:, 2]), 269 | np.amax(unit1[:, 2]), 270 | ) 271 | 272 | unit2 = sys_dis_arr[sys_dis_arr[:, 3] != a][:, :-1] 273 | unit2 = unit2[ 274 | (unit2[:, 0] > unit1_minX - dis_cutoff) 275 | & (unit2[:, 0] < unit1_maxX + dis_cutoff) 276 | & (unit2[:, 1] > unit1_minY - dis_cutoff) 277 | & (unit2[:, 1] < unit1_maxY + dis_cutoff) 278 | & (unit2[:, 2] > unit1_minZ - dis_cutoff) 279 | & (unit2[:, 2] < unit1_maxZ + dis_cutoff) 280 | ] 281 | 282 | dist = cdist(unit1, unit2) 283 | 284 | new_arr = dist[ 285 | dist < dis_cutoff 286 | ] # If you may need to remove double counted distances (ij and ji) 287 | new_arr = dis_cutoff - new_arr 288 | 289 | dis_value = dis_value + np.sum(new_arr) 290 | 291 | sys_dis_arr = sys_dis_arr[sys_dis_arr[:, 3] != a] 292 | 293 | bound_value = 0.0 294 | # X axis 295 | Arr_x = sys[1].values 296 | newArr_x_min = Arr_x[Arr_x < xmin] 297 | newArr_x_min = xmin - newArr_x_min 298 | 299 | newArr_x_max = Arr_x[Arr_x > xmax] 300 | newArr_x_max = newArr_x_max - xmax 301 | 302 | # Y axis 303 | Arr_y = sys[2].values 304 | newArr_y_min = Arr_y[Arr_y < ymin] 305 | newArr_y_min = ymin - newArr_y_min 306 | 307 | newArr_y_max = Arr_y[Arr_y > ymax] 308 | newArr_y_max = newArr_y_max - ymax 309 | 310 | # Z axis 311 | Arr_z = sys[3].values 312 | newArr_z_min = Arr_z[Arr_z < zmin] 313 | newArr_z_min = zmin - newArr_z_min 314 | 315 | newArr_z_max = Arr_z[Arr_z > zmax] 316 | newArr_z_max = newArr_z_max - zmax 317 | 318 | bound_value = ( 319 | bound_value 320 | + np.sum(newArr_x_min) 321 | + np.sum(newArr_x_max) 322 | + np.sum(newArr_y_min) 323 | + 
np.sum(newArr_y_max) 324 | + np.sum(newArr_z_min) 325 | + np.sum(newArr_z_max) 326 | ) 327 | 328 | return dis_value + bound_value 329 | 330 | 331 | # Rotate in X, Y and Z directions simultaneously 332 | def rotateXYZ(unit, theta3, theta2, theta1): 333 | th1 = theta1 * np.pi / 180.0 # Z-axis 334 | th2 = theta2 * np.pi / 180.0 # Y-axis 335 | th3 = theta3 * np.pi / 180.0 # X-axis 336 | Rot_matrix = np.array( 337 | [ 338 | [ 339 | np.cos(th1) * np.cos(th2), 340 | np.cos(th1) * np.sin(th2) * np.sin(th3) - np.sin(th1) * np.cos(th3), 341 | np.cos(th1) * np.sin(th2) * np.cos(th3) + np.sin(th1) * np.sin(th3), 342 | ], 343 | [ 344 | np.sin(th1) * np.cos(th2), 345 | np.sin(th1) * np.sin(th2) * np.sin(th3) + np.cos(th1) * np.cos(th3), 346 | np.sin(th1) * np.sin(th2) * np.cos(th3) - np.cos(th1) * np.sin(th3), 347 | ], 348 | [-np.sin(th2), np.cos(th2) * np.sin(th3), np.cos(th2) * np.cos(th3)], 349 | ] 350 | ) 351 | 352 | rot_XYZ = unit.loc[:, [1, 2, 3]].copy() 353 | rotated_unit = rot_XYZ.values.dot(Rot_matrix) 354 | newXYZ = pd.DataFrame(rotated_unit, columns=[1, 2, 3]) 355 | newXYZ.index = unit.index 356 | unit.loc[:, [1, 2, 3]] = newXYZ.loc[:, [1, 2, 3]] 357 | return unit 358 | 359 | 360 | # This function generates an input file for PACKMOL 361 | # INPUT: 362 | # OUTPUT: Write an input file for PACKMOL 363 | def gen_packmol_inp( 364 | OutDir_packmol, tolerance, XYZ_list, NMol_list, xmin, xmax, ymin, ymax, zmin, zmax 365 | ): 366 | with open(OutDir_packmol + "packmol.inp", 'w') as f: 367 | f.write( 368 | "tolerance " + str(tolerance) + "\n" 369 | ) # Minimum distance between any two molecule 370 | f.write("output " + OutDir_packmol + "packmol.pdb\n") 371 | f.write("filetype pdb\n\n") 372 | for mol in range(len(NMol_list)): 373 | f.write("structure " + XYZ_list[mol] + "\n") 374 | f.write(" number " + str(NMol_list[mol]) + "\n") 375 | f.write( 376 | " inside box " 377 | + str(xmin) 378 | + " " 379 | + str(ymin) 380 | + " " 381 | + str(zmin) 382 | + " " 383 | + str(xmax) 384 | + " " 385 | + str(ymax) 386 | + " " 387 | + str(zmax) 388 | + "\n" 389 | ) 390 | f.write("end structure\n\n") 391 | 392 | 393 | # Run packmol 394 | def run_packmol(bashCommand, output): 395 | f = open(output, "w") 396 | process = subprocess.Popen( 397 | bashCommand, stdout=f, shell=True 398 | ) # stdout=subprocess.PIPE 399 | output, error = process.communicate() 400 | return error 401 | 402 | 403 | # This function generates a xyz file 404 | # INPUT: Name of a output file and a DataFrame of element names and respective XYZ-coordinates 405 | # OUTPUT: Write a XYZ file 406 | def gen_sys_xyz(filename, unit): 407 | unit = unit[[0, 1, 2, 3]] 408 | with open(filename, 'w') as f: 409 | f.write(str(unit.values.shape[0])) # NUMBER OF ATOMS 410 | f.write("\n\n") # TWO SPACES 411 | unit.to_csv( 412 | f, sep=' ', index=False, header=False 413 | ) # XYZ COORDINATES OF NEW MOLECULE 414 | 415 | 416 | def move_molecules(sys, disx, disy, disz, theta1, theta2, theta3): 417 | df = pd.DataFrame() 418 | for i in set(sys.i.values): 419 | Mi = sys[sys['i'] == i] 420 | Mi = move_barycenter(Mi, [disx[i - 1], disy[i - 1], disz[i - 1]], False) 421 | Mi = rotateXYZ(Mi, theta1[i - 1], theta2[i - 1], theta3[i - 1]) 422 | df = pd.concat([df, Mi]) 423 | return df 424 | 425 | 426 | def gen_sys_vasp(filename, unit, xmin, xmax, ymin, ymax, zmin, zmax): 427 | unit = unit.sort_values(by=[0]) 428 | add_dis = 0.4 # This additional distance (in Ang) is added to avoid interaction near boundary 429 | file = open(filename, 'w+') 430 | file.write('### ' + 'POSCAR' + ' ###\n') 
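# POSCAR line 2 is the universal scaling factor; writing '1' here means the lattice vectors below are used as-is (in Angstroms)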
431 | file.write('1\n') 432 | a_vec = xmax - xmin + add_dis 433 | b_vec = ymax - ymin + add_dis 434 | c_vec = zmax - zmin + add_dis 435 | 436 | file.write(' ' + str(a_vec) + ' ' + str(0.0) + ' ' + str(0.0) + '\n') 437 | file.write(' ' + str(0.0) + ' ' + str(b_vec) + ' ' + str(0.0) + '\n') 438 | file.write(' ' + str(0.0) + ' ' + str(0.0) + ' ' + str(c_vec) + '\n') 439 | 440 | ele_list = [] 441 | count_ele_list = [] 442 | for element in sorted(set(unit[0].values)): 443 | ele_list.append(element) 444 | count_ele_list.append(list(unit[0].values).count(element)) 445 | 446 | for item in ele_list: 447 | file.write(str(item) + ' ') 448 | 449 | file.write('\n ') 450 | for item in count_ele_list: 451 | file.write(str(item) + ' ') 452 | 453 | file.write('\nCartesian\n') 454 | 455 | file.write(unit[[1, 2, 3]].to_string(header=False, index=False)) 456 | file.close() 457 | 458 | 459 | def gen_sys_data( 460 | filename, 461 | unit, 462 | packmol_bond, 463 | xmin, 464 | xmax, 465 | ymin, 466 | ymax, 467 | zmin, 468 | zmax, 469 | BondInfo, 470 | Inter_Mol_Dis=0.0, 471 | ): # lammps data file 472 | # move unit to the center of a box 473 | unit[[1, 2, 3]] = unit[[1, 2, 3]].astype(float) 474 | unit[1] = unit[1] - unit[1].min() + Inter_Mol_Dis / 2 475 | unit[2] = unit[2] - unit[2].min() + Inter_Mol_Dis / 2 476 | unit[3] = unit[3] - unit[3].min() + Inter_Mol_Dis / 2 477 | 478 | unit = unit.sort_values(by=[0]) 479 | new_atom_num = list(unit.index) 480 | 481 | unit_ele = unit.drop_duplicates(subset=0, keep="first").copy() 482 | 483 | # add_dis = 0.4 # This additional distance (in Ang) is added to avoid interaction near boundary 484 | file = open(filename, 'w+') 485 | file.write('### ' + '# LAMMPS data file written by PSP' + ' ###\n') 486 | file.write(str(unit.shape[0]) + ' atoms\n') 487 | if BondInfo is True: 488 | file.write(str(packmol_bond.shape[0]) + ' bonds\n') 489 | file.write(str(len(list(unit_ele[0].values))) + ' atom types\n') 490 | file.write(str(0.0) + ' ' + str(xmax - xmin) + ' xlo xhi\n') 491 | file.write(str(0.0) + ' ' + str(ymax - ymin) + ' ylo yhi\n') 492 | file.write(str(0.0) + ' ' + str(zmax - zmin) + ' zlo zhi\n\n') 493 | 494 | ele_list = [] 495 | ele_mass = [] 496 | ele_type = [] 497 | count = 1 498 | for index, row in unit_ele.iterrows(): 499 | ele_list.append(row[0]) 500 | ele_mass.append( 501 | Chem.GetPeriodicTable().GetAtomicWeight(row[0]) 502 | ) # Check error: Element not found 503 | ele_type.append(count) 504 | count += 1 505 | 506 | unit_ele['ele_type'] = ele_type 507 | ele_type_sys = [] 508 | for index, row in unit.iterrows(): 509 | ele_type_sys.append(unit_ele[unit_ele[0] == row[0]]['ele_type'].values[0]) 510 | 511 | file.write('Masses\n\n') 512 | count = 1 513 | for mass in ele_mass: 514 | file.write(str(count) + ' ' + str(mass) + '\n') 515 | count += 1 516 | 517 | SN = np.arange(1, unit.shape[0] + 1) 518 | unit['SN'] = SN 519 | unit['ele_type'] = ele_type_sys 520 | unit['charge'] = [0] * unit.shape[0] 521 | file.write('\nAtoms\n\n') 522 | file.write( 523 | unit[['SN', 'ele_type', 'charge', 1, 2, 3]].to_string(header=False, index=False) 524 | ) 525 | 526 | if BondInfo is True: 527 | file.write('\n\nBonds\n\n') 528 | 529 | packmol_bond_reorder = [] 530 | for index, row in packmol_bond.iterrows(): 531 | packmol_bond_reorder.append( 532 | [new_atom_num[int(row[2]) - 1], new_atom_num[int(row[3]) - 1]] 533 | ) 534 | 535 | packmol_bond_reorder = pd.DataFrame( 536 | packmol_bond_reorder, columns=['atm1', 'atm2'] 537 | ) 538 | packmol_bond_reorder['atm1'] += 1 539 | 
packmol_bond_reorder['atm2'] += 1 540 | packmol_bond_reorder['BO'] = packmol_bond[1] 541 | packmol_bond_reorder = packmol_bond_reorder.sort_values(by=['atm1']) 542 | packmol_bond_reorder['sl'] = packmol_bond[0].values 543 | 544 | file.write( 545 | packmol_bond_reorder[['sl', 'BO', 'atm1', 'atm2']].to_string( 546 | header=False, index=False 547 | ) 548 | ) 549 | file.close() 550 | 551 | 552 | def main_func(x, *args): 553 | arr_x = np.array_split(x, 6) 554 | disx = arr_x[0] 555 | disy = arr_x[1] 556 | disz = arr_x[2] 557 | theta1 = arr_x[3] 558 | theta2 = arr_x[4] 559 | theta3 = arr_x[5] 560 | sys = move_molecules(args[0], disx, disy, disz, theta1, theta2, theta3) 561 | return evaluate_obj( 562 | sys, args[1], args[2], args[3], args[4], args[5], args[6], args[7] 563 | ) 564 | 565 | 566 | def read_mol2_bond(mol2_file): 567 | list_bonds = [] 568 | with open(mol2_file, 'r') as f: 569 | dropped = dropwhile(lambda _line: "@BOND" not in _line, f) 570 | next(dropped, '') 571 | for line in dropped: 572 | list_bonds.append([line.split()[0]] + [line.split()[3]] + line.split()[1:3]) 573 | return pd.DataFrame(list_bonds) 574 | 575 | 576 | def read_mol2_xyz(mol2_file): 577 | list_xyz = [] 578 | with open(mol2_file) as f: 579 | for ln in takewhile( 580 | lambda x: "@BOND" not in x, 581 | islice(dropwhile(lambda x: "@ATOM" not in x, f), 1, None), 582 | ): 583 | list_xyz.append([ln.split()[5].split(".")[0]] + ln.split()[2:5]) 584 | return pd.DataFrame(list_xyz) 585 | 586 | 587 | # read in pdb file; please see the following link for details of pdb format 588 | # https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html 589 | def read_pdb_line(line): 590 | record_type = line[0:6] 591 | atom_serial_num = line[6:11] 592 | atom_name = line[12:16] 593 | residue_name = line[17:20] 594 | chain_identifier = line[21] 595 | residue_seq_num = line[22:26] 596 | x_coord = float(line[30:38]) 597 | y_coord = float(line[38:46]) 598 | z_coord = float(line[46:54]) 599 | element = line[76:78] 600 | return x_coord, y_coord, z_coord 601 | 602 | 603 | def read_lmps_header(lmp_file): 604 | f = open(lmp_file) 605 | lines = f.readlines() 606 | natoms = int(lines[2].split()[0]) 607 | nbonds = int(lines[3].split()[0]) 608 | nangles = int(lines[4].split()[0]) 609 | ndihedrals = int(lines[5].split()[0]) 610 | nimpropers = int(lines[6].split()[0]) 611 | 612 | parts = lines[8].split() 613 | if len(parts) >= 2 and parts[1] == 'atom': 614 | natom_types = int(parts[0]) 615 | else: 616 | natom_types = 0 617 | 618 | parts = lines[9].split() 619 | if len(parts) >= 2 and parts[1] == 'bond': 620 | nbond_types = int(parts[0]) 621 | else: 622 | nbond_types = 0 623 | 624 | parts = lines[10].split() 625 | if len(parts) >= 2 and parts[1] == 'angle': 626 | nangle_types = int(parts[0]) 627 | else: 628 | nangle_types = 0 629 | 630 | parts = lines[11].split() 631 | if len(parts) >= 2 and parts[1] == 'dihedral': 632 | ndihedral_types = int(parts[0]) 633 | else: 634 | ndihedral_types = 0 635 | 636 | parts = lines[12].split() 637 | if len(parts) >= 2 and parts[1] == 'improper': 638 | nimproper_types = int(parts[0]) 639 | else: 640 | nimproper_types = 0 641 | return ( 642 | natoms, 643 | nbonds, 644 | nangles, 645 | ndihedrals, 646 | nimpropers, 647 | natom_types, 648 | nbond_types, 649 | nangle_types, 650 | ndihedral_types, 651 | nimproper_types, 652 | ) 653 | 654 | 655 | # returns a 2D array of x, y, z coordinates (i.e. 
r[id][coordinate]) 656 | def get_coord_from_pdb(system_pdb_fname): 657 | skip_beginning = 5 # header lines of packmol.pdb 658 | atom_count = 0 # coutner for atom number 659 | r = np.zeros([1, 3], float) # 2D array of x, y, z coordinates, r[id][coordinate] 660 | 661 | # get all atom coordinates from the system/packmol pdb file 662 | with open(system_pdb_fname, 'r') as f: 663 | for skipped_frame in range(skip_beginning): 664 | f.readline() 665 | 666 | line = f.readline() 667 | x_coord, y_coord, z_coord = read_pdb_line(line) 668 | r[atom_count][0] = x_coord 669 | r[atom_count][1] = y_coord 670 | r[atom_count][2] = z_coord 671 | 672 | # if next line still returns x, y, z coordinates, allocate more memeory for the array 673 | while True: 674 | try: 675 | atom_count += 1 676 | line = f.readline() 677 | x_coord, y_coord, z_coord = read_pdb_line(line) 678 | r = np.concatenate((r, np.zeros([1, 3], float))) 679 | r[atom_count][0] = x_coord 680 | r[atom_count][1] = y_coord 681 | r[atom_count][2] = z_coord 682 | except Exception: 683 | break 684 | return r 685 | 686 | 687 | def write_lammps_ouput(lammps_output, r, box_size, system_stats, dicts): 688 | # These switcher dicts are for each section of the LAMMPS file that we will build 689 | ( 690 | atomconvertdicts, 691 | bondconvertdicts, 692 | angleconvertdicts, 693 | dihedralconvertdicts, 694 | improperconvertdicts, 695 | ) = ([] for i in range(5)) 696 | switcher_coeffs = { 697 | 'Pair Coeffs': [system_stats['total_atoms'], atomconvertdicts], 698 | 'Bond Coeffs': [system_stats['total_bonds'], bondconvertdicts], 699 | 'Angle Coeffs': [system_stats['total_angles'], angleconvertdicts], 700 | 'Dihedral Coeffs': [system_stats['total_dihedrals'], dihedralconvertdicts], 701 | 'Improper Coeffs': [system_stats['total_impropers'], improperconvertdicts], 702 | } 703 | switcher_main = { 704 | 'Bonds': [system_stats['total_bonds'], bondconvertdicts], 705 | 'Angles': [system_stats['total_angles'], angleconvertdicts], 706 | 'Dihedrals': [system_stats['total_dihedrals'], dihedralconvertdicts], 707 | 'Impropers': [system_stats['total_impropers'], improperconvertdicts], 708 | } 709 | 710 | # build the final LAMMPS output 711 | with open(lammps_output, 'wt') as out: 712 | # header section 713 | out.write('LAMMPS data file Created by PSP\n') 714 | out.write('\n') 715 | out.write('{:>12} atoms\n'.format(system_stats['total_atoms'])) 716 | out.write('{:>12} bonds\n'.format(system_stats['total_bonds'])) 717 | out.write('{:>12} angles\n'.format(system_stats['total_angles'])) 718 | out.write('{:>12} dihedrals\n'.format(system_stats['total_dihedrals'])) 719 | out.write('{:>12} impropers\n'.format(system_stats['total_impropers'])) 720 | out.write('\n') 721 | out.write('{:>12} atom types\n'.format(system_stats['total_atom_types'])) 722 | out.write('{:>12} bond types\n'.format(system_stats['total_bond_types'])) 723 | out.write('{:>12} angle types\n'.format(system_stats['total_angle_types'])) 724 | out.write( 725 | '{:>12} dihedral types\n'.format(system_stats['total_dihedral_types']) 726 | ) 727 | out.write( 728 | '{:>12} improper types\n'.format(system_stats['total_improper_types']) 729 | ) 730 | out.write('\n') 731 | out.write('{:>12} {:>12} xlo xhi\n'.format(box_size[0], box_size[1])) 732 | out.write('{:>12} {:>12} ylo yhi\n'.format(box_size[2], box_size[3])) 733 | out.write('{:>12} {:>12} zlo zhi\n'.format(box_size[4], box_size[5])) 734 | out.write('\n') 735 | 736 | # Masses section 737 | out.write('Masses\n') 738 | out.write('\n') 739 | counter = 0 740 | for dic in dicts: 
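# each entry in dicts holds the parsed sections (Masses, the Coeffs blocks, Atoms, Bonds, ...) of one molecule type's LAMMPS data file; type IDs are renumbered sequentially across all molecule types via counter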
741 | for fields in dic.get('Masses'): 742 | counter += 1 743 | parts = ' '.join(['%s' % (i,) for i in fields[1:]]) 744 | out.write('{:>12} {:<}\n'.format(counter, parts)) 745 | out.write('\n') 746 | 747 | # Pair, Bond, Angle, Dihedral, and Improper Coeffs sections 748 | for coeff_type in switcher_coeffs: 749 | if switcher_coeffs.get(coeff_type)[0] == 0: 750 | continue 751 | out.write('{}\n'.format(coeff_type)) 752 | out.write('\n') 753 | counter = 0 754 | for dic in dicts: 755 | convertdict = {} 756 | for fields in dic.get(coeff_type): 757 | counter += 1 758 | convertdict[fields[0]] = counter 759 | parts = ' '.join(['%s' % (i,) for i in fields[1:]]) 760 | out.write('{:>12} {:<}\n'.format(counter, parts)) 761 | switcher_coeffs.get(coeff_type)[1].append(convertdict) 762 | out.write('\n') 763 | 764 | # Atom section 765 | out.write('Atoms\n') 766 | out.write('\n') 767 | atom_counter = 0 768 | chain_counter = 0 769 | for index, dic in enumerate(dicts): 770 | for num in range(dic.get('Num')): 771 | chain_counter += 1 772 | for fields in dic.get('Atoms'): 773 | atom_counter += 1 774 | new_x = r[atom_counter - 1][0] 775 | new_y = r[atom_counter - 1][1] 776 | new_z = r[atom_counter - 1][2] 777 | new_atomtype = atomconvertdicts[index][fields[2]] 778 | out.write( 779 | '{:>8} {:>7} {:>3} {:>12} {:>10} {:>10} {:>10}\n'.format( 780 | atom_counter, 781 | chain_counter, 782 | new_atomtype, 783 | fields[3], 784 | new_x, 785 | new_y, 786 | new_z, 787 | ) 788 | ) 789 | out.write('\n') 790 | 791 | # Bond, Angle, Dihedral, and Improper sections 792 | for section_type in switcher_main: 793 | if switcher_main.get(section_type)[0] == 0: 794 | continue 795 | out.write('{}\n'.format(section_type)) 796 | out.write('\n') 797 | atom_counter = 0 798 | type_counter = 0 799 | for index, dic in enumerate(dicts): 800 | for num in range(dic.get('Num')): 801 | for fields in dic.get(section_type): 802 | new_id = int(fields[0]) + type_counter 803 | section_convertdicts = switcher_main.get(section_type)[1] 804 | new_type = section_convertdicts[index][fields[1]] 805 | new_atom1 = int(fields[2]) + atom_counter 806 | new_atom2 = int(fields[3]) + atom_counter 807 | out.write( 808 | '{:>8} {:>8} {:>6} {:>6}'.format( 809 | new_id, new_type, new_atom1, new_atom2 810 | ) 811 | ) 812 | if not section_type == 'Bonds': 813 | new_atom3 = int(fields[4]) + atom_counter 814 | out.write(' {:>6}'.format(new_atom3)) 815 | if not section_type == 'Angles': 816 | new_atom4 = int(fields[5]) + atom_counter 817 | out.write(' {:>6}'.format(new_atom4)) 818 | out.write('\n') 819 | atom_counter += len(dic.get('Atoms')) 820 | type_counter += len(dic.get(section_type)) 821 | out.write('\n') 822 | 823 | 824 | def get_type_from_antechamber( 825 | s, mol2_file, types='gaff2', f=None, am1bcc_charges=False, swap_dict=None, cleanup=True 826 | ): 827 | import os 828 | import glob 829 | 830 | ANTECHAMBER_EXEC = os.environ.get('ANTECHAMBER_EXEC') 831 | temp_ac_fname = 'temp.ac' 832 | temp_pdb_fname = None 833 | try: 834 | command = '{} -fi mol2 -i {} -fo ac -o {} -at {}'.format(ANTECHAMBER_EXEC, mol2_file, temp_ac_fname, types) 835 | if am1bcc_charges: 836 | command += ' -c bcc' 837 | subprocess.call(command, shell=True) 838 | fr = open(temp_ac_fname, "r") 839 | except BaseException: 840 | print('Error running Antechamber with the mol2 file, switch to using pdb file.') 841 | temp_pdb_fname = 'temp.pdb' 842 | s.write_pdb(temp_pdb_fname) 843 | command = '{} -fi pdb -i {} -fo ac -o {} -at {}'.format(ANTECHAMBER_EXEC, temp_pdb_fname, temp_ac_fname, types) 844 | if 
am1bcc_charges: 845 | command += ' -c bcc' 846 | subprocess.call(command, shell=True) 847 | fr = open(temp_ac_fname, "r") 848 | fr.readline() 849 | fr.readline() 850 | line = fr.readline() 851 | while line.split()[0] == 'ATOM': 852 | tag = int(line.split()[1]) 853 | type_name = line.split()[-1] 854 | if am1bcc_charges: 855 | charge = float(line.split()[-2]) 856 | s.particles[tag].charge = charge 857 | if swap_dict: 858 | for key in swap_dict: 859 | if type_name == key: 860 | type_name = swap_dict[key] 861 | if s.particle_types.get(type_name): 862 | s.particles[tag].type = s.particle_types.get(type_name)[0] 863 | elif f: 864 | pt = f.particle_types.get(type_name) 865 | if pt: 866 | s.particles[tag].type = s.particle_types.add(pt[0].copy()) 867 | else: 868 | print('cannot find type {} in system or forcefield'.format(type_name)) 869 | line = fr.readline() 870 | fr.close() 871 | 872 | if cleanup: 873 | fnames = ['ATOMTYPE.INF', temp_ac_fname] 874 | fnames += glob.glob('ANTECHAMBER*') 875 | if temp_pdb_fname: 876 | fnames += [temp_pdb_fname] 877 | for fname in fnames: 878 | try: 879 | os.remove(fname) 880 | except Exception: 881 | print('problem removing {} during cleanup'.format(fname)) 882 | -------------------------------------------------------------------------------- /psp/MoleculeBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import psp.PSP_lib as bd 4 | import os 5 | import shutil 6 | import time 7 | import multiprocessing 8 | from joblib import Parallel, delayed 9 | import psp.output_lib as lib 10 | from tqdm import tqdm 11 | 12 | 13 | class Builder: 14 | def __init__( 15 | self, 16 | Dataframe, 17 | NCores=0, 18 | ID_col='ID', 19 | SMILES_col='smiles', 20 | LeftCap='LeftCap', 21 | RightCap='RightCap', 22 | OutDir='molecules', 23 | Inter_Mol_Dis=6, 24 | Length=[1], 25 | NumConf=1, 26 | Loop=False, 27 | IrrStruc=False, 28 | OPLS=False, 29 | GAFF2=False, 30 | GAFF2_atom_typing='pysimm', 31 | Subscript=False, 32 | ): 33 | self.ID_col = ID_col 34 | self.SMILES_col = SMILES_col 35 | self.LeftCap = LeftCap 36 | self.RightCap = RightCap 37 | self.OutDir = OutDir 38 | self.Dataframe = Dataframe 39 | self.NCores = NCores 40 | self.Inter_Mol_Dis = Inter_Mol_Dis 41 | self.Length = Length 42 | self.NumConf = NumConf 43 | self.Loop = Loop 44 | self.IrrStruc = IrrStruc 45 | self.OPLS = OPLS 46 | self.GAFF2 = GAFF2 47 | self.GAFF2_atom_typing = GAFF2_atom_typing 48 | self.Subscript = Subscript 49 | 50 | # list of molecules name and CORRECT/WRONG 51 | def Build(self): 52 | if self.Subscript is False: 53 | lib.print_psp_info() # Print PSP info 54 | lib.print_input("MoleculeBuilder", self.Dataframe) 55 | if self.OPLS is True: 56 | self.NCores = 1 57 | if self.NCores <= 0: 58 | ncore_print = 'All' 59 | else: 60 | ncore_print = self.NCores 61 | 62 | print( 63 | "\n", 64 | "Additional information: ", 65 | "\n", 66 | "Length of oligomers: ", 67 | self.Length, 68 | "\n", 69 | "Number of conformers: ", 70 | self.NumConf, 71 | "\n", 72 | "Loop model: ", 73 | self.Loop, 74 | "\n", 75 | "Run short MD simulation: ", 76 | self.IrrStruc, 77 | "\n", 78 | "Generate OPLS parameter file: ", 79 | self.OPLS, 80 | "\n", 81 | "Intermolecular distance in POSCAR: ", 82 | self.Inter_Mol_Dis, 83 | "\n", 84 | "Number of cores: ", 85 | ncore_print, 86 | "\n", 87 | "Output Directory: ", 88 | self.OutDir, 89 | "\n", 90 | ) 91 | 92 | # location of directory for VASP inputs (polymers) and build a directory 93 | out_dir = self.OutDir + '/' 
94 | bd.build_dir(out_dir) 95 | 96 | # Directories 97 | # Working directory 98 | bd.build_dir('work_dir/') 99 | 100 | # location of input XYZ files 101 | xyz_in_dir = 'work_dir/xyz-in/' 102 | bd.build_dir(xyz_in_dir) 103 | 104 | start_1 = time.time() 105 | list_out_xyz = 'output_MB.csv' 106 | chk_tri = [] 107 | 108 | df = self.Dataframe.copy() 109 | df[self.ID_col] = df[self.ID_col].apply(str) 110 | 111 | if self.NCores == 0: 112 | self.NCores = multiprocessing.cpu_count() - 1 113 | 114 | if self.NCores == -1 or self.IrrStruc is True: 115 | NCores_opt = 0 116 | self.NCores = 1 117 | else: 118 | NCores_opt = 1 119 | print("\n 3D model building started...\n") 120 | result = Parallel(n_jobs=self.NCores)( 121 | delayed(bd.build_3D)( 122 | unit_name, 123 | df, 124 | self.ID_col, 125 | self.SMILES_col, 126 | self.LeftCap, 127 | self.RightCap, 128 | out_dir, 129 | self.Inter_Mol_Dis, 130 | self.Length, 131 | xyz_in_dir, 132 | self.NumConf, 133 | self.Loop, 134 | self.IrrStruc, 135 | self.OPLS, 136 | self.GAFF2, 137 | self.GAFF2_atom_typing, 138 | NCores_opt, 139 | ) 140 | for unit_name in tqdm(df[self.ID_col].values, desc='Building models ...',) 141 | ) 142 | 143 | for i in result: 144 | chk_tri.append([i[0], i[1], i[2]]) 145 | 146 | chk_tri = pd.DataFrame(chk_tri, columns=['ID', 'Result', 'SMILES']) 147 | chk_tri.to_csv(list_out_xyz) 148 | 149 | bd.del_tmp_files() 150 | 151 | # Delete work directory 152 | if os.path.isdir('work_dir/'): 153 | shutil.rmtree('work_dir/') 154 | 155 | end_1 = time.time() 156 | lib.print_out( 157 | chk_tri, "3D model", np.round((end_1 - start_1) / 60, 2), self.Subscript 158 | ) 159 | return chk_tri 160 | -------------------------------------------------------------------------------- /psp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/psp/__init__.py -------------------------------------------------------------------------------- /psp/output_lib.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | pd.set_option('display.max_rows', None) 5 | pd.set_option('display.max_columns', None) 6 | pd.set_option('display.width', 1000) 7 | pd.set_option('display.colheader_justify', 'center') 8 | pd.set_option('display.precision', 3) 9 | 10 | 11 | def print_psp_info(): 12 | print("") 13 | print( 14 | " --------- PPPPPP SSSSSS PPPPPP --------- " 15 | ) 16 | print( 17 | " ----------------- PP PP SS PP PP ----------------- " 18 | ) 19 | print( 20 | " ------------------------- PP PP SS PP PP ------------------------- " 21 | ) 22 | print( 23 | " -------------------------------- PPPPPP SSSSS PPPPPP -------------------------------- " 24 | ) 25 | print( 26 | " ------------------------- PP SS PP ------------------------- " 27 | ) 28 | print( 29 | " ----------------- PP SS PP ----------------- " 30 | ) 31 | print( 32 | " --------- PP SSSSSS PP --------- " 33 | ) 34 | print( 35 | " --------------------------------------------------------------------------------------------------- " 36 | ) 37 | version = '1.0.0' # pkg_resources.require("PolymerStructurePredictor")[0].version 38 | print( 39 | " ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** " 40 | ) 41 | print( 42 | " Polymer Structure Predictor (PSP) version = ", 43 | version, 44 | " ", 45 | ) 46 | print( 47 | " Developed at Ramprasad Group " 48 | ) 49 | print( 50 | " Materials Science 
and Engineering, Georgia Institute of Technology, Atlanta, US " 51 | ) 52 | print("") 53 | print( 54 | " Cite this work as: " 55 | ) 56 | print( 57 | " H. Sahu, K-H. Shen, J. H. Montoya, H. Tran, R. Ramprasad, PSP: A python toolkit " 58 | ) 59 | print( 60 | " for predicting 3D models of polymers, journal name, volume, page, 2022. " 61 | ) 62 | 63 | print( 64 | " ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** " 65 | ) 66 | print( 67 | " --------------------------------------------------------------------------------------------------- " 68 | ) 69 | 70 | 71 | def print_input(builder, input_file=pd.DataFrame()): 72 | print(" ", builder, " started...") 73 | if not input_file.empty: 74 | print( 75 | " ----------------------------------------------- INPUT --------------------------------------------- " 76 | ) 77 | input_file.index = np.arange(1, len(input_file) + 1) 78 | print(input_file.to_markdown()) 79 | 80 | 81 | def print_out(output_file, model_name, time, subscript=False): 82 | print("\n", model_name, "building completed.\n") 83 | if subscript is True or model_name == "Amorphous model": 84 | pass 85 | else: 86 | print( 87 | " ----------------------------------------------- OUTPUT -------------------------------------------- " 88 | ) 89 | if not output_file.empty: 90 | output_file.index = np.arange(1, len(output_file) + 1) 91 | print(output_file.to_markdown()) 92 | print("") 93 | if subscript is False: 94 | print(" Total run time (minutes): ", time) 95 | print( 96 | " ------------------------------------- PSP TERMINATED NORMALLY ------------------------------------- " 97 | ) 98 | else: 99 | print("", model_name, "building time (minutes): ", time) 100 | print("") 101 | -------------------------------------------------------------------------------- /psp/simulated_annealing.py: -------------------------------------------------------------------------------- 1 | import psp.PSP_lib as bd 2 | import numpy as np 3 | import pandas as pd 4 | import random 5 | import math 6 | from openbabel import openbabel as ob 7 | 8 | obConversion = ob.OBConversion() 9 | obConversion.SetInAndOutFormats("xyz", "xyz") 10 | ff = ob.OBForceField.FindForceField('UFF') 11 | mol = ob.OBMol() 12 | np.set_printoptions(precision=20) 13 | 14 | 15 | # define objective function 16 | def f( 17 | unit_name, 18 | sl, 19 | unit, 20 | bond, 21 | angle, 22 | neigh_atoms_info, 23 | xyz_tmp_dir, 24 | dum1, 25 | dum2, 26 | atom1, 27 | atom2, 28 | ): 29 | file_name, conf_unit, dis_dum1_dum2, ang_1st_2nd, penalty = bd.create_conformer( 30 | unit_name, 31 | sl, 32 | unit, 33 | bond, 34 | neigh_atoms_info, 35 | angle, 36 | xyz_tmp_dir, 37 | dum1, 38 | dum2, 39 | atom1, 40 | atom2, 41 | ) 42 | obConversion.ReadFile(mol, file_name) 43 | ff.Setup(mol) 44 | E_cost = ( 45 | ff.Energy() 46 | + ff.Energy() * (1 - (ang_1st_2nd / 180.0)) 47 | + ff.Energy() * penalty * 10 48 | ) 49 | return E_cost, conf_unit, file_name 50 | 51 | 52 | ###################################################### 53 | # Simulated Annealing 54 | ###################################################### 55 | def SA( 56 | unit_name, 57 | unit, 58 | bonds, 59 | angle, 60 | neigh_atoms_info, 61 | xyz_tmp_dir, 62 | dum1, 63 | dum2, 64 | atom1, 65 | atom2, 66 | Steps, 67 | Substeps, 68 | ): 69 | i1 = bonds.index.values 70 | i2 = angle 71 | 72 | # Start location 73 | x_start = [i1[0], i2[0]] 74 | # Number of cycles 75 | n = Steps 76 | # Number of trials per cycle 77 | m = Substeps 78 | # Number of accepted solutions 79 | na = 0.0 80 | # Probability of 
accepting worse solution at the start 81 | p1 = 0.3 82 | # Probability of accepting worse solution at the end 83 | p50 = 0.001 84 | # Initial temperature 85 | t1 = -1.0 / math.log(p1) 86 | # Final temperature 87 | t50 = -1.0 / math.log(p50) 88 | 89 | # Fractional reduction every cycle 90 | frac = (t50 / t1) ** (1.0 / (n - 1.0)) 91 | 92 | # Initialize x 93 | x = np.zeros((n + 1, 2)) 94 | 95 | x[0] = x_start 96 | 97 | results = [] 98 | 99 | xi = np.zeros(2) 100 | xi = x_start 101 | na = na + 1.0 102 | 103 | # Current best results so far 104 | xc = np.zeros(2) 105 | xc = x[0] 106 | fc, unit_new, file_name = f( 107 | unit_name, 108 | 0, 109 | unit, 110 | bonds.loc[0], 111 | 0.0, 112 | neigh_atoms_info, 113 | xyz_tmp_dir, 114 | dum1, 115 | dum2, 116 | atom1, 117 | atom2, 118 | ) 119 | fs = np.zeros(n + 1) 120 | fs[0] = fc 121 | results.append([0, fc, file_name]) 122 | 123 | # Current temperature 124 | t = t1 125 | # DeltaE Average 126 | DeltaE_avg = 0.0 127 | 128 | for i in range(n): 129 | for j in range(m): 130 | unit_prev = unit.copy() 131 | xi[0] = np.random.choice(i1) 132 | xi[1] = np.random.choice(i2) 133 | fc_new, unit, file_name = f( 134 | unit_name, 135 | i, 136 | unit, 137 | bonds.loc[xi[0]], 138 | xi[1], 139 | neigh_atoms_info, 140 | xyz_tmp_dir, 141 | dum1, 142 | dum2, 143 | atom1, 144 | atom2, 145 | ) 146 | DeltaE = abs(fc_new - fc) 147 | 148 | if fc_new > fc: 149 | # Initialize DeltaE_avg if a worse solution was found 150 | # on the first iteration 151 | if i == 0 and j == 0: 152 | DeltaE_avg = DeltaE 153 | 154 | # To avoid divide by ZERO add a small number to DeltaE_avg 155 | if DeltaE_avg == 0.0: 156 | DeltaE_avg = DeltaE_avg + 1.0e-13 157 | 158 | # objective function is worse 159 | # generate probability of acceptance 160 | p = math.exp(-DeltaE / (DeltaE_avg * t)) 161 | 162 | # determine whether to accept worse point 163 | if random.random() < p: 164 | # accept the worse solution 165 | accept = True 166 | else: 167 | # don't accept the worse solution 168 | accept = False 169 | else: 170 | # objective function is lower, automatically accept 171 | accept = True 172 | 173 | if accept is True: 174 | # update currently accepted solution 175 | xc[0] = xi[0] 176 | xc[1] = xi[1] 177 | fc = fc_new 178 | best_xyz = file_name 179 | # increment number of accepted solutions 180 | na = na + 1.0 181 | # update DeltaE_avg 182 | DeltaE_avg = (DeltaE_avg * (na - 1.0) + DeltaE) / na 183 | 184 | else: 185 | unit = unit_prev.copy() 186 | 187 | # Record the best x values at the end of every cycle 188 | x[i + 1][0] = xc[0] 189 | x[i + 1][1] = xc[1] 190 | try: 191 | results.append([i, fc, best_xyz]) 192 | except Exception: 193 | results.append([i, fc, 'XXX']) 194 | fs[i + 1] = fc 195 | 196 | if np.around(fs[i], decimals=15) == np.around( 197 | fs[i + 1], decimals=15 198 | ) and np.around(fs[i - 1], decimals=15) == np.around(fs[i + 1], decimals=15): 199 | break 200 | # Lower the temperature for next cycle 201 | t = frac * t 202 | results = pd.DataFrame(results, columns=['i', 'Energy+', 'xyzFile']) 203 | results = results[results['xyzFile'] != 'XXX'] 204 | results = results.drop_duplicates(subset='xyzFile', keep="last") 205 | return results 206 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | time 4 | os 5 | openbabel 6 | subprocess 7 | glob 8 | tqdm 9 | random 10 | mmap 11 | multiprocessing 12 | rdkit 13 | shutil 14 | joblib 15 | scipy 16 | 
itertools 17 | pkg_resources 18 | math 19 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts = --durations=30 --quiet 3 | filterwarnings = 4 | ignore::UserWarning 5 | ignore::RuntimeWarning 6 | 7 | [pycodestyle] 8 | count = True 9 | ignore = E121,E123,E126,E133,E226,E241,E242,E704,W503,W504,W505,E741,W605,W293,W291 10 | max-line-length = 120 11 | statistics = True 12 | exclude=*/tests/* 13 | 14 | [flake8] 15 | exclude = .git,__pycache__,docs_rst/conf.py,tests,__init__.py 16 | # max-complexity = 10 17 | extend-ignore = E741,W291 18 | max-line-length = 120 19 | 20 | [pydocstyle] 21 | ignore = D105,D2,D4 22 | match-dir=(?!(tests)).* 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | from subprocess import call 4 | 5 | # Test for openbabel/rdkit conda installs 6 | try: 7 | from openbabel import openbabel 8 | except ImportError: 9 | raise ModuleNotFoundError("openbabel not found, install openbabel via conda-forge.") 10 | 11 | try: 12 | import rdkit 13 | except ImportError: 14 | raise ModuleNotFoundError("rdkit not found, install openbabel via conda-forge.") 15 | 16 | # Get PATH for external software and write in .bashrc 17 | #HOME_DIR = os.environ.get('HOME') 18 | # PACKMOL 19 | #if os.getenv('PACKMOL_EXEC') is None: 20 | # print("Enter PATH for PACKMOL executable: ") 21 | # print("For example '/home/opt/packmol/packmol'") 22 | # packmol_exec = input("") 23 | # call("echo \# PACKMOL_PSP >> {}".format(os.path.join(HOME_DIR,'.bashrc')),shell=True) 24 | # call("echo export PACKMOL_EXEC={} >> {}".format(packmol_exec,os.path.join(HOME_DIR,'.bashrc')),shell=True) 25 | 26 | # ORCA 27 | #if os.getenv('ORCA_EXEC') is None: 28 | # print("Enter PATH for ORCA executable: ") 29 | # print("For example '/home/opt/orca_4_2/orca'") 30 | # orca_exec = input("") 31 | # call("echo \# ORCA_PSP >> {}".format(os.path.join(HOME_DIR,'.bashrc')),shell=True) 32 | # call("echo export ORCA_EXEC={} >> {}".format(orca_exec,os.path.join(HOME_DIR,'.bashrc')),shell=True) 33 | 34 | # OPENMPI 35 | #if os.getenv('OPENMPI_bin') is None: 36 | # print("Enter PATH for OPENMPI: ") 37 | # print("For example '/home/opt/openmpi-316'") 38 | # openmpi_path = input("") 39 | # call("echo \# OPENMPI_PSP >> {}".format(os.path.join(HOME_DIR,'.bashrc')),shell=True) 40 | # call("echo export OPENMPI_bin={} >> {}".format(os.path.join(openmpi_path,'bin'),os.path.join(HOME_DIR,'.bashrc')),shell=True) 41 | # call("echo export OPENMPI_lib={} >> {}".format(os.path.join(openmpi_path,'lib'),os.path.join(HOME_DIR,'.bashrc')),shell=True) 42 | 43 | 44 | # Read the contents of your README file 45 | PACKAGE_DIR = os.path.abspath(os.path.dirname(__file__)) 46 | with open(os.path.join(PACKAGE_DIR, 'README.md'), encoding='utf-8') as f: 47 | LONG_DESCRIPTION = f.read() 48 | 49 | setup(name='PolymerStructurePredictor', 50 | version='1.0.0', 51 | long_description=LONG_DESCRIPTION, 52 | long_description_content_type='text/markdown', 53 | description='Build single chains and crystal structures of polymers', 54 | keywords=['SMILES', 'polymer', 'single chain', 'crystal structure'], 55 | url='https://github.com/Ramprasad-Group/PSP', 56 | author='Harikrishna Sahu', 57 | author_email='harikrishnasahu89@gmail.com', 58 | classifiers=[ 
59 | "Programming Language :: Python :: 3", 60 | "License :: OSI Approved :: MIT License", 61 | "Operating System :: OS Independent", 62 | ], 63 | #license='MIT', 64 | packages=find_packages(), 65 | install_requires=['scipy', 66 | 'pandas', 67 | 'joblib'], 68 | zip_safe=False 69 | ) 70 | -------------------------------------------------------------------------------- /test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/test/.DS_Store -------------------------------------------------------------------------------- /test/AmorphousBuilder/amor_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.AmorphousBuilder as ab 3 | 4 | input_df = pd.read_csv("input_amor.csv") 5 | amor = ab.Builder( 6 | input_df, 7 | ID_col="ID", 8 | SMILES_col="smiles", 9 | OutDir='amor_model', 10 | Length='Len', 11 | NumConf='NumConf', 12 | NumModel=1, 13 | LeftCap = "LeftCap", 14 | RightCap = "RightCap", 15 | Loop='Loop', 16 | density=0.85, 17 | box_type='c', 18 | ) 19 | amor.Build() 20 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/amor_model_gaff2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.AmorphousBuilder as ab 3 | 4 | input_df = pd.read_csv("input_amor.csv") 5 | amor = ab.Builder( 6 | input_df, 7 | ID_col="ID", 8 | SMILES_col="smiles", 9 | Length='Len', 10 | NumConf='NumConf', 11 | LeftCap = "LeftCap", 12 | RightCap = "RightCap", 13 | Loop='Loop', 14 | density=0.85, 15 | box_type='c', 16 | BondInfo=False 17 | ) 18 | amor.Build() 19 | 20 | # Default get_gaff2() uses Pysimm for atom typing 21 | amor.get_gaff2(output_fname='amor_gaff2.lmps') 22 | 23 | ''' 24 | [ADVANCED] If Ambertools is installed, and antechamber is in the PATH 25 | (e.g. export ANTECHAMBER_EXEC=~/.conda/envs/AmberTools21/bin/antechamber), 26 | atom typing can also be done using antechamber by specifying atom_typing='antechamber'. 27 | In addition, atom types can be swapped manually by specifying the swap_dict 28 | (e.g. swap_dict={'ns': 'n'}). 
29 | 30 | A representative example is as below: 31 | amor.get_gaff2(output_fname='amor_gaff2.lmps', atom_typing='antechamber', swap_dict={'ns': 'n'}) 32 | ''' 33 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/amor_model_opls.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.AmorphousBuilder as ab 3 | 4 | input_df = pd.read_csv("input_amor.csv") 5 | amor = ab.Builder( 6 | input_df, 7 | ID_col="ID", 8 | SMILES_col="smiles", 9 | Length='Len', 10 | NumConf='NumConf', 11 | LeftCap = "LeftCap", 12 | RightCap = "RightCap", 13 | Loop='Loop', 14 | density=0.85, 15 | box_type='c', 16 | BondInfo=False 17 | ) 18 | amor.Build() 19 | amor.get_opls(output_fname='amor_opls.lmps') 20 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/input_PE.csv: -------------------------------------------------------------------------------- 1 | ID,smiles,Len,Num,NumConf,Loop 2 | CC25,[*]CC[*],20,80,1,False 3 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/input_amor.csv: -------------------------------------------------------------------------------- 1 | ID,smiles,Len,Num,NumConf,Loop 2 | PVC3,C(C([*])Cl)[*],3,8,2,False 3 | PVC5,C(C([*])Cl)[*],5,4,2,False 4 | cc5,[*]CC[*],5,8,2,False 5 | -------------------------------------------------------------------------------- /test/ChainBuilder/chain_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.ChainBuilder as ChB 3 | 4 | df_smiles = pd.read_csv("input_chain.csv") 5 | chain_builder = ChB.Builder( 6 | Dataframe=df_smiles, 7 | ID_col="PID", 8 | SMILES_col="smiles_polymer", 9 | NumConf=1, 10 | Length=["n", 5], 11 | Steps=100, 12 | Substeps=20, 13 | Method="SA", 14 | NCores=1, 15 | OutDir='chains', 16 | Tol_ChainCorr=50, 17 | Inter_Chain_Dis=12, 18 | ) 19 | results = chain_builder.BuildChain() 20 | -------------------------------------------------------------------------------- /test/ChainBuilder/input_chain.csv: -------------------------------------------------------------------------------- 1 | PID,smiles_polymer 2 | PE,[*]CC[*] 3 | PVC,C(C(CC([*])Cl)Cl)[*] 4 | ABPBO,c1c2c(cc(c1)c1oc3c(n1)cc(cc3)[*])nc(o2)[*] 5 | PVDF,C(C(F)(F)[*])[*] 6 | PAN,C(CC(C[*])C#N)([*])C#N 7 | PPS,C1=CC(=CC=C1SC2=CC=C(C=C2)S[*])[*] 8 | -------------------------------------------------------------------------------- /test/CrystalBuilder/crystal_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import glob 3 | import psp.ChainBuilder as ChB 4 | import psp.CrystalBuilder as CrB 5 | 6 | df_smiles = pd.read_csv("input_chain.csv") 7 | 8 | chain_builder = ChB.Builder( 9 | Dataframe=df_smiles, 10 | ID_col="PID", 11 | SMILES_col="smiles_polymer", 12 | NumConf=1, 13 | Length=['n',5], 14 | Steps=20, 15 | Substeps=20, 16 | Method="SA", 17 | NCores=1, 18 | OutDir='chains', 19 | Tol_ChainCorr=50, 20 | ) 21 | results = chain_builder.BuildChain() 22 | 23 | ID = "PVC" 24 | vasp_input_list = glob.glob("chains/" + ID + "/" + "*.vasp") 25 | crystal_builder = CrB.Builder( 26 | VaspInp_list=vasp_input_list, 27 | NSamples=10, 28 | InputRadius="auto", 29 | MinAtomicDis=2.0, 30 | Polymer=True, 31 | Optimize=False, 32 | NCores=1, 33 | ) 34 | results = crystal_builder.BuildCrystal() 35 | 
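The crystal_model.py example above stops after BuildCrystal(). As shown earlier in CrystalBuilder, each sampled configuration is written out as a POSCAR-format file named cryst_out-<index>.vasp. A minimal follow-up sketch (not part of the repository; crystal_dir is a placeholder for whatever output directory CrystalBuilder was configured with) for enumerating those outputs:

import glob
import os

crystal_dir = "crystals"  # placeholder: set to the OutDir used by CrystalBuilder
models = sorted(glob.glob(os.path.join(crystal_dir, "**", "*.vasp"), recursive=True))
print(len(models), "POSCAR-format crystal models found")
for path in models[:5]:
    print("  ", path)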
-------------------------------------------------------------------------------- /test/CrystalBuilder/input_chain.csv: -------------------------------------------------------------------------------- 1 | PID,smiles_polymer 2 | PVC,C(C([*])Cl)[*] 3 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/circular_oligomer.csv: -------------------------------------------------------------------------------- 1 | ID,smiles 2 | PE,[*]CC[*] 3 | PVC,C(C([*])Cl)[*] 4 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/linear_oligomer.csv: -------------------------------------------------------------------------------- 1 | ID,smiles 2 | Mol1,C13C=CC(C2C=CC1N=C2OC)N=C3OC 3 | PE,[*]CC[*] 4 | PVC,C(C([*])Cl)[*] 5 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/linear_oligomer_with_endcaps.csv: -------------------------------------------------------------------------------- 1 | ID,smiles,LeftCap,RightCap 2 | Mol1,C13C=CC(C2C=CC1N=C2OC)N=C3OC,, 3 | PE,[*]CC[*],C(Cl)(Cl)(Cl)[*],C(F)(F)(F)[*] 4 | PVC,C(C([*])Cl)[*],C(Cl)(Cl)(Cl)[*],C(F)(F)(F)[*] 5 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/molecule_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.MoleculeBuilder as mb 3 | 4 | df_smiles = pd.read_csv("linear_oligomer.csv") 5 | 6 | mol = mb.Builder( 7 | df_smiles, 8 | ID_col="ID", 9 | SMILES_col="smiles", 10 | LeftCap = "LeftCap", 11 | RightCap ='RightCap', 12 | OutDir='models', 13 | Inter_Mol_Dis=6, 14 | Length=[1,3], 15 | NumConf=1, 16 | Loop=False, 17 | NCores=1, 18 | IrrStruc=False, 19 | OPLS=False, 20 | GAFF2=True, 21 | GAFF2_atom_typing='pysimm' 22 | ) 23 | results = mol.Build() 24 | -------------------------------------------------------------------------------- /test/chain.csv: -------------------------------------------------------------------------------- 1 | PID,smiles_polymer 2 | PE,[*]CC[*] 3 | PVC,C(C([*])Cl)[*] 4 | PVC2,C(C(CC([*])Cl)Cl)[*] 5 | ABPBO,c1c2c(cc(c1)c1oc3c(n1)cc(cc3)[*])nc(o2)[*] 6 | beta-PVDF,C(C(F)(F)[*])[*] 7 | delta-PVDF,C(C[*])(CC([*])(F)F)(F)F 8 | PAN,C(CC(C[*])C#N)([*])C#N 9 | PPS,C1=CC(=CC=C1SC2=CC=C(C=C2)S[*])[*] 10 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import unittest 3 | import glob 4 | import os 5 | import psp.ChainBuilder as ChB 6 | import psp.CrystalBuilder as CrB 7 | 8 | TEST_DIR = os.path.abspath(os.path.dirname(__file__)) 9 | 10 | class PspGeneralTest(unittest.TestCase): 11 | def test_crystal_build(self): 12 | df_smiles = pd.read_csv( 13 | os.path.join(TEST_DIR, "chain.csv"), low_memory=False 14 | ) # fingerprinted data 15 | 16 | chain_builder = ChB.Builder( 17 | Dataframe=df_smiles, 18 | ID_col="PID", 19 | SMILES_col="smiles_polymer", 20 | Length=["n"], 21 | Steps=25, 22 | Substeps=10, 23 | MonomerAng="medium", 24 | DimerAng="medium", 25 | Method="SA", 26 | OutDir="chains", 27 | ) 28 | results = chain_builder.BuildChain() 29 | print(results) 30 | ID = "PVC2" 31 | vasp_input_list = glob.glob("chains/" + ID + "/" + "*.vasp") 32 | crystal_builder = CrB.Builder( 33 | VaspInp_list=vasp_input_list, 34 | NSamples=5, 35 | InputRadius="auto", 36 | MinAtomicDis=2.0, 37 | OutDir="crystals", 38 | ) 39 | results = 
crystal_builder.BuildCrystal() 40 | self.assertIsNotNone(results) 41 | print(results) 42 | --------------------------------------------------------------------------------
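For completeness, a minimal sketch (not part of the repository) of how the bundled test module above can be launched from Python. It assumes pytest is installed, the conda dependencies (rdkit, openbabel) are available in the active environment, and the snippet is run from the repository root:

import pytest

# equivalent to invoking pytest on test/test.py from the repository root
exit_code = pytest.main(["test/test.py", "-q"])
raise SystemExit(exit_code)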