├── .github └── workflows │ ├── lint.yml │ └── test.yml ├── .gitignore ├── .travis.yml ├── Colab_notebook ├── psp_Colab_notebook.ipynb └── psp_colab_notebook.py ├── Dockerfile ├── LICENSE ├── LigParGenPSP ├── BOSS2LAMMPS.py ├── BOSSReader.py ├── Converter.py ├── CreatZmat.py ├── README ├── Vector_algebra.py ├── __init__.py ├── fepzmat.py └── mol_boss.py ├── README.md ├── documentation └── PSP_user_manual.pdf ├── psp ├── AmorphousBuilder.py ├── ChainBuilder.py ├── CrystalBuilder.py ├── MD_lib.py ├── MoleculeBuilder.py ├── PSP_lib.py ├── __init__.py ├── output_lib.py └── simulated_annealing.py ├── requirements.txt ├── setup.cfg ├── setup.py └── test ├── .DS_Store ├── AmorphousBuilder ├── amor_model.py ├── amor_model_gaff2.py ├── amor_model_opls.py ├── input_PE.csv └── input_amor.csv ├── ChainBuilder ├── chain_model.py └── input_chain.csv ├── CrystalBuilder ├── crystal_model.py └── input_chain.csv ├── MoleculeBuilder ├── circular_oligomer.csv ├── linear_oligomer.csv ├── linear_oligomer_with_endcaps.csv └── molecule_model.py ├── chain.csv └── test.py /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 4 10 | matrix: 11 | python-version: [3.7] 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 0 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | - name: pycodestyle 25 | run: | 26 | pip install pycodestyle --upgrade --quiet 27 | pycodestyle psp 28 | - name: flake8 29 | run: | 30 | pip install flake8 --upgrade --quiet 31 | flake8 --extend-ignore=F841 --count --show-source --statistics psp 32 | # exit-zero treats all errors as warnings. 33 | flake8 --extend-ignore=F841 --count --exit-zero --max-complexity=20 --statistics psp 34 | # Note: enable this when docstrings are ready 35 | # - name: pydocstyle 36 | # run: | 37 | # pip install pydocstyle --upgrade --quiet 38 | # pydocstyle --count psp 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Testing - main 2 | 3 | on: [push, pull_request] 4 | 5 | 6 | jobs: 7 | test: 8 | name: Testing (${{ matrix.python-version }}, ${{ matrix.os }}) 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | fail-fast: true 12 | matrix: 13 | os: [ 14 | "ubuntu-latest", 15 | #"macos-latest", 16 | # "windows-latest" 17 | ] 18 | python-version: ["3.7"] 19 | steps: 20 | - uses: actions/checkout@v2 21 | - uses: conda-incubator/setup-miniconda@v2 22 | with: 23 | auto-update-conda: true 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | shell: bash -l {0} 27 | run: | 28 | conda install -c conda-forge rdkit openbabel==3.1.1 29 | conda install -c conda-forge tqdm 30 | conda install -c conda-forge tabulate 31 | #git clone https://github.com/polysimtools/pysimm 32 | #sudo python pysimm/complete_install.py --pysimm $PWD 33 | #source ~/.bashrc 34 | python -m pip install --upgrade pip 35 | pip install networkx 36 | pip install -e . 
37 | - name: pytest 38 | shell: bash -l {0} 39 | run: | 40 | pip install pytest pytest-cov 41 | pytest test/test.py --color=yes --cov=psp --cov-report html:coverage_reports 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | 102 | # IntelliJ environment files 103 | .idea 104 | 105 | # DS Store 106 | .DS_store 107 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.7" 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Ubuntu image as a parent image 2 | FROM ubuntu:latest 3 | 4 | # Avoid interactive dialog during package installations 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # Install necessary packages 8 | RUN apt-get update -y && \ 9 | apt-get install -y vim wget git 10 | 11 | # Set environment variables 12 | ENV CONDA_HOME=/opt/conda 13 | ENV PATH=$CONDA_HOME/bin:$PATH 14 | 15 | # Install Miniconda 16 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ 17 | bash miniconda.sh -b -p $CONDA_HOME && \ 18 | rm miniconda.sh 19 | 20 | # Set the working directory 21 | WORKDIR /opt 22 | 23 | # Create a Conda environment and install dependencies 24 | RUN /opt/conda/bin/conda create -n myenv -y python=3.8 25 | RUN /opt/conda/bin/conda init bash 26 | RUN echo "conda activate myenv" >> ~/.bashrc 27 | ENV PATH=$CONDA_HOME/envs/myenv/bin:$PATH 28 | 29 | # Install additional Python packages directly 30 | RUN conda install -n myenv -y -c anaconda scipy=1.7 pandas=1.5 'numpy<1.23.0' 31 | RUN 
/opt/conda/envs/myenv/bin/pip install rdkit 32 | RUN conda install -n myenv -y -c conda-forge openbabel 33 | RUN conda install -n myenv -y anaconda::networkx anaconda::tqdm anaconda::tabulate 34 | 35 | ## Install packmol 36 | # Clone Packmol repository 37 | RUN apt-get install -y build-essential gfortran 38 | RUN git clone https://github.com/m3g/packmol.git /opt/packmol 39 | WORKDIR /opt/packmol 40 | RUN make 41 | 42 | # Set the PACKMOL_EXEC environment variable 43 | ENV PACKMOL_EXEC=/opt/packmol/packmol 44 | 45 | ## Install pysimm 46 | WORKDIR /opt 47 | RUN git clone -b 1.1 --single-branch https://github.com/polysimtools/pysimm 48 | # Set up PYTHONPATH 49 | ENV PYTHONPATH=$PYTHONPATH:/opt/pysimm 50 | # Set up PATH 51 | ENV PATH=$PATH:/opt/pysimm/bin 52 | 53 | ## Install ambertools 54 | RUN conda install -n myenv -y -c conda-forge ambertools 55 | ENV ANTECHAMBER_EXEC=/opt/conda/envs/myenv/bin/antechamber 56 | 57 | ## Install PSP 58 | RUN git clone https://github.com/Ramprasad-Group/PSP.git 59 | WORKDIR /opt/PSP 60 | RUN /opt/conda/envs/myenv/bin/python setup.py install 61 | 62 | # Set up default Python to /opt/conda/envs/myenv/bin/python 63 | RUN echo 'export PATH=/opt/conda/envs/myenv/bin:$PATH' >> /etc/profile.d/python.sh && \ 64 | echo 'alias python=/opt/conda/envs/myenv/bin/python' >> /etc/profile.d/python.sh 65 | 66 | # Set HOME as working directory 67 | WORKDIR /root 68 | 69 | # Copy test files to /root 70 | RUN cp -r /opt/PSP/test/ /root/ 71 | 72 | # Set the default command to run your application 73 | CMD ["bash"] 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ramprasad Group, Georgia Tech, USA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LigParGenPSP/BOSS2LAMMPS.py: -------------------------------------------------------------------------------- 1 | """ 2 | SCRIPT TO WRITE LAMMPS DATA FILES LMP & INP Files 3 | FROM BOSS ZMATRIX 4 | Created on Mon Sep 30 03:31:05 2017 5 | @author: Leela S. Dodda leela.dodda@yale.edu 6 | @author: William L. 
Jorgensen Lab 7 | 8 | REQUIREMENTS: 9 | BOSS (need to set BOSSdir in bashrc and cshrc) 10 | Preferably Anaconda python with following modules 11 | pandas 12 | argparse 13 | numpy 14 | """ 15 | 16 | from LigParGenPSP.BOSSReader import bossPdbAtom2Element, ucomb 17 | from LigParGenPSP.BOSSReader import bossElement2Mass, tor_cent 18 | import pickle 19 | import pandas as pd 20 | import numpy as np 21 | 22 | 23 | def Boss2LammpsLMP(resid, num2typ2symb, Qs, bnd_df, ang_df, tor_df, molecule_data): 24 | xyz_df = molecule_data.MolData["XYZ"] 25 | max_mol_size = 50 26 | prm = open(resid + ".lmp", "w+") 27 | prm.write("LAMMPS data file Created by - (Written by Leela S. Dodda)\n\n") 28 | prm.write("%8d atoms\n" % len(Qs)) 29 | prm.write("%8d bonds\n" % len(bnd_df.KIJ)) 30 | prm.write("%8d angles\n" % len(ang_df.K)) 31 | prm.write("%8d dihedrals\n" % len(tor_df[tor_df.TY == "Proper"].index)) 32 | prm.write("%8d impropers\n \n" % len(tor_df[tor_df.TY == "Improper"].index)) 33 | prm.write("%8d atom types\n" % len(Qs)) 34 | prm.write("%8d bond types\n" % len(bnd_df.KIJ)) 35 | prm.write("%8d angle types\n" % len(ang_df.K)) 36 | prm.write("%8d dihedral types\n" % len(tor_df[tor_df.TY == "Proper"].index)) 37 | prm.write("%8d improper types\n \n" % len(tor_df[tor_df.TY == "Improper"].index)) 38 | prm.write( 39 | "%12.6f %12.6f xlo xhi\n" % (xyz_df.X.min(), xyz_df.X.min() + max_mol_size) 40 | ) 41 | prm.write( 42 | "%12.6f %12.6f ylo yhi\n" % (xyz_df.Y.min(), xyz_df.Y.min() + max_mol_size) 43 | ) 44 | prm.write( 45 | "%12.6f %12.6f zlo zhi\n" % (xyz_df.Z.min(), xyz_df.Z.min() + max_mol_size) 46 | ) 47 | # Printing Parameters for ALL BONDS/ANGLES/DIHEDRALS/IMPROPERS/Q/LJ ####### 48 | prm.write("\nMasses\n\n") 49 | for i in range(len(Qs)): 50 | prm.write("%8d %10.3f \n" % (i + 1, float(num2typ2symb[i][4]))) 51 | prm.write("\nPair Coeffs \n\n") 52 | for i in range(len(Qs)): 53 | prm.write("%8d%11.3f%11.7f \n" % (i + 1, float(Qs[i][3]), float(Qs[i][2]))) 54 | prm.write("\nBond Coeffs \n\n") 55 | for i in bnd_df.index: 56 | prm.write("%8d%11.4f%11.4f \n" % (i + 1, bnd_df.KIJ[i], bnd_df.RIJ[i])) 57 | prm.write("\nAngle Coeffs \n\n") 58 | for i in ang_df.index: 59 | prm.write("%8d%11.3f%11.3f\n" % (i + 1, ang_df.K[i], ang_df.R[i])) 60 | dihedral_df = tor_df[tor_df.TY == "Proper"] 61 | dihedral_df.index = range(len(dihedral_df.V1)) 62 | prm.write("\nDihedral Coeffs \n\n") 63 | for i, row in dihedral_df.iterrows(): 64 | prm.write( 65 | "%8d%11.3f%11.3f%11.3f%11.3f \n" % (i + 1, row.V1, row.V2, row.V3, row.V4) 66 | ) 67 | bndlist = list(bnd_df.UR) + (list(bnd_df.UR)) 68 | improper_df = tor_df[tor_df.TY == "Improper"] 69 | improper_df.index = range(len(improper_df.V2)) 70 | if len(improper_df.index) > 0: 71 | prm.write("\nImproper Coeffs \n\n") 72 | for i, row in improper_df.iterrows(): 73 | prm.write("%8d%11.3f%8d%8d \n" % (i + 1, row.V2 * 0.5, -1, 2)) 74 | # Printing EXPLICITLY ALL BONDS/ANGLES/DIHEDRALS/IMPROPERS/Q/LJ ####### 75 | prm.write("\nAtoms \n\n") 76 | for i in range(len(xyz_df.index)): 77 | prm.write( 78 | "%6d %6d %6d %10.8f %8.3f %8.5f %8.5f\n" 79 | % (i + 1, 1, i + 1, float(Qs[i][1]), xyz_df.X[i], xyz_df.Y[i], xyz_df.Z[i]) 80 | ) 81 | prm.write("\nBonds \n\n") 82 | for i in bnd_df.index: 83 | prm.write( 84 | "%6d %6d %6d %6d\n" % (i + 1, i + 1, bnd_df.cl1[i] + 1, bnd_df.cl2[i] + 1) 85 | ) 86 | prm.write("\nAngles \n\n") 87 | for i in ang_df.index: 88 | prm.write( 89 | "%6d %6d %6d %6d %6d\n" 90 | % (i + 1, i + 1, ang_df.cl1[i] + 1, ang_df.cl2[i] + 1, ang_df.cl3[i] + 1) 91 | ) 92 | 
prm.write("\nDihedrals\n\n") 93 | for i, row in dihedral_df.iterrows(): 94 | prm.write( 95 | "%6d %6d %6d %6d %6d %6d \n" 96 | % (i + 1, i + 1, row.I + 1, row.J + 1, row.K + 1, row.L + 1) 97 | ) 98 | if len(improper_df.index) > 0: 99 | prm.write("\nImpropers\n\n") 100 | for row in improper_df.iterrows(): 101 | index, dat = row 102 | ndata = tor_cent([dat.I, dat.J, dat.K, dat.L], bndlist) 103 | prm.write( 104 | "%6d %6d %6d %6d %6d %6d \n" 105 | % ( 106 | index + 1, 107 | index + 1, 108 | ndata[0] + 1, 109 | ndata[1] + 1, 110 | ndata[2] + 1, 111 | ndata[3] + 1, 112 | ) 113 | ) 114 | return None 115 | 116 | 117 | def Boss2CharmmTorsion(bnd_df, num2opls, st_no, molecule_data, num2typ2symb): 118 | dhd = [] 119 | for line in molecule_data.MolData["TORSIONS"]: 120 | dt = [float(i) for i in line] 121 | dhd.append(dt) 122 | dhd = np.array(dhd) 123 | dhd = dhd # kcal to kj conversion 124 | dhd = dhd # Klammps = Vopls 125 | dhd_df = pd.DataFrame(dhd, columns=["V1", "V2", "V3", "V4"]) 126 | ats = [] 127 | for line in molecule_data.MolData["ATOMS"][3:]: 128 | dt = [line.split()[0], line.split()[4], line.split()[6], line.split()[8]] 129 | dt = [int(d) for d in dt] 130 | ats.append(dt) 131 | for line in molecule_data.MolData["ADD_DIHED"]: 132 | dt = [int(i) for i in line] 133 | ats.append(dt) 134 | assert len(ats) == len( 135 | dhd 136 | ), "Number of Dihedral angles in Zmatrix and Out file dont match" 137 | ats = np.array(ats) - st_no 138 | for i in range(len(ats)): 139 | for j in range(len(ats[0])): 140 | if ats[i][j] < 0: 141 | ats[i][j] = 0 142 | at_df = pd.DataFrame(ats, columns=["I", "J", "K", "L"]) 143 | # final_df = pd.concat([dhd_df, at_df], axis=1, join_axes=[at_df.index]) backup 144 | final_df = pd.concat([dhd_df, at_df], axis=1) 145 | final_df = final_df.reindex(dhd_df.index) 146 | 147 | bndlist = list(bnd_df.UR) + (list(bnd_df.UR)) 148 | final_df["TY"] = [ 149 | "Proper" 150 | if ucomb( 151 | list([final_df.I[n], final_df.J[n], final_df.K[n], final_df.L[n]]), bndlist 152 | ) 153 | == 3 154 | else "Improper" 155 | for n in range(len(final_df.I)) 156 | ] 157 | final_df["TI"] = [num2typ2symb[j][2] for j in final_df.I] 158 | final_df["TJ"] = [num2typ2symb[j][2] for j in final_df.J] 159 | final_df["TK"] = [num2typ2symb[j][2] for j in final_df.K] 160 | final_df["TL"] = [num2typ2symb[j][2] for j in final_df.L] 161 | final_df["SYMB"] = [ 162 | "-".join( 163 | [ 164 | num2typ2symb[final_df.I[i]][0], 165 | num2typ2symb[final_df.J[i]][0], 166 | num2typ2symb[final_df.K[i]][0], 167 | num2typ2symb[final_df.L[i]][0], 168 | ] 169 | ) 170 | for i in final_df.index 171 | ] 172 | if len(final_df.index) > 0: 173 | final_df["NAME"] = ( 174 | final_df.TI + "-" + final_df.TJ + "-" + final_df.TK + "-" + final_df.TL 175 | ) 176 | return final_df 177 | 178 | 179 | def boss2CharmmBond(molecule_data, st_no): 180 | bdat = molecule_data.MolData["BONDS"] 181 | bdat["cl1"] = [x - st_no if not x - st_no < 0 else 0 for x in bdat["cl1"]] 182 | bdat["cl2"] = [x - st_no if not x - st_no < 0 else 0 for x in bdat["cl2"]] 183 | bnd_df = pd.DataFrame(bdat) 184 | bnd_df["UF"] = ( 185 | (bnd_df.cl1 + bnd_df.cl2) * (bnd_df.cl1 + bnd_df.cl2 + 1) * 0.5 186 | ) + bnd_df.cl2 187 | bnd_df["UR"] = ( 188 | (bnd_df.cl1 + bnd_df.cl2) * (bnd_df.cl1 + bnd_df.cl2 + 1) * 0.5 189 | ) + bnd_df.cl1 190 | hb_df = bnd_df.drop(["cl1", "cl2", "UF", "UR"], 1) 191 | hb_df = hb_df.drop_duplicates() 192 | return bnd_df 193 | 194 | 195 | def boss2CharmmAngle(anglefile, num2opls, st_no): 196 | adat = anglefile 197 | adat["cl1"] = [x - st_no if not x - st_no < 
0 else 0 for x in adat["cl1"]] 198 | adat["cl2"] = [x - st_no if not x - st_no < 0 else 0 for x in adat["cl2"]] 199 | adat["cl3"] = [x - st_no if not x - st_no < 0 else 0 for x in adat["cl3"]] 200 | ang_df = pd.DataFrame(adat) 201 | ang_df = ang_df[ang_df.K > 0] 202 | ang_df["TY"] = np.array( 203 | [ 204 | num2opls[i] + "-" + num2opls[j] + "-" + num2opls[k] 205 | for i, j, k in zip(ang_df.cl1, ang_df.cl2, ang_df.cl3) 206 | ] 207 | ) 208 | return ang_df 209 | 210 | 211 | def bossData(molecule_data): 212 | ats_file = molecule_data.MolData["ATOMS"] 213 | types = [] 214 | for i in enumerate(ats_file): 215 | types.append([i[1].split()[1], "opls_" + i[1].split()[2]]) 216 | st_no = 3 217 | Qs = molecule_data.MolData["Q_LJ"] 218 | assert len(Qs) == len(types), "Please check the at_info and Q_LJ_dat files" 219 | num2opls = {} 220 | for i in range(0, len(types)): 221 | num2opls[i] = Qs[i][0] 222 | num2typ2symb = {i: types[i] for i in range(len(Qs))} 223 | for i in range(len(Qs)): 224 | num2typ2symb[i].append( 225 | bossPdbAtom2Element(num2typ2symb[i][0]) + num2typ2symb[i][1][-3:] 226 | ) 227 | num2typ2symb[i].append(bossPdbAtom2Element(num2typ2symb[i][0])) 228 | num2typ2symb[i].append(bossElement2Mass(num2typ2symb[i][3])) 229 | num2typ2symb[i].append(Qs[i][0]) 230 | return (types, Qs, num2opls, st_no, num2typ2symb) 231 | 232 | 233 | def Boss2Lammps(resid, molecule_data): 234 | types, Qs, num2opls, st_no, num2typ2symb = bossData(molecule_data) 235 | bnd_df = boss2CharmmBond(molecule_data, st_no) 236 | ang_df = boss2CharmmAngle(molecule_data.MolData["ANGLES"], num2opls, st_no) 237 | tor_df = Boss2CharmmTorsion(bnd_df, num2opls, st_no, molecule_data, num2typ2symb) 238 | Boss2LammpsLMP(resid, num2typ2symb, Qs, bnd_df, ang_df, tor_df, molecule_data) 239 | return None 240 | 241 | 242 | def mainBOSS2LAMMPS(resid, clu=False): 243 | mol = pickle.load(open(resid + ".p", "rb")) 244 | Boss2Lammps(resid, mol) 245 | return None 246 | -------------------------------------------------------------------------------- /LigParGenPSP/BOSSReader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import numpy as np 4 | from LigParGenPSP.mol_boss import new_mol_info 5 | import pandas as pd 6 | from collections import OrderedDict 7 | 8 | from LigParGenPSP.fepzmat import BCC_file2zmat 9 | import warnings 10 | 11 | warnings.simplefilter(action='ignore', category=FutureWarning) 12 | 13 | 14 | def VerifyMolandSave(mol, charge, resname): 15 | if mol is not None: 16 | import pickle 17 | 18 | assert ( 19 | mol.MolData['TotalQ']['Reference-Solute'] == charge 20 | ), "PROPOSED CHARGE IS NOT POSSIBLE: SOLUTE MAY BE AN OPEN SHELL" 21 | pickle.dump(mol, open(resname + ".p", "wb")) 22 | else: 23 | print('Problem Detected Molecule Object Not created') 24 | return None 25 | 26 | 27 | def LinCheck(fname): 28 | imp_dat = 0 29 | zlines = open(fname, 'r').readlines() 30 | for i in range(len(zlines)): 31 | if 'Geometry Variations follow ' in zlines[i]: 32 | imp_dat = i 33 | Atypes = [] 34 | for i in zlines[1:imp_dat]: 35 | Atypes.append(i.split()[2]) 36 | Atypes = np.array(Atypes, dtype=np.int) 37 | Atypes = Atypes[Atypes < 0] 38 | Check = False 39 | if len(Atypes) > 2: 40 | Check = True 41 | return Check 42 | 43 | 44 | def mod_add_diheds(line): 45 | adihed = [int(i) for i in line.split()[0:4]] + [-1, -1] 46 | return adihed 47 | 48 | 49 | def fix_add_dihed(zmat_name): 50 | flines = open('%s.z' % zmat_name, 'r').readlines() 51 | imp_lines = [] 52 | for 
i in range(len(flines)): 53 | if 'Additional Dihedrals follow' in flines[i]: 54 | imp_lines.append(i + 1) 55 | elif 'Domain Definitions follow' in flines[i]: 56 | imp_lines.append(i) 57 | ofile = open('%s_fixed.z' % zmat_name, 'w+') 58 | for line in flines[0: imp_lines[0]]: 59 | ofile.write('%s\n' % (line.rstrip())) 60 | for line in flines[imp_lines[0]: imp_lines[1]]: 61 | m_ad = mod_add_diheds(line) 62 | ofile.write( 63 | '%4d%4d%4d%4d%4d%4d\n' 64 | % (m_ad[0], m_ad[1], m_ad[2], m_ad[3], m_ad[4], m_ad[5]) 65 | ) 66 | for line in flines[imp_lines[1]:]: 67 | ofile.write('%s\n' % line.rstrip()) 68 | ofile.close() 69 | return None 70 | 71 | 72 | def CheckForHs(atoms): 73 | atype = [line.split()[1][0] for line in atoms] 74 | ans = False 75 | if 'H' in atype: 76 | ans = True 77 | return ans 78 | 79 | 80 | def bcc_db(): 81 | ''' 82 | 19 LBCCs from 1.14*CM1A-LBCC paper 83 | ''' 84 | lbcc = { 85 | 'C#-C=': 0.0, 86 | 'C-N': 0.0, 87 | 'C-O': 0.05, 88 | 'C-OE': 0.0, 89 | 'C-OH': 0.0, 90 | 'C-OS': 0.0, 91 | 'CA-Br': 0.19, 92 | 'CA-C': 0.0, 93 | 'CA-C!': -0.0, 94 | 'CA-C=': 0.0, 95 | 'CA-CB': -0.0, 96 | 'CA-CE': 0.0, 97 | 'CA-CF': 0.0, 98 | 'CA-CK': -0.0, 99 | 'CA-CT': 0.0, 100 | 'CA-CZ': 0.0, 101 | 'CA-CZA': 0.0, 102 | 'CA-Cl': 0.0, 103 | 'CA-F': 0.13, 104 | 'CA-I': 0.0, 105 | 'CA-N3': 0.0, 106 | 'CA-NC': 0.07, 107 | 'CA-NO': -0.08, 108 | 'CA-NP': 0.06, 109 | 'CA-NS': 0.0, 110 | 'CA-OH': 0.22, 111 | 'CA-OS': -0.0, 112 | 'CA-S': -0.0, 113 | 'CA-SH': -0.0, 114 | 'CAM-CA': 0.0, 115 | 'CAM-CT': 0.0, 116 | 'CAM-N': 0.0, 117 | 'CAM-O': 0.0, 118 | 'CB-C=': -0.0, 119 | 'CB-NC': -0.0, 120 | 'CE-O': -0.0, 121 | 'CE-OE': 0.0, 122 | 'CE-OS': 0.0, 123 | 'CF-F': -0.0, 124 | 'CF-OS': -0.0, 125 | 'CK-O': -0.0, 126 | 'CM-C': 0.0, 127 | 'CM-C=': -0.0, 128 | 'CM-CT': -0.0, 129 | 'CM-Cl': -0.0, 130 | 'CP-CS': 0.0, 131 | 'CP-SA': -0.0, 132 | 'CT-Br': 0.08, 133 | 'CT-C': -0.0, 134 | 'CT-C=': 0.0, 135 | 'CT-CE': -0.0, 136 | 'CT-CF': 0.0, 137 | 'CT-CK': -0.0, 138 | 'CT-CP': 0.0, 139 | 'CT-CZ': -0.0, 140 | 'CT-CZT': -0.0, 141 | 'CT-Cl': 0.1, 142 | 'CT-F': -0.0, 143 | 'CT-I': -0.0, 144 | 'CT-N': -0.0, 145 | 'CT-N3': -0.0, 146 | 'CT-NO': 0.0, 147 | 'CT-NP': 0.04, 148 | 'CT-NS': -0.0, 149 | 'CT-NT': -0.0, 150 | 'CT-OE': -0.0, 151 | 'CT-OH': 0.1, 152 | 'CT-OS': -0.0, 153 | 'CT-S': 0.08, 154 | 'CT-SH': 0.175, 155 | 'CT-SZ': 0.0, 156 | 'CY-C': 0.0, 157 | 'CY-CE': 0.0, 158 | 'CZ-NZ': -0.0, 159 | 'CZA-NZ': 0.09, 160 | 'CZT-NZ': 0.03, 161 | 'H-N': -0.0, 162 | 'H-N3': -0.0, 163 | 'H-NP': -0.05, 164 | 'H-NS': -0.0, 165 | 'H-NT': -0.0, 166 | 'HA-CA': -0.01, 167 | 'HA-CM': 0.0, 168 | 'HA-CP': -0.0, 169 | 'HA-CS': -0.0, 170 | 'HC-C': 0.0, 171 | 'HC-C#': -0.0, 172 | 'HC-C=': -0.0, 173 | 'HC-CAM': 0.0, 174 | 'HC-CE': 0.0, 175 | 'HC-CF': -0.0, 176 | 'HC-CM': -0.0, 177 | 'HC-CT': 0.0, 178 | 'HC-CY': 0.0, 179 | 'HC-CZ': -0.0, 180 | 'HO-OH': 0.0, 181 | 'HS-SH': 0.0, 182 | 'NO-ON': -0.18, 183 | 'O-P': 0.0, 184 | 'OS-P': 0.0, 185 | 'OY-SZ': 0.06, 186 | 'U-U': 0.0, 187 | 'X-X': 0.0, 188 | } 189 | db = OrderedDict(lbcc) 190 | return db 191 | 192 | 193 | def Refine_PDB_file(fname): 194 | flines = open(fname, 'r+').readlines() 195 | pdb_lines = [] 196 | for line in flines: 197 | if ('ATOM' in line) or ('HETATM' in line): 198 | line = line.rstrip() 199 | line = line.lstrip() 200 | if 'DUM' not in line: 201 | pdb_lines.append(line) 202 | return pdb_lines 203 | 204 | 205 | def get_coos_from_pdb(pdb_dat): 206 | atoms = [] 207 | coos = [] 208 | for line in pdb_dat: 209 | atom = line.split()[2] 210 | x, y, z = line[28:56].split() 211 | atoms.append(atom) 
212 | coos.append([float(x), float(y), float(z)]) 213 | return (atoms, coos) 214 | 215 | 216 | def pairing_func(a, b): 217 | ans = (a + b) * (a + b + 1) * 0.5 218 | if a > b: 219 | ans = ans + a 220 | pans = '%6d%6d' % (b, a) 221 | else: 222 | ans = ans + b 223 | pans = '%6d%6d' % (a, b) 224 | return (int(ans), pans) 225 | 226 | 227 | def ucomb(vec, blist): 228 | res = 0 229 | for a in vec: 230 | vec.remove(a) 231 | for b in vec: 232 | ans = (a + b) * (a + b + 1) * 0.5 233 | if (ans + a in blist) or (ans + b in blist): 234 | res = res + 1 235 | return res 236 | 237 | 238 | def tor_cent(vec, blist): 239 | db = {} 240 | for a in vec: 241 | na = 0 242 | for b in vec: 243 | ans = (a + b) * (a + b + 1) * 0.5 244 | if (ans + a in blist) or (ans + b in blist): 245 | na += 1 246 | db[a] = na 247 | new_vec = list(sorted(db, key=db.__getitem__, reverse=True)) 248 | return new_vec 249 | 250 | 251 | def bossPdbAtom2Element(attype): 252 | elem = ''.join([i for i in attype[:-1] if not i.isdigit()]) 253 | return elem 254 | 255 | 256 | def bossElement2Mass(elem): 257 | symb2mass = { 258 | 'H': 1.008, 259 | 'F': 18.998403163, 260 | 'Cl': 35.45, 261 | 'Br': 79.904, 262 | 'I': 126.90447, 263 | 'O': 15.999, 264 | 'S': 32.06, 265 | 'N': 14.007, 266 | 'P': 30.973761998, 267 | 'C': 12.011, 268 | 'Si': 28.085, 269 | 'Na': 22.98976928, 270 | 'SOD': 22.98976928, 271 | 'K': 39.0983, 272 | 'Mg': 24.305, 273 | 'Ca': 40.078, 274 | 'Mn': 54.938044, 275 | 'Fe': 55.845, 276 | 'Co': 58.933194, 277 | 'Ni': 58.6934, 278 | 'Cu': 63.546, 279 | 'Zn': 65.38, 280 | } 281 | try: 282 | res = symb2mass[elem] 283 | except NameError: 284 | print("Mass for atom %s is not available \n add it to symb2mass dictionary") 285 | return res 286 | 287 | 288 | def Refine_file(fname): 289 | flines = open(fname, 'r+') 290 | lines = [] 291 | for line in flines: 292 | if line.rstrip(): 293 | line = line.rstrip() 294 | line = line.lstrip() 295 | lines.append(line) 296 | flines.close() 297 | return lines 298 | 299 | 300 | class BOSSReader(object): 301 | def __init__(self, zmatrix, outdir, optim, charge=0, lbcc=False): 302 | self.zmat = zmatrix 303 | self.outdir = outdir 304 | self.impDat = {} 305 | self.MolData = {} 306 | self.refine_data(optim, charge, lbcc) 307 | 308 | def Get_OPT(self, optim, charge): 309 | assert os.path.isfile(self.zmat), 'File named %10s does not exist' % self.zmat 310 | assert ('BOSSdir' in os.environ) and os.path.isfile( 311 | (os.environ['BOSSdir'] + '/scripts/xZCM1A') 312 | ), 'Please Make sure $BOSSdir is defined \n xZCM1A and related files are in scripts directory of BOSS' 313 | execs = { 314 | 2: os.environ['BOSSdir'] + '/scripts/xZCM1A+2 > olog', 315 | 1: os.environ['BOSSdir'] + '/scripts/xZCM1A+ > olog', 316 | 0: os.environ['BOSSdir'] + '/scripts/xZCM1A > olog', 317 | -1: os.environ['BOSSdir'] + '/scripts/xZCM1A- > olog', 318 | -2: os.environ['BOSSdir'] + '/scripts/xZCM1A-2 > olog', 319 | } 320 | # print('MOLECULE HAS A CHARGE of %d' % charge) 321 | if optim > 0: 322 | print('Optimization level requested %d' % optim) 323 | for opt_lev in range(optim): 324 | print('Performing Stage %d of Charge Generation' % (opt_lev + 1)) 325 | execfile = execs[charge] 326 | coma = execfile + ' ' + self.zmat[:-2] 327 | os.system(coma) 328 | os.system('cp sum %s' % (self.zmat)) 329 | execfile = os.environ['BOSSdir'] + '/scripts/xOPT > olog' 330 | coma = execfile + ' ' + self.zmat[:-2] 331 | os.system(coma) 332 | # os.system('cd ' + self.outdir +';/bin/cp sum %s' % (self.zmat)) 333 | os.system('/bin/cp sum %s' % (self.zmat)) 334 | 
execfile = os.environ['BOSSdir'] + '/scripts/xSPM > olog' 335 | coma = execfile + ' ' + self.zmat[:-2] 336 | os.system(coma) 337 | # os.system('cd ' + self.outdir + ';/bin/cp sum %s' % (self.zmat)) 338 | os.system('/bin/cp sum %s' % (self.zmat)) 339 | return None 340 | 341 | def get_addihed(self, data): 342 | add = [] 343 | nadd = 0 344 | for line in data: 345 | if line[0].isdigit(): 346 | add.append(line.split()[0:4]) 347 | nadd = nadd + 1 348 | return add 349 | 350 | def get_atinfo(self, data): 351 | ats = [] 352 | nat = 0 353 | for line in data: 354 | if line[0].isdigit() and float(line.split()[2]) > 1: 355 | ats.append(line) 356 | nat += 1 357 | return ats 358 | 359 | def get_charge(self, data): 360 | TotQ = {} 361 | for line in data[1:]: 362 | words = line.split() 363 | TotQ['-'.join(words[:-1])] = round(float(words[-1]), 3) 364 | return TotQ 365 | 366 | def get_tors(self, data): 367 | tors = [] 368 | ntor = 0 369 | for line in data: 370 | if 'All Solutes' in line: 371 | tors.append(line.split()[4:8]) 372 | for tor in line.split()[4:8]: 373 | if abs(float(tor)) > 0.0: 374 | ntor = ntor + 1 375 | return tors 376 | 377 | def get_QLJ(self, data): 378 | qlj = [] 379 | nqlj = 0 380 | for line in data: 381 | if 'All Solutes' in line and line[0].isalpha(): 382 | qlj.append( 383 | [line.split()[0], line.split()[2], line.split()[3], line.split()[4]] 384 | ) 385 | nqlj += 1 386 | return qlj 387 | 388 | def get_angs(self, data): 389 | angs = {'cl1': [], 'cl2': [], 'cl3': [], 'R': [], 'K': []} 390 | nang = 0 391 | for line in data: 392 | if line[0].isdigit() and float(line.split()[4]) > 0: 393 | word = line.split() 394 | angs['cl1'].append(int(word[0])) 395 | angs['cl2'].append(int(word[1])) 396 | angs['cl3'].append(int(word[2])) 397 | angs['R'].append(float(word[3])) 398 | angs['K'].append(float(word[4])) 399 | nang = nang + 1 400 | # print 'Total No of Non-zero Angles in BOSS is %d' % (nang) 401 | return angs 402 | 403 | def get_XYZ(self, data): 404 | XYZ = {'at_num': [], 'X': [], 'Y': [], 'Z': [], 'at_symb': []} 405 | for line in data: 406 | if line[0].isdigit() and len(line.split()) == 5: 407 | word = line.split() 408 | if int(word[0]) > 0: 409 | XYZ['at_num'].append(int(word[0])) 410 | XYZ['X'].append(float(word[1])) 411 | XYZ['Y'].append(float(word[2])) 412 | XYZ['Z'].append(float(word[3])) 413 | XYZ['at_symb'].append(word[4]) 414 | XYZ = pd.DataFrame(XYZ) 415 | return XYZ 416 | 417 | def get_pairs(self, data): 418 | data = data[1:] 419 | plnos = [] 420 | for i in range(0, len(data)): 421 | if 'Atom' in data[i]: 422 | plnos.append(i) 423 | plnos.append(len(data)) 424 | pair_dat = { 425 | i: ' '.join(data[plnos[i]: plnos[i + 1]]) for i in range(len(plnos) - 1) 426 | } 427 | for nu in range(len(plnos) - 1): 428 | pair_dat[nu] = list(pair_dat[nu][10:].split()) 429 | pair_dat[nu] = np.array([int(a) - 2 for a in pair_dat[nu]]) 430 | pairs = [] 431 | for k in pair_dat.keys(): 432 | for j in pair_dat[k]: 433 | pairs.append('%6d%6d%6d\n' % (k - 1, j, 1)) 434 | return pairs 435 | 436 | def get_bonds(self, data): 437 | bnds = {'cl1': [], 'cl2': [], 'RIJ': [], 'KIJ': [], 'TIJ': []} 438 | nbnd = 0 439 | for line in data: 440 | if line[0].isdigit() and float(line.split()[3]) > 0: 441 | word = line.split() 442 | bnds['cl1'].append(int(word[0])) 443 | bnds['cl2'].append(int(word[1])) 444 | bnds['RIJ'].append(float(word[2])) 445 | bnds['KIJ'].append(float(word[3])) 446 | bnds['TIJ'].append(line[-5:]) 447 | nbnd += 1 448 | return bnds 449 | 450 | def prep_lbcc(self, bond_data, qdata): 451 | db = bcc_db() 
452 | bnd_df = pd.DataFrame(bond_data) 453 | bnd_df = bnd_df[['cl1', 'cl2']] 454 | bnd_df.columns = ['I', 'J'] 455 | q_df = pd.DataFrame(columns=['TY', 'Q']) 456 | q_df.loc[0] = ['1', 0.000] 457 | q_df.loc[1] = ['2', 0.000] 458 | for i in range(len(qdata)): 459 | q_df.loc[i + 2] = [qdata[i][0], float(qdata[i][1])] 460 | bond, cha, QBC1 = new_mol_info(db, q_df, bnd_df) 461 | lbcc_qdat = [] 462 | for i in range(len(qdata)): 463 | lbcc_qdat.append( 464 | [qdata[i][0], str(cha.QBCC.values[i]), qdata[i][2], qdata[i][3]] 465 | ) 466 | bond.to_csv('LBCC_BONDS.csv', index=False) 467 | cha.to_csv('LBCC_CHARGES.csv', index=False) 468 | return np.array(cha.QBCC), lbcc_qdat 469 | 470 | def cleanup(self): 471 | # os.system('cd ' + self.outdir + ';/bin/rm sum log olog out plt.pdb') 472 | os.system('/bin/rm sum log olog out plt.pdb') 473 | 474 | def get_ImpDat(self, optim, charge): 475 | self.Get_OPT(optim, charge) 476 | odat = Refine_file('out') 477 | sdat = Refine_file('sum') 478 | MolData = {} 479 | impDat = {} 480 | MolData['PDB'] = Refine_file('plt.pdb') 481 | for nl in range(len(odat)): 482 | if 'Z-Matrix for Reference Solutes' in odat[nl]: 483 | impDat['ATMinit'] = nl 484 | elif 'Net Charge' in odat[nl]: 485 | impDat['TotalQ'] = nl 486 | elif 'OPLS Force Field Parameters' in odat[nl]: 487 | impDat['ATMfinal'] = nl 488 | impDat['NBDinit'] = nl 489 | elif 'Fourier Coefficients' in odat[nl]: 490 | impDat['TORinit'] = nl 491 | impDat['NBDfinal'] = nl 492 | elif 'Bond Stretching Parameters' in odat[nl]: 493 | impDat['TORfinal'] = nl 494 | impDat['BNDinit'] = nl 495 | elif 'Angle Bending Parameters' in odat[nl]: 496 | impDat['BNDfinal'] = nl 497 | impDat['ANGinit'] = nl 498 | elif 'Non-bonded Pairs List' in odat[nl]: 499 | impDat['ANGfinal'] = nl 500 | impDat['PAIRinit'] = nl 501 | elif 'Solute 0: X Y Z' in odat[nl]: 502 | impDat['XYZinit'] = nl 503 | elif 'Atom I Atom J RIJ' in odat[nl]: 504 | impDat['XYZfinal'] = nl 505 | elif 'Checking' in odat[nl]: 506 | impDat['PAIRfinal'] = nl 507 | # THIS PART IS READ FROM SUM FILE ### 508 | for ml in range(len(sdat)): 509 | if 'Additional Dihedrals follow' in sdat[ml]: 510 | impDat['ADDinit'] = ml 511 | elif 'Domain Definitions follow' in sdat[ml]: 512 | impDat['ADDfinal'] = ml 513 | # THIS PART IS READ FROM SUM FILE ### 514 | MolData['ATOMS'] = self.get_atinfo(odat[impDat['ATMinit']: impDat['ATMfinal']]) 515 | MolData['Q_LJ'] = self.get_QLJ(odat[impDat['NBDinit']: impDat['NBDfinal']]) 516 | MolData['BONDS'] = self.get_bonds(odat[impDat['BNDinit']: impDat['BNDfinal']]) 517 | MolData['ANGLES'] = self.get_angs(odat[impDat['ANGinit']: impDat['ANGfinal']]) 518 | MolData['TORSIONS'] = self.get_tors( 519 | odat[impDat['TORinit']: impDat['TORfinal']] 520 | ) 521 | MolData['ADD_DIHED'] = self.get_addihed( 522 | sdat[impDat['ADDinit']: impDat['ADDfinal']] 523 | ) 524 | MolData['XYZ'] = self.get_XYZ(odat[impDat['XYZinit']: impDat['XYZfinal']]) 525 | MolData['PAIRS'] = self.get_pairs( 526 | odat[impDat['PAIRinit']: impDat['PAIRfinal']] 527 | ) 528 | MolData['TotalQ'] = self.get_charge( 529 | odat[impDat['TotalQ']: impDat['TotalQ'] + 4] 530 | ) 531 | return MolData 532 | 533 | def refine_data(self, optim, charge, lbcc): 534 | if lbcc and (charge == 0): 535 | lbcc_MD = self.get_ImpDat(optim, charge) 536 | QLBCC, DATA_Q_LJ = self.prep_lbcc(lbcc_MD['BONDS'], lbcc_MD['Q_LJ']) 537 | lbcc_MD['Q_LJ'] = DATA_Q_LJ 538 | BCC_file2zmat(self.zmat, QLBCC, oname='%s_BCC.z' % self.zmat[:-2]) 539 | os.system('mv %s.z %s_NO_LBCC.z' % (self.zmat[:-2], self.zmat[:-2])) 540 | os.system('mv 
%s_BCC.z %s.z' % (self.zmat[:-2], self.zmat[:-2])) 541 | self.MolData = lbcc_MD 542 | elif lbcc and (charge != 0): 543 | print('LBCC IS SUPPORTED ONLY FOR NEUTRAL MOLECULES') 544 | else: 545 | self.MolData = self.get_ImpDat(optim, charge) 546 | return None 547 | -------------------------------------------------------------------------------- /LigParGenPSP/Converter.py: -------------------------------------------------------------------------------- 1 | from LigParGenPSP.BOSSReader import BOSSReader, CheckForHs 2 | from LigParGenPSP.BOSS2LAMMPS import mainBOSS2LAMMPS 3 | from LigParGenPSP.CreatZmat import GenMolRep 4 | import argparse 5 | import pickle 6 | import os 7 | from openbabel import openbabel as ob 8 | 9 | obConversion = ob.OBConversion() 10 | obConversion.SetInAndOutFormats("pdb", "mol") 11 | 12 | 13 | def main(): 14 | 15 | parser = argparse.ArgumentParser( 16 | prog='LigParGenPSP', 17 | formatter_class=argparse.RawDescriptionHelpFormatter, 18 | description=""" 19 | Ligand Parameter Generator Based on 20 | Jorgensen group's OPLS-AA/CM1A(-LBCC) FF 21 | Created on Mon Feb 15 15:40:05 2016 22 | @author: Leela S. Dodda leela.dodda@yale.edu 23 | @author: William L. Jorgensen Lab 24 | 25 | FF formats provided : 26 | -------------------- 27 | OpenMM .xml 28 | CHARMM/NAMD .prm & .rtf 29 | GROMACS .itp & .gro 30 | CNS/X-PLOR .param & .top 31 | Q .Q.prm & .Q.lib 32 | DESMOND .cms 33 | BOSS/MCPRO .z 34 | PDB2PQR .pqr 35 | 36 | Input Files supported : 37 | -------------------- 38 | SMILES code 39 | PDB 40 | MDL MOL Format 41 | 42 | ################################################ 43 | if using MOL file 44 | Usage: LigParGenPSP -m phenol.mol -r PHN -c 0 -o 0 45 | 46 | if using PDB file 47 | Usage: LigParGenPSP -p phenol.pdb -r PHN -c 0 -o 0 48 | 49 | if using BOSS SMILES CODE 50 | Usage: LigParGenPSP -s 'c1ccc(cc1)O' -r PHN -c 0 -o 0 51 | 52 | REQUIREMENTS: 53 | BOSS (need to set BOSSdir in bashrc and cshrc) 54 | Preferably Anaconda python with following modules 55 | pandas 56 | argparse 57 | numpy 58 | openbabel 59 | 60 | Please cite following references: 61 | 1. LigParGen web server: an automatic OPLS-AA parameter generator for organic ligands 62 | Leela S. Dodda Israel Cabeza de Vaca Julian Tirado-Rives William L. Jorgensen 63 | Nucleic Acids Research, Volume 45, Issue W1, 3 July 2017, Pages W331–W336 64 | 2. 1.14*CM1A-LBCC: Localized Bond-Charge Corrected CM1A Charges for Condensed-Phase Simulations 65 | Leela S. Dodda, Jonah Z. Vilseck, Julian Tirado-Rives , and William L. Jorgensen 66 | Department of Chemistry, Yale University, New Haven, Connecticut 06520-8107, United States 67 | J. Phys. Chem. B, 2017, 121 (15), pp 3864–3870 68 | 3. Accuracy of free energies of hydration using CM1 and CM3 atomic charges. 69 | Udier–Blagović, M., Morales De Tirado, P., Pearlman, S. A. and Jorgensen, W. L. 70 | J. Comput. Chem., 2004, 25,1322–1332. 
doi:10.1002/jcc.20059 71 | """, 72 | ) 73 | parser.add_argument("-r", "--resname", help="Residue name from PDB FILE", type=str) 74 | parser.add_argument( 75 | "-s", "--smiles", help="Paste SMILES code from CHEMSPIDER or PubChem", type=str 76 | ) 77 | parser.add_argument( 78 | "-m", "--mol", help="Submit MOL file from CHEMSPIDER or PubChem", type=str 79 | ) 80 | parser.add_argument( 81 | "-p", "--pdb", help="Submit PDB file from CHEMSPIDER or PubChem", type=str 82 | ) 83 | parser.add_argument( 84 | "-o", 85 | "--opt", 86 | help="Optimization or Single Point Calculation", 87 | type=int, 88 | choices=[0, 1, 2, 3], 89 | ) 90 | parser.add_argument( 91 | "-c", 92 | "--charge", 93 | type=int, 94 | choices=[0, -1, 1, -2, 2], 95 | help="0: Neutral <0: Anion >0: Cation ", 96 | ) 97 | parser.add_argument( 98 | "-l", 99 | "--lbcc", 100 | help="Use 1.14*CM1A-LBCC charges instead of 1.14*CM1A", 101 | action="store_true", 102 | ) 103 | parser.add_argument( 104 | "-d", "--outdir", help="PATH for output directory", type=str, default='.' 105 | ) 106 | args = parser.parse_args() 107 | 108 | convert(**vars(args)) 109 | 110 | 111 | def convert(**kwargs): 112 | 113 | # set the default values 114 | options = { 115 | 'opt': 0, 116 | 'smiles': None, 117 | 'zmat': None, 118 | 'charge': 0, 119 | 'lbcc': False, 120 | 'mol': None, 121 | 'resname': 'UNK', 122 | 'pdb': None, 123 | } 124 | 125 | # update the default values based on the arguments 126 | options.update(kwargs) 127 | 128 | # set the arguments that you would used to get from argparse 129 | opt = options['opt'] 130 | smiles = options['smiles'] 131 | # zmat = options['zmat'] 132 | charge = options['charge'] 133 | lbcc = options['lbcc'] 134 | resname = options['resname'] 135 | mol = options['mol'] 136 | pdb = options['pdb'] 137 | outdir = options['outdir'] 138 | if opt is not None: 139 | optim = opt 140 | else: 141 | optim = 0 142 | 143 | clu = False 144 | 145 | # assert (which('obabel') 146 | # is not None), "OpenBabel is Not installed or \n the executable location is not accessable" 147 | if os.path.exists(outdir + resname + '.xml'): 148 | os.system('/bin/rm ' + outdir + resname + '.*') 149 | if lbcc: 150 | if charge == 0: 151 | lbcc = True 152 | print('LBCC converter is activated') 153 | else: 154 | lbcc = False 155 | print( 156 | '1.14*CM1A-LBCC is only available for neutral molecules\n Assigning unscaled CM1A charges' 157 | ) 158 | 159 | if smiles is not None: 160 | os.chdir(outdir) 161 | smifile = open('%s.smi' % resname, 'w+') 162 | smifile.write('%s' % smiles) 163 | smifile.close() 164 | GenMolRep('%s.smi' % resname, optim, resname, charge) 165 | mol = BOSSReader('%s.z' % resname, '%s' % outdir, optim, charge, lbcc) 166 | elif mol is not None: 167 | if not os.path.exists(os.path.join(outdir, mol)): 168 | os.system('cp %s %s' % (mol, outdir)) 169 | os.chdir(outdir) 170 | GenMolRep(mol, optim, resname, charge) 171 | mol = BOSSReader('%s.z' % resname, '%s' % outdir, optim, charge, lbcc) 172 | elif pdb is not None: 173 | if not os.path.exists(os.path.join(outdir, pdb)): 174 | os.system('cp %s %s' % (pdb, outdir)) 175 | os.chdir(outdir) 176 | # Convert pdb to mol using Obabelv3 177 | mole = ob.OBMol() 178 | obConversion.ReadFile(mole, pdb) 179 | mol = pdb.replace('pdb', 'mol') 180 | obConversion.WriteFile(mole, mol) 181 | GenMolRep(mol, optim, resname, charge) 182 | mol = BOSSReader('%s.z' % resname, '%s' % outdir, optim, charge, lbcc) 183 | clu = True 184 | assert ( 185 | mol.MolData['TotalQ']['Reference-Solute'] == charge 186 | ), "PROPOSED CHARGE IS NOT 
POSSIBLE: SOLUTE MAY BE AN OPEN SHELL" 187 | assert CheckForHs( 188 | mol.MolData['ATOMS'] 189 | ), "Hydrogens are not added. Please add Hydrogens" 190 | 191 | pickle.dump(mol, open(resname + ".p", "wb")) 192 | mainBOSS2LAMMPS(resname, clu) 193 | print('DONE WITH LAMMPS') 194 | 195 | # Cleanup 196 | list_files = [ 197 | "sum", 198 | "log", 199 | "olog", 200 | "out", 201 | "optzmat", 202 | "slvzmat", 203 | "plt.pdb", 204 | "clu.pdb", 205 | "LL", 206 | "LBCC_BONDS.csv", 207 | "LBCC_CHARGES.csv ", 208 | resname + ".p", 209 | resname + ".z", 210 | resname + "_NO_LBCC.z", 211 | ] 212 | for file in list_files: 213 | if os.path.exists(file): 214 | os.remove(file) 215 | 216 | 217 | if __name__ == "__main__": 218 | 219 | main() 220 | -------------------------------------------------------------------------------- /LigParGenPSP/CreatZmat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | AutoZmat_VersionLSD: 5 | A python program to create BOSS zmatrix from any molecular input format. 6 | Need BOSS and OpenBabel executable to work 7 | Python Modeules Needed - networkx, numpy, pandas 8 | Created on Wed Jun 14 2017 9 | 10 | @author: Leela Sriram Dodda 11 | @email: leela.dodda@yale.edu 12 | """ 13 | import os 14 | import numpy as np 15 | from LigParGenPSP.Vector_algebra import ( 16 | pairing_func, 17 | angle, 18 | dihedral, 19 | tor_id, 20 | ang_id, 21 | bossElement2Num, 22 | Distance, 23 | ) 24 | import collections 25 | import networkx as nx 26 | import time 27 | 28 | 29 | def AsitIsZmat(ifile, optim, resid): 30 | iform = ifile.split('.') 31 | # CREATE A MOL FILE FROM ANY FILE 32 | if iform[1] == 'smi': 33 | os.system('obabel -i%s %s -omol %s.mol --gen3D' % (iform[1], ifile, iform[0])) 34 | else: 35 | os.system( 36 | 'obabel -i%s %s -omol %s.mol ---errorlevel 1 -b &>LL' 37 | % (iform[1], ifile, iform[0]) 38 | ) 39 | while not os.path.exists(iform[0] + '.mol'): 40 | time.sleep(1) 41 | mollines = open(iform[0] + '.mol', 'r').readlines() 42 | COOS, ATYPES, MolBonds = ReadMolFile(mollines) 43 | G_mol, mol_icords = make_graphs(ATYPES, COOS, MolBonds) 44 | print_ZMAT(ATYPES, G_mol, mol_icords, COOS, '%s.z' % resid, resid) 45 | return None 46 | 47 | 48 | def CanonicaliedZmat(ifile, optim, resid): 49 | iform = ifile.split('.') 50 | # CREATE A MOL FILE FROM ANY FILE 51 | if iform[1] == 'smi': 52 | os.system('obabel -i%s %s -omol %s.mol --gen3D' % (iform[1], ifile, iform[0])) 53 | else: 54 | os.system( 55 | 'obabel -i%s %s -omol --canonical %s.mol' % (iform[1], ifile, iform[0]) 56 | ) 57 | mollines = open(iform[0] + '.mol', 'r').readlines() 58 | COOS, ATYPES, MolBonds = ReadMolFile(mollines) 59 | G_mol, mol_icords = make_graphs(ATYPES, COOS, MolBonds) 60 | print_ZMAT(ATYPES, G_mol, mol_icords, COOS, '%s.z' % resid, resid) 61 | return None 62 | 63 | 64 | def GenMolRep(ifile, optim, resid, charge): 65 | iform = ifile.split('.') 66 | try: 67 | AsitIsZmat(ifile, optim, resid) 68 | except ZeroDivisionError: 69 | print( 70 | 'Warning!!\n 1.Cannonicalising Input MOL/PDB file\n 2.Atom ordering may change \n 3.But the Coordinates remain the same' 71 | ) 72 | CanonicaliedZmat(ifile, optim, resid) 73 | Get_OPT('%s.z' % resid, optim, charge) 74 | if os.path.exists('clu.pdb'): 75 | os.system('/bin/rm clu.pdb') 76 | if iform[1] == 'pdb': 77 | if os.environ.get('MCPROdir') is not None: 78 | os.system( 79 | '$MCPROdir/miscexec/clu -t:f=pdb %s.pdb -r %s.z -n:f=p clu.pdb -m ma' 80 | % (iform[0], resid) 81 | ) 82 | else: 83 | 
execfile = os.environ['BOSSdir'] + '/scripts/xSPM > olog' 84 | coma = execfile + ' ' + resid 85 | os.system(coma) 86 | os.system('cp plt.pdb clu.pdb') 87 | return True 88 | 89 | 90 | def Get_OPT(zmat, optim, charge): 91 | assert os.path.isfile(zmat), 'File named %10s does not exist' % zmat 92 | assert ( 93 | 'BOSSdir' in os.environ 94 | ), 'Please Make sure $BOSSdir is defined \n xZCM1A and related files are in scripts directory of BOSS' 95 | execs = { 96 | 2: os.environ['BOSSdir'] + '/scripts/xZCM1A+2 > olog', 97 | -2: os.environ['BOSSdir'] + '/scripts/xZCM1A-2 > olog', 98 | 0: os.environ['BOSSdir'] + '/scripts/xZCM1A > olog', 99 | 1: os.environ['BOSSdir'] + '/scripts/xZCM1A+ > olog', 100 | -1: os.environ['BOSSdir'] + '/scripts/xZCM1A- > olog', 101 | } 102 | print('MOLECULE HAS A CHARGE of %d' % charge) 103 | execfile = execs[charge] 104 | coma = execfile + ' ' + zmat[:-2] 105 | os.system(coma) 106 | os.system('cp sum %s' % (zmat)) 107 | execfile = os.environ['BOSSdir'] + '/scripts/xSPM > olog' 108 | coma = execfile + ' ' + zmat[:-2] 109 | os.system(coma) 110 | os.system('/bin/cp sum %s' % (zmat)) 111 | return None 112 | 113 | 114 | def ReadMolFile(mollines): 115 | [nats, nbonds] = map(int, (mollines[3][0:3], mollines[3][3:6])) 116 | cooslines = mollines[4: 4 + nats] 117 | coos = {} 118 | atypes = {} 119 | for i in range(nats): 120 | els = cooslines[i].split() 121 | coos[i + 1] = [float(e) for e in els[0:3]] 122 | atypes[i + 1] = els[3] 123 | bondlines = mollines[4 + nats: 4 + nats + nbonds] 124 | bonds = {'BI': [], 'BJ': [], 'RIJ': [], 'UID': []} 125 | for line in bondlines: 126 | [bi, bj] = map(int, [line[0:3], line[3:6]]) 127 | bonds['BI'].append(bi) 128 | bonds['BJ'].append(bj) 129 | bonds['RIJ'].append(Distance(coos[bi], coos[bj])) 130 | bonds['UID'].append(pairing_func(bi, bj)) 131 | return (coos, atypes, bonds) 132 | 133 | 134 | def make_graphs(atoms, coos, bonds): 135 | G = nx.DiGraph() 136 | # ADD NODES USING ATOM TYPES AND COORDINATES 137 | for i in coos.keys(): 138 | G.add_node(i, XYZ=coos[i], elem=atoms[i], atno=bossElement2Num(atoms[i])) 139 | for (i, j, rij) in zip(bonds['BI'], bonds['BJ'], bonds['RIJ']): 140 | G.add_edge(i, j, distance=rij) 141 | G.add_edge(j, i, distance=rij) 142 | all_ps = dict(nx.algorithms.all_pairs_shortest_path_length(G)) 143 | all_paths = [] 144 | for s in all_ps.keys(): 145 | for e in all_ps[s].keys(): 146 | # if all_ps[s][e] == 1: all_paths+=list(nx.algorithms.shortest_simple_paths(G,s,e)) 147 | # elif all_ps[s][e] == 2: all_paths+=list(nx.algorithms.shortest_simple_paths(G,s,e)) 148 | # elif all_ps[s][e] == 3: all_paths+=list(nx.algorithms.shortest_simple_paths(G,s,e)) 149 | if all_ps[s][e] == 1: 150 | all_paths += list(nx.algorithms.all_simple_paths(G, s, e, cutoff=1)) 151 | elif all_ps[s][e] == 2: 152 | all_paths += list(nx.algorithms.all_simple_paths(G, s, e, cutoff=2)) 153 | elif all_ps[s][e] == 3: 154 | all_paths += list(nx.algorithms.all_simple_paths(G, s, e, cutoff=3)) 155 | 156 | all_bonds = [p for p in all_paths if len(set(p)) == 2] 157 | new_angs = [p for p in all_paths if len(set(p)) == 3] 158 | new_tors = [p for p in all_paths if len(set(p)) == 4] 159 | dict_new_tors = {tor_id(t): t for t in new_tors} 160 | dict_new_angs = {ang_id(t): t for t in new_angs} 161 | imp_keys = [n for n in G.nodes() if G.degree(n) / 2 == 3] 162 | all_imps = {} 163 | for i in imp_keys: 164 | nei = list(G.neighbors(i)) 165 | # if G.node[i]['atno'] == 6: (backup) 166 | if G.nodes[i]['atno'] == 6: 167 | all_imps[i] = [nei[0], i, nei[1], nei[2]] 168 | MOL_ICOORDS 
= { 169 | 'BONDS': all_bonds, 170 | 'ANGLES': dict_new_angs, 171 | 'TORSIONS': dict_new_tors, 172 | 'IMPROPERS': all_imps, 173 | } 174 | return (G, MOL_ICOORDS) 175 | 176 | 177 | def Get_Add_Int(mol_icords, Z_BONDS, Z_ANGLES, Z_TORSIONS): 178 | all_bonds_mol, all_angles_mol, all_torsions_mol = ( 179 | mol_icords['BONDS'], 180 | mol_icords['ANGLES'], 181 | mol_icords['TORSIONS'], 182 | ) 183 | Z_B = { 184 | pairing_func(i[0] - 2, i[1] - 2): [i[0] - 2, i[1] - 2] for i in Z_BONDS.values() 185 | } 186 | Z_A = { 187 | ang_id([i[0] - 2, i[1] - 2, i[2] - 2]): [i[0] - 2, i[1] - 2, i[2] - 2] 188 | for i in Z_ANGLES.values() 189 | } 190 | Z_T = { 191 | tor_id([i[0] - 2, i[1] - 2, i[2] - 2, i[3] - 2]): [ 192 | i[0] - 2, 193 | i[1] - 2, 194 | i[2] - 2, 195 | i[3] - 2, 196 | ] 197 | for i in Z_TORSIONS.values() 198 | } 199 | Z_Ad_B, Z_Ad_A, Z_Ad_T = ( 200 | collections.OrderedDict(), 201 | collections.OrderedDict(), 202 | collections.OrderedDict(), 203 | ) 204 | for b_ij in all_bonds_mol: 205 | uid_b_ij = pairing_func(b_ij[0], b_ij[1]) 206 | if uid_b_ij not in list(Z_B.keys()): 207 | Z_Ad_B[uid_b_ij] = [b_ij[0] + 2, b_ij[1] + 2] 208 | for a_ij in all_angles_mol.keys(): 209 | if a_ij not in list(Z_A.keys()): 210 | Z_Ad_A[a_ij] = [i + 2 for i in all_angles_mol[a_ij]] 211 | for t_ij in all_torsions_mol.keys(): 212 | if t_ij not in list(Z_T.keys()): 213 | Z_Ad_T[t_ij] = [i + 2 for i in all_torsions_mol[t_ij]] 214 | for c in mol_icords['IMPROPERS'].values(): 215 | Z_Ad_T["-".join(list(map(str, c)))] = [i + 2 for i in c] 216 | return (Z_Ad_B, Z_Ad_A, Z_Ad_T) 217 | 218 | 219 | def print_ZMAT(atoms, G_mol, mol_icords, coos, zmat_name, resid): 220 | if not zmat_name: 221 | zmat_name = resid 222 | Z_ATOMS = {1: 'X', 2: 'X'} 223 | Z_NO = {1: -1, 2: -1} 224 | Z_BONDS = {1: (1, 0, 0.000), 2: (2, 1, 1.00), 3: (3, 2, 1.00)} 225 | Z_ANGLES = { 226 | 1: (1, 0, 0, 0.000), 227 | 2: (2, 1, 0, 0.000), 228 | 3: (3, 2, 1, 90.00), 229 | 4: (4, 3, 2, 90.0), 230 | } 231 | Z_TORSIONS = { 232 | 1: (1, 0, 0, 0, 0.00), 233 | 2: (2, 1, 0, 0, 0.00), 234 | 3: (3, 2, 1, 0, 0.00), 235 | 4: (4, 3, 2, 1, 0.00), 236 | 5: (5, 4, 3, 2, 90.0), 237 | } 238 | for i in range(1, len(atoms) + 1): 239 | Z_ATOMS[i + 2] = atoms[i] 240 | for i in range(1, len(atoms) + 1): 241 | # Z_NO[i + 2] = G_mol.node[i]['atno'] # backup 242 | Z_NO[i + 2] = G_mol.nodes[i]['atno'] 243 | n_ats = 0 244 | B_LINK = {} 245 | for i in G_mol.nodes(): 246 | if n_ats > 0: 247 | neigs = np.sort(list(G_mol.neighbors(i))) 248 | B_LINK[i] = neigs[0] 249 | Z_BONDS[i + 2] = (i + 2, neigs[0] + 2, G_mol[i][neigs[0]]['distance']) 250 | n_ats += 1 251 | n_ats = 0 252 | A_LINK = {} 253 | for i in G_mol.nodes(): 254 | if n_ats > 1: 255 | neigs = np.sort(list(G_mol.neighbors(B_LINK[i]))) 256 | A_LINK[i] = neigs[0] 257 | ang = angle(coos[i], coos[B_LINK[i]], coos[neigs[0]]) 258 | Z_ANGLES[i + 2] = (i + 2, B_LINK[i] + 2, neigs[0] + 2, ang) 259 | n_ats += 1 260 | n_ats = 0 261 | for i in G_mol.nodes(): 262 | if n_ats > 2: 263 | neigs = list(G_mol.neighbors(A_LINK[i])) 264 | neigs = np.array([j for j in neigs if j not in [i, B_LINK[i], A_LINK[i]]]) 265 | neigs = np.sort(neigs) 266 | neigs = neigs[neigs < i] 267 | if len(neigs) < 1: 268 | neigs = [ 269 | j 270 | for j in list(G_mol.neighbors(B_LINK[i])) 271 | if j not in [i, A_LINK[i]] 272 | ] 273 | if B_LINK[i] in list(mol_icords['IMPROPERS'].keys()): 274 | del mol_icords['IMPROPERS'][B_LINK[i]] 275 | [ti, tj, tk, tl] = [i, B_LINK[i], A_LINK[i], neigs[0]] 276 | dihed = dihedral(coos[ti], coos[tj], coos[tk], coos[tl]) 277 | Z_TORSIONS[i + 2] 
= (ti + 2, tj + 2, tk + 2, tl + 2, dihed) 278 | n_ats += 1 279 | Z_Ad_B, Z_Ad_A, Z_Ad_T = Get_Add_Int(mol_icords, Z_BONDS, Z_ANGLES, Z_TORSIONS) 280 | # PRINTING ACTUAL Z-MATRIX 281 | ofile = open(zmat_name, 'w+') 282 | ofile.write('BOSS Z-Matrix with LSDautozmat (written by Leela S. Dodda)\n') 283 | for i in range(1, len(atoms) + 3): 284 | ofile.write( 285 | '%4d %-3s%5d%5d%5d%12.6f%4d%12.6f%4d%12.6f%4s%5d\n' 286 | % ( 287 | i, 288 | Z_ATOMS[i], 289 | Z_NO[i], 290 | Z_NO[i], 291 | Z_BONDS[i][1], 292 | Z_BONDS[i][-1], 293 | Z_ANGLES[i][-2], 294 | Z_ANGLES[i][-1], 295 | Z_TORSIONS[i][-2], 296 | Z_TORSIONS[i][-1], 297 | resid[0:3], 298 | 1, 299 | ) 300 | ) 301 | ofile.write( 302 | ''' Geometry Variations follow (2I4,F12.6) 303 | Variable Bonds follow (I4)\n''' 304 | ) 305 | for i in range(4, len(atoms) + 3): 306 | ofile.write('%4d\n' % i) 307 | ofile.write(' Additional Bonds follow (2I4)\n') 308 | if len(Z_Ad_B) > 0: 309 | for i in Z_Ad_B.values(): 310 | ofile.write('%4d%4d\n' % (i[0], i[1])) 311 | # CREATE A FUNCTION TO DEFINE ADDITIONAL BONDS IN CASE OF RINGS 312 | ofile.write( 313 | ''' Harmonic Constraints follow (2I4,4F10.4) 314 | Variable Bond Angles follow (I4)\n''' 315 | ) 316 | for i in range(5, len(atoms) + 3): 317 | ofile.write('%4d\n' % i) 318 | ofile.write(' Additional Bond Angles follow (3I4)\n') 319 | if len(Z_Ad_A) > 0: 320 | for i in Z_Ad_A.values(): 321 | ofile.write('%4d%4d%4d\n' % (i[0], i[1], i[2])) 322 | # CREATE A FUNCTION TO DEFINE ADDITIONAL BONDS IN CASE OF RINGS 323 | ofile.write(' Variable Dihedrals follow (3I4,F12.6)\n') 324 | for i in range(6, len(atoms) + 3): 325 | ofile.write('%4d%4d%4d%12.6f\n' % (i, -1, -1, 0.000)) 326 | ofile.write(' Additional Dihedrals follow (6I4)\n') 327 | if len(Z_Ad_T) > 0: 328 | for k in Z_Ad_T.keys(): 329 | torsion = Z_Ad_T[k] 330 | ofile.write( 331 | '%4d%4d%4d%4d%4d%4d\n' 332 | % (torsion[0], torsion[1], torsion[2], torsion[3], -1, -1) 333 | ) 334 | ofile.write( 335 | ''' Domain Definitions follow (4I4) 336 | Conformational Search (2I4,2F12.6) 337 | Local Heating Residues follow (I4 or I4-I4) 338 | Final blank line 339 | ''' 340 | ) 341 | ofile.close() 342 | return None 343 | -------------------------------------------------------------------------------- /LigParGenPSP/README: -------------------------------------------------------------------------------- 1 | LigParGen scripts included with the PSP package were taken from LigParGenv2.1 (https://pypi.org/project/LigParGen/#description) and modified to make them compatible with the PSP package. 2 | 3 | Details of the original distribution: 4 | Author: Leela S. Dodda, Matthew C. Robinson 5 | License: MIT 6 | Email: leela.dodda@yale.edu,matthew.robinson@yale.edu 7 | Homepage: https://bitbucket.org/leelasd/ligpargen_2017_sep18/src/master/ 8 | 9 | We have updated the original LigParGen source code to include the following features: 10 | (1) Able to store output files in a user-defined directory. 11 | (2) Compatible with the recent versions of Open Babel (v3.1.1), NetworkX (v2.5), and pandas (v1.2.4) libraries. 12 | (3) Generate a data file for the LAMMPS package only. 13 | (4) Delete all temporary files. 
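For reference, a minimal sketch (not part of the original README) of driving the bundled converter from Python rather than the LigParGenPSP command line, based on the convert() options defined in LigParGenPSP/Converter.py. It assumes a working BOSS installation ($BOSSdir set) and Open Babel; the input file name, residue name, and output directory below are placeholders, not values from this repository.

    # Hypothetical usage sketch; requires BOSS ($BOSSdir) and Open Babel.
    from LigParGenPSP.Converter import convert

    convert(
        mol='phenol.mol',          # input MOL file (alternatively smiles=... or pdb=...)
        resname='PHN',             # residue name; the LAMMPS data file is written as PHN.lmp
        charge=0,                  # net molecular charge
        opt=0,                     # 0 = single-point charges; 1-3 = BOSS optimization stages
        lbcc=True,                 # 1.14*CM1A-LBCC charges (neutral molecules only)
        outdir='./ligpargen_out',  # existing user-defined output directory (feature 1 above)
    )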
14 | -------------------------------------------------------------------------------- /LigParGenPSP/Vector_algebra.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | 5 | def bossElement2Num(elem): 6 | symb2mass = { 7 | "H": 1, 8 | "B": 5, 9 | "C": 6, 10 | "N": 7, 11 | "O": 8, 12 | "F": 9, 13 | "Si": 14, 14 | "P": 15, 15 | "S": 16, 16 | "Cl": 17, 17 | "Br": 35, 18 | "I": 53, 19 | } 20 | try: 21 | res = symb2mass[elem] 22 | except NameError: 23 | print( 24 | "Mass for atom %s is not available \n add it to symb2mass dictionary" 25 | ) 26 | return res 27 | 28 | 29 | def pairing_func(a, b): 30 | ans = (a + b) * (a + b + 1) * 0.5 31 | if a > b: 32 | ans = ans + a 33 | else: 34 | ans = ans + b 35 | return int(ans) 36 | 37 | 38 | def Vector(x, y, z): 39 | return (x, y, z) 40 | 41 | 42 | def length(v): 43 | "Return length of a vector." 44 | sum = 0.0 45 | for c in v: 46 | sum += c * c 47 | return math.sqrt(sum) 48 | 49 | 50 | def subtract(u, v): 51 | "Return difference between two vectors." 52 | x = u[0] - v[0] 53 | y = u[1] - v[1] 54 | z = u[2] - v[2] 55 | return Vector(x, y, z) 56 | 57 | 58 | def dot(u, v): 59 | "Return dot product of two vectors." 60 | sum = 0.0 61 | for cu, cv in zip(u, v): 62 | sum += cu * cv 63 | return sum 64 | 65 | 66 | def Distance(u, v): 67 | "Return length of a vector." 68 | # print(u,v) 69 | uv = subtract(u, v) 70 | lsum = 0.0 71 | for c in uv: 72 | lsum += c * c 73 | return math.sqrt(lsum) 74 | 75 | 76 | def cross(u, v): 77 | "Return the cross product of two vectors." 78 | x = u[1] * v[2] - u[2] * v[1] 79 | y = u[2] * v[0] - u[0] * v[2] 80 | z = u[0] * v[1] - u[1] * v[0] 81 | return Vector(x, y, z) 82 | 83 | 84 | def Mol_angle(v0, v1): 85 | "Return angle [0..pi] between two vectors." 86 | cosa = round(dot(v0, v1) / length(v0) / length(v1), 3) 87 | return np.arccos(cosa) 88 | 89 | 90 | def angle(p0, p1, p2): 91 | "Return angle [0..pi] between two vectors." 92 | v0 = subtract(p0, p1) 93 | v1 = subtract(p2, p1) 94 | cosa = dot(v0, v1) / length(v0) / length(v1) 95 | # print(cosa) 96 | return 180.0 * np.arccos(round(cosa, 3)) * 7.0 / 22.0 97 | 98 | 99 | def dihedral(p0, p1, p2, p3): 100 | "Return angle [0..2*pi] formed by vertices p0-p1-p2-p3." 101 | v01 = subtract(p0, p1) 102 | v32 = subtract(p3, p2) 103 | v12 = subtract(p1, p2) 104 | v0 = cross(v12, v01) 105 | v3 = cross(v12, v32) 106 | # The cross product vectors are both normal to the axis 107 | # vector v12, so the angle between them is the dihedral 108 | # angle that we are looking for. 
However, since "angle" 109 | # only returns values between 0 and pi, we need to make 110 | # sure we get the right sign relative to the rotation axis 111 | a = Mol_angle(v0, v3) 112 | if dot(cross(v0, v3), v12) > 0: 113 | a = -a 114 | return a * 180.0 * 7.0 / 22.0 115 | 116 | 117 | def tor_id(a): 118 | bond = pairing_func(a[1], a[2]) 119 | ends = pairing_func(a[0], a[3]) 120 | return "%d-%d" % (bond, ends) 121 | 122 | 123 | def ang_id(a): 124 | bond_a = pairing_func(a[0], a[1]) 125 | bond_b = pairing_func(a[1], a[2]) 126 | return pairing_func(bond_a, bond_b) 127 | -------------------------------------------------------------------------------- /LigParGenPSP/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/LigParGenPSP/__init__.py -------------------------------------------------------------------------------- /LigParGenPSP/fepzmat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def new_func(linex, match): 5 | out = 0 6 | for word in linex.split(): 7 | if word == match: 8 | out = out + 1 9 | return out 10 | 11 | 12 | def read_coords(data): 13 | cmatrix = [] 14 | ta = [] 15 | tb = [] 16 | for i in range(0, len(data)): 17 | cmatrix.append(data[i].split()) 18 | ta = [int(cmatrix[i][2]) for i in range(0, len(data))] 19 | tb = [int(cmatrix[i][3]) for i in range(0, len(data))] 20 | ta = np.array(ta) 21 | maxa = ta.max() 22 | tb = np.array(tb) 23 | numi = 1 24 | for i in range(0, len(tb)): 25 | if tb[i] > 1: 26 | tb[i] = maxa + numi 27 | numi = numi + 1 28 | for i in range(0, len(data)): 29 | cmatrix[i][3] = str(tb[i]) 30 | outdat = [] 31 | new_coord = '' 32 | for i in range(0, len(data)): 33 | new_coord = '{:>4s} {:<3s} {:>4s} {:>4s}'.format( 34 | cmatrix[i][0], cmatrix[i][1], cmatrix[i][2], cmatrix[i][3] 35 | ) 36 | new_coord = new_coord + '{:>5s}{:>12s}{:>4s}{:>12s}'.format( 37 | cmatrix[i][4], cmatrix[i][5], cmatrix[i][6], cmatrix[i][7] 38 | ) 39 | new_coord = new_coord + '{:>4s}{:>12s}{:>9s}'.format( 40 | cmatrix[i][8], cmatrix[i][9], cmatrix[i][10] 41 | ) 42 | outdat.append(new_coord) 43 | tb = tb[tb > 0] # IMPORTANT TO AVOID THE -1 and 0 IN FINAL ATOM TYPE 44 | ta = ta[ta > 0] # IMPORTANT TO AVOID THE -1 and 0 IN FINAL ATOM TYPE 45 | return outdat, tb, ta 46 | 47 | 48 | def read_files(infile): 49 | nline = 0 50 | cline = 0 51 | oline = 0 52 | data = [] 53 | for line in infile: 54 | if line.rstrip(): 55 | data.append(line) 56 | if "Non-Bonded" in line: 57 | oline = nline 58 | elif "Variations" in line: 59 | cline = nline 60 | nline += 1 61 | return data, nline, cline, oline 62 | 63 | 64 | def rel_nbd(data, tb, QBCC=None): 65 | if QBCC is None: 66 | QBCC = np.zeros(len(data), dtype=float) 67 | nmat = [] 68 | nmat = [ndat.split() for ndat in data] 69 | ondat = [] 70 | for i in range(0, len(data)): 71 | nmat[i][0] = str(tb[i]) 72 | nmat[i][3] = '%.6f' % QBCC[i] 73 | new_nb = '{:>4s}{:>3s} {:<3s} {:>9s} {:>9s} {:>9s}'.format( 74 | nmat[i][0], nmat[i][1], nmat[i][2], nmat[i][3], nmat[i][4], nmat[i][5] 75 | ) 76 | ondat.append(new_nb) 77 | return ondat 78 | 79 | 80 | def fepZmatFromFile(filenme, QBCC=None): 81 | qfile = open(filenme) 82 | qdat, nl1, cl1, ol1 = read_files(qfile) 83 | cdat, tb, ta = read_coords(qdat[1:cl1]) 84 | ndat = rel_nbd(qdat[ol1 + 1:], tb, QBCC) 85 | qdat[ol1] = qdat[ol1].replace("AM1 CM1Ax1.14", "CM1Ax1.14TO1.14CM1A-BCC", 1) 86 | target = open(filenme[:-2] + 
'_fep.z', 'w') 87 | target.write(qdat[0]) 88 | for i in range(0, len(cdat)): 89 | target.write(cdat[i] + '\n') 90 | for i in range(cl1, nl1): 91 | target.write(qdat[i]) 92 | for i in range(0, len(ndat)): 93 | target.write(ndat[i] + '\n') 94 | target.close() 95 | return None 96 | 97 | 98 | def fepZmatFromPkl(zmat_dat, filenme, QBCC=None): 99 | qdat, nl1, cl1, ol1 = read_files(zmat_dat) 100 | cdat, tb, ta = read_coords(qdat[1:cl1]) 101 | ndat = rel_nbd(qdat[ol1 + 1:], tb, QBCC) 102 | qdat[ol1] = qdat[ol1].replace("AM1 CM1Ax1.14", "CM1Ax1.14TO1.14CM1A-BCC", 1) 103 | target = open(filenme + '_fep.z', 'w') 104 | target.write(qdat[0]) 105 | for i in range(0, len(cdat)): 106 | target.write(cdat[i] + '\n') 107 | for i in range(cl1, nl1): 108 | target.write(qdat[i]) 109 | for i in range(0, len(ndat)): 110 | target.write(ndat[i] + '\n') 111 | target.close() 112 | return None 113 | 114 | 115 | def BCC_file2zmat(zmat, QBCC, oname): 116 | qfile = open(zmat, 'r+') 117 | qdat, nl1, cl1, ol1 = read_files(qfile) 118 | cdat, tb, ta = read_coords(qdat[1:cl1]) 119 | ndat = rel_nbd(qdat[ol1 + 1:], ta, QBCC) 120 | qdat[ol1] = qdat[ol1].replace("AM1 CM1Ax1.14", "1.14CM1A-LBCC", 1) 121 | qfile.close() 122 | target = open(oname, 'w+') 123 | for i in range(0, ol1 + 1): 124 | target.write(qdat[i]) 125 | for i in range(0, len(ndat)): 126 | target.write(ndat[i] + '\n') 127 | target.close() 128 | return None 129 | -------------------------------------------------------------------------------- /LigParGenPSP/mol_boss.py: -------------------------------------------------------------------------------- 1 | # THIS IS THE HEART OF BCC CORRECTION METHODOLOGY 2 | # THIS MODULE DOES THE BCC ASSIGNMENT BY COLLECTING 3 | # BONDING INFO AND ASSIGNING BCC CORRECTIONS FOR ATOMS 4 | import numpy as np 5 | 6 | 7 | def rev_bnd(bnd): 8 | a, b = bnd.split('-') 9 | return b + '-' + a 10 | 11 | 12 | def sign_bnd(bnd, at): 13 | if bnd == rev_bnd(bnd): 14 | si = 0 15 | else: 16 | si = (-2 * bnd.split('-').index(at)) + 1 17 | return si 18 | 19 | 20 | def get_bcc_types(db, cha, bond): 21 | rtij = [] 22 | mtij = [] 23 | bond['NTIJ'] = [ 24 | str(cha.TY[i - 1]) + '-' + str(cha.TY[j - 1]) for (i, j) in zip(bond.I, bond.J) 25 | ] 26 | for i in bond.NTIJ: 27 | if i == rev_bnd(i): 28 | mtij.append('X-X') 29 | rtij.append(i) 30 | elif i in db.keys(): 31 | rtij.append(i) 32 | mtij.append(i) 33 | elif rev_bnd(i) in db.keys(): 34 | rtij.append(rev_bnd(i)) 35 | mtij.append(rev_bnd(i)) 36 | else: 37 | print('%5s not found in bonds.csv' % i) 38 | mtij.append('U-U') 39 | rtij.append('U-U') 40 | bond['TIJ'] = mtij 41 | bond['MTIJ'] = rtij 42 | bond['AI'] = [str(cha.TY[i - 1]) for i in bond.I] 43 | bond['AJ'] = [str(cha.TY[j - 1]) for j in bond.J] 44 | bond['SI'] = [sign_bnd(bnd, at) for bnd, at in zip(bond.TIJ, bond.AI)] 45 | bond['SJ'] = [sign_bnd(bnd, at) for bnd, at in zip(bond.TIJ, bond.AJ)] 46 | return bond 47 | 48 | 49 | def new_mol_info(db, cha, bond): 50 | # cha = pd.read_csv('CM1AQ', header=None, delim_whitespace=True) 51 | # cha.columns = ['TY', 'Q'] 52 | bond = get_bcc_types(db, cha, bond) 53 | MOLBtype = {} 54 | for an in cha.index: 55 | MOLBtype[an] = list(bond[bond['I'] == an + 1].TIJ) + list( 56 | bond[bond['J'] == an + 1].TIJ 57 | ) 58 | if (cha.TY[an] == 'OS') and ('C-OS' in MOLBtype[an]): 59 | print("Changing OS TO OE") 60 | cha.loc[an, 'TY'] = 'OE' 61 | bond = get_bcc_types(db, cha, bond) 62 | # Seperate Correction for Esters 63 | if (cha.TY[an] == 'C') and ('C-O' in MOLBtype[an]): 64 | if ('C-OS' in MOLBtype[an]) or ('C-OE' in 
MOLBtype[an]): 65 | print("Changing OS TO OE") 66 | cha.loc[an, 'TY'] = 'CE' 67 | bond = get_bcc_types(db, cha, bond) 68 | # Seperate Correction for Amides 69 | if (cha.TY[an] == 'C') and ('C-N' in MOLBtype[an]): 70 | print("Changing C TO CAM") 71 | cha.loc[an, 'TY'] = 'CAM' 72 | bond = get_bcc_types(db, cha, bond) 73 | # Seperate Correction for Aromatic Nitriles 74 | if (cha.TY[an] == 'CZ') and (set(['CA-CZ', 'CZ-NZ']) <= set(MOLBtype[an])): 75 | print(MOLBtype[an]) 76 | print("Changing CZ-NZ to CZA-NZ") 77 | cha.loc[an, 'TY'] = 'CZA' 78 | bond = get_bcc_types(db, cha, bond) 79 | if (cha.TY[an] == 'CZ') and (set(['CT-CZ', 'CZ-NZ']) <= set(MOLBtype[an])): 80 | print(MOLBtype[an]) 81 | print("Changing CZ-NZ to CZT-NZ") 82 | cha.loc[an, 'TY'] = 'CZT' 83 | bond = get_bcc_types(db, cha, bond) 84 | # Seperate Correction for 1,2,3 Amines 85 | if (cha.TY[an] == 'NT') and MOLBtype[an].count('H-NT') == 2: 86 | print(MOLBtype[an]) 87 | print("Changing NT to NP") 88 | cha.loc[an, 'TY'] = 'NP' 89 | bond = get_bcc_types(db, cha, bond) 90 | if (cha.TY[an] == 'NT') and MOLBtype[an].count('H-NT') == 1: 91 | print("Changing NT to NS") 92 | cha.loc[an, 'TY'] = 'NS' 93 | bond = get_bcc_types(db, cha, bond) 94 | if (cha.TY[an] == 'NT') and MOLBtype[an].count('H-NT') == 0: 95 | print("Changing NT to N3") 96 | cha.loc[an, 'TY'] = 'N3' 97 | bond = get_bcc_types(db, cha, bond) 98 | cha = get_bcc_charges(db, bond, cha) 99 | QBCC = np.array(cha.QBCC) 100 | return (bond, cha, QBCC) 101 | 102 | 103 | def get_bcc_charges(db, bond, cha): 104 | bond['IBCC'] = [sign * db[bcc] for sign, bcc in zip(bond.SI, bond.TIJ)] 105 | bond['JBCC'] = [sign * db[bcc] for sign, bcc in zip(bond.SJ, bond.TIJ)] 106 | cha['BCC'] = [ 107 | sum(bond[bond['I'] == an + 1]['IBCC']) + sum(bond[bond['J'] == an + 1]['JBCC']) 108 | for an in cha.index 109 | ] 110 | cha['QBCC'] = cha['Q'] + cha['BCC'] 111 | ars = [i for i in range(0, len(cha.TY)) if not cha['TY'][i].isdigit()] 112 | cha = cha.loc[ars] 113 | return cha 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PolymerStructurePredictor (PSP) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)][1] 2 | 3 | Three-dimensional atomic-level models of polymers are necessary prerequisites for physics-based simulation studies. Polymer structure predictor (PSP) is capable of generating a hierarchy of polymer models, ranging from oligomers to infinite chains to crystals to amorphous models, using a simplified molecular-input line-entry system (SMILES) string of the polymer repeat unit as the primary input. The output structures and accompanying force field (GAFF2/OPLS-AA) parameter files are provided for downstream DFT and MD simulations. 4 | 5 | >PSP generates models for monomers, linear and loop oligomers, infinite polymer chains, crystal, and amorphous structures using SMILES strings. 6 | 7 | ## Contributors 8 | * Harikrishna Sahu 9 | * Huan Tran 10 | * Kuan-Hsuan Shen 11 | * Joseph Montoya 12 | * Rampi Ramprasad 13 | 14 | ## License & copyright 15 | Ramprasad Group, Georgia Tech, USA\ 16 | [Ramprasad Group website](http://ramprasad.mse.gatech.edu/)\ 17 | Licensed under the [MIT License](LICENSE). 18 | 19 | ## Contact 20 | All queries regarding the usage of PSP should be addressed to: **psp-users@groups.gatech.edu**. 21 | 22 | We highly recommend interested individuals join the PSP group. 
This allows you to participate in discussions and keeps you updated on issues, bug fixes, and the latest developments.
23 |
24 | ## Reference
25 | If you use PSP, please cite:\
26 | Sahu, H.; Shen, K.-H.; Montoya, J. H.; Tran, H.; Ramprasad, R. Polymer Structure Predictor (PSP): A Python Toolkit for Predicting Atomic-Level Structural Models for a Range of Polymer Geometries, *J. Chem. Theory Comput.*, **2022**.
27 |
28 | ## Installation
29 | PSP requires the following packages to be installed in order to function properly:
30 | * [RDKit](https://www.rdkit.org/) v2020.09.1.0
31 | * [Open Babel](https://open-babel.readthedocs.io/en/latest/index.html) v3.1.1
32 | * [PACKMOL](http://leandro.iqm.unicamp.br/m3g/packmol/home.shtml) v20.2.2
33 | * [PySIMM](https://pysimm.org/) v0.2.3
34 | * [LAMMPS](https://docs.lammps.org/Manual.html)
35 | * [AmberTools21](https://ambermd.org/AmberTools.php) (optional, only needed for the `get_gaff2()` function in the AmorphousBuilder)
36 | * [LigParGen](http://zarbi.chem.yale.edu/ligpargen/) dependencies (optional, only needed for the `get_opls()` function in the AmorphousBuilder)
37 |
38 | Note that all dependencies must be installed separately and tested to ensure that they function properly. We recommend using Anaconda Python and creating a fresh conda environment for PSP (e.g., `conda create -n MY_ENV_NAME`).
39 |
40 | RDKit and Open Babel are available as conda packages and can be installed using the instructions provided at the following links: (1) [https://anaconda.org/rdkit/rdkit](https://anaconda.org/rdkit/rdkit) and (2) [https://anaconda.org/conda-forge/openbabel](https://anaconda.org/conda-forge/openbabel).
41 |
42 | Detailed instructions for installing the PACKMOL package can be found at the following URL: [http://leandro.iqm.unicamp.br/m3g/packmol/home.shtml](http://leandro.iqm.unicamp.br/m3g/packmol/home.shtml). Make sure to export the path to the PACKMOL executable as the environment variable "PACKMOL\_EXEC" in your ~/.bashrc file.
43 |
44 | LAMMPS can be installed separately or along with PySIMM. Make sure to add the PySIMM package to your PYTHONPATH and to add the PySIMM and LAMMPS command-line tools to your PATH, as described in the PySIMM documentation.
45 |
46 | AmberTools is available as a conda package and can be installed using the instructions provided at the following link: [https://ambermd.org/AmberTools.php](https://ambermd.org/AmberTools.php). Make sure to export the path to the Antechamber executable as the environment variable "ANTECHAMBER\_EXEC" in your ~/.bashrc file.
47 |
48 | After that, source your ~/.bashrc file. PSP looks up the paths to PACKMOL, PySIMM, LAMMPS, and Antechamber while performing its tasks.
49 |
50 | LigParGen and its dependencies: LigParGen requires the BOSS executable. Obtain a copy of it and set the $BOSSdir variable in bash. For more information, see [http://zarbi.chem.yale.edu/ligpargen](http://zarbi.chem.yale.edu/ligpargen) and [http://zarbi.chem.yale.edu/software.html](http://zarbi.chem.yale.edu/software.html). To make LigParGen compatible with PSP, we updated it to include the following features: (1) the ability to store the output files in a user-defined directory; and (2) compatibility with recent versions of the Open Babel (v3.1.1), NetworkX (v2.5), and pandas (v1.2.4) libraries. Note that NetworkX is not installed by the steps above; make sure it is installed as well. The updated LigParGen source code is redistributed as part of the PSP package.
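Because PSP resolves these external tools only at run time, it can save debugging time to verify the environment up front. Below is a minimal sanity-check sketch; the environment variables are the ones described above, while the script itself is purely illustrative and does not ship with PSP:

```python
import os

# Environment variables read by PSP and LigParGen (see the notes above)
for var in ("PACKMOL_EXEC", "ANTECHAMBER_EXEC", "BOSSdir"):
    print(f"{var:16s}: {os.getenv(var, 'NOT SET')}")

# Python packages that must be importable from the PSP conda environment
for mod in ("rdkit", "openbabel", "networkx", "pysimm", "tqdm", "pandas"):
    try:
        __import__(mod)
        print(f"{mod:16s}: OK")
    except ImportError:
        print(f"{mod:16s}: MISSING")
```

The PySIMM and LAMMPS command-line tools are not covered by this check; confirm separately that they are on your PATH.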
51 | 52 | Once all dependencies are installed, clone the PSP repository and install it using the *setup.py* included in the package. 53 | 54 | ```angular2 55 | python setup.py install 56 | ``` 57 | >**NOTE**: A colab notebook that demonstrates the step-by-step installation procedure and installs PSP and its dependencies has been provided. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)][1] 58 | 59 | [1]:https://colab.research.google.com/github/Ramprasad-Group/PSP/blob/master/Colab_notebook/psp_Colab_notebook.ipynb 60 | 61 | -------------------------------------------------------------------------------- /documentation/PSP_user_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/documentation/PSP_user_manual.pdf -------------------------------------------------------------------------------- /psp/AmorphousBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import psp.MD_lib as MDlib 4 | import time 5 | import os 6 | import psp.PSP_lib as bd 7 | from openbabel import openbabel as ob 8 | import glob 9 | import psp.output_lib as lib 10 | from tqdm import tqdm 11 | from LigParGenPSP import Converter 12 | import psp.MoleculeBuilder as mb 13 | import random 14 | 15 | obConversion = ob.OBConversion() 16 | 17 | 18 | class Builder: 19 | def __init__( 20 | self, 21 | Dataframe, 22 | ID_col="ID", 23 | SMILES_col="smiles", 24 | NumMole="Num", 25 | Length="Len", 26 | NumConf="NumConf", 27 | NumModel=1, 28 | LeftCap="LeftCap", 29 | RightCap="RightCap", 30 | Loop="Loop", 31 | OutFile="amor_model", 32 | OutDir="amorphous_models", 33 | OutDir_xyz="molecules", 34 | density=0.65, 35 | tol_dis=2.0, 36 | box_type="c", 37 | box_size=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 38 | incr_per=0.4, 39 | BondInfo=True, 40 | ): 41 | self.Dataframe = Dataframe 42 | self.ID_col = ID_col 43 | self.SMILES_col = SMILES_col 44 | self.NumMole = NumMole 45 | self.Length = Length 46 | self.NumConf = NumConf 47 | self.NumModel = NumModel 48 | self.LeftCap = LeftCap 49 | self.RightCap = RightCap 50 | self.Loop = Loop 51 | self.OutFile = OutFile 52 | self.OutDir = os.path.join(OutDir, "") 53 | self.OutDir_xyz = os.path.join(OutDir, OutDir_xyz, "") 54 | self.OutDir_packmol = os.path.join(OutDir, "packmol", "") 55 | self.OutDir_ligpargen = os.path.join(OutDir, "ligpargen", "") 56 | self.OutDir_pysimm = os.path.join(OutDir, "pysimm", "") 57 | self.density = density 58 | self.tol_dis = tol_dis 59 | self.box_type = box_type 60 | self.box_size = box_size 61 | self.incr_per = incr_per 62 | self.BondInfo = BondInfo 63 | 64 | def Build(self): 65 | start_1 = time.time() 66 | lib.print_psp_info() # Print PSP info 67 | lib.print_input("AmorphousBuilder", self.Dataframe) 68 | if self.box_type == "c": 69 | box_type_ = "Cubic" 70 | else: 71 | box_type_ = "Rectangular" 72 | 73 | print( 74 | "\n", 75 | "Additional information: ", 76 | "\n", 77 | "Number of models: ", 78 | self.NumModel, 79 | "\n", 80 | "Density (g/cm3): ", 81 | self.density, 82 | "\n", 83 | "Tolerance distance (angstrom): ", 84 | self.tol_dis, 85 | "\n", 86 | "Box type: ", 87 | box_type_, 88 | "\n", 89 | "Output directory: ", 90 | self.OutDir, 91 | "\n", 92 | ) 93 | 94 | # location of directory for VASP inputs (polymers) and build a directory 95 | bd.build_dir(self.OutDir) 96 | bd.build_dir(self.OutDir_xyz) 97 | 98 | # PACKMOL 99 | 
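        # Workflow overview for the steps below: MoleculeBuilder first writes
        # one PDB file per oligomer conformer into OutDir_xyz; a PACKMOL input
        # file is then generated and PACKMOL (located through the PACKMOL_EXEC
        # environment variable) packs the requested number of copies of each
        # conformer into the simulation box; the packed PDB is converted to
        # mol2 with Open Babel, and finally VASP (.vasp) and LAMMPS (.data)
        # files are written for each amorphous model.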
packmol_path = os.getenv("PACKMOL_EXEC") 100 | # packmol_path = '/home/hari/.soft/packmol/packmol' 101 | 102 | xyz_gen_pd = pd.DataFrame() 103 | for i in self.Dataframe.index: 104 | df = pd.DataFrame(self.Dataframe.loc[i]).T 105 | 106 | mol = mb.Builder( 107 | df, 108 | ID_col=self.ID_col, 109 | SMILES_col=self.SMILES_col, 110 | LeftCap=self.LeftCap[0], 111 | RightCap=self.RightCap[0], 112 | OutDir=self.OutDir_xyz, 113 | Length=[int(df[self.Length].values)], 114 | NumConf=int(df[self.NumConf].values) * self.NumModel, 115 | Loop=eval(str(df[self.Loop].values[0])), 116 | NCores=-1, 117 | Subscript=True, 118 | ) 119 | results = mol.Build() 120 | xyz_gen_pd = pd.concat([xyz_gen_pd, results]) 121 | 122 | if len(list(set(xyz_gen_pd["Result"].values))) != 1: 123 | xyz_gen_pd.to_csv("molecules.csv") 124 | print( 125 | "Couldn't generate XYZ coordinates of molecules, check 'molecules.csv'" 126 | ) 127 | 128 | XYZ_list, smi_list, NMol_list, NumConf_list = [], [], [], [] 129 | for index, row in self.Dataframe.iterrows(): 130 | # Get number of molecules for each conformer of molecules 131 | NMol_list += [int(row[self.NumMole] / row[self.NumConf])] * row[ 132 | self.NumConf 133 | ] 134 | 135 | # Get SMILES string for oligomers 136 | smiles_each = xyz_gen_pd[xyz_gen_pd['ID'] == row['ID']]['SMILES'].values[0] 137 | smi_list += smiles_each * row[self.NumConf] 138 | 139 | # Get a list of filenames for XYZ coordinates 140 | XYZ_list_ind = glob.glob(self.OutDir_xyz + str(row[self.ID_col]) + "*.pdb") 141 | XYZ_list.append(XYZ_list_ind) 142 | NumConf_list.append(int(row[self.NumConf])) 143 | 144 | # Define boundary conditions 145 | if max(self.box_size) == 0.0: # Box size is not provided 146 | NMol_type = len(NMol_list) 147 | Total_NMol = sum(NMol_list) 148 | total_vol = 0 149 | for i in range(NMol_type): 150 | molar_mass = MDlib.get_molar_mass(smi_list[i]) 151 | total_vol += MDlib.get_vol(self.density, NMol_list[i], molar_mass) 152 | self.box_size = MDlib.get_box_size( 153 | total_vol, box_type=self.box_type, incr_per=self.incr_per 154 | ) 155 | 156 | xmin, xmax, ymin, ymax, zmin, zmax = ( 157 | self.box_size[0], 158 | self.box_size[1], 159 | self.box_size[2], 160 | self.box_size[3], 161 | self.box_size[4], 162 | self.box_size[5], 163 | ) 164 | 165 | fix_dis = self.tol_dis / 2 166 | 167 | ind_mol_count = [0] * len(NumConf_list) 168 | count_model = 0 169 | for model in tqdm(range(1, self.NumModel + 1), desc='Building models ...'): 170 | if self.NumModel > 1: 171 | print("MODEL ", model) 172 | packmol_outdir_model = self.OutDir_packmol[:-1] + '_' + str(model) + "/" 173 | bd.build_dir(packmol_outdir_model) 174 | 175 | XYZ_list_ind_model = [] 176 | count_mol = 0 177 | for ind_list in XYZ_list: 178 | if len(ind_list) >= (count_model + 1) * NumConf_list[count_mol]: 179 | XYZ_list_ind_model.append( 180 | ind_list[ 181 | count_model 182 | * NumConf_list[count_mol]: (count_model + 1) 183 | * NumConf_list[count_mol] 184 | ] 185 | ) 186 | else: 187 | XYZ_list_ind_model.append( 188 | random.sample(ind_list, NumConf_list[count_mol]) 189 | ) 190 | 191 | count_mol += 1 192 | 193 | XYZ_list_model = [ 194 | item for sublist in XYZ_list_ind_model for item in sublist 195 | ] 196 | count_model += 1 197 | else: 198 | bd.build_dir(self.OutDir_packmol) 199 | 200 | packmol_outdir_model = self.OutDir_packmol 201 | XYZ_list_model = [item for sublist in XYZ_list for item in sublist] 202 | # exit() 203 | # PACKMOL input file 204 | MDlib.gen_packmol_inp( 205 | packmol_outdir_model, 206 | self.tol_dis, 207 | XYZ_list_model, 208 | 
NMol_list, 209 | xmin + fix_dis, 210 | xmax - fix_dis, 211 | ymin + fix_dis, 212 | ymax - fix_dis, 213 | zmin + fix_dis, 214 | zmax - fix_dis, 215 | ) 216 | # PACKMOL calculation 217 | command = ( 218 | packmol_path + " < " + os.path.join(packmol_outdir_model, "packmol.inp") 219 | ) 220 | errout = MDlib.run_packmol( 221 | command, os.path.join(packmol_outdir_model, "packmol.out") 222 | ) 223 | 224 | if errout is not None: 225 | print(" Error in packmol calculation") 226 | exit() 227 | elif ( 228 | os.path.exists(os.path.join(packmol_outdir_model, "packmol.pdb")) 229 | is False 230 | ): 231 | print(" Error in packmol calculation") 232 | exit() 233 | 234 | mol = ob.OBMol() 235 | obConversion = ob.OBConversion() 236 | obConversion.SetInAndOutFormats("pdb", "mol2") 237 | obConversion.ReadFile( 238 | mol, os.path.join(packmol_outdir_model, "packmol.pdb") 239 | ) 240 | obConversion.WriteFile( 241 | mol, os.path.join(packmol_outdir_model, "packmol.mol2") 242 | ) 243 | 244 | packmol_xyz = MDlib.read_mol2_xyz( 245 | os.path.join(packmol_outdir_model, "packmol.mol2") 246 | ) 247 | packmol_bond = MDlib.read_mol2_bond( 248 | os.path.join(packmol_outdir_model, "packmol.mol2") 249 | ) 250 | 251 | # Output filename 252 | if self.NumModel > 1: 253 | output_filename = self.OutFile + "_N" + str(count_model) 254 | else: 255 | output_filename = self.OutFile 256 | 257 | MDlib.gen_sys_vasp( 258 | os.path.join(self.OutDir, output_filename + ".vasp"), 259 | packmol_xyz, 260 | xmin, 261 | xmax, 262 | ymin, 263 | ymax, 264 | zmin, 265 | zmax, 266 | ) 267 | MDlib.gen_sys_data( 268 | os.path.join(self.OutDir, output_filename + ".data"), 269 | packmol_xyz, 270 | packmol_bond, 271 | xmin, 272 | xmax, 273 | ymin, 274 | ymax, 275 | zmin, 276 | zmax, 277 | self.BondInfo, 278 | ) 279 | end_1 = time.time() 280 | lib.print_out( 281 | pd.DataFrame(), "Amorphous model", np.round((end_1 - start_1) / 60, 2) 282 | ) 283 | 284 | def get_opls(self, output_fname='amor_opls.lmps', lbcc_charges=True): 285 | print("\nGenerating OPLS parameter file ...\n") 286 | system_pdb_fname = os.path.join(self.OutDir_packmol, "packmol.pdb") 287 | r = MDlib.get_coord_from_pdb(system_pdb_fname) 288 | 289 | bd.build_dir(self.OutDir_ligpargen) 290 | 291 | system_stats = { 292 | 'total_atoms': 0, 293 | 'total_bonds': 0, 294 | 'total_angles': 0, 295 | 'total_dihedrals': 0, 296 | 'total_impropers': 0, 297 | 'total_atom_types': 0, 298 | 'total_bond_types': 0, 299 | 'total_angle_types': 0, 300 | 'total_dihedral_types': 0, 301 | 'total_improper_types': 0, 302 | } 303 | dicts = [] 304 | 305 | # run LigParGen for every pdb file in the OutDir_xyz directory 306 | for index, row in self.Dataframe.iterrows(): 307 | _id = str(row[self.ID_col]) 308 | _length = row[self.Length] 309 | _num = row[self.NumMole] 310 | _conf = 1 # read in only the first conformer 311 | output_prefix = "{}_N{}_C{}".format(_id, _length, _conf) 312 | lig_output_fname = "{}.lmp".format(output_prefix) 313 | data_fname = os.path.join(self.OutDir_ligpargen, lig_output_fname) 314 | 315 | try: 316 | print("LigParGen working on {}.pdb".format(output_prefix)) 317 | Converter.convert( 318 | pdb=os.path.join(self.OutDir_xyz, output_prefix + '.pdb'), 319 | resname=output_prefix, 320 | charge=0, 321 | opt=0, 322 | lbcc=lbcc_charges, 323 | outdir='.', 324 | ) 325 | os.rename(lig_output_fname, data_fname) 326 | except BaseException: 327 | print('problem running LigParGen for {}.pdb.'.format(output_prefix)) 328 | 329 | # quickly read the headers of LigParGen generated LAMMPS 330 | # files to count total 
number of atoms/bonds/angles...etc 331 | ( 332 | natoms, 333 | nbonds, 334 | nangles, 335 | ndihedrals, 336 | nimpropers, 337 | natom_types, 338 | nbond_types, 339 | nangle_types, 340 | ndihedral_types, 341 | nimproper_types, 342 | ) = MDlib.read_lmps_header(data_fname) 343 | 344 | system_stats['total_atom_types'] += natom_types 345 | system_stats['total_bond_types'] += nbond_types 346 | system_stats['total_angle_types'] += nangle_types 347 | system_stats['total_dihedral_types'] += ndihedral_types 348 | system_stats['total_improper_types'] += nimproper_types 349 | system_stats['total_atoms'] += natoms * _num 350 | system_stats['total_bonds'] += nbonds * _num 351 | system_stats['total_angles'] += nangles * _num 352 | system_stats['total_dihedrals'] += ndihedrals * _num 353 | system_stats['total_impropers'] += nimpropers * _num 354 | 355 | # this switcher dict is to navigate through and store info for each section of a LAMMPS file 356 | switcher = { 357 | 'Masses': [], 358 | 'Pair Coeffs': [], 359 | 'Bond Coeffs': [], 360 | 'Angle Coeffs': [], 361 | 'Dihedral Coeffs': [], 362 | 'Improper Coeffs': [], 363 | 'Atoms': [], 364 | 'Bonds': [], 365 | 'Angles': [], 366 | 'Dihedrals': [], 367 | 'Impropers': [], 368 | 'Num': _num, 369 | } 370 | current_section = None 371 | 372 | # read all the info in the LigParGen generated LAMMPS file for modification 373 | with open(data_fname, 'rt') as lines: 374 | for line in lines: 375 | if any(x in line for x in switcher.keys()): 376 | current_section = line.strip() 377 | elif line == '\n' or not current_section: 378 | continue 379 | else: 380 | section_list = switcher.get( 381 | current_section, 'Invalid current section' 382 | ) 383 | section_list.append(line.split()) 384 | dicts.append(switcher) 385 | 386 | lammps_output = os.path.join(self.OutDir, output_fname) 387 | MDlib.write_lammps_ouput(lammps_output, r, self.box_size, system_stats, dicts) 388 | print("\nOPLS parameter file generated.") 389 | 390 | def get_gaff2( 391 | self, output_fname='amor_gaff2.lmps', atom_typing='pysimm', am1bcc_charges=False, swap_dict=None 392 | ): 393 | print("\nGenerating GAFF2 parameter file ...\n") 394 | system_pdb_fname = os.path.join(self.OutDir_packmol, "packmol.pdb") 395 | r = MDlib.get_coord_from_pdb(system_pdb_fname) 396 | 397 | bd.build_dir(self.OutDir_pysimm) 398 | 399 | system_stats = { 400 | 'total_atoms': 0, 401 | 'total_bonds': 0, 402 | 'total_angles': 0, 403 | 'total_dihedrals': 0, 404 | 'total_impropers': 0, 405 | 'total_atom_types': 0, 406 | 'total_bond_types': 0, 407 | 'total_angle_types': 0, 408 | 'total_dihedral_types': 0, 409 | 'total_improper_types': 0, 410 | } 411 | dicts = [] 412 | 413 | from pysimm import system, forcefield 414 | 415 | # run Pysimm for every cml (converted from pdb with Babel) file in the OutDir_xyz directory 416 | for index, row in self.Dataframe.iterrows(): 417 | _id = str(row[self.ID_col]) 418 | _length = row[self.Length] 419 | _num = row[self.NumMole] 420 | _conf = 1 # read in only the first conformer 421 | output_prefix = "{}_N{}_C{}".format(_id, _length, _conf) 422 | pdb_file = os.path.join(self.OutDir_xyz, "{}.pdb".format(output_prefix)) 423 | cml_file = os.path.join(self.OutDir_xyz, "{}.cml".format(output_prefix)) 424 | 425 | obConversion.SetInAndOutFormats("pdb", "cml") 426 | mol = ob.OBMol() 427 | obConversion.ReadFile(mol, pdb_file) 428 | obConversion.WriteFile(mol, cml_file) 429 | 430 | data_fname = os.path.join( 431 | self.OutDir_pysimm, "{}.lmp".format(output_prefix) 432 | ) 433 | 434 | try: 435 | print("Pysimm working on 
{}".format(cml_file)) 436 | s = system.read_cml(cml_file) 437 | except BaseException: 438 | print('problem reading {} for Pysimm.'.format(cml_file)) 439 | exit() 440 | 441 | f = forcefield.Gaff2() 442 | if atom_typing == 'pysimm': 443 | if am1bcc_charges: 444 | print('AM1BCC method is not available with pysimm, using gasteiger method instead') 445 | for b in s.bonds: 446 | if b.a.bonds.count == 3 and b.b.bonds.count == 3: 447 | b.order = 4 448 | s.apply_forcefield(f, charges='gasteiger') 449 | elif atom_typing == 'antechamber': 450 | mol2_file = os.path.join( 451 | self.OutDir_xyz, "{}.mol2".format(output_prefix) 452 | ) 453 | obConversion.SetInAndOutFormats("pdb", "mol2") 454 | mol = ob.OBMol() 455 | obConversion.ReadFile(mol, pdb_file) 456 | obConversion.WriteFile(mol, mol2_file) 457 | 458 | print("Antechamber working on {}".format(mol2_file)) 459 | MDlib.get_type_from_antechamber(s, mol2_file, 'gaff2', f, am1bcc_charges, swap_dict) 460 | s.pair_style = 'lj' 461 | s.apply_forcefield(f, charges=None if am1bcc_charges else 'gasteiger', skip_ptypes=True) 462 | else: 463 | print( 464 | 'Invalid atom typing option, please select pysimm or antechamber.' 465 | ) 466 | exit() 467 | s.write_lammps(data_fname) 468 | 469 | # quickly read the headers of Pysimm generated LAMMPS 470 | # files to count total number of atoms/bonds/angles...etc 471 | ( 472 | natoms, 473 | nbonds, 474 | nangles, 475 | ndihedrals, 476 | nimpropers, 477 | natom_types, 478 | nbond_types, 479 | nangle_types, 480 | ndihedral_types, 481 | nimproper_types, 482 | ) = MDlib.read_lmps_header(data_fname) 483 | 484 | system_stats['total_atom_types'] += natom_types 485 | system_stats['total_bond_types'] += nbond_types 486 | system_stats['total_angle_types'] += nangle_types 487 | system_stats['total_dihedral_types'] += ndihedral_types 488 | system_stats['total_improper_types'] += nimproper_types 489 | system_stats['total_atoms'] += natoms * _num 490 | system_stats['total_bonds'] += nbonds * _num 491 | system_stats['total_angles'] += nangles * _num 492 | system_stats['total_dihedrals'] += ndihedrals * _num 493 | system_stats['total_impropers'] += nimpropers * _num 494 | 495 | # this switcher dict is to navigate through and store info for each section of a LAMMPS file 496 | switcher = { 497 | 'Masses': [], 498 | 'Pair Coeffs': [], 499 | 'Bond Coeffs': [], 500 | 'Angle Coeffs': [], 501 | 'Dihedral Coeffs': [], 502 | 'Improper Coeffs': [], 503 | 'Atoms': [], 504 | 'Velocities': [], 505 | 'Bonds': [], 506 | 'Angles': [], 507 | 'Dihedrals': [], 508 | 'Impropers': [], 509 | 'Num': _num, 510 | } 511 | current_section = None 512 | 513 | # read all the info in the Pysimm generated LAMMPS file for modification 514 | with open(data_fname, 'rt') as lines: 515 | for line in lines: 516 | if any(x in line for x in switcher.keys()): 517 | current_section = line.strip() 518 | elif line == '\n' or not current_section: 519 | continue 520 | else: 521 | section_list = switcher.get( 522 | current_section, 'Invalid current section' 523 | ) 524 | section_list.append(line.split()) 525 | dicts.append(switcher) 526 | 527 | lammps_output = os.path.join(self.OutDir, output_fname) 528 | MDlib.write_lammps_ouput(lammps_output, r, self.box_size, system_stats, dicts) 529 | print("\nGAFF2 parameter file generated.") 530 | -------------------------------------------------------------------------------- /psp/ChainBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import 
psp.PSP_lib as bd 4 | from openbabel import openbabel as ob 5 | import os 6 | import shutil 7 | import time 8 | import multiprocessing 9 | from joblib import Parallel, delayed 10 | import psp.output_lib as lib 11 | from tqdm import tqdm 12 | 13 | obConversion = ob.OBConversion() 14 | obConversion.SetInAndOutFormats("mol", "xyz") 15 | 16 | 17 | class Builder: 18 | def __init__( 19 | self, 20 | Dataframe, 21 | NumConf=1, 22 | Length=['n'], 23 | MonomerAng='medium', 24 | DimerAng='low', 25 | Steps=20, 26 | Substeps=10, 27 | NCores=0, 28 | Method='SA', 29 | ID_col='ID', 30 | SMILES_col='smiles', 31 | IntraChainCorr=1, 32 | Tol_ChainCorr=50, 33 | Inter_Chain_Dis=12, 34 | OutDir='chains', 35 | ): 36 | self.ID_col = ID_col 37 | self.SMILES_col = SMILES_col 38 | self.OutDir = OutDir 39 | self.Dataframe = Dataframe 40 | self.NumConf = NumConf 41 | self.Length = Length 42 | self.MonomerAng = MonomerAng 43 | self.DimerAng = DimerAng 44 | self.Steps = Steps 45 | self.Substeps = Substeps 46 | self.NCores = NCores 47 | self.Method = Method 48 | self.IntraChainCorr = IntraChainCorr 49 | self.Tol_ChainCorr = Tol_ChainCorr 50 | self.Inter_Chain_Dis = Inter_Chain_Dis 51 | 52 | if self.Method not in ['SA', 'Dimer']: 53 | print("Error: please check keyword for * method ") 54 | print("SA == simulated annealing") 55 | print("Dimer == dimerization") 56 | exit() 57 | 58 | # list of molecules name and CORRECT/WRONG 59 | def BuildChain(self): 60 | start_1 = time.time() 61 | lib.print_psp_info() # Print PSP info 62 | lib.print_input("ChainBuilder", self.Dataframe) 63 | if self.NCores <= 0: 64 | ncore_print = 'All' 65 | else: 66 | ncore_print = self.NCores 67 | if self.Method != 'SA': 68 | self.Steps = 'NA' 69 | self.Substeps = 'NA' 70 | 71 | print( 72 | "\n", 73 | "Additional information: ", 74 | "\n", 75 | "Length of oligomers: ", 76 | self.Length, 77 | "\n", 78 | "Method: ", 79 | self.Method, 80 | "| Steps: ", 81 | self.Steps, 82 | "| Substeps: ", 83 | self.Substeps, 84 | "\n", 85 | "Intrachain correction: ", 86 | self.IntraChainCorr, 87 | "\n", 88 | "Tolerance for intrachain correction: ", 89 | self.Tol_ChainCorr, 90 | "\n", 91 | "Number of cores: ", 92 | ncore_print, 93 | "\n", 94 | "Output directory: ", 95 | self.OutDir, 96 | "\n", 97 | ) 98 | 99 | # Input Parameters 100 | intense = np.arange(-180, 180, 10) 101 | medium = [ 102 | 0, 103 | 30, 104 | -30, 105 | 45, 106 | -45, 107 | 60, 108 | -60, 109 | 90, 110 | 120, 111 | -120, 112 | 135, 113 | -135, 114 | 150, 115 | -150, 116 | 180, 117 | ] 118 | low = [0, 45, -45, 60, -60, 90, 120, -120, 180] 119 | 120 | # Directories 121 | # Working directory 122 | bd.build_dir('work_dir/') 123 | 124 | # location of input XYZ files 125 | xyz_in_dir = 'work_dir/xyz-in/' 126 | bd.build_dir(xyz_in_dir) 127 | 128 | xyz_tmp_dir = 'work_dir/xyz-temp/' 129 | bd.build_dir(xyz_tmp_dir) 130 | 131 | # location of directory for VASP inputs (polymers) and build a directory 132 | vasp_out_dir = os.path.join(self.OutDir, "") 133 | bd.build_dir(vasp_out_dir) 134 | 135 | list_out_xyz = 'output_CB.csv' 136 | chk_tri = [] 137 | ID = self.ID_col 138 | SMILES = self.SMILES_col 139 | df = self.Dataframe.copy() 140 | df[ID] = df[ID].apply(str) 141 | 142 | rot_angles_monomer = vars()[self.MonomerAng] 143 | rot_angles_dimer = vars()[self.DimerAng] 144 | 145 | if self.NCores == 0: 146 | self.NCores = multiprocessing.cpu_count() - 1 147 | print("\n Polymer chain building started...\n") 148 | result = Parallel(n_jobs=self.NCores)( 149 | delayed(bd.build_polymer)( 150 | unit_name, 151 | df, 152 | ID, 153 
| SMILES, 154 | xyz_in_dir, 155 | xyz_tmp_dir, 156 | vasp_out_dir, 157 | rot_angles_monomer, 158 | rot_angles_dimer, 159 | self.Steps, 160 | self.Substeps, 161 | self.NumConf, 162 | self.Length, 163 | self.Method, 164 | self.IntraChainCorr, 165 | self.Tol_ChainCorr, 166 | self.Inter_Chain_Dis, 167 | ) 168 | for unit_name in tqdm(df[ID].values, desc='Building models ...',) 169 | ) 170 | for i in result: 171 | chk_tri.append([i[0], i[1]]) # i[2] 172 | 173 | chk_tri = pd.DataFrame(chk_tri, columns=['ID', 'Result']) # Conformers 174 | chk_tri.to_csv(list_out_xyz) 175 | 176 | # Delete empty directory 177 | for index, row in chk_tri.iterrows(): 178 | if row['Result'] != 'SUCCESS': 179 | os.rmdir(vasp_out_dir + row['ID'] + '/') 180 | 181 | # Delete work directory 182 | if os.path.isdir('work_dir/'): 183 | shutil.rmtree('work_dir/') 184 | 185 | end_1 = time.time() 186 | lib.print_out(chk_tri, "Polymer chain", np.round((end_1 - start_1) / 60, 2)) 187 | return chk_tri 188 | -------------------------------------------------------------------------------- /psp/CrystalBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | from scipy.spatial.distance import cdist 5 | import time 6 | import multiprocessing 7 | from joblib import Parallel, delayed 8 | import psp.PSP_lib as bd 9 | import psp.output_lib as lib 10 | from tqdm import tqdm 11 | 12 | 13 | class Builder: 14 | def __init__( 15 | self, 16 | VaspInp_list, 17 | NSamples=5, 18 | InputRadius='auto', 19 | MinAtomicDis=2.0, 20 | OutDir='crystals', 21 | Polymer=True, 22 | Optimize=False, 23 | NumCandidate=50, 24 | NCores=0, 25 | ): 26 | self.VaspInp_list = VaspInp_list 27 | self.NSamples = NSamples 28 | self.InputRadius = InputRadius 29 | self.MinAtomicDis = MinAtomicDis 30 | self.OutDir = os.path.join(OutDir, "") 31 | self.NCores = NCores 32 | self.Polymer = Polymer 33 | self.Optimize = Optimize 34 | self.NumCandidate = NumCandidate 35 | 36 | def BuildCrystal(self): 37 | start_1 = time.time() 38 | lib.print_psp_info() # Print PSP info 39 | lib.print_input("CrystalBuilder") 40 | if self.Optimize is False: 41 | self.NumCandidate == 'All' 42 | if self.NCores <= 0: 43 | ncore_print = 'All' 44 | else: 45 | ncore_print = self.NCores 46 | 47 | print( 48 | " ----------------------------------------------- INPUT --------------------------------------------- ", 49 | "\n", 50 | "List of chain models (POSCAR): ", 51 | self.VaspInp_list, 52 | "\n", 53 | "Are they infinite polymer chains?: ", 54 | self.Polymer, 55 | "\n", 56 | "Number of samples: ", 57 | self.NSamples, 58 | "\n", 59 | "Optimize models: ", 60 | self.Optimize, 61 | "\n", 62 | "Number of models to be selected: ", 63 | self.NumCandidate, 64 | "\n", 65 | "Minimum atomic distance (angstrom): ", 66 | self.MinAtomicDis, 67 | "\n", 68 | "Number of cores: ", 69 | ncore_print, 70 | "\n", 71 | "Output directory: ", 72 | self.OutDir, 73 | "\n", 74 | ) 75 | 76 | build_dir(self.OutDir) 77 | # result = [] 78 | 79 | if self.NCores == 0: 80 | self.NCores = multiprocessing.cpu_count() - 1 81 | 82 | NCores_opt = 1 83 | NCores = self.NCores 84 | 85 | if self.Polymer is True: 86 | if isinstance(self.NSamples, int): 87 | print( 88 | ' maximum number of possible crustals for each polymer chain: ', 89 | self.NSamples * self.NSamples * self.NSamples, 90 | "\n", 91 | ) 92 | else: 93 | print( 94 | ' maximum number of possible crustals for each polymer chain: ', 95 | len(self.NSamples[0]) 96 | * len(self.NSamples[1]) 97 | * 
len(self.NSamples[2]), 98 | "\n", 99 | ) 100 | else: 101 | if isinstance(self.NSamples, int): 102 | print( 103 | ' maximum number of possible crustals for each chain: ', 104 | self.NSamples ** 8, 105 | "\n", 106 | ) 107 | else: 108 | print( 109 | ' maximum number of possible crustals for each polymer chain: ', 110 | len(self.NSamples[0]) 111 | * len(self.NSamples[1]) 112 | * len(self.NSamples[2]) 113 | * len(self.NSamples[3]) 114 | * len(self.NSamples[4]) 115 | * len(self.NSamples[5]) 116 | * len(self.NSamples[6]) 117 | * len(self.NSamples[7]), 118 | "\n", 119 | ) 120 | 121 | if self.Polymer is True: 122 | result = Parallel(n_jobs=NCores)( 123 | delayed(CrystalBuilderMainPolymer)( 124 | VaspInp, 125 | self.NSamples, 126 | self.InputRadius, 127 | self.MinAtomicDis, 128 | self.OutDir, 129 | self.Optimize, 130 | self.NumCandidate, 131 | NCores_opt, 132 | ) 133 | for VaspInp in tqdm(self.VaspInp_list, desc="Building models ...") 134 | ) 135 | else: 136 | result = Parallel(n_jobs=NCores)( 137 | delayed(CrystalBuilderMain)( 138 | VaspInp, 139 | self.NSamples, 140 | self.InputRadius, 141 | self.MinAtomicDis, 142 | self.OutDir, 143 | self.Optimize, 144 | self.NumCandidate, 145 | NCores_opt, 146 | ) 147 | for VaspInp in tqdm(self.VaspInp_list, desc="Building models ...") 148 | ) 149 | 150 | output = [] 151 | for i in result: 152 | output.append([i[0].replace('.vasp', ''), i[1], i[2]]) 153 | 154 | output = pd.DataFrame(output, columns=['ID', 'Count', 'radius']) 155 | end_1 = time.time() 156 | lib.print_out(output, "Crystal model", np.round((end_1 - start_1) / 60, 2)) 157 | return output 158 | 159 | 160 | def readvasp(inputvasp): 161 | basis_vec = [] 162 | Num_atom = [] 163 | xyz_coordinates = [] 164 | with open(inputvasp, 'r') as f: 165 | content = [line.rstrip() for line in f] 166 | file_info = content[0] 167 | for vec in content[2:5]: 168 | basis_vec.append(vec.split()) 169 | basis_vec = pd.DataFrame(basis_vec) 170 | for atoms in content[5:7]: 171 | Num_atom.append(atoms.split()) 172 | Num_atom = pd.DataFrame(Num_atom) 173 | 174 | nats = 0 175 | for nat in np.array(Num_atom.iloc[1]): 176 | nats += int(nat) 177 | 178 | # Do not read all the lines in the POSCAR generated by VASP. 
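    # POSCAR layout assumed here: line 0 is a comment, line 1 the scaling
    # factor, lines 2-4 the lattice vectors, line 5 the element symbols,
    # line 6 the per-element atom counts, and line 7 the coordinate mode
    # ('Direct' or 'Cartesian'). The nats coordinate lines therefore start
    # at line 8, which is why only content[8:8 + nats] is read below (any
    # trailing blocks, such as velocities, are ignored).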
179 | for xyz in content[8: 8 + nats]: 180 | xyz_coordinates.append(xyz.split()) 181 | 182 | # There are two modes in writing coordinated, Direct and Cartesian 183 | if str(content[7]).startswith('D'): 184 | rprim = np.array(basis_vec) 185 | xred = np.array(xyz_coordinates) 186 | xcart = np.matmul( 187 | np.transpose(rprim).astype(float), np.transpose(xred).astype(float) 188 | ) 189 | xyz_coordinates = pd.DataFrame(np.transpose(xcart)).astype(float) 190 | elif str(content[7]).startswith('C'): 191 | xyz_coordinates = pd.DataFrame(xyz_coordinates).astype(float) 192 | xyz_coordinates.columns = [1, 2, 3] 193 | return file_info, basis_vec, Num_atom, xyz_coordinates 194 | 195 | 196 | # Center of origin + peri_circle 197 | def Center_XY_r(xyz_coordinates, angle, r_cricle): 198 | xyz_copy = xyz_coordinates.copy() 199 | X_avg = xyz_copy[1].mean() 200 | Y_avg = xyz_copy[2].mean() 201 | xyz_copy[1] = xyz_copy[1] - X_avg + np.cos(np.deg2rad(angle)) * r_cricle 202 | xyz_copy[2] = xyz_copy[2] - Y_avg + np.sin(np.deg2rad(angle)) * r_cricle 203 | return xyz_copy 204 | 205 | 206 | def create_crystal_vasp( 207 | filename, 208 | first_poly, 209 | second_poly, 210 | Num_atom, 211 | basis_vec, 212 | file_info, 213 | cry_info, 214 | MinAtomicDis, 215 | Polymer=True, 216 | ): 217 | crystal_struc = pd.DataFrame() 218 | row1 = 0 219 | for col in Num_atom.columns: 220 | crystal_struc = pd.concat( 221 | [ 222 | crystal_struc, 223 | first_poly.loc[row1: row1 + int(Num_atom[col].values[1]) - 1], 224 | second_poly.loc[row1: row1 + int(Num_atom[col].values[1]) - 1], 225 | ] 226 | ) 227 | row1 += int(Num_atom[col].values[1]) 228 | 229 | Crystal_Num_atom = Num_atom.copy() 230 | Crystal_Num_atom.loc[1] = 2 * Crystal_Num_atom.loc[1].astype(int) 231 | keep_space = MinAtomicDis # in angstrom 232 | 233 | crystal_struc[1] = crystal_struc[1] - crystal_struc[1].min() + keep_space / 2 234 | crystal_struc[2] = crystal_struc[2] - crystal_struc[2].min() + keep_space / 2 235 | 236 | with open(filename, 'w') as f: 237 | f.write(file_info + ' (' + cry_info + ')\n') 238 | f.write('1.0' + '\n') 239 | a_vec = crystal_struc[1].max() - crystal_struc[1].min() + keep_space 240 | b_vec = crystal_struc[2].max() - crystal_struc[2].min() + keep_space 241 | 242 | if Polymer is True: 243 | c_vec = basis_vec.loc[2, 2] 244 | else: 245 | c_vec = crystal_struc[3].max() - crystal_struc[3].min() + keep_space 246 | 247 | f.write(' ' + str(a_vec) + ' ' + str(0.0) + ' ' + str(0.0) + '\n') 248 | f.write(' ' + str(0.0) + ' ' + str(b_vec) + ' ' + str(0.0) + '\n') 249 | f.write(' ' + str(0.0) + ' ' + str(0.0) + ' ' + str(c_vec) + '\n') 250 | 251 | f.write(Crystal_Num_atom.to_string(header=False, index=False)) 252 | f.write('\nCartesian\n') 253 | f.write(crystal_struc.to_string(header=False, index=False)) 254 | 255 | 256 | # Translation 257 | # INPUT: XYZ-coordinates and distance 258 | # OUTPUT: A new sets of XYZ-coordinates 259 | def tl(unit, dis): 260 | unit_copy = unit.copy() 261 | unit_copy[3] = unit_copy[3] + dis # Z direction 262 | return unit_copy 263 | 264 | 265 | # Distance between two points 266 | def CalDis(x1, x2, x3, y1, y2, y3): 267 | return np.sqrt((x1 - y1) ** 2 + (x2 - y2) ** 2 + (x3 - y3) ** 2) 268 | 269 | 270 | # This function try to create a directory 271 | # If it fails, the program will be terminated. 
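# (In the current implementation the OSError is caught and ignored, so an
# existing directory is simply reused rather than terminating the run.)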
272 | def build_dir(path): 273 | try: 274 | # os.mkdir(path) 275 | os.makedirs(path) 276 | except OSError: 277 | pass 278 | 279 | 280 | # Rotate on XY plane 281 | # INPUT: XYZ-coordinates and angle in Degree 282 | # OUTPUT: A new sets of XYZ-coordinates 283 | def rotateXY(xyz_coordinates, theta): # XYZ coordinates and angle 284 | unit = xyz_coordinates.copy() 285 | R_z = np.array( 286 | [ 287 | [np.cos(theta * np.pi / 180.0), -np.sin(theta * np.pi / 180.0)], 288 | [np.sin(theta * np.pi / 180.0), np.cos(theta * np.pi / 180.0)], 289 | ] 290 | ) 291 | oldXYZ = unit[[1, 2]].copy() 292 | XYZcollect = [] 293 | for eachatom in np.arange(oldXYZ.values.shape[0]): 294 | rotate_each = oldXYZ.iloc[eachatom].values.dot(R_z) 295 | XYZcollect.append(rotate_each) 296 | newXYZ = pd.DataFrame(XYZcollect) 297 | unit[[1, 2]] = newXYZ[[0, 1]] 298 | return unit 299 | 300 | 301 | # for VaspInp in VaspInp_list: 302 | def CrystalBuilderMainPolymer( 303 | VaspInp, 304 | NSamples, 305 | Input_radius, 306 | MinAtomicDis, 307 | OutDir, 308 | Optimize, 309 | NumCandidate, 310 | NCores_opt, 311 | ): 312 | file_info, basis_vec, Num_atom, xyz_coordinates = readvasp( 313 | VaspInp.replace('.vasp', '') + '.vasp' 314 | ) 315 | VaspInp = VaspInp.split('/')[-1].replace('.vasp', '') 316 | print(" Crystal model building started for", VaspInp, "...") 317 | build_dir(OutDir + VaspInp) # .split('/')[-1]) 318 | 319 | if isinstance(NSamples, int): 320 | samples = NSamples - 1 321 | tm = np.around( 322 | np.arange( 323 | 0, 324 | max(xyz_coordinates[3].values) 325 | - min(xyz_coordinates[3].values) 326 | + (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 327 | / samples, 328 | (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 329 | / samples, 330 | ), 331 | decimals=2, 332 | ) 333 | rm1 = np.around(np.arange(0, 180 + (180 / samples), 180 / samples), decimals=1) 334 | rm2 = np.around( 335 | np.arange(0, 360 + (360 / samples), 360 / samples), decimals=1 336 | ) # 0 and 180 degree creates problems 337 | 338 | # Total samples 339 | samp = [tm, rm1, rm2] 340 | 341 | # Number of digits in total number of crystal models 342 | digits = bd.len_digit_number(NSamples * NSamples * NSamples) 343 | 344 | elif isinstance(NSamples, list): 345 | if len(NSamples) == 3 and isinstance(NSamples[0], list) is True: 346 | samp = NSamples.copy() 347 | # Number of digits in total number of crystal models 348 | digits = bd.len_digit_number(len(samp[0]) * len(samp[1]) * len(samp[2])) 349 | else: 350 | print("There is an error in inputs: Check 'NSamples'") 351 | exit() 352 | else: 353 | print("There is an error in inputs: Check 'NSamples'") 354 | exit() 355 | 356 | first_poly = Center_XY_r(xyz_coordinates, 0.0, 0.0) 357 | 358 | # Calculate distance between two chains 359 | if Input_radius == 'auto': 360 | radius = ( 361 | np.sqrt( 362 | ( 363 | (first_poly[1].max() - first_poly[1].min()) 364 | * (first_poly[1].max() - first_poly[1].min()) 365 | ) 366 | + ( 367 | (first_poly[2].max() - first_poly[2].min()) 368 | * (first_poly[2].max() - first_poly[2].min()) 369 | ) 370 | ) 371 | + MinAtomicDis 372 | ) 373 | 374 | else: 375 | radius = float(Input_radius) 376 | 377 | count = 0 378 | for i in tqdm(samp[0], desc=VaspInp): 379 | for j in samp[2]: 380 | for k in samp[1]: 381 | second_poly_tl = tl(xyz_coordinates, i) 382 | second_poly_rm1 = rotateXY(second_poly_tl, j) 383 | second_poly_rm2 = Center_XY_r(second_poly_rm1, k, radius) 384 | 385 | if Input_radius == 'auto': 386 | # Calculate distance between atoms in first_unit and second_unit 
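                    # In 'auto' mode the inter-chain radius is refined
                    # iteratively below: starting from the bounding-box
                    # estimate computed above, the second chain is pushed
                    # outward (radius += 0.4 A) whenever the closest contact
                    # falls below MinAtomicDis, and pulled inward
                    # (radius -= 0.4 A) whenever it exceeds
                    # MinAtomicDis + 0.5 A, until the minimum interatomic
                    # separation lies within [MinAtomicDis, MinAtomicDis + 0.5)
                    # or the radius becomes too small to shrink further.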
387 | dist = cdist( 388 | first_poly[[1, 2, 3]].values, second_poly_rm2[[1, 2, 3]].values, 389 | ) 390 | dist[np.isnan(dist)] = 0.0 391 | dist = dist.flatten() 392 | 393 | adj_radius = radius - (min(dist) - MinAtomicDis) 394 | second_poly_rm2 = Center_XY_r(second_poly_rm1, k, adj_radius) 395 | 396 | dist = cdist( 397 | first_poly[[1, 2, 3]].values, second_poly_rm2[[1, 2, 3]].values, 398 | ) 399 | dist[np.isnan(dist)] = 0.0 400 | dist = dist.flatten() 401 | while min(dist) < MinAtomicDis or min(dist) >= MinAtomicDis + 0.5: 402 | if min(dist) < MinAtomicDis: 403 | adj_radius += 0.4 404 | second_poly_rm2 = Center_XY_r( 405 | second_poly_rm1, k, adj_radius 406 | ) 407 | dist = cdist( 408 | first_poly[[1, 2, 3]].values, 409 | second_poly_rm2[[1, 2, 3]].values, 410 | ) 411 | dist[np.isnan(dist)] = 0.0 412 | dist = dist.flatten() 413 | elif min(dist) >= MinAtomicDis + 0.5: 414 | adj_radius -= 0.4 415 | if adj_radius < 0.5: 416 | break 417 | second_poly_rm2 = Center_XY_r( 418 | second_poly_rm1, k, adj_radius 419 | ) 420 | dist = cdist( 421 | first_poly[[1, 2, 3]].values, 422 | second_poly_rm2[[1, 2, 3]].values, 423 | ) 424 | dist[np.isnan(dist)] = 0.0 425 | dist = dist.flatten() 426 | 427 | count += 1 428 | create_crystal_vasp( 429 | os.path.join( 430 | OutDir, 431 | VaspInp, 432 | 'cryst_out-' + str(count).zfill(digits) + '.vasp', 433 | ), 434 | first_poly, 435 | second_poly_rm2, 436 | Num_atom, 437 | basis_vec, 438 | file_info, 439 | 'CrystalBuilder Info:: Translation: ' 440 | + str(i) 441 | + '; ' 442 | + 'Rotation 1 ' 443 | + str(j) 444 | + '; ' 445 | + 'Rotation 2 ' 446 | + str(k), 447 | MinAtomicDis, 448 | ) 449 | print(" Crystal model building completed for", VaspInp) 450 | if Optimize is True: 451 | print(" Optimizing crystal models started for", VaspInp, "...") 452 | bd.screen_Candidates( 453 | OutDir + VaspInp, NumCandidate=NumCandidate, NCores_opt=NCores_opt 454 | ) 455 | print(" Optimizing crystal models completed for", VaspInp) 456 | return VaspInp, count, radius 457 | 458 | 459 | # for VaspInp in VaspInp_list: 460 | def CrystalBuilderMain( 461 | VaspInp, 462 | NSamples, 463 | Input_radius, 464 | MinAtomicDis, 465 | OutDir, 466 | Optimize, 467 | NumCandidate, 468 | NCores_opt, 469 | ): 470 | file_info, basis_vec, Num_atom, xyz_coordinates = readvasp( 471 | VaspInp.replace('.vasp', '') + '.vasp' 472 | ) 473 | VaspInp = VaspInp.split('/')[-1].replace('.vasp', '') 474 | print(" Crystal model building started for", VaspInp, "...") 475 | build_dir(OutDir + VaspInp) # .split('/')[-1]) 476 | 477 | if isinstance(NSamples, int): 478 | samples = NSamples - 1 479 | tm = np.around( 480 | np.arange( 481 | 0, 482 | max(xyz_coordinates[3].values) 483 | - min(xyz_coordinates[3].values) 484 | + (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 485 | / samples, 486 | (max(xyz_coordinates[3].values) - min(xyz_coordinates[3].values)) 487 | / samples, 488 | ), 489 | decimals=2, 490 | ) 491 | rm1 = np.around(np.arange(0, 180 + (180 / samples), 180 / samples), decimals=1) 492 | rm2 = np.around( 493 | np.arange(0, 360 + (360 / samples), 360 / samples), decimals=1 494 | ) # Rotation in X and Y axes 495 | 496 | # Total samples 497 | samp = [tm, rm1, rm2, rm2, rm2, rm2, rm2, rm2] 498 | 499 | # Number of digits in total number of crystal models 500 | digits = bd.len_digit_number(NSamples ** 8) 501 | 502 | elif isinstance(NSamples, list): 503 | if len(NSamples) == 8 and isinstance(NSamples[0], list) is True: 504 | samp = NSamples.copy() 505 | 506 | # Number of digits in total number of crystal 
models 507 | digits = bd.len_digit_number( 508 | len(samp[0]) 509 | * len(samp[1]) 510 | * len(samp[2]) 511 | * len(samp[3]) 512 | * len(samp[4]) 513 | * len(samp[5]) 514 | * len(samp[6]) 515 | * len(samp[7]) 516 | ) 517 | 518 | else: 519 | print("There is an error in inputs: Check 'NSamples'") 520 | exit() 521 | else: 522 | print("There is an error in inputs: Check 'NSamples'") 523 | exit() 524 | 525 | first_poly = Center_XY_r(xyz_coordinates, 0.0, 0.0) 526 | 527 | # Calculate distance between two chains 528 | if Input_radius == 'auto': 529 | radius = ( 530 | np.sqrt( 531 | ( 532 | (first_poly[1].max() - first_poly[1].min()) 533 | * (first_poly[1].max() - first_poly[1].min()) 534 | ) 535 | + ( 536 | (first_poly[2].max() - first_poly[2].min()) 537 | * (first_poly[2].max() - first_poly[2].min()) 538 | ) 539 | ) 540 | + MinAtomicDis 541 | ) 542 | 543 | else: 544 | radius = float(Input_radius) 545 | 546 | # Number of digits in total number of crystal models 547 | # digits = bd.len_digit_number(NSamples ** 8) 548 | 549 | count = 0 550 | for i in tqdm(samp[0], desc=VaspInp + " Generating models"): # Second poly 551 | for j in samp[2]: # Second poly 552 | for k in samp[1]: # Second poly 553 | for aX in samp[3]: # Second poly 554 | for aY in samp[4]: # Second poly 555 | for bX in samp[5]: # First poly 556 | for bY in samp[6]: # First poly 557 | for bZ in samp[7]: # First poly 558 | 559 | first_poly_bX = bd.rotateXYZOrigin( 560 | first_poly, bX, 0.0, 0.0 561 | ) 562 | first_poly_bY = bd.rotateXYZOrigin( 563 | first_poly_bX, 0.0, bY, 0.0 564 | ) 565 | first_poly_moved = bd.rotateXYZOrigin( 566 | first_poly_bY, 0.0, 0.0, bZ 567 | ) 568 | 569 | second_poly_tl = tl(xyz_coordinates, i) 570 | second_poly_rm1 = rotateXY(second_poly_tl, j) 571 | second_poly_rm2_aX = bd.rotateXYZOrigin( 572 | second_poly_rm1, aX, 0.0, 0.0 573 | ) 574 | second_poly_rm2_aY = bd.rotateXYZOrigin( 575 | second_poly_rm2_aX, 0.0, aY, 0.0 576 | ) 577 | second_poly_moved = Center_XY_r( 578 | second_poly_rm2_aY, k, radius 579 | ) 580 | 581 | if Input_radius == 'auto': 582 | # Calculate distance between atoms in first_unit and second_unit 583 | dist = cdist( 584 | first_poly_moved[[1, 2, 3]].values, 585 | second_poly_moved[[1, 2, 3]].values, 586 | ) 587 | dist[np.isnan(dist)] = 0.0 588 | dist = dist.flatten() 589 | 590 | adj_radius = radius - (min(dist) - MinAtomicDis) 591 | second_poly_moved = Center_XY_r( 592 | second_poly_rm2_aY, k, adj_radius 593 | ) 594 | 595 | dist = cdist( 596 | first_poly_moved[[1, 2, 3]].values, 597 | second_poly_moved[[1, 2, 3]].values, 598 | ) 599 | dist[np.isnan(dist)] = 0.0 600 | dist = dist.flatten() 601 | while ( 602 | min(dist) < MinAtomicDis 603 | or min(dist) >= MinAtomicDis + 0.5 604 | ): 605 | if min(dist) < MinAtomicDis: 606 | adj_radius += 0.4 607 | second_poly_moved = Center_XY_r( 608 | second_poly_rm2_aY, k, adj_radius 609 | ) 610 | dist = cdist( 611 | first_poly_moved[[1, 2, 3]].values, 612 | second_poly_moved[[1, 2, 3]].values, 613 | ) 614 | dist[np.isnan(dist)] = 0.0 615 | dist = dist.flatten() 616 | elif min(dist) >= MinAtomicDis + 0.5: 617 | adj_radius -= 0.4 618 | if adj_radius < 0.5: 619 | break 620 | second_poly_moved = Center_XY_r( 621 | second_poly_rm2_aY, k, adj_radius 622 | ) 623 | dist = cdist( 624 | first_poly_moved[[1, 2, 3]].values, 625 | second_poly_moved[[1, 2, 3]].values, 626 | ) 627 | dist[np.isnan(dist)] = 0.0 628 | dist = dist.flatten() 629 | 630 | count += 1 631 | create_crystal_vasp( 632 | os.path.join( 633 | OutDir, 634 | VaspInp, 635 | 'cryst_out-' 636 | + 
str(count).zfill(digits) 637 | + '.vasp', 638 | ), 639 | first_poly_moved, 640 | second_poly_moved, 641 | Num_atom, 642 | basis_vec, 643 | file_info, 644 | 'CrystalBuilder Info:: Translation: ' 645 | + str(i) 646 | + '; ' 647 | + 'Rotation 1 ' 648 | + str(j) 649 | + '; ' 650 | + 'Rotation 2 ' 651 | + str(k), 652 | MinAtomicDis, 653 | Polymer=False, 654 | ) 655 | print(" Crystal model building completed for", VaspInp) 656 | if Optimize is True: 657 | print(" Optimizing crystal models started for", VaspInp, "...") 658 | bd.screen_Candidates( 659 | OutDir + VaspInp, NumCandidate=NumCandidate, NCores_opt=NCores_opt 660 | ) 661 | print(" Optimizing crystal models completed for", VaspInp) 662 | 663 | return VaspInp, count, radius 664 | -------------------------------------------------------------------------------- /psp/MD_lib.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from rdkit import Chem 4 | from rdkit.Chem import Descriptors 5 | from scipy.spatial.distance import cdist 6 | from random import shuffle 7 | import subprocess 8 | 9 | from itertools import takewhile, islice, dropwhile 10 | 11 | 12 | def barycenter(unit): 13 | return unit.mean() 14 | 15 | 16 | def move_barycenter(unit, xyz_shift, origin=True, barycenter=True): 17 | unit_copy = unit.copy() 18 | if origin is True: 19 | if barycenter is False: 20 | unit_copy[1] = unit_copy[1] - unit_copy.min()[1] 21 | unit_copy[2] = unit_copy[2] - unit_copy.min()[2] 22 | unit_copy[3] = unit_copy[3] - unit_copy.min()[3] 23 | else: 24 | unit_copy[1] = unit_copy[1] - unit_copy.mean()[1] 25 | unit_copy[2] = unit_copy[2] - unit_copy.mean()[2] 26 | unit_copy[3] = unit_copy[3] - unit_copy.mean()[3] 27 | else: 28 | unit_copy[1] = unit_copy[1] + xyz_shift[0] 29 | unit_copy[2] = unit_copy[2] + xyz_shift[1] 30 | unit_copy[3] = unit_copy[3] + xyz_shift[2] 31 | return unit_copy 32 | 33 | 34 | def add_mole(sys, unit): 35 | df = unit.copy() 36 | if sys.empty: 37 | df['i'] = 1 38 | else: 39 | df['i'] = max(list(sys.i.values)) + 1 40 | df['j'] = np.arange(1, len(unit.index) + 1, 1) 41 | sys = pd.concat([sys, df]) 42 | return sys.reset_index(drop=True) 43 | 44 | 45 | def get_initial_model(NMol_list, XYZ_list, tol_dis, xmin, xmax, ymin, ymax, zmin, zmax): 46 | # List index for all possible molecules 47 | all_mole_idx = [] 48 | moleSN = 1 49 | for mole in NMol_list: 50 | all_mole_idx += [moleSN] * mole 51 | moleSN += 1 52 | # Shuffle indexes of molecules in the list 53 | shuffle(all_mole_idx) 54 | 55 | # create a system at origin 56 | per_incr = [0.0, 0.1, 0.2, 0.3] 57 | for per in per_incr: 58 | print("Percent increase: ", per) 59 | x_expand = (xmax - xmin) * per 60 | y_expand = (ymax - ymin) * per 61 | z_expand = (zmax - zmin) * per 62 | xmax_ex = xmax + x_expand 63 | ymax_ex = ymax + y_expand 64 | zmax_ex = zmax + z_expand 65 | 66 | # create a DataFrame for the system 67 | sys = pd.DataFrame() 68 | count = 0 69 | success = True 70 | add_yaxis = 0.0 71 | zlayer = 1 72 | 73 | for i in all_mole_idx: 74 | unit = pd.read_csv( 75 | XYZ_list[i - 1], header=None, skiprows=2, delim_whitespace=True 76 | ) 77 | Natm = unit.shape[0] 78 | unit = move_barycenter(unit, 0, origin=True, barycenter=False) 79 | unit_mod, success, add_yaxis, zlayer = move_unit( 80 | unit, 81 | sys, 82 | tol_dis, 83 | xmin, 84 | xmax_ex, 85 | ymin, 86 | ymax_ex, 87 | zmin, 88 | zmax_ex, 89 | add_yaxis, 90 | zlayer=zlayer, 91 | Natm=Natm, 92 | ) 93 | if success is True: 94 | count += 1 95 | sys = add_mole(sys, 
unit_mod) 96 | elif success is False and per < per_incr[-1]: 97 | break 98 | else: 99 | print("Can't pack molecules within the given box size.") 100 | exit() 101 | if success is True and per > 0.0: 102 | sys[1] = sys[1] - (sys[1].max() - xmax) / 2 103 | sys[2] = sys[2] - (sys[2].max() - ymax) / 2 104 | sys[3] = sys[3] - (sys[3].max() - zmax) / 2 105 | return sys 106 | elif success is True: 107 | return sys 108 | 109 | 110 | def move_unit( 111 | unit, 112 | sys_mod, 113 | tol_dis, 114 | xmin, 115 | xmax, 116 | ymin, 117 | ymax, 118 | zmin, 119 | zmax, 120 | add_yaxis, 121 | zlayer=1, 122 | Natm=0, 123 | ): 124 | unit_mod = unit.copy() 125 | min_x_dis = unit_mod[1].max() - unit_mod[1].min() + tol_dis 126 | min_y_dis = unit_mod[2].max() - unit_mod[2].min() + tol_dis 127 | min_z_dis = unit_mod[3].max() - unit_mod[3].min() + tol_dis 128 | per = 0.0 129 | tol_dis_mod = tol_dis + per * tol_dis 130 | if sys_mod.empty is False: 131 | last_mol = sys_mod.tail(Natm) 132 | mol_xmax, mol_ymax, mol_zmax, = ( 133 | last_mol[1].max(), 134 | last_mol[2].max(), 135 | last_mol[3].max(), 136 | ) 137 | sys_xmax, sys_ymax, sys_zmax = ( 138 | sys_mod[1].max(), 139 | sys_mod[2].max(), 140 | sys_mod[3].max(), 141 | ) 142 | if ( 143 | mol_zmax > zmax - min_z_dis 144 | and mol_ymax > ymax - min_y_dis 145 | and mol_xmax > xmax - min_x_dis 146 | ): 147 | return unit_mod, False, add_yaxis, zlayer 148 | 149 | else: 150 | if mol_ymax > ymax - min_y_dis: 151 | if mol_xmax > xmax - min_x_dis: 152 | unit_mod[3] = unit_mod[3] + sys_mod[3].max() + tol_dis_mod 153 | add_yaxis = unit_mod[2].max() 154 | zlayer += 1 155 | else: 156 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 157 | unit_mod[2] = unit_mod[2] + last_mol[2].min() - 0.1 158 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 159 | 160 | add_yaxis = max(add_yaxis, unit_mod[2].max()) 161 | 162 | elif zlayer > 1: 163 | if mol_xmax > xmax - min_x_dis: 164 | if add_yaxis + min_y_dis < ymax: 165 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 166 | unit_mod[2] = unit_mod[2] + add_yaxis + tol_dis_mod 167 | else: # Add to z axis 168 | unit_mod[3] = unit_mod[3] + sys_mod[3].max() + tol_dis_mod 169 | zlayer += 1 170 | 171 | add_yaxis = unit_mod[2].max() 172 | elif sys_xmax > xmax - min_x_dis: 173 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 174 | unit_mod[2] = unit_mod[2] + last_mol[2].min() 175 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 176 | 177 | else: 178 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 179 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 180 | 181 | else: 182 | if mol_xmax > xmax - min_x_dis: 183 | unit_mod[2] = unit_mod[2] + sys_mod[2].max() + tol_dis_mod 184 | add_yaxis = unit_mod[2].max() 185 | elif sys_xmax > xmax - min_x_dis: 186 | unit_mod[3] = unit_mod[3] + last_mol[3].min() 187 | unit_mod[2] = unit_mod[2] + last_mol[2].min() 188 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 189 | 190 | add_yaxis = max(add_yaxis, unit_mod[2].max()) 191 | else: 192 | unit_mod[1] = unit_mod[1] + last_mol[1].max() + tol_dis_mod 193 | add_yaxis = max(add_yaxis, unit_mod[2].max()) 194 | 195 | return unit_mod, True, add_yaxis, zlayer 196 | 197 | 198 | def get_vol(density, Nmol, molar_mass): 199 | return (Nmol * molar_mass * 10) / (6.02214076 * density) # in Ang 200 | 201 | 202 | def get_molar_mass(smi): 203 | return Descriptors.ExactMolWt(Chem.MolFromSmiles(smi)) 204 | 205 | 206 | def get_box_size(vol, box_type="cubic", incr_per=0.4): # c = cubic; r = rectangular 207 | axis = vol ** (1.0 / 3.0) 208 | if 
box_type == 'r': 209 | zmax = axis + axis * incr_per 210 | axis2 = np.sqrt(vol / zmax) 211 | return 0, axis2, 0, axis2, 0, zmax 212 | else: 213 | return 0, axis, 0, axis, 0, axis 214 | 215 | 216 | def eval_dis(sys_dis_arr, dis_cutoff, dis_value, a): 217 | unit1 = sys_dis_arr[sys_dis_arr[:, 3] == a][:, :-1] 218 | unit1_minX, unit1_maxX, unit1_minY, unit1_maxY, unit1_minZ, unit1_maxZ = ( 219 | np.amin(unit1[:, 0]), 220 | np.amax(unit1[:, 0]), 221 | np.amin(unit1[:, 1]), 222 | np.amax(unit1[:, 1]), 223 | np.amin(unit1[:, 2]), 224 | np.amax(unit1[:, 2]), 225 | ) 226 | 227 | unit2 = sys_dis_arr[sys_dis_arr[:, 3] != a][:, :-1] 228 | unit2 = unit2[ 229 | (unit2[:, 0] > unit1_minX - dis_cutoff) 230 | & (unit2[:, 0] < unit1_maxX + dis_cutoff) 231 | & (unit2[:, 1] > unit1_minY - dis_cutoff) 232 | & (unit2[:, 1] < unit1_maxY + dis_cutoff) 233 | & (unit2[:, 2] > unit1_minZ - dis_cutoff) 234 | & (unit2[:, 2] < unit1_maxZ + dis_cutoff) 235 | ] 236 | 237 | dist = cdist(unit1, unit2) 238 | 239 | new_arr = dist[ 240 | dist < dis_cutoff 241 | ] # If you may need to remove double counted distances (ij and ji) 242 | new_arr = dis_cutoff - new_arr 243 | 244 | dis_value = dis_value + np.sum(new_arr) 245 | 246 | sys_dis_arr = sys_dis_arr[sys_dis_arr[:, 3] != a] 247 | return sys_dis_arr, dis_value 248 | 249 | 250 | def evaluate_obj(sys, dis_cutoff, xmin, xmax, ymin, ymax, zmin, zmax): 251 | sys_dis_arr = sys[[1, 2, 3, 'i']].to_numpy() 252 | 253 | dis_value = 0 254 | # Last molecule is removed from the list 255 | list_mol = np.unique(sys_dis_arr[:, 3])[:-1].astype(int) 256 | 257 | dis_val = list( 258 | zip(*[eval_dis(sys_dis_arr, dis_cutoff, dis_value, a) for a in list_mol]) 259 | )[1] 260 | 261 | for a in list_mol: 262 | unit1 = sys_dis_arr[sys_dis_arr[:, 3] == a][:, :-1] 263 | unit1_minX, unit1_maxX, unit1_minY, unit1_maxY, unit1_minZ, unit1_maxZ = ( 264 | np.amin(unit1[:, 0]), 265 | np.amax(unit1[:, 0]), 266 | np.amin(unit1[:, 1]), 267 | np.amax(unit1[:, 1]), 268 | np.amin(unit1[:, 2]), 269 | np.amax(unit1[:, 2]), 270 | ) 271 | 272 | unit2 = sys_dis_arr[sys_dis_arr[:, 3] != a][:, :-1] 273 | unit2 = unit2[ 274 | (unit2[:, 0] > unit1_minX - dis_cutoff) 275 | & (unit2[:, 0] < unit1_maxX + dis_cutoff) 276 | & (unit2[:, 1] > unit1_minY - dis_cutoff) 277 | & (unit2[:, 1] < unit1_maxY + dis_cutoff) 278 | & (unit2[:, 2] > unit1_minZ - dis_cutoff) 279 | & (unit2[:, 2] < unit1_maxZ + dis_cutoff) 280 | ] 281 | 282 | dist = cdist(unit1, unit2) 283 | 284 | new_arr = dist[ 285 | dist < dis_cutoff 286 | ] # If you may need to remove double counted distances (ij and ji) 287 | new_arr = dis_cutoff - new_arr 288 | 289 | dis_value = dis_value + np.sum(new_arr) 290 | 291 | sys_dis_arr = sys_dis_arr[sys_dis_arr[:, 3] != a] 292 | 293 | bound_value = 0.0 294 | # X axis 295 | Arr_x = sys[1].values 296 | newArr_x_min = Arr_x[Arr_x < xmin] 297 | newArr_x_min = xmin - newArr_x_min 298 | 299 | newArr_x_max = Arr_x[Arr_x > xmax] 300 | newArr_x_max = newArr_x_max - xmax 301 | 302 | # Y axis 303 | Arr_y = sys[2].values 304 | newArr_y_min = Arr_y[Arr_y < ymin] 305 | newArr_y_min = ymin - newArr_y_min 306 | 307 | newArr_y_max = Arr_y[Arr_y > ymax] 308 | newArr_y_max = newArr_y_max - ymax 309 | 310 | # Z axis 311 | Arr_z = sys[3].values 312 | newArr_z_min = Arr_z[Arr_z < zmin] 313 | newArr_z_min = zmin - newArr_z_min 314 | 315 | newArr_z_max = Arr_z[Arr_z > zmax] 316 | newArr_z_max = newArr_z_max - zmax 317 | 318 | bound_value = ( 319 | bound_value 320 | + np.sum(newArr_x_min) 321 | + np.sum(newArr_x_max) 322 | + np.sum(newArr_y_min) 323 | + 
np.sum(newArr_y_max) 324 | + np.sum(newArr_z_min) 325 | + np.sum(newArr_z_max) 326 | ) 327 | 328 | return dis_value + bound_value 329 | 330 | 331 | # Rotate in X, Y and Z directions simultaneously 332 | def rotateXYZ(unit, theta3, theta2, theta1): 333 | th1 = theta1 * np.pi / 180.0 # Z-axis 334 | th2 = theta2 * np.pi / 180.0 # Y-axis 335 | th3 = theta3 * np.pi / 180.0 # X-axis 336 | Rot_matrix = np.array( 337 | [ 338 | [ 339 | np.cos(th1) * np.cos(th2), 340 | np.cos(th1) * np.sin(th2) * np.sin(th3) - np.sin(th1) * np.cos(th3), 341 | np.cos(th1) * np.sin(th2) * np.cos(th3) + np.sin(th1) * np.sin(th3), 342 | ], 343 | [ 344 | np.sin(th1) * np.cos(th2), 345 | np.sin(th1) * np.sin(th2) * np.sin(th3) + np.cos(th1) * np.cos(th3), 346 | np.sin(th1) * np.sin(th2) * np.cos(th3) - np.cos(th1) * np.sin(th3), 347 | ], 348 | [-np.sin(th2), np.cos(th2) * np.sin(th3), np.cos(th2) * np.cos(th3)], 349 | ] 350 | ) 351 | 352 | rot_XYZ = unit.loc[:, [1, 2, 3]].copy() 353 | rotated_unit = rot_XYZ.values.dot(Rot_matrix) 354 | newXYZ = pd.DataFrame(rotated_unit, columns=[1, 2, 3]) 355 | newXYZ.index = unit.index 356 | unit.loc[:, [1, 2, 3]] = newXYZ.loc[:, [1, 2, 3]] 357 | return unit 358 | 359 | 360 | # This function generates an input file for PACKMOL 361 | # INPUT: 362 | # OUTPUT: Write an input file for PACKMOL 363 | def gen_packmol_inp( 364 | OutDir_packmol, tolerance, XYZ_list, NMol_list, xmin, xmax, ymin, ymax, zmin, zmax 365 | ): 366 | with open(OutDir_packmol + "packmol.inp", 'w') as f: 367 | f.write( 368 | "tolerance " + str(tolerance) + "\n" 369 | ) # Minimum distance between any two molecule 370 | f.write("output " + OutDir_packmol + "packmol.pdb\n") 371 | f.write("filetype pdb\n\n") 372 | for mol in range(len(NMol_list)): 373 | f.write("structure " + XYZ_list[mol] + "\n") 374 | f.write(" number " + str(NMol_list[mol]) + "\n") 375 | f.write( 376 | " inside box " 377 | + str(xmin) 378 | + " " 379 | + str(ymin) 380 | + " " 381 | + str(zmin) 382 | + " " 383 | + str(xmax) 384 | + " " 385 | + str(ymax) 386 | + " " 387 | + str(zmax) 388 | + "\n" 389 | ) 390 | f.write("end structure\n\n") 391 | 392 | 393 | # Run packmol 394 | def run_packmol(bashCommand, output): 395 | f = open(output, "w") 396 | process = subprocess.Popen( 397 | bashCommand, stdout=f, shell=True 398 | ) # stdout=subprocess.PIPE 399 | output, error = process.communicate() 400 | return error 401 | 402 | 403 | # This function generates a xyz file 404 | # INPUT: Name of a output file and a DataFrame of element names and respective XYZ-coordinates 405 | # OUTPUT: Write a XYZ file 406 | def gen_sys_xyz(filename, unit): 407 | unit = unit[[0, 1, 2, 3]] 408 | with open(filename, 'w') as f: 409 | f.write(str(unit.values.shape[0])) # NUMBER OF ATOMS 410 | f.write("\n\n") # TWO SPACES 411 | unit.to_csv( 412 | f, sep=' ', index=False, header=False 413 | ) # XYZ COORDINATES OF NEW MOLECULE 414 | 415 | 416 | def move_molecules(sys, disx, disy, disz, theta1, theta2, theta3): 417 | df = pd.DataFrame() 418 | for i in set(sys.i.values): 419 | Mi = sys[sys['i'] == i] 420 | Mi = move_barycenter(Mi, [disx[i - 1], disy[i - 1], disz[i - 1]], False) 421 | Mi = rotateXYZ(Mi, theta1[i - 1], theta2[i - 1], theta3[i - 1]) 422 | df = pd.concat([df, Mi]) 423 | return df 424 | 425 | 426 | def gen_sys_vasp(filename, unit, xmin, xmax, ymin, ymax, zmin, zmax): 427 | unit = unit.sort_values(by=[0]) 428 | add_dis = 0.4 # This additional distance (in Ang) is added to avoid interaction near boundary 429 | file = open(filename, 'w+') 430 | file.write('### ' + 'POSCAR' + ' ###\n') 
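# POSCAR line 2 is the universal scaling factor; writing '1' here means the lattice vectors below are used as-is (in Angstroms)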
431 | file.write('1\n') 432 | a_vec = xmax - xmin + add_dis 433 | b_vec = ymax - ymin + add_dis 434 | c_vec = zmax - zmin + add_dis 435 | 436 | file.write(' ' + str(a_vec) + ' ' + str(0.0) + ' ' + str(0.0) + '\n') 437 | file.write(' ' + str(0.0) + ' ' + str(b_vec) + ' ' + str(0.0) + '\n') 438 | file.write(' ' + str(0.0) + ' ' + str(0.0) + ' ' + str(c_vec) + '\n') 439 | 440 | ele_list = [] 441 | count_ele_list = [] 442 | for element in sorted(set(unit[0].values)): 443 | ele_list.append(element) 444 | count_ele_list.append(list(unit[0].values).count(element)) 445 | 446 | for item in ele_list: 447 | file.write(str(item) + ' ') 448 | 449 | file.write('\n ') 450 | for item in count_ele_list: 451 | file.write(str(item) + ' ') 452 | 453 | file.write('\nCartesian\n') 454 | 455 | file.write(unit[[1, 2, 3]].to_string(header=False, index=False)) 456 | file.close() 457 | 458 | 459 | def gen_sys_data( 460 | filename, 461 | unit, 462 | packmol_bond, 463 | xmin, 464 | xmax, 465 | ymin, 466 | ymax, 467 | zmin, 468 | zmax, 469 | BondInfo, 470 | Inter_Mol_Dis=0.0, 471 | ): # lammps data file 472 | # move unit to the center of a box 473 | unit[[1, 2, 3]] = unit[[1, 2, 3]].astype(float) 474 | unit[1] = unit[1] - unit[1].min() + Inter_Mol_Dis / 2 475 | unit[2] = unit[2] - unit[2].min() + Inter_Mol_Dis / 2 476 | unit[3] = unit[3] - unit[3].min() + Inter_Mol_Dis / 2 477 | 478 | unit = unit.sort_values(by=[0]) 479 | new_atom_num = list(unit.index) 480 | 481 | unit_ele = unit.drop_duplicates(subset=0, keep="first").copy() 482 | 483 | # add_dis = 0.4 # This additional distance (in Ang) is added to avoid interaction near boundary 484 | file = open(filename, 'w+') 485 | file.write('### ' + '# LAMMPS data file written by PSP' + ' ###\n') 486 | file.write(str(unit.shape[0]) + ' atoms\n') 487 | if BondInfo is True: 488 | file.write(str(packmol_bond.shape[0]) + ' bonds\n') 489 | file.write(str(len(list(unit_ele[0].values))) + ' atom types\n') 490 | file.write(str(0.0) + ' ' + str(xmax - xmin) + ' xlo xhi\n') 491 | file.write(str(0.0) + ' ' + str(ymax - ymin) + ' ylo yhi\n') 492 | file.write(str(0.0) + ' ' + str(zmax - zmin) + ' zlo zhi\n\n') 493 | 494 | ele_list = [] 495 | ele_mass = [] 496 | ele_type = [] 497 | count = 1 498 | for index, row in unit_ele.iterrows(): 499 | ele_list.append(row[0]) 500 | ele_mass.append( 501 | Chem.GetPeriodicTable().GetAtomicWeight(row[0]) 502 | ) # Check error: Element not found 503 | ele_type.append(count) 504 | count += 1 505 | 506 | unit_ele['ele_type'] = ele_type 507 | ele_type_sys = [] 508 | for index, row in unit.iterrows(): 509 | ele_type_sys.append(unit_ele[unit_ele[0] == row[0]]['ele_type'].values[0]) 510 | 511 | file.write('Masses\n\n') 512 | count = 1 513 | for mass in ele_mass: 514 | file.write(str(count) + ' ' + str(mass) + '\n') 515 | count += 1 516 | 517 | SN = np.arange(1, unit.shape[0] + 1) 518 | unit['SN'] = SN 519 | unit['ele_type'] = ele_type_sys 520 | unit['charge'] = [0] * unit.shape[0] 521 | file.write('\nAtoms\n\n') 522 | file.write( 523 | unit[['SN', 'ele_type', 'charge', 1, 2, 3]].to_string(header=False, index=False) 524 | ) 525 | 526 | if BondInfo is True: 527 | file.write('\n\nBonds\n\n') 528 | 529 | packmol_bond_reorder = [] 530 | for index, row in packmol_bond.iterrows(): 531 | packmol_bond_reorder.append( 532 | [new_atom_num[int(row[2]) - 1], new_atom_num[int(row[3]) - 1]] 533 | ) 534 | 535 | packmol_bond_reorder = pd.DataFrame( 536 | packmol_bond_reorder, columns=['atm1', 'atm2'] 537 | ) 538 | packmol_bond_reorder['atm1'] += 1 539 | 
packmol_bond_reorder['atm2'] += 1 540 | packmol_bond_reorder['BO'] = packmol_bond[1] 541 | packmol_bond_reorder = packmol_bond_reorder.sort_values(by=['atm1']) 542 | packmol_bond_reorder['sl'] = packmol_bond[0].values 543 | 544 | file.write( 545 | packmol_bond_reorder[['sl', 'BO', 'atm1', 'atm2']].to_string( 546 | header=False, index=False 547 | ) 548 | ) 549 | file.close() 550 | 551 | 552 | def main_func(x, *args): 553 | arr_x = np.array_split(x, 6) 554 | disx = arr_x[0] 555 | disy = arr_x[1] 556 | disz = arr_x[2] 557 | theta1 = arr_x[3] 558 | theta2 = arr_x[4] 559 | theta3 = arr_x[5] 560 | sys = move_molecules(args[0], disx, disy, disz, theta1, theta2, theta3) 561 | return evaluate_obj( 562 | sys, args[1], args[2], args[3], args[4], args[5], args[6], args[7] 563 | ) 564 | 565 | 566 | def read_mol2_bond(mol2_file): 567 | list_bonds = [] 568 | with open(mol2_file, 'r') as f: 569 | dropped = dropwhile(lambda _line: "@BOND" not in _line, f) 570 | next(dropped, '') 571 | for line in dropped: 572 | list_bonds.append([line.split()[0]] + [line.split()[3]] + line.split()[1:3]) 573 | return pd.DataFrame(list_bonds) 574 | 575 | 576 | def read_mol2_xyz(mol2_file): 577 | list_xyz = [] 578 | with open(mol2_file) as f: 579 | for ln in takewhile( 580 | lambda x: "@BOND" not in x, 581 | islice(dropwhile(lambda x: "@ATOM" not in x, f), 1, None), 582 | ): 583 | list_xyz.append([ln.split()[5].split(".")[0]] + ln.split()[2:5]) 584 | return pd.DataFrame(list_xyz) 585 | 586 | 587 | # read in pdb file; please see the following link for details of pdb format 588 | # https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html 589 | def read_pdb_line(line): 590 | record_type = line[0:6] 591 | atom_serial_num = line[6:11] 592 | atom_name = line[12:16] 593 | residue_name = line[17:20] 594 | chain_identifier = line[21] 595 | residue_seq_num = line[22:26] 596 | x_coord = float(line[30:38]) 597 | y_coord = float(line[38:46]) 598 | z_coord = float(line[46:54]) 599 | element = line[76:78] 600 | return x_coord, y_coord, z_coord 601 | 602 | 603 | def read_lmps_header(lmp_file): 604 | f = open(lmp_file) 605 | lines = f.readlines() 606 | natoms = int(lines[2].split()[0]) 607 | nbonds = int(lines[3].split()[0]) 608 | nangles = int(lines[4].split()[0]) 609 | ndihedrals = int(lines[5].split()[0]) 610 | nimpropers = int(lines[6].split()[0]) 611 | 612 | parts = lines[8].split() 613 | if len(parts) >= 2 and parts[1] == 'atom': 614 | natom_types = int(parts[0]) 615 | else: 616 | natom_types = 0 617 | 618 | parts = lines[9].split() 619 | if len(parts) >= 2 and parts[1] == 'bond': 620 | nbond_types = int(parts[0]) 621 | else: 622 | nbond_types = 0 623 | 624 | parts = lines[10].split() 625 | if len(parts) >= 2 and parts[1] == 'angle': 626 | nangle_types = int(parts[0]) 627 | else: 628 | nangle_types = 0 629 | 630 | parts = lines[11].split() 631 | if len(parts) >= 2 and parts[1] == 'dihedral': 632 | ndihedral_types = int(parts[0]) 633 | else: 634 | ndihedral_types = 0 635 | 636 | parts = lines[12].split() 637 | if len(parts) >= 2 and parts[1] == 'improper': 638 | nimproper_types = int(parts[0]) 639 | else: 640 | nimproper_types = 0 641 | return ( 642 | natoms, 643 | nbonds, 644 | nangles, 645 | ndihedrals, 646 | nimpropers, 647 | natom_types, 648 | nbond_types, 649 | nangle_types, 650 | ndihedral_types, 651 | nimproper_types, 652 | ) 653 | 654 | 655 | # returns a 2D array of x, y, z coordinates (i.e. 
r[id][coordinate]) 656 | def get_coord_from_pdb(system_pdb_fname): 657 | skip_beginning = 5 # header lines of packmol.pdb 658 | atom_count = 0 # coutner for atom number 659 | r = np.zeros([1, 3], float) # 2D array of x, y, z coordinates, r[id][coordinate] 660 | 661 | # get all atom coordinates from the system/packmol pdb file 662 | with open(system_pdb_fname, 'r') as f: 663 | for skipped_frame in range(skip_beginning): 664 | f.readline() 665 | 666 | line = f.readline() 667 | x_coord, y_coord, z_coord = read_pdb_line(line) 668 | r[atom_count][0] = x_coord 669 | r[atom_count][1] = y_coord 670 | r[atom_count][2] = z_coord 671 | 672 | # if next line still returns x, y, z coordinates, allocate more memeory for the array 673 | while True: 674 | try: 675 | atom_count += 1 676 | line = f.readline() 677 | x_coord, y_coord, z_coord = read_pdb_line(line) 678 | r = np.concatenate((r, np.zeros([1, 3], float))) 679 | r[atom_count][0] = x_coord 680 | r[atom_count][1] = y_coord 681 | r[atom_count][2] = z_coord 682 | except Exception: 683 | break 684 | return r 685 | 686 | 687 | def write_lammps_ouput(lammps_output, r, box_size, system_stats, dicts): 688 | # These switcher dicts are for each section of the LAMMPS file that we will build 689 | ( 690 | atomconvertdicts, 691 | bondconvertdicts, 692 | angleconvertdicts, 693 | dihedralconvertdicts, 694 | improperconvertdicts, 695 | ) = ([] for i in range(5)) 696 | switcher_coeffs = { 697 | 'Pair Coeffs': [system_stats['total_atoms'], atomconvertdicts], 698 | 'Bond Coeffs': [system_stats['total_bonds'], bondconvertdicts], 699 | 'Angle Coeffs': [system_stats['total_angles'], angleconvertdicts], 700 | 'Dihedral Coeffs': [system_stats['total_dihedrals'], dihedralconvertdicts], 701 | 'Improper Coeffs': [system_stats['total_impropers'], improperconvertdicts], 702 | } 703 | switcher_main = { 704 | 'Bonds': [system_stats['total_bonds'], bondconvertdicts], 705 | 'Angles': [system_stats['total_angles'], angleconvertdicts], 706 | 'Dihedrals': [system_stats['total_dihedrals'], dihedralconvertdicts], 707 | 'Impropers': [system_stats['total_impropers'], improperconvertdicts], 708 | } 709 | 710 | # build the final LAMMPS output 711 | with open(lammps_output, 'wt') as out: 712 | # header section 713 | out.write('LAMMPS data file Created by PSP\n') 714 | out.write('\n') 715 | out.write('{:>12} atoms\n'.format(system_stats['total_atoms'])) 716 | out.write('{:>12} bonds\n'.format(system_stats['total_bonds'])) 717 | out.write('{:>12} angles\n'.format(system_stats['total_angles'])) 718 | out.write('{:>12} dihedrals\n'.format(system_stats['total_dihedrals'])) 719 | out.write('{:>12} impropers\n'.format(system_stats['total_impropers'])) 720 | out.write('\n') 721 | out.write('{:>12} atom types\n'.format(system_stats['total_atom_types'])) 722 | out.write('{:>12} bond types\n'.format(system_stats['total_bond_types'])) 723 | out.write('{:>12} angle types\n'.format(system_stats['total_angle_types'])) 724 | out.write( 725 | '{:>12} dihedral types\n'.format(system_stats['total_dihedral_types']) 726 | ) 727 | out.write( 728 | '{:>12} improper types\n'.format(system_stats['total_improper_types']) 729 | ) 730 | out.write('\n') 731 | out.write('{:>12} {:>12} xlo xhi\n'.format(box_size[0], box_size[1])) 732 | out.write('{:>12} {:>12} ylo yhi\n'.format(box_size[2], box_size[3])) 733 | out.write('{:>12} {:>12} zlo zhi\n'.format(box_size[4], box_size[5])) 734 | out.write('\n') 735 | 736 | # Masses section 737 | out.write('Masses\n') 738 | out.write('\n') 739 | counter = 0 740 | for dic in dicts: 
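# each entry in dicts holds the parsed sections (Masses, the Coeffs blocks, Atoms, Bonds, ...) of one molecule type's LAMMPS data file; type IDs are renumbered sequentially across all molecule types via counter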
741 | for fields in dic.get('Masses'): 742 | counter += 1 743 | parts = ' '.join(['%s' % (i,) for i in fields[1:]]) 744 | out.write('{:>12} {:<}\n'.format(counter, parts)) 745 | out.write('\n') 746 | 747 | # Pair, Bond, Angle, Dihedral, and Improper Coeffs sections 748 | for coeff_type in switcher_coeffs: 749 | if switcher_coeffs.get(coeff_type)[0] == 0: 750 | continue 751 | out.write('{}\n'.format(coeff_type)) 752 | out.write('\n') 753 | counter = 0 754 | for dic in dicts: 755 | convertdict = {} 756 | for fields in dic.get(coeff_type): 757 | counter += 1 758 | convertdict[fields[0]] = counter 759 | parts = ' '.join(['%s' % (i,) for i in fields[1:]]) 760 | out.write('{:>12} {:<}\n'.format(counter, parts)) 761 | switcher_coeffs.get(coeff_type)[1].append(convertdict) 762 | out.write('\n') 763 | 764 | # Atom section 765 | out.write('Atoms\n') 766 | out.write('\n') 767 | atom_counter = 0 768 | chain_counter = 0 769 | for index, dic in enumerate(dicts): 770 | for num in range(dic.get('Num')): 771 | chain_counter += 1 772 | for fields in dic.get('Atoms'): 773 | atom_counter += 1 774 | new_x = r[atom_counter - 1][0] 775 | new_y = r[atom_counter - 1][1] 776 | new_z = r[atom_counter - 1][2] 777 | new_atomtype = atomconvertdicts[index][fields[2]] 778 | out.write( 779 | '{:>8} {:>7} {:>3} {:>12} {:>10} {:>10} {:>10}\n'.format( 780 | atom_counter, 781 | chain_counter, 782 | new_atomtype, 783 | fields[3], 784 | new_x, 785 | new_y, 786 | new_z, 787 | ) 788 | ) 789 | out.write('\n') 790 | 791 | # Bond, Angle, Dihedral, and Improper sections 792 | for section_type in switcher_main: 793 | if switcher_main.get(section_type)[0] == 0: 794 | continue 795 | out.write('{}\n'.format(section_type)) 796 | out.write('\n') 797 | atom_counter = 0 798 | type_counter = 0 799 | for index, dic in enumerate(dicts): 800 | for num in range(dic.get('Num')): 801 | for fields in dic.get(section_type): 802 | new_id = int(fields[0]) + type_counter 803 | section_convertdicts = switcher_main.get(section_type)[1] 804 | new_type = section_convertdicts[index][fields[1]] 805 | new_atom1 = int(fields[2]) + atom_counter 806 | new_atom2 = int(fields[3]) + atom_counter 807 | out.write( 808 | '{:>8} {:>8} {:>6} {:>6}'.format( 809 | new_id, new_type, new_atom1, new_atom2 810 | ) 811 | ) 812 | if not section_type == 'Bonds': 813 | new_atom3 = int(fields[4]) + atom_counter 814 | out.write(' {:>6}'.format(new_atom3)) 815 | if not section_type == 'Angles': 816 | new_atom4 = int(fields[5]) + atom_counter 817 | out.write(' {:>6}'.format(new_atom4)) 818 | out.write('\n') 819 | atom_counter += len(dic.get('Atoms')) 820 | type_counter += len(dic.get(section_type)) 821 | out.write('\n') 822 | 823 | 824 | def get_type_from_antechamber( 825 | s, mol2_file, types='gaff2', f=None, am1bcc_charges=False, swap_dict=None, cleanup=True 826 | ): 827 | import os 828 | import glob 829 | 830 | ANTECHAMBER_EXEC = os.environ.get('ANTECHAMBER_EXEC') 831 | temp_ac_fname = 'temp.ac' 832 | temp_pdb_fname = None 833 | try: 834 | command = '{} -fi mol2 -i {} -fo ac -o {} -at {}'.format(ANTECHAMBER_EXEC, mol2_file, temp_ac_fname, types) 835 | if am1bcc_charges: 836 | command += ' -c bcc' 837 | subprocess.call(command, shell=True) 838 | fr = open(temp_ac_fname, "r") 839 | except BaseException: 840 | print('Error running Antechamber with the mol2 file, switch to using pdb file.') 841 | temp_pdb_fname = 'temp.pdb' 842 | s.write_pdb(temp_pdb_fname) 843 | command = '{} -fi pdb -i {} -fo ac -o {} -at {}'.format(ANTECHAMBER_EXEC, temp_pdb_fname, temp_ac_fname, types) 844 | if 
am1bcc_charges: 845 | command += ' -c bcc' 846 | subprocess.call(command, shell=True) 847 | fr = open(temp_ac_fname, "r") 848 | fr.readline() 849 | fr.readline() 850 | line = fr.readline() 851 | while line.split()[0] == 'ATOM': 852 | tag = int(line.split()[1]) 853 | type_name = line.split()[-1] 854 | if am1bcc_charges: 855 | charge = float(line.split()[-2]) 856 | s.particles[tag].charge = charge 857 | if swap_dict: 858 | for key in swap_dict: 859 | if type_name == key: 860 | type_name = swap_dict[key] 861 | if s.particle_types.get(type_name): 862 | s.particles[tag].type = s.particle_types.get(type_name)[0] 863 | elif f: 864 | pt = f.particle_types.get(type_name) 865 | if pt: 866 | s.particles[tag].type = s.particle_types.add(pt[0].copy()) 867 | else: 868 | print('cannot find type {} in system or forcefield'.format(type_name)) 869 | line = fr.readline() 870 | fr.close() 871 | 872 | if cleanup: 873 | fnames = ['ATOMTYPE.INF', temp_ac_fname] 874 | fnames += glob.glob('ANTECHAMBER*') 875 | if temp_pdb_fname: 876 | fnames += [temp_pdb_fname] 877 | for fname in fnames: 878 | try: 879 | os.remove(fname) 880 | except Exception: 881 | print('problem removing {} during cleanup'.format(fname)) 882 | -------------------------------------------------------------------------------- /psp/MoleculeBuilder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import psp.PSP_lib as bd 4 | import os 5 | import shutil 6 | import time 7 | import multiprocessing 8 | from joblib import Parallel, delayed 9 | import psp.output_lib as lib 10 | from tqdm import tqdm 11 | 12 | 13 | class Builder: 14 | def __init__( 15 | self, 16 | Dataframe, 17 | NCores=0, 18 | ID_col='ID', 19 | SMILES_col='smiles', 20 | LeftCap='LeftCap', 21 | RightCap='RightCap', 22 | OutDir='molecules', 23 | Inter_Mol_Dis=6, 24 | Length=[1], 25 | NumConf=1, 26 | Loop=False, 27 | IrrStruc=False, 28 | OPLS=False, 29 | GAFF2=False, 30 | GAFF2_atom_typing='pysimm', 31 | Subscript=False, 32 | ): 33 | self.ID_col = ID_col 34 | self.SMILES_col = SMILES_col 35 | self.LeftCap = LeftCap 36 | self.RightCap = RightCap 37 | self.OutDir = OutDir 38 | self.Dataframe = Dataframe 39 | self.NCores = NCores 40 | self.Inter_Mol_Dis = Inter_Mol_Dis 41 | self.Length = Length 42 | self.NumConf = NumConf 43 | self.Loop = Loop 44 | self.IrrStruc = IrrStruc 45 | self.OPLS = OPLS 46 | self.GAFF2 = GAFF2 47 | self.GAFF2_atom_typing = GAFF2_atom_typing 48 | self.Subscript = Subscript 49 | 50 | # list of molecules name and CORRECT/WRONG 51 | def Build(self): 52 | if self.Subscript is False: 53 | lib.print_psp_info() # Print PSP info 54 | lib.print_input("MoleculeBuilder", self.Dataframe) 55 | if self.OPLS is True: 56 | self.NCores = 1 57 | if self.NCores <= 0: 58 | ncore_print = 'All' 59 | else: 60 | ncore_print = self.NCores 61 | 62 | print( 63 | "\n", 64 | "Additional information: ", 65 | "\n", 66 | "Length of oligomers: ", 67 | self.Length, 68 | "\n", 69 | "Number of conformers: ", 70 | self.NumConf, 71 | "\n", 72 | "Loop model: ", 73 | self.Loop, 74 | "\n", 75 | "Run short MD simulation: ", 76 | self.IrrStruc, 77 | "\n", 78 | "Generate OPLS parameter file: ", 79 | self.OPLS, 80 | "\n", 81 | "Intermolecular distance in POSCAR: ", 82 | self.Inter_Mol_Dis, 83 | "\n", 84 | "Number of cores: ", 85 | ncore_print, 86 | "\n", 87 | "Output Directory: ", 88 | self.OutDir, 89 | "\n", 90 | ) 91 | 92 | # location of directory for VASP inputs (polymers) and build a directory 93 | out_dir = self.OutDir + '/' 
94 | bd.build_dir(out_dir) 95 | 96 | # Directories 97 | # Working directory 98 | bd.build_dir('work_dir/') 99 | 100 | # location of input XYZ files 101 | xyz_in_dir = 'work_dir/xyz-in/' 102 | bd.build_dir(xyz_in_dir) 103 | 104 | start_1 = time.time() 105 | list_out_xyz = 'output_MB.csv' 106 | chk_tri = [] 107 | 108 | df = self.Dataframe.copy() 109 | df[self.ID_col] = df[self.ID_col].apply(str) 110 | 111 | if self.NCores == 0: 112 | self.NCores = multiprocessing.cpu_count() - 1 113 | 114 | if self.NCores == -1 or self.IrrStruc is True: 115 | NCores_opt = 0 116 | self.NCores = 1 117 | else: 118 | NCores_opt = 1 119 | print("\n 3D model building started...\n") 120 | result = Parallel(n_jobs=self.NCores)( 121 | delayed(bd.build_3D)( 122 | unit_name, 123 | df, 124 | self.ID_col, 125 | self.SMILES_col, 126 | self.LeftCap, 127 | self.RightCap, 128 | out_dir, 129 | self.Inter_Mol_Dis, 130 | self.Length, 131 | xyz_in_dir, 132 | self.NumConf, 133 | self.Loop, 134 | self.IrrStruc, 135 | self.OPLS, 136 | self.GAFF2, 137 | self.GAFF2_atom_typing, 138 | NCores_opt, 139 | ) 140 | for unit_name in tqdm(df[self.ID_col].values, desc='Building models ...',) 141 | ) 142 | 143 | for i in result: 144 | chk_tri.append([i[0], i[1], i[2]]) 145 | 146 | chk_tri = pd.DataFrame(chk_tri, columns=['ID', 'Result', 'SMILES']) 147 | chk_tri.to_csv(list_out_xyz) 148 | 149 | bd.del_tmp_files() 150 | 151 | # Delete work directory 152 | if os.path.isdir('work_dir/'): 153 | shutil.rmtree('work_dir/') 154 | 155 | end_1 = time.time() 156 | lib.print_out( 157 | chk_tri, "3D model", np.round((end_1 - start_1) / 60, 2), self.Subscript 158 | ) 159 | return chk_tri 160 | -------------------------------------------------------------------------------- /psp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/psp/__init__.py -------------------------------------------------------------------------------- /psp/output_lib.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | pd.set_option('display.max_rows', None) 5 | pd.set_option('display.max_columns', None) 6 | pd.set_option('display.width', 1000) 7 | pd.set_option('display.colheader_justify', 'center') 8 | pd.set_option('display.precision', 3) 9 | 10 | 11 | def print_psp_info(): 12 | print("") 13 | print( 14 | " --------- PPPPPP SSSSSS PPPPPP --------- " 15 | ) 16 | print( 17 | " ----------------- PP PP SS PP PP ----------------- " 18 | ) 19 | print( 20 | " ------------------------- PP PP SS PP PP ------------------------- " 21 | ) 22 | print( 23 | " -------------------------------- PPPPPP SSSSS PPPPPP -------------------------------- " 24 | ) 25 | print( 26 | " ------------------------- PP SS PP ------------------------- " 27 | ) 28 | print( 29 | " ----------------- PP SS PP ----------------- " 30 | ) 31 | print( 32 | " --------- PP SSSSSS PP --------- " 33 | ) 34 | print( 35 | " --------------------------------------------------------------------------------------------------- " 36 | ) 37 | version = '1.0.0' # pkg_resources.require("PolymerStructurePredictor")[0].version 38 | print( 39 | " ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** " 40 | ) 41 | print( 42 | " Polymer Structure Predictor (PSP) version = ", 43 | version, 44 | " ", 45 | ) 46 | print( 47 | " Developed at Ramprasad Group " 48 | ) 49 | print( 50 | " Materials Science 
and Engineering, Georgia Institute of Technology, Atlanta, US " 51 | ) 52 | print("") 53 | print( 54 | " Cite this work as: " 55 | ) 56 | print( 57 | " H. Sahu, K-H. Shen, J. H. Montoya, H. Tran, R. Ramprasad, PSP: A python toolkit " 58 | ) 59 | print( 60 | " for predicting 3D models of polymers, journal name, volume, page, 2022. " 61 | ) 62 | 63 | print( 64 | " ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** " 65 | ) 66 | print( 67 | " --------------------------------------------------------------------------------------------------- " 68 | ) 69 | 70 | 71 | def print_input(builder, input_file=pd.DataFrame()): 72 | print(" ", builder, " started...") 73 | if not input_file.empty: 74 | print( 75 | " ----------------------------------------------- INPUT --------------------------------------------- " 76 | ) 77 | input_file.index = np.arange(1, len(input_file) + 1) 78 | print(input_file.to_markdown()) 79 | 80 | 81 | def print_out(output_file, model_name, time, subscript=False): 82 | print("\n", model_name, "building completed.\n") 83 | if subscript is True or model_name == "Amorphous model": 84 | pass 85 | else: 86 | print( 87 | " ----------------------------------------------- OUTPUT -------------------------------------------- " 88 | ) 89 | if not output_file.empty: 90 | output_file.index = np.arange(1, len(output_file) + 1) 91 | print(output_file.to_markdown()) 92 | print("") 93 | if subscript is False: 94 | print(" Total run time (minutes): ", time) 95 | print( 96 | " ------------------------------------- PSP TERMINATED NORMALLY ------------------------------------- " 97 | ) 98 | else: 99 | print("", model_name, "building time (minutes): ", time) 100 | print("") 101 | -------------------------------------------------------------------------------- /psp/simulated_annealing.py: -------------------------------------------------------------------------------- 1 | import psp.PSP_lib as bd 2 | import numpy as np 3 | import pandas as pd 4 | import random 5 | import math 6 | from openbabel import openbabel as ob 7 | 8 | obConversion = ob.OBConversion() 9 | obConversion.SetInAndOutFormats("xyz", "xyz") 10 | ff = ob.OBForceField.FindForceField('UFF') 11 | mol = ob.OBMol() 12 | np.set_printoptions(precision=20) 13 | 14 | 15 | # define objective function 16 | def f( 17 | unit_name, 18 | sl, 19 | unit, 20 | bond, 21 | angle, 22 | neigh_atoms_info, 23 | xyz_tmp_dir, 24 | dum1, 25 | dum2, 26 | atom1, 27 | atom2, 28 | ): 29 | file_name, conf_unit, dis_dum1_dum2, ang_1st_2nd, penalty = bd.create_conformer( 30 | unit_name, 31 | sl, 32 | unit, 33 | bond, 34 | neigh_atoms_info, 35 | angle, 36 | xyz_tmp_dir, 37 | dum1, 38 | dum2, 39 | atom1, 40 | atom2, 41 | ) 42 | obConversion.ReadFile(mol, file_name) 43 | ff.Setup(mol) 44 | E_cost = ( 45 | ff.Energy() 46 | + ff.Energy() * (1 - (ang_1st_2nd / 180.0)) 47 | + ff.Energy() * penalty * 10 48 | ) 49 | return E_cost, conf_unit, file_name 50 | 51 | 52 | ###################################################### 53 | # Simulated Annealing 54 | ###################################################### 55 | def SA( 56 | unit_name, 57 | unit, 58 | bonds, 59 | angle, 60 | neigh_atoms_info, 61 | xyz_tmp_dir, 62 | dum1, 63 | dum2, 64 | atom1, 65 | atom2, 66 | Steps, 67 | Substeps, 68 | ): 69 | i1 = bonds.index.values 70 | i2 = angle 71 | 72 | # Start location 73 | x_start = [i1[0], i2[0]] 74 | # Number of cycles 75 | n = Steps 76 | # Number of trials per cycle 77 | m = Substeps 78 | # Number of accepted solutions 79 | na = 0.0 80 | # Probability of 
accepting worse solution at the start 81 | p1 = 0.3 82 | # Probability of accepting worse solution at the end 83 | p50 = 0.001 84 | # Initial temperature 85 | t1 = -1.0 / math.log(p1) 86 | # Final temperature 87 | t50 = -1.0 / math.log(p50) 88 | 89 | # Fractional reduction every cycle 90 | frac = (t50 / t1) ** (1.0 / (n - 1.0)) 91 | 92 | # Initialize x 93 | x = np.zeros((n + 1, 2)) 94 | 95 | x[0] = x_start 96 | 97 | results = [] 98 | 99 | xi = np.zeros(2) 100 | xi = x_start 101 | na = na + 1.0 102 | 103 | # Current best results so far 104 | xc = np.zeros(2) 105 | xc = x[0] 106 | fc, unit_new, file_name = f( 107 | unit_name, 108 | 0, 109 | unit, 110 | bonds.loc[0], 111 | 0.0, 112 | neigh_atoms_info, 113 | xyz_tmp_dir, 114 | dum1, 115 | dum2, 116 | atom1, 117 | atom2, 118 | ) 119 | fs = np.zeros(n + 1) 120 | fs[0] = fc 121 | results.append([0, fc, file_name]) 122 | 123 | # Current temperature 124 | t = t1 125 | # DeltaE Average 126 | DeltaE_avg = 0.0 127 | 128 | for i in range(n): 129 | for j in range(m): 130 | unit_prev = unit.copy() 131 | xi[0] = np.random.choice(i1) 132 | xi[1] = np.random.choice(i2) 133 | fc_new, unit, file_name = f( 134 | unit_name, 135 | i, 136 | unit, 137 | bonds.loc[xi[0]], 138 | xi[1], 139 | neigh_atoms_info, 140 | xyz_tmp_dir, 141 | dum1, 142 | dum2, 143 | atom1, 144 | atom2, 145 | ) 146 | DeltaE = abs(fc_new - fc) 147 | 148 | if fc_new > fc: 149 | # Initialize DeltaE_avg if a worse solution was found 150 | # on the first iteration 151 | if i == 0 and j == 0: 152 | DeltaE_avg = DeltaE 153 | 154 | # To avoid divide by ZERO add a small number to DeltaE_avg 155 | if DeltaE_avg == 0.0: 156 | DeltaE_avg = DeltaE_avg + 1.0e-13 157 | 158 | # objective function is worse 159 | # generate probability of acceptance 160 | p = math.exp(-DeltaE / (DeltaE_avg * t)) 161 | 162 | # determine whether to accept worse point 163 | if random.random() < p: 164 | # accept the worse solution 165 | accept = True 166 | else: 167 | # don't accept the worse solution 168 | accept = False 169 | else: 170 | # objective function is lower, automatically accept 171 | accept = True 172 | 173 | if accept is True: 174 | # update currently accepted solution 175 | xc[0] = xi[0] 176 | xc[1] = xi[1] 177 | fc = fc_new 178 | best_xyz = file_name 179 | # increment number of accepted solutions 180 | na = na + 1.0 181 | # update DeltaE_avg 182 | DeltaE_avg = (DeltaE_avg * (na - 1.0) + DeltaE) / na 183 | 184 | else: 185 | unit = unit_prev.copy() 186 | 187 | # Record the best x values at the end of every cycle 188 | x[i + 1][0] = xc[0] 189 | x[i + 1][1] = xc[1] 190 | try: 191 | results.append([i, fc, best_xyz]) 192 | except Exception: 193 | results.append([i, fc, 'XXX']) 194 | fs[i + 1] = fc 195 | 196 | if np.around(fs[i], decimals=15) == np.around( 197 | fs[i + 1], decimals=15 198 | ) and np.around(fs[i - 1], decimals=15) == np.around(fs[i + 1], decimals=15): 199 | break 200 | # Lower the temperature for next cycle 201 | t = frac * t 202 | results = pd.DataFrame(results, columns=['i', 'Energy+', 'xyzFile']) 203 | results = results[results['xyzFile'] != 'XXX'] 204 | results = results.drop_duplicates(subset='xyzFile', keep="last") 205 | return results 206 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | time 4 | os 5 | openbabel 6 | subprocess 7 | glob 8 | tqdm 9 | random 10 | mmap 11 | multiprocessing 12 | rdkit 13 | shutil 14 | joblib 15 | scipy 16 | 
itertools 17 | pkg_resources 18 | math 19 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts = --durations=30 --quiet 3 | filterwarnings = 4 | ignore::UserWarning 5 | ignore::RuntimeWarning 6 | 7 | [pycodestyle] 8 | count = True 9 | ignore = E121,E123,E126,E133,E226,E241,E242,E704,W503,W504,W505,E741,W605,W293,W291 10 | max-line-length = 120 11 | statistics = True 12 | exclude=*/tests/* 13 | 14 | [flake8] 15 | exclude = .git,__pycache__,docs_rst/conf.py,tests,__init__.py 16 | # max-complexity = 10 17 | extend-ignore = E741,W291 18 | max-line-length = 120 19 | 20 | [pydocstyle] 21 | ignore = D105,D2,D4 22 | match-dir=(?!(tests)).* 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | from subprocess import call 4 | 5 | # Test for openbabel/rdkit conda installs 6 | try: 7 | from openbabel import openbabel 8 | except ImportError: 9 | raise ModuleNotFoundError("openbabel not found, install openbabel via conda-forge.") 10 | 11 | try: 12 | import rdkit 13 | except ImportError: 14 | raise ModuleNotFoundError("rdkit not found, install openbabel via conda-forge.") 15 | 16 | # Get PATH for external software and write in .bashrc 17 | #HOME_DIR = os.environ.get('HOME') 18 | # PACKMOL 19 | #if os.getenv('PACKMOL_EXEC') is None: 20 | # print("Enter PATH for PACKMOL executable: ") 21 | # print("For example '/home/opt/packmol/packmol'") 22 | # packmol_exec = input("") 23 | # call("echo \# PACKMOL_PSP >> {}".format(os.path.join(HOME_DIR,'.bashrc')),shell=True) 24 | # call("echo export PACKMOL_EXEC={} >> {}".format(packmol_exec,os.path.join(HOME_DIR,'.bashrc')),shell=True) 25 | 26 | # ORCA 27 | #if os.getenv('ORCA_EXEC') is None: 28 | # print("Enter PATH for ORCA executable: ") 29 | # print("For example '/home/opt/orca_4_2/orca'") 30 | # orca_exec = input("") 31 | # call("echo \# ORCA_PSP >> {}".format(os.path.join(HOME_DIR,'.bashrc')),shell=True) 32 | # call("echo export ORCA_EXEC={} >> {}".format(orca_exec,os.path.join(HOME_DIR,'.bashrc')),shell=True) 33 | 34 | # OPENMPI 35 | #if os.getenv('OPENMPI_bin') is None: 36 | # print("Enter PATH for OPENMPI: ") 37 | # print("For example '/home/opt/openmpi-316'") 38 | # openmpi_path = input("") 39 | # call("echo \# OPENMPI_PSP >> {}".format(os.path.join(HOME_DIR,'.bashrc')),shell=True) 40 | # call("echo export OPENMPI_bin={} >> {}".format(os.path.join(openmpi_path,'bin'),os.path.join(HOME_DIR,'.bashrc')),shell=True) 41 | # call("echo export OPENMPI_lib={} >> {}".format(os.path.join(openmpi_path,'lib'),os.path.join(HOME_DIR,'.bashrc')),shell=True) 42 | 43 | 44 | # Read the contents of your README file 45 | PACKAGE_DIR = os.path.abspath(os.path.dirname(__file__)) 46 | with open(os.path.join(PACKAGE_DIR, 'README.md'), encoding='utf-8') as f: 47 | LONG_DESCRIPTION = f.read() 48 | 49 | setup(name='PolymerStructurePredictor', 50 | version='1.0.0', 51 | long_description=LONG_DESCRIPTION, 52 | long_description_content_type='text/markdown', 53 | description='Build single chains and crystal structures of polymers', 54 | keywords=['SMILES', 'polymer', 'single chain', 'crystal structure'], 55 | url='https://github.com/Ramprasad-Group/PSP', 56 | author='Harikrishna Sahu', 57 | author_email='harikrishnasahu89@gmail.com', 58 | classifiers=[ 
59 | "Programming Language :: Python :: 3", 60 | "License :: OSI Approved :: MIT License", 61 | "Operating System :: OS Independent", 62 | ], 63 | #license='MIT', 64 | packages=find_packages(), 65 | install_requires=['scipy', 66 | 'pandas', 67 | 'joblib'], 68 | zip_safe=False 69 | ) 70 | -------------------------------------------------------------------------------- /test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ramprasad-Group/PSP/fa846bdd07b45461d5d747e5bd60b5ee80f13938/test/.DS_Store -------------------------------------------------------------------------------- /test/AmorphousBuilder/amor_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.AmorphousBuilder as ab 3 | 4 | input_df = pd.read_csv("input_amor.csv") 5 | amor = ab.Builder( 6 | input_df, 7 | ID_col="ID", 8 | SMILES_col="smiles", 9 | OutDir='amor_model', 10 | Length='Len', 11 | NumConf='NumConf', 12 | NumModel=1, 13 | LeftCap = "LeftCap", 14 | RightCap = "RightCap", 15 | Loop='Loop', 16 | density=0.85, 17 | box_type='c', 18 | ) 19 | amor.Build() 20 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/amor_model_gaff2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.AmorphousBuilder as ab 3 | 4 | input_df = pd.read_csv("input_amor.csv") 5 | amor = ab.Builder( 6 | input_df, 7 | ID_col="ID", 8 | SMILES_col="smiles", 9 | Length='Len', 10 | NumConf='NumConf', 11 | LeftCap = "LeftCap", 12 | RightCap = "RightCap", 13 | Loop='Loop', 14 | density=0.85, 15 | box_type='c', 16 | BondInfo=False 17 | ) 18 | amor.Build() 19 | 20 | # Default get_gaff2() uses Pysimm for atom typing 21 | amor.get_gaff2(output_fname='amor_gaff2.lmps') 22 | 23 | ''' 24 | [ADVANCED] If Ambertools is installed, and antechamber is in the PATH 25 | (e.g. export ANTECHAMBER_EXEC=~/.conda/envs/AmberTools21/bin/antechamber), 26 | atom typing can also be done using antechamber by specifying atom_typing='antechamber'. 27 | In addition, atom types can be swapped manually by specifying the swap_dict 28 | (e.g. swap_dict={'ns': 'n'}). 
29 | 30 | A representative example is as below: 31 | amor.get_gaff2(output_fname='amor_gaff2.lmps', atom_typing='antechamber', swap_dict={'ns': 'n'}) 32 | ''' 33 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/amor_model_opls.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.AmorphousBuilder as ab 3 | 4 | input_df = pd.read_csv("input_amor.csv") 5 | amor = ab.Builder( 6 | input_df, 7 | ID_col="ID", 8 | SMILES_col="smiles", 9 | Length='Len', 10 | NumConf='NumConf', 11 | LeftCap = "LeftCap", 12 | RightCap = "RightCap", 13 | Loop='Loop', 14 | density=0.85, 15 | box_type='c', 16 | BondInfo=False 17 | ) 18 | amor.Build() 19 | amor.get_opls(output_fname='amor_opls.lmps') 20 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/input_PE.csv: -------------------------------------------------------------------------------- 1 | ID,smiles,Len,Num,NumConf,Loop 2 | CC25,[*]CC[*],20,80,1,False 3 | -------------------------------------------------------------------------------- /test/AmorphousBuilder/input_amor.csv: -------------------------------------------------------------------------------- 1 | ID,smiles,Len,Num,NumConf,Loop 2 | PVC3,C(C([*])Cl)[*],3,8,2,False 3 | PVC5,C(C([*])Cl)[*],5,4,2,False 4 | cc5,[*]CC[*],5,8,2,False 5 | -------------------------------------------------------------------------------- /test/ChainBuilder/chain_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.ChainBuilder as ChB 3 | 4 | df_smiles = pd.read_csv("input_chain.csv") 5 | chain_builder = ChB.Builder( 6 | Dataframe=df_smiles, 7 | ID_col="PID", 8 | SMILES_col="smiles_polymer", 9 | NumConf=1, 10 | Length=["n", 5], 11 | Steps=100, 12 | Substeps=20, 13 | Method="SA", 14 | NCores=1, 15 | OutDir='chains', 16 | Tol_ChainCorr=50, 17 | Inter_Chain_Dis=12, 18 | ) 19 | results = chain_builder.BuildChain() 20 | -------------------------------------------------------------------------------- /test/ChainBuilder/input_chain.csv: -------------------------------------------------------------------------------- 1 | PID,smiles_polymer 2 | PE,[*]CC[*] 3 | PVC,C(C(CC([*])Cl)Cl)[*] 4 | ABPBO,c1c2c(cc(c1)c1oc3c(n1)cc(cc3)[*])nc(o2)[*] 5 | PVDF,C(C(F)(F)[*])[*] 6 | PAN,C(CC(C[*])C#N)([*])C#N 7 | PPS,C1=CC(=CC=C1SC2=CC=C(C=C2)S[*])[*] 8 | -------------------------------------------------------------------------------- /test/CrystalBuilder/crystal_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import glob 3 | import psp.ChainBuilder as ChB 4 | import psp.CrystalBuilder as CrB 5 | 6 | df_smiles = pd.read_csv("input_chain.csv") 7 | 8 | chain_builder = ChB.Builder( 9 | Dataframe=df_smiles, 10 | ID_col="PID", 11 | SMILES_col="smiles_polymer", 12 | NumConf=1, 13 | Length=['n',5], 14 | Steps=20, 15 | Substeps=20, 16 | Method="SA", 17 | NCores=1, 18 | OutDir='chains', 19 | Tol_ChainCorr=50, 20 | ) 21 | results = chain_builder.BuildChain() 22 | 23 | ID = "PVC" 24 | vasp_input_list = glob.glob("chains/" + ID + "/" + "*.vasp") 25 | crystal_builder = CrB.Builder( 26 | VaspInp_list=vasp_input_list, 27 | NSamples=10, 28 | InputRadius="auto", 29 | MinAtomicDis=2.0, 30 | Polymer=True, 31 | Optimize=False, 32 | NCores=1, 33 | ) 34 | results = crystal_builder.BuildCrystal() 35 | 
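The crystal_model.py example above stops after BuildCrystal(). As shown earlier in CrystalBuilder, each sampled configuration is written out as a POSCAR-format file named cryst_out-<index>.vasp. A minimal follow-up sketch (not part of the repository; crystal_dir is a placeholder for whatever output directory CrystalBuilder was configured with) for enumerating those outputs:

import glob
import os

crystal_dir = "crystals"  # placeholder: set to the OutDir used by CrystalBuilder
models = sorted(glob.glob(os.path.join(crystal_dir, "**", "*.vasp"), recursive=True))
print(len(models), "POSCAR-format crystal models found")
for path in models[:5]:
    print("  ", path)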
-------------------------------------------------------------------------------- /test/CrystalBuilder/input_chain.csv: -------------------------------------------------------------------------------- 1 | PID,smiles_polymer 2 | PVC,C(C([*])Cl)[*] 3 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/circular_oligomer.csv: -------------------------------------------------------------------------------- 1 | ID,smiles 2 | PE,[*]CC[*] 3 | PVC,C(C([*])Cl)[*] 4 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/linear_oligomer.csv: -------------------------------------------------------------------------------- 1 | ID,smiles 2 | Mol1,C13C=CC(C2C=CC1N=C2OC)N=C3OC 3 | PE,[*]CC[*] 4 | PVC,C(C([*])Cl)[*] 5 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/linear_oligomer_with_endcaps.csv: -------------------------------------------------------------------------------- 1 | ID,smiles,LeftCap,RightCap 2 | Mol1,C13C=CC(C2C=CC1N=C2OC)N=C3OC,, 3 | PE,[*]CC[*],C(Cl)(Cl)(Cl)[*],C(F)(F)(F)[*] 4 | PVC,C(C([*])Cl)[*],C(Cl)(Cl)(Cl)[*],C(F)(F)(F)[*] 5 | -------------------------------------------------------------------------------- /test/MoleculeBuilder/molecule_model.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psp.MoleculeBuilder as mb 3 | 4 | df_smiles = pd.read_csv("linear_oligomer.csv") 5 | 6 | mol = mb.Builder( 7 | df_smiles, 8 | ID_col="ID", 9 | SMILES_col="smiles", 10 | LeftCap = "LeftCap", 11 | RightCap ='RightCap', 12 | OutDir='models', 13 | Inter_Mol_Dis=6, 14 | Length=[1,3], 15 | NumConf=1, 16 | Loop=False, 17 | NCores=1, 18 | IrrStruc=False, 19 | OPLS=False, 20 | GAFF2=True, 21 | GAFF2_atom_typing='pysimm' 22 | ) 23 | results = mol.Build() 24 | -------------------------------------------------------------------------------- /test/chain.csv: -------------------------------------------------------------------------------- 1 | PID,smiles_polymer 2 | PE,[*]CC[*] 3 | PVC,C(C([*])Cl)[*] 4 | PVC2,C(C(CC([*])Cl)Cl)[*] 5 | ABPBO,c1c2c(cc(c1)c1oc3c(n1)cc(cc3)[*])nc(o2)[*] 6 | beta-PVDF,C(C(F)(F)[*])[*] 7 | delta-PVDF,C(C[*])(CC([*])(F)F)(F)F 8 | PAN,C(CC(C[*])C#N)([*])C#N 9 | PPS,C1=CC(=CC=C1SC2=CC=C(C=C2)S[*])[*] 10 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import unittest 3 | import glob 4 | import os 5 | import psp.ChainBuilder as ChB 6 | import psp.CrystalBuilder as CrB 7 | 8 | TEST_DIR = os.path.abspath(os.path.dirname(__file__)) 9 | 10 | class PspGeneralTest(unittest.TestCase): 11 | def test_crystal_build(self): 12 | df_smiles = pd.read_csv( 13 | os.path.join(TEST_DIR, "chain.csv"), low_memory=False 14 | ) # fingerprinted data 15 | 16 | chain_builder = ChB.Builder( 17 | Dataframe=df_smiles, 18 | ID_col="PID", 19 | SMILES_col="smiles_polymer", 20 | Length=["n"], 21 | Steps=25, 22 | Substeps=10, 23 | MonomerAng="medium", 24 | DimerAng="medium", 25 | Method="SA", 26 | OutDir="chains", 27 | ) 28 | results = chain_builder.BuildChain() 29 | print(results) 30 | ID = "PVC2" 31 | vasp_input_list = glob.glob("chains/" + ID + "/" + "*.vasp") 32 | crystal_builder = CrB.Builder( 33 | VaspInp_list=vasp_input_list, 34 | NSamples=5, 35 | InputRadius="auto", 36 | MinAtomicDis=2.0, 37 | OutDir="crystals", 38 | ) 39 | results = 
crystal_builder.BuildCrystal() 40 | self.assertIsNotNone(results) 41 | print(results) 42 | --------------------------------------------------------------------------------
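For completeness, a minimal sketch (not part of the repository) of how the bundled test module above can be launched from Python. It assumes pytest is installed, the conda dependencies (rdkit, openbabel) are available in the active environment, and the snippet is run from the repository root:

import pytest

# equivalent to invoking pytest on test/test.py from the repository root
exit_code = pytest.main(["test/test.py", "-q"])
raise SystemExit(exit_code)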