├── .github └── workflows │ └── test.yaml ├── .gitignore ├── .python-version ├── LICENSE ├── README.ipynb ├── README.md ├── README_files ├── README_10_0.png ├── README_16_0.png ├── README_18_0.png ├── README_20_0.png ├── README_23_0.png ├── README_25_0.png ├── README_28_0.png ├── README_30_0.png ├── README_35_2.png ├── README_42_0.png ├── README_43_0.png ├── README_45_2.png ├── README_46_2.png └── README_52_0.png ├── autoadsorbate ├── Neb.py ├── Particle.py ├── Smile.py ├── Surf.py ├── __init__.py ├── autoadsorbate.py ├── plotting.py ├── popneb.py ├── raster_utilities.py ├── string_utils.py └── utils.py ├── pyproject.toml ├── scripts ├── aads_2025.ipynb ├── all_smiles_trj.xyz ├── generate_aads_configs.py ├── make_shrinkwrap_video.ipynb ├── paper_prep.ipynb ├── particle.ipynb └── relax.py ├── tests ├── __init__.py └── test_all.py └── uv.lock /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: tests 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | 11 | jobs: 12 | pytest: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: 18 | - "3.13" 19 | - "3.12" 20 | - "3.11" 21 | - "3.10" 22 | - "3.9" 23 | os: 24 | - ubuntu-latest 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Install uv and set the python version 29 | uses: astral-sh/setup-uv@v5 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | - name: Install the project 33 | run: uv sync --all-extras --dev 34 | - name: Pytest 35 | run: | 36 | uv run python --version 37 | uv run pytest 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025, Fako Edvin, Sandip De, BASF SE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5d816139-ae3f-4ad3-ae7a-f802e7615344", 6 | "metadata": {}, 7 | "source": [ 8 | "## dev cells" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "689bcf2b-0278-425e-ba07-fa931abeb6f1", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from ase.visualize.plot import plot_atoms\n", 19 | "\n", 20 | "from autoadsorbate import Fragment, Surface" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "id": "afbf84ab-f714-4c5f-9a0b-9187123e709c", 26 | "metadata": {}, 27 | "source": [ 28 | "## autoadsorbate" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "d99a8a08-42a9-4ee9-97d4-37de91697265", 34 | "metadata": {}, 35 | "source": [ 36 | "The challenge of generating initial structures for heterogeneous catalysis is traditionally addressed through manual labor. However, this package aims to offer an alternative approach.\n", 37 | "\n", 38 | "To effectively simulate reactive behavior at surfaces, it is crucial to establish clear definitions within our framework. 
The following definitions are essential in order to accurately characterize the structures of interest:\n", 39 | "\n", 40 | "- __Fragment__: \n", 41 | " - Molecules - species that exist in their corresponding geometries __even when isolated from the surface__.\n", 42 | " - Reactive species - species that exist in their corresponding geometries __only when attached to the surface__.\n", 43 | "- __Surface__:\n", 44 | " - The definition of the surface is simple - every atom of the slab that can be in contact with an intermediate is considered a surface atom. The surface is a collection of such atoms.\n", 45 | " - Every atom of the surface is a \"top\" site.\n", 46 | " - When two \"top\" sites are close (close in its literal meaning) to each other, they form a \"bridge\" site.\n", 47 | " - When three \"top\" sites are close (close in its literal meaning) to each other, they form a \"3-fold\" site.\n", 48 | " - etc.\n", 49 | "- __Active Site__:\n", 50 | " - A collection of one or more sites that can facilitate a chemical transformation is called an active site.\n", 51 | " - A \"top\" site can be an active site only for Eley-Rideal transformations.\n", 52 | " - All other transformations require that at least one intermediate binds through at least two sites. All involved sites compose an active site.\n", 53 | "- __Intermediate__:\n", 54 | " - Intermediates are fragments bound to an active site." 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "id": "43a757a1-5a02-4a8d-9737-0252c6471a1d", 60 | "metadata": {}, 61 | "source": [ 62 | "" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "7ae68f87-e920-4a7b-b9f8-0315a4b3a6a4", 68 | "metadata": {}, 69 | "source": [ 70 | "the idea was to keep the package as light as possible, hence the foundation of this package is ase and rdkit, allong with some basic python packages (pandas, numpy, etc.)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "b7dac144-3013-4cf3-9144-6b38b40cec0d", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# from autoadsorbate.autoadsorbate import Surface, Fragment\n", 81 | "# from ase.io import read, write\n", 82 | "# from ase.visualize import view\n", 83 | "# from ase.visualize.plot import plot_atoms\n", 84 | "# import matplotlib.pyplot as plt" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "e2e51886-9b71-4954-b2a9-d6846b5707ff", 90 | "metadata": {}, 91 | "source": [ 92 | "### Fragment" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "e7abcb94-3e04-4fef-aa8e-fee2d54df704", 98 | "metadata": {}, 99 | "source": [ 100 | "#### Molecules" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "99f5dc2e-5778-44e8-8dd4-808faee575e1", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "f = Fragment(smile=\"COC\", to_initialize=5)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "b88aae20-0f80-4acd-beb8-ced4b73d321d", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "from autoadsorbate import docs_plot_conformers\n", 121 | "\n", 122 | "conformer_trajectory = f.conformers\n", 123 | "fig = docs_plot_conformers(conformer_trajectory)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "d89ea086-b0b5-4010-ae64-5e84ff39c63b", 129 | "metadata": {}, 130 | "source": [ 131 | "Notice that the orientation of the fragment is arbitrary. 
We could simply paste these structures on a surface of some material, but it would be difficult to quantify the quality of the initial random guesses and hence how many structures we need to sample. We would then have to run dynamic simulations to probe for local minima and check which minima are the most stable.\n", 132 | "\n", 133 | "In this case of DME, we can use our knowledge of chemistry to simplify the problem. Since the O atom bridging the two methyl groups had 2 \"lone electron pairs,\" we can use a simple trick: replacing one of the lone pairs with a marker atom (let's use Cl)." 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "75d91009-08a3-4810-9ea2-3bdd3b410959", 139 | "metadata": {}, 140 | "source": [ 141 | "\n", 142 | "Notice that we had to make two adjustments to the SMILES string:\n", 143 | "- to be able to replace the lone pair with a marker we must \"trick\" the valnce of the O atom, and reshufle the smiles formula so that the marker is in first place (for easy book-keeping)\n", 144 | " - ```COC``` original\n", 145 | " - ```CO(Cl)C``` add Cl instead of the O lone pair (this is an invalid SMILES)\n", 146 | " - ```C[O+](Cl)C``` trick to make the valence work\n", 147 | " - ```Cl[O+](C)C``` rearrange so taht the SMILES string starts with the marker first (for easy book keeping)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "d1ccd594-03ad-4406-b963-22cc2f8ee287", 153 | "metadata": {}, 154 | "source": [ 155 | "This can be also done with a function:" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "id": "6f7799f1-58be-4d0b-8253-079df53a0d85", 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "from autoadsorbate import get_marked_smiles\n", 166 | "\n", 167 | "marked_smile = get_marked_smiles([\"COC\"])[0]\n", 168 | "marked_smile" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "7e32efc5-795c-4d7f-84b5-530042c8163e", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "f = Fragment(smile=\"Cl[O+](C)(C)\", to_initialize=5)\n", 179 | "len(f.conformers)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "id": "688e74c5-6400-4c27-8da5-a5d4eab3832d", 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "from autoadsorbate import docs_plot_conformers\n", 190 | "\n", 191 | "conformer_trajectory = f.conformers\n", 192 | "fig = docs_plot_conformers(conformer_trajectory)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "id": "048c6751-60ea-4692-a6c6-914e06e76a89", 198 | "metadata": {}, 199 | "source": [ 200 | "Now we can use the marker atom to orient our molecule:" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "9b6c6673-7b75-41ac-91ce-6d3f98e5540a", 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "from autoadsorbate import docs_plot_sites\n", 211 | "\n", 212 | "oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)]\n", 213 | "fig = docs_plot_conformers(oriented_conformer_trajectory)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "0fa177ac-1f08-4488-8715-18a449cb1859", 219 | "metadata": {}, 220 | "source": [ 221 | "We can also easily remove the marker:" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "id": "08cd4bd3-85bf-4fd0-874f-55d3087f81b6", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": 
[ 231 | "clean_conformer_trajectory = [atoms[1:] for atoms in oriented_conformer_trajectory]\n", 232 | "fig = docs_plot_conformers(clean_conformer_trajectory)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "dc13f155-13a0-437e-8a4e-f0ce7b9ab832", 238 | "metadata": {}, 239 | "source": [ 240 | "#### Reactive species " 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "id": "7586f51e-ab4a-4535-a6f9-7d1434d3d2bf", 246 | "metadata": {}, 247 | "source": [ 248 | "Methoxy" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "id": "adbae14c-f656-4659-b9cf-e2ad3e0d5188", 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "f = Fragment(smile=\"ClOC\", to_initialize=5)\n", 259 | "oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)]\n", 260 | "fig = docs_plot_conformers(oriented_conformer_trajectory)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "id": "ee45e5bc-04f4-43a1-bf22-8364e45e5dfa", 266 | "metadata": {}, 267 | "source": [ 268 | "##### Methyl" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "id": "f13b7559-9242-4f0a-995a-3c60e4051386", 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "f = Fragment(smile=\"ClC\", to_initialize=5)\n", 279 | "oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)]\n", 280 | "fig = docs_plot_conformers(oriented_conformer_trajectory)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "id": "5dea02c1-8d39-46eb-8992-f3126fd70946", 286 | "metadata": {}, 287 | "source": [ 288 | "##### Frangments with more than one binding mode (e.g. 1,2-PDO)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "id": "ac40c5c3-da5b-46cc-bc5a-ff14b8bfc16c", 294 | "metadata": {}, 295 | "source": [ 296 | "bound through single site:" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "id": "806d656e-48c3-4cba-afde-c9c6cb045a10", 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "f = Fragment(smile=\"Cl[OH+]CC(O)C\", to_initialize=5)\n", 307 | "oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)]\n", 308 | "fig = docs_plot_conformers(oriented_conformer_trajectory)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "id": "2910a638-60bc-40be-8684-f62623cd89f2", 314 | "metadata": {}, 315 | "source": [ 316 | "Coordinated withboth hydroxil:" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "id": "306e3fb4-e542-409c-8a7f-fdce06b27440", 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "f = Fragment(smile=\"S1S[OH+]CC([OH+]1)C\", to_initialize=5)\n", 327 | "oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)]\n", 328 | "fig = docs_plot_conformers(oriented_conformer_trajectory)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "id": "6bcb273a-56e1-4969-ba19-a30753730388", 334 | "metadata": {}, 335 | "source": [ 336 | "### Surface" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "id": "ad9b99ac-ee89-4142-b041-c41fb1860408", 342 | "metadata": {}, 343 | "source": [ 344 | "First we need to have a slab (slab is an arrangement of atoms that contains the boundry between the material in question and other - i.e. gas, fluid, other material). 
We can read one (```ase.io.read('path_to_file')```) we prepared earlier, or we can use ase to construct a new slab:" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "id": "13950877-b3a8-4ad6-b35e-15610a4e7b5b", 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "from ase.build import fcc111\n", 355 | "\n", 356 | "slab = fcc111(\"Cu\", (4, 4, 4), periodic=True, vacuum=10)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "id": "685babf1-0388-4bd5-9273-d1fcafa7fc39", 362 | "metadata": {}, 363 | "source": [ 364 | "Now we can initalize the Surface object which associates the constructed slab (ase.Atoms) with additional information required for placing Fragments.\n", 365 | "We can view which atoms are in the surface:" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "id": "6808f6f8-9a73-4534-96e8-742f0e5f0498", 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "s = Surface(slab)\n", 376 | "plot_atoms(s.view_surface(return_atoms=True))" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "id": "529f95c0-4c02-4023-bb37-b38083037b07", 382 | "metadata": {}, 383 | "source": [ 384 | "We have access to all the sites info as a pandas dataframe:" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "id": "45a4abf2-0012-4de2-bde0-208ce9f15124", 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "s.site_df.head()" 395 | ] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "id": "99aee59b-38fe-45d4-84ca-67c5534bf213", 400 | "metadata": {}, 401 | "source": [ 402 | "or in dict form:" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": null, 408 | "id": "89541ad9-e859-447e-a69e-fdc1982dad67", 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "s.site_dict.keys()" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "id": "fae84996-b0a3-49db-87f8-d118669a1782", 418 | "metadata": {}, 419 | "source": [ 420 | "One can easily get access to sites ase.Atoms and find useful information in the ase.Atoms.info:" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "id": "63771bd4-debf-4a8c-9b5c-9b15769138fd", 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "site_atoms = s.view_site(0, return_atoms=True)\n", 431 | "site_atoms.info" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "id": "0115fe50-0e06-4a2b-9391-12edca635e5c", 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "fig = docs_plot_sites(s)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "id": "3f43d66e-beee-46c4-9773-339835ec8565", 447 | "metadata": {}, 448 | "source": [ 449 | "We can keep only the symmetry unique ones like this:" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "id": "0bd3ed03-b0df-4404-a9d1-7e36136d8984", 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "s.sym_reduce()\n", 460 | "s.site_df" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "id": "ecfc8ed1-e632-46b8-ad85-e67d3df26db2", 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "plot_atoms(s.view_surface(return_atoms=True))" 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "id": "5ae799f7-e30c-44b8-a669-3cababcb3c8e", 476 | "metadata": {}, 477 | "source": [ 478 | "## Making surrgate smiles 
automatically" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "id": "02b246fa-fc48-4c8c-9d6b-26b6e0086796", 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "from autoadsorbate import _example_config\n", 489 | "\n", 490 | "_example_config" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "id": "9a874270-94d6-4be3-bde2-30e90e746743", 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "from autoadsorbate import construct_smiles\n", 501 | "\n", 502 | "config = {\n", 503 | " \"backbone_info\": {\"C\": 0, \"O\": 0, \"N\": 2},\n", 504 | " \"allow_intramolec_rings\": True,\n", 505 | " \"ring_marker\": 2,\n", 506 | " \"side_chain\": [\"(\", \")\"],\n", 507 | " \"brackets\": [\"[\", \"]\", \"H+]\", \"H2+]\", \"H3+]\"],\n", 508 | " \"make_labeled\": True,\n", 509 | "}\n", 510 | "\n", 511 | "smiles = construct_smiles(config)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "id": "382a82eb-d4cb-4eb9-a03c-3f166c62ef00", 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "smiles" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "id": "1a0e9320-7c9c-49ef-bea3-cc3d7d6853f7", 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "from autoadsorbate import Fragment\n", 532 | "\n", 533 | "trj = []\n", 534 | "for s in smiles:\n", 535 | " try:\n", 536 | " f = Fragment(s, to_initialize=1)\n", 537 | " a = f.get_conformer(0)\n", 538 | " trj.append(a)\n", 539 | " except:\n", 540 | " pass\n", 541 | "\n", 542 | "lst = [z for z in zip([a.get_chemical_formula() for a in trj], trj)]\n", 543 | "lst.sort(key=lambda tup: tup[0])\n", 544 | "trj = [a[1] for a in lst]\n", 545 | "len(trj)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "id": "a734dcdd-74ca-4aa8-b3d3-ff7595d3d00a", 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "from autoadsorbate import get_drop_snapped\n", 556 | "\n", 557 | "xtrj = get_drop_snapped(trj, d_cut=1.5)\n", 558 | "len(xtrj)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "id": "fe8e6b91-c450-4b0a-9479-98a5fc2167a4", 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "import matplotlib.pyplot as plt\n", 569 | "from ase import Atoms\n", 570 | "from ase.visualize.plot import plot_atoms\n", 571 | "\n", 572 | "fig, axs = plt.subplots(3, 11, figsize=[10, 5], dpi=100)\n", 573 | "\n", 574 | "for i, ax in enumerate(axs.flatten()):\n", 575 | " try:\n", 576 | " platoms = xtrj[i].copy()\n", 577 | "\n", 578 | " except:\n", 579 | " platoms = Atoms(\"X\", positions=[[0, 0, 0]])\n", 580 | "\n", 581 | " for atom in platoms:\n", 582 | " if atom.symbol in [\"Cl\", \"S\"]:\n", 583 | " atom.symbol = \"Ga\"\n", 584 | " plot_atoms(platoms, rotation=(\"-90x,0y,0z\"), ax=ax)\n", 585 | " ax.set_axis_off()\n", 586 | " ax.set_xlim(-1, 5)\n", 587 | " ax.set_ylim(-0.5, 5.5)\n", 588 | "\n", 589 | "fig.set_layout_engine(layout=\"tight\")" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "id": "418ee1c3-7c01-4601-89dc-cfb8316e47bb", 595 | "metadata": {}, 596 | "source": [ 597 | "## Fully automatic - populate Surface with Fragment" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "id": "decbf165-1a48-49a0-b16b-8c63e1d439d5", 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "from ase.build import fcc211\n", 608 | "\n", 609 
| "from autoadsorbate import Fragment, Surface\n", 610 | "\n", 611 | "slab = fcc211(symbol=\"Cu\", size=(6, 3, 3), vacuum=10)\n", 612 | "s = Surface(slab, touch_sphere_size=2.7)\n", 613 | "s.sym_reduce()\n", 614 | "\n", 615 | "fragments = [\n", 616 | " Fragment(\"S1S[OH+]CC(N)[OH+]1\", to_initialize=20),\n", 617 | " Fragment(\"Cl[OH+]CC(=O)[OH+]\", to_initialize=5),\n", 618 | "]\n", 619 | "\n", 620 | "out_trj = []\n", 621 | "for fragment in fragments:\n", 622 | " out_trj += s.get_populated_sites(\n", 623 | " fragment,\n", 624 | " site_index=\"all\",\n", 625 | " sample_rotation=True,\n", 626 | " mode=\"heuristic\",\n", 627 | " conformers_per_site_cap=5,\n", 628 | " overlap_thr=1.6,\n", 629 | " verbose=True,\n", 630 | " )\n", 631 | " print(\"out_trj \", len(out_trj))" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": null, 637 | "id": "48db2ef6-4216-4688-a2bb-f24a989a5ad3", 638 | "metadata": {}, 639 | "outputs": [], 640 | "source": [] 641 | } 642 | ], 643 | "metadata": { 644 | "kernelspec": { 645 | "display_name": ".venv", 646 | "language": "python", 647 | "name": "python3" 648 | }, 649 | "language_info": { 650 | "codemirror_mode": { 651 | "name": "ipython", 652 | "version": 3 653 | }, 654 | "file_extension": ".py", 655 | "mimetype": "text/x-python", 656 | "name": "python", 657 | "nbconvert_exporter": "python", 658 | "pygments_lexer": "ipython3", 659 | "version": "3.11.5" 660 | } 661 | }, 662 | "nbformat": 4, 663 | "nbformat_minor": 5 664 | } 665 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Table of Contents 2 | 3 | - [Installation](#installation) 4 | - [AutoAdsorbate](#AutoAdsorbate) 5 | - [Fragment](#fragment) 6 | - [Molecules](#molecules) 7 | - [Reactive species](#reactive-species) 8 | - [Surface](#surface) 9 | - [Making surrogate SMILES automatically](#making-surrogate-smiles-automatically) 10 | - [Fully automatic - populate Surface with Fragment](#fully-automatic---populate-surface-with-fragment) 11 | 12 | 13 | ## Installation 14 | 15 | The package is designed to be as lightweight as possible, to implement seamlessly into existing environments with complex dependecies. If you `git clone ` and just `sys.path.insert(0, )`, most likely it will work. 16 | 17 | - Built on only: 18 | - `ase` 19 | - `rdkit` 20 | - Basic Python packages: `pandas`, `numpy` 21 | 22 | The package is available on PyPi: 23 | 24 | ```python 25 | pip install autoadsorbate 26 | ``` 27 | 28 | Installation from source: 29 | ```python 30 | git clone 31 | cd autoadsorbate 32 | pip install . 33 | ``` 34 | 35 | ## AutoAdsorbate 36 | 37 | AutoAdsorbate is a lightweight and easy-to-use Python package for generating chemically meaningful configurations of molecules and fragments on surfaces. Built with minimal dependencies and a low barrier to entry, it enables rapid setup of surface-adsorbate systems using the Surrogate-SMILES (*SMILES) representation. Ideal for researchers in catalysis, nanotech, and materials science, AutoAdsorbate streamlines dataset generation for simulations and machine learning workflows. 38 | 39 | The challenge of generating initial structures for heterogeneous catalysis has traditionally been addressed through manual effort. This package offers an alternative, automated approach. 40 | 41 | To effectively simulate reactive behavior at surfaces, it is crucial to establish clear definitions within our framework. 
The following definitions are essential for accurately characterizing the structures of interest: 42 | 43 | - __Fragment__: 44 | - Molecules – species that retain their corresponding geometries __even when isolated from the surface__. 45 | - Reactive species – species that adopt their corresponding geometries __only when attached to the surface__. 46 | 47 | - __Surface__: 48 | - The surface is defined simply – every atom of the slab that can be in contact with an intermediate is considered a surface atom. The surface is the collection of such atoms. 49 | - Every atom of the surface is a "top" site. 50 | - When two "top" sites are close (close in its literal meaning), they form a "bridge" site. 51 | - When three "top" sites are close (close in its literal meaning), they form a "3-fold" site. 52 | - etc. 53 | 54 | - __Active Site__: 55 | - A collection of one or more sites that can facilitate a chemical transformation is called an active site. 56 | - A "top" site can be an active site only for Eley-Rideal transformations. 57 | - All other transformations require that at least one intermediate binds through at least two sites. All involved sites compose an active site. 58 | 59 | - __Intermediate__: 60 | - Intermediates are fragments bound to an active site. 61 | 62 | 63 | 64 | ### Fragment 65 | 66 | Molecules and reactive species are both initialized as the Fragment object (based on ase.Atoms). Some examples are given bellow. 67 | 68 | #### Molecules 69 | 70 | Before to follow this guide, you need to load the following packages: 71 | ```python 72 | import matplotlib.pyplot as plt 73 | from autoadsorbate import Fragment, Surface, docs_plot_conformers, get_marked_smiles, get_drop_snapped, docs_plot_sites, _example_config, construct_smiles 74 | from ase.visualize.plot import plot_atoms 75 | from ase import Atoms 76 | ``` 77 | 78 | 79 | Let us initialize a molecule of dimethyl ether (DME): 80 | 81 | ```python 82 | from autoadsorbate import Fragment 83 | 84 | f = Fragment(smile = 'COC', to_initialize = 5) 85 | ``` 86 | 87 | 88 | ```python 89 | import matplotlib.pyplot as plt 90 | from autoadsorbate import docs_plot_conformers 91 | 92 | conformer_trajectory = f.conformers 93 | fig = docs_plot_conformers(conformer_trajectory) 94 | plt.show() 95 | ``` 96 | 97 | 98 | 99 | ![png](README_files/README_10_0.png) 100 | 101 | 102 | Notice that the orientation of the fragment is arbitrary. While we could simply place these structures onto the surface of a material, it would be difficult to evaluate the quality of these initial random configurations. This uncertainty would force us to sample a large number of structures and run dynamic simulations to explore local minima and determine which configurations are the most stable. 103 | 104 | However, in the case of DME, we can leverage chemical intuition to simplify the problem. The oxygen atom bridging the two methyl groups has two lone electron pairs. By using a simple trick—replacing one of these lone pairs with a marker atom (such as chlorine, Cl)—we can guide the placement more effectively. 105 | 106 | 107 | Notice that we had to make two adjustments to the SMILES string. To replace the lone pair with a marker atom, we must "trick" the valence of the oxygen atom and rearrange the SMILES formula so that the marker atom appears first (for easier bookkeeping). 
108 | - ```COC``` original 109 | - ```CO(Cl)C``` add Cl instead of the O lone pair (this is an invalid SMILES) 110 | - ```C[O+](Cl)C``` trick to make the valence work 111 | - ```Cl[O+](C)C``` rearrange so that the SMILES string starts with the marker first (for easy book keeping) 112 | 113 | This can be also done with a function: 114 | 115 | 116 | ```python 117 | from autoadsorbate import get_marked_smiles 118 | marked_smile = get_marked_smiles(['COC'])[0] 119 | marked_smile 120 | ``` 121 | 'Cl[O+](C)(C)' 122 | 123 | These surrogate smilles can now be used to initialize a Fragment object (we can set the number of randoms conformers to be initialized): 124 | 125 | ```python 126 | f = Fragment(smile = 'Cl[O+](C)(C)', to_initialize = 5) 127 | len(f.conformers) 128 | ``` 129 | 5 130 | 131 | We can visualize these structures: 132 | ```python 133 | conformer_trajectory = f.conformers 134 | fig = docs_plot_conformers(conformer_trajectory) 135 | plt.show() 136 | ``` 137 | 138 | 139 | 140 | ![png](README_files/README_16_0.png) 141 | 142 | 143 | 144 | Now we can use the marker atom to orient our molecule: 145 | 146 | 147 | ```python 148 | from autoadsorbate import docs_plot_sites 149 | 150 | oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)] 151 | fig = docs_plot_conformers(oriented_conformer_trajectory) 152 | plt.show() 153 | ``` 154 | 155 | 156 | 157 | ![png](README_files/README_18_0.png) 158 | 159 | 160 | 161 | We can also easily remove the marker: 162 | 163 | 164 | ```python 165 | clean_conformer_trajectory = [atoms[1:] for atoms in oriented_conformer_trajectory] 166 | fig = docs_plot_conformers(clean_conformer_trajectory) 167 | plt.show() 168 | ``` 169 | 170 | 171 | 172 | ![png](README_files/README_20_0.png) 173 | 174 | 175 | 176 | #### Reactive species 177 | 178 | Methoxy 179 | 180 | 181 | ```python 182 | f = Fragment(smile = 'ClOC', to_initialize = 5) 183 | oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)] 184 | fig = docs_plot_conformers(oriented_conformer_trajectory) 185 | plt.show() 186 | ``` 187 | 188 | 189 | 190 | ![png](README_files/README_23_0.png) 191 | 192 | 193 | 194 | ##### Methyl 195 | 196 | 197 | ```python 198 | f = Fragment(smile = 'ClC', to_initialize = 5) 199 | oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)] 200 | fig = docs_plot_conformers(oriented_conformer_trajectory) 201 | plt.show() 202 | ``` 203 | 204 | 205 | 206 | ![png](README_files/README_25_0.png) 207 | 208 | 209 | 210 | ##### Frangments with more than one binding mode (e.g. 
1,2-PDO) 211 | 212 | bound through single site: 213 | 214 | 215 | ```python 216 | f = Fragment(smile = 'Cl[OH+]CC(O)C', to_initialize = 5) 217 | oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)] 218 | fig = docs_plot_conformers(oriented_conformer_trajectory) 219 | plt.show() 220 | ``` 221 | 222 | 223 | 224 | ![png](README_files/README_28_0.png) 225 | 226 | 227 | 228 | Coordinated withboth hydroxil: 229 | 230 | 231 | ```python 232 | f = Fragment(smile = 'S1S[OH+]CC([OH+]1)C', to_initialize = 5) 233 | oriented_conformer_trajectory = [f.get_conformer(i) for i, _ in enumerate(f.conformers)] 234 | fig = docs_plot_conformers(oriented_conformer_trajectory) 235 | plt.show() 236 | ``` 237 | 238 | 239 | 240 | ![png](README_files/README_30_0.png) 241 | 242 | 243 | 244 | ### Surface 245 | 246 | Defining the surface of a slab may seem like a simple task, but different approaches can yield varying results depending on the context. When considering catalytic sites, we can define these as surface regions capable of binding a fragment. By using reasonable steric criteria—essentially asking, "Is there enough space for a molecule to bind to that site?"—we can identify all possible binding sites on the slab's surface. These sites can be classified as top, bridge, or multi-fold, depending on how many atoms surround the site. 247 | 248 | As an example: First, we need to define a slab (any ```ase.Atoms``` object). A slab is an arrangement of atoms that represents the boundary between a material and another phase, such as gas, fluid, or another material. We can either read an existing slab, or a new slab: 249 | 250 | 251 | ```python 252 | from ase.build import fcc111 253 | slab = fcc111('Cu', (4,4,4), periodic=True, vacuum=10) 254 | ``` 255 | 256 | Now we can initalize the Surface object which associates the constructed slab (ase.Atoms) with additional information required for placing Fragments. 257 | We can view which atoms are in the surface: 258 | 259 | 260 | ```python 261 | s = Surface(slab) 262 | plot_atoms(s.view_surface(return_atoms=True)) 263 | ``` 264 | 265 | Visualizing surface Cu atoms as Zn 266 | 267 | 268 | ![png](README_files/README_35_2.png) 269 | 270 | 271 | 272 | We have access to all the sites info as a pandas dataframe: 273 | 274 | 275 | ```python 276 | s.site_df.head() 277 | ``` 278 | 279 |
280 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 |
coordinatesconnectivitytopologyn_vectorh_vectorsite_formula
0[0.0, 0.0, 16.252703415323644]1[48][-0.004670396521231514, -0.0031449903964026822...[1.0, 0.0, 0.0]{'Cu': 1}
1[0.6381638700208592, 1.105332246430909, 16.252...2[48, 52][0.0006776311857337964, -0.010516809475472271,...[-0.5000000000000001, -0.8660254037844387, 0.0]{'Cu': 2}
2[1.2763277400417168, 5.162938145598479e-16, 16...2[48, 49][-0.011576660085263627, -0.017987208564805915,...[-1.0, 0.0, 0.0]{'Cu': 2}
3[1.2763277400417183, 0.7368881642872727, 16.25...3[48, 49, 52][-0.01272989568588465, 0.0042077202541598024, ...[-0.5000000000000001, -0.8660254037844387, 0.0]{'Cu': 3}
4[1.2763277400417183, 2.210664492861818, 16.252...1[52][0.0013334161774154326, -0.007734740595549886,...[1.0, 0.0, 0.0]{'Cu': 1}
353 |
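Since `s.site_df` is a regular pandas DataFrame, the usual pandas syntax applies to it. As a minimal illustrative sketch (not part of the original example), one could keep only the 3-fold sites, using the `connectivity` column shown above, which counts the surface atoms defining each site:

```python
# Illustrative only: select the 3-fold (hollow) sites from the site DataFrame.
# connectivity == 1 -> "top", 2 -> "bridge", 3 -> "3-fold" (see the table above).
hollow_sites = s.site_df[s.site_df["connectivity"] == 3]
len(hollow_sites)
```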
354 | 355 | 356 | 357 | or in dict form: 358 | 359 | 360 | ```python 361 | s.site_dict.keys() 362 | ``` 363 | 364 | 365 | 366 | 367 | dict_keys(['coordinates', 'connectivity', 'topology', 'n_vector', 'h_vector', 'site_formula']) 368 | 369 | 370 | 371 | One can easily get access to sites as ```ase.Atoms``` as well, and find useful information in the ```ase.Atoms.info```: 372 | 373 | 374 | ```python 375 | site_atoms = s.view_site(0, return_atoms=True) 376 | site_atoms.info 377 | ``` 378 | 379 | 380 | 381 | 382 | {'coordinates': array([ 0. , 0. , 16.25270342]), 383 | 'connectivity': 1, 384 | 'topology': [48], 385 | 'n_vector': array([-0.0046704 , -0.00314499, 0.99998415]), 386 | 'h_vector': array([1., 0., 0.]), 387 | 'site_formula': {'Cu': 1}} 388 | 389 | 390 | We can visualize a few surface sites: 391 | 392 | ```python 393 | from autoadsorbate import docs_plot_sites 394 | fig = docs_plot_sites(s) 395 | plt.show() 396 | ``` 397 | 398 | 399 | 400 | ![png](README_files/README_42_0.png) 401 | 402 | 403 | 404 | We can reduce the complete list of sites based on symmetry (```ase.utils.structure_comparator.SymmetryEquivalenceCheck```): 405 | 406 | 407 | ```python 408 | s.sym_reduce() 409 | s.site_df 410 | ``` 411 | 412 | 413 | 414 | 415 |
416 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 |
coordinatesconnectivitytopologyn_vectorh_vectorsite_formula
0[0.0, 0.0, 16.252703415323644]1[48][-0.004670396521231514, -0.0031449903964026822...[1.0, 0.0, 0.0]{'Cu': 1}
1[0.6381638700208592, 1.105332246430909, 16.252...2[48, 52][0.0006776311857337964, -0.010516809475472271,...[-0.5000000000000001, -0.8660254037844387, 0.0]{'Cu': 2}
3[1.2763277400417183, 0.7368881642872727, 16.25...3[48, 49, 52][-0.01272989568588465, 0.0042077202541598024, ...[-0.5000000000000001, -0.8660254037844387, 0.0]{'Cu': 3}
8[2.552655480083436, 1.4737763285745453, 16.252...3[49, 52, 53][-0.0011596349368944389, -0.001445905668587753...[0.5000000000000002, -0.8660254037844385, 0.0]{'Cu': 3}
480 |
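As a quick consistency check (a minimal sketch assuming the standard pandas API, not part of the original walkthrough), the symmetry-unique sites can be tallied by their connectivity:

```python
# Illustrative only: count how many symmetry-unique sites remain per
# connectivity (1 = top, 2 = bridge, 3 = 3-fold) after calling s.sym_reduce().
s.site_df["connectivity"].value_counts()
```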
481 | 482 | 483 | We can again visualize the sites: 484 | 485 | ```python 486 | plot_atoms(s.view_surface(return_atoms=True)) 487 | ``` 488 | 489 | Visualizing surface Cu atoms as Zn 490 | 491 | 492 | 493 | 494 | 495 | ![png](README_files/README_45_2.png) 496 | 497 | 498 | 499 | ## Making surogate SMILES automatically 500 | 501 | Simple methods of brute force SMILES enumeration are implemented as well. For example, only using a few lines of code we can initialize multiple conformers of all reaction intermediates in the nitrogen hydrogenation reaction. A template of the required information can be found here: 502 | 503 | ```python 504 | from autoadsorbate import _example_config 505 | _example_config 506 | ``` 507 | 508 | 509 | 510 | 511 | {'backbone_info': {'C': 1, 'N': 0, 'O': 2}, 512 | 'allow_intramolec_rings': True, 513 | 'ring_marker': 2, 514 | 'side_chain': ['(', ')'], 515 | 'brackets': ['[', ']', 'H2]', 'H3]', 'H-]', 'H+]'], 516 | 'make_labeled': True} 517 | 518 | 519 | Now we can use (or edit) this information as we see fit: 520 | 521 | ```python 522 | from autoadsorbate import construct_smiles 523 | 524 | config = { 525 | 'backbone_info': {'C': 0, 'O': 0, 'N':2}, 526 | 'allow_intramolec_rings': True, 527 | 'ring_marker': 2, 528 | 'side_chain': ['(', ')'], 529 | 'brackets': ['[', ']', 'H+]', 'H2+]', 'H3+]'], 530 | 'make_labeled': True 531 | } 532 | 533 | smiles = construct_smiles(config) 534 | ``` 535 | We now have a list of surrgate SMILES that can be used to initalize Fragment objects. 536 | 537 | 538 | ```python 539 | smiles 540 | ``` 541 | 542 | 543 | 544 | 545 | ['ClNN', 546 | 'Cl[N]N', 547 | 'Cl[NH+]N', 548 | 'Cl[NH2+]N', 549 | 'ClN[N]', 550 | 'ClN[NH+]', 551 | 'ClN[NH2+]', 552 | 'ClN[NH3+]', 553 | 'Cl[N][N]', 554 | 'Cl[N][NH+]', 555 | 'Cl[N][NH2+]', 556 | 'Cl[N][NH3+]', 557 | 'Cl[NH+][N]', 558 | 'Cl[NH+][NH+]', 559 | 'Cl[NH+][NH2+]', 560 | 'Cl[NH+][NH3+]', 561 | 'Cl[NH2+][N]', 562 | 'Cl[NH2+][NH+]', 563 | 'Cl[NH2+][NH2+]', 564 | 'Cl[NH2+][NH3+]', 565 | 'S1SN1N', 566 | 'S1SNN1', 567 | 'S1S[N]N1', 568 | 'S1S[NH+]1N', 569 | 'S1S[NH+]N1', 570 | 'S1S[NH2+]N1', 571 | 'S1SN1[N]', 572 | 'S1SN1[NH+]', 573 | 'S1SN1[NH2+]', 574 | 'S1SN1[NH3+]', 575 | 'S1S[N][N]1', 576 | 'S1S[N][NH+]1', 577 | 'S1S[N][NH2+]1', 578 | 'S1S[NH+]1[N]', 579 | 'S1S[NH+]1[NH+]', 580 | 'S1S[NH+][NH+]1', 581 | 'S1S[NH+]1[NH2+]', 582 | 'S1S[NH+][NH2+]1', 583 | 'S1S[NH+]1[NH3+]', 584 | 'S1S[NH2+][NH2+]1', 585 | 'ClN=N', 586 | 'Cl[NH+]=N', 587 | 'ClN=[N]', 588 | 'ClN=[NH+]', 589 | 'ClN=[NH2+]', 590 | 'Cl[NH+]=[N]', 591 | 'Cl[NH+]=[NH+]', 592 | 'Cl[NH+]=[NH2+]', 593 | 'S1SN=N1', 594 | 'S1S[NH+]=N1', 595 | 'S1S[NH+]=[NH+]1', 596 | 'S1SN1#N'] 597 | 598 | 599 | 600 | 601 | ```python 602 | from autoadsorbate import Fragment 603 | 604 | trj = [] 605 | for s in smiles: 606 | try: 607 | f = Fragment(s, to_initialize=1) 608 | a = f.get_conformer(0) 609 | trj.append(a) 610 | except: 611 | pass 612 | 613 | lst = [z for z in zip([a.get_chemical_formula() for a in trj],trj)] 614 | lst.sort(key=lambda tup: tup[0]) 615 | trj = [a[1] for a in lst] 616 | len(trj) 617 | ``` 618 | 619 | 620 | 621 | 622 | 52 623 | 624 | From the list of initialized conformers we can remove the ones that are effectively identical: 625 | 626 | 627 | ```python 628 | from autoadsorbate import get_drop_snapped 629 | 630 | xtrj = get_drop_snapped(trj, d_cut=1.5) 631 | len(xtrj) 632 | ``` 633 | 634 | 33 635 | 636 | We can visualize these structures: 637 | 638 | 639 | ```python 640 | import matplotlib.pyplot as plt 641 | from ase.visualize.plot import plot_atoms 
642 | from ase import Atoms 643 | 644 | fig, axs = plt.subplots(3,11, figsize=[10,5], dpi=100) 645 | 646 | for i, ax in enumerate(axs.flatten()): 647 | try: 648 | platoms = xtrj[i].copy() 649 | 650 | except: 651 | platoms = Atoms('X', positions = [[0,0,0]]) 652 | 653 | for atom in platoms: 654 | if atom.symbol in ['Cl', 'S']: 655 | atom.symbol = 'Ga' 656 | plot_atoms(platoms, rotation=('-90x,0y,0z'), ax=ax) 657 | ax.set_axis_off() 658 | ax.set_xlim(-1, 5) 659 | ax.set_ylim(-0.5, 5.5) 660 | 661 | fig.set_layout_engine(layout='tight') 662 | plt.show() 663 | ``` 664 | 665 | 666 | 667 | ![png](README_files/README_52_0.png) 668 | 669 | 670 | 671 | ## Fully automatic - populate Surface with Fragment 672 | 673 | A autonomous mode of Fragment placement on Surface is also implemented. The method tries to minimze the overlap of the Fragment and Surface while keeping the requested connectivity to the surface. 674 | 675 | ```python 676 | from ase.build import fcc211 677 | from autoadsorbate import Surface, Fragment 678 | 679 | slab = fcc211(symbol = 'Cu', size=(6,3,3), vacuum=10) # any ase.Atoms object 680 | s=Surface(slab, touch_sphere_size=2.7) # finding all surface atoms 681 | s.sym_reduce() # keeping only non-identical sites 682 | 683 | fragments = [ 684 | Fragment('S1S[OH+]CC(N)[OH+]1', to_initialize=20), # For each *SMILES we can request a differnet number of conformers 685 | Fragment('Cl[OH+]CC(=O)[OH+]', to_initialize=5) # based on how much conformational complexity we expect. 686 | ] 687 | 688 | out_trj = [] 689 | for fragment in fragments: 690 | out_trj += s.get_populated_sites( 691 | fragment, # Fragment object 692 | site_index='all', # a single site can be provided here 693 | sample_rotation=True, # rotate the Fragment around the surface-fragment bond? 694 | mode='heuristic', # 'all' or 'heuristic', if heuristic surrogate smiles with 'Cl...' will be matched with top sites, etc. 695 | conformers_per_site_cap=5, # max number of conformers to sample 696 | overlap_thr=1.6, # tolerated bond overlap betwen the surface and fragment 697 | verbose=True 698 | ) 699 | print('out_trj ', len(out_trj)) 700 | ``` 701 | 702 | conformers 40 703 | sites 9 704 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 705 | WARNING: Failed to find requested number of conformers with condition: ovelap_thr = 1.6. Found 0 / 5. Consider setting a higher Fragment(to_initialize = < N >) 706 | WARNING: Failed to find requested number of conformers with condition: ovelap_thr = 1.6. Found 1 / 5. Consider setting a higher Fragment(to_initialize = < N >) 707 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 708 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 709 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 710 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 711 | WARNING: Failed to find requested number of conformers with condition: ovelap_thr = 1.6. Found 3 / 5. Consider setting a higher Fragment(to_initialize = < N >) 712 | WARNING: Failed to find requested number of conformers with condition: ovelap_thr = 1.6. Found 0 / 5. Consider setting a higher Fragment(to_initialize = < N >) 713 | out_trj 29 714 | conformers 40 715 | sites 3 716 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 717 | SUCCESS! 
Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 718 | SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = 1.6. Found 5 / 5. 719 | out_trj 44 720 | 721 | You can visualize a onfiguration in ASE with: 722 | 723 | ```python 724 | from ase.visualize import view 725 | view(out_trj[0]) 726 | ``` 727 | -------------------------------------------------------------------------------- /README_files/README_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_10_0.png -------------------------------------------------------------------------------- /README_files/README_16_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_16_0.png -------------------------------------------------------------------------------- /README_files/README_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_18_0.png -------------------------------------------------------------------------------- /README_files/README_20_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_20_0.png -------------------------------------------------------------------------------- /README_files/README_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_23_0.png -------------------------------------------------------------------------------- /README_files/README_25_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_25_0.png -------------------------------------------------------------------------------- /README_files/README_28_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_28_0.png -------------------------------------------------------------------------------- /README_files/README_30_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_30_0.png -------------------------------------------------------------------------------- /README_files/README_35_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_35_2.png -------------------------------------------------------------------------------- /README_files/README_42_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_42_0.png 
-------------------------------------------------------------------------------- /README_files/README_43_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_43_0.png -------------------------------------------------------------------------------- /README_files/README_45_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_45_2.png -------------------------------------------------------------------------------- /README_files/README_46_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_46_2.png -------------------------------------------------------------------------------- /README_files/README_52_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/README_files/README_52_0.png -------------------------------------------------------------------------------- /autoadsorbate/Neb.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Union 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from ase import Atoms 7 | from ase.neb import NEB 8 | 9 | 10 | def permute_image(atoms, fix_species=None): 11 | """ 12 | Permutes the positions of atoms in the given atomic structure, optionally fixing the positions of specified species. 13 | 14 | Parameters: 15 | atoms (object): An ASE atoms object representing the atomic structure. 16 | fix_species (list or None): A list of atomic numbers representing species whose positions should not be permuted. Default is None. 17 | 18 | Returns: 19 | list: A list of ASE atoms objects, each representing a unique permutation of the atomic structure. 20 | """ 21 | ind_dict = {} 22 | 23 | atoms_numbers = list(set(atoms.arrays["numbers"])) 24 | atoms_numbers.sort() 25 | 26 | perturb_numbers = atoms_numbers.copy() 27 | if fix_species != None: 28 | for n in fix_species: 29 | perturb_numbers.remove(n) 30 | ind_dict[n] = [[atom.index for atom in atoms if atom.number == n]] 31 | 32 | for n in perturb_numbers: 33 | ind_dict[n] = [] 34 | 35 | inds = [atom.index for atom in atoms if atom.number == n] 36 | 37 | for perm in [p for p in itertools.permutations(inds)]: 38 | ind_dict[n].append(list(perm)) 39 | 40 | arrays = [v for _, v in ind_dict.items()] 41 | 42 | all_perm_inds = [] 43 | 44 | for i in itertools.product(*arrays): 45 | l = [item for sublist in i for item in sublist] 46 | all_perm_inds.append(l) 47 | 48 | perm_ini_traj = [] 49 | for perm in all_perm_inds: 50 | a = atoms.copy() 51 | a = a[perm] 52 | perm_ini_traj.append(a) 53 | 54 | return perm_ini_traj 55 | 56 | 57 | def get_connectivity(atoms, bond_range=[0.0, 2]): 58 | """ 59 | Computes the connectivity matrix for the given atomic structure based on the specified bond range. 60 | 61 | Parameters: 62 | atoms (object): An ASE atoms object representing the atomic structure. 63 | bond_range (list): A list containing the minimum and maximum bond distances to consider. Default is [0., 2.]. 
64 | 65 | Returns: 66 | numpy.ndarray: A 2D array representing the connectivity matrix, where each element indicates the presence of a bond. 67 | """ 68 | distm = atoms.get_all_distances() 69 | distm = np.triu(distm, k=0) 70 | 71 | bondm = np.logical_and(distm > bond_range[0], distm < bond_range[1]) * 1 72 | distm = distm * bondm 73 | 74 | return distm 75 | 76 | 77 | def get_neb_images(ini, fin, images_no=10, method="linear"): 78 | """ 79 | Generates a series of images for the Nudged Elastic Band (NEB) method by interpolating between initial and final structures. 80 | 81 | Parameters: 82 | ini (object): An ASE atoms object representing the initial structure. 83 | fin (object): An ASE atoms object representing the final structure. 84 | images_no (int): The number of intermediate images to generate. Default is 10. 85 | method (str): The interpolation method to use. Default is 'linear'. 86 | 87 | Returns: 88 | list: A list of ASE atoms objects representing the interpolated images. 89 | """ 90 | traj = [ini.copy() for _ in range(images_no)] + [fin.copy()] 91 | neb = NEB(traj) 92 | neb.interpolate(method=method) 93 | 94 | return neb.images 95 | 96 | 97 | def get_neb_norm(neb_images): 98 | """ 99 | Computes the NEB (Nudged Elastic Band) norm, which is the minimum distance between atoms across all NEB images. 100 | 101 | Parameters: 102 | neb_images (list): A list of ASE atoms objects representing the NEB images. 103 | 104 | Returns: 105 | float: The minimum distance between atoms across all NEB images. 106 | """ 107 | im_dist = [] 108 | for i, im in enumerate(neb_images): 109 | d = im.get_all_distances() 110 | d = d[d > 0] 111 | im_dist.append(np.min(d)) 112 | 113 | neb_norm = np.min(im_dist) 114 | return neb_norm 115 | 116 | 117 | def get_neb_dists(neb_images): 118 | """ 119 | Computes the minimum distances between atoms for each NEB (Nudged Elastic Band) image. 120 | 121 | Parameters: 122 | neb_images (list): A list of ASE atoms objects representing the NEB images. 123 | 124 | Returns: 125 | tuple: A tuple containing two lists: 126 | - inds (list): The indices of the NEB images. 127 | - im_dist (list): The minimum distances between atoms for each NEB image. 128 | """ 129 | im_dist = [] 130 | inds = [] 131 | for i, im in enumerate(neb_images): 132 | d = im.get_all_distances() 133 | d = d[d > 0] 134 | im_dist.append(np.min(d)) 135 | inds.append(i) 136 | 137 | return inds, im_dist 138 | 139 | 140 | def get_distm(ini, fin): 141 | """ 142 | Computes the absolute difference in connectivity matrices between the initial and final atomic structures. 143 | 144 | Parameters: 145 | ini (object): An ASE atoms object representing the initial structure. 146 | fin (object): An ASE atoms object representing the final structure. 147 | 148 | Returns: 149 | numpy.ndarray: A 2D array representing the absolute difference in connectivity matrices between the initial and final structures. 150 | """ 151 | distm_fin = get_connectivity(fin) 152 | distm_ini = get_connectivity(ini) 153 | distm = distm_fin - distm_ini 154 | 155 | distm = np.abs(distm) 156 | 157 | return distm 158 | 159 | 160 | def get_distm_sum(ini, fin, unit="A", tolerance=0.5): 161 | """ 162 | Computes the sum of the absolute differences in connectivity matrices between the initial and final atomic structures. 163 | 164 | Parameters: 165 | ini (object): An ASE atoms object representing the initial structure. 166 | fin (object): An ASE atoms object representing the final structure. 167 | unit (str): The unit of measurement for the differences. 
Can be 'A' for angstroms or 'bonds' for covalent bonds. Default is 'A'. 168 | tolerance (float): The tolerance value for considering a difference as significant. Default is 0.5. 169 | 170 | Returns: 171 | float: The sum of the absolute differences in connectivity matrices, considering the specified unit and tolerance. 172 | 173 | Raises: 174 | ValueError: If the unit is not 'A' or 'bonds'. 175 | """ 176 | distm = get_distm(ini, fin) 177 | distm = np.abs(distm) 178 | 179 | if unit == "bonds": 180 | distm = distm > tolerance 181 | return np.sum(distm) 182 | 183 | elif unit == "A": 184 | distm = distm * (distm > tolerance) 185 | return np.sum(distm) 186 | 187 | else: 188 | raise ValueError( 189 | "Value of variable 'unit' can be 'A' - angstrom or 'bonds' - covalent bond" 190 | ) 191 | 192 | 193 | def plot_distm(ini, fin): 194 | """ 195 | Plots the connectivity matrices and their differences for the initial and final atomic structures, along with the NEB distances. 196 | 197 | Parameters: 198 | ini (object): An ASE atoms object representing the initial structure. 199 | fin (object): An ASE atoms object representing the final structure. 200 | 201 | Returns: 202 | None 203 | """ 204 | import seaborn as sns 205 | 206 | f, (ax1, ax2, ax3, ax4) = plt.subplots( 207 | 1, 4, figsize=(12, 2.3) 208 | ) # , gridspec_kw = {'width_ratios': [1, 3]}) 209 | 210 | distm_fin = Neb.get_connectivity(fin) 211 | distm_ini = Neb.get_connectivity(ini) 212 | distm = distm_fin - distm_ini 213 | 214 | neb_images = get_neb_images(ini, fin) 215 | x, y = get_neb_dists(neb_images) 216 | 217 | sns.heatmap(distm_ini, linewidth=0.5, ax=ax1) 218 | sns.heatmap(distm_fin, linewidth=0.5, ax=ax2) 219 | sns.heatmap(np.abs(distm), linewidth=0.5, ax=ax3) 220 | sns.scatterplot(x=x, y=y, ax=ax4) 221 | f.tight_layout() 222 | 223 | 224 | def arrange_backbone(ini, fin, bond_len_tolerance=0.5): 225 | """ 226 | Arranges the backbone of the initial and final atomic structures by permuting the initial structure to best match the final structure. 227 | 228 | Parameters: 229 | ini (object): An ASE atoms object representing the initial structure. 230 | fin (object): An ASE atoms object representing the final structure. 231 | bond_len_tolerance (float): The tolerance value for considering bond length differences as significant. Default is 0.5. 232 | 233 | Returns: 234 | tuple: A tuple containing two ASE atoms objects: 235 | - best_ini (object): The permuted initial structure that best matches the final structure. 236 | - best_fin (object): The final structure. 
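Example (illustrative sketch, not part of the original file; `ini` and `fin` are assumed to be two ASE Atoms objects of the same adsorbate whose atoms are indexed differently):

    >>> best_ini, best_fin = arrange_backbone(ini, fin, bond_len_tolerance=0.5)
    >>> len(best_ini) == len(best_fin)
    True

Note that the backbone permutations are enumerated with itertools.permutations, so the search grows factorially with the number of like heavy atoms and is only practical for small fragments.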
237 | """ 238 | ini = ini[ini.numbers.argsort()] 239 | fin = fin[fin.numbers.argsort()] 240 | 241 | h = ini.copy() 242 | h = h[[atom.index for atom in h if atom.symbol == "H"]] 243 | 244 | h_fin = fin.copy() 245 | h_fin = h_fin[[atom.index for atom in h_fin if atom.symbol == "H"]] 246 | 247 | b_ini = ini.copy() 248 | b_ini = b_ini[[atom.index for atom in b_ini if atom.symbol != "H"]] 249 | 250 | b_fin = fin.copy() 251 | b_fin = b_fin[[atom.index for atom in b_fin if atom.symbol != "H"]] 252 | 253 | b_ini_perms = permute_image(b_ini) 254 | 255 | best_ini_perm = get_best_perm( 256 | b_ini_perms, b_fin, bond_len_tolerance=bond_len_tolerance 257 | ) 258 | 259 | if fin[0].symbol == "H": 260 | best_fin = h_fin.copy() 261 | best_fin += b_fin 262 | 263 | best_ini = h.copy() 264 | best_ini += best_ini_perm 265 | else: 266 | best_fin = b_fin.copy() 267 | best_fin += h_fin 268 | 269 | best_ini = best_ini_perm.copy() 270 | best_ini += h 271 | 272 | # print('best_ini: ', best_ini) 273 | # print('best_fin: ', best_fin) 274 | 275 | return best_ini, best_fin 276 | 277 | 278 | def get_sorted(atoms): 279 | """ 280 | Sorts the atoms in the given atomic structure by their atomic numbers. 281 | 282 | Parameters: 283 | atoms (object): An ASE atoms object representing the atomic structure. 284 | 285 | Returns: 286 | object: An ASE atoms object with atoms sorted by their atomic numbers. 287 | """ 288 | atoms_numbers = list(set(atoms.arrays["numbers"])) 289 | atoms_numbers.sort() 290 | 291 | inds = [] 292 | for n in atoms_numbers: 293 | inds += [atom.index for atom in atoms if atom.number == n] 294 | 295 | b = atoms.copy() 296 | b = b[inds] 297 | return b 298 | 299 | 300 | def get_best_perm(ini_perms, fin, bond_len_tolerance, plot=False): 301 | """ 302 | Finds the best permutation of the initial atomic structure that best matches the final structure based on bond length tolerance. 303 | 304 | Parameters: 305 | ini_perms (list): A list of ASE atoms objects representing the permuted initial structures. 306 | fin (object): An ASE atoms object representing the final structure. 307 | bond_len_tolerance (float): The tolerance value for considering bond length differences as significant. 308 | plot (bool): Whether to plot the connectivity matrices and their differences. Default is False. 309 | 310 | Returns: 311 | object: The best permuted ASE atoms object that matches the final structure. 
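Example (illustrative sketch, not part of the original file; `ini` and `fin` are assumed to be matching ASE Atoms objects):

    >>> perms = permute_image(ini)
    >>> best = get_best_perm(perms, fin, bond_len_tolerance=0.5)
    >>> best.info['ds'], best.info['touch']

The permutation with the fewest changed bonds ('ds') is kept; ties are broken in favour of the candidate whose interpolated NEB images keep atoms furthest apart ('touch').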
312 | """ 313 | best_ds = 1000000000000000 # very big number 314 | fin = get_sorted(fin) 315 | 316 | df = [] 317 | for j, a in enumerate(ini_perms): 318 | a = get_sorted(a) 319 | # print('ini: ', a) 320 | # print('fin: ', fin) 321 | 322 | if plot: 323 | plot_distm(a, fin) 324 | 325 | neb_images = get_neb_images(a, fin) 326 | 327 | ds = get_distm_sum(a, fin, unit="bonds", tolerance=bond_len_tolerance) 328 | if ds <= best_ds: 329 | df.append({"ds": ds, "touch": get_neb_norm(neb_images), "perm_index": j}) 330 | best_ds = ds 331 | df = pd.DataFrame(df) 332 | 333 | min_ds = np.min(df.ds.values) 334 | 335 | df = df.sort_values(by="touch", ascending=False) 336 | 337 | df = df[df.ds == min_ds] 338 | print(df) 339 | best_index = df.perm_index.values[0] 340 | best_touch = df.touch.values[0] 341 | print("best index: ", best_index) 342 | best_perm = ini_perms[best_index].copy() 343 | best_perm = get_sorted(best_perm) 344 | 345 | info = {"ds": min_ds, "touch": best_touch} 346 | best_perm.info.update(info) 347 | 348 | return best_perm 349 | 350 | 351 | def arrange_images(ini, fin, bond_len_tolerance=0.5, plot=False): 352 | """ 353 | Arranges the initial and final atomic structures by permuting the initial structure to best match the final structure. 354 | 355 | Parameters: 356 | ini (object): An ASE atoms object representing the initial structure. 357 | fin (object): An ASE atoms object representing the final structure. 358 | bond_len_tolerance (float): The tolerance value for considering bond length differences as significant. Default is 0.5. 359 | plot (bool): Whether to plot the connectivity matrices and their differences. Default is False. 360 | 361 | Returns: 362 | tuple: A tuple containing two ASE atoms objects: 363 | - best_ini (object): The permuted initial structure that best matches the final structure. 364 | - fin (object): The final structure. 
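Example (illustrative sketch, not part of the original file; `ini` and `fin` are assumed to share the same stoichiometry):

    >>> best_ini, best_fin = arrange_images(ini, fin, bond_len_tolerance=0.5)
    >>> images = get_neb_images(best_ini, best_fin, images_no=10)

Note that the hydrogen-permutation step below currently passes fix_species=[6, 8] (C and O), so adsorbates containing other heavy elements may need that list adapted.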
365 | """ 366 | t = [] 367 | 368 | ini, fin = arrange_backbone(ini, fin, bond_len_tolerance=bond_len_tolerance) 369 | fix_species = [atom.number for atom in ini if atom.number != 1] 370 | perms = permute_image(ini, fix_species=[6, 8]) 371 | print(len(perms)) 372 | best_ini = get_best_perm(perms, fin, bond_len_tolerance=0.1, plot=plot) 373 | return best_ini, fin 374 | 375 | 376 | # def make_best_nebs(ini, fin, rnorm_cutoff=2, bond_range = [0., 2], neb_images=10, neb_dist=0.7, verbose=False): 377 | # 378 | # ini = ini[ini.numbers.argsort()] 379 | # fin = fin[fin.numbers.argsort()] 380 | # 381 | # permuted_ini = permute_image(ini) 382 | # 383 | # best_nebs = [] 384 | # 385 | # best_ini = select_best_neb(permuted_ini, fin, rnorm_cutoff=rnorm_cutoff, bond_range = bond_range, neb_images = neb_images, neb_dist = neb_dist, verbose=False) 386 | # 387 | # if best_ini: 388 | # 389 | # neb_trj = [best_ini.copy() for _ in range(10)] 390 | # neb_trj.append(fin.copy()) 391 | # 392 | # neb = NEB(neb_trj) 393 | # 394 | # neb.interpolate() 395 | # 396 | # best_nebs.append(neb.images) 397 | # 398 | # return best_nebs 399 | 400 | # def check_neb(iatoms, fatoms, bonds=1, bond_range = [0., 2]): 401 | # 402 | # reactm = check_image(iatoms) - check_image(fatoms) 403 | # 404 | # flat_reactm = reactm.flatten() 405 | # flat_reactm = flat_reactm[flat_reactm>0] 406 | # flat_reactm.sort() 407 | # 408 | # return np.linalg.norm(reactm), np.linalg.norm(flat_reactm[:-bonds]) 409 | # 410 | # def select_best_ini(ini, fin, rnorm_cutoff=3, bond_range = [0., 2], neb_images=10, neb_dist=0.8, verbose=False): 411 | # df = [] 412 | # 413 | # perm_ini_traj = permute_atoms(ini) 414 | # 415 | # for i, a in enumerate(perm_ini_traj): 416 | # 417 | # INI = perm_ini_traj[i].copy() 418 | # FIN = fin.copy() 419 | # 420 | # rnorm, _ = check_neb(INI, FIN, bonds=1, bond_range = [0., 2]) 421 | # 422 | # neb = NEB([INI.copy() for _ in range(neb_images)]+[FIN]) 423 | # neb.interpolate() 424 | # 425 | # im_dist = [] 426 | # for im in neb.images: 427 | # d = im.get_all_distances() 428 | # d = d[d>0] 429 | # im_dist.append(np.min(d)) 430 | # neb_norm = np.min(im_dist) 431 | # 432 | # info = {'neb_index':i, 'rnorm': rnorm, 'neb_norm': neb_norm} 433 | # df.append(info) 434 | # df = pd.DataFrame(df) 435 | # df=df[df['rnorm'] < rnorm_cutoff] 436 | # df=df[df['neb_norm'] > neb_dist] 437 | # if verbose: 438 | # print(df) 439 | # df = df.sort_values(by='rnorm', ascending=True)[:1] 440 | # if len(df.index.values) > 0: 441 | # best_index = df.neb_index.values[0] 442 | # best_ini = perm_ini_traj[best_index].copy() 443 | # best_ini.info.update(info) 444 | # return best_ini 445 | # else: 446 | # if verbose: 447 | # print(f'select_best_neb returns empty list.') 448 | # return False 449 | 450 | 451 | # Credits Lars Leon Schaaf 452 | 453 | # def permute_like_species(sp_t, select): 454 | # """ 455 | # Returns new index of all posible permutations between like elements of the species 456 | # total array (spt_t), but only for the select configurations 457 | # 458 | # Args: 459 | # spt_t (list): list of species 460 | # select (list): if only a subset of the indicies need to be permuted 461 | # 462 | # Returns: 463 | # list: index of new permutations 464 | # """ 465 | # sp_t = np.array(sp_t) 466 | # select = np.array(select) 467 | # 468 | # # Indexes -> which will be permuted 469 | # indexs = np.arange(len(sp_t)) # the value we're transforming is simple its position 470 | # 471 | # # Select for permutations 472 | # indexs_sel = indexs[select] 473 | # species = 
sp_t[select] 474 | # 475 | # # Unique species 476 | # grouped_s = np.unique(species) 477 | # # ['A', 'B'] 478 | # grouped_i = [indexs_sel[species == s] for s in grouped_s] 479 | # # [array([0, 2, 3]), array([5, 7])] 480 | # 481 | # indiv_permutations = [list(itertools.permutations(i)) for i in grouped_i] 482 | # indiv_permutations 483 | # # [[(0, 2, 3), (0, 3, 2), (2, 0, 3), (2, 3, 0), (3, 0, 2), (3, 2, 0)], 484 | # # [(5, 7), (7, 5)]] 485 | # 486 | # all_perm = list(itertools.product(*indiv_permutations)) 487 | # # [((0, 2, 3), (5, 7)), 488 | # # ((0, 2, 3), (7, 5)), 489 | # # ((0, 3, 2), (5, 7)), 490 | # # ((0, 3, 2), (7, 5)), 491 | # # ((2, 0, 3), (5, 7)), 492 | # # ((2, 0, 3), (7, 5)), 493 | # # ((2, 3, 0), (5, 7)), 494 | # # ((2, 3, 0), (7, 5)), 495 | # # ((3, 0, 2), (5, 7)), 496 | # # ((3, 0, 2), (7, 5)), 497 | # # ((3, 2, 0), (5, 7)), 498 | # # ((3, 2, 0), (7, 5))] 499 | # 500 | # all_perm_flattened = [list(itertools.chain.from_iterable(i)) for i in all_perm] 501 | # 502 | # indexs_permutations = [] 503 | # 504 | # for perm in all_perm: 505 | # indexi = indexs.copy() 506 | # for i, s in enumerate(grouped_s): 507 | # indexi[select & (sp_t == s)] = perm[i] 508 | # indexs_permutations.append(indexi) 509 | # # del s 510 | # # del i 511 | # 512 | # # del perm 513 | # 514 | # indexs_permutations = np.array(indexs_permutations) 515 | # 516 | # return indexs_permutations 517 | # 518 | # 519 | # def get_values_for_all_permutations(val_t, permutations): 520 | # # Val_t could be an atoms object 521 | # return np.array([val_t[perm] for perm in permutations]) 522 | # 523 | # 524 | # def get_distances_between_images(imagesi): 525 | # """Returns distance between each image ie 2norm of d2-d1""" 526 | # 527 | # spring_lengths = [] 528 | # for j in range(len(imagesi) - 1): 529 | # spring_vec = imagesi[j + 1].get_positions() - imagesi[j].get_positions() 530 | # spring_lengths.append(np.linalg.norm(spring_vec)) 531 | # return np.array(spring_lengths) 532 | # 533 | # 534 | # def add_intermediary_images( 535 | # imagesi, dist_cutoff, interpolate_method="idpp", max_number=100, verbose=False, 536 | # ): 537 | # """Add additional images inbetween existing ones, purely based on geometry""" 538 | # # create copy of images 539 | # imagesi = [at.copy() for at in imagesi] 540 | # interp_images = [] 541 | # max_dist_images = max(get_distances_between_images(imagesi)) 542 | # for iter in range(max_number): 543 | # if max_dist_images <= dist_cutoff: 544 | # print(f"Max distance readched after {iter} iterations") 545 | # break 546 | # distances = get_distances_between_images(imagesi) 547 | # jmax = np.argmax(distances) 548 | # 549 | # toInterpolate = [imagesi[jmax]] 550 | # toInterpolate += [toInterpolate[0].copy()] 551 | # toInterpolate += [imagesi[jmax + 1]] 552 | # 553 | # neb = NEB(toInterpolate) 554 | # neb.interpolate(method=interpolate_method, apply_constraint=True) 555 | # 556 | # interp_images.append([jmax, toInterpolate[1].copy()]) 557 | # # Add images 558 | # imagesi.insert(jmax + 1, toInterpolate[1].copy()) 559 | # if verbose: 560 | # print(f"Additional image added at {jmax} with distances {max(distances)}") 561 | # max_dist_images = max(get_distances_between_images(imagesi)) 562 | # 563 | # return interp_images, imagesi 564 | # 565 | # def get_slice(mystring): 566 | # return slice(*[{True: lambda n: None, False: int}[x == ''](x) for x in (mystring.split(':') + ['', '', ''])[:3]]) 567 | # 568 | # def sort_atom_species(at, mask_slice): 569 | # cs = at.get_chemical_symbols() 570 | # order = 
np.arange(len(at)) 571 | # order[get_slice(mask_slice)] = order[get_slice(mask_slice)][np.argsort(cs[get_slice(mask_slice)])] 572 | # nat = ase.Atoms([at[i] for i in order], cell=at.get_cell(), pbc=at.get_pbc()) 573 | # return nat 574 | # 575 | # 576 | ## permuting atoms 577 | # def get_all_permutations(data, tags): 578 | # all_perms = [data] 579 | # for tag in np.unique(tags): 580 | # # print(tag) 581 | # # print(all_perms) 582 | # all_permsi = all_perms.copy() 583 | # all_perms = [] 584 | # for dataj in all_permsi: 585 | # for perm in list(itertools.permutations(dataj[tags == tag])): 586 | # datai = dataj.copy() 587 | # datai[tags == tag] = perm 588 | # all_perms.append(datai) 589 | # 590 | # return np.array(all_perms) 591 | # 592 | # def permute_atoms_to_smallest_distance(atstart, atend, mol_index): 593 | # indices = np.arange(len(atend)) 594 | # species = np.array(atend.get_chemical_symbols()) 595 | # positions = atend.get_positions() 596 | # 597 | # data = indices[mol_index:] 598 | # tags = species[mol_index:] 599 | # 600 | # permuts = get_all_permutations(data, tags) 601 | # 602 | # # get distance between all permutations 603 | # pos_start = atstart.get_positions()[mol_index:] 604 | # dists = [np.linalg.norm(positions[perm] - pos_start) for perm in permuts] 605 | # at_new = atend.copy() 606 | # 607 | # at_new.set_positions(np.concatenate([positions[:mol_index], positions[permuts[np.argmin(dists)]]])) 608 | # dist = np.min(dists) 609 | # # print(dist) 610 | # at_new.info['neb_dist'] = dist 611 | # return at_new 612 | # 613 | # def permute_trajectory_to_smallest_distance_between_images(traj,mol_index): 614 | # traj = [at.copy() for at in traj] 615 | # for i, at in enumerate(traj): 616 | # if i == 0: 617 | # continue 618 | # traj[i] = permute_atoms_to_smallest_distance(traj[i-1], traj[i], mol_index).copy() 619 | # return traj 620 | 621 | 622 | def make_neb(initial: Atoms, final: Atoms, images_no: int = 10, idpp: bool = True): 623 | """Function that creates a trajectory of linear interpolations between two ase atoms objects. 624 | 625 | Args: 626 | initial (Atoms): Initial image of the neb chain 627 | final (Atoms): Final image of the neb chain 628 | images_no (int, optional): Number of images the neb chain. Defaults to 10. 629 | idpp (bool): Use preoptimization on linear interpolated images. Defaults to True. 
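Example (illustrative sketch, not part of the original file; `ini_atoms` and `fin_atoms` are assumed to be pre-arranged Atoms objects with matching atom ordering):

    >>> images = make_neb(ini_atoms, fin_atoms, images_no=10, idpp=True)
    >>> len(images)
    12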
630 | 631 | Returns: 632 | list of Atoms: Interpolated images 633 | """ 634 | from ase.neb import NEB 635 | 636 | images = [initial] 637 | for i in range(images_no): 638 | images.append(initial.copy()) 639 | images.append(final) 640 | 641 | neb = NEB(images, images_no) 642 | 643 | if idpp == True: 644 | neb.interpolate(method="idpp") 645 | else: 646 | neb.interpolate() 647 | 648 | return neb.images 649 | 650 | 651 | def _swap_atoms_positions( 652 | atoms: Atoms, target_ind: Union[tuple, list], permutation: Union[tuple, list] 653 | ): 654 | """Internal routine that 655 | 656 | Args: 657 | atoms (Atoms): _description_ 658 | target_ind (Union[tuple,list]): _description_ 659 | permutation (Union[tuple,list]): _description_ 660 | 661 | Returns: 662 | _type_: _description_ 663 | """ 664 | 665 | if not set(target_ind) == set(permutation): 666 | print("Target_ind and permutation must contain same indicies.") 667 | return 668 | else: 669 | positions = [atoms[i].position.copy() for i in permutation] 670 | 671 | # print(positions) 672 | 673 | for i, j in enumerate(target_ind): 674 | # print(f'setting position of atom {atoms[j]} from {atoms[j].position} to {positions[i]}', ) 675 | atoms[j].position = positions[i] 676 | return atoms 677 | 678 | 679 | def _get_permuted_indices(atoms): 680 | from collections import OrderedDict 681 | from itertools import permutations 682 | 683 | species_in_order = [atom.symbol for atom in atoms] 684 | species = list(OrderedDict.fromkeys(species_in_order)) 685 | 686 | mol_atom_list = [i for i, f in enumerate(atoms.arrays["fragments"]) if f == 1] 687 | 688 | permuted_trj = [] 689 | groups = [] 690 | for symbol in species: 691 | group = [ 692 | a.index for a in atoms if a.symbol == symbol and a.index in mol_atom_list 693 | ] 694 | groups.append(group) 695 | 696 | pgroups = [] 697 | for group in groups: 698 | if len(group) > 0: 699 | pgroup = [] 700 | for p in permutations(group): 701 | pgroup.append(p) 702 | pgroups.append(pgroup) 703 | 704 | return pgroups 705 | 706 | 707 | def get_swapped_by_species(atoms: Atoms): 708 | """_summary_ 709 | 710 | Args: 711 | atoms (Atoms): _description_ 712 | """ 713 | 714 | from itertools import permutations 715 | 716 | pgroups = _get_permuted_indices(atoms) 717 | 718 | permutations = [] 719 | for pgroup in itertools.product(*pgroups): 720 | tup = () 721 | for t in pgroup: 722 | tup += t 723 | permutations.append(tup) 724 | 725 | swapped_trj = [] 726 | for permutation in permutations: 727 | a = atoms.copy() 728 | a = _swap_atoms_positions(a, permutations[0], permutation) 729 | swapped_trj.append(a) 730 | 731 | return swapped_trj 732 | -------------------------------------------------------------------------------- /autoadsorbate/Particle.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ase import Atoms 3 | from typing import Union, Literal 4 | 5 | def subdivisions_for_min_edge_length(d_min, radius=1.0): 6 | return max(int(np.floor(2 * radius / d_min)), 1) 7 | 8 | 9 | def get_cube_surface_pts(radius=1.0, center=(0, 0, 0), d_min=0.1): 10 | center = np.array(center, dtype=np.float32) 11 | subdivisions = subdivisions_for_min_edge_length(d_min, radius) 12 | 13 | lin = np.linspace(-1, 1, subdivisions + 1) 14 | grid = np.array(np.meshgrid(lin, lin, lin)).reshape(3, -1).T 15 | 16 | mask = np.any(np.isclose(np.abs(grid), 1.0), axis=1) 17 | cube_surface_pts = grid[mask] 18 | return cube_surface_pts, subdivisions, lin 19 | 20 | def grid_round_cube(radius=1.0, center=(0, 0, 0), 
d_min=0.1): 21 | 22 | cube_surface_pts, subdivisions, lin = get_cube_surface_pts(radius=radius, center=center, d_min=d_min) 23 | 24 | vert_idx_map = {tuple(v): i for i, v in enumerate(cube_surface_pts)} 25 | 26 | def spherify(v): 27 | x, y, z = v[:, 0], v[:, 1], v[:, 2] 28 | x2, y2, z2 = x**2, y**2, z**2 29 | sx = x * np.sqrt(1 - (y2 + z2)/2 + (y2 * z2)/3) 30 | sy = y * np.sqrt(1 - (z2 + x2)/2 + (z2 * x2)/3) 31 | sz = z * np.sqrt(1 - (x2 + y2)/2 + (x2 * y2)/3) 32 | return np.column_stack([sx, sy, sz]) 33 | 34 | vertices = spherify(cube_surface_pts) * radius + center 35 | faces = [] 36 | for axis in range(3): 37 | for sign in [-1, 1]: 38 | coord = sign 39 | mask = np.isclose(cube_surface_pts[:, axis], coord) 40 | face_pts = cube_surface_pts[mask] 41 | u_axis, v_axis = [i for i in range(3) if i != axis] 42 | sorted_idx = np.lexsort((face_pts[:, v_axis], face_pts[:, u_axis])) 43 | face_pts = face_pts[sorted_idx] 44 | 45 | for i in range(subdivisions): 46 | for j in range(subdivisions): 47 | def get_index(di, dj): 48 | pt = np.zeros(3) 49 | pt[axis] = coord 50 | pt[u_axis] = lin[i + di] 51 | pt[v_axis] = lin[j + dj] 52 | return vert_idx_map[tuple(pt)] 53 | 54 | a = get_index(0, 0) 55 | b = get_index(1, 0) 56 | c = get_index(1, 1) 57 | d = get_index(0, 1) 58 | faces.append([a, b, c, d]) 59 | 60 | faces = np.array(faces, dtype=np.int32) 61 | 62 | # Ensure outward-facing winding 63 | centers = vertices[faces].mean(axis=1) 64 | normals = np.cross( 65 | vertices[faces][:, 1] - vertices[faces][:, 0], 66 | vertices[faces][:, 2] - vertices[faces][:, 1] 67 | ) 68 | outward = centers - center 69 | if np.mean(np.einsum('ij,ij->i', normals, outward)) < 0: 70 | faces = faces[:, ::-1] 71 | 72 | # Compute per-vertex normals (area-weighted face normals) 73 | vnormals = np.zeros_like(vertices) 74 | face_normals = np.cross( 75 | vertices[faces][:, 1] - vertices[faces][:, 0], 76 | vertices[faces][:, 2] - vertices[faces][:, 1] 77 | ) 78 | face_areas = np.linalg.norm(face_normals, axis=1, keepdims=True) 79 | face_normals_unit = face_normals / np.maximum(face_areas, 1e-10) 80 | 81 | for i, face in enumerate(faces): 82 | for j in face: 83 | vnormals[j] += face_normals_unit[i] 84 | 85 | vnormals /= np.linalg.norm(vnormals, axis=1, keepdims=True) 86 | 87 | return [vertices.astype(np.float32), faces.astype(np.int32), vnormals.astype(np.float32)] 88 | 89 | 90 | def fibonacci_sphere(center, radius, point_distance): 91 | """ 92 | Generate spiral grid of points over a sphere using Fibonacci lattice with approximately even spacing. 
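    Illustrative sketch of typical use (values assumed, not part of the original file):

    >>> pts = fibonacci_sphere(center=(0.0, 0.0, 0.0), radius=10.0, point_distance=1.0)[0]
    >>> pts.shape
    (1256, 3)

    The point count is estimated as the sphere surface area divided by point_distance**2, and the points follow a golden-angle spiral, which keeps the spacing approximately uniform.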
93 | 94 | Parameters: 95 | - center: tuple of (x, y, z), center of the sphere 96 | - radius: float, radius of the sphere 97 | - point_distance: float, approximate desired spacing between points on the surface 98 | 99 | Returns: 100 | - numpy array of shape (n_points, 3) with Cartesian coordinates 101 | """ 102 | surface_area = 4 * np.pi * radius**2 103 | approx_area_per_point = point_distance**2 104 | n_points = int(surface_area / approx_area_per_point) 105 | 106 | indices = np.arange(0, n_points, dtype=float) + 0.5 107 | phi = np.arccos(1 - 2 * indices / n_points) 108 | theta = np.pi * (1 + 5**0.5) * indices 109 | 110 | x = radius * np.sin(phi) * np.cos(theta) + center[0] 111 | y = radius * np.sin(phi) * np.sin(theta) + center[1] 112 | z = radius * np.cos(phi) + center[2] 113 | 114 | return [np.stack((x, y, z), axis=-1)] 115 | 116 | def move_sphere_points_toward_center(sphere_points, center, point_cloud, 117 | touch_criteria, step_size=0.01, max_steps=1000): 118 | """ 119 | Move each point on the sphere toward the center until it's within `touch_criteria` 120 | of any point in the point_cloud. 121 | 122 | Parameters: 123 | - sphere_points: (N, 3) array of points on the sphere 124 | - center: (3,) tuple or array, the center of the sphere 125 | - point_cloud: (M, 3) array of reference points 126 | - touch_criteria: float, distance threshold to stop movement 127 | - step_size: float, how far to move each step 128 | - max_steps: int, maximum number of steps to avoid infinite loops 129 | 130 | Returns: 131 | - (N, 3) numpy array of adjusted sphere points 132 | """ 133 | from scipy.spatial import KDTree 134 | sphere_points = np.array(sphere_points, dtype=float) 135 | center = np.array(center, dtype=float) 136 | directions = center - sphere_points 137 | directions /= np.linalg.norm(directions, axis=1)[:, np.newaxis] 138 | 139 | tree = KDTree(point_cloud) 140 | updated_points = sphere_points.copy() 141 | 142 | for i in range(len(updated_points)): 143 | for _ in range(max_steps): 144 | dist, _ = tree.query(updated_points[i], k=1) 145 | if dist < touch_criteria: 146 | break 147 | updated_points[i] += directions[i] * step_size 148 | 149 | return updated_points 150 | 151 | def random_point_in_sphere(radius, center=(0, 0, 0)): 152 | """ 153 | Generate a single random 3D point uniformly inside a sphere. 154 | 155 | Parameters: 156 | - radius: float, radius of the sphere 157 | - center: tuple of 3 floats, center of the sphere 158 | 159 | Returns: 160 | - numpy array of shape (3,) 161 | """ 162 | # Random direction 163 | vec = np.random.normal(0, 1, 3) 164 | vec /= np.linalg.norm(vec) 165 | 166 | # Random radius with cube root to ensure uniform volume distribution 167 | r = radius * np.random.uniform(0, 1) ** (1/3) 168 | 169 | return np.array(center) + vec * r 170 | 171 | def random_points_in_sphere(radius, n_points, center=(0, 0, 0)): 172 | vecs = np.random.normal(0, 1, (n_points, 3)) 173 | vecs /= np.linalg.norm(vecs, axis=1)[:, None] 174 | rs = radius * np.random.uniform(0, 1, n_points) ** (1/3) 175 | return np.array(center) + vecs * rs[:, None] 176 | 177 | 178 | def get_nearby_point_indices(sphere_points, point_cloud, threshold): 179 | """ 180 | For each sphere point, return the unique indices of point_cloud points within the given threshold. 
181 | 182 | Parameters: 183 | - sphere_points: (N, 3) numpy array of query points (e.g., sphere surface) 184 | - point_cloud: (M, 3) numpy array of atoms or points in space 185 | - threshold: float, distance threshold 186 | 187 | Returns: 188 | - A set of unique indices from point_cloud within threshold of any sphere point 189 | """ 190 | from scipy.spatial import KDTree 191 | 192 | tree = KDTree(point_cloud) 193 | unique_indices = set() 194 | 195 | for point in sphere_points: 196 | indices = tree.query_ball_point(point, r=threshold) 197 | unique_indices.update(indices) 198 | 199 | return np.array(sorted(unique_indices)) 200 | 201 | def get_all_nearby_indices_per_point(sphere_points, point_cloud, threshold): 202 | from scipy.spatial import KDTree 203 | tree = KDTree(point_cloud) 204 | return [tree.query_ball_point(p, r=threshold) for p in sphere_points] 205 | 206 | def keep_unique_inds(inds): 207 | unique=[] 208 | for lst in inds: 209 | 210 | lst.sort() 211 | string = '' 212 | for i in lst: 213 | string+=f'{i}#' 214 | string = string[:-1] 215 | unique.append(string) 216 | unique = list(set(unique)) 217 | for i, u in enumerate(unique): 218 | unique[i] = [int(x) for x in u.split('#')] 219 | return unique 220 | 221 | 222 | def mean_of_close_sphere_points(query_point, sphere_points, threshold): 223 | """ 224 | Find all points on the sphere within `threshold` distance to `query_point`, 225 | and return their mean position. 226 | 227 | Parameters: 228 | - query_point: array-like of shape (3,) 229 | - sphere_points: numpy array of shape (N, 3) 230 | - threshold: float, distance threshold 231 | 232 | Returns: 233 | - mean_point: numpy array of shape (3,), or None if no points are within threshold 234 | """ 235 | from scipy.spatial import KDTree 236 | tree = KDTree(sphere_points) 237 | indices = tree.query_ball_point(query_point, r=threshold) 238 | 239 | if not indices: 240 | return None # No points within threshold 241 | 242 | nearby_points = sphere_points[indices] 243 | return np.mean(nearby_points, axis=0) 244 | 245 | 246 | def calculate_sites(inds, particle_atoms, shrinkwrap, threshold=2.7): 247 | particle = particle_atoms.positions 248 | 249 | site_dict = {} 250 | for k in ['coordinates', 'connectivity', 'topology', 'n_vector', 'h_vector', 'site_formula']: 251 | site_dict[k]=[] 252 | 253 | for i in inds: 254 | site_dict['topology'].append(i) 255 | site_dict['connectivity'].append(len(i)) 256 | p1 = np.mean(particle[i], axis=0) 257 | site_dict['coordinates'].append(p1) 258 | p2 = mean_of_close_sphere_points(query_point=p1, sphere_points=shrinkwrap, threshold=threshold) 259 | n_vector = p2-p1 260 | n_vector /= np.linalg.norm(n_vector) 261 | site_dict['n_vector'].append(n_vector) 262 | 263 | if len(i)==1: 264 | h_vector = [1.,0,0] 265 | else: 266 | h_vector = particle[i[0]] - particle[i[1]] 267 | h_vector /= np.linalg.norm(h_vector) 268 | site_dict['h_vector'].append(h_vector) 269 | 270 | site_dict['site_formula'].append(particle_atoms[i].symbols.formula.count()) 271 | 272 | return site_dict 273 | 274 | def get_shrinkwrap_particle_ads_sites( 275 | particle_atoms: Atoms, 276 | grid_mode: Union[Literal['fibonacci', 'grid'], list], 277 | precision: float = 1., 278 | touch_sphere_size: float = 3., 279 | return_geometry = False, 280 | ): 281 | """Identifies adsorption sites on a surface using a shrinkwrap grid. 282 | 283 | Args: 284 | particle_atoms (Atoms): Atoms slab. 
285 | grid_mode (str or numpy array): 'fibonacci' or 'grid'; grid points can be set explicitly by providing a numpy array 286 | precision (float): Precision for the shrinkwrap grid. 287 | touch_sphere_size (float): Radius to consider for grid points. 288 | return_trj (bool): Whether to return the trajectory for demo mode. 289 | return_geometry (bool): dev/visualization option. 290 | 291 | Returns: 292 | dict: Dictionary containing site information. 293 | """ 294 | touch_buffer = 0.2 295 | 296 | center = np.mean(particle_atoms.positions, axis=0) 297 | 298 | diffs = particle_atoms.positions - center 299 | dists = np.linalg.norm(diffs, axis=1) 300 | index = np.argmax(dists) 301 | particle_radius = dists[index] 302 | 303 | grid_radius = particle_radius + touch_sphere_size + 0.5 # 0.5 is safety buffer 304 | 305 | if isinstance(grid_mode, list): 306 | round_cube_geometry = grid_mode 307 | grid = round_cube_geometry[0] 308 | elif grid_mode == 'fibonacci': 309 | grid = fibonacci_sphere(center=center, radius=grid_radius, point_distance=precision)[0] 310 | elif grid_mode == 'round_cube': 311 | round_cube_geometry = grid_round_cube(center=center, radius=grid_radius, d_min=precision) 312 | grid = round_cube_geometry[0] 313 | else: 314 | raise ValueError('grid_mode supported: fibonacci, grid; alternatively provide your own geometry') 315 | 316 | shrinkwrap = move_sphere_points_toward_center( 317 | sphere_points = grid, 318 | center = center, 319 | point_cloud = particle_atoms.positions, 320 | touch_criteria=touch_sphere_size, 321 | step_size=0.05, 322 | max_steps=1000 323 | ) 324 | 325 | 326 | inds = get_all_nearby_indices_per_point(shrinkwrap, particle_atoms.positions, touch_sphere_size+touch_buffer) 327 | inds = keep_unique_inds(inds) 328 | 329 | sites_dict = calculate_sites(inds, 330 | particle_atoms=particle_atoms, 331 | shrinkwrap=shrinkwrap, 332 | threshold=touch_sphere_size+touch_buffer) 333 | 334 | if return_geometry: 335 | _, faces, __ = round_cube_geometry 336 | return shrinkwrap, faces, sites_dict 337 | 338 | 339 | return sites_dict -------------------------------------------------------------------------------- /autoadsorbate/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for autoadsorbate.""" 2 | 3 | __author__ = """Fakoe Edvin""" 4 | __email__ = "edvin.fako@basf.com" 5 | __version__ = "0.2.0" 6 | 7 | from autoadsorbate.autoadsorbate import Fragment, Surface 8 | from autoadsorbate.Smile import get_marked_smiles 9 | from autoadsorbate.string_utils import _example_config, construct_smiles 10 | from autoadsorbate.utils import docs_plot_conformers, docs_plot_sites, get_drop_snapped, compute_energy 11 | 12 | __all__ = [ 13 | "Fragment", 14 | "Surface", 15 | "docs_plot_conformers", 16 | "get_marked_smiles", 17 | "docs_plot_sites", 18 | "construct_smiles", 19 | "_example_config", 20 | "get_drop_snapped", 21 | "compute_energy", 22 | ] 23 | -------------------------------------------------------------------------------- /autoadsorbate/autoadsorbate.py: -------------------------------------------------------------------------------- 1 | """Main module.""" 2 | 3 | from typing import Dict, List, Literal, Union 4 | import copy 5 | 6 | import ase 7 | import numpy as np 8 | import pandas as pd 9 | from ase import Atom, Atoms 10 | from ase.visualize import view 11 | 12 | from .Smile import ( 13 | _reset_position, 14 | _reset_rotation, 15 | conformers_from_smile, 16 | ) 17 | from .Surf import conformer_to_site, get_shrinkwrap_ads_sites 
18 | from .utils import ( 19 | get_sorted_by_snap_dist, 20 | make_site_info_writable, 21 | ) 22 | 23 | from .Particle import get_shrinkwrap_particle_ads_sites 24 | 25 | class Intermediate: 26 | """ 27 | Base class for initializing reaction intermediates. 28 | 29 | Attributes: 30 | ActiveSite: The active site for the intermediate. 31 | fragments: A list of fragments associated with the intermediate. 32 | """ 33 | 34 | def __init__(self, ActiveSite, fragments=None): 35 | """ 36 | Initialize attributes. 37 | 38 | Args: 39 | ActiveSite: The active site for the intermediate. 40 | fragments (list, optional): A list of fragments associated with the intermediate. Defaults to an empty list. 41 | """ 42 | self.ActiveSite = ActiveSite 43 | self.fragments = fragments if fragments is not None else [] 44 | 45 | 46 | class Fragment: 47 | """ Base class for initializing reaction fragments. """ 48 | 49 | def __init__( 50 | self, 51 | smile: str, 52 | to_initialize: int = 10, 53 | random_seed: int = 2104, 54 | sort_conformers: bool = False, 55 | prune_rms_thresh: float = .5 56 | ): 57 | """ 58 | Initialize attributes. 59 | 60 | Args: 61 | smile (str): The SMILES string of the fragment. 62 | to_initialize (int, optional): The number of conformers to initialize. Defaults to 10. 63 | random_seed (int, optional): The random seed for conformer generation. Defaults to 2104. 64 | sort_conformers (bool, optional): Decides if the initial orientation of the fragment conformations is diverse. 65 | prune_rms_thresh (float, optional): RMSD threshold for pruning duplicates. Defaults to 0.5 Å. 66 | """ 67 | self.smile = smile 68 | self.to_initialize = to_initialize 69 | self.randomSeed = random_seed 70 | 71 | self.conformers = conformers_from_smile( 72 | smile, to_initialize, random_seed=random_seed, prune_rms_thresh=prune_rms_thresh 73 | ) 74 | self.conformers_aligned = [False for _ in self.conformers] 75 | 76 | self.sort_conformers = sort_conformers 77 | if self.sort_conformers: 78 | self.conformers = get_sorted_by_snap_dist(self.conformers) 79 | 80 | def get_conformer( 81 | self, 82 | i: Union[int, float], 83 | n_vector: np.ndarray = np.array([0, 0, 1]), 84 | rot_deg: float = 0 85 | ) -> Atoms: 86 | """ 87 | Returns a copy of the i-th conformer, aligned and rotated as specified. 88 | 89 | Args: 90 | i (int): The index of the conformer to retrieve. 91 | n_vector (np.ndarray, optional): The normal vector for rotation. Defaults to [0, 0, 1]. 92 | rot_deg (float, optional): The rotation angle in degrees. Defaults to 0. 93 | 94 | Returns: 95 | Atoms: A copy of the aligned and rotated conformer. 
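Example (illustrative sketch, not part of the original file; 'ClC' is used here as a hypothetical surrogate SMILES with a single marker atom):

    >>> frag = Fragment('ClC', to_initialize=5)
    >>> conf = frag.get_conformer(0, rot_deg=90)
    >>> conf.info['smiles']
    'ClC'
    >>> mid_conf = frag.get_conformer(0.5)  # float in [0, 1] picks by fractional position

Passing a float index is convenient when sampling conformers without knowing how many were actually generated.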
96 | """ 97 | # Resolve index 98 | if isinstance(i, float): 99 | if not (0.0 <= i <= 1.0): 100 | raise ValueError("Float index must be between 0 and 1.") 101 | position = int(i * len(self.conformers)) 102 | position = min(position, len(self.conformers) - 1) # clamp to valid range 103 | i = position 104 | elif i > len(self.conformers): 105 | raise KeyError(f"Index {i} is larger than number of initialized conformers.") 106 | 107 | 108 | if not self.conformers_aligned[i]: 109 | self.conformers[i] = _reset_position(self.conformers[i]) 110 | self.conformers[i] = _reset_rotation(self.conformers[i]) 111 | self.conformers_aligned[i] = True 112 | 113 | self.conformers[i].rotate(rot_deg, n_vector) 114 | self.conformers[i].info["smiles"] = self.smile 115 | return self.conformers[i].copy() 116 | 117 | def view(self, return_traj=False): 118 | traj = [self.get_conformer(i) for i, _ in enumerate(self.conformers)] 119 | if return_traj: 120 | return traj 121 | view(traj) 122 | 123 | def get_chemical_formula(self, empirical=True): 124 | """ 125 | Function that makes is easy to get the chemical formulas of surrogate smiles. 126 | Returns atoms.get_chemical_formula(empirical=empirical) for the NON_SURROGATE atoms in Fragment. 127 | Is not surrogate smiles returns same ase atoms.get_chemical_formula(empirical=empirical). 128 | """ 129 | if self.smile[:2] == 'Cl': 130 | return self.conformers[0][1:].get_chemical_formula(empirical=empirical) 131 | elif self.smile[:3] == 'S1S': 132 | return self.conformers[0][2:].get_chemical_formula(empirical=empirical) 133 | else: 134 | return self.conformers[0].get_chemical_formula(empirical=empirical) 135 | 136 | def copy(self) -> "Fragment": 137 | """Return a deep copy of this instance.""" 138 | return copy.deepcopy(self) 139 | 140 | 141 | class Surface: 142 | """ 143 | Base class for initializing a reactive surface. 144 | 145 | Attributes: 146 | atoms (Atoms): The ASE Atoms object representing the surface. 147 | precision (float): The precision for the grid spacing. 148 | touch_sphere_size (float): The size of the touch sphere. 149 | site_dict (Dict): Dictionary containing site information. 150 | site_df (pd.DataFrame): DataFrame containing site information. 151 | """ 152 | 153 | def __init__( 154 | self, 155 | atoms: Atoms, 156 | precision: float = 0.25, 157 | touch_sphere_size: float = 3, 158 | mode: Literal['slab', 'particle', 'dummy'] = 'slab', 159 | grid_mode: Union[Literal['fibonacci', 'round_cube'], list] = None 160 | ): 161 | """ 162 | Initialize attributes. 163 | 164 | Args: 165 | atoms (Atoms): The ASE Atoms object representing the surface. 166 | precision (float, optional): The precision for the grid spacing. Defaults to 0.25. 167 | touch_sphere_size (float, optional): The size of the touch sphere. Defaults to 3. 168 | grid_mode: Union[Literal['fibonacci', 'round_cube'], np.ndarray] provides options for grid geometry used to initialize shrinkwrap. If array, should contain, verts, faces, normals. 
Like output of "round_cube_geometry" 169 | """ 170 | self.mode = mode 171 | self.atoms = atoms 172 | self.precision = precision 173 | self.touch_sphere_size = touch_sphere_size 174 | self.grid_mode = grid_mode 175 | 176 | self.grid, self.faces, self.site_dict = self._shrikwrap(self.atoms) 177 | self.site_df = pd.DataFrame(self.site_dict) 178 | self.sort_site_df() 179 | 180 | self.sites_atoms = Atoms(['He' for _ in self.site_df.index.values], [ v for v in self.site_df.coordinates.values]) 181 | self.surf_inds = list(set([i for t in list(self.site_df.topology.values) for i in t])) 182 | 183 | def _shrikwrap(self, atoms): 184 | 185 | if self.mode == 'dummy': 186 | grid, faces, site_dict = [], [], {} 187 | 188 | elif self.mode == 'slab': 189 | grid, faces, site_dict = get_shrinkwrap_ads_sites( 190 | atoms=atoms, 191 | precision=self.precision, 192 | touch_sphere_size=self.touch_sphere_size, 193 | return_geometry=True 194 | ) 195 | 196 | elif self.mode == 'particle': 197 | if self.grid_mode is None: 198 | self.grid_mode = 'round_cube' 199 | grid, faces, site_dict = get_shrinkwrap_particle_ads_sites( 200 | particle_atoms=atoms, 201 | precision=self.precision, 202 | touch_sphere_size=self.touch_sphere_size, 203 | grid_mode = self.grid_mode, 204 | return_geometry = True 205 | ) 206 | 207 | return grid, faces, site_dict 208 | 209 | def sort_site_df(self, by: str = "xyz"): 210 | """ 211 | Sorts the site DataFrame by coordinates or distance. 212 | 213 | Args: 214 | by (str, optional): The sorting criterion ('xyz' or 'dist'). Defaults to 'xyz'. 215 | """ 216 | if by == "xyz": 217 | sort = {} 218 | for c in [0, 1, 2]: 219 | sort[f"sort_{c}"] = [ 220 | np.round(coord[c], 1) for coord in self.site_df.coordinates 221 | ] 222 | for k, v in sort.items(): 223 | self.site_df[k] = v 224 | self.site_df = self.site_df.sort_values( 225 | by=list(sort.keys()), ignore_index=True 226 | ) 227 | for k in sort.keys(): 228 | self.site_df.pop(k) 229 | elif by == "dist": 230 | self.site_df["sort"] = [ 231 | np.round(np.linalg.norm(coord), 1) for coord in self.site_df.coordinates 232 | ] 233 | self.site_df = self.site_df.sort_values(by="sort", ignore_index=True) 234 | self.site_df.pop("sort") 235 | 236 | # def get_site(self, index: int) -> Atoms: 237 | # """ 238 | # Returns the atoms object for a specific site. 239 | 240 | # Args: 241 | # index (int): The index of the site. 242 | 243 | # Returns: 244 | # Atoms: The ASE Atoms object for the site. 245 | # """ 246 | # site_atoms = self.atoms.copy() 247 | 248 | # if "adsorbate_info" in site_atoms.info.keys(): 249 | # site_atoms.info.pop("adsorbate_info") 250 | 251 | # info = self.site_df.loc[index].to_dict() 252 | # site_atoms.info.update(info) 253 | # site_atoms.append(Atom("X", position=self.site_df["coordinates"].loc[index])) 254 | # del site_atoms[:-1] 255 | # return site_atoms 256 | 257 | def get_site(self, index: Union[int, float]) -> Atoms: 258 | """ 259 | Returns the Atoms object for a specific site. 260 | 261 | Args: 262 | index (int or float): If int, used directly as index label in site_df. 263 | If float in [0, 1], used as fractional position in site_df. 264 | 265 | Returns: 266 | Atoms: The ASE Atoms object for the site. 
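Example (illustrative sketch, not part of the original file; `slab` is assumed to be an ASE Atoms slab, e.g. from ase.build.fcc111):

    >>> surf = Surface(slab, precision=0.5, touch_sphere_size=3.0)
    >>> site = surf.get_site(0)          # integer label from site_df
    >>> mid_site = surf.get_site(0.5)    # float in [0, 1]: fractional position in site_df
    >>> site.info['n_vector']            # outward-pointing site normal

Each returned object contains a single placeholder 'X' atom at the site coordinates, carrying the site metadata in its info dictionary.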
267 | """ 268 | site_atoms = self.atoms.copy() 269 | site_atoms.info.pop("adsorbate_info", None) 270 | 271 | # Resolve index 272 | return_site_from_df = True 273 | if isinstance(index, (float)): 274 | if not (0.0 <= index <= 1.0): 275 | raise ValueError("Float index must be between 0 and 1.") 276 | position = int(index * len(self.site_df)) 277 | position = min(position, len(self.site_df) - 1) # clamp to valid range 278 | index = self.site_df.index[position] 279 | 280 | elif index not in self.site_df.index: 281 | raise KeyError(f"Index {index} not found in site_df index.") 282 | 283 | elif isinstance(index, (list, tuple, np.ndarray)) and len(index) == 2: 284 | info = self.get_surface_interpolated_site(index) 285 | coordinates = info['coordinates'] 286 | return_site_from_df = False 287 | 288 | # Extract and assign site info 289 | if return_site_from_df: 290 | info = self.site_df.loc[index].to_dict() 291 | coordinates = self.site_df.loc[index, "coordinates"] 292 | 293 | site_atoms.info.update(info) 294 | site_atoms.append(Atom("X", position=coordinates)) 295 | del site_atoms[:-1] 296 | 297 | return site_atoms 298 | 299 | def get_surface_interpolated_site(self, index): 300 | return 301 | 302 | 303 | def view_site(self, index: int, return_atoms: bool = False) -> Atoms: 304 | """ 305 | Visualizes a specific site. 306 | 307 | Args: 308 | index (int): The index of the site. 309 | return_atoms (bool, optional): Whether to return the atoms object. Defaults to False. 310 | 311 | Returns: 312 | Atoms: The ASE Atoms object for the site if return_atoms is True. 313 | """ 314 | site_atoms = self.get_site(index) 315 | site_atoms += self.atoms[site_atoms.info["topology"]] 316 | for x in [np.round(x, 1) for x in np.arange(0.1, 2.1, 0.1)]: 317 | site_atoms.append( 318 | Atom( 319 | "X", 320 | position=site_atoms.info["coordinates"] 321 | + site_atoms.info["n_vector"] * x, 322 | ) 323 | ) 324 | if return_atoms: 325 | return site_atoms 326 | else: 327 | view(site_atoms) 328 | 329 | def view_surface( 330 | self, 331 | return_atoms: bool = False, 332 | explicit_marker: str = None, 333 | mode: str = "normal", 334 | ) -> Atoms: 335 | """ 336 | Visualizes the entire surface. 337 | 338 | Args: 339 | return_atoms (bool, optional): Whether to return the atoms object. Defaults to False. 340 | explicit_marker (str, optional): The marker symbol to use for visualization. Defaults to None. 341 | mode (str, optional): choose from ['normal', hedgehog]. Defaults to 'normal'. 342 | 343 | Returns: 344 | Atoms: The ASE Atoms object for the surface if return_atoms is True. 345 | """ 346 | view_atoms = self.atoms.copy() 347 | inds = list(set([i for ind_ls in self.site_dict["topology"] for i in ind_ls])) 348 | 349 | if explicit_marker: 350 | for i in inds: 351 | view_atoms[i].symbol = explicit_marker 352 | else: 353 | marker_map = _get_marker_map(self.atoms) 354 | for i in inds: 355 | view_atoms[i].number = marker_map[view_atoms[i].number] 356 | 357 | if mode == "hedgehog": 358 | for i in self.site_df.index.values: 359 | view_atoms += self.view_site(i, return_atoms=True)[ 360 | [ 361 | atom.index 362 | for atom in self.view_site(i, return_atoms=True) 363 | if atom.symbol == "X" 364 | ] 365 | ] 366 | if return_atoms: 367 | return view_atoms 368 | else: 369 | view(view_atoms) 370 | 371 | def compare_sites(self, site_index1: int, site_index2: int, **kwargs) -> bool: 372 | """ 373 | Compares two sites for symmetry equivalence. 374 | 375 | Args: 376 | site_index1 (int): The index of the first site. 
377 | site_index2 (int): The index of the second site. 378 | 379 | Returns: 380 | bool: True if the sites are equivalent, False otherwise. 381 | """ 382 | from ase.utils.structure_comparator import SymmetryEquivalenceCheck 383 | 384 | SEC = SymmetryEquivalenceCheck(**kwargs) 385 | site1 = self.get_site(site_index1) 386 | site2 = self.get_site(site_index2) 387 | 388 | for s in [site1, site2]: 389 | if len(s) == 1: 390 | s.positions += [0, 0, 0.01] 391 | 392 | return SEC.compare(self.atoms + site1, self.atoms + site2) 393 | 394 | def get_nonequivalent_sites(self, **kwargs) -> List[int]: 395 | """ 396 | Returns a list of indices for nonequivalent sites. 397 | 398 | Returns: 399 | List[int]: A list of indices for nonequivalent sites. 400 | """ 401 | original = [] 402 | i_s = self.site_df.index.values 403 | matches = np.array([False for _ in i_s]) 404 | 405 | for i in i_s: 406 | if not matches[i]: 407 | m = [self.compare_sites(i, j, **kwargs) for j in i_s] 408 | matches += m 409 | matches = matches > 0 410 | original.append(i) 411 | if all(matches): 412 | break 413 | return original 414 | 415 | def sym_reduce(self, **kwargs): 416 | """ 417 | Reduces the site DataFrame to nonequivalent sites. 418 | """ 419 | include = self.get_nonequivalent_sites(**kwargs) 420 | include_filter = [i in include for i in self.site_df.index.values] 421 | self.site_df = self.site_df[include_filter] 422 | self.site_dict = self.site_df.to_dict(orient="list") 423 | 424 | def get_populated_sites( 425 | self, 426 | fragment, 427 | site_index="all", 428 | sample_rotation=True, 429 | mode="heuristic", 430 | conformers_per_site_cap=None, 431 | overlap_thr=1.5, 432 | verbose=False, 433 | ): 434 | """ 435 | Populates the specified sites with the given fragment, optimizing the orientation to minimize overlap. 436 | 437 | Parameters: 438 | fragment (object): An object containing the fragment to be attached. 439 | site_index (str or int): The index of the site to be populated. Default is 'all'. 440 | sample_rotation (bool): Whether to sample different rotations of the fragment. Default is True. 441 | mode (str): The mode of operation. Can be 'heuristic' or 'all'. Default is 'heuristic'. 442 | conformers_per_site_cap (int or None): The maximum number of conformers per site. Default is None. 443 | overlap_thr (float): The overlap threshold. Default is 1.5. 444 | verbose (bool): Whether to print detailed information during execution. Default is False. 445 | 446 | Returns: 447 | list: A list containing the optimized atoms objects for each site. 448 | 449 | Raises: 450 | ValueError: If the mode is not implemented or if the fragment object is invalid. 
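Example (illustrative sketch, not part of the original file; `surf` is assumed to be an initialized Surface and 'ClC' a hypothetical monodentate surrogate SMILES):

    >>> frag = Fragment('ClC', to_initialize=10)
    >>> out_trj = surf.get_populated_sites(frag, mode='heuristic',
    ...                                    conformers_per_site_cap=2, overlap_thr=1.5)
    >>> [a.info['adsorbate_info']['adsorbate_formula'] for a in out_trj]

In 'heuristic' mode, 'S1S'-marked fragments are only placed on sites with connectivity > 1, while 'Cl'-marked fragments are only placed on top sites (connectivity == 1).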
451 | """ 452 | 453 | all_sites = {} 454 | site_df = self.site_df 455 | 456 | if mode.lower() == "all": 457 | sites = [site_df.loc[i].to_dict() for i in site_df.index.values] 458 | 459 | elif mode.lower() == "heuristic": 460 | all_sites["S1S"] = [ 461 | site_df.loc[i].to_dict() 462 | for i in site_df[site_df.connectivity > 1].index.values 463 | ] 464 | all_sites["Cl"] = [ 465 | site_df.loc[i].to_dict() 466 | for i in site_df[site_df.connectivity == 1].index.values 467 | ] 468 | 469 | if fragment.smile[:3] == "S1S": 470 | sites = all_sites["S1S"] 471 | 472 | if fragment.smile[:2] == "Cl": 473 | sites = all_sites["Cl"] 474 | 475 | else: 476 | raise ValueError("argument 'mode' can be 'heuristic' or 'all'") 477 | return 478 | 479 | if sample_rotation: 480 | conformers = [] 481 | for i, _ in enumerate(fragment.conformers): 482 | c = fragment.get_conformer(i) 483 | if c.info["smiles"][:3] == "S1S": 484 | angles = [0, 180] 485 | if c.info["smiles"][:2] == "Cl": 486 | angles = [a for a in range(0, 360, 45)] 487 | for a in angles: 488 | ca = c.copy() 489 | ca.rotate(a, "z") 490 | conformers.append(ca) 491 | else: 492 | conformers = [c.copy() for c in fragment.conformers] 493 | 494 | out_trj = [] 495 | 496 | if verbose: 497 | print("conformers", len(conformers)) 498 | print("sites", len(sites)) 499 | 500 | for site in sites: 501 | c_trj = [] 502 | for conformer in conformers: 503 | c_trj += conformer_to_site( 504 | self.atoms, site, conformer, mode="optimize", overlap_thr=0 505 | ) # the zero is intentional 506 | 507 | if conformers_per_site_cap != None: 508 | c_trj = [atoms for atoms in c_trj if atoms.info["mdf"] > overlap_thr] 509 | 510 | if len(c_trj) > 1: 511 | c_trj = get_sorted_by_snap_dist(c_trj)[ 512 | : int(np.min([conformers_per_site_cap, len(c_trj)])) 513 | ] 514 | elif len(c_trj) == 0: 515 | c_trj = [] 516 | else: 517 | pass 518 | 519 | if len(c_trj) < conformers_per_site_cap and verbose: 520 | print( 521 | f"WARNING: Failed to find requested number of conformers with condition: ovelap_thr = {overlap_thr}. Found {len(c_trj)} / {conformers_per_site_cap}. Consider setting a higher Fragment(to_initialize = < N >)" 522 | ) 523 | if len(c_trj) == conformers_per_site_cap and verbose: 524 | print( 525 | f"SUCCESS! Found the requested numer of conformers with condition: ovelap_thr = {overlap_thr}. Found {len(c_trj)} / {conformers_per_site_cap}." 526 | ) 527 | 528 | for atoms in c_trj: 529 | atoms.info["adsorbate_info"] = {} 530 | atoms.info["adsorbate_info"]["site"] = make_site_info_writable(site) 531 | atoms.info["adsorbate_info"]["smiles"] = fragment.smile 532 | atoms.info["adsorbate_info"]["mdf"] = atoms.info.pop("mdf") 533 | formula = atoms[ 534 | [ 535 | atom.index 536 | for atom in atoms 537 | if atoms.arrays["fragments"][atom.index] 538 | == max(atoms.arrays["fragments"]) 539 | ] 540 | ].get_chemical_formula() 541 | atoms.info["adsorbate_info"]["adsorbate_formula"] = formula 542 | 543 | out_trj += c_trj 544 | 545 | return out_trj 546 | 547 | 548 | def _get_marker_map(atoms: Atoms) -> Dict[int, int]: 549 | """ 550 | Generates a marker map for visualizing the surface. 551 | 552 | Args: 553 | atoms (Atoms): The ASE Atoms object representing the surface. 554 | 555 | Returns: 556 | Dict[int, int]: A dictionary mapping atomic numbers to marker atomic numbers. 
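Worked example (illustrative, not part of the original file): a pure Cu slab (atomic number 29) yields {29: 30}, i.e. the marked surface Cu atoms are displayed as Zn so they stand out in the viewer; if the incremented number is already present in the structure, the next free atomic number is used instead.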
557 | """ 558 | marker_map = {} 559 | at_no = set(atoms.get_atomic_numbers()) 560 | for atomic_number in at_no: 561 | done = False 562 | swap_atomic_number = atomic_number + 1 563 | while not done: 564 | if swap_atomic_number not in at_no: 565 | done = True 566 | else: 567 | swap_atomic_number += 1 568 | marker_map[atomic_number] = swap_atomic_number 569 | 570 | for k, v in marker_map.items(): 571 | print( 572 | f"Visualizing surface {ase.symbols.chemical_symbols[k]} atoms as {ase.symbols.chemical_symbols[v]}" 573 | ) 574 | # print(f'Visualizing surface {ase.symbols.symbols([k]).get_chemical_formula()} atoms as {ase.symbols.symbols([v]).get_chemical_formula()}') 575 | return marker_map 576 | 577 | 578 | class ActiveSite(Surface): 579 | """ 580 | Base class for initializing reaction fragments. 581 | 582 | Attributes: 583 | atoms (Atoms): The ASE Atoms object representing the surface. 584 | must_include (List[int]): List of atom indices that must be included in the active site. 585 | must_exclude (List[int]): List of atom indices that must be excluded from the active site. 586 | keep_tops (bool): Whether to keep top sites. 587 | """ 588 | 589 | def __init__( 590 | self, 591 | atoms: Atoms, 592 | must_include: List[int] = [], 593 | must_exclude: List[int] = [], 594 | keep_tops: bool = True, 595 | ): 596 | """ 597 | Initialize attributes. 598 | 599 | Args: 600 | atoms (Atoms): The ASE Atoms object representing the surface. 601 | must_include (List[int], optional): List of atom indices that must be included in the active site. Defaults to []. 602 | must_exclude (List[int], optional): List of atom indices that must be excluded from the active site. Defaults to []. 603 | keep_tops (bool, optional): Whether to keep top sites. Defaults to True. 604 | """ 605 | super().__init__(atoms) 606 | self.must_include = must_include 607 | self.must_exclude = must_exclude 608 | self.keep_tops = keep_tops 609 | self.pop_sites() 610 | 611 | def pop_sites(self): 612 | """ 613 | Filters the sites based on must_include and must_exclude lists. 
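Example (illustrative sketch, not part of the original file; `slab` is assumed to be an ASE Atoms slab and index 42 a hypothetical dopant atom of interest):

    >>> active = ActiveSite(slab, must_include=[42], must_exclude=[], keep_tops=True)
    >>> active.site_df.topology   # only sites whose topology contains atom 42 (plus their top sites)

If must_include is empty, the full site list is kept unchanged.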
614 | """ 615 | if not self.must_include: 616 | return 617 | 618 | include_filter = np.array( 619 | [ 620 | any(i in v for i in self.must_include) 621 | for v in self.site_df.topology.values 622 | ] 623 | ) 624 | 625 | for e in self.must_exclude: 626 | include_filter &= np.array( 627 | [e not in v for v in self.site_df.topology.values] 628 | ) 629 | 630 | if self.keep_tops: 631 | temp_site_df = self.site_df[include_filter] 632 | tops = list(set(t for v in temp_site_df.topology.values for t in v)) 633 | 634 | for i, v in enumerate(self.site_df.topology.values): 635 | if v in [[t] for t in tops]: 636 | include_filter[i] = True 637 | 638 | self.site_df = self.site_df[include_filter] 639 | self.site_dict = self.site_df.to_dict(orient="list") 640 | -------------------------------------------------------------------------------- /autoadsorbate/plotting.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import seaborn as sns 4 | from ase.visualize.plot import plot_atoms 5 | 6 | from autoadsorbate.utils import count_C_next_to_O 7 | 8 | 9 | def gaussian(x, mu, sig): 10 | return ( 11 | 1.0 12 | / (np.sqrt(2.0 * np.pi) * sig) 13 | * np.exp(-np.power((x - mu) / sig, 2.0) / 2) 14 | / 100 15 | ) 16 | 17 | 18 | def normalize_energy_values(energy_values, mode="intergal"): 19 | if mode.lower() == "integral": 20 | energy_values = energy_values / np.sum(energy_values) 21 | elif mode.lower() == "max": 22 | energy_values = energy_values / np.max(energy_values) 23 | else: 24 | raise ValueError("normalize_energy_values supports modes: 'integral', 'max'") 25 | return energy_values 26 | 27 | 28 | def get_gaussian_vector( 29 | e, 30 | std=0.05, 31 | e_min=-0.2, 32 | e_max=3, 33 | resolution=0.01, 34 | normalize=False, 35 | normalize_mode="integral", 36 | ): 37 | energy_range = np.linspace(e_min, e_max, int((e_max - e_min) / resolution)) 38 | energy_values = np.zeros(len(energy_range)) 39 | for i, energy in enumerate(energy_range): 40 | energy_values[i] = gaussian(energy, e, std) 41 | 42 | if normalize: 43 | energy_values = normalize_energy_values(energy_values, mode=normalize_mode) 44 | return energy_range, energy_values 45 | 46 | 47 | def get_gaussian_vectors( 48 | energies, 49 | std=0.05, 50 | e_min=-0.2, 51 | e_max=3, 52 | resolution=0.01, 53 | normalize=True, 54 | normalize_mode="integral", 55 | ): 56 | energy_range = np.linspace(e_min, e_max, int((e_max - e_min) / resolution)) 57 | energy_values = np.zeros(len(energy_range)) 58 | for e in energies: 59 | energy_values += get_gaussian_vector( 60 | e, std=std, e_min=e_min, e_max=e_max, resolution=resolution 61 | )[1] 62 | 63 | if normalize: 64 | energy_values = normalize_energy_values(energy_values, mode=normalize_mode) 65 | 66 | return energy_range, energy_values 67 | 68 | 69 | def energy_descriptor_from_slice( 70 | df_slice, 71 | column="energy_calibrated", 72 | std=0.05, 73 | e_min="auto", 74 | e_max="auto", 75 | resolution="auto", 76 | normalize=True, 77 | normalize_mode="integral", 78 | ): 79 | if e_min == "auto": 80 | e_min = df_slice[column].min() - 5 * std 81 | 82 | if e_max == "auto": 83 | e_max = df_slice[column].max() + 5 * std 84 | 85 | if resolution == "auto": 86 | resolution = std / 7 87 | 88 | energy_range, energy_values = get_gaussian_vectors( 89 | df_slice[column].values, 90 | std=std, 91 | e_min=e_min, 92 | e_max=e_max, 93 | resolution=resolution, 94 | normalize=normalize, 95 | normalize_mode=normalize_mode, 96 | ) 97 | 98 | return energy_range, 
energy_values 99 | 100 | 101 | def filter_xdf(xdf, relaxed_traj): 102 | _xdf = xdf[ 103 | (xdf["energy"] > -100) 104 | & (xdf["energy"] < 40) 105 | & (xdf.bond_change == 0) 106 | & xdf.backbone_formula.isin(["C", "C2", "C2O", "CO", "CO2", "O", "O2"]) 107 | # & (xdf.origin == 'aads') 108 | # & (xdf.H < 7) 109 | ] 110 | 111 | if "array_from_ocp" in _xdf.columns: 112 | _xdf.pop("array_from_ocp") 113 | 114 | C_bonds_O = [] 115 | for i in _xdf.traj_index.values: 116 | sx = count_C_next_to_O(relaxed_traj[i]) 117 | C_bonds_O.append(sx) 118 | _xdf["C_bonds_O"] = C_bonds_O 119 | 120 | backbone = [] 121 | for i in _xdf.index.values: 122 | f = _xdf.backbone_formula.loc[i] 123 | if f == "C2O" and (_xdf.loc[i]["C_bonds_O"] == 2): 124 | f = "COC" 125 | backbone.append(f) 126 | _xdf["backbone"] = backbone 127 | 128 | # set H_max for each backbone manually 129 | map_H_max = {"C": 3, "C2": 6, "C2O": 6, "CO": 4, "CO2": 4, "O": 2, "O2": 2} 130 | map_backbone = dict( 131 | [(v, i) for i, v in enumerate(sorted(list(_xdf.backbone.unique())))] 132 | ) 133 | 134 | # make plot float for plotting energy verus multiple in 2d 135 | map_origin = {"aads": 0, "ocp": 1} 136 | _xdf["plot_float"] = [ 137 | map_backbone[_xdf.backbone.values[i]] 138 | + _xdf.H.values[i] / 20 139 | + map_origin[_xdf.origin.values[i]] * 0.5 140 | for i in range(len(_xdf)) 141 | ] 142 | _xdf["plot_float"] = _xdf["plot_float"] * 0.2 143 | _xdf["H_max"] = [ 144 | map_H_max[_xdf.backbone_formula.values[i]] for i in range(len(_xdf)) 145 | ] 146 | _xdf = _xdf[_xdf["H"] <= _xdf["H_max"]] 147 | 148 | # make reference energy 149 | _xdf["calibrate_keys"] = _xdf.backbone + "-H" + _xdf.H.astype(int).astype(str) 150 | 151 | set_zero_dict = {} 152 | 153 | for k, v in dict(_xdf.groupby(["backbone", "H"]).energy.min()).items(): 154 | key = f"{k[0]}-H{int(k[1])}" 155 | set_zero_dict[key] = v 156 | 157 | group_ref_energy = [] 158 | for k in _xdf["calibrate_keys"]: 159 | group_ref_energy.append(set_zero_dict[k]) 160 | 161 | _xdf["group_ref_energy"] = group_ref_energy 162 | _xdf["energy_calibrated"] = _xdf["energy"] - _xdf["group_ref_energy"] 163 | 164 | print(f"remaining values in DF: {len(_xdf)}") 165 | return _xdf 166 | 167 | 168 | def center_fragment_in_cell(atoms, fragment_inds): 169 | a = atoms.copy() 170 | fragment = atoms.copy()[ 171 | [ 172 | atom.index 173 | for atom in atoms 174 | if atoms.arrays["fragments"][atom.index] in fragment_inds 175 | ] 176 | ] 177 | fragment_center = fragment.get_center_of_mass() 178 | fragment_center[2] = 0 179 | a.positions += -fragment_center + (a.cell[0] + a.cell[1]) / 2 180 | a.wrap() 181 | return a 182 | 183 | 184 | def get_fragment_center(atoms, fragment_index): 185 | a = atoms.copy() 186 | a = a[ 187 | [ 188 | atom.index 189 | for atom in atoms 190 | if a.arrays["fragments"][atom.index] == fragment_index 191 | ] 192 | ] 193 | center = [] 194 | for i in [0, 1, 2]: 195 | center.append( 196 | (np.max(a.positions[:, i]) - np.min(a.positions[:, i])) * 0.5 197 | + np.min(a.positions[:, i]) 198 | ) 199 | return np.array(center) 200 | 201 | 202 | def plot_most_stable(_xdf, relaxed_traj): 203 | fig, axs = plt.subplots( 204 | ncols=len(_xdf.H.unique()), nrows=len(_xdf.backbone.unique()), figsize=[10, 8] 205 | ) 206 | 207 | _xdf = _xdf.sort_values(by=["H", "backbone"]) 208 | 209 | view_atoms = [] 210 | 211 | for i, backbone in enumerate(_xdf.backbone.unique()): 212 | for j, H in enumerate(_xdf.H.unique()): 213 | ax = axs[i, j] 214 | 215 | df_slice = _xdf[_xdf.H.isin([H]) & _xdf.backbone.isin([backbone])] 216 | # 
df_slice.sort_values(by=['energy', 'backbone', 'H'],ascending=True) 217 | df_slice = df_slice[df_slice.energy == df_slice.energy.min()] 218 | 219 | if len(df_slice) > 0: 220 | e = np.round(df_slice.iloc[0].energy, 2) 221 | origin = df_slice.iloc[0].origin 222 | traj_index = df_slice.iloc[0].traj_index 223 | 224 | atoms = relaxed_traj[traj_index].copy() 225 | atoms_center = get_fragment_center(atoms, fragment_index=1) 226 | atoms_center[2] = 0 227 | half_cell = atoms.cell[1] * 0.5 + atoms.cell[0] * 0.5 228 | atoms.positions += -atoms_center + half_cell 229 | atoms.wrap() 230 | # atoms.positions -= half_cell 231 | 232 | plot_atoms(atoms, ax, rotation=("0x,0y,0z"), show_unit_cell=0) 233 | # ax.set_title(atoms.info['adsorbate_info']['smiles'], size=8) 234 | ax.set_title(df_slice.smiles.iloc[0], size=8) 235 | 236 | ax.set_axis_off() 237 | # x = cell[0][0] + cell[1][0] 238 | # y = cell[0][1] + cell[1][1] 239 | ax.set_xlim(half_cell[0] - 3, half_cell[0] + 3) 240 | ax.set_ylim(half_cell[1] - 3, half_cell[1] + 3) 241 | 242 | view_atoms.append(atoms) 243 | 244 | # fig.set_layout_engine(layout='tight') 245 | plt.tight_layout(pad=0.01, w_pad=0.4, h_pad=0.01) 246 | 247 | 248 | def make_hist_plot(_xdf): 249 | fig, axs = plt.subplots( 250 | ncols=len(_xdf.H.unique()), 251 | nrows=len(_xdf.backbone.unique()), 252 | figsize=[10, 10], 253 | sharex=True, 254 | sharey=True, 255 | ) 256 | 257 | _xdf = _xdf.sort_values(by=["H", "backbone"], ascending=True) 258 | 259 | view_atoms = [] 260 | 261 | for i, backbone in enumerate(_xdf.backbone.unique()): 262 | for j, H in enumerate(_xdf.H.unique()): 263 | ax = axs[i, j] 264 | 265 | df_slice = _xdf[_xdf.H.isin([H]) & _xdf.backbone.isin([backbone])] 266 | # df_slice.sort_values(by=['energy', 'backbone', 'H'],ascending=True) 267 | # df_slice=df_slice[df_slice.energy==df_slice.energy.min()] 268 | 269 | if len(df_slice) > 0: 270 | sns.histplot(df_slice, x="energy_calibrated", ax=ax, bins=3, kde=True) 271 | ax.set_title(df_slice.calibrate_keys.values[0], size=8) 272 | # ax.set_ylim(0, 150) 273 | ax.tick_params(axis="x", labelsize=6) 274 | ax.tick_params(axis="y", labelsize=6) 275 | 276 | else: 277 | ax.set_axis_off() 278 | # ax.set_xlim(1, 6) 279 | 280 | # x = cell[0][0] + cell[1][0] 281 | # y = cell[0][1] + cell[1][1] 282 | # ax.set_xlim(x+2, x+9) 283 | # ax.set_ylim(y-3, y+3) 284 | 285 | # view_atoms.append(atoms) 286 | 287 | # fig.set_layout_engine(layout='tight') 288 | plt.tight_layout(pad=0.01, w_pad=0.4, h_pad=0.01) 289 | 290 | 291 | def plot_energy_heatmap( 292 | _xdf, 293 | column, 294 | std, 295 | e_min, 296 | e_max, 297 | resolution, 298 | normalize, 299 | return_heatmap=False, 300 | T=False, 301 | cmap="viridis", 302 | normalize_mode="max", 303 | ax=None, 304 | ): 305 | heat_map = [] 306 | yticklabels = [] 307 | 308 | for i, backbone in enumerate(_xdf.backbone.unique()): 309 | for j, H in enumerate(_xdf.H.unique()): 310 | df_slice = _xdf[_xdf.H.isin([H]) & _xdf.backbone.isin([backbone])] 311 | if len(df_slice) > 0: 312 | v = energy_descriptor_from_slice( 313 | df_slice, 314 | column=column, 315 | std=std, 316 | e_min=e_min, 317 | e_max=e_max, 318 | resolution=resolution, 319 | normalize=normalize, 320 | normalize_mode=normalize_mode, 321 | ) 322 | heat_map.append(v[1]) 323 | yticklabels.append(df_slice.calibrate_keys.values[0]) 324 | heat_map = np.array(heat_map) 325 | 326 | xticklabels = [] 327 | wanted_labels = np.arange(-10, 10, 0.4) 328 | for i, e in enumerate(v[0]): 329 | if any(np.abs(e - wanted_labels) < 1e-2): 330 | label = str(np.round(e, 1)) 331 | if 
label not in xticklabels + ["-0.0"]: 332 | xticklabels.append(label) 333 | else: 334 | xticklabels.append("") 335 | 336 | else: 337 | xticklabels.append("") 338 | 339 | if ax == None: 340 | fig = plt.figure() 341 | ax = fig.add_subplot(111) 342 | if T == False: 343 | sns.heatmap( 344 | heat_map, 345 | xticklabels=xticklabels, 346 | yticklabels=yticklabels, 347 | cbar=False, 348 | ax=ax, 349 | ) 350 | for i in range(heat_map.shape[0] + 1): 351 | ax.axhline(i, color="white", lw=2) 352 | 353 | else: 354 | ax = sns.heatmap( 355 | heat_map.T, 356 | xticklabels=yticklabels, 357 | yticklabels=xticklabels, 358 | cbar=False, 359 | cmap=cmap, 360 | ax=ax, 361 | ) 362 | for i in range(heat_map.shape[1] + 1): 363 | ax.axvline(i, color="white", lw=0.5) 364 | 365 | ax.tick_params(axis="both", which="both", length=0) 366 | ax.tick_params(axis="x", labelsize=6) 367 | ax.tick_params(axis="y", labelsize=6) 368 | ax.invert_yaxis() 369 | 370 | if return_heatmap: 371 | return heat_map 372 | -------------------------------------------------------------------------------- /autoadsorbate/popneb.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from tempfile import NamedTemporaryFile 3 | from typing import List 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from ase.io import read, write 8 | from ase.mep import NEB, NEBTools 9 | from ase.optimize import BFGS, FIRE 10 | 11 | 12 | class popNEB: 13 | """ 14 | A class to perform Nudged Elastic Band (NEB) calculations in 15 | "popped" segments, by relaxing each image allong the 16 | initial chain and running NEB chains between local minima. 17 | 18 | Attributes: 19 | images (List[str]): A list of file paths to the initial images. 20 | n_ini_chain (int): The number of initial chains. 21 | n_chain (int): The number of chains. 22 | max_steps (int): The maximum number of optimization steps. 23 | f_max (float): The maximum force convergence criterion. 24 | ini_chain_steps (int): The number of steps for the initial chain. 25 | compare_mode (str): The mode of comparison for structures. Default is 'cartesian'. 26 | compare_threshold (float): The threshold for comparison. 27 | compare_kwargs (dict): Additional keyword arguments for comparison. 28 | calc (object): The calculator to be used for NEB calculations. 29 | calc_kwargs (dict): Additional keyword arguments for the calculator. 30 | store_all_structures (bool): Whether to store all structures during the calculation. 31 | 32 | Methods: 33 | __init__: Initializes the popNEB class with the provided parameters. 
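Example:
    A minimal sketch of the intended workflow. It assumes an initial guess chain
    read from an .xyz file (hypothetical filename) and uses the built-in ASE EMT
    calculator for speed; keywords not shown keep their defaults.

        from ase.io import read
        from autoadsorbate.popneb import popNEB

        images = read("initial_chain.xyz", index=":")
        pneb = popNEB(images=images, calc={"name": "emt"}, n_chain=5, max_steps=50)
        pneb.run()            # relax images, find local minima, run NEB between them
        pneb.save(path="./")  # writes <uuid>.xyz with all recorded structures
        print(pneb.result_df.head())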
34 | """ 35 | 36 | def __init__( 37 | self, 38 | images: List[str] = [], 39 | n_ini_chain=10, 40 | n_chain: int = 10, 41 | max_steps: int = 100, 42 | f_max: float = 0.05, 43 | ini_chain_steps: int = 5, 44 | compare_mode="cartesian", 45 | compare_threshold=0.3, 46 | compare_kwargs={}, 47 | calc=None, 48 | calc_kwargs={}, 49 | store_all_structures=True, 50 | ): 51 | self._id = str(uuid.uuid1()) 52 | self.store_all_structures = store_all_structures 53 | self.images = images 54 | self.n_chain = n_chain 55 | self.n_ini_chain = n_ini_chain 56 | self.max_steps = max_steps 57 | self.f_max = f_max 58 | self.ini_chain_steps = ini_chain_steps 59 | self.compare_mode = compare_mode 60 | self.compare_threshold = compare_threshold 61 | self.compare_kwargs = compare_kwargs 62 | self.images_original = self.images.copy() 63 | self.calc = calc 64 | self.calc_kwargs = calc_kwargs 65 | self.minima_allong_chain = None 66 | self.neb_path = None 67 | self.results = {} 68 | self.tmp_file = NamedTemporaryFile(suffix=".xyz") 69 | 70 | def save(self, path="./", fname="none"): 71 | write(path + f"{self._id}.xyz", self.result_traj) 72 | 73 | def get_unique_images(self, images=None): 74 | """ 75 | Saves the result trajectory to a file. 76 | 77 | Parameters: 78 | path (str): The directory path where the result file will be saved. 79 | 80 | Returns: 81 | None 82 | """ 83 | if self.compare_mode.lower() == "cartesian": 84 | if images == None: 85 | return compare_cartesian(self.images, self.compare_threshold) 86 | else: 87 | return compare_cartesian(images, self.compare_threshold) 88 | 89 | if self.compare_mode.lower() == "default": 90 | print("Using default SOAP and clustering to determine unique minima!") 91 | if images == None: 92 | return compare_soap_default(self.images) 93 | else: 94 | unique_trj, self.best_clustering = compare_soap_default(images) 95 | print(f"Found {len(unique_trj)} unique local minima!") 96 | return unique_trj 97 | 98 | else: 99 | print(f"compare_mode: '{self.compare_mode}' not implemented ") 100 | 101 | def interpolate_pair(self, initial, final, n_chain): 102 | """ 103 | Generates a series of images for the Nudged Elastic Band (NEB) method by interpolating between initial and final structures. 104 | 105 | Parameters: 106 | initial (object): An ASE atoms object representing the initial structure. 107 | final (object): An ASE atoms object representing the final structure. 108 | n_chain (int): The number of intermediate images to generate. 109 | 110 | Returns: 111 | list: A list of ASE atoms objects representing the interpolated images. 112 | """ 113 | images = [initial.copy()] 114 | for i in range(n_chain): 115 | image = initial.copy() 116 | images.append(image) 117 | images.append(final) 118 | neb = NEB(images) 119 | neb.interpolate() 120 | return neb.images 121 | 122 | def run_few_steps(self, initial, final): 123 | """ 124 | Runs a few steps of the NEB (Nudged Elastic Band) method between the initial and final structures. 125 | 126 | Parameters: 127 | initial (object): An ASE atoms object representing the initial structure. 128 | final (object): An ASE atoms object representing the final structure. 129 | 130 | Returns: 131 | list: A list of ASE atoms objects representing the trajectory after running a few NEB steps. 
132 | """ 133 | dyn_index = "iniNEB" 134 | few_step_trj = self.run_neb( 135 | initial, 136 | final, 137 | self.ini_chain_steps, 138 | self.n_ini_chain, 139 | dyn_index=dyn_index, 140 | neb_type="iniNEB", 141 | ) 142 | return few_step_trj 143 | 144 | def run_neb(self, initial, final, max_steps, n_chain, dyn_index, neb_type="NEB"): 145 | """ 146 | Runs the NEB (Nudged Elastic Band) method between the initial and final structures. 147 | 148 | Parameters: 149 | initial (object): An ASE atoms object representing the initial structure. 150 | final (object): An ASE atoms object representing the final structure. 151 | max_steps (int): The maximum number of optimization steps. 152 | n_chain (int): The number of intermediate images to generate. 153 | dyn_index (str): The index for the dynamics observer. 154 | neb_type (str): The type of NEB calculation. Default is 'NEB'. 155 | 156 | Returns: 157 | list: A list of ASE atoms objects representing the NEB images after running the NEB method. 158 | """ 159 | images = self.interpolate_pair(initial, final, n_chain) 160 | images = self.set_calculator(images, self.calc) 161 | 162 | neb = NEB(images) 163 | qn = BFGS(neb) 164 | qn.attach( 165 | neb_observer, 166 | dyn=qn, 167 | dyn_index=dyn_index, 168 | uid=self._id, 169 | traj_file=self.tmp_file.name, 170 | interval=1, 171 | ) 172 | qn.run(fmax=self.f_max, steps=max_steps) 173 | if "ini" not in neb_type: 174 | self.update_result() 175 | return neb.images 176 | 177 | def relax_images(self, images): 178 | """ 179 | Relaxes a series of images using the FIRE optimizer. 180 | 181 | Parameters: 182 | images (list): A list of ASE atoms objects representing the images to be relaxed. 183 | 184 | Returns: 185 | list: A list of ASE atoms objects representing the relaxed images. 186 | """ 187 | relax_chain = images.copy() 188 | self.set_calculator(relax_chain, self.calc) 189 | for i, atoms in enumerate(relax_chain): 190 | dyn = FIRE(atoms, trajectory=None) 191 | dyn.attach( 192 | relax_observer, 193 | dyn=dyn, 194 | dyn_index=i, 195 | uid=self._id, 196 | traj_file=self.tmp_file.name, 197 | ) 198 | dyn.run(fmax=self.f_max, steps=self.max_steps) 199 | 200 | self.update_result() 201 | return relax_chain 202 | 203 | def set_calculator(self, images): 204 | """ 205 | Sets the calculator for a series of images. 206 | 207 | Parameters: 208 | images (list): A list of ASE atoms objects representing the images. 209 | calculator (object): The calculator to be set for each image. 210 | 211 | Returns: 212 | list: A list of ASE atoms objects with the calculator set. 213 | """ 214 | for a in images: 215 | calc = self.get_calculator() 216 | a.calc = calc 217 | return images 218 | 219 | def get_calculator(self): 220 | """ 221 | Returns the calculator object based on the specified calculator name in the configuration. 222 | 223 | Returns: 224 | object: The calculator object to be used for NEB calculations. 225 | 226 | Raises: 227 | ValueError: If the specified calculator name is not supported. 
228 | """ 229 | if self.calc["name"].lower() == "emt": 230 | from ase.calculators.emt import EMT 231 | 232 | return EMT() 233 | 234 | elif self.calc["name"].lower() == "mace": 235 | from mace.calculators import mace_mp 236 | 237 | calc = mace_mp( 238 | model=self.calc["path"], 239 | dispersion=False, 240 | default_dtype="float64", 241 | device="cpu", 242 | **self.calc_kwargs, 243 | ) 244 | return calc 245 | 246 | else: 247 | print(f"not supported: {self.calc}") 248 | 249 | def run(self): 250 | self.get_minima_allong_chain() 251 | self.run_neb_from_minima() 252 | 253 | def get_minima_allong_chain(self): 254 | """ 255 | Finds and relaxes unique images along the NEB chain to identify local minima. 256 | 257 | Returns: 258 | None 259 | """ 260 | unique_images = self.get_unique_images() 261 | 262 | c_relaxed = [] 263 | for a1, a2 in zip(unique_images, unique_images[1:]): 264 | c_images = self.run_few_steps(a1, a2) 265 | c_relaxed += self.relax_images(c_images) 266 | 267 | self.minima_allong_chain = c_relaxed 268 | self.minima_allong_chain = self.get_unique_images(c_relaxed) 269 | 270 | def run_neb_from_minima(self): 271 | """ 272 | Runs the NEB (Nudged Elastic Band) method between local minima identified along the NEB chain. 273 | 274 | Returns: 275 | None 276 | """ 277 | if self.minima_allong_chain == None: 278 | print("run function: get_minima_allong_chain()") 279 | return 280 | 281 | self.neb_path = [] 282 | dyn_index = 0 283 | for a1, a2 in zip(self.minima_allong_chain, self.minima_allong_chain[1:]): 284 | self.neb_path += self.run_neb( 285 | a1, a2, self.max_steps, self.n_chain, dyn_index 286 | ) 287 | dyn_index += 1 288 | 289 | def update_result(self): 290 | """ 291 | Updates the result trajectory and result DataFrame with information from the temporary file. 292 | 293 | Returns: 294 | None 295 | """ 296 | result_df = [] 297 | if type(self.tmp_file) == str: 298 | self.result_traj = read(self.tmp_file, index=":") 299 | else: 300 | self.result_traj = read(self.tmp_file.name, index=":") 301 | for a in self.result_traj: 302 | result_df.append(a.info["popneb_info"]) 303 | result_df = pd.DataFrame(result_df) 304 | result_df["converged"] = [ 305 | max_f < self.f_max for max_f in result_df.max_f.values 306 | ] 307 | self.result_df = result_df 308 | 309 | def show_neb_report(self, x_scale=1, y_scale=1.5, converged=[True]): 310 | """ 311 | Generates and displays a NEB (Nudged Elastic Band) report plot. 312 | 313 | Parameters: 314 | x_scale (float): The scaling factor for the x-axis of the plot. Default is 1. 315 | y_scale (float): The scaling factor for the y-axis of the plot. Default is 1.5. 316 | converged (list): A list of boolean values indicating whether to include converged results. Default is [True]. 
317 | 318 | Returns: 319 | None 320 | """ 321 | import matplotlib.pyplot as plt 322 | from ase.visualize.plot import plot_atoms 323 | from matplotlib.gridspec import GridSpec 324 | 325 | type = "NEB" 326 | pdf = self.result_df[ 327 | self.result_df.type.isin([type]) & self.result_df.converged.isin(converged) 328 | ] 329 | 330 | fig_size_x = len(pdf.image_index.unique()) * x_scale 331 | fig_size_y = len(pdf.dyn_index.unique()) * y_scale 332 | 333 | fig = plt.figure(layout="constrained", figsize=[fig_size_x, fig_size_y]) 334 | gs = GridSpec( 335 | len(pdf.dyn_index.unique()) + 1, len(pdf.image_index.unique()), figure=fig 336 | ) 337 | 338 | ax1 = fig.add_subplot(gs[0, :]) 339 | 340 | _x = [] 341 | _y = [] 342 | _c = [] 343 | 344 | for i in pdf.dyn_index.unique(): 345 | _df = pdf[pdf.dyn_index == i] 346 | _x += [x * 9 + i * 10 for x in _df.image_index.values / len(_df)] 347 | _y += list(_df.e.values) 348 | _c += [i for v in _df.dyn_index.values] 349 | 350 | structures = [self.result_traj[j].copy() for j in _df.index.values] 351 | 352 | for j in _df.image_index.values: 353 | j = int(j) 354 | ax = fig.add_subplot(gs[i + 1, j]) 355 | _a = structures[j] 356 | plot_atoms(_a, ax=ax) 357 | ax.set_axis_off() 358 | 359 | ax1.scatter(_x, _y, c=_c) 360 | ax1.set_xticks([]) 361 | 362 | fig.suptitle(type) 363 | fig.set_layout_engine(layout="tight") 364 | 365 | plt.show() 366 | 367 | def show_relax_report(self, x_scale=1, y_scale=4): 368 | """ 369 | Generates and displays a relaxation report plot. 370 | 371 | Parameters: 372 | x_scale (float): The scaling factor for the x-axis of the plot. Default is 1. 373 | y_scale (float): The scaling factor for the y-axis of the plot. Default is 4. 374 | 375 | Returns: 376 | None 377 | """ 378 | import matplotlib.pyplot as plt 379 | from ase.visualize.plot import plot_atoms 380 | from matplotlib.gridspec import GridSpec 381 | 382 | type = "RELAX" 383 | pdf = self.result_df[self.result_df.type.isin([type])] 384 | 385 | fig_size_x = len(pdf.dyn_index.unique()) * x_scale 386 | fig_size_y = 1 * y_scale 387 | 388 | fig = plt.figure(layout="constrained", figsize=[fig_size_x, fig_size_y]) 389 | gs = GridSpec(2, len(pdf.dyn_index.unique()), figure=fig) 390 | 391 | ax1 = fig.add_subplot(gs[0, :]) 392 | 393 | cmap = {} 394 | for i, cluster in enumerate(self.best_clustering): 395 | cmap[i] = cluster 396 | 397 | _x = [] 398 | _y = [] 399 | _c = [] 400 | structures = [] 401 | 402 | for i in pdf.dyn_index.unique(): 403 | _df = pdf[pdf.dyn_index == i] 404 | _x += [x * 9 + i * 10 for x in _df.nsteps / len(_df)] 405 | _y += list(_df.e.values) 406 | _c += [cmap[v] for v in _df.dyn_index.values] 407 | 408 | structure_inds = _df[_df.converged == True].index.values 409 | if len(structure_inds) == 1: 410 | structure_i = structure_inds[0] 411 | 412 | structures.append(self.result_traj[structure_i].copy()) 413 | 414 | ax1.scatter(_x, _y, c=_c) 415 | ax1.set_xticks([]) 416 | 417 | axs = [] 418 | for i in pdf.dyn_index.unique(): 419 | ax = fig.add_subplot(gs[1, i]) 420 | axs.append(ax) 421 | plot_atoms(structures[i], ax=axs[i]) 422 | ax.set_axis_off() 423 | 424 | fig.suptitle(type) 425 | fig.set_layout_engine(layout="tight") 426 | 427 | plt.show() 428 | 429 | 430 | def get_all_speacies(traj): 431 | """ 432 | Extracts all unique atomic species from a trajectory. 433 | 434 | Parameters: 435 | traj (list): A list of ASE atoms objects representing the trajectory. 436 | 437 | Returns: 438 | list: A list of unique atomic species present in the trajectory. 
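Example:
    Minimal sketch; the returned order is not guaranteed because a set is used internally.

        from ase import Atoms
        get_all_speacies([Atoms("CuO"), Atoms("H2O")])  # -> e.g. ['Cu', 'O', 'H']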
439 | """ 440 | all_syms = [] 441 | for atoms in traj: 442 | all_syms += [atom.symbol for atom in atoms] 443 | all_syms = list(set(all_syms)) 444 | return all_syms 445 | 446 | 447 | def get_flattened_soap_desc(soap, traj): 448 | """ 449 | Generates flattened SOAP (Smooth Overlap of Atomic Positions) descriptors for a trajectory. 450 | 451 | Parameters: 452 | soap (object): A SOAP descriptor object. 453 | traj (list): A list of ASE atoms objects representing the trajectory. 454 | 455 | Returns: 456 | numpy.ndarray: A 2D array where each row is a flattened SOAP descriptor for an atomic structure in the trajectory. 457 | """ 458 | flat_soap = [] 459 | for atoms in traj: 460 | flat_soap.append(soap.create(atoms).flatten()) 461 | return np.array(flat_soap) 462 | 463 | 464 | def get_default_soap(traj): 465 | """ 466 | Creates a default SOAP (Smooth Overlap of Atomic Positions) descriptor object for a trajectory. 467 | 468 | Parameters: 469 | traj (list): A list of ASE atoms objects representing the trajectory. 470 | 471 | Returns: 472 | object: A SOAP descriptor object configured with the species present in the trajectory. 473 | """ 474 | from dscribe.descriptors import SOAP 475 | 476 | species = get_all_speacies(traj) 477 | soap = SOAP( 478 | species=species, 479 | r_cut=4.5, 480 | n_max=8, 481 | l_max=4, 482 | sigma=0.5, 483 | periodic=False, 484 | rbf="gto", 485 | crossover=True, 486 | ) 487 | return soap 488 | 489 | 490 | def get_best_clustering(X): 491 | """ 492 | Finds the best clustering for the given data using KMeans and silhouette score. 493 | 494 | Parameters: 495 | X (numpy.ndarray): A 2D array where each row represents a data point. 496 | 497 | Returns: 498 | numpy.ndarray: An array of cluster labels for the best clustering. 499 | """ 500 | from sklearn.cluster import KMeans 501 | from sklearn.metrics import silhouette_score 502 | 503 | model_sizes = [k for k in range(3, len(X))] 504 | 505 | kmeans_per_k = [KMeans(n_clusters=k, random_state=42).fit(X) for k in model_sizes] 506 | # inertias = [model.inertia_ for model in kmeans_per_k] 507 | y = [model.fit_predict(X) for model in kmeans_per_k] 508 | 509 | silhouette_scores = [silhouette_score(X, model.labels_) for model in kmeans_per_k] 510 | best_clustering_ind = silhouette_scores.index( 511 | max(silhouette_scores) 512 | ) # index shift due to cut list 513 | best_clustering = y[best_clustering_ind] 514 | return best_clustering 515 | 516 | 517 | def get_best_soap_clustering(traj): 518 | """ 519 | Finds the best clustering for a trajectory using SOAP (Smooth Overlap of Atomic Positions) descriptors. 520 | 521 | Parameters: 522 | traj (list): A list of ASE atoms objects representing the trajectory. 523 | 524 | Returns: 525 | numpy.ndarray: An array of cluster labels for the best clustering. 
526 | """ 527 | soap = get_default_soap(traj) 528 | flat_soap = get_flattened_soap_desc(soap, traj) 529 | return get_best_clustering(flat_soap) 530 | 531 | 532 | def compare_soap_default(traj): 533 | if len(traj) <= 2: 534 | return traj 535 | unique_trj = [] 536 | best_soap_clustering = get_best_soap_clustering(traj) 537 | # print(best_soap_clustering) 538 | included_clusters = [] 539 | for i, a in enumerate(traj): 540 | if best_soap_clustering[i] not in included_clusters: 541 | unique_trj.append(a.copy()) 542 | included_clusters.append(best_soap_clustering[i]) 543 | return unique_trj, best_soap_clustering 544 | 545 | 546 | def compare_cartesian(self, images, compare_threshold): 547 | """ 548 | Compares SOAP (Smooth Overlap of Atomic Positions) descriptors for a trajectory and returns unique structures. 549 | 550 | Parameters: 551 | traj (list): A list of ASE atoms objects representing the trajectory. 552 | 553 | Returns: 554 | tuple: A tuple containing: 555 | - unique_trj (list): A list of unique ASE atoms objects based on SOAP clustering. 556 | - best_soap_clustering (numpy.ndarray): An array of cluster labels for the best clustering. 557 | """ 558 | unique_index = [0] 559 | for i, atoms in enumerate(images): 560 | deltas = [ 561 | np.sum(np.abs(atoms.positions - images[j].positions)) for j in unique_index 562 | ] 563 | if all([d > compare_threshold for d in deltas]): 564 | unique_index.append(i) 565 | return [images[i].copy() for i in unique_index] 566 | 567 | 568 | def neb_observer(dyn, dyn_index, neb_type="NEB", uid=None, traj_file="default"): 569 | """ 570 | Observes and records the state of the NEB (Nudged Elastic Band) calculation at each step. 571 | 572 | Parameters: 573 | dyn (object): The ASE dynamics object containing the NEB calculation. 574 | dyn_index (int): The index for the dynamics observer. 575 | neb_type (str): The type of NEB calculation. Default is 'NEB'. 576 | uid (str or None): A unique identifier for the NEB calculation. Default is None. 577 | traj_file (str): The file path to save the trajectory. Default is 'default'. 578 | 579 | Returns: 580 | None 581 | """ 582 | neb = dyn.atoms 583 | epot = [] 584 | 585 | nebtools = NEBTools(neb.images) 586 | max_f_chain = nebtools.get_fmax() 587 | 588 | for i, a in enumerate(neb.images): 589 | e = a.get_potential_energy() 590 | forces_popneb = a.get_forces() 591 | max_f = np.max(np.linalg.norm(forces_popneb, axis=1)) 592 | 593 | converged = 0 594 | last_step = 0 595 | if max_f < dyn.fmax: 596 | converged = 1 597 | if max_f < dyn.fmax or dyn.nsteps >= dyn.max_steps - 1: 598 | last_step = 1 599 | # converged = max_f_chain < dyn.fmax 600 | 601 | a.info["popneb_info"] = { 602 | "type": neb_type, 603 | "e": e, 604 | "max_f": max_f_chain, 605 | # 'chain_max_f': max_f_chain, 606 | # 'max_f':max_f, 607 | "image_index": i, 608 | "dyn_index": dyn_index, 609 | "nsteps": dyn.nsteps, 610 | "converged": converged, 611 | "last_step": last_step, 612 | } 613 | if uid != None: 614 | a.info["popneb_info"]["_id"] = uid 615 | 616 | a.arrays["forces_popneb"] = forces_popneb 617 | 618 | if traj_file == "default": 619 | write(f"./{uid}.xyz", a, append=True) 620 | else: 621 | write(traj_file, a, append=True) 622 | # out_traj.append([a for a in neb.images]) 623 | 624 | 625 | def relax_observer(dyn, dyn_index, uid=None, traj_file="default"): 626 | """ 627 | Observes and records the state of the relaxation calculation at each step. 628 | 629 | Parameters: 630 | dyn (object): The ASE dynamics object containing the relaxation calculation. 
631 | dyn_index (int): The index for the dynamics observer. 632 | uid (str or None): A unique identifier for the relaxation calculation. Default is None. 633 | traj_file (str): The file path to save the trajectory. Default is 'default'. 634 | 635 | Returns: 636 | None 637 | """ 638 | a = dyn.atoms 639 | 640 | e = a.get_potential_energy() 641 | forces_popneb = a.get_forces() 642 | max_f = np.max(np.linalg.norm(forces_popneb, axis=1)) 643 | 644 | converged = 0 645 | last_step = 0 646 | if max_f < dyn.fmax: 647 | converged = 1 648 | if max_f < dyn.fmax or dyn.nsteps == dyn.max_steps - 1: 649 | last_step = 1 650 | 651 | a.info["popneb_info"] = { 652 | "type": "RELAX", 653 | "e": e, 654 | "max_f": max_f, 655 | "dyn_index": dyn_index, 656 | "nsteps": dyn.nsteps, 657 | "converged": converged, 658 | "last_step": last_step, 659 | } 660 | if uid != None: 661 | a.info["popneb_info"]["_id"] = uid 662 | 663 | a.arrays["forces_popneb"] = forces_popneb 664 | 665 | if traj_file == "default": 666 | write(f"./{uid}.xyz", a, append=True) 667 | else: 668 | write(traj_file, a, append=True) 669 | 670 | 671 | # out_traj.append(a) 672 | -------------------------------------------------------------------------------- /autoadsorbate/raster_utilities.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ase.build.tools import sort as ase_sort 3 | 4 | # import matplotlib.image as mpimg 5 | # from ase.io import read, write 6 | # from ase.visualize import view 7 | # import matplotlib.pyplot as plt 8 | 9 | 10 | def snap_to_grid(atoms): 11 | """ 12 | Returns the rounded positions of atoms in the given atomic structure. 13 | 14 | Parameters: 15 | atoms (object): An ASE atoms object representing the atomic structure. 16 | 17 | Returns: 18 | numpy.ndarray: A 2D array of rounded atomic positions. 19 | """ 20 | a = atoms.copy() 21 | pos = np.round(a.positions, 2) 22 | return pos 23 | 24 | 25 | def get_pixel_positions(atoms): 26 | """ 27 | Returns the pixel positions of atoms in the given atomic structure by snapping them to a grid. 28 | 29 | Parameters: 30 | atoms (object): An ASE atoms object representing the atomic structure. 31 | 32 | Returns: 33 | numpy.ndarray: A 2D array of pixel positions of the atoms, excluding the last column. 34 | """ 35 | return snap_to_grid(atoms)[:, :-1] 36 | 37 | 38 | def get_pixel_grid(atoms, pixel_per_angstrom=100): 39 | """ 40 | Generates a pixel grid based on the atomic structure and specified pixel density. 41 | 42 | Parameters: 43 | atoms (object): An ASE atoms object representing the atomic structure. 44 | pixel_per_angstrom (int): The number of pixels per angstrom. Default is 100. 45 | 46 | Returns: 47 | numpy.ndarray: A 2D array representing the pixel grid. 48 | """ 49 | x_footprint = atoms.cell[0][0] + atoms.cell[1][0] 50 | y_footprint = atoms.cell[0][1] + atoms.cell[1][1] 51 | 52 | x_size = int(np.ceil((x_footprint) * pixel_per_angstrom)) 53 | y_size = int(np.ceil((y_footprint) * pixel_per_angstrom)) 54 | 55 | grid = np.zeros([y_size, x_size]) 56 | 57 | return grid 58 | 59 | 60 | def createKernel(radius, value): 61 | """ 62 | Creates a circular kernel with the specified radius and value. 63 | 64 | Parameters: 65 | radius (int): The radius of the circular kernel. 66 | value (float): The value to assign to the elements within the circular area. 67 | 68 | Returns: 69 | numpy.ndarray: A 2D array representing the circular kernel. 
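Example:
    A radius-1 kernel filled with 1.0 (a plus-shaped disc on a 3x3 grid):

        createKernel(1, 1.0)
        # array([[0., 1., 0.],
        #        [1., 1., 1.],
        #        [0., 1., 0.]])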
70 | """ 71 | kernel = np.zeros((2 * radius + 1, 2 * radius + 1)) 72 | y, x = np.ogrid[-radius : radius + 1, -radius : radius + 1] 73 | mask = x**2 + y**2 <= radius**2 74 | kernel[mask] = value 75 | return kernel 76 | 77 | 78 | def get_surface_from_rasterized_top_view( 79 | atoms_org, pixel_per_angstrom=10, return_raster=False 80 | ): 81 | """ 82 | Identifies surface atoms from a rasterized top view of the atomic structure. 83 | 84 | Parameters: 85 | atoms_org (object): An ASE atoms object representing the original atomic structure. 86 | pixel_per_angstrom (int): The number of pixels per angstrom for the rasterization. Default is 10. 87 | return_raster (bool): Whether to return the rasterized grid along with the surface indices. Default is False. 88 | 89 | Returns: 90 | list or tuple: If return_raster is False, returns a list of surface atom indices. 91 | If return_raster is True, returns a tuple containing the list of surface atom indices and the rasterized grid. 92 | """ 93 | atoms = atoms_org.copy() 94 | atoms.arrays["original_index"] = np.array([atom.index for atom in atoms]) 95 | atoms = atoms * [2, 2, 1] 96 | atoms = ase_sort(atoms, tags=atoms.positions[:, 2]) 97 | pixel_grid = get_pixel_grid(atoms, pixel_per_angstrom=pixel_per_angstrom) 98 | h, w = pixel_grid.shape[1], pixel_grid.shape[0] # img size 99 | A = pixel_grid.copy() 100 | 101 | mapping = {} 102 | 103 | for i, pos in enumerate(get_pixel_positions(atoms)): 104 | y, x = pos[1] * pixel_per_angstrom, pos[0] * pixel_per_angstrom 105 | xx, yy = np.meshgrid(np.linspace(0, h - 1, h), np.linspace(0, w - 1, w)) 106 | radius = pixel_per_angstrom * 2 107 | mask = (xx - x) ** 2 + (yy - y) ** 2 < radius**2 108 | A[mask] = i 109 | mapping[i] = atoms.arrays["original_index"][i] 110 | 111 | B = A[ 112 | int(len(A) * 0.2) : int(len(A) * 0.8), int(len(A.T) * 0.2) : int(len(A.T) * 0.8) 113 | ] 114 | surf_inds = list(set([mapping[val] for val in np.unique(B)])) 115 | 116 | if return_raster: 117 | return surf_inds, B 118 | else: 119 | return surf_inds 120 | -------------------------------------------------------------------------------- /autoadsorbate/string_utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from .Smile import check_smile, remove_canonical_duplicates 4 | 5 | 6 | def make_base(backbone_info): 7 | """ 8 | Creates a list of atomic species based on the provided backbone information. 9 | 10 | Parameters: 11 | backbone_info (dict): A dictionary where keys are atomic species (str) and values are their counts (int). 12 | 13 | Returns: 14 | list: A list containing the atomic species repeated according to their counts. 15 | """ 16 | base = [] 17 | for specie, count in backbone_info.items(): 18 | for i in range(count): 19 | base.append(specie) 20 | return base 21 | 22 | 23 | def get_all_bases(backbone_info): 24 | """ 25 | Generates all unique permutations of atomic species based on the provided backbone information. 26 | 27 | Parameters: 28 | backbone_info (dict): A dictionary where keys are atomic species (str) and values are their counts (int). 29 | 30 | Returns: 31 | list: A list of tuples, each containing a unique permutation of the atomic species. 32 | """ 33 | base = make_base(backbone_info) 34 | return list(set(list(itertools.permutations(base)))) 35 | 36 | 37 | def get_all_backbones(backbone_info): 38 | """ 39 | Generates all unique permutations of atomic species based on the provided backbone information. 
40 | 41 | Parameters: 42 | backbone_info (dict): A dictionary where keys are atomic species (str) and values are their counts (int). 43 | 44 | Returns: 45 | list: A list of tuples, each containing a unique permutation of the atomic species. 46 | """ 47 | base = make_base(backbone_info) 48 | return list(set(list(itertools.permutations(base)))) 49 | 50 | 51 | # base = make_base(backbone_info) 52 | # return get_all_bases(base) 53 | 54 | 55 | def get_all_huged_backbones(backbone, a, b): 56 | """ 57 | Generates all possible backbones by inserting two specified atomic species into all possible positions. 58 | 59 | Parameters: 60 | backbone (list): A list of atomic species representing the backbone. 61 | a (str): The first atomic species to be inserted. 62 | b (str): The second atomic species to be inserted. 63 | 64 | Returns: 65 | list: A list of lists, each containing a unique backbone with the two atomic species inserted. 66 | """ 67 | rng = [n for n in range(0, len(backbone) + 1)] 68 | out_trj = [] 69 | for i in rng: 70 | for j in rng[i + 1 :]: 71 | bckb = list(backbone).copy() 72 | bckb.insert(i, a) 73 | bckb.insert(j + 1, b) 74 | out_trj.append(bckb) 75 | return out_trj 76 | 77 | 78 | def get_cl_marked(backbones): 79 | """ 80 | Adds a 'Cl' atom at the beginning of each backbone in the provided list of backbones. 81 | 82 | Parameters: 83 | backbones (list): A list of backbones, where each backbone is a list of atomic species. 84 | 85 | Returns: 86 | list: A list of backbones with 'Cl' added at the beginning of each. 87 | """ 88 | out_trj = [] 89 | for backbone in backbones: 90 | x = ["Cl"] + list(backbone) 91 | out_trj.append(x) 92 | return out_trj 93 | 94 | 95 | def get_s1s_marked(backbones): 96 | """ 97 | Adds 'S1S' and '1' atoms at various positions in each backbone in the provided list of backbones. 98 | 99 | Parameters: 100 | backbones (list): A list of backbones, where each backbone is a list of atomic species. 101 | 102 | Returns: 103 | list: A list of backbones with 'S1S' and '1' added at various positions. 104 | """ 105 | out_trj = [] 106 | for backbone in backbones: 107 | backbone = list(backbone) 108 | rng = [n for n in range(0, len(backbone) + 1)] 109 | 110 | for i in [0]: 111 | for j in rng[i + 1 :]: 112 | bckb = backbone.copy() 113 | bckb.insert(i, "S1S") 114 | bckb.insert(j + 1, "1") 115 | out_trj.append(bckb) 116 | return out_trj 117 | 118 | 119 | def insert_unsaturated_bond(backbones, bond): 120 | """ 121 | Inserts an unsaturated bond at various positions in each backbone in the provided list of backbones. 122 | 123 | Parameters: 124 | backbones (list): A list of backbones, where each backbone is a list of atomic species. 125 | bond (str): The unsaturated bond to be inserted. 126 | 127 | Returns: 128 | list: A list of backbones with the unsaturated bond inserted at various positions. 129 | """ 130 | out_trj = [] 131 | for backbone in backbones: 132 | backbone = list(backbone) 133 | rng = [n for n in range(0, len(backbone) + 1)] 134 | 135 | for i in [0]: 136 | for j in rng[i + 1 :]: 137 | bckb = backbone.copy() 138 | # bckb.insert(i, 'S1S') 139 | bckb.insert(j, bond) 140 | out_trj.append(bckb) 141 | return out_trj 142 | 143 | 144 | def get_all_side_chains(backbones): 145 | """ 146 | Generates all possible side chains by inserting '(' and ')' at various positions in each backbone in the provided list of backbones. 147 | 148 | Parameters: 149 | backbones (list): A list of backbones, where each backbone is a list of atomic species. 
150 | 151 | Returns: 152 | list: A list of backbones with '(' and ')' inserted at various positions to represent side chains. 153 | """ 154 | out_trj = [] 155 | for backbone in backbones: 156 | out_trj += get_all_huged_backbones(backbone, "(", ")") 157 | # rng = [n for n in range(0, len(backbone)+1)] 158 | # out_trj = [] 159 | # for i in rng: 160 | # for j in rng[i+1:]: 161 | # bckb = backbone.copy() 162 | # bckb.insert(i, '(') 163 | # bckb.insert(j+1, ')') 164 | # out_trj.append(bckb) 165 | return out_trj 166 | 167 | 168 | def get_rings(backbone, ring_marker): 169 | """ 170 | Generates all possible ring structures by inserting a ring marker at various positions in the provided backbone. 171 | 172 | Parameters: 173 | backbone (list): A list of atomic species representing the backbone. 174 | ring_marker (str): The marker to be used for indicating ring positions. 175 | 176 | Returns: 177 | list: A list of backbones with the ring marker inserted at various positions to represent rings. 178 | """ 179 | out_trj = get_all_huged_backbones(backbone, str(ring_marker), str(ring_marker)) 180 | return out_trj 181 | 182 | 183 | def get_all_ringed(backbones, ring_marker): 184 | """ 185 | Generates all possible ring structures for each backbone in the provided list of backbones by inserting a ring marker at various positions. 186 | 187 | Parameters: 188 | backbones (list): A list of backbones, where each backbone is a list of atomic species. 189 | ring_marker (str): The marker to be used for indicating ring positions. 190 | 191 | Returns: 192 | list: A list of backbones with the ring marker inserted at various positions to represent rings. 193 | """ 194 | out_trj = [] 195 | for backbone in backbones: 196 | backbone = list(backbone) 197 | out_trj += get_rings(backbone, ring_marker) 198 | return out_trj 199 | 200 | 201 | def make_unsaturated(backbone, brackets): 202 | """ 203 | Generates all possible unsaturated structures by inserting brackets around each atomic species in the provided backbone. 204 | 205 | Parameters: 206 | backbone (list): A list of atomic species representing the backbone. 207 | brackets (list): A list of bracket pairs to be used for indicating unsaturation. 208 | 209 | Returns: 210 | list: A list of backbones with brackets inserted around each atomic species to represent unsaturation. 211 | """ 212 | unsaturated = [] 213 | open_bracket = brackets[0] 214 | for i, _ in enumerate(backbone): 215 | for closed_bracket in brackets[1:]: 216 | bckb = backbone.copy() 217 | if open_bracket not in bckb[i]: 218 | bckb[i] = f"{open_bracket}{bckb[i]}{closed_bracket}" 219 | unsaturated.append(bckb) 220 | 221 | return unsaturated 222 | 223 | 224 | def make_all_unsaturated_backbones(all_backbones, brackets): 225 | """ 226 | Generates all possible unsaturated structures for each backbone in the provided list of backbones by inserting brackets around each atomic species. 227 | 228 | Parameters: 229 | all_backbones (list): A list of backbones, where each backbone is a list of atomic species. 230 | brackets (list): A list of bracket pairs to be used for indicating unsaturation. 231 | 232 | Returns: 233 | list: A list of backbones with brackets inserted around each atomic species to represent unsaturation. 
234 | """ 235 | all_unsaturated_backbones = [] 236 | for backbone in all_backbones: 237 | for _ in range(len(backbone)): 238 | all_unsaturated_backbones += make_unsaturated(backbone, brackets) 239 | 240 | 241 | def get_all_unsaturated_from_backbone(backbone, brackets, return_dict=False): 242 | """ 243 | Generates all possible unsaturated structures from a backbone by inserting brackets around each atomic species. 244 | 245 | Parameters: 246 | backbone (list): A list of atomic species representing the backbone. 247 | brackets (list): A list of bracket pairs to be used for indicating unsaturation. 248 | return_dict (bool): Whether to return the results as a dictionary. Default is False. 249 | 250 | Returns: 251 | list or dict: A list of backbones with brackets inserted around each atomic species to represent unsaturation. 252 | If return_dict is True, returns a dictionary where keys are the number of unsaturated positions and values are the corresponding backbones. 253 | """ 254 | multiple_unsaturated = {} 255 | 256 | for i in range(len(backbone)): 257 | if i == 0: 258 | multiple_unsaturated[i + 1] = make_unsaturated(backbone, brackets) 259 | else: 260 | unsaturated = [] 261 | for bckb in multiple_unsaturated[i]: 262 | unsaturated += make_unsaturated(bckb, brackets) 263 | multiple_unsaturated[i + 1] = unsaturated 264 | if return_dict: 265 | return multiple_unsaturated 266 | else: 267 | lst = [] 268 | for k, v in multiple_unsaturated.items(): 269 | lst += v 270 | return lst 271 | 272 | 273 | def get_checked_smiles(smiles_list): 274 | """ 275 | Filters a list of SMILES strings, returning only those that pass a validity check. 276 | 277 | Parameters: 278 | smiles_list (list): A list of SMILES strings to be checked. 279 | 280 | Returns: 281 | list: A list of valid SMILES strings that passed the check. 282 | """ 283 | checked_smiles = [] 284 | for s in smiles_list: 285 | if check_smile(s): 286 | checked_smiles.append(s) 287 | return checked_smiles 288 | 289 | 290 | def get_smiles_from_backbones(backbones): 291 | """ 292 | Converts a list of backbones into their corresponding SMILES strings. 293 | 294 | Parameters: 295 | backbones (list): A list of backbones, where each backbone is a list of atomic species. 296 | 297 | Returns: 298 | list: A list of SMILES strings generated from the backbones. 299 | """ 300 | smiles = [] 301 | for backbone in backbones: 302 | smiles.append("".join(backbone)) 303 | return smiles 304 | 305 | 306 | def xx_get_special_symbols(config): 307 | """ 308 | Generates a dictionary of special symbols based on the provided configuration. 309 | 310 | Parameters: 311 | config (dict): A dictionary containing configuration information, including 'backbone_info' and 'specials'. 312 | 313 | Returns: 314 | dict: A dictionary where keys are atomic symbols and values are lists of special symbols. 315 | """ 316 | special_symbols = {} 317 | for symbol, _ in config["backbone_info"].items(): 318 | special_symbols[symbol] = [symbol] + [ 319 | f"[{symbol}{marker}" for marker in config["specials"] 320 | ] 321 | return special_symbols 322 | 323 | 324 | def xx_unpack_symbols(config): 325 | """ 326 | Unpacks special symbols based on the provided configuration. 327 | 328 | Parameters: 329 | config (dict): A dictionary containing configuration information, including 'backbone_info' and 'specials'. 330 | 331 | Returns: 332 | list: A list of lists, where each sublist contains unpacked symbols for each atomic species. 
333 | """ 334 | special_symbols = xx_get_special_symbols(config) 335 | print("special_symbols: ", special_symbols) 336 | 337 | unpacked_symbols = [] 338 | for symbol, value in config["backbone_info"].items(): 339 | print(symbol, value) 340 | if value == 0: 341 | continue 342 | if value == 1: 343 | # unpacked_symbols.append([tuple([s]) for s in special_symbols[symbol]]) 344 | unpacked_symbols.append(tuple(special_symbols[symbol])) 345 | if value > 1: 346 | lst = list(itertools.product(special_symbols[symbol], repeat=value)) 347 | print("lst: ", lst) 348 | # joined_lst_elements = [''.join(t) for t in lst] 349 | joined_lst_elements = [list(t) for t in lst] 350 | print("joined_lst_elements: ", joined_lst_elements) 351 | unpacked_symbols.append(joined_lst_elements) 352 | # for group in list(itertools.product(special_symbols[symbol], repeat=value)): 353 | # unpacked_symbols.append(list(group)) 354 | # print(unpacked_symbols) 355 | return unpacked_symbols 356 | 357 | 358 | def xx_get_all_backbones(config): 359 | """ 360 | Generates all possible backbones based on the provided configuration. 361 | 362 | Parameters: 363 | config (dict): A dictionary containing configuration information, including 'backbone_info' and 'specials'. 364 | 365 | Returns: 366 | list: A list of tuples, each containing a unique combination of unpacked symbols for the backbones. 367 | """ 368 | unpacked_symbols = xx_unpack_symbols(config) 369 | print(unpacked_symbols) 370 | print("xxxx") 371 | all_backbones = list(itertools.product(*unpacked_symbols)) 372 | return all_backbones 373 | 374 | 375 | def concat_tuples(list_of_tuples): 376 | """ 377 | Concatenates a list of tuples into a single tuple. 378 | 379 | Parameters: 380 | list_of_tuples (list): A list of tuples to be concatenated. 381 | 382 | Returns: 383 | tuple: A single tuple containing all elements from the input list of tuples. 384 | """ 385 | t = () 386 | for x in list_of_tuples: 387 | t += x 388 | return t 389 | 390 | 391 | def construct_smiles(config): 392 | """ 393 | Constructs a list of unique SMILES strings based on the provided configuration. 394 | 395 | Parameters: 396 | config (dict): A dictionary containing configuration information, including: 397 | - 'backbone_info' (dict): Information about the backbone atomic species and their counts. 398 | - 'brackets' (list): A list of bracket pairs to be used for indicating unsaturation. 399 | - 'allow_intramolec_rings' (bool): Whether to allow intramolecular rings. 400 | - 'ring_marker' (str): The marker to be used for indicating ring positions. 401 | - 'make_labeled' (bool): Whether to label the backbones with special markers. 402 | - 'specials' (list): A list of special markers to be used for labeling. 403 | 404 | Returns: 405 | list: A list of unique SMILES strings generated based on the configuration. 
406 | """ 407 | basic_backbones = get_all_backbones(config["backbone_info"]) 408 | 409 | multiple_unsaturated = [] 410 | for backbone in basic_backbones: 411 | backbone = list(backbone) 412 | multiple_unsaturated += get_all_unsaturated_from_backbone( 413 | backbone, config["brackets"] 414 | ) 415 | 416 | all_backbones = basic_backbones + multiple_unsaturated 417 | 418 | all_backbones += get_all_side_chains(all_backbones) 419 | 420 | if config["allow_intramolec_rings"]: 421 | ring_backbones = get_all_ringed(all_backbones, config["ring_marker"]) 422 | all_backbones += ring_backbones 423 | 424 | if config["make_labeled"]: 425 | cl_backbones = get_cl_marked(all_backbones) 426 | s1s_backbones = get_s1s_marked(all_backbones) 427 | all_backbones = cl_backbones + s1s_backbones 428 | 429 | double = insert_unsaturated_bond(all_backbones, "=") 430 | triple = insert_unsaturated_bond(all_backbones, "#") 431 | all_backbones += double 432 | all_backbones += triple 433 | 434 | print(all_backbones[-10:]) 435 | smiles_list = get_smiles_from_backbones(all_backbones) 436 | 437 | checked_smiles = get_checked_smiles(smiles_list) 438 | smiles = remove_canonical_duplicates(checked_smiles) 439 | 440 | return smiles 441 | 442 | 443 | _example_config = { 444 | "backbone_info": {"C": 1, "N": 0, "O": 2}, 445 | "allow_intramolec_rings": True, 446 | "ring_marker": 2, 447 | "side_chain": ["(", ")"], 448 | "brackets": ["[", "]", "H2]", "H3]", "H-]", "H+]"], # , '-]', '--]', '---]'] 449 | "make_labeled": True, 450 | } 451 | 452 | 453 | def _show_ussage(): 454 | print("eg ussage: smiles = construct_smiles(config)") 455 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "autoadsorbate" 3 | version = "0.2.4" 4 | description = "AutoAdsorbate is a lightweight and easy-to-use Python package for generating chemically meaningful configurations of molecules and fragments on surfaces. Built with minimal dependencies and a low barrier to entry, it enables rapid setup of surface-adsorbate systems using the Surrogate-SMILES (*SMILES) representation. Ideal for researchers in catalysis, nanotech, and materials science, AutoAdsorbate streamlines dataset generation for simulations and machine learning workflows." 
5 | authors = [ 6 | { name="Edvin Fako", email="edvinfako@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.9" 10 | license = "MIT" 11 | 12 | dependencies = [ 13 | "ase>=3.24.0", 14 | "pandas>=2.0.0", 15 | "rdkit>=2022.9.5", 16 | ] 17 | 18 | [dependency-groups] 19 | dev = [ 20 | "ipykernel>=6.29.5", 21 | "pytest>=8.3.5", 22 | "ruff>=0.11.6", 23 | ] 24 | 25 | [tool.setuptools.packages.find] 26 | include = ["autoadsorbate"] 27 | where = ["."] 28 | -------------------------------------------------------------------------------- /scripts/generate_aads_configs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import uuid 3 | from glob import glob 4 | 5 | import numpy as np 6 | from ase.build import fcc211 7 | from ase.io import read, write 8 | from joblib import Parallel, delayed 9 | 10 | sys.path.insert(0, "/gpfs/projects/qm_inorganics/notebook_edvin/git/autoadsorbate/") 11 | from autoadsorbate.autoadsorbate import Fragment, Surface 12 | 13 | smiles_file_path = "/gpfs/projects/qm_inorganics/notebook_edvin/aads_paper/relax_Cu111_Cu2_try2/MACE_relax_*.xyz" 14 | slab = fcc211(symbol="Cu", size=(6, 3, 3), vacuum=10) 15 | touch_sphere_size = 2.8 16 | to_init_dict = {"S1": 10, "Cl": 5} 17 | 18 | print("preparing slab and Surface") 19 | slab = fcc211(symbol="Cu", size=(3, 3, 2), vacuum=10) 20 | slab.positions += np.array([5, 0, 0]) 21 | slab.wrap() 22 | s = Surface(slab, touch_sphere_size=touch_sphere_size) 23 | s.site_df = s.site_df[s.site_df.index.isin([15, 16])] 24 | 25 | files = glob(smiles_file_path) 26 | files.sort() 27 | 28 | 29 | def run_generate(file, slab, touch_sphere_size, to_init_dict): 30 | try: 31 | smiles = read(file).info["smiles"] 32 | except: 33 | print("Failed at file: ", file) 34 | 35 | try: 36 | fragment = Fragment(smiles, to_initialize=to_init_dict[smiles[:2]]) 37 | print("Success: working on SMILES: ", smiles) 38 | print("running get_populated_sites...") 39 | 40 | pop_trj = s.get_populated_sites( 41 | fragment, 42 | site_index="all", 43 | sample_rotation=True, 44 | mode="heuristic", 45 | conformers_per_site_cap=5, 46 | overlap_thr=1.6, 47 | verbose=True, 48 | ) 49 | for atoms in pop_trj: 50 | atoms.info["uid"] = str(uuid.uuid4()) 51 | 52 | write( 53 | f"./generated_{atoms.info['adsorbate_info']['adsorbate_formula']}_{smiles}.xyz", 54 | pop_trj, 55 | ) 56 | print( 57 | f"at file {len(glob('./generated*xyz'))} / {len(files)}. 
Generated {len(pop_trj)} structures for smiles {smiles}" 58 | ) 59 | 60 | except: 61 | print("Failed at SMILES: ", smiles) 62 | 63 | 64 | # for i, file in enumerate(files): 65 | # run_generate(file, slab, touch_sphere_size, to_init_dict) 66 | 67 | Parallel(n_jobs=64)( 68 | delayed(run_generate)(file, slab, touch_sphere_size, to_init_dict) for file in files 69 | ) 70 | -------------------------------------------------------------------------------- /scripts/make_shrinkwrap_video.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "id": "60063ebc", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from autoadsorbate import Surface\n", 11 | "from ase.build import fcc211\n", 12 | "from ase.constraints import FixAtoms\n", 13 | "from ase.visualize import view\n", 14 | "from ase import Atom, Atoms\n", 15 | "import numpy as np\n", 16 | "\n", 17 | "from autoadsorbate.Surf import _get_starting_grid, get_shrinkwrap_grid, shrinkwrap_surface\n", 18 | "from autoadsorbate.utils import get_blenderized" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 7, 24 | "id": "062529d7", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "def interpolate_neb_trajectory(atoms_a, atoms_b, image_no = 5):\n", 29 | " \"\"\"helper funct to quickly make linear interpolations\"\"\"\n", 30 | " from ase.mep import interpolate\n", 31 | " \n", 32 | " if len(atoms_a) != len(atoms_b): # Safety checks\n", 33 | " raise ValueError(\"Atoms objects must have the same number of atoms.\")\n", 34 | " if atoms_a.get_chemical_symbols() != atoms_b.get_chemical_symbols():\n", 35 | " raise ValueError(\"Atoms must have the same atomic symbols in the same order.\")\n", 36 | " \n", 37 | " images = [atoms_a.copy()]\n", 38 | " for _ in range(image_no):\n", 39 | " images.append(atoms_a.copy())\n", 40 | " images.append(atoms_b.copy())\n", 41 | " interpolate(images)\n", 42 | " return images\n", 43 | "\n", 44 | "def frames_from_info(info):\n", 45 | " out = interpolate_neb_trajectory(\n", 46 | " atoms_a=info['traj'][0],\n", 47 | " atoms_b=info['traj'][-1],\n", 48 | " image_no = info['frames_repeat']\n", 49 | " )\n", 50 | " return out" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "48f630d5", 56 | "metadata": {}, 57 | "source": [ 58 | "## make an \"easy to read\" but interesting slab" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 8, 64 | "id": "4d17a59f", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "prec = 0.3\n", 69 | "tss = 2.2\n", 70 | "\n", 71 | "slab = fcc211('Cu', size=[6,3,4], vacuum=10)\n", 72 | "slab.positions[:,0]+=2\n", 73 | "slab.wrap()\n", 74 | "\n", 75 | "# get surface, sub surface and sub sub surface\n", 76 | "# maybe useful method for the Surface class?\n", 77 | "motv = [0,0,10] #move_out_of_the_way_vector\n", 78 | "surf_inds = shrinkwrap_surface(slab)\n", 79 | "slab.positions-= [motv if atom.index in surf_inds else [0,0,0] for atom in slab]\n", 80 | "sub_surf_inds = shrinkwrap_surface(slab)\n", 81 | "slab.positions-= [motv if atom.index in sub_surf_inds else [0,0,0] for atom in slab]\n", 82 | "sub_sub_surf_inds = shrinkwrap_surface(slab)\n", 83 | "\n", 84 | "slab.positions+=[motv if atom.index in list(surf_inds)+list(sub_surf_inds) else [0,0,0] for atom in slab]\n", 85 | "\n", 86 | "unfrozen = list(surf_inds)+list(sub_surf_inds)+list(sub_sub_surf_inds)\n", 87 | "\n", 88 | "for i in unfrozen:\n", 89 | " view_slab = 
slab.copy()\n", 90 | " view_slab[i].symbol ='Zn'\n", 91 | "# view(slab)\n", 92 | "\n", 93 | "slab.set_constraint(FixAtoms(indices=[atom.index for atom in slab if atom.index not in unfrozen]))\n", 94 | "slab.rattle(stdev=0.2)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 5, 100 | "id": "cbdd40b7", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "" 107 | ] 108 | }, 109 | "execution_count": 5, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "view(slab)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "id": "2cff2732", 121 | "metadata": {}, 122 | "source": [ 123 | "## Init Surface object" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 9, 129 | "id": "1da2e99b", 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "Visualizing surface Cu atoms as Zn\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "s = Surface(slab, precision=prec, touch_sphere_size=tss)\n", 142 | "marked_surface = s.view_surface(return_atoms=True)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "id": "21daaad1", 148 | "metadata": {}, 149 | "source": [ 150 | "## prep assets for render" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 10, 156 | "id": "208dfa35", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "starting_grid = _get_starting_grid(slab, precision=prec)\n", 161 | "\n", 162 | "high_grid = starting_grid.copy()\n", 163 | "high_grid.positions[:,2] += (high_grid.positions[:,0]+4)**1.1 #high_grid.positions[:,0] + 10\n", 164 | "high_grid.positions[:,2] += (high_grid.positions[:,1]+3)**2*0.1 #high_grid.positions[:,0] \n", 165 | "\n", 166 | "\n", 167 | "grid = get_shrinkwrap_grid(slab, precision=prec, touch_sphere_size=tss)\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 23, 173 | "id": "a8be1cda", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "# view([s.view_site(i, return_atoms=True) for i in s.site_df.index.values])\n", 178 | "site = s.view_site(39, return_atoms=True)\n", 179 | "site_vector = site[[atom.index for atom in site if atom.symbol != 'Fe']]\n", 180 | "for atom in site_vector:\n", 181 | " atom.symbol = 'H'\n", 182 | " \n", 183 | "site = site[[atom.index for atom in site if atom.symbol == 'Cu']]\n", 184 | "for atom in site:\n", 185 | " if atom.symbol=='Cu':\n", 186 | " atom.symbol='Zn'\n", 187 | "\n", 188 | "hedgehog=slab.copy()[[]]\n", 189 | "for a in [s.view_site(i, return_atoms=True) for i in s.site_df.index.values]:\n", 190 | " hedgehog+=a\n", 191 | " \n", 192 | "hedgehog=hedgehog[[atom.index for atom in hedgehog if atom.symbol == 'X']]\n", 193 | "for atom in hedgehog:\n", 194 | " atom.symbol='H'\n", 195 | " \n", 196 | "site_marker = Atoms(['Pt'], [site.info['coordinates']])\n", 197 | "site_sphere = Atoms(['Cd'], [site.info['coordinates']])\n", 198 | "\n", 199 | "site_blanket = grid[[atom.index for atom in grid if np.linalg.norm(atom.position-site.info['coordinates']) < tss *1.2]]\n", 200 | "for atom in site_blanket:\n", 201 | " atom.symbol='H'\n", 202 | " \n", 203 | "nvector_marker = site_blanket.copy()\n", 204 | "nvector_marker.positions = np.array([site_blanket.get_center_of_mass()])\n", 205 | "\n", 206 | "grid_marker = grid[[int(np.argmin(np.linalg.norm(grid.get_positions() - site_vector[-1].position, axis=1)))]]\n", 207 | "grid_marker[0].symbol = 
'Br'\n", 208 | "\n", 209 | "grid_sphere = grid_marker.copy()\n", 210 | "grid_sphere[0].symbol = 'F'" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 24, 216 | "id": "ea03a3eb", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "# view([site+hedgehog+site_marker+site_blanket+nvector_marker])\n", 221 | "# view([site+grid_marker])" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "id": "8a68d966", 227 | "metadata": {}, 228 | "source": [ 229 | "## directing the movie" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 25, 235 | "id": "54845d7a", 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "rslab = marked_surface #+ slab\n", 240 | "\n", 241 | "fps = 24\n", 242 | "def stofps(s):\n", 243 | " return max([1, int(-2+fps*s)])\n", 244 | "\n", 245 | "composition = [\n", 246 | " {'traj': [\n", 247 | " slab+high_grid,\n", 248 | " slab+starting_grid\n", 249 | " ], 'frames_repeat':100}, # drop grid\n", 250 | " {'traj': [\n", 251 | " slab+starting_grid,\n", 252 | " slab+grid\n", 253 | " ], 'frames_repeat':100}, #shrinkwrap \n", 254 | " {'traj': [\n", 255 | " marked_surface+grid+grid_marker,\n", 256 | " marked_surface+grid+grid_marker\n", 257 | " ], 'frames_repeat':50}, \n", 258 | " {'traj': [\n", 259 | " slab+grid+grid_marker+grid_sphere+site,\n", 260 | " slab+grid+grid_marker+grid_sphere+site\n", 261 | " ], 'frames_repeat':50},\n", 262 | " {'traj': [\n", 263 | " slab+grid+site+site_marker,\n", 264 | " slab+grid+site+site_marker\n", 265 | " ], 'frames_repeat':50},\n", 266 | " {'traj': [\n", 267 | " slab+grid+site+site_marker+site_sphere+site_blanket,\n", 268 | " slab+grid+site+site_marker+site_sphere+nvector_marker\n", 269 | " ], 'frames_repeat':100}, \n", 270 | " {'traj': [\n", 271 | " slab+site+site_vector,\n", 272 | " slab+site+site_vector\n", 273 | " ], 'frames_repeat':50},\n", 274 | " {'traj': [\n", 275 | " marked_surface+hedgehog,\n", 276 | " marked_surface+hedgehog\n", 277 | " ], 'frames_repeat':100}\n", 278 | " \n", 279 | "]\n", 280 | "\n", 281 | "render_traj = []\n", 282 | "\n", 283 | "for info in composition:\n", 284 | " render_traj += frames_from_info(info)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "be304756", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "from ase.io import write\n", 295 | "render_traj = get_blenderized(render_traj, hide_spot=[0,0,0])\n", 296 | "write('./render_traj.xyz', render_traj)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 22, 302 | "id": "f96123f0", 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "" 309 | ] 310 | }, 311 | "execution_count": 22, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "view(render_traj)\n", 318 | "# view(high_grid)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "6d1f4a12", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [] 328 | } 329 | ], 330 | "metadata": { 331 | "kernelspec": { 332 | "display_name": "mace_env", 333 | "language": "python", 334 | "name": "python3" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.12.3" 347 | } 348 | }, 
349 | "nbformat": 4, 350 | "nbformat_minor": 5 351 | } 352 | -------------------------------------------------------------------------------- /scripts/paper_prep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## init" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 59, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "The autoreload extension is already loaded. To reload it, use:\n", 20 | " %reload_ext autoreload\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "%load_ext autoreload\n", 26 | "%autoreload 2\n", 27 | "\n", 28 | "import sys\n", 29 | "\n", 30 | "import pandas as pd\n", 31 | "from ase.visualize import view\n", 32 | "\n", 33 | "from autoadsorbate import Surface\n", 34 | "\n", 35 | "# sys.path.insert(0, '/home/djrm/git/hetcattoolbox')\n", 36 | "sys.path.insert(0, \"/root/git/autoadsorbate\")\n", 37 | "sys.path.insert(0, \"/root/git/hetcattoolbox\")\n", 38 | "# sys.path.insert(0, '/home/djrm/git/autoadsorbate')\n", 39 | "\n", 40 | "from hetcattoolbox.hetcattoolbox import MPResterSimple\n", 41 | "from hetcattoolbox.utils import *" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 60, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# import inspect\n", 51 | "# inspect.getabsfile(Surface)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 61, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "application/vnd.jupyter.widget-view+json": { 62 | "model_id": "1c52a63f3fe44485a9125c8af100a38d", 63 | "version_major": 2, 64 | "version_minor": 0 65 | }, 66 | "text/plain": [ 67 | "Retrieving SummaryDoc documents: 0%| | 0/8 [00:00 5]\n", 128 | "# select_df = df[df.energy_above_hull < 0.05]\n", 129 | "# select_df" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 65, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "# sns.histplot(select_df.energy_above_hull.values, bins=100)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# _df = select_df[(select_df.nsites < 50) & (select_df.energy_above_hull<0.001)]\n", 148 | "# bulk_trj = []\n", 149 | "# for mpid in _df.material_id.values:\n", 150 | "# atoms = mprs.get_atoms_by_material_id(mpid)\n", 151 | "# atoms.info['mpid'] = str(mpid)\n", 152 | "# bulk_trj.append(atoms)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "application/vnd.jupyter.widget-view+json": { 163 | "model_id": "791e614eb0634cbcba4215dc184a94dd", 164 | "version_major": 2, 165 | "version_minor": 0 166 | }, 167 | "text/plain": [ 168 | "Retrieving MaterialsDoc documents: 0%| | 0/1 [00:00" 257 | ] 258 | }, 259 | "execution_count": 55, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "view(bulk_trj)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 69, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "construct_recepie = [\n", 275 | " {\n", 276 | " \"mpid\": \"mp-2\",\n", 277 | " \"mi\": \"1#1#1\",\n", 278 | " \"iterm\": 0,\n", 279 | " \"vacuum\": 10,\n", 280 | " \"layers\": 7,\n", 281 | " \"min_a\": 8,\n", 282 | " \"min_b\": 8,\n", 283 | " 
},\n", 284 | " {\n", 285 | " \"mpid\": \"mp-2\",\n", 286 | " \"mi\": \"2#1#1\",\n", 287 | " \"iterm\": 0,\n", 288 | " \"vacuum\": 10,\n", 289 | " \"layers\": 7,\n", 290 | " \"min_a\": 7,\n", 291 | " \"min_b\": 6,\n", 292 | " },\n", 293 | " {\n", 294 | " \"mpid\": \"mp-2\",\n", 295 | " \"mi\": \"1#0#0\",\n", 296 | " \"iterm\": 0,\n", 297 | " \"vacuum\": 10,\n", 298 | " \"layers\": 7,\n", 299 | " \"min_a\": 7,\n", 300 | " \"min_b\": 7,\n", 301 | " },\n", 302 | " # {\n", 303 | " # 'mpid': 'mp-30',\n", 304 | " # 'mi': '1#1#1',\n", 305 | " # 'iterm': 0,\n", 306 | " # 'vacuum': 10,\n", 307 | " # 'layers' : 7,\n", 308 | " # 'min_a' : 7,\n", 309 | " # 'min_b' : 7,\n", 310 | " # },\n", 311 | " # {\n", 312 | " # 'mpid': 'mp-30',\n", 313 | " # 'mi': '2#1#1',\n", 314 | " # 'iterm': 0,\n", 315 | " # 'vacuum': 10,\n", 316 | " # 'layers' : 7,\n", 317 | " # 'min_a' : 7,\n", 318 | " # 'min_b' : 6,\n", 319 | " # },\n", 320 | " # {\n", 321 | " # 'mpid': 'mp-30',\n", 322 | " # 'mi': '1#0#0',\n", 323 | " # 'iterm': 0,\n", 324 | " # 'vacuum': 10,\n", 325 | " # 'layers' : 7,\n", 326 | " # 'min_a' : 7,\n", 327 | " # 'min_b' : 7,\n", 328 | " # },\n", 329 | " # {\n", 330 | " # 'mpid': 'mp-1692',\n", 331 | " # 'mi': '1#1#0',\n", 332 | " # 'iterm': 2,\n", 333 | " # 'vacuum': 10,\n", 334 | " # 'layers' : 7,\n", 335 | " # 'min_a' : 10,\n", 336 | " # 'min_b' : 10,\n", 337 | " # },\n", 338 | " # {\n", 339 | " # 'mpid': 'mp-1336',\n", 340 | " # 'mi': '1#0#0',\n", 341 | " # 'iterm': 0,\n", 342 | " # 'vacuum': 10,\n", 343 | " # 'layers' : 7,\n", 344 | " # 'min_a' : 10,\n", 345 | " # 'min_b' : 10,\n", 346 | " # },\n", 347 | " # {\n", 348 | " # 'mpid': 'mp-20250',\n", 349 | " # 'mi': '0#0#1',\n", 350 | " # 'iterm': 0,\n", 351 | " # 'vacuum': 10,\n", 352 | " # 'layers' : 7,\n", 353 | " # 'min_a' : 10,\n", 354 | " # 'min_b' : 10,\n", 355 | " # },\n", 356 | " # {\n", 357 | " # 'mpid': 'mp-760381',\n", 358 | " # 'mi': '1#1#1',\n", 359 | " # 'iterm': 0,\n", 360 | " # 'vacuum': 10,\n", 361 | " # 'layers' : 7,\n", 362 | " # 'min_a' : 10,\n", 363 | " # 'min_b' : 10,\n", 364 | " # }\n", 365 | "]" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 70, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "application/vnd.jupyter.widget-view+json": { 376 | "model_id": "120bc56887444c168e98c19c48a84309", 377 | "version_major": 2, 378 | "version_minor": 0 379 | }, 380 | "text/plain": [ 381 | "Retrieving MaterialsDoc documents: 0%| | 0/1 [00:00" 478 | ] 479 | }, 480 | "execution_count": 71, 481 | "metadata": {}, 482 | "output_type": "execute_result" 483 | } 484 | ], 485 | "source": [ 486 | "view(slab_trj)" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 72, 492 | "metadata": {}, 493 | "outputs": [ 494 | { 495 | "name": "stdout", 496 | "output_type": "stream", 497 | "text": [ 498 | "{'slab_info': {'mpid': 'mp-2', 'mi': '1#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 8, 'min_b': 8, 'material_formula': 'Pd'}}\n", 499 | "{'slab_info': {'mpid': 'mp-2', 'mi': '2#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 6, 'material_formula': 'Pd'}}\n", 500 | "{'slab_info': {'mpid': 'mp-2', 'mi': '1#0#0', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 7, 'material_formula': 'Pd'}}\n" 501 | ] 502 | } 503 | ], 504 | "source": [ 505 | "# surfaces = []\n", 506 | "for atoms in slab_trj:\n", 507 | " print(atoms.info)\n", 508 | " s = Surface(atoms, precision=0.5)\n", 509 | " s.sym_reduce()\n", 510 | " surfaces.append(s)" 511 | ] 512 | }, 513 | { 514 | 
"cell_type": "code", 515 | "execution_count": 73, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "name": "stdout", 520 | "output_type": "stream", 521 | "text": [ 522 | "{'slab_info': {'mpid': 'mp-30', 'mi': '1#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 7, 'material_formula': 'Cu'}}\n", 523 | "{'slab_info': {'mpid': 'mp-30', 'mi': '2#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 6, 'material_formula': 'Cu'}}\n", 524 | "{'slab_info': {'mpid': 'mp-30', 'mi': '1#0#0', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 7, 'material_formula': 'Cu'}}\n", 525 | "{'slab_info': {'mpid': 'mp-1692', 'mi': '1#1#0', 'iterm': 2, 'vacuum': 10, 'layers': 7, 'min_a': 10, 'min_b': 10, 'material_formula': 'CuO'}}\n", 526 | "{'slab_info': {'mpid': 'mp-1336', 'mi': '1#0#0', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 10, 'min_b': 10, 'material_formula': 'OPd'}}\n", 527 | "{'slab_info': {'mpid': 'mp-20250', 'mi': '0#0#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 10, 'min_b': 10, 'material_formula': 'PdS'}}\n", 528 | "{'slab_info': {'mpid': 'mp-760381', 'mi': '1#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 10, 'min_b': 10, 'material_formula': 'CuS'}}\n", 529 | "{'slab_info': {'mpid': 'mp-2', 'mi': '1#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 8, 'min_b': 8, 'material_formula': 'Pd'}}\n", 530 | "{'slab_info': {'mpid': 'mp-2', 'mi': '2#1#1', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 6, 'material_formula': 'Pd'}}\n", 531 | "{'slab_info': {'mpid': 'mp-2', 'mi': '1#0#0', 'iterm': 0, 'vacuum': 10, 'layers': 7, 'min_a': 7, 'min_b': 7, 'material_formula': 'Pd'}}\n" 532 | ] 533 | } 534 | ], 535 | "source": [ 536 | "for s in surfaces:\n", 537 | " print(s.atoms.info)" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": 82, 543 | "metadata": {}, 544 | "outputs": [ 545 | { 546 | "data": { 547 | "text/plain": [ 548 | "{'numbers': array([47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,\n", 549 | " 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,\n", 550 | " 47, 47, 1, 47, 47, 47, 47, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 551 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),\n", 552 | " 'positions': array([[ 1.38497525e+00, 1.38497525e+00, 1.00000000e+01],\n", 553 | " [ 1.61728331e-16, -4.53324235e-16, 1.19586508e+01],\n", 554 | " [ 1.38497525e+00, 1.38497525e+00, 1.39173016e+01],\n", 555 | " [ 0.00000000e+00, 0.00000000e+00, 1.58759524e+01],\n", 556 | " [ 1.38497525e+00, 4.15492576e+00, 1.00000000e+01],\n", 557 | " [ 1.61728331e-16, 2.76995051e+00, 1.19586508e+01],\n", 558 | " [ 1.38497525e+00, 4.15492576e+00, 1.39173016e+01],\n", 559 | " [ 0.00000000e+00, 2.76995051e+00, 1.58759524e+01],\n", 560 | " [ 1.38497525e+00, 6.92487627e+00, 1.00000000e+01],\n", 561 | " [ 1.61728331e-16, 5.53990102e+00, 1.19586508e+01],\n", 562 | " [ 1.38497525e+00, 6.92487627e+00, 1.39173016e+01],\n", 563 | " [ 0.00000000e+00, 5.53990102e+00, 1.58759524e+01],\n", 564 | " [ 4.15492576e+00, 1.38497525e+00, 1.00000000e+01],\n", 565 | " [ 2.76995051e+00, -4.53324235e-16, 1.19586508e+01],\n", 566 | " [ 4.15492576e+00, 1.38497525e+00, 1.39173016e+01],\n", 567 | " [ 2.76995051e+00, 0.00000000e+00, 1.58759524e+01],\n", 568 | " [ 4.15492576e+00, 4.15492576e+00, 1.00000000e+01],\n", 569 | " [ 2.76995051e+00, 2.76995051e+00, 1.19586508e+01],\n", 570 | " [ 4.15492576e+00, 4.15492576e+00, 1.39173016e+01],\n", 571 | " [ 2.76995051e+00, 2.76995051e+00, 1.58759524e+01],\n", 572 | " [ 4.15492576e+00, 6.92487627e+00, 
1.00000000e+01],\n", 573 | " [ 2.76995051e+00, 5.53990102e+00, 1.19586508e+01],\n", 574 | " [ 4.15492576e+00, 6.92487627e+00, 1.39173016e+01],\n", 575 | " [ 2.76995051e+00, 5.53990102e+00, 1.58759524e+01],\n", 576 | " [ 6.92487627e+00, 1.38497525e+00, 1.00000000e+01],\n", 577 | " [ 5.53990102e+00, -4.53324235e-16, 1.19586508e+01],\n", 578 | " [ 6.92487627e+00, 1.38497525e+00, 1.39173016e+01],\n", 579 | " [ 5.53990102e+00, 0.00000000e+00, 1.58759524e+01],\n", 580 | " [ 6.92487627e+00, 4.15492576e+00, 1.00000000e+01],\n", 581 | " [ 5.53990102e+00, 2.76995051e+00, 1.19586508e+01],\n", 582 | " [ 6.92487627e+00, 4.15492576e+00, 1.39173016e+01],\n", 583 | " [ 5.53990102e+00, 2.76995051e+00, 1.58759524e+01],\n", 584 | " [ 6.92487627e+00, 6.92487627e+00, 1.00000000e+01],\n", 585 | " [ 5.53990102e+00, 5.53990102e+00, 1.19586508e+01],\n", 586 | " [ 6.92487627e+00, 6.92487627e+00, 1.39173016e+01],\n", 587 | " [ 5.53990102e+00, 5.53990102e+00, 1.58759524e+01],\n", 588 | " [ 6.92487627e+00, 6.92487627e+00, 1.68759524e+01],\n", 589 | " [ 0.00000000e+00, 0.00000000e+00, 1.68759524e+01],\n", 590 | " [ 0.00000000e+00, 5.53990102e+00, 1.68759524e+01],\n", 591 | " [ 5.53990102e+00, 0.00000000e+00, 1.68759524e+01],\n", 592 | " [ 5.53990102e+00, 5.53990102e+00, 1.68759524e+01],\n", 593 | " [ 6.92041250e+00, 6.92041250e+00, 1.69757529e+01],\n", 594 | " [ 6.91594874e+00, 6.91594874e+00, 1.70755535e+01],\n", 595 | " [ 6.91148497e+00, 6.91148497e+00, 1.71753540e+01],\n", 596 | " [ 6.90702120e+00, 6.90702120e+00, 1.72751546e+01],\n", 597 | " [ 6.90255744e+00, 6.90255744e+00, 1.73749551e+01],\n", 598 | " [ 6.89809367e+00, 6.89809367e+00, 1.74747557e+01],\n", 599 | " [ 6.89362990e+00, 6.89362990e+00, 1.75745562e+01],\n", 600 | " [ 6.88916614e+00, 6.88916614e+00, 1.76743568e+01],\n", 601 | " [ 6.88470237e+00, 6.88470237e+00, 1.77741573e+01],\n", 602 | " [ 6.88023860e+00, 6.88023860e+00, 1.78739579e+01],\n", 603 | " [ 6.87577484e+00, 6.87577484e+00, 1.79737584e+01],\n", 604 | " [ 6.87131107e+00, 6.87131107e+00, 1.80735590e+01],\n", 605 | " [ 6.86684730e+00, 6.86684730e+00, 1.81733595e+01],\n", 606 | " [ 6.86238354e+00, 6.86238354e+00, 1.82731600e+01],\n", 607 | " [ 6.85791977e+00, 6.85791977e+00, 1.83729606e+01],\n", 608 | " [ 6.85345600e+00, 6.85345600e+00, 1.84727611e+01],\n", 609 | " [ 6.84899224e+00, 6.84899224e+00, 1.85725617e+01],\n", 610 | " [ 6.84452847e+00, 6.84452847e+00, 1.86723622e+01],\n", 611 | " [ 6.84006471e+00, 6.84006471e+00, 1.87721628e+01],\n", 612 | " [ 6.83560094e+00, 6.83560094e+00, 1.88719633e+01]]),\n", 613 | " 'initial_magmoms': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", 614 | " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", 615 | " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", 616 | " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),\n", 617 | " 'surface_atoms': array([-1., -0., -0., 1., -1., -0., -0., 1., -1., -0., -0., 1., -1.,\n", 618 | " -0., -0., 1., -1., -0., -0., 1., -1., -0., -0., 1., -1., -0.,\n", 619 | " -0., 1., -1., -0., -0., 1., -1., -0., -0., 1., 0., 1., 1.,\n", 620 | " 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", 621 | " 0., 0., 0., 0., 0., 0., 0., 0., 0.]),\n", 622 | " 'tags': array([4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3,\n", 623 | " 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 0, 1, 1, 1, 1, 0, 0, 0,\n", 624 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])}" 625 | ] 626 | }, 627 | "execution_count": 82, 628 | "metadata": {}, 629 | "output_type": 
"execute_result" 630 | } 631 | ], 632 | "source": [ 633 | "atoms.arrays[\"numbers\"] += 1\n", 634 | "atoms.arrays" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 87, 640 | "metadata": {}, 641 | "outputs": [], 642 | "source": [ 643 | "sites_traj = []\n", 644 | "for s in surfaces:\n", 645 | " for i in s.site_df.index.values:\n", 646 | " atoms = s.atoms.copy()\n", 647 | " atoms.arrays[\"numbers\"] += 1\n", 648 | " site = s.view_site(i, return_atoms=True)\n", 649 | " site.positions += [0, 0, 0.2]\n", 650 | " atoms += site\n", 651 | " # site_info = s.site_df.loc[i].to_dict()\n", 652 | " # atoms.info['site_info'] =site_info\n", 653 | " sites_traj.append(atoms * [2, 2, 1])\n", 654 | "# write('./sites_traj.xyz', sites_traj)" 655 | ] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": 88, 660 | "metadata": {}, 661 | "outputs": [ 662 | { 663 | "data": { 664 | "text/plain": [ 665 | "" 666 | ] 667 | }, 668 | "execution_count": 88, 669 | "metadata": {}, 670 | "output_type": "execute_result" 671 | } 672 | ], 673 | "source": [ 674 | "view(sites_traj)" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": {}, 681 | "outputs": [ 682 | { 683 | "data": { 684 | "text/html": [ 685 | "
\n", 686 | "\n", 699 | "\n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | "
\n", 708 | "
" 709 | ], 710 | "text/plain": [ 711 | "Empty DataFrame\n", 712 | "Columns: []\n", 713 | "Index: []" 714 | ] 715 | }, 716 | "execution_count": 155, 717 | "metadata": {}, 718 | "output_type": "execute_result" 719 | } 720 | ], 721 | "source": [ 722 | "xx = pd.DataFrame({})\n", 723 | "xx.sort" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "metadata": {}, 730 | "outputs": [], 731 | "source": [ 732 | "# for s in surfaces:\n", 733 | "# s.site_df.to_csv(f'{s.atoms.info['slab_info']['mpid']}_cache_site_df.csv')" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": null, 739 | "metadata": {}, 740 | "outputs": [], 741 | "source": [ 742 | "# from dscribe.descriptors import SOAP\n", 743 | "\n", 744 | "# species = get_all_species_in_traj(trj)\n", 745 | "# r_cut = 6.0\n", 746 | "# n_max = 8\n", 747 | "# l_max = 6\n", 748 | "\n", 749 | "# # Setting up the SOAP descriptor\n", 750 | "# soap = SOAP(\n", 751 | "# species=species,\n", 752 | "# periodic=True,\n", 753 | "# r_cut=r_cut,\n", 754 | "# n_max=n_max,\n", 755 | "# l_max=l_max,\n", 756 | "# )\n", 757 | "\n", 758 | "# view_trj= []\n", 759 | "# site_pos=[]\n", 760 | "# soaps = []\n", 761 | "\n", 762 | "# for s in surfaces:\n", 763 | "# for i in s.site_df.index.values:\n", 764 | "# pos = s.site_df.coordinates.loc[i]\n", 765 | "# view_trj.append(s.view_site(i, return_atoms=True))\n", 766 | "# site_pos.append(pos)\n", 767 | "# soaps.append(soap.create(s.atoms, centers=[pos]))" 768 | ] 769 | } 770 | ], 771 | "metadata": { 772 | "kernelspec": { 773 | "display_name": "mace_venv", 774 | "language": "python", 775 | "name": "mace_venv" 776 | }, 777 | "language_info": { 778 | "codemirror_mode": { 779 | "name": "ipython", 780 | "version": 3 781 | }, 782 | "file_extension": ".py", 783 | "mimetype": "text/x-python", 784 | "name": "python", 785 | "nbconvert_exporter": "python", 786 | "pygments_lexer": "ipython3", 787 | "version": "3.12.3" 788 | } 789 | }, 790 | "nbformat": 4, 791 | "nbformat_minor": 2 792 | } 793 | -------------------------------------------------------------------------------- /scripts/relax.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | from glob import glob 4 | from joblib import Parallel, delayed 5 | 6 | import pandas as pd 7 | import numpy as np 8 | 9 | import ase 10 | from ase.optimize import BFGS 11 | from ase import units 12 | from ase.constraints import FixAtoms 13 | 14 | from mace.calculators import mace_mp 15 | from pathlib import Path 16 | 17 | import sys 18 | sys.path.insert(0, '/gpfs/projects/qm_inorganics/notebook_edvin/git/autoadsorbate/') 19 | from autoadsorbate.utils freeze_atoms, save_relax_config, relaxatoms 20 | 21 | rcut=40 22 | steps = 15 23 | prefix = 'MACE_prerelax' 24 | freeze_bottom=False 25 | 26 | #traj = ase.io.read('./traj.xyz', index = ':') 27 | 28 | print('Reading generated traj . . . ') 29 | traj = [] 30 | for file in glob('/gpfs/projects/qm_inorganics/notebook_edvin/aads_paper/RELAX/relax_Cu211_get_population/generate/generated_*.xyz'): 31 | traj += ase.io.read(file, index=':') 32 | print(f'Done reading generated traj . . . 
found {len(traj)}') 33 | 34 | models = '/gpfs/projects/qm_inorganics/notebook_edvin/mace_paper/revision_nature_09012025_mpa-0/mace-mpa-0-medium.model' 35 | 36 | print(models) 37 | 38 | macemp = mace_mp(model=models,dispersion=True,device="cuda",dispersion_cutoff=rcut* units.Bohr, default_dtype="float64", enable_cueq=True) # return ASE calculator 39 | 40 | for atoms in traj: 41 | c = FixAtoms(mask=[atom.symbol == 'Cu' for atom in atoms]) 42 | atoms.set_constraint(c) 43 | 44 | def main(): 45 | for atoms in traj: 46 | relaxatoms(atoms, macemp, prefix, steps=steps, freeze_bottom=freeze_bottom) 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basf/autoadsorbate/626bd10da59bb091559115986b95c873abaf9a74/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_all.py: -------------------------------------------------------------------------------- 1 | from autoadsorbate import Fragment, Surface 2 | 3 | 4 | def test_Surface(): 5 | from ase.build import fcc111 6 | 7 | slab = fcc111("Cu", (2, 2, 2), periodic=True, vacuum=10) 8 | s = Surface(slab) 9 | assert type(s.site_dict) == dict 10 | 11 | 12 | def test_Fragment(): 13 | f = Fragment(smile="COC", to_initialize=5) 14 | assert f.smile == "COC" 15 | --------------------------------------------------------------------------------
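A minimal usage sketch tying together the API exercised in tests/test_all.py and the notebooks above (Surface construction, symmetry reduction, site enumeration via site_df, and a SMILES-derived Fragment). The precision and touch_sphere_size values below are illustrative choices copied from the scripts, not required defaults:

from ase.build import fcc111

from autoadsorbate import Fragment, Surface

# Small periodic Cu(111) slab, as in tests/test_all.py.
slab = fcc111("Cu", (2, 2, 2), periodic=True, vacuum=10)

# Surface object; precision and touch_sphere_size mirror the notebook values (illustrative).
s = Surface(slab, precision=0.5, touch_sphere_size=2.2)
s.sym_reduce()  # keep only symmetry-distinct sites, as in paper_prep.ipynb

# Enumerated sites live in s.site_df; view_site returns an ASE Atoms object for inspection.
first_site = s.view_site(s.site_df.index.values[0], return_atoms=True)
print(len(s.site_df), "symmetry-distinct sites found")

# SMILES-derived adsorbate fragment, as in tests/test_all.py.
f = Fragment(smile="COC", to_initialize=5)
print(f.smile)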