├── .github
│   ├── threadpool.yaml
│   └── workflows
│       └── run_pytest.yaml
├── .gitignore
├── Dockerfile
├── Dockerfile.cp2k
├── Dockerfile.gpaw
├── LICENSE
├── README.md
├── build_containers.sh
├── configs
│   ├── hortense.yaml
│   ├── lumi.yaml
│   ├── threadpool.yaml
│   └── wq.yaml
├── docs
│   ├── api_example.png
│   ├── configuration.md
│   ├── data.md
│   ├── free_energy.md
│   ├── hamiltonian.md
│   ├── hamiltonians_umbrella.svg
│   ├── icon.svg
│   ├── index.md
│   ├── install.sh
│   ├── installation.md
│   ├── learning.md
│   ├── logo_dark.png
│   ├── logo_light.png
│   ├── models.md
│   ├── overview.png
│   ├── reference.md
│   ├── sampling.md
│   └── wandb.png
├── examples
│   ├── README.md
│   ├── alanine_replica_exchange.py
│   ├── data
│   │   ├── acetaldehyde.xyz
│   │   ├── ani500k_cc_cpu.model
│   │   ├── cp2k_input.txt
│   │   ├── h2o_32.xyz
│   │   ├── vinyl_alcohol.xyz
│   │   └── water_train.xyz
│   ├── h2_static_dynamic.py
│   ├── iron_bulk_modulus.py
│   ├── iron_harmonic_fcc_bcc.py
│   ├── online_learning_pimd.py
│   ├── proton_jump_plumed.py
│   ├── submit
│   │   ├── hortense.yaml
│   │   ├── lumi.yaml
│   │   ├── submit_hortense.sh
│   │   └── submit_lumi.sh
│   ├── water_cp2k_noise.py
│   ├── water_online_learning.py
│   ├── water_path_integral_md.py
│   └── water_train_validate.py
├── mkdocs.yml
├── psiflow
│   ├── __init__.py
│   ├── config.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   └── utils.py
│   ├── execution.py
│   ├── free_energy
│   │   ├── __init__.py
│   │   ├── integration.py
│   │   └── phonons.py
│   ├── functions.py
│   ├── geometry.py
│   ├── hamiltonians.py
│   ├── learning.py
│   ├── metrics.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── _mace.py
│   │   ├── mace_utils.py
│   │   └── model.py
│   ├── order_parameters.py
│   ├── reference
│   │   ├── __init__.py
│   │   ├── _cp2k.py
│   │   ├── _dftd3.py
│   │   ├── gpaw_.py
│   │   ├── orca.py
│   │   └── reference.py
│   ├── sampling
│   │   ├── __init__.py
│   │   ├── _ase.py
│   │   ├── ase.py
│   │   ├── client.py
│   │   ├── metadynamics.py
│   │   ├── optimize.py
│   │   ├── order.py
│   │   ├── output.py
│   │   ├── sampling.py
│   │   ├── server.py
│   │   ├── utils.py
│   │   └── walker.py
│   ├── serialization.py
│   └── utils
│       ├── __init__.py
│       ├── _plumed.py
│       ├── apps.py
│       └── io.py
├── pyproject.toml
└── tests
    ├── conftest.py
    ├── test_data.py
    ├── test_free_energy.py
    ├── test_function.py
    ├── test_learning.py
    ├── test_models.py
    ├── test_reference.py
    ├── test_sampling.py
    └── test_serialization.py
/.github/threadpool.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
retries: 0
ModelEvaluation:
  max_simulation_time: 0.4
  gpu: false
  use_threadpool: true
ModelTraining:
  max_training_time: 1
  gpu: true
  use_threadpool: true
  max_workers: 1
CP2K:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
CP2K_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
GPAW:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
GPAW_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
...
--------------------------------------------------------------------------------
/.github/workflows/run_pytest.yaml:
--------------------------------------------------------------------------------
name: CI
run-name: pytest-CI
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
jobs:
  test-py310:
    runs-on: ubuntu-24.04
    steps:
      - uses: eWaterCycle/setup-apptainer@v2
        with:
          apptainer-version: 1.3.6
      - uses: mamba-org/setup-micromamba@v1
        with:
          micromamba-version: '2.0.5-0'
          environment-name: 'test-env'
          generate-run-shell: true
          create-args: >-
            python=3.10
            ndcctools==7.14.0
            py-plumed
            simple-dftd3
            dftd3-python
            pip
            -c conda-forge
          init-shell: bash
          cache-environment: true
          post-cleanup: 'all'
      - name: Install dependencies and download containers
        shell: micromamba-shell {0}
        run: |
          which pip
          pip install pyfftw colorcet wandb pandas plotly plumed 'numpy<2.0.0'
          pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4
          pip install torch==2.5.1
          pip install git+https://github.com/acesuit/MACE.git@v0.3.5
          apptainer exec oras://ghcr.io/molmod/cp2k:2024.1 ls
          apptainer exec oras://ghcr.io/molmod/gpaw:24.1 ls
      - name: Checkout specific commit
        uses: actions/checkout@v4
      - name: Install Psiflow and run tests
        shell: micromamba-shell {0}
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          WANDB_MODE: offline # disables WandB server calls
        run: |
          pip install .[dev]
          pip list
          pytest --skip-gpu --psiflow-config=.github/threadpool.yaml
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.swp

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

pytest-tmp/
wandb/

# psiflow internal and its symlinks
psiflow_internal/
psiflow_log
psiflow_submit_scripts
psiflow_task_logs
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

RUN apt-get update && apt-get install -y \
    python3.10 \
    python3.10-venv \
    python3.10-dev \
    build-essential \
    cmake \
    wget \
    git \
    zlib1g-dev \
    libssl-dev \
    libcurl4-openssl-dev \
    libgsl-dev \
    perl \
    fftw3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN python3.10 -m venv /opt/venv \
    && /opt/venv/bin/pip install --upgrade pip

# Install PLUMED
ARG PLUMED_VERSION
RUN wget https://github.com/plumed/plumed2/archive/refs/tags/v${PLUMED_VERSION}.tar.gz \
    && tar -xzf v${PLUMED_VERSION}.tar.gz \
    && cd plumed2-${PLUMED_VERSION} \
    && ./configure --prefix=/usr/local/plumed \
    && make -j$(nproc) \
    && make install \
    && cd .. \
    && rm -rf plumed2-${PLUMED_VERSION} v${PLUMED_VERSION}.tar.gz

# Ensure cctools can find the Python environment
ENV PYTHONPATH="/opt/venv/lib/python3.10/site-packages:$PYTHONPATH"
ENV PATH="/opt/venv/bin:$PATH"

# Install cctools
ARG CCTOOLS_VERSION
RUN wget https://github.com/cooperative-computing-lab/cctools/archive/refs/tags/release/${CCTOOLS_VERSION}.tar.gz \
    && tar -xzf ${CCTOOLS_VERSION}.tar.gz \
    && cd cctools-release-${CCTOOLS_VERSION} \
    && ./configure --prefix=/usr/local/cctools \
    && make -j$(nproc) \
    && make install \
    && cd .. \
    && rm -rf cctools-release-${CCTOOLS_VERSION} ${CCTOOLS_VERSION}.tar.gz

# Set environment variables for PLUMED and cctools
ENV PATH="/usr/local/plumed/bin:/usr/local/cctools/bin:$PATH"
ENV LD_LIBRARY_PATH="/usr/local/plumed/lib:/usr/local/cctools/lib:$LD_LIBRARY_PATH"

ARG PSIFLOW_VERSION
ARG PARSL_VERSION
ARG GPU_LIBRARY
RUN /bin/bash -c -o pipefail \
    "source /opt/venv/bin/activate && \
    pip install --no-cache-dir pyfftw colorcet wandb pandas plotly plumed 'numpy<2.0.0' && \
    pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4 && \
    pip install --no-cache-dir 'torch>=2.5' --index-url https://download.pytorch.org/whl/${GPU_LIBRARY} && \
    pip install --no-cache-dir git+https://github.com/acesuit/mace.git@v0.3.5"
ARG DATE
RUN /bin/bash -c -o pipefail \
    "pip install --no-cache-dir git+https://github.com/molmod/psiflow.git@${PSIFLOW_VERSION}"

# Set entrypoint
RUN echo '#!/bin/bash' >> /opt/entry.sh && \
    echo 'source /opt/venv/bin/activate' >> /opt/entry.sh && \
    echo 'export PLUMED_KERNEL=/usr/local/plumed/lib/libplumedKernel.so' >> /opt/entry.sh && \
    echo '"$@"' >> /opt/entry.sh
RUN chmod +x /opt/entry.sh
ENTRYPOINT ["/opt/entry.sh"]

# Default command
CMD ["bash"]
--------------------------------------------------------------------------------
/Dockerfile.cp2k:
--------------------------------------------------------------------------------
#
# This file was created by generate_docker_files.py
#
# Usage: docker build -f ./2024.1_mpich_generic_psmp.Dockerfile -t cp2k/cp2k:2024.1_mpich_generic_psmp .

# Stage 1: build step
FROM ubuntu:22.04 AS build


# Install packages required for the CP2K toolchain build
RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
    g++ gcc gfortran libmpich-dev mpich openssh-client python3 \
    bzip2 ca-certificates git make patch pkg-config unzip wget zlib1g-dev

# Download CP2K
RUN git clone --recursive -b support/v2024.1 https://github.com/cp2k/cp2k.git /opt/cp2k

# Build CP2K toolchain for target CPU generic
WORKDIR /opt/cp2k/tools/toolchain
RUN /bin/bash -c -o pipefail \
    "./install_cp2k_toolchain.sh -j 8 \
     --install-all \
     --enable-cuda=no \
     --target-cpu=generic \
     --with-cusolvermp=no \
     --with-gcc=system \
     --with-mpich=system"

# Build CP2K for target CPU generic
WORKDIR /opt/cp2k
RUN /bin/bash -c -o pipefail \
    "cp ./tools/toolchain/install/arch/local.psmp ./arch/; \
     source ./tools/toolchain/install/setup; \
     make -j 8 ARCH=local VERSION=psmp"

# Collect components for installation and remove symbolic links
RUN /bin/bash -c -o pipefail \
    "mkdir -p /toolchain/install /toolchain/scripts; \
     for libdir in \$(ldd ./exe/local/cp2k.psmp | \
                      grep /opt/cp2k/tools/toolchain/install | \
                      awk '{print \$3}' | cut -d/ -f7 | \
                      sort | uniq) setup; do \
        cp -ar /opt/cp2k/tools/toolchain/install/\${libdir} /toolchain/install; \
     done; \
     cp /opt/cp2k/tools/toolchain/scripts/tool_kit.sh /toolchain/scripts; \
     unlink ./exe/local/cp2k.popt; \
     unlink ./exe/local/cp2k_shell.psmp"

# Stage 2: install step
FROM ubuntu:22.04 AS install

# Install required packages
RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
    g++ gcc gfortran libmpich-dev mpich openssh-client python3 && rm -rf /var/lib/apt/lists/*

# Install CP2K binaries
COPY --from=build /opt/cp2k/exe/local/ /opt/cp2k/exe/local/

# Install CP2K regression tests
COPY --from=build /opt/cp2k/tests/ /opt/cp2k/tests/
COPY --from=build /opt/cp2k/tools/regtesting/ /opt/cp2k/tools/regtesting/
COPY --from=build /opt/cp2k/src/grid/sample_tasks/ /opt/cp2k/src/grid/sample_tasks/

# Install CP2K database files
COPY --from=build /opt/cp2k/data/ /opt/cp2k/data/

# Install shared libraries required by the CP2K binaries
COPY --from=build /toolchain/ /opt/cp2k/tools/toolchain/

# Create links to CP2K binaries
RUN /bin/bash -c -o pipefail \
    "for binary in cp2k dumpdcd graph xyz2dcd; do \
        ln -sf /opt/cp2k/exe/local/\${binary}.psmp \
               /usr/local/bin/\${binary}; \
     done; \
     ln -sf /opt/cp2k/exe/local/cp2k.psmp \
            /usr/local/bin/cp2k_shell; \
     ln -sf /opt/cp2k/exe/local/cp2k.psmp \
            /usr/local/bin/cp2k.popt"

# Create entrypoint script file
RUN printf "#!/bin/bash\n\
ulimit -c 0 -s unlimited\n\
\
export OMP_STACKSIZE=16M\n\
export OMP_NUM_THREADS=1\n\
export PATH=/opt/cp2k/exe/local:\${PATH}\n\
source /opt/cp2k/tools/toolchain/install/setup\n\
if [ -n \"\${MEMORY_LIMIT}\" ]; then\n\
ulimit -v \${MEMORY_LIMIT}\n\
fi\n\
\n\
\"\$@\"\n" \
>/opt/entry.sh && chmod 755 /opt/entry.sh

# Create shortcut for regression test
RUN printf "/opt/cp2k/tests/do_regtest.py --maxtasks 8 --workbasedir /mnt \$* local psmp" \
    >/usr/local/bin/run_tests && chmod 755 /usr/local/bin/run_tests

# Define entrypoint
WORKDIR /mnt
ENTRYPOINT ["/opt/entry.sh"]
CMD ["cp2k", "--help"]

# Label docker image
LABEL author="CP2K Developers" \
      cp2k_version="2024.1" \
      dockerfile_generator_version="0.2"

# EOF
--------------------------------------------------------------------------------
/Dockerfile.gpaw:
--------------------------------------------------------------------------------
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

RUN apt-get update && apt-get install -y \
    python3.10 \
    python3.10-venv \
    python3.10-dev \
    build-essential \
    autoconf \
    automake \
    libtool \
    make \
    cmake \
    wget \
    git \
    libopenblas-dev \
    libfftw3-dev \
    libopenmpi-dev \
    openmpi-bin \
    libscalapack-mpi-dev \
    libelpa-dev \
    libomp-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN python3.10 -m venv /opt/venv \
    && /opt/venv/bin/pip install --upgrade pip

RUN /bin/bash -c -o pipefail \
    "source /opt/venv/bin/activate && \
    pip install --no-cache-dir numpy && \
    pip install --no-cache-dir git+https://gitlab.com/ase/ase"

# install libxc
RUN cd /opt && \
    wget https://gitlab.com/libxc/libxc/-/archive/6.2.2/libxc-6.2.2.tar.bz2 -O libxc-6.2.2.tar.bz2 && \
    tar -xvf libxc-6.2.2.tar.bz2 && \
    cd libxc-6.2.2 && \
    autoreconf -i && \
    ./configure --prefix=/usr/local CFLAGS="-fPIC" && \
    make -j$(nproc) && \
    make install && \
    ldconfig

# install GPAW
WORKDIR /opt/gpaw
ENV GPAW_CONFIG=/opt/gpaw/siteconfig.py
RUN echo "scalapack = True" >> siteconfig.py \
    && echo "fftw = True" >> siteconfig.py \
    && echo "elpa = True" >> siteconfig.py \
    && echo "libraries = ['openblas', 'fftw3', 'scalapack-openmpi', 'elpa', 'omp5']" >> siteconfig.py \
    && echo "library_dirs = ['/usr/lib', '/usr/local/lib']" >> siteconfig.py \
    && echo "extra_link_args = ['/usr/local/lib/libxc.a']" >> siteconfig.py \
    && echo "include_dirs = ['/usr/include', '/usr/local/include', '/usr/include/elpa']" >> siteconfig.py
RUN /bin/bash -c -o pipefail "source /opt/venv/bin/activate && pip install --no-cache-dir gpaw dftd3"
RUN mkdir /opt/gpaw-data
RUN /bin/bash -c -o pipefail "source /opt/venv/bin/activate && yes | gpaw install-data /opt/gpaw-data" || true

ARG PSIFLOW_VERSION
RUN /bin/bash -c -o pipefail \
    "source /opt/venv/bin/activate && pip install --no-cache-dir git+https://github.com/molmod/psiflow.git@${PSIFLOW_VERSION}"
RUN ln -s /opt/venv/lib/python3.10/site-packages/psiflow/reference/gpaw_.py /opt/run_gpaw.py

# Create entrypoint script
RUN echo '#!/bin/bash' >> /opt/entry.sh && \
    echo 'source /opt/venv/bin/activate' >> /opt/entry.sh && \
    echo 'export LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"' >> /opt/entry.sh && \
    echo 'export GPAW_SETUP_PATH="/opt/gpaw-data/gpaw-setups-24.1.0"' >> /opt/entry.sh && \
    echo '"$@"' >> /opt/entry.sh
RUN chmod +x /opt/entry.sh

# libxc needed at runtime
ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"
ENTRYPOINT ["/opt/entry.sh"]


# Default command
CMD ["bash"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022-2025 Ghent University

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[docs](https://molmod.github.io/psiflow)
[build](https://actions-badge.atrox.dev/molmod/psiflow/goto?ref=main)
[paper](https://www.nature.com/articles/s41524-023-00969-x)


# Scalable Molecular Simulation

Psiflow is a scalable molecular simulation engine for chemistry and materials science applications.
It supports:
- **quantum mechanical calculations** at various levels of theory (GGA and hybrid DFT, post-HF methods such as MP2 or RPA, and even coupled cluster; using CP2K | GPAW | ORCA)
- **trainable interaction potentials** as well as easy-to-use universal potentials, e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)
- a wide range of **sampling algorithms**: NVE | NVT | NPT, path-integral molecular dynamics, alchemical replica exchange, metadynamics, phonon-based sampling, thermodynamic integration; using [i-PI](https://ipi-code.org/),
  [PLUMED](https://www.plumed.org/), ...

Users may define arbitrarily complex workflows and execute them **automatically** on local, HPC, and/or cloud infrastructure.
To achieve this, psiflow is built using [Parsl](https://parsl-project.org/): a parallel execution library which manages job submission and workload distribution.
As such, psiflow can orchestrate large molecular simulation pipelines on hundreds or even thousands of nodes.


# Setup

Use the following one-liner to create a lightweight [micromamba](https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html) Python environment with all dependencies readily available:
```sh
curl -L molmod.github.io/psiflow/install.sh | bash
```
The environment can be activated by sourcing the `activate.sh` file which will be created in the current working directory.
Next, create a `config.yaml` file which defines the compute resources. For SLURM-based HPC systems, psiflow can initialize your configuration automatically via the following command:
```sh
python -c 'import psiflow; psiflow.setup_slurm_config()'
```
Example configuration files for [LUMI](https://lumi-supercomputer.eu/), [MeluXina](https://luxembourg.public.lu/en/invest/innovation/meluxina-supercomputer.html), or [VSC](https://www.vscentrum.be/) can be found [here](https://github.com/molmod/psiflow/tree/main/configs).
No additional software compilation is required since all of the heavy lifting (CP2K/ORCA/GPAW, PyTorch model training, i-PI dynamics) is executed within preconfigured [Apptainer](https://apptainer.org/)/[Singularity](https://sylabs.io/singularity/) containers which are production-ready for most HPCs.

That's it! Contrary to frameworks like pyiron or aiida, psiflow does not require any databases or web servers.
The only requirement is that you set up a Python environment and provide a `config.yaml`.

[**EXAMPLES**](https://github.com/molmod/psiflow/tree/main/examples)
50 |
51 |
52 |
53 | # FAQ
54 |
55 | **Where do I start?**
56 |
57 | Take a brief look at the [examples](https://github.com/molmod/psiflow/tree/main/examples) or the
58 | [documentation](https://molmod.github.io/psiflow) to get an idea for psiflow's
59 | capabilities. Next, head over to the [setup & configuration](https://molmod.github.io/psiflow/configuration/) section of the docs to get started!
60 |
61 | **Is psiflow a workflow manager?**
62 |
63 | Absolutely not! Psiflow is a Python library which allows you to perform complex molecular simulations and scale them towards large numbers of compute nodes automatically.
64 | It does not have 'fixed' workflow recipes, it does not require you to set up 'databases'
65 | or 'server daemons'. The only thing it does is expose a concise and powerful API to
66 | perform arbitrarily complex calculations in a highly efficiently manner.
67 |
68 | **Is it compatible with my cluster?**
69 |
70 | Most likely yes. Check which resource scheduling system your cluster uses (probably either
71 | SLURM/PBSPro/SGE). If you're not sure, ask your system administrators or open an issue
72 |
73 | **Can I use VASP with it?**
74 |
75 | You cannot automate VASP calculations with it, but in 83% of cases there is either no need
76 | to use VASP, or it's very easy to quickly perform the VASP part manually, outside of psiflow,
77 | and do everything else (data generation, ML potential training, sampling) with psiflow.
78 | Open an issue if you're not sure how to do this.
79 |
80 | **I would like to have feature X**
81 |
82 | Psiflow is continuously in development; if you're missing a feature feel free to open an
83 | issue or pull request!
84 |
85 | **I have a bug. Where is my error message and how do I solve it?**
86 |
87 | Psiflow covers essentially all major aspects of computational molecular simulation (most
88 | notably including the executation and parallelization), so there's bound to be some bug
89 | once in a while. Debugging can be challenging, and we recommend to follow the following steps in
90 | order:
91 |
92 | 1. Check the stderr/stdout of the main Python process (i.e. the `python main.py
93 | config.yaml` one). See if there are any clues. If it has contents which you don't
94 | understand, open an issue. If there's seemingly nothing there, go to step 2.
95 | 2. Check Parsl's log file. This can be found in the current working directory, under
96 | `psiflow_internal/parsl.log`. If it's a long file, search for any errors using `Error`
97 | or `ERROR`. If you find anything suspicious but do not know how to solve it,
98 | open an issue.
99 | 3. Check the output files of individual ML training, QM singlepoints, or i-PI molecular
100 | dynamics runs. These can be found under `psiflow_internal/000/task_logs/*`.
101 | Again, if you find an error but do not exactly know why it happens or how to solve it,
102 | feel free to open an issue. Most likely, it will be useful to other people as well
103 | 4. Check the actual 'jobscripts' that were generated and which were submitted to the
104 | cluster. Quite often, there can be a spelling mistake in e.g. the compute project you
105 | are using, or you are requesting a resource on a partition that is not available.
106 | These jobscripts (and their output and error) can be found under
107 | `psiflow_internal/000/submit_scripts/`.
108 |
109 | **Where do these container images come from?**
110 |
111 | They were generated using Docker based on the recipes in this repository, and were then
112 | converted to `.sif` format using `apptainer`
113 |
114 | **Can I run psiflow locally for small runs or debug purposes?**
115 |
116 | Of course! If you do not provide a `config.yaml`, psiflow will just use your local
117 | workstation for its execution. See e.g. [this](https://github.com/molmod/psiflow/blob/main/configs/threadpool.yaml) or [this](https://github.com/molmod/psiflow/blob/main/configs/wq.yaml) config used for testing.
118 |
--------------------------------------------------------------------------------
/build_containers.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -e  # exit upon failure

if [ "$EUID" -ne 0 ]; then
    echo "Please run this script as root or with sudo."
    exit 1
fi

# Initialize flags
psiflow=false
gpaw=false
cp2k=false
build_sif=false
#mpi=mpich

# Parse command line options
while [[ $# -gt 0 ]]; do
    case "$1" in
        --gpaw)
            gpaw=true
            shift  # Shift to next argument
            ;;
        --cp2k)
            cp2k=true
            shift
            ;;
        --psiflow)
            psiflow=true
            shift
            ;;
        --build_sif)
            build_sif=true
            shift
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

PSIFLOW_VERSION="v4.0.0"
CCTOOLS_VERSION=7.14.0
PLUMED_VERSION=2.9.0
GPU_LIBRARIES=("rocm6.2" "cu118")

# build the psiflow container, once per GPU library
if [ "$psiflow" = "true" ]; then
    for GPU_LIBRARY in "${GPU_LIBRARIES[@]}"; do
        TAG="psiflow:${PSIFLOW_VERSION}_${GPU_LIBRARY}"
        docker build \
            --build-arg GPU_LIBRARY=${GPU_LIBRARY} \
            --build-arg PARSL_VERSION=$PARSL_VERSION \
            --build-arg PSIFLOW_VERSION=$PSIFLOW_VERSION \
            --build-arg CCTOOLS_VERSION=$CCTOOLS_VERSION \
            --build-arg PLUMED_VERSION=$PLUMED_VERSION \
            --build-arg DATE=$(date +%s) \
            -t ghcr.io/molmod/$TAG \
            -f Dockerfile .
        if [ "$build_sif" = "true" ]; then
            export TMPDIR=$(pwd)/tmp
            mkdir -p $TMPDIR
            apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG
            apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG
            rm $TAG.sif
            rm -rf $TMPDIR
        fi
    done
fi

if [ "$cp2k" = "true" ]; then
    TAG="cp2k:2024.1"
    docker build \
        -t ghcr.io/molmod/$TAG \
        -f Dockerfile.cp2k .
    if [ "$build_sif" = "true" ]; then
        apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG
        apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG
        rm $TAG.sif
    fi
fi

if [ "$gpaw" = "true" ]; then
    TAG="gpaw:24.1"
    docker build \
        --build-arg PSIFLOW_VERSION=$PSIFLOW_VERSION \
        -t ghcr.io/molmod/$TAG \
        -f Dockerfile.gpaw .
    if [ "$build_sif" = "true" ]; then
        apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG
        apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG
        rm $TAG.sif
    fi
fi
--------------------------------------------------------------------------------
/configs/hortense.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
container_engine: 'apptainer'
container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_cu118'
default_threads: 8
ModelEvaluation:
  cores_per_worker: 12
  gpu: True
  max_simulation_time: 20
  slurm:
    partition: "gpu_rome_a100"
    account: "2023_070"
    nodes_per_block: 1
    cores_per_node: 48
    max_blocks: 1
    walltime: "12:00:00"
    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n"
ModelTraining:
  cores_per_worker: 12
  gpu: true
  max_training_time: 40
  slurm:
    partition: "gpu_rome_a100"
    account: "2023_070"
    nodes_per_block: 1
    cores_per_node: 12
    max_blocks: 1
    walltime: "12:00:00"
    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n"
CP2K:
  cores_per_worker: 64
  max_evaluation_time: 30
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp'
  slurm:
    partition: "cpu_rome"
    account: "2024_079"
    nodes_per_block: 1
    cores_per_node: 64
    max_blocks: 25
    walltime: "06:00:00"
    scheduler_options: "#SBATCH --clusters=dodrio\n"
...
--------------------------------------------------------------------------------
/configs/lumi.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
container_engine: 'singularity'
container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_rocm6.2'
default_threads: 8
CP2K:
  cores_per_worker: 32
  max_evaluation_time: 20
  launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp'
  slurm:
    partition: "standard"
    account: "project_465001125"
    nodes_per_block: 1
    cores_per_node: 128
    max_blocks: 10
    walltime: "01:00:00"
ModelEvaluation:
  cores_per_worker: 7
  gpu: True
  slurm:
    partition: "standard-g"
    account: "project_465001125"
    nodes_per_block: 1
    cores_per_node: 56
    max_blocks: 10
    walltime: "01:00:00"
    scheduler_options: "#SBATCH --gres=gpu:8\n"
ModelTraining:
  cores_per_worker: 7
  gpu: true
  multigpu: true
  slurm:
    partition: "standard-g"
    account: "project_465001125"
    nodes_per_block: 1
    cores_per_node: 56
    walltime: "01:00:00"
    scheduler_options: "#SBATCH --gres=gpu:8\n"
...
--------------------------------------------------------------------------------
/configs/threadpool.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
retries: 0
ModelEvaluation:
  gpu: false
  use_threadpool: true
  max_simulation_time: 0.4
ModelTraining:
  gpu: true
  use_threadpool: true
  max_training_time: 1
  max_workers: 1  # suppress assertion for multigpu training
CP2K:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
CP2K_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
GPAW:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
GPAW_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
...
--------------------------------------------------------------------------------
/configs/wq.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
default_threads: 4
ModelEvaluation:
  cores_per_worker: 4
  gpu: True
  max_simulation_time: 0.4
ModelTraining:
  cores_per_worker: 4
  gpu: true
  max_training_time: 1
  max_workers: 1
CP2K:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2023.2 /opt/entry.sh mpirun -np 2 -x OMP_NUM_THREADS=1 cp2k.psmp'
...
--------------------------------------------------------------------------------
/docs/api_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/api_example.png
--------------------------------------------------------------------------------
/docs/free_energy.md:
--------------------------------------------------------------------------------
TODO
--------------------------------------------------------------------------------
/docs/hamiltonian.md:
--------------------------------------------------------------------------------
In Born-Oppenheimer-based molecular simulation, atomic nuclei are treated as classical
particles that are subject to *effective* interactions -- these are the result of the quantum
mechanical behavior of the electrons. These interactions determine the interatomic forces
which are used in a dynamic simulation to propagate the atomic positions from one timestep
to the next.
Traditionally, dynamic simulations required an explicit evaluation of these effective
forces in terms of a quantum mechanical calculation (e.g. DFT(B)).
Recently, it became clear that it is much more efficient to perform such simulations
using a machine-learned representation of the interaction energy, i.e. an ML potential.
The development and application of ML potentials throughout large simulation workflows is in
fact one of the core applications of psiflow.

The `Hamiltonian` class is used to represent any type of interaction potential.
Examples are pre-trained, 'universal' models (e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)),
ML potentials trained within psiflow (see [ML potentials](model.md)), or a quadratic
(hessian-based) approximation to a local energy minimum, to name a few.
In addition, various sampling schemes employ bias potentials which are superimposed on the
QM-based Born-Oppenheimer surface in order to drive the system
along specific reaction coordinates (e.g. metadynamics, umbrella sampling).
Such bias potentials are also instances of a `Hamiltonian`.

By far the simplest hamiltonian is the Einstein crystal, which binds atoms to a certain
reference position using harmonic springs with a single, fixed force constant.

```py
from psiflow.geometry import Geometry
from psiflow.hamiltonians import EinsteinCrystal


# isolated H2 molecule
geometry = Geometry.from_string('''
2
H 0.0 0.0 0.0
H 0.0 0.0 0.8
''')

einstein = EinsteinCrystal(geometry, force_constant=0.1)  # in eV/A**2
```
As mentioned earlier, the key feature of hamiltonians is that they represent an interaction energy between atoms,
i.e. they output an energy (and its gradients) when given a geometry as input.
Because hamiltonians might require specialized resources for their evaluation (e.g. an ML
potential which gets executed on a GPU), evaluation of a hamiltonian does not necessarily
happen instantly (e.g. if a GPU node is not immediately available). Similar to how
`Dataset` instances return futures of a `Geometry` when a particular index is queried,
hamiltonians return a future when asked to evaluate the energy/forces/stress of a
particular `Geometry`:

```py
from psiflow.data import Dataset

energy = einstein.compute(geometry, 'energy')  # AppFuture of an energy (np.ndarray with shape (1,))
print(energy.result())  # wait for the result to complete, and print it (in eV)

data = Dataset.load('snapshots.xyz')  # dataset containing N snapshots
energy, forces, stress = einstein.compute(data)  # returns energy and gradients for each snapshot in data

assert energy.result().shape == (N,)  # one energy per snapshot
assert forces.result().shape == (N, max_natoms, 3)  # forces for each snapshot, with padded natoms
assert stress.result().shape == (N, 3, 3)  # stress; filled with NaNs if not applicable
```
Aside from a dataset or a geometry, `compute` takes the following keyword arguments:

- **outputs** (type `str` or `list[str]`): determines which properties to compute and
  return. Accepts either a single property name (`'energy'`, `'forces'`, or `'stress'`) or a list
  of properties (e.g. `['energy', 'forces', 'stress']`).
- **batch_size** (type `int`): splits the calculation into batches of this size. For
  expensive models and/or large datasets, it makes sense to pick a smaller batch size such
  that the calculation is parallelized over a large number of resources. For a very simple
  calculation (e.g. the Einstein crystal), it is faster to pick a larger batch size in
  order to reduce the overhead of batching. Its default value is 100; see the example below.

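For instance, a minimal sketch of a batched evaluation, reusing the `einstein` hamiltonian and the `data` from above (the batch size of 10 is an arbitrary choice):

```py
# compute only the energies, in batches of 10 snapshots each;
# batches are evaluated in parallel whenever resources allow it
energy = einstein.compute(data, outputs='energy', batch_size=10)
print(energy.result().shape)  # (N,)
```
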
A particularly important hamiltonian is MACE, one of the most ubiquitous ML potentials.
The MACE community has developed a few foundation models (MACE-MP) which are readily applicable to
virtually any molecule or material:

```py
import numpy as np
from psiflow.hamiltonians import MACEHamiltonian


mace = MACEHamiltonian.mace_mp0()  # downloads MACE-MP0 from github
forces = mace.compute(geometry, 'forces')  # evaluates the MACE potential on the geometry

forces = forces.result()  # wait for evaluation to complete and get actual value

assert np.dot(forces[0], forces[1]) < 0  # forces on the two H atoms point in opposite directions

assert np.allclose(np.sum(forces, axis=0), 0.0)  # no net force on the molecule --> forces sum to [0, 0, 0]
```
A unique feature of psiflow `Hamiltonian` instances is the ability to create a new
hamiltonian from a linear combination of two or more existing hamiltonians.
This is relevant for many types of free energy calculations and/or enhanced sampling
techniques, including umbrella sampling, Hamiltonian replica exchange, or thermodynamic
integration.
Let us consider the particular example of [umbrella
sampling](https://wires.onlinelibrary.wiley.com/doi/10.1002/wcms.66).
As activated event, we consider the decay of vinyl alcohol to acetaldehyde,
which consists of a proton jump from the oxygen to the opposite carbon:


![](hamiltonians_umbrella.svg){ width="500" }
Transformation of vinyl alcohol into acetaldehyde by means of a proton jump.
A reaction coordinate is constructed based on the distance of the hydrogen with respect to
the oxygen and with respect to the carbon.


The harmonic restraint is implemented and evaluated via [PLUMED](https://www.plumed.org/).
In psiflow, this can be done by passing a plumed input string which describes the bias
potential into a `PlumedHamiltonian`.

```py
from psiflow.hamiltonians import PlumedHamiltonian

plumed_str = """UNITS LENGTH=A ENERGY=kj/mol
d_C: DISTANCE ATOMS=3,5
d_O: DISTANCE ATOMS=1,5
CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO
RESTRAINT ARG=CV KAPPA=1500 AT=0.0
"""

bias = PlumedHamiltonian(plumed_str)
```
To add this contribution to our MACE potential, we simply sum both hamiltonians:

```py
potential = mace + bias

# double check
alcohol = Geometry.load('vinyl_alcohol.xyz')
total_energy = potential.compute(alcohol, 'energy')
mace_energy = mace.compute(alcohol, 'energy')
bias_energy = bias.compute(alcohol, 'energy')

assert np.allclose(
    total_energy.result(),
    mace_energy.result() + bias_energy.result(),
)
```

Aside from bias potentials, the combination of multiple hamiltonians is also employed in
e.g. the calculation of anharmonic free energy corrections.
In that case, we consider a "base" potential energy surface which is described by a
general quadratic function (i.e. a 3Nx3N hessian matrix and a minimum-energy geometry)
and a small perturbation which describes the difference between the quadratic
function and the fully anharmonic potential.
The following code snippet demonstrates the construction of mixtures of the two energy
surfaces:
```py
from psiflow.hamiltonians import Harmonic

# hessian computed via geometry optimization and finite differences
# see sampling section
type(hessian)  # np.ndarray
hessian.shape  # (3n, 3n)
type(minimum)  # Geometry
len(minimum)   # n

harmonic = Harmonic(minimum, hessian)  # quadratic hamiltonian; x.T @ H @ x / 2
delta = mace - harmonic  # anharmonic perturbation

hamiltonians = []  # linear interpolation between quadratic and MACE PES, in 10 steps
for scale in np.linspace(0, 1, 10):
    hamiltonians.append(harmonic + scale * delta)
```

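Each entry of `hamiltonians` is again a regular `Hamiltonian`, so the interpolated surfaces can be evaluated (or sampled) like any other. As a small usage sketch, assuming `data` is a `Dataset` of sampled snapshots:

```py
# average energy of the snapshots under each interpolated hamiltonian
for scale, mixed in zip(np.linspace(0, 1, 10), hamiltonians):
    energy = mixed.compute(data, outputs='energy')  # future of a shape-(N,) array
    print(scale, energy.result().mean())
```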
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
---
hide:
  - toc
---

# **psiflow** - scalable molecular simulation


Psiflow is a scalable molecular simulation engine for chemistry and materials science applications.
It supports:

- **quantum mechanical calculations** at various levels of theory (GGA and hybrid DFT, post-HF methods such as MP2 or RPA, and even coupled cluster; using CP2K | GPAW | ORCA)

- **trainable interaction potentials** as well as easy-to-use universal potentials, e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)
- a wide range of **sampling algorithms**: NVE | NVT | NPT, path-integral molecular dynamics, alchemical replica exchange, metadynamics, phonon-based sampling, thermodynamic integration; using [i-PI](https://ipi-code.org/),
  [PLUMED](https://www.plumed.org/), ...

Users may define arbitrarily complex workflows and execute them **automatically** on local, HPC, and/or cloud infrastructure.
To achieve this, psiflow is built using [Parsl](https://parsl-project.org/): a parallel execution library which manages job submission and workload distribution.
As such, psiflow can orchestrate large molecular simulation pipelines on hundreds or even thousands of nodes.


![](overview.png){ width="500" }


---


# FAQ

**Where do I start?**

Take a brief look at the [examples](https://github.com/molmod/psiflow/tree/main/examples) or walk
through the
[documentation](https://molmod.github.io/psiflow/data) to get an idea of psiflow's
capabilities. Next, head over to the [setup & configuration](https://molmod.github.io/psiflow/configuration/) section of the docs to get started!

**Is psiflow a workflow manager?**

Absolutely not! Psiflow is a Python library which allows you to perform complex molecular simulations and scale them towards large numbers of compute nodes automatically.
It does not have 'fixed' workflow recipes, nor does it require you to set up 'databases'
or 'server daemons'. The only thing it does is expose a concise and powerful API to
perform arbitrarily complex calculations in a highly efficient manner.

**Is it compatible with my cluster?**

Most likely yes. Check which resource scheduling system your cluster uses (probably
SLURM, PBSPro, or SGE). If you're not sure, ask your system administrators or open an issue.

**Can I use VASP with it?**

You cannot automate VASP calculations with it, but in 99% of cases there is either no need
to use VASP, or it's very easy to perform the VASP part manually, outside of psiflow,
and do everything else (data generation, ML potential training, sampling) with psiflow.
Open an issue if you're not sure how to do this.

**I would like to have feature X**

Psiflow is continuously in development; if you're missing a feature, feel free to open an
issue or pull request!

**I have a bug. Where is my error message and how do I solve it?**

Psiflow covers essentially all major aspects of computational molecular simulation (most
notably including the execution and parallelization), so there's bound to be some bug
once in a while. Debugging can be challenging, and we recommend following these steps in
order:

1. Check the stderr/stdout of the main Python process (i.e. the `python main.py
   config.yaml` one). See if there are any clues. If it has contents which you don't
   understand, open an issue. If there's seemingly nothing there, go to step 2.
2. Check Parsl's log file. This can be found in the current working directory, under
   `psiflow_internal/parsl.log`. If it's a long file, search for any errors using `Error`
   or `ERROR`. If you find anything suspicious but do not know how to solve it,
   open an issue.
3. Check the output files of individual ML training, QM singlepoints, or i-PI molecular
   dynamics runs. These can be found under `psiflow_internal/000/task_logs/*`.
   Again, if you find an error but do not exactly know why it happens or how to solve it,
   feel free to open an issue. Most likely, it will be useful to other people as well.
4. Check the actual 'jobscripts' that were generated and submitted to the
   cluster. Quite often, there can be a spelling mistake in e.g. the compute project you
   are using, or you are requesting a resource on a partition that is not available.
   These jobscripts (and their output and error) can be found under
   `psiflow_internal/000/submit_scripts/`.

**Where do these container images come from?**

They were generated using Docker based on the recipes in this repository, and were then
converted to `.sif` format using `apptainer`.

**Can I run psiflow locally for small runs or debug purposes?**

Of course! If you do not provide a `config.yaml`, psiflow will just use your local
workstation for its execution. See e.g. [this](https://github.com/molmod/psiflow/blob/main/configs/threadpool.yaml) or [this](https://github.com/molmod/psiflow/blob/main/configs/wq.yaml) config used for testing.


!!! note "Citing psiflow"

    Psiflow is developed at the
    [Center for Molecular Modeling](https://molmod.ugent.be).
    If you use it in your research, please cite the following paper:

    Machine Learning Potentials for Metal-Organic Frameworks using an
    Incremental Learning Approach,
    _Sander Vandenhaute et al._,
    [npj Computational Materials](https://www.nature.com/articles/s41524-023-00969-x),
    __9__, 19 __(2023)__
--------------------------------------------------------------------------------
/docs/install.sh:
--------------------------------------------------------------------------------
#!/bin/sh

curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
export MAMBA_ROOT_PREFIX=$(pwd)  # optional, defaults to ~/micromamba

eval "$(./bin/micromamba shell hook -s posix)"
micromamba activate
micromamba create -n _psiflow_env -y python=3.10 pip ndcctools=7.14.0 -c conda-forge
micromamba activate _psiflow_env
pip install git+https://github.com/molmod/psiflow.git@v4.0.0

# create activate.sh
echo 'ORIGDIR=$PWD' >>activate.sh  # single quotes prevent variable substitution
echo "cd $(pwd)" >>activate.sh
echo "export MAMBA_ROOT_PREFIX=$(pwd)" >>activate.sh
echo 'eval "$(./bin/micromamba shell hook -s posix)"' >>activate.sh
echo "micromamba activate _psiflow_env" >>activate.sh
echo 'cd $ORIGDIR' >>activate.sh  # single quotes prevent variable substitution
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
# Installation

Psiflow is designed as an end-to-end framework for developing interatomic potentials. As such, it has a number of dependencies
which should be available in order to be able to perform all steps in the workflow. The following table groups
the main dependencies according to how they are used in psiflow:



| category                 | name    | version    |     uses GPU     |     uses MPI     |
| ------------------------ | ------- | ---------- | :--------------: | :--------------: |
| **QM evaluation**        | CP2K    | >= 2023.1  |                  | :material-check: |
|                          | PySCF   | >= 2.4     |                  |                  |
| **trainable potentials** | MACE    | 0.2.0      | :material-check: |                  |
|                          | NequIP  | 0.5.6      | :material-check: |                  |
|                          | Allegro | 0.2.0      | :material-check: |                  |
| **molecular dynamics**   | OpenMM  | 8.0        | :material-check: |                  |
|                          | PLUMED  | 2.9.0      |                  |                  |
|                          | YAFF    | 1.6.0      |                  |                  |
| **miscellaneous**        | Parsl   | 2024.02.12 |                  |                  |
|                          | e3nn    | 0.4.4      | :material-check: |                  |
|                          | PyTorch | 1.13.1     | :material-check: |                  |
|                          | ASE     | >= 3.22.1  |                  |                  |
|                          | wandb   | 0.15.8     |                  |                  |
|                          | Python  | 3.10, 3.11 |                  |                  |



## Containerized
To spare users from having to go through all of the installation
shenanigans, psiflow provides a convenient portable entity which bundles all of the above
dependencies -- a container image!
Whether you're executing your calculations on a high-memory node in a cluster
or using a GPU from a Google Cloud instance, all that is required is a working
container engine and you're good to go.
The vast majority of HPCs and cloud computing providers support containerized execution,
using engines like [Apptainer/Singularity](https://apptainer.org/),
[Shifter](https://docs.nersc.gov/development/shifter/how-to-use/),
or [Docker](https://www.docker.com/).
These engines are also very easily installed on your local workstation, which facilitates
local debugging.

Besides a container engine, it's necessary to install a standalone Python environment
which needs to take care of possible job submissions and input/output writing.
Since the actual calculations are performed inside the container, the standalone
Python environment requires barely anything, and is straightforward to install
using `micromamba` -- a blazingly fast drop-in replacement for `conda`:

```console
micromamba create -n psiflow_env -c conda-forge -y python=3.10
micromamba activate psiflow_env
pip install parsl==2023.10.23 git+https://github.com/molmod/psiflow
```
That's it! Before running actual calculations, it is still necessary to set up Parsl
to use the compute resources you have at your disposal -- whether it's a local GPU,
a SLURM cluster, or a cloud computing provider; check out the
[Execution](execution.md) page for more details.

!!! note "Containers 101"

    Apptainer -- now the most widely used container system for HPCs -- is part of the
    Linux Foundation. It is easy to set up on most Linux distributions, as explained in the [Apptainer documentation](https://apptainer.org/docs/admin/main/installation.html#install-ubuntu-packages).

    Psiflow's containers are hosted on the GitHub Container Registry (GHCR), for both Python 3.9 and 3.10.
    To download and run commands in them, simply execute:

    ```console
    # show available pip packages
    apptainer exec oras://ghcr.io/molmod/psiflow:3.0.0_python3.9_cuda /usr/local/bin/entry.sh pip list

    # inspect cp2k version
    apptainer exec oras://ghcr.io/molmod/psiflow:3.0.0_python3.9_cuda /usr/local/bin/entry.sh cp2k.psmp --version
    ```

    Internally, Apptainer will store the container in a local cache directory such that it does not have to
    redownload it every time a command gets executed. Usually, it's a good idea to manually change the location
    of these cache directories since they can end up clogging your `$HOME` directory quite quickly.
    To do this, simply put the following line in your `.bashrc`:

    ```console
    export APPTAINER_CACHEDIR=/some/dir/on/local/scratch/apptainer_cache
    ```

    If your compute resources use SingularityCE instead of Apptainer,
    replace 'APPTAINER' with 'SINGULARITY' in the environment variable names.

!!! note "Weights & Biases"

    To ensure psiflow can communicate its data to [W&B](https://wandb.ai), add

    ```console
    export WANDB_API_KEY=
    ```
    to your `.bashrc`.

!!! note "AMD GPU support"

    As the name of the container suggests, GPU acceleration for PyTorch models in OpenMM
    is currently only available for Nvidia GPUs because the compatibility of conda/mamba
    with AMD GPUs (HIP) is not great at the moment. If you really must use AMD GPUs
    in psiflow, you'll have to manually create a separate Python environment with a ROCm-enabled
    PyTorch for training, and use the regular containerized setup for CPU-only
    molecular dynamics with OpenMM.

    A ROCm-compatible PyTorch can be installed using the following command:
    ```console
    pip install --force torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
    ```


## Manual
While a containerized setup guarantees reproducibility and is faster to install,
a fully manual setup of psiflow and its dependencies provides the user with full control
over software versions or compiler flags.
While this is not really necessary in the vast majority of cases, we mention for completeness
the following manual setup using `micromamba`:
```console
CONDA_OVERRIDE_CUDA="11.8" micromamba create -p ./psiflow_env -y -c conda-forge \
    python=3.10 pip \
    openmm-plumed openmm-torch pytorch=1.13.1=cuda* \
    nwchem py-plumed cp2k && \
    micromamba clean -af --yes
pip install cython==0.29.36 matscipy prettytable && \
    pip install git+https://github.com/molmod/molmod && \
    pip install git+https://github.com/molmod/yaff && \
    pip install e3nn==0.4.4
pip install numpy ase tqdm pyyaml 'torch-runstats>=0.2.0' 'torch-ema>=0.3.0' mdtraj tables
pip install git+https://github.com/acesuit/MACE.git@55f7411 && \
    pip install git+https://github.com/mir-group/nequip.git@develop --no-deps && \
    pip install git+https://github.com/mir-group/allegro --no-deps && \
    pip install git+https://github.com/svandenhaute/openmm-ml.git@triclinic
pip install 'psiflow[parsl] @ git+https://github.com/molmod/psiflow'
```
This is mostly a copy-paste from psiflow's [Dockerfiles](https://github.com/molmod/psiflow/blob/main/container).
--------------------------------------------------------------------------------
/docs/learning.md:
--------------------------------------------------------------------------------
1 | Psiflow allows for the seamless development and scalable
2 | execution of online learning algorithms for ML potentials.
3 | The `Learning` class provides an interface based on which such
4 | algorithms can be implemented.
5 | They keep track of the generated data and error metrics, provide optional [Weights &
6 | Biases](https://wandb.ai) logging, and offer basic restart functionality in case
7 | something goes wrong.
8 | Learning objects are instantiated using the following arguments:
9 |
10 | - **reference** (type `Reference`): the `Reference` instance which will be used to
11 | evaluate ground-truth energy/force labels for each of the samples generated.
12 | - **path_output** (type `str | Path`): the location of a folder in which intermediate
13 |   models, datasets, walker states, and restart files are saved.
14 | - **train_valid_split** (type `float`): fraction of generated data which should be used
15 | for the training set (as opposed to validation).
16 | - **error_thresholds_for_reset** (type `list[Optional[float]]`): during online learning,
17 |   it is not uncommon for walkers to explore unphysical regions in phase space due to
18 |   irregularities in the intermediate potential, excessive temperatures/pressures, ...
19 |   In those cases, it is beneficial to reset walkers to their starting configurations,
20 |   which are known to be physically sound. The decision to reset a walker
21 |   is made every time the 'exact' energy and forces have been computed for a sampled
22 |   state. If the error between the corresponding walker's model (i.e. the previous model)
23 |   and the QM-evaluated energy and forces exceeds the given thresholds (on energies and/or
24 |   forces), the walker is reset.
25 |   This argument expects a list of length two (threshold on the energy error, and threshold on
26 |   the force error), with optional `None` values if no reset is desired.
27 |   For example: `[None, 0.1]` resets a walker whenever the force RMSE exceeds 100 meV/A,
28 |   and ignores any energy discrepancy.
29 | - **error_thresholds_for_discard** (type `list[Optional[float]]`): states which are
30 | entirely unphysical do not contribute to the accuracy of the model, and sometimes even
31 | hinder proper training. If these error thresholds are exceeded, the state is discarded and the walker is reset.
32 | - **wandb_group** (type `str`): if specified, the computed dataset metrics will be logged
33 | to Weights & Biases in the corresponding group of runs for easy visual analysis.
34 | - **wandb_project** (type `str`): if specified, the computed dataset metrics will be logged
35 | to Weights & Biases in the corresponding project for easy visual analysis.
36 | - **initial_data** (type `Dataset`): existing, labeled data from which the learning can be
37 | bootstrapped. Note that *all* states in this dataset must be labeled, and that this is
38 | only sensible if the labeling agrees with the given Reference instance. (Same level of
39 | theory, same basis set, grid settings, ... ).
40 |
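As a concrete sketch (all argument values below are hypothetical, and `cp2k` can be any `Reference` instance), a typical instantiation could look as follows:

```py
from psiflow.learning import Learning

learning = Learning(
    cp2k,                                      # any Reference instance
    path_output='./output',                    # folder for models, data, restart files
    train_valid_split=0.9,
    error_thresholds_for_reset=[None, 0.1],    # reset walker if force RMSE > 0.1 eV/A
    error_thresholds_for_discard=[None, 0.3],  # discard state if force RMSE > 0.3 eV/A
    wandb_group='my_group',
    wandb_project='my_project',
)
```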
41 |
42 |
43 | ![wandb](wandb.png){ width="900" }
44 | Illustration of what the Weights & Biases logging looks like.
45 | The graph on top simply shows the force RMSE on each data point versus a unique
46 | 'identifier' per data point. The bottom plot shows the same data points, but now
47 | grouped according to which walker generated them. In this case, walkers were sorted
48 | according to temperature (lower walker indices correspond to lower temperatures), and this is seen
49 | in the fact that walkers with a higher index generated data with, on average, higher errors,
50 | as they explored more out-of-equilibrium configurations.
51 |
52 |
53 |
54 | The core business of a `Learning` instance is the following sequence of operations:
55 |
56 | 1. use walkers in a `sample()` call to generate atomic geometries
57 | 2. evaluate those atomic geometries with the provided reference to obtain QM energies and
58 |    forces
59 | 3. add those geometries to the training data, or discard them if they exceed
60 |    `error_thresholds_for_discard`; reset walkers if they exceed
61 |    `error_thresholds_for_reset`.
62 | 4. train the model using the new data
63 | 5. compute metrics for the trained model across the new dataset and optionally log them to
64 |    W&B.
65 |
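In pseudocode, a single iteration looks roughly as follows (a sketch of the control flow, not the actual implementation; `generate_states`, `filter_and_reset`, and `compute_and_log_metrics` are hypothetical helpers):

```py
for i in range(n_iterations):
    new_data = generate_states(walkers)            # 1. sample atomic geometries (hypothetical helper)
    labeled = new_data.evaluate(reference)         # 2. obtain QM energy and forces
    data = filter_and_reset(data, labeled)         # 3. apply discard/reset thresholds (hypothetical helper)
    model.train(*data.split(train_valid_split))    # 4. retrain on the extended dataset
    compute_and_log_metrics(model, data)           # 5. metrics, optionally logged to W&B (hypothetical helper)
```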
66 | Currently, two variants of this scheme are implemented: passive and active learning.
67 |
68 | ## passive learning
69 |
70 | During passive learning, walkers are propagated using an external and 'fixed' Hamiltonian
71 | which is not trained at any point (e.g. a pre-trained universal potential or a
72 | hessian-based Hamiltonian).
73 |
74 | ```py
75 | model, walkers = learning.passive_learning(
76 | model,
77 | walkers,
78 | hamiltonian=MACEHamiltonian.mace_mp0(), # fixed hamiltonian
79 | steps=20000,
80 | step=2000,
81 | **optional_sampling_kwargs,
82 | )
83 | ```
84 | Walkers are propagated for a total of 20,000 steps, and a sample is drawn every 2,000
85 | steps; each sample is QM-evaluated by the reference and added to the training data.
86 | If the walkers contain bias contributions, their total hamiltonian is simply the sum of
87 | the existing bias contributions and the hamiltonian given to the `passive_learning()`
88 | call.
89 | Additional keyword arguments to this function are passed directly into the sample function (e.g. for
90 | specifying the log level or the center-of-mass behavior).
91 |
92 | The returned model is the one trained on all data generated in the `passive_learning()` call, as well as on all data which was already present in the learning instance (for example because it had been initialized with `initial_data`, see above).
93 | The returned walkers are identical to the ones passed into the method; they are
94 | returned explicitly to emphasize that they do change internally during a `passive_learning()` call (they
95 | are either propagated or reset, and their metadynamics bias, if present, has accumulated
96 | more hills than before).
97 |
98 | ## active learning
99 |
100 | During active learning, walkers are propagated with a Hamiltonian generated using the
101 | current model. They are propagated for a given number of steps after which their final
102 | state is passed into the reference for correct labeling.
103 | Different from passive learning, active learning *does not allow for subsampling of the
104 | trajectories of the walkers*. The idea behind this is that if you wish to propagate the
105 | walker for 10 ps, and sample a structure every 1 ps to let each walker generate 10 states,
106 | it is likely much better to instead increase the number of walkers (to cover more regions
107 | in phase space) and propagate them in steps of 1 ps. Active learning is ideally suited for
108 | massively parallel workflows (maximal number of walkers, with minimal sampling time per
109 | walker) and we encourage users to exploit this.
110 |
111 | ```py
112 | model, walkers = learning.active_learning(
113 | model, # used to generate hamiltonian
114 | walkers,
115 | steps=2000, # no more 'step' argument!
116 | **optional_sampling_kwargs,
117 | )
118 | ```
119 | ## restarting a run
120 |
121 | `Learning` has first-class support for restarted runs -- simply resubmit your calculation!
122 | It will detect whether the corresponding output folder has already fully logged
123 | each of the iterations, and if so, load the final state of the model, the walkers, and the
124 | learning instance without actually performing any calculations.
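Concretely, a restart amounts to rerunning the exact same script with the same `path_output` (a sketch, mirroring the online learning example in the repository):

```py
learning = Learning(cp2k, path_output='./output')  # same folder as the original run

# this call detects that './output' already contains its results and simply
# loads the saved model and walkers instead of redoing any sampling,
# QM evaluation, or training
model, walkers = learning.passive_learning(
    model,
    walkers,
    hamiltonian=MACEHamiltonian.mace_mp0(),
    steps=10000,
    step=2000,
)
```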
125 |
--------------------------------------------------------------------------------
/docs/logo_dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/logo_dark.png
--------------------------------------------------------------------------------
/docs/logo_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/logo_light.png
--------------------------------------------------------------------------------
/docs/models.md:
--------------------------------------------------------------------------------
1 | Once we know how to represent datasets of atomic geometries and label them with
2 | target QM energy and force values, we can start defining and training ML potentials.
3 | Psiflow defines an abstract `Model` interface which each
4 | particular ML potential should subclass, though at the moment only
5 | [MACE](https://github.com/acesuit/mace) is implemented.
6 | In addition, psiflow provides configuration dataclasses for each model with
7 | reasonable defaults.
8 |
9 | A `Model` has essentially three methods:
10 |
11 | - `initialize`: compute energy shifts and scalings as well as the average number
12 | of neighbors (and any other network normalization metrics) using a given *training* dataset,
13 | and initialize model weights.
14 | - `train`: train the parameters of a model using two separate datasets, one for
15 |   actual training and one for validation. The current model parameters are used as
16 |   starting parameters for the training.
17 | - `create_hamiltonian`: spawn a hamiltonian in order to use the model with its current
18 |   weights in molecular dynamics simulations.
19 |
20 | The following is a minimal illustration:
21 | ```py
22 | from psiflow.data import Dataset, compute_rmse
23 | from psiflow.models import MACE
24 |
25 |
26 | # load data with energy and force labels included as extxyz
27 | train, valid = Dataset.load('all_data.xyz').split(0.9, shuffle=True)
28 |
29 | model = MACE( # for full arg list, see psiflow/models/_mace:MACEConfig
30 | num_channels=16,
31 | max_L=2,
32 | max_num_epochs=400,
33 | batch_size=16,
34 | )
35 |
36 | # initialize, train
37 | model.initialize(train) # this will calculate the scale/shifts, and average number of neighbors
38 | model.train(train, valid) # train using supplied datasets
39 |
40 | model.save('./') # saves model and config to current working directory!
41 |
42 | hamiltonian = model.create_hamiltonian()
43 | forces_pred = hamiltonian.compute(valid, 'forces')
44 | forces_target = valid.get('forces')
45 |
46 | rmse = compute_rmse(forces_pred, forces_target) # this is a Future!
47 | print('forces RMSE: {} eV/A'.format(rmse.result()))
48 |
49 | ```
50 | Note that `model.save()` saves both a `.yaml` file with all hyperparameters and the actual `.pth` model which is needed to reconstruct the corresponding PyTorch module (possibly outside of psiflow if needed).
51 | As such, it expects a directory as its argument (which will be created if it does not
52 | already exist).
53 |
54 | It is generally recommended to provide these models with an estimate of the absolute energy of an isolated
55 | atom for the specific level of theory and basis set considered (and this for each element).
56 | Instead of having the model learn the *absolute* total energy of the system, we first subtract these atomic energies in order
57 | to train the model on the *formation* energy of the system instead, as this generally improves the generalization performance
58 | of the model towards unseen stoichiometries.
59 |
60 | ```py
61 | model.add_atomic_energy('H', -13.7) # add atomic energy of isolated hydrogen atom
62 | model.initialize(some_training_data)
63 |
64 | model.add_atomic_energy('O', -400) # will raise an exception; model needs to be reinitialized first
65 | model.reset() # removes current model, but keeps raw config
66 | model.add_atomic_energy('O', -400) # OK!
67 | model.initialize(some_training_data) # offsets total energy with given atomic energy values per atom
68 |
69 | ```
70 | Whenever atomic energies are available, `Model` instances will automatically offset the potential energy in a (labeled)
71 | `Dataset` by the sum of the energies of the isolated atoms; the underlying PyTorch network is then initialized/trained
72 | on the formation energy of the system instead.
73 | In order to avoid artificially large energy discrepancies between models trained on the formation energy on one hand,
74 | and reference potential energies as obtained from any `Reference` on the other,
75 | the `evaluate` method will first perform the converse operation, i.e. add the energies of the isolated atoms
76 | to the model's prediction of the formation energy.
77 | Similarly, `create_hamiltonian()` also passes any atomic energies which were added to the
78 | model.
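The bookkeeping described above can be summarized with a toy calculation (all numbers hypothetical):

```py
# hypothetical isolated-atom energies, in eV
atomic_energies = {'H': -13.7, 'O': -430.0}

e_total = -466.0  # hypothetical QM total energy of a water molecule, in eV
e_atoms = 2 * atomic_energies['H'] + atomic_energies['O']

e_formation = e_total - e_atoms        # what the underlying network is trained on
e_prediction = e_formation + e_atoms   # what compute()/evaluate() returns

assert abs(e_prediction - e_total) < 1e-12
```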
79 |
--------------------------------------------------------------------------------
/docs/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/overview.png
--------------------------------------------------------------------------------
/docs/reference.md:
--------------------------------------------------------------------------------
1 | # QM calculations
2 | The energy and gradients of the ground-state Born-Oppenheimer surface can be obtained
3 | using varying levels of approximation.
4 | In psiflow, the calculation of the energy and its gradients can be performed for both
5 | `Geometry` and `Dataset` instances, using different software packages:
6 |
7 | - **CP2K** (periodic, mixed PW/lcao): very fast, and useful for pretty much any periodic
8 |   structure. Its forces tend to be quite noisy with the default grid settings, so some
9 |   level of caution is advised. Also, even though it uses both plane waves and atomic basis
10 |   sets, it does suffer from BSSE.
11 | - **GPAW** (periodic/cluster, PW/lcao/grid): slower but more numerically stable than CP2K;
12 |   essentially a fully open-source (and therefore transparent), free, and well-tested
13 |   alternative to VASP. Particularly useful for applications in which BSSE is a concern
14 |   (e.g. adsorption).
15 | - **ORCA** (cluster, lcao): useful for accurate high-level quantum chemistry calculations,
16 | e.g. MP2 and CCSD(T). *TODO*
17 |
18 | !!! note "Installation"
19 |     Because the 'correct' compilation and installation of quantum chemistry software is
20 |     notoriously cumbersome, we host separate container images for each of the packages
21 |     on Github, which are ready to use with psiflow on HPCs with either a Singularity
22 |     or Apptainer container runtime. The Docker files used to generate those images are
23 |     available in the repository:
24 |     [CP2K](https://github.com/molmod/psiflow/blob/main/Dockerfile.cp2k) or
25 |     [GPAW](https://github.com/molmod/psiflow/blob/main/Dockerfile.gpaw).
26 |     See the [configuration](configuration.md) section for more details.
27 |
28 | For each software package, psiflow provides a corresponding class which implements
29 | the appropriate input file manipulations, launch commands, and output parsing
30 | functionalities.
31 | They all inherit from the `Reference` base class, which provides a few key
32 | functionalities:
33 |
34 | - `data.evaluate(reference)`: this is the most common operation involving QM calculations;
35 | given a `Dataset` of atomic geometries, compute the energy and its gradients and insert
36 | them into the dataset such that they are saved for future reuse.
37 | - `reference.compute_atomic_energy`: provides the ability to compute isolated atom
38 |   reference energies, as this facilitates ML potential training on datasets with varying
39 |   numbers of atoms (see the snippet below).
40 | - `reference.compute(data)`: this is somewhat equivalent to the hamiltonian `compute`
41 | method, except that its argument `data` must be a `Dataset` instance, and the optional
42 | `batch_size` defaults to 1 (in order to maximize parallelization). It does not insert
43 | the computed properties into the data, but returns them as numpy arrays.
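For instance, isolated-atom energies can be requested from a reference and passed straight into a model, mirroring the online learning example in the repository (`box_size` sets the cell size of the isolated-atom calculation):

```py
e_H = cp2k.compute_atomic_energy('H', box_size=9)  # returns a Future
model.add_atomic_energy('H', e_H)
```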
44 |
45 | From a distance, QM reference objects look almost identical to hamiltonians, in the sense
46 | that they both take atomic geometries as input and return energies and gradients as
47 | output. The (imposed) distinction between both can be summarized in the following points.
48 |
49 | - hamiltonians can compute energies and forces for pretty much *any* structure. There is
50 |   no reason why they would fail. QM calculations, on the other hand, can fail due to unconverged
51 |   SCF cycles and/or time limit constraints. In fact, this happens relatively often in
52 |   active learning workflows. Reference objects take this into account by returning a unique
53 |   `NullState` whenever a calculation has failed (see the sketch below).
54 | - hamiltonians are orders of magnitude faster, and can be employed in meaningfully long
55 |   molecular dynamics simulations. This is not the case for QM calculations, and as such,
56 |   reference objects cannot be used in combination with walker sampling or geometry optimizations. If the
57 |   purpose is to perform molecular simulation at the DFT level, then the better approach is
58 |   to train an ML potential to any desired level of accuracy (almost always possible in
59 |   psiflow) and use that as a proxy for the QM interaction energy.
60 |   For the same reason, the default batch size for `reference.compute` calls is 1, i.e.
61 |   the QM calculation for each structure in the dataset is immediately scheduled
62 |   independently from the other ones.
63 |   With hamiltonians, that batch size defaults to 100 (split the data in chunks of 100 and
64 |   evaluate each chunk of 100 states serially).
65 |
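A minimal sketch of how failed calculations can be filtered out, assuming `NullState` is importable from `psiflow.geometry`:

```py
from psiflow.geometry import NullState

evaluated = data.evaluate(cp2k)
for geometry in evaluated.geometries().result():
    if geometry == NullState:  # SCF did not converge, or the time limit was reached
        continue
    print('energy: {} eV'.format(geometry.energy))
```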
66 |
67 | ## CP2K 2024.1
68 | A `CP2K` reference instance can be created based on a (multiline) input string.
69 | Only the `FORCE_EVAL` section of the input is important since the atomic coordinates and cell
70 | parameters are automatically inserted for every calculation.
71 | All basis set, pseudopotential, and D3 parameters from the official
72 | [CP2K repository](https://github.com/cp2k/cp2k) are directly available in the
73 | container image (i.e. no need to download or provide these files separately).
74 | Choose which one you would like to use by using the corresponding filename in the input
75 | file (i.e. omit any preceding filepaths).
76 | A typical [input file](https://github.com/molmod/psiflow/blob/main/examples/data/cp2k_input.txt)
77 | is provided in the [examples](https://github.com/molmod/psiflow/tree/main/examples).
78 |
79 | ```py
80 | from psiflow.reference import CP2K
81 |
82 |
83 | # create reference instance
84 | with open('cp2k_input.txt', 'r') as f:
85 | force_eval_input_str = f.read()
86 | cp2k = CP2K(force_eval_input_str)
87 |
88 | # compute energy and forces, and store them in the geometries
89 | evaluated_data = data.evaluate(cp2k)
90 |
91 | for geometry in evaluated_data.geometries().result():
92 | print('energy: {} eV'.format(geometry.energy))
93 | print('forces: {} eV/A'.format(geometry.per_atom.forces))
94 |
95 | ```
96 |
97 | ## GPAW 24.1
98 | A `GPAW` reference is created in much the same way as a traditional GPAW 'calculator'
99 | instance, with support for entirely the same keyword arguments:
100 | ```py
101 | from psiflow.reference import GPAW
102 |
103 | gpaw = GPAW(mode='fd', nbands=0, xc='PBE') # see GPAW calculator on gitlab for full list
104 | energies = gpaw.compute(data, 'energy')
105 |
106 | ```
107 | A notable feature of GPAW is that it already outputs all energies as formation energies,
108 | i.e. it internally subtracts the sum of the energies of the isolated atoms. As such,
109 | `compute_atomic_energy` for a GPAW reference always just returns 0 eV.
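As a sketch (assuming the same call signature as for CP2K):

```py
assert gpaw.compute_atomic_energy('O', box_size=9).result() == 0.0
```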
110 |
111 | ## ORCA
112 | TODO
113 |
--------------------------------------------------------------------------------
/docs/wandb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/wandb.png
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | - [Replica exchange molecular dynamics](https://github.com/molmod/psiflow/tree/main/examples/alanine_replica_exchange.py) | **alanine dipeptide**: replica exchange molecular dynamics simulation of alanine dipeptide, using the MACE-MP0 universal potential.
2 | The inclusion of high-temperature replicas allows for fast conformational transitions and improves ergodicity.
3 | - [Geometry optimizations](https://github.com/molmod/psiflow/tree/main/examples/formic_acid_transition.py) | **formic acid dimer**: approximate transition state calculation for the proton exchange reaction in a formic acid dimer,
4 | using simple bias potentials and a few geometry optimizations.
5 | - [Static and dynamic frequency analysis](https://github.com/molmod/psiflow/tree/main/examples/h2_static_dynamic.py) | **dihydrogen**: Hessian-based estimate of the H-H bond strength and corresponding IR absorption frequency, and a comparison with a dynamical estimate from NVE simulation and Fourier analysis.
6 |
7 | - [Bulk modulus calculation](https://github.com/molmod/psiflow/tree/main/examples/iron_bulk_modulus.py) | **iron**: estimate of the bulk modulus of bcc iron using a series of NPT simulations at different pressures
8 |
9 | - [Solid-state phase stabilities](https://github.com/molmod/psiflow/tree/main/examples/iron_harmonic_fcc_bcc.py) | **iron**: estimating the relative stability of fcc and bcc iron with anharmonic corrections using thermodynamic integration (see e.g. [Phys Rev B., 2018](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.97.054102))
10 |
11 | - [ML potentials from scratch](https://github.com/molmod/psiflow/tree/main/examples/online_learning_pimd.py) | **water**: develop an ML potential for water based on a single geometry as input, using a combination of passive and active learning.
12 |
13 | - [Replica exchange umbrella sampling](https://github.com/molmod/psiflow/tree/main/examples/proton_jump_plumed.py) |
14 | **vinyl alcohol**: explore a reactive transition path with metadynamics, and use the
15 | resulting data to perform umbrella sampling with replica exchange between umbrellas.
16 |
17 | - [DFT singlepoints](https://github.com/molmod/psiflow/tree/main/examples/water_cp2k_noise.py) | **water**: analysis of the numerical noise in DFT energy and force evaluations using CP2K and the RPBE(D3) functional, for a collection of water molecules.
18 |
19 | - [Path-integral molecular dynamics](https://github.com/molmod/psiflow/tree/main/examples/water_path_integral_md.py) | **water**: demonstration of the impact of nuclear quantum effects on the variance in O-H distance in liquid water. Path-integral molecular dynamics simulations with increasing number of beads (1, 2, 4, 8, 16) approximate the proton delocalization, and lead to systematically larger variance in O-H distance.
20 |
21 | - [ML potential training](https://github.com/molmod/psiflow/tree/main/examples/water_train_validate.py) | **water**: simple training and validation script for MACE on a small dataset of water configurations.
22 |
--------------------------------------------------------------------------------
/examples/alanine_replica_exchange.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import psiflow
4 | from psiflow.geometry import Geometry
5 | from psiflow.hamiltonians import MACEHamiltonian
6 | from psiflow.sampling import Walker, replica_exchange, sample
7 |
8 |
9 | def compute_dihedrals(positions):
10 | indices_phi = np.array([4, 6, 8, 14], dtype=int)
11 | indices_psi = np.array([6, 8, 14, 16], dtype=int)
12 |
13 | dihedrals = []
14 | for indices in [indices_phi, indices_psi]:
15 | p1 = positions[:, indices[0], :]
16 | p2 = positions[:, indices[1], :]
17 | p3 = positions[:, indices[2], :]
18 | p4 = positions[:, indices[3], :]
19 |
20 | # Calculate vectors between the points
21 | v1 = p2 - p1
22 | v2 = p3 - p2
23 | v3 = p4 - p3
24 |
25 | # Normal vectors of the planes formed by the atoms
26 | n1 = np.cross(v1, v2)
27 | n2 = np.cross(v2, v3)
28 |
29 | # Normalize the normal vectors
30 | n1_norm = np.linalg.norm(n1, axis=1, keepdims=True)
31 | n2_norm = np.linalg.norm(n2, axis=1, keepdims=True)
32 | n1 = n1 / n1_norm
33 | n2 = n2 / n2_norm
34 |
35 | dot_product = np.einsum("ij,ij->i", n1, n2)
36 | dot_product = np.clip(dot_product, -1.0, 1.0)
37 |         dihedrals.append(np.arccos(dot_product))  # arccos gives the unsigned angle in [0, pi]
38 | return dihedrals[0], dihedrals[1] # phi, psi
39 |
40 |
41 | def main():
42 | c7eq = np.array([2.8, 2.9]) # noqa: F841
43 | c7ax = np.array([1.2, -0.9]) # noqa: F841
44 | alanine = Geometry.from_string( # starts in c7ax config
45 | """
46 | 22
47 | Properties=species:S:1:pos:R:3 pbc="F F F"
48 | H 12.16254811 17.00740464 -2.89412387
49 | C 12.83019906 16.90038734 -2.04015291
50 | H 12.24899130 16.91941920 -1.11925017
51 | H 13.51243976 17.75054269 -2.01566384
52 | C 13.65038992 15.63877411 -2.06030255
53 | O 14.36738511 15.33906728 -1.11622456
54 | N 13.53865222 14.88589532 -3.17304444
55 | H 12.86898792 15.18433500 -3.85740375
56 | C 14.28974353 13.67606132 -3.48863158
57 | H 14.01914560 13.42643243 -4.51320992
58 | C 15.79729109 13.88220294 -3.42319959
59 | H 16.12104919 14.14072623 -2.41784410
60 | H 16.29775468 12.96420765 -3.73059171
61 | H 16.09643748 14.68243453 -4.10096574
62 | C 13.86282687 12.43546588 -2.69127862
63 | O 13.58257313 11.40703144 -3.28015921
64 | N 13.87365846 12.57688288 -1.35546630
65 | H 14.15017274 13.47981654 -0.98516877
66 | C 13.53768820 11.50108113 -0.46287859
67 | H 14.38392004 11.24258036 0.17699860
68 | H 12.69022125 11.76658121 0.17241519
69 | H 13.27142638 10.63298597 -1.06170510
70 | """
71 | )
72 | mace = MACEHamiltonian.mace_mp0()
73 |
74 | walkers = []
75 | for temperature in [150, 200, 250, 300, 400, 500, 600, 700, 800, 900, 1000, 1100]:
76 | walker = Walker(
77 | alanine,
78 | mace,
79 | temperature=temperature,
80 | )
81 | walkers.append(walker)
82 | replica_exchange(walkers, trial_frequency=50)
83 |
84 | outputs = sample(walkers, steps=20000, step=200)
85 | phi, psi = compute_dihedrals(outputs[0].trajectory.get("positions").result())
86 | for f, s in zip(phi, psi): # some c7eq conformations should appear here
87 | print("{:5.3f} {:5.3f}".format(f, s))
88 |
89 |
90 | if __name__ == "__main__":
91 | with psiflow.load():
92 | main()
93 |
--------------------------------------------------------------------------------
/examples/data/acetaldehyde.xyz:
--------------------------------------------------------------------------------
1 | 7
2 | Properties=species:S:1:pos:R:3
3 | O 0.694151672 0.776743934 -0.455455855
4 | C 0.195993254 -0.270095005 -0.307053207
5 | C -0.846060202 -0.538006022 0.669585079
6 | H 0.515801613 -1.097661033 -0.987914453
7 | H -0.589257101 -0.505600908 1.733123281
8 | H -1.553309062 0.309375207 0.558315778
9 | H -1.411674563 -1.440354174 0.5617281699
10 |
--------------------------------------------------------------------------------
/examples/data/ani500k_cc_cpu.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/examples/data/ani500k_cc_cpu.model
--------------------------------------------------------------------------------
/examples/data/cp2k_input.txt:
--------------------------------------------------------------------------------
1 | &FORCE_EVAL
2 | STRESS_TENSOR ANALYTICAL
3 | METHOD QS
4 | &DFT
5 | BASIS_SET_FILE_NAME BASIS_MOLOPT_UZH
6 | POTENTIAL_FILE_NAME POTENTIAL_UZH
7 | CHARGE 0
8 | &QS
9 | METHOD GPW
10 | EPS_DEFAULT 1.0E-12
11 | &END QS
12 | &XC
13 | &XC_FUNCTIONAL
14 | &GGA_X_RPBE
15 | &END GGA_X_RPBE
16 | &GGA_C_PBE
17 | &END GGA_C_PBE
18 | &END XC_FUNCTIONAL
19 | !&XC_FUNCTIONAL PBE
20 | !&END XC_FUNCTIONAL
21 | !&VDW_POTENTIAL
22 | ! POTENTIAL_TYPE PAIR_POTENTIAL
23 | ! &PAIR_POTENTIAL
24 | ! TYPE DFTD3(BJ)
25 | ! REFERENCE_FUNCTIONAL PBE
26 | ! PARAMETER_FILE_NAME dftd3.dat
27 | ! &END PAIR_POTENTIAL
28 | !&END VDW_POTENTIAL
29 | &END XC
30 | &SCF
31 | EPS_SCF 1.0E-8
32 | SCF_GUESS RESTART
33 | MAX_SCF 20
34 | &OT
35 | MINIMIZER DIIS
36 | PRECONDITIONER FULL_ALL
37 | &END OT
38 | &OUTER_SCF
39 | EPS_SCF 1.0E-8
40 | MAX_SCF 5
41 | &END OUTER_SCF
42 | &END SCF
43 | &MGRID
44 | CUTOFF 1000
45 | REL_CUTOFF 60
46 | &END MGRID
47 | &END DFT
48 | &SUBSYS
49 | &KIND O
50 | BASIS_SET TZVP-MOLOPT-PBE-GTH-q6
51 | POTENTIAL GTH-PBE-q6
52 | &END KIND
53 | &KIND H
54 | BASIS_SET TZVP-MOLOPT-PBE-GTH-q1
55 | POTENTIAL GTH-PBE-q1
56 | &END KIND
57 | &END SUBSYS
58 | &END FORCE_EVAL
59 |
--------------------------------------------------------------------------------
/examples/data/h2o_32.xyz:
--------------------------------------------------------------------------------
1 | 96
2 | Lattice="9.8528 0 0 0 9.8528 0 0 0 9.8528"
3 | O 2.280398 9.146539 5.088696
4 | O 1.251703 2.406261 7.769908
5 | O 1.596302 6.920128 0.656695
6 | O 2.957518 3.771868 1.877387
7 | O 0.228972 5.884026 6.532308
8 | O 9.023431 6.119654 0.092451
9 | O 7.256289 8.493641 5.772041
10 | O 5.090422 9.467016 0.743177
11 | O 6.330888 7.363471 3.747750
12 | O 7.763819 8.349367 9.279457
13 | O 8.280798 3.837153 5.799282
14 | O 8.878250 2.025797 1.664102
15 | O 9.160372 0.285100 6.871004
16 | O 4.962043 4.134437 0.173376
17 | O 2.802896 8.690383 2.435952
18 | O 9.123223 3.549232 8.876721
19 | O 1.453702 1.402538 2.358278
20 | O 6.536550 1.146790 7.609732
21 | O 2.766709 0.881503 9.544263
22 | O 0.856426 2.075964 5.010625
23 | O 6.386036 1.918950 0.242690
24 | O 2.733023 4.452756 5.850203
25 | O 4.600039 9.254314 6.575944
26 | O 3.665373 6.210561 3.158420
27 | O 3.371648 6.925594 7.476036
28 | O 5.287920 3.270653 6.155080
29 | O 5.225237 6.959594 9.582991
30 | O 0.846293 5.595877 3.820630
31 | O 9.785620 8.164617 3.657879
32 | O 8.509982 4.430362 2.679946
33 | O 1.337625 8.580920 8.272484
34 | O 8.054437 9.221335 1.991376
35 | H 1.762019 9.820429 5.528454
36 | H 3.095987 9.107088 5.588186
37 | H 0.554129 2.982634 8.082024
38 | H 1.771257 2.954779 7.182181
39 | H 2.112148 6.126321 0.798136
40 | H 1.776389 7.463264 1.424030
41 | H 3.754249 3.824017 1.349436
42 | H 3.010580 4.524142 2.466878
43 | H 0.939475 5.243834 6.571945
44 | H 0.515723 6.520548 5.877445
45 | H 9.852960 6.490366 0.393593
46 | H 8.556008 6.860063 -0.294256
47 | H 7.886607 7.941321 6.234506
48 | H 7.793855 9.141028 5.315813
49 | H 4.467366 9.971162 0.219851
50 | H 5.758685 10.102795 0.998994
51 | H 6.652693 7.917443 3.036562
52 | H 6.711966 7.743594 4.539279
53 | H 7.751955 8.745180 10.150905
54 | H 7.829208 9.092212 8.679343
55 | H 8.312540 3.218330 6.528858
56 | H 8.508855 4.680699 6.189990
57 | H 9.742249 1.704975 1.922581
58 | H 8.799060 2.876412 2.095861
59 | H 9.505360 1.161677 6.701213
60 | H 9.920117 -0.219794 7.161006
61 | H 4.749903 4.186003 -0.758595
62 | H 5.248010 5.018415 0.403676
63 | H 3.576065 9.078451 2.026264
64 | H 2.720238 9.146974 3.273164
65 | H 9.085561 4.493058 9.031660
66 | H 9.215391 3.166305 9.749133
67 | H 1.999705 2.060411 1.927796
68 | H 1.824184 0.564565 2.081195
69 | H 7.430334 0.849764 7.438978
70 | H 6.576029 1.537017 8.482885
71 | H 2.415851 1.576460 8.987338
72 | H 2.276957 0.099537 9.289499
73 | H 1.160987 1.818023 4.140602
74 | H 0.350256 2.874437 4.860741
75 | H 5.768804 2.638450 0.375264
76 | H 7.221823 2.257514 0.563730
77 | H 3.260797 5.243390 5.962382
78 | H 3.347848 3.732214 5.988196
79 | H 5.328688 9.073059 5.982269
80 | H 5.007063 9.672150 7.334875
81 | H 4.566850 6.413356 3.408312
82 | H 3.273115 7.061666 2.963521
83 | H 3.878372 7.435003 6.843607
84 | H 3.884673 6.966316 8.283117
85 | H 5.918240 3.116802 5.451335
86 | H 5.355924 2.495093 6.711958
87 | H 5.071858 7.687254 10.185667
88 | H 6.106394 7.112302 9.241707
89 | H 1.637363 5.184910 4.169264
90 | H 0.427645 4.908936 3.301903
91 | H 9.971698 7.227076 3.709104
92 | H 10.647901 8.579244 3.629806
93 | H 8.046808 5.126383 2.213838
94 | H 7.995317 4.290074 3.474723
95 | H 1.872601 7.864672 7.930401
96 | H 0.837635 8.186808 8.987268
97 | H 8.314696 10.115534 2.212519
98 | H 8.687134 8.667252 2.448452
99 |
--------------------------------------------------------------------------------
/examples/data/vinyl_alcohol.xyz:
--------------------------------------------------------------------------------
1 | 7
2 | Properties=species:S:1:pos:R:3
3 | O 1.041371715 -0.216863172 0.001603252
4 | C -0.098316254 0.512294574 -0.01021628
5 | C -1.225162144 -0.248210652 0.020868361
6 | H -0.087363805 1.596485281 -0.07557041
7 | H 0.61765221 -1.094559605 -0.02702971
8 | H -2.216985293 0.211688229 -0.00469380
9 | H -1.115257687 -1.357478425 -0.04507284
10 |
--------------------------------------------------------------------------------
/examples/h2_static_dynamic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.units import _c, second
3 |
4 | import psiflow
5 | from psiflow.free_energy import compute_frequencies, compute_harmonic
6 | from psiflow.geometry import Geometry
7 | from psiflow.hamiltonians import MACEHamiltonian
8 | from psiflow.sampling import Walker, optimize, sample
9 |
10 |
11 | def frequency_dynamic(start, hamiltonian):
12 | walker = Walker(
13 | start,
14 | hamiltonian=hamiltonian,
15 | temperature=None, # NVE!
16 | timestep=0.25,
17 | )
18 |
19 | step = 10
20 | output = sample(
21 | [walker],
22 | steps=20,
23 | step=step,
24 | max_force=10,
25 | )[0]
26 | positions = output.trajectory.get("positions").result()
27 | distances = np.linalg.norm(positions[:, 0, :] - positions[:, 1, :], axis=1)
28 | distances -= np.mean(distances) # don't need average interatomic distance
29 |
30 | timestep = walker.timestep * 1e-15 * step
31 | spectrum = np.abs(np.fft.fft(distances))
32 |
33 | freq_axis = np.fft.fftfreq(len(distances), timestep)
34 | index = np.argmax(spectrum[np.where(freq_axis > 0)])
35 | peak_frequency = freq_axis[np.where(freq_axis > 0)][index]
36 |
37 |     return peak_frequency / (100 * _c)  # convert Hz to inverse cm
38 |
39 |
40 | def frequency_static(start, hamiltonian):
41 | minimum = optimize(
42 | start,
43 | hamiltonian,
44 | 2000,
45 | ftol=1e-4,
46 | )
47 | hessian = compute_harmonic(
48 | minimum,
49 | hamiltonian,
50 | asr="crystal",
51 | pos_shift=0.001,
52 | )
53 | frequencies = compute_frequencies(hessian, minimum).result()
54 |     return frequencies[-1] * second / (100 * _c)  # highest frequency, converted to inverse cm
55 |
56 |
57 | def main():
58 | geometry = Geometry.from_data(
59 | numbers=np.ones(2),
60 | positions=np.array([[0, 0, 0], [0.8, 0, 0]]),
61 | cell=None,
62 | )
63 | mace = MACEHamiltonian.mace_mp0()
64 |
65 | dynamic = frequency_dynamic(geometry, mace)
66 | static = frequency_static(geometry, mace)
67 |
68 | print("H2 frequency (dynamic) [inv(cm)]: {}".format(dynamic))
69 | print("H2 frequency (static) [inv(cm)]: {}".format(static))
70 |
71 |
72 | if __name__ == "__main__":
73 | with psiflow.load():
74 | main()
75 |
--------------------------------------------------------------------------------
/examples/iron_bulk_modulus.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.build import bulk, make_supercell
3 |
4 | import psiflow
5 | from psiflow.geometry import Geometry
6 | from psiflow.hamiltonians import MACEHamiltonian
7 | from psiflow.sampling import Walker, sample
8 |
9 |
10 | def main():
11 | iron = bulk("Fe", "bcc", a=2.8)
12 | geometry = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3)))
13 | mace = MACEHamiltonian.mace_mp0()
14 |
15 | pressures = (-10 + np.arange(5) * 5) * 1e3 # in MPa
16 | walkers = [Walker(geometry, mace, temperature=300, pressure=p) for p in pressures]
17 |
18 | name = "volume{angstrom3}"
19 | outputs = sample(walkers, steps=4000, step=50, observables=[name])
20 | volumes = [np.mean(o[name].result()) for o in outputs]
21 |
22 |     p = np.polyfit(volumes, pressures, deg=1)  # linear fit P(V) = p[0] * V + p[1]
23 |     volume0 = (-1.0) * p[1] / p[0]  # equilibrium volume, where P = 0
24 |     bulk_modulus = (-1.0) * volume0 * p[0] / 1000  # B = -V0 * (dP/dV), in GPa
25 | print("bulk modulus [GPa]: {}".format(bulk_modulus))
26 |
27 |
28 | if __name__ == "__main__":
29 | with psiflow.load():
30 | main()
31 |
--------------------------------------------------------------------------------
/examples/iron_harmonic_fcc_bcc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.build import bulk, make_supercell
3 | from ase.units import kB
4 |
5 | import psiflow
6 | from psiflow.data import Dataset
7 | from psiflow.free_energy import Integration, compute_harmonic, harmonic_free_energy
8 | from psiflow.geometry import Geometry
9 | from psiflow.hamiltonians import Harmonic, MACEHamiltonian
10 | from psiflow.sampling import optimize
11 |
12 |
13 | def main():
14 | iron = bulk("Fe", "bcc", a=2.87, orthorhombic=True)
15 | bcc = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3)))
16 | iron = bulk("Fe", "fcc", a=3.57, orthorhombic=True)
17 | fcc = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3)))
18 |
19 | geometries = {
20 | "bcc": bcc,
21 | "fcc": fcc,
22 | }
23 | theoretical = {name: None for name in geometries}
24 | simulated = {name: None for name in geometries}
25 |
26 | mace = MACEHamiltonian.mace_mp0("small")
27 | scaling = 0.9
28 | temperature = 800
29 | beta = 1 / (kB * temperature)
30 |
31 | for name in geometries:
32 | minimum = optimize(
33 | geometries[name], mace, ftol=1e-4, steps=1000, mode="bfgstrm"
34 | )
35 | hessian = compute_harmonic(minimum, mace, pos_shift=0.001)
36 |
37 | # simulate
38 | harmonic = Harmonic(minimum, hessian)
39 | integration = Integration(
40 | harmonic,
41 | temperatures=[temperature],
42 | delta_hamiltonian=(scaling - 1) * harmonic,
43 | delta_coefficients=np.linspace(0, 1, num=4, endpoint=True),
44 | )
45 | walkers = integration.create_walkers( # noqa: F841
46 | Dataset([harmonic.reference_geometry]),
47 | timestep=3,
48 | ) # heavy atoms
49 | integration.sample(steps=500, step=10, start=300)
50 | integration.compute_gradients()
51 |
52 | reduced_f = integration.along_delta(temperature=temperature).result()
53 | f_harmonic = harmonic_free_energy(
54 | hessian,
55 | temperature=temperature,
56 | quantum=False,
57 | )
58 | simulated[name] = (f_harmonic.result() + reduced_f[-1]) / beta
59 |
60 | # theoretical
61 | f_harmonic_scaled = harmonic_free_energy(
62 | scaling * hessian.result(),
63 | temperature=temperature,
64 | quantum=False,
65 | )
66 | theoretical[name] = f_harmonic_scaled.result() / beta
67 |
68 | ddF = theoretical["bcc"] - theoretical["fcc"]
69 | print("theoretical delta(delta(F)) [eV]: {}".format(ddF))
70 |
71 | ddF = simulated["bcc"] - simulated["fcc"]
72 | print(" simulated delta(delta(F)) [eV]: {}".format(ddF))
73 |
74 |
75 | if __name__ == "__main__":
76 | with psiflow.load():
77 | main()
78 |
--------------------------------------------------------------------------------
/examples/online_learning_pimd.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import psiflow
4 | from psiflow.reference import CP2K
5 | from psiflow.data import Dataset
6 | from psiflow.sampling import Walker
7 | from psiflow.models import MACE
8 | from psiflow.hamiltonians import MACEHamiltonian
9 | from psiflow.learning import Learning
10 |
11 |
12 | def main():
13 | path_output = Path.cwd() / 'output'
14 |
15 | with open('data/cp2k_input.txt', 'r') as f: cp2k_input = f.read()
16 | cp2k = CP2K(cp2k_input)
17 |
18 | model = MACE(
19 | batch_size=4,
20 | lr=0.02,
21 | max_ell=3,
22 | r_max=6.5,
23 | energy_weight=100,
24 | correlation=3,
25 | max_L=1,
26 | num_channels=24,
27 | patience=8,
28 | scheduler_patience=4,
29 | max_num_epochs=200,
30 | )
31 | model.add_atomic_energy('H', cp2k.compute_atomic_energy('H', box_size=9))
32 | model.add_atomic_energy('O', cp2k.compute_atomic_energy('O', box_size=9))
33 |
34 | state = Dataset.load('data/water_train.xyz')[0]
35 | walkers = (
36 | Walker(state, temperature=300, pressure=0.1).multiply(40) +
37 | Walker(state, temperature=450, pressure=0.1).multiply(40) +
38 | Walker(state, temperature=600, pressure=0.1).multiply(40)
39 | )
40 | learning = Learning(
41 | cp2k,
42 | path_output,
43 | wandb_project='psiflow_examples',
44 | wandb_group='water_learning_pimd',
45 | )
46 |
47 | model, walkers = learning.passive_learning(
48 | model,
49 | walkers,
50 | hamiltonian=MACEHamiltonian.mace_mp0(),
51 | steps=10000,
52 | step=2000,
53 | )
54 |
55 | for i in range(3):
56 | model, walkers = learning.active_learning(
57 | model,
58 | walkers,
59 | steps=2000,
60 | )
61 |
62 | # PIMD phase for low-temperature walkers
63 | for j, walker in enumerate(walkers[:40]):
64 | walker.nbeads = 32
65 | model, walkers = learning.active_learning(
66 | model,
67 | walkers,
68 | steps=500,
69 | )
70 |
71 |
72 | if __name__ == '__main__':
73 | with psiflow.load():
74 | main()
75 |
--------------------------------------------------------------------------------
/examples/proton_jump_plumed.py:
--------------------------------------------------------------------------------
1 | from ase.units import kJ, mol
2 | import numpy as np
3 |
4 | import psiflow
5 | from psiflow.data import Dataset
6 | from psiflow.geometry import Geometry
7 | from psiflow.hamiltonians import PlumedHamiltonian, MACEHamiltonian
8 | from psiflow.sampling import Walker, sample, quench, Metadynamics, replica_exchange
9 |
10 |
11 | PLUMED_INPUT = """UNITS LENGTH=A ENERGY=kj/mol
12 | d_C: DISTANCE ATOMS=3,5
13 | d_O: DISTANCE ATOMS=1,5
14 | CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO
15 |
16 | """
17 |
18 |
19 | def get_bias(kappa: float, center: float):
20 | plumed_str = PLUMED_INPUT
21 | plumed_str += '\n'
22 | plumed_str += 'RESTRAINT ARG=CV KAPPA={} AT={}\n'.format(kappa, center)
23 | return PlumedHamiltonian(plumed_str)
24 |
25 |
26 | def main():
27 | aldehyd = Geometry.load('data/acetaldehyde.xyz')
28 | alcohol = Geometry.load('data/vinyl_alcohol.xyz')
29 |
30 | mace = MACEHamiltonian.mace_cc()
31 | energy = mace.compute([aldehyd, alcohol], 'energy').result()
32 | energy = (energy - np.min(energy)) / (kJ / mol)
33 | print('E_vinyl - E_aldehyde = {:7.3f} kJ/mol'.format(energy[1] - energy[0]))
34 |
35 | # generate initial structures using metadynamics
36 | plumed_str = PLUMED_INPUT
37 | plumed_str += 'METAD ARG=CV PACE=10 SIGMA=0.1 HEIGHT=5\n'
38 | metadynamics = Metadynamics(plumed_str)
39 |
40 | # create 40 identical walkers
41 | walker = Walker(
42 | alcohol,
43 | hamiltonian=mace,
44 | temperature=300,
45 | metadynamics=metadynamics,
46 | )
47 |
48 | # do MTD and create large dataset from all trajectories
49 | outputs = sample([walker], steps=8000, step=50)
50 | data_mtd = sum([o.trajectory for o in outputs], start=Dataset([]))
51 | data_mtd.save('mtd.xyz')
52 |
53 | # initialize walkers for umbrella sampling
54 | walkers = []
55 | for i, center in enumerate(np.linspace(1, 3, num=16)):
56 | bias = get_bias(kappa=1500, center=center)
57 | hamiltonian = mace + bias
58 | walker = Walker(alcohol, hamiltonian=hamiltonian, temperature=300)
59 | walkers.append(walker)
60 | quench(walkers, data_mtd) # make sure initial structure is reasonable
61 | replica_exchange(walkers, trial_frequency=100) # use REX for improved sampling
62 |
63 | outputs = sample(walkers, steps=1000, step=10)
64 |
65 |
66 | if __name__ == '__main__':
67 |     with psiflow.load():
68 | main()
69 |
--------------------------------------------------------------------------------
/examples/submit/hortense.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | parsl_log_level: DEBUG
3 | container_engine: 'apptainer'
4 | container_uri: 'oras://ghcr.io/molmod/psiflow:main_cu118'
5 | default_threads: 8
6 | ModelEvaluation:
7 | cores_per_worker: 12
8 | gpu: True
9 | max_simulation_time: 20
10 | env_vars:
11 | KMP_BLOCKTIME: "1"
12 | slurm:
13 | partition: "gpu_rome_a100"
14 | account: "2023_070"
15 | nodes_per_block: 1
16 | cores_per_node: 48
17 | max_blocks: 1
18 | walltime: "12:00:00"
19 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n"
20 | ModelTraining:
21 | cores_per_worker: 12
22 | gpu: true
23 | max_training_time: 40
24 | env_vars:
25 | OMP_PROC_BIND: "spread"
26 | slurm:
27 | partition: "gpu_rome_a100"
28 | account: "2023_070"
29 | nodes_per_block: 1
30 | cores_per_node: 12
31 | max_blocks: 1
32 | walltime: "12:00:00"
33 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n"
34 | CP2K:
35 | cores_per_worker: 64
36 | max_evaluation_time: 30
37 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp'
38 | slurm:
39 | partition: "cpu_rome"
40 | account: "2024_079"
41 | nodes_per_block: 1
42 | cores_per_node: 64
43 | max_blocks: 2
44 | walltime: "06:00:00"
45 | scheduler_options: "#SBATCH --clusters=dodrio\n"
46 | ...
47 |
--------------------------------------------------------------------------------
/examples/submit/lumi.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | parsl_log_level: DEBUG
3 | container_engine: 'singularity'
4 | container_uri: 'oras://ghcr.io/molmod/psiflow:main_rocm5.6'
5 | default_threads: 8
6 | CP2K:
7 | cores_per_worker: 32
8 | max_evaluation_time: 20
9 | launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp'
10 | slurm:
11 | partition: "standard"
12 | account: "project_465001125"
13 | nodes_per_block: 1
14 | cores_per_node: 128
15 | max_blocks: 10
16 | walltime: "01:00:00"
17 | ModelEvaluation:
18 | cores_per_worker: 7
19 | gpu: True
20 | slurm:
21 | partition: "standard-g"
22 | account: "project_465001125"
23 | nodes_per_block: 1
24 | cores_per_node: 56
25 | max_blocks: 5
26 | walltime: "01:00:00"
27 | scheduler_options: "#SBATCH --gres=gpu:8\n"
28 | worker_init: "ml LUMI/23.09 && ml partition/G && ml rocm/5.6\n"
29 | ModelTraining:
30 | cores_per_worker: 7
31 | gpu: true
32 | multigpu: true
33 | slurm:
34 | partition: "standard-g"
35 | account: "project_465001125"
36 | nodes_per_block: 1
37 | cores_per_node: 56
38 | walltime: "01:00:00"
39 | scheduler_options: "#SBATCH --gres=gpu:8\n"
40 | worker_init: "ml LUMI/23.09 && ml partition/G && ml rocm/5.6\n"
41 | ...
42 |
--------------------------------------------------------------------------------
/examples/submit/submit_hortense.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # List of filenames
4 | files=(
5 | "h2_static_dynamic.py"
6 | "iron_bulk_modulus.py"
7 | "iron_harmonic_fcc_bcc.py"
8 | "water_cp2k_noise.py"
9 | "water_path_integral_md.py"
10 | "water_train_validate.py"
11 | "alanine_replica_exchange.py"
12 | )
13 |
14 | curl -O https://raw.githubusercontent.com/molmod/psiflow/main/examples/hortense.yaml
15 |
16 | run_dir=$(pwd)/run_examples
17 | mkdir $run_dir && cp hortense.yaml $run_dir && cd $run_dir
18 |
19 | # Loop over each filename
20 | for filename in "${files[@]}"
21 | do
22 | name="${filename%.*}"
23 | mkdir $name
24 | cp hortense.yaml $name
25 |
26 | cat > $name/job.sh <
--------------------------------------------------------------------------------
/examples/water_path_integral_md.py:
--------------------------------------------------------------------------------
45 |         print("nbeads = {:3d} --> std(O-H) = {} A".format(nbeads, std))
46 |
47 |
48 | if __name__ == "__main__":
49 | with psiflow.load():
50 | main()
51 |
--------------------------------------------------------------------------------
/examples/water_train_validate.py:
--------------------------------------------------------------------------------
1 | import psiflow
2 | from psiflow.data import Dataset, compute_rmse
3 | from psiflow.models import MACE
4 |
5 |
6 | def main():
7 | data = Dataset.load("data/water_train.xyz")
8 | model = MACE(
9 | batch_size=2,
10 | lr=0.02,
11 | max_ell=3,
12 | r_max=5.5,
13 | energy_weight=100,
14 | correlation=3,
15 | max_L=1,
16 | num_channels=16,
17 | max_num_epochs=20,
18 | swa=False,
19 | )
20 |
21 | train, valid = data.split(0.9, shuffle=True)
22 | model.initialize(train)
23 | model.train(train, valid)
24 | hamiltonian = model.create_hamiltonian()
25 |
26 | target_e = data.get("per_atom_energy")
27 | target_f = data.get("forces")
28 |
29 | data_predicted = data.evaluate(hamiltonian)
30 | predict_e = data_predicted.get("per_atom_energy")
31 | predict_f = data_predicted.get("forces")
32 |
33 | e_rmse = compute_rmse(target_e, predict_e)
34 | f_rmse = compute_rmse(target_f, predict_f)
35 |
36 | print("RMSE(energy) [meV/atom]: {}".format(e_rmse.result() * 1000))
37 | print("RMSE(forces) [meV/angstrom]: {}".format(f_rmse.result() * 1000))
38 |
39 |
40 | if __name__ == "__main__":
41 | with psiflow.load():
42 | main()
43 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: psiflow
2 | theme:
3 | favicon: icon.svg
4 | name:
5 | material
6 | #font:
7 | # text: overpass
8 | palette:
9 | primary: teal
10 | accent: yellow
11 | scheme: default
12 | logo: icon.svg
13 | features:
14 | - content.code.copy
15 | - navigation.instant
16 | - navigation.tracking
17 | #- navigation.tabs
18 | #- navigation.tabs.sticky
19 | - navigation.indexes
20 | - navigation.sections
21 | - navigation.expand
22 | - toc.integrate
23 | - toc.follow
24 | nav:
25 | - overview: index.md
26 | - atomic geometries: data.md
27 | - hamiltonians: hamiltonian.md
28 | - sampling: sampling.md
29 | - QM calculations: reference.md
30 | - ML potentials: models.md
31 | - online learning: learning.md
32 | - free energy calculations: free_energy.md
33 | - setup & configuration: configuration.md
34 |
35 | plugins:
36 | - mkdocstrings:
37 | python:
38 | docstring_style: google
39 | repo_url: https://github.com/molmod/psiflow
40 | markdown_extensions:
41 | - tables
42 | - md_in_html
43 | - admonition
44 | - footnotes
45 | - pymdownx.highlight:
46 | anchor_linenums: true
47 | - pymdownx.inlinehilite
48 | - pymdownx.snippets
49 | - pymdownx.superfences
50 | - pymdownx.details
51 | - pymdownx.critic
52 | - pymdownx.caret
53 | - pymdownx.keys
54 | - pymdownx.mark
55 | - pymdownx.tilde
56 | - pymdownx.arithmatex:
57 | generic: true
58 | - attr_list
59 | - pymdownx.emoji:
60 | emoji_index: !!python/name:materialx.emoji.twemoji
61 | emoji_generator: !!python/name:materialx.emoji.to_svg
62 |
63 | #extra_javascript:
64 | # - javascripts/mathjax.js
65 | # - https://polyfill.io/v3/polyfill.min.js?features=es6
66 | # - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
67 | #
68 | extra_javascript:
69 | - javascripts/mathjax.js
70 | - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
71 |
--------------------------------------------------------------------------------
/psiflow/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import typeguard
4 |
5 | from .config import setup_slurm_config # noqa: F401
6 | from .execution import ExecutionContextLoader
7 | from .serialization import ( # noqa: F401
8 | _DataFuture,
9 | deserialize,
10 | serializable,
11 | serialize,
12 | )
13 |
14 |
15 | @typeguard.typechecked
16 | def resolve_and_check(path: Path) -> Path:
17 | path = path.resolve()
18 | if Path.cwd() in path.parents:
19 | pass
20 | elif path.exists() and Path.cwd().samefile(path):
21 | pass
22 | else:
23 | raise ValueError(
24 | "requested file and/or path at location: {}"
25 | "\nwhich is not in the present working directory: {}"
26 | "\npsiflow can only load and/or save in its present "
27 | "working directory because this is the only directory"
28 | " that will get bound into the container.".format(path, Path.cwd())
29 | )
30 | return path
31 |
32 |
33 | load = ExecutionContextLoader.load
34 | context = ExecutionContextLoader.context
35 | wait = ExecutionContextLoader.wait
36 |
--------------------------------------------------------------------------------
/psiflow/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import Computable, Dataset, aggregate_multiple, compute # noqa: F401
2 | from .utils import compute_mae, compute_rmse # noqa: F401
3 |
--------------------------------------------------------------------------------
/psiflow/free_energy/__init__.py:
--------------------------------------------------------------------------------
1 | from .integration import Integration # noqa: F401
2 | from .phonons import ( # noqa: F401
3 | compute_frequencies,
4 | compute_harmonic,
5 | harmonic_free_energy,
6 | )
7 |
--------------------------------------------------------------------------------
/psiflow/free_energy/integration.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from typing import Optional, Union
4 |
5 | import numpy as np
6 | import typeguard
7 | from ase.units import bar, kB
8 | from parsl.app.app import python_app
9 |
10 | from psiflow.data import Dataset
11 | from psiflow.hamiltonians import Hamiltonian, Zero
12 | from psiflow.sampling import SimulationOutput, Walker, quench, randomize, sample
13 | from psiflow.utils.apps import compute_sum, multiply
14 |
15 | length = python_app(len, executors=["default_threads"])
16 | take_mean = python_app(np.mean, executors=["default_threads"])
17 |
18 |
19 | @typeguard.typechecked
20 | def _integrate(x: np.ndarray, *args: float) -> np.ndarray:
21 | import scipy.integrate
22 |
23 | assert len(args) == len(x)
24 | y = np.array(args, dtype=float)
25 | return scipy.integrate.cumulative_trapezoid(y, x=x, initial=0.0)
26 |
27 |
28 | integrate = python_app(_integrate, executors=["default_threads"])
29 |
30 |
31 | @typeguard.typechecked
32 | class ThermodynamicState:
33 | temperature: float
34 | natoms: int
35 | delta_hamiltonian: Optional[Hamiltonian]
36 | pressure: Optional[float]
37 | mass: Optional[float]
38 |
39 | def __init__(
40 | self,
41 | temperature: float,
42 | natoms: int,
43 | delta_hamiltonian: Optional[Hamiltonian],
44 | pressure: Optional[float],
45 | mass: Optional[float],
46 | ):
47 | self.temperature = temperature
48 | self.natoms = natoms
49 | self.delta_hamiltonian = delta_hamiltonian
50 | self.pressure = pressure
51 | self.mass = mass
52 |
53 | self.gradients = {
54 | "temperature": None,
55 | "delta": None,
56 | "pressure": None,
57 | "mass": None,
58 | }
59 |
60 | def gradient(
61 | self,
62 | output: SimulationOutput,
63 | hamiltonian: Optional[Hamiltonian] = None,
64 | ):
65 | self.temperature_gradient(output, hamiltonian)
66 | self.delta_gradient(output)
67 | if self.mass is not None:
68 | self.mass_gradient(output)
69 |
70 | def temperature_gradient(
71 | self,
72 | output: SimulationOutput,
73 | hamiltonian: Optional[Hamiltonian] = None,
74 | ):
75 | energies = output.get_energy(hamiltonian)
76 | _energy = take_mean(energies)
77 | if self.pressure is not None: # use enthalpy
78 | volumes = output["volume{angstrom3}"]
79 | pv = multiply(take_mean(volumes), 10 * bar * self.pressure)
80 | _energy = compute_sum(_energy, pv)
81 |
82 |         # grad_u = - <U> / (kB * T**2)
83 |         # grad_k = - <E_kin> / (kB * T**2) = -(3N - 3) / (2 * T)
84 | gradient_u = multiply(
85 | _energy,
86 | (-1.0) / (kB * self.temperature**2),
87 | )
88 | gradient_k = (-1.0) * (3 * self.natoms - 3) / (2 * self.temperature)
89 | self.gradients["temperature"] = compute_sum(gradient_u, gradient_k)
90 |
91 | def delta_gradient(self, output: SimulationOutput):
92 | energies = output.get_energy(self.delta_hamiltonian)
93 | self.gradients["delta"] = multiply(
94 | take_mean(energies),
95 | 1 / (kB * self.temperature),
96 | )
97 |
98 |     def mass_gradient(self, output: SimulationOutput):
99 |         raise NotImplementedError
100 |
101 |
102 | @typeguard.typechecked
103 | class Integration:
104 | def __init__(
105 | self,
106 | hamiltonian: Hamiltonian,
107 | temperatures: Union[list[float], np.ndarray],
108 | delta_hamiltonian: Optional[Hamiltonian] = None,
109 | delta_coefficients: Union[list[float], np.ndarray, None] = None,
110 | pressure: Optional[float] = None,
111 | ):
112 | self.hamiltonian = hamiltonian
113 | self.temperatures = np.array(temperatures, dtype=float)
114 | if delta_hamiltonian is not None:
115 | assert delta_coefficients is not None
116 | self.delta_hamiltonian = delta_hamiltonian
117 | self.delta_coefficients = np.array(delta_coefficients, dtype=float)
118 | else:
119 | self.delta_coefficients = np.array([0.0])
120 | self.delta_hamiltonian = Zero()
121 | self.pressure = pressure
122 |
123 | assert len(np.unique(self.temperatures)) == len(self.temperatures)
124 | assert len(np.unique(self.delta_coefficients)) == len(self.delta_coefficients)
125 |
126 | self.states = []
127 | self.walkers = []
128 | self.outputs = []
129 |
130 | def create_walkers(
131 | self,
132 | dataset: Dataset,
133 | initialize_by: str = "quench",
134 | **walker_kwargs,
135 | ) -> list[Walker]:
136 | natoms = len(dataset[0].result())
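        # NOTE: walkers/states are created delta-major, i.e. index = i_delta * ntemperatures + i_T;
        # along_delta() and along_temperature() rely on this ordering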
137 | for delta in self.delta_coefficients:
138 | for T in self.temperatures:
139 | hamiltonian = self.hamiltonian + delta * self.delta_hamiltonian
140 | walker = Walker(
141 | dataset[0], # do quench later
142 | hamiltonian,
143 | temperature=T,
144 | **walker_kwargs,
145 | )
146 | self.walkers.append(walker)
147 | state = ThermodynamicState(
148 | temperature=T,
149 | natoms=natoms,
150 | delta_hamiltonian=self.delta_hamiltonian,
151 | pressure=self.pressure,
152 | mass=None,
153 | )
154 | self.states.append(state)
155 |
156 | # initialize walkers
157 | if initialize_by == "quench":
158 | quench(self.walkers, dataset)
159 | elif initialize_by == "shuffle":
160 | randomize(self.walkers, dataset)
161 | else:
162 |             raise ValueError("unknown initialization: {}".format(initialize_by))
163 | return self.walkers
164 |
165 | def sample(self, **sampling_kwargs):
166 | self.outputs[:] = sample(
167 | self.walkers,
168 | **sampling_kwargs,
169 | )
170 |
171 | def compute_gradients(self):
172 | for output, state in zip(self.outputs, self.states):
173 | state.gradient(output, hamiltonian=self.hamiltonian)
174 |
175 | def along_delta(self, temperature: Optional[float] = None):
176 | if temperature is None:
177 | assert self.ntemperatures == 1
178 | temperature = self.temperatures[0]
179 | index = np.where(self.temperatures == temperature)[0][0]
180 | assert self.temperatures[index] == temperature
181 | N = self.ntemperatures
182 | states = [self.states[N * i + index] for i in range(self.ndeltas)]
183 |
184 | # do integration
185 | x = self.delta_coefficients
186 | y = [state.gradients["delta"] for state in states]
187 | f = integrate(x, *y)
188 | return f
189 | # return multiply(f, kB * temperature)
190 |
191 | def along_temperature(self, delta_coefficient: Optional[float] = None):
192 | if delta_coefficient is None:
193 | assert self.ndeltas == 1
194 | delta_coefficient = self.delta_coefficients[0]
195 | index = np.where(self.delta_coefficients == delta_coefficient)[0][0]
196 | assert self.delta_coefficients[index] == delta_coefficient
197 | N = self.ntemperatures
198 | states = [self.states[N * index + i] for i in range(self.ntemperatures)]
199 |
200 | # do integration
201 | x = self.temperatures
202 | y = [state.gradients["temperature"] for state in states]
203 | f = integrate(x, *y)
204 | return f
205 | # return multiply(f, kB * self.temperatures)
206 |
207 | @property
208 | def ntemperatures(self):
209 | return len(self.temperatures)
210 |
211 | @property
212 | def ndeltas(self):
213 | return len(self.delta_coefficients)
214 |
--------------------------------------------------------------------------------
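A minimal usage sketch of the `Integration` class above, following its own call order (`create_walkers`, `sample`, `compute_gradients`, `along_temperature`). The `mace` hamiltonian, the input file name, and the keyword arguments forwarded to `sample()` are illustrative assumptions, not part of this module:

```python
import numpy as np

from psiflow.data import Dataset
from psiflow.free_energy import Integration  # assumed export

data = Dataset.load("train.xyz")  # placeholder input file
integration = Integration(
    mace,                                    # any Hamiltonian instance
    temperatures=np.linspace(300, 600, 4),   # values must be unique
    pressure=None,                           # NVT; set a float to include a pV term
)
integration.create_walkers(data, initialize_by="quench")
integration.sample(steps=10_000, step=100)   # forwarded to psiflow.sampling.sample
integration.compute_gradients()
f = integration.along_temperature()          # AppFuture of the cumulative integral
```
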
/psiflow/free_energy/phonons.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import xml.etree.ElementTree as ET
4 | from typing import Optional, Union
5 |
6 | import numpy as np
7 | import parsl
8 | import typeguard
9 | from ase.units import Bohr, Ha, J, _c, _hplanck, _k, kB, second
10 | from parsl.app.app import bash_app, python_app
11 | from parsl.dataflow.futures import AppFuture
12 |
13 | import psiflow
14 | from psiflow.data import Dataset
15 | from psiflow.geometry import Geometry, mass_weight
16 | from psiflow.hamiltonians import Hamiltonian, MixtureHamiltonian
17 | from psiflow.sampling.sampling import (
18 | setup_sockets,
19 | label_forces,
20 | make_force_xml,
21 | serialize_mixture,
22 | make_start_command,
23 | make_client_command
24 | )
25 | from psiflow.utils.apps import multiply
26 | from psiflow.utils.io import load_numpy, save_xml
27 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
28 |
29 |
30 | @typeguard.typechecked
31 | def _compute_frequencies(hessian: np.ndarray, geometry: Geometry) -> np.ndarray:
32 | assert hessian.shape[0] == hessian.shape[1]
33 | assert len(geometry) * 3 == hessian.shape[0]
34 | return np.sqrt(np.linalg.eigvalsh(mass_weight(hessian, geometry))) / (2 * np.pi)
35 |
36 |
37 | compute_frequencies = python_app(_compute_frequencies, executors=["default_threads"])
38 |
39 |
40 | @typeguard.typechecked
41 | def _harmonic_free_energy(
42 | frequencies: Union[float, np.ndarray],
43 | temperature: float,
44 | quantum: bool = False,
45 | threshold: float = 1, # in invcm
46 | ) -> float:
47 | if isinstance(frequencies, float):
48 | frequencies = np.array([frequencies], dtype=float)
49 |
50 | threshold_ = threshold / second * (100 * _c) # from invcm to ASE
51 | frequencies = frequencies[np.abs(frequencies) > threshold_]
52 |
53 | # _hplanck in J s
54 | # _k in J / K
55 | if quantum:
56 | arg = (-1.0) * _hplanck * frequencies * second / (_k * temperature)
57 | F = kB * temperature * np.sum(np.log(1 - np.exp(arg)))
58 | F += _hplanck * J * second * np.sum(frequencies) / 2
59 | else:
60 | constant = kB * temperature * np.log(_hplanck)
61 | actual = np.log(frequencies / (kB * temperature))
62 | F = len(frequencies) * constant + kB * temperature * np.sum(actual)
63 | F /= kB * temperature
64 | return F
65 |
66 |
67 | harmonic_free_energy = python_app(_harmonic_free_energy, executors=["default_threads"])
68 |
69 |
70 | @typeguard.typechecked
71 | def setup_motion(
72 | mode: str,
73 | asr: str,
74 | pos_shift: float,
75 | energy_shift: float,
76 | ) -> ET.Element:
77 | motion = ET.Element("motion", mode="vibrations")
78 | vibrations = ET.Element("vibrations", mode="fd")
79 | pos = ET.Element("pos_shift")
80 | pos.text = " {} ".format(pos_shift)
81 | vibrations.append(pos)
82 | energy = ET.Element("energy_shift")
83 | energy.text = " {} ".format(energy_shift)
84 | vibrations.append(energy)
85 | prefix = ET.Element("prefix")
86 | prefix.text = " output "
87 | vibrations.append(prefix)
88 | asr_ = ET.Element("asr")
89 | asr_.text = " {} ".format(asr)
90 | vibrations.append(asr_)
91 | motion.append(vibrations)
92 | return motion
93 |
94 |
95 | def _execute_ipi(
96 | hamiltonian_names: list[str],
97 | client_args: list[list[str]],
98 | command_server: str,
99 | command_client: str,
100 | stdout: str = "",
101 | stderr: str = "",
102 | inputs: list = [],
103 | outputs: list = [],
104 | parsl_resource_specification: Optional[dict] = None,
105 | ) -> str:
106 | command_start = make_start_command(command_server, inputs[0], inputs[1])
107 | commands_client = []
108 | for i, name in enumerate(hamiltonian_names):
109 | args = client_args[i]
110 | assert len(args) == 1 # only have one client per hamiltonian
111 | for arg in args:
112 |             commands_client.append(make_client_command(command_client, name, inputs[2 + i], inputs[1], arg))
113 |
114 | command_end = f'{command_server} --cleanup'
115 |     command_copy = f'cp i-pi.output_full.hess {outputs[0].filepath}'
116 |
117 | command_list = [
118 | TMP_COMMAND,
119 | CD_COMMAND,
120 | command_start,
121 | *commands_client,
122 | "wait",
123 | command_end,
124 | command_copy,
125 | ]
126 | return "\n".join(command_list)
127 |
128 |
129 | execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
130 |
131 |
132 | @typeguard.typechecked
133 | def compute_harmonic(
134 | state: Union[Geometry, AppFuture],
135 | hamiltonian: Hamiltonian,
136 | mode: str = "fd",
137 | asr: str = "crystal",
138 | pos_shift: float = 0.01,
139 | energy_shift: float = 0.00095,
140 | ) -> AppFuture:
141 | hamiltonian: MixtureHamiltonian = 1 * hamiltonian
142 | names = label_forces(hamiltonian)
143 | sockets = setup_sockets(names)
144 | forces = make_force_xml(hamiltonian, names)
145 |
146 | initialize = ET.Element("initialize", nbeads="1")
147 | start = ET.Element("file", mode="ase", cell_units="angstrom")
148 | start.text = " start_0.xyz "
149 | initialize.append(start)
150 | motion = setup_motion(mode, asr, pos_shift, energy_shift)
151 |
152 | system = ET.Element("system")
153 | system.append(initialize)
154 | system.append(motion)
155 | system.append(forces)
156 |
157 | # output = setup_output(keep_trajectory)
158 |
159 | simulation = ET.Element("simulation", mode="static")
160 | # simulation.append(output)
161 | for socket in sockets:
162 | simulation.append(socket)
163 | simulation.append(system)
164 | total_steps = ET.Element("total_steps")
165 | total_steps.text = " {} ".format(1000000)
166 | simulation.append(total_steps)
167 |
168 | context = psiflow.context()
169 | definition = context.definitions["ModelEvaluation"]
170 | input_future = save_xml(
171 | simulation,
172 | outputs=[context.new_file("input_", ".xml")],
173 | ).outputs[0]
174 | inputs = [
175 | input_future,
176 | Dataset([state]).extxyz,
177 | ]
178 | inputs += serialize_mixture(hamiltonian, dtype="float64")
179 |
180 | client_args = []
181 | for name in names:
182 | args = definition.get_client_args(name, 1, "vibrations")
183 | client_args.append(args)
184 | outputs = [
185 | context.new_file("hess_", ".txt"),
186 | ]
187 |
188 | command_server = definition.server_command()
189 | command_client = definition.client_command()
190 | resources = definition.wq_resources(1)
191 |
192 | result = execute_ipi(
193 | names,
194 | client_args,
195 | command_server,
196 | command_client,
197 | stdout=parsl.AUTO_LOGNAME,
198 | stderr=parsl.AUTO_LOGNAME,
199 | inputs=inputs,
200 | outputs=outputs,
201 | parsl_resource_specification=resources,
202 | )
203 | return multiply(load_numpy(inputs=[result.outputs[0]]), Ha / Bohr**2)
--------------------------------------------------------------------------------
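A sketch of how the three apps in this module chain together, assuming an active psiflow context; `minimum` stands for a relaxed `Geometry` and `hamiltonian` for any `Hamiltonian`, and the imports are assumed exports of `psiflow.free_energy`. Note that `compute_harmonic` returns the Hessian in eV/Å² and `_harmonic_free_energy` returns F in units of kB·T:

```python
from psiflow.free_energy import (  # assumed exports
    compute_harmonic,
    compute_frequencies,
    harmonic_free_energy,
)

hessian = compute_harmonic(minimum, hamiltonian, asr="crystal")  # eV/angstrom**2
frequencies = compute_frequencies(hessian, minimum)              # ASE frequency units
f = harmonic_free_energy(frequencies, temperature=300.0, quantum=True)
print(f.result())  # in units of kB*T, cf. the final division in _harmonic_free_energy
```
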
/psiflow/models/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Union
3 |
4 | import typeguard
5 | import yaml
6 | from ase.data import chemical_symbols
7 | from parsl.data_provider.files import File
8 |
9 | import psiflow
10 | from psiflow.models._mace import MACE, MACEConfig # noqa: F401
11 | from psiflow.models.model import Model
12 | from psiflow.utils.apps import copy_data_future
13 |
14 |
15 | @typeguard.typechecked
16 | def load_model(path: Union[Path, str]) -> Model:
17 | path = psiflow.resolve_and_check(Path(path))
18 | assert path.is_dir()
19 | classes = [
20 | MACE,
21 | ]
22 |     for model_cls in classes + [None]:  # None acts as a sentinel
23 |         assert model_cls is not None, "no model config found in {}".format(path)
24 | name = model_cls.__name__
25 | path_config = path / (name + ".yaml")
26 | if path_config.is_file():
27 | break
28 | with open(path_config, "r") as f:
29 | config = yaml.load(f, Loader=yaml.FullLoader)
30 | atomic_energies = {}
31 |     for key in list(config):
32 |         if key.startswith("atomic_energies_"):
33 |             element = key.split("atomic_energies_")[-1]
34 |             assert element in chemical_symbols
35 |             atomic_energies[element] = config.pop(key)
36 |     model = model_cls(**config)
37 |     for element, energy in atomic_energies.items():
38 |         model.add_atomic_energy(element, energy)
39 |     path_model = path / "{}.pth".format(name)
40 |     if path_model.is_file():
41 |         model.model_future = copy_data_future(
42 |             inputs=[File(str(path_model))],
43 |             outputs=[psiflow.context().new_file("model_", ".pth")],
44 |         ).outputs[0]
45 |     return model
46 |
--------------------------------------------------------------------------------
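`load_model` is the inverse of `Model.save`: it expects a directory containing `<ClassName>.yaml` (with atomic energies inlined as `atomic_energies_<element>` keys) and, for an initialized model, `<ClassName>.pth`. A round-trip sketch, assuming an active psiflow context and an existing `model`:

```python
from psiflow.models import load_model

model.save("./my_model")             # writes MACE.yaml (+ MACE.pth if initialized)
restored = load_model("./my_model")  # atomic energies are read back from the yaml
```
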
/psiflow/models/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from dataclasses import asdict
4 | from pathlib import Path
5 | from typing import Optional, Union
6 |
7 | import parsl
8 | import typeguard
9 | from parsl.data_provider.files import File
10 | from parsl.dataflow.futures import AppFuture
11 |
12 | import psiflow
13 | from psiflow.data import Dataset
14 | from psiflow.utils.apps import copy_data_future, log_message, setup_logger
15 | from psiflow.utils.io import save_yaml
16 |
17 | logger = setup_logger(__name__)
18 |
19 |
20 | @typeguard.typechecked
21 | @psiflow.serializable
22 | class Model:
23 | _config: dict
24 | model_future: Optional[psiflow._DataFuture]
25 | atomic_energies: dict
26 |
27 | def add_atomic_energy(self, element: str, energy: Union[float, AppFuture]) -> None:
28 | assert self.model_future is None, (
29 | "cannot add atomic energies after model has "
30 | "been initialized; reset model, add energy, and reinitialize"
31 | )
32 | if element in self.atomic_energies:
33 | if isinstance(energy, AppFuture):
34 | energy = energy.result()
35 | if isinstance(self.atomic_energies[element], AppFuture):
36 | existing = self.atomic_energies[element].result()
37 | assert energy == existing, (
38 | "model already has atomic energy "
39 | "for element {} ({}), which is different from {}"
40 | "".format(element, existing, energy)
41 | )
42 | self.atomic_energies[element] = energy
43 |
44 | def train(self, training: Dataset, validation: Dataset) -> None:
45 | log_message(
46 | logger,
47 | "training model using {} states for training and {} for validation",
48 | training.length(),
49 | validation.length(),
50 | )
51 | inputs = [self.model_future]
52 | if self.do_offset:
53 | inputs += [
54 | training.subtract_offset(**self.atomic_energies).extxyz,
55 | validation.subtract_offset(**self.atomic_energies).extxyz,
56 | ]
57 | else:
58 | inputs += [
59 | training.extxyz,
60 | validation.extxyz,
61 | ]
62 | future = self._train(
63 | dict(self._config),
64 | stdout=parsl.AUTO_LOGNAME,
65 | stderr=parsl.AUTO_LOGNAME,
66 | inputs=inputs,
67 | outputs=[psiflow.context().new_file("model_", ".pth")],
68 | )
69 | self.model_future = future.outputs[0]
70 |
71 | def initialize(self, dataset: Dataset) -> None:
72 | """Initializes the model based on a dataset"""
73 | assert self.model_future is None
74 | if self.do_offset:
75 | inputs = [dataset.subtract_offset(**self.atomic_energies).extxyz]
76 | else:
77 | inputs = [dataset.extxyz]
78 | future = self._initialize(
79 | self._config,
80 | stdout=parsl.AUTO_LOGNAME,
81 | stderr=parsl.AUTO_LOGNAME,
82 | inputs=inputs,
83 | outputs=[psiflow.context().new_file("model_", ".pth")],
84 | )
85 | self.model_future = future.outputs[0]
86 |
87 | def reset(self) -> None:
88 | self.model_future = None
89 |
90 | def save(
91 | self,
92 | path: Union[Path, str],
93 | ) -> None:
94 | path = psiflow.resolve_and_check(Path(path))
95 | path.mkdir(exist_ok=True)
96 |
97 | name = self.__class__.__name__
98 | path_config = path / "{}.yaml".format(name)
99 |
100 | atomic_energies = {
101 | "atomic_energies_" + key: value
102 | for key, value in self.atomic_energies.items()
103 | }
104 | save_yaml(
105 | self._config,
106 | outputs=[File(str(path_config))],
107 | **atomic_energies,
108 | )
109 | if self.model_future is not None:
110 | path_model = path / "{}.pth".format(name)
111 | copy_data_future(
112 | inputs=[self.model_future],
113 | outputs=[File(str(path_model))],
114 | )
115 |
116 | def copy(self) -> Model:
117 | model = self.__class__(**asdict(self.config))
118 | for element, energy in self.atomic_energies.items():
119 | model.add_atomic_energy(element, energy)
120 | if self.model_future is not None:
121 | model.model_future = copy_data_future(
122 | inputs=[self.model_future],
123 | outputs=[psiflow.context().new_file("model_", ".pth")],
124 | ).outputs[0]
125 | return model
126 |
127 | @property
128 | def do_offset(self) -> bool:
129 | return len(self.atomic_energies) > 0
130 |
131 | @property
132 | def seed(self) -> int:
133 | raise NotImplementedError
134 |
135 | @seed.setter
136 | def seed(self, arg) -> None:
137 | raise NotImplementedError
138 |
--------------------------------------------------------------------------------
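A lifecycle sketch for `Model` subclasses; the `MACE` keyword argument shown is a placeholder, and the atomic energy values are made up. The order matters: `add_atomic_energy` asserts that no `model_future` exists yet, so offsets must be registered before `initialize`:

```python
from psiflow.models import MACE

model = MACE(batch_size=2)             # hypothetical keyword argument
model.add_atomic_energy("H", -13.6)    # placeholder values, before initialize()
model.add_atomic_energy("O", -2041.3)
model.initialize(train)                # train/valid are Dataset placeholders
model.train(train, valid)              # offsets subtracted because do_offset is True
```
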
/psiflow/order_parameters.py:
--------------------------------------------------------------------------------
1 | class OrderParameter:
2 | pass
3 |
--------------------------------------------------------------------------------
/psiflow/reference/__init__.py:
--------------------------------------------------------------------------------
1 | from ._cp2k import CP2K # noqa: F401
2 | from ._dftd3 import D3 # noqa: F401
3 | from .gpaw_ import GPAW # noqa: F401
4 | from .reference import Reference, evaluate # noqa: F401
5 |
--------------------------------------------------------------------------------
/psiflow/reference/_dftd3.py:
--------------------------------------------------------------------------------
1 | import json
2 | from functools import partial
3 |
4 | import numpy as np
5 | import typeguard
6 | from parsl.app.app import bash_app, python_app
7 | from parsl.dataflow.futures import AppFuture
8 |
9 | import psiflow
10 | from psiflow.geometry import Geometry
11 | from psiflow.reference.reference import Reference
12 | from psiflow.utils.apps import copy_app_future
13 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
14 |
15 |
16 | @typeguard.typechecked
17 | def input_string(geometry: Geometry, parameters: dict, properties: tuple) -> str:
18 | geometry_str = geometry.to_string()
19 | data = {
20 | "geometry": geometry_str,
21 | "parameters": parameters,
22 | "properties": properties,
23 | }
24 | return json.dumps(data)
25 |
26 |
27 | def d3_singlepoint_pre(
28 | geometry: Geometry,
29 | parameters: dict,
30 | properties: tuple,
31 | d3_command: str,
32 | stdout: str = "",
33 | stderr: str = "",
34 | ) -> str:
35 | from psiflow.reference._dftd3 import input_string
36 | input_str = input_string(geometry, parameters, properties)
37 | command_list = [
38 | TMP_COMMAND,
39 | CD_COMMAND,
40 | f"echo '{input_str}' > input.json",
41 | f"python -u {d3_command}",
42 | ]
43 | return "\n".join(command_list)
44 |
45 |
46 | @typeguard.typechecked
47 | def d3_singlepoint_post(
48 | geometry: Geometry,
49 | inputs: list = [],
50 | ) -> Geometry:
51 | from psiflow.geometry import new_nullstate
52 |
53 | with open(inputs[0], "r") as f:
54 | lines = f.read().split("\n")
55 |
56 | geometry = new_nullstate()
57 | for i, line in enumerate(lines):
58 | if "CALCULATION SUCCESSFUL" in line:
59 | natoms = int(lines[i + 1])
60 | geometry_str = "\n".join(lines[i + 1 : i + 3 + natoms])
61 | geometry = Geometry.from_string(geometry_str)
62 | assert geometry.energy is not None
63 | geometry.stdout = inputs[0]
64 | return geometry
65 |
66 |
67 | @typeguard.typechecked
68 | @psiflow.serializable
69 | class D3(Reference):
70 |     outputs: list  # JSON round-trips tuples into lists
71 | executor: str
72 | parameters: dict
73 |
74 | def __init__(
75 | self,
76 | **parameters,
77 | ):
78 | self.parameters = parameters
79 | self.outputs = ["energy", "forces"]
80 | self.executor = "default_htex"
81 | self._create_apps()
82 |
83 | def _create_apps(self):
84 | path = "psiflow.reference._dftd3"
85 | d3_command = "$(python -c 'import {}; print({}.__file__)')".format(path, path)
86 | app_pre = bash_app(d3_singlepoint_pre, executors=["default_htex"])
87 | app_post = python_app(d3_singlepoint_post, executors=["default_threads"])
88 | self.app_pre = partial(
89 | app_pre,
90 | parameters=self.parameters,
91 | properties=tuple(self.outputs),
92 | d3_command=d3_command,
93 | )
94 | self.app_post = app_post
95 |
96 | def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
97 |         return copy_app_future(0.0)  # dispersion contributes nothing for an isolated atom
98 |
99 |
100 | if __name__ == "__main__":
101 | from ase import Atoms
102 | from dftd3.ase import DFTD3
103 |
104 | with open("input.json", "r") as f:
105 | input_dict = json.loads(f.read())
106 |
107 | geometry = Geometry.from_string(input_dict["geometry"])
108 | parameters = input_dict["parameters"]
109 | properties = input_dict["properties"]
110 |
111 | atoms = Atoms(
112 | numbers=np.copy(geometry.per_atom.numbers),
113 | positions=np.copy(geometry.per_atom.positions),
114 | cell=np.copy(geometry.cell),
115 | pbc=geometry.periodic,
116 | )
117 |
118 | calculator = DFTD3(**parameters)
119 | atoms.calc = calculator
120 |
121 | if "forces" in properties:
122 | geometry.per_atom.forces[:] = atoms.get_forces()
123 | if "energy" in properties:
124 | geometry.energy = atoms.get_potential_energy()
125 |
126 | output_str = geometry.to_string()
127 | print("CALCULATION SUCCESSFUL")
128 | print(output_str)
129 |
--------------------------------------------------------------------------------
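The `D3` reference forwards its keyword arguments verbatim to `dftd3.ase.DFTD3` inside the `__main__` block above, so the accepted parameters are those of the simple-dftd3 ASE calculator (e.g. `method`, `damping`). A usage sketch, with `dataset` as a placeholder:

```python
from psiflow.reference import D3

d3 = D3(method="pbe", damping="d3bj")  # forwarded to dftd3.ase.DFTD3
energy, forces = d3.compute(dataset, "energy", "forces")
```
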
/psiflow/reference/gpaw_.py:
--------------------------------------------------------------------------------
1 | import json
2 | from functools import partial
3 | from typing import Union
4 |
5 | import numpy as np
6 | import typeguard
7 | from parsl.app.app import bash_app, python_app
8 | from parsl.dataflow.futures import AppFuture
9 |
10 | import psiflow
11 | from psiflow.geometry import Geometry, new_nullstate
12 | from psiflow.reference.reference import Reference
13 | from psiflow.utils.apps import copy_app_future
14 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
15 |
16 |
17 | @typeguard.typechecked
18 | def input_string(geometry: Geometry, gpaw_parameters: dict, properties: tuple) -> str:
19 | geometry_str = geometry.to_string()
20 | data = {
21 | "geometry": geometry_str,
22 | "gpaw_parameters": gpaw_parameters,
23 | "properties": properties,
24 | }
25 | return json.dumps(data)
26 |
27 |
28 | def gpaw_singlepoint_pre(
29 | geometry: Geometry,
30 | gpaw_parameters: dict,
31 | properties: tuple,
32 | gpaw_command: str,
33 | parsl_resource_specification: dict = {},
34 | stdout: str = "",
35 | stderr: str = "",
36 | ) -> str:
37 | from psiflow.reference.gpaw_ import input_string
38 | input_str = input_string(geometry, gpaw_parameters, properties)
39 | write_command = f"echo '{input_str}' > input.json"
40 | command_list = [
41 | TMP_COMMAND,
42 | CD_COMMAND,
43 | write_command,
44 | gpaw_command,
45 | ]
46 | return "\n".join(command_list)
47 |
48 |
49 | @typeguard.typechecked
50 | def gpaw_singlepoint_post(
51 | geometry: Geometry,
52 | inputs: list = [],
53 | ) -> Geometry:
54 | with open(inputs[0], "r") as f:
55 | lines = f.read().split("\n")
56 |
57 | geometry = new_nullstate() # GPAW parsing doesn't require initial geometry
58 | for i, line in enumerate(lines):
59 | if "CALCULATION SUCCESSFUL" in line:
60 | natoms = int(lines[i + 1])
61 | geometry_str = "\n".join(lines[i + 1 : i + 3 + natoms])
62 | geometry = Geometry.from_string(geometry_str)
63 | assert geometry.energy is not None
64 | geometry.stdout = inputs[0]
65 | return geometry
66 |
67 |
68 | @typeguard.typechecked
69 | @psiflow.serializable
70 | class GPAW(Reference):
71 |     outputs: list  # JSON round-trips tuples into lists
72 | executor: str
73 | parameters: dict
74 |
75 | def __init__(
76 | self,
77 | outputs: Union[tuple, list] = ("energy", "forces"),
78 | executor: str = "GPAW",
79 | **parameters,
80 | ):
81 | self.outputs = list(outputs)
82 | self.parameters = parameters
83 | self.executor = executor
84 | self._create_apps()
85 |
86 | def _create_apps(self):
87 | definition = psiflow.context().definitions[self.executor]
88 | gpaw_command = definition.command()
89 | wq_resources = definition.wq_resources()
90 | app_pre = bash_app(gpaw_singlepoint_pre, executors=[self.executor])
91 | app_post = python_app(gpaw_singlepoint_post, executors=["default_threads"])
92 | self.app_pre = partial(
93 | app_pre,
94 | gpaw_parameters=self.parameters,
95 | properties=tuple(self.outputs),
96 | gpaw_command=gpaw_command,
97 | parsl_resource_specification=wq_resources,
98 | )
99 | self.app_post = app_post
100 |
101 | def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
102 | return copy_app_future(0.0) # GPAW computes formation energy by default
103 |
104 |
105 | if __name__ == "__main__":
106 | from ase import Atoms
107 | from ase.calculators.mixing import SumCalculator
108 | from ase.parallel import world
109 | from dftd3.ase import DFTD3
110 | from gpaw import GPAW as GPAWCalculator
111 |
112 | def minimal_box(
113 | atoms: Atoms,
114 | border: float = 0.0,
115 | h: float = 0.2,
116 | multiple: int = 4,
117 | ) -> None:
118 | # inspired by gpaw.cluster.Cluster
119 | if len(atoms) == 0:
120 | return None
121 | min_bounds, max_bounds = np.array(
122 | [np.minimum.reduce(atoms.positions), np.maximum.reduce(atoms.positions)]
123 | )
124 | if isinstance(border, list):
125 | b = np.array(border)
126 | else:
127 | b = np.array([border, border, border])
128 | if not hasattr(h, "__len__"):
129 | h = np.array([h, h, h])
130 | min_bounds -= b
131 | max_bounds += b - min_bounds
132 | grid_points = np.ceil(max_bounds / h / multiple) * multiple
133 | length_diff = grid_points * h - max_bounds
134 | max_bounds += length_diff
135 | min_bounds -= length_diff / 2
136 | shift = tuple(-1.0 * min_bounds)
137 | atoms.translate(shift)
138 | atoms.set_cell(tuple(max_bounds))
139 |
140 | with open("input.json", "r") as f:
141 | input_dict = json.loads(f.read())
142 |
143 | geometry = Geometry.from_string(input_dict["geometry"])
144 | gpaw_parameters = input_dict["gpaw_parameters"]
145 | properties = input_dict["properties"]
146 | d3 = gpaw_parameters.pop("d3", {})
147 |
148 | atoms = Atoms(
149 | numbers=np.copy(geometry.per_atom.numbers),
150 | positions=np.copy(geometry.per_atom.positions),
151 | cell=np.copy(geometry.cell),
152 | pbc=geometry.periodic,
153 | )
154 | if not geometry.periodic:
155 |         minimal_box(  # signature is (atoms, border, h, multiple): pass by keyword
156 |             atoms,
157 |             border=gpaw_parameters.pop("minimal_box_border", 2),  # if present, remove
158 |             h=gpaw_parameters.get("h", 0.2),
159 |             multiple=gpaw_parameters.pop("minimal_box_multiple", 4),
160 | )
161 |
162 | calculator = GPAWCalculator(**gpaw_parameters)
163 | if len(d3) > 0:
164 | calculator = SumCalculator([calculator, DFTD3(**d3)])
165 | atoms.calc = calculator
166 |
167 | if "forces" in properties:
168 | geometry.per_atom.forces[:] = atoms.get_forces()
169 | if "energy" in properties:
170 | geometry.energy = atoms.get_potential_energy()
171 |
172 | output_str = geometry.to_string()
173 | if world.rank == 0:
174 | print("CALCULATION SUCCESSFUL")
175 | print(output_str)
176 |
--------------------------------------------------------------------------------
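A usage sketch for the `GPAW` reference. Plain keywords (`mode`, `xc`, `h`) are passed through to the GPAW calculator; the reserved keys `d3`, `minimal_box_border`, and `minimal_box_multiple` are consumed by the wrapper script above before the calculator is constructed. `dataset` is a placeholder:

```python
from psiflow.reference import GPAW

gpaw = GPAW(
    mode="fd",
    xc="PBE",
    h=0.2,                                    # also reused for the minimal box
    d3={"method": "pbe", "damping": "d3bj"},  # optional, added via SumCalculator
    minimal_box_border=2,                     # only relevant for molecules
)
energy = gpaw.compute(dataset, "energy")
```
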
/psiflow/reference/orca.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/psiflow/reference/orca.py
--------------------------------------------------------------------------------
/psiflow/reference/reference.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import logging
4 | from typing import ClassVar, Optional, Union
5 |
6 | import numpy as np
7 | import parsl
8 | import typeguard
9 | from ase.data import atomic_numbers
10 | from parsl.app.app import join_app, python_app
11 | from parsl.dataflow.futures import AppFuture
12 |
13 | import psiflow
14 | from psiflow.data import Computable, Dataset
15 | from psiflow.geometry import Geometry, NullState
16 | from psiflow.utils.apps import copy_app_future, unpack_i
17 |
18 | logger = logging.getLogger(__name__) # logging per module
19 |
20 |
21 | @typeguard.typechecked
22 | def _extract_energy(state: Geometry):
23 | if state.energy is None:
24 | return 1e10
25 | else:
26 | return state.energy
27 |
28 |
29 | extract_energy = python_app(_extract_energy, executors=["default_threads"])
30 |
31 |
32 | @join_app
33 | @typeguard.typechecked
34 | def get_minimum_energy(element, configs, *energies):
35 | logger.info("atomic energies for element {}:".format(element))
36 | for config, energy in zip(configs, energies):
37 | logger.info("\t{} eV; ".format(energy) + str(config))
38 | energy = min(energies)
39 |     assert energy != 1e10, "atomic energy calculation of {} failed".format(element)
40 | return copy_app_future(energy)
41 |
42 |
43 | @typeguard.typechecked
44 | def _nan_if_unsuccessful(
45 | geometry: Geometry,
46 | result: Geometry,
47 | ) -> Geometry:
48 | if result == NullState:
49 | geometry.energy = None
50 | geometry.per_atom.forces[:] = np.nan
51 |         geometry.stress = None
52 | geometry.stdout = result.stdout
53 | return geometry
54 | else:
55 | return result
56 |
57 |
58 | nan_if_unsuccessful = python_app(_nan_if_unsuccessful, executors=["default_threads"])
59 |
60 |
61 | @join_app
62 | @typeguard.typechecked
63 | def evaluate(
64 | geometry: Geometry,
65 | reference: Reference,
66 | ) -> AppFuture:
67 | if geometry == NullState:
68 | return copy_app_future(NullState)
69 | else:
70 | future = reference.app_pre(
71 | geometry,
72 | stdout=parsl.AUTO_LOGNAME,
73 | stderr=parsl.AUTO_LOGNAME,
74 | )
75 | result = reference.app_post(
76 | geometry=geometry.copy(),
77 | inputs=[future.stdout, future.stderr, future],
78 | )
79 | return nan_if_unsuccessful(geometry, result)
80 |
81 |
82 | @join_app
83 | @typeguard.typechecked
84 | def compute_dataset(
85 | dataset: Dataset,
86 | length: int,
87 | reference: Reference,
88 | ) -> AppFuture:
89 | from psiflow.data.utils import extract_quantities
90 |
91 | geometries = dataset.geometries() # read it once
92 | evaluated = [evaluate(unpack_i(geometries, i), reference) for i in range(length)]
93 | future = extract_quantities(
94 | tuple(reference.outputs),
95 | None,
96 | None,
97 | *evaluated,
98 | )
99 | return future
100 |
101 |
102 | @typeguard.typechecked
103 | @psiflow.serializable
104 | class Reference(Computable):
105 | outputs: tuple
106 | batch_size: ClassVar[int] = 1 # not really used
107 |
108 | def compute(
109 | self,
110 | arg: Union[Dataset, Geometry, AppFuture, list],
111 | *outputs: Optional[Union[str, tuple]],
112 | ):
113 | if isinstance(arg, Dataset):
114 | dataset = arg
115 | elif isinstance(arg, list):
116 | dataset = Dataset(arg)
117 |         elif isinstance(arg, (AppFuture, Geometry)):
118 | dataset = Dataset([arg])
119 | compute_outputs = compute_dataset(dataset, dataset.length(), self)
120 | if len(outputs) == 0:
121 | outputs_ = tuple(self.outputs)
122 | else:
123 | outputs_ = outputs
124 | to_return = []
125 | for output in outputs_:
126 | if output not in self.outputs:
127 | raise ValueError("output {} not in {}".format(output, self.outputs))
128 | index = self.outputs.index(output)
129 | to_return.append(compute_outputs[index])
130 | if len(outputs_) == 1:
131 | return to_return[0]
132 | else:
133 | return to_return
134 |
135 | def compute_atomic_energy(self, element, box_size=None):
136 | energies = []
137 | references = self.get_single_atom_references(element)
138 | configs = [c for c, _ in references]
139 | if box_size is not None:
140 | state = Geometry.from_data(
141 | numbers=np.array([atomic_numbers[element]]),
142 | positions=np.array([[0, 0, 0]]),
143 | cell=np.eye(3) * box_size,
144 | )
145 | else:
146 | state = Geometry(
147 | numbers=np.array([atomic_numbers[element]]),
148 | positions=np.array([[0, 0, 0]]),
149 | cell=np.zeros((3, 3)),
150 | )
151 | for _, reference in references:
152 | energies.append(extract_energy(evaluate(state, reference)))
153 | return get_minimum_energy(element, configs, *energies)
154 |
155 | def get_single_atom_references(self, element):
156 | return [(None, self)]
157 |
--------------------------------------------------------------------------------
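The two entry points of this module in practice; `cp2k` stands for any `Reference` instance and `geometry`/`dataset` for existing data. Failed singlepoints come back with `energy = None` and NaN forces via `nan_if_unsuccessful`, and `compute_atomic_energy` returns the minimum over the candidate references:

```python
from psiflow.reference import evaluate

labeled = evaluate(geometry, cp2k)                # AppFuture of a labeled Geometry
e, f = cp2k.compute(dataset, "energy", "forces")  # one future per requested output
e_O = cp2k.compute_atomic_energy("O", box_size=10.0)
```
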
/psiflow/sampling/__init__.py:
--------------------------------------------------------------------------------
1 | from .metadynamics import Metadynamics # noqa: F401
2 | # from .optimize import optimize, optimize_dataset # noqa: F401
3 | from .output import SimulationOutput # noqa: F401
4 | from .sampling import sample # noqa: F401
5 | from .walker import ReplicaExchange # noqa: F401
6 | from .walker import Walker # noqa: F401
7 | from .walker import quench # noqa: F401
8 | from .walker import randomize # noqa: F401
9 | from .walker import replica_exchange # noqa: F401
10 |
--------------------------------------------------------------------------------
/psiflow/sampling/_ase.py:
--------------------------------------------------------------------------------
1 | """
2 | Structure optimisation through ASE
3 | TODO: do we need to check for very large forces?
4 | TODO: what units are pressure?
5 | TODO: what to do when max_steps is reached before converging?
6 | TODO: timeout is duplicated code
7 | """
8 |
9 | import os
10 | import json
11 | import warnings
12 | import signal
13 | import argparse
14 | from pathlib import Path
15 | from types import SimpleNamespace
16 |
17 | import ase
18 | import ase.io
19 | import numpy as np
20 | from ase.io.extxyz import save_calc_results
21 | from ase.calculators.calculator import Calculator, all_properties
22 | from ase.calculators.mixing import LinearCombinationCalculator
23 | from ase.optimize.precon import PreconLBFGS
24 | from ase.filters import FrechetCellFilter
25 |
26 | from psiflow.geometry import Geometry
27 | from psiflow.functions import function_from_json, EnergyFunction
28 | from psiflow.sampling.utils import TimeoutException, timeout_handler
29 |
30 |
31 | ALLOWED_MODES: tuple[str, ...] = ('full', 'fix_volume', 'fix_shape', 'fix_cell')
32 | FILE_OUT: str = 'out.xyz'
33 | FILE_TRAJ: str = 'out.traj'
34 |
35 |
36 | class FunctionCalculator(Calculator):
37 | implemented_properties = ['energy', 'free_energy', 'forces', 'stress']
38 |
39 | def __init__(self, function: EnergyFunction, **kwargs):
40 | super().__init__(**kwargs)
41 | self.function = function
42 |
43 | def calculate(
44 | self,
45 | atoms=None,
46 | properties=all_properties,
47 | system_changes=None,
48 | ):
49 | super().calculate(atoms, properties, system_changes)
50 | geometry = Geometry.from_atoms(self.atoms)
51 | self.results = self.function(geometry)
52 | self.results['free_energy'] = self.results['energy'] # required by optimiser
53 |
54 |
55 | def log_state(atoms: ase.Atoms) -> None:
56 |     """Print a short summary of the current atoms state."""
57 | def make_log(data: list[tuple[str]]):
58 |         """Pretty-print rows of (name, value, unit) tuples."""
59 | txt = ['', 'Current atoms state:']
60 | txt += [f'{_[0]:<15}: {_[1]:<25}[{_[2]}]' for _ in data]
61 | txt += 'End', ''
62 | print(*txt, sep='\n')
63 |
64 | data = []
65 | if atoms.calc:
66 |         energy, max_force = atoms.get_potential_energy(), np.linalg.norm(atoms.get_forces(), axis=1).max()
67 | else:
68 | energy, max_force = [np.nan] * 2
69 | data += ('Energy', f'{energy:.2f}', 'eV'), ('Max. force', f'{max_force:.2E}', 'eV/A')
70 |
71 | if not all(atoms.pbc):
72 | make_log(data)
73 | return
74 |
75 | volume, cell = atoms.get_volume(), atoms.get_cell().cellpar().round(3)
76 |     data += ('Cell volume', f'{volume:.2f}', 'A^3'),
77 | data += ('Box norms', str(cell[:3])[1:-1], 'A'), ('Box angles', str(cell[3:])[1:-1], 'degrees')
78 |
79 | make_log(data)
80 | return
81 |
82 |
83 | def get_dof_filter(atoms: ase.Atoms, mode: str, pressure: float) -> ase.Atoms | FrechetCellFilter:
84 |     """Wrap atoms in a FrechetCellFilter with the cell DOFs allowed by `mode`."""
85 | if mode == 'fix_cell':
86 | if pressure:
87 | warnings.warn('Ignoring external pressure..')
88 | return atoms
89 | kwargs = {'mask': [True] * 6, 'scalar_pressure': pressure} # enable cell DOFs
90 | if mode == 'fix_shape':
91 | kwargs['hydrostatic_strain'] = True
92 | if mode == 'fix_volume':
93 | kwargs['constant_volume'] = True
94 | if pressure:
95 | warnings.warn('Ignoring applied pressure during fixed volume optimisation..')
96 | return FrechetCellFilter(atoms, **kwargs)
97 |
98 |
99 | def run(args: SimpleNamespace):
100 |     """Run a structure optimisation as specified by the JSON config."""
101 | config = json.load(Path(args.input_config).open('r'))
102 |
103 | atoms = ase.io.read(args.start_xyz)
104 | if not any(atoms.pbc):
105 | atoms.center(vacuum=0) # optimiser mysteriously requires a nonzero unit cell
106 | if config['mode'] != 'fix_cell':
107 | config['mode'] = 'fix_cell'
108 | warnings.warn('Molecular structure is not periodic. Ignoring cell..')
109 |
110 | # construct calculator by combining hamiltonians
111 | assert args.path_hamiltonian is not None
112 | print('Making calculator from:', *config['forces'], sep='\n')
113 | functions = [function_from_json(p) for p in args.path_hamiltonian]
114 | calc = LinearCombinationCalculator(
115 | [FunctionCalculator(f) for f in functions],
116 | [float(h['weight']) for h in config['forces']]
117 | )
118 |
119 | atoms.calc = calc
120 | dof = get_dof_filter(atoms, config['mode'], config['pressure'])
121 | opt = PreconLBFGS(dof, trajectory=FILE_TRAJ if config['keep_trajectory'] else None)
122 |
123 | print(f"pid: {os.getpid()}")
124 | print(f"CPU affinity: {os.sched_getaffinity(os.getpid())}")
125 | log_state(atoms)
126 | try:
127 | opt.run(fmax=config['f_max'], steps=config['max_steps'])
128 | except TimeoutException:
129 | print('OPTIMISATION TIMEOUT')
130 | # TODO: what to do here?
131 | return
132 |
133 | log_state(atoms)
134 | save_calc_results(atoms, calc_prefix='', remove_atoms_calc=True)
135 | if not any(atoms.pbc):
136 | atoms.cell = None # remove meaningless cell
137 | ase.io.write(FILE_OUT, atoms)
138 | print('OPTIMISATION SUCCESSFUL')
139 | return
140 |
141 |
142 | def clean(args: SimpleNamespace):
143 |     """Convert optimisation outputs to extxyz and move them into place."""
144 | from psiflow.data.utils import _write_frames
145 |
146 | geometry = Geometry.load(FILE_OUT)
147 | _write_frames(geometry, outputs=[args.output_xyz])
148 | if Path(FILE_TRAJ).is_file():
149 | traj = [at for at in ase.io.trajectory.Trajectory(FILE_TRAJ)]
150 | geometries = [Geometry.from_atoms(at) for at in traj]
151 | _write_frames(*geometries, outputs=[args.output_traj])
152 | print('FILES MOVED')
153 | return
154 |
155 |
156 | def main():
157 | signal.signal(signal.SIGTERM, timeout_handler)
158 | parser = argparse.ArgumentParser()
159 | subparsers = parser.add_subparsers(help='what to do', dest='action')
160 | run_parser = subparsers.add_parser("run")
161 | run_parser.set_defaults(func=run)
162 | run_parser.add_argument(
163 | "--path_hamiltonian",
164 | action='extend',
165 | nargs='*',
166 | type=str,
167 | )
168 | run_parser.add_argument(
169 | "--input_config",
170 | type=str,
171 | default=None,
172 | )
173 | run_parser.add_argument(
174 | "--start_xyz",
175 | type=str,
176 | default=None,
177 | )
178 | clean_parser = subparsers.add_parser("clean")
179 | clean_parser.set_defaults(func=clean)
180 | clean_parser.add_argument(
181 | "--output_xyz",
182 | type=str,
183 | default=None,
184 | )
185 | clean_parser.add_argument(
186 | "--output_traj",
187 | type=str,
188 | default=None,
189 | )
190 | args = parser.parse_args()
191 | args.func(args)
192 |
193 |
194 |
--------------------------------------------------------------------------------
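For reference, the JSON config consumed by `run` mirrors the dict assembled in `psiflow/sampling/ase.py` below; the values here are illustrative:

```python
config = {
    "task": "ASE optimisation",
    "forces": [{"forcefield": "MACE0", "weight": "1.0", "file": "hamiltonian.json"}],
    "mode": "full",           # one of ALLOWED_MODES
    "f_max": 1e-3,            # convergence threshold in eV/A
    "pressure": 0.0,          # passed to the FrechetCellFilter
    "max_steps": 5000,
    "keep_trajectory": False,
}
```
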
/psiflow/sampling/ase.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from typing import Optional, Union
4 |
5 | import parsl
6 | import typeguard
7 | from parsl.app.app import bash_app, join_app
8 | from parsl.dataflow.futures import AppFuture, DataFuture
9 |
10 | import psiflow
11 | from psiflow.data import Dataset
12 | from psiflow.data.utils import write_frames
13 | from psiflow.geometry import Geometry
14 | from psiflow.hamiltonians import Hamiltonian
15 | from psiflow.utils.io import dump_json
16 | from psiflow.sampling.sampling import serialize_mixture, label_forces
17 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
18 |
19 | from ._ase import ALLOWED_MODES
20 |
21 | EXECUTABLE = 'psiflow-ase-opt' # not stored in ModelEvaluation (yet?)
22 |
23 |
24 | def _execute_ase(
25 | command_launch: str,
26 | inputs: list[DataFuture],
27 | outputs: list[DataFuture],
28 | env_vars: dict = {},
29 | stdout: str = "",
30 | stderr: str = "",
31 | parsl_resource_specification: Optional[dict] = None,
32 | ) -> str:
33 | env_command = 'export ' + ' '.join([f"{name}={value}" for name, value in env_vars.items()])
34 | command_start = ' '.join([
35 | f'{command_launch} run --input_config={inputs[0].filepath} --start_xyz={inputs[1].filepath}',
36 | *[f'--path_hamiltonian={future.filepath}' for future in inputs[2:]], '&'
37 | ])
38 | command_end = f'{command_launch} clean --output_xyz={outputs[0].filepath}'
39 | if len(outputs) == 2:
40 | command_end += f' --output_traj={outputs[1].filepath}'
41 |
42 | command_list = [
43 | TMP_COMMAND,
44 | CD_COMMAND,
45 | env_command,
46 | command_start,
47 | "wait",
48 | command_end,
49 | ]
50 | return "\n".join(command_list)
51 |
52 |
53 | execute_ase = bash_app(_execute_ase, executors=["ModelEvaluation"])
54 |
55 |
56 | @typeguard.typechecked
57 | def optimize(
58 | state: Union[Geometry, AppFuture],
59 | hamiltonian: Hamiltonian,
60 | mode: str = 'full',
61 | steps: int = int(1e12),
62 | keep_trajectory: bool = False,
63 | pressure: float = 0,
64 | f_max: float = 1e-3,
65 | ) -> Union[AppFuture, tuple[AppFuture, Dataset]]:
66 |
67 | assert mode in ALLOWED_MODES
68 | assert steps > 0
69 | assert f_max > 0
70 |
71 | context = psiflow.context()
72 | definition = context.definitions["ModelEvaluation"]
73 |
74 | command_list = [EXECUTABLE]
75 | if definition.max_simulation_time is not None:
76 | max_time = 0.9 * (60 * definition.max_simulation_time)
77 | command_list = ["timeout -s 15 {}s".format(max_time), *command_list]
78 | command_launch = " ".join(command_list)
79 |
80 | input_geometry = Dataset([state]).extxyz
81 | hamiltonian = 1.0 * hamiltonian # convert to mixture
82 | names, coeffs = label_forces(hamiltonian), hamiltonian.coefficients
83 | input_forces = serialize_mixture(hamiltonian, dtype="float64") # double precision for MLPs
84 | forces = [
85 | dict(forcefield=n, weight=str(c), file=f.filename) for n, c, f in zip(names, coeffs, input_forces)
86 | ]
87 |
88 | config = dict(
89 | task='ASE optimisation',
90 | forces=forces,
91 | mode=mode,
92 | f_max=f_max,
93 | pressure=pressure,
94 | max_steps=steps,
95 | keep_trajectory=keep_trajectory,
96 | )
97 | input_future = dump_json(
98 | outputs=[context.new_file("input_", ".json")],
99 | **config,
100 | ).outputs[0]
101 | inputs = [input_future, input_geometry, *input_forces]
102 |
103 | outputs = [context.new_file("data_", ".xyz")]
104 | if keep_trajectory:
105 | outputs.append(context.new_file("opt_", ".xyz"))
106 |
107 | result = execute_ase(
108 | command_launch=command_launch,
109 | env_vars=definition.env_vars,
110 | inputs=inputs,
111 | outputs=outputs,
112 | stdout=parsl.AUTO_LOGNAME,
113 | stderr=parsl.AUTO_LOGNAME,
114 | parsl_resource_specification=definition.wq_resources(1),
115 | )
116 |
117 | final = Dataset(None, result.outputs[0])[-1]
118 | if keep_trajectory:
119 | trajectory = Dataset(None, result.outputs[1])
120 | return final, trajectory
121 | else:
122 | return final
123 |
124 |
125 | @join_app
126 | @typeguard.typechecked
127 | def _optimize_dataset(
128 | geometries: list[Geometry], *args, outputs: list = [], **kwargs
129 | ) -> AppFuture:
130 | assert not kwargs.get("keep_trajectory", False)
131 | optimized = []
132 | for geometry in geometries:
133 | optimized.append(optimize(geometry, *args, **kwargs))
134 | return write_frames(*optimized, outputs=[outputs[0]])
135 |
136 |
137 | @typeguard.typechecked
138 | def optimize_dataset(dataset: Dataset, *args, **kwargs) -> Dataset:
139 | extxyz = _optimize_dataset(
140 | dataset.geometries(),
141 | *args,
142 | outputs=[psiflow.context().new_file("data_", ".xyz")],
143 | **kwargs,
144 | ).outputs[0]
145 | return Dataset(None, extxyz)
146 |
--------------------------------------------------------------------------------
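A usage sketch of the ASE-based optimizer, grounded in the signature above; `walker_state` and `mace` are placeholders for a `Geometry` (or its future) and a `Hamiltonian`:

```python
from psiflow.sampling.ase import optimize

final = optimize(walker_state, mace, mode="fix_cell", f_max=1e-4)
final, traj = optimize(
    walker_state, mace, mode="full", pressure=0.0, keep_trajectory=True
)
```
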
/psiflow/sampling/client.py:
--------------------------------------------------------------------------------
1 | # top level imports should be lightweight!
2 | import os
3 |
4 |
5 | class SocketNotFoundException(Exception):
6 | pass
7 |
8 |
9 | def wait_for_socket(address: 'Path', timeout: float = 10, interval: float = 0.1) -> None:
10 |     """Block until the i-PI socket file appears, or raise after `timeout` seconds."""
11 | import time
12 | while not address.exists():
13 | time.sleep(interval)
14 | timeout -= interval
15 | if timeout < 0:
16 | raise SocketNotFoundException(f'Could not find socket "{address}" to connect to..')
17 | return
18 |
19 |
20 | def main():
21 | import argparse
22 | import time
23 | from pathlib import Path
24 |
25 | from ase.io import read
26 | from ipi._driver.driver import run_driver
27 |
28 | from psiflow.functions import function_from_json
29 | from psiflow.geometry import Geometry
30 | from psiflow.sampling.utils import ForceMagnitudeException, FunctionDriver
31 |
32 | print("OS environment values:")
33 | for key, value in os.environ.items():
34 | print(key, value)
35 | parser = argparse.ArgumentParser()
36 | parser.add_argument(
37 | "--path_hamiltonian",
38 | type=str,
39 | default=None,
40 | )
41 | parser.add_argument(
42 | "--device",
43 | type=str,
44 | default=None,
45 | )
46 | parser.add_argument(
47 | "--dtype",
48 | type=str,
49 | default=None,
50 | )
51 | parser.add_argument(
52 | "--address",
53 | type=str,
54 | default=None,
55 | )
56 | parser.add_argument(
57 | "--start",
58 | type=str,
59 | default=None,
60 | )
61 | parser.add_argument(
62 | "--max_force",
63 | type=float,
64 | default=None,
65 | )
66 | args = parser.parse_args()
67 | assert args.path_hamiltonian is not None
68 | assert args.address is not None
69 | assert args.start is not None
70 |
71 | print("pid: {}".format(os.getpid()))
72 | affinity = os.sched_getaffinity(os.getpid())
73 | print("CPU affinity before function init: {}".format(affinity))
74 |
75 | template = Geometry.from_atoms(read(args.start))
76 | function = function_from_json(
77 | args.path_hamiltonian,
78 | device=args.device,
79 | dtype=args.dtype,
80 | )
81 |
82 | driver = FunctionDriver(
83 | template=template,
84 | function=function,
85 | max_force=args.max_force,
86 | verbose=True,
87 | )
88 |
89 | affinity = os.sched_getaffinity(os.getpid())
90 | print("CPU affinity after function init: {}".format(affinity))
91 | try:
92 | t0 = time.time()
93 | for _ in range(10):
94 | function(template) # torch warm-up before simulation
95 | print("time for 10 evaluations: {}".format(time.time() - t0))
96 | socket_address = Path.cwd() / args.address
97 | wait_for_socket(socket_address)
98 | run_driver(
99 | unix=True,
100 | address=str(socket_address),
101 | driver=driver,
102 | sockets_prefix="",
103 | )
104 | except ForceMagnitudeException as e:
105 | print(e) # induce timeout in server
106 | except ConnectionResetError as e: # some other client induced a timeout
107 | print(e)
108 | except SocketNotFoundException as e:
109 | print(e, *list(Path.cwd().iterdir()), sep='\n') # server-side socket not found
110 |
111 |
--------------------------------------------------------------------------------
/psiflow/sampling/metadynamics.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from pathlib import Path
4 | from typing import Optional, Union
5 |
6 | import typeguard
7 | from parsl.data_provider.files import File
8 | from parsl.dataflow.futures import AppFuture
9 |
10 | import psiflow
11 | from psiflow.utils._plumed import remove_comments_printflush, set_path_in_plumed
12 | from psiflow.utils.apps import copy_app_future, copy_data_future
13 |
14 |
15 | @typeguard.typechecked
16 | @psiflow.serializable
17 | class Metadynamics:
18 | _plumed_input: str
19 | external: Optional[psiflow._DataFuture]
20 |
21 | def __init__(
22 | self,
23 | plumed_input: str,
24 | external: Union[None, str, Path, psiflow._DataFuture] = None,
25 | ):
26 | _plumed_input = remove_comments_printflush(plumed_input)
27 | assert "METAD" in _plumed_input
28 | if "RESTART" not in _plumed_input:
29 | _plumed_input = "\nRESTART\n" + _plumed_input
30 | if "FLUSH" not in _plumed_input: # add at the end!
31 | _plumed_input = _plumed_input + "\nFLUSH STRIDE=1\nPRINT"
32 |
33 |         # PLUMED + WQ cannot deal with nonexistent hills files!
34 | if type(external) in [str, Path]:
35 | external = File(str(external))
36 |             Path(external.filepath).touch()
37 | if external is None:
38 | external = psiflow.context().new_file("hills_", ".txt")
39 | Path(external.filepath).touch()
40 | else:
41 | assert external.filepath in _plumed_input
42 | Path(external.filepath).touch()
43 | _plumed_input = set_path_in_plumed(
44 | _plumed_input,
45 | "METAD",
46 | "PLACEHOLDER",
47 | )
48 | self._plumed_input = _plumed_input
49 | self.external = external
50 |
51 | def plumed_input(self):
52 | plumed_input = self._plumed_input
53 | plumed_input = plumed_input.replace("PLACEHOLDER", self.external.filepath)
54 | return plumed_input
55 |
56 | def input(self) -> AppFuture:
57 | return copy_app_future(self.plumed_input(), inputs=[self.external])
58 |
59 | def wait_for(self, result: AppFuture) -> None:
60 | self.external = copy_app_future(
61 | 0,
62 | inputs=[result, self.external],
63 | outputs=[File(self.external.filepath)],
64 | ).outputs[0]
65 |
66 | def reset(self) -> None:
67 | self.external = psiflow.context().new_file("hills_", ".txt")
68 |
69 | def __eq__(self, other) -> bool:
70 | if type(other) is not Metadynamics:
71 | return False
72 | return self.plumed_input() == other.plumed_input()
73 |
74 | def copy(self) -> Metadynamics:
75 | new_external = copy_data_future(
76 | inputs=[self.external],
77 | outputs=[psiflow.context().new_file("hills_", ".txt")],
78 | ).outputs[0]
79 | mtd = Metadynamics(
80 | str(self.plumed_input()),
81 | )
82 | assert "PLACEHOLDER" in mtd._plumed_input # instead of original filepath
83 | mtd.external = new_external
84 | return mtd
85 |
--------------------------------------------------------------------------------
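A usage sketch for `Metadynamics`; the PLUMED input is illustrative, anything containing a `METAD` action works. The hills path in the input is swapped for a placeholder internally, so `copy()` yields an independent hills file carrying the accumulated bias:

```python
from psiflow.sampling import Metadynamics

plumed_input = """
d: DISTANCE ATOMS=1,2
METAD ARG=d SIGMA=0.05 HEIGHT=0.5 PACE=25 FILE=hills
"""
mtd = Metadynamics(plumed_input)  # hills file is created and managed internally
mtd_copy = mtd.copy()             # independent copy of the accumulated hills
```
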
/psiflow/sampling/optimize.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import xml.etree.ElementTree as ET
4 | from typing import Optional, Union
5 |
6 | import parsl
7 | import typeguard
8 | from ase.units import Bohr, Ha
9 | from parsl.app.app import bash_app, join_app
10 | from parsl.dataflow.futures import AppFuture
11 |
12 | import psiflow
13 | from psiflow.data import Dataset
14 | from psiflow.data.utils import write_frames
15 | from psiflow.geometry import Geometry
16 | from psiflow.hamiltonians import Hamiltonian
17 | from psiflow.sampling.sampling import setup_sockets, make_start_command, make_client_command
18 | from psiflow.utils.io import save_xml
19 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
20 |
21 |
22 | @typeguard.typechecked
23 | def setup_forces(hamiltonian: Hamiltonian) -> tuple[dict[str, Hamiltonian], ET.Element]:
24 | hamiltonian = 1.0 * hamiltonian # convert to mixture
25 | counts = {}
26 | hamiltonians_map = {}
27 | forces = ET.Element("forces")
28 | for h, c in zip(hamiltonian.hamiltonians, hamiltonian.coefficients):
29 | name = h.__class__.__name__
30 | if name not in counts:
31 | counts[name] = 0
32 | count = counts.get(name)
33 | counts[name] += 1
34 | force = ET.Element("force", forcefield=name + str(count), weight=str(c))
35 | forces.append(force)
36 | hamiltonians_map[name + str(count)] = h
37 | return hamiltonians_map, forces
38 |
39 |
40 | @typeguard.typechecked
41 | def setup_motion(
42 | mode: str,
43 | etol: float,
44 | ptol: float,
45 | ftol: float,
46 | ) -> ET.Element:
47 | motion = ET.Element("motion", mode="minimize")
48 | optimizer = ET.Element("optimizer", mode=mode)
49 | tolerances = ET.Element("tolerances")
50 |
51 | energy = ET.Element("energy")
52 | energy.text = " {} ".format(etol / Ha)
53 | tolerances.append(energy)
54 | position = ET.Element("position")
55 | position.text = " {} ".format(ptol / Bohr)
56 | tolerances.append(position)
57 | force = ET.Element("force")
58 | force.text = " {} ".format(ftol / Ha * Bohr)
59 | tolerances.append(force)
60 | optimizer.append(tolerances)
61 | motion.append(optimizer)
62 | return motion
63 |
64 |
65 | @typeguard.typechecked
66 | def setup_output(keep_trajectory: bool) -> ET.Element:
67 | output = ET.Element("output", prefix="output")
68 | checkpoint = ET.Element(
69 | "checkpoint",
70 | filename="checkpoint",
71 | stride="1",
72 | overwrite="True",
73 | )
74 | output.append(checkpoint)
75 | if keep_trajectory:
76 |         trajectory = ET.Element(
77 | "trajectory",
78 | stride="1",
79 | format="ase",
80 | filename="trajectory",
81 | bead="0",
82 | )
83 | trajectory.text = r" positions "
84 | output.append(trajectory)
85 | return output
86 |
87 |
88 | def _execute_ipi(
89 | hamiltonian_names: list[str],
90 | client_args: list[list[str]],
91 | keep_trajectory: bool,
92 | command_server: str,
93 | command_client: str,
94 | env_vars: dict = {},
95 | stdout: str = "",
96 | stderr: str = "",
97 | inputs: list = [],
98 | outputs: list = [],
99 | parsl_resource_specification: Optional[dict] = None,
100 | ) -> str:
101 | env_command = 'export ' + ' '.join([f"{name}={value}" for name, value in env_vars.items()])
102 | command_start = make_start_command(command_server, inputs[0], inputs[1])
103 | commands_client = []
104 | for i, name in enumerate(hamiltonian_names):
105 | args = client_args[i]
106 | assert len(args) == 1 # only have one client per hamiltonian
107 | for arg in args:
108 |             commands_client.append(make_client_command(command_client, name, inputs[2 + i], inputs[1], arg))
109 |
110 | command_end = f'{command_server} --cleanup --output_xyz={outputs[0].filepath}'
111 | command_copy = f'cp walker-0_output.trajectory_0.ase {outputs[1].filepath}' if keep_trajectory else ''
112 | command_list = [
113 | TMP_COMMAND,
114 | CD_COMMAND,
115 | env_command,
116 | command_start,
117 | *commands_client,
118 | "wait",
119 | command_end,
120 | command_copy,
121 | ]
122 | return "\n".join(command_list)
123 |
124 |
125 | execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
126 |
127 |
128 | @typeguard.typechecked
129 | def optimize(
130 | state: Union[Geometry, AppFuture],
131 | hamiltonian: Hamiltonian,
132 | steps: int = 5000,
133 | keep_trajectory: bool = False,
134 | mode: str = "lbfgs",
135 | etol: float = 1e-3,
136 | ptol: float = 1e-5,
137 | ftol: float = 1e-3,
138 | ) -> Union[AppFuture, tuple[AppFuture, Dataset]]:
139 | hamiltonians_map, forces = setup_forces(hamiltonian)
140 | sockets = setup_sockets(hamiltonians_map)
141 |
142 | initialize = ET.Element("initialize", nbeads="1")
143 | start = ET.Element("file", mode="ase", cell_units="angstrom")
144 | start.text = " start_0.xyz "
145 | initialize.append(start)
146 | motion = setup_motion(mode, etol, ptol, ftol)
147 |
148 | system = ET.Element("system", prefix="walker-0")
149 | system.append(initialize)
150 | system.append(motion)
151 | system.append(forces)
152 |
153 | output = setup_output(keep_trajectory)
154 |
155 | simulation = ET.Element("simulation", mode="static")
156 | simulation.append(output)
157 | for socket in sockets:
158 | simulation.append(socket)
159 | simulation.append(system)
160 | total_steps = ET.Element("total_steps")
161 | total_steps.text = " {} ".format(steps)
162 | simulation.append(total_steps)
163 |
164 | context = psiflow.context()
165 | definition = context.definitions["ModelEvaluation"]
166 | input_future = save_xml(
167 | simulation,
168 | outputs=[context.new_file("input_", ".xml")],
169 | ).outputs[0]
170 | inputs = [
171 | input_future,
172 | Dataset([state]).extxyz,
173 | ]
174 | inputs += [h.serialize_function(dtype="float64") for h in hamiltonians_map.values()]
175 |
176 | hamiltonian_names = list(hamiltonians_map.keys())
177 | client_args = []
178 | for name in hamiltonian_names:
179 | args = definition.get_client_args(name, 1, "minimize")
180 | client_args.append(args)
181 | outputs = [context.new_file("data_", ".xyz")]
182 | if keep_trajectory:
183 | outputs.append(context.new_file("opt_", ".xyz"))
184 |
185 | command_server = definition.server_command()
186 | command_client = definition.client_command()
187 | resources = definition.wq_resources(1)
188 |
189 | result = execute_ipi(
190 | hamiltonian_names,
191 | client_args,
192 | keep_trajectory,
193 | command_server,
194 | command_client,
195 | env_vars=definition.env_vars,
196 | stdout=parsl.AUTO_LOGNAME,
197 | stderr=parsl.AUTO_LOGNAME,
198 | inputs=inputs,
199 | outputs=outputs,
200 | parsl_resource_specification=resources,
201 | )
202 |
203 | final = Dataset(None, result.outputs[0]).evaluate(hamiltonian)[-1]
204 | if keep_trajectory:
205 | trajectory = Dataset(None, result.outputs[1])
206 | return final, trajectory
207 | else:
208 | return final
209 |
210 |
211 | @join_app
212 | @typeguard.typechecked
213 | def _optimize_dataset(
214 | geometries: list[Geometry], *args, outputs: list = [], **kwargs
215 | ) -> AppFuture:
216 | assert not kwargs.get("keep_trajectory", False)
217 | optimized = []
218 | for geometry in geometries:
219 | optimized.append(optimize(geometry, *args, **kwargs))
220 | return write_frames(*optimized, outputs=[outputs[0]])
221 |
222 |
223 | @typeguard.typechecked
224 | def optimize_dataset(dataset: Dataset, *args, **kwargs) -> Dataset:
225 | extxyz = _optimize_dataset(
226 | dataset.geometries(),
227 | *args,
228 | outputs=[psiflow.context().new_file("data_", ".xyz")],
229 | **kwargs,
230 | ).outputs[0]
231 | return Dataset(None, extxyz)
232 |
--------------------------------------------------------------------------------
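A usage sketch of the i-PI-based optimizer defined above; tolerances are given in ASE units (eV, Å) and converted to atomic units in `setup_motion`. `state` and `hamiltonian` are placeholders:

```python
from psiflow.sampling.optimize import optimize

final = optimize(state, hamiltonian, mode="lbfgs", ftol=1e-4)
final, trajectory = optimize(state, hamiltonian, keep_trajectory=True)
```
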
/psiflow/sampling/order.py:
--------------------------------------------------------------------------------
1 | """
2 | TODO: these imports are outdated.. Is this module still used?
3 | """
4 | from __future__ import annotations # necessary for type-guarding class methods
5 |
6 | from functools import partial
7 | from typing import Optional, Union
8 |
9 | import typeguard
10 | from ase.units import kJ, mol
11 | from parsl.app.app import python_app
12 | from parsl.dataflow.futures import AppFuture
13 |
14 | import psiflow
15 | from psiflow.data import Dataset, batch_apply
16 | from psiflow.geometry import Geometry
17 | from psiflow.hamiltonians import Hamiltonian
18 | from psiflow.hamiltonians import PlumedHamiltonian
19 |
20 |
21 | @typeguard.typechecked
22 | def insert_in_state(
23 | state: Geometry,
24 | name: str,
25 | ) -> Geometry:
26 | value = state.energy
27 | state.order[name] = value
28 | state.energy = None
29 | return state
30 |
31 |
32 | @typeguard.typechecked
33 | def _insert(
34 | state_or_states: Union[Geometry, list[Geometry]],
35 | name: str,
36 | ) -> Union[list[Geometry], Geometry]:
37 | if not isinstance(state_or_states, list):
38 | return insert_in_state(state_or_states, name)
39 | else:
40 | for state in state_or_states:
41 | insert_in_state(state, name) # modify list in place
42 | return state_or_states
43 |
44 |
45 | insert = python_app(_insert, executors=["default_threads"])
46 |
47 |
48 | @typeguard.typechecked
49 | def insert_in_dataset(
50 | data: Dataset,
51 | name: str,
52 | ) -> Dataset:
53 | geometries = insert(
54 | data.geometries(),
55 | name,
56 | )
57 | return Dataset(geometries)
58 |
59 |
60 | @typeguard.typechecked
61 | class OrderParameter:
62 | # TODO: batched evaluation
63 |
64 | def __init__(self, name: str):
65 | self.name = name
66 |
67 | def evaluate(self, state: Union[Geometry, AppFuture]) -> AppFuture:
68 | raise NotImplementedError
69 |
70 | def __eq__(self, other):
71 | raise NotImplementedError
72 |
73 |
74 | @typeguard.typechecked
75 | @psiflow.serializable
76 | class HamiltonianOrderParameter(OrderParameter):
77 | name: str
78 | hamiltonian: Hamiltonian
79 |
80 | def __init__(self, name: str, hamiltonian: Hamiltonian):
81 | super().__init__(name)
82 | self.hamiltonian = hamiltonian
83 |
84 | def evaluate(
85 | self,
86 | arg: Union[Dataset, Geometry, AppFuture[Geometry]],
87 | batch_size: Optional[int] = 100,
88 | ) -> Union[Dataset, AppFuture]:
89 | if isinstance(arg, Dataset):
90 | # avoid batching the dataset twice:
91 | # apply hamiltonian in batched sense and put insert afterwards
92 | funcs = [
93 | self.hamiltonian.single_evaluate,
94 | partial(insert_in_dataset, name=self.name),
95 | ]
96 | future = batch_apply(
97 | funcs,
98 | batch_size,
99 | arg.length(),
100 | inputs=[arg.extxyz],
101 | outputs=[psiflow.context().new_file("data_", ".xyz")],
102 | )
103 | return Dataset(None, future.outputs[0])
104 | else:
105 | state = self.hamiltonian.evaluate(arg)
106 | return insert(state, self.name)
107 |
108 | def __eq__(self, other):
109 | if type(other) is not HamiltonianOrderParameter:
110 | return False
111 | return self.hamiltonian == other.hamiltonian
112 |
113 | @classmethod
114 | def from_plumed(
115 | cls, name: str, hamiltonian: PlumedHamiltonian
116 | ) -> HamiltonianOrderParameter:
117 | assert name in hamiltonian.plumed_input()
118 | action_prefixes = [
119 | "ABMD",
120 | "BIASVALUE",
121 | "EXTENDED_LAGRANGIAN",
122 | "EXTERNAL",
123 | "LOWER_WALLS",
124 | "MAXENT",
125 | "METAD",
126 | "MOVINGRESTRAINT",
127 | "PBMETAD",
128 | "RESTRAINT",
129 | "UPPER_WALLS",
130 | "RESTART",
131 | ]
132 | lines = hamiltonian.plumed_input().split("\n")
133 | new_lines = []
134 | for line in lines:
135 | found = [p in line for p in action_prefixes]
136 |             if any(found):
137 | continue
138 | else:
139 | new_lines.append(line)
140 |         ev_to_kjmol = 1 / (
141 |             kJ / mol
142 |         )  # compensate for the PLUMED-to-ASE unit conversion of 'energy'
143 | new_lines.append(
144 | "rescaled: MATHEVAL ARG={} FUNC=x*{} PERIODIC=NO".format(name, ev_to_kjmol)
145 | )
146 | new_lines.append("BIASVALUE ARG=rescaled")
147 | return HamiltonianOrderParameter(
148 | name=name,
149 | hamiltonian=PlumedHamiltonian(plumed_input="\n".join(new_lines)),
150 | )
151 |
--------------------------------------------------------------------------------
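
For reference, a sketch of HamiltonianOrderParameter.from_plumed as defined above: bias actions (METAD, RESTRAINT, ...) are stripped from the PLUMED input, and the named variable is re-exposed through a unit-corrected BIASVALUE so that evaluation stores its value under state.order[name]. The PLUMED input itself is illustrative only.

    from psiflow.hamiltonians import PlumedHamiltonian
    from psiflow.sampling.order import HamiltonianOrderParameter

    plumed_input = """
    phi: TORSION ATOMS=5,7,9,15
    METAD ARG=phi PACE=10 HEIGHT=1.0
    """
    bias = PlumedHamiltonian(plumed_input=plumed_input)
    order = HamiltonianOrderParameter.from_plumed("phi", bias)

    # order.evaluate(geometry) returns a future whose .order['phi'] holds the
    # torsion value; order.evaluate(dataset) annotates all states in batches
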
/psiflow/sampling/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Optional
2 |
3 | import numpy as np
4 | import typeguard
5 | from ase.data import chemical_symbols
6 |
7 | from psiflow.functions import Function
8 |
9 | # do not use psiflow apps; parsl config is not loaded in this process!
10 | from psiflow.geometry import Geometry
11 |
12 | # only import modules which do not issue warnings on import; otherwise the
13 | # output of e.g. 'python -c "from psiflow.sampling import client"' would get
14 | # polluted with import-related warnings
15 |
16 |
17 | class ForceMagnitudeException(Exception):
18 | pass
19 |
20 |
21 | class TimeoutException(Exception):
22 | pass
23 |
24 |
25 | def timeout_handler(signum, frame):
26 | raise TimeoutException
27 |
28 |
29 | @typeguard.typechecked
30 | def check_forces(
31 | forces: np.ndarray,
32 | geometry: Any,
33 | max_force: float,
34 | ):
35 | if not isinstance(geometry, Geometry):
36 | geometry = Geometry.from_atoms(geometry)
37 |
38 | exceeded = np.linalg.norm(forces, axis=1) > max_force
39 | if np.sum(exceeded):
40 | indices = np.arange(len(geometry))[exceeded]
41 | numbers = geometry.per_atom.numbers[exceeded]
42 | symbols = [chemical_symbols[n] for n in numbers]
43 | raise ForceMagnitudeException(
44 | "\nforce exceeded {} eV/A for atoms {}"
45 | " with chemical elements {}\n".format(
46 | max_force,
47 | indices,
48 | symbols,
49 | )
50 | )
51 | else:
52 | pass
53 |
54 |
55 | class FunctionDriver:
56 |
57 | def __init__(
58 | self,
59 | template: Geometry,
60 | function: Function,
61 | max_force: Optional[float],
62 | verbose: bool = True, # used by i-PI internally?
63 | error_msg="",
64 | ):
65 | self.verbose = verbose
66 | self.template = template
67 | self.function = function
68 | self.max_force = max_force
69 |
70 | def check_arguments(self):
71 | pass
72 |
73 | def __call__(self, cell, pos):
74 | from ipi.utils.units import unit_to_internal, unit_to_user
75 |
76 | pos = unit_to_user("length", "angstrom", pos)
77 | cell = unit_to_user("length", "angstrom", cell.T)
78 |
79 | self.template.per_atom.positions[:] = pos
80 | if self.template.periodic:
81 | self.template.cell[:] = cell
82 |
83 | outputs = self.function(self.template)
84 | energy = outputs["energy"]
85 | forces = outputs["forces"]
86 | stress = outputs["stress"]
87 |
88 | # check for max_force
89 | if self.max_force is not None:
90 | check_forces(forces, self.template, self.max_force)
91 |
92 | # converts to internal quantities
93 | pot_ipi = np.asarray(
94 | unit_to_internal("energy", "electronvolt", energy), np.float64
95 | )
96 | force_ipi = np.asarray(unit_to_internal("force", "ev/ang", forces), np.float64)
97 | vir_calc = -stress * self.template.volume
98 | vir_ipi = np.array(
99 | unit_to_internal("energy", "electronvolt", vir_calc.T), dtype=np.float64
100 | )
101 | extras = ""
102 |
103 | return pot_ipi, force_ipi, vir_ipi, extras
104 |
--------------------------------------------------------------------------------
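
A short sketch of the force-magnitude guard used by FunctionDriver above: check_forces raises ForceMagnitudeException as soon as any per-atom force norm exceeds max_force, naming the offending atoms. The geometry and force values below are made up for illustration.

    import numpy as np
    from ase import Atoms
    from psiflow.geometry import Geometry
    from psiflow.sampling.utils import ForceMagnitudeException, check_forces

    h2 = Atoms(numbers=[1, 1], positions=[[0, 0, 0], [0.74, 0, 0]], pbc=False)
    geometry = Geometry.from_atoms(h2)
    forces = np.array([[0.0, 0.0, 0.0], [50.0, 0.0, 0.0]])  # 50 eV/A on atom 1

    try:
        check_forces(forces, geometry, max_force=10.0)
    except ForceMagnitudeException as e:
        print(e)  # reports atom index 1, chemical element H
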
/psiflow/utils/__init__.py:
--------------------------------------------------------------------------------
1 | TMP_COMMAND = 'tmpdir=$(mktemp -d -p /tmp "mytmpdir.XXXXXXXXXX" || mktemp -d -t "mytmpdir.XXXXXXXXXX")'
2 | CD_COMMAND = 'cd $tmpdir; echo "tmpdir: $PWD"'
3 |
4 |
5 |
--------------------------------------------------------------------------------
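
These two fragments are meant to be chained into the shell command of an app so that it executes inside a fresh scratch directory; a sketch, where the payload command is hypothetical:

    from psiflow.utils import CD_COMMAND, TMP_COMMAND

    payload = "cp2k.psmp -i input.inp"  # hypothetical
    command = "; ".join([TMP_COMMAND, CD_COMMAND, payload])
    # creates a temporary directory, cd's into it (echoing its path), then
    # runs the payload inside it
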
/psiflow/utils/_plumed.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import typeguard
5 |
6 |
7 | @typeguard.typechecked
8 | def try_manual_plumed_linking() -> str:
9 | if "PLUMED_KERNEL" not in os.environ.keys():
10 | # try linking manually
11 | if "CONDA_PREFIX" in os.environ.keys(): # for conda environments
12 | p = "CONDA_PREFIX"
13 | elif "PREFIX" in os.environ.keys(): # for pip environments
14 | p = "PREFIX"
15 | else:
16 | raise ValueError("failed to set plumed .so kernel")
17 | path = os.environ[p] + "/lib/libplumedKernel.so"
18 | if os.path.exists(path):
19 | os.environ["PLUMED_KERNEL"] = path
20 | logging.info("plumed kernel manually set at : {}".format(path))
21 | else:
22 | raise ValueError("plumed kernel not found at {}".format(path))
23 | return os.environ["PLUMED_KERNEL"]
24 |
25 |
26 | @typeguard.typechecked
27 | def remove_comments_printflush(plumed_input: str) -> str:
28 | new_input = []
29 | for line in list(plumed_input.split("\n")):
30 | pre_comment = line.strip().split("#")[0].strip()
31 | if len(pre_comment) == 0:
32 | continue
33 | if pre_comment.startswith("PRINT"):
34 | continue
35 | if pre_comment.startswith("FLUSH"):
36 | continue
37 | new_input.append(pre_comment)
38 | return "\n".join(new_input)
39 |
40 |
41 | @typeguard.typechecked
42 | def set_path_in_plumed(plumed_input: str, keyword: str, path_to_set: str) -> str:
43 | lines = plumed_input.split("\n")
44 | for i, line in enumerate(lines):
45 | if keyword in line:
46 | if "FILE=" not in line:
47 | lines[i] = line + " FILE={}".format(path_to_set)
48 | continue
49 | line_before = line.split("FILE=")[0]
50 | line_after = line.split("FILE=")[1].split()[1:]
51 | lines[i] = (
52 | line_before + "FILE={} ".format(path_to_set) + " ".join(line_after)
53 | )
54 | return "\n".join(lines)
55 |
--------------------------------------------------------------------------------
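
A sketch of the two input-sanitizing helpers above: remove_comments_printflush drops comments and PRINT/FLUSH actions, and set_path_in_plumed inserts or rewrites the FILE= argument of a given action. The PLUMED input is illustrative.

    from psiflow.utils._plumed import (
        remove_comments_printflush,
        set_path_in_plumed,
    )

    plumed_input = """
    phi: TORSION ATOMS=5,7,9,15  # backbone dihedral
    METAD ARG=phi PACE=10 HEIGHT=1.0
    PRINT ARG=phi STRIDE=1 FILE=colvar
    """
    cleaned = remove_comments_printflush(plumed_input)
    relinked = set_path_in_plumed(cleaned, "METAD", "/tmp/hills")
    print(relinked)
    # phi: TORSION ATOMS=5,7,9,15
    # METAD ARG=phi PACE=10 HEIGHT=1.0 FILE=/tmp/hills
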
/psiflow/utils/apps.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import logging
4 | import sys
5 | from typing import Any, Union
6 |
7 | import numpy as np
8 | import typeguard
9 | from parsl.app.app import python_app
10 | from parsl.data_provider.files import File
11 |
12 |
13 | @typeguard.typechecked
14 | def get_attribute(obj: Any, *attribute_names: str) -> Any:
15 | for name in attribute_names:
16 | obj = getattr(obj, name)
17 | return obj
18 |
19 |
20 | @typeguard.typechecked
21 | def _boolean_or(*args: Union[bool, np.bool_]) -> bool:
22 | return any(args)
23 |
24 |
25 | boolean_or = python_app(_boolean_or, executors=["default_threads"])
26 |
27 |
28 | def _multiply(a, b):
29 | return a * b
30 |
31 |
32 | multiply = python_app(_multiply, executors=["default_threads"])
33 |
34 |
35 | @typeguard.typechecked
36 | def setup_logger(module_name):
37 | # Create logger instance for the module
38 | module_logger = logging.getLogger(module_name)
39 |
40 | # Set the desired format string
41 | formatter = logging.Formatter("%(name)s - %(message)s")
42 |
43 | # Create handler to send logs to stdout
44 | stdout_handler = logging.StreamHandler(sys.stdout)
45 | stdout_handler.setFormatter(formatter)
46 |
47 | # Add handler to the logger instance
48 | module_logger.addHandler(stdout_handler)
49 |
50 | # Set the logging level for the logger
51 | module_logger.setLevel(logging.INFO)
52 |
53 | return module_logger
54 |
55 |
56 | def _compute_sum(a, b):
57 | return np.add(a, b)
58 |
59 |
60 | compute_sum = python_app(_compute_sum, executors=["default_threads"])
61 |
62 |
63 | @typeguard.typechecked
64 | def _combine_futures(inputs: list[Any]) -> list[Any]:
65 | return list(inputs)
66 |
67 |
68 | combine_futures = python_app(_combine_futures, executors=["default_threads"])
69 |
70 |
71 | @typeguard.typechecked
72 | def _copy_data_future(
73 | pass_on_exist: bool = False,
74 | inputs: list[File] = [],
75 | outputs: list[File] = [],
76 | ) -> None:
77 | import shutil
78 | from pathlib import Path
79 |
80 | assert len(inputs) == 1
81 | assert len(outputs) == 1
82 | if Path(outputs[0]).is_file() and pass_on_exist:
83 | return None
84 | if Path(inputs[0]).is_file():
85 | shutil.copyfile(inputs[0], outputs[0])
86 | else: # no need to copy empty file
87 | pass
88 |
89 |
90 | copy_data_future = python_app(_copy_data_future, executors=["default_threads"])
91 |
92 |
93 | @typeguard.typechecked
94 | def _copy_app_future(future: Any, inputs: list = [], outputs: list = []) -> Any:
95 | # inputs/outputs to enforce additional dependencies
96 | from copy import deepcopy
97 |
98 | return deepcopy(future)
99 |
100 |
101 | copy_app_future = python_app(_copy_app_future, executors=["default_threads"])
102 |
103 |
104 | @typeguard.typechecked
105 | def _log_message(logger, message, *futures):
106 | if len(futures) > 0:
107 | logger.info(message.format(*futures))
108 | else:
109 | logger.info(message)
110 |
111 |
112 | log_message = python_app(_log_message, executors=["default_threads"])
113 |
114 |
115 | def _pack(*args):
116 | return args
117 |
118 |
119 | pack = python_app(_pack, executors=["default_threads"])
120 |
121 |
122 | @typeguard.typechecked
123 | def _unpack_i(result: Union[np.ndarray, list, tuple], i: int) -> Any:
124 |     assert i < len(result)
125 | return result[i]
126 |
127 |
128 | unpack_i = python_app(_unpack_i, executors=["default_threads"])
129 |
130 |
131 | @typeguard.typechecked
132 | def _concatenate(*arrays: np.ndarray) -> np.ndarray:
133 | return np.concatenate(arrays)
134 |
135 |
136 | concatenate = python_app(_concatenate, executors=["default_threads"])
137 |
138 |
139 | @typeguard.typechecked
140 | def _isnan(a: Union[float, np.ndarray]) -> bool:
141 | return bool(np.any(np.isnan(a)))
142 |
143 |
144 | isnan = python_app(_isnan, executors=["default_threads"])
145 |
--------------------------------------------------------------------------------
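
The apps above are thin wrappers which keep pure-Python glue on the 'default_threads' executor; futures chain transparently. A sketch, assuming an execution configuration has been loaded:

    import psiflow
    from psiflow.utils.apps import combine_futures, multiply, unpack_i

    psiflow.load()

    a = multiply(2, 3)                     # AppFuture -> 6
    b = multiply(a, 4)                     # futures pass as arguments -> 24
    both = combine_futures(inputs=[a, b])  # AppFuture -> [6, 24]
    print(unpack_i(both, 0).result())      # 6
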
/psiflow/utils/io.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | from typing import Any
3 |
4 | import numpy as np
5 | import typeguard
6 | from parsl.app.app import python_app
7 | from parsl.data_provider.files import File
8 |
9 |
10 | @typeguard.typechecked
11 | def _save_yaml(
12 | input_dict: dict,
13 | outputs: list[File] = [],
14 | **extra_keys: Any,
15 | ) -> None:
16 | import yaml
17 |
18 | def _make_dict_safe(arg):
19 | # walks through dict and converts numpy types to python natives
20 | for key in list(arg.keys()):
21 | if hasattr(arg[key], "item"):
22 | arg[key] = arg[key].item()
23 | elif type(arg[key]) is dict:
24 | arg[key] = _make_dict_safe(arg[key])
25 | else:
26 | pass
27 | return arg
28 |
29 | input_dict = dict(input_dict)
30 | for key, value in extra_keys.items():
31 | assert key not in input_dict
32 | input_dict[key] = value
33 | input_dict = _make_dict_safe(input_dict)
34 | with open(outputs[0], "w") as f:
35 | yaml.dump(input_dict, f, default_flow_style=False)
36 |
37 |
38 | save_yaml = python_app(_save_yaml, executors=["default_threads"])
39 |
40 |
41 | @typeguard.typechecked
42 | def _save_xml(
43 | element: ET.Element,
44 | outputs: list = [],
45 | ) -> None:
46 | tree = ET.ElementTree(element)
47 | ET.indent(tree, " ")
48 | tree.write(outputs[0], encoding="utf-8", xml_declaration=True)
49 |
50 |
51 | save_xml = python_app(_save_xml, executors=["default_threads"])
52 |
53 |
54 | @typeguard.typechecked
55 | def _load_numpy(inputs: list[File] = [], **kwargs) -> np.ndarray:
56 | return np.loadtxt(inputs[0], **kwargs)
57 |
58 |
59 | load_numpy = python_app(_load_numpy, executors=["default_threads"])
60 |
61 |
62 | @typeguard.typechecked
63 | def _read_yaml(inputs: list[File] = [], outputs: list[File] = []) -> dict:
64 | import yaml
65 |
66 | with open(inputs[0], "r") as f:
67 | config_dict = yaml.load(f, Loader=yaml.FullLoader)
68 | return config_dict
69 |
70 |
71 | read_yaml = python_app(_read_yaml, executors=["default_threads"])
72 |
73 |
74 | @typeguard.typechecked
75 | def _save_txt(data: str, outputs: list[File] = []) -> None:
76 | with open(outputs[0], "w") as f:
77 | f.write(data)
78 |
79 |
80 | save_txt = python_app(_save_txt, executors=["default_threads"])
81 |
82 |
83 | @typeguard.typechecked
84 | def _load_metrics(inputs: list = []) -> np.recarray:
85 | return np.load(inputs[0], allow_pickle=True)
86 |
87 |
88 | load_metrics = python_app(_load_metrics, executors=["default_threads"])
89 |
90 |
91 | @typeguard.typechecked
92 | def _save_metrics(data: np.recarray, outputs: list = []) -> None:
93 | with open(outputs[0], "wb") as f:
94 | data.dump(f)
95 |
96 |
97 | save_metrics = python_app(_save_metrics, executors=["default_threads"])
98 |
99 |
100 | @typeguard.typechecked
101 | def _dump_json(
102 | inputs: list = [],
103 | outputs: list = [],
104 | **kwargs,
105 | ) -> None:
106 | import json
107 |
108 | import numpy as np
109 |
110 |     def convert_to_list(array):
111 |         if not isinstance(array, np.ndarray):
112 |             if isinstance(array, np.floating):
113 |                 return float(array)
114 |             return array
115 |         as_list = []
116 |         for item in array:
117 |             as_list.append(convert_to_list(item))
118 |         return as_list
119 | 
120 |     for name in list(kwargs.keys()):
121 |         value = kwargs[name]
122 |         if isinstance(value, np.ndarray):
123 |             value = convert_to_list(value)
124 |         elif isinstance(value, np.floating):
125 |             value = float(value)
126 |         kwargs[name] = value
127 | with open(outputs[0], "w") as f:
128 | f.write(json.dumps(kwargs))
129 |
130 |
131 | dump_json = python_app(_dump_json, executors=["default_threads"])
132 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 |
5 |
6 | [project]
7 | name = "psiflow"
8 | version = "4.0.0"
9 | description = "Library for developing interatomic potentials"
10 | readme = "README.md"
11 | requires-python = ">=3.10"
12 | dependencies = [
13 | "ase>=3.23.0",
14 | "pyyaml>=6.0",
15 | "numpy>=1.22.3, <2",
16 | "parsl==2024.12.16",
17 | "prettytable",
18 | "psutil",
19 | "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077", # need 2024.1
20 | "pytimeparse",
21 | ]
22 |
23 |
24 | [project.scripts]
25 | psiflow-client = "psiflow.sampling.client:main"
26 | psiflow-server = "psiflow.sampling.server:main"
27 | psiflow-mace-train = "psiflow.models.mace_utils:main"
28 | psiflow-ase-opt = "psiflow.sampling._ase:main"
29 |
30 |
31 | [project.optional-dependencies]
32 | docs = [
33 | "mkdocs>=1.4.2",
34 | "mkdocs-autorefs>=0.4.1",
35 | "mkdocs-material>=9.0.3",
36 | "mkdocs-material-extensions>=1.1.1",
37 | "mkdocstrings>=0.19.1",
38 | "mkdocstrings-python>=0.8.3",
39 | ]
40 | dev = [
41 | "pre-commit",
42 | "black",
43 | "isort",
44 | "flake8",
45 | "flake8-bugbear",
46 | "flake8-pyproject",
47 | "pytest>=7.2.0",
48 | "coverage>=6.5.0",
49 | "coveralls>=3.3.1",
50 | ]
51 |
52 |
53 | [tool.setuptools.packages.find]
54 | include = [
55 | "psiflow",
56 | "psiflow.models",
57 | "psiflow.data",
58 | "psiflow.reference",
59 | "psiflow.sampling",
60 | "psiflow.utils",
61 | "psiflow.free_energy",
62 | ]
63 |
64 |
65 | [tool.flake8]
66 | max-line-length = 88
67 | extend-ignore = ["E203", "E501", "E704", "B006"]
68 | #select = C,E,F,W,B,B950
69 |
70 | [tool.isort]
71 | profile = "black"
72 |
73 | [tool.pytest.ini_options]
74 | log_cli = 0
75 | addopts = [
76 | "--basetemp=pytest-tmp", # /tmp/ may be different for each worker!
77 | "--import-mode=append",
78 | "--psiflow-config=configs/threadpool.yaml",
79 | "-W ignore::DeprecationWarning",
80 | "--log-cli-level=WARNING",
81 | ]
82 | testpaths = ["tests"]
83 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | from dataclasses import asdict
3 | from pathlib import Path
4 |
5 | import numpy as np
6 | import parsl
7 | import pytest
8 | import yaml
9 | from ase import Atoms
10 | from ase.build import bulk, make_supercell
11 | from ase.calculators.emt import EMT
12 |
13 | import psiflow
14 | from psiflow.data import Dataset
15 | from psiflow.geometry import Geometry
16 | from psiflow.models import MACE, MACEConfig
17 |
18 |
19 | def pytest_addoption(parser):
20 | parser.addoption(
21 | "--psiflow-config",
22 | action="store",
23 | help="test",
24 | )
25 | parser.addoption(
26 | "--skip-gpu",
27 | action="store_true",
28 | default=False,
29 | help="whether to run tests which require a GPU",
30 | )
31 |
32 |
33 | @pytest.fixture(scope="session")
34 | def gpu(request):
35 | if request.config.getoption("--skip-gpu"):
36 | pytest.skip("skipping tests which require GPU")
37 |
38 |
39 | @pytest.fixture(scope="session", autouse=True)
40 | def context(request, tmp_path_factory):
41 | try:
42 | context = psiflow.context()
43 | except RuntimeError:
44 | path_config = Path(request.config.getoption("--psiflow-config"))
45 | with open(path_config, "r") as f:
46 | psiflow_config = yaml.safe_load(f)
47 | psiflow_config["path"] = tmp_path_factory.mktemp("psiflow_internal")
48 | psiflow.load(psiflow_config)
49 | context = psiflow.context() # noqa: F841
50 | yield
51 | parsl.dfk().cleanup()
52 |
53 |
54 | @pytest.fixture(scope="session")
55 | def mace_config():
56 | mace_config = MACEConfig()
57 | mace_config.num_radial_basis = 3
58 | mace_config.num_cutoff_basis = 2
59 | mace_config.max_ell = 1
60 | mace_config.correlation = 1
61 | mace_config.MLP_irreps = "2x0e"
62 | mace_config.num_channels = 2
63 | mace_config.max_L = 0
64 | mace_config.r_max = 4
65 | mace_config.radial_MLP = "[4]"
66 | return asdict(mace_config)
67 |
68 |
69 | def generate_emt_cu_data(nstates, amplitude, supercell=None):
70 | if supercell is None:
71 | supercell = np.eye(3)
72 | atoms = make_supercell(bulk("Cu", "fcc", a=3.6, cubic=True), supercell)
73 | atoms.calc = EMT()
74 | pos = atoms.get_positions()
75 | box = atoms.get_cell()
76 | atoms_list = []
77 | for _ in range(nstates):
78 | atoms.set_positions(
79 | pos + np.random.uniform(-amplitude, amplitude, size=(len(atoms), 3))
80 | )
81 | atoms.set_cell(box + np.random.uniform(-amplitude, amplitude, size=(3, 3)))
82 | _atoms = atoms.copy()
83 | _atoms.calc = None
84 | _atoms.info["energy"] = atoms.get_potential_energy()
85 | _atoms.info["stress"] = atoms.get_stress(voigt=False)
86 | _atoms.arrays["forces"] = atoms.get_forces()
87 | # make content heterogeneous to test per_element functions
88 | _atoms.numbers[0] = 1
89 | _atoms.symbols[0] = "H"
90 | atoms_list.append(_atoms)
91 | return atoms_list
92 |
93 |
94 | @pytest.fixture
95 | def dataset(context):
96 | data = generate_emt_cu_data(20, 0.2)
97 | data += generate_emt_cu_data(5, 0.15, supercell=np.diag([1, 2, 1]))
98 | data_ = [Geometry.from_atoms(atoms) for atoms in data]
99 | return Dataset(data_).align_axes()
100 |
101 |
102 | @pytest.fixture(scope="session")
103 | def mace_model(mace_config):
104 | # manually recreate dataset with 'session' scope
105 | data = generate_emt_cu_data(20, 0.2)
106 | data_ = [Geometry.from_atoms(atoms) for atoms in data]
107 | dataset = Dataset(data_)
108 | model = MACE(**mace_config)
109 | # add additional state to initialize other atomic numbers
110 | # mace cannot handle partially periodic datasets
111 | geometry = Geometry.from_data(
112 | numbers=np.array(2 * [101]),
113 | positions=np.array([[0, 0, 0], [2, 0, 0]]),
114 | cell=2 * np.eye(3),
115 | )
116 | geometry.energy = -1.0
117 | geometry.per_atom.forces[:] = np.random.uniform(size=(2, 3))
118 | model.initialize(dataset[:5] + Dataset([geometry]))
119 | return model
120 |
121 |
122 | @pytest.fixture
123 | def dataset_h2(context):
124 | h2 = Atoms(
125 | numbers=[1, 1],
126 | positions=[[0, 0, 0], [0.74, 0, 0]],
127 | pbc=False,
128 | )
129 | data = [h2.copy() for i in range(20)]
130 | for atoms in data:
131 | atoms.set_positions(
132 | atoms.get_positions() + np.random.uniform(-0.05, 0.05, size=(2, 3))
133 | )
134 | return Dataset([Geometry.from_atoms(a) for a in data])
135 |
136 |
137 | @pytest.fixture
138 | def checkpoint():
139 | checkpoint_str = """
140 |
141 |
142 | 1
143 | [ time, temperature, potential ]
144 |
145 | 100
146 |
147 | cSzwsJ2A/einsteincrystal0
148 | 8.33333333e-02
149 |
150 |
151 | cSzwsJ2A/plumedhamiltonian0
152 | 8.33333333e-02
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 | 1.90008912e-03
163 | 4.11423554e-03
164 | [ 1.00000000e+00 ]
165 | 2.06706865e+02
166 |
167 |
168 |
169 |
170 | 4.13413730e+03
171 |
172 | 2.06706865e+01
173 | [ 1 ]
174 |
175 |
176 |
177 |
178 | [ 1.44513572e-01, -2.22608601e-02, 6.90340566e-02, -1.48068714e-01, 3.67026570e+00,
179 | 3.24415892e+00, 3.09455639e+00, -2.66306646e-01, 3.36282329e+00, 3.54200180e+00,
180 | 3.39685661e+00, 5.46722856e-01 ]
181 |
182 |
183 | [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
184 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
185 | 0.00000000e+00, 0.00000000e+00 ]
186 |
187 | [ 1.83747161e+03, 1.15837273e+05, 1.15837273e+05, 1.15837273e+05 ]
188 | [ H, Cu, Cu, Cu ]
189 |
190 |
191 | [ 1e+00, 1e-01, 0, 0.00000000e+00, 2e+00,
192 | 0, 0.00000000e+00, 0.00000000e+00, 3e+00 ]
193 | |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 | 1.90008912e-03
204 | 4.11423554e-03
205 | [ 1.00000000e+00 ]
206 | 2.06706865e+02
207 |
208 |
209 |
210 |
211 | 4.13413730e+03
212 |
213 | 2.06706865e+01
214 | [ 1 ]
215 |
216 |
217 |
218 |
219 | [ 1.44513572e-01, -2.22608601e-02, 6.90340566e-02, -1.48068714e-01, 3.67026570e+00,
220 | 3.24415892e+00, 3.09455639e+00, -2.66306646e-01, 3.36282329e+00, 3.54200180e+00,
221 | 3.39685661e+00, 5.46722856e-01 ]
222 |
223 |
224 | [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
225 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
226 | 0.00000000e+00, 0.00000000e+00 ]
227 |
228 | [ 1.83747161e+03, 1.15837273e+05, 1.15837273e+05, 1.15837273e+05 ]
229 | [ H, Cu, Cu, Cu ]
230 |
231 |
232 | [ 6.92067797e+00, 1.35926184e-01, -3.29542567e-02, 0.00000000e+00, 6.46614176e+00,
233 | -3.74701247e-01, 0.00000000e+00, 0.00000000e+00, 6.45073059e+00 ]
234 | |
235 |
236 |
237 | """
238 | return ET.ElementTree(element=ET.fromstring(checkpoint_str))
239 |
--------------------------------------------------------------------------------
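
The session-scoped context fixture above consumes the --psiflow-config option registered in pytest_addoption; a sketch of launching the suite programmatically with the repository's threadpool configuration:

    import pytest

    # equivalent to the addopts in pyproject.toml, plus skipping GPU tests
    pytest.main([
        "--psiflow-config=configs/threadpool.yaml",
        "--skip-gpu",
        "tests/",
    ])
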
/tests/test_free_energy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.units import _c, kB, second
3 |
4 | from psiflow.free_energy import (
5 | Integration,
6 | compute_frequencies,
7 | compute_harmonic,
8 | harmonic_free_energy,
9 | )
10 | from psiflow.geometry import check_equality
11 | from psiflow.hamiltonians import EinsteinCrystal, Harmonic, MACEHamiltonian
12 | from psiflow.sampling.ase import optimize
13 |
14 |
15 | def test_integration_simple(dataset):
16 | dataset = dataset[:10]
17 | einstein = EinsteinCrystal(dataset[1], force_constant=2)
18 | geometry = optimize(
19 | dataset[3],
20 | einstein,
21 | mode='fix_cell',
22 | f_max=1e-4,
23 | )
24 | hessian = compute_harmonic(
25 | geometry,
26 | einstein,
27 | pos_shift=5e-4,
28 | )
29 | harmonic = Harmonic(geometry, hessian)
30 |
31 | integration = Integration(
32 | harmonic,
33 | temperatures=[300, 400],
34 | delta_hamiltonian=(-0.1) * harmonic,
35 | delta_coefficients=np.array([0.0, 0.5, 1.0]),
36 | )
37 | walkers = integration.create_walkers(
38 | dataset,
39 | initialize_by="quench",
40 | )
41 | for walker in walkers:
42 | assert check_equality(walker.start, dataset[1]).result()
43 |
44 | assert len(integration.states) == 6
45 |
46 | integration.sample(steps=100, step=6)
47 | integration.compute_gradients()
48 | for i, state in enumerate(integration.states):
49 | assert state.gradients["delta"] is not None
50 | assert state.gradients["temperature"] is not None
51 |
52 | # manual computation of delta gradient
53 | delta = -0.1 * harmonic
54 | energies = delta.compute(integration.outputs[i].trajectory, "energy")
55 | assert np.allclose(
56 | state.gradients["delta"].result(),
57 | np.mean(energies.result()) / (kB * state.temperature),
58 | )
59 |
60 | hessian = hessian.result()
61 | frequencies0 = compute_frequencies(hessian, geometry)
62 | frequencies1 = compute_frequencies(hessian * 0.9, geometry)
63 | F0 = harmonic_free_energy(frequencies0, 300).result()
64 | F1 = harmonic_free_energy(frequencies1, 300).result()
65 |
66 | integrated = integration.along_delta(temperature=300).result()
67 | assert len(integrated) == 3
68 | print("\nalong delta")
69 | print(" computed delta_F: {}".format(integrated[-1]))
70 | print("theoretical delta_F: {}".format(F1 - F0))
71 | print("")
72 |
73 | # integrated = integration.along_temperature(delta_coefficient=1.0).result()
74 | # assert len(integrated) == 2
75 | # assert np.allclose(integrated[0], 0.0)
76 | # F2 = np.sum(compute_free_energy(frequencies, 400).result())
77 | # print('\nalong temperature')
78 | # print(' computed delta_F: {}'.format(integrated[-1] / (kB * 400)))
79 | # print('theoretical delta_F: {}'.format(F2 / (kB * 400) - F1 / (kB * 300)))
80 |
81 |
82 | def test_integration_temperature(dataset):
83 | einstein = EinsteinCrystal(dataset[0], force_constant=1)
84 | integration = Integration(
85 | hamiltonian=einstein,
86 | temperatures=[300, 400],
87 | pressure=0.0,
88 | )
89 | integration.create_walkers(dataset[:3])
90 | integration.sample(steps=10, step=1)
91 | integration.compute_gradients()
92 | gradient0 = integration.states[0].gradients["temperature"]
93 |
94 | integration = Integration(
95 | hamiltonian=einstein,
96 | temperatures=[300, 400],
97 | )
98 | integration.create_walkers(dataset[:3])
99 | integration.sample(steps=10, step=1)
100 | integration.compute_gradients()
101 | gradient1 = integration.states[0].gradients["temperature"]
102 | assert np.allclose(gradient0.result(), gradient1.result())
103 |
104 |
105 | def test_phonons(dataset):
106 | reference = dataset[2].result()
107 | constant = 10
108 | einstein = EinsteinCrystal(reference, force_constant=constant)
109 |
110 | hessian = compute_harmonic(
111 | reference,
112 | einstein,
113 | asr="none", # einstein == translationally VARIANT
114 | )
115 | assert np.allclose(
116 | hessian.result(), constant * np.eye(3 * len(reference)), rtol=1e-4
117 | )
118 |
119 |
120 | def test_dihydrogen(dataset_h2):
121 | geometry = dataset_h2[0].result()
122 | geometry.cell = 20 * np.eye(3)
123 | hamiltonian = MACEHamiltonian.mace_mp0("small")
124 | optimized = optimize(
125 | geometry,
126 | hamiltonian,
127 | mode='fix_cell',
128 | f_max=1e-4,
129 | ).result()
130 | assert optimized.energy is not None
131 | assert np.linalg.norm(optimized.per_atom.forces) < 1e-2
132 | hessian = compute_harmonic(
133 | optimized,
134 | hamiltonian,
135 | asr="crystal",
136 | pos_shift=0.001,
137 | )
138 | frequencies = compute_frequencies(hessian, geometry).result()
139 | # check that highest frequency in inv cm corresponds to 3500 - 4000
140 | frequencies_invcm = (frequencies * second) / (_c * 1e2) # in invcm
141 | assert np.abs(frequencies_invcm[-1] - 4000) < 1000
142 |
143 |
144 | def test_frequency_oscillator():
145 | for quantum in [True, False]:
146 | f0 = harmonic_free_energy(1.0, 300, quantum=quantum).result()
147 | f1 = harmonic_free_energy(1.1, 300, quantum=quantum).result()
148 | assert f1 > f0
149 |
150 | f2 = harmonic_free_energy(1.0, 400, quantum=quantum).result()
151 | assert f0 > f2
152 |
--------------------------------------------------------------------------------
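
test_frequency_oscillator below exercises the monotonicity of harmonic_free_energy; for reference, a standalone sketch of the textbook harmonic-oscillator expressions behind those assertions (psiflow's own implementation may use different unit conventions):

    import numpy as np

    kB = 8.617333262e-5  # eV/K

    def harmonic_f(hbar_omega: float, T: float, quantum: bool = True) -> float:
        """Free energy of one mode with energy quantum hbar*omega (in eV)."""
        if quantum:
            return hbar_omega / 2 + kB * T * np.log(1 - np.exp(-hbar_omega / (kB * T)))
        return kB * T * np.log(hbar_omega / (kB * T))

    # stiffer mode -> higher F; higher temperature -> lower F
    assert harmonic_f(0.11, 300) > harmonic_f(0.1, 300) > harmonic_f(0.1, 400)
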
/tests/test_models.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import numpy as np
4 | from parsl.app.futures import DataFuture
5 |
6 | import psiflow
7 | from psiflow.data import compute_rmse
8 | from psiflow.hamiltonians import MACEHamiltonian
9 | from psiflow.models import MACE, load_model
10 |
11 |
12 | def test_mace_init(mace_config, dataset):
13 | model = MACE(**mace_config)
14 | assert "model_future" in model._files
15 | assert model.model_future is None
16 | model.initialize(dataset[:1])
17 | assert model.model_future is not None
18 |
19 | _config = model._config
20 |
21 | data_str = psiflow.serialize(model).result()
22 | model = psiflow.deserialize(data_str)
23 |
24 | _config_ = model._config
25 | for key, value in _config.items():
26 | assert key in _config_
27 | if type(value) is not list:
28 | assert value == _config_[key]
29 |
30 | config = copy.deepcopy(mace_config)
31 | config["batch_size"] = (
32 | 100000 # bigger than ntrain --> should get reduced internally
33 | )
34 | model = MACE(**config)
35 | model.seed = 1
36 | model.initialize(dataset[:3])
37 | assert isinstance(model.model_future, DataFuture)
38 |
39 | # create hamiltonian and verify addition of atomic energies
40 | hamiltonian = model.create_hamiltonian()
41 | assert hamiltonian == model.create_hamiltonian()
42 | energies = hamiltonian.compute(dataset, "energy").result()
43 |
44 | nstates = dataset.length().result()
45 | # energies = np.array([evaluated[i].result().energy for i in range(nstates)])
46 | assert not np.any(np.allclose(energies, 0.0))
47 | energy_Cu = 3
48 | energy_H = 7
49 | atomic_energies = {
50 | "Cu": energy_Cu,
51 | "H": energy_H,
52 | }
53 | hamiltonian = MACEHamiltonian(
54 | hamiltonian.external,
55 | atomic_energies=atomic_energies,
56 | )
57 | assert hamiltonian != model.create_hamiltonian() # atomic energies
58 |
59 | evaluated = dataset.evaluate(hamiltonian)
60 | for i in range(nstates):
61 | assert np.allclose(
62 | energies[i],
63 | evaluated.subtract_offset(Cu=energy_Cu, H=energy_H)[i].result().energy,
64 | )
65 |
66 | energies = hamiltonian.compute(dataset, "energy").result()
67 | second = psiflow.deserialize(psiflow.serialize(hamiltonian).result())
68 | energies_ = second.compute(dataset, "energy").result()
69 | assert np.allclose(energies, energies_)
70 |
71 | hamiltonian = model.create_hamiltonian()
72 | model.reset()
73 | model.initialize(dataset[:3])
74 | assert hamiltonian != model.create_hamiltonian()
75 |
76 |
77 | def test_mace_train(gpu, mace_config, dataset, tmp_path):
78 | # as an additional verification, this test can be executed while monitoring
79 | # the mace logging, and in particular the rmse_r during training, to compare
80 | # it with the manually computed value
81 | training = dataset[:-5]
82 | validation = dataset[-5:]
83 | mace_config["start_swa"] = 100
84 | model = MACE(**mace_config)
85 | model.initialize(training)
86 | hamiltonian0 = model.create_hamiltonian()
87 | rmse0 = compute_rmse(
88 | validation.get("per_atom_energy"),
89 | validation.evaluate(hamiltonian0).get("per_atom_energy"),
90 | )
91 | model.train(training, validation)
92 | hamiltonian1 = model.create_hamiltonian()
93 | rmse1 = compute_rmse(
94 | validation.get("per_atom_energy"),
95 | validation.evaluate(hamiltonian1).get("per_atom_energy"),
96 | )
97 | assert rmse0.result() > rmse1.result()
98 |
99 |
100 | def test_mace_save_load(mace_config, dataset, tmp_path):
101 | model = MACE(**mace_config)
102 | model.add_atomic_energy("H", 3)
103 | model.add_atomic_energy("Cu", 4)
104 | model.save(tmp_path)
105 | model.initialize(dataset[:2])
106 | e0 = model.create_hamiltonian().compute(dataset[3], "energy").result()
107 |
108 | psiflow.wait()
109 | assert (tmp_path / "MACE.yaml").exists()
110 | assert not (tmp_path / "MACE.pth").exists()
111 |
112 | model.save(tmp_path)
113 | psiflow.wait()
114 | assert (tmp_path / "MACE.pth").exists()
115 |
116 | model_ = load_model(tmp_path)
117 | assert type(model_) is MACE
118 | assert model_.model_future is not None
119 | e1 = model_.create_hamiltonian().compute(dataset[3], "energy").result()
120 | assert np.allclose(e0, e1, atol=1e-4) # up to single precision
121 |
122 |
123 | def test_mace_seed(mace_config):
124 | model = MACE(**mace_config)
125 | assert model.seed == 0
126 | model.seed = 111
127 | assert model.seed == 111
128 | model._config["seed"] = 112
129 | assert model.seed == 112
130 |
--------------------------------------------------------------------------------
/tests/test_serialization.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from pathlib import Path
4 | from typing import Optional, Union
5 |
6 | import pytest
7 | import typeguard
8 | from parsl.data_provider.files import File
9 | from parsl.dataflow.futures import AppFuture
10 |
11 | import psiflow
12 | from psiflow.data import Dataset
13 | from psiflow.geometry import Geometry, NullState, new_nullstate
14 | from psiflow.utils.apps import copy_app_future
15 |
16 |
17 | def test_serial_simple(tmp_path):
18 | @psiflow.serializable
19 | class SomeSerial:
20 | pass
21 |
22 | @typeguard.typechecked
23 | class Test:
24 | foo: int
25 | bar: psiflow._DataFuture
26 | baz: Union[float, str]
27 | bam: Optional[SomeSerial]
28 | bao: SomeSerial
29 |         bap: list[SomeSerial]
30 | baq: Union[Geometry, AppFuture]
31 | bas: Geometry
32 |
33 | def __init__(self, **kwargs):
34 | for key, value in kwargs.items():
35 | setattr(self, key, value)
36 |
37 | new_cls = psiflow.serializable(Test)
38 | instance = new_cls(
39 | foo=3,
40 | bar=File("asdfl"),
41 | baz="asdflk",
42 | bam=None,
43 | bao=SomeSerial(),
44 | bap=[SomeSerial(), SomeSerial()],
45 | baq=copy_app_future(NullState),
46 | bas=new_nullstate(),
47 | )
48 | assert instance.foo == 3
49 | assert instance._attrs["foo"] == 3
50 |
51 | # test independence
52 | instance._attrs["test"] = 1
53 | instance_ = new_cls(foo=4, bar=File("asdfl"))
54 | assert "test" not in instance_._attrs
55 | assert instance_.foo == 4
56 | assert instance.foo == 3
57 |
58 | assert tuple(instance._files.keys()) == ("bar",)
59 | assert tuple(instance._attrs.keys()) == ("foo", "baz", "test")
60 | assert tuple(instance._serial.keys()) == ("bam", "bao", "bap")
61 | assert type(instance._serial["bap"]) is list
62 | assert len(instance._serial["bap"]) == 2
63 | assert len(instance._geoms) == 2
64 | assert "baq" in instance._geoms
65 | assert "bas" in instance._geoms
66 |
67 | # serialization/deserialization of 'complex' Test instance
68 | json_dump = psiflow.serialize(instance).result()
69 | instance_ = psiflow.deserialize(json_dump, custom_cls=[new_cls, SomeSerial])
70 |
71 | assert instance.foo == instance_.foo
72 | assert instance.bar.filepath == instance_.bar.filepath
73 | assert instance.baz == instance_.baz
74 | assert instance.bam == instance_.bam
75 | assert type(instance_.bao) is SomeSerial
76 | assert len(instance_.bap) == 2
77 | assert type(instance_.bap[0]) is SomeSerial
78 | assert type(instance_.bap[1]) is SomeSerial
79 | assert id(instance) != id(instance_)
80 | assert isinstance(instance_.baq, Geometry)
81 | assert instance_.baq == NullState
82 | assert instance_.bas == NullState
83 |
84 | # check classes created before test execution, e.g. Dataset
85 | data = Dataset([NullState])
86 | assert "extxyz" in data._files
87 | assert len(data._attrs) == 0
88 | assert len(data._serial) == 0
89 | with pytest.raises(typeguard.TypeCheckError): # try something stupid
90 | data.extxyz = 0
91 |
92 | # test getter / setter
93 | data.extxyz = File("some_file")
94 | assert type(data.extxyz) is File
95 |
96 | # test basic serialization
97 | dumped_json = psiflow.serialize(data).result()
98 | assert "Dataset" in dumped_json
99 | data_dict = json.loads(dumped_json)
100 | assert len(data_dict["Dataset"]["_attrs"]) == 0
101 | assert len(data_dict["Dataset"]["_serial"]) == 0
102 | assert len(data_dict["Dataset"]["_files"]) == 1
103 | assert data_dict["Dataset"]["_files"]["extxyz"] == data.extxyz.filepath
104 |
105 | # test copy_to serialization
106 | data = Dataset([NullState])
107 | data.extxyz.result()
108 | filename = Path(data.extxyz.filepath).name
109 | assert os.path.exists(data.extxyz.filepath)
110 | dumped_json = psiflow.serialize(data, copy_to=tmp_path / "test").result()
111 | os.remove(data.extxyz.filepath)
112 | assert (tmp_path / "test").exists()
113 | assert (tmp_path / "test" / filename).exists() # new file
114 |
--------------------------------------------------------------------------------