├── .github
│   ├── threadpool.yaml
│   └── workflows
│       └── run_pytest.yaml
├── .gitignore
├── Dockerfile
├── Dockerfile.cp2k
├── Dockerfile.gpaw
├── LICENSE
├── README.md
├── build_containers.sh
├── configs
│   ├── hortense.yaml
│   ├── lumi.yaml
│   ├── threadpool.yaml
│   └── wq.yaml
├── docs
│   ├── api_example.png
│   ├── configuration.md
│   ├── data.md
│   ├── free_energy.md
│   ├── hamiltonian.md
│   ├── hamiltonians_umbrella.svg
│   ├── icon.svg
│   ├── index.md
│   ├── install.sh
│   ├── installation.md
│   ├── learning.md
│   ├── logo_dark.png
│   ├── logo_light.png
│   ├── models.md
│   ├── overview.png
│   ├── reference.md
│   ├── sampling.md
│   └── wandb.png
├── examples
│   ├── README.md
│   ├── alanine_replica_exchange.py
│   ├── data
│   │   ├── acetaldehyde.xyz
│   │   ├── ani500k_cc_cpu.model
│   │   ├── cp2k_input.txt
│   │   ├── h2o_32.xyz
│   │   ├── vinyl_alcohol.xyz
│   │   └── water_train.xyz
│   ├── h2_static_dynamic.py
│   ├── iron_bulk_modulus.py
│   ├── iron_harmonic_fcc_bcc.py
│   ├── online_learning_pimd.py
│   ├── proton_jump_plumed.py
│   ├── submit
│   │   ├── hortense.yaml
│   │   ├── lumi.yaml
│   │   ├── submit_hortense.sh
│   │   └── submit_lumi.sh
│   ├── water_cp2k_noise.py
│   ├── water_online_learning.py
│   ├── water_path_integral_md.py
│   └── water_train_validate.py
├── mkdocs.yml
├── psiflow
│   ├── __init__.py
│   ├── config.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   └── utils.py
│   ├── execution.py
│   ├── free_energy
│   │   ├── __init__.py
│   │   ├── integration.py
│   │   └── phonons.py
│   ├── functions.py
│   ├── geometry.py
│   ├── hamiltonians.py
│   ├── learning.py
│   ├── metrics.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── _mace.py
│   │   ├── mace_utils.py
│   │   └── model.py
│   ├── order_parameters.py
│   ├── reference
│   │   ├── __init__.py
│   │   ├── _cp2k.py
│   │   ├── _dftd3.py
│   │   ├── gpaw_.py
│   │   ├── orca.py
│   │   └── reference.py
│   ├── sampling
│   │   ├── __init__.py
│   │   ├── _ase.py
│   │   ├── ase.py
│   │   ├── client.py
│   │   ├── metadynamics.py
│   │   ├── optimize.py
│   │   ├── order.py
│   │   ├── output.py
│   │   ├── sampling.py
│   │   ├── server.py
│   │   ├── utils.py
│   │   └── walker.py
│   ├── serialization.py
│   └── utils
│       ├── __init__.py
│       ├── _plumed.py
│       ├── apps.py
│       └── io.py
├── pyproject.toml
└── tests
    ├── conftest.py
    ├── test_data.py
    ├── test_free_energy.py
    ├── test_function.py
    ├── test_learning.py
    ├── test_models.py
    ├── test_reference.py
    ├── test_sampling.py
    └── test_serialization.py

--------------------------------------------------------------------------------
/.github/threadpool.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | parsl_log_level: WARNING
3 | retries: 0
4 | ModelEvaluation:
5 |   max_simulation_time: 0.4
6 |   gpu: false
7 |   use_threadpool: true
8 | ModelTraining:
9 |   max_training_time: 1
10 |   gpu: true
11 |   use_threadpool: true
12 |   max_workers: 1
13 | CP2K:
14 |   cores_per_worker: 2
15 |   max_evaluation_time: 0.3
16 |   launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
17 | CP2K_container:
18 |   cores_per_worker: 2
19 |   max_evaluation_time: 0.3
20 |   launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
21 | GPAW:
22 |   cores_per_worker: 2
23 |   max_evaluation_time: 0.3
24 |   launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
25 | GPAW_container:
26 |   cores_per_worker: 2
27 |   max_evaluation_time: 0.3
28 |   launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
29 | ...
30 | 
--------------------------------------------------------------------------------
/.github/workflows/run_pytest.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 | run-name: pytest-CI
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |   pull_request:
7 |     branches: [ main ]
8 | jobs:
9 |   test-py310:
10 |     runs-on: ubuntu-24.04
11 |     steps:
12 |       - uses: eWaterCycle/setup-apptainer@v2
13 |         with:
14 |           apptainer-version: 1.3.6
15 |       - uses: mamba-org/setup-micromamba@v1
16 |         with:
17 |           micromamba-version: '2.0.5-0'
18 |           environment-name: 'test-env'
19 |           generate-run-shell: true
20 |           create-args: >-
21 |             python=3.10
22 |             ndcctools==7.14.0
23 |             py-plumed
24 |             simple-dftd3
25 |             dftd3-python
26 |             pip
27 |             -c conda-forge
28 |           init-shell: bash
29 |           cache-environment: true
30 |           post-cleanup: 'all'
31 |       - name: Install dependencies and download containers
32 |         shell: micromamba-shell {0}
33 |         run: |
34 |           which pip
35 |           pip install pyfftw colorcet wandb pandas plotly plumed 'numpy<2.0.0'
36 |           pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4
37 |           pip install torch==2.5.1
38 |           pip install git+https://github.com/acesuit/MACE.git@v0.3.5
39 |           apptainer exec oras://ghcr.io/molmod/cp2k:2024.1 ls
40 |           apptainer exec oras://ghcr.io/molmod/gpaw:24.1 ls
41 |       - name: Checkout specific commit
42 |         uses: actions/checkout@v4
43 |       - name: Install Psiflow and run tests
44 |         shell: micromamba-shell {0}
45 |         env:
46 |           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
47 |           WANDB_MODE: offline  # disables WandB server calls
48 |         run: |
49 |           pip install .[dev]
50 |           pip list
51 |           pytest --skip-gpu --psiflow-config=.github/threadpool.yaml
52 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.swp
6 | 
7 | # C extensions
8 | *.so
9 | 
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 | 
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 | 
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 | 
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 | 
55 | # Translations
56 | *.mo
57 | *.pot
58 | 
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 | 
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 | 
69 | # Scrapy stuff:
70 | .scrapy
71 | 
72 | # Sphinx documentation
73 | docs/_build/
74 | 
75 | # PyBuilder
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 | 
85 | # pyenv
86 | .python-version
87 | 
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | pytest-tmp/ 133 | wandb/ 134 | 135 | # psiflow internal and its symlinks 136 | psiflow_internal/ 137 | psiflow_log 138 | psiflow_submit_scripts 139 | psiflow_task_logs 140 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV TZ=Etc/UTC 5 | 6 | RUN apt-get update && apt-get install -y \ 7 | python3.10 \ 8 | python3.10-venv \ 9 | python3.10-dev \ 10 | build-essential \ 11 | cmake \ 12 | wget \ 13 | git \ 14 | zlib1g-dev \ 15 | libssl-dev \ 16 | libcurl4-openssl-dev \ 17 | libgsl-dev \ 18 | perl \ 19 | fftw3 \ 20 | && apt-get clean \ 21 | && rm -rf /var/lib/apt/lists/* 22 | 23 | RUN python3.10 -m venv /opt/venv \ 24 | && /opt/venv/bin/pip install --upgrade pip 25 | 26 | # Install PLUMED 27 | ARG PLUMED_VERSION 28 | RUN wget https://github.com/plumed/plumed2/archive/refs/tags/v${PLUMED_VERSION}.tar.gz \ 29 | && tar -xzf v${PLUMED_VERSION}.tar.gz \ 30 | && cd plumed2-${PLUMED_VERSION} \ 31 | && ./configure --prefix=/usr/local/plumed \ 32 | && make -j$(nproc) \ 33 | && make install \ 34 | && cd .. \ 35 | && rm -rf plumed2-${PLUMED_VERSION} v${PLUMED_VERSION}.tar.gz 36 | 37 | # Ensure cctools can find the Python environment 38 | ENV PYTHONPATH="/opt/venv/lib/python3.10/site-packages:$PYTHONPATH" 39 | ENV PATH="/opt/venv/bin:$PATH" 40 | 41 | # Install cctools 42 | ARG CCTOOLS_VERSION 43 | RUN wget https://github.com/cooperative-computing-lab/cctools/archive/refs/tags/release/${CCTOOLS_VERSION}.tar.gz \ 44 | && tar -xzf ${CCTOOLS_VERSION}.tar.gz \ 45 | && cd cctools-release-${CCTOOLS_VERSION} \ 46 | && ./configure --prefix=/usr/local/cctools \ 47 | && make -j$(nproc) \ 48 | && make install \ 49 | && cd .. 
\
50 |     && rm -rf cctools-release-${CCTOOLS_VERSION} ${CCTOOLS_VERSION}.tar.gz
51 | 
52 | # Set environment variables for PLUMED and cctools
53 | ENV PATH="/usr/local/plumed/bin:/usr/local/cctools/bin:$PATH"
54 | ENV LD_LIBRARY_PATH="/usr/local/plumed/lib:/usr/local/cctools/lib:$LD_LIBRARY_PATH"
55 | 
56 | ARG PSIFLOW_VERSION
57 | ARG PARSL_VERSION
58 | ARG GPU_LIBRARY
59 | RUN /bin/bash -c -o pipefail \
60 |     "source /opt/venv/bin/activate && \
61 |     pip install --no-cache-dir pyfftw colorcet wandb pandas plotly plumed 'numpy<2.0.0' && \
62 |     pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4 && \
63 |     pip install --no-cache-dir 'torch>=2.5' --index-url https://download.pytorch.org/whl/${GPU_LIBRARY} && \
64 |     pip install --no-cache-dir git+https://github.com/acesuit/mace.git@v0.3.5"
65 | ARG DATE
66 | RUN /bin/bash -c -o pipefail \
67 |     "pip install --no-cache-dir git+https://github.com/molmod/psiflow.git@${PSIFLOW_VERSION}"
68 | 
69 | # Set entrypoint
70 | RUN echo '#!/bin/bash' >> /opt/entry.sh && \
71 |     echo 'source /opt/venv/bin/activate' >> /opt/entry.sh && \
72 |     echo 'export PLUMED_KERNEL=/usr/local/plumed/lib/libplumedKernel.so' >> /opt/entry.sh && \
73 |     echo '"$@"' >> /opt/entry.sh
74 | RUN chmod +x /opt/entry.sh
75 | ENTRYPOINT ["/opt/entry.sh"]
76 | 
77 | # Default command
78 | CMD ["bash"]
79 | 
--------------------------------------------------------------------------------
/Dockerfile.cp2k:
--------------------------------------------------------------------------------
1 | #
2 | # This file was created by generate_docker_files.py
3 | #
4 | # Usage: docker build -f ./2024.1_mpich_generic_psmp.Dockerfile -t cp2k/cp2k:2024.1_mpich_generic_psmp .
5 | 
6 | # Stage 1: build step
7 | FROM ubuntu:22.04 AS build
8 | 
9 | 
10 | # Install packages required for the CP2K toolchain build
11 | RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
12 |     g++ gcc gfortran libmpich-dev mpich openssh-client python3 \
13 |     bzip2 ca-certificates git make patch pkg-config unzip wget zlib1g-dev
14 | 
15 | # Download CP2K
16 | RUN git clone --recursive -b support/v2024.1 https://github.com/cp2k/cp2k.git /opt/cp2k
17 | 
18 | # Build CP2K toolchain for target CPU generic
19 | WORKDIR /opt/cp2k/tools/toolchain
20 | RUN /bin/bash -c -o pipefail \
21 |     "./install_cp2k_toolchain.sh -j 8 \
22 |     --install-all \
23 |     --enable-cuda=no \
24 |     --target-cpu=generic \
25 |     --with-cusolvermp=no \
26 |     --with-gcc=system \
27 |     --with-mpich=system"
28 | 
29 | # Build CP2K for target CPU generic
30 | WORKDIR /opt/cp2k
31 | RUN /bin/bash -c -o pipefail \
32 |     "cp ./tools/toolchain/install/arch/local.psmp ./arch/; \
33 |     source ./tools/toolchain/install/setup; \
34 |     make -j 8 ARCH=local VERSION=psmp"
35 | 
36 | # Collect components for installation and remove symbolic links
37 | RUN /bin/bash -c -o pipefail \
38 |     "mkdir -p /toolchain/install /toolchain/scripts; \
39 |     for libdir in \$(ldd ./exe/local/cp2k.psmp | \
40 |     grep /opt/cp2k/tools/toolchain/install | \
41 |     awk '{print \$3}' | cut -d/ -f7 | \
42 |     sort | uniq) setup; do \
43 |     cp -ar /opt/cp2k/tools/toolchain/install/\${libdir} /toolchain/install; \
44 |     done; \
45 |     cp /opt/cp2k/tools/toolchain/scripts/tool_kit.sh /toolchain/scripts; \
46 |     unlink ./exe/local/cp2k.popt; \
47 |     unlink ./exe/local/cp2k_shell.psmp"
48 | 
49 | # Stage 2: install step
50 | FROM ubuntu:22.04 AS install
51 | 
52 | # Install required packages
53 | RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
54 |     g++ gcc gfortran libmpich-dev
mpich openssh-client python3 && rm -rf /var/lib/apt/lists/* 55 | 56 | # Install CP2K binaries 57 | COPY --from=build /opt/cp2k/exe/local/ /opt/cp2k/exe/local/ 58 | 59 | # Install CP2K regression tests 60 | COPY --from=build /opt/cp2k/tests/ /opt/cp2k/tests/ 61 | COPY --from=build /opt/cp2k/tools/regtesting/ /opt/cp2k/tools/regtesting/ 62 | COPY --from=build /opt/cp2k/src/grid/sample_tasks/ /opt/cp2k/src/grid/sample_tasks/ 63 | 64 | # Install CP2K database files 65 | COPY --from=build /opt/cp2k/data/ /opt/cp2k/data/ 66 | 67 | # Install shared libraries required by the CP2K binaries 68 | COPY --from=build /toolchain/ /opt/cp2k/tools/toolchain/ 69 | 70 | # Create links to CP2K binaries 71 | RUN /bin/bash -c -o pipefail \ 72 | "for binary in cp2k dumpdcd graph xyz2dcd; do \ 73 | ln -sf /opt/cp2k/exe/local/\${binary}.psmp \ 74 | /usr/local/bin/\${binary}; \ 75 | done; \ 76 | ln -sf /opt/cp2k/exe/local/cp2k.psmp \ 77 | /usr/local/bin/cp2k_shell; \ 78 | ln -sf /opt/cp2k/exe/local/cp2k.psmp \ 79 | /usr/local/bin/cp2k.popt" 80 | 81 | # Create entrypoint script file 82 | RUN printf "#!/bin/bash\n\ 83 | ulimit -c 0 -s unlimited\n\ 84 | \ 85 | export OMP_STACKSIZE=16M\n\ 86 | export OMP_NUM_THREADS=1\n\ 87 | export PATH=/opt/cp2k/exe/local:\${PATH}\n\ 88 | source /opt/cp2k/tools/toolchain/install/setup\n\ 89 | if [ -n "\${MEMORY_LIMIT}" ]; then\n\ 90 | ulimit -v \${MEMORY_LIMIT}\n\ 91 | fi\n\ 92 | \n\ 93 | \"\$@\"\n" \ 94 | >/opt/entry.sh && chmod 755 /opt/entry.sh 95 | 96 | # Create shortcut for regression test 97 | RUN printf "/opt/cp2k/tests/do_regtest.py --maxtasks 8 --workbasedir /mnt \$* local psmp" \ 98 | >/usr/local/bin/run_tests && chmod 755 /usr/local/bin/run_tests 99 | 100 | # Define entrypoint 101 | WORKDIR /mnt 102 | ENTRYPOINT ["/opt/entry.sh"] 103 | CMD ["cp2k", "--help"] 104 | 105 | # Label docker image 106 | LABEL author="CP2K Developers" \ 107 | cp2k_version="2024.1" \ 108 | dockerfile_generator_version="0.2" 109 | 110 | # EOF 111 | -------------------------------------------------------------------------------- /Dockerfile.gpaw: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV TZ=Etc/UTC 5 | 6 | RUN apt-get update && apt-get install -y \ 7 | python3.10 \ 8 | python3.10-venv \ 9 | python3.10-dev \ 10 | build-essential \ 11 | autoconf \ 12 | automake \ 13 | libtool \ 14 | make \ 15 | cmake \ 16 | wget \ 17 | git \ 18 | libopenblas-dev \ 19 | libfftw3-dev \ 20 | libopenmpi-dev \ 21 | openmpi-bin \ 22 | libscalapack-mpi-dev \ 23 | libelpa-dev \ 24 | libomp-dev \ 25 | && apt-get clean \ 26 | && rm -rf /var/lib/apt/lists/* 27 | 28 | RUN python3.10 -m venv /opt/venv \ 29 | && /opt/venv/bin/pip install --upgrade pip 30 | 31 | RUN /bin/bash -c -o pipefail \ 32 | "source /opt/venv/bin/activate && \ 33 | pip install --no-cache-dir numpy && \ 34 | pip install --no-cache-dir git+https://gitlab.com/ase/ase" 35 | 36 | # install libxc 37 | RUN cd /opt && \ 38 | wget https://gitlab.com/libxc/libxc/-/archive/6.2.2/libxc-6.2.2.tar.bz2 -O libxc-6.2.2.tar.bz2 && \ 39 | tar -xvf libxc-6.2.2.tar.bz2 && \ 40 | cd libxc-6.2.2 && \ 41 | autoreconf -i && \ 42 | ./configure --prefix=/usr/local CFLAGS="-fPIC" && \ 43 | make -j$(nproc) && \ 44 | make install && \ 45 | ldconfig 46 | 47 | # install GPAW 48 | WORKDIR /opt/gpaw 49 | ENV GPAW_CONFIG=/opt/gpaw/siteconfig.py 50 | RUN echo "scalapack = True" >> siteconfig.py \ 51 | && echo "fftw = True" >> siteconfig.py \ 52 | && echo "elpa = True" >> siteconfig.py \ 
53 | && echo "libraries = ['openblas', 'fftw3', 'scalapack-openmpi', 'elpa', 'omp5']" >> siteconfig.py \ 54 | && echo "library_dirs = ['/usr/lib', '/usr/local/lib']" >> siteconfig.py \ 55 | && echo "extra_link_args = ['/usr/local/lib/libxc.a']" >> siteconfig.py \ 56 | && echo "include_dirs = ['/usr/include', '/usr/local/include', '/usr/include/elpa']" >> siteconfig.py 57 | RUN /bin/bash -c -o pipefail "source /opt/venv/bin/activate && pip install --no-cache-dir gpaw dftd3" 58 | RUN mkdir /opt/gpaw-data 59 | RUN /bin/bash -c -o pipefail "source /opt/venv/bin/activate && yes | gpaw install-data /opt/gpaw-data" || true 60 | 61 | ARG PSIFLOW_VERSION 62 | RUN /bin/bash -c -o pipefail \ 63 | "source /opt/venv/bin/activate && pip install --no-cache-dir git+https://github.com/molmod/psiflow.git@${PSIFLOW_VERSION}" 64 | RUN ln -s /opt/venv/lib/python3.10/site-packages/psiflow/reference/gpaw_.py /opt/run_gpaw.py 65 | 66 | # Create entrypoint script 67 | RUN echo '#!/bin/bash' >> /opt/entry.sh && \ 68 | echo 'source /opt/venv/bin/activate' >> /opt/entry.sh && \ 69 | echo 'export LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"' >> /opt/entry.sh && \ 70 | echo 'export GPAW_SETUP_PATH="/opt/gpaw-data/gpaw-setups-24.1.0"' >> /opt/entry.sh && \ 71 | echo '"$@"' >> /opt/entry.sh 72 | RUN chmod +x /opt/entry.sh 73 | 74 | # libxc needed at runtime 75 | ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" 76 | ENTRYPOINT ["/opt/entry.sh"] 77 | 78 | 79 | # Default command 80 | CMD ["bash"] 81 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022-2025 Ghent University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![psiflow](./docs/logo_light.png#gh-light-mode-only) 2 | ![psiflow](./docs/logo_dark.png#gh-dark-mode-only) 3 | 4 | 5 | ![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square) 6 | [![Docs](https://flat.badgen.net/static/docs/passing/green)](https://molmod.github.io/psiflow) 7 | [![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2Fmolmod%2Fpsiflow%2Fbadge%3Fref%3Dmain&style=flat-square)](https://actions-badge.atrox.dev/molmod/psiflow/goto?ref=main) 8 | ![Python](https://flat.badgen.net/static/python/3.10%20|%203.11/blue) 9 | ![Code style](https://flat.badgen.net/static/code%20style/black/black) 10 | [![DOI](https://flat.badgen.net/static/DOI/10.1038%2Fs41524-023-00969-x)](https://www.nature.com/articles/s41524-023-00969-x) 11 | 12 | 13 | # Scalable Molecular Simulation 14 | 15 | Psiflow is a scalable molecular simulation engine for chemistry and materials science applications. 16 | It supports: 17 | - **quantum mechanical calculations** at various levels of theory (GGA and hybrid DFT, post-HF methods such as MP2 or RPA, and even coupled cluster; using CP2K|GPAW|ORCA) 18 | - **trainable interaction potentials** as well as easy-to-use universal potentials, e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096) 19 | - a wide range of **sampling algorithms**: NVE | NVT | NPT, path-integral molecular dynamics, alchemical replica exchange, metadynamics, phonon-based sampling, thermodynamic integration; using [i-PI](https://ipi-code.org/), 20 | [PLUMED](https://www.plumed.org/), ... 21 | 22 | Users may define arbitrarily complex workflows and execute them **automatically** on local, HPC, and/or cloud infrastructure. 23 | To achieve this, psiflow is built using [Parsl](https://parsl-project.org/): a parallel execution library which manages job submission and workload distribution. 24 | As such, psiflow can orchestrate large molecular simulation pipelines on hundreds or even thousands of nodes. 25 | 26 | 27 |

28 | 29 |

30 | 
31 | # Setup
32 | 
33 | Use the following one-liner to create a lightweight [micromamba](https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html) Python environment with all dependencies readily available:
34 | ```sh
35 | curl -L molmod.github.io/psiflow/install.sh | bash
36 | ```
37 | The environment can be activated by sourcing the `activate.sh` file which will be created in the current working directory.
38 | Next, create a `config.yaml` file which defines the compute resources. For SLURM-based HPC systems, psiflow can initialize your configuration automatically via the following command:
39 | ```sh
40 | python -c 'import psiflow; psiflow.setup_slurm_config()'
41 | ```
42 | Example configuration files for [LUMI](https://lumi-supercomputer.eu/), [MeluXina](https://luxembourg.public.lu/en/invest/innovation/meluxina-supercomputer.html), or [VSC](https://www.vscentrum.be/) can be found [here](https://github.com/molmod/psiflow/tree/main/configs).
43 | No additional software compilation is required since all of the heavy lifting (CP2K/ORCA/GPAW, PyTorch model training, i-PI dynamics) is executed within preconfigured [Apptainer](https://apptainer.org/)/[Singularity](https://sylabs.io/singularity/) containers which are production-ready for most HPCs.
44 | 
45 | That's it! Contrary to frameworks like pyiron or aiida, psiflow does not require any databases or web servers.
46 | The only requirement is that you set up a Python environment and provide a `config.yaml`.
47 | 
48 | [**EXAMPLES**](https://github.com/molmod/psiflow/tree/main/examples)
49 | 
50 | 
51 | 
52 | 
53 | # FAQ
54 | 
55 | **Where do I start?**
56 | 
57 | Take a brief look at the [examples](https://github.com/molmod/psiflow/tree/main/examples) or the
58 | [documentation](https://molmod.github.io/psiflow) to get an idea of psiflow's
59 | capabilities. Next, head over to the [setup & configuration](https://molmod.github.io/psiflow/configuration/) section of the docs to get started!
60 | 
61 | **Is psiflow a workflow manager?**
62 | 
63 | Absolutely not! Psiflow is a Python library which allows you to perform complex molecular simulations and scale them towards large numbers of compute nodes automatically.
64 | It does not have 'fixed' workflow recipes, it does not require you to set up 'databases'
65 | or 'server daemons'. The only thing it does is expose a concise and powerful API to
66 | perform arbitrarily complex calculations in a highly efficient manner.
67 | 
68 | **Is it compatible with my cluster?**
69 | 
70 | Most likely yes. Check which resource scheduling system your cluster uses (probably either
71 | SLURM/PBSPro/SGE). If you're not sure, ask your system administrators or open an issue.
72 | 
73 | **Can I use VASP with it?**
74 | 
75 | You cannot automate VASP calculations with it, but in the vast majority of cases there is either no need
76 | to use VASP, or it's very easy to quickly perform the VASP part manually, outside of psiflow,
77 | and do everything else (data generation, ML potential training, sampling) with psiflow.
78 | Open an issue if you're not sure how to do this.
79 | 
80 | **I would like to have feature X**
81 | 
82 | Psiflow is continuously in development; if you're missing a feature feel free to open an
83 | issue or pull request!
84 | 
85 | **I have a bug. Where is my error message and how do I solve it?**
86 | 
87 | Psiflow covers essentially all major aspects of computational molecular simulation (most
88 | notably including the execution and parallelization), so there's bound to be some bug
89 | once in a while. Debugging can be challenging, and we recommend working through the following steps in
90 | order:
91 | 
92 | 1. Check the stderr/stdout of the main Python process (i.e. the `python main.py
93 |    config.yaml` one). See if there are any clues. If it has contents which you don't
94 |    understand, open an issue. If there's seemingly nothing there, go to step 2.
95 | 2. Check Parsl's log file. This can be found in the current working directory, under
96 |    `psiflow_internal/parsl.log`. If it's a long file, search for any errors using `Error`
97 |    or `ERROR`. If you find anything suspicious but do not know how to solve it,
98 |    open an issue.
99 | 3. Check the output files of individual ML training, QM singlepoints, or i-PI molecular
100 |    dynamics runs. These can be found under `psiflow_internal/000/task_logs/*`.
101 |    Again, if you find an error but do not exactly know why it happens or how to solve it,
102 |    feel free to open an issue. Most likely, it will be useful to other people as well.
103 | 4. Check the actual 'jobscripts' that were generated and which were submitted to the
104 |    cluster. Quite often, there can be a spelling mistake in e.g. the compute project you
105 |    are using, or you are requesting a resource on a partition that is not available.
106 |    These jobscripts (and their output and error) can be found under
107 |    `psiflow_internal/000/submit_scripts/`.
108 | 
109 | **Where do these container images come from?**
110 | 
111 | They were generated using Docker based on the recipes in this repository, and were then
112 | converted to `.sif` format using `apptainer`.
113 | 
114 | **Can I run psiflow locally for small runs or debug purposes?**
115 | 
116 | Of course! If you do not provide a `config.yaml`, psiflow will just use your local
117 | workstation for its execution. See e.g. [this](https://github.com/molmod/psiflow/blob/main/configs/threadpool.yaml) or [this](https://github.com/molmod/psiflow/blob/main/configs/wq.yaml) config used for testing.
--------------------------------------------------------------------------------
/build_containers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | set -e  # exit upon failure
4 | 
5 | if [ "$EUID" -ne 0 ]; then
6 |     echo "Please run this script as root or with sudo."
7 | exit 1 8 | fi 9 | 10 | # Initialize flags 11 | psiflow=false 12 | gpaw=false 13 | cp2k=false 14 | build_sif=false 15 | #mpi=mpich 16 | 17 | # Parse command line options 18 | while [[ $# -gt 0 ]]; do 19 | case "$1" in 20 | --gpaw) 21 | gpaw=true 22 | shift # Shift to next argument 23 | ;; 24 | --cp2k) 25 | cp2k=true 26 | shift 27 | ;; 28 | --psiflow) 29 | psiflow=true 30 | shift 31 | ;; 32 | --build_sif) 33 | build_sif=true 34 | shift 35 | ;; 36 | *) 37 | echo "Unknown option: $1" 38 | exit 1 39 | ;; 40 | esac 41 | done 42 | 43 | PSIFLOW_VERSION="v4.0.0" 44 | CCTOOLS_VERSION=7.14.0 45 | PLUMED_VERSION=2.9.0 46 | GPU_LIBRARIES=("rocm6.2" "cu118") 47 | 48 | # build model 49 | if [ "$psiflow" = "true" ]; then 50 | for GPU_LIBRARY in "${GPU_LIBRARIES[@]}"; do 51 | TAG="psiflow:${PSIFLOW_VERSION}_${GPU_LIBRARY}" 52 | docker build \ 53 | --build-arg GPU_LIBRARY=${GPU_LIBRARY} \ 54 | --build-arg PARSL_VERSION=$PARSL_VERSION \ 55 | --build-arg PSIFLOW_VERSION=$PSIFLOW_VERSION \ 56 | --build-arg CCTOOLS_VERSION=$CCTOOLS_VERSION \ 57 | --build-arg PLUMED_VERSION=$PLUMED_VERSION \ 58 | --build-arg DATE=$(date +%s) \ 59 | -t ghcr.io/molmod/$TAG \ 60 | -f Dockerfile . # test 61 | if [ "$build_sif" = "true" ]; then 62 | export TMPDIR=$(pwd)/tmp 63 | mkdir -p $TMPDIR 64 | apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG 65 | apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG 66 | rm $TAG.sif 67 | rm -rf $TMPDIR 68 | fi 69 | done 70 | fi 71 | 72 | if [ "$cp2k" = "true" ]; then 73 | TAG="cp2k:2024.1" 74 | docker build \ 75 | -t ghcr.io/molmod/$TAG \ 76 | -f Dockerfile.cp2k . 77 | if [ "$build_sif" = "true" ]; then 78 | apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG 79 | apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG 80 | rm $TAG.sif 81 | fi 82 | fi 83 | 84 | if [ "$gpaw" = "true" ]; then 85 | TAG="gpaw:24.1" 86 | sudo docker build \ 87 | --build-arg PSIFLOW_VERSION=$PSIFLOW_VERSION \ 88 | -t ghcr.io/molmod/$TAG \ 89 | -f Dockerfile.gpaw . 
90 | if [ "$build_sif" = "true" ]; then 91 | apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG 92 | apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG 93 | rm $TAG.sif 94 | fi 95 | fi 96 | -------------------------------------------------------------------------------- /configs/hortense.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | parsl_log_level: WARNING 3 | container_engine: 'apptainer' 4 | container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_cu118' 5 | default_threads: 8 6 | ModelEvaluation: 7 | cores_per_worker: 12 8 | gpu: True 9 | max_simulation_time: 20 10 | slurm: 11 | partition: "gpu_rome_a100" 12 | account: "2023_070" 13 | nodes_per_block: 1 14 | cores_per_node: 48 15 | max_blocks: 1 16 | walltime: "12:00:00" 17 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n" 18 | ModelTraining: 19 | cores_per_worker: 12 20 | gpu: true 21 | max_training_time: 40 22 | slurm: 23 | partition: "gpu_rome_a100" 24 | account: "2023_070" 25 | nodes_per_block: 1 26 | cores_per_node: 12 27 | max_blocks: 1 28 | walltime: "12:00:00" 29 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n" 30 | CP2K: 31 | cores_per_worker: 64 32 | max_evaluation_time: 30 33 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp' 34 | slurm: 35 | partition: "cpu_rome" 36 | account: "2024_079" 37 | nodes_per_block: 1 38 | cores_per_node: 64 39 | max_blocks: 25 40 | walltime: "06:00:00" 41 | scheduler_options: "#SBATCH --clusters=dodrio\n" 42 | ... 43 | -------------------------------------------------------------------------------- /configs/lumi.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | parsl_log_level: WARNING 3 | container_engine: 'singularity' 4 | container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_rocm6.2' 5 | default_threads: 8 6 | CP2K: 7 | cores_per_worker: 32 8 | max_evaluation_time: 20 9 | launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp' 10 | slurm: 11 | partition: "standard" 12 | account: "project_465001125" 13 | nodes_per_block: 1 14 | cores_per_node: 128 15 | max_blocks: 10 16 | walltime: "01:00:00" 17 | ModelEvaluation: 18 | cores_per_worker: 7 19 | gpu: True 20 | slurm: 21 | partition: "standard-g" 22 | account: "project_465001125" 23 | nodes_per_block: 1 24 | cores_per_node: 56 25 | max_blocks: 10 26 | walltime: "01:00:00" 27 | scheduler_options: "#SBATCH --gres=gpu:8\n" 28 | ModelTraining: 29 | cores_per_worker: 7 30 | gpu: true 31 | multigpu: true 32 | slurm: 33 | partition: "standard-g" 34 | account: "project_465001125" 35 | nodes_per_block: 1 36 | cores_per_node: 56 37 | walltime: "01:00:00" 38 | scheduler_options: "#SBATCH --gres=gpu:8\n" 39 | ... 
40 | -------------------------------------------------------------------------------- /configs/threadpool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | parsl_log_level: WARNING 3 | retries: 0 4 | ModelEvaluation: 5 | gpu: false 6 | use_threadpool: true 7 | max_simulation_time: 0.4 8 | ModelTraining: 9 | gpu: true 10 | use_threadpool: true 11 | max_training_time: 1 12 | max_workers: 1 # suppress assertion for multigpu training 13 | CP2K: 14 | cores_per_worker: 2 15 | max_evaluation_time: 0.3 16 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp' 17 | CP2K_container: 18 | cores_per_worker: 2 19 | max_evaluation_time: 0.3 20 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp' 21 | GPAW: 22 | cores_per_worker: 2 23 | max_evaluation_time: 0.3 24 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py' 25 | GPAW_container: 26 | cores_per_worker: 2 27 | max_evaluation_time: 0.3 28 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py' 29 | ... 30 | -------------------------------------------------------------------------------- /configs/wq.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | parsl_log_level: WARNING 3 | default_threads: 4 4 | ModelEvaluation: 5 | cores_per_worker: 4 6 | gpu: True 7 | max_simulation_time: 0.4 8 | ModelTraining: 9 | cores_per_worker: 4 10 | gpu: true 11 | max_training_time: 1 12 | max_workers: 1 13 | CP2K: 14 | cores_per_worker: 2 15 | max_evaluation_time: 0.3 16 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2023.2 /opt/entry.sh mpirun -np 2 -x OMP_NUM_THREADS=1 cp2k.psmp' 17 | ... 18 | -------------------------------------------------------------------------------- /docs/api_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/api_example.png -------------------------------------------------------------------------------- /docs/free_energy.md: -------------------------------------------------------------------------------- 1 | TODO 2 | -------------------------------------------------------------------------------- /docs/hamiltonian.md: -------------------------------------------------------------------------------- 1 | In Born-Oppenheimer-based molecular simulation, atomic nuclei are treated as classical 2 | particles that are subject to *effective* interactions -- these are the result of the quantum 3 | mechanical behavior of the electrons. These interactions determine the interatomic forces 4 | which are used in a dynamic simulation to propagate the atomic positions from one timestep 5 | to the next. 6 | Traditionally, dynamic simulations required an explicit evaluation of these effective 7 | forces in terms of a quantum mechanical calculation (e.g. DFT(B)). 8 | Recently, it became clear that it is much more efficient to perform such simulations 9 | using a machine-learned representation of the interaction energy, i.e. an ML potential. 
10 | The development and application of ML potentials throughout large simulation workflows is in
11 | fact one of the core applications of psiflow.
12 | 
13 | The `Hamiltonian` class is used to represent any type of interaction potential.
14 | Examples are pre-trained, 'universal' models (e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)),
15 | ML potentials trained within psiflow (see [ML potentials](model.md)), or a quadratic
16 | (hessian-based) approximation to a local energy minimum, to name a few.
17 | In addition, various sampling schemes employ bias potentials which are superimposed on the
18 | QM-based Born-Oppenheimer surface in order to drive the system
19 | along specific reaction coordinates (e.g. metadynamics, umbrella sampling).
20 | Such bias potentials are also instances of a `Hamiltonian`.
21 | 
22 | By far the simplest hamiltonian is the Einstein crystal, which binds atoms to a certain
23 | reference position using harmonic springs with a single, fixed force constant.
24 | 
25 | ```py
26 | from psiflow.geometry import Geometry
27 | from psiflow.hamiltonians import EinsteinCrystal
28 | 
29 | 
30 | # isolated H2 molecule
31 | geometry = Geometry.from_string('''
32 | 2
33 | H 0.0 0.0 0.0
34 | H 0.0 0.0 0.8
35 | ''')
36 | 
37 | einstein = EinsteinCrystal(geometry, force_constant=0.1)  # in eV/A**2
38 | 
39 | ```
40 | As mentioned earlier, the key feature of hamiltonians is that they represent an interaction energy between atoms,
41 | i.e. they output an energy (and its gradients) when given a geometry as input.
42 | Because hamiltonians might require specialized resources for their evaluation (e.g. an ML
43 | potential which gets executed on a GPU), evaluation of a hamiltonian does not necessarily
44 | happen instantly (e.g. if a GPU node is not immediately available). Similar to how
45 | `Dataset` instances return futures of a `Geometry` when a particular index is queried,
46 | hamiltonians return a future when asked to evaluate the energy/forces/stress of a
47 | particular `Geometry`:
48 | 
49 | ```py
50 | energy = einstein.compute(geometry, 'energy')  # AppFuture of an energy (np.ndarray with shape (1,))
51 | print(energy.result())  # wait for the result to complete, and print it (in eV)
52 | 
53 | from psiflow.data import Dataset  # import needed to load datasets
54 | data = Dataset.load('snapshots.xyz')  # N snapshots
55 | energy, forces, stress = einstein.compute(data)  # returns energy, forces, and stress for each snapshot in data
56 | 
57 | 
58 | assert energy.result().shape == (N,)  # one energy per snapshot
59 | assert forces.result().shape == (N, max_natoms, 3)  # forces for each snapshot, with padded natoms
60 | assert stress.result().shape == (N, 3, 3)  # stress; filled with NaNs if not applicable
61 | ```
62 | Aside from a dataset or a geometry, `compute` takes the following keyword arguments:
63 | 
64 | - **outputs** (type `str` or `list[str]`): determines which properties to compute and
65 |   return. Accepts either a single property name (`'energy'`, `'forces'`, or `'stress'`) or a list
66 |   of properties (e.g. `['energy', 'forces', 'stress']`).
67 | - **batch_size** (type `int`): splits the calculation into batches of this size. For
68 |   expensive models and/or large datasets, it makes sense to pick a smaller batch size such
69 |   that the calculation is parallelized over a large number of resources. For a very simple
70 |   calculation (e.g. the Einstein crystal), it is faster to pick a larger batch size in
71 |   order to reduce overhead due to batching. Its default value is 100.
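
As a minimal sketch of these keyword arguments in action (reusing the `einstein` hamiltonian and the `data` dataset from the snippets above; the exact keyword names follow the argument list just given):

```py
# restrict the computation to energies only, in batches of 10 snapshots;
# smaller batches parallelize better, larger batches reduce batching overhead
energy = einstein.compute(data, outputs='energy', batch_size=10)
assert energy.result().shape == (N,)  # same energies as before, different batching
```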
72 | 
73 | A particularly important hamiltonian is MACE, one of the most ubiquitous ML potentials.
74 | The MACE community has developed a few foundation models (MACE-MP) which are readily applicable to
75 | virtually any molecule or material:
76 | 
77 | ```py
78 | from psiflow.hamiltonians import MACEHamiltonian
79 | import numpy as np
80 | 
81 | mace = MACEHamiltonian.mace_mp0()  # downloads MACE-MP0 from github
82 | forces = mace.compute(geometry, 'forces')  # evaluates the MACE potential on the geometry
83 | 
84 | forces = forces.result()  # wait for evaluation to complete and get actual value
85 | 
86 | assert np.dot(forces[0], forces[1]) < 0  # forces in H2 always point opposite to each other
87 | 
88 | assert np.allclose(np.sum(forces, axis=0), 0.0)  # no net force on the molecule --> components sum to [0, 0, 0]
89 | ```
90 | A unique feature of psiflow `Hamiltonian` instances is the ability to create a new
91 | hamiltonian from a linear combination of two or more existing hamiltonians.
92 | This is relevant for many types of free energy calculations and/or enhanced sampling
93 | techniques, including umbrella sampling, Hamiltonian replica exchange, or thermodynamic
94 | integration.
95 | Let us consider the particular example of [umbrella
96 | sampling](https://wires.onlinelibrary.wiley.com/doi/10.1002/wcms.66).
97 | As an activated event, we consider the decay of vinyl alcohol to acetaldehyde,
98 | which consists of a proton jump from the oxygen to the opposite carbon:
99 | 
100 | 
101 | ![Image title](hamiltonians_umbrella.svg){ width="500" } 102 |
Transformation of vinyl alcohol into acetaldehyde by means of a proton jump. 103 | A reaction coordinate is constructed based on the distance of hydrogen with respect to 104 | oxygen and with respect to carbon.
105 |
106 | 
107 | The harmonic restraint is implemented and evaluated via [PLUMED](https://www.plumed.org/).
108 | In psiflow, this can be done by passing a plumed input string which describes the bias
109 | potential into a `PlumedHamiltonian`.
110 | 
111 | ```py
112 | from psiflow.hamiltonians import PlumedHamiltonian
113 | 
114 | plumed_str = """UNITS LENGTH=A ENERGY=kj/mol
115 | d_C: DISTANCE ATOMS=3,5
116 | d_O: DISTANCE ATOMS=1,5
117 | CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO
118 | RESTRAINT ARG=CV KAPPA=1500 AT=0.0
119 | """
120 | 
121 | bias = PlumedHamiltonian(plumed_str)
122 | 
123 | ```
124 | To add this contribution to our MACE potential, we simply sum both hamiltonians:
125 | 
126 | ```py
127 | potential = mace + bias
128 | 
129 | # double check
130 | alcohol = Geometry.load('vinyl_alcohol.xyz')
131 | total_energy = potential.compute(alcohol, 'energy')
132 | mace_energy = mace.compute(alcohol, 'energy')
133 | bias_energy = bias.compute(alcohol, 'energy')
134 | 
135 | assert np.allclose(
136 |     total_energy.result(),
137 |     mace_energy.result() + bias_energy.result(),
138 | )
139 | ```
140 | 
141 | Aside from bias potentials, the combination of multiple hamiltonians is also employed in
142 | e.g. the calculation of anharmonic free energy corrections.
143 | In that case, we consider a "base" potential energy surface which is described by a
144 | general quadratic function (i.e. a 3Nx3N hessian matrix and a minimum-energy geometry)
145 | and a small perturbation which describes the difference between the quadratic
146 | function and the fully anharmonic potential.
147 | The following code snippet demonstrates the construction of mixtures of the two energy
148 | surfaces:
149 | ```py
150 | # hessian computed via geometry optimization and finite differences
151 | # see sampling section
152 | type(hessian)  # np.ndarray
153 | hessian.shape  # (3n, 3n)
154 | type(minimum)  # Geometry
155 | len(minimum)  # n
156 | from psiflow.hamiltonians import Harmonic  # import needed for the quadratic hamiltonian
157 | harmonic = Harmonic(minimum, hessian)  # quadratic approximation: x.T @ H @ x / 2
158 | delta = mace - harmonic
159 | 
160 | hamiltonians = []  # linear interpolation between quadratic and MACE PES, in 10 steps
161 | for scale in np.linspace(0, 1, 10):
162 |     hamiltonians.append(harmonic + scale * delta)
163 | 
164 | ```
165 | 
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | hide:
3 |   - toc
4 | ---
5 | 
6 | # **psiflow** - scalable molecular simulation
7 | 
8 | 
9 | Psiflow is a scalable molecular simulation engine for chemistry and materials science applications.
10 | It supports:
11 | 
12 | - **quantum mechanical calculations** at various levels of theory (GGA and hybrid DFT, post-HF methods such as MP2 or RPA, and even coupled cluster; using CP2K | GPAW | ORCA)
13 | 
14 | - **trainable interaction potentials** as well as easy-to-use universal potentials, e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)
15 | - a wide range of **sampling algorithms**: NVE | NVT | NPT, path-integral molecular dynamics, alchemical replica exchange, metadynamics, phonon-based sampling, thermodynamic integration; using [i-PI](https://ipi-code.org/),
16 |   [PLUMED](https://www.plumed.org/), ...
17 | 
18 | Users may define arbitrarily complex workflows and execute them **automatically** on local, HPC, and/or cloud infrastructure.
19 | To achieve this, psiflow is built using [Parsl](https://parsl-project.org/): a parallel execution library which manages job submission and workload distribution. 20 | As such, psiflow can orchestrate large molecular simulation pipelines on hundreds or even thousands of nodes. 21 | 22 |
23 | ![Image title](overview.png){ width="500" } 24 |
25 | 
26 | ---
27 | 
28 | 
29 | # FAQ
30 | 
31 | **Where do I start?**
32 | 
33 | Take a brief look at the [examples](https://github.com/molmod/psiflow/tree/main/examples) or walk
34 | through the
35 | [documentation](https://molmod.github.io/psiflow/data) to get an idea of psiflow's
36 | capabilities. Next, head over to the [setup & configuration](https://molmod.github.io/psiflow/configuration/) section of the docs to get started!
37 | 
38 | **Is psiflow a workflow manager?**
39 | 
40 | Absolutely not! Psiflow is a Python library which allows you to perform complex molecular simulations and scale them towards large numbers of compute nodes automatically.
41 | It does not have 'fixed' workflow recipes, it does not require you to set up 'databases'
42 | or 'server daemons'. The only thing it does is expose a concise and powerful API to
43 | perform arbitrarily complex calculations in a highly efficient manner.
44 | 
45 | **Is it compatible with my cluster?**
46 | 
47 | Most likely yes. Check which resource scheduling system your cluster uses (probably either
48 | SLURM/PBSPro/SGE). If you're not sure, ask your system administrators or open an issue.
49 | 
50 | **Can I use VASP with it?**
51 | 
52 | You cannot automate VASP calculations with it, but in the vast majority of cases there is either no need
53 | to use VASP, or it's very easy to quickly perform the VASP part manually, outside of psiflow,
54 | and do everything else (data generation, ML potential training, sampling) with psiflow.
55 | Open an issue if you're not sure how to do this.
56 | 
57 | **I would like to have feature X**
58 | 
59 | Psiflow is continuously in development; if you're missing a feature feel free to open an
60 | issue or pull request!
61 | 
62 | **I have a bug. Where is my error message and how do I solve it?**
63 | 
64 | Psiflow covers essentially all major aspects of computational molecular simulation (most
65 | notably including the execution and parallelization), so there's bound to be some bug
66 | once in a while. Debugging can be challenging, and we recommend working through the following steps in
67 | order:
68 | 
69 | 1. Check the stderr/stdout of the main Python process (i.e. the `python main.py
70 |    config.yaml` one). See if there are any clues. If it has contents which you don't
71 |    understand, open an issue. If there's seemingly nothing there, go to step 2.
72 | 2. Check Parsl's log file. This can be found in the current working directory, under
73 |    `psiflow_internal/parsl.log`. If it's a long file, search for any errors using `Error`
74 |    or `ERROR`. If you find anything suspicious but do not know how to solve it,
75 |    open an issue.
76 | 3. Check the output files of individual ML training, QM singlepoints, or i-PI molecular
77 |    dynamics runs. These can be found under `psiflow_internal/000/task_logs/*`.
78 |    Again, if you find an error but do not exactly know why it happens or how to solve it,
79 |    feel free to open an issue. Most likely, it will be useful to other people as well.
80 | 4. Check the actual 'jobscripts' that were generated and which were submitted to the
81 |    cluster. Quite often, there can be a spelling mistake in e.g. the compute project you
82 |    are using, or you are requesting a resource on a partition that is not available.
83 |    These jobscripts (and their output and error) can be found under
84 |    `psiflow_internal/000/submit_scripts/`.
85 | 
86 | **Where do these container images come from?**
87 | 
88 | They were generated using Docker based on the recipes in this repository, and were then
89 | converted to `.sif` format using `apptainer`.
90 | 
91 | **Can I run psiflow locally for small runs or debug purposes?**
92 | 
93 | Of course! If you do not provide a `config.yaml`, psiflow will just use your local
94 | workstation for its execution. See e.g. [this](https://github.com/molmod/psiflow/blob/main/configs/threadpool.yaml) or [this](https://github.com/molmod/psiflow/blob/main/configs/wq.yaml) config used for testing.
95 | 
96 | 
97 | !!! note "Citing psiflow"
98 | 
99 |     Psiflow is developed at the
100 |     [Center for Molecular Modeling](https://molmod.ugent.be).
101 |     If you use it in your research, please cite the following paper:
102 | 
103 |     Machine Learning Potentials for Metal-Organic Frameworks using an
104 |     Incremental Learning Approach,
105 |     _Sander Vandenhaute et al._,
106 |     [npj Computational Materials](https://www.nature.com/articles/s41524-023-00969-x),
107 |     __9__, 19 __(2023)__
108 | 
--------------------------------------------------------------------------------
/docs/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
4 | export MAMBA_ROOT_PREFIX=$(pwd)  # optional, defaults to ~/micromamba
5 | 
6 | eval "$(./bin/micromamba shell hook -s posix)"
7 | micromamba activate
8 | micromamba create -n _psiflow_env -y python=3.10 pip ndcctools=7.14.0 -c conda-forge
9 | micromamba activate _psiflow_env
10 | pip install git+https://github.com/molmod/psiflow.git@v4.0.0
11 | 
12 | # create activate.sh
13 | echo 'ORIGDIR=$PWD' >>activate.sh  # prevent variable substitution
14 | echo "cd $(pwd)" >>activate.sh
15 | echo "export MAMBA_ROOT_PREFIX=$(pwd)" >>activate.sh
16 | echo 'eval "$(./bin/micromamba shell hook -s posix)"' >>activate.sh
17 | echo "micromamba activate _psiflow_env" >>activate.sh
18 | echo 'cd $ORIGDIR' >>activate.sh  # prevent variable substitution
19 | 
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | Psiflow is designed as an end-to-end framework for developing interatomic potentials. As such, it has a number of dependencies
4 | which must be available in order to perform all steps in the workflow. The following table groups
5 | the main dependencies according to how they are used in psiflow:
6 | 
7 | 
8 | 
9 | | category                 | name    | version    | uses GPU          | uses MPI         |
10 | | ------------------------ | ------- | ---------- | :---------------: | :--------------: |
11 | | **QM evaluation**        | CP2K    | >= 2023.1  |                   | :material-check: |
12 | |                          | PySCF   | >=2.4      |                   |                  |
13 | | **trainable potentials** | MACE    | 0.2.0      | :material-check:  |                  |
14 | |                          | NequIP  | 0.5.6      | :material-check:  |                  |
15 | |                          | Allegro | 0.2.0      | :material-check:  |                  |
16 | | **molecular dynamics**   | OpenMM  | 8.0        | :material-check:  |                  |
17 | |                          | PLUMED  | 2.9.0      |                   |                  |
18 | |                          | YAFF    | 1.6.0      |                   |                  |
19 | | **miscellaneous**        | Parsl   | 2024.02.12 |                   |                  |
20 | |                          | e3nn    | 0.4.4      | :material-check:  |                  |
21 | |                          | PyTorch | 1.13.1     | :material-check:  |                  |
22 | |                          | ASE     | >=3.22.1   |                   |                  |
23 | |                          | wandb   | 0.15.8     |                   |                  |
24 | |                          | Python  | 3.10, 3.11 |                   |                  |
25 | 
26 | 
27 | 
28 | ## Containerized
29 | To spare users from having to go through all of the installation
30 | shenanigans, psiflow provides a convenient portable entity which bundles all of the above
31 | dependencies -- a container image!
32 | Whether you're executing your calculations on a high-memory node in a cluster
33 | or using a GPU from a Google Cloud instance, all that is required is a working
34 | container engine and you're good to go.
35 | The vast majority of HPCs and cloud computing providers support containerized execution,
36 | using engines like [Apptainer/Singularity](https://apptainer.org/),
37 | [Shifter](https://docs.nersc.gov/development/shifter/how-to-use/),
38 | or [Docker](https://www.docker.com/).
39 | These engines are also very easily installed on your local workstation, which facilitates
40 | local debugging.
41 | 
42 | Besides a container engine, it's necessary to install a standalone Python environment,
43 | which takes care of job submission and input/output writing.
44 | Since the actual calculations are performed inside the container, the standalone
45 | Python environment requires barely anything, and is straightforward to install
46 | using `micromamba` -- a blazingly fast drop-in replacement for `conda`:
47 | 
48 | ```console
49 | micromamba create -n psiflow_env -c conda-forge -y python=3.10
50 | micromamba activate psiflow_env
51 | pip install parsl==2023.10.23 git+https://github.com/molmod/psiflow
52 | ```
53 | That's it! Before running actual calculations, it is still necessary to set up Parsl
54 | to use the compute resources you have at your disposal -- whether it's a local GPU,
55 | a SLURM cluster, or a cloud computing provider; check out the
56 | [Execution](execution.md) page for more details.
57 | 
58 | !!! note "Containers 101"
59 | 
60 |     Apptainer -- now the most widely used container system for HPCs -- is part of the
61 |     Linux Foundation. It is easy to set up on most Linux distributions, as explained in the [Apptainer documentation](https://apptainer.org/docs/admin/main/installation.html#install-ubuntu-packages).
62 | 
63 |     Psiflow's containers are hosted on the GitHub Container Registry (GHCR), for both Python 3.9 and 3.10.
64 |     To download and run commands in them, simply execute:
65 | 
66 |     ```console
67 |     # show available pip packages
68 |     apptainer exec oras://ghcr.io/molmod/psiflow:3.0.0_python3.9_cuda /usr/local/bin/entry.sh pip list
69 | 
70 |     # inspect cp2k version
71 |     apptainer exec oras://ghcr.io/molmod/psiflow:3.0.0_python3.9_cuda /usr/local/bin/entry.sh cp2k.psmp --version
72 |     ```
73 | 
74 |     Internally, Apptainer will store the container in a local cache directory such that it does not have to
75 |     redownload it every time a command gets executed. Usually, it's a good idea to manually change the location
76 |     of these cache directories since they can end up clogging your `$HOME` directory quite quickly.
77 |     To do this, simply put the following lines in your `.bashrc`:
78 | 
79 |     ```console
80 | 
81 |     export APPTAINER_CACHEDIR=/some/dir/on/local/scratch/apptainer_cache
82 |     ```
83 | 
84 |     If your compute resources use SingularityCE instead of Apptainer,
85 |     replace 'APPTAINER' with 'SINGULARITY' in the environment variable names.
86 | 
87 | !!! note "Weights & Biases"
88 |     To ensure psiflow can communicate its data to [W&B](https://wandb.ai), add
89 | 
90 |     ```console
91 |     export WANDB_API_KEY=<your API key>
92 |     ```
93 |     to your `.bashrc`.
94 | 
95 | !!!
note "AMD GPU support" 96 | 97 | As the name of the container suggests, GPU acceleration for PyTorch models in OpenMM 98 | is currently only available for Nvidia GPUs because the compatibility of conda/mamba 99 | with AMD GPUs (HIP) is not great at the moment. If you really must use AMD GPUs 100 | in psiflow, you'll have to manually create a separate Python environment with a ROCm-enabled 101 | PyTorch for training, and the regular containerized setup for CPU-only 102 | molecular dynamics with OpenMM. 103 | 104 | A ROCm-compatible PyTorch can be installed using the following command: 105 | ```console 106 | pip install --force torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2 107 | ``` 108 | 109 | 110 | ## Manual 111 | While a containerized setup guarantees reproducibility and is faster to install, 112 | a fully manual setup of Psiflow and its dependencies provides the user with full control 113 | over software versions or compiler flags. 114 | While this is not really necessary in the vast majority of cases, we mention for completeness 115 | the following manual setup using `micromamba`: 116 | ```console 117 | CONDA_OVERRIDE_CUDA="11.8" micromamba create -p ./psiflow_env -y -c conda-forge \ 118 | python=3.10 pip \ 119 | openmm-plumed openmm-torch pytorch=1.13.1=cuda* \ 120 | nwchem py-plumed cp2k && \ 121 | micromamba clean -af --yes 122 | pip install cython==0.29.36 matscipy prettytable && \ 123 | pip install git+https://github.com/molmod/molmod && \ 124 | pip install git+https://github.com/molmod/yaff && \ 125 | pip install e3nn==0.4.4 126 | pip install numpy ase tqdm pyyaml 'torch-runstats>=0.2.0' 'torch-ema>=0.3.0' mdtraj tables 127 | pip install git+https://github.com/acesuit/MACE.git@55f7411 && \ 128 | pip install git+https://github.com/mir-group/nequip.git@develop --no-deps && \ 129 | pip install git+https://github.com/mir-group/allegro --no-deps && \ 130 | pip install git+https://github.com/svandenhaute/openmm-ml.git@triclinic 131 | pip install 'psiflow[parsl] @ git+https://github.com/molmod/psiflow' 132 | ``` 133 | This is mostly a copy-paste from psiflow's [Dockerfiles](https://github.com/molmod/psiflow/blob/main/container). 134 | -------------------------------------------------------------------------------- /docs/learning.md: -------------------------------------------------------------------------------- 1 | Psiflow allows for the seamless development and scalable 2 | execution of online learning algorithms for ML potentials. 3 | The `Learning` class provides an interface based on which such 4 | algorithms can be implemented. 5 | They keep track of the generated data, error metrics, optional [Weights & 6 | Biases](https://wandb.ai) logging, and provide basic restart functionalities in case 7 | something goes wrong. 8 | Learning objects are instantiated using the following arguments: 9 | 10 | - **reference** (type `Reference`): the `Reference` instance which will be used to 11 | evaluate ground-truth energy/force labels for each of the samples generated. 12 | - **path_output** (type `str | Path`): the location to a folder in which intermediate 13 | models, datasets, walker states, and restart files can be saved. 14 | - **train_valid_split** (type `float`): fraction of generated data which should be used 15 | for the training set (as opposed to validation). 
16 | - **error_thresholds_for_reset** (type `list[Optional[float]]`): during online learning,
17 | it is not uncommon for walkers to explore unphysical regions of phase space, due to
18 | irregularities in the intermediate potential, excessive temperatures/pressures, ...
19 | In those cases, it is beneficial to reset walkers to their starting configurations,
20 | which are known to be physically sound. The decision to reset walkers
21 | is made every time the 'exact' energy and forces have been computed for a sampled
22 | state. If the error between the corresponding walker's model (i.e. the previous model)
23 | and the QM-evaluated energy and forces exceeds the given thresholds (one for the energy,
24 | one for the forces), the walker is reset.
25 | This argument expects a list of length two (threshold on the energy error, and threshold on the
26 | force error), with optional `None` values if no reset is desired.
27 | For example: `[None, 0.1]` means that a walker is reset whenever its force RMSE exceeds 100 meV/A,
28 | while any energy discrepancy is ignored; see the sketch below the argument list.
29 | - **error_thresholds_for_discard** (type `list[Optional[float]]`): states which are
30 | entirely unphysical do not contribute to the accuracy of the model, and sometimes even
31 | hinder proper training. If these error thresholds are exceeded, the state is discarded and the walker is reset.
32 | - **wandb_group** (type `str`): if specified, the computed dataset metrics will be logged
33 | to Weights & Biases in the corresponding group of runs for easy visual analysis.
34 | - **wandb_project** (type `str`): if specified, the computed dataset metrics will be logged
35 | to Weights & Biases in the corresponding project for easy visual analysis.
36 | - **initial_data** (type `Dataset`): existing, labeled data from which the learning can be
37 | bootstrapped. Note that *all* states in this dataset must be labeled, and that this is
38 | only sensible if the labeling agrees with the given Reference instance (same level of
39 | theory, same basis set, grid settings, ...).
40 | 
41 | 
42 | 
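For concreteness, below is a minimal sketch of how these arguments fit together. The threshold values here are hypothetical, and `reference` stands for any `Reference` instance (e.g. a CP2K object); the keyword names are the ones documented above:

```py
from psiflow.learning import Learning

learning = Learning(
    reference,                                 # e.g. a CP2K instance (see QM calculations)
    './output',                                # folder for intermediate models, data, restarts
    train_valid_split=0.9,                     # 90% of generated data used for training
    error_thresholds_for_reset=[None, 0.1],    # reset walkers whose force RMSE exceeds 0.1 eV/A
    error_thresholds_for_discard=[None, 0.5],  # hypothetical: discard truly unphysical states
)
```
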
43 | ![Image title](wandb.png){ width="900" } 44 |
Illustration of what the Weights & Biases logging looks like.
45 | The graph on top simply shows the force RMSE on each data point versus a unique
46 | 'identifier' per data point. The bottom plot shows the same data points, but now
47 | grouped according to which walker generated them. In this case, walkers were sorted
48 | according to temperature (lower walker indices correspond to lower temperatures); this is reflected
49 | in the fact that walkers with a higher index generated data with, on average, higher errors,
50 | as they explored more out-of-equilibrium configurations.
51 |
52 | 
53 | 
54 | The core business of a `Learning` instance is the following sequence of operations:
55 | 
56 | 1. use walkers in a `sample()` call to generate atomic geometries
57 | 2. evaluate those atomic geometries with the provided reference to obtain QM energies and
58 | forces
59 | 3. add those geometries to the training data, or discard them if they exceed
60 | `error_thresholds_for_discard`; reset walkers if they exceed
61 | `error_thresholds_for_reset`
62 | 4. train the model using the new data
63 | 5. compute metrics for the trained model across the new dataset, and optionally log them to
64 | W&B
65 | 
66 | Two variants of this scheme are currently implemented: passive and active learning.
67 | 
68 | ## passive learning
69 | 
70 | During passive learning, walkers are propagated using an external, 'fixed' Hamiltonian
71 | which is not trained at any point (e.g. a pre-trained universal potential or a
72 | hessian-based Hamiltonian).
73 | 
74 | ```py
75 | model, walkers = learning.passive_learning(
76 |     model,
77 |     walkers,
78 |     hamiltonian=MACEHamiltonian.mace_mp0(),  # fixed hamiltonian
79 |     steps=20000,
80 |     step=2000,
81 |     **optional_sampling_kwargs,
82 | )
83 | ```
84 | Walkers are propagated for a total of 20,000 steps, and samples are drawn every 2,000
85 | steps; these samples are QM-evaluated by the reference and added to the training data.
86 | If the walkers contain bias contributions, their total hamiltonian is simply the sum of
87 | the existing bias contributions and the hamiltonian given to the `passive_learning()`
88 | call.
89 | Additional keyword arguments to this function are passed directly into the sample function (e.g. for
90 | specifying the log level or the center-of-mass behavior).
91 | 
92 | The returned model is the one trained on all data generated in the `passive_learning()` call as well as all data which was already present in the learning instance (for example if it had been initialized with `initial_data`, see above).
93 | The returned walkers are the same objects as the ones passed into the method; they are
94 | returned explicitly to emphasize that they do change during a `passive_learning()` call
95 | (they are either propagated or reset, and any metadynamics bias accumulates additional
96 | hills).
97 | 
98 | ## active learning
99 | 
100 | During active learning, walkers are propagated with a Hamiltonian generated from the
101 | current model. They are propagated for a given number of steps, after which their final
102 | state is passed into the reference for labeling.
103 | Different from passive learning, active learning *does not allow for subsampling of the
104 | trajectories of the walkers*. The idea behind this is that if you wish to propagate a
105 | walker for 10 ps and sample a structure every 1 ps to let it generate 10 states,
106 | it is likely much better to instead increase the number of walkers (to cover more regions
107 | in phase space) and propagate them in steps of 1 ps. Active learning is ideally suited for
108 | massively parallel workflows (a maximal number of walkers, with minimal sampling time per
109 | walker) and we encourage users to exploit this.
110 | 
111 | ```py
112 | model, walkers = learning.active_learning(
113 |     model,  # used to generate hamiltonian
114 |     walkers,
115 |     steps=2000,  # no more 'step' argument!
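    # only the final state of each walker is QM-evaluated and added to the data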
116 |     **optional_sampling_kwargs,
117 | )
118 | ```
119 | ## restarting a run
120 | 
121 | `Learning` has first-class support for restarted runs -- simply resubmit your calculation!
122 | It will detect whether or not the corresponding output folder has already fully logged
123 | each of the iterations, and if so, it loads the final state of the model, the walkers, and the
124 | learning instance without actually doing any calculations.
125 | 
-------------------------------------------------------------------------------- /docs/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/logo_dark.png
-------------------------------------------------------------------------------- /docs/logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/logo_light.png
-------------------------------------------------------------------------------- /docs/models.md: --------------------------------------------------------------------------------
1 | Once we know how to represent datasets of atomic geometries and label them with
2 | target QM energy and force values, we can start defining and training ML potentials.
3 | Psiflow defines an abstract `Model` interface which each
4 | particular ML potential should subclass, though at the moment only
5 | [MACE](https://github.com/acesuit/mace) is implemented.
6 | In addition, psiflow provides configuration dataclasses for each model with
7 | reasonable defaults.
8 | 
9 | A `Model` has essentially three methods:
10 | 
11 | - `initialize`: compute energy shifts and scalings as well as the average number
12 | of neighbors (and any other network normalization metrics) using a given *training* dataset,
13 | and initialize the model weights.
14 | - `train`: train the parameters of a model using two separate datasets, one for
15 | actual training and one for validation. The current model parameters are used as
16 | starting parameters for the training.
17 | - `create_hamiltonian`: spawn a hamiltonian in order to use the model, with its current
18 | weights, in molecular dynamics simulations.
19 | 
20 | The following is a minimal illustration:
21 | ```py
22 | from psiflow.data import Dataset, compute_rmse
23 | from psiflow.models import MACE
24 | 
25 | 
26 | # load data with energy and force labels included as extxyz
27 | train, valid = Dataset.load('all_data.xyz').split(0.9, shuffle=True)
28 | 
29 | model = MACE(  # for the full argument list, see psiflow/models/_mace.py:MACEConfig
30 |     num_channels=16,
31 |     max_L=2,
32 |     max_num_epochs=400,
33 |     batch_size=16,
34 | )
35 | 
36 | # initialize, train
37 | model.initialize(train)  # this will calculate the scale/shift, and the average number of neighbors
38 | model.train(train, valid)  # train using the supplied datasets
39 | 
40 | model.save('./')  # saves model and config to the current working directory!
41 | 
42 | hamiltonian = model.create_hamiltonian()
43 | forces_pred = hamiltonian.compute(valid, 'forces')
44 | forces_target = valid.get('forces')
45 | 
46 | rmse = compute_rmse(forces_pred, forces_target)  # this is a Future!
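# like most psiflow operations, compute_rmse returns a future;
# call .result() to block until the actual value is available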
47 | print('forces RMSE: {} eV/A'.format(rmse.result()))
48 | 
49 | ```
50 | Note that `model.save()` will save both a `.yaml` file with all hyperparameters and the actual `.pth` model which is needed to reconstruct the corresponding PyTorch module (possibly outside of psiflow if needed).
51 | As such, it expects a directory as argument (which may either already exist or will be
52 | created).
53 | 
54 | It is generally recommended to provide these models with an estimate of the absolute energy of an isolated
55 | atom for the specific level of theory and basis set considered, and this for each element.
56 | Instead of having the model learn the *absolute* total energy of the system, we first subtract these atomic energies in order
57 | to train the model on the *formation* energy of the system instead, as this generally improves the generalization performance
58 | of the model towards unseen stoichiometries.
59 | 
60 | ```py
61 | model.add_atomic_energy('H', -13.7)  # add atomic energy of an isolated hydrogen atom
62 | model.initialize(some_training_data)
63 | 
64 | model.add_atomic_energy('O', -400)  # will raise an exception; the model needs to be reinitialized first
65 | model.reset()  # removes the current model, but keeps the raw config
66 | model.add_atomic_energy('O', -400)  # OK!
67 | model.initialize(some_training_data)  # offsets the total energy with the given atomic energy values per atom
68 | 
69 | ```
70 | Whenever atomic energies are available, `Model` instances will automatically offset the potential energy in a (labeled)
71 | `Dataset` by the sum of the energies of the isolated atoms; the underlying PyTorch network is then initialized/trained
72 | on the formation energy of the system instead.
73 | In order to avoid artificially large energy discrepancies between models trained on the formation energy on the one hand,
74 | and reference potential energies as obtained from any `BaseReference` on the other hand,
75 | the `evaluate` method will first perform the converse operation, i.e. add the energies of the isolated atoms
76 | to the model's prediction of the formation energy.
77 | Similarly, `create_hamiltonian()` also passes any atomic energies which were added to the
78 | model.
-------------------------------------------------------------------------------- /docs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/overview.png
-------------------------------------------------------------------------------- /docs/reference.md: --------------------------------------------------------------------------------
1 | # QM calculations
2 | The energy and gradients of the ground-state Born-Oppenheimer surface can be obtained
3 | using varying levels of approximation.
4 | In psiflow, the calculation of the energy and its gradients can be performed for both
5 | `Geometry` and `Dataset` instances, using different software packages:
6 | 
7 | - **CP2K** (periodic, mixed PW/lcao): very fast, and very useful for pretty much any periodic
8 | structure. Its forces tend to be quite noisy with the default grid settings, so some
9 | level of caution is advised. Also, even though it uses both plane waves and atomic basis
10 | sets, it does suffer from BSSE.
11 | - **GPAW** (periodic/cluster, PW/lcao/grid): slower but more numerically stable than CP2K;
12 | essentially a fully open-source (and therefore transparent), free, and well-tested
13 | alternative to VASP.
Particularly useful for applications in which BSSE is a concern
14 | (e.g. adsorption).
15 | - **ORCA** (cluster, lcao): useful for accurate high-level quantum chemistry calculations,
16 | e.g. MP2 and CCSD(T). *TODO*
17 | 
18 | !!! note "Installation"
19 | Because the 'correct' compilation and installation of quantum chemistry software is
20 | notoriously cumbersome, we host separate container images for each of the packages
21 | on GitHub, which are ready to use with psiflow on HPCs with either a Singularity
22 | or Apptainer container runtime. The Docker files used to generate those images are
23 | available in the repository:
24 | [CP2K](https://github.com/molmod/psiflow/blob/main/Dockerfile.cp2k) and
25 | [GPAW](https://github.com/molmod/psiflow/blob/main/Dockerfile.gpaw).
26 | See the [configuration](configuration.md) section for more details.
27 | 
28 | For each software package, psiflow provides a corresponding class which implements
29 | the appropriate input file manipulations, launch commands, and output parsing
30 | functionalities.
31 | They all inherit from the `Reference` base class, which provides a few key
32 | functionalities:
33 | 
34 | - `data.evaluate(reference)`: this is the most common operation involving QM calculations;
35 | given a `Dataset` of atomic geometries, compute the energy and its gradients and insert
36 | them into the dataset such that they are saved for future reuse.
37 | - `reference.compute_atomic_energy`: provides the ability to compute isolated-atom
38 | reference energies, as this facilitates ML potential training on datasets with a varying
39 | number of atoms.
40 | - `reference.compute(data)`: this is somewhat equivalent to the hamiltonian `compute`
41 | method, except that its argument `data` must be a `Dataset` instance, and the optional
42 | `batch_size` defaults to 1 (in order to maximize parallelization). It does not insert
43 | the computed properties into the data, but returns them as numpy arrays.
44 | 
45 | From a distance, QM reference objects look almost identical to hamiltonians, in the sense
46 | that they both take atomic geometries as input and return energies and gradients as
47 | output. The (imposed) distinction between the two can be summarized in the following points:
48 | 
49 | - hamiltonians can compute energies and forces for pretty much *any* structure. There is
50 | no reason they would fail. QM calculations, on the other hand, can fail due to unconverged
51 | SCF cycles and/or time limit constraints. In fact, this happens relatively often when performing
52 | active learning workflows. Reference objects take this into account by returning a unique
53 | `NullState` whenever a calculation has failed.
54 | - hamiltonians are orders of magnitude faster, and can be employed in meaningfully long
55 | molecular dynamics simulations. This is not the case for QM calculations. As such, they
56 | cannot be used in combination with walker sampling or geometry optimizations. If the
57 | purpose is to perform molecular simulation at the DFT level, then the better approach is
58 | to train an ML potential to any desired level of accuracy (almost always possible in
59 | psiflow) and use that as a proxy for the QM interaction energy.
60 | For the same reason, the default batch size for `reference.compute` calls is 1, i.e.
61 | the QM calculation for each structure in the dataset is immediately scheduled,
62 | independently from the other ones.
63 | With hamiltonians, that batch size defaults to 100 (i.e. the data is split into chunks of 100, and
64 | each chunk of 100 states is evaluated serially).
65 | 
66 | 
67 | ## CP2K 2024.1
68 | A `CP2K` reference instance can be created based on a (multiline) input string.
69 | Only the `FORCE_EVAL` section of the input is important, since the atomic coordinates and cell
70 | parameters are automatically inserted for every calculation.
71 | All basis set, pseudopotential, and D3 parameters from the official
72 | [CP2K repository](https://github.com/cp2k/cp2k) are directly available in the
73 | container image (i.e. there is no need to download or provide these files separately).
74 | Choose which one you would like to use by specifying the corresponding filename in the input
75 | file (i.e. omit any preceding filepaths).
76 | A typical [input file](https://github.com/molmod/psiflow/blob/main/examples/data/cp2k_input.txt)
77 | is provided in the [examples](https://github.com/molmod/psiflow/tree/main/examples).
78 | 
79 | ```py
80 | from psiflow.reference import CP2K
81 | 
82 | 
83 | # create reference instance
84 | with open('cp2k_input.txt', 'r') as f:
85 |     force_eval_input_str = f.read()
86 | cp2k = CP2K(force_eval_input_str)
87 | 
88 | # compute energy and forces, and store them in the geometries
89 | evaluated_data = data.evaluate(cp2k)
90 | 
91 | for geometry in evaluated_data.geometries().result():
92 |     print('energy: {} eV'.format(geometry.energy))
93 |     print('forces: {} eV/A'.format(geometry.per_atom.forces))
94 | 
95 | ```
96 | 
97 | ## GPAW 24.1
98 | A `GPAW` reference is created in much the same way as a traditional GPAW 'calculator'
99 | instance, with support for exactly the same keyword arguments:
100 | ```py
101 | from psiflow.reference import GPAW
102 | 
103 | gpaw = GPAW(mode='fd', nbands=0, xc='PBE')  # see the GPAW calculator on GitLab for the full list
104 | energies = gpaw.compute(data, 'energy')
105 | 
106 | ```
107 | A notable feature of GPAW is that it already outputs all energies as formation energies,
108 | i.e. it internally subtracts the sum of the energies of the isolated atoms. As such,
109 | `compute_atomic_energy` for a GPAW reference always just returns 0 eV.
110 | 
111 | ## ORCA
112 | TODO
113 | 
-------------------------------------------------------------------------------- /docs/wandb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/wandb.png
-------------------------------------------------------------------------------- /examples/README.md: --------------------------------------------------------------------------------
1 | - [Replica exchange molecular dynamics](https://github.com/molmod/psiflow/tree/main/examples/alanine_replica_exchange.py) | **alanine dipeptide**: replica exchange molecular dynamics simulation of alanine dipeptide, using the MACE-MP0 universal potential.
2 | The inclusion of high-temperature replicas allows for fast conformational transitions and improves ergodicity.
3 | - [Geometry optimizations](https://github.com/molmod/psiflow/tree/main/examples/formic_acid_transition.py) | **formic acid dimer**: approximate transition state calculation for the proton exchange reaction in a formic acid dimer,
4 | using simple bias potentials and a few geometry optimizations.
5 | - [Static and dynamic frequency analysis](https://github.com/molmod/psiflow/tree/main/examples/h2_static_dynamic.py) | **dihydrogen**: Hessian-based estimate of the H-H bond strength and the corresponding IR absorption frequency, and a comparison with a dynamical estimate from an NVE simulation and Fourier analysis.
6 | 
7 | - [Bulk modulus calculation](https://github.com/molmod/psiflow/tree/main/examples/iron_bulk_modulus.py) | **iron**: estimate of the bulk modulus of bcc iron using a series of NPT simulations at different pressures.
8 | 
9 | - [Solid-state phase stabilities](https://github.com/molmod/psiflow/tree/main/examples/iron_harmonic_fcc_bcc.py) | **iron**: estimate of the relative stability of fcc and bcc iron with anharmonic corrections, using thermodynamic integration (see e.g. [Phys. Rev. B, 2018](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.97.054102)).
10 | 
11 | - [ML potentials from scratch](https://github.com/molmod/psiflow/tree/main/examples/online_learning_pimd.py) | **water**: develop an ML potential for water based on a single geometry as input, using a combination of passive and active learning.
12 | 
13 | - [Replica exchange umbrella sampling](https://github.com/molmod/psiflow/tree/main/examples/proton_jump_plumed.py) |
14 | **vinyl alcohol**: explore a reactive transition path with metadynamics, and use the
15 | resulting data to perform umbrella sampling with replica exchange between umbrellas.
16 | 
17 | - [DFT singlepoints](https://github.com/molmod/psiflow/tree/main/examples/water_cp2k_noise.py) | **water**: analysis of the numerical noise in DFT energy and force evaluations using CP2K and the RPBE(D3) functional, for a collection of water molecules.
18 | 
19 | - [Path-integral molecular dynamics](https://github.com/molmod/psiflow/tree/main/examples/water_path_integral_md.py) | **water**: demonstration of the impact of nuclear quantum effects on the variance in O-H distance in liquid water. Path-integral molecular dynamics simulations with an increasing number of beads (1, 2, 4, 8, 16) approximate the proton delocalization, and lead to a systematically larger variance in O-H distance.
20 | 
21 | - [ML potential training](https://github.com/molmod/psiflow/tree/main/examples/water_train_validate.py) | **water**: simple training and validation script for MACE on a small dataset of water configurations.
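
All of the example scripts above share the same minimal skeleton: the actual workflow is defined inside a `main()` function, which runs within a `psiflow.load()` context that sets up the execution resources before any calculation starts. Schematically (a sketch; the function body is whatever the example computes):

```py
import psiflow


def main():
    ...  # define datasets, hamiltonians, walkers, and computations here


if __name__ == "__main__":
    with psiflow.load():  # parse the execution configuration and initialize Parsl
        main()
```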
22 | -------------------------------------------------------------------------------- /examples/alanine_replica_exchange.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import psiflow 4 | from psiflow.geometry import Geometry 5 | from psiflow.hamiltonians import MACEHamiltonian 6 | from psiflow.sampling import Walker, replica_exchange, sample 7 | 8 | 9 | def compute_dihedrals(positions): 10 | indices_phi = np.array([4, 6, 8, 14], dtype=int) 11 | indices_psi = np.array([6, 8, 14, 16], dtype=int) 12 | 13 | dihedrals = [] 14 | for indices in [indices_phi, indices_psi]: 15 | p1 = positions[:, indices[0], :] 16 | p2 = positions[:, indices[1], :] 17 | p3 = positions[:, indices[2], :] 18 | p4 = positions[:, indices[3], :] 19 | 20 | # Calculate vectors between the points 21 | v1 = p2 - p1 22 | v2 = p3 - p2 23 | v3 = p4 - p3 24 | 25 | # Normal vectors of the planes formed by the atoms 26 | n1 = np.cross(v1, v2) 27 | n2 = np.cross(v2, v3) 28 | 29 | # Normalize the normal vectors 30 | n1_norm = np.linalg.norm(n1, axis=1, keepdims=True) 31 | n2_norm = np.linalg.norm(n2, axis=1, keepdims=True) 32 | n1 = n1 / n1_norm 33 | n2 = n2 / n2_norm 34 | 35 | dot_product = np.einsum("ij,ij->i", n1, n2) 36 | dot_product = np.clip(dot_product, -1.0, 1.0) 37 | dihedrals.append(np.arccos(dot_product)) 38 | return dihedrals[0], dihedrals[1] # phi, psi 39 | 40 | 41 | def main(): 42 | c7eq = np.array([2.8, 2.9]) # noqa: F841 43 | c7ax = np.array([1.2, -0.9]) # noqa: F841 44 | alanine = Geometry.from_string( # starts in c7ax config 45 | """ 46 | 22 47 | Properties=species:S:1:pos:R:3 pbc="F F F" 48 | H 12.16254811 17.00740464 -2.89412387 49 | C 12.83019906 16.90038734 -2.04015291 50 | H 12.24899130 16.91941920 -1.11925017 51 | H 13.51243976 17.75054269 -2.01566384 52 | C 13.65038992 15.63877411 -2.06030255 53 | O 14.36738511 15.33906728 -1.11622456 54 | N 13.53865222 14.88589532 -3.17304444 55 | H 12.86898792 15.18433500 -3.85740375 56 | C 14.28974353 13.67606132 -3.48863158 57 | H 14.01914560 13.42643243 -4.51320992 58 | C 15.79729109 13.88220294 -3.42319959 59 | H 16.12104919 14.14072623 -2.41784410 60 | H 16.29775468 12.96420765 -3.73059171 61 | H 16.09643748 14.68243453 -4.10096574 62 | C 13.86282687 12.43546588 -2.69127862 63 | O 13.58257313 11.40703144 -3.28015921 64 | N 13.87365846 12.57688288 -1.35546630 65 | H 14.15017274 13.47981654 -0.98516877 66 | C 13.53768820 11.50108113 -0.46287859 67 | H 14.38392004 11.24258036 0.17699860 68 | H 12.69022125 11.76658121 0.17241519 69 | H 13.27142638 10.63298597 -1.06170510 70 | """ 71 | ) 72 | mace = MACEHamiltonian.mace_mp0() 73 | 74 | walkers = [] 75 | for temperature in [150, 200, 250, 300, 400, 500, 600, 700, 800, 900, 1000, 1100]: 76 | walker = Walker( 77 | alanine, 78 | mace, 79 | temperature=temperature, 80 | ) 81 | walkers.append(walker) 82 | replica_exchange(walkers, trial_frequency=50) 83 | 84 | outputs = sample(walkers, steps=20000, step=200) 85 | phi, psi = compute_dihedrals(outputs[0].trajectory.get("positions").result()) 86 | for f, s in zip(phi, psi): # some c7eq conformations should appear here 87 | print("{:5.3f} {:5.3f}".format(f, s)) 88 | 89 | 90 | if __name__ == "__main__": 91 | with psiflow.load(): 92 | main() 93 | -------------------------------------------------------------------------------- /examples/data/acetaldehyde.xyz: -------------------------------------------------------------------------------- 1 | 7 2 | Properties=species:S:1:pos:R:3 3 | O 0.694151672 0.776743934 
-0.455455855 4 | C 0.195993254 -0.270095005 -0.307053207 5 | C -0.846060202 -0.538006022 0.669585079 6 | H 0.515801613 -1.097661033 -0.987914453 7 | H -0.589257101 -0.505600908 1.733123281 8 | H -1.553309062 0.309375207 0.558315778 9 | H -1.411674563 -1.440354174 0.5617281699 10 | -------------------------------------------------------------------------------- /examples/data/ani500k_cc_cpu.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/examples/data/ani500k_cc_cpu.model -------------------------------------------------------------------------------- /examples/data/cp2k_input.txt: -------------------------------------------------------------------------------- 1 | &FORCE_EVAL 2 | STRESS_TENSOR ANALYTICAL 3 | METHOD QS 4 | &DFT 5 | BASIS_SET_FILE_NAME BASIS_MOLOPT_UZH 6 | POTENTIAL_FILE_NAME POTENTIAL_UZH 7 | CHARGE 0 8 | &QS 9 | METHOD GPW 10 | EPS_DEFAULT 1.0E-12 11 | &END QS 12 | &XC 13 | &XC_FUNCTIONAL 14 | &GGA_X_RPBE 15 | &END GGA_X_RPBE 16 | &GGA_C_PBE 17 | &END GGA_C_PBE 18 | &END XC_FUNCTIONAL 19 | !&XC_FUNCTIONAL PBE 20 | !&END XC_FUNCTIONAL 21 | !&VDW_POTENTIAL 22 | ! POTENTIAL_TYPE PAIR_POTENTIAL 23 | ! &PAIR_POTENTIAL 24 | ! TYPE DFTD3(BJ) 25 | ! REFERENCE_FUNCTIONAL PBE 26 | ! PARAMETER_FILE_NAME dftd3.dat 27 | ! &END PAIR_POTENTIAL 28 | !&END VDW_POTENTIAL 29 | &END XC 30 | &SCF 31 | EPS_SCF 1.0E-8 32 | SCF_GUESS RESTART 33 | MAX_SCF 20 34 | &OT 35 | MINIMIZER DIIS 36 | PRECONDITIONER FULL_ALL 37 | &END OT 38 | &OUTER_SCF 39 | EPS_SCF 1.0E-8 40 | MAX_SCF 5 41 | &END OUTER_SCF 42 | &END SCF 43 | &MGRID 44 | CUTOFF 1000 45 | REL_CUTOFF 60 46 | &END MGRID 47 | &END DFT 48 | &SUBSYS 49 | &KIND O 50 | BASIS_SET TZVP-MOLOPT-PBE-GTH-q6 51 | POTENTIAL GTH-PBE-q6 52 | &END KIND 53 | &KIND H 54 | BASIS_SET TZVP-MOLOPT-PBE-GTH-q1 55 | POTENTIAL GTH-PBE-q1 56 | &END KIND 57 | &END SUBSYS 58 | &END FORCE_EVAL 59 | -------------------------------------------------------------------------------- /examples/data/h2o_32.xyz: -------------------------------------------------------------------------------- 1 | 96 2 | Lattice="9.8528 0 0 0 9.8528 0 0 0 9.8528" 3 | O 2.280398 9.146539 5.088696 4 | O 1.251703 2.406261 7.769908 5 | O 1.596302 6.920128 0.656695 6 | O 2.957518 3.771868 1.877387 7 | O 0.228972 5.884026 6.532308 8 | O 9.023431 6.119654 0.092451 9 | O 7.256289 8.493641 5.772041 10 | O 5.090422 9.467016 0.743177 11 | O 6.330888 7.363471 3.747750 12 | O 7.763819 8.349367 9.279457 13 | O 8.280798 3.837153 5.799282 14 | O 8.878250 2.025797 1.664102 15 | O 9.160372 0.285100 6.871004 16 | O 4.962043 4.134437 0.173376 17 | O 2.802896 8.690383 2.435952 18 | O 9.123223 3.549232 8.876721 19 | O 1.453702 1.402538 2.358278 20 | O 6.536550 1.146790 7.609732 21 | O 2.766709 0.881503 9.544263 22 | O 0.856426 2.075964 5.010625 23 | O 6.386036 1.918950 0.242690 24 | O 2.733023 4.452756 5.850203 25 | O 4.600039 9.254314 6.575944 26 | O 3.665373 6.210561 3.158420 27 | O 3.371648 6.925594 7.476036 28 | O 5.287920 3.270653 6.155080 29 | O 5.225237 6.959594 9.582991 30 | O 0.846293 5.595877 3.820630 31 | O 9.785620 8.164617 3.657879 32 | O 8.509982 4.430362 2.679946 33 | O 1.337625 8.580920 8.272484 34 | O 8.054437 9.221335 1.991376 35 | H 1.762019 9.820429 5.528454 36 | H 3.095987 9.107088 5.588186 37 | H 0.554129 2.982634 8.082024 38 | H 1.771257 2.954779 7.182181 39 | H 2.112148 6.126321 0.798136 40 | H 1.776389 7.463264 1.424030 41 | H 3.754249 3.824017 1.349436 42 | H 3.010580 
4.524142 2.466878 43 | H 0.939475 5.243834 6.571945 44 | H 0.515723 6.520548 5.877445 45 | H 9.852960 6.490366 0.393593 46 | H 8.556008 6.860063 -0.294256 47 | H 7.886607 7.941321 6.234506 48 | H 7.793855 9.141028 5.315813 49 | H 4.467366 9.971162 0.219851 50 | H 5.758685 10.102795 0.998994 51 | H 6.652693 7.917443 3.036562 52 | H 6.711966 7.743594 4.539279 53 | H 7.751955 8.745180 10.150905 54 | H 7.829208 9.092212 8.679343 55 | H 8.312540 3.218330 6.528858 56 | H 8.508855 4.680699 6.189990 57 | H 9.742249 1.704975 1.922581 58 | H 8.799060 2.876412 2.095861 59 | H 9.505360 1.161677 6.701213 60 | H 9.920117 -0.219794 7.161006 61 | H 4.749903 4.186003 -0.758595 62 | H 5.248010 5.018415 0.403676 63 | H 3.576065 9.078451 2.026264 64 | H 2.720238 9.146974 3.273164 65 | H 9.085561 4.493058 9.031660 66 | H 9.215391 3.166305 9.749133 67 | H 1.999705 2.060411 1.927796 68 | H 1.824184 0.564565 2.081195 69 | H 7.430334 0.849764 7.438978 70 | H 6.576029 1.537017 8.482885 71 | H 2.415851 1.576460 8.987338 72 | H 2.276957 0.099537 9.289499 73 | H 1.160987 1.818023 4.140602 74 | H 0.350256 2.874437 4.860741 75 | H 5.768804 2.638450 0.375264 76 | H 7.221823 2.257514 0.563730 77 | H 3.260797 5.243390 5.962382 78 | H 3.347848 3.732214 5.988196 79 | H 5.328688 9.073059 5.982269 80 | H 5.007063 9.672150 7.334875 81 | H 4.566850 6.413356 3.408312 82 | H 3.273115 7.061666 2.963521 83 | H 3.878372 7.435003 6.843607 84 | H 3.884673 6.966316 8.283117 85 | H 5.918240 3.116802 5.451335 86 | H 5.355924 2.495093 6.711958 87 | H 5.071858 7.687254 10.185667 88 | H 6.106394 7.112302 9.241707 89 | H 1.637363 5.184910 4.169264 90 | H 0.427645 4.908936 3.301903 91 | H 9.971698 7.227076 3.709104 92 | H 10.647901 8.579244 3.629806 93 | H 8.046808 5.126383 2.213838 94 | H 7.995317 4.290074 3.474723 95 | H 1.872601 7.864672 7.930401 96 | H 0.837635 8.186808 8.987268 97 | H 8.314696 10.115534 2.212519 98 | H 8.687134 8.667252 2.448452 99 | -------------------------------------------------------------------------------- /examples/data/vinyl_alcohol.xyz: -------------------------------------------------------------------------------- 1 | 7 2 | Properties=species:S:1:pos:R:3 3 | O 1.041371715 -0.216863172 0.001603252 4 | C -0.098316254 0.512294574 -0.01021628 5 | C -1.225162144 -0.248210652 0.020868361 6 | H -0.087363805 1.596485281 -0.07557041 7 | H 0.61765221 -1.094559605 -0.02702971 8 | H -2.216985293 0.211688229 -0.00469380 9 | H -1.115257687 -1.357478425 -0.04507284 10 | -------------------------------------------------------------------------------- /examples/h2_static_dynamic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ase.units import _c, second 3 | 4 | import psiflow 5 | from psiflow.free_energy import compute_frequencies, compute_harmonic 6 | from psiflow.geometry import Geometry 7 | from psiflow.hamiltonians import MACEHamiltonian 8 | from psiflow.sampling import Walker, optimize, sample 9 | 10 | 11 | def frequency_dynamic(start, hamiltonian): 12 | walker = Walker( 13 | start, 14 | hamiltonian=hamiltonian, 15 | temperature=None, # NVE! 
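# temperature=None disables the thermostat, i.e. constant-energy (NVE) dynamics,
# so the H-H stretch oscillates at its natural frequency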
16 | timestep=0.25, 17 | ) 18 | 19 | step = 10 20 | output = sample( 21 | [walker], 22 | steps=20, 23 | step=step, 24 | max_force=10, 25 | )[0] 26 | positions = output.trajectory.get("positions").result() 27 | distances = np.linalg.norm(positions[:, 0, :] - positions[:, 1, :], axis=1) 28 | distances -= np.mean(distances) # don't need average interatomic distance 29 | 30 | timestep = walker.timestep * 1e-15 * step 31 | spectrum = np.abs(np.fft.fft(distances)) 32 | 33 | freq_axis = np.fft.fftfreq(len(distances), timestep) 34 | index = np.argmax(spectrum[np.where(freq_axis > 0)]) 35 | peak_frequency = freq_axis[np.where(freq_axis > 0)][index] 36 | 37 | return peak_frequency / (100 * _c) 38 | 39 | 40 | def frequency_static(start, hamiltonian): 41 | minimum = optimize( 42 | start, 43 | hamiltonian, 44 | 2000, 45 | ftol=1e-4, 46 | ) 47 | hessian = compute_harmonic( 48 | minimum, 49 | hamiltonian, 50 | asr="crystal", 51 | pos_shift=0.001, 52 | ) 53 | frequencies = compute_frequencies(hessian, minimum).result() 54 | return frequencies[-1] * second / (100 * _c) 55 | 56 | 57 | def main(): 58 | geometry = Geometry.from_data( 59 | numbers=np.ones(2), 60 | positions=np.array([[0, 0, 0], [0.8, 0, 0]]), 61 | cell=None, 62 | ) 63 | mace = MACEHamiltonian.mace_mp0() 64 | 65 | dynamic = frequency_dynamic(geometry, mace) 66 | static = frequency_static(geometry, mace) 67 | 68 | print("H2 frequency (dynamic) [inv(cm)]: {}".format(dynamic)) 69 | print("H2 frequency (static) [inv(cm)]: {}".format(static)) 70 | 71 | 72 | if __name__ == "__main__": 73 | with psiflow.load(): 74 | main() 75 | -------------------------------------------------------------------------------- /examples/iron_bulk_modulus.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ase.build import bulk, make_supercell 3 | 4 | import psiflow 5 | from psiflow.geometry import Geometry 6 | from psiflow.hamiltonians import MACEHamiltonian 7 | from psiflow.sampling import Walker, sample 8 | 9 | 10 | def main(): 11 | iron = bulk("Fe", "bcc", a=2.8) 12 | geometry = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3))) 13 | mace = MACEHamiltonian.mace_mp0() 14 | 15 | pressures = (-10 + np.arange(5) * 5) * 1e3 # in MPa 16 | walkers = [Walker(geometry, mace, temperature=300, pressure=p) for p in pressures] 17 | 18 | name = "volume{angstrom3}" 19 | outputs = sample(walkers, steps=4000, step=50, observables=[name]) 20 | volumes = [np.mean(o[name].result()) for o in outputs] 21 | 22 | p = np.polyfit(volumes, pressures, deg=1) 23 | volume0 = (-1.0) * p[1] / p[0] 24 | bulk_modulus = (-1.0) * volume0 * p[0] / 1000 # in GPa 25 | print("bulk modulus [GPa]: {}".format(bulk_modulus)) 26 | 27 | 28 | if __name__ == "__main__": 29 | with psiflow.load(): 30 | main() 31 | -------------------------------------------------------------------------------- /examples/iron_harmonic_fcc_bcc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ase.build import bulk, make_supercell 3 | from ase.units import kB 4 | 5 | import psiflow 6 | from psiflow.data import Dataset 7 | from psiflow.free_energy import Integration, compute_harmonic, harmonic_free_energy 8 | from psiflow.geometry import Geometry 9 | from psiflow.hamiltonians import Harmonic, MACEHamiltonian 10 | from psiflow.sampling import optimize 11 | 12 | 13 | def main(): 14 | iron = bulk("Fe", "bcc", a=2.87, orthorhombic=True) 15 | bcc = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3))) 16 
| iron = bulk("Fe", "fcc", a=3.57, orthorhombic=True) 17 | fcc = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3))) 18 | 19 | geometries = { 20 | "bcc": bcc, 21 | "fcc": fcc, 22 | } 23 | theoretical = {name: None for name in geometries} 24 | simulated = {name: None for name in geometries} 25 | 26 | mace = MACEHamiltonian.mace_mp0("small") 27 | scaling = 0.9 28 | temperature = 800 29 | beta = 1 / (kB * temperature) 30 | 31 | for name in geometries: 32 | minimum = optimize( 33 | geometries[name], mace, ftol=1e-4, steps=1000, mode="bfgstrm" 34 | ) 35 | hessian = compute_harmonic(minimum, mace, pos_shift=0.001) 36 | 37 | # simulate 38 | harmonic = Harmonic(minimum, hessian) 39 | integration = Integration( 40 | harmonic, 41 | temperatures=[temperature], 42 | delta_hamiltonian=(scaling - 1) * harmonic, 43 | delta_coefficients=np.linspace(0, 1, num=4, endpoint=True), 44 | ) 45 | walkers = integration.create_walkers( # noqa: F841 46 | Dataset([harmonic.reference_geometry]), 47 | timestep=3, 48 | ) # heavy atoms 49 | integration.sample(steps=500, step=10, start=300) 50 | integration.compute_gradients() 51 | 52 | reduced_f = integration.along_delta(temperature=temperature).result() 53 | f_harmonic = harmonic_free_energy( 54 | hessian, 55 | temperature=temperature, 56 | quantum=False, 57 | ) 58 | simulated[name] = (f_harmonic.result() + reduced_f[-1]) / beta 59 | 60 | # theoretical 61 | f_harmonic_scaled = harmonic_free_energy( 62 | scaling * hessian.result(), 63 | temperature=temperature, 64 | quantum=False, 65 | ) 66 | theoretical[name] = f_harmonic_scaled.result() / beta 67 | 68 | ddF = theoretical["bcc"] - theoretical["fcc"] 69 | print("theoretical delta(delta(F)) [eV]: {}".format(ddF)) 70 | 71 | ddF = simulated["bcc"] - simulated["fcc"] 72 | print(" simulated delta(delta(F)) [eV]: {}".format(ddF)) 73 | 74 | 75 | if __name__ == "__main__": 76 | with psiflow.load(): 77 | main() 78 | -------------------------------------------------------------------------------- /examples/online_learning_pimd.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import psiflow 4 | from psiflow.reference import CP2K 5 | from psiflow.data import Dataset 6 | from psiflow.sampling import Walker 7 | from psiflow.models import MACE 8 | from psiflow.hamiltonians import MACEHamiltonian 9 | from psiflow.learning import Learning 10 | 11 | 12 | def main(): 13 | path_output = Path.cwd() / 'output' 14 | 15 | with open('data/cp2k_input.txt', 'r') as f: cp2k_input = f.read() 16 | cp2k = CP2K(cp2k_input) 17 | 18 | model = MACE( 19 | batch_size=4, 20 | lr=0.02, 21 | max_ell=3, 22 | r_max=6.5, 23 | energy_weight=100, 24 | correlation=3, 25 | max_L=1, 26 | num_channels=24, 27 | patience=8, 28 | scheduler_patience=4, 29 | max_num_epochs=200, 30 | ) 31 | model.add_atomic_energy('H', cp2k.compute_atomic_energy('H', box_size=9)) 32 | model.add_atomic_energy('O', cp2k.compute_atomic_energy('O', box_size=9)) 33 | 34 | state = Dataset.load('data/water_train.xyz')[0] 35 | walkers = ( 36 | Walker(state, temperature=300, pressure=0.1).multiply(40) + 37 | Walker(state, temperature=450, pressure=0.1).multiply(40) + 38 | Walker(state, temperature=600, pressure=0.1).multiply(40) 39 | ) 40 | learning = Learning( 41 | cp2k, 42 | path_output, 43 | wandb_project='psiflow_examples', 44 | wandb_group='water_learning_pimd', 45 | ) 46 | 47 | model, walkers = learning.passive_learning( 48 | model, 49 | walkers, 50 | hamiltonian=MACEHamiltonian.mace_mp0(), 51 | steps=10000, 52 | 
step=2000, 53 | ) 54 | 55 | for i in range(3): 56 | model, walkers = learning.active_learning( 57 | model, 58 | walkers, 59 | steps=2000, 60 | ) 61 | 62 | # PIMD phase for low-temperature walkers 63 | for j, walker in enumerate(walkers[:40]): 64 | walker.nbeads = 32 65 | model, walkers = learning.active_learning( 66 | model, 67 | walkers, 68 | steps=500, 69 | ) 70 | 71 | 72 | if __name__ == '__main__': 73 | with psiflow.load(): 74 | main() 75 | -------------------------------------------------------------------------------- /examples/proton_jump_plumed.py: -------------------------------------------------------------------------------- 1 | from ase.units import kJ, mol 2 | import numpy as np 3 | 4 | import psiflow 5 | from psiflow.data import Dataset 6 | from psiflow.geometry import Geometry 7 | from psiflow.hamiltonians import PlumedHamiltonian, MACEHamiltonian 8 | from psiflow.sampling import Walker, sample, quench, Metadynamics, replica_exchange 9 | 10 | 11 | PLUMED_INPUT = """UNITS LENGTH=A ENERGY=kj/mol 12 | d_C: DISTANCE ATOMS=3,5 13 | d_O: DISTANCE ATOMS=1,5 14 | CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO 15 | 16 | """ 17 | 18 | 19 | def get_bias(kappa: float, center: float): 20 | plumed_str = PLUMED_INPUT 21 | plumed_str += '\n' 22 | plumed_str += 'RESTRAINT ARG=CV KAPPA={} AT={}\n'.format(kappa, center) 23 | return PlumedHamiltonian(plumed_str) 24 | 25 | 26 | def main(): 27 | aldehyd = Geometry.load('data/acetaldehyde.xyz') 28 | alcohol = Geometry.load('data/vinyl_alcohol.xyz') 29 | 30 | mace = MACEHamiltonian.mace_cc() 31 | energy = mace.compute([aldehyd, alcohol], 'energy').result() 32 | energy = (energy - np.min(energy)) / (kJ / mol) 33 | print('E_vinyl - E_aldehyde = {:7.3f} kJ/mol'.format(energy[1] - energy[0])) 34 | 35 | # generate initial structures using metadynamics 36 | plumed_str = PLUMED_INPUT 37 | plumed_str += 'METAD ARG=CV PACE=10 SIGMA=0.1 HEIGHT=5\n' 38 | metadynamics = Metadynamics(plumed_str) 39 | 40 | # create 40 identical walkers 41 | walker = Walker( 42 | alcohol, 43 | hamiltonian=mace, 44 | temperature=300, 45 | metadynamics=metadynamics, 46 | ) 47 | 48 | # do MTD and create large dataset from all trajectories 49 | outputs = sample([walker], steps=8000, step=50) 50 | data_mtd = sum([o.trajectory for o in outputs], start=Dataset([])) 51 | data_mtd.save('mtd.xyz') 52 | 53 | # initialize walkers for umbrella sampling 54 | walkers = [] 55 | for i, center in enumerate(np.linspace(1, 3, num=16)): 56 | bias = get_bias(kappa=1500, center=center) 57 | hamiltonian = mace + bias 58 | walker = Walker(alcohol, hamiltonian=hamiltonian, temperature=300) 59 | walkers.append(walker) 60 | quench(walkers, data_mtd) # make sure initial structure is reasonable 61 | replica_exchange(walkers, trial_frequency=100) # use REX for improved sampling 62 | 63 | outputs = sample(walkers, steps=1000, step=10) 64 | 65 | 66 | if __name__ == '__main__': 67 | with psiflow.load() as f: 68 | main() 69 | -------------------------------------------------------------------------------- /examples/submit/hortense.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | parsl_log_level: DEBUG 3 | container_engine: 'apptainer' 4 | container_uri: 'oras://ghcr.io/molmod/psiflow:main_cu118' 5 | default_threads: 8 6 | ModelEvaluation: 7 | cores_per_worker: 12 8 | gpu: True 9 | max_simulation_time: 20 10 | env_vars: 11 | KMP_BLOCKTIME: "1" 12 | slurm: 13 | partition: "gpu_rome_a100" 14 | account: "2023_070" 15 | nodes_per_block: 1 16 | cores_per_node: 48 
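# 48 cores / 12 cores per worker = 4 GPU workers per node, matching the 4 requested GPUs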
17 | max_blocks: 1 18 | walltime: "12:00:00" 19 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n" 20 | ModelTraining: 21 | cores_per_worker: 12 22 | gpu: true 23 | max_training_time: 40 24 | env_vars: 25 | OMP_PROC_BIND: "spread" 26 | slurm: 27 | partition: "gpu_rome_a100" 28 | account: "2023_070" 29 | nodes_per_block: 1 30 | cores_per_node: 12 31 | max_blocks: 1 32 | walltime: "12:00:00" 33 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n" 34 | CP2K: 35 | cores_per_worker: 64 36 | max_evaluation_time: 30 37 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp' 38 | slurm: 39 | partition: "cpu_rome" 40 | account: "2024_079" 41 | nodes_per_block: 1 42 | cores_per_node: 64 43 | max_blocks: 2 44 | walltime: "06:00:00" 45 | scheduler_options: "#SBATCH --clusters=dodrio\n" 46 | ... 47 | -------------------------------------------------------------------------------- /examples/submit/lumi.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | parsl_log_level: DEBUG 3 | container_engine: 'singularity' 4 | container_uri: 'oras://ghcr.io/molmod/psiflow:main_rocm5.6' 5 | default_threads: 8 6 | CP2K: 7 | cores_per_worker: 32 8 | max_evaluation_time: 20 9 | launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp' 10 | slurm: 11 | partition: "standard" 12 | account: "project_465001125" 13 | nodes_per_block: 1 14 | cores_per_node: 128 15 | max_blocks: 10 16 | walltime: "01:00:00" 17 | ModelEvaluation: 18 | cores_per_worker: 7 19 | gpu: True 20 | slurm: 21 | partition: "standard-g" 22 | account: "project_465001125" 23 | nodes_per_block: 1 24 | cores_per_node: 56 25 | max_blocks: 5 26 | walltime: "01:00:00" 27 | scheduler_options: "#SBATCH --gres=gpu:8\n" 28 | worker_init: "ml LUMI/23.09 && ml partition/G && ml rocm/5.6\n" 29 | ModelTraining: 30 | cores_per_worker: 7 31 | gpu: true 32 | multigpu: true 33 | slurm: 34 | partition: "standard-g" 35 | account: "project_465001125" 36 | nodes_per_block: 1 37 | cores_per_node: 56 38 | walltime: "01:00:00" 39 | scheduler_options: "#SBATCH --gres=gpu:8\n" 40 | worker_init: "ml LUMI/23.09 && ml partition/G && ml rocm/5.6\n" 41 | ... 
42 | -------------------------------------------------------------------------------- /examples/submit/submit_hortense.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # List of filenames 4 | files=( 5 | "h2_static_dynamic.py" 6 | "iron_bulk_modulus.py" 7 | "iron_harmonic_fcc_bcc.py" 8 | "water_cp2k_noise.py" 9 | "water_path_integral_md.py" 10 | "water_train_validate.py" 11 | "alanine_replica_exchange.py" 12 | ) 13 | 14 | curl -O https://raw.githubusercontent.com/molmod/psiflow/main/examples/hortense.yaml 15 | 16 | run_dir=$(pwd)/run_examples 17 | mkdir $run_dir && cp hortense.yaml $run_dir && cd $run_dir 18 | 19 | # Loop over each filename 20 | for filename in "${files[@]}" 21 | do 22 | name="${filename%.*}" 23 | mkdir $name 24 | cp hortense.yaml $name 25 | 26 | cat > $name/job.sh < $name/job.sh < std(O-H) = {} A".format(nbeads, std)) 46 | 47 | 48 | if __name__ == "__main__": 49 | with psiflow.load(): 50 | main() 51 | -------------------------------------------------------------------------------- /examples/water_train_validate.py: -------------------------------------------------------------------------------- 1 | import psiflow 2 | from psiflow.data import Dataset, compute_rmse 3 | from psiflow.models import MACE 4 | 5 | 6 | def main(): 7 | data = Dataset.load("data/water_train.xyz") 8 | model = MACE( 9 | batch_size=2, 10 | lr=0.02, 11 | max_ell=3, 12 | r_max=5.5, 13 | energy_weight=100, 14 | correlation=3, 15 | max_L=1, 16 | num_channels=16, 17 | max_num_epochs=20, 18 | swa=False, 19 | ) 20 | 21 | train, valid = data.split(0.9, shuffle=True) 22 | model.initialize(train) 23 | model.train(train, valid) 24 | hamiltonian = model.create_hamiltonian() 25 | 26 | target_e = data.get("per_atom_energy") 27 | target_f = data.get("forces") 28 | 29 | data_predicted = data.evaluate(hamiltonian) 30 | predict_e = data_predicted.get("per_atom_energy") 31 | predict_f = data_predicted.get("forces") 32 | 33 | e_rmse = compute_rmse(target_e, predict_e) 34 | f_rmse = compute_rmse(target_f, predict_f) 35 | 36 | print("RMSE(energy) [meV/atom]: {}".format(e_rmse.result() * 1000)) 37 | print("RMSE(forces) [meV/angstrom]: {}".format(f_rmse.result() * 1000)) 38 | 39 | 40 | if __name__ == "__main__": 41 | with psiflow.load(): 42 | main() 43 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: psiflow 2 | theme: 3 | favicon: icon.svg 4 | name: 5 | material 6 | #font: 7 | # text: overpass 8 | palette: 9 | primary: teal 10 | accent: yellow 11 | scheme: default 12 | logo: icon.svg 13 | features: 14 | - content.code.copy 15 | - navigation.instant 16 | - navigation.tracking 17 | #- navigation.tabs 18 | #- navigation.tabs.sticky 19 | - navigation.indexes 20 | - navigation.sections 21 | - navigation.expand 22 | - toc.integrate 23 | - toc.follow 24 | nav: 25 | - overview: index.md 26 | - atomic geometries: data.md 27 | - hamiltonians: hamiltonian.md 28 | - sampling: sampling.md 29 | - QM calculations: reference.md 30 | - ML potentials: models.md 31 | - online learning: learning.md 32 | - free energy calculations: free_energy.md 33 | - setup & configuration: configuration.md 34 | 35 | plugins: 36 | - mkdocstrings: 37 | python: 38 | docstring_style: google 39 | repo_url: https://github.com/molmod/psiflow 40 | markdown_extensions: 41 | - tables 42 | - md_in_html 43 | - admonition 44 | - footnotes 45 | - pymdownx.highlight: 46 | 
anchor_linenums: true 47 | - pymdownx.inlinehilite 48 | - pymdownx.snippets 49 | - pymdownx.superfences 50 | - pymdownx.details 51 | - pymdownx.critic 52 | - pymdownx.caret 53 | - pymdownx.keys 54 | - pymdownx.mark 55 | - pymdownx.tilde 56 | - pymdownx.arithmatex: 57 | generic: true 58 | - attr_list 59 | - pymdownx.emoji: 60 | emoji_index: !!python/name:materialx.emoji.twemoji 61 | emoji_generator: !!python/name:materialx.emoji.to_svg 62 | 63 | #extra_javascript: 64 | # - javascripts/mathjax.js 65 | # - https://polyfill.io/v3/polyfill.min.js?features=es6 66 | # - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js 67 | # 68 | extra_javascript: 69 | - javascripts/mathjax.js 70 | - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js 71 | -------------------------------------------------------------------------------- /psiflow/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import typeguard 4 | 5 | from .config import setup_slurm_config # noqa: F401 6 | from .execution import ExecutionContextLoader 7 | from .serialization import ( # noqa: F401 8 | _DataFuture, 9 | deserialize, 10 | serializable, 11 | serialize, 12 | ) 13 | 14 | 15 | @typeguard.typechecked 16 | def resolve_and_check(path: Path) -> Path: 17 | path = path.resolve() 18 | if Path.cwd() in path.parents: 19 | pass 20 | elif path.exists() and Path.cwd().samefile(path): 21 | pass 22 | else: 23 | raise ValueError( 24 | "requested file and/or path at location: {}" 25 | "\nwhich is not in the present working directory: {}" 26 | "\npsiflow can only load and/or save in its present " 27 | "working directory because this is the only directory" 28 | " that will get bound into the container.".format(path, Path.cwd()) 29 | ) 30 | return path 31 | 32 | 33 | load = ExecutionContextLoader.load 34 | context = ExecutionContextLoader.context 35 | wait = ExecutionContextLoader.wait 36 | -------------------------------------------------------------------------------- /psiflow/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Computable, Dataset, aggregate_multiple, compute # noqa: F401 2 | from .utils import compute_mae, compute_rmse # noqa: F401 3 | -------------------------------------------------------------------------------- /psiflow/free_energy/__init__.py: -------------------------------------------------------------------------------- 1 | from .integration import Integration # noqa: F401 2 | from .phonons import ( # noqa: F401 3 | compute_frequencies, 4 | compute_harmonic, 5 | harmonic_free_energy, 6 | ) 7 | -------------------------------------------------------------------------------- /psiflow/free_energy/integration.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # necessary for type-guarding class methods 2 | 3 | from typing import Optional, Union 4 | 5 | import numpy as np 6 | import typeguard 7 | from ase.units import bar, kB 8 | from parsl.app.app import python_app 9 | 10 | from psiflow.data import Dataset 11 | from psiflow.hamiltonians import Hamiltonian, Zero 12 | from psiflow.sampling import SimulationOutput, Walker, quench, randomize, sample 13 | from psiflow.utils.apps import compute_sum, multiply 14 | 15 | length = python_app(len, executors=["default_threads"]) 16 | take_mean = python_app(np.mean, executors=["default_threads"]) 17 | 18 | 19 | @typeguard.typechecked 20 | def _integrate(x: np.ndarray, *args: 
float) -> np.ndarray: 21 | import scipy.integrate 22 | 23 | assert len(args) == len(x) 24 | y = np.array(args, dtype=float) 25 | return scipy.integrate.cumulative_trapezoid(y, x=x, initial=0.0) 26 | 27 | 28 | integrate = python_app(_integrate, executors=["default_threads"]) 29 | 30 | 31 | @typeguard.typechecked 32 | class ThermodynamicState: 33 | temperature: float 34 | natoms: int 35 | delta_hamiltonian: Optional[Hamiltonian] 36 | pressure: Optional[float] 37 | mass: Optional[float] 38 | 39 | def __init__( 40 | self, 41 | temperature: float, 42 | natoms: int, 43 | delta_hamiltonian: Optional[Hamiltonian], 44 | pressure: Optional[float], 45 | mass: Optional[float], 46 | ): 47 | self.temperature = temperature 48 | self.natoms = natoms 49 | self.delta_hamiltonian = delta_hamiltonian 50 | self.pressure = pressure 51 | self.mass = mass 52 | 53 | self.gradients = { 54 | "temperature": None, 55 | "delta": None, 56 | "pressure": None, 57 | "mass": None, 58 | } 59 | 60 | def gradient( 61 | self, 62 | output: SimulationOutput, 63 | hamiltonian: Optional[Hamiltonian] = None, 64 | ): 65 | self.temperature_gradient(output, hamiltonian) 66 | self.delta_gradient(output) 67 | if self.mass is not None: 68 | self.mass_gradient(output) 69 | 70 | def temperature_gradient( 71 | self, 72 | output: SimulationOutput, 73 | hamiltonian: Optional[Hamiltonian] = None, 74 | ): 75 | energies = output.get_energy(hamiltonian) 76 | _energy = take_mean(energies) 77 | if self.pressure is not None: # use enthalpy 78 | volumes = output["volume{angstrom3}"] 79 | pv = multiply(take_mean(volumes), 10 * bar * self.pressure) 80 | _energy = compute_sum(_energy, pv) 81 | 82 | # grad_u = < - u / kBT**2 > 83 | # grad_k = < - E_kin > / kBT**2 > 84 | gradient_u = multiply( 85 | _energy, 86 | (-1.0) / (kB * self.temperature**2), 87 | ) 88 | gradient_k = (-1.0) * (3 * self.natoms - 3) / (2 * self.temperature) 89 | self.gradients["temperature"] = compute_sum(gradient_u, gradient_k) 90 | 91 | def delta_gradient(self, output: SimulationOutput): 92 | energies = output.get_energy(self.delta_hamiltonian) 93 | self.gradients["delta"] = multiply( 94 | take_mean(energies), 95 | 1 / (kB * self.temperature), 96 | ) 97 | 98 | def mass_gradient(output): 99 | raise NotImplementedError 100 | 101 | 102 | @typeguard.typechecked 103 | class Integration: 104 | def __init__( 105 | self, 106 | hamiltonian: Hamiltonian, 107 | temperatures: Union[list[float], np.ndarray], 108 | delta_hamiltonian: Optional[Hamiltonian] = None, 109 | delta_coefficients: Union[list[float], np.ndarray, None] = None, 110 | pressure: Optional[float] = None, 111 | ): 112 | self.hamiltonian = hamiltonian 113 | self.temperatures = np.array(temperatures, dtype=float) 114 | if delta_hamiltonian is not None: 115 | assert delta_coefficients is not None 116 | self.delta_hamiltonian = delta_hamiltonian 117 | self.delta_coefficients = np.array(delta_coefficients, dtype=float) 118 | else: 119 | self.delta_coefficients = np.array([0.0]) 120 | self.delta_hamiltonian = Zero() 121 | self.pressure = pressure 122 | 123 | assert len(np.unique(self.temperatures)) == len(self.temperatures) 124 | assert len(np.unique(self.delta_coefficients)) == len(self.delta_coefficients) 125 | 126 | self.states = [] 127 | self.walkers = [] 128 | self.outputs = [] 129 | 130 | def create_walkers( 131 | self, 132 | dataset: Dataset, 133 | initialize_by: str = "quench", 134 | **walker_kwargs, 135 | ) -> list[Walker]: 136 | natoms = len(dataset[0].result()) 137 | for delta in self.delta_coefficients: 138 | for T in 
self.temperatures: 139 | hamiltonian = self.hamiltonian + delta * self.delta_hamiltonian 140 | walker = Walker( 141 | dataset[0], # do quench later 142 | hamiltonian, 143 | temperature=T, 144 | **walker_kwargs, 145 | ) 146 | self.walkers.append(walker) 147 | state = ThermodynamicState( 148 | temperature=T, 149 | natoms=natoms, 150 | delta_hamiltonian=self.delta_hamiltonian, 151 | pressure=self.pressure, 152 | mass=None, 153 | ) 154 | self.states.append(state) 155 | 156 | # initialize walkers 157 | if initialize_by == "quench": 158 | quench(self.walkers, dataset) 159 | elif initialize_by == "shuffle": 160 | randomize(self.walkers, dataset) 161 | else: 162 | raise ValueError("unknown initialization") 163 | return self.walkers 164 | 165 | def sample(self, **sampling_kwargs): 166 | self.outputs[:] = sample( 167 | self.walkers, 168 | **sampling_kwargs, 169 | ) 170 | 171 | def compute_gradients(self): 172 | for output, state in zip(self.outputs, self.states): 173 | state.gradient(output, hamiltonian=self.hamiltonian) 174 | 175 | def along_delta(self, temperature: Optional[float] = None): 176 | if temperature is None: 177 | assert self.ntemperatures == 1 178 | temperature = self.temperatures[0] 179 | index = np.where(self.temperatures == temperature)[0][0] 180 | assert self.temperatures[index] == temperature 181 | N = self.ntemperatures 182 | states = [self.states[N * i + index] for i in range(self.ndeltas)] 183 | 184 | # do integration 185 | x = self.delta_coefficients 186 | y = [state.gradients["delta"] for state in states] 187 | f = integrate(x, *y) 188 | return f 189 | # return multiply(f, kB * temperature) 190 | 191 | def along_temperature(self, delta_coefficient: Optional[float] = None): 192 | if delta_coefficient is None: 193 | assert self.ndeltas == 1 194 | delta_coefficient = self.delta_coefficients[0] 195 | index = np.where(self.delta_coefficients == delta_coefficient)[0][0] 196 | assert self.delta_coefficients[index] == delta_coefficient 197 | N = self.ntemperatures 198 | states = [self.states[N * index + i] for i in range(self.ntemperatures)] 199 | 200 | # do integration 201 | x = self.temperatures 202 | y = [state.gradients["temperature"] for state in states] 203 | f = integrate(x, *y) 204 | return f 205 | # return multiply(f, kB * self.temperatures) 206 | 207 | @property 208 | def ntemperatures(self): 209 | return len(self.temperatures) 210 | 211 | @property 212 | def ndeltas(self): 213 | return len(self.delta_coefficients) 214 | -------------------------------------------------------------------------------- /psiflow/free_energy/phonons.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # necessary for type-guarding class methods 2 | 3 | import xml.etree.ElementTree as ET 4 | from typing import Optional, Union 5 | 6 | import numpy as np 7 | import parsl 8 | import typeguard 9 | from ase.units import Bohr, Ha, J, _c, _hplanck, _k, kB, second 10 | from parsl.app.app import bash_app, python_app 11 | from parsl.dataflow.futures import AppFuture 12 | 13 | import psiflow 14 | from psiflow.data import Dataset 15 | from psiflow.geometry import Geometry, mass_weight 16 | from psiflow.hamiltonians import Hamiltonian, MixtureHamiltonian 17 | from psiflow.sampling.sampling import ( 18 | setup_sockets, 19 | label_forces, 20 | make_force_xml, 21 | serialize_mixture, 22 | make_start_command, 23 | make_client_command 24 | ) 25 | from psiflow.utils.apps import multiply 26 | from psiflow.utils.io import load_numpy, save_xml 27 
| from psiflow.utils import TMP_COMMAND, CD_COMMAND 28 | 29 | 30 | @typeguard.typechecked 31 | def _compute_frequencies(hessian: np.ndarray, geometry: Geometry) -> np.ndarray: 32 | assert hessian.shape[0] == hessian.shape[1] 33 | assert len(geometry) * 3 == hessian.shape[0] 34 | return np.sqrt(np.linalg.eigvalsh(mass_weight(hessian, geometry))) / (2 * np.pi) 35 | 36 | 37 | compute_frequencies = python_app(_compute_frequencies, executors=["default_threads"]) 38 | 39 | 40 | @typeguard.typechecked 41 | def _harmonic_free_energy( 42 | frequencies: Union[float, np.ndarray], 43 | temperature: float, 44 | quantum: bool = False, 45 | threshold: float = 1, # in invcm 46 | ) -> float: 47 | if isinstance(frequencies, float): 48 | frequencies = np.array([frequencies], dtype=float) 49 | 50 | threshold_ = threshold / second * (100 * _c) # from invcm to ASE 51 | frequencies = frequencies[np.abs(frequencies) > threshold_] 52 | 53 | # _hplanck in J s 54 | # _k in J / K 55 | if quantum: 56 | arg = (-1.0) * _hplanck * frequencies * second / (_k * temperature) 57 | F = kB * temperature * np.sum(np.log(1 - np.exp(arg))) 58 | F += _hplanck * J * second * np.sum(frequencies) / 2 59 | else: 60 | constant = kB * temperature * np.log(_hplanck) 61 | actual = np.log(frequencies / (kB * temperature)) 62 | F = len(frequencies) * constant + kB * temperature * np.sum(actual) 63 | F /= kB * temperature 64 | return F 65 | 66 | 67 | harmonic_free_energy = python_app(_harmonic_free_energy, executors=["default_threads"]) 68 | 69 | 70 | @typeguard.typechecked 71 | def setup_motion( 72 | mode: str, 73 | asr: str, 74 | pos_shift: float, 75 | energy_shift: float, 76 | ) -> ET.Element: 77 | motion = ET.Element("motion", mode="vibrations") 78 | vibrations = ET.Element("vibrations", mode="fd") 79 | pos = ET.Element("pos_shift") 80 | pos.text = " {} ".format(pos_shift) 81 | vibrations.append(pos) 82 | energy = ET.Element("energy_shift") 83 | energy.text = " {} ".format(energy_shift) 84 | vibrations.append(energy) 85 | prefix = ET.Element("prefix") 86 | prefix.text = " output " 87 | vibrations.append(prefix) 88 | asr_ = ET.Element("asr") 89 | asr_.text = " {} ".format(asr) 90 | vibrations.append(asr_) 91 | motion.append(vibrations) 92 | return motion 93 | 94 | 95 | def _execute_ipi( 96 | hamiltonian_names: list[str], 97 | client_args: list[list[str]], 98 | command_server: str, 99 | command_client: str, 100 | stdout: str = "", 101 | stderr: str = "", 102 | inputs: list = [], 103 | outputs: list = [], 104 | parsl_resource_specification: Optional[dict] = None, 105 | ) -> str: 106 | command_start = make_start_command(command_server, inputs[0], inputs[1]) 107 | commands_client = [] 108 | for i, name in enumerate(hamiltonian_names): 109 | args = client_args[i] 110 | assert len(args) == 1 # only have one client per hamiltonian 111 | for arg in args: 112 | commands_client += make_client_command(command_client, name, inputs[2 + i], inputs[1], arg), 113 | 114 | command_end = f'{command_server} --cleanup' 115 | command_copy = f'cp i-pi.output_full.hess {outputs[0]}' 116 | 117 | command_list = [ 118 | TMP_COMMAND, 119 | CD_COMMAND, 120 | command_start, 121 | *commands_client, 122 | "wait", 123 | command_end, 124 | command_copy, 125 | ] 126 | return "\n".join(command_list) 127 | 128 | 129 | execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"]) 130 | 131 | 132 | @typeguard.typechecked 133 | def compute_harmonic( 134 | state: Union[Geometry, AppFuture], 135 | hamiltonian: Hamiltonian, 136 | mode: str = "fd", 137 | asr: str = "crystal", 
138 | pos_shift: float = 0.01, 139 | energy_shift: float = 0.00095, 140 | ) -> AppFuture: 141 | hamiltonian: MixtureHamiltonian = 1 * hamiltonian 142 | names = label_forces(hamiltonian) 143 | sockets = setup_sockets(names) 144 | forces = make_force_xml(hamiltonian, names) 145 | 146 | initialize = ET.Element("initialize", nbeads="1") 147 | start = ET.Element("file", mode="ase", cell_units="angstrom") 148 | start.text = " start_0.xyz " 149 | initialize.append(start) 150 | motion = setup_motion(mode, asr, pos_shift, energy_shift) 151 | 152 | system = ET.Element("system") 153 | system.append(initialize) 154 | system.append(motion) 155 | system.append(forces) 156 | 157 | # output = setup_output(keep_trajectory) 158 | 159 | simulation = ET.Element("simulation", mode="static") 160 | # simulation.append(output) 161 | for socket in sockets: 162 | simulation.append(socket) 163 | simulation.append(system) 164 | total_steps = ET.Element("total_steps") 165 | total_steps.text = " {} ".format(1000000) 166 | simulation.append(total_steps) 167 | 168 | context = psiflow.context() 169 | definition = context.definitions["ModelEvaluation"] 170 | input_future = save_xml( 171 | simulation, 172 | outputs=[context.new_file("input_", ".xml")], 173 | ).outputs[0] 174 | inputs = [ 175 | input_future, 176 | Dataset([state]).extxyz, 177 | ] 178 | inputs += serialize_mixture(hamiltonian, dtype="float64") 179 | 180 | client_args = [] 181 | for name in names: 182 | args = definition.get_client_args(name, 1, "vibrations") 183 | client_args.append(args) 184 | outputs = [ 185 | context.new_file("hess_", ".txt"), 186 | ] 187 | 188 | command_server = definition.server_command() 189 | command_client = definition.client_command() 190 | resources = definition.wq_resources(1) 191 | 192 | result = execute_ipi( 193 | names, 194 | client_args, 195 | command_server, 196 | command_client, 197 | stdout=parsl.AUTO_LOGNAME, 198 | stderr=parsl.AUTO_LOGNAME, 199 | inputs=inputs, 200 | outputs=outputs, 201 | parsl_resource_specification=resources, 202 | ) 203 | return multiply(load_numpy(inputs=[result.outputs[0]]), Ha / Bohr**2) -------------------------------------------------------------------------------- /psiflow/models/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | import typeguard 5 | import yaml 6 | from ase.data import chemical_symbols 7 | from parsl.data_provider.files import File 8 | 9 | import psiflow 10 | from psiflow.models._mace import MACE, MACEConfig # noqa: F401 11 | from psiflow.models.model import Model 12 | from psiflow.utils.apps import copy_data_future 13 | 14 | 15 | @typeguard.typechecked 16 | def load_model(path: Union[Path, str]) -> Model: 17 | path = psiflow.resolve_and_check(Path(path)) 18 | assert path.is_dir() 19 | classes = [ 20 | MACE, 21 | ] 22 | for model_cls in classes + [None]: 23 | assert model_cls is not None 24 | name = model_cls.__name__ 25 | path_config = path / (name + ".yaml") 26 | if path_config.is_file(): 27 | break 28 | with open(path_config, "r") as f: 29 | config = yaml.load(f, Loader=yaml.FullLoader) 30 | atomic_energies = {} 31 | for key in list(config): 32 | print(key) 33 | if key.startswith("atomic_energies_"): 34 | element = key.split("atomic_energies_")[-1] 35 | assert element in chemical_symbols 36 | atomic_energies[element] = config.pop(key) 37 | model = model_cls(**config) 38 | for element, energy in atomic_energies.items(): 39 | model.add_atomic_energy(element, energy) 40 | 
path_model = path / "{}.pth".format(name) 41 | if path_model.is_file(): 42 | model.model_future = copy_data_future( 43 | inputs=[File(str(path_model))], 44 | outputs=[psiflow.context().new_file("model_", ".pth")], 45 | ).outputs[0] 46 | return model 47 | -------------------------------------------------------------------------------- /psiflow/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # necessary for type-guarding class methods 2 | 3 | from dataclasses import asdict 4 | from pathlib import Path 5 | from typing import Optional, Union 6 | 7 | import parsl 8 | import typeguard 9 | from parsl.data_provider.files import File 10 | from parsl.dataflow.futures import AppFuture 11 | 12 | import psiflow 13 | from psiflow.data import Dataset 14 | from psiflow.utils.apps import copy_data_future, log_message, setup_logger 15 | from psiflow.utils.io import save_yaml 16 | 17 | logger = setup_logger(__name__) 18 | 19 | 20 | @typeguard.typechecked 21 | @psiflow.serializable 22 | class Model: 23 | _config: dict 24 | model_future: Optional[psiflow._DataFuture] 25 | atomic_energies: dict 26 | 27 | def add_atomic_energy(self, element: str, energy: Union[float, AppFuture]) -> None: 28 | assert self.model_future is None, ( 29 | "cannot add atomic energies after model has " 30 | "been initialized; reset model, add energy, and reinitialize" 31 | ) 32 | if element in self.atomic_energies: 33 | if isinstance(energy, AppFuture): 34 | energy = energy.result() 35 | if isinstance(self.atomic_energies[element], AppFuture): 36 | existing = self.atomic_energies[element].result() 37 | assert energy == existing, ( 38 | "model already has atomic energy " 39 | "for element {} ({}), which is different from {}" 40 | "".format(element, existing, energy) 41 | ) 42 | self.atomic_energies[element] = energy 43 | 44 | def train(self, training: Dataset, validation: Dataset) -> None: 45 | log_message( 46 | logger, 47 | "training model using {} states for training and {} for validation", 48 | training.length(), 49 | validation.length(), 50 | ) 51 | inputs = [self.model_future] 52 | if self.do_offset: 53 | inputs += [ 54 | training.subtract_offset(**self.atomic_energies).extxyz, 55 | validation.subtract_offset(**self.atomic_energies).extxyz, 56 | ] 57 | else: 58 | inputs += [ 59 | training.extxyz, 60 | validation.extxyz, 61 | ] 62 | future = self._train( 63 | dict(self._config), 64 | stdout=parsl.AUTO_LOGNAME, 65 | stderr=parsl.AUTO_LOGNAME, 66 | inputs=inputs, 67 | outputs=[psiflow.context().new_file("model_", ".pth")], 68 | ) 69 | self.model_future = future.outputs[0] 70 | 71 | def initialize(self, dataset: Dataset) -> None: 72 | """Initializes the model based on a dataset""" 73 | assert self.model_future is None 74 | if self.do_offset: 75 | inputs = [dataset.subtract_offset(**self.atomic_energies).extxyz] 76 | else: 77 | inputs = [dataset.extxyz] 78 | future = self._initialize( 79 | self._config, 80 | stdout=parsl.AUTO_LOGNAME, 81 | stderr=parsl.AUTO_LOGNAME, 82 | inputs=inputs, 83 | outputs=[psiflow.context().new_file("model_", ".pth")], 84 | ) 85 | self.model_future = future.outputs[0] 86 | 87 | def reset(self) -> None: 88 | self.model_future = None 89 | 90 | def save( 91 | self, 92 | path: Union[Path, str], 93 | ) -> None: 94 | path = psiflow.resolve_and_check(Path(path)) 95 | path.mkdir(exist_ok=True) 96 | 97 | name = self.__class__.__name__ 98 | path_config = path / "{}.yaml".format(name) 99 | 100 | atomic_energies = { 101 | 
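# keys are prefixed so that load_model() (models/__init__.py) can recognise
# them in the saved YAML config and strip the prefix again on loading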
"atomic_energies_" + key: value 102 | for key, value in self.atomic_energies.items() 103 | } 104 | save_yaml( 105 | self._config, 106 | outputs=[File(str(path_config))], 107 | **atomic_energies, 108 | ) 109 | if self.model_future is not None: 110 | path_model = path / "{}.pth".format(name) 111 | copy_data_future( 112 | inputs=[self.model_future], 113 | outputs=[File(str(path_model))], 114 | ) 115 | 116 | def copy(self) -> Model: 117 | model = self.__class__(**asdict(self.config)) 118 | for element, energy in self.atomic_energies.items(): 119 | model.add_atomic_energy(element, energy) 120 | if self.model_future is not None: 121 | model.model_future = copy_data_future( 122 | inputs=[self.model_future], 123 | outputs=[psiflow.context().new_file("model_", ".pth")], 124 | ).outputs[0] 125 | return model 126 | 127 | @property 128 | def do_offset(self) -> bool: 129 | return len(self.atomic_energies) > 0 130 | 131 | @property 132 | def seed(self) -> int: 133 | raise NotImplementedError 134 | 135 | @seed.setter 136 | def seed(self, arg) -> None: 137 | raise NotImplementedError 138 | -------------------------------------------------------------------------------- /psiflow/order_parameters.py: -------------------------------------------------------------------------------- 1 | class OrderParameter: 2 | pass 3 | -------------------------------------------------------------------------------- /psiflow/reference/__init__.py: -------------------------------------------------------------------------------- 1 | from ._cp2k import CP2K # noqa: F401 2 | from ._dftd3 import D3 # noqa: F401 3 | from .gpaw_ import GPAW # noqa: F401 4 | from .reference import Reference, evaluate # noqa: F401 5 | -------------------------------------------------------------------------------- /psiflow/reference/_dftd3.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import partial 3 | 4 | import numpy as np 5 | import typeguard 6 | from parsl.app.app import bash_app, python_app 7 | from parsl.dataflow.futures import AppFuture 8 | 9 | import psiflow 10 | from psiflow.geometry import Geometry 11 | from psiflow.reference.reference import Reference 12 | from psiflow.utils.apps import copy_app_future 13 | from psiflow.utils import TMP_COMMAND, CD_COMMAND 14 | 15 | 16 | @typeguard.typechecked 17 | def input_string(geometry: Geometry, parameters: dict, properties: tuple) -> str: 18 | geometry_str = geometry.to_string() 19 | data = { 20 | "geometry": geometry_str, 21 | "parameters": parameters, 22 | "properties": properties, 23 | } 24 | return json.dumps(data) 25 | 26 | 27 | def d3_singlepoint_pre( 28 | geometry: Geometry, 29 | parameters: dict, 30 | properties: tuple, 31 | d3_command: str, 32 | stdout: str = "", 33 | stderr: str = "", 34 | ) -> str: 35 | from psiflow.reference._dftd3 import input_string 36 | input_str = input_string(geometry, parameters, properties) 37 | command_list = [ 38 | TMP_COMMAND, 39 | CD_COMMAND, 40 | f"echo '{input_str}' > input.json", 41 | f"python -u {d3_command}", 42 | ] 43 | return "\n".join(command_list) 44 | 45 | 46 | @typeguard.typechecked 47 | def d3_singlepoint_post( 48 | geometry: Geometry, 49 | inputs: list = [], 50 | ) -> Geometry: 51 | from psiflow.geometry import new_nullstate 52 | 53 | with open(inputs[0], "r") as f: 54 | lines = f.read().split("\n") 55 | 56 | geometry = new_nullstate() 57 | for i, line in enumerate(lines): 58 | if "CALCULATION SUCCESSFUL" in line: 59 | natoms = int(lines[i + 1]) 60 | geometry_str = 
"\n".join(lines[i + 1 : i + 3 + natoms]) 61 | geometry = Geometry.from_string(geometry_str) 62 | assert geometry.energy is not None 63 | geometry.stdout = inputs[0] 64 | return geometry 65 | 66 | 67 | @typeguard.typechecked 68 | @psiflow.serializable 69 | class D3(Reference): 70 | outputs: list # json does deserialize(serialize(tuple)) = list 71 | executor: str 72 | parameters: dict 73 | 74 | def __init__( 75 | self, 76 | **parameters, 77 | ): 78 | self.parameters = parameters 79 | self.outputs = ["energy", "forces"] 80 | self.executor = "default_htex" 81 | self._create_apps() 82 | 83 | def _create_apps(self): 84 | path = "psiflow.reference._dftd3" 85 | d3_command = "$(python -c 'import {}; print({}.__file__)')".format(path, path) 86 | app_pre = bash_app(d3_singlepoint_pre, executors=["default_htex"]) 87 | app_post = python_app(d3_singlepoint_post, executors=["default_threads"]) 88 | self.app_pre = partial( 89 | app_pre, 90 | parameters=self.parameters, 91 | properties=tuple(self.outputs), 92 | d3_command=d3_command, 93 | ) 94 | self.app_post = app_post 95 | 96 | def compute_atomic_energy(self, element, box_size=None) -> AppFuture: 97 | return copy_app_future(0.0) # GPAW computes formation energy by default 98 | 99 | 100 | if __name__ == "__main__": 101 | from ase import Atoms 102 | from dftd3.ase import DFTD3 103 | 104 | with open("input.json", "r") as f: 105 | input_dict = json.loads(f.read()) 106 | 107 | geometry = Geometry.from_string(input_dict["geometry"]) 108 | parameters = input_dict["parameters"] 109 | properties = input_dict["properties"] 110 | 111 | atoms = Atoms( 112 | numbers=np.copy(geometry.per_atom.numbers), 113 | positions=np.copy(geometry.per_atom.positions), 114 | cell=np.copy(geometry.cell), 115 | pbc=geometry.periodic, 116 | ) 117 | 118 | calculator = DFTD3(**parameters) 119 | atoms.calc = calculator 120 | 121 | if "forces" in properties: 122 | geometry.per_atom.forces[:] = atoms.get_forces() 123 | if "energy" in properties: 124 | geometry.energy = atoms.get_potential_energy() 125 | 126 | output_str = geometry.to_string() 127 | print("CALCULATION SUCCESSFUL") 128 | print(output_str) 129 | -------------------------------------------------------------------------------- /psiflow/reference/gpaw_.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import partial 3 | from typing import Union 4 | 5 | import numpy as np 6 | import typeguard 7 | from parsl.app.app import bash_app, python_app 8 | from parsl.dataflow.futures import AppFuture 9 | 10 | import psiflow 11 | from psiflow.geometry import Geometry, new_nullstate 12 | from psiflow.reference.reference import Reference 13 | from psiflow.utils.apps import copy_app_future 14 | from psiflow.utils import TMP_COMMAND, CD_COMMAND 15 | 16 | 17 | @typeguard.typechecked 18 | def input_string(geometry: Geometry, gpaw_parameters: dict, properties: tuple) -> str: 19 | geometry_str = geometry.to_string() 20 | data = { 21 | "geometry": geometry_str, 22 | "gpaw_parameters": gpaw_parameters, 23 | "properties": properties, 24 | } 25 | return json.dumps(data) 26 | 27 | 28 | def gpaw_singlepoint_pre( 29 | geometry: Geometry, 30 | gpaw_parameters: dict, 31 | properties: tuple, 32 | gpaw_command: str, 33 | parsl_resource_specification: dict = {}, 34 | stdout: str = "", 35 | stderr: str = "", 36 | ) -> str: 37 | from psiflow.reference.gpaw_ import input_string 38 | input_str = input_string(geometry, gpaw_parameters, properties) 39 | write_command = f"echo '{input_str}' > 
input.json" 40 | command_list = [ 41 | TMP_COMMAND, 42 | CD_COMMAND, 43 | write_command, 44 | gpaw_command, 45 | ] 46 | return "\n".join(command_list) 47 | 48 | 49 | @typeguard.typechecked 50 | def gpaw_singlepoint_post( 51 | geometry: Geometry, 52 | inputs: list = [], 53 | ) -> Geometry: 54 | with open(inputs[0], "r") as f: 55 | lines = f.read().split("\n") 56 | 57 | geometry = new_nullstate() # GPAW parsing doesn't require initial geometry 58 | for i, line in enumerate(lines): 59 | if "CALCULATION SUCCESSFUL" in line: 60 | natoms = int(lines[i + 1]) 61 | geometry_str = "\n".join(lines[i + 1 : i + 3 + natoms]) 62 | geometry = Geometry.from_string(geometry_str) 63 | assert geometry.energy is not None 64 | geometry.stdout = inputs[0] 65 | return geometry 66 | 67 | 68 | @typeguard.typechecked 69 | @psiflow.serializable 70 | class GPAW(Reference): 71 | outputs: list # json does deserialize(serialize(tuple)) = list 72 | executor: str 73 | parameters: dict 74 | 75 | def __init__( 76 | self, 77 | outputs: Union[tuple, list] = ("energy", "forces"), 78 | executor: str = "GPAW", 79 | **parameters, 80 | ): 81 | self.outputs = list(outputs) 82 | self.parameters = parameters 83 | self.executor = executor 84 | self._create_apps() 85 | 86 | def _create_apps(self): 87 | definition = psiflow.context().definitions[self.executor] 88 | gpaw_command = definition.command() 89 | wq_resources = definition.wq_resources() 90 | app_pre = bash_app(gpaw_singlepoint_pre, executors=[self.executor]) 91 | app_post = python_app(gpaw_singlepoint_post, executors=["default_threads"]) 92 | self.app_pre = partial( 93 | app_pre, 94 | gpaw_parameters=self.parameters, 95 | properties=tuple(self.outputs), 96 | gpaw_command=gpaw_command, 97 | parsl_resource_specification=wq_resources, 98 | ) 99 | self.app_post = app_post 100 | 101 | def compute_atomic_energy(self, element, box_size=None) -> AppFuture: 102 | return copy_app_future(0.0) # GPAW computes formation energy by default 103 | 104 | 105 | if __name__ == "__main__": 106 | from ase import Atoms 107 | from ase.calculators.mixing import SumCalculator 108 | from ase.parallel import world 109 | from dftd3.ase import DFTD3 110 | from gpaw import GPAW as GPAWCalculator 111 | 112 | def minimal_box( 113 | atoms: Atoms, 114 | border: float = 0.0, 115 | h: float = 0.2, 116 | multiple: int = 4, 117 | ) -> None: 118 | # inspired by gpaw.cluster.Cluster 119 | if len(atoms) == 0: 120 | return None 121 | min_bounds, max_bounds = np.array( 122 | [np.minimum.reduce(atoms.positions), np.maximum.reduce(atoms.positions)] 123 | ) 124 | if isinstance(border, list): 125 | b = np.array(border) 126 | else: 127 | b = np.array([border, border, border]) 128 | if not hasattr(h, "__len__"): 129 | h = np.array([h, h, h]) 130 | min_bounds -= b 131 | max_bounds += b - min_bounds 132 | grid_points = np.ceil(max_bounds / h / multiple) * multiple 133 | length_diff = grid_points * h - max_bounds 134 | max_bounds += length_diff 135 | min_bounds -= length_diff / 2 136 | shift = tuple(-1.0 * min_bounds) 137 | atoms.translate(shift) 138 | atoms.set_cell(tuple(max_bounds)) 139 | 140 | with open("input.json", "r") as f: 141 | input_dict = json.loads(f.read()) 142 | 143 | geometry = Geometry.from_string(input_dict["geometry"]) 144 | gpaw_parameters = input_dict["gpaw_parameters"] 145 | properties = input_dict["properties"] 146 | d3 = gpaw_parameters.pop("d3", {}) 147 | 148 | atoms = Atoms( 149 | numbers=np.copy(geometry.per_atom.numbers), 150 | positions=np.copy(geometry.per_atom.positions), 151 | 
        cell=np.copy(geometry.cell),
152 |         pbc=geometry.periodic,
153 |     )
154 |     if not geometry.periodic:
155 |         minimal_box(
156 |             atoms,
157 |             h=gpaw_parameters.get("h", 0.2),
158 |             border=gpaw_parameters.pop("minimal_box_border", 2),  # if present, remove
159 |             multiple=gpaw_parameters.pop("minimal_box_multiple", 4),
160 |         )
161 | 
162 |     calculator = GPAWCalculator(**gpaw_parameters)
163 |     if len(d3) > 0:
164 |         calculator = SumCalculator([calculator, DFTD3(**d3)])
165 |     atoms.calc = calculator
166 | 
167 |     if "forces" in properties:
168 |         geometry.per_atom.forces[:] = atoms.get_forces()
169 |     if "energy" in properties:
170 |         geometry.energy = atoms.get_potential_energy()
171 | 
172 |     output_str = geometry.to_string()
173 |     if world.rank == 0:
174 |         print("CALCULATION SUCCESSFUL")
175 |         print(output_str)
176 | 
--------------------------------------------------------------------------------
/psiflow/reference/orca.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/psiflow/reference/orca.py
--------------------------------------------------------------------------------
/psiflow/reference/reference.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations  # necessary for type-guarding class methods
2 | 
3 | import logging
4 | from typing import ClassVar, Optional, Union
5 | 
6 | import numpy as np
7 | import parsl
8 | import typeguard
9 | from ase.data import atomic_numbers
10 | from parsl.app.app import join_app, python_app
11 | from parsl.dataflow.futures import AppFuture
12 | 
13 | import psiflow
14 | from psiflow.data import Computable, Dataset
15 | from psiflow.geometry import Geometry, NullState
16 | from psiflow.utils.apps import copy_app_future, unpack_i
17 | 
18 | logger = logging.getLogger(__name__)  # logging per module
19 | 
20 | 
21 | @typeguard.typechecked
22 | def _extract_energy(state: Geometry):
23 |     if state.energy is None:
24 |         return 1e10
25 |     else:
26 |         return state.energy
27 | 
28 | 
29 | extract_energy = python_app(_extract_energy, executors=["default_threads"])
30 | 
31 | 
32 | @join_app
33 | @typeguard.typechecked
34 | def get_minimum_energy(element, configs, *energies):
35 |     logger.info("atomic energies for element {}:".format(element))
36 |     for config, energy in zip(configs, energies):
37 |         logger.info("\t{} eV; ".format(energy) + str(config))
38 |     energy = min(energies)
39 |     assert not energy == 1e10, "atomic energy calculation of {} failed".format(element)
40 |     return copy_app_future(energy)
41 | 
42 | 
43 | @typeguard.typechecked
44 | def _nan_if_unsuccessful(
45 |     geometry: Geometry,
46 |     result: Geometry,
47 | ) -> Geometry:
48 |     if result == NullState:
49 |         geometry.energy = None
50 |         geometry.per_atom.forces[:] = np.nan
51 |         geometry.stress = None
52 |         geometry.stdout = result.stdout
53 |         return geometry
54 |     else:
55 |         return result
56 | 
57 | 
58 | nan_if_unsuccessful = python_app(_nan_if_unsuccessful, executors=["default_threads"])
59 | 
60 | 
61 | @join_app
62 | @typeguard.typechecked
63 | def evaluate(
64 |     geometry: Geometry,
65 |     reference: Reference,
66 | ) -> AppFuture:
67 |     if geometry == NullState:
68 |         return copy_app_future(NullState)
69 |     else:
70 |         future = reference.app_pre(
71 |             geometry,
72 |             stdout=parsl.AUTO_LOGNAME,
73 |             stderr=parsl.AUTO_LOGNAME,
74 |         )
75 |         result = reference.app_post(
76 |             geometry=geometry.copy(),
77 |             inputs=[future.stdout, future.stderr, future],
78 |         )
79 |         return 
nan_if_unsuccessful(geometry, result) 80 | 81 | 82 | @join_app 83 | @typeguard.typechecked 84 | def compute_dataset( 85 | dataset: Dataset, 86 | length: int, 87 | reference: Reference, 88 | ) -> AppFuture: 89 | from psiflow.data.utils import extract_quantities 90 | 91 | geometries = dataset.geometries() # read it once 92 | evaluated = [evaluate(unpack_i(geometries, i), reference) for i in range(length)] 93 | future = extract_quantities( 94 | tuple(reference.outputs), 95 | None, 96 | None, 97 | *evaluated, 98 | ) 99 | return future 100 | 101 | 102 | @typeguard.typechecked 103 | @psiflow.serializable 104 | class Reference(Computable): 105 | outputs: tuple 106 | batch_size: ClassVar[int] = 1 # not really used 107 | 108 | def compute( 109 | self, 110 | arg: Union[Dataset, Geometry, AppFuture, list], 111 | *outputs: Optional[Union[str, tuple]], 112 | ): 113 | if isinstance(arg, Dataset): 114 | dataset = arg 115 | elif isinstance(arg, list): 116 | dataset = Dataset(arg) 117 | elif isinstance(arg, AppFuture) or isinstance(arg, Geometry): 118 | dataset = Dataset([arg]) 119 | compute_outputs = compute_dataset(dataset, dataset.length(), self) 120 | if len(outputs) == 0: 121 | outputs_ = tuple(self.outputs) 122 | else: 123 | outputs_ = outputs 124 | to_return = [] 125 | for output in outputs_: 126 | if output not in self.outputs: 127 | raise ValueError("output {} not in {}".format(output, self.outputs)) 128 | index = self.outputs.index(output) 129 | to_return.append(compute_outputs[index]) 130 | if len(outputs_) == 1: 131 | return to_return[0] 132 | else: 133 | return to_return 134 | 135 | def compute_atomic_energy(self, element, box_size=None): 136 | energies = [] 137 | references = self.get_single_atom_references(element) 138 | configs = [c for c, _ in references] 139 | if box_size is not None: 140 | state = Geometry.from_data( 141 | numbers=np.array([atomic_numbers[element]]), 142 | positions=np.array([[0, 0, 0]]), 143 | cell=np.eye(3) * box_size, 144 | ) 145 | else: 146 | state = Geometry( 147 | numbers=np.array([atomic_numbers[element]]), 148 | positions=np.array([[0, 0, 0]]), 149 | cell=np.zeros((3, 3)), 150 | ) 151 | for _, reference in references: 152 | energies.append(extract_energy(evaluate(state, reference))) 153 | return get_minimum_energy(element, configs, *energies) 154 | 155 | def get_single_atom_references(self, element): 156 | return [(None, self)] 157 | -------------------------------------------------------------------------------- /psiflow/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .metadynamics import Metadynamics # noqa: F401 2 | # from .optimize import optimize, optimize_dataset # noqa: F401 3 | from .output import SimulationOutput # noqa: F401 4 | from .sampling import sample # noqa: F401 5 | from .walker import ReplicaExchange # noqa: F401 6 | from .walker import Walker # noqa: F401 7 | from .walker import quench # noqa: F401 8 | from .walker import randomize # noqa: F401 9 | from .walker import replica_exchange # noqa: F401 10 | -------------------------------------------------------------------------------- /psiflow/sampling/_ase.py: -------------------------------------------------------------------------------- 1 | """ 2 | Structure optimisation through ASE 3 | TODO: do we need to check for very large forces? 4 | TODO: what units are pressure? 5 | TODO: what to do when max_steps is reached before converging? 
6 | TODO: timeout is duplicated code
7 | """
8 | 
9 | import os
10 | import json
11 | import warnings
12 | import signal
13 | import argparse
14 | from pathlib import Path
15 | from types import SimpleNamespace
16 | 
17 | import ase
18 | import ase.io
19 | import numpy as np
20 | from ase.io.extxyz import save_calc_results
21 | from ase.calculators.calculator import Calculator, all_properties
22 | from ase.calculators.mixing import LinearCombinationCalculator
23 | from ase.optimize.precon import PreconLBFGS
24 | from ase.filters import FrechetCellFilter
25 | 
26 | from psiflow.geometry import Geometry
27 | from psiflow.functions import function_from_json, EnergyFunction
28 | from psiflow.sampling.utils import TimeoutException, timeout_handler
29 | 
30 | 
31 | ALLOWED_MODES: tuple[str, ...] = ('full', 'fix_volume', 'fix_shape', 'fix_cell')
32 | FILE_OUT: str = 'out.xyz'
33 | FILE_TRAJ: str = 'out.traj'
34 | 
35 | 
36 | class FunctionCalculator(Calculator):
37 |     implemented_properties = ['energy', 'free_energy', 'forces', 'stress']
38 | 
39 |     def __init__(self, function: EnergyFunction, **kwargs):
40 |         super().__init__(**kwargs)
41 |         self.function = function
42 | 
43 |     def calculate(
44 |         self,
45 |         atoms=None,
46 |         properties=all_properties,
47 |         system_changes=None,
48 |     ):
49 |         super().calculate(atoms, properties, system_changes)
50 |         geometry = Geometry.from_atoms(self.atoms)
51 |         self.results = self.function(geometry)
52 |         self.results['free_energy'] = self.results['energy']  # required by optimiser
53 | 
54 | 
55 | def log_state(atoms: ase.Atoms) -> None:
56 |     """Print a short summary of the current atoms state."""
57 |     def make_log(data: list[tuple[str]]):
58 |         """Format the collected (name, value, unit) tuples and print them."""
59 |         txt = ['', 'Current atoms state:']
60 |         txt += [f'{_[0]:<15}: {_[1]:<25}[{_[2]}]' for _ in data]
61 |         txt += 'End', ''
62 |         print(*txt, sep='\n')
63 | 
64 |     data = []
65 |     if atoms.calc:
66 |         energy, max_force = atoms.get_potential_energy(), np.linalg.norm(atoms.get_forces(), axis=1).max()  # per-atom force norms
67 |     else:
68 |         energy, max_force = [np.nan] * 2
69 |     data += ('Energy', f'{energy:.2f}', 'eV'), ('Max. force', f'{max_force:.2E}', 'eV/A')
70 | 
71 |     if not all(atoms.pbc):
72 |         make_log(data)
73 |         return
74 | 
75 |     volume, cell = atoms.get_volume(), atoms.get_cell().cellpar().round(3)
76 |     data += ('Cell volume', f'{volume:.2f}', 'A^3'),
77 |     data += ('Box norms', str(cell[:3])[1:-1], 'A'), ('Box angles', str(cell[3:])[1:-1], 'degrees')
78 | 
79 |     make_log(data)
80 |     return
81 | 
82 | 
83 | def get_dof_filter(atoms: ase.Atoms, mode: str, pressure: float) -> ase.Atoms | FrechetCellFilter:
84 |     """Wrap the atoms in a cell filter matching the requested optimisation mode."""
85 |     if mode == 'fix_cell':
86 |         if pressure:
87 |             warnings.warn('Ignoring external pressure..')
88 |         return atoms
89 |     kwargs = {'mask': [True] * 6, 'scalar_pressure': pressure}  # enable cell DOFs
90 |     if mode == 'fix_shape':
91 |         kwargs['hydrostatic_strain'] = True
92 |     if mode == 'fix_volume':
93 |         kwargs['constant_volume'] = True
94 |         if pressure:
95 |             warnings.warn('Ignoring applied pressure during fixed volume optimisation..')
96 |     return FrechetCellFilter(atoms, **kwargs)
97 | 
98 | 
99 | def run(args: SimpleNamespace):
100 |     """Run the optimisation specified by the JSON config."""
101 |     config = json.load(Path(args.input_config).open('r'))
102 | 
103 |     atoms = ase.io.read(args.start_xyz)
104 |     if not any(atoms.pbc):
105 |         atoms.center(vacuum=0)  # optimiser mysteriously requires a nonzero unit cell
106 |         if config['mode'] != 'fix_cell':
107 |             config['mode'] = 'fix_cell'
108 |             warnings.warn('Molecular structure is not periodic. 
Ignoring cell..') 109 | 110 | # construct calculator by combining hamiltonians 111 | assert args.path_hamiltonian is not None 112 | print('Making calculator from:', *config['forces'], sep='\n') 113 | functions = [function_from_json(p) for p in args.path_hamiltonian] 114 | calc = LinearCombinationCalculator( 115 | [FunctionCalculator(f) for f in functions], 116 | [float(h['weight']) for h in config['forces']] 117 | ) 118 | 119 | atoms.calc = calc 120 | dof = get_dof_filter(atoms, config['mode'], config['pressure']) 121 | opt = PreconLBFGS(dof, trajectory=FILE_TRAJ if config['keep_trajectory'] else None) 122 | 123 | print(f"pid: {os.getpid()}") 124 | print(f"CPU affinity: {os.sched_getaffinity(os.getpid())}") 125 | log_state(atoms) 126 | try: 127 | opt.run(fmax=config['f_max'], steps=config['max_steps']) 128 | except TimeoutException: 129 | print('OPTIMISATION TIMEOUT') 130 | # TODO: what to do here? 131 | return 132 | 133 | log_state(atoms) 134 | save_calc_results(atoms, calc_prefix='', remove_atoms_calc=True) 135 | if not any(atoms.pbc): 136 | atoms.cell = None # remove meaningless cell 137 | ase.io.write(FILE_OUT, atoms) 138 | print('OPTIMISATION SUCCESSFUL') 139 | return 140 | 141 | 142 | def clean(args: SimpleNamespace): 143 | """""" 144 | from psiflow.data.utils import _write_frames 145 | 146 | geometry = Geometry.load(FILE_OUT) 147 | _write_frames(geometry, outputs=[args.output_xyz]) 148 | if Path(FILE_TRAJ).is_file(): 149 | traj = [at for at in ase.io.trajectory.Trajectory(FILE_TRAJ)] 150 | geometries = [Geometry.from_atoms(at) for at in traj] 151 | _write_frames(*geometries, outputs=[args.output_traj]) 152 | print('FILES MOVED') 153 | return 154 | 155 | 156 | def main(): 157 | signal.signal(signal.SIGTERM, timeout_handler) 158 | parser = argparse.ArgumentParser() 159 | subparsers = parser.add_subparsers(help='what to do', dest='action') 160 | run_parser = subparsers.add_parser("run") 161 | run_parser.set_defaults(func=run) 162 | run_parser.add_argument( 163 | "--path_hamiltonian", 164 | action='extend', 165 | nargs='*', 166 | type=str, 167 | ) 168 | run_parser.add_argument( 169 | "--input_config", 170 | type=str, 171 | default=None, 172 | ) 173 | run_parser.add_argument( 174 | "--start_xyz", 175 | type=str, 176 | default=None, 177 | ) 178 | clean_parser = subparsers.add_parser("clean") 179 | clean_parser.set_defaults(func=clean) 180 | clean_parser.add_argument( 181 | "--output_xyz", 182 | type=str, 183 | default=None, 184 | ) 185 | clean_parser.add_argument( 186 | "--output_traj", 187 | type=str, 188 | default=None, 189 | ) 190 | args = parser.parse_args() 191 | args.func(args) 192 | 193 | 194 | -------------------------------------------------------------------------------- /psiflow/sampling/ase.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # necessary for type-guarding class methods 2 | 3 | from typing import Optional, Union 4 | 5 | import parsl 6 | import typeguard 7 | from parsl.app.app import bash_app, join_app 8 | from parsl.dataflow.futures import AppFuture, DataFuture 9 | 10 | import psiflow 11 | from psiflow.data import Dataset 12 | from psiflow.data.utils import write_frames 13 | from psiflow.geometry import Geometry 14 | from psiflow.hamiltonians import Hamiltonian 15 | from psiflow.utils.io import dump_json 16 | from psiflow.sampling.sampling import serialize_mixture, label_forces 17 | from psiflow.utils import TMP_COMMAND, CD_COMMAND 18 | 19 | from ._ase import ALLOWED_MODES 20 | 21 | 
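#
# Hedged usage sketch (not part of the module): the ASE-based optimize() defined
# below, assuming an existing Hamiltonian `mace` and a Geometry `start`:
#
#   final = optimize(start, mace, mode='fix_shape', f_max=1e-4)
#   final, trajectory = optimize(start, mace, keep_trajectory=True)
#
# `mode` must be one of ALLOWED_MODES; `f_max` is the PreconLBFGS force
# threshold in eV/A.
#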
EXECUTABLE = 'psiflow-ase-opt' # not stored in ModelEvaluation (yet?) 22 | 23 | 24 | def _execute_ase( 25 | command_launch: str, 26 | inputs: list[DataFuture], 27 | outputs: list[DataFuture], 28 | env_vars: dict = {}, 29 | stdout: str = "", 30 | stderr: str = "", 31 | parsl_resource_specification: Optional[dict] = None, 32 | ) -> str: 33 | env_command = 'export ' + ' '.join([f"{name}={value}" for name, value in env_vars.items()]) 34 | command_start = ' '.join([ 35 | f'{command_launch} run --input_config={inputs[0].filepath} --start_xyz={inputs[1].filepath}', 36 | *[f'--path_hamiltonian={future.filepath}' for future in inputs[2:]], '&' 37 | ]) 38 | command_end = f'{command_launch} clean --output_xyz={outputs[0].filepath}' 39 | if len(outputs) == 2: 40 | command_end += f' --output_traj={outputs[1].filepath}' 41 | 42 | command_list = [ 43 | TMP_COMMAND, 44 | CD_COMMAND, 45 | env_command, 46 | command_start, 47 | "wait", 48 | command_end, 49 | ] 50 | return "\n".join(command_list) 51 | 52 | 53 | execute_ase = bash_app(_execute_ase, executors=["ModelEvaluation"]) 54 | 55 | 56 | @typeguard.typechecked 57 | def optimize( 58 | state: Union[Geometry, AppFuture], 59 | hamiltonian: Hamiltonian, 60 | mode: str = 'full', 61 | steps: int = int(1e12), 62 | keep_trajectory: bool = False, 63 | pressure: float = 0, 64 | f_max: float = 1e-3, 65 | ) -> Union[AppFuture, tuple[AppFuture, Dataset]]: 66 | 67 | assert mode in ALLOWED_MODES 68 | assert steps > 0 69 | assert f_max > 0 70 | 71 | context = psiflow.context() 72 | definition = context.definitions["ModelEvaluation"] 73 | 74 | command_list = [EXECUTABLE] 75 | if definition.max_simulation_time is not None: 76 | max_time = 0.9 * (60 * definition.max_simulation_time) 77 | command_list = ["timeout -s 15 {}s".format(max_time), *command_list] 78 | command_launch = " ".join(command_list) 79 | 80 | input_geometry = Dataset([state]).extxyz 81 | hamiltonian = 1.0 * hamiltonian # convert to mixture 82 | names, coeffs = label_forces(hamiltonian), hamiltonian.coefficients 83 | input_forces = serialize_mixture(hamiltonian, dtype="float64") # double precision for MLPs 84 | forces = [ 85 | dict(forcefield=n, weight=str(c), file=f.filename) for n, c, f in zip(names, coeffs, input_forces) 86 | ] 87 | 88 | config = dict( 89 | task='ASE optimisation', 90 | forces=forces, 91 | mode=mode, 92 | f_max=f_max, 93 | pressure=pressure, 94 | max_steps=steps, 95 | keep_trajectory=keep_trajectory, 96 | ) 97 | input_future = dump_json( 98 | outputs=[context.new_file("input_", ".json")], 99 | **config, 100 | ).outputs[0] 101 | inputs = [input_future, input_geometry, *input_forces] 102 | 103 | outputs = [context.new_file("data_", ".xyz")] 104 | if keep_trajectory: 105 | outputs.append(context.new_file("opt_", ".xyz")) 106 | 107 | result = execute_ase( 108 | command_launch=command_launch, 109 | env_vars=definition.env_vars, 110 | inputs=inputs, 111 | outputs=outputs, 112 | stdout=parsl.AUTO_LOGNAME, 113 | stderr=parsl.AUTO_LOGNAME, 114 | parsl_resource_specification=definition.wq_resources(1), 115 | ) 116 | 117 | final = Dataset(None, result.outputs[0])[-1] 118 | if keep_trajectory: 119 | trajectory = Dataset(None, result.outputs[1]) 120 | return final, trajectory 121 | else: 122 | return final 123 | 124 | 125 | @join_app 126 | @typeguard.typechecked 127 | def _optimize_dataset( 128 | geometries: list[Geometry], *args, outputs: list = [], **kwargs 129 | ) -> AppFuture: 130 | assert not kwargs.get("keep_trajectory", False) 131 | optimized = [] 132 | for geometry in geometries: 133 | 
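# each call merely submits parsl apps and returns futures immediately,
# so the individual optimizations can run concurrently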
optimized.append(optimize(geometry, *args, **kwargs)) 134 | return write_frames(*optimized, outputs=[outputs[0]]) 135 | 136 | 137 | @typeguard.typechecked 138 | def optimize_dataset(dataset: Dataset, *args, **kwargs) -> Dataset: 139 | extxyz = _optimize_dataset( 140 | dataset.geometries(), 141 | *args, 142 | outputs=[psiflow.context().new_file("data_", ".xyz")], 143 | **kwargs, 144 | ).outputs[0] 145 | return Dataset(None, extxyz) 146 | -------------------------------------------------------------------------------- /psiflow/sampling/client.py: -------------------------------------------------------------------------------- 1 | # top level imports should be lightweight! 2 | import os 3 | 4 | 5 | class SocketNotFoundException(Exception): 6 | pass 7 | 8 | 9 | def wait_for_socket(address: 'Path', timeout: float = 10, interval: float = 0.1) -> None: 10 | """""" 11 | import time 12 | while not address.exists(): 13 | time.sleep(interval) 14 | timeout -= interval 15 | if timeout < 0: 16 | raise SocketNotFoundException(f'Could not find socket "{address}" to connect to..') 17 | return 18 | 19 | 20 | def main(): 21 | import argparse 22 | import time 23 | from pathlib import Path 24 | 25 | from ase.io import read 26 | from ipi._driver.driver import run_driver 27 | 28 | from psiflow.functions import function_from_json 29 | from psiflow.geometry import Geometry 30 | from psiflow.sampling.utils import ForceMagnitudeException, FunctionDriver 31 | 32 | print("OS environment values:") 33 | for key, value in os.environ.items(): 34 | print(key, value) 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument( 37 | "--path_hamiltonian", 38 | type=str, 39 | default=None, 40 | ) 41 | parser.add_argument( 42 | "--device", 43 | type=str, 44 | default=None, 45 | ) 46 | parser.add_argument( 47 | "--dtype", 48 | type=str, 49 | default=None, 50 | ) 51 | parser.add_argument( 52 | "--address", 53 | type=str, 54 | default=None, 55 | ) 56 | parser.add_argument( 57 | "--start", 58 | type=str, 59 | default=None, 60 | ) 61 | parser.add_argument( 62 | "--max_force", 63 | type=float, 64 | default=None, 65 | ) 66 | args = parser.parse_args() 67 | assert args.path_hamiltonian is not None 68 | assert args.address is not None 69 | assert args.start is not None 70 | 71 | print("pid: {}".format(os.getpid())) 72 | affinity = os.sched_getaffinity(os.getpid()) 73 | print("CPU affinity before function init: {}".format(affinity)) 74 | 75 | template = Geometry.from_atoms(read(args.start)) 76 | function = function_from_json( 77 | args.path_hamiltonian, 78 | device=args.device, 79 | dtype=args.dtype, 80 | ) 81 | 82 | driver = FunctionDriver( 83 | template=template, 84 | function=function, 85 | max_force=args.max_force, 86 | verbose=True, 87 | ) 88 | 89 | affinity = os.sched_getaffinity(os.getpid()) 90 | print("CPU affinity after function init: {}".format(affinity)) 91 | try: 92 | t0 = time.time() 93 | for _ in range(10): 94 | function(template) # torch warm-up before simulation 95 | print("time for 10 evaluations: {}".format(time.time() - t0)) 96 | socket_address = Path.cwd() / args.address 97 | wait_for_socket(socket_address) 98 | run_driver( 99 | unix=True, 100 | address=str(socket_address), 101 | driver=driver, 102 | sockets_prefix="", 103 | ) 104 | except ForceMagnitudeException as e: 105 | print(e) # induce timeout in server 106 | except ConnectionResetError as e: # some other client induced a timeout 107 | print(e) 108 | except SocketNotFoundException as e: 109 | print(e, *list(Path.cwd().iterdir()), sep='\n') # server-side 
socket not found
110 | 
111 | 
--------------------------------------------------------------------------------
/psiflow/sampling/metadynamics.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations  # necessary for type-guarding class methods
2 | 
3 | from pathlib import Path
4 | from typing import Optional, Union
5 | 
6 | import typeguard
7 | from parsl.data_provider.files import File
8 | from parsl.dataflow.futures import AppFuture
9 | 
10 | import psiflow
11 | from psiflow.utils._plumed import remove_comments_printflush, set_path_in_plumed
12 | from psiflow.utils.apps import copy_app_future, copy_data_future
13 | 
14 | 
15 | @typeguard.typechecked
16 | @psiflow.serializable
17 | class Metadynamics:
18 |     _plumed_input: str
19 |     external: Optional[psiflow._DataFuture]
20 | 
21 |     def __init__(
22 |         self,
23 |         plumed_input: str,
24 |         external: Union[None, str, Path, psiflow._DataFuture] = None,
25 |     ):
26 |         _plumed_input = remove_comments_printflush(plumed_input)
27 |         assert "METAD" in _plumed_input
28 |         if "RESTART" not in _plumed_input:
29 |             _plumed_input = "\nRESTART\n" + _plumed_input
30 |         if "FLUSH" not in _plumed_input:  # add at the end!
31 |             _plumed_input = _plumed_input + "\nFLUSH STRIDE=1\nPRINT"
32 | 
33 |         # PLUMED + WQ cannot deal with nonexisting hills files!
34 |         if type(external) in [str, Path]:
35 |             external = File(str(external))
36 |             Path(external.filepath).touch()
37 |         if external is None:
38 |             external = psiflow.context().new_file("hills_", ".txt")
39 |             Path(external.filepath).touch()
40 |         else:
41 |             assert external.filepath in _plumed_input
42 |             Path(external.filepath).touch()
43 |         _plumed_input = set_path_in_plumed(
44 |             _plumed_input,
45 |             "METAD",
46 |             "PLACEHOLDER",
47 |         )
48 |         self._plumed_input = _plumed_input
49 |         self.external = external
50 | 
51 |     def plumed_input(self):
52 |         plumed_input = self._plumed_input
53 |         plumed_input = plumed_input.replace("PLACEHOLDER", self.external.filepath)
54 |         return plumed_input
55 | 
56 |     def input(self) -> AppFuture:
57 |         return copy_app_future(self.plumed_input(), inputs=[self.external])
58 | 
59 |     def wait_for(self, result: AppFuture) -> None:
60 |         self.external = copy_app_future(
61 |             0,
62 |             inputs=[result, self.external],
63 |             outputs=[File(self.external.filepath)],
64 |         ).outputs[0]
65 | 
66 |     def reset(self) -> None:
67 |         self.external = psiflow.context().new_file("hills_", ".txt")
68 | 
69 |     def __eq__(self, other) -> bool:
70 |         if type(other) is not Metadynamics:
71 |             return False
72 |         return self.plumed_input() == other.plumed_input()
73 | 
74 |     def copy(self) -> Metadynamics:
75 |         new_external = copy_data_future(
76 |             inputs=[self.external],
77 |             outputs=[psiflow.context().new_file("hills_", ".txt")],
78 |         ).outputs[0]
79 |         mtd = Metadynamics(
80 |             str(self.plumed_input()),
81 |         )
82 |         assert "PLACEHOLDER" in mtd._plumed_input  # instead of original filepath
83 |         mtd.external = new_external
84 |         return mtd
85 | 
--------------------------------------------------------------------------------
/psiflow/sampling/optimize.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations  # necessary for type-guarding class methods
2 | 
3 | import xml.etree.ElementTree as ET
4 | from typing import Optional, Union
5 | 
6 | import parsl
7 | import typeguard
8 | from ase.units import Bohr, Ha
9 | from parsl.app.app import bash_app, join_app
10 | from parsl.dataflow.futures import AppFuture
11 | 
12 | import psiflow
13 | from 
psiflow.data import Dataset 14 | from psiflow.data.utils import write_frames 15 | from psiflow.geometry import Geometry 16 | from psiflow.hamiltonians import Hamiltonian 17 | from psiflow.sampling.sampling import setup_sockets, make_start_command, make_client_command 18 | from psiflow.utils.io import save_xml 19 | from psiflow.utils import TMP_COMMAND, CD_COMMAND 20 | 21 | 22 | @typeguard.typechecked 23 | def setup_forces(hamiltonian: Hamiltonian) -> tuple[dict[str, Hamiltonian], ET.Element]: 24 | hamiltonian = 1.0 * hamiltonian # convert to mixture 25 | counts = {} 26 | hamiltonians_map = {} 27 | forces = ET.Element("forces") 28 | for h, c in zip(hamiltonian.hamiltonians, hamiltonian.coefficients): 29 | name = h.__class__.__name__ 30 | if name not in counts: 31 | counts[name] = 0 32 | count = counts.get(name) 33 | counts[name] += 1 34 | force = ET.Element("force", forcefield=name + str(count), weight=str(c)) 35 | forces.append(force) 36 | hamiltonians_map[name + str(count)] = h 37 | return hamiltonians_map, forces 38 | 39 | 40 | @typeguard.typechecked 41 | def setup_motion( 42 | mode: str, 43 | etol: float, 44 | ptol: float, 45 | ftol: float, 46 | ) -> ET.Element: 47 | motion = ET.Element("motion", mode="minimize") 48 | optimizer = ET.Element("optimizer", mode=mode) 49 | tolerances = ET.Element("tolerances") 50 | 51 | energy = ET.Element("energy") 52 | energy.text = " {} ".format(etol / Ha) 53 | tolerances.append(energy) 54 | position = ET.Element("position") 55 | position.text = " {} ".format(ptol / Bohr) 56 | tolerances.append(position) 57 | force = ET.Element("force") 58 | force.text = " {} ".format(ftol / Ha * Bohr) 59 | tolerances.append(force) 60 | optimizer.append(tolerances) 61 | motion.append(optimizer) 62 | return motion 63 | 64 | 65 | @typeguard.typechecked 66 | def setup_output(keep_trajectory: bool) -> ET.Element: 67 | output = ET.Element("output", prefix="output") 68 | checkpoint = ET.Element( 69 | "checkpoint", 70 | filename="checkpoint", 71 | stride="1", 72 | overwrite="True", 73 | ) 74 | output.append(checkpoint) 75 | if keep_trajectory: 76 | trajectory = ET.Element( # needed in any case 77 | "trajectory", 78 | stride="1", 79 | format="ase", 80 | filename="trajectory", 81 | bead="0", 82 | ) 83 | trajectory.text = r" positions " 84 | output.append(trajectory) 85 | return output 86 | 87 | 88 | def _execute_ipi( 89 | hamiltonian_names: list[str], 90 | client_args: list[list[str]], 91 | keep_trajectory: bool, 92 | command_server: str, 93 | command_client: str, 94 | env_vars: dict = {}, 95 | stdout: str = "", 96 | stderr: str = "", 97 | inputs: list = [], 98 | outputs: list = [], 99 | parsl_resource_specification: Optional[dict] = None, 100 | ) -> str: 101 | env_command = 'export ' + ' '.join([f"{name}={value}" for name, value in env_vars.items()]) 102 | command_start = make_start_command(command_server, inputs[0], inputs[1]) 103 | commands_client = [] 104 | for i, name in enumerate(hamiltonian_names): 105 | args = client_args[i] 106 | assert len(args) == 1 # only have one client per hamiltonian 107 | for arg in args: 108 | commands_client += make_client_command(command_client, name, inputs[2 + i], inputs[1], arg), 109 | 110 | command_end = f'{command_server} --cleanup --output_xyz={outputs[0].filepath}' 111 | command_copy = f'cp walker-0_output.trajectory_0.ase {outputs[1].filepath}' if keep_trajectory else '' 112 | command_list = [ 113 | TMP_COMMAND, 114 | CD_COMMAND, 115 | env_command, 116 | command_start, 117 | *commands_client, 118 | "wait", 119 | command_end, 120 | 
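# empty string when keep_trajectory is False; the resulting blank
# line in the generated script is ignored by bash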
command_copy, 121 | ] 122 | return "\n".join(command_list) 123 | 124 | 125 | execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"]) 126 | 127 | 128 | @typeguard.typechecked 129 | def optimize( 130 | state: Union[Geometry, AppFuture], 131 | hamiltonian: Hamiltonian, 132 | steps: int = 5000, 133 | keep_trajectory: bool = False, 134 | mode: str = "lbfgs", 135 | etol: float = 1e-3, 136 | ptol: float = 1e-5, 137 | ftol: float = 1e-3, 138 | ) -> Union[AppFuture, tuple[AppFuture, Dataset]]: 139 | hamiltonians_map, forces = setup_forces(hamiltonian) 140 | sockets = setup_sockets(hamiltonians_map) 141 | 142 | initialize = ET.Element("initialize", nbeads="1") 143 | start = ET.Element("file", mode="ase", cell_units="angstrom") 144 | start.text = " start_0.xyz " 145 | initialize.append(start) 146 | motion = setup_motion(mode, etol, ptol, ftol) 147 | 148 | system = ET.Element("system", prefix="walker-0") 149 | system.append(initialize) 150 | system.append(motion) 151 | system.append(forces) 152 | 153 | output = setup_output(keep_trajectory) 154 | 155 | simulation = ET.Element("simulation", mode="static") 156 | simulation.append(output) 157 | for socket in sockets: 158 | simulation.append(socket) 159 | simulation.append(system) 160 | total_steps = ET.Element("total_steps") 161 | total_steps.text = " {} ".format(steps) 162 | simulation.append(total_steps) 163 | 164 | context = psiflow.context() 165 | definition = context.definitions["ModelEvaluation"] 166 | input_future = save_xml( 167 | simulation, 168 | outputs=[context.new_file("input_", ".xml")], 169 | ).outputs[0] 170 | inputs = [ 171 | input_future, 172 | Dataset([state]).extxyz, 173 | ] 174 | inputs += [h.serialize_function(dtype="float64") for h in hamiltonians_map.values()] 175 | 176 | hamiltonian_names = list(hamiltonians_map.keys()) 177 | client_args = [] 178 | for name in hamiltonian_names: 179 | args = definition.get_client_args(name, 1, "minimize") 180 | client_args.append(args) 181 | outputs = [context.new_file("data_", ".xyz")] 182 | if keep_trajectory: 183 | outputs.append(context.new_file("opt_", ".xyz")) 184 | 185 | command_server = definition.server_command() 186 | command_client = definition.client_command() 187 | resources = definition.wq_resources(1) 188 | 189 | result = execute_ipi( 190 | hamiltonian_names, 191 | client_args, 192 | keep_trajectory, 193 | command_server, 194 | command_client, 195 | env_vars=definition.env_vars, 196 | stdout=parsl.AUTO_LOGNAME, 197 | stderr=parsl.AUTO_LOGNAME, 198 | inputs=inputs, 199 | outputs=outputs, 200 | parsl_resource_specification=resources, 201 | ) 202 | 203 | final = Dataset(None, result.outputs[0]).evaluate(hamiltonian)[-1] 204 | if keep_trajectory: 205 | trajectory = Dataset(None, result.outputs[1]) 206 | return final, trajectory 207 | else: 208 | return final 209 | 210 | 211 | @join_app 212 | @typeguard.typechecked 213 | def _optimize_dataset( 214 | geometries: list[Geometry], *args, outputs: list = [], **kwargs 215 | ) -> AppFuture: 216 | assert not kwargs.get("keep_trajectory", False) 217 | optimized = [] 218 | for geometry in geometries: 219 | optimized.append(optimize(geometry, *args, **kwargs)) 220 | return write_frames(*optimized, outputs=[outputs[0]]) 221 | 222 | 223 | @typeguard.typechecked 224 | def optimize_dataset(dataset: Dataset, *args, **kwargs) -> Dataset: 225 | extxyz = _optimize_dataset( 226 | dataset.geometries(), 227 | *args, 228 | outputs=[psiflow.context().new_file("data_", ".xyz")], 229 | **kwargs, 230 | ).outputs[0] 231 | return Dataset(None, extxyz) 
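#
# Hedged usage sketch (not part of the module), assuming an existing
# Hamiltonian `mace` and Geometry `start`:
#
#   final = optimize(start, mace, steps=2000, ftol=1e-4)
#   final, traj = optimize(start, mace, keep_trajectory=True)
#   relaxed = optimize_dataset(dataset, mace)
#
# etol/ptol/ftol are given in ASE units (eV, A, eV/A); setup_motion converts
# them to atomic units for i-PI.
#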
232 | -------------------------------------------------------------------------------- /psiflow/sampling/order.py: -------------------------------------------------------------------------------- 1 | """ 2 | TODO: these imports are outdated.. Is this module still used? 3 | """ 4 | from __future__ import annotations # necessary for type-guarding class methods 5 | 6 | from functools import partial 7 | from typing import Optional, Union 8 | 9 | import typeguard 10 | from ase.units import kJ, mol 11 | from parsl.app.app import python_app 12 | from parsl.dataflow.futures import AppFuture 13 | 14 | import psiflow 15 | from psiflow.data import Dataset, batch_apply 16 | from psiflow.geometry import Geometry 17 | from psiflow.hamiltonians._plumed import PlumedHamiltonian 18 | from psiflow.hamiltonians.hamiltonian import Hamiltonian 19 | 20 | 21 | @typeguard.typechecked 22 | def insert_in_state( 23 | state: Geometry, 24 | name: str, 25 | ) -> Geometry: 26 | value = state.energy 27 | state.order[name] = value 28 | state.energy = None 29 | return state 30 | 31 | 32 | @typeguard.typechecked 33 | def _insert( 34 | state_or_states: Union[Geometry, list[Geometry]], 35 | name: str, 36 | ) -> Union[list[Geometry], Geometry]: 37 | if not isinstance(state_or_states, list): 38 | return insert_in_state(state_or_states, name) 39 | else: 40 | for state in state_or_states: 41 | insert_in_state(state, name) # modify list in place 42 | return state_or_states 43 | 44 | 45 | insert = python_app(_insert, executors=["default_threads"]) 46 | 47 | 48 | @typeguard.typechecked 49 | def insert_in_dataset( 50 | data: Dataset, 51 | name: str, 52 | ) -> Dataset: 53 | geometries = insert( 54 | data.geometries(), 55 | name, 56 | ) 57 | return Dataset(geometries) 58 | 59 | 60 | @typeguard.typechecked 61 | class OrderParameter: 62 | # TODO: batched evaluation 63 | 64 | def __init__(self, name: str): 65 | self.name = name 66 | 67 | def evaluate(self, state: Union[Geometry, AppFuture]) -> AppFuture: 68 | raise NotImplementedError 69 | 70 | def __eq__(self, other): 71 | raise NotImplementedError 72 | 73 | 74 | @typeguard.typechecked 75 | @psiflow.serializable 76 | class HamiltonianOrderParameter(OrderParameter): 77 | name: str 78 | hamiltonian: Hamiltonian 79 | 80 | def __init__(self, name: str, hamiltonian: Hamiltonian): 81 | super().__init__(name) 82 | self.hamiltonian = hamiltonian 83 | 84 | def evaluate( 85 | self, 86 | arg: Union[Dataset, Geometry, AppFuture[Geometry]], 87 | batch_size: Optional[int] = 100, 88 | ) -> Union[Dataset, AppFuture]: 89 | if isinstance(arg, Dataset): 90 | # avoid batching the dataset twice: 91 | # apply hamiltonian in batched sense and put insert afterwards 92 | funcs = [ 93 | self.hamiltonian.single_evaluate, 94 | partial(insert_in_dataset, name=self.name), 95 | ] 96 | future = batch_apply( 97 | funcs, 98 | batch_size, 99 | arg.length(), 100 | inputs=[arg.extxyz], 101 | outputs=[psiflow.context().new_file("data_", ".xyz")], 102 | ) 103 | return Dataset(None, future.outputs[0]) 104 | else: 105 | state = self.hamiltonian.evaluate(arg) 106 | return insert(state, self.name) 107 | 108 | def __eq__(self, other): 109 | if type(other) is not HamiltonianOrderParameter: 110 | return False 111 | return self.hamiltonian == other.hamiltonian 112 | 113 | @classmethod 114 | def from_plumed( 115 | cls, name: str, hamiltonian: PlumedHamiltonian 116 | ) -> HamiltonianOrderParameter: 117 | assert name in hamiltonian.plumed_input() 118 | action_prefixes = [ 119 | "ABMD", 120 | "BIASVALUE", 121 | "EXTENDED_LAGRANGIAN", 122 
| "EXTERNAL", 123 | "LOWER_WALLS", 124 | "MAXENT", 125 | "METAD", 126 | "MOVINGRESTRAINT", 127 | "PBMETAD", 128 | "RESTRAINT", 129 | "UPPER_WALLS", 130 | "RESTART", 131 | ] 132 | lines = hamiltonian.plumed_input().split("\n") 133 | new_lines = [] 134 | for line in lines: 135 | found = [p in line for p in action_prefixes] 136 | if sum(found, start=False): 137 | continue 138 | else: 139 | new_lines.append(line) 140 | ev_to_kjmol = 1 / ( 141 | kJ / mol 142 | ) # compensate plumed to ASE unit conversion of 'energy' 143 | new_lines.append( 144 | "rescaled: MATHEVAL ARG={} FUNC=x*{} PERIODIC=NO".format(name, ev_to_kjmol) 145 | ) 146 | new_lines.append("BIASVALUE ARG=rescaled") 147 | return HamiltonianOrderParameter( 148 | name=name, 149 | hamiltonian=PlumedHamiltonian(plumed_input="\n".join(new_lines)), 150 | ) 151 | -------------------------------------------------------------------------------- /psiflow/sampling/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | import numpy as np 4 | import typeguard 5 | from ase.data import chemical_symbols 6 | 7 | from psiflow.functions import Function 8 | 9 | # do not use psiflow apps; parsl config is not loaded in this process! 10 | from psiflow.geometry import Geometry 11 | 12 | # only import stuff which does not issue useless warnings; otherwise 13 | # python -c 'import .client; print(client.__file__)' is going to be polluted 14 | # with those import-related warnings 15 | 16 | 17 | class ForceMagnitudeException(Exception): 18 | pass 19 | 20 | 21 | class TimeoutException(Exception): 22 | pass 23 | 24 | 25 | def timeout_handler(signum, frame): 26 | raise TimeoutException 27 | 28 | 29 | @typeguard.typechecked 30 | def check_forces( 31 | forces: np.ndarray, 32 | geometry: Any, 33 | max_force: float, 34 | ): 35 | if not isinstance(geometry, Geometry): 36 | geometry = Geometry.from_atoms(geometry) 37 | 38 | exceeded = np.linalg.norm(forces, axis=1) > max_force 39 | if np.sum(exceeded): 40 | indices = np.arange(len(geometry))[exceeded] 41 | numbers = geometry.per_atom.numbers[exceeded] 42 | symbols = [chemical_symbols[n] for n in numbers] 43 | raise ForceMagnitudeException( 44 | "\nforce exceeded {} eV/A for atoms {}" 45 | " with chemical elements {}\n".format( 46 | max_force, 47 | indices, 48 | symbols, 49 | ) 50 | ) 51 | else: 52 | pass 53 | 54 | 55 | class FunctionDriver: 56 | 57 | def __init__( 58 | self, 59 | template: Geometry, 60 | function: Function, 61 | max_force: Optional[float], 62 | verbose: bool = True, # used by i-PI internally? 
63 |         error_msg="",
64 |     ):
65 |         self.verbose = verbose
66 |         self.template = template
67 |         self.function = function
68 |         self.max_force = max_force
69 |         self.error_msg = error_msg  # kept for compatibility with i-PI's driver interface
70 |
71 |     def check_arguments(self):
72 |         pass
73 |
74 |     def __call__(self, cell, pos):
75 |         from ipi.utils.units import unit_to_internal, unit_to_user
76 |
77 |         pos = unit_to_user("length", "angstrom", pos)
78 |         cell = unit_to_user("length", "angstrom", cell.T)
79 |
80 |         self.template.per_atom.positions[:] = pos
81 |         if self.template.periodic:
82 |             self.template.cell[:] = cell
83 |
84 |         outputs = self.function(self.template)
85 |         energy = outputs["energy"]
86 |         forces = outputs["forces"]
87 |         stress = outputs["stress"]
88 |
89 |         # raise if any force magnitude exceeds max_force
90 |         if self.max_force is not None:
91 |             check_forces(forces, self.template, self.max_force)
92 |
93 |         # convert to i-PI internal (atomic) units
94 |         pot_ipi = np.asarray(
95 |             unit_to_internal("energy", "electronvolt", energy), np.float64
96 |         )
97 |         force_ipi = np.asarray(unit_to_internal("force", "ev/ang", forces), np.float64)
98 |         vir_calc = -stress * self.template.volume
99 |         vir_ipi = np.array(
100 |             unit_to_internal("energy", "electronvolt", vir_calc.T), dtype=np.float64
101 |         )
102 |         extras = ""
103 |
104 |         return pot_ipi, force_ipi, vir_ipi, extras
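# A sketch of how the driver is exercised: in practice the psiflow-client
# entry point constructs it and hands it to i-PI's socket client, but it can
# also be called directly (values below are hypothetical; cell and positions
# are expected in i-PI atomic units):
#
#     driver = FunctionDriver(template=geometry, function=function, max_force=30.0)
#     pot, forces, virial, extras = driver(cell, positions)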
return "\n".join(lines) 55 | -------------------------------------------------------------------------------- /psiflow/utils/apps.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # necessary for type-guarding class methods 2 | 3 | import logging 4 | import sys 5 | from typing import Any, Union 6 | 7 | import numpy as np 8 | import typeguard 9 | from parsl.app.app import python_app 10 | from parsl.data_provider.files import File 11 | 12 | 13 | @typeguard.typechecked 14 | def get_attribute(obj: Any, *attribute_names: str) -> Any: 15 | for name in attribute_names: 16 | obj = getattr(obj, name) 17 | return obj 18 | 19 | 20 | @typeguard.typechecked 21 | def _boolean_or(*args: Union[bool, np.bool_]) -> bool: 22 | return any(args) 23 | 24 | 25 | boolean_or = python_app(_boolean_or, executors=["default_threads"]) 26 | 27 | 28 | def _multiply(a, b): 29 | return a * b 30 | 31 | 32 | multiply = python_app(_multiply, executors=["default_threads"]) 33 | 34 | 35 | @typeguard.typechecked 36 | def setup_logger(module_name): 37 | # Create logger instance for the module 38 | module_logger = logging.getLogger(module_name) 39 | 40 | # Set the desired format string 41 | formatter = logging.Formatter("%(name)s - %(message)s") 42 | 43 | # Create handler to send logs to stdout 44 | stdout_handler = logging.StreamHandler(sys.stdout) 45 | stdout_handler.setFormatter(formatter) 46 | 47 | # Add handler to the logger instance 48 | module_logger.addHandler(stdout_handler) 49 | 50 | # Set the logging level for the logger 51 | module_logger.setLevel(logging.INFO) 52 | 53 | return module_logger 54 | 55 | 56 | def _compute_sum(a, b): 57 | return np.add(a, b) 58 | 59 | 60 | compute_sum = python_app(_compute_sum, executors=["default_threads"]) 61 | 62 | 63 | @typeguard.typechecked 64 | def _combine_futures(inputs: list[Any]) -> list[Any]: 65 | return list(inputs) 66 | 67 | 68 | combine_futures = python_app(_combine_futures, executors=["default_threads"]) 69 | 70 | 71 | @typeguard.typechecked 72 | def _copy_data_future( 73 | pass_on_exist: bool = False, 74 | inputs: list[File] = [], 75 | outputs: list[File] = [], 76 | ) -> None: 77 | import shutil 78 | from pathlib import Path 79 | 80 | assert len(inputs) == 1 81 | assert len(outputs) == 1 82 | if Path(outputs[0]).is_file() and pass_on_exist: 83 | return None 84 | if Path(inputs[0]).is_file(): 85 | shutil.copyfile(inputs[0], outputs[0]) 86 | else: # no need to copy empty file 87 | pass 88 | 89 | 90 | copy_data_future = python_app(_copy_data_future, executors=["default_threads"]) 91 | 92 | 93 | @typeguard.typechecked 94 | def _copy_app_future(future: Any, inputs: list = [], outputs: list = []) -> Any: 95 | # inputs/outputs to enforce additional dependencies 96 | from copy import deepcopy 97 | 98 | return deepcopy(future) 99 | 100 | 101 | copy_app_future = python_app(_copy_app_future, executors=["default_threads"]) 102 | 103 | 104 | @typeguard.typechecked 105 | def _log_message(logger, message, *futures): 106 | if len(futures) > 0: 107 | logger.info(message.format(*futures)) 108 | else: 109 | logger.info(message) 110 | 111 | 112 | log_message = python_app(_log_message, executors=["default_threads"]) 113 | 114 | 115 | def _pack(*args): 116 | return args 117 | 118 | 119 | pack = python_app(_pack, executors=["default_threads"]) 120 | 121 | 122 | @typeguard.typechecked 123 | def _unpack_i(result: Union[np.ndarray, list, tuple], i: int) -> Any: 124 | assert i <= len(result) 125 | return result[i] 126 | 127 | 
128 | unpack_i = python_app(_unpack_i, executors=["default_threads"]) 129 | 130 | 131 | @typeguard.typechecked 132 | def _concatenate(*arrays: np.ndarray) -> np.ndarray: 133 | return np.concatenate(arrays) 134 | 135 | 136 | concatenate = python_app(_concatenate, executors=["default_threads"]) 137 | 138 | 139 | @typeguard.typechecked 140 | def _isnan(a: Union[float, np.ndarray]) -> bool: 141 | return bool(np.any(np.isnan(a))) 142 | 143 | 144 | isnan = python_app(_isnan, executors=["default_threads"]) 145 | -------------------------------------------------------------------------------- /psiflow/utils/io.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from typing import Any 3 | 4 | import numpy as np 5 | import typeguard 6 | from parsl.app.app import python_app 7 | from parsl.data_provider.files import File 8 | 9 | 10 | @typeguard.typechecked 11 | def _save_yaml( 12 | input_dict: dict, 13 | outputs: list[File] = [], 14 | **extra_keys: Any, 15 | ) -> None: 16 | import yaml 17 | 18 | def _make_dict_safe(arg): 19 | # walks through dict and converts numpy types to python natives 20 | for key in list(arg.keys()): 21 | if hasattr(arg[key], "item"): 22 | arg[key] = arg[key].item() 23 | elif type(arg[key]) is dict: 24 | arg[key] = _make_dict_safe(arg[key]) 25 | else: 26 | pass 27 | return arg 28 | 29 | input_dict = dict(input_dict) 30 | for key, value in extra_keys.items(): 31 | assert key not in input_dict 32 | input_dict[key] = value 33 | input_dict = _make_dict_safe(input_dict) 34 | with open(outputs[0], "w") as f: 35 | yaml.dump(input_dict, f, default_flow_style=False) 36 | 37 | 38 | save_yaml = python_app(_save_yaml, executors=["default_threads"]) 39 | 40 | 41 | @typeguard.typechecked 42 | def _save_xml( 43 | element: ET.Element, 44 | outputs: list = [], 45 | ) -> None: 46 | tree = ET.ElementTree(element) 47 | ET.indent(tree, " ") 48 | tree.write(outputs[0], encoding="utf-8", xml_declaration=True) 49 | 50 | 51 | save_xml = python_app(_save_xml, executors=["default_threads"]) 52 | 53 | 54 | @typeguard.typechecked 55 | def _load_numpy(inputs: list[File] = [], **kwargs) -> np.ndarray: 56 | return np.loadtxt(inputs[0], **kwargs) 57 | 58 | 59 | load_numpy = python_app(_load_numpy, executors=["default_threads"]) 60 | 61 | 62 | @typeguard.typechecked 63 | def _read_yaml(inputs: list[File] = [], outputs: list[File] = []) -> dict: 64 | import yaml 65 | 66 | with open(inputs[0], "r") as f: 67 | config_dict = yaml.load(f, Loader=yaml.FullLoader) 68 | return config_dict 69 | 70 | 71 | read_yaml = python_app(_read_yaml, executors=["default_threads"]) 72 | 73 | 74 | @typeguard.typechecked 75 | def _save_txt(data: str, outputs: list[File] = []) -> None: 76 | with open(outputs[0], "w") as f: 77 | f.write(data) 78 | 79 | 80 | save_txt = python_app(_save_txt, executors=["default_threads"]) 81 | 82 | 83 | @typeguard.typechecked 84 | def _load_metrics(inputs: list = []) -> np.recarray: 85 | return np.load(inputs[0], allow_pickle=True) 86 | 87 | 88 | load_metrics = python_app(_load_metrics, executors=["default_threads"]) 89 | 90 | 91 | @typeguard.typechecked 92 | def _save_metrics(data: np.recarray, outputs: list = []) -> None: 93 | with open(outputs[0], "wb") as f: 94 | data.dump(f) 95 | 96 | 97 | save_metrics = python_app(_save_metrics, executors=["default_threads"]) 98 | 99 | 100 | @typeguard.typechecked 101 | def _dump_json( 102 | inputs: list = [], 103 | outputs: list = [], 104 | **kwargs, 105 | ) -> None: 106 | import json 107 | 
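    # numpy is imported inside the app body, a common parsl pattern which keeps
    # the app self-contained in case it is shipped to a remote worker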
108 |     import numpy as np
109 |
110 |     def convert_to_list(array):
111 |         if not isinstance(array, np.ndarray):
112 |             if isinstance(array, np.floating):  # catches np.float32/np.float64
113 |                 return float(array)
114 |             return array
115 |         as_list = []
116 |         for item in array:
117 |             as_list.append(convert_to_list(item))
118 |         return as_list
119 |
120 |     for name in list(kwargs.keys()):
121 |         value = kwargs[name]
122 |         if isinstance(value, np.ndarray):
123 |             value = convert_to_list(value)
124 |         elif isinstance(value, np.floating):
125 |             value = float(value)
126 |         kwargs[name] = value
127 |     with open(outputs[0], "w") as f:
128 |         f.write(json.dumps(kwargs))
129 |
130 |
131 | dump_json = python_app(_dump_json, executors=["default_threads"])
132 | -------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 |
5 |
6 | [project]
7 | name = "psiflow"
8 | version = "4.0.0"
9 | description = "Library for developing interatomic potentials"
10 | readme = "README.md"
11 | requires-python = ">=3.10"
12 | dependencies = [
13 |     "ase>=3.23.0",
14 |     "pyyaml>=6.0",
15 |     "numpy>=1.22.3, <2",
16 |     "parsl==2024.12.16",
17 |     "prettytable",
18 |     "psutil",
19 |     "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077",  # need 2024.1
20 |     "pytimeparse",
21 | ]
22 |
23 |
24 | [project.scripts]
25 | psiflow-client = "psiflow.sampling.client:main"
26 | psiflow-server = "psiflow.sampling.server:main"
27 | psiflow-mace-train = "psiflow.models.mace_utils:main"
28 | psiflow-ase-opt = "psiflow.sampling._ase:main"
29 |
30 |
31 | [project.optional-dependencies]
32 | docs = [
33 |     "mkdocs>=1.4.2",
34 |     "mkdocs-autorefs>=0.4.1",
35 |     "mkdocs-material>=9.0.3",
36 |     "mkdocs-material-extensions>=1.1.1",
37 |     "mkdocstrings>=0.19.1",
38 |     "mkdocstrings-python>=0.8.3",
39 | ]
40 | dev = [
41 |     "pre-commit",
42 |     "black",
43 |     "isort",
44 |     "flake8",
45 |     "flake8-bugbear",
46 |     "flake8-pyproject",
47 |     "pytest>=7.2.0",
48 |     "coverage>=6.5.0",
49 |     "coveralls>=3.3.1",
50 | ]
51 |
52 |
53 | [tool.setuptools.packages.find]
54 | include = [
55 |     "psiflow",
56 |     "psiflow.models",
57 |     "psiflow.data",
58 |     "psiflow.reference",
59 |     "psiflow.sampling",
60 |     "psiflow.utils",
61 |     "psiflow.free_energy",
62 | ]
63 |
64 |
65 | [tool.flake8]
66 | max-line-length = 88
67 | extend-ignore = ["E203", "E501", "E704", "B006"]
68 | #select = C,E,F,W,B,B950
69 |
70 | [tool.isort]
71 | profile = "black"
72 |
73 | [tool.pytest.ini_options]
74 | log_cli = 0
75 | addopts = [
76 |     "--basetemp=pytest-tmp",  # /tmp/ may be different for each worker!
77 |     "--import-mode=append",
78 |     "--psiflow-config=configs/threadpool.yaml",
79 |     "-W ignore::DeprecationWarning",
80 |     "--log-cli-level=WARNING",
81 | ]
82 | testpaths = ["tests"]
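# Example local invocation; both custom flags are registered in
# tests/conftest.py below, and --psiflow-config may point at any of the
# files in configs/:
#
#   pip install .[dev]
#   pytest --skip-gpu --psiflow-config=configs/threadpool.yaml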
77 | "--import-mode=append", 78 | "--psiflow-config=configs/threadpool.yaml", 79 | "-W ignore::DeprecationWarning", 80 | "--log-cli-level=WARNING", 81 | ] 82 | testpaths = ["tests"] 83 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from dataclasses import asdict 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import parsl 7 | import pytest 8 | import yaml 9 | from ase import Atoms 10 | from ase.build import bulk, make_supercell 11 | from ase.calculators.emt import EMT 12 | 13 | import psiflow 14 | from psiflow.data import Dataset 15 | from psiflow.geometry import Geometry 16 | from psiflow.models import MACE, MACEConfig 17 | 18 | 19 | def pytest_addoption(parser): 20 | parser.addoption( 21 | "--psiflow-config", 22 | action="store", 23 | help="test", 24 | ) 25 | parser.addoption( 26 | "--skip-gpu", 27 | action="store_true", 28 | default=False, 29 | help="whether to run tests which require a GPU", 30 | ) 31 | 32 | 33 | @pytest.fixture(scope="session") 34 | def gpu(request): 35 | if request.config.getoption("--skip-gpu"): 36 | pytest.skip("skipping tests which require GPU") 37 | 38 | 39 | @pytest.fixture(scope="session", autouse=True) 40 | def context(request, tmp_path_factory): 41 | try: 42 | context = psiflow.context() 43 | except RuntimeError: 44 | path_config = Path(request.config.getoption("--psiflow-config")) 45 | with open(path_config, "r") as f: 46 | psiflow_config = yaml.safe_load(f) 47 | psiflow_config["path"] = tmp_path_factory.mktemp("psiflow_internal") 48 | psiflow.load(psiflow_config) 49 | context = psiflow.context() # noqa: F841 50 | yield 51 | parsl.dfk().cleanup() 52 | 53 | 54 | @pytest.fixture(scope="session") 55 | def mace_config(): 56 | mace_config = MACEConfig() 57 | mace_config.num_radial_basis = 3 58 | mace_config.num_cutoff_basis = 2 59 | mace_config.max_ell = 1 60 | mace_config.correlation = 1 61 | mace_config.MLP_irreps = "2x0e" 62 | mace_config.num_channels = 2 63 | mace_config.max_L = 0 64 | mace_config.r_max = 4 65 | mace_config.radial_MLP = "[4]" 66 | return asdict(mace_config) 67 | 68 | 69 | def generate_emt_cu_data(nstates, amplitude, supercell=None): 70 | if supercell is None: 71 | supercell = np.eye(3) 72 | atoms = make_supercell(bulk("Cu", "fcc", a=3.6, cubic=True), supercell) 73 | atoms.calc = EMT() 74 | pos = atoms.get_positions() 75 | box = atoms.get_cell() 76 | atoms_list = [] 77 | for _ in range(nstates): 78 | atoms.set_positions( 79 | pos + np.random.uniform(-amplitude, amplitude, size=(len(atoms), 3)) 80 | ) 81 | atoms.set_cell(box + np.random.uniform(-amplitude, amplitude, size=(3, 3))) 82 | _atoms = atoms.copy() 83 | _atoms.calc = None 84 | _atoms.info["energy"] = atoms.get_potential_energy() 85 | _atoms.info["stress"] = atoms.get_stress(voigt=False) 86 | _atoms.arrays["forces"] = atoms.get_forces() 87 | # make content heterogeneous to test per_element functions 88 | _atoms.numbers[0] = 1 89 | _atoms.symbols[0] = "H" 90 | atoms_list.append(_atoms) 91 | return atoms_list 92 | 93 | 94 | @pytest.fixture 95 | def dataset(context): 96 | data = generate_emt_cu_data(20, 0.2) 97 | data += generate_emt_cu_data(5, 0.15, supercell=np.diag([1, 2, 1])) 98 | data_ = [Geometry.from_atoms(atoms) for atoms in data] 99 | return Dataset(data_).align_axes() 100 | 101 | 102 | @pytest.fixture(scope="session") 103 | def mace_model(mace_config): 104 | # manually recreate dataset with 
105 |     data = generate_emt_cu_data(20, 0.2)
106 |     data_ = [Geometry.from_atoms(atoms) for atoms in data]
107 |     dataset = Dataset(data_)
108 |     model = MACE(**mace_config)
109 |     # add an additional state to initialize other atomic numbers
110 |     # (mace cannot handle partially periodic datasets)
111 |     geometry = Geometry.from_data(
112 |         numbers=np.array(2 * [101]),
113 |         positions=np.array([[0, 0, 0], [2, 0, 0]]),
114 |         cell=2 * np.eye(3),
115 |     )
116 |     geometry.energy = -1.0
117 |     geometry.per_atom.forces[:] = np.random.uniform(size=(2, 3))
118 |     model.initialize(dataset[:5] + Dataset([geometry]))
119 |     return model
120 |
121 |
122 | @pytest.fixture
123 | def dataset_h2(context):
124 |     h2 = Atoms(
125 |         numbers=[1, 1],
126 |         positions=[[0, 0, 0], [0.74, 0, 0]],
127 |         pbc=False,
128 |     )
129 |     data = [h2.copy() for i in range(20)]
130 |     for atoms in data:
131 |         atoms.set_positions(
132 |             atoms.get_positions() + np.random.uniform(-0.05, 0.05, size=(2, 3))
133 |         )
134 |     return Dataset([Geometry.from_atoms(a) for a in data])
135 |
136 |
137 | @pytest.fixture
138 | def checkpoint():
139 |     checkpoint_str = """
140 | <simulation verbosity='quiet'>
141 |    <output prefix='output'>
142 |       <checkpoint stride='100' overwrite='True'> 1 </checkpoint>
143 |       <properties stride='100'> [ time, temperature, potential ] </properties>
144 |    </output>
145 |    <total_steps> 100 </total_steps>
146 |    <ffsocket mode='unix' name='einsteincrystal0' pbc='False'>
147 |       <address> cSzwsJ2A/einsteincrystal0 </address>
148 |       <timeout> 8.33333333e-02 </timeout>
149 |    </ffsocket>
150 |    <ffsocket mode='unix' name='plumedhamiltonian0' pbc='False'>
151 |       <address> cSzwsJ2A/plumedhamiltonian0 </address>
152 |       <timeout> 8.33333333e-02 </timeout>
153 |    </ffsocket>
154 |    <system prefix='walker-0'>
155 |       <forces>
156 |          <force forcefield='einsteincrystal0' weight='1.0'>
157 |          </force>
158 |          <force forcefield='plumedhamiltonian0' weight='1.0'>
159 |          </force>
160 |       </forces>
161 |       <ensemble>
162 |          <temperature units='atomic_unit'> 1.90008912e-03 </temperature>
163 |          <eens units='atomic_unit'> 4.11423554e-03 </eens>
164 |          <bias_weights shape='(1)'> [ 1.00000000e+00 ] </bias_weights>
165 |       </ensemble>
166 |       <motion mode='dynamics'>
167 |          <dynamics mode='nvt'>
168 |             <thermostat mode='langevin'>
169 |                <tau units='atomic_unit'>
170 |                   4.13413730e+03
171 |                </tau> </thermostat>
172 |             <timestep units='atomic_unit'> 2.06706865e+01 </timestep>
173 |             <nmts shape='(1)'> [ 1 ] </nmts>
174 |          </dynamics>
175 |       </motion>
176 |       <beads natoms='4' nbeads='1'>
177 |          <q shape='(1, 12)'>
178 |             [ 1.44513572e-01, -2.22608601e-02, 6.90340566e-02, -1.48068714e-01, 3.67026570e+00,
179 |               3.24415892e+00, 3.09455639e+00, -2.66306646e-01, 3.36282329e+00, 3.54200180e+00,
180 |               3.39685661e+00, 5.46722856e-01 ]
181 |          </q>
182 |          <p shape='(1, 12)'>
183 |             [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
184 |               0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
185 |               0.00000000e+00, 0.00000000e+00 ]
186 |          </p>
187 |          <m shape='(4)'> [ 1.83747161e+03, 1.15837273e+05, 1.15837273e+05, 1.15837273e+05 ] </m>
188 |          <names shape='(4)'> [ H, Cu, Cu, Cu ] </names>
189 |       </beads>
190 |       <cell shape='(3, 3)'>
191 |          [ 1e+00, 1e-01, 0, 0.00000000e+00, 2e+00,
192 |            0, 0.00000000e+00, 0.00000000e+00, 3e+00 ]
193 |       </cell>
194 |    </system>
195 |    <system prefix='walker-1'>
196 |       <forces>
197 |          <force forcefield='einsteincrystal0' weight='1.0'>
198 |          </force>
199 |          <force forcefield='plumedhamiltonian0' weight='1.0'>
200 |          </force>
201 |       </forces>
202 |       <ensemble>
203 |          <temperature units='atomic_unit'> 1.90008912e-03 </temperature>
204 |          <eens units='atomic_unit'> 4.11423554e-03 </eens>
205 |          <bias_weights shape='(1)'> [ 1.00000000e+00 ] </bias_weights>
206 |       </ensemble>
207 |       <motion mode='dynamics'>
208 |          <dynamics mode='nvt'>
209 |             <thermostat mode='langevin'>
210 |                <tau units='atomic_unit'>
211 |                   4.13413730e+03
212 |                </tau> </thermostat>
213 |             <timestep units='atomic_unit'> 2.06706865e+01 </timestep>
214 |             <nmts shape='(1)'> [ 1 ] </nmts>
215 |          </dynamics>
216 |       </motion>
217 |       <beads natoms='4' nbeads='1'>
218 |          <q shape='(1, 12)'>
219 |             [ 1.44513572e-01, -2.22608601e-02, 6.90340566e-02, -1.48068714e-01, 3.67026570e+00,
220 |               3.24415892e+00, 3.09455639e+00, -2.66306646e-01, 3.36282329e+00, 3.54200180e+00,
221 |               3.39685661e+00, 5.46722856e-01 ]
222 |          </q>
223 |          <p shape='(1, 12)'>
224 |             [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
225 |               0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
226 |               0.00000000e+00, 0.00000000e+00 ]
227 |          </p>
228 |          <m shape='(4)'> [ 1.83747161e+03, 1.15837273e+05, 1.15837273e+05, 1.15837273e+05 ] </m>
229 |          <names shape='(4)'> [ H, Cu, Cu, Cu ] </names>
230 |       </beads>
231 |       <cell shape='(3, 3)'>
232 |          [ 6.92067797e+00, 1.35926184e-01, -3.29542567e-02, 0.00000000e+00, 6.46614176e+00,
233 |            -3.74701247e-01, 0.00000000e+00, 0.00000000e+00, 6.45073059e+00 ]
234 |       </cell>
235 |    </system>
236 | </simulation>
237 | """ 238 | return ET.ElementTree(element=ET.fromstring(checkpoint_str)) 239 | -------------------------------------------------------------------------------- /tests/test_free_energy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ase.units import _c, kB, second 3 | 4 | from psiflow.free_energy import ( 5 | Integration, 6 | compute_frequencies, 7 | compute_harmonic, 8 | harmonic_free_energy, 9 | ) 10 | from psiflow.geometry import check_equality 11 | from psiflow.hamiltonians import EinsteinCrystal, Harmonic, MACEHamiltonian 12 | from psiflow.sampling.ase import optimize 13 | 14 | 15 | def test_integration_simple(dataset): 16 | dataset = dataset[:10] 17 | einstein = EinsteinCrystal(dataset[1], force_constant=2) 18 | geometry = optimize( 19 | dataset[3], 20 | einstein, 21 | mode='fix_cell', 22 | f_max=1e-4, 23 | ) 24 | hessian = compute_harmonic( 25 | geometry, 26 | einstein, 27 | pos_shift=5e-4, 28 | ) 29 | harmonic = Harmonic(geometry, hessian) 30 | 31 | integration = Integration( 32 | harmonic, 33 | temperatures=[300, 400], 34 | delta_hamiltonian=(-0.1) * harmonic, 35 | delta_coefficients=np.array([0.0, 0.5, 1.0]), 36 | ) 37 | walkers = integration.create_walkers( 38 | dataset, 39 | initialize_by="quench", 40 | ) 41 | for walker in walkers: 42 | assert check_equality(walker.start, dataset[1]).result() 43 | 44 | assert len(integration.states) == 6 45 | 46 | integration.sample(steps=100, step=6) 47 | integration.compute_gradients() 48 | for i, state in enumerate(integration.states): 49 | assert state.gradients["delta"] is not None 50 | assert state.gradients["temperature"] is not None 51 | 52 | # manual computation of delta gradient 53 | delta = -0.1 * harmonic 54 | energies = delta.compute(integration.outputs[i].trajectory, "energy") 55 | assert np.allclose( 56 | state.gradients["delta"].result(), 57 | np.mean(energies.result()) / (kB * state.temperature), 58 | ) 59 | 60 | hessian = hessian.result() 61 | frequencies0 = compute_frequencies(hessian, geometry) 62 | frequencies1 = compute_frequencies(hessian * 0.9, geometry) 63 | F0 = harmonic_free_energy(frequencies0, 300).result() 64 | F1 = harmonic_free_energy(frequencies1, 300).result() 65 | 66 | integrated = integration.along_delta(temperature=300).result() 67 | assert len(integrated) == 3 68 | print("\nalong delta") 69 | print(" computed delta_F: {}".format(integrated[-1])) 70 | print("theoretical delta_F: {}".format(F1 - F0)) 71 | print("") 72 | 73 | # integrated = integration.along_temperature(delta_coefficient=1.0).result() 74 | # assert len(integrated) == 2 75 | # assert np.allclose(integrated[0], 0.0) 76 | # F2 = np.sum(compute_free_energy(frequencies, 400).result()) 77 | # print('\nalong temperature') 78 | # print(' computed delta_F: {}'.format(integrated[-1] / (kB * 400))) 79 | # print('theoretical delta_F: {}'.format(F2 / (kB * 400) - F1 / (kB * 300))) 80 | 81 | 82 | def test_integration_temperature(dataset): 83 | einstein = EinsteinCrystal(dataset[0], force_constant=1) 84 | integration = Integration( 85 | hamiltonian=einstein, 86 | temperatures=[300, 400], 87 | pressure=0.0, 88 | ) 89 | integration.create_walkers(dataset[:3]) 90 | integration.sample(steps=10, step=1) 91 | integration.compute_gradients() 92 | gradient0 = integration.states[0].gradients["temperature"] 93 | 94 | integration = Integration( 95 | hamiltonian=einstein, 96 | temperatures=[300, 400], 97 | ) 98 | integration.create_walkers(dataset[:3]) 99 | integration.sample(steps=10, step=1) 100 
|     integration.compute_gradients()
101 |     gradient1 = integration.states[0].gradients["temperature"]
102 |     assert np.allclose(gradient0.result(), gradient1.result())
103 |
104 |
105 | def test_phonons(dataset):
106 |     reference = dataset[2].result()
107 |     constant = 10
108 |     einstein = EinsteinCrystal(reference, force_constant=constant)
109 |
110 |     hessian = compute_harmonic(
111 |         reference,
112 |         einstein,
113 |         asr="none",  # einstein == translationally VARIANT
114 |     )
115 |     assert np.allclose(
116 |         hessian.result(), constant * np.eye(3 * len(reference)), rtol=1e-4
117 |     )
118 |
119 |
120 | def test_dihydrogen(dataset_h2):
121 |     geometry = dataset_h2[0].result()
122 |     geometry.cell = 20 * np.eye(3)
123 |     hamiltonian = MACEHamiltonian.mace_mp0("small")
124 |     optimized = optimize(
125 |         geometry,
126 |         hamiltonian,
127 |         mode='fix_cell',
128 |         f_max=1e-4,
129 |     ).result()
130 |     assert optimized.energy is not None
131 |     assert np.linalg.norm(optimized.per_atom.forces) < 1e-2
132 |     hessian = compute_harmonic(
133 |         optimized,
134 |         hamiltonian,
135 |         asr="crystal",
136 |         pos_shift=0.001,
137 |     )
138 |     frequencies = compute_frequencies(hessian, geometry).result()
139 |     # check that the highest frequency corresponds to 3500 - 4000 inverse cm
140 |     frequencies_invcm = (frequencies * second) / (_c * 1e2)  # in invcm
141 |     assert np.abs(frequencies_invcm[-1] - 4000) < 1000
142 |
143 |
144 | def test_frequency_oscillator():
145 |     for quantum in [True, False]:
146 |         f0 = harmonic_free_energy(1.0, 300, quantum=quantum).result()
147 |         f1 = harmonic_free_energy(1.1, 300, quantum=quantum).result()
148 |         assert f1 > f0
149 |
150 |         f2 = harmonic_free_energy(1.0, 400, quantum=quantum).result()
151 |         assert f0 > f2
152 | -------------------------------------------------------------------------------- /tests/test_models.py: --------------------------------------------------------------------------------
1 | import copy
2 |
3 | import numpy as np
4 | from parsl.app.futures import DataFuture
5 |
6 | import psiflow
7 | from psiflow.data import compute_rmse
8 | from psiflow.hamiltonians import MACEHamiltonian
9 | from psiflow.models import MACE, load_model
10 |
11 |
12 | def test_mace_init(mace_config, dataset):
13 |     model = MACE(**mace_config)
14 |     assert "model_future" in model._files
15 |     assert model.model_future is None
16 |     model.initialize(dataset[:1])
17 |     assert model.model_future is not None
18 |
19 |     _config = model._config
20 |
21 |     data_str = psiflow.serialize(model).result()
22 |     model = psiflow.deserialize(data_str)
23 |
24 |     _config_ = model._config
25 |     for key, value in _config.items():
26 |         assert key in _config_
27 |         if type(value) is not list:
28 |             assert value == _config_[key]
29 |
30 |     config = copy.deepcopy(mace_config)
31 |     config["batch_size"] = (
32 |         100000  # bigger than ntrain --> should get reduced internally
33 |     )
34 |     model = MACE(**config)
35 |     model.seed = 1
36 |     model.initialize(dataset[:3])
37 |     assert isinstance(model.model_future, DataFuture)
38 |
39 |     # create hamiltonian and verify addition of atomic energies
40 |     hamiltonian = model.create_hamiltonian()
41 |     assert hamiltonian == model.create_hamiltonian()
42 |     energies = hamiltonian.compute(dataset, "energy").result()
43 |
44 |     nstates = dataset.length().result()
45 |     # energies = np.array([evaluated[i].result().energy for i in range(nstates)])
46 |     assert not np.any(np.isclose(energies, 0.0))  # no state has (near-)zero energy
47 |     energy_Cu = 3
48 |     energy_H = 7
49 |     atomic_energies = {
50 |         "Cu": energy_Cu,
51 |         "H": energy_H,
52 |     }
53 |     hamiltonian = 
MACEHamiltonian( 54 | hamiltonian.external, 55 | atomic_energies=atomic_energies, 56 | ) 57 | assert hamiltonian != model.create_hamiltonian() # atomic energies 58 | 59 | evaluated = dataset.evaluate(hamiltonian) 60 | for i in range(nstates): 61 | assert np.allclose( 62 | energies[i], 63 | evaluated.subtract_offset(Cu=energy_Cu, H=energy_H)[i].result().energy, 64 | ) 65 | 66 | energies = hamiltonian.compute(dataset, "energy").result() 67 | second = psiflow.deserialize(psiflow.serialize(hamiltonian).result()) 68 | energies_ = second.compute(dataset, "energy").result() 69 | assert np.allclose(energies, energies_) 70 | 71 | hamiltonian = model.create_hamiltonian() 72 | model.reset() 73 | model.initialize(dataset[:3]) 74 | assert hamiltonian != model.create_hamiltonian() 75 | 76 | 77 | def test_mace_train(gpu, mace_config, dataset, tmp_path): 78 | # as an additional verification, this test can be executed while monitoring 79 | # the mace logging, and in particular the rmse_r during training, to compare 80 | # it with the manually computed value 81 | training = dataset[:-5] 82 | validation = dataset[-5:] 83 | mace_config["start_swa"] = 100 84 | model = MACE(**mace_config) 85 | model.initialize(training) 86 | hamiltonian0 = model.create_hamiltonian() 87 | rmse0 = compute_rmse( 88 | validation.get("per_atom_energy"), 89 | validation.evaluate(hamiltonian0).get("per_atom_energy"), 90 | ) 91 | model.train(training, validation) 92 | hamiltonian1 = model.create_hamiltonian() 93 | rmse1 = compute_rmse( 94 | validation.get("per_atom_energy"), 95 | validation.evaluate(hamiltonian1).get("per_atom_energy"), 96 | ) 97 | assert rmse0.result() > rmse1.result() 98 | 99 | 100 | def test_mace_save_load(mace_config, dataset, tmp_path): 101 | model = MACE(**mace_config) 102 | model.add_atomic_energy("H", 3) 103 | model.add_atomic_energy("Cu", 4) 104 | model.save(tmp_path) 105 | model.initialize(dataset[:2]) 106 | e0 = model.create_hamiltonian().compute(dataset[3], "energy").result() 107 | 108 | psiflow.wait() 109 | assert (tmp_path / "MACE.yaml").exists() 110 | assert not (tmp_path / "MACE.pth").exists() 111 | 112 | model.save(tmp_path) 113 | psiflow.wait() 114 | assert (tmp_path / "MACE.pth").exists() 115 | 116 | model_ = load_model(tmp_path) 117 | assert type(model_) is MACE 118 | assert model_.model_future is not None 119 | e1 = model_.create_hamiltonian().compute(dataset[3], "energy").result() 120 | assert np.allclose(e0, e1, atol=1e-4) # up to single precision 121 | 122 | 123 | def test_mace_seed(mace_config): 124 | model = MACE(**mace_config) 125 | assert model.seed == 0 126 | model.seed = 111 127 | assert model.seed == 111 128 | model._config["seed"] = 112 129 | assert model.seed == 112 130 | -------------------------------------------------------------------------------- /tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from pathlib import Path 4 | from typing import Optional, Union 5 | 6 | import pytest 7 | import typeguard 8 | from parsl.data_provider.files import File 9 | from parsl.dataflow.futures import AppFuture 10 | 11 | import psiflow 12 | from psiflow.data import Dataset 13 | from psiflow.geometry import Geometry, NullState, new_nullstate 14 | from psiflow.utils.apps import copy_app_future 15 | 16 | 17 | def test_serial_simple(tmp_path): 18 | @psiflow.serializable 19 | class SomeSerial: 20 | pass 21 | 22 | @typeguard.typechecked 23 | class Test: 24 | foo: int 25 | bar: psiflow._DataFuture 26 | baz: 
Union[float, str]
27 |         bam: Optional[SomeSerial]
28 |         bao: SomeSerial
29 |         bap: list[SomeSerial]
30 |         baq: Union[Geometry, AppFuture]
31 |         bas: Geometry
32 |
33 |         def __init__(self, **kwargs):
34 |             for key, value in kwargs.items():
35 |                 setattr(self, key, value)
36 |
37 |     new_cls = psiflow.serializable(Test)
38 |     instance = new_cls(
39 |         foo=3,
40 |         bar=File("asdfl"),
41 |         baz="asdflk",
42 |         bam=None,
43 |         bao=SomeSerial(),
44 |         bap=[SomeSerial(), SomeSerial()],
45 |         baq=copy_app_future(NullState),
46 |         bas=new_nullstate(),
47 |     )
48 |     assert instance.foo == 3
49 |     assert instance._attrs["foo"] == 3
50 |
51 |     # test independence
52 |     instance._attrs["test"] = 1
53 |     instance_ = new_cls(foo=4, bar=File("asdfl"))
54 |     assert "test" not in instance_._attrs
55 |     assert instance_.foo == 4
56 |     assert instance.foo == 3
57 |
58 |     assert tuple(instance._files.keys()) == ("bar",)
59 |     assert tuple(instance._attrs.keys()) == ("foo", "baz", "test")
60 |     assert tuple(instance._serial.keys()) == ("bam", "bao", "bap")
61 |     assert type(instance._serial["bap"]) is list
62 |     assert len(instance._serial["bap"]) == 2
63 |     assert len(instance._geoms) == 2
64 |     assert "baq" in instance._geoms
65 |     assert "bas" in instance._geoms
66 |
67 |     # serialization/deserialization of 'complex' Test instance
68 |     json_dump = psiflow.serialize(instance).result()
69 |     instance_ = psiflow.deserialize(json_dump, custom_cls=[new_cls, SomeSerial])
70 |
71 |     assert instance.foo == instance_.foo
72 |     assert instance.bar.filepath == instance_.bar.filepath
73 |     assert instance.baz == instance_.baz
74 |     assert instance.bam == instance_.bam
75 |     assert type(instance_.bao) is SomeSerial
76 |     assert len(instance_.bap) == 2
77 |     assert type(instance_.bap[0]) is SomeSerial
78 |     assert type(instance_.bap[1]) is SomeSerial
79 |     assert id(instance) != id(instance_)
80 |     assert isinstance(instance_.baq, Geometry)
81 |     assert instance_.baq == NullState
82 |     assert instance_.bas == NullState
83 |
84 |     # check classes created before test execution, e.g. Dataset
85 |     data = Dataset([NullState])
86 |     assert "extxyz" in data._files
87 |     assert len(data._attrs) == 0
88 |     assert len(data._serial) == 0
89 |     with pytest.raises(typeguard.TypeCheckError):  # try something stupid
90 |         data.extxyz = 0
91 |
92 |     # test getter / setter
93 |     data.extxyz = File("some_file")
94 |     assert type(data.extxyz) is File
95 |
96 |     # test basic serialization
97 |     dumped_json = psiflow.serialize(data).result()
98 |     assert "Dataset" in dumped_json
99 |     data_dict = json.loads(dumped_json)
100 |     assert len(data_dict["Dataset"]["_attrs"]) == 0
101 |     assert len(data_dict["Dataset"]["_serial"]) == 0
102 |     assert len(data_dict["Dataset"]["_files"]) == 1
103 |     assert data_dict["Dataset"]["_files"]["extxyz"] == data.extxyz.filepath
104 |
105 |     # test copy_to serialization
106 |     data = Dataset([NullState])
107 |     data.extxyz.result()
108 |     filename = Path(data.extxyz.filepath).name
109 |     assert os.path.exists(data.extxyz.filepath)
110 |     dumped_json = psiflow.serialize(data, copy_to=tmp_path / "test").result()
111 |     os.remove(data.extxyz.filepath)
112 |     assert (tmp_path / "test").exists()
113 |     assert (tmp_path / "test" / filename).exists()  # new file
114 | --------------------------------------------------------------------------------