├── .github
│   ├── threadpool.yaml
│   └── workflows
│       └── run_pytest.yaml
├── .gitignore
├── Dockerfile
├── Dockerfile.cp2k
├── Dockerfile.gpaw
├── LICENSE
├── README.md
├── build_containers.sh
├── configs
│   ├── hortense.yaml
│   ├── lumi.yaml
│   ├── threadpool.yaml
│   └── wq.yaml
├── docs
│   ├── api_example.png
│   ├── configuration.md
│   ├── data.md
│   ├── free_energy.md
│   ├── hamiltonian.md
│   ├── hamiltonians_umbrella.svg
│   ├── icon.svg
│   ├── index.md
│   ├── install.sh
│   ├── installation.md
│   ├── learning.md
│   ├── logo_dark.png
│   ├── logo_light.png
│   ├── models.md
│   ├── overview.png
│   ├── reference.md
│   ├── sampling.md
│   └── wandb.png
├── examples
│   ├── README.md
│   ├── alanine_replica_exchange.py
│   ├── data
│   │   ├── acetaldehyde.xyz
│   │   ├── ani500k_cc_cpu.model
│   │   ├── cp2k_input.txt
│   │   ├── h2o_32.xyz
│   │   ├── vinyl_alcohol.xyz
│   │   └── water_train.xyz
│   ├── h2_static_dynamic.py
│   ├── iron_bulk_modulus.py
│   ├── iron_harmonic_fcc_bcc.py
│   ├── online_learning_pimd.py
│   ├── proton_jump_plumed.py
│   ├── submit
│   │   ├── hortense.yaml
│   │   ├── lumi.yaml
│   │   ├── submit_hortense.sh
│   │   └── submit_lumi.sh
│   ├── water_cp2k_noise.py
│   ├── water_online_learning.py
│   ├── water_path_integral_md.py
│   └── water_train_validate.py
├── mkdocs.yml
├── psiflow
│   ├── __init__.py
│   ├── config.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   └── utils.py
│   ├── execution.py
│   ├── free_energy
│   │   ├── __init__.py
│   │   ├── integration.py
│   │   └── phonons.py
│   ├── functions.py
│   ├── geometry.py
│   ├── hamiltonians.py
│   ├── learning.py
│   ├── metrics.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── _mace.py
│   │   ├── mace_utils.py
│   │   └── model.py
│   ├── order_parameters.py
│   ├── reference
│   │   ├── __init__.py
│   │   ├── _cp2k.py
│   │   ├── _dftd3.py
│   │   ├── gpaw_.py
│   │   ├── orca.py
│   │   └── reference.py
│   ├── sampling
│   │   ├── __init__.py
│   │   ├── _ase.py
│   │   ├── ase.py
│   │   ├── client.py
│   │   ├── metadynamics.py
│   │   ├── optimize.py
│   │   ├── order.py
│   │   ├── output.py
│   │   ├── sampling.py
│   │   ├── server.py
│   │   ├── utils.py
│   │   └── walker.py
│   ├── serialization.py
│   └── utils
│       ├── __init__.py
│       ├── _plumed.py
│       ├── apps.py
│       └── io.py
├── pyproject.toml
└── tests
    ├── conftest.py
    ├── test_data.py
    ├── test_free_energy.py
    ├── test_function.py
    ├── test_learning.py
    ├── test_models.py
    ├── test_reference.py
    ├── test_sampling.py
    └── test_serialization.py
/.github/threadpool.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
retries: 0
ModelEvaluation:
  max_simulation_time: 0.4
  gpu: false
  use_threadpool: true
ModelTraining:
  max_training_time: 1
  gpu: true
  use_threadpool: true
  max_workers: 1
CP2K:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
CP2K_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
GPAW:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
GPAW_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
...
--------------------------------------------------------------------------------
/.github/workflows/run_pytest.yaml:
--------------------------------------------------------------------------------
name: CI
run-name: pytest-CI
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
jobs:
  test-py310:
    runs-on: ubuntu-24.04
    steps:
      - uses: eWaterCycle/setup-apptainer@v2
        with:
          apptainer-version: 1.3.6
      - uses: mamba-org/setup-micromamba@v1
        with:
          micromamba-version: '2.0.5-0'
          environment-name: 'test-env'
          generate-run-shell: true
          create-args: >-
            python=3.10
            ndcctools==7.14.0
            py-plumed
            simple-dftd3
            dftd3-python
            pip
            -c conda-forge
          init-shell: bash
          cache-environment: true
          post-cleanup: 'all'
      - name: Install dependencies and download containers
        shell: micromamba-shell {0}
        run: |
          which pip
          pip install pyfftw colorcet wandb pandas plotly plumed 'numpy<2.0.0'
          pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4
          pip install torch==2.5.1
          pip install git+https://github.com/acesuit/MACE.git@v0.3.5
          apptainer exec oras://ghcr.io/molmod/cp2k:2024.1 ls
          apptainer exec oras://ghcr.io/molmod/gpaw:24.1 ls
      - name: Checkout specific commit
        uses: actions/checkout@v4
      - name: Install Psiflow and run tests
        shell: micromamba-shell {0}
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          WANDB_MODE: offline # disables WandB server calls
        run: |
          pip install .[dev]
          pip list
          pytest --skip-gpu --psiflow-config=.github/threadpool.yaml
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.swp

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

pytest-tmp/
wandb/

# psiflow internal and its symlinks
psiflow_internal/
psiflow_log
psiflow_submit_scripts
psiflow_task_logs
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

RUN apt-get update && apt-get install -y \
    python3.10 \
    python3.10-venv \
    python3.10-dev \
    build-essential \
    cmake \
    wget \
    git \
    zlib1g-dev \
    libssl-dev \
    libcurl4-openssl-dev \
    libgsl-dev \
    perl \
    fftw3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN python3.10 -m venv /opt/venv \
    && /opt/venv/bin/pip install --upgrade pip

# Install PLUMED
ARG PLUMED_VERSION
RUN wget https://github.com/plumed/plumed2/archive/refs/tags/v${PLUMED_VERSION}.tar.gz \
    && tar -xzf v${PLUMED_VERSION}.tar.gz \
    && cd plumed2-${PLUMED_VERSION} \
    && ./configure --prefix=/usr/local/plumed \
    && make -j$(nproc) \
    && make install \
    && cd .. \
    && rm -rf plumed2-${PLUMED_VERSION} v${PLUMED_VERSION}.tar.gz

# Ensure cctools can find the Python environment
ENV PYTHONPATH="/opt/venv/lib/python3.10/site-packages:$PYTHONPATH"
ENV PATH="/opt/venv/bin:$PATH"

# Install cctools
ARG CCTOOLS_VERSION
RUN wget https://github.com/cooperative-computing-lab/cctools/archive/refs/tags/release/${CCTOOLS_VERSION}.tar.gz \
    && tar -xzf ${CCTOOLS_VERSION}.tar.gz \
    && cd cctools-release-${CCTOOLS_VERSION} \
    && ./configure --prefix=/usr/local/cctools \
    && make -j$(nproc) \
    && make install \
    && cd .. \
    && rm -rf cctools-release-${CCTOOLS_VERSION} ${CCTOOLS_VERSION}.tar.gz

# Set environment variables for PLUMED and cctools
ENV PATH="/usr/local/plumed/bin:/usr/local/cctools/bin:$PATH"
ENV LD_LIBRARY_PATH="/usr/local/plumed/lib:/usr/local/cctools/lib:$LD_LIBRARY_PATH"

ARG PSIFLOW_VERSION
ARG PARSL_VERSION
ARG GPU_LIBRARY
RUN /bin/bash -c -o pipefail \
    "source /opt/venv/bin/activate && \
    pip install --no-cache-dir pyfftw colorcet wandb pandas plotly plumed 'numpy<2.0.0' && \
    pip install --no-cache-dir git+https://github.com/i-pi/i-pi.git@v3.0.0-beta4 && \
    pip install --no-cache-dir 'torch>=2.5' --index-url https://download.pytorch.org/whl/${GPU_LIBRARY} && \
    pip install --no-cache-dir git+https://github.com/acesuit/mace.git@v0.3.5"
ARG DATE
RUN /bin/bash -c -o pipefail \
    "pip install --no-cache-dir git+https://github.com/molmod/psiflow.git@${PSIFLOW_VERSION}"

# Set entrypoint
RUN echo '#!/bin/bash' >> /opt/entry.sh && \
    echo 'source /opt/venv/bin/activate' >> /opt/entry.sh && \
    echo 'export PLUMED_KERNEL=/usr/local/plumed/lib/libplumedKernel.so' >> /opt/entry.sh && \
    echo '"$@"' >> /opt/entry.sh
RUN chmod +x /opt/entry.sh
ENTRYPOINT ["/opt/entry.sh"]

# Default command
CMD ["bash"]
--------------------------------------------------------------------------------
/Dockerfile.cp2k:
--------------------------------------------------------------------------------
#
# This file was created by generate_docker_files.py
#
# Usage: docker build -f ./2024.1_mpich_generic_psmp.Dockerfile -t cp2k/cp2k:2024.1_mpich_generic_psmp .

# Stage 1: build step
FROM ubuntu:22.04 AS build


# Install packages required for the CP2K toolchain build
RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
    g++ gcc gfortran libmpich-dev mpich openssh-client python3 \
    bzip2 ca-certificates git make patch pkg-config unzip wget zlib1g-dev

# Download CP2K
RUN git clone --recursive -b support/v2024.1 https://github.com/cp2k/cp2k.git /opt/cp2k

# Build CP2K toolchain for target CPU generic
WORKDIR /opt/cp2k/tools/toolchain
RUN /bin/bash -c -o pipefail \
    "./install_cp2k_toolchain.sh -j 8 \
     --install-all \
     --enable-cuda=no \
     --target-cpu=generic \
     --with-cusolvermp=no \
     --with-gcc=system \
     --with-mpich=system"

# Build CP2K for target CPU generic
WORKDIR /opt/cp2k
RUN /bin/bash -c -o pipefail \
    "cp ./tools/toolchain/install/arch/local.psmp ./arch/; \
     source ./tools/toolchain/install/setup; \
     make -j 8 ARCH=local VERSION=psmp"

# Collect components for installation and remove symbolic links
RUN /bin/bash -c -o pipefail \
    "mkdir -p /toolchain/install /toolchain/scripts; \
     for libdir in \$(ldd ./exe/local/cp2k.psmp | \
                      grep /opt/cp2k/tools/toolchain/install | \
                      awk '{print \$3}' | cut -d/ -f7 | \
                      sort | uniq) setup; do \
        cp -ar /opt/cp2k/tools/toolchain/install/\${libdir} /toolchain/install; \
     done; \
     cp /opt/cp2k/tools/toolchain/scripts/tool_kit.sh /toolchain/scripts; \
     unlink ./exe/local/cp2k.popt; \
     unlink ./exe/local/cp2k_shell.psmp"

# Stage 2: install step
FROM ubuntu:22.04 AS install

# Install required packages
RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
    g++ gcc gfortran libmpich-dev mpich openssh-client python3 && rm -rf /var/lib/apt/lists/*

# Install CP2K binaries
COPY --from=build /opt/cp2k/exe/local/ /opt/cp2k/exe/local/

# Install CP2K regression tests
COPY --from=build /opt/cp2k/tests/ /opt/cp2k/tests/
COPY --from=build /opt/cp2k/tools/regtesting/ /opt/cp2k/tools/regtesting/
COPY --from=build /opt/cp2k/src/grid/sample_tasks/ /opt/cp2k/src/grid/sample_tasks/

# Install CP2K database files
COPY --from=build /opt/cp2k/data/ /opt/cp2k/data/

# Install shared libraries required by the CP2K binaries
COPY --from=build /toolchain/ /opt/cp2k/tools/toolchain/

# Create links to CP2K binaries
RUN /bin/bash -c -o pipefail \
    "for binary in cp2k dumpdcd graph xyz2dcd; do \
        ln -sf /opt/cp2k/exe/local/\${binary}.psmp \
               /usr/local/bin/\${binary}; \
     done; \
     ln -sf /opt/cp2k/exe/local/cp2k.psmp \
            /usr/local/bin/cp2k_shell; \
     ln -sf /opt/cp2k/exe/local/cp2k.psmp \
            /usr/local/bin/cp2k.popt"

# Create entrypoint script file
RUN printf "#!/bin/bash\n\
ulimit -c 0 -s unlimited\n\
\
export OMP_STACKSIZE=16M\n\
export OMP_NUM_THREADS=1\n\
export PATH=/opt/cp2k/exe/local:\${PATH}\n\
source /opt/cp2k/tools/toolchain/install/setup\n\
if [ -n \"\${MEMORY_LIMIT}\" ]; then\n\
ulimit -v \${MEMORY_LIMIT}\n\
fi\n\
\n\
\"\$@\"\n" \
>/opt/entry.sh && chmod 755 /opt/entry.sh

# Create shortcut for regression test
RUN printf "/opt/cp2k/tests/do_regtest.py --maxtasks 8 --workbasedir /mnt \$* local psmp" \
    >/usr/local/bin/run_tests && chmod 755 /usr/local/bin/run_tests

# Define entrypoint
WORKDIR /mnt
ENTRYPOINT ["/opt/entry.sh"]
CMD ["cp2k", "--help"]

# Label docker image
LABEL author="CP2K Developers" \
      cp2k_version="2024.1" \
      dockerfile_generator_version="0.2"

# EOF
--------------------------------------------------------------------------------
/Dockerfile.gpaw:
--------------------------------------------------------------------------------
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

RUN apt-get update && apt-get install -y \
    python3.10 \
    python3.10-venv \
    python3.10-dev \
    build-essential \
    autoconf \
    automake \
    libtool \
    make \
    cmake \
    wget \
    git \
    libopenblas-dev \
    libfftw3-dev \
    libopenmpi-dev \
    openmpi-bin \
    libscalapack-mpi-dev \
    libelpa-dev \
    libomp-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN python3.10 -m venv /opt/venv \
    && /opt/venv/bin/pip install --upgrade pip

RUN /bin/bash -c -o pipefail \
    "source /opt/venv/bin/activate && \
    pip install --no-cache-dir numpy && \
    pip install --no-cache-dir git+https://gitlab.com/ase/ase"

# install libxc
RUN cd /opt && \
    wget https://gitlab.com/libxc/libxc/-/archive/6.2.2/libxc-6.2.2.tar.bz2 -O libxc-6.2.2.tar.bz2 && \
    tar -xvf libxc-6.2.2.tar.bz2 && \
    cd libxc-6.2.2 && \
    autoreconf -i && \
    ./configure --prefix=/usr/local CFLAGS="-fPIC" && \
    make -j$(nproc) && \
    make install && \
    ldconfig

# install GPAW
WORKDIR /opt/gpaw
ENV GPAW_CONFIG=/opt/gpaw/siteconfig.py
RUN echo "scalapack = True" >> siteconfig.py \
    && echo "fftw = True" >> siteconfig.py \
    && echo "elpa = True" >> siteconfig.py \
    && echo "libraries = ['openblas', 'fftw3', 'scalapack-openmpi', 'elpa', 'omp5']" >> siteconfig.py \
    && echo "library_dirs = ['/usr/lib', '/usr/local/lib']" >> siteconfig.py \
    && echo "extra_link_args = ['/usr/local/lib/libxc.a']" >> siteconfig.py \
    && echo "include_dirs = ['/usr/include', '/usr/local/include', '/usr/include/elpa']" >> siteconfig.py
RUN /bin/bash -c -o pipefail "source /opt/venv/bin/activate && pip install --no-cache-dir gpaw dftd3"
RUN mkdir /opt/gpaw-data
RUN /bin/bash -c -o pipefail "source /opt/venv/bin/activate && yes | gpaw install-data /opt/gpaw-data" || true

ARG PSIFLOW_VERSION
RUN /bin/bash -c -o pipefail \
    "source /opt/venv/bin/activate && pip install --no-cache-dir git+https://github.com/molmod/psiflow.git@${PSIFLOW_VERSION}"
RUN ln -s /opt/venv/lib/python3.10/site-packages/psiflow/reference/gpaw_.py /opt/run_gpaw.py

# Create entrypoint script
RUN echo '#!/bin/bash' >> /opt/entry.sh && \
    echo 'source /opt/venv/bin/activate' >> /opt/entry.sh && \
    echo 'export LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"' >> /opt/entry.sh && \
    echo 'export GPAW_SETUP_PATH="/opt/gpaw-data/gpaw-setups-24.1.0"' >> /opt/entry.sh && \
    echo '"$@"' >> /opt/entry.sh
RUN chmod +x /opt/entry.sh

# libxc needed at runtime
ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"
ENTRYPOINT ["/opt/entry.sh"]


# Default command
CMD ["bash"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022-2025 Ghent University

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[docs](https://molmod.github.io/psiflow)
[build](https://actions-badge.atrox.dev/molmod/psiflow/goto?ref=main)
[paper](https://www.nature.com/articles/s41524-023-00969-x)


# Scalable Molecular Simulation

Psiflow is a scalable molecular simulation engine for chemistry and materials science applications.
It supports:
- **quantum mechanical calculations** at various levels of theory (GGA and hybrid DFT, post-HF methods such as MP2 or RPA, and even coupled cluster; using CP2K | GPAW | ORCA)
- **trainable interaction potentials** as well as easy-to-use universal potentials, e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)
- a wide range of **sampling algorithms**: NVE | NVT | NPT, path-integral molecular dynamics, alchemical replica exchange, metadynamics, phonon-based sampling, thermodynamic integration; using [i-PI](https://ipi-code.org/),
  [PLUMED](https://www.plumed.org/), ...

Users may define arbitrarily complex workflows and execute them **automatically** on local, HPC, and/or cloud infrastructure.
To achieve this, psiflow is built using [Parsl](https://parsl-project.org/): a parallel execution library which manages job submission and workload distribution.
As such, psiflow can orchestrate large molecular simulation pipelines on hundreds or even thousands of nodes.


# Setup

Use the following one-liner to create a lightweight [micromamba](https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html) Python environment with all dependencies readily available:
```sh
curl -L molmod.github.io/psiflow/install.sh | bash
```
The environment can be activated by sourcing the `activate.sh` file which will be created in the current working directory.
Next, create a `config.yaml` file which defines the compute resources. For SLURM-based HPC systems, psiflow can initialize your configuration automatically via the following command:
```sh
python -c 'import psiflow; psiflow.setup_slurm_config()'
```
Example configuration files for [LUMI](https://lumi-supercomputer.eu/), [MeluXina](https://luxembourg.public.lu/en/invest/innovation/meluxina-supercomputer.html), or [VSC](https://www.vscentrum.be/) can be found [here](https://github.com/molmod/psiflow/tree/main/configs).
No additional software compilation is required since all of the heavy lifting (CP2K/ORCA/GPAW, PyTorch model training, i-PI dynamics) is executed within preconfigured [Apptainer](https://apptainer.org/)/[Singularity](https://sylabs.io/singularity/) containers which are production-ready for most HPCs.

That's it! Contrary to frameworks like pyiron or aiida, psiflow does not require any databases or web servers.
The only requirement is that you set up a Python environment and provide a `config.yaml`.

[**EXAMPLES**](https://github.com/molmod/psiflow/tree/main/examples)
50 |
51 |
52 |
53 | # FAQ
54 |
55 | **Where do I start?**
56 |
57 | Take a brief look at the [examples](https://github.com/molmod/psiflow/tree/main/examples) or the
58 | [documentation](https://molmod.github.io/psiflow) to get an idea for psiflow's
59 | capabilities. Next, head over to the [setup & configuration](https://molmod.github.io/psiflow/configuration/) section of the docs to get started!
60 |
61 | **Is psiflow a workflow manager?**
62 |
63 | Absolutely not! Psiflow is a Python library which allows you to perform complex molecular simulations and scale them towards large numbers of compute nodes automatically.
64 | It does not have 'fixed' workflow recipes, it does not require you to set up 'databases'
65 | or 'server daemons'. The only thing it does is expose a concise and powerful API to
66 | perform arbitrarily complex calculations in a highly efficiently manner.
67 |
68 | **Is it compatible with my cluster?**
69 |
70 | Most likely yes. Check which resource scheduling system your cluster uses (probably either
71 | SLURM/PBSPro/SGE). If you're not sure, ask your system administrators or open an issue
72 |
73 | **Can I use VASP with it?**
74 |
75 | You cannot automate VASP calculations with it, but in 83% of cases there is either no need
76 | to use VASP, or it's very easy to quickly perform the VASP part manually, outside of psiflow,
77 | and do everything else (data generation, ML potential training, sampling) with psiflow.
78 | Open an issue if you're not sure how to do this.
79 |
80 | **I would like to have feature X**
81 |
82 | Psiflow is continuously in development; if you're missing a feature feel free to open an
83 | issue or pull request!
84 |
85 | **I have a bug. Where is my error message and how do I solve it?**
86 |
87 | Psiflow covers essentially all major aspects of computational molecular simulation (most
88 | notably including the executation and parallelization), so there's bound to be some bug
89 | once in a while. Debugging can be challenging, and we recommend to follow the following steps in
90 | order:
91 |
92 | 1. Check the stderr/stdout of the main Python process (i.e. the `python main.py
93 | config.yaml` one). See if there are any clues. If it has contents which you don't
94 | understand, open an issue. If there's seemingly nothing there, go to step 2.
95 | 2. Check Parsl's log file. This can be found in the current working directory, under
96 | `psiflow_internal/parsl.log`. If it's a long file, search for any errors using `Error`
97 | or `ERROR`. If you find anything suspicious but do not know how to solve it,
98 | open an issue.
99 | 3. Check the output files of individual ML training, QM singlepoints, or i-PI molecular
100 | dynamics runs. These can be found under `psiflow_internal/000/task_logs/*`.
101 | Again, if you find an error but do not exactly know why it happens or how to solve it,
102 | feel free to open an issue. Most likely, it will be useful to other people as well
103 | 4. Check the actual 'jobscripts' that were generated and which were submitted to the
104 | cluster. Quite often, there can be a spelling mistake in e.g. the compute project you
105 | are using, or you are requesting a resource on a partition that is not available.
106 | These jobscripts (and their output and error) can be found under
107 | `psiflow_internal/000/submit_scripts/`.
108 |
109 | **Where do these container images come from?**
110 |
111 | They were generated using Docker based on the recipes in this repository, and were then
112 | converted to `.sif` format using `apptainer`
113 |
114 | **Can I run psiflow locally for small runs or debug purposes?**
115 |
116 | Of course! If you do not provide a `config.yaml`, psiflow will just use your local
117 | workstation for its execution. See e.g. [this](https://github.com/molmod/psiflow/blob/main/configs/threadpool.yaml) or [this](https://github.com/molmod/psiflow/blob/main/configs/wq.yaml) config used for testing.
118 |
--------------------------------------------------------------------------------
/build_containers.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -e  # exit upon failure

if [ "$EUID" -ne 0 ]; then
    echo "Please run this script as root or with sudo."
    exit 1
fi

# Initialize flags
psiflow=false
gpaw=false
cp2k=false
build_sif=false
#mpi=mpich

# Parse command line options
while [[ $# -gt 0 ]]; do
    case "$1" in
        --gpaw)
            gpaw=true
            shift  # Shift to next argument
            ;;
        --cp2k)
            cp2k=true
            shift
            ;;
        --psiflow)
            psiflow=true
            shift
            ;;
        --build_sif)
            build_sif=true
            shift
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

PSIFLOW_VERSION="v4.0.0"
CCTOOLS_VERSION=7.14.0
PLUMED_VERSION=2.9.0
GPU_LIBRARIES=("rocm6.2" "cu118")

# build the psiflow container, once per GPU library
if [ "$psiflow" = "true" ]; then
    for GPU_LIBRARY in "${GPU_LIBRARIES[@]}"; do
        TAG="psiflow:${PSIFLOW_VERSION}_${GPU_LIBRARY}"
        docker build \
            --build-arg GPU_LIBRARY=${GPU_LIBRARY} \
            --build-arg PARSL_VERSION=$PARSL_VERSION \
            --build-arg PSIFLOW_VERSION=$PSIFLOW_VERSION \
            --build-arg CCTOOLS_VERSION=$CCTOOLS_VERSION \
            --build-arg PLUMED_VERSION=$PLUMED_VERSION \
            --build-arg DATE=$(date +%s) \
            -t ghcr.io/molmod/$TAG \
            -f Dockerfile .
        if [ "$build_sif" = "true" ]; then
            export TMPDIR=$(pwd)/tmp
            mkdir -p $TMPDIR
            apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG
            apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG
            rm $TAG.sif
            rm -rf $TMPDIR
        fi
    done
fi

if [ "$cp2k" = "true" ]; then
    TAG="cp2k:2024.1"
    docker build \
        -t ghcr.io/molmod/$TAG \
        -f Dockerfile.cp2k .
    if [ "$build_sif" = "true" ]; then
        apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG
        apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG
        rm $TAG.sif
    fi
fi

if [ "$gpaw" = "true" ]; then
    TAG="gpaw:24.1"
    docker build \
        --build-arg PSIFLOW_VERSION=$PSIFLOW_VERSION \
        -t ghcr.io/molmod/$TAG \
        -f Dockerfile.gpaw .
    if [ "$build_sif" = "true" ]; then
        apptainer build -F $TAG.sif docker-daemon:ghcr.io/molmod/$TAG
        apptainer push $TAG.sif oras://ghcr.io/molmod/$TAG
        rm $TAG.sif
    fi
fi
--------------------------------------------------------------------------------
/configs/hortense.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
container_engine: 'apptainer'
container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_cu118'
default_threads: 8
ModelEvaluation:
  cores_per_worker: 12
  gpu: True
  max_simulation_time: 20
  slurm:
    partition: "gpu_rome_a100"
    account: "2023_070"
    nodes_per_block: 1
    cores_per_node: 48
    max_blocks: 1
    walltime: "12:00:00"
    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n"
ModelTraining:
  cores_per_worker: 12
  gpu: true
  max_training_time: 40
  slurm:
    partition: "gpu_rome_a100"
    account: "2023_070"
    nodes_per_block: 1
    cores_per_node: 12
    max_blocks: 1
    walltime: "12:00:00"
    scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n"
CP2K:
  cores_per_worker: 64
  max_evaluation_time: 30
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp'
  slurm:
    partition: "cpu_rome"
    account: "2024_079"
    nodes_per_block: 1
    cores_per_node: 64
    max_blocks: 25
    walltime: "06:00:00"
    scheduler_options: "#SBATCH --clusters=dodrio\n"
...
--------------------------------------------------------------------------------
/configs/lumi.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
container_engine: 'singularity'
container_uri: 'oras://ghcr.io/molmod/psiflow:4.0.0_rocm6.2'
default_threads: 8
CP2K:
  cores_per_worker: 32
  max_evaluation_time: 20
  launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp'
  slurm:
    partition: "standard"
    account: "project_465001125"
    nodes_per_block: 1
    cores_per_node: 128
    max_blocks: 10
    walltime: "01:00:00"
ModelEvaluation:
  cores_per_worker: 7
  gpu: True
  slurm:
    partition: "standard-g"
    account: "project_465001125"
    nodes_per_block: 1
    cores_per_node: 56
    max_blocks: 10
    walltime: "01:00:00"
    scheduler_options: "#SBATCH --gres=gpu:8\n"
ModelTraining:
  cores_per_worker: 7
  gpu: true
  multigpu: true
  slurm:
    partition: "standard-g"
    account: "project_465001125"
    nodes_per_block: 1
    cores_per_node: 56
    walltime: "01:00:00"
    scheduler_options: "#SBATCH --gres=gpu:8\n"
...
--------------------------------------------------------------------------------
/configs/threadpool.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
retries: 0
ModelEvaluation:
  gpu: false
  use_threadpool: true
  max_simulation_time: 0.4
ModelTraining:
  gpu: true
  use_threadpool: true
  max_training_time: 1
  max_workers: 1  # suppress assertion for multigpu training
CP2K:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
CP2K_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -bind-to core -np 2 -env OMP_NUM_THREADS 1 cp2k.psmp'
GPAW:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
GPAW_container:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/gpaw:24.1 /opt/entry.sh mpirun -np 2 gpaw python /opt/run_gpaw.py'
...
--------------------------------------------------------------------------------
/configs/wq.yaml:
--------------------------------------------------------------------------------
---
parsl_log_level: WARNING
default_threads: 4
ModelEvaluation:
  cores_per_worker: 4
  gpu: True
  max_simulation_time: 0.4
ModelTraining:
  cores_per_worker: 4
  gpu: true
  max_training_time: 1
  max_workers: 1
CP2K:
  cores_per_worker: 2
  max_evaluation_time: 0.3
  launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2023.2 /opt/entry.sh mpirun -np 2 -x OMP_NUM_THREADS=1 cp2k.psmp'
...
--------------------------------------------------------------------------------
/docs/api_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/api_example.png
--------------------------------------------------------------------------------
/docs/free_energy.md:
--------------------------------------------------------------------------------
TODO
--------------------------------------------------------------------------------
/docs/hamiltonian.md:
--------------------------------------------------------------------------------
In Born-Oppenheimer-based molecular simulation, atomic nuclei are treated as classical
particles that are subject to *effective* interactions -- these are the result of the quantum
mechanical behavior of the electrons. These interactions determine the interatomic forces
which are used in a dynamic simulation to propagate the atomic positions from one timestep
to the next.
Traditionally, dynamic simulations required an explicit evaluation of these effective
forces in terms of a quantum mechanical calculation (e.g. DFT(B)).
Recently, it became clear that it is much more efficient to perform such simulations
using a machine-learned representation of the interaction energy, i.e. an ML potential.
The development and application of ML potentials throughout large simulation workflows is in
fact one of the core applications of psiflow.

The `Hamiltonian` class is used to represent any type of interaction potential.
Examples are pre-trained, 'universal' models (e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)),
ML potentials trained within psiflow (see [ML potentials](model.md)), or a quadratic
(hessian-based) approximation to a local energy minimum, to name a few.
In addition, various sampling schemes employ bias potentials which are superimposed on the
QM-based Born-Oppenheimer surface in order to drive the system
along specific reaction coordinates (e.g. metadynamics, umbrella sampling).
Such bias potentials are also instances of a `Hamiltonian`.

By far the simplest hamiltonian is the Einstein crystal, which binds atoms to a certain
reference position using harmonic springs with a single, fixed force constant.

```py
from psiflow.geometry import Geometry
from psiflow.hamiltonians import EinsteinCrystal


# isolated H2 molecule
geometry = Geometry.from_string('''
2
H 0.0 0.0 0.0
H 0.0 0.0 0.8
''')

einstein = EinsteinCrystal(geometry, force_constant=0.1)  # in eV/A**2
```
As mentioned earlier, the key feature of hamiltonians is that they represent an interaction energy between atoms,
i.e. they output an energy (and its gradients) when given a geometry as input.
Because hamiltonians might require specialized resources for their evaluation (e.g. an ML
potential which gets executed on a GPU), evaluation of a hamiltonian does not necessarily
happen instantly (e.g. if a GPU node is not immediately available). Similar to how
`Dataset` instances return futures of a `Geometry` when a particular index is queried,
hamiltonians return a future when asked to evaluate the energy/forces/stress of a
particular `Geometry`:

```py
from psiflow.data import Dataset

energy = einstein.compute(geometry, 'energy')  # AppFuture of an energy (np.ndarray with shape (1,))
print(energy.result())  # wait for the result to complete, and print it (in eV)

data = Dataset.load('snapshots.xyz')  # dataset containing N snapshots
energy, forces, stress = einstein.compute(data)  # returns energy and gradients for each snapshot in data

assert energy.result().shape == (N,)  # one energy per snapshot
assert forces.result().shape == (N, max_natoms, 3)  # forces for each snapshot, with padded natoms
assert stress.result().shape == (N, 3, 3)  # stress; filled with NaNs if not applicable
```
Aside from a dataset or a geometry, `compute` takes the following keyword arguments:

- **outputs** (type `str` or `list[str]`): determines which properties to compute and
  return. Accepts either a single property name (`'energy'`, `'forces'`, or `'stress'`) or a list
  of properties (e.g. `['energy', 'forces', 'stress']`).
- **batch_size** (type `int`): splits the calculation into batches of this size. For
  expensive models and/or large datasets, it makes sense to pick a smaller batch size such
  that the calculation is parallelized over a large number of resources. For a very simple
  calculation (e.g. the Einstein crystal), it is faster to pick a larger batch size in
  order to reduce the overhead of batching. Its default value is 100; see the example below.

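For instance, a minimal sketch of a batched evaluation, reusing the `einstein` hamiltonian and the `data` from above (the batch size of 10 is an arbitrary choice):

```py
# compute only the energies, in batches of 10 snapshots each;
# batches are evaluated in parallel whenever resources allow it
energy = einstein.compute(data, outputs='energy', batch_size=10)
print(energy.result().shape)  # (N,)
```
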
A particularly important hamiltonian is MACE, one of the most ubiquitous ML potentials.
The MACE community has developed a few foundation models (MACE-MP) which are readily applicable to
virtually any molecule or material:

```py
import numpy as np
from psiflow.hamiltonians import MACEHamiltonian


mace = MACEHamiltonian.mace_mp0()  # downloads MACE-MP0 from github
forces = mace.compute(geometry, 'forces')  # evaluates the MACE potential on the geometry

forces = forces.result()  # wait for evaluation to complete and get actual value

assert np.dot(forces[0], forces[1]) < 0  # forces on the two H atoms point in opposite directions

assert np.allclose(np.sum(forces, axis=0), 0.0)  # no net force on the molecule --> forces sum to [0, 0, 0]
```
A unique feature of psiflow `Hamiltonian` instances is the ability to create a new
hamiltonian from a linear combination of two or more existing hamiltonians.
This is relevant for many types of free energy calculations and/or enhanced sampling
techniques, including umbrella sampling, Hamiltonian replica exchange, or thermodynamic
integration.
Let us consider the particular example of [umbrella
sampling](https://wires.onlinelibrary.wiley.com/doi/10.1002/wcms.66).
As activated event, we consider the decay of vinyl alcohol to acetaldehyde,
which consists of a proton jump from the oxygen to the opposite carbon:


![](hamiltonians_umbrella.svg){ width="500" }
Transformation of vinyl alcohol into acetaldehyde by means of a proton jump.
A reaction coordinate is constructed based on the distance of the hydrogen with respect to
the oxygen and with respect to the carbon.


The harmonic restraint is implemented and evaluated via [PLUMED](https://www.plumed.org/).
In psiflow, this can be done by passing a plumed input string which describes the bias
potential into a `PlumedHamiltonian`.

```py
from psiflow.hamiltonians import PlumedHamiltonian

plumed_str = """UNITS LENGTH=A ENERGY=kj/mol
d_C: DISTANCE ATOMS=3,5
d_O: DISTANCE ATOMS=1,5
CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO
RESTRAINT ARG=CV KAPPA=1500 AT=0.0
"""

bias = PlumedHamiltonian(plumed_str)
```
To add this contribution to our MACE potential, we simply sum both hamiltonians:

```py
potential = mace + bias

# double check
alcohol = Geometry.load('vinyl_alcohol.xyz')
total_energy = potential.compute(alcohol, 'energy')
mace_energy = mace.compute(alcohol, 'energy')
bias_energy = bias.compute(alcohol, 'energy')

assert np.allclose(
    total_energy.result(),
    mace_energy.result() + bias_energy.result(),
)
```

Aside from bias potentials, the combination of multiple hamiltonians is also employed in
e.g. the calculation of anharmonic free energy corrections.
In that case, we consider a "base" potential energy surface which is described by a
general quadratic function (i.e. a 3Nx3N hessian matrix and a minimum-energy geometry)
and a small perturbation which describes the difference between the quadratic
function and the fully anharmonic potential.
The following code snippet demonstrates the construction of mixtures of the two energy
surfaces:
```py
from psiflow.hamiltonians import Harmonic

# hessian computed via geometry optimization and finite differences
# see sampling section
type(hessian)  # np.ndarray
hessian.shape  # (3n, 3n)
type(minimum)  # Geometry
len(minimum)   # n

harmonic = Harmonic(minimum, hessian)  # quadratic hamiltonian; x.T @ H @ x / 2
delta = mace - harmonic  # anharmonic perturbation

hamiltonians = []  # linear interpolation between quadratic and MACE PES, in 10 steps
for scale in np.linspace(0, 1, 10):
    hamiltonians.append(harmonic + scale * delta)
```

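Each entry of `hamiltonians` is again a regular `Hamiltonian`, so the interpolated surfaces can be evaluated (or sampled) like any other. As a small usage sketch, assuming `data` is a `Dataset` of sampled snapshots:

```py
# average energy of the snapshots under each interpolated hamiltonian
for scale, mixed in zip(np.linspace(0, 1, 10), hamiltonians):
    energy = mixed.compute(data, outputs='energy')  # future of a shape-(N,) array
    print(scale, energy.result().mean())
```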
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
---
hide:
  - toc
---

# **psiflow** - scalable molecular simulation


Psiflow is a scalable molecular simulation engine for chemistry and materials science applications.
It supports:

- **quantum mechanical calculations** at various levels of theory (GGA and hybrid DFT, post-HF methods such as MP2 or RPA, and even coupled cluster; using CP2K | GPAW | ORCA)

- **trainable interaction potentials** as well as easy-to-use universal potentials, e.g. [MACE-MP0](https://arxiv.org/abs/2401.00096)
- a wide range of **sampling algorithms**: NVE | NVT | NPT, path-integral molecular dynamics, alchemical replica exchange, metadynamics, phonon-based sampling, thermodynamic integration; using [i-PI](https://ipi-code.org/),
  [PLUMED](https://www.plumed.org/), ...

Users may define arbitrarily complex workflows and execute them **automatically** on local, HPC, and/or cloud infrastructure.
To achieve this, psiflow is built using [Parsl](https://parsl-project.org/): a parallel execution library which manages job submission and workload distribution.
As such, psiflow can orchestrate large molecular simulation pipelines on hundreds or even thousands of nodes.


![](overview.png){ width="500" }


---


# FAQ

**Where do I start?**

Take a brief look at the [examples](https://github.com/molmod/psiflow/tree/main/examples) or walk
through the
[documentation](https://molmod.github.io/psiflow/data) to get an idea of psiflow's
capabilities. Next, head over to the [setup & configuration](https://molmod.github.io/psiflow/configuration/) section of the docs to get started!

**Is psiflow a workflow manager?**

Absolutely not! Psiflow is a Python library which allows you to perform complex molecular simulations and scale them towards large numbers of compute nodes automatically.
It does not have 'fixed' workflow recipes, nor does it require you to set up 'databases'
or 'server daemons'. The only thing it does is expose a concise and powerful API to
perform arbitrarily complex calculations in a highly efficient manner.

**Is it compatible with my cluster?**

Most likely yes. Check which resource scheduling system your cluster uses (probably
SLURM, PBSPro, or SGE). If you're not sure, ask your system administrators or open an issue.

**Can I use VASP with it?**

You cannot automate VASP calculations with it, but in 99% of cases there is either no need
to use VASP, or it's very easy to perform the VASP part manually, outside of psiflow,
and do everything else (data generation, ML potential training, sampling) with psiflow.
Open an issue if you're not sure how to do this.

**I would like to have feature X**

Psiflow is continuously in development; if you're missing a feature, feel free to open an
issue or pull request!

**I have a bug. Where is my error message and how do I solve it?**

Psiflow covers essentially all major aspects of computational molecular simulation (most
notably including the execution and parallelization), so there's bound to be some bug
once in a while. Debugging can be challenging, and we recommend following these steps in
order:

1. Check the stderr/stdout of the main Python process (i.e. the `python main.py
   config.yaml` one). See if there are any clues. If it has contents which you don't
   understand, open an issue. If there's seemingly nothing there, go to step 2.
2. Check Parsl's log file. This can be found in the current working directory, under
   `psiflow_internal/parsl.log`. If it's a long file, search for any errors using `Error`
   or `ERROR`. If you find anything suspicious but do not know how to solve it,
   open an issue.
3. Check the output files of individual ML training, QM singlepoints, or i-PI molecular
   dynamics runs. These can be found under `psiflow_internal/000/task_logs/*`.
   Again, if you find an error but do not exactly know why it happens or how to solve it,
   feel free to open an issue. Most likely, it will be useful to other people as well.
4. Check the actual 'jobscripts' that were generated and submitted to the
   cluster. Quite often, there can be a spelling mistake in e.g. the compute project you
   are using, or you are requesting a resource on a partition that is not available.
   These jobscripts (and their output and error) can be found under
   `psiflow_internal/000/submit_scripts/`.

**Where do these container images come from?**

They were generated using Docker based on the recipes in this repository, and were then
converted to `.sif` format using `apptainer`.

**Can I run psiflow locally for small runs or debug purposes?**

Of course! If you do not provide a `config.yaml`, psiflow will just use your local
workstation for its execution. See e.g. [this](https://github.com/molmod/psiflow/blob/main/configs/threadpool.yaml) or [this](https://github.com/molmod/psiflow/blob/main/configs/wq.yaml) config used for testing.


!!! note "Citing psiflow"

    Psiflow is developed at the
    [Center for Molecular Modeling](https://molmod.ugent.be).
    If you use it in your research, please cite the following paper:

    Machine Learning Potentials for Metal-Organic Frameworks using an
    Incremental Learning Approach,
    _Sander Vandenhaute et al._,
    [npj Computational Materials](https://www.nature.com/articles/s41524-023-00969-x),
    __9__, 19 __(2023)__
--------------------------------------------------------------------------------
/docs/install.sh:
--------------------------------------------------------------------------------
#!/bin/sh

curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
export MAMBA_ROOT_PREFIX=$(pwd)  # optional, defaults to ~/micromamba

eval "$(./bin/micromamba shell hook -s posix)"
micromamba activate
micromamba create -n _psiflow_env -y python=3.10 pip ndcctools=7.14.0 -c conda-forge
micromamba activate _psiflow_env
pip install git+https://github.com/molmod/psiflow.git@v4.0.0

# create activate.sh
echo 'ORIGDIR=$PWD' >>activate.sh  # single quotes prevent variable substitution
echo "cd $(pwd)" >>activate.sh
echo "export MAMBA_ROOT_PREFIX=$(pwd)" >>activate.sh
echo 'eval "$(./bin/micromamba shell hook -s posix)"' >>activate.sh
echo "micromamba activate _psiflow_env" >>activate.sh
echo 'cd $ORIGDIR' >>activate.sh  # single quotes prevent variable substitution
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
# Installation

Psiflow is designed as an end-to-end framework for developing interatomic potentials. As such, it has a number of dependencies
which should be available in order to be able to perform all steps in the workflow. The following table groups
the main dependencies according to how they are used in psiflow:



| category                 | name    | version    |     uses GPU     |     uses MPI     |
| ------------------------ | ------- | ---------- | :--------------: | :--------------: |
| **QM evaluation**        | CP2K    | >= 2023.1  |                  | :material-check: |
|                          | PySCF   | >= 2.4     |                  |                  |
| **trainable potentials** | MACE    | 0.2.0      | :material-check: |                  |
|                          | NequIP  | 0.5.6      | :material-check: |                  |
|                          | Allegro | 0.2.0      | :material-check: |                  |
| **molecular dynamics**   | OpenMM  | 8.0        | :material-check: |                  |
|                          | PLUMED  | 2.9.0      |                  |                  |
|                          | YAFF    | 1.6.0      |                  |                  |
| **miscellaneous**        | Parsl   | 2024.02.12 |                  |                  |
|                          | e3nn    | 0.4.4      | :material-check: |                  |
|                          | PyTorch | 1.13.1     | :material-check: |                  |
|                          | ASE     | >= 3.22.1  |                  |                  |
|                          | wandb   | 0.15.8     |                  |                  |
|                          | Python  | 3.10, 3.11 |                  |                  |



## Containerized
To spare users from having to go through all of the installation
shenanigans, psiflow provides a convenient portable entity which bundles all of the above
dependencies -- a container image!
Whether you're executing your calculations on a high-memory node in a cluster
or using a GPU from a Google Cloud instance, all that is required is a working
container engine and you're good to go.
The vast majority of HPCs and cloud computing providers support containerized execution,
using engines like [Apptainer/Singularity](https://apptainer.org/),
[Shifter](https://docs.nersc.gov/development/shifter/how-to-use/),
or [Docker](https://www.docker.com/).
These engines are also very easily installed on your local workstation, which facilitates
local debugging.

Besides a container engine, it's necessary to install a standalone Python environment
which needs to take care of possible job submissions and input/output writing.
Since the actual calculations are performed inside the container, the standalone
Python environment requires barely anything, and is straightforward to install
using `micromamba` -- a blazingly fast drop-in replacement for `conda`:

```console
micromamba create -n psiflow_env -c conda-forge -y python=3.10
micromamba activate psiflow_env
pip install parsl==2023.10.23 git+https://github.com/molmod/psiflow
```
That's it! Before running actual calculations, it is still necessary to set up Parsl
to use the compute resources you have at your disposal -- whether it's a local GPU,
a SLURM cluster, or a cloud computing provider; check out the
[Execution](execution.md) page for more details.

!!! note "Containers 101"

    Apptainer -- now the most widely used container system for HPCs -- is part of the
    Linux Foundation. It is easy to set up on most Linux distributions, as explained in the [Apptainer documentation](https://apptainer.org/docs/admin/main/installation.html#install-ubuntu-packages).

    Psiflow's containers are hosted on the GitHub Container Registry (GHCR), for both Python 3.9 and 3.10.
    To download and run commands in them, simply execute:

    ```console
    # show available pip packages
    apptainer exec oras://ghcr.io/molmod/psiflow:3.0.0_python3.9_cuda /usr/local/bin/entry.sh pip list

    # inspect cp2k version
    apptainer exec oras://ghcr.io/molmod/psiflow:3.0.0_python3.9_cuda /usr/local/bin/entry.sh cp2k.psmp --version
    ```

    Internally, Apptainer will store the container in a local cache directory such that it does not have to
    redownload it every time a command gets executed. Usually, it's a good idea to manually change the location
    of these cache directories since they can end up clogging your `$HOME` directory quite quickly.
    To do this, simply put the following line in your `.bashrc`:

    ```console
    export APPTAINER_CACHEDIR=/some/dir/on/local/scratch/apptainer_cache
    ```

    If your compute resources use SingularityCE instead of Apptainer,
    replace 'APPTAINER' with 'SINGULARITY' in the environment variable names.

!!! note "Weights & Biases"

    To ensure psiflow can communicate its data to [W&B](https://wandb.ai), add

    ```console
    export WANDB_API_KEY=
    ```
    to your `.bashrc`.

!!! note "AMD GPU support"

    As the name of the container suggests, GPU acceleration for PyTorch models in OpenMM
    is currently only available for Nvidia GPUs because the compatibility of conda/mamba
    with AMD GPUs (HIP) is not great at the moment. If you really must use AMD GPUs
    in psiflow, you'll have to manually create a separate Python environment with a ROCm-enabled
    PyTorch for training, and use the regular containerized setup for CPU-only
    molecular dynamics with OpenMM.

    A ROCm-compatible PyTorch can be installed using the following command:
    ```console
    pip install --force torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2
    ```


## Manual
While a containerized setup guarantees reproducibility and is faster to install,
a fully manual setup of psiflow and its dependencies provides the user with full control
over software versions or compiler flags.
While this is not really necessary in the vast majority of cases, we mention for completeness
the following manual setup using `micromamba`:
```console
CONDA_OVERRIDE_CUDA="11.8" micromamba create -p ./psiflow_env -y -c conda-forge \
    python=3.10 pip \
    openmm-plumed openmm-torch pytorch=1.13.1=cuda* \
    nwchem py-plumed cp2k && \
    micromamba clean -af --yes
pip install cython==0.29.36 matscipy prettytable && \
    pip install git+https://github.com/molmod/molmod && \
    pip install git+https://github.com/molmod/yaff && \
    pip install e3nn==0.4.4
pip install numpy ase tqdm pyyaml 'torch-runstats>=0.2.0' 'torch-ema>=0.3.0' mdtraj tables
pip install git+https://github.com/acesuit/MACE.git@55f7411 && \
    pip install git+https://github.com/mir-group/nequip.git@develop --no-deps && \
    pip install git+https://github.com/mir-group/allegro --no-deps && \
    pip install git+https://github.com/svandenhaute/openmm-ml.git@triclinic
pip install 'psiflow[parsl] @ git+https://github.com/molmod/psiflow'
```
This is mostly a copy-paste from psiflow's [Dockerfiles](https://github.com/molmod/psiflow/blob/main/container).
--------------------------------------------------------------------------------
/docs/learning.md:
--------------------------------------------------------------------------------
1 | Psiflow allows for the seamless development and scalable
2 | execution of online learning algorithms for ML potentials.
3 | The `Learning` class provides an interface based on which such
4 | algorithms can be implemented.
5 | They keep track of the generated data and error metrics, provide optional [Weights &
6 | Biases](https://wandb.ai) logging, and offer basic restart functionality in case
7 | something goes wrong.
8 | Learning objects are instantiated using the following arguments:
9 |
10 | - **reference** (type `Reference`): the `Reference` instance which will be used to
11 | evaluate ground-truth energy/force labels for each of the samples generated.
12 | - **path_output** (type `str | Path`): the location of a folder in which intermediate
13 |   models, datasets, walker states, and restart files are saved.
14 | - **train_valid_split** (type `float`): fraction of generated data which should be used
15 | for the training set (as opposed to validation).
16 | - **error_thresholds_for_reset** (type `list[Optional[float]]`): during online learning,
17 |   it is not uncommon for walkers to explore unphysical regions in phase space due to
18 |   irregularities in the intermediate potential, excessive temperatures/pressures, ...
19 |   In those cases, it is beneficial to reset walkers to their starting configurations,
20 |   which are known to be physically sound. The decision to reset a walker
21 |   is made every time the 'exact' energy and forces have been computed for a sampled
22 |   state. If the error between the corresponding walker's model (i.e. the previous model)
23 |   and the QM-evaluated energy and forces exceeds the given thresholds (on energies and/or
24 |   forces), the walker is reset.
25 |   This argument expects a list of length two (threshold on the energy error, and threshold on
26 |   the force error), with optional `None` values if no reset is desired.
27 |   For example: `[None, 0.1]` resets a walker whenever the force RMSE exceeds 100 meV/A,
28 |   and ignores any energy discrepancy.
29 | - **error_thresholds_for_discard** (type `list[Optional[float]]`): states which are
30 | entirely unphysical do not contribute to the accuracy of the model, and sometimes even
31 | hinder proper training. If these error thresholds are exceeded, the state is discarded and the walker is reset.
32 | - **wandb_group** (type `str`): if specified, the computed dataset metrics will be logged
33 | to Weights & Biases in the corresponding group of runs for easy visual analysis.
34 | - **wandb_project** (type `str`): if specified, the computed dataset metrics will be logged
35 | to Weights & Biases in the corresponding project for easy visual analysis.
36 | - **initial_data** (type `Dataset`): existing, labeled data from which the learning can be
37 | bootstrapped. Note that *all* states in this dataset must be labeled, and that this is
38 | only sensible if the labeling agrees with the given Reference instance. (Same level of
39 | theory, same basis set, grid settings, ... ).
40 |
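As a concrete sketch (all argument values below are hypothetical, and `cp2k` can be any `Reference` instance), a typical instantiation could look as follows:

```py
from psiflow.learning import Learning

learning = Learning(
    cp2k,                                      # any Reference instance
    path_output='./output',                    # folder for models, data, restart files
    train_valid_split=0.9,
    error_thresholds_for_reset=[None, 0.1],    # reset walker if force RMSE > 0.1 eV/A
    error_thresholds_for_discard=[None, 0.3],  # discard state if force RMSE > 0.3 eV/A
    wandb_group='my_group',
    wandb_project='my_project',
)
```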
41 |
42 |
43 | ![wandb](wandb.png){ width="900" }
44 | Illustration of what the Weights & Biases logging looks like.
45 | The graph on top simply shows the force RMSE on each data point versus a unique
46 | 'identifier' per data point. The bottom plot shows the same data points, but now
47 | grouped according to which walker generated them. In this case, walkers were sorted
48 | according to temperature (lower walker indices correspond to lower temperatures), and this is seen
49 | in the fact that walkers with a higher index generated data with, on average, higher errors,
50 | as they explored more out-of-equilibrium configurations.
51 |
52 |
53 |
54 | The core business of a `Learning` instance is the following sequence of operations:
55 |
56 | 1. use walkers in a `sample()` call to generate atomic geometries
57 | 2. evaluate those atomic geometries with the provided reference to obtain QM energies and
58 |    forces
59 | 3. add those geometries to the training data, or discard them if they exceed
60 |    `error_thresholds_for_discard`; reset walkers if they exceed
61 |    `error_thresholds_for_reset`.
62 | 4. train the model using the new data
63 | 5. compute metrics for the trained model across the new dataset and optionally log them to
64 |    W&B.
65 |
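In pseudocode, a single iteration looks roughly as follows (a sketch of the control flow, not the actual implementation; `generate_states`, `filter_and_reset`, and `compute_and_log_metrics` are hypothetical helpers):

```py
for i in range(n_iterations):
    new_data = generate_states(walkers)            # 1. sample atomic geometries (hypothetical helper)
    labeled = new_data.evaluate(reference)         # 2. obtain QM energy and forces
    data = filter_and_reset(data, labeled)         # 3. apply discard/reset thresholds (hypothetical helper)
    model.train(*data.split(train_valid_split))    # 4. retrain on the extended dataset
    compute_and_log_metrics(model, data)           # 5. metrics, optionally logged to W&B (hypothetical helper)
```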
66 | Currently, two variants of this scheme are implemented: passive and active learning.
67 |
68 | ## passive learning
69 |
70 | During passive learning, walkers are propagated using an external and 'fixed' Hamiltonian
71 | which is not trained at any point (e.g. a pre-trained universal potential or a
72 | hessian-based Hamiltonian).
73 |
74 | ```py
75 | model, walkers = learning.passive_learning(
76 | model,
77 | walkers,
78 | hamiltonian=MACEHamiltonian.mace_mp0(), # fixed hamiltonian
79 | steps=20000,
80 | step=2000,
81 | **optional_sampling_kwargs,
82 | )
83 | ```
84 | Walkers are propagated for a total of 20,000 steps, and a sample is drawn every 2,000
85 | steps; each sample is QM-evaluated by the reference and added to the training data.
86 | If the walkers contain bias contributions, their total hamiltonian is simply the sum of
87 | the existing bias contributions and the hamiltonian given to the `passive_learning()`
88 | call.
89 | Additional keyword arguments to this function are passed directly into the sample function (e.g. for
90 | specifying the log level or the center-of-mass behavior).
91 |
92 | The returned model is the one trained on all data generated in the `passive_learning()` call, as well as on all data which was already present in the learning instance (for example because it had been initialized with `initial_data`, see above).
93 | The returned walkers are identical to the ones passed into the method; they are
94 | returned explicitly to emphasize that they do change internally during a `passive_learning()` call (they
95 | are either propagated or reset, and their metadynamics bias, if present, has accumulated
96 | more hills than before).
97 |
98 | ## active learning
99 |
100 | During active learning, walkers are propagated with a Hamiltonian generated using the
101 | current model. They are propagated for a given number of steps after which their final
102 | state is passed into the reference for correct labeling.
103 | Different from passive learning, active learning *does not allow for subsampling of the
104 | trajectories of the walkers*. The idea behind this is that if you wish to propagate the
105 | walker for 10 ps, and sample a structure every 1 ps to let each walker generate 10 states,
106 | it is likely much better to instead increase the number of walkers (to cover more regions
107 | in phase space) and propagate them in steps of 1 ps. Active learning is ideally suited for
108 | massively parallel workflows (maximal number of walkers, with minimal sampling time per
109 | walker) and we encourage users to exploit this.
110 |
111 | ```py
112 | model, walkers = learning.active_learning(
113 | model, # used to generate hamiltonian
114 | walkers,
115 | steps=2000, # no more 'step' argument!
116 | **optional_sampling_kwargs,
117 | )
118 | ```
119 | ## restarting a run
120 |
121 | `Learning` has first-class support for restarted runs -- simply resubmit your calculation!
122 | It will detect whether the corresponding output folder has already fully logged
123 | each of the iterations, and if so, load the final state of the model, the walkers, and the
124 | learning instance without actually performing any calculations.
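Concretely, a restart amounts to rerunning the exact same script with the same `path_output` (a sketch, mirroring the online learning example in the repository):

```py
learning = Learning(cp2k, path_output='./output')  # same folder as the original run

# this call detects that './output' already contains its results and simply
# loads the saved model and walkers instead of redoing any sampling,
# QM evaluation, or training
model, walkers = learning.passive_learning(
    model,
    walkers,
    hamiltonian=MACEHamiltonian.mace_mp0(),
    steps=10000,
    step=2000,
)
```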
125 |
--------------------------------------------------------------------------------
/docs/logo_dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/logo_dark.png
--------------------------------------------------------------------------------
/docs/logo_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/logo_light.png
--------------------------------------------------------------------------------
/docs/models.md:
--------------------------------------------------------------------------------
1 | Once we know how to represent datasets of atomic geometries and label them with
2 | target QM energy and force values, we can start defining and training ML potentials.
3 | Psiflow defines an abstract `Model` interface which each
4 | particular ML potential should subclass, though at the moment only
5 | [MACE](https://github.com/acesuit/mace) is implemented.
6 | In addition, psiflow provides configuration dataclasses for each model with
7 | reasonable defaults.
8 |
9 | A `Model` has essentially three methods:
10 |
11 | - `initialize`: compute energy shifts and scalings as well as the average number
12 | of neighbors (and any other network normalization metrics) using a given *training* dataset,
13 | and initialize model weights.
14 | - `train`: train the parameters of a model using two separate datasets, one for
15 |   actual training and one for validation. The current model parameters are used as
16 |   starting parameters for the training.
17 | - `create_hamiltonian`: spawn a hamiltonian in order to use the model with its current
18 |   weights in molecular dynamics simulations.
19 |
20 | The following is a minimal illustration:
21 | ```py
22 | from psiflow.data import Dataset, compute_rmse
23 | from psiflow.models import MACE
24 |
25 |
26 | # load data with energy and force labels included as extxyz
27 | train, valid = Dataset.load('all_data.xyz').split(0.9, shuffle=True)
28 |
29 | model = MACE( # for full arg list, see psiflow/models/_mace:MACEConfig
30 | num_channels=16,
31 | max_L=2,
32 | max_num_epochs=400,
33 | batch_size=16,
34 | )
35 |
36 | # initialize, train
37 | model.initialize(train) # this will calculate the scale/shifts, and average number of neighbors
38 | model.train(train, valid) # train using supplied datasets
39 |
40 | model.save('./') # saves model and config to current working directory!
41 |
42 | hamiltonian = model.create_hamiltonian()
43 | forces_pred = hamiltonian.compute(valid, 'forces')
44 | forces_target = valid.get('forces')
45 |
46 | rmse = compute_rmse(forces_pred, forces_target) # this is a Future!
47 | print('forces RMSE: {} eV/A'.format(rmse.result()))
48 |
49 | ```
50 | Note that `model.save()` saves both a `.yaml` file with all hyperparameters and the actual `.pth` model which is needed to reconstruct the corresponding PyTorch module (possibly outside of psiflow if needed).
51 | As such, it expects a directory as its argument (which will be created if it does not
52 | already exist).
53 |
54 | It is generally recommended to provide these models with an estimate of the absolute energy of an isolated
55 | atom for the specific level of theory and basis set considered (and this for each element).
56 | Instead of having the model learn the *absolute* total energy of the system, we first subtract these atomic energies in order
57 | to train the model on the *formation* energy of the system instead, as this generally improves the generalization performance
58 | of the model towards unseen stoichiometries.
59 |
60 | ```py
61 | model.add_atomic_energy('H', -13.7) # add atomic energy of isolated hydrogen atom
62 | model.initialize(some_training_data)
63 |
64 | model.add_atomic_energy('O', -400) # will raise an exception; model needs to be reinitialized first
65 | model.reset() # removes current model, but keeps raw config
66 | model.add_atomic_energy('O', -400) # OK!
67 | model.initialize(some_training_data) # offsets total energy with given atomic energy values per atom
68 |
69 | ```
70 | Whenever atomic energies are available, `Model` instances will automatically offset the potential energy in a (labeled)
71 | `Dataset` by the sum of the energies of the isolated atoms; the underlying PyTorch network is then initialized/trained
72 | on the formation energy of the system instead.
73 | In order to avoid artificially large energy discrepancies between models trained on the formation energy on one hand,
74 | and reference potential energies as obtained from any `Reference` on the other,
75 | the `evaluate` method will first perform the converse operation, i.e. add the energies of the isolated atoms
76 | to the model's prediction of the formation energy.
77 | Similarly, `create_hamiltonian()` also passes any atomic energies which were added to the
78 | model.
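The bookkeeping described above can be summarized with a toy calculation (all numbers hypothetical):

```py
# hypothetical isolated-atom energies, in eV
atomic_energies = {'H': -13.7, 'O': -430.0}

e_total = -466.0  # hypothetical QM total energy of a water molecule, in eV
e_atoms = 2 * atomic_energies['H'] + atomic_energies['O']

e_formation = e_total - e_atoms        # what the underlying network is trained on
e_prediction = e_formation + e_atoms   # what compute()/evaluate() returns

assert abs(e_prediction - e_total) < 1e-12
```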
79 |
--------------------------------------------------------------------------------
/docs/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/overview.png
--------------------------------------------------------------------------------
/docs/reference.md:
--------------------------------------------------------------------------------
1 | # QM calculations
2 | The energy and gradients of the ground-state Born-Oppenheimer surface can be obtained
3 | using varying levels of approximation.
4 | In psiflow, the calculation of the energy and its gradients can be performed for both
5 | `Geometry` and `Dataset` instances, using different software packages:
6 |
7 | - **CP2K** (periodic, mixed PW/lcao): very fast, and useful for pretty much any periodic
8 |   structure. Its forces tend to be quite noisy with the default grid settings, so some
9 |   level of caution is advised. Also, even though it uses both plane waves and atomic basis
10 |   sets, it does suffer from BSSE.
11 | - **GPAW** (periodic/cluster, PW/lcao/grid): slower but more numerically stable than CP2K;
12 |   essentially a fully open-source (and therefore transparent), free, and well-tested
13 |   alternative to VASP. Particularly useful for applications in which BSSE is a concern
14 |   (e.g. adsorption).
15 | - **ORCA** (cluster, lcao): useful for accurate high-level quantum chemistry calculations,
16 | e.g. MP2 and CCSD(T). *TODO*
17 |
18 | !!! note "Installation"
19 |     Because the 'correct' compilation and installation of quantum chemistry software is
20 |     notoriously cumbersome, we host separate container images for each of the packages
21 |     on Github, which are ready to use with psiflow on HPCs with either a Singularity
22 |     or Apptainer container runtime. The Docker files used to generate those images are
23 |     available in the repository:
24 |     [CP2K](https://github.com/molmod/psiflow/blob/main/Dockerfile.cp2k) or
25 |     [GPAW](https://github.com/molmod/psiflow/blob/main/Dockerfile.gpaw).
26 |     See the [configuration](configuration.md) section for more details.
27 |
28 | For each software package, psiflow provides a corresponding class which implements
29 | the appropriate input file manipulations, launch commands, and output parsing
30 | functionalities.
31 | They all inherit from the `Reference` base class, which provides a few key
32 | functionalities:
33 |
34 | - `data.evaluate(reference)`: this is the most common operation involving QM calculations;
35 | given a `Dataset` of atomic geometries, compute the energy and its gradients and insert
36 | them into the dataset such that they are saved for future reuse.
37 | - `reference.compute_atomic_energy`: provides the ability to compute isolated atom
38 |   reference energies, as this facilitates ML potential training on datasets with varying
39 |   numbers of atoms (see the snippet below).
40 | - `reference.compute(data)`: this is somewhat equivalent to the hamiltonian `compute`
41 | method, except that its argument `data` must be a `Dataset` instance, and the optional
42 | `batch_size` defaults to 1 (in order to maximize parallelization). It does not insert
43 | the computed properties into the data, but returns them as numpy arrays.
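For instance, isolated-atom energies can be requested from a reference and passed straight into a model, mirroring the online learning example in the repository (`box_size` sets the cell size of the isolated-atom calculation):

```py
e_H = cp2k.compute_atomic_energy('H', box_size=9)  # returns a Future
model.add_atomic_energy('H', e_H)
```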
44 |
45 | From a distance, QM reference objects look almost identical to hamiltonians, in the sense
46 | that they both take atomic geometries as input and return energies and gradients as
47 | output. The (imposed) distinction between both can be summarized in the following points.
48 |
49 | - hamiltonians can compute energies and forces for pretty much *any* structure. There is
50 |   no reason why they would fail. QM calculations, on the other hand, can fail due to unconverged
51 |   SCF cycles and/or time limit constraints. In fact, this happens relatively often in
52 |   active learning workflows. Reference objects take this into account by returning a unique
53 |   `NullState` whenever a calculation has failed (see the sketch below).
54 | - hamiltonians are orders of magnitude faster, and can be employed in meaningfully long
55 |   molecular dynamics simulations. This is not the case for QM calculations, and as such,
56 |   reference objects cannot be used in combination with walker sampling or geometry optimizations. If the
57 |   purpose is to perform molecular simulation at the DFT level, then the better approach is
58 |   to train an ML potential to any desired level of accuracy (almost always possible in
59 |   psiflow) and use that as a proxy for the QM interaction energy.
60 |   For the same reason, the default batch size for `reference.compute` calls is 1, i.e.
61 |   the QM calculation for each structure in the dataset is immediately scheduled
62 |   independently from the other ones.
63 |   With hamiltonians, that batch size defaults to 100 (split the data in chunks of 100 and
64 |   evaluate each chunk of 100 states serially).
65 |
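A minimal sketch of how failed calculations can be filtered out, assuming `NullState` is importable from `psiflow.geometry`:

```py
from psiflow.geometry import NullState

evaluated = data.evaluate(cp2k)
for geometry in evaluated.geometries().result():
    if geometry == NullState:  # SCF did not converge, or the time limit was reached
        continue
    print('energy: {} eV'.format(geometry.energy))
```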
66 |
67 | ## CP2K 2024.1
68 | A `CP2K` reference instance can be created based on a (multiline) input string.
69 | Only the `FORCE_EVAL` section of the input is important since the atomic coordinates and cell
70 | parameters are automatically inserted for every calculation.
71 | All basis set, pseudopotential, and D3 parameters from the official
72 | [CP2K repository](https://github.com/cp2k/cp2k) are directly available in the
73 | container image (i.e. no need to download or provide these files separately).
74 | Choose which one you would like to use by using the corresponding filename in the input
75 | file (i.e. omit any preceding filepaths).
76 | A typical [input file](https://github.com/molmod/psiflow/blob/main/examples/data/cp2k_input.txt)
77 | is provided in the [examples](https://github.com/molmod/psiflow/tree/main/examples).
78 |
79 | ```py
80 | from psiflow.reference import CP2K
81 |
82 |
83 | # create reference instance
84 | with open('cp2k_input.txt', 'r') as f:
85 | force_eval_input_str = f.read()
86 | cp2k = CP2K(force_eval_input_str)
87 |
88 | # compute energy and forces, and store them in the geometries
89 | evaluated_data = data.evaluate(cp2k)
90 |
91 | for geometry in evaluated_data.geometries().result():
92 | print('energy: {} eV'.format(geometry.energy))
93 | print('forces: {} eV/A'.format(geometry.per_atom.forces))
94 |
95 | ```
96 |
97 | ## GPAW 24.1
98 | A `GPAW` reference is created in much the same way as a traditional GPAW 'calculator'
99 | instance, with support for entirely the same keyword arguments:
100 | ```py
101 | from psiflow.reference import GPAW
102 |
103 | gpaw = GPAW(mode='fd', nbands=0, xc='PBE') # see GPAW calculator on gitlab for full list
104 | energies = gpaw.compute(data, 'energy')
105 |
106 | ```
107 | A notable feature of GPAW is that it already outputs all energies as formation energies,
108 | i.e. it internally subtracts the sum of the energies of the isolated atoms. As such,
109 | `compute_atomic_energy` for a GPAW reference always just returns 0 eV.
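As a sketch (assuming the same call signature as for CP2K):

```py
assert gpaw.compute_atomic_energy('O', box_size=9).result() == 0.0
```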
110 |
111 | ## ORCA
112 | TODO
113 |
--------------------------------------------------------------------------------
/docs/wandb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/docs/wandb.png
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | - [Replica exchange molecular dynamics](https://github.com/molmod/psiflow/tree/main/examples/alanine_replica_exchange.py) | **alanine dipeptide**: replica exchange molecular dynamics simulation of alanine dipeptide, using the MACE-MP0 universal potential.
2 | The inclusion of high-temperature replicas allows for fast conformational transitions and improves ergodicity.
3 | - [Geometry optimizations](https://github.com/molmod/psiflow/tree/main/examples/formic_acid_transition.py) | **formic acid dimer**: approximate transition state calculation for the proton exchange reaction in a formic acid dimer,
4 | using simple bias potentials and a few geometry optimizations.
5 | - [Static and dynamic frequency analysis](https://github.com/molmod/psiflow/tree/main/examples/h2_static_dynamic.py) | **dihydrogen**: Hessian-based estimate of the H-H bond strength and corresponding IR absorption frequency, and a comparison with a dynamical estimate from NVE simulation and Fourier analysis.
6 |
7 | - [Bulk modulus calculation](https://github.com/molmod/psiflow/tree/main/examples/iron_bulk_modulus.py) | **iron**: estimate of the bulk modulus of bcc iron using a series of NPT simulations at different pressures
8 |
9 | - [Solid-state phase stabilities](https://github.com/molmod/psiflow/tree/main/examples/iron_harmonic_fcc_bcc.py) | **iron**: estimating the relative stability of fcc and bcc iron with anharmonic corrections using thermodynamic integration (see e.g. [Phys Rev B., 2018](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.97.054102))
10 |
11 | - [ML potentials from scratch](https://github.com/molmod/psiflow/tree/main/examples/online_learning_pimd.py) | **water**: develop an ML potential for water based on a single geometry as input, using a combination of passive and active learning.
12 |
13 | - [Replica exchange umbrella sampling](https://github.com/molmod/psiflow/tree/main/examples/proton_jump_plumed.py) |
14 | **vinyl alcohol**: explore a reactive transition path with metadynamics, and use the
15 | resulting data to perform umbrella sampling with replica exchange between umbrellas.
16 |
17 | - [DFT singlepoints](https://github.com/molmod/psiflow/tree/main/examples/water_cp2k_noise.py) | **water**: analysis of the numerical noise in DFT energy and force evaluations using CP2K and the RPBE(D3) functional, for a collection of water molecules.
18 |
19 | - [Path-integral molecular dynamics](https://github.com/molmod/psiflow/tree/main/examples/water_path_integral_md.py) | **water**: demonstration of the impact of nuclear quantum effects on the variance in O-H distance in liquid water. Path-integral molecular dynamics simulations with increasing number of beads (1, 2, 4, 8, 16) approximate the proton delocalization, and lead to systematically larger variance in O-H distance.
20 |
21 | - [ML potential training](https://github.com/molmod/psiflow/tree/main/examples/water_train_validate.py) | **water**: simple training and validation script for MACE on a small dataset of water configurations.
22 |
--------------------------------------------------------------------------------
/examples/alanine_replica_exchange.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import psiflow
4 | from psiflow.geometry import Geometry
5 | from psiflow.hamiltonians import MACEHamiltonian
6 | from psiflow.sampling import Walker, replica_exchange, sample
7 |
8 |
9 | def compute_dihedrals(positions):
10 | indices_phi = np.array([4, 6, 8, 14], dtype=int)
11 | indices_psi = np.array([6, 8, 14, 16], dtype=int)
12 |
13 | dihedrals = []
14 | for indices in [indices_phi, indices_psi]:
15 | p1 = positions[:, indices[0], :]
16 | p2 = positions[:, indices[1], :]
17 | p3 = positions[:, indices[2], :]
18 | p4 = positions[:, indices[3], :]
19 |
20 | # Calculate vectors between the points
21 | v1 = p2 - p1
22 | v2 = p3 - p2
23 | v3 = p4 - p3
24 |
25 | # Normal vectors of the planes formed by the atoms
26 | n1 = np.cross(v1, v2)
27 | n2 = np.cross(v2, v3)
28 |
29 | # Normalize the normal vectors
30 | n1_norm = np.linalg.norm(n1, axis=1, keepdims=True)
31 | n2_norm = np.linalg.norm(n2, axis=1, keepdims=True)
32 | n1 = n1 / n1_norm
33 | n2 = n2 / n2_norm
34 |
35 | dot_product = np.einsum("ij,ij->i", n1, n2)
36 | dot_product = np.clip(dot_product, -1.0, 1.0)
37 |         dihedrals.append(np.arccos(dot_product))  # arccos gives the unsigned angle in [0, pi]
38 | return dihedrals[0], dihedrals[1] # phi, psi
39 |
40 |
41 | def main():
42 | c7eq = np.array([2.8, 2.9]) # noqa: F841
43 | c7ax = np.array([1.2, -0.9]) # noqa: F841
44 | alanine = Geometry.from_string( # starts in c7ax config
45 | """
46 | 22
47 | Properties=species:S:1:pos:R:3 pbc="F F F"
48 | H 12.16254811 17.00740464 -2.89412387
49 | C 12.83019906 16.90038734 -2.04015291
50 | H 12.24899130 16.91941920 -1.11925017
51 | H 13.51243976 17.75054269 -2.01566384
52 | C 13.65038992 15.63877411 -2.06030255
53 | O 14.36738511 15.33906728 -1.11622456
54 | N 13.53865222 14.88589532 -3.17304444
55 | H 12.86898792 15.18433500 -3.85740375
56 | C 14.28974353 13.67606132 -3.48863158
57 | H 14.01914560 13.42643243 -4.51320992
58 | C 15.79729109 13.88220294 -3.42319959
59 | H 16.12104919 14.14072623 -2.41784410
60 | H 16.29775468 12.96420765 -3.73059171
61 | H 16.09643748 14.68243453 -4.10096574
62 | C 13.86282687 12.43546588 -2.69127862
63 | O 13.58257313 11.40703144 -3.28015921
64 | N 13.87365846 12.57688288 -1.35546630
65 | H 14.15017274 13.47981654 -0.98516877
66 | C 13.53768820 11.50108113 -0.46287859
67 | H 14.38392004 11.24258036 0.17699860
68 | H 12.69022125 11.76658121 0.17241519
69 | H 13.27142638 10.63298597 -1.06170510
70 | """
71 | )
72 | mace = MACEHamiltonian.mace_mp0()
73 |
74 | walkers = []
75 | for temperature in [150, 200, 250, 300, 400, 500, 600, 700, 800, 900, 1000, 1100]:
76 | walker = Walker(
77 | alanine,
78 | mace,
79 | temperature=temperature,
80 | )
81 | walkers.append(walker)
82 | replica_exchange(walkers, trial_frequency=50)
83 |
84 | outputs = sample(walkers, steps=20000, step=200)
85 | phi, psi = compute_dihedrals(outputs[0].trajectory.get("positions").result())
86 | for f, s in zip(phi, psi): # some c7eq conformations should appear here
87 | print("{:5.3f} {:5.3f}".format(f, s))
88 |
89 |
90 | if __name__ == "__main__":
91 | with psiflow.load():
92 | main()
93 |
--------------------------------------------------------------------------------
/examples/data/acetaldehyde.xyz:
--------------------------------------------------------------------------------
1 | 7
2 | Properties=species:S:1:pos:R:3
3 | O 0.694151672 0.776743934 -0.455455855
4 | C 0.195993254 -0.270095005 -0.307053207
5 | C -0.846060202 -0.538006022 0.669585079
6 | H 0.515801613 -1.097661033 -0.987914453
7 | H -0.589257101 -0.505600908 1.733123281
8 | H -1.553309062 0.309375207 0.558315778
9 | H -1.411674563 -1.440354174 0.5617281699
10 |
--------------------------------------------------------------------------------
/examples/data/ani500k_cc_cpu.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/examples/data/ani500k_cc_cpu.model
--------------------------------------------------------------------------------
/examples/data/cp2k_input.txt:
--------------------------------------------------------------------------------
1 | &FORCE_EVAL
2 | STRESS_TENSOR ANALYTICAL
3 | METHOD QS
4 | &DFT
5 | BASIS_SET_FILE_NAME BASIS_MOLOPT_UZH
6 | POTENTIAL_FILE_NAME POTENTIAL_UZH
7 | CHARGE 0
8 | &QS
9 | METHOD GPW
10 | EPS_DEFAULT 1.0E-12
11 | &END QS
12 | &XC
13 | &XC_FUNCTIONAL
14 | &GGA_X_RPBE
15 | &END GGA_X_RPBE
16 | &GGA_C_PBE
17 | &END GGA_C_PBE
18 | &END XC_FUNCTIONAL
19 | !&XC_FUNCTIONAL PBE
20 | !&END XC_FUNCTIONAL
21 | !&VDW_POTENTIAL
22 | ! POTENTIAL_TYPE PAIR_POTENTIAL
23 | ! &PAIR_POTENTIAL
24 | ! TYPE DFTD3(BJ)
25 | ! REFERENCE_FUNCTIONAL PBE
26 | ! PARAMETER_FILE_NAME dftd3.dat
27 | ! &END PAIR_POTENTIAL
28 | !&END VDW_POTENTIAL
29 | &END XC
30 | &SCF
31 | EPS_SCF 1.0E-8
32 | SCF_GUESS RESTART
33 | MAX_SCF 20
34 | &OT
35 | MINIMIZER DIIS
36 | PRECONDITIONER FULL_ALL
37 | &END OT
38 | &OUTER_SCF
39 | EPS_SCF 1.0E-8
40 | MAX_SCF 5
41 | &END OUTER_SCF
42 | &END SCF
43 | &MGRID
44 | CUTOFF 1000
45 | REL_CUTOFF 60
46 | &END MGRID
47 | &END DFT
48 | &SUBSYS
49 | &KIND O
50 | BASIS_SET TZVP-MOLOPT-PBE-GTH-q6
51 | POTENTIAL GTH-PBE-q6
52 | &END KIND
53 | &KIND H
54 | BASIS_SET TZVP-MOLOPT-PBE-GTH-q1
55 | POTENTIAL GTH-PBE-q1
56 | &END KIND
57 | &END SUBSYS
58 | &END FORCE_EVAL
59 |
--------------------------------------------------------------------------------
/examples/data/h2o_32.xyz:
--------------------------------------------------------------------------------
1 | 96
2 | Lattice="9.8528 0 0 0 9.8528 0 0 0 9.8528"
3 | O 2.280398 9.146539 5.088696
4 | O 1.251703 2.406261 7.769908
5 | O 1.596302 6.920128 0.656695
6 | O 2.957518 3.771868 1.877387
7 | O 0.228972 5.884026 6.532308
8 | O 9.023431 6.119654 0.092451
9 | O 7.256289 8.493641 5.772041
10 | O 5.090422 9.467016 0.743177
11 | O 6.330888 7.363471 3.747750
12 | O 7.763819 8.349367 9.279457
13 | O 8.280798 3.837153 5.799282
14 | O 8.878250 2.025797 1.664102
15 | O 9.160372 0.285100 6.871004
16 | O 4.962043 4.134437 0.173376
17 | O 2.802896 8.690383 2.435952
18 | O 9.123223 3.549232 8.876721
19 | O 1.453702 1.402538 2.358278
20 | O 6.536550 1.146790 7.609732
21 | O 2.766709 0.881503 9.544263
22 | O 0.856426 2.075964 5.010625
23 | O 6.386036 1.918950 0.242690
24 | O 2.733023 4.452756 5.850203
25 | O 4.600039 9.254314 6.575944
26 | O 3.665373 6.210561 3.158420
27 | O 3.371648 6.925594 7.476036
28 | O 5.287920 3.270653 6.155080
29 | O 5.225237 6.959594 9.582991
30 | O 0.846293 5.595877 3.820630
31 | O 9.785620 8.164617 3.657879
32 | O 8.509982 4.430362 2.679946
33 | O 1.337625 8.580920 8.272484
34 | O 8.054437 9.221335 1.991376
35 | H 1.762019 9.820429 5.528454
36 | H 3.095987 9.107088 5.588186
37 | H 0.554129 2.982634 8.082024
38 | H 1.771257 2.954779 7.182181
39 | H 2.112148 6.126321 0.798136
40 | H 1.776389 7.463264 1.424030
41 | H 3.754249 3.824017 1.349436
42 | H 3.010580 4.524142 2.466878
43 | H 0.939475 5.243834 6.571945
44 | H 0.515723 6.520548 5.877445
45 | H 9.852960 6.490366 0.393593
46 | H 8.556008 6.860063 -0.294256
47 | H 7.886607 7.941321 6.234506
48 | H 7.793855 9.141028 5.315813
49 | H 4.467366 9.971162 0.219851
50 | H 5.758685 10.102795 0.998994
51 | H 6.652693 7.917443 3.036562
52 | H 6.711966 7.743594 4.539279
53 | H 7.751955 8.745180 10.150905
54 | H 7.829208 9.092212 8.679343
55 | H 8.312540 3.218330 6.528858
56 | H 8.508855 4.680699 6.189990
57 | H 9.742249 1.704975 1.922581
58 | H 8.799060 2.876412 2.095861
59 | H 9.505360 1.161677 6.701213
60 | H 9.920117 -0.219794 7.161006
61 | H 4.749903 4.186003 -0.758595
62 | H 5.248010 5.018415 0.403676
63 | H 3.576065 9.078451 2.026264
64 | H 2.720238 9.146974 3.273164
65 | H 9.085561 4.493058 9.031660
66 | H 9.215391 3.166305 9.749133
67 | H 1.999705 2.060411 1.927796
68 | H 1.824184 0.564565 2.081195
69 | H 7.430334 0.849764 7.438978
70 | H 6.576029 1.537017 8.482885
71 | H 2.415851 1.576460 8.987338
72 | H 2.276957 0.099537 9.289499
73 | H 1.160987 1.818023 4.140602
74 | H 0.350256 2.874437 4.860741
75 | H 5.768804 2.638450 0.375264
76 | H 7.221823 2.257514 0.563730
77 | H 3.260797 5.243390 5.962382
78 | H 3.347848 3.732214 5.988196
79 | H 5.328688 9.073059 5.982269
80 | H 5.007063 9.672150 7.334875
81 | H 4.566850 6.413356 3.408312
82 | H 3.273115 7.061666 2.963521
83 | H 3.878372 7.435003 6.843607
84 | H 3.884673 6.966316 8.283117
85 | H 5.918240 3.116802 5.451335
86 | H 5.355924 2.495093 6.711958
87 | H 5.071858 7.687254 10.185667
88 | H 6.106394 7.112302 9.241707
89 | H 1.637363 5.184910 4.169264
90 | H 0.427645 4.908936 3.301903
91 | H 9.971698 7.227076 3.709104
92 | H 10.647901 8.579244 3.629806
93 | H 8.046808 5.126383 2.213838
94 | H 7.995317 4.290074 3.474723
95 | H 1.872601 7.864672 7.930401
96 | H 0.837635 8.186808 8.987268
97 | H 8.314696 10.115534 2.212519
98 | H 8.687134 8.667252 2.448452
99 |
--------------------------------------------------------------------------------
/examples/data/vinyl_alcohol.xyz:
--------------------------------------------------------------------------------
1 | 7
2 | Properties=species:S:1:pos:R:3
3 | O 1.041371715 -0.216863172 0.001603252
4 | C -0.098316254 0.512294574 -0.01021628
5 | C -1.225162144 -0.248210652 0.020868361
6 | H -0.087363805 1.596485281 -0.07557041
7 | H 0.61765221 -1.094559605 -0.02702971
8 | H -2.216985293 0.211688229 -0.00469380
9 | H -1.115257687 -1.357478425 -0.04507284
10 |
--------------------------------------------------------------------------------
/examples/h2_static_dynamic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.units import _c, second
3 |
4 | import psiflow
5 | from psiflow.free_energy import compute_frequencies, compute_harmonic
6 | from psiflow.geometry import Geometry
7 | from psiflow.hamiltonians import MACEHamiltonian
8 | from psiflow.sampling import Walker, optimize, sample
9 |
10 |
11 | def frequency_dynamic(start, hamiltonian):
12 | walker = Walker(
13 | start,
14 | hamiltonian=hamiltonian,
15 | temperature=None, # NVE!
16 | timestep=0.25,
17 | )
18 |
19 | step = 10
20 | output = sample(
21 | [walker],
22 | steps=20,
23 | step=step,
24 | max_force=10,
25 | )[0]
26 | positions = output.trajectory.get("positions").result()
27 | distances = np.linalg.norm(positions[:, 0, :] - positions[:, 1, :], axis=1)
28 | distances -= np.mean(distances) # don't need average interatomic distance
29 |
30 | timestep = walker.timestep * 1e-15 * step
31 | spectrum = np.abs(np.fft.fft(distances))
32 |
33 | freq_axis = np.fft.fftfreq(len(distances), timestep)
34 | index = np.argmax(spectrum[np.where(freq_axis > 0)])
35 | peak_frequency = freq_axis[np.where(freq_axis > 0)][index]
36 |
37 |     return peak_frequency / (100 * _c)  # convert Hz to inverse cm
38 |
39 |
40 | def frequency_static(start, hamiltonian):
41 | minimum = optimize(
42 | start,
43 | hamiltonian,
44 | 2000,
45 | ftol=1e-4,
46 | )
47 | hessian = compute_harmonic(
48 | minimum,
49 | hamiltonian,
50 | asr="crystal",
51 | pos_shift=0.001,
52 | )
53 | frequencies = compute_frequencies(hessian, minimum).result()
54 |     return frequencies[-1] * second / (100 * _c)  # highest frequency, converted to inverse cm
55 |
56 |
57 | def main():
58 | geometry = Geometry.from_data(
59 | numbers=np.ones(2),
60 | positions=np.array([[0, 0, 0], [0.8, 0, 0]]),
61 | cell=None,
62 | )
63 | mace = MACEHamiltonian.mace_mp0()
64 |
65 | dynamic = frequency_dynamic(geometry, mace)
66 | static = frequency_static(geometry, mace)
67 |
68 | print("H2 frequency (dynamic) [inv(cm)]: {}".format(dynamic))
69 | print("H2 frequency (static) [inv(cm)]: {}".format(static))
70 |
71 |
72 | if __name__ == "__main__":
73 | with psiflow.load():
74 | main()
75 |
--------------------------------------------------------------------------------
/examples/iron_bulk_modulus.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.build import bulk, make_supercell
3 |
4 | import psiflow
5 | from psiflow.geometry import Geometry
6 | from psiflow.hamiltonians import MACEHamiltonian
7 | from psiflow.sampling import Walker, sample
8 |
9 |
10 | def main():
11 | iron = bulk("Fe", "bcc", a=2.8)
12 | geometry = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3)))
13 | mace = MACEHamiltonian.mace_mp0()
14 |
15 | pressures = (-10 + np.arange(5) * 5) * 1e3 # in MPa
16 | walkers = [Walker(geometry, mace, temperature=300, pressure=p) for p in pressures]
17 |
18 | name = "volume{angstrom3}"
19 | outputs = sample(walkers, steps=4000, step=50, observables=[name])
20 | volumes = [np.mean(o[name].result()) for o in outputs]
21 |
22 |     p = np.polyfit(volumes, pressures, deg=1)  # linear fit P(V) = p[0] * V + p[1]
23 |     volume0 = (-1.0) * p[1] / p[0]  # equilibrium volume, where P = 0
24 |     bulk_modulus = (-1.0) * volume0 * p[0] / 1000  # B = -V0 * (dP/dV), in GPa
25 | print("bulk modulus [GPa]: {}".format(bulk_modulus))
26 |
27 |
28 | if __name__ == "__main__":
29 | with psiflow.load():
30 | main()
31 |
--------------------------------------------------------------------------------
/examples/iron_harmonic_fcc_bcc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.build import bulk, make_supercell
3 | from ase.units import kB
4 |
5 | import psiflow
6 | from psiflow.data import Dataset
7 | from psiflow.free_energy import Integration, compute_harmonic, harmonic_free_energy
8 | from psiflow.geometry import Geometry
9 | from psiflow.hamiltonians import Harmonic, MACEHamiltonian
10 | from psiflow.sampling import optimize
11 |
12 |
13 | def main():
14 | iron = bulk("Fe", "bcc", a=2.87, orthorhombic=True)
15 | bcc = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3)))
16 | iron = bulk("Fe", "fcc", a=3.57, orthorhombic=True)
17 | fcc = Geometry.from_atoms(make_supercell(iron, 3 * np.eye(3)))
18 |
19 | geometries = {
20 | "bcc": bcc,
21 | "fcc": fcc,
22 | }
23 | theoretical = {name: None for name in geometries}
24 | simulated = {name: None for name in geometries}
25 |
26 | mace = MACEHamiltonian.mace_mp0("small")
27 | scaling = 0.9
28 | temperature = 800
29 | beta = 1 / (kB * temperature)
30 |
31 | for name in geometries:
32 | minimum = optimize(
33 | geometries[name], mace, ftol=1e-4, steps=1000, mode="bfgstrm"
34 | )
35 | hessian = compute_harmonic(minimum, mace, pos_shift=0.001)
36 |
37 | # simulate
38 | harmonic = Harmonic(minimum, hessian)
39 | integration = Integration(
40 | harmonic,
41 | temperatures=[temperature],
42 | delta_hamiltonian=(scaling - 1) * harmonic,
43 | delta_coefficients=np.linspace(0, 1, num=4, endpoint=True),
44 | )
45 | walkers = integration.create_walkers( # noqa: F841
46 | Dataset([harmonic.reference_geometry]),
47 | timestep=3,
48 | ) # heavy atoms
49 | integration.sample(steps=500, step=10, start=300)
50 | integration.compute_gradients()
51 |
52 | reduced_f = integration.along_delta(temperature=temperature).result()
53 | f_harmonic = harmonic_free_energy(
54 | hessian,
55 | temperature=temperature,
56 | quantum=False,
57 | )
58 | simulated[name] = (f_harmonic.result() + reduced_f[-1]) / beta
59 |
60 | # theoretical
61 | f_harmonic_scaled = harmonic_free_energy(
62 | scaling * hessian.result(),
63 | temperature=temperature,
64 | quantum=False,
65 | )
66 | theoretical[name] = f_harmonic_scaled.result() / beta
67 |
68 | ddF = theoretical["bcc"] - theoretical["fcc"]
69 | print("theoretical delta(delta(F)) [eV]: {}".format(ddF))
70 |
71 | ddF = simulated["bcc"] - simulated["fcc"]
72 | print(" simulated delta(delta(F)) [eV]: {}".format(ddF))
73 |
74 |
75 | if __name__ == "__main__":
76 | with psiflow.load():
77 | main()
78 |
--------------------------------------------------------------------------------
/examples/online_learning_pimd.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import psiflow
4 | from psiflow.reference import CP2K
5 | from psiflow.data import Dataset
6 | from psiflow.sampling import Walker
7 | from psiflow.models import MACE
8 | from psiflow.hamiltonians import MACEHamiltonian
9 | from psiflow.learning import Learning
10 |
11 |
12 | def main():
13 | path_output = Path.cwd() / 'output'
14 |
15 | with open('data/cp2k_input.txt', 'r') as f: cp2k_input = f.read()
16 | cp2k = CP2K(cp2k_input)
17 |
18 | model = MACE(
19 | batch_size=4,
20 | lr=0.02,
21 | max_ell=3,
22 | r_max=6.5,
23 | energy_weight=100,
24 | correlation=3,
25 | max_L=1,
26 | num_channels=24,
27 | patience=8,
28 | scheduler_patience=4,
29 | max_num_epochs=200,
30 | )
31 | model.add_atomic_energy('H', cp2k.compute_atomic_energy('H', box_size=9))
32 | model.add_atomic_energy('O', cp2k.compute_atomic_energy('O', box_size=9))
33 |
34 | state = Dataset.load('data/water_train.xyz')[0]
35 | walkers = (
36 | Walker(state, temperature=300, pressure=0.1).multiply(40) +
37 | Walker(state, temperature=450, pressure=0.1).multiply(40) +
38 | Walker(state, temperature=600, pressure=0.1).multiply(40)
39 | )
40 | learning = Learning(
41 | cp2k,
42 | path_output,
43 | wandb_project='psiflow_examples',
44 | wandb_group='water_learning_pimd',
45 | )
46 |
47 | model, walkers = learning.passive_learning(
48 | model,
49 | walkers,
50 | hamiltonian=MACEHamiltonian.mace_mp0(),
51 | steps=10000,
52 | step=2000,
53 | )
54 |
55 | for i in range(3):
56 | model, walkers = learning.active_learning(
57 | model,
58 | walkers,
59 | steps=2000,
60 | )
61 |
62 | # PIMD phase for low-temperature walkers
63 | for j, walker in enumerate(walkers[:40]):
64 | walker.nbeads = 32
65 | model, walkers = learning.active_learning(
66 | model,
67 | walkers,
68 | steps=500,
69 | )
70 |
71 |
72 | if __name__ == '__main__':
73 | with psiflow.load():
74 | main()
75 |
--------------------------------------------------------------------------------
/examples/proton_jump_plumed.py:
--------------------------------------------------------------------------------
1 | from ase.units import kJ, mol
2 | import numpy as np
3 |
4 | import psiflow
5 | from psiflow.data import Dataset
6 | from psiflow.geometry import Geometry
7 | from psiflow.hamiltonians import PlumedHamiltonian, MACEHamiltonian
8 | from psiflow.sampling import Walker, sample, quench, Metadynamics, replica_exchange
9 |
10 |
11 | PLUMED_INPUT = """UNITS LENGTH=A ENERGY=kj/mol
12 | d_C: DISTANCE ATOMS=3,5
13 | d_O: DISTANCE ATOMS=1,5
14 | CV: COMBINE ARG=d_C,d_O COEFFICIENTS=1,-1 PERIODIC=NO
15 |
16 | """
17 |
18 |
19 | def get_bias(kappa: float, center: float):
20 | plumed_str = PLUMED_INPUT
21 | plumed_str += '\n'
22 | plumed_str += 'RESTRAINT ARG=CV KAPPA={} AT={}\n'.format(kappa, center)
23 | return PlumedHamiltonian(plumed_str)
24 |
25 |
26 | def main():
27 | aldehyd = Geometry.load('data/acetaldehyde.xyz')
28 | alcohol = Geometry.load('data/vinyl_alcohol.xyz')
29 |
30 | mace = MACEHamiltonian.mace_cc()
31 | energy = mace.compute([aldehyd, alcohol], 'energy').result()
32 | energy = (energy - np.min(energy)) / (kJ / mol)
33 | print('E_vinyl - E_aldehyde = {:7.3f} kJ/mol'.format(energy[1] - energy[0]))
34 |
35 | # generate initial structures using metadynamics
36 | plumed_str = PLUMED_INPUT
37 | plumed_str += 'METAD ARG=CV PACE=10 SIGMA=0.1 HEIGHT=5\n'
38 | metadynamics = Metadynamics(plumed_str)
39 |
40 | # create 40 identical walkers
41 | walker = Walker(
42 | alcohol,
43 | hamiltonian=mace,
44 | temperature=300,
45 | metadynamics=metadynamics,
46 | )
47 |
48 | # do MTD and create large dataset from all trajectories
49 | outputs = sample([walker], steps=8000, step=50)
50 | data_mtd = sum([o.trajectory for o in outputs], start=Dataset([]))
51 | data_mtd.save('mtd.xyz')
52 |
53 | # initialize walkers for umbrella sampling
54 | walkers = []
55 | for i, center in enumerate(np.linspace(1, 3, num=16)):
56 | bias = get_bias(kappa=1500, center=center)
57 | hamiltonian = mace + bias
58 | walker = Walker(alcohol, hamiltonian=hamiltonian, temperature=300)
59 | walkers.append(walker)
60 | quench(walkers, data_mtd) # make sure initial structure is reasonable
61 | replica_exchange(walkers, trial_frequency=100) # use REX for improved sampling
62 |
63 | outputs = sample(walkers, steps=1000, step=10)
64 |
65 |
66 | if __name__ == '__main__':
67 |     with psiflow.load():
68 | main()
69 |
--------------------------------------------------------------------------------
/examples/submit/hortense.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | parsl_log_level: DEBUG
3 | container_engine: 'apptainer'
4 | container_uri: 'oras://ghcr.io/molmod/psiflow:main_cu118'
5 | default_threads: 8
6 | ModelEvaluation:
7 | cores_per_worker: 12
8 | gpu: True
9 | max_simulation_time: 20
10 | env_vars:
11 | KMP_BLOCKTIME: "1"
12 | slurm:
13 | partition: "gpu_rome_a100"
14 | account: "2023_070"
15 | nodes_per_block: 1
16 | cores_per_node: 48
17 | max_blocks: 1
18 | walltime: "12:00:00"
19 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=4\n"
20 | ModelTraining:
21 | cores_per_worker: 12
22 | gpu: true
23 | max_training_time: 40
24 | env_vars:
25 | OMP_PROC_BIND: "spread"
26 | slurm:
27 | partition: "gpu_rome_a100"
28 | account: "2023_070"
29 | nodes_per_block: 1
30 | cores_per_node: 12
31 | max_blocks: 1
32 | walltime: "12:00:00"
33 | scheduler_options: "#SBATCH --clusters=dodrio\n#SBATCH --gpus=1\n"
34 | CP2K:
35 | cores_per_worker: 64
36 | max_evaluation_time: 30
37 | launch_command: 'apptainer exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 -bind-to core cp2k.psmp'
38 | slurm:
39 | partition: "cpu_rome"
40 | account: "2024_079"
41 | nodes_per_block: 1
42 | cores_per_node: 64
43 | max_blocks: 2
44 | walltime: "06:00:00"
45 | scheduler_options: "#SBATCH --clusters=dodrio\n"
46 | ...
47 |
--------------------------------------------------------------------------------
/examples/submit/lumi.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | parsl_log_level: DEBUG
3 | container_engine: 'singularity'
4 | container_uri: 'oras://ghcr.io/molmod/psiflow:main_rocm5.6'
5 | default_threads: 8
6 | CP2K:
7 | cores_per_worker: 32
8 | max_evaluation_time: 20
9 | launch_command: 'singularity exec -e --no-init oras://ghcr.io/molmod/cp2k:2024.1 /opt/entry.sh mpirun -np 32 cp2k.psmp'
10 | slurm:
11 | partition: "standard"
12 | account: "project_465001125"
13 | nodes_per_block: 1
14 | cores_per_node: 128
15 | max_blocks: 10
16 | walltime: "01:00:00"
17 | ModelEvaluation:
18 | cores_per_worker: 7
19 | gpu: True
20 | slurm:
21 | partition: "standard-g"
22 | account: "project_465001125"
23 | nodes_per_block: 1
24 | cores_per_node: 56
25 | max_blocks: 5
26 | walltime: "01:00:00"
27 | scheduler_options: "#SBATCH --gres=gpu:8\n"
28 | worker_init: "ml LUMI/23.09 && ml partition/G && ml rocm/5.6\n"
29 | ModelTraining:
30 | cores_per_worker: 7
31 | gpu: true
32 | multigpu: true
33 | slurm:
34 | partition: "standard-g"
35 | account: "project_465001125"
36 | nodes_per_block: 1
37 | cores_per_node: 56
38 | walltime: "01:00:00"
39 | scheduler_options: "#SBATCH --gres=gpu:8\n"
40 | worker_init: "ml LUMI/23.09 && ml partition/G && ml rocm/5.6\n"
41 | ...
42 |
--------------------------------------------------------------------------------
/examples/submit/submit_hortense.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # List of filenames
4 | files=(
5 | "h2_static_dynamic.py"
6 | "iron_bulk_modulus.py"
7 | "iron_harmonic_fcc_bcc.py"
8 | "water_cp2k_noise.py"
9 | "water_path_integral_md.py"
10 | "water_train_validate.py"
11 | "alanine_replica_exchange.py"
12 | )
13 |
14 | curl -O https://raw.githubusercontent.com/molmod/psiflow/main/examples/hortense.yaml
15 |
16 | run_dir=$(pwd)/run_examples
17 | mkdir $run_dir && cp hortense.yaml $run_dir && cd $run_dir
18 |
19 | # Loop over each filename
20 | for filename in "${files[@]}"
21 | do
22 | name="${filename%.*}"
23 | mkdir $name
24 | cp hortense.yaml $name
25 |
26 | cat > $name/job.sh <
--------------------------------------------------------------------------------
/examples/water_path_integral_md.py:
--------------------------------------------------------------------------------
45 |         print("nbeads = {:3d} --> std(O-H) = {} A".format(nbeads, std))
46 |
47 |
48 | if __name__ == "__main__":
49 | with psiflow.load():
50 | main()
51 |
--------------------------------------------------------------------------------
/examples/water_train_validate.py:
--------------------------------------------------------------------------------
1 | import psiflow
2 | from psiflow.data import Dataset, compute_rmse
3 | from psiflow.models import MACE
4 |
5 |
6 | def main():
7 | data = Dataset.load("data/water_train.xyz")
8 | model = MACE(
9 | batch_size=2,
10 | lr=0.02,
11 | max_ell=3,
12 | r_max=5.5,
13 | energy_weight=100,
14 | correlation=3,
15 | max_L=1,
16 | num_channels=16,
17 | max_num_epochs=20,
18 | swa=False,
19 | )
20 |
21 | train, valid = data.split(0.9, shuffle=True)
22 | model.initialize(train)
23 | model.train(train, valid)
24 | hamiltonian = model.create_hamiltonian()
25 |
26 | target_e = data.get("per_atom_energy")
27 | target_f = data.get("forces")
28 |
29 | data_predicted = data.evaluate(hamiltonian)
30 | predict_e = data_predicted.get("per_atom_energy")
31 | predict_f = data_predicted.get("forces")
32 |
33 | e_rmse = compute_rmse(target_e, predict_e)
34 | f_rmse = compute_rmse(target_f, predict_f)
35 |
36 | print("RMSE(energy) [meV/atom]: {}".format(e_rmse.result() * 1000))
37 | print("RMSE(forces) [meV/angstrom]: {}".format(f_rmse.result() * 1000))
38 |
39 |
40 | if __name__ == "__main__":
41 | with psiflow.load():
42 | main()
43 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: psiflow
2 | theme:
3 | favicon: icon.svg
4 | name:
5 | material
6 | #font:
7 | # text: overpass
8 | palette:
9 | primary: teal
10 | accent: yellow
11 | scheme: default
12 | logo: icon.svg
13 | features:
14 | - content.code.copy
15 | - navigation.instant
16 | - navigation.tracking
17 | #- navigation.tabs
18 | #- navigation.tabs.sticky
19 | - navigation.indexes
20 | - navigation.sections
21 | - navigation.expand
22 | - toc.integrate
23 | - toc.follow
24 | nav:
25 | - overview: index.md
26 | - atomic geometries: data.md
27 | - hamiltonians: hamiltonian.md
28 | - sampling: sampling.md
29 | - QM calculations: reference.md
30 | - ML potentials: models.md
31 | - online learning: learning.md
32 | - free energy calculations: free_energy.md
33 | - setup & configuration: configuration.md
34 |
35 | plugins:
36 | - mkdocstrings:
37 | python:
38 | docstring_style: google
39 | repo_url: https://github.com/molmod/psiflow
40 | markdown_extensions:
41 | - tables
42 | - md_in_html
43 | - admonition
44 | - footnotes
45 | - pymdownx.highlight:
46 | anchor_linenums: true
47 | - pymdownx.inlinehilite
48 | - pymdownx.snippets
49 | - pymdownx.superfences
50 | - pymdownx.details
51 | - pymdownx.critic
52 | - pymdownx.caret
53 | - pymdownx.keys
54 | - pymdownx.mark
55 | - pymdownx.tilde
56 | - pymdownx.arithmatex:
57 | generic: true
58 | - attr_list
59 | - pymdownx.emoji:
60 | emoji_index: !!python/name:materialx.emoji.twemoji
61 | emoji_generator: !!python/name:materialx.emoji.to_svg
62 |
63 | #extra_javascript:
64 | # - javascripts/mathjax.js
65 | # - https://polyfill.io/v3/polyfill.min.js?features=es6
66 | # - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
67 | #
68 | extra_javascript:
69 | - javascripts/mathjax.js
70 | - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
71 |
--------------------------------------------------------------------------------
/psiflow/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import typeguard
4 |
5 | from .config import setup_slurm_config # noqa: F401
6 | from .execution import ExecutionContextLoader
7 | from .serialization import ( # noqa: F401
8 | _DataFuture,
9 | deserialize,
10 | serializable,
11 | serialize,
12 | )
13 |
14 |
15 | @typeguard.typechecked
16 | def resolve_and_check(path: Path) -> Path:
17 | path = path.resolve()
18 | if Path.cwd() in path.parents:
19 | pass
20 | elif path.exists() and Path.cwd().samefile(path):
21 | pass
22 | else:
23 | raise ValueError(
24 | "requested file and/or path at location: {}"
25 | "\nwhich is not in the present working directory: {}"
26 | "\npsiflow can only load and/or save in its present "
27 | "working directory because this is the only directory"
28 | " that will get bound into the container.".format(path, Path.cwd())
29 | )
30 | return path
31 |
32 |
33 | load = ExecutionContextLoader.load
34 | context = ExecutionContextLoader.context
35 | wait = ExecutionContextLoader.wait
36 |
--------------------------------------------------------------------------------
/psiflow/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset import Computable, Dataset, aggregate_multiple, compute # noqa: F401
2 | from .utils import compute_mae, compute_rmse # noqa: F401
3 |
--------------------------------------------------------------------------------
/psiflow/free_energy/__init__.py:
--------------------------------------------------------------------------------
1 | from .integration import Integration # noqa: F401
2 | from .phonons import ( # noqa: F401
3 | compute_frequencies,
4 | compute_harmonic,
5 | harmonic_free_energy,
6 | )
7 |
--------------------------------------------------------------------------------
/psiflow/free_energy/integration.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from typing import Optional, Union
4 |
5 | import numpy as np
6 | import typeguard
7 | from ase.units import bar, kB
8 | from parsl.app.app import python_app
9 |
10 | from psiflow.data import Dataset
11 | from psiflow.hamiltonians import Hamiltonian, Zero
12 | from psiflow.sampling import SimulationOutput, Walker, quench, randomize, sample
13 | from psiflow.utils.apps import compute_sum, multiply
14 |
15 | length = python_app(len, executors=["default_threads"])
16 | take_mean = python_app(np.mean, executors=["default_threads"])
17 |
18 |
19 | @typeguard.typechecked
20 | def _integrate(x: np.ndarray, *args: float) -> np.ndarray:
21 | import scipy.integrate
22 |
23 | assert len(args) == len(x)
24 | y = np.array(args, dtype=float)
25 | return scipy.integrate.cumulative_trapezoid(y, x=x, initial=0.0)
26 |
27 |
28 | integrate = python_app(_integrate, executors=["default_threads"])
29 |
30 |
31 | @typeguard.typechecked
32 | class ThermodynamicState:
33 | temperature: float
34 | natoms: int
35 | delta_hamiltonian: Optional[Hamiltonian]
36 | pressure: Optional[float]
37 | mass: Optional[float]
38 |
39 | def __init__(
40 | self,
41 | temperature: float,
42 | natoms: int,
43 | delta_hamiltonian: Optional[Hamiltonian],
44 | pressure: Optional[float],
45 | mass: Optional[float],
46 | ):
47 | self.temperature = temperature
48 | self.natoms = natoms
49 | self.delta_hamiltonian = delta_hamiltonian
50 | self.pressure = pressure
51 | self.mass = mass
52 |
53 | self.gradients = {
54 | "temperature": None,
55 | "delta": None,
56 | "pressure": None,
57 | "mass": None,
58 | }
59 |
60 | def gradient(
61 | self,
62 | output: SimulationOutput,
63 | hamiltonian: Optional[Hamiltonian] = None,
64 | ):
65 | self.temperature_gradient(output, hamiltonian)
66 | self.delta_gradient(output)
67 | if self.mass is not None:
68 | self.mass_gradient(output)
69 |
70 | def temperature_gradient(
71 | self,
72 | output: SimulationOutput,
73 | hamiltonian: Optional[Hamiltonian] = None,
74 | ):
75 | energies = output.get_energy(hamiltonian)
76 | _energy = take_mean(energies)
77 | if self.pressure is not None: # use enthalpy
78 | volumes = output["volume{angstrom3}"]
79 | pv = multiply(take_mean(volumes), 10 * bar * self.pressure)
80 | _energy = compute_sum(_energy, pv)
81 |
82 |         # grad_u = - <U> / (kB * T**2)
83 |         # grad_k = - <E_kin> / (kB * T**2) = -(3N - 3) / (2 * T)
84 | gradient_u = multiply(
85 | _energy,
86 | (-1.0) / (kB * self.temperature**2),
87 | )
88 | gradient_k = (-1.0) * (3 * self.natoms - 3) / (2 * self.temperature)
89 | self.gradients["temperature"] = compute_sum(gradient_u, gradient_k)
90 |
91 | def delta_gradient(self, output: SimulationOutput):
92 | energies = output.get_energy(self.delta_hamiltonian)
93 | self.gradients["delta"] = multiply(
94 | take_mean(energies),
95 | 1 / (kB * self.temperature),
96 | )
97 |
98 |     def mass_gradient(self, output: SimulationOutput):
99 |         raise NotImplementedError
100 |
101 |
102 | @typeguard.typechecked
103 | class Integration:
104 | def __init__(
105 | self,
106 | hamiltonian: Hamiltonian,
107 | temperatures: Union[list[float], np.ndarray],
108 | delta_hamiltonian: Optional[Hamiltonian] = None,
109 | delta_coefficients: Union[list[float], np.ndarray, None] = None,
110 | pressure: Optional[float] = None,
111 | ):
112 | self.hamiltonian = hamiltonian
113 | self.temperatures = np.array(temperatures, dtype=float)
114 | if delta_hamiltonian is not None:
115 | assert delta_coefficients is not None
116 | self.delta_hamiltonian = delta_hamiltonian
117 | self.delta_coefficients = np.array(delta_coefficients, dtype=float)
118 | else:
119 | self.delta_coefficients = np.array([0.0])
120 | self.delta_hamiltonian = Zero()
121 | self.pressure = pressure
122 |
123 | assert len(np.unique(self.temperatures)) == len(self.temperatures)
124 | assert len(np.unique(self.delta_coefficients)) == len(self.delta_coefficients)
125 |
126 | self.states = []
127 | self.walkers = []
128 | self.outputs = []
129 |
130 | def create_walkers(
131 | self,
132 | dataset: Dataset,
133 | initialize_by: str = "quench",
134 | **walker_kwargs,
135 | ) -> list[Walker]:
136 | natoms = len(dataset[0].result())
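        # NOTE: walkers/states are created delta-major, i.e. index = i_delta * ntemperatures + i_T;
        # along_delta() and along_temperature() rely on this ordering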
137 | for delta in self.delta_coefficients:
138 | for T in self.temperatures:
139 | hamiltonian = self.hamiltonian + delta * self.delta_hamiltonian
140 | walker = Walker(
141 | dataset[0], # do quench later
142 | hamiltonian,
143 | temperature=T,
144 | **walker_kwargs,
145 | )
146 | self.walkers.append(walker)
147 | state = ThermodynamicState(
148 | temperature=T,
149 | natoms=natoms,
150 | delta_hamiltonian=self.delta_hamiltonian,
151 | pressure=self.pressure,
152 | mass=None,
153 | )
154 | self.states.append(state)
155 |
156 | # initialize walkers
157 | if initialize_by == "quench":
158 | quench(self.walkers, dataset)
159 | elif initialize_by == "shuffle":
160 | randomize(self.walkers, dataset)
161 | else:
162 |             raise ValueError("unknown initialization: {}".format(initialize_by))
163 | return self.walkers
164 |
165 | def sample(self, **sampling_kwargs):
166 | self.outputs[:] = sample(
167 | self.walkers,
168 | **sampling_kwargs,
169 | )
170 |
171 | def compute_gradients(self):
172 | for output, state in zip(self.outputs, self.states):
173 | state.gradient(output, hamiltonian=self.hamiltonian)
174 |
175 | def along_delta(self, temperature: Optional[float] = None):
176 | if temperature is None:
177 | assert self.ntemperatures == 1
178 | temperature = self.temperatures[0]
179 | index = np.where(self.temperatures == temperature)[0][0]
180 | assert self.temperatures[index] == temperature
181 | N = self.ntemperatures
182 | states = [self.states[N * i + index] for i in range(self.ndeltas)]
183 |
184 | # do integration
185 | x = self.delta_coefficients
186 | y = [state.gradients["delta"] for state in states]
187 | f = integrate(x, *y)
188 | return f
189 | # return multiply(f, kB * temperature)
190 |
191 | def along_temperature(self, delta_coefficient: Optional[float] = None):
192 | if delta_coefficient is None:
193 | assert self.ndeltas == 1
194 | delta_coefficient = self.delta_coefficients[0]
195 | index = np.where(self.delta_coefficients == delta_coefficient)[0][0]
196 | assert self.delta_coefficients[index] == delta_coefficient
197 | N = self.ntemperatures
198 | states = [self.states[N * index + i] for i in range(self.ntemperatures)]
199 |
200 | # do integration
201 | x = self.temperatures
202 | y = [state.gradients["temperature"] for state in states]
203 | f = integrate(x, *y)
204 | return f
205 | # return multiply(f, kB * self.temperatures)
206 |
207 | @property
208 | def ntemperatures(self):
209 | return len(self.temperatures)
210 |
211 | @property
212 | def ndeltas(self):
213 | return len(self.delta_coefficients)
214 |
--------------------------------------------------------------------------------
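A minimal usage sketch of the `Integration` class above, following its own call order (`create_walkers`, `sample`, `compute_gradients`, `along_temperature`). The `mace` hamiltonian, the input file name, and the keyword arguments forwarded to `sample()` are illustrative assumptions, not part of this module:

```python
import numpy as np

from psiflow.data import Dataset
from psiflow.free_energy import Integration  # assumed export

data = Dataset.load("train.xyz")  # placeholder input file
integration = Integration(
    mace,                                    # any Hamiltonian instance
    temperatures=np.linspace(300, 600, 4),   # values must be unique
    pressure=None,                           # NVT; set a float to include a pV term
)
integration.create_walkers(data, initialize_by="quench")
integration.sample(steps=10_000, step=100)   # forwarded to psiflow.sampling.sample
integration.compute_gradients()
f = integration.along_temperature()          # AppFuture of the cumulative integral
```
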
/psiflow/free_energy/phonons.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import xml.etree.ElementTree as ET
4 | from typing import Optional, Union
5 |
6 | import numpy as np
7 | import parsl
8 | import typeguard
9 | from ase.units import Bohr, Ha, J, _c, _hplanck, _k, kB, second
10 | from parsl.app.app import bash_app, python_app
11 | from parsl.dataflow.futures import AppFuture
12 |
13 | import psiflow
14 | from psiflow.data import Dataset
15 | from psiflow.geometry import Geometry, mass_weight
16 | from psiflow.hamiltonians import Hamiltonian, MixtureHamiltonian
17 | from psiflow.sampling.sampling import (
18 | setup_sockets,
19 | label_forces,
20 | make_force_xml,
21 | serialize_mixture,
22 | make_start_command,
23 | make_client_command
24 | )
25 | from psiflow.utils.apps import multiply
26 | from psiflow.utils.io import load_numpy, save_xml
27 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
28 |
29 |
30 | @typeguard.typechecked
31 | def _compute_frequencies(hessian: np.ndarray, geometry: Geometry) -> np.ndarray:
32 | assert hessian.shape[0] == hessian.shape[1]
33 | assert len(geometry) * 3 == hessian.shape[0]
34 | return np.sqrt(np.linalg.eigvalsh(mass_weight(hessian, geometry))) / (2 * np.pi)
35 |
36 |
37 | compute_frequencies = python_app(_compute_frequencies, executors=["default_threads"])
38 |
39 |
40 | @typeguard.typechecked
41 | def _harmonic_free_energy(
42 | frequencies: Union[float, np.ndarray],
43 | temperature: float,
44 | quantum: bool = False,
45 | threshold: float = 1, # in invcm
46 | ) -> float:
47 | if isinstance(frequencies, float):
48 | frequencies = np.array([frequencies], dtype=float)
49 |
50 | threshold_ = threshold / second * (100 * _c) # from invcm to ASE
51 | frequencies = frequencies[np.abs(frequencies) > threshold_]
52 |
53 | # _hplanck in J s
54 | # _k in J / K
55 | if quantum:
56 | arg = (-1.0) * _hplanck * frequencies * second / (_k * temperature)
57 | F = kB * temperature * np.sum(np.log(1 - np.exp(arg)))
58 | F += _hplanck * J * second * np.sum(frequencies) / 2
59 | else:
60 | constant = kB * temperature * np.log(_hplanck)
61 | actual = np.log(frequencies / (kB * temperature))
62 | F = len(frequencies) * constant + kB * temperature * np.sum(actual)
63 | F /= kB * temperature
64 | return F
65 |
66 |
67 | harmonic_free_energy = python_app(_harmonic_free_energy, executors=["default_threads"])
68 |
69 |
70 | @typeguard.typechecked
71 | def setup_motion(
72 | mode: str,
73 | asr: str,
74 | pos_shift: float,
75 | energy_shift: float,
76 | ) -> ET.Element:
77 | motion = ET.Element("motion", mode="vibrations")
78 | vibrations = ET.Element("vibrations", mode="fd")
79 | pos = ET.Element("pos_shift")
80 | pos.text = " {} ".format(pos_shift)
81 | vibrations.append(pos)
82 | energy = ET.Element("energy_shift")
83 | energy.text = " {} ".format(energy_shift)
84 | vibrations.append(energy)
85 | prefix = ET.Element("prefix")
86 | prefix.text = " output "
87 | vibrations.append(prefix)
88 | asr_ = ET.Element("asr")
89 | asr_.text = " {} ".format(asr)
90 | vibrations.append(asr_)
91 | motion.append(vibrations)
92 | return motion
93 |
94 |
95 | def _execute_ipi(
96 | hamiltonian_names: list[str],
97 | client_args: list[list[str]],
98 | command_server: str,
99 | command_client: str,
100 | stdout: str = "",
101 | stderr: str = "",
102 | inputs: list = [],
103 | outputs: list = [],
104 | parsl_resource_specification: Optional[dict] = None,
105 | ) -> str:
106 | command_start = make_start_command(command_server, inputs[0], inputs[1])
107 | commands_client = []
108 | for i, name in enumerate(hamiltonian_names):
109 | args = client_args[i]
110 | assert len(args) == 1 # only have one client per hamiltonian
111 | for arg in args:
112 |             commands_client.append(make_client_command(command_client, name, inputs[2 + i], inputs[1], arg))
113 |
114 | command_end = f'{command_server} --cleanup'
115 |     command_copy = f'cp i-pi.output_full.hess {outputs[0].filepath}'
116 |
117 | command_list = [
118 | TMP_COMMAND,
119 | CD_COMMAND,
120 | command_start,
121 | *commands_client,
122 | "wait",
123 | command_end,
124 | command_copy,
125 | ]
126 | return "\n".join(command_list)
127 |
128 |
129 | execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
130 |
131 |
132 | @typeguard.typechecked
133 | def compute_harmonic(
134 | state: Union[Geometry, AppFuture],
135 | hamiltonian: Hamiltonian,
136 | mode: str = "fd",
137 | asr: str = "crystal",
138 | pos_shift: float = 0.01,
139 | energy_shift: float = 0.00095,
140 | ) -> AppFuture:
141 | hamiltonian: MixtureHamiltonian = 1 * hamiltonian
142 | names = label_forces(hamiltonian)
143 | sockets = setup_sockets(names)
144 | forces = make_force_xml(hamiltonian, names)
145 |
146 | initialize = ET.Element("initialize", nbeads="1")
147 | start = ET.Element("file", mode="ase", cell_units="angstrom")
148 | start.text = " start_0.xyz "
149 | initialize.append(start)
150 | motion = setup_motion(mode, asr, pos_shift, energy_shift)
151 |
152 | system = ET.Element("system")
153 | system.append(initialize)
154 | system.append(motion)
155 | system.append(forces)
156 |
157 | # output = setup_output(keep_trajectory)
158 |
159 | simulation = ET.Element("simulation", mode="static")
160 | # simulation.append(output)
161 | for socket in sockets:
162 | simulation.append(socket)
163 | simulation.append(system)
164 | total_steps = ET.Element("total_steps")
165 | total_steps.text = " {} ".format(1000000)
166 | simulation.append(total_steps)
167 |
168 | context = psiflow.context()
169 | definition = context.definitions["ModelEvaluation"]
170 | input_future = save_xml(
171 | simulation,
172 | outputs=[context.new_file("input_", ".xml")],
173 | ).outputs[0]
174 | inputs = [
175 | input_future,
176 | Dataset([state]).extxyz,
177 | ]
178 | inputs += serialize_mixture(hamiltonian, dtype="float64")
179 |
180 | client_args = []
181 | for name in names:
182 | args = definition.get_client_args(name, 1, "vibrations")
183 | client_args.append(args)
184 | outputs = [
185 | context.new_file("hess_", ".txt"),
186 | ]
187 |
188 | command_server = definition.server_command()
189 | command_client = definition.client_command()
190 | resources = definition.wq_resources(1)
191 |
192 | result = execute_ipi(
193 | names,
194 | client_args,
195 | command_server,
196 | command_client,
197 | stdout=parsl.AUTO_LOGNAME,
198 | stderr=parsl.AUTO_LOGNAME,
199 | inputs=inputs,
200 | outputs=outputs,
201 | parsl_resource_specification=resources,
202 | )
203 | return multiply(load_numpy(inputs=[result.outputs[0]]), Ha / Bohr**2)
--------------------------------------------------------------------------------
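A sketch of how the three apps in this module chain together, assuming an active psiflow context; `minimum` stands for a relaxed `Geometry` and `hamiltonian` for any `Hamiltonian`, and the imports are assumed exports of `psiflow.free_energy`. Note that `compute_harmonic` returns the Hessian in eV/Å² and `_harmonic_free_energy` returns F in units of kB·T:

```python
from psiflow.free_energy import (  # assumed exports
    compute_harmonic,
    compute_frequencies,
    harmonic_free_energy,
)

hessian = compute_harmonic(minimum, hamiltonian, asr="crystal")  # eV/angstrom**2
frequencies = compute_frequencies(hessian, minimum)              # ASE frequency units
f = harmonic_free_energy(frequencies, temperature=300.0, quantum=True)
print(f.result())  # in units of kB*T, cf. the final division in _harmonic_free_energy
```
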
/psiflow/models/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Union
3 |
4 | import typeguard
5 | import yaml
6 | from ase.data import chemical_symbols
7 | from parsl.data_provider.files import File
8 |
9 | import psiflow
10 | from psiflow.models._mace import MACE, MACEConfig # noqa: F401
11 | from psiflow.models.model import Model
12 | from psiflow.utils.apps import copy_data_future
13 |
14 |
15 | @typeguard.typechecked
16 | def load_model(path: Union[Path, str]) -> Model:
17 | path = psiflow.resolve_and_check(Path(path))
18 | assert path.is_dir()
19 | classes = [
20 | MACE,
21 | ]
22 |     for model_cls in classes + [None]:  # None acts as a sentinel
23 |         assert model_cls is not None, "no model config found in {}".format(path)
24 | name = model_cls.__name__
25 | path_config = path / (name + ".yaml")
26 | if path_config.is_file():
27 | break
28 | with open(path_config, "r") as f:
29 | config = yaml.load(f, Loader=yaml.FullLoader)
30 | atomic_energies = {}
31 |     for key in list(config):
32 |         if key.startswith("atomic_energies_"):
33 |             element = key.split("atomic_energies_")[-1]
34 |             assert element in chemical_symbols
35 |             atomic_energies[element] = config.pop(key)
36 |     model = model_cls(**config)
37 |     for element, energy in atomic_energies.items():
38 |         model.add_atomic_energy(element, energy)
39 |     path_model = path / "{}.pth".format(name)
40 |     if path_model.is_file():
41 |         model.model_future = copy_data_future(
42 |             inputs=[File(str(path_model))],
43 |             outputs=[psiflow.context().new_file("model_", ".pth")],
44 |         ).outputs[0]
45 |     return model
46 |
--------------------------------------------------------------------------------
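`load_model` is the inverse of `Model.save`: it expects a directory containing `<ClassName>.yaml` (with atomic energies inlined as `atomic_energies_<element>` keys) and, for an initialized model, `<ClassName>.pth`. A round-trip sketch, assuming an active psiflow context and an existing `model`:

```python
from psiflow.models import load_model

model.save("./my_model")             # writes MACE.yaml (+ MACE.pth if initialized)
restored = load_model("./my_model")  # atomic energies are read back from the yaml
```
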
/psiflow/models/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from dataclasses import asdict
4 | from pathlib import Path
5 | from typing import Optional, Union
6 |
7 | import parsl
8 | import typeguard
9 | from parsl.data_provider.files import File
10 | from parsl.dataflow.futures import AppFuture
11 |
12 | import psiflow
13 | from psiflow.data import Dataset
14 | from psiflow.utils.apps import copy_data_future, log_message, setup_logger
15 | from psiflow.utils.io import save_yaml
16 |
17 | logger = setup_logger(__name__)
18 |
19 |
20 | @typeguard.typechecked
21 | @psiflow.serializable
22 | class Model:
23 | _config: dict
24 | model_future: Optional[psiflow._DataFuture]
25 | atomic_energies: dict
26 |
27 | def add_atomic_energy(self, element: str, energy: Union[float, AppFuture]) -> None:
28 | assert self.model_future is None, (
29 | "cannot add atomic energies after model has "
30 | "been initialized; reset model, add energy, and reinitialize"
31 | )
32 | if element in self.atomic_energies:
33 | if isinstance(energy, AppFuture):
34 | energy = energy.result()
35 | if isinstance(self.atomic_energies[element], AppFuture):
36 | existing = self.atomic_energies[element].result()
37 | assert energy == existing, (
38 | "model already has atomic energy "
39 | "for element {} ({}), which is different from {}"
40 | "".format(element, existing, energy)
41 | )
42 | self.atomic_energies[element] = energy
43 |
44 | def train(self, training: Dataset, validation: Dataset) -> None:
45 | log_message(
46 | logger,
47 | "training model using {} states for training and {} for validation",
48 | training.length(),
49 | validation.length(),
50 | )
51 | inputs = [self.model_future]
52 | if self.do_offset:
53 | inputs += [
54 | training.subtract_offset(**self.atomic_energies).extxyz,
55 | validation.subtract_offset(**self.atomic_energies).extxyz,
56 | ]
57 | else:
58 | inputs += [
59 | training.extxyz,
60 | validation.extxyz,
61 | ]
62 | future = self._train(
63 | dict(self._config),
64 | stdout=parsl.AUTO_LOGNAME,
65 | stderr=parsl.AUTO_LOGNAME,
66 | inputs=inputs,
67 | outputs=[psiflow.context().new_file("model_", ".pth")],
68 | )
69 | self.model_future = future.outputs[0]
70 |
71 | def initialize(self, dataset: Dataset) -> None:
72 | """Initializes the model based on a dataset"""
73 | assert self.model_future is None
74 | if self.do_offset:
75 | inputs = [dataset.subtract_offset(**self.atomic_energies).extxyz]
76 | else:
77 | inputs = [dataset.extxyz]
78 | future = self._initialize(
79 | self._config,
80 | stdout=parsl.AUTO_LOGNAME,
81 | stderr=parsl.AUTO_LOGNAME,
82 | inputs=inputs,
83 | outputs=[psiflow.context().new_file("model_", ".pth")],
84 | )
85 | self.model_future = future.outputs[0]
86 |
87 | def reset(self) -> None:
88 | self.model_future = None
89 |
90 | def save(
91 | self,
92 | path: Union[Path, str],
93 | ) -> None:
94 | path = psiflow.resolve_and_check(Path(path))
95 | path.mkdir(exist_ok=True)
96 |
97 | name = self.__class__.__name__
98 | path_config = path / "{}.yaml".format(name)
99 |
100 | atomic_energies = {
101 | "atomic_energies_" + key: value
102 | for key, value in self.atomic_energies.items()
103 | }
104 | save_yaml(
105 | self._config,
106 | outputs=[File(str(path_config))],
107 | **atomic_energies,
108 | )
109 | if self.model_future is not None:
110 | path_model = path / "{}.pth".format(name)
111 | copy_data_future(
112 | inputs=[self.model_future],
113 | outputs=[File(str(path_model))],
114 | )
115 |
116 | def copy(self) -> Model:
117 | model = self.__class__(**asdict(self.config))
118 | for element, energy in self.atomic_energies.items():
119 | model.add_atomic_energy(element, energy)
120 | if self.model_future is not None:
121 | model.model_future = copy_data_future(
122 | inputs=[self.model_future],
123 | outputs=[psiflow.context().new_file("model_", ".pth")],
124 | ).outputs[0]
125 | return model
126 |
127 | @property
128 | def do_offset(self) -> bool:
129 | return len(self.atomic_energies) > 0
130 |
131 | @property
132 | def seed(self) -> int:
133 | raise NotImplementedError
134 |
135 | @seed.setter
136 | def seed(self, arg) -> None:
137 | raise NotImplementedError
138 |
--------------------------------------------------------------------------------
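A lifecycle sketch for `Model` subclasses; the `MACE` keyword argument shown is a placeholder, and the atomic energy values are made up. The order matters: `add_atomic_energy` asserts that no `model_future` exists yet, so offsets must be registered before `initialize`:

```python
from psiflow.models import MACE

model = MACE(batch_size=2)             # hypothetical keyword argument
model.add_atomic_energy("H", -13.6)    # placeholder values, before initialize()
model.add_atomic_energy("O", -2041.3)
model.initialize(train)                # train/valid are Dataset placeholders
model.train(train, valid)              # offsets subtracted because do_offset is True
```
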
/psiflow/order_parameters.py:
--------------------------------------------------------------------------------
1 | class OrderParameter:
2 | pass
3 |
--------------------------------------------------------------------------------
/psiflow/reference/__init__.py:
--------------------------------------------------------------------------------
1 | from ._cp2k import CP2K # noqa: F401
2 | from ._dftd3 import D3 # noqa: F401
3 | from .gpaw_ import GPAW # noqa: F401
4 | from .reference import Reference, evaluate # noqa: F401
5 |
--------------------------------------------------------------------------------
/psiflow/reference/_dftd3.py:
--------------------------------------------------------------------------------
1 | import json
2 | from functools import partial
3 |
4 | import numpy as np
5 | import typeguard
6 | from parsl.app.app import bash_app, python_app
7 | from parsl.dataflow.futures import AppFuture
8 |
9 | import psiflow
10 | from psiflow.geometry import Geometry
11 | from psiflow.reference.reference import Reference
12 | from psiflow.utils.apps import copy_app_future
13 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
14 |
15 |
16 | @typeguard.typechecked
17 | def input_string(geometry: Geometry, parameters: dict, properties: tuple) -> str:
18 | geometry_str = geometry.to_string()
19 | data = {
20 | "geometry": geometry_str,
21 | "parameters": parameters,
22 | "properties": properties,
23 | }
24 | return json.dumps(data)
25 |
26 |
27 | def d3_singlepoint_pre(
28 | geometry: Geometry,
29 | parameters: dict,
30 | properties: tuple,
31 | d3_command: str,
32 | stdout: str = "",
33 | stderr: str = "",
34 | ) -> str:
35 | from psiflow.reference._dftd3 import input_string
36 | input_str = input_string(geometry, parameters, properties)
37 | command_list = [
38 | TMP_COMMAND,
39 | CD_COMMAND,
40 | f"echo '{input_str}' > input.json",
41 | f"python -u {d3_command}",
42 | ]
43 | return "\n".join(command_list)
44 |
45 |
46 | @typeguard.typechecked
47 | def d3_singlepoint_post(
48 | geometry: Geometry,
49 | inputs: list = [],
50 | ) -> Geometry:
51 | from psiflow.geometry import new_nullstate
52 |
53 | with open(inputs[0], "r") as f:
54 | lines = f.read().split("\n")
55 |
56 | geometry = new_nullstate()
57 | for i, line in enumerate(lines):
58 | if "CALCULATION SUCCESSFUL" in line:
59 | natoms = int(lines[i + 1])
60 | geometry_str = "\n".join(lines[i + 1 : i + 3 + natoms])
61 | geometry = Geometry.from_string(geometry_str)
62 | assert geometry.energy is not None
63 | geometry.stdout = inputs[0]
64 | return geometry
65 |
66 |
67 | @typeguard.typechecked
68 | @psiflow.serializable
69 | class D3(Reference):
70 |     outputs: list  # JSON round-trips tuples into lists
71 | executor: str
72 | parameters: dict
73 |
74 | def __init__(
75 | self,
76 | **parameters,
77 | ):
78 | self.parameters = parameters
79 | self.outputs = ["energy", "forces"]
80 | self.executor = "default_htex"
81 | self._create_apps()
82 |
83 | def _create_apps(self):
84 | path = "psiflow.reference._dftd3"
85 | d3_command = "$(python -c 'import {}; print({}.__file__)')".format(path, path)
86 | app_pre = bash_app(d3_singlepoint_pre, executors=["default_htex"])
87 | app_post = python_app(d3_singlepoint_post, executors=["default_threads"])
88 | self.app_pre = partial(
89 | app_pre,
90 | parameters=self.parameters,
91 | properties=tuple(self.outputs),
92 | d3_command=d3_command,
93 | )
94 | self.app_post = app_post
95 |
96 | def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
97 |         return copy_app_future(0.0)  # dispersion contributes nothing for an isolated atom
98 |
99 |
100 | if __name__ == "__main__":
101 | from ase import Atoms
102 | from dftd3.ase import DFTD3
103 |
104 | with open("input.json", "r") as f:
105 | input_dict = json.loads(f.read())
106 |
107 | geometry = Geometry.from_string(input_dict["geometry"])
108 | parameters = input_dict["parameters"]
109 | properties = input_dict["properties"]
110 |
111 | atoms = Atoms(
112 | numbers=np.copy(geometry.per_atom.numbers),
113 | positions=np.copy(geometry.per_atom.positions),
114 | cell=np.copy(geometry.cell),
115 | pbc=geometry.periodic,
116 | )
117 |
118 | calculator = DFTD3(**parameters)
119 | atoms.calc = calculator
120 |
121 | if "forces" in properties:
122 | geometry.per_atom.forces[:] = atoms.get_forces()
123 | if "energy" in properties:
124 | geometry.energy = atoms.get_potential_energy()
125 |
126 | output_str = geometry.to_string()
127 | print("CALCULATION SUCCESSFUL")
128 | print(output_str)
129 |
--------------------------------------------------------------------------------
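The `D3` reference forwards its keyword arguments verbatim to `dftd3.ase.DFTD3` inside the `__main__` block above, so the accepted parameters are those of the simple-dftd3 ASE calculator (e.g. `method`, `damping`). A usage sketch, with `dataset` as a placeholder:

```python
from psiflow.reference import D3

d3 = D3(method="pbe", damping="d3bj")  # forwarded to dftd3.ase.DFTD3
energy, forces = d3.compute(dataset, "energy", "forces")
```
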
/psiflow/reference/gpaw_.py:
--------------------------------------------------------------------------------
1 | import json
2 | from functools import partial
3 | from typing import Union
4 |
5 | import numpy as np
6 | import typeguard
7 | from parsl.app.app import bash_app, python_app
8 | from parsl.dataflow.futures import AppFuture
9 |
10 | import psiflow
11 | from psiflow.geometry import Geometry, new_nullstate
12 | from psiflow.reference.reference import Reference
13 | from psiflow.utils.apps import copy_app_future
14 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
15 |
16 |
17 | @typeguard.typechecked
18 | def input_string(geometry: Geometry, gpaw_parameters: dict, properties: tuple) -> str:
19 | geometry_str = geometry.to_string()
20 | data = {
21 | "geometry": geometry_str,
22 | "gpaw_parameters": gpaw_parameters,
23 | "properties": properties,
24 | }
25 | return json.dumps(data)
26 |
27 |
28 | def gpaw_singlepoint_pre(
29 | geometry: Geometry,
30 | gpaw_parameters: dict,
31 | properties: tuple,
32 | gpaw_command: str,
33 | parsl_resource_specification: dict = {},
34 | stdout: str = "",
35 | stderr: str = "",
36 | ) -> str:
37 | from psiflow.reference.gpaw_ import input_string
38 | input_str = input_string(geometry, gpaw_parameters, properties)
39 | write_command = f"echo '{input_str}' > input.json"
40 | command_list = [
41 | TMP_COMMAND,
42 | CD_COMMAND,
43 | write_command,
44 | gpaw_command,
45 | ]
46 | return "\n".join(command_list)
47 |
48 |
49 | @typeguard.typechecked
50 | def gpaw_singlepoint_post(
51 | geometry: Geometry,
52 | inputs: list = [],
53 | ) -> Geometry:
54 | with open(inputs[0], "r") as f:
55 | lines = f.read().split("\n")
56 |
57 | geometry = new_nullstate() # GPAW parsing doesn't require initial geometry
58 | for i, line in enumerate(lines):
59 | if "CALCULATION SUCCESSFUL" in line:
60 | natoms = int(lines[i + 1])
61 | geometry_str = "\n".join(lines[i + 1 : i + 3 + natoms])
62 | geometry = Geometry.from_string(geometry_str)
63 | assert geometry.energy is not None
64 | geometry.stdout = inputs[0]
65 | return geometry
66 |
67 |
68 | @typeguard.typechecked
69 | @psiflow.serializable
70 | class GPAW(Reference):
71 |     outputs: list  # JSON round-trips tuples into lists
72 | executor: str
73 | parameters: dict
74 |
75 | def __init__(
76 | self,
77 | outputs: Union[tuple, list] = ("energy", "forces"),
78 | executor: str = "GPAW",
79 | **parameters,
80 | ):
81 | self.outputs = list(outputs)
82 | self.parameters = parameters
83 | self.executor = executor
84 | self._create_apps()
85 |
86 | def _create_apps(self):
87 | definition = psiflow.context().definitions[self.executor]
88 | gpaw_command = definition.command()
89 | wq_resources = definition.wq_resources()
90 | app_pre = bash_app(gpaw_singlepoint_pre, executors=[self.executor])
91 | app_post = python_app(gpaw_singlepoint_post, executors=["default_threads"])
92 | self.app_pre = partial(
93 | app_pre,
94 | gpaw_parameters=self.parameters,
95 | properties=tuple(self.outputs),
96 | gpaw_command=gpaw_command,
97 | parsl_resource_specification=wq_resources,
98 | )
99 | self.app_post = app_post
100 |
101 | def compute_atomic_energy(self, element, box_size=None) -> AppFuture:
102 | return copy_app_future(0.0) # GPAW computes formation energy by default
103 |
104 |
105 | if __name__ == "__main__":
106 | from ase import Atoms
107 | from ase.calculators.mixing import SumCalculator
108 | from ase.parallel import world
109 | from dftd3.ase import DFTD3
110 | from gpaw import GPAW as GPAWCalculator
111 |
112 | def minimal_box(
113 | atoms: Atoms,
114 | border: float = 0.0,
115 | h: float = 0.2,
116 | multiple: int = 4,
117 | ) -> None:
118 | # inspired by gpaw.cluster.Cluster
119 | if len(atoms) == 0:
120 | return None
121 | min_bounds, max_bounds = np.array(
122 | [np.minimum.reduce(atoms.positions), np.maximum.reduce(atoms.positions)]
123 | )
124 | if isinstance(border, list):
125 | b = np.array(border)
126 | else:
127 | b = np.array([border, border, border])
128 | if not hasattr(h, "__len__"):
129 | h = np.array([h, h, h])
130 | min_bounds -= b
131 | max_bounds += b - min_bounds
132 | grid_points = np.ceil(max_bounds / h / multiple) * multiple
133 | length_diff = grid_points * h - max_bounds
134 | max_bounds += length_diff
135 | min_bounds -= length_diff / 2
136 | shift = tuple(-1.0 * min_bounds)
137 | atoms.translate(shift)
138 | atoms.set_cell(tuple(max_bounds))
139 |
140 | with open("input.json", "r") as f:
141 | input_dict = json.loads(f.read())
142 |
143 | geometry = Geometry.from_string(input_dict["geometry"])
144 | gpaw_parameters = input_dict["gpaw_parameters"]
145 | properties = input_dict["properties"]
146 | d3 = gpaw_parameters.pop("d3", {})
147 |
148 | atoms = Atoms(
149 | numbers=np.copy(geometry.per_atom.numbers),
150 | positions=np.copy(geometry.per_atom.positions),
151 | cell=np.copy(geometry.cell),
152 | pbc=geometry.periodic,
153 | )
154 | if not geometry.periodic:
155 |         minimal_box(  # signature is (atoms, border, h, multiple): pass by keyword
156 |             atoms,
157 |             border=gpaw_parameters.pop("minimal_box_border", 2),  # if present, remove
158 |             h=gpaw_parameters.get("h", 0.2),
159 |             multiple=gpaw_parameters.pop("minimal_box_multiple", 4),
160 | )
161 |
162 | calculator = GPAWCalculator(**gpaw_parameters)
163 | if len(d3) > 0:
164 | calculator = SumCalculator([calculator, DFTD3(**d3)])
165 | atoms.calc = calculator
166 |
167 | if "forces" in properties:
168 | geometry.per_atom.forces[:] = atoms.get_forces()
169 | if "energy" in properties:
170 | geometry.energy = atoms.get_potential_energy()
171 |
172 | output_str = geometry.to_string()
173 | if world.rank == 0:
174 | print("CALCULATION SUCCESSFUL")
175 | print(output_str)
176 |
--------------------------------------------------------------------------------
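A usage sketch for the `GPAW` reference. Plain keywords (`mode`, `xc`, `h`) are passed through to the GPAW calculator; the reserved keys `d3`, `minimal_box_border`, and `minimal_box_multiple` are consumed by the wrapper script above before the calculator is constructed. `dataset` is a placeholder:

```python
from psiflow.reference import GPAW

gpaw = GPAW(
    mode="fd",
    xc="PBE",
    h=0.2,                                    # also reused for the minimal box
    d3={"method": "pbe", "damping": "d3bj"},  # optional, added via SumCalculator
    minimal_box_border=2,                     # only relevant for molecules
)
energy = gpaw.compute(dataset, "energy")
```
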
/psiflow/reference/orca.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/molmod/psiflow/b9573589fd14c5950d884a4f70a3b028e3d2afb5/psiflow/reference/orca.py
--------------------------------------------------------------------------------
/psiflow/reference/reference.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import logging
4 | from typing import ClassVar, Optional, Union
5 |
6 | import numpy as np
7 | import parsl
8 | import typeguard
9 | from ase.data import atomic_numbers
10 | from parsl.app.app import join_app, python_app
11 | from parsl.dataflow.futures import AppFuture
12 |
13 | import psiflow
14 | from psiflow.data import Computable, Dataset
15 | from psiflow.geometry import Geometry, NullState
16 | from psiflow.utils.apps import copy_app_future, unpack_i
17 |
18 | logger = logging.getLogger(__name__) # logging per module
19 |
20 |
21 | @typeguard.typechecked
22 | def _extract_energy(state: Geometry):
23 | if state.energy is None:
24 | return 1e10
25 | else:
26 | return state.energy
27 |
28 |
29 | extract_energy = python_app(_extract_energy, executors=["default_threads"])
30 |
31 |
32 | @join_app
33 | @typeguard.typechecked
34 | def get_minimum_energy(element, configs, *energies):
35 | logger.info("atomic energies for element {}:".format(element))
36 | for config, energy in zip(configs, energies):
37 | logger.info("\t{} eV; ".format(energy) + str(config))
38 | energy = min(energies)
39 |     assert energy != 1e10, "atomic energy calculation of {} failed".format(element)
40 | return copy_app_future(energy)
41 |
42 |
43 | @typeguard.typechecked
44 | def _nan_if_unsuccessful(
45 | geometry: Geometry,
46 | result: Geometry,
47 | ) -> Geometry:
48 | if result == NullState:
49 | geometry.energy = None
50 | geometry.per_atom.forces[:] = np.nan
51 |         geometry.stress = None
52 | geometry.stdout = result.stdout
53 | return geometry
54 | else:
55 | return result
56 |
57 |
58 | nan_if_unsuccessful = python_app(_nan_if_unsuccessful, executors=["default_threads"])
59 |
60 |
61 | @join_app
62 | @typeguard.typechecked
63 | def evaluate(
64 | geometry: Geometry,
65 | reference: Reference,
66 | ) -> AppFuture:
67 | if geometry == NullState:
68 | return copy_app_future(NullState)
69 | else:
70 | future = reference.app_pre(
71 | geometry,
72 | stdout=parsl.AUTO_LOGNAME,
73 | stderr=parsl.AUTO_LOGNAME,
74 | )
75 | result = reference.app_post(
76 | geometry=geometry.copy(),
77 | inputs=[future.stdout, future.stderr, future],
78 | )
79 | return nan_if_unsuccessful(geometry, result)
80 |
81 |
82 | @join_app
83 | @typeguard.typechecked
84 | def compute_dataset(
85 | dataset: Dataset,
86 | length: int,
87 | reference: Reference,
88 | ) -> AppFuture:
89 | from psiflow.data.utils import extract_quantities
90 |
91 | geometries = dataset.geometries() # read it once
92 | evaluated = [evaluate(unpack_i(geometries, i), reference) for i in range(length)]
93 | future = extract_quantities(
94 | tuple(reference.outputs),
95 | None,
96 | None,
97 | *evaluated,
98 | )
99 | return future
100 |
101 |
102 | @typeguard.typechecked
103 | @psiflow.serializable
104 | class Reference(Computable):
105 | outputs: tuple
106 | batch_size: ClassVar[int] = 1 # not really used
107 |
108 | def compute(
109 | self,
110 | arg: Union[Dataset, Geometry, AppFuture, list],
111 | *outputs: Optional[Union[str, tuple]],
112 | ):
113 | if isinstance(arg, Dataset):
114 | dataset = arg
115 | elif isinstance(arg, list):
116 | dataset = Dataset(arg)
117 |         elif isinstance(arg, (AppFuture, Geometry)):
118 | dataset = Dataset([arg])
119 | compute_outputs = compute_dataset(dataset, dataset.length(), self)
120 | if len(outputs) == 0:
121 | outputs_ = tuple(self.outputs)
122 | else:
123 | outputs_ = outputs
124 | to_return = []
125 | for output in outputs_:
126 | if output not in self.outputs:
127 | raise ValueError("output {} not in {}".format(output, self.outputs))
128 | index = self.outputs.index(output)
129 | to_return.append(compute_outputs[index])
130 | if len(outputs_) == 1:
131 | return to_return[0]
132 | else:
133 | return to_return
134 |
135 | def compute_atomic_energy(self, element, box_size=None):
136 | energies = []
137 | references = self.get_single_atom_references(element)
138 | configs = [c for c, _ in references]
139 | if box_size is not None:
140 | state = Geometry.from_data(
141 | numbers=np.array([atomic_numbers[element]]),
142 | positions=np.array([[0, 0, 0]]),
143 | cell=np.eye(3) * box_size,
144 | )
145 | else:
146 | state = Geometry(
147 | numbers=np.array([atomic_numbers[element]]),
148 | positions=np.array([[0, 0, 0]]),
149 | cell=np.zeros((3, 3)),
150 | )
151 | for _, reference in references:
152 | energies.append(extract_energy(evaluate(state, reference)))
153 | return get_minimum_energy(element, configs, *energies)
154 |
155 | def get_single_atom_references(self, element):
156 | return [(None, self)]
157 |
--------------------------------------------------------------------------------
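The two entry points of this module in practice; `cp2k` stands for any `Reference` instance and `geometry`/`dataset` for existing data. Failed singlepoints come back with `energy = None` and NaN forces via `nan_if_unsuccessful`, and `compute_atomic_energy` returns the minimum over the candidate references:

```python
from psiflow.reference import evaluate

labeled = evaluate(geometry, cp2k)                # AppFuture of a labeled Geometry
e, f = cp2k.compute(dataset, "energy", "forces")  # one future per requested output
e_O = cp2k.compute_atomic_energy("O", box_size=10.0)
```
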
/psiflow/sampling/__init__.py:
--------------------------------------------------------------------------------
1 | from .metadynamics import Metadynamics # noqa: F401
2 | # from .optimize import optimize, optimize_dataset # noqa: F401
3 | from .output import SimulationOutput # noqa: F401
4 | from .sampling import sample # noqa: F401
5 | from .walker import ReplicaExchange # noqa: F401
6 | from .walker import Walker # noqa: F401
7 | from .walker import quench # noqa: F401
8 | from .walker import randomize # noqa: F401
9 | from .walker import replica_exchange # noqa: F401
10 |
--------------------------------------------------------------------------------
/psiflow/sampling/_ase.py:
--------------------------------------------------------------------------------
1 | """
2 | Structure optimisation through ASE
3 | TODO: do we need to check for very large forces?
4 | TODO: what units are pressure?
5 | TODO: what to do when max_steps is reached before converging?
6 | TODO: timeout is duplicated code
7 | """
8 |
9 | import os
10 | import json
11 | import warnings
12 | import signal
13 | import argparse
14 | from pathlib import Path
15 | from types import SimpleNamespace
16 |
17 | import ase
18 | import ase.io
19 | import numpy as np
20 | from ase.io.extxyz import save_calc_results
21 | from ase.calculators.calculator import Calculator, all_properties
22 | from ase.calculators.mixing import LinearCombinationCalculator
23 | from ase.optimize.precon import PreconLBFGS
24 | from ase.filters import FrechetCellFilter
25 |
26 | from psiflow.geometry import Geometry
27 | from psiflow.functions import function_from_json, EnergyFunction
28 | from psiflow.sampling.utils import TimeoutException, timeout_handler
29 |
30 |
31 | ALLOWED_MODES: tuple[str, ...] = ('full', 'fix_volume', 'fix_shape', 'fix_cell')
32 | FILE_OUT: str = 'out.xyz'
33 | FILE_TRAJ: str = 'out.traj'
34 |
35 |
36 | class FunctionCalculator(Calculator):
37 | implemented_properties = ['energy', 'free_energy', 'forces', 'stress']
38 |
39 | def __init__(self, function: EnergyFunction, **kwargs):
40 | super().__init__(**kwargs)
41 | self.function = function
42 |
43 | def calculate(
44 | self,
45 | atoms=None,
46 | properties=all_properties,
47 | system_changes=None,
48 | ):
49 | super().calculate(atoms, properties, system_changes)
50 | geometry = Geometry.from_atoms(self.atoms)
51 | self.results = self.function(geometry)
52 | self.results['free_energy'] = self.results['energy'] # required by optimiser
53 |
54 |
55 | def log_state(atoms: ase.Atoms) -> None:
56 |     """Print a short summary of the current atoms state."""
57 | def make_log(data: list[tuple[str]]):
58 |         """Pretty-print rows of (name, value, unit) tuples."""
59 | txt = ['', 'Current atoms state:']
60 | txt += [f'{_[0]:<15}: {_[1]:<25}[{_[2]}]' for _ in data]
61 | txt += 'End', ''
62 | print(*txt, sep='\n')
63 |
64 | data = []
65 | if atoms.calc:
66 |         energy, max_force = atoms.get_potential_energy(), np.linalg.norm(atoms.get_forces(), axis=1).max()
67 | else:
68 | energy, max_force = [np.nan] * 2
69 | data += ('Energy', f'{energy:.2f}', 'eV'), ('Max. force', f'{max_force:.2E}', 'eV/A')
70 |
71 | if not all(atoms.pbc):
72 | make_log(data)
73 | return
74 |
75 | volume, cell = atoms.get_volume(), atoms.get_cell().cellpar().round(3)
76 |     data += ('Cell volume', f'{volume:.2f}', 'A^3'),
77 | data += ('Box norms', str(cell[:3])[1:-1], 'A'), ('Box angles', str(cell[3:])[1:-1], 'degrees')
78 |
79 | make_log(data)
80 | return
81 |
82 |
83 | def get_dof_filter(atoms: ase.Atoms, mode: str, pressure: float) -> ase.Atoms | FrechetCellFilter:
84 |     """Wrap atoms in a FrechetCellFilter with the cell DOFs allowed by `mode`."""
85 | if mode == 'fix_cell':
86 | if pressure:
87 | warnings.warn('Ignoring external pressure..')
88 | return atoms
89 | kwargs = {'mask': [True] * 6, 'scalar_pressure': pressure} # enable cell DOFs
90 | if mode == 'fix_shape':
91 | kwargs['hydrostatic_strain'] = True
92 | if mode == 'fix_volume':
93 | kwargs['constant_volume'] = True
94 | if pressure:
95 | warnings.warn('Ignoring applied pressure during fixed volume optimisation..')
96 | return FrechetCellFilter(atoms, **kwargs)
97 |
98 |
99 | def run(args: SimpleNamespace):
100 |     """Run a structure optimisation as specified by the JSON config."""
101 | config = json.load(Path(args.input_config).open('r'))
102 |
103 | atoms = ase.io.read(args.start_xyz)
104 | if not any(atoms.pbc):
105 | atoms.center(vacuum=0) # optimiser mysteriously requires a nonzero unit cell
106 | if config['mode'] != 'fix_cell':
107 | config['mode'] = 'fix_cell'
108 | warnings.warn('Molecular structure is not periodic. Ignoring cell..')
109 |
110 | # construct calculator by combining hamiltonians
111 | assert args.path_hamiltonian is not None
112 | print('Making calculator from:', *config['forces'], sep='\n')
113 | functions = [function_from_json(p) for p in args.path_hamiltonian]
114 | calc = LinearCombinationCalculator(
115 | [FunctionCalculator(f) for f in functions],
116 | [float(h['weight']) for h in config['forces']]
117 | )
118 |
119 | atoms.calc = calc
120 | dof = get_dof_filter(atoms, config['mode'], config['pressure'])
121 | opt = PreconLBFGS(dof, trajectory=FILE_TRAJ if config['keep_trajectory'] else None)
122 |
123 | print(f"pid: {os.getpid()}")
124 | print(f"CPU affinity: {os.sched_getaffinity(os.getpid())}")
125 | log_state(atoms)
126 | try:
127 | opt.run(fmax=config['f_max'], steps=config['max_steps'])
128 | except TimeoutException:
129 | print('OPTIMISATION TIMEOUT')
130 | # TODO: what to do here?
131 | return
132 |
133 | log_state(atoms)
134 | save_calc_results(atoms, calc_prefix='', remove_atoms_calc=True)
135 | if not any(atoms.pbc):
136 | atoms.cell = None # remove meaningless cell
137 | ase.io.write(FILE_OUT, atoms)
138 | print('OPTIMISATION SUCCESSFUL')
139 | return
140 |
141 |
142 | def clean(args: SimpleNamespace):
143 |     """Convert optimisation outputs to extxyz and move them into place."""
144 | from psiflow.data.utils import _write_frames
145 |
146 | geometry = Geometry.load(FILE_OUT)
147 | _write_frames(geometry, outputs=[args.output_xyz])
148 | if Path(FILE_TRAJ).is_file():
149 | traj = [at for at in ase.io.trajectory.Trajectory(FILE_TRAJ)]
150 | geometries = [Geometry.from_atoms(at) for at in traj]
151 | _write_frames(*geometries, outputs=[args.output_traj])
152 | print('FILES MOVED')
153 | return
154 |
155 |
156 | def main():
157 | signal.signal(signal.SIGTERM, timeout_handler)
158 | parser = argparse.ArgumentParser()
159 | subparsers = parser.add_subparsers(help='what to do', dest='action')
160 | run_parser = subparsers.add_parser("run")
161 | run_parser.set_defaults(func=run)
162 | run_parser.add_argument(
163 | "--path_hamiltonian",
164 | action='extend',
165 | nargs='*',
166 | type=str,
167 | )
168 | run_parser.add_argument(
169 | "--input_config",
170 | type=str,
171 | default=None,
172 | )
173 | run_parser.add_argument(
174 | "--start_xyz",
175 | type=str,
176 | default=None,
177 | )
178 | clean_parser = subparsers.add_parser("clean")
179 | clean_parser.set_defaults(func=clean)
180 | clean_parser.add_argument(
181 | "--output_xyz",
182 | type=str,
183 | default=None,
184 | )
185 | clean_parser.add_argument(
186 | "--output_traj",
187 | type=str,
188 | default=None,
189 | )
190 | args = parser.parse_args()
191 | args.func(args)
192 |
193 |
194 |
--------------------------------------------------------------------------------
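For reference, the JSON config consumed by `run` mirrors the dict assembled in `psiflow/sampling/ase.py` below; the values here are illustrative:

```python
config = {
    "task": "ASE optimisation",
    "forces": [{"forcefield": "MACE0", "weight": "1.0", "file": "hamiltonian.json"}],
    "mode": "full",           # one of ALLOWED_MODES
    "f_max": 1e-3,            # convergence threshold in eV/A
    "pressure": 0.0,          # passed to the FrechetCellFilter
    "max_steps": 5000,
    "keep_trajectory": False,
}
```
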
/psiflow/sampling/ase.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from typing import Optional, Union
4 |
5 | import parsl
6 | import typeguard
7 | from parsl.app.app import bash_app, join_app
8 | from parsl.dataflow.futures import AppFuture, DataFuture
9 |
10 | import psiflow
11 | from psiflow.data import Dataset
12 | from psiflow.data.utils import write_frames
13 | from psiflow.geometry import Geometry
14 | from psiflow.hamiltonians import Hamiltonian
15 | from psiflow.utils.io import dump_json
16 | from psiflow.sampling.sampling import serialize_mixture, label_forces
17 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
18 |
19 | from ._ase import ALLOWED_MODES
20 |
21 | EXECUTABLE = 'psiflow-ase-opt' # not stored in ModelEvaluation (yet?)
22 |
23 |
24 | def _execute_ase(
25 | command_launch: str,
26 | inputs: list[DataFuture],
27 | outputs: list[DataFuture],
28 | env_vars: dict = {},
29 | stdout: str = "",
30 | stderr: str = "",
31 | parsl_resource_specification: Optional[dict] = None,
32 | ) -> str:
33 | env_command = 'export ' + ' '.join([f"{name}={value}" for name, value in env_vars.items()])
34 | command_start = ' '.join([
35 | f'{command_launch} run --input_config={inputs[0].filepath} --start_xyz={inputs[1].filepath}',
36 | *[f'--path_hamiltonian={future.filepath}' for future in inputs[2:]], '&'
37 | ])
38 | command_end = f'{command_launch} clean --output_xyz={outputs[0].filepath}'
39 | if len(outputs) == 2:
40 | command_end += f' --output_traj={outputs[1].filepath}'
41 |
42 | command_list = [
43 | TMP_COMMAND,
44 | CD_COMMAND,
45 | env_command,
46 | command_start,
47 | "wait",
48 | command_end,
49 | ]
50 | return "\n".join(command_list)
51 |
52 |
53 | execute_ase = bash_app(_execute_ase, executors=["ModelEvaluation"])
54 |
55 |
56 | @typeguard.typechecked
57 | def optimize(
58 | state: Union[Geometry, AppFuture],
59 | hamiltonian: Hamiltonian,
60 | mode: str = 'full',
61 | steps: int = int(1e12),
62 | keep_trajectory: bool = False,
63 | pressure: float = 0,
64 | f_max: float = 1e-3,
65 | ) -> Union[AppFuture, tuple[AppFuture, Dataset]]:
66 |
67 | assert mode in ALLOWED_MODES
68 | assert steps > 0
69 | assert f_max > 0
70 |
71 | context = psiflow.context()
72 | definition = context.definitions["ModelEvaluation"]
73 |
74 | command_list = [EXECUTABLE]
75 | if definition.max_simulation_time is not None:
76 | max_time = 0.9 * (60 * definition.max_simulation_time)
77 | command_list = ["timeout -s 15 {}s".format(max_time), *command_list]
78 | command_launch = " ".join(command_list)
79 |
80 | input_geometry = Dataset([state]).extxyz
81 | hamiltonian = 1.0 * hamiltonian # convert to mixture
82 | names, coeffs = label_forces(hamiltonian), hamiltonian.coefficients
83 | input_forces = serialize_mixture(hamiltonian, dtype="float64") # double precision for MLPs
84 | forces = [
85 | dict(forcefield=n, weight=str(c), file=f.filename) for n, c, f in zip(names, coeffs, input_forces)
86 | ]
87 |
88 | config = dict(
89 | task='ASE optimisation',
90 | forces=forces,
91 | mode=mode,
92 | f_max=f_max,
93 | pressure=pressure,
94 | max_steps=steps,
95 | keep_trajectory=keep_trajectory,
96 | )
97 | input_future = dump_json(
98 | outputs=[context.new_file("input_", ".json")],
99 | **config,
100 | ).outputs[0]
101 | inputs = [input_future, input_geometry, *input_forces]
102 |
103 | outputs = [context.new_file("data_", ".xyz")]
104 | if keep_trajectory:
105 | outputs.append(context.new_file("opt_", ".xyz"))
106 |
107 | result = execute_ase(
108 | command_launch=command_launch,
109 | env_vars=definition.env_vars,
110 | inputs=inputs,
111 | outputs=outputs,
112 | stdout=parsl.AUTO_LOGNAME,
113 | stderr=parsl.AUTO_LOGNAME,
114 | parsl_resource_specification=definition.wq_resources(1),
115 | )
116 |
117 | final = Dataset(None, result.outputs[0])[-1]
118 | if keep_trajectory:
119 | trajectory = Dataset(None, result.outputs[1])
120 | return final, trajectory
121 | else:
122 | return final
123 |
124 |
125 | @join_app
126 | @typeguard.typechecked
127 | def _optimize_dataset(
128 | geometries: list[Geometry], *args, outputs: list = [], **kwargs
129 | ) -> AppFuture:
130 | assert not kwargs.get("keep_trajectory", False)
131 | optimized = []
132 | for geometry in geometries:
133 | optimized.append(optimize(geometry, *args, **kwargs))
134 | return write_frames(*optimized, outputs=[outputs[0]])
135 |
136 |
137 | @typeguard.typechecked
138 | def optimize_dataset(dataset: Dataset, *args, **kwargs) -> Dataset:
139 | extxyz = _optimize_dataset(
140 | dataset.geometries(),
141 | *args,
142 | outputs=[psiflow.context().new_file("data_", ".xyz")],
143 | **kwargs,
144 | ).outputs[0]
145 | return Dataset(None, extxyz)
146 |
--------------------------------------------------------------------------------
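A usage sketch of the ASE-based optimizer, grounded in the signature above; `walker_state` and `mace` are placeholders for a `Geometry` (or its future) and a `Hamiltonian`:

```python
from psiflow.sampling.ase import optimize

final = optimize(walker_state, mace, mode="fix_cell", f_max=1e-4)
final, traj = optimize(
    walker_state, mace, mode="full", pressure=0.0, keep_trajectory=True
)
```
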
/psiflow/sampling/client.py:
--------------------------------------------------------------------------------
1 | # top level imports should be lightweight!
2 | import os
3 |
4 |
5 | class SocketNotFoundException(Exception):
6 | pass
7 |
8 |
9 | def wait_for_socket(address: 'Path', timeout: float = 10, interval: float = 0.1) -> None:
10 |     """Block until the i-PI socket file appears, or raise after `timeout` seconds."""
11 | import time
12 | while not address.exists():
13 | time.sleep(interval)
14 | timeout -= interval
15 | if timeout < 0:
16 | raise SocketNotFoundException(f'Could not find socket "{address}" to connect to..')
17 | return
18 |
19 |
20 | def main():
21 | import argparse
22 | import time
23 | from pathlib import Path
24 |
25 | from ase.io import read
26 | from ipi._driver.driver import run_driver
27 |
28 | from psiflow.functions import function_from_json
29 | from psiflow.geometry import Geometry
30 | from psiflow.sampling.utils import ForceMagnitudeException, FunctionDriver
31 |
32 | print("OS environment values:")
33 | for key, value in os.environ.items():
34 | print(key, value)
35 | parser = argparse.ArgumentParser()
36 | parser.add_argument(
37 | "--path_hamiltonian",
38 | type=str,
39 | default=None,
40 | )
41 | parser.add_argument(
42 | "--device",
43 | type=str,
44 | default=None,
45 | )
46 | parser.add_argument(
47 | "--dtype",
48 | type=str,
49 | default=None,
50 | )
51 | parser.add_argument(
52 | "--address",
53 | type=str,
54 | default=None,
55 | )
56 | parser.add_argument(
57 | "--start",
58 | type=str,
59 | default=None,
60 | )
61 | parser.add_argument(
62 | "--max_force",
63 | type=float,
64 | default=None,
65 | )
66 | args = parser.parse_args()
67 | assert args.path_hamiltonian is not None
68 | assert args.address is not None
69 | assert args.start is not None
70 |
71 | print("pid: {}".format(os.getpid()))
72 | affinity = os.sched_getaffinity(os.getpid())
73 | print("CPU affinity before function init: {}".format(affinity))
74 |
75 | template = Geometry.from_atoms(read(args.start))
76 | function = function_from_json(
77 | args.path_hamiltonian,
78 | device=args.device,
79 | dtype=args.dtype,
80 | )
81 |
82 | driver = FunctionDriver(
83 | template=template,
84 | function=function,
85 | max_force=args.max_force,
86 | verbose=True,
87 | )
88 |
89 | affinity = os.sched_getaffinity(os.getpid())
90 | print("CPU affinity after function init: {}".format(affinity))
91 | try:
92 | t0 = time.time()
93 | for _ in range(10):
94 | function(template) # torch warm-up before simulation
95 | print("time for 10 evaluations: {}".format(time.time() - t0))
96 | socket_address = Path.cwd() / args.address
97 | wait_for_socket(socket_address)
98 | run_driver(
99 | unix=True,
100 | address=str(socket_address),
101 | driver=driver,
102 | sockets_prefix="",
103 | )
104 | except ForceMagnitudeException as e:
105 | print(e) # induce timeout in server
106 | except ConnectionResetError as e: # some other client induced a timeout
107 | print(e)
108 | except SocketNotFoundException as e:
109 | print(e, *list(Path.cwd().iterdir()), sep='\n') # server-side socket not found
110 |
111 |
--------------------------------------------------------------------------------
/psiflow/sampling/metadynamics.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | from pathlib import Path
4 | from typing import Optional, Union
5 |
6 | import typeguard
7 | from parsl.data_provider.files import File
8 | from parsl.dataflow.futures import AppFuture
9 |
10 | import psiflow
11 | from psiflow.utils._plumed import remove_comments_printflush, set_path_in_plumed
12 | from psiflow.utils.apps import copy_app_future, copy_data_future
13 |
14 |
15 | @typeguard.typechecked
16 | @psiflow.serializable
17 | class Metadynamics:
18 | _plumed_input: str
19 | external: Optional[psiflow._DataFuture]
20 |
21 | def __init__(
22 | self,
23 | plumed_input: str,
24 | external: Union[None, str, Path, psiflow._DataFuture] = None,
25 | ):
26 | _plumed_input = remove_comments_printflush(plumed_input)
27 | assert "METAD" in _plumed_input
28 | if "RESTART" not in _plumed_input:
29 | _plumed_input = "\nRESTART\n" + _plumed_input
30 | if "FLUSH" not in _plumed_input: # add at the end!
31 | _plumed_input = _plumed_input + "\nFLUSH STRIDE=1\nPRINT"
32 |
33 |         # PLUMED + WQ cannot deal with nonexistent hills files!
34 | if type(external) in [str, Path]:
35 | external = File(str(external))
36 |             Path(external.filepath).touch()
37 | if external is None:
38 | external = psiflow.context().new_file("hills_", ".txt")
39 | Path(external.filepath).touch()
40 | else:
41 | assert external.filepath in _plumed_input
42 | Path(external.filepath).touch()
43 | _plumed_input = set_path_in_plumed(
44 | _plumed_input,
45 | "METAD",
46 | "PLACEHOLDER",
47 | )
48 | self._plumed_input = _plumed_input
49 | self.external = external
50 |
51 | def plumed_input(self):
52 | plumed_input = self._plumed_input
53 | plumed_input = plumed_input.replace("PLACEHOLDER", self.external.filepath)
54 | return plumed_input
55 |
56 | def input(self) -> AppFuture:
57 | return copy_app_future(self.plumed_input(), inputs=[self.external])
58 |
59 | def wait_for(self, result: AppFuture) -> None:
60 | self.external = copy_app_future(
61 | 0,
62 | inputs=[result, self.external],
63 | outputs=[File(self.external.filepath)],
64 | ).outputs[0]
65 |
66 | def reset(self) -> None:
67 | self.external = psiflow.context().new_file("hills_", ".txt")
68 |
69 | def __eq__(self, other) -> bool:
70 | if type(other) is not Metadynamics:
71 | return False
72 | return self.plumed_input() == other.plumed_input()
73 |
74 | def copy(self) -> Metadynamics:
75 | new_external = copy_data_future(
76 | inputs=[self.external],
77 | outputs=[psiflow.context().new_file("hills_", ".txt")],
78 | ).outputs[0]
79 | mtd = Metadynamics(
80 | str(self.plumed_input()),
81 | )
82 | assert "PLACEHOLDER" in mtd._plumed_input # instead of original filepath
83 | mtd.external = new_external
84 | return mtd
85 |
--------------------------------------------------------------------------------
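A usage sketch for `Metadynamics`; the PLUMED input is illustrative, anything containing a `METAD` action works. The hills path in the input is swapped for a placeholder internally, so `copy()` yields an independent hills file carrying the accumulated bias:

```python
from psiflow.sampling import Metadynamics

plumed_input = """
d: DISTANCE ATOMS=1,2
METAD ARG=d SIGMA=0.05 HEIGHT=0.5 PACE=25 FILE=hills
"""
mtd = Metadynamics(plumed_input)  # hills file is created and managed internally
mtd_copy = mtd.copy()             # independent copy of the accumulated hills
```
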
/psiflow/sampling/optimize.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import xml.etree.ElementTree as ET
4 | from typing import Optional, Union
5 |
6 | import parsl
7 | import typeguard
8 | from ase.units import Bohr, Ha
9 | from parsl.app.app import bash_app, join_app
10 | from parsl.dataflow.futures import AppFuture
11 |
12 | import psiflow
13 | from psiflow.data import Dataset
14 | from psiflow.data.utils import write_frames
15 | from psiflow.geometry import Geometry
16 | from psiflow.hamiltonians import Hamiltonian
17 | from psiflow.sampling.sampling import setup_sockets, make_start_command, make_client_command
18 | from psiflow.utils.io import save_xml
19 | from psiflow.utils import TMP_COMMAND, CD_COMMAND
20 |
21 |
22 | @typeguard.typechecked
23 | def setup_forces(hamiltonian: Hamiltonian) -> tuple[dict[str, Hamiltonian], ET.Element]:
24 | hamiltonian = 1.0 * hamiltonian # convert to mixture
25 | counts = {}
26 | hamiltonians_map = {}
27 | forces = ET.Element("forces")
28 | for h, c in zip(hamiltonian.hamiltonians, hamiltonian.coefficients):
29 | name = h.__class__.__name__
30 | if name not in counts:
31 | counts[name] = 0
32 | count = counts.get(name)
33 | counts[name] += 1
34 | force = ET.Element("force", forcefield=name + str(count), weight=str(c))
35 | forces.append(force)
36 | hamiltonians_map[name + str(count)] = h
37 | return hamiltonians_map, forces
38 |
39 |
40 | @typeguard.typechecked
41 | def setup_motion(
42 | mode: str,
43 | etol: float,
44 | ptol: float,
45 | ftol: float,
46 | ) -> ET.Element:
47 | motion = ET.Element("motion", mode="minimize")
48 | optimizer = ET.Element("optimizer", mode=mode)
49 | tolerances = ET.Element("tolerances")
50 |
51 | energy = ET.Element("energy")
52 | energy.text = " {} ".format(etol / Ha)
53 | tolerances.append(energy)
54 | position = ET.Element("position")
55 | position.text = " {} ".format(ptol / Bohr)
56 | tolerances.append(position)
57 | force = ET.Element("force")
58 | force.text = " {} ".format(ftol / Ha * Bohr)
59 | tolerances.append(force)
60 | optimizer.append(tolerances)
61 | motion.append(optimizer)
62 | return motion
63 |
64 |
65 | @typeguard.typechecked
66 | def setup_output(keep_trajectory: bool) -> ET.Element:
67 | output = ET.Element("output", prefix="output")
68 | checkpoint = ET.Element(
69 | "checkpoint",
70 | filename="checkpoint",
71 | stride="1",
72 | overwrite="True",
73 | )
74 | output.append(checkpoint)
75 | if keep_trajectory:
76 |         trajectory = ET.Element(
77 | "trajectory",
78 | stride="1",
79 | format="ase",
80 | filename="trajectory",
81 | bead="0",
82 | )
83 | trajectory.text = r" positions "
84 | output.append(trajectory)
85 | return output
86 |
87 |
88 | def _execute_ipi(
89 | hamiltonian_names: list[str],
90 | client_args: list[list[str]],
91 | keep_trajectory: bool,
92 | command_server: str,
93 | command_client: str,
94 | env_vars: dict = {},
95 | stdout: str = "",
96 | stderr: str = "",
97 | inputs: list = [],
98 | outputs: list = [],
99 | parsl_resource_specification: Optional[dict] = None,
100 | ) -> str:
101 | env_command = 'export ' + ' '.join([f"{name}={value}" for name, value in env_vars.items()])
102 | command_start = make_start_command(command_server, inputs[0], inputs[1])
103 | commands_client = []
104 | for i, name in enumerate(hamiltonian_names):
105 | args = client_args[i]
106 | assert len(args) == 1 # only have one client per hamiltonian
107 | for arg in args:
108 |             commands_client.append(make_client_command(command_client, name, inputs[2 + i], inputs[1], arg))
109 |
110 | command_end = f'{command_server} --cleanup --output_xyz={outputs[0].filepath}'
111 | command_copy = f'cp walker-0_output.trajectory_0.ase {outputs[1].filepath}' if keep_trajectory else ''
112 | command_list = [
113 | TMP_COMMAND,
114 | CD_COMMAND,
115 | env_command,
116 | command_start,
117 | *commands_client,
118 | "wait",
119 | command_end,
120 | command_copy,
121 | ]
122 | return "\n".join(command_list)
123 |
124 |
125 | execute_ipi = bash_app(_execute_ipi, executors=["ModelEvaluation"])
126 |
127 |
128 | @typeguard.typechecked
129 | def optimize(
130 | state: Union[Geometry, AppFuture],
131 | hamiltonian: Hamiltonian,
132 | steps: int = 5000,
133 | keep_trajectory: bool = False,
134 | mode: str = "lbfgs",
135 | etol: float = 1e-3,
136 | ptol: float = 1e-5,
137 | ftol: float = 1e-3,
138 | ) -> Union[AppFuture, tuple[AppFuture, Dataset]]:
139 | hamiltonians_map, forces = setup_forces(hamiltonian)
140 | sockets = setup_sockets(hamiltonians_map)
141 |
142 | initialize = ET.Element("initialize", nbeads="1")
143 | start = ET.Element("file", mode="ase", cell_units="angstrom")
144 | start.text = " start_0.xyz "
145 | initialize.append(start)
146 | motion = setup_motion(mode, etol, ptol, ftol)
147 |
148 | system = ET.Element("system", prefix="walker-0")
149 | system.append(initialize)
150 | system.append(motion)
151 | system.append(forces)
152 |
153 | output = setup_output(keep_trajectory)
154 |
155 | simulation = ET.Element("simulation", mode="static")
156 | simulation.append(output)
157 | for socket in sockets:
158 | simulation.append(socket)
159 | simulation.append(system)
160 | total_steps = ET.Element("total_steps")
161 | total_steps.text = " {} ".format(steps)
162 | simulation.append(total_steps)
163 |
164 | context = psiflow.context()
165 | definition = context.definitions["ModelEvaluation"]
166 | input_future = save_xml(
167 | simulation,
168 | outputs=[context.new_file("input_", ".xml")],
169 | ).outputs[0]
170 | inputs = [
171 | input_future,
172 | Dataset([state]).extxyz,
173 | ]
174 | inputs += [h.serialize_function(dtype="float64") for h in hamiltonians_map.values()]
175 |
176 | hamiltonian_names = list(hamiltonians_map.keys())
177 | client_args = []
178 | for name in hamiltonian_names:
179 | args = definition.get_client_args(name, 1, "minimize")
180 | client_args.append(args)
181 | outputs = [context.new_file("data_", ".xyz")]
182 | if keep_trajectory:
183 | outputs.append(context.new_file("opt_", ".xyz"))
184 |
185 | command_server = definition.server_command()
186 | command_client = definition.client_command()
187 | resources = definition.wq_resources(1)
188 |
189 | result = execute_ipi(
190 | hamiltonian_names,
191 | client_args,
192 | keep_trajectory,
193 | command_server,
194 | command_client,
195 | env_vars=definition.env_vars,
196 | stdout=parsl.AUTO_LOGNAME,
197 | stderr=parsl.AUTO_LOGNAME,
198 | inputs=inputs,
199 | outputs=outputs,
200 | parsl_resource_specification=resources,
201 | )
202 |
203 | final = Dataset(None, result.outputs[0]).evaluate(hamiltonian)[-1]
204 | if keep_trajectory:
205 | trajectory = Dataset(None, result.outputs[1])
206 | return final, trajectory
207 | else:
208 | return final
209 |
210 |
211 | @join_app
212 | @typeguard.typechecked
213 | def _optimize_dataset(
214 | geometries: list[Geometry], *args, outputs: list = [], **kwargs
215 | ) -> AppFuture:
216 | assert not kwargs.get("keep_trajectory", False)
217 | optimized = []
218 | for geometry in geometries:
219 | optimized.append(optimize(geometry, *args, **kwargs))
220 | return write_frames(*optimized, outputs=[outputs[0]])
221 |
222 |
223 | @typeguard.typechecked
224 | def optimize_dataset(dataset: Dataset, *args, **kwargs) -> Dataset:
225 | extxyz = _optimize_dataset(
226 | dataset.geometries(),
227 | *args,
228 | outputs=[psiflow.context().new_file("data_", ".xyz")],
229 | **kwargs,
230 | ).outputs[0]
231 | return Dataset(None, extxyz)
232 |
--------------------------------------------------------------------------------
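A usage sketch of the i-PI-based optimizer defined above; tolerances are given in ASE units (eV, Å) and converted to atomic units in `setup_motion`. `state` and `hamiltonian` are placeholders:

```python
from psiflow.sampling.optimize import optimize

final = optimize(state, hamiltonian, mode="lbfgs", ftol=1e-4)
final, trajectory = optimize(state, hamiltonian, keep_trajectory=True)
```
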
/psiflow/sampling/order.py:
--------------------------------------------------------------------------------
1 | """
2 | TODO: these imports are outdated.. Is this module still used?
3 | """
4 | from __future__ import annotations # necessary for type-guarding class methods
5 |
6 | from functools import partial
7 | from typing import Optional, Union
8 |
9 | import typeguard
10 | from ase.units import kJ, mol
11 | from parsl.app.app import python_app
12 | from parsl.dataflow.futures import AppFuture
13 |
14 | import psiflow
15 | from psiflow.data import Dataset, batch_apply
16 | from psiflow.geometry import Geometry
17 | from psiflow.hamiltonians import Hamiltonian
18 | from psiflow.hamiltonians import PlumedHamiltonian
19 |
20 |
21 | @typeguard.typechecked
22 | def insert_in_state(
23 | state: Geometry,
24 | name: str,
25 | ) -> Geometry:
26 | value = state.energy
27 | state.order[name] = value
28 | state.energy = None
29 | return state
30 |
31 |
32 | @typeguard.typechecked
33 | def _insert(
34 | state_or_states: Union[Geometry, list[Geometry]],
35 | name: str,
36 | ) -> Union[list[Geometry], Geometry]:
37 | if not isinstance(state_or_states, list):
38 | return insert_in_state(state_or_states, name)
39 | else:
40 | for state in state_or_states:
41 | insert_in_state(state, name) # modify list in place
42 | return state_or_states
43 |
44 |
45 | insert = python_app(_insert, executors=["default_threads"])
46 |
47 |
48 | @typeguard.typechecked
49 | def insert_in_dataset(
50 | data: Dataset,
51 | name: str,
52 | ) -> Dataset:
53 | geometries = insert(
54 | data.geometries(),
55 | name,
56 | )
57 | return Dataset(geometries)
58 |
59 |
60 | @typeguard.typechecked
61 | class OrderParameter:
62 | # TODO: batched evaluation
63 |
64 | def __init__(self, name: str):
65 | self.name = name
66 |
67 | def evaluate(self, state: Union[Geometry, AppFuture]) -> AppFuture:
68 | raise NotImplementedError
69 |
70 | def __eq__(self, other):
71 | raise NotImplementedError
72 |
73 |
74 | @typeguard.typechecked
75 | @psiflow.serializable
76 | class HamiltonianOrderParameter(OrderParameter):
77 | name: str
78 | hamiltonian: Hamiltonian
79 |
80 | def __init__(self, name: str, hamiltonian: Hamiltonian):
81 | super().__init__(name)
82 | self.hamiltonian = hamiltonian
83 |
84 | def evaluate(
85 | self,
86 | arg: Union[Dataset, Geometry, AppFuture[Geometry]],
87 | batch_size: Optional[int] = 100,
88 | ) -> Union[Dataset, AppFuture]:
89 | if isinstance(arg, Dataset):
90 | # avoid batching the dataset twice:
91 | # apply hamiltonian in batched sense and put insert afterwards
92 | funcs = [
93 | self.hamiltonian.single_evaluate,
94 | partial(insert_in_dataset, name=self.name),
95 | ]
96 | future = batch_apply(
97 | funcs,
98 | batch_size,
99 | arg.length(),
100 | inputs=[arg.extxyz],
101 | outputs=[psiflow.context().new_file("data_", ".xyz")],
102 | )
103 | return Dataset(None, future.outputs[0])
104 | else:
105 | state = self.hamiltonian.evaluate(arg)
106 | return insert(state, self.name)
107 |
108 | def __eq__(self, other):
109 | if type(other) is not HamiltonianOrderParameter:
110 | return False
111 | return self.hamiltonian == other.hamiltonian
112 |
113 | @classmethod
114 | def from_plumed(
115 | cls, name: str, hamiltonian: PlumedHamiltonian
116 | ) -> HamiltonianOrderParameter:
117 | assert name in hamiltonian.plumed_input()
118 | action_prefixes = [
119 | "ABMD",
120 | "BIASVALUE",
121 | "EXTENDED_LAGRANGIAN",
122 | "EXTERNAL",
123 | "LOWER_WALLS",
124 | "MAXENT",
125 | "METAD",
126 | "MOVINGRESTRAINT",
127 | "PBMETAD",
128 | "RESTRAINT",
129 | "UPPER_WALLS",
130 | "RESTART",
131 | ]
132 | lines = hamiltonian.plumed_input().split("\n")
133 | new_lines = []
134 | for line in lines:
135 | found = [p in line for p in action_prefixes]
136 |             if any(found):
137 | continue
138 | else:
139 | new_lines.append(line)
140 |         ev_to_kjmol = 1 / (
141 |             kJ / mol
142 |         )  # compensate for the PLUMED-to-ASE unit conversion of 'energy'
143 | new_lines.append(
144 | "rescaled: MATHEVAL ARG={} FUNC=x*{} PERIODIC=NO".format(name, ev_to_kjmol)
145 | )
146 | new_lines.append("BIASVALUE ARG=rescaled")
147 | return HamiltonianOrderParameter(
148 | name=name,
149 | hamiltonian=PlumedHamiltonian(plumed_input="\n".join(new_lines)),
150 | )
151 |
--------------------------------------------------------------------------------
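
For reference, a sketch of HamiltonianOrderParameter.from_plumed as defined above: bias actions (METAD, RESTRAINT, ...) are stripped from the PLUMED input, and the named variable is re-exposed through a unit-corrected BIASVALUE so that evaluation stores its value under state.order[name]. The PLUMED input itself is illustrative only.

    from psiflow.hamiltonians import PlumedHamiltonian
    from psiflow.sampling.order import HamiltonianOrderParameter

    plumed_input = """
    phi: TORSION ATOMS=5,7,9,15
    METAD ARG=phi PACE=10 HEIGHT=1.0
    """
    bias = PlumedHamiltonian(plumed_input=plumed_input)
    order = HamiltonianOrderParameter.from_plumed("phi", bias)

    # order.evaluate(geometry) returns a future whose .order['phi'] holds the
    # torsion value; order.evaluate(dataset) annotates all states in batches
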
/psiflow/sampling/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Optional
2 |
3 | import numpy as np
4 | import typeguard
5 | from ase.data import chemical_symbols
6 |
7 | from psiflow.functions import Function
8 |
9 | # do not use psiflow apps; parsl config is not loaded in this process!
10 | from psiflow.geometry import Geometry
11 |
12 | # only import modules which do not issue warnings on import; otherwise the
13 | # output of e.g. 'python -c "from psiflow.sampling import client"' would get
14 | # polluted with import-related warnings
15 |
16 |
17 | class ForceMagnitudeException(Exception):
18 | pass
19 |
20 |
21 | class TimeoutException(Exception):
22 | pass
23 |
24 |
25 | def timeout_handler(signum, frame):
26 | raise TimeoutException
27 |
28 |
29 | @typeguard.typechecked
30 | def check_forces(
31 | forces: np.ndarray,
32 | geometry: Any,
33 | max_force: float,
34 | ):
35 | if not isinstance(geometry, Geometry):
36 | geometry = Geometry.from_atoms(geometry)
37 |
38 | exceeded = np.linalg.norm(forces, axis=1) > max_force
39 | if np.sum(exceeded):
40 | indices = np.arange(len(geometry))[exceeded]
41 | numbers = geometry.per_atom.numbers[exceeded]
42 | symbols = [chemical_symbols[n] for n in numbers]
43 | raise ForceMagnitudeException(
44 | "\nforce exceeded {} eV/A for atoms {}"
45 | " with chemical elements {}\n".format(
46 | max_force,
47 | indices,
48 | symbols,
49 | )
50 | )
51 | else:
52 | pass
53 |
54 |
55 | class FunctionDriver:
56 |
57 | def __init__(
58 | self,
59 | template: Geometry,
60 | function: Function,
61 | max_force: Optional[float],
62 | verbose: bool = True, # used by i-PI internally?
63 | error_msg="",
64 | ):
65 | self.verbose = verbose
66 | self.template = template
67 | self.function = function
68 | self.max_force = max_force
69 |
70 | def check_arguments(self):
71 | pass
72 |
73 | def __call__(self, cell, pos):
74 | from ipi.utils.units import unit_to_internal, unit_to_user
75 |
76 | pos = unit_to_user("length", "angstrom", pos)
77 | cell = unit_to_user("length", "angstrom", cell.T)
78 |
79 | self.template.per_atom.positions[:] = pos
80 | if self.template.periodic:
81 | self.template.cell[:] = cell
82 |
83 | outputs = self.function(self.template)
84 | energy = outputs["energy"]
85 | forces = outputs["forces"]
86 | stress = outputs["stress"]
87 |
88 | # check for max_force
89 | if self.max_force is not None:
90 | check_forces(forces, self.template, self.max_force)
91 |
92 | # converts to internal quantities
93 | pot_ipi = np.asarray(
94 | unit_to_internal("energy", "electronvolt", energy), np.float64
95 | )
96 | force_ipi = np.asarray(unit_to_internal("force", "ev/ang", forces), np.float64)
97 | vir_calc = -stress * self.template.volume
98 | vir_ipi = np.array(
99 | unit_to_internal("energy", "electronvolt", vir_calc.T), dtype=np.float64
100 | )
101 | extras = ""
102 |
103 | return pot_ipi, force_ipi, vir_ipi, extras
104 |
--------------------------------------------------------------------------------
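
A short sketch of the force-magnitude guard used by FunctionDriver above: check_forces raises ForceMagnitudeException as soon as any per-atom force norm exceeds max_force, naming the offending atoms. The geometry and force values below are made up for illustration.

    import numpy as np
    from ase import Atoms
    from psiflow.geometry import Geometry
    from psiflow.sampling.utils import ForceMagnitudeException, check_forces

    h2 = Atoms(numbers=[1, 1], positions=[[0, 0, 0], [0.74, 0, 0]], pbc=False)
    geometry = Geometry.from_atoms(h2)
    forces = np.array([[0.0, 0.0, 0.0], [50.0, 0.0, 0.0]])  # 50 eV/A on atom 1

    try:
        check_forces(forces, geometry, max_force=10.0)
    except ForceMagnitudeException as e:
        print(e)  # reports atom index 1, chemical element H
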
/psiflow/utils/__init__.py:
--------------------------------------------------------------------------------
1 | TMP_COMMAND = 'tmpdir=$(mktemp -d -p /tmp "mytmpdir.XXXXXXXXXX" || mktemp -d -t "mytmpdir.XXXXXXXXXX")'
2 | CD_COMMAND = 'cd $tmpdir; echo "tmpdir: $PWD"'
3 |
4 |
5 |
--------------------------------------------------------------------------------
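
These two fragments are meant to be chained into the shell command of an app so that it executes inside a fresh scratch directory; a sketch, where the payload command is hypothetical:

    from psiflow.utils import CD_COMMAND, TMP_COMMAND

    payload = "cp2k.psmp -i input.inp"  # hypothetical
    command = "; ".join([TMP_COMMAND, CD_COMMAND, payload])
    # creates a temporary directory, cd's into it (echoing its path), then
    # runs the payload inside it
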
/psiflow/utils/_plumed.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import typeguard
5 |
6 |
7 | @typeguard.typechecked
8 | def try_manual_plumed_linking() -> str:
9 | if "PLUMED_KERNEL" not in os.environ.keys():
10 | # try linking manually
11 | if "CONDA_PREFIX" in os.environ.keys(): # for conda environments
12 | p = "CONDA_PREFIX"
13 | elif "PREFIX" in os.environ.keys(): # for pip environments
14 | p = "PREFIX"
15 | else:
16 | raise ValueError("failed to set plumed .so kernel")
17 | path = os.environ[p] + "/lib/libplumedKernel.so"
18 | if os.path.exists(path):
19 | os.environ["PLUMED_KERNEL"] = path
20 | logging.info("plumed kernel manually set at : {}".format(path))
21 | else:
22 | raise ValueError("plumed kernel not found at {}".format(path))
23 | return os.environ["PLUMED_KERNEL"]
24 |
25 |
26 | @typeguard.typechecked
27 | def remove_comments_printflush(plumed_input: str) -> str:
28 | new_input = []
29 | for line in list(plumed_input.split("\n")):
30 | pre_comment = line.strip().split("#")[0].strip()
31 | if len(pre_comment) == 0:
32 | continue
33 | if pre_comment.startswith("PRINT"):
34 | continue
35 | if pre_comment.startswith("FLUSH"):
36 | continue
37 | new_input.append(pre_comment)
38 | return "\n".join(new_input)
39 |
40 |
41 | @typeguard.typechecked
42 | def set_path_in_plumed(plumed_input: str, keyword: str, path_to_set: str) -> str:
43 | lines = plumed_input.split("\n")
44 | for i, line in enumerate(lines):
45 | if keyword in line:
46 | if "FILE=" not in line:
47 | lines[i] = line + " FILE={}".format(path_to_set)
48 | continue
49 | line_before = line.split("FILE=")[0]
50 | line_after = line.split("FILE=")[1].split()[1:]
51 | lines[i] = (
52 | line_before + "FILE={} ".format(path_to_set) + " ".join(line_after)
53 | )
54 | return "\n".join(lines)
55 |
--------------------------------------------------------------------------------
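
A sketch of the two input-sanitizing helpers above: remove_comments_printflush drops comments and PRINT/FLUSH actions, and set_path_in_plumed inserts or rewrites the FILE= argument of a given action. The PLUMED input is illustrative.

    from psiflow.utils._plumed import (
        remove_comments_printflush,
        set_path_in_plumed,
    )

    plumed_input = """
    phi: TORSION ATOMS=5,7,9,15  # backbone dihedral
    METAD ARG=phi PACE=10 HEIGHT=1.0
    PRINT ARG=phi STRIDE=1 FILE=colvar
    """
    cleaned = remove_comments_printflush(plumed_input)
    relinked = set_path_in_plumed(cleaned, "METAD", "/tmp/hills")
    print(relinked)
    # phi: TORSION ATOMS=5,7,9,15
    # METAD ARG=phi PACE=10 HEIGHT=1.0 FILE=/tmp/hills
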
/psiflow/utils/apps.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations # necessary for type-guarding class methods
2 |
3 | import logging
4 | import sys
5 | from typing import Any, Union
6 |
7 | import numpy as np
8 | import typeguard
9 | from parsl.app.app import python_app
10 | from parsl.data_provider.files import File
11 |
12 |
13 | @typeguard.typechecked
14 | def get_attribute(obj: Any, *attribute_names: str) -> Any:
15 | for name in attribute_names:
16 | obj = getattr(obj, name)
17 | return obj
18 |
19 |
20 | @typeguard.typechecked
21 | def _boolean_or(*args: Union[bool, np.bool_]) -> bool:
22 | return any(args)
23 |
24 |
25 | boolean_or = python_app(_boolean_or, executors=["default_threads"])
26 |
27 |
28 | def _multiply(a, b):
29 | return a * b
30 |
31 |
32 | multiply = python_app(_multiply, executors=["default_threads"])
33 |
34 |
35 | @typeguard.typechecked
36 | def setup_logger(module_name):
37 | # Create logger instance for the module
38 | module_logger = logging.getLogger(module_name)
39 |
40 | # Set the desired format string
41 | formatter = logging.Formatter("%(name)s - %(message)s")
42 |
43 | # Create handler to send logs to stdout
44 | stdout_handler = logging.StreamHandler(sys.stdout)
45 | stdout_handler.setFormatter(formatter)
46 |
47 | # Add handler to the logger instance
48 | module_logger.addHandler(stdout_handler)
49 |
50 | # Set the logging level for the logger
51 | module_logger.setLevel(logging.INFO)
52 |
53 | return module_logger
54 |
55 |
56 | def _compute_sum(a, b):
57 | return np.add(a, b)
58 |
59 |
60 | compute_sum = python_app(_compute_sum, executors=["default_threads"])
61 |
62 |
63 | @typeguard.typechecked
64 | def _combine_futures(inputs: list[Any]) -> list[Any]:
65 | return list(inputs)
66 |
67 |
68 | combine_futures = python_app(_combine_futures, executors=["default_threads"])
69 |
70 |
71 | @typeguard.typechecked
72 | def _copy_data_future(
73 | pass_on_exist: bool = False,
74 | inputs: list[File] = [],
75 | outputs: list[File] = [],
76 | ) -> None:
77 | import shutil
78 | from pathlib import Path
79 |
80 | assert len(inputs) == 1
81 | assert len(outputs) == 1
82 | if Path(outputs[0]).is_file() and pass_on_exist:
83 | return None
84 | if Path(inputs[0]).is_file():
85 | shutil.copyfile(inputs[0], outputs[0])
86 | else: # no need to copy empty file
87 | pass
88 |
89 |
90 | copy_data_future = python_app(_copy_data_future, executors=["default_threads"])
91 |
92 |
93 | @typeguard.typechecked
94 | def _copy_app_future(future: Any, inputs: list = [], outputs: list = []) -> Any:
95 | # inputs/outputs to enforce additional dependencies
96 | from copy import deepcopy
97 |
98 | return deepcopy(future)
99 |
100 |
101 | copy_app_future = python_app(_copy_app_future, executors=["default_threads"])
102 |
103 |
104 | @typeguard.typechecked
105 | def _log_message(logger, message, *futures):
106 | if len(futures) > 0:
107 | logger.info(message.format(*futures))
108 | else:
109 | logger.info(message)
110 |
111 |
112 | log_message = python_app(_log_message, executors=["default_threads"])
113 |
114 |
115 | def _pack(*args):
116 | return args
117 |
118 |
119 | pack = python_app(_pack, executors=["default_threads"])
120 |
121 |
122 | @typeguard.typechecked
123 | def _unpack_i(result: Union[np.ndarray, list, tuple], i: int) -> Any:
124 |     assert i < len(result)
125 | return result[i]
126 |
127 |
128 | unpack_i = python_app(_unpack_i, executors=["default_threads"])
129 |
130 |
131 | @typeguard.typechecked
132 | def _concatenate(*arrays: np.ndarray) -> np.ndarray:
133 | return np.concatenate(arrays)
134 |
135 |
136 | concatenate = python_app(_concatenate, executors=["default_threads"])
137 |
138 |
139 | @typeguard.typechecked
140 | def _isnan(a: Union[float, np.ndarray]) -> bool:
141 | return bool(np.any(np.isnan(a)))
142 |
143 |
144 | isnan = python_app(_isnan, executors=["default_threads"])
145 |
--------------------------------------------------------------------------------
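
The apps above are thin wrappers which keep pure-Python glue on the 'default_threads' executor; futures chain transparently. A sketch, assuming an execution configuration has been loaded:

    import psiflow
    from psiflow.utils.apps import combine_futures, multiply, unpack_i

    psiflow.load()

    a = multiply(2, 3)                     # AppFuture -> 6
    b = multiply(a, 4)                     # futures pass as arguments -> 24
    both = combine_futures(inputs=[a, b])  # AppFuture -> [6, 24]
    print(unpack_i(both, 0).result())      # 6
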
/psiflow/utils/io.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | from typing import Any
3 |
4 | import numpy as np
5 | import typeguard
6 | from parsl.app.app import python_app
7 | from parsl.data_provider.files import File
8 |
9 |
10 | @typeguard.typechecked
11 | def _save_yaml(
12 | input_dict: dict,
13 | outputs: list[File] = [],
14 | **extra_keys: Any,
15 | ) -> None:
16 | import yaml
17 |
18 | def _make_dict_safe(arg):
19 | # walks through dict and converts numpy types to python natives
20 | for key in list(arg.keys()):
21 | if hasattr(arg[key], "item"):
22 | arg[key] = arg[key].item()
23 | elif type(arg[key]) is dict:
24 | arg[key] = _make_dict_safe(arg[key])
25 | else:
26 | pass
27 | return arg
28 |
29 | input_dict = dict(input_dict)
30 | for key, value in extra_keys.items():
31 | assert key not in input_dict
32 | input_dict[key] = value
33 | input_dict = _make_dict_safe(input_dict)
34 | with open(outputs[0], "w") as f:
35 | yaml.dump(input_dict, f, default_flow_style=False)
36 |
37 |
38 | save_yaml = python_app(_save_yaml, executors=["default_threads"])
39 |
40 |
41 | @typeguard.typechecked
42 | def _save_xml(
43 | element: ET.Element,
44 | outputs: list = [],
45 | ) -> None:
46 | tree = ET.ElementTree(element)
47 | ET.indent(tree, " ")
48 | tree.write(outputs[0], encoding="utf-8", xml_declaration=True)
49 |
50 |
51 | save_xml = python_app(_save_xml, executors=["default_threads"])
52 |
53 |
54 | @typeguard.typechecked
55 | def _load_numpy(inputs: list[File] = [], **kwargs) -> np.ndarray:
56 | return np.loadtxt(inputs[0], **kwargs)
57 |
58 |
59 | load_numpy = python_app(_load_numpy, executors=["default_threads"])
60 |
61 |
62 | @typeguard.typechecked
63 | def _read_yaml(inputs: list[File] = [], outputs: list[File] = []) -> dict:
64 | import yaml
65 |
66 | with open(inputs[0], "r") as f:
67 | config_dict = yaml.load(f, Loader=yaml.FullLoader)
68 | return config_dict
69 |
70 |
71 | read_yaml = python_app(_read_yaml, executors=["default_threads"])
72 |
73 |
74 | @typeguard.typechecked
75 | def _save_txt(data: str, outputs: list[File] = []) -> None:
76 | with open(outputs[0], "w") as f:
77 | f.write(data)
78 |
79 |
80 | save_txt = python_app(_save_txt, executors=["default_threads"])
81 |
82 |
83 | @typeguard.typechecked
84 | def _load_metrics(inputs: list = []) -> np.recarray:
85 | return np.load(inputs[0], allow_pickle=True)
86 |
87 |
88 | load_metrics = python_app(_load_metrics, executors=["default_threads"])
89 |
90 |
91 | @typeguard.typechecked
92 | def _save_metrics(data: np.recarray, outputs: list = []) -> None:
93 | with open(outputs[0], "wb") as f:
94 | data.dump(f)
95 |
96 |
97 | save_metrics = python_app(_save_metrics, executors=["default_threads"])
98 |
99 |
100 | @typeguard.typechecked
101 | def _dump_json(
102 | inputs: list = [],
103 | outputs: list = [],
104 | **kwargs,
105 | ) -> None:
106 | import json
107 |
108 | import numpy as np
109 |
110 |     def convert_to_list(array):
111 |         if not isinstance(array, np.ndarray):
112 |             if isinstance(array, np.floating):
113 |                 return float(array)
114 |             return array
115 |         as_list = []
116 |         for item in array:
117 |             as_list.append(convert_to_list(item))
118 |         return as_list
119 | 
120 |     for name in list(kwargs.keys()):
121 |         value = kwargs[name]
122 |         if isinstance(value, np.ndarray):
123 |             value = convert_to_list(value)
124 |         elif isinstance(value, np.floating):
125 |             value = float(value)
126 |         kwargs[name] = value
127 | with open(outputs[0], "w") as f:
128 | f.write(json.dumps(kwargs))
129 |
130 |
131 | dump_json = python_app(_dump_json, executors=["default_threads"])
132 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools"]
3 | build-backend = "setuptools.build_meta"
4 |
5 |
6 | [project]
7 | name = "psiflow"
8 | version = "4.0.0"
9 | description = "Library for developing interatomic potentials"
10 | readme = "README.md"
11 | requires-python = ">=3.10"
12 | dependencies = [
13 | "ase>=3.23.0",
14 | "pyyaml>=6.0",
15 | "numpy>=1.22.3, <2",
16 | "parsl==2024.12.16",
17 | "prettytable",
18 | "psutil",
19 | "cp2k-input-tools @ git+https://github.com/cp2k/cp2k-input-tools.git@3b9929735dcb3c8c0620a548b1fe20efecbad077", # need 2024.1
20 | "pytimeparse",
21 | ]
22 |
23 |
24 | [project.scripts]
25 | psiflow-client = "psiflow.sampling.client:main"
26 | psiflow-server = "psiflow.sampling.server:main"
27 | psiflow-mace-train = "psiflow.models.mace_utils:main"
28 | psiflow-ase-opt = "psiflow.sampling._ase:main"
29 |
30 |
31 | [project.optional-dependencies]
32 | docs = [
33 | "mkdocs>=1.4.2",
34 | "mkdocs-autorefs>=0.4.1",
35 | "mkdocs-material>=9.0.3",
36 | "mkdocs-material-extensions>=1.1.1",
37 | "mkdocstrings>=0.19.1",
38 | "mkdocstrings-python>=0.8.3",
39 | ]
40 | dev = [
41 | "pre-commit",
42 | "black",
43 | "isort",
44 | "flake8",
45 | "flake8-bugbear",
46 | "flake8-pyproject",
47 | "pytest>=7.2.0",
48 | "coverage>=6.5.0",
49 | "coveralls>=3.3.1",
50 | ]
51 |
52 |
53 | [tool.setuptools.packages.find]
54 | include = [
55 | "psiflow",
56 | "psiflow.models",
57 | "psiflow.data",
58 | "psiflow.reference",
59 | "psiflow.sampling",
60 | "psiflow.utils",
61 | "psiflow.free_energy",
62 | ]
63 |
64 |
65 | [tool.flake8]
66 | max-line-length = 88
67 | extend-ignore = ["E203", "E501", "E704", "B006"]
68 | #select = C,E,F,W,B,B950
69 |
70 | [tool.isort]
71 | profile = "black"
72 |
73 | [tool.pytest.ini_options]
74 | log_cli = 0
75 | addopts = [
76 | "--basetemp=pytest-tmp", # /tmp/ may be different for each worker!
77 | "--import-mode=append",
78 | "--psiflow-config=configs/threadpool.yaml",
79 | "-W ignore::DeprecationWarning",
80 | "--log-cli-level=WARNING",
81 | ]
82 | testpaths = ["tests"]
83 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | from dataclasses import asdict
3 | from pathlib import Path
4 |
5 | import numpy as np
6 | import parsl
7 | import pytest
8 | import yaml
9 | from ase import Atoms
10 | from ase.build import bulk, make_supercell
11 | from ase.calculators.emt import EMT
12 |
13 | import psiflow
14 | from psiflow.data import Dataset
15 | from psiflow.geometry import Geometry
16 | from psiflow.models import MACE, MACEConfig
17 |
18 |
19 | def pytest_addoption(parser):
20 | parser.addoption(
21 | "--psiflow-config",
22 | action="store",
23 | help="test",
24 | )
25 | parser.addoption(
26 | "--skip-gpu",
27 | action="store_true",
28 | default=False,
29 | help="whether to run tests which require a GPU",
30 | )
31 |
32 |
33 | @pytest.fixture(scope="session")
34 | def gpu(request):
35 | if request.config.getoption("--skip-gpu"):
36 | pytest.skip("skipping tests which require GPU")
37 |
38 |
39 | @pytest.fixture(scope="session", autouse=True)
40 | def context(request, tmp_path_factory):
41 | try:
42 | context = psiflow.context()
43 | except RuntimeError:
44 | path_config = Path(request.config.getoption("--psiflow-config"))
45 | with open(path_config, "r") as f:
46 | psiflow_config = yaml.safe_load(f)
47 | psiflow_config["path"] = tmp_path_factory.mktemp("psiflow_internal")
48 | psiflow.load(psiflow_config)
49 | context = psiflow.context() # noqa: F841
50 | yield
51 | parsl.dfk().cleanup()
52 |
53 |
54 | @pytest.fixture(scope="session")
55 | def mace_config():
56 | mace_config = MACEConfig()
57 | mace_config.num_radial_basis = 3
58 | mace_config.num_cutoff_basis = 2
59 | mace_config.max_ell = 1
60 | mace_config.correlation = 1
61 | mace_config.MLP_irreps = "2x0e"
62 | mace_config.num_channels = 2
63 | mace_config.max_L = 0
64 | mace_config.r_max = 4
65 | mace_config.radial_MLP = "[4]"
66 | return asdict(mace_config)
67 |
68 |
69 | def generate_emt_cu_data(nstates, amplitude, supercell=None):
70 | if supercell is None:
71 | supercell = np.eye(3)
72 | atoms = make_supercell(bulk("Cu", "fcc", a=3.6, cubic=True), supercell)
73 | atoms.calc = EMT()
74 | pos = atoms.get_positions()
75 | box = atoms.get_cell()
76 | atoms_list = []
77 | for _ in range(nstates):
78 | atoms.set_positions(
79 | pos + np.random.uniform(-amplitude, amplitude, size=(len(atoms), 3))
80 | )
81 | atoms.set_cell(box + np.random.uniform(-amplitude, amplitude, size=(3, 3)))
82 | _atoms = atoms.copy()
83 | _atoms.calc = None
84 | _atoms.info["energy"] = atoms.get_potential_energy()
85 | _atoms.info["stress"] = atoms.get_stress(voigt=False)
86 | _atoms.arrays["forces"] = atoms.get_forces()
87 | # make content heterogeneous to test per_element functions
88 | _atoms.numbers[0] = 1
89 | _atoms.symbols[0] = "H"
90 | atoms_list.append(_atoms)
91 | return atoms_list
92 |
93 |
94 | @pytest.fixture
95 | def dataset(context):
96 | data = generate_emt_cu_data(20, 0.2)
97 | data += generate_emt_cu_data(5, 0.15, supercell=np.diag([1, 2, 1]))
98 | data_ = [Geometry.from_atoms(atoms) for atoms in data]
99 | return Dataset(data_).align_axes()
100 |
101 |
102 | @pytest.fixture(scope="session")
103 | def mace_model(mace_config):
104 | # manually recreate dataset with 'session' scope
105 | data = generate_emt_cu_data(20, 0.2)
106 | data_ = [Geometry.from_atoms(atoms) for atoms in data]
107 | dataset = Dataset(data_)
108 | model = MACE(**mace_config)
109 | # add additional state to initialize other atomic numbers
110 | # mace cannot handle partially periodic datasets
111 | geometry = Geometry.from_data(
112 | numbers=np.array(2 * [101]),
113 | positions=np.array([[0, 0, 0], [2, 0, 0]]),
114 | cell=2 * np.eye(3),
115 | )
116 | geometry.energy = -1.0
117 | geometry.per_atom.forces[:] = np.random.uniform(size=(2, 3))
118 | model.initialize(dataset[:5] + Dataset([geometry]))
119 | return model
120 |
121 |
122 | @pytest.fixture
123 | def dataset_h2(context):
124 | h2 = Atoms(
125 | numbers=[1, 1],
126 | positions=[[0, 0, 0], [0.74, 0, 0]],
127 | pbc=False,
128 | )
129 | data = [h2.copy() for i in range(20)]
130 | for atoms in data:
131 | atoms.set_positions(
132 | atoms.get_positions() + np.random.uniform(-0.05, 0.05, size=(2, 3))
133 | )
134 | return Dataset([Geometry.from_atoms(a) for a in data])
135 |
136 |
137 | @pytest.fixture
138 | def checkpoint():
139 | checkpoint_str = """
140 |
141 |
142 | 1
143 | [ time, temperature, potential ]
144 |
145 | 100
146 |
147 | cSzwsJ2A/einsteincrystal0
148 | 8.33333333e-02
149 |
150 |
151 | cSzwsJ2A/plumedhamiltonian0
152 | 8.33333333e-02
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 | 1.90008912e-03
163 | 4.11423554e-03
164 | [ 1.00000000e+00 ]
165 | 2.06706865e+02
166 |
167 |
168 |
169 |
170 | 4.13413730e+03
171 |
172 | 2.06706865e+01
173 | [ 1 ]
174 |
175 |
176 |
177 |
178 | [ 1.44513572e-01, -2.22608601e-02, 6.90340566e-02, -1.48068714e-01, 3.67026570e+00,
179 | 3.24415892e+00, 3.09455639e+00, -2.66306646e-01, 3.36282329e+00, 3.54200180e+00,
180 | 3.39685661e+00, 5.46722856e-01 ]
181 |
182 |
183 | [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
184 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
185 | 0.00000000e+00, 0.00000000e+00 ]
186 |
187 | [ 1.83747161e+03, 1.15837273e+05, 1.15837273e+05, 1.15837273e+05 ]
188 | [ H, Cu, Cu, Cu ]
189 |
190 |
191 | [ 1e+00, 1e-01, 0, 0.00000000e+00, 2e+00,
192 | 0, 0.00000000e+00, 0.00000000e+00, 3e+00 ]
193 | |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 | 1.90008912e-03
204 | 4.11423554e-03
205 | [ 1.00000000e+00 ]
206 | 2.06706865e+02
207 |
208 |
209 |
210 |
211 | 4.13413730e+03
212 |
213 | 2.06706865e+01
214 | [ 1 ]
215 |
216 |
217 |
218 |
219 | [ 1.44513572e-01, -2.22608601e-02, 6.90340566e-02, -1.48068714e-01, 3.67026570e+00,
220 | 3.24415892e+00, 3.09455639e+00, -2.66306646e-01, 3.36282329e+00, 3.54200180e+00,
221 | 3.39685661e+00, 5.46722856e-01 ]
222 |
223 |
224 | [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
225 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
226 | 0.00000000e+00, 0.00000000e+00 ]
227 |
228 | [ 1.83747161e+03, 1.15837273e+05, 1.15837273e+05, 1.15837273e+05 ]
229 | [ H, Cu, Cu, Cu ]
230 |
231 |
232 | [ 6.92067797e+00, 1.35926184e-01, -3.29542567e-02, 0.00000000e+00, 6.46614176e+00,
233 | -3.74701247e-01, 0.00000000e+00, 0.00000000e+00, 6.45073059e+00 ]
234 | |
235 |
236 |
237 | """
238 | return ET.ElementTree(element=ET.fromstring(checkpoint_str))
239 |
--------------------------------------------------------------------------------
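
The session-scoped context fixture above consumes the --psiflow-config option registered in pytest_addoption; a sketch of launching the suite programmatically with the repository's threadpool configuration:

    import pytest

    # equivalent to the addopts in pyproject.toml, plus skipping GPU tests
    pytest.main([
        "--psiflow-config=configs/threadpool.yaml",
        "--skip-gpu",
        "tests/",
    ])
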
/tests/test_free_energy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ase.units import _c, kB, second
3 |
4 | from psiflow.free_energy import (
5 | Integration,
6 | compute_frequencies,
7 | compute_harmonic,
8 | harmonic_free_energy,
9 | )
10 | from psiflow.geometry import check_equality
11 | from psiflow.hamiltonians import EinsteinCrystal, Harmonic, MACEHamiltonian
12 | from psiflow.sampling.ase import optimize
13 |
14 |
15 | def test_integration_simple(dataset):
16 | dataset = dataset[:10]
17 | einstein = EinsteinCrystal(dataset[1], force_constant=2)
18 | geometry = optimize(
19 | dataset[3],
20 | einstein,
21 | mode='fix_cell',
22 | f_max=1e-4,
23 | )
24 | hessian = compute_harmonic(
25 | geometry,
26 | einstein,
27 | pos_shift=5e-4,
28 | )
29 | harmonic = Harmonic(geometry, hessian)
30 |
31 | integration = Integration(
32 | harmonic,
33 | temperatures=[300, 400],
34 | delta_hamiltonian=(-0.1) * harmonic,
35 | delta_coefficients=np.array([0.0, 0.5, 1.0]),
36 | )
37 | walkers = integration.create_walkers(
38 | dataset,
39 | initialize_by="quench",
40 | )
41 | for walker in walkers:
42 | assert check_equality(walker.start, dataset[1]).result()
43 |
44 | assert len(integration.states) == 6
45 |
46 | integration.sample(steps=100, step=6)
47 | integration.compute_gradients()
48 | for i, state in enumerate(integration.states):
49 | assert state.gradients["delta"] is not None
50 | assert state.gradients["temperature"] is not None
51 |
52 | # manual computation of delta gradient
53 | delta = -0.1 * harmonic
54 | energies = delta.compute(integration.outputs[i].trajectory, "energy")
55 | assert np.allclose(
56 | state.gradients["delta"].result(),
57 | np.mean(energies.result()) / (kB * state.temperature),
58 | )
59 |
60 | hessian = hessian.result()
61 | frequencies0 = compute_frequencies(hessian, geometry)
62 | frequencies1 = compute_frequencies(hessian * 0.9, geometry)
63 | F0 = harmonic_free_energy(frequencies0, 300).result()
64 | F1 = harmonic_free_energy(frequencies1, 300).result()
65 |
66 | integrated = integration.along_delta(temperature=300).result()
67 | assert len(integrated) == 3
68 | print("\nalong delta")
69 | print(" computed delta_F: {}".format(integrated[-1]))
70 | print("theoretical delta_F: {}".format(F1 - F0))
71 | print("")
72 |
73 | # integrated = integration.along_temperature(delta_coefficient=1.0).result()
74 | # assert len(integrated) == 2
75 | # assert np.allclose(integrated[0], 0.0)
76 | # F2 = np.sum(compute_free_energy(frequencies, 400).result())
77 | # print('\nalong temperature')
78 | # print(' computed delta_F: {}'.format(integrated[-1] / (kB * 400)))
79 | # print('theoretical delta_F: {}'.format(F2 / (kB * 400) - F1 / (kB * 300)))
80 |
81 |
82 | def test_integration_temperature(dataset):
83 | einstein = EinsteinCrystal(dataset[0], force_constant=1)
84 | integration = Integration(
85 | hamiltonian=einstein,
86 | temperatures=[300, 400],
87 | pressure=0.0,
88 | )
89 | integration.create_walkers(dataset[:3])
90 | integration.sample(steps=10, step=1)
91 | integration.compute_gradients()
92 | gradient0 = integration.states[0].gradients["temperature"]
93 |
94 | integration = Integration(
95 | hamiltonian=einstein,
96 | temperatures=[300, 400],
97 | )
98 | integration.create_walkers(dataset[:3])
99 | integration.sample(steps=10, step=1)
100 | integration.compute_gradients()
101 | gradient1 = integration.states[0].gradients["temperature"]
102 | assert np.allclose(gradient0.result(), gradient1.result())
103 |
104 |
105 | def test_phonons(dataset):
106 | reference = dataset[2].result()
107 | constant = 10
108 | einstein = EinsteinCrystal(reference, force_constant=constant)
109 |
110 | hessian = compute_harmonic(
111 | reference,
112 | einstein,
113 | asr="none", # einstein == translationally VARIANT
114 | )
115 | assert np.allclose(
116 | hessian.result(), constant * np.eye(3 * len(reference)), rtol=1e-4
117 | )
118 |
119 |
120 | def test_dihydrogen(dataset_h2):
121 | geometry = dataset_h2[0].result()
122 | geometry.cell = 20 * np.eye(3)
123 | hamiltonian = MACEHamiltonian.mace_mp0("small")
124 | optimized = optimize(
125 | geometry,
126 | hamiltonian,
127 | mode='fix_cell',
128 | f_max=1e-4,
129 | ).result()
130 | assert optimized.energy is not None
131 | assert np.linalg.norm(optimized.per_atom.forces) < 1e-2
132 | hessian = compute_harmonic(
133 | optimized,
134 | hamiltonian,
135 | asr="crystal",
136 | pos_shift=0.001,
137 | )
138 | frequencies = compute_frequencies(hessian, geometry).result()
139 | # check that highest frequency in inv cm corresponds to 3500 - 4000
140 | frequencies_invcm = (frequencies * second) / (_c * 1e2) # in invcm
141 | assert np.abs(frequencies_invcm[-1] - 4000) < 1000
142 |
143 |
144 | def test_frequency_oscillator():
145 | for quantum in [True, False]:
146 | f0 = harmonic_free_energy(1.0, 300, quantum=quantum).result()
147 | f1 = harmonic_free_energy(1.1, 300, quantum=quantum).result()
148 | assert f1 > f0
149 |
150 | f2 = harmonic_free_energy(1.0, 400, quantum=quantum).result()
151 | assert f0 > f2
152 |
--------------------------------------------------------------------------------
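
test_frequency_oscillator below exercises the monotonicity of harmonic_free_energy; for reference, a standalone sketch of the textbook harmonic-oscillator expressions behind those assertions (psiflow's own implementation may use different unit conventions):

    import numpy as np

    kB = 8.617333262e-5  # eV/K

    def harmonic_f(hbar_omega: float, T: float, quantum: bool = True) -> float:
        """Free energy of one mode with energy quantum hbar*omega (in eV)."""
        if quantum:
            return hbar_omega / 2 + kB * T * np.log(1 - np.exp(-hbar_omega / (kB * T)))
        return kB * T * np.log(hbar_omega / (kB * T))

    # stiffer mode -> higher F; higher temperature -> lower F
    assert harmonic_f(0.11, 300) > harmonic_f(0.1, 300) > harmonic_f(0.1, 400)
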
/tests/test_models.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import numpy as np
4 | from parsl.app.futures import DataFuture
5 |
6 | import psiflow
7 | from psiflow.data import compute_rmse
8 | from psiflow.hamiltonians import MACEHamiltonian
9 | from psiflow.models import MACE, load_model
10 |
11 |
12 | def test_mace_init(mace_config, dataset):
13 | model = MACE(**mace_config)
14 | assert "model_future" in model._files
15 | assert model.model_future is None
16 | model.initialize(dataset[:1])
17 | assert model.model_future is not None
18 |
19 | _config = model._config
20 |
21 | data_str = psiflow.serialize(model).result()
22 | model = psiflow.deserialize(data_str)
23 |
24 | _config_ = model._config
25 | for key, value in _config.items():
26 | assert key in _config_
27 | if type(value) is not list:
28 | assert value == _config_[key]
29 |
30 | config = copy.deepcopy(mace_config)
31 | config["batch_size"] = (
32 | 100000 # bigger than ntrain --> should get reduced internally
33 | )
34 | model = MACE(**config)
35 | model.seed = 1
36 | model.initialize(dataset[:3])
37 | assert isinstance(model.model_future, DataFuture)
38 |
39 | # create hamiltonian and verify addition of atomic energies
40 | hamiltonian = model.create_hamiltonian()
41 | assert hamiltonian == model.create_hamiltonian()
42 | energies = hamiltonian.compute(dataset, "energy").result()
43 |
44 | nstates = dataset.length().result()
45 | # energies = np.array([evaluated[i].result().energy for i in range(nstates)])
46 | assert not np.any(np.allclose(energies, 0.0))
47 | energy_Cu = 3
48 | energy_H = 7
49 | atomic_energies = {
50 | "Cu": energy_Cu,
51 | "H": energy_H,
52 | }
53 | hamiltonian = MACEHamiltonian(
54 | hamiltonian.external,
55 | atomic_energies=atomic_energies,
56 | )
57 | assert hamiltonian != model.create_hamiltonian() # atomic energies
58 |
59 | evaluated = dataset.evaluate(hamiltonian)
60 | for i in range(nstates):
61 | assert np.allclose(
62 | energies[i],
63 | evaluated.subtract_offset(Cu=energy_Cu, H=energy_H)[i].result().energy,
64 | )
65 |
66 | energies = hamiltonian.compute(dataset, "energy").result()
67 | second = psiflow.deserialize(psiflow.serialize(hamiltonian).result())
68 | energies_ = second.compute(dataset, "energy").result()
69 | assert np.allclose(energies, energies_)
70 |
71 | hamiltonian = model.create_hamiltonian()
72 | model.reset()
73 | model.initialize(dataset[:3])
74 | assert hamiltonian != model.create_hamiltonian()
75 |
76 |
77 | def test_mace_train(gpu, mace_config, dataset, tmp_path):
78 | # as an additional verification, this test can be executed while monitoring
79 | # the mace logging, and in particular the rmse_r during training, to compare
80 | # it with the manually computed value
81 | training = dataset[:-5]
82 | validation = dataset[-5:]
83 | mace_config["start_swa"] = 100
84 | model = MACE(**mace_config)
85 | model.initialize(training)
86 | hamiltonian0 = model.create_hamiltonian()
87 | rmse0 = compute_rmse(
88 | validation.get("per_atom_energy"),
89 | validation.evaluate(hamiltonian0).get("per_atom_energy"),
90 | )
91 | model.train(training, validation)
92 | hamiltonian1 = model.create_hamiltonian()
93 | rmse1 = compute_rmse(
94 | validation.get("per_atom_energy"),
95 | validation.evaluate(hamiltonian1).get("per_atom_energy"),
96 | )
97 | assert rmse0.result() > rmse1.result()
98 |
99 |
100 | def test_mace_save_load(mace_config, dataset, tmp_path):
101 | model = MACE(**mace_config)
102 | model.add_atomic_energy("H", 3)
103 | model.add_atomic_energy("Cu", 4)
104 | model.save(tmp_path)
105 | model.initialize(dataset[:2])
106 | e0 = model.create_hamiltonian().compute(dataset[3], "energy").result()
107 |
108 | psiflow.wait()
109 | assert (tmp_path / "MACE.yaml").exists()
110 | assert not (tmp_path / "MACE.pth").exists()
111 |
112 | model.save(tmp_path)
113 | psiflow.wait()
114 | assert (tmp_path / "MACE.pth").exists()
115 |
116 | model_ = load_model(tmp_path)
117 | assert type(model_) is MACE
118 | assert model_.model_future is not None
119 | e1 = model_.create_hamiltonian().compute(dataset[3], "energy").result()
120 | assert np.allclose(e0, e1, atol=1e-4) # up to single precision
121 |
122 |
123 | def test_mace_seed(mace_config):
124 | model = MACE(**mace_config)
125 | assert model.seed == 0
126 | model.seed = 111
127 | assert model.seed == 111
128 | model._config["seed"] = 112
129 | assert model.seed == 112
130 |
--------------------------------------------------------------------------------
/tests/test_serialization.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from pathlib import Path
4 | from typing import Optional, Union
5 |
6 | import pytest
7 | import typeguard
8 | from parsl.data_provider.files import File
9 | from parsl.dataflow.futures import AppFuture
10 |
11 | import psiflow
12 | from psiflow.data import Dataset
13 | from psiflow.geometry import Geometry, NullState, new_nullstate
14 | from psiflow.utils.apps import copy_app_future
15 |
16 |
17 | def test_serial_simple(tmp_path):
18 | @psiflow.serializable
19 | class SomeSerial:
20 | pass
21 |
22 | @typeguard.typechecked
23 | class Test:
24 | foo: int
25 | bar: psiflow._DataFuture
26 | baz: Union[float, str]
27 | bam: Optional[SomeSerial]
28 | bao: SomeSerial
29 |         bap: list[SomeSerial]
30 | baq: Union[Geometry, AppFuture]
31 | bas: Geometry
32 |
33 | def __init__(self, **kwargs):
34 | for key, value in kwargs.items():
35 | setattr(self, key, value)
36 |
37 | new_cls = psiflow.serializable(Test)
38 | instance = new_cls(
39 | foo=3,
40 | bar=File("asdfl"),
41 | baz="asdflk",
42 | bam=None,
43 | bao=SomeSerial(),
44 | bap=[SomeSerial(), SomeSerial()],
45 | baq=copy_app_future(NullState),
46 | bas=new_nullstate(),
47 | )
48 | assert instance.foo == 3
49 | assert instance._attrs["foo"] == 3
50 |
51 | # test independence
52 | instance._attrs["test"] = 1
53 | instance_ = new_cls(foo=4, bar=File("asdfl"))
54 | assert "test" not in instance_._attrs
55 | assert instance_.foo == 4
56 | assert instance.foo == 3
57 |
58 | assert tuple(instance._files.keys()) == ("bar",)
59 | assert tuple(instance._attrs.keys()) == ("foo", "baz", "test")
60 | assert tuple(instance._serial.keys()) == ("bam", "bao", "bap")
61 | assert type(instance._serial["bap"]) is list
62 | assert len(instance._serial["bap"]) == 2
63 | assert len(instance._geoms) == 2
64 | assert "baq" in instance._geoms
65 | assert "bas" in instance._geoms
66 |
67 | # serialization/deserialization of 'complex' Test instance
68 | json_dump = psiflow.serialize(instance).result()
69 | instance_ = psiflow.deserialize(json_dump, custom_cls=[new_cls, SomeSerial])
70 |
71 | assert instance.foo == instance_.foo
72 | assert instance.bar.filepath == instance_.bar.filepath
73 | assert instance.baz == instance_.baz
74 | assert instance.bam == instance_.bam
75 | assert type(instance_.bao) is SomeSerial
76 | assert len(instance_.bap) == 2
77 | assert type(instance_.bap[0]) is SomeSerial
78 | assert type(instance_.bap[1]) is SomeSerial
79 | assert id(instance) != id(instance_)
80 | assert isinstance(instance_.baq, Geometry)
81 | assert instance_.baq == NullState
82 | assert instance_.bas == NullState
83 |
84 | # check classes created before test execution, e.g. Dataset
85 | data = Dataset([NullState])
86 | assert "extxyz" in data._files
87 | assert len(data._attrs) == 0
88 | assert len(data._serial) == 0
89 | with pytest.raises(typeguard.TypeCheckError): # try something stupid
90 | data.extxyz = 0
91 |
92 | # test getter / setter
93 | data.extxyz = File("some_file")
94 | assert type(data.extxyz) is File
95 |
96 | # test basic serialization
97 | dumped_json = psiflow.serialize(data).result()
98 | assert "Dataset" in dumped_json
99 | data_dict = json.loads(dumped_json)
100 | assert len(data_dict["Dataset"]["_attrs"]) == 0
101 | assert len(data_dict["Dataset"]["_serial"]) == 0
102 | assert len(data_dict["Dataset"]["_files"]) == 1
103 | assert data_dict["Dataset"]["_files"]["extxyz"] == data.extxyz.filepath
104 |
105 | # test copy_to serialization
106 | data = Dataset([NullState])
107 | data.extxyz.result()
108 | filename = Path(data.extxyz.filepath).name
109 | assert os.path.exists(data.extxyz.filepath)
110 | dumped_json = psiflow.serialize(data, copy_to=tmp_path / "test").result()
111 | os.remove(data.extxyz.filepath)
112 | assert (tmp_path / "test").exists()
113 | assert (tmp_path / "test" / filename).exists() # new file
114 |
--------------------------------------------------------------------------------