├── .git_archival.txt ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── mirror_gitee.yml ├── .gitignore ├── LICENSE ├── README.md ├── deepks ├── __init__.py ├── __main__.py ├── iterate │ ├── __init__.py │ ├── __main__.py │ ├── iterate.py │ └── template.py ├── main.py ├── model │ ├── __init__.py │ ├── __main__.py │ ├── model.py │ ├── reader.py │ ├── test.py │ └── train.py ├── scf │ ├── __init__.py │ ├── __main__.py │ ├── _old_grad.py │ ├── addons.py │ ├── fields.py │ ├── grad.py │ ├── penalty.py │ ├── run.py │ ├── scf.py │ └── stats.py ├── task │ ├── __init__.py │ ├── job │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── dispatcher.py │ │ ├── job_status.py │ │ ├── lazy_local_context.py │ │ ├── local_context.py │ │ ├── shell.py │ │ ├── slurm.py │ │ └── ssh_context.py │ ├── task.py │ └── workflow.py ├── tools │ ├── __init__.py │ ├── geom_optim.py │ └── num_hessian.py └── utils.py ├── examples ├── iterate │ ├── combined.yaml │ └── splitted │ │ ├── args.yaml │ │ └── share │ │ ├── init_scf.yaml │ │ ├── init_train.yaml │ │ ├── scf_input.yaml │ │ ├── systems_test.raw │ │ ├── systems_train.raw │ │ └── train_input.yaml ├── legacy │ ├── iter_linear │ │ └── run.py │ ├── iter_nn_local │ │ ├── run.py │ │ ├── run_res.py │ │ └── share │ │ │ ├── e_ref.npy │ │ │ ├── init │ │ │ ├── model.pth │ │ │ ├── test_paths.raw │ │ │ └── train_paths.raw │ │ │ ├── mol_files.raw │ │ │ ├── scf_input.yaml │ │ │ └── train_input.yaml │ ├── iter_nn_new │ │ ├── extra.py │ │ ├── init_train │ │ │ ├── input.yaml │ │ │ ├── log.train │ │ │ └── model.pth │ │ ├── run.py │ │ └── share │ │ │ ├── e_ref.npy │ │ │ ├── f_ref.npy │ │ │ ├── init │ │ │ └── model.pth │ │ │ ├── mol_files.raw │ │ │ ├── raw_scf_input.yaml │ │ │ ├── scf_input.yaml │ │ │ └── train_input.yaml │ ├── iter_nn_slurm │ │ ├── run.py │ │ ├── run_res.py │ │ └── share │ │ │ ├── e_ref.npy │ │ │ ├── index.raw │ │ │ ├── init │ │ │ ├── test_paths.raw │ │ │ └── train_paths.raw │ │ │ ├── input.yaml │ │ │ ├── mol_files.raw │ │ │ └── test.sh │ └── train_active_learning │ │ ├── run.py │ │ └── share │ │ ├── init │ │ ├── new_test_paths.raw │ │ └── new_train_paths.raw │ │ ├── input.yaml │ │ └── test_model.sh ├── train_input │ ├── extended.yaml │ ├── force.yaml │ ├── gelu.yaml │ └── restart.yaml ├── water_cluster │ ├── .gitignore │ ├── README.md │ ├── args.yaml │ ├── run.sh │ ├── run_shell.sh │ ├── shell.yaml │ ├── systems │ │ ├── test.n6 │ │ │ ├── atom.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ └── unit.raw │ │ ├── train.n1 │ │ │ ├── atom.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ └── unit.raw │ │ ├── train.n2 │ │ │ ├── coord.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ ├── type.raw │ │ │ └── unit.raw │ │ ├── train.n3 │ │ │ ├── coord.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ ├── type.raw │ │ │ └── unit.raw │ │ └── valid.n4 │ │ │ ├── coord.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ ├── type.raw │ │ │ └── unit.raw │ └── test.sh └── water_single │ ├── .gitignore │ ├── README.md │ ├── init │ ├── machines.yaml │ ├── params.yaml │ ├── run.sh │ └── systems.yaml │ ├── iter │ ├── args.yaml │ └── run.sh │ ├── systems │ ├── group.00 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ ├── group.01 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ ├── group.02 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ └── group.03 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ └── withdens │ ├── base.yaml │ ├── penalty.yaml │ ├── pipe.sh │ ├── relax.yaml │ └── run.sh ├── 
requirements.txt ├── scripts ├── convert_xyz.py ├── legacy │ ├── calc_eig.py │ ├── proj_dm.py │ ├── rhf.py │ ├── rks.py │ └── rmp2.py └── solve_mol.py └── setup.py /.git_archival.txt: -------------------------------------------------------------------------------- 1 | node: 4f133fb60e00bc5e413e80e32214defb7a145415 2 | node-date: 2025-04-29T05:22:58+08:00 3 | describe-name: v0.1-84-g4f133fb 4 | ref-names: HEAD -> master 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .git_archival.txt export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/mirror_gitee.yml: -------------------------------------------------------------------------------- 1 | name: Mirror to Gitee Repo 2 | 3 | on: [ push, delete, create ] 4 | 5 | # Ensures that only one mirror task will run at a time. 6 | concurrency: 7 | group: git-mirror 8 | 9 | jobs: 10 | git-mirror: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: wearerequired/git-mirror-action@v1 14 | env: 15 | ORGANIZATION: deepmodeling 16 | SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }} 17 | with: 18 | source-repo: "https://github.com/deepmodeling/deepks-kit.git" 19 | destination-repo: "git@gitee.com:deepmodeling/deepks-kit.git" 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # User defined 2 | *~ 3 | checkpoint 4 | model.ckpt.* 5 | .vscode 6 | .ipynb_* 7 | *.swp 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | _version.py 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Environments 49 | .env 50 | .venv 51 | env/ 52 | venv/ 53 | ENV/ 54 | env.bak/ 55 | venv.bak/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 
14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeePKS-kit
2 | 
3 | DeePKS-kit is a program to generate accurate energy functionals for quantum chemistry systems,
4 | for both the perturbative scheme (DeePHF) and the self-consistent scheme (DeePKS).
5 | 
6 | The program provides a command line interface `deepks` that contains five sub-commands:
7 | - `train`: train a neural-network-based post-HF energy functional model
8 | - `test`: test the post-HF model with given data and show statistics
9 | - `scf`: run a self-consistent field calculation with a given energy model
10 | - `stats`: collect and print statistics of the SCF results
11 | - `iterate`: iteratively train a self-consistent model by combining the four commands above
12 | 
13 | ## Installation
14 | 
15 | DeePKS-kit is a pure Python library, so it can be installed following the standard `git clone` then `pip install` procedure. Note that the two main requirements, `pytorch` and `pyscf`, will not be installed automatically, so you will need to install them manually in advance. Below are more detailed instructions that include installing the required libraries in the environment.
16 | 
17 | We use `conda` here as an example, so first you may need to install [Anaconda](https://docs.anaconda.com/anaconda/install/) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html).
18 | 
19 | To reduce the possibility of library conflicts, we suggest creating a new environment (named `deepks`) with basic dependencies installed (optional):
20 | ```bash
21 | conda create -n deepks numpy scipy h5py ruamel.yaml paramiko
22 | conda activate deepks
23 | ```
24 | Now you are in the new environment called `deepks`.
25 | Next, install [PyTorch](https://pytorch.org/get-started/locally/)
26 | ```bash
27 | # assuming a GPU with cudatoolkit 10.2 support
28 | conda install pytorch cudatoolkit=10.2 -c pytorch
29 | ```
30 | and [PySCF](https://github.com/pyscf/pyscf).
31 | ```bash
32 | # the conda package does not support python >= 3.8 so we use pip
33 | pip install pyscf
34 | ```
35 | 
36 | Once the environment has been set up properly, use pip to install DeePKS-kit:
37 | ```bash
38 | pip install git+https://github.com/deepmodeling/deepks-kit/
39 | ```
40 | 
41 | ## Usage
42 | 
43 | A relatively detailed description of the `deepks-kit` library can be found [here](https://arxiv.org/pdf/2012.14615.pdf). Please also refer to the references below for a description of the methods.
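For a quick orientation, the sketch below strings the sub-commands together by hand. This is a minimal sketch that assumes each sub-command accepts a YAML input file, as in the examples folder; the file names are placeholders borrowed from those examples, not names required by the program.

```bash
# minimal sketch -- the YAML file names are placeholders from examples/
deepks train train_input.yaml    # fit a neural-network energy correction model
deepks scf scf_input.yaml        # run SCF calculations with a trained model
deepks iterate args.yaml         # run the full self-consistent training loop
```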
44 | 
45 | Please see the [`examples`](./examples) folder for the usage of the `deepks-kit` library. A detailed example with executable data for single water molecules can be found [here](./examples/water_single). A more complicated one for training water clusters can be found [here](./examples/water_cluster).
46 | 
47 | Check [this input file](./examples/water_cluster/args.yaml) for a detailed explanation of the possible input parameters, and also [this one](./examples/water_cluster/shell.yaml) if you would like to run on a local machine instead of using the Slurm scheduler.
48 | 
49 | ## References
50 | 
51 | [1] Chen, Y., Zhang, L., Wang, H. and E, W., 2020. Ground State Energy Functional with Hartree–Fock Efficiency and Chemical Accuracy. The Journal of Physical Chemistry A, 124(35), pp.7155–7165.
52 | 
53 | [2] Chen, Y., Zhang, L., Wang, H. and E, W., 2021. DeePKS: A Comprehensive Data-Driven Approach toward Chemically Accurate Density Functional Theory. Journal of Chemical Theory and Computation, 17(1), pp.170–181.
54 | 
55 | 
56 | 
62 | 
63 | 
--------------------------------------------------------------------------------
/deepks/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "Yixiao Chen"
2 | 
3 | try:
4 |     from ._version import version as __version__
5 | except ImportError:
6 |     __version__ = 'unknown'
7 | 
8 | __all__ = [
9 |     "iterate",
10 |     "model",
11 |     "scf",
12 |     "task",
13 |     # "tools" # collection of command line scripts, should not be imported by user
14 | ]
15 | 
16 | def __getattr__(name):
17 |     from importlib import import_module
18 |     if name in __all__:
19 |         return import_module("." + name, __name__)
20 |     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
--------------------------------------------------------------------------------
/deepks/__main__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | try:
4 |     import deepks
5 | except ImportError as e:
6 |     sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../")
7 | 
8 | from deepks.main import main_cli
9 | 
10 | if __name__ == "__main__":
11 |     main_cli()
--------------------------------------------------------------------------------
/deepks/iterate/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "iterate",
3 |     "template",
4 | ]
5 | 
6 | from .iterate import make_scf, make_train, make_iterate
--------------------------------------------------------------------------------
/deepks/iterate/__main__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | try:
4 |     import deepks
5 | except ImportError as e:
6 |     sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../")
7 | 
8 | from deepks.main import iter_cli
9 | 
10 | if __name__ == "__main__":
11 |     iter_cli()
--------------------------------------------------------------------------------
/deepks/model/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "model",
3 |     "reader",
4 |     "train",
5 |     "test",
6 | ]
7 | 
8 | def __getattr__(name):
9 |     from importlib import import_module
10 |     if name == "CorrNet":
11 |         from .model import CorrNet
12 |         return CorrNet
13 |     if name in __all__:
14 |         return import_module("."
+ name, __name__) 15 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 16 | -------------------------------------------------------------------------------- /deepks/model/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | try: 4 | import deepks 5 | except ImportError as e: 6 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../") 7 | 8 | from deepks.main import train_cli 9 | 10 | if __name__ == "__main__": 11 | train_cli() -------------------------------------------------------------------------------- /deepks/model/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | try: 6 | import deepks 7 | except ImportError as e: 8 | import sys 9 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../../") 10 | from deepks.model.model import CorrNet 11 | from deepks.model.reader import GroupReader 12 | from deepks.utils import load_yaml, load_dirs, check_list 13 | 14 | 15 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 16 | 17 | 18 | def test(model, g_reader, dump_prefix="test", group=False): 19 | model.eval() 20 | loss_fn=nn.MSELoss() 21 | label_list = [] 22 | pred_list = [] 23 | 24 | for i in range(g_reader.nsystems): 25 | sample = g_reader.sample_all(i) 26 | nframes = sample["lb_e"].shape[0] 27 | sample = {k: v.to(DEVICE, non_blocking=True) for k, v in sample.items()} 28 | label, data = sample["lb_e"], sample["eig"] 29 | pred = model(data) 30 | error = torch.sqrt(loss_fn(pred, label)) 31 | 32 | error_np = error.item() 33 | label_np = label.cpu().numpy().reshape(nframes, -1).sum(axis=1) 34 | pred_np = pred.detach().cpu().numpy().reshape(nframes, -1).sum(axis=1) 35 | error_l1 = np.mean(np.abs(label_np - pred_np)) 36 | label_list.append(label_np) 37 | pred_list.append(pred_np) 38 | 39 | if not group and dump_prefix is not None: 40 | nd = max(len(str(g_reader.nsystems)), 2) 41 | dump_res = np.stack([label_np, pred_np], axis=1) 42 | header = f"{g_reader.path_list[i]}\nmean l1 error: {error_l1}\nmean l2 error: {error_np}\nreal_ene pred_ene" 43 | filename = f"{dump_prefix}.{i:0{nd}}.out" 44 | np.savetxt(filename, dump_res, header=header) 45 | # print(f"system {i} finished") 46 | 47 | all_label = np.concatenate(label_list, axis=0) 48 | all_pred = np.concatenate(pred_list, axis=0) 49 | all_err_l1 = np.mean(np.abs(all_label - all_pred)) 50 | all_err_l2 = np.sqrt(np.mean((all_label - all_pred) ** 2)) 51 | info = f"all systems mean l1 error: {all_err_l1}\nall systems mean l2 error: {all_err_l2}" 52 | print(info) 53 | if dump_prefix is not None and group: 54 | np.savetxt(f"{dump_prefix}.out", np.stack([all_label, all_pred], axis=1), 55 | header=info + "\nreal_ene pred_ene") 56 | return all_err_l1, all_err_l2 57 | 58 | 59 | def main(data_paths, model_file="model.pth", 60 | output_prefix='test', group=False, 61 | e_name='l_e_delta', d_name=['dm_eig']): 62 | data_paths = load_dirs(data_paths) 63 | if len(d_name) == 1: 64 | d_name = d_name[0] 65 | g_reader = GroupReader(data_paths, e_name=e_name, d_name=d_name, 66 | conv_filter=False, extra_label=True) 67 | model_file = check_list(model_file) 68 | for f in model_file: 69 | print(f) 70 | p = os.path.dirname(f) 71 | model = CorrNet.load(f).double().to(DEVICE) 72 | dump = os.path.join(p, output_prefix) 73 | dir_name = os.path.dirname(dump) 74 | if dir_name: 75 | os.makedirs(dir_name, 
exist_ok=True) 76 | if model.elem_table is not None: 77 | elist, econst = model.elem_table 78 | g_reader.collect_elems(elist) 79 | g_reader.subtract_elem_const(econst) 80 | test(model, g_reader, dump_prefix=dump, group=group) 81 | g_reader.revert_elem_const() 82 | 83 | 84 | if __name__ == "__main__": 85 | from deepks.main import test_cli as cli 86 | cli() 87 | -------------------------------------------------------------------------------- /deepks/scf/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "scf", 3 | "grad", 4 | "run", 5 | "stats", 6 | "fields", 7 | "penalty", 8 | ] 9 | 10 | def __getattr__(name): 11 | from importlib import import_module 12 | if name in __all__: 13 | return import_module("." + name, __name__) 14 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 15 | 16 | 17 | def DSCF(mol, model, xc="HF", **kwargs): 18 | """A wrap function to create NN SCF object (RDSCF or UDSCF)""" 19 | from .scf import RDSCF, UDSCF 20 | if mol.spin == 0: 21 | return RDSCF(mol, model, xc, **kwargs) 22 | else: 23 | return UDSCF(mol, model, xc, **kwargs) 24 | 25 | DeepSCF = DSCF -------------------------------------------------------------------------------- /deepks/scf/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | try: 4 | import deepks 5 | except ImportError as e: 6 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../") 7 | 8 | from deepks.main import scf_cli 9 | 10 | if __name__ == "__main__": 11 | scf_cli() -------------------------------------------------------------------------------- /deepks/scf/addons.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import numpy as np 4 | from torch import nn 5 | from pyscf import lib 6 | from pyscf.lib import logger 7 | from pyscf import gto 8 | from pyscf import scf, dft 9 | from deepks.scf.scf import t_make_eig, t_make_grad_eig_dm 10 | 11 | 12 | def t_ele_grad(bfock, c_vir, c_occ, n_occ): 13 | g = torch.einsum("pa,qi,...pq->...ai", c_vir, c_occ*n_occ, bfock) 14 | return g.flatten(-2) 15 | 16 | 17 | def make_grad_eig_egrad(dscf, mo_coeff=None, mo_occ=None, gfock=None): 18 | if mo_occ is None: 19 | mo_occ = dscf.mo_occ 20 | if mo_coeff is None: 21 | mo_coeff = dscf.mo_coeff 22 | if gfock is None: 23 | dm = dscf.make_rdm1(mo_coeff, mo_occ) 24 | if dm.ndim >= 3 and isinstance(dscf, scf.uhf.UHF): 25 | dm = dm.sum(0) 26 | gfock = t_make_grad_eig_dm(torch.from_numpy(dm), dscf._t_ovlp_shells).numpy() 27 | if mo_coeff.ndim >= 3 and mo_occ.ndim >= 2: 28 | return np.concatenate([make_grad_eig_egrad(dscf, mc, mo, gfock) 29 | for mc, mo in zip(mo_coeff, mo_occ)], axis=-1) 30 | iocc = mo_occ>0 31 | t_no = torch.from_numpy(mo_occ[iocc]).to(dscf.device) 32 | t_co = torch.from_numpy(mo_coeff[:, iocc]).to(dscf.device) 33 | t_cv = torch.from_numpy(mo_coeff[:, ~iocc]).to(dscf.device) 34 | t_gfock = torch.from_numpy(gfock).to(dscf.device) 35 | return t_ele_grad(t_gfock, t_cv, t_co, t_no).cpu().numpy() 36 | 37 | 38 | def gen_coul_loss(dscf, fock=None, ovlp=None, mo_occ=None): 39 | nao = dscf.mol.nao 40 | fock = (fock if fock is not None else dscf.get_fock()).reshape(-1, nao, nao) 41 | s1e = ovlp if ovlp is not None else dscf.get_ovlp() 42 | mo_occ = (mo_occ if mo_occ is not None else dscf.mo_occ).reshape(-1, nao) 43 | def _coul_loss_grad(v, target_dm): 44 | # return coulomb loss and its grad with respect to fock matrix 
45 | # only support single dm, do not use directly for UHF 46 | a_loss = 0. 47 | a_grad = 0. 48 | target_dm = target_dm.reshape(fock.shape) 49 | for tdm, f1e, nocc in zip(target_dm, fock, mo_occ): 50 | iocc = nocc>0 51 | moe, moc = dscf._eigh(f1e+v, s1e) 52 | eo, ev = moe[iocc], moe[~iocc] 53 | co, cv = moc[:, iocc], moc[:, ~iocc] 54 | dm = (co * nocc[iocc]) @ co.T 55 | # calc loss 56 | ddm = dm - tdm 57 | dvj = dscf.get_j(dm=ddm) 58 | loss = 0.5 * np.einsum("ij,ji", ddm, dvj) 59 | a_loss += loss 60 | # calc grad with respect to fock matrix 61 | ie_mn = 1. / (-ev.reshape(-1, 1) + eo) 62 | temp_mn = cv.T @ dvj @ co * nocc[iocc] * ie_mn 63 | dldv = cv @ temp_mn @ co.T 64 | dldv = dldv + dldv.T 65 | a_grad += dldv 66 | return a_loss, a_grad 67 | return _coul_loss_grad 68 | 69 | 70 | def make_grad_coul_veig(dscf, target_dm): 71 | clfn = gen_coul_loss(dscf) 72 | dm = dscf.make_rdm1() 73 | if dm.ndim == 3 and isinstance(dscf, scf.uhf.UHF): 74 | dm = dm.sum(0) 75 | t_dm = torch.from_numpy(dm).requires_grad_() 76 | t_eig = t_make_eig(t_dm, dscf._t_ovlp_shells).requires_grad_() 77 | loss, dldv = clfn(np.zeros_like(dm), target_dm) 78 | t_veig = torch.zeros_like(t_eig).requires_grad_() 79 | [t_vc] = torch.autograd.grad(t_eig, t_dm, t_veig, create_graph=True) 80 | [t_ghead] = torch.autograd.grad(t_vc, t_veig, torch.from_numpy(dldv)) 81 | return t_ghead.detach().cpu().numpy() 82 | 83 | 84 | def calc_optim_veig(dscf, target_dm, 85 | target_dec=None, gvx=None, 86 | nstep=1, force_factor=1., **optim_args): 87 | clfn = gen_coul_loss(dscf, fock=dscf.get_fock(vhf=dscf.get_veff0())) 88 | dm = dscf.make_rdm1() 89 | if dm.ndim == 3 and isinstance(dscf, scf.uhf.UHF): 90 | dm = dm.sum(0) 91 | t_dm = torch.from_numpy(dm).requires_grad_() 92 | t_eig = t_make_eig(t_dm, dscf._t_ovlp_shells).requires_grad_() 93 | t_ec = dscf.net(t_eig.to(dscf.device)) 94 | t_veig = torch.autograd.grad(t_ec, t_eig)[0].requires_grad_() 95 | t_lde = torch.from_numpy(target_dec) if target_dec is not None else None 96 | t_gvx = torch.from_numpy(gvx) if gvx is not None else None 97 | # build closure 98 | def closure(): 99 | [t_vc] = torch.autograd.grad( 100 | t_eig, t_dm, t_veig, retain_graph=True, create_graph=True) 101 | loss, dldv = clfn(t_vc.detach().numpy(), target_dm) 102 | grad = torch.autograd.grad( 103 | t_vc, t_veig, torch.from_numpy(dldv), only_inputs=True)[0] 104 | # build closure for force loss 105 | if t_lde is not None and t_gvx is not None: 106 | t_pde = torch.tensordot(t_gvx, t_veig) 107 | lossde = force_factor * torch.sum((t_pde - t_lde)**2) 108 | grad = grad + torch.autograd.grad(lossde, t_veig, only_inputs=True)[0] 109 | loss = loss + lossde 110 | t_veig.grad = grad 111 | return loss 112 | # do the optimization 113 | optim = torch.optim.LBFGS([t_veig], **optim_args) 114 | tic = (time.process_time(), time.perf_counter()) 115 | for _ in range(nstep): 116 | optim.step(closure) 117 | tic = logger.timer(dscf, 'LBFGS step', *tic) 118 | logger.note(dscf, f"optimized loss for veig = {closure()}") 119 | return t_veig.detach().numpy() 120 | 121 | 122 | def gcalc_optim_veig(gdscf, target_dm, target_grad, 123 | nstep=1, force_factor=1., **optim_args): 124 | target_dec = target_grad - gdscf.de0 125 | gvx = gdscf.make_grad_eig_x() 126 | return calc_optim_veig( 127 | gdscf.base, 128 | target_dm=target_dm, 129 | target_dec=target_dec, gvx=gvx, 130 | nstep=nstep, force_factor=force_factor, **optim_args) 131 | -------------------------------------------------------------------------------- /deepks/scf/fields.py: 
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import List, Callable
3 | from dataclasses import dataclass, field
4 | 
5 | # Field = namedtuple("Field", ["name", "alias", "calc", "shape"])
6 | # LabelField = namedtuple("LabelField", ["name", "alias", "calc", "shape", "required_labels"])
7 | @dataclass
8 | class Field:
9 |     name: str
10 |     alias: List[str]
11 |     calc: Callable
12 |     shape: str
13 |     required_labels: List[str] = field(default_factory=list)
14 | 
15 | 
16 | def select_fields(names):
17 |     names = [n.lower() for n in names]
18 |     scfs = [fd for fd in SCF_FIELDS
19 |             if fd.name in names
20 |             or any(al in names for al in fd.alias)]
21 |     grads = [fd for fd in GRAD_FIELDS
22 |              if fd.name in names
23 |              or any(al in names for al in fd.alias)]
24 |     return {"scf": scfs, "grad": grads}
25 | 
26 | 
27 | BOHR = 0.52917721092
28 | 
29 | def isinbohr(mol):
30 |     return mol.unit.upper().startswith(("B", "AU"))
31 | 
32 | def _Lunit(mol):
33 |     return (1. if isinbohr(mol) else BOHR)
34 | 
35 | def atom_data(mol):
36 |     raw_data = np.concatenate(
37 |         [mol.atom_charges().reshape(-1,1), mol.atom_coords(unit='Bohr')],
38 |         axis=1)
39 |     non_ghost = [ii for ii in range(mol.natm)
40 |                  if not mol.elements[ii].startswith("X")]
41 |     return raw_data[non_ghost]
42 | 
43 | 
44 | SCF_FIELDS = [
45 |     Field("atom",
46 |           ["atoms", "mol", "molecule"],
47 |           lambda mf: atom_data(mf.mol),
48 |           "(nframe, natom, 4)"),
49 |     Field("e_base",
50 |           ["ebase", "ene_base", "e0",
51 |            "e_hf", "ehf", "ene_hf",
52 |            "e_ks", "eks", "ene_ks"],
53 |           lambda mf: mf.energy_tot0(),
54 |           "(nframe, 1)"),
55 |     Field("e_tot",
56 |           ["e_cf", "ecf", "ene_cf", "etot", "ene", "energy", "e"],
57 |           lambda mf: mf.e_tot,
58 |           "(nframe, 1)"),
59 |     Field("rdm",
60 |           ["dm"],
61 |           lambda mf: mf.make_rdm1(),
62 |           "(nframe, nao, nao)"),
63 |     Field("proj_dm",
64 |           ["pdm"],
65 |           lambda mf: mf.make_pdm(flatten=True),
66 |           "(nframe, natom, -1)"),
67 |     Field("dm_eig",
68 |           ["eig"],
69 |           lambda mf: mf.make_eig(),
70 |           "(nframe, natom, nproj)"),
71 |     Field("hcore_eig",
72 |           ["heig"],
73 |           lambda mf: mf.make_eig(mf.get_hcore()),
74 |           "(nframe, natom, nproj)"),
75 |     Field("ovlp_eig",
76 |           ["oeig"],
77 |           lambda mf: mf.make_eig(mf.get_ovlp()),
78 |           "(nframe, natom, nproj)"),
79 |     Field("veff_eig",
80 |           ["veig"],
81 |           lambda mf: mf.make_eig(mf.get_veff()),
82 |           "(nframe, natom, nproj)"),
83 |     Field("fock_eig",
84 |           ["feig"],
85 |           lambda mf: mf.make_eig(mf.get_fock()),
86 |           "(nframe, natom, nproj)"),
87 |     Field("conv",
88 |           ["converged", "convergence"],
89 |           lambda mf: mf.converged,
90 |           "(nframe, 1)"),
91 |     Field("mo_coef_occ", # do not support UHF
92 |           ["mo_coeff_occ", "orbital_coeff_occ"],
93 |           lambda mf: mf.mo_coeff[:,mf.mo_occ>0].T,
94 |           "(nframe, -1, nao)"),
95 |     Field("mo_ene_occ", # do not support UHF
96 |           ["mo_energy_occ", "orbital_ene_occ"],
97 |           lambda mf: mf.mo_energy[mf.mo_occ>0],
98 |           "(nframe, -1)"),
99 |     # below are fields that require labels
100 |     Field("l_e_ref",
101 |           ["e_ref", "lbl_e_ref", "label_e_ref", "le_ref"],
102 |           lambda mf, **lbl: lbl["energy"],
103 |           "(nframe, 1)",
104 |           ["energy"]),
105 |     Field("l_e_delta",
106 |           ["le_delta", "lbl_e_delta", "label_e_delta", "lbl_ed"],
107 |           lambda mf, **lbl: lbl["energy"] - mf.energy_tot0(),
108 |           "(nframe, 1)",
109 |           ["energy"]),
110 |     Field("err_e",
111 |           ["e_err", "err_e_tot", "err_e_cf"],
112 |           lambda mf, **lbl: lbl["energy"] - mf.e_tot,
113 |           "(nframe, 1)",
114 |           ["energy"]),
115 | ]
116 | 
117 | GRAD_FIELDS = [
118 |     Field("f_base",
119 |           ["fbase", "force_base", "f0",
120 |            "f_hf", "fhf", "force_hf",
121 |            "f_ks", "fks", "force_ks"],
122 |           lambda grad: - grad.get_base() / _Lunit(grad.mol),
123 |           "(nframe, natom_raw, 3)"),
124 |     Field("f_tot",
125 |           ["f_cf", "fcf", "force_cf", "ftot", "force", "f"],
126 |           lambda grad: - grad.de / _Lunit(grad.mol),
127 |           "(nframe, natom_raw, 3)"),
128 |     Field("grad_dmx",
129 |           ["grad_dm_x", "grad_pdm_x"],
130 |           lambda grad: grad.make_grad_pdm_x(flatten=True) / _Lunit(grad.mol),
131 |           "(nframe, natom_raw, 3, natom, -1)"),
132 |     Field("grad_vx",
133 |           ["grad_eig_x", "geigx", "gvx"],
134 |           lambda grad: grad.make_grad_eig_x() / _Lunit(grad.mol),
135 |           "(nframe, natom_raw, 3, natom, nproj)"),
136 |     # below are fields that require labels
137 |     Field("l_f_ref",
138 |           ["f_ref", "lbl_f_ref", "label_f_ref", "lf_ref"],
139 |           lambda grad, **lbl: lbl["force"],
140 |           "(nframe, natom_raw, 3)",
141 |           ["force"]),
142 |     Field("l_f_delta",
143 |           ["lf_delta", "lbl_f_delta", "label_f_delta", "lbl_fd"],
144 |           lambda grad, **lbl: lbl["force"] - (-grad.get_base() / _Lunit(grad.mol)),
145 |           "(nframe, natom_raw, 3)",
146 |           ["force"]),
147 |     Field("err_f",
148 |           ["f_err", "err_f_tot", "err_f_cf"],
149 |           lambda grad, **lbl: lbl["force"] - (-grad.de / _Lunit(grad.mol)),
150 |           "(nframe, natom_raw, 3)",
151 |           ["force"]),
152 | ]
153 | 
154 | 
155 | # below are additional methods from addons
156 | from deepks.scf import addons
157 | 
158 | SCF_FIELDS.extend([
159 |     # the following two are used for regularizing the potential
160 |     Field("grad_veg",
161 |           ["grad_eig_egrad", "jac_eig_egrad"],
162 |           lambda mf: addons.make_grad_eig_egrad(mf),
163 |           "(nframe, natom, nproj, -1)"),
164 |     Field("eg_base",
165 |           ["ele_grad_base", "egrad0", "egrad_base"],
166 |           lambda mf: mf.get_grad0(),
167 |           "(nframe, -1)"),
168 |     # the following one is used for coulomb loss optimization
169 |     Field("grad_ldv",
170 |           ["grad_coul_dv", "grad_coul_deig", "coulomb_grad"],
171 |           lambda mf, **lbl: addons.make_grad_coul_veig(mf, target_dm=lbl["dm"]),
172 |           "(nframe, natom, nproj)",
173 |           ["dm"]),
174 |     Field("l_veig_raw",
175 |           ["optim_veig_raw", "l_opt_v_raw", "l_optim_veig_raw"],
176 |           lambda mf, **lbl: addons.calc_optim_veig(mf, lbl["dm"], nstep=1),
177 |           "(nframe, natom, nproj)",
178 |           ["dm"]),
179 | ])
180 | 
181 | GRAD_FIELDS.extend([
182 |     # the following one is used for coulomb loss optimization from grad class
183 |     Field("l_veig",
184 |           ["optim_veig", "l_opt_v", "l_optim_veig"],
185 |           lambda grad, **lbl: addons.gcalc_optim_veig(
186 |               grad, lbl["dm"], -_Lunit(grad.mol)*lbl["force"], nstep=1),
187 |           "(nframe, natom, nproj)",
188 |           ["dm", "force"]),
189 |     Field("l_veig_nof",
190 |           ["optim_veig_nof", "l_opt_v_nof", "l_optim_veig_nof"],
191 |           lambda grad, **lbl: addons.gcalc_optim_veig(
192 |               grad, lbl["dm"], grad.de, nstep=1),
193 |           "(nframe, natom, nproj)",
194 |           ["dm"]),
195 | ])
--------------------------------------------------------------------------------
/deepks/scf/penalty.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from pyscf.dft import numint, gen_grid
4 | from pyscf.lib import logger
5 | from deepks.utils import check_list
6 | 
7 | 
8 | def select_penalty(name):
9 |     name = name.lower()
10 |     if name == "density":
11 |         return DensityPenalty
12 |     if name == "coulomb":
13 |         return CoulombPenalty
14 |     raise ValueError(f"unknown penalty type: {name}")
15 | 
16 | 
17 | class PenaltyMixin(object):
18 |     """Mixin class to add penalty potential in Fock matrix"""
19 | 
20 |     def __init__(self, penalties=None):
21 |         self.penalties = check_list(penalties)
22 |         for pnt in self.penalties:
23 |             pnt.init_hook(self)
24 | 
25 |     def get_fock(self, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1,
26 |                  diis=None, diis_start_cycle=None,
27 |                  level_shift_factor=None, damp_factor=None, **kwargs):
28 |         """modified get_fock method to apply penalty terms onto vhf"""
29 |         if dm is None:
30 |             dm = self.make_rdm1()
31 |         if h1e is None:
32 |             h1e = self.get_hcore()
33 |         if vhf is None:
34 |             vhf = self.get_veff(dm=dm)
35 |         vp = sum(pnt.fock_hook(self, dm=dm, h1e=h1e, vhf=vhf, cycle=cycle)
36 |                  for pnt in self.penalties)
37 |         vhf = vhf + vp
38 |         return super().get_fock(
39 |             h1e=h1e, s1e=s1e, vhf=vhf, dm=dm, cycle=cycle,
40 |             diis=diis, diis_start_cycle=diis_start_cycle,
41 |             level_shift_factor=level_shift_factor, damp_factor=damp_factor, **kwargs)
42 | 
43 | 
44 | class AbstructPenalty(object):
45 |     """
46 |     Abstract base class for penalty terms in the SCF Hamiltonian.
47 |     To implement a penalty one needs to implement the
48 |     fock_hook and (optionally) init_hook methods.
49 |     """
50 |     required_labels = []  # these labels will be loaded and passed to __init__
51 | 
52 |     def init_hook(self, mf, **envs):
53 |         """
54 |         Method called when the SCF object is initialized.
55 |         Used to initialize the penalty with molecule info.
56 |         """
57 |         pass
58 | 
59 |     def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs):
60 |         """
61 |         Method called before get_fock.
62 |         The returned matrix will be added to the vhf matrix.
63 |         """
64 |         raise NotImplementedError("fock_hook method is not implemented")
65 | 
66 | 
67 | class DummyPenalty(AbstructPenalty):
68 |     def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs):
69 |         return 0
70 | 
71 | 
72 | class DensityPenalty(AbstructPenalty):
73 |     r"""
74 |     penalty on the difference w.r.t. the target density
75 |     E_p = \lambda / 2 * \int dx (\rho(x) - \rho_target(x))^2
76 |     V_p(x) = \lambda * (\rho(x) - \rho_target(x))
77 |     The target density should be given as a density matrix in the ao basis
78 |     """
79 |     required_labels = ["dm"]
80 | 
81 |     def __init__(self, target_dm, strength=1, random=False, start_cycle=0):
82 |         if isinstance(target_dm, str):
83 |             target_dm = np.load(target_dm)
84 |         self.dm_t = target_dm
85 |         self.init_strength = strength
86 |         self.strength = strength * np.random.rand() if random else strength
87 |         self.start_cycle = start_cycle
88 |         # below are values to be initialized later in init_hook
89 |         self.grids = None
90 |         self.ao_value = None
91 | 
92 |     def init_hook(self, mf, **envs):
93 |         if hasattr(mf, "grids"):
94 |             self.grids = mf.grids
95 |         else:
96 |             self.grids = gen_grid.Grids(mf.mol)
97 | 
98 |     def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs):
99 |         # cycle > 0 means it is doing scf iteration
100 |         if 0 <= cycle < self.start_cycle:
101 |             return 0
102 |         if self.grids.coords is None:
103 |             self.grids.build()
104 |         if self.ao_value is None:
105 |             self.ao_value = numint.eval_ao(mf.mol, self.grids.coords, deriv=0)
106 |         tic = (time.process_time(), time.perf_counter())
107 |         rho_diff = numint.eval_rho(mf.mol, self.ao_value, dm - self.dm_t)
108 |         v_p = numint.eval_mat(mf.mol, self.ao_value, self.grids.weights, rho_diff, rho_diff)
109 |         # cycle < 0 means it is just checking, we only print here
110 |         if cycle < 0 and mf.verbose >= 4:
111 |             diff_norm = np.sum(np.abs(rho_diff)*self.grids.weights)
112 |             logger.info(mf, f"  Density Penalty: |diff| = {diff_norm}")
113 |         logger.timer(mf, "dens_pnt", *tic)
114 |         return 
self.strength * v_p 115 | 116 | 117 | class CoulombPenalty(AbstructPenalty): 118 | r""" 119 | penalty given by the coulomb energy of density difference 120 | 121 | """ 122 | required_labels = ["dm"] 123 | 124 | def __init__(self, target_dm, strength=1, random=False, start_cycle=0): 125 | if isinstance(target_dm, str): 126 | target_dm = np.load(target_dm) 127 | self.dm_t = target_dm 128 | self.init_strength = strength 129 | self.strength = strength * np.random.rand() if random else strength 130 | self.start_cycle = start_cycle 131 | 132 | def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs): 133 | # cycle > 0 means it is doing scf iteration 134 | if 0 <= cycle < self.start_cycle: 135 | return 0 136 | tic = (time.process_time(), time.perf_counter()) 137 | ddm = dm - self.dm_t 138 | v_p = mf.get_j(dm=ddm) 139 | # cycle < 0 means it is just checking, we only print here 140 | if cycle < 0 and mf.verbose >=4: 141 | diff_norm = np.sum(ddm * v_p) 142 | logger.info(mf, f" Coulomb Penalty: |diff| = {diff_norm}") 143 | logger.timer(mf, "coul_pnt", *tic) 144 | return self.strength * v_p 145 | -------------------------------------------------------------------------------- /deepks/task/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "task", 3 | "workflow", 4 | "job" 5 | ] 6 | 7 | from .task import * 8 | from .workflow import * -------------------------------------------------------------------------------- /deepks/task/job/__init__.py: -------------------------------------------------------------------------------- 1 | # this sub package is borrowed and modified from dpgen project 2 | # https://github.com/deepmodeling/dpgen/tree/master/dpgen/dispatcher -------------------------------------------------------------------------------- /deepks/task/job/job_status.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class JobStatus (Enum) : 4 | unsubmitted = 1 5 | waiting = 2 6 | running = 3 7 | terminated = 4 8 | finished = 5 9 | completing = 6 10 | unknown = 100 11 | 12 | -------------------------------------------------------------------------------- /deepks/task/job/lazy_local_context.py: -------------------------------------------------------------------------------- 1 | import os,shutil,uuid 2 | import subprocess as sp 3 | from glob import glob 4 | 5 | class SPRetObj(object) : 6 | def __init__ (self, 7 | ret) : 8 | self.data = ret 9 | 10 | def read(self) : 11 | return self.data 12 | 13 | def readlines(self) : 14 | lines = self.data.decode('utf-8').splitlines() 15 | ret = [] 16 | for aa in lines: 17 | ret.append(aa+'\n') 18 | return ret 19 | 20 | class LazyLocalContext(object) : 21 | def __init__ (self, 22 | local_root, 23 | work_profile = None, 24 | job_uuid = None) : 25 | """ 26 | work_profile: 27 | local_root: 28 | """ 29 | assert(type(local_root) == str) 30 | self.local_root = os.path.abspath(local_root) 31 | self.remote_root = self.local_root 32 | if job_uuid: 33 | self.job_uuid=job_uuid 34 | else: 35 | self.job_uuid = str(uuid.uuid4()) 36 | 37 | def get_job_root(self) : 38 | return self.local_root 39 | 40 | def upload(self, 41 | job_dirs, 42 | local_up_files, 43 | dereference = True) : 44 | pass 45 | 46 | def download(self, 47 | job_dirs, 48 | remote_down_files, 49 | check_exists = False, 50 | mark_failure = True, 51 | back_error=False) : 52 | for ii in job_dirs : 53 | for jj in remote_down_files : 54 | fname = os.path.join(self.local_root, ii, jj) 
55 | exists = os.path.exists(fname) 56 | if not exists: 57 | if check_exists: 58 | if mark_failure: 59 | with open(os.path.join(self.local_root, ii, 'tag_failure_download_%s' % jj), 'w') as fp: pass 60 | else: 61 | pass 62 | else: 63 | raise RuntimeError('do not find download file ' + fname) 64 | 65 | def block_checkcall(self, 66 | cmd) : 67 | cwd = os.getcwd() 68 | os.chdir(self.local_root) 69 | proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE) 70 | o, e = proc.communicate() 71 | stdout = SPRetObj(o) 72 | stderr = SPRetObj(e) 73 | code = proc.returncode 74 | if code != 0: 75 | os.chdir(cwd) 76 | raise RuntimeError("Get error code %d in locally calling %s with job: %s " % (code, cmd, self.job_uuid)) 77 | os.chdir(cwd) 78 | return None, stdout, stderr 79 | 80 | def block_call(self, cmd) : 81 | cwd = os.getcwd() 82 | os.chdir(self.local_root) 83 | proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE) 84 | o, e = proc.communicate() 85 | stdout = SPRetObj(o) 86 | stderr = SPRetObj(e) 87 | code = proc.returncode 88 | os.chdir(cwd) 89 | return code, None, stdout, stderr 90 | 91 | def clean(self): 92 | tmp_files = [f'{self.job_uuid}.sub', 93 | f'{self.job_uuid}_job_id', 94 | f'{self.job_uuid}_tag_finished'] 95 | for fn in tmp_files: 96 | if self.check_file_exists(fn): 97 | os.remove(os.path.join(self.local_root, fn)) 98 | 99 | def write_file(self, fname, write_str): 100 | with open(os.path.join(self.local_root, fname), 'w') as fp : 101 | fp.write(write_str) 102 | 103 | def read_file(self, fname): 104 | with open(os.path.join(self.local_root, fname), 'r') as fp: 105 | ret = fp.read() 106 | return ret 107 | 108 | def check_file_exists(self, fname): 109 | return os.path.isfile(os.path.join(self.local_root, fname)) 110 | 111 | def call(self, cmd) : 112 | cwd = os.getcwd() 113 | os.chdir(self.local_root) 114 | proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE) 115 | os.chdir(cwd) 116 | return proc 117 | 118 | def kill(self, proc): 119 | proc.kill() 120 | 121 | def check_finish(self, proc): 122 | return (proc.poll() != None) 123 | 124 | def get_return(self, proc): 125 | ret = proc.poll() 126 | if ret is None: 127 | return None, None, None 128 | else : 129 | try: 130 | o, e = proc.communicate() 131 | stdout = SPRetObj(o) 132 | stderr = SPRetObj(e) 133 | except: 134 | stdout = None 135 | stderr = None 136 | return ret, stdout, stderr 137 | 138 | 139 | -------------------------------------------------------------------------------- /deepks/task/job/local_context.py: -------------------------------------------------------------------------------- 1 | import os,shutil,uuid,hashlib 2 | import subprocess as sp 3 | from glob import glob 4 | 5 | class LocalSession (object) : 6 | def __init__ (self, jdata) : 7 | self.work_path = os.path.abspath(jdata['work_path']) 8 | os.makedirs(self.work_path, exist_ok=True) 9 | # assert(os.path.exists(self.work_path)) 10 | 11 | def get_work_root(self) : 12 | return self.work_path 13 | 14 | class SPRetObj(object) : 15 | def __init__ (self, 16 | ret) : 17 | self.data = ret 18 | 19 | def read(self) : 20 | return self.data 21 | 22 | def readlines(self) : 23 | lines = self.data.decode('utf-8').splitlines() 24 | ret = [] 25 | for aa in lines: 26 | ret.append(aa+'\n') 27 | return ret 28 | 29 | def _check_file_path(fname) : 30 | dirname = os.path.dirname(fname) 31 | if dirname != "": 32 | os.makedirs(dirname, exist_ok=True) 33 | 34 | def _identical_files(fname0, fname1) : 35 | with open(fname0) as fp: 36 | code0 = 
hashlib.sha1(fp.read().encode('utf-8')).hexdigest() 37 | with open(fname1) as fp: 38 | code1 = hashlib.sha1(fp.read().encode('utf-8')).hexdigest() 39 | return code0 == code1 40 | 41 | 42 | class LocalContext(object) : 43 | def __init__ (self, 44 | local_root, 45 | work_profile, 46 | job_uuid = None) : 47 | """ 48 | work_profile: 49 | local_root: 50 | """ 51 | assert(type(local_root) == str) 52 | self.local_root = os.path.abspath(local_root) 53 | if job_uuid: 54 | self.job_uuid=job_uuid 55 | else: 56 | self.job_uuid = str(uuid.uuid4()) 57 | 58 | self.remote_root = os.path.join(work_profile.get_work_root(), self.job_uuid) 59 | # dlog.debug("local_root is %s"% local_root) 60 | # dlog.debug("remote_root is %s"% self.remote_root) 61 | 62 | os.makedirs(self.remote_root, exist_ok = True) 63 | 64 | def get_job_root(self) : 65 | return self.remote_root 66 | 67 | def upload(self, 68 | job_dirs, 69 | local_up_files, 70 | dereference = True) : 71 | cwd = os.getcwd() 72 | for ii in job_dirs : 73 | local_job = os.path.join(self.local_root, ii) 74 | remote_job = os.path.join(self.remote_root, ii) 75 | os.makedirs(remote_job, exist_ok = True) 76 | os.chdir(remote_job) 77 | for jj in local_up_files : 78 | if not os.path.exists(os.path.join(local_job, jj)): 79 | os.chdir(cwd) 80 | raise RuntimeError('cannot find upload file ' + os.path.join(local_job, jj)) 81 | if os.path.exists(os.path.join(remote_job, jj)) : 82 | os.remove(os.path.join(remote_job, jj)) 83 | _check_file_path(jj) 84 | os.symlink(os.path.join(local_job, jj), 85 | os.path.join(remote_job, jj)) 86 | os.chdir(cwd) 87 | 88 | def download(self, 89 | job_dirs, 90 | remote_down_files, 91 | check_exists = False, 92 | mark_failure = True, 93 | back_error=False) : 94 | cwd = os.getcwd() 95 | for ii in job_dirs : 96 | local_job = os.path.join(self.local_root, ii) 97 | remote_job = os.path.join(self.remote_root, ii) 98 | flist = remote_down_files 99 | if back_error : 100 | os.chdir(remote_job) 101 | flist += glob('err*') 102 | os.chdir(cwd) 103 | for jj in flist : 104 | rfile = os.path.join(remote_job, jj) 105 | lfile = os.path.join(local_job, jj) 106 | if not os.path.realpath(rfile) == os.path.realpath(lfile) : 107 | if (not os.path.exists(rfile)) and (not os.path.exists(lfile)): 108 | if check_exists : 109 | if mark_failure: 110 | with open(os.path.join(self.local_root, ii, 'tag_failure_download_%s' % jj), 'w') as fp: pass 111 | else : 112 | pass 113 | else : 114 | raise RuntimeError('do not find download file ' + rfile) 115 | elif (not os.path.exists(rfile)) and (os.path.exists(lfile)) : 116 | # already downloaded 117 | pass 118 | elif (os.path.exists(rfile)) and (not os.path.exists(lfile)) : 119 | # trivial case, download happily 120 | os.makedirs(os.path.dirname(lfile), exist_ok=True) 121 | shutil.move(rfile, lfile) 122 | elif (os.path.exists(rfile)) and (os.path.exists(lfile)) : 123 | # both exists, replace! 
124 |                         # dlog.info('find existing %s, replacing by %s' % (lfile, rfile))
125 |                         if os.path.isdir(lfile):
126 |                             shutil.rmtree(lfile, ignore_errors=True)
127 |                         elif os.path.isfile(lfile) or os.path.islink(lfile):
128 |                             os.remove(lfile)
129 |                         os.makedirs(os.path.dirname(lfile), exist_ok=True)
130 |                         shutil.move(rfile, lfile)
131 |                     else :
132 |                         raise RuntimeError('should not reach here!')
133 |                 else :
134 |                     # do nothing in the case of linked files
135 |                     pass
136 |         os.chdir(cwd)
137 | 
138 |     def block_checkcall(self,
139 |                         cmd) :
140 |         cwd = os.getcwd()
141 |         os.chdir(self.remote_root)
142 |         proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
143 |         o, e = proc.communicate()
144 |         stdout = SPRetObj(o)
145 |         stderr = SPRetObj(e)
146 |         code = proc.returncode
147 |         if code != 0:
148 |             os.chdir(cwd)
149 |             raise RuntimeError("Get error code %d in locally calling %s with job: %s " % (code, cmd, self.job_uuid))
150 |         os.chdir(cwd)
151 |         return None, stdout, stderr
152 | 
153 |     def block_call(self, cmd) :
154 |         cwd = os.getcwd()
155 |         os.chdir(self.remote_root)
156 |         proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
157 |         o, e = proc.communicate()
158 |         stdout = SPRetObj(o)
159 |         stderr = SPRetObj(e)
160 |         code = proc.returncode
161 |         os.chdir(cwd)
162 |         return code, None, stdout, stderr
163 | 
164 |     def clean(self) :
165 |         shutil.rmtree(self.remote_root, ignore_errors=True)
166 | 
167 |     def write_file(self, fname, write_str):
168 |         with open(os.path.join(self.remote_root, fname), 'w') as fp :
169 |             fp.write(write_str)
170 | 
171 |     def read_file(self, fname):
172 |         with open(os.path.join(self.remote_root, fname), 'r') as fp:
173 |             ret = fp.read()
174 |         return ret
175 | 
176 |     def check_file_exists(self, fname):
177 |         return os.path.isfile(os.path.join(self.remote_root, fname))
178 | 
179 |     def call(self, cmd) :
180 |         cwd = os.getcwd()
181 |         os.chdir(self.remote_root)
182 |         proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
183 |         os.chdir(cwd)
184 |         return proc
185 | 
186 |     def kill(self, proc):
187 |         proc.kill()
188 | 
189 |     def check_finish(self, proc):
190 |         return (proc.poll() != None)
191 | 
192 |     def get_return(self, proc):
193 |         ret = proc.poll()
194 |         if ret is None:
195 |             return None, None, None
196 |         else :
197 |             try:
198 |                 o, e = proc.communicate()
199 |                 stdout = SPRetObj(o)
200 |                 stderr = SPRetObj(e)
201 |             except:
202 |                 stdout = None
203 |                 stderr = None
204 |         return ret, stdout, stderr
205 | 
206 | 
207 | 
--------------------------------------------------------------------------------
/deepks/task/job/shell.py:
--------------------------------------------------------------------------------
1 | import os,getpass,time
2 | from .batch import Batch
3 | from .job_status import JobStatus
4 | 
5 | def _default_item(resources, key, value) :
6 |     if key not in resources :
7 |         resources[key] = value
8 | 
9 | 
10 | class Shell(Batch) :
11 | 
12 |     def check_status(self) :
13 |         if self.check_finish_tag():
14 |             return JobStatus.finished
15 |         elif self.check_running():
16 |             return JobStatus.running
17 |         else:
18 |             return JobStatus.terminated
19 |         ## warn: cannot distinguish terminated from unsubmitted.
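    # NOTE (added illustration, not part of the original source): check_status()
    # is designed to be polled until a terminal state is reached; a minimal
    # sketch, assuming `batch` is a Shell instance built with a local context:
    #
    #     import time
    #     while batch.check_status() not in (JobStatus.finished,
    #                                        JobStatus.terminated):
    #         time.sleep(10)
    #
    # check_running() below greps `ps aux` for a command line containing
    # "<job_uuid>.sub", which is why a terminated job cannot be told apart
    # from one that was never submitted (see the warning above).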
20 | 21 | def check_running(self): 22 | uuid_names = self.context.job_uuid 23 | ## Check if the uuid.sub is running on remote machine 24 | cnt = 0 25 | ret, stdin, stdout, stderr = self.context.block_call("ps aux | grep %s"%uuid_names) 26 | response_list = stdout.read().decode('utf-8').split("\n") 27 | for response in response_list: 28 | if uuid_names + ".sub" in response: 29 | return True 30 | return False 31 | 32 | def exec_sub_script(self, script_str): 33 | self.context.write_file(self.sub_script_name, script_str) 34 | self.proc = self.context.call('cd %s && exec bash %s' % (self.context.remote_root, self.sub_script_name)) 35 | 36 | def default_resources(self, res_) : 37 | if res_ is None : 38 | res = {} 39 | else: 40 | res = res_ 41 | _default_item(res, 'task_per_node', 1) 42 | _default_item(res, 'module_list', []) 43 | _default_item(res, 'module_unload_list', []) 44 | _default_item(res, 'source_list', []) 45 | _default_item(res, 'envs', {}) 46 | _default_item(res, 'with_mpi', False) 47 | _default_item(res, 'cuda_multi_tasks', False) 48 | _default_item(res, 'allow_failure', False) 49 | return res 50 | 51 | def sub_script_head(self, resources) : 52 | envs = resources['envs'] 53 | module_list = resources['module_list'] 54 | module_unload_list = resources['module_unload_list'] 55 | task_per_node = resources['task_per_node'] 56 | source_list = resources['source_list'] 57 | 58 | ret = '' 59 | ret += ('#!/bin/bash\n\n') 60 | # fp.write('set -euo pipefail\n') 61 | for key in envs.keys() : 62 | ret += ('export %s=%s\n' % (key, envs[key])) 63 | ret += ('\n') 64 | for ii in module_unload_list : 65 | ret += ('module unload %s\n' % ii) 66 | ret += ('\n') 67 | for ii in module_list : 68 | ret += ('module load %s\n' % ii) 69 | ret += ('\n') 70 | for ii in source_list : 71 | ret += ('source %s\n' % ii) 72 | ret += ('\n') 73 | return ret 74 | 75 | def sub_script_cmd(self, 76 | cmd, 77 | arg, 78 | res) : 79 | _cmd = cmd.split('1>')[0].strip() 80 | if res['with_mpi']: 81 | _cmd = 'mpirun -n %d %s %s' % (res['task_per_node'], _cmd, arg) 82 | else : 83 | _cmd = '%s %s' % (_cmd, arg) 84 | return _cmd 85 | 86 | def make_non_blocking(self, inner_script, step_res=None): 87 | return f"({inner_script})&\n" -------------------------------------------------------------------------------- /deepks/task/job/slurm.py: -------------------------------------------------------------------------------- 1 | import os,getpass,time 2 | from .batch import Batch 3 | from .job_status import JobStatus 4 | 5 | 6 | def _default_item(resources, key, value) : 7 | if key not in resources : 8 | resources[key] = value 9 | 10 | class Slurm(Batch) : 11 | 12 | def check_status(self): 13 | """ 14 | check the status of a job 15 | """ 16 | job_id = self._get_job_id() 17 | if job_id == '' : 18 | return JobStatus.unsubmitted 19 | while True: 20 | stat = self._check_status_inner(job_id) 21 | if stat != JobStatus.completing: 22 | return stat 23 | else: 24 | time.sleep(5) 25 | 26 | def check_before_sub(self, res): 27 | if 'task_max' in res and res['task_max'] > 0: 28 | while self._check_sub_limit(task_max=res['task_max']): 29 | time.sleep(60) 30 | 31 | def exec_sub_script(self, script_str): 32 | self.context.write_file(self.sub_script_name, script_str) 33 | stdin, stdout, stderr = self.context.block_checkcall('cd %s && %s %s' % (self.context.remote_root, 'sbatch', self.sub_script_name)) 34 | subret = (stdout.readlines()) 35 | job_id = subret[0].split()[-1] 36 | self.context.write_file(self.job_id_name, job_id) 37 | 38 | def 
default_resources(self, res_) : 39 | """ 40 | set default values if a key in res_ is not found 41 | """ 42 | if res_ is None : 43 | res = {} 44 | else: 45 | res = res_ 46 | _default_item(res, 'numb_node', 1) 47 | _default_item(res, 'task_per_node', 1) 48 | _default_item(res, 'cpus_per_task', 1) 49 | _default_item(res, 'numb_gpu', 0) 50 | _default_item(res, 'time_limit', '1:0:0') 51 | _default_item(res, 'mem_limit', -1) 52 | _default_item(res, 'partition', '') 53 | _default_item(res, 'account', '') 54 | _default_item(res, 'qos', '') 55 | _default_item(res, 'constraint_list', []) 56 | _default_item(res, 'license_list', []) 57 | _default_item(res, 'exclude_list', []) 58 | _default_item(res, 'module_unload_list', []) 59 | _default_item(res, 'module_list', []) 60 | _default_item(res, 'source_list', []) 61 | _default_item(res, 'envs', None) 62 | _default_item(res, 'with_mpi', False) 63 | _default_item(res, 'cuda_multi_tasks', False) 64 | _default_item(res, 'allow_failure', False) 65 | return res 66 | 67 | def sub_script_head(self, res): 68 | ret = '' 69 | ret += "#!/bin/bash -l\n" 70 | ret += "#SBATCH -N %d\n" % res['numb_node'] 71 | ret += "#SBATCH --ntasks-per-node=%d\n" % res['task_per_node'] 72 | if res['cpus_per_task'] > 0 : 73 | ret += "#SBATCH --cpus-per-task=%d\n" % res['cpus_per_task'] 74 | ret += "#SBATCH -t %s\n" % res['time_limit'] 75 | if res['mem_limit'] > 0 : 76 | ret += "#SBATCH --mem=%dG \n" % res['mem_limit'] 77 | if len(res['account']) > 0 : 78 | ret += "#SBATCH --account=%s \n" % res['account'] 79 | if len(res['partition']) > 0 : 80 | ret += "#SBATCH --partition=%s \n" % res['partition'] 81 | if len(res['qos']) > 0 : 82 | ret += "#SBATCH --qos=%s \n" % res['qos'] 83 | if res['numb_gpu'] > 0 : 84 | ret += "#SBATCH --gres=gpu:%d\n" % res['numb_gpu'] 85 | for ii in res['constraint_list'] : 86 | ret += '#SBATCH -C %s \n' % ii 87 | for ii in res['license_list'] : 88 | ret += '#SBATCH -L %s \n' % ii 89 | if len(res['exclude_list']) >0: 90 | temp_exclude = "" 91 | for ii in res['exclude_list'] : 92 | temp_exclude += ii 93 | temp_exclude += "," 94 | temp_exclude = temp_exclude[:-1] 95 | ret += '#SBATCH --exclude=%s \n' % temp_exclude 96 | ret += "\n" 97 | for ii in res['module_unload_list'] : 98 | ret += "module unload %s\n" % ii 99 | for ii in res['module_list'] : 100 | ret += "module load %s\n" % ii 101 | ret += "\n" 102 | for ii in res['source_list'] : 103 | ret += "source %s\n" %ii 104 | ret += "\n" 105 | envs = res['envs'] 106 | if envs is not None : 107 | for key in envs.keys() : 108 | ret += 'export %s=%s\n' % (key, envs[key]) 109 | ret += '\n' 110 | return ret 111 | 112 | def sub_step_head(self, step_res=None, **kwargs): 113 | if step_res is None: 114 | return "" 115 | # exclusive = step_res.get("exclusive", False) 116 | # numb_node = step_res.get("numb_node", 1) 117 | # task_per_node = step_res.get("task_per_node", 1) 118 | # cpus_per_task = step_res.get("cpus_per_task", 1) 119 | # numb_gpu = step_res.get('numb_gpu', 0) 120 | params = "" 121 | if "numb_node" in step_res: 122 | params += f" -N {step_res['numb_node']} " 123 | if "task_per_node" in step_res: 124 | params += f" -n {step_res['task_per_node'] * step_res.get('numb_node', 1)} " 125 | if "cpus_per_task" in step_res: 126 | params += f" -c {step_res['cpus_per_task']} " 127 | if step_res.get("exclusive", False): 128 | params += " --exclusive " 129 | if step_res.get('numb_gpu', 0) > 0 : 130 | params += " --gres=gpu:%d " % step_res['numb_gpu'] # no embedded newline, or the srun command line would be split 131 | return f"srun {params} " 132 | 133 | def sub_script_cmd(self, 134 | cmd,
135 | arg, 136 | res) : 137 | _cmd = cmd.split('1>')[0].strip() 138 | if res['with_mpi']: 139 | _cmd = 'srun %s %s' % (_cmd, arg) 140 | else : 141 | _cmd = '%s %s' % (_cmd, arg) 142 | return _cmd 143 | 144 | def _get_job_id(self) : 145 | if self.context.check_file_exists(self.job_id_name) : 146 | return self.context.read_file(self.job_id_name) 147 | else: 148 | return "" 149 | 150 | def _check_status_inner(self, job_id): 151 | ret, stdin, stdout, stderr\ 152 | = self.context.block_call ('squeue -o "%.18i %.2t" -j ' + job_id) 153 | if (ret != 0) : 154 | err_str = stderr.read().decode('utf-8') 155 | if str("Invalid job id specified") in err_str : 156 | if self.check_finish_tag() : 157 | return JobStatus.finished 158 | else : 159 | return JobStatus.terminated 160 | else : 161 | raise RuntimeError\ 162 | ("status command squeue fails to execute\nerror message:%s\nreturn code %d\n" % (err_str, ret)) 163 | status_line = stdout.read().decode('utf-8').split ('\n')[-2] 164 | status_word = status_line.split ()[-1] 165 | if not (len(status_line.split()) == 2 and status_word.isupper()): 166 | raise RuntimeError("Error in getting job status, " + 167 | f"status_line = {status_line}, " + 168 | f"parsed status_word = {status_word}") 169 | if status_word in ["PD","CF","S"] : 170 | return JobStatus.waiting 171 | elif status_word in ["R"] : 172 | return JobStatus.running 173 | elif status_word in ["CG"] : 174 | return JobStatus.completing 175 | elif status_word in ["C","E","K","BF","CA","CD","F","NF","PR","SE","ST","TO"] : 176 | if self.check_finish_tag() : 177 | return JobStatus.finished 178 | else : 179 | return JobStatus.terminated 180 | else : 181 | return JobStatus.unknown 182 | 183 | def _check_sub_limit(self, task_max, **kwarg) : 184 | if task_max <= 0: 185 | return True 186 | username = getpass.getuser() 187 | stdin, stdout, stderr = self.context.block_checkcall('squeue -u %s -h' % username) 188 | nj = len(stdout.readlines()) 189 | return nj >= task_max 190 | 191 | def _make_squeue(self,mdata1, res): 192 | ret = '' 193 | ret += 'squeue -u %s ' % mdata1['username'] 194 | ret += '-p %s ' % res['partition'] 195 | ret += '| grep PD' 196 | return ret 197 | -------------------------------------------------------------------------------- /deepks/task/workflow.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from deepks.utils import check_list 3 | from deepks.utils import get_abs_path 4 | from deepks.task.task import AbstructStep 5 | 6 | 7 | __all__ = ["Workflow", "Sequence", "Iteration"] 8 | 9 | 10 | class Workflow(AbstructStep): 11 | def __init__(self, child_tasks, workdir='.', record_file=None): 12 | super().__init__(workdir) 13 | self.record_file = get_abs_path(record_file) 14 | self.child_tasks = [self.make_child(task) for task in child_tasks] 15 | self.postmod_hook() 16 | # self.set_record_file(record_file) 17 | 18 | def make_child(self, task): 19 | if not isinstance(task, AbstructStep): 20 | raise TypeError("Workflow only accepts tasks and other workflows as children, " 21 | "but got " + type(task).__name__) 22 | assert not task.workdir.is_absolute() 23 | copied = deepcopy(task) 24 | copied.prepend_workdir(self.workdir) 25 | if isinstance(task, Workflow): 26 | copied.set_record_file(self.record_file) 27 | return copied 28 | 29 | def postmod_hook(self): 30 | pass 31 | 32 | def run(self, parent_tag=(), restart_tag=None): 33 | start_idx = 0 34 | if restart_tag is not None: 35 | last_idx = restart_tag[0] 36 | rest_tag = restart_tag[1:] 37 | if
last_idx >= len(self.child_tasks): 38 | print(f'# restart tag {last_idx} out of range, stop now') 39 | return 40 | if rest_tag: 41 | last_tag = parent_tag+(last_idx,) 42 | self.child_tasks[last_idx].run(last_tag, restart_tag=rest_tag) 43 | self.write_record(last_tag) 44 | start_idx = last_idx + 1 45 | for i in range(start_idx, len(self.child_tasks)): 46 | curr_tag = parent_tag + (i,) 47 | print('# starting step:', curr_tag) 48 | task = self.child_tasks[i] 49 | task.run(curr_tag) 50 | self.write_record(curr_tag) 51 | 52 | def prepend_workdir(self, path): 53 | super().prepend_workdir(path) 54 | for task in self.child_tasks: 55 | task.prepend_workdir(path) 56 | 57 | def set_record_file(self, record_file): 58 | self.record_file = get_abs_path(record_file) 59 | for task in self.child_tasks: 60 | if isinstance(task, Workflow): 61 | task.set_record_file(record_file) 62 | 63 | def write_record(self, tag): 64 | if self.record_file is None: 65 | return 66 | if isinstance(tag, (list, tuple)): 67 | tag = ' '.join(map(str,tag)) 68 | with self.record_file.open('a') as lf: 69 | lf.write(tag + '\n') 70 | 71 | def max_depth(self): 72 | if not any(isinstance(task, Workflow) for task in self.child_tasks): 73 | return 1 74 | else: 75 | return 1 + max(task.max_depth() for task in self.child_tasks if isinstance(task, Workflow)) 76 | 77 | def restart(self): 78 | if not self.record_file.exists(): 79 | print('# no record file, starting from scratch') 80 | self.run(()) 81 | return 82 | with self.record_file.open() as lf: 83 | all_tags = [tuple(map(int, l.split())) for l in lf.readlines()] 84 | # assert max(map(len, all_tags)) == self.max_depth() 85 | restart_tag = all_tags[-1] 86 | print('# restarting after step', restart_tag) 87 | self.run((), restart_tag=restart_tag) 88 | 89 | def __getitem__(self, idx): 90 | return self.child_tasks[idx] 91 | 92 | def __setitem__(self, idx, task): 93 | self.child_tasks[idx] = self.make_child(task) 94 | self.postmod_hook() 95 | 96 | def __delitem__(self, idx): 97 | self.child_tasks.__delitem__(idx) 98 | self.postmod_hook() 99 | 100 | def __len__(self): 101 | return len(self.child_tasks) 102 | 103 | def __iter__(self): 104 | return self.child_tasks.__iter__() 105 | 106 | def insert(self, index, task): 107 | self.child_tasks.insert(index, self.make_child(task)) 108 | self.postmod_hook() 109 | 110 | def append(self, task): 111 | self.child_tasks.append(self.make_child(task)) 112 | self.postmod_hook() 113 | 114 | def prepend(self, task): 115 | self.child_tasks.insert(0, self.make_child(task)) 116 | self.postmod_hook() 117 | 118 | 119 | class Sequence(Workflow): 120 | def __init__(self, child_tasks, workdir='.', record_file=None, init_folder=None): 121 | # would reset all tasks' prev folder into their prev task, except for the first one 122 | super().__init__(child_tasks, workdir, record_file) 123 | if init_folder is not None: 124 | self.set_init_folder(init_folder) 125 | 126 | def chain_tasks(self): 127 | for prev, curr in zip(self.child_tasks[:-1], self.child_tasks[1:]): 128 | while isinstance(prev, Workflow): 129 | prev = prev.child_tasks[-1] 130 | while isinstance(curr, Workflow): 131 | curr = curr.child_tasks[0] 132 | curr.set_prev_task(prev) 133 | 134 | def set_init_folder(self, init_folder): 135 | start = self.child_tasks[0] 136 | while isinstance(start, Workflow): 137 | start = start.child_tasks[0] 138 | start.set_prev_folder(get_abs_path(init_folder)) 139 | 140 | def postmod_hook(self): 141 | self.chain_tasks() 142 | 143 | 144 | class Iteration(Sequence): 145 | def 
__init__(self, task, iternum, workdir='.', record_file=None, init_folder=None): 146 | # iterated task should have workdir='.' to avoid redundant folders 147 | # handle multiple tasks by first making a sequence 148 | if not isinstance(task, AbstructStep): 149 | task = Sequence(task) 150 | iter_tasks = [deepcopy(task) for i in range(iternum)] 151 | nd = max(len(str(iternum)), 2) 152 | for ii, itask in enumerate(iter_tasks): 153 | itask.prepend_workdir(f'iter.{ii:0>{nd}d}') 154 | super().__init__(iter_tasks, workdir, record_file, init_folder) 155 | 156 | -------------------------------------------------------------------------------- /deepks/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/deepks/tools/__init__.py -------------------------------------------------------------------------------- /deepks/tools/geom_optim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -N 1 3 | #SBATCH -c 20 4 | #SBATCH -t 24:00:00 5 | #SBATCH --mem=8G 6 | 7 | import time 8 | import numpy as np 9 | from deepks.utils import load_yaml 10 | from deepks.scf.scf import DSCF 11 | from pyscf import gto, lib 12 | try: 13 | from pyscf.geomopt.berny_solver import optimize 14 | except ImportError: 15 | from pyscf.geomopt.geometric_solver import optimize 16 | 17 | 18 | def run_optim(mol, model=None, proj_basis=None, scf_args={}, conv_args={}): 19 | cf = DSCF(mol, model, proj_basis=proj_basis).set(**scf_args) 20 | mol_eq = optimize(cf, **conv_args) 21 | return mol_eq 22 | 23 | def dump_xyz(filename, mol): 24 | coords = mol.atom_coords(unit="Angstrom").reshape(-1,3) 25 | elems = mol.elements 26 | with open(filename, 'w') as fp: 27 | fp.write(f"{mol.natm}\n\n") 28 | for x, e in zip(coords, elems): 29 | fp.write("%s %.18g %.18g %.18g\n" % (e, x[0], x[1], x[2])) 30 | 31 | 32 | if __name__ == "__main__": 33 | import argparse 34 | import os 35 | parser = argparse.ArgumentParser(description="Run geometry optimization for given xyz files, optionally using a trained DeePKS model.") 36 | parser.add_argument("files", nargs="+", help="input xyz files") 37 | parser.add_argument("-m", "--model-file", help="file of the trained model") 38 | parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file") 39 | parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation") 40 | parser.add_argument("-P", "--proj_basis", help="basis set used to project dm, must match with model") 41 | parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule") 42 | parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information") 43 | parser.add_argument("-S", "--suffix", help="suffix added to the saved xyz") 44 | parser.add_argument("--scf-input", help="yaml file to specify scf arguments") 45 | parser.add_argument("--conv-input", help="yaml file to specify convergence arguments") 46 | args = parser.parse_args() 47 | 48 | if args.verbose: 49 | print(f"starting calculation with OMP threads: {lib.num_threads()}", 50 | f"and max memory: {lib.param.MAX_MEMORY}") 51 | 52 | if args.dump_dir is not None: 53 | os.makedirs(args.dump_dir, exist_ok = True) 54 | for fn in args.files: 55 | tic = time.time() 56 | mol = gto.M(atom=fn, basis=args.basis, verbose=args.verbose, charge=args.charge, parse_arg=False) 57 | model = args.model_file
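# NOTE (editorial sketch, not part of the original file): the --scf-input yaml
# handled below accepts two layouts -- a flat mapping that is taken as scf_args
# wholesale, or a nested mapping whose "scf_args" key is extracted and whose
# optional "model" key is used when -m is not given. Hypothetical examples:
#
#     conv_tol: 1e-8          # flat form: the whole file is scf_args
#
#     scf_args:               # nested form
#       conv_tol: 1e-8
#     model: model.pth        # used only if --model-file is absent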
58 | scf_args = {} 59 | if args.scf_input is not None: 60 | argdict = load_yaml(args.scf_input) 61 | if "scf_args" in argdict: 62 | scf_args = argdict["scf_args"] 63 | if model is None and "model" in argdict: 64 | model = argdict["model"] 65 | else: 66 | scf_args = argdict 67 | conv_args = load_yaml(args.conv_input) if args.conv_input is not None else {} 68 | mol_eq = run_optim(mol, model, args.proj_basis, scf_args, conv_args) 69 | suffix = args.suffix 70 | if args.dump_dir is None: 71 | dump_dir = os.path.dirname(fn) 72 | if not suffix: 73 | suffix = "eq" 74 | else: 75 | dump_dir = args.dump_dir 76 | dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) 77 | if suffix: 78 | dump += f".{suffix}" 79 | dump_xyz(dump+".xyz", mol_eq) 80 | if args.verbose: 81 | print(fn, f"done, time = {time.time()-tic}") -------------------------------------------------------------------------------- /deepks/tools/num_hessian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -N 1 3 | #SBATCH -c 20 4 | #SBATCH -t 24:00:00 5 | #SBATCH --mem=8G 6 | 7 | import time 8 | import numpy as np 9 | from deepks.utils import load_yaml 10 | from deepks.scf.scf import DSCF 11 | from pyscf import gto, lib 12 | 13 | BOHR = 0.52917721092 14 | 15 | def finite_difference(f, x, delta=1e-6): 16 | in_shape = x.shape 17 | y0 = f(x) 18 | out_shape = y0.shape 19 | res = np.empty(in_shape + out_shape) 20 | for idx in np.ndindex(*in_shape): 21 | diff = np.zeros(in_shape) 22 | diff[idx] += delta 23 | y1 = f(x+diff) 24 | res[idx] = (y1-y0) / delta 25 | return res 26 | 27 | def calc_deriv(mol, model=None, proj_basis=None, **scfargs): 28 | cf = DSCF(mol, model, proj_basis=proj_basis).run(**scfargs) 29 | if not cf.converged: 30 | raise RuntimeError("SCF not converged!") 31 | ff = cf.nuc_grad_method().run() 32 | return ff.de 33 | 34 | def make_closure(mol, model=None, proj_basis=None, **scfargs): 35 | refmol = mol 36 | def cc2de(coords): 37 | tic = time.time() 38 | mol = refmol.set_geom_(coords, inplace=False, unit="Bohr") 39 | de = calc_deriv(mol, model, proj_basis, **scfargs) 40 | if mol.verbose > 1: 41 | print(f"step time = {time.time()-tic}") 42 | return de 43 | return cc2de 44 | # scanner is not very stable. 
We construct new scf objects every time 45 | # scanner = DSCF(mol.set(unit="Bohr"), model).set(**scfargs).nuc_grad_method().as_scanner() 46 | # return lambda m: scanner(m)[-1] 47 | 48 | def calc_hessian(mol, model=None, delta=1e-6, proj_basis=None, **scfargs): 49 | cc2de = make_closure(mol, model, proj_basis, **scfargs) 50 | cc0 = mol.atom_coords(unit="Bohr") 51 | hess = finite_difference(cc2de, cc0, delta).transpose((0,2,1,3)) 52 | return hess 53 | 54 | 55 | if __name__ == "__main__": 56 | import argparse 57 | import os 58 | parser = argparse.ArgumentParser(description="Calculate and save numerical Hessians for given xyz files.") 59 | parser.add_argument("files", nargs="+", help="input xyz files") 60 | parser.add_argument("-m", "--model-file", help="file of the trained model") 61 | parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file") 62 | parser.add_argument("-D", "--delta", default=1e-6, type=float, help="numerical difference step size") 63 | parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation") 64 | parser.add_argument("-P", "--proj_basis", help="basis set used to project dm, must match with model") 65 | parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule") 66 | parser.add_argument("-U", "--unit", default="Angstrom", help="choose length unit (Bohr or Angstrom)") 67 | parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information") 68 | parser.add_argument("--scf-input", help="yaml file to specify scf arguments") 69 | args = parser.parse_args() 70 | 71 | if args.verbose: 72 | print(f"starting calculation with OMP threads: {lib.num_threads()}", 73 | f"and max memory: {lib.param.MAX_MEMORY}") 74 | 75 | if args.dump_dir is not None: 76 | os.makedirs(args.dump_dir, exist_ok = True) 77 | for fn in args.files: 78 | tic = time.time() 79 | mol = gto.M(atom=fn, basis=args.basis, verbose=args.verbose, charge=args.charge, parse_arg=False) 80 | model = args.model_file 81 | scfargs = {} 82 | if args.scf_input is not None: 83 | argdict = load_yaml(args.scf_input) 84 | if "scf_args" in argdict: 85 | scfargs = argdict["scf_args"] 86 | if model is None and "model" in argdict: 87 | model = argdict["model"] 88 | else: 89 | scfargs = argdict 90 | hess = calc_hessian(mol, model, args.delta, args.proj_basis, **scfargs) 91 | if not args.unit.upper().startswith(("B", "AU")): 92 | hess /= BOHR**2 93 | if args.dump_dir is None: 94 | dump_dir = os.path.dirname(fn) 95 | else: 96 | dump_dir = args.dump_dir 97 | dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) 98 | np.save(dump+".hessian.npy", hess) 99 | if args.verbose: 100 | print(fn, f"done, time = {time.time()-tic}") -------------------------------------------------------------------------------- /examples/iterate/combined.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | 4 | # number of iterations to do, can be set to zero for deephf training 5 | n_iter: 5 6 | 7 | # training and testing systems 8 | systems_train: # can also be files containing system paths 9 | - ../system/batch/set.0[0-5]* # supports glob 10 | - ../system/batch/set.060 11 | - ../system/batch/set.061 12 | - ../system/batch/set.062 13 | 14 | systems_test: # if empty, use the last system of the training set 15 | - ../system/batch/set.063 16 |
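# note (editor's illustrative estimate, not from the original file): with the
# scf_machine settings below and the 63 training systems listed above,
# sub_size 5 packs the systems into ceil(63/5) = 13 tasks, group_size 2
# gathers them into 7 submitted groups, and ingroup_parallel 2 lets two
# tasks of each group run at the same time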
17 | # directory setting 18 | workdir: "." 19 | share_folder: "share" # folder that stores all other settings 20 | 21 | # scf settings 22 | scf_input: # can also be specified by a separate file 23 | basis: ccpvdz 24 | # this is for force training 25 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 26 | verbose: 1 27 | mol_args: 28 | incore_anyway: True 29 | scf_args: 30 | conv_tol: 1e-6 31 | conv_tol_grad: 1e-2 32 | level_shift: 0.1 33 | diis_space: 20 34 | conv_check: false # pyscf conv_check has a bug 35 | 36 | scf_machine: 37 | sub_size: 5 # 5 systems will be in one task, default is 1 38 | group_size: 2 # 2 tasks will be gathered into one group and submitted together 39 | ingroup_parallel: 2 # this will set numb_node to 2 in resources 40 | dispatcher: 41 | context: local 42 | batch: slurm 43 | remote_profile: null # use lazy local 44 | resources: 45 | numb_node: 2 # parallel in two nodes 46 | time_limit: '24:00:00' 47 | cpus_per_task: 8 48 | mem_limit: 8 49 | envs: 50 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 51 | sub_res: # resources for each sub task 52 | cpus_per_task: 8 53 | python: "python" # use python in path 54 | 55 | # train settings 56 | train_input: 57 | # model_args is ignored, since this is used as a restart 58 | data_args: 59 | batch_size: 16 60 | group_batch: 1 61 | extra_label: true 62 | conv_filter: true 63 | conv_name: conv 64 | preprocess_args: 65 | preshift: false # restarting model is already shifted, will not recompute the shift value 66 | prescale: false # same as above 67 | prefit_ridge: 1e1 68 | prefit_trainable: false 69 | train_args: 70 | decay_rate: 0.5 71 | decay_steps: 1000 72 | display_epoch: 100 73 | force_factor: 0.1 74 | n_epoch: 5000 75 | start_lr: 0.0001 76 | 77 | train_machine: 78 | dispatcher: 79 | context: local 80 | batch: slurm 81 | remote_profile: null # use lazy local 82 | resources: 83 | time_limit: '24:00:00' 84 | cpus_per_task: 4 85 | numb_gpu: 1 86 | mem_limit: 8 87 | python: "python" # use python in path 88 | 89 | # init settings 90 | init_model: false # do not use existing model in share_folder/init/model.pth 91 | 92 | init_scf: 93 | basis: ccpvdz 94 | # this is for pure energy training 95 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 96 | verbose: 1 97 | mol_args: 98 | incore_anyway: True 99 | scf_args: 100 | conv_tol: 1e-8 101 | conv_check: false # pyscf conv_check has a bug 102 | 103 | init_train: 104 | model_args: # necessary as this is init training 105 | hidden_sizes: [200, 200, 200] 106 | output_scale: 100 107 | use_resnet: true 108 | actv_fn: mygelu 109 | data_args: 110 | batch_size: 16 111 | group_batch: 1 112 | preprocess_args: 113 | preshift: true 114 | prescale: false 115 | prefit_ridge: 1e1 116 | prefit_trainable: false 117 | train_args: 118 | decay_rate: 0.96 119 | decay_steps: 500 120 | display_epoch: 100 121 | n_epoch: 50000 122 | start_lr: 0.0003 123 | 124 | # other settings 125 | cleanup: false 126 | strict: true 127 | -------------------------------------------------------------------------------- /examples/iterate/splitted/args.yaml: -------------------------------------------------------------------------------- 1 | # all values here are default parameters 2 | # except for `scf_machine` which shows grouping and ingroup parallelization 3 | n_iter: 5 4 | systems_train: null # use share_folder/systems_train.raw (must exist) 5 | systems_test: null # use share_folder/systems_test.raw 6 | workdir: "."
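# note (editor's illustrative sketch, not from the original file): when
# systems_train / systems_test are null as above, the paths are read from
# plain-text files under share_folder, one system path per line, e.g.
# (hypothetical excerpt):
#   /path/to/system/batch/set.000
#   /path/to/system/batch/set.001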
7 | share_folder: "share" # folder that contains all other settings 8 | # scf parameters 9 | scf_input: true # use share_folder/scf_input.yaml (must exist) 10 | scf_machine: 11 | sub_size: 5 # 5 systems will be in one task, default is 1 12 | group_size: 2 # 2 tasks will be gathered into one group and submitted together 13 | ingroup_parallel: 2 # 2 tasks in one group submission can run together 14 | # train parameters 15 | train_input: true # use share_folder/train_input.yaml (must exist) 16 | train_machine: 17 | resources: # add 1 gpu 18 | numb_gpu: 1 19 | # init parameters 20 | init_model: false # do not use existing model in share_folder/init/model.pth 21 | init_scf: true # use share_folder/init_scf.yaml (must exist) 22 | init_train: true # use share_folder/init_train.yaml (must exist) 23 | # other settings 24 | cleanup: false 25 | strict: true 26 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/init_scf.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | model_file: null 3 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 4 | verbose: 1 5 | mol_args: 6 | incore_anyway: True 7 | scf_args: 8 | conv_tol: 1e-8 9 | conv_check: false 10 | #penalty_terms: 11 | # - type: coulomb 12 | # required_labels: dm 13 | # strength: 1 14 | # random: true 15 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/init_train.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | preprocess_args: 11 | preshift: true 12 | prescale: false 13 | prefit_ridge: 1e1 14 | prefit_trainable: false 15 | train_args: 16 | ckpt_file: model.pth 17 | decay_rate: 0.96 18 | decay_steps: 500 19 | display_epoch: 100 20 | n_epoch: 50000 21 | start_lr: 0.0001 22 | train_paths: 23 | - data_train/* 24 | test_paths: 25 | - data_test/* 26 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | model_file: model.pth 3 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 4 | verbose: 1 5 | mol_args: 6 | incore_anyway: True 7 | scf_args: 8 | conv_tol: 1e-6 9 | conv_tol_grad: 1e-2 10 | level_shift: 0.1 11 | diis_space: 20 12 | conv_check: false 13 | #penalty_terms: 14 | # - type: coulomb 15 | # required_labels: dm 16 | # strength: 1 17 | # random: true 18 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/systems_test.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.063 2 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/systems_train.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.000 2 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.001 3 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.002 4 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.003
5 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.004 6 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.005 7 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.006 8 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.007 9 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.008 10 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.009 11 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.010 12 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.011 13 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.012 14 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.013 15 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.014 16 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.015 17 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.016 18 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.017 19 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.018 20 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.019 21 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.020 22 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.021 23 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.022 24 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.023 25 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.024 26 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.025 27 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.026 28 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.027 29 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.028 30 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.029 31 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.030 32 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.031 33 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.032 34 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.033 35 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.034 36 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.035 37 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.036 38 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.037 39 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.038 40 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.039 41 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.040 42 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.041 43 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.042 44 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.043 45 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.044 46 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.045 47 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.046 48 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.047 49 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.048 50 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.049 51 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.050 52 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.051 53 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.052 54 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.053 55 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.054 56 | 
/scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.055 57 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.056 58 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.057 59 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.058 60 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.059 61 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.060 62 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.061 63 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.062 64 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/train_input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | f_name: l_f_delta 11 | extra_label: true 12 | conv_filter: true 13 | preprocess_args: 14 | preshift: false 15 | prescale: false 16 | prefit_ridge: 1e1 17 | prefit_trainable: false 18 | train_args: 19 | ckpt_file: model.pth 20 | decay_rate: 0.5 21 | decay_steps: 1000 22 | display_epoch: 100 23 | force_factor: 0.1 24 | n_epoch: 5000 25 | start_lr: 0.0001 26 | train_paths: 27 | - data_train/* 28 | test_paths: 29 | - data_test/* 30 | -------------------------------------------------------------------------------- /examples/legacy/iter_linear/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import numpy as np 5 | import pyscf 6 | from pyscf import gto 7 | from sklearn import linear_model 8 | 9 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../../') 10 | from deepks.scf.scf import DSCF 11 | from deepks.scf.run import build_mol, solve_mol 12 | 13 | def get_linear_model(weig, wec): 14 | # too_small = weig.reshape(-1,108).std(0) < 1e-3 15 | wreg = linear_model.Ridge(1e-7, tol=1e-9) 16 | wreg.fit(weig.sum(1)[:], wec[:]) 17 | linear = torch.nn.Linear(108,1).double() 18 | linear.weight.data[:] = torch.from_numpy(wreg.coef_) 19 | linear.bias.data[:] = torch.tensor(wreg.intercept_ / 3) 20 | model = lambda x: linear(x).sum(1) 21 | return model 22 | 23 | def get_linear_model_normed(weig, wec, stdmin=1e-3): 24 | # too_small = weig.reshape(-1,108).std(0) < 1e-3 25 | input_scale = weig.reshape(-1,108).std(0).clip(stdmin) 26 | t_input_scale = torch.from_numpy(input_scale) 27 | weig /= input_scale 28 | wreg = linear_model.Ridge(1e-7, tol=1e-9) 29 | wreg.fit(weig.sum(1)[:], wec[:]) 30 | linear = torch.nn.Linear(108,1).double() 31 | linear.weight.data[:] = torch.from_numpy(wreg.coef_) 32 | linear.bias.data[:] = torch.tensor(wreg.intercept_ / 3) 33 | model = lambda x: linear(x / t_input_scale).sum(1) 34 | return model 35 | 36 | nmol = 1000 37 | ntrain = 900 38 | niter = 10 39 | 40 | mol_list = [build_mol(f'../path/to/data/water/geometry/{i:0>5}.xyz') for i in range(nmol)] 41 | ehfs = np.load('../path/to/data/water/rproj_mb2/e_hf.npy').reshape(-1)[:nmol] 42 | wene = np.loadtxt('../path/to/data/water/energy.dat', usecols=(1,2,3,4))[:nmol] 43 | erefs = wene[:,3] 44 | ecfs = ehfs 45 | ecs = erefs - ehfs 46 | ceigs = np.load('../../../data/tom_miller/water/rproj_mb2/dm_eig.npy')[:nmol] 47 | model = get_linear_model(ceigs[:ntrain], ecs[:ntrain]) 48 | 49 | os.makedirs('dump', exist_ok=True) 50 | np.save('dump/000.ehfs.npy', ehfs) 51 | np.save('dump/000.ecfs.npy', ecfs) 52 | 
np.save('dump/000.ceigs.npy', ceigs) 53 | np.save('dump/000.ecs.npy', ecs) 54 | np.save('dump/000.convs.npy', np.ones(ehfs.shape, dtype=bool)) 55 | 56 | for i in range(1, niter+1): 57 | oldecfs, oldceigs, oldehfs = ecfs, ceigs, ehfs 58 | oldecs = ecs 59 | oldmodel = model 60 | 61 | results = [solve_mol(mol, model) for mol in mol_list] 62 | meta, ehfs, ecfs, cdms, ceigs, convs = map(np.array, zip(*results)) 63 | ecs = erefs - ehfs 64 | model = get_linear_model(ceigs[:ntrain], ecs[:ntrain]) 65 | 66 | print((ecfs - erefs).mean(), np.abs(ecfs - erefs).mean()) 67 | 68 | np.save(f'dump/{i:0>3}.ehfs.npy', ehfs) 69 | np.save(f'dump/{i:0>3}.ecfs.npy', ecfs) 70 | np.save(f'dump/{i:0>3}.ceigs.npy', ceigs) 71 | np.save(f'dump/{i:0>3}.ecs.npy', ecs) 72 | np.save(f'dump/{i:0>3}.convs.npy', convs) 73 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.model.train import main as train_main 11 | from deepks.scf.run import main as scf_main 12 | from deepks.utils import load_yaml 13 | from deepks.task.task import PythonTask 14 | from deepks.task.workflow import Sequence, Iteration 15 | 16 | from pathlib import Path 17 | import shutil 18 | 19 | def collect_data(nmol, ntrain): 20 | ecf = np.load('results/e_tot.npy') 21 | assert ecf.size == nmol 22 | eref = np.load('e_ref.npy') 23 | 24 | err = eref.reshape(-1) - ecf.reshape(-1) 25 | convs = np.load("results/conv.npy").reshape(-1) 26 | print(f'converged calculation: {np.sum(convs)} / {nmol} = {np.sum(convs) / nmol:.3f}') 27 | print(f'mean error: {err.mean()}') 28 | print(f'mean absolute error: {np.abs(err).mean()}') 29 | print(f'mean absolute error after shift: {np.abs(err - err[:ntrain].mean()).mean()}') 30 | print(f' training: {np.abs(err[:ntrain] - err[:ntrain].mean()).mean()}') 31 | print(f' testing: {np.abs(err[ntrain:] - err[:ntrain].mean()).mean()}') 32 | 33 | ehf = np.load('results/e_base.npy') 34 | np.save('results/l_e_delta.npy', eref - ehf) 35 | 36 | dd = ['dm_eig.npy', 'l_e_delta.npy'] 37 | os.makedirs('train', exist_ok=True) 38 | os.makedirs('test', exist_ok=True) 39 | for d in dd: 40 | np.save(f"train/{d}", np.load(f'results/{d}')[:ntrain]) 41 | for d in dd: 42 | np.save(f"test/{d}", np.load(f'results/{d}')[ntrain:]) 43 | shutil.copy('results/system.raw', 'train') 44 | shutil.copy('results/system.raw', 'test') 45 | Path('train_paths.raw').write_text(str(Path('train').absolute())) 46 | Path('test_paths.raw').write_text(str(Path('test').absolute())) 47 | 48 | 49 | niter = 5 50 | nmol = 1500 51 | ntrain = 1000 52 | ntest = 500 53 | 54 | train_input = load_yaml('share/train_input.yaml') 55 | scf_input = load_yaml('share/scf_input.yaml') 56 | 57 | task_train = PythonTask(train_main, call_kwargs=train_input, 58 | outlog='log.train', 59 | workdir='00.train', 60 | link_prev_files=['train_paths.raw', 'test_paths.raw']) 61 | 62 | task_scf = PythonTask(scf_main, call_kwargs=scf_input, 63 | outlog='log.scf', 64 | workdir='01.scf', 65 | link_prev_files=['model.pth'], 66 | share_folder='share', link_share_files=['mol_files.raw']) 67 | 68 | task_data = PythonTask(collect_data, call_args=[nmol, ntrain], 69 | outlog='log.data', 70 | workdir='02.data', 71 | link_prev_files=['results'], 72 | share_folder='share', 
link_share_files=['e_ref.npy']) 73 | 74 | seq = Sequence([task_train, task_scf, task_data]) 75 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 76 | 77 | if Path('RECORD').exists(): 78 | iterate.restart() 79 | else: 80 | iterate.run() 81 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/run_res.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.model.train import main as train_main 11 | from deepks.model.test import main as train_test 12 | from deepks.scf.run import main as scf_main 13 | from deepks.scf.stats import collect_data_grouped 14 | from deepks.utils import load_yaml 15 | from deepks.task.task import PythonTask 16 | from deepks.task.workflow import Sequence, Iteration 17 | 18 | 19 | niter = 5 20 | nmol = 1500 21 | ntrain = 1000 22 | ntest = 500 23 | 24 | train_input = load_yaml('share/train_input.yaml') 25 | scf_input = load_yaml('share/scf_input.yaml') 26 | train_idx = np.arange(ntrain) 27 | 28 | task_scf = PythonTask(scf_main, call_kwargs=scf_input, 29 | outlog='log.scf', 30 | workdir='00.scf', 31 | link_prev_files=['model.pth'], 32 | share_folder='share', link_share_files=['mol_files.raw']) 33 | 34 | task_data = PythonTask(collect_data_grouped, call_args=[train_idx], 35 | outlog='log.data', 36 | workdir='01.data', 37 | link_prev_files=['model.pth', "results"], 38 | share_folder='share', link_share_files=['e_ref.npy']) 39 | 40 | task_train = PythonTask(train_main, call_args=["old_model.pth"], call_kwargs=train_input, 41 | outlog='log.train', 42 | workdir='02.train', 43 | link_prev_files=[('model.pth', 'old_model.pth'), 44 | 'train_paths.raw', 'test_paths.raw']) 45 | 46 | seq = Sequence([task_scf, task_data, task_train]) 47 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 48 | 49 | if os.path.exists('RECORD'): 50 | iterate.restart() 51 | else: 52 | iterate.run() 53 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/e_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_local/share/e_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/init/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_local/share/init/model.pth -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/init/test_paths.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_hf_dz/test 2 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/init/train_paths.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_hf_dz/train 2 | 
-------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | conv_tol: 1e-7 3 | xyz_files: [mol_files.raw] 4 | model_file: model.pth 5 | dump_dir: results 6 | dump_fields: [e_base, e_tot, dm_eig, conv] 7 | group: true 8 | verbose: 1 9 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/train_input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: true 14 | prescale_clip: 0.05 15 | prefit_ridge: 1e1 16 | prefit_trainable: false 17 | train_args: 18 | ckpt_file: model.pth 19 | decay_rate: 0.96 20 | decay_steps: 300 21 | display_epoch: 100 22 | n_epoch: 30000 23 | start_lr: 0.0001 24 | train_paths: 25 | - train_paths.raw 26 | test_paths: 27 | - test_paths.raw 28 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/extra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.scf.stats import collect_data_grouped 11 | from deepks.task.task import PythonTask, BatchTask, GroupBatchTask 12 | from deepks.task.workflow import Sequence, Iteration 13 | 14 | nsys = 1 15 | niter = 25 16 | ntrain = 1000 17 | train_idx = np.arange(ntrain) 18 | 19 | # SCF 20 | 21 | scf_cmd_tmpl = " ".join([ 22 | "python -u /path/to/source/deepks/scf/main.py", 23 | "scf_input.yaml", 24 | "-m model.pth", 25 | "-s mol_files.raw", 26 | "-d results"]) 27 | 28 | envs = {"PYSCF_MAX_MEMORY": 16000} 29 | scf_res = {"cpus_per_task": 10, 30 | "time_limit": "6:00:00", 31 | "mem_limit": 16, 32 | "envs": envs} 33 | 34 | task_scf = GroupBatchTask( 35 | [BatchTask(scf_cmd_tmpl.format(i=i), 36 | workdir=".", #f'task.{i}', 37 | share_folder='share', 38 | link_share_files=['mol_files.raw', 39 | ('raw_scf_input.yaml', 'scf_input.yaml')]) 40 | for i in range(nsys)], 41 | workdir='00.scf', 42 | outlog='log.scf', 43 | resources=scf_res, 44 | link_prev_files=['model.pth']) 45 | 46 | # labeling 47 | 48 | task_data = PythonTask( 49 | lambda: [collect_data_grouped(train_idx=train_idx, 50 | append=True, 51 | ene_ref=f"e_ref.npy", 52 | force_ref=f"f_ref.npy", 53 | sys_dir=f"results") 54 | for i in range(nsys)], 55 | outlog='log.data', 56 | workdir='01.data', 57 | link_prev_files=['model.pth'] + [f"results" for i in range(nsys)], 58 | share_folder='share', 59 | link_share_files=[f'e_ref.npy' for i in range(nsys)] 60 | +[f'f_ref.npy' for i in range(nsys)]) 61 | 62 | # training 63 | 64 | train_cmd = " ".join([ 65 | "python -u /path/to/source/deepks/train/main.py", 66 | "train_input.yaml", 67 | "--restart old_model.pth"]) 68 | 69 | train_res = {"time_limit": "24:00:00", 70 | "mem_limit": 32, 71 | "numb_gpu": 1} 72 | 73 | task_train = BatchTask(cmds=train_cmd, 74 | outlog='log.train', 75 | workdir='02.train', 76 | resources=train_res, 77 | link_prev_files=[('model.pth', 'old_model.pth'), 78 | 'train_paths.raw', 'test_paths.raw'], 79 | share_folder = 
'share', 80 | link_share_files=["train_input.yaml"]) 81 | 82 | # combine 83 | 84 | seq = Sequence([task_scf, task_data, task_train]) 85 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 86 | 87 | if os.path.exists('RECORD'): 88 | iterate.restart() 89 | else: 90 | iterate.run() 91 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/init_train/input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: e_cc 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: true 13 | prescale: true 14 | prescale_clip: 0.05 15 | prefit_ridge: 1e1 16 | prefit_trainable: false 17 | train_args: 18 | ckpt_file: model.pth 19 | decay_rate: 0.96 20 | decay_steps: 300 21 | display_epoch: 100 22 | n_epoch: 30000 23 | start_lr: 0.0001 24 | train_paths: 25 | - /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_ccsd_dz/train.1000 26 | test_paths: 27 | - /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_ccsd_dz/test.1000 28 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/init_train/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_new/init_train/model.pth -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.scf.stats import collect_data_grouped 11 | from deepks.task.task import PythonTask, BatchTask, GroupBatchTask 12 | from deepks.task.workflow import Sequence, Iteration 13 | 14 | nsys = 1 15 | niter = 10 16 | ntrain = 1000 17 | train_idx = np.arange(ntrain) 18 | 19 | # SCF 20 | 21 | scf_cmd_tmpl = " ".join([ 22 | "python -u /path/to/source/deepks/scf/main.py", 23 | "scf_input.yaml", 24 | "-m model.pth", 25 | "-s mol_files.raw", 26 | "-d results"]) 27 | 28 | envs = {"PYSCF_MAX_MEMORY": 16000} 29 | scf_res = {"cpus_per_task": 10, 30 | "time_limit": "6:00:00", 31 | "mem_limit": 16, 32 | "envs": envs} 33 | 34 | task_scf = GroupBatchTask( 35 | [BatchTask(scf_cmd_tmpl.format(i=i), 36 | workdir=".", #f'task.{i}', 37 | share_folder='share', 38 | link_share_files=['mol_files.raw', 'scf_input.yaml']) 39 | for i in range(nsys)], 40 | workdir='00.scf', 41 | outlog='log.scf', 42 | resources=scf_res, 43 | link_prev_files=['model.pth']) 44 | 45 | # labeling 46 | 47 | task_data = PythonTask( 48 | lambda: [collect_data_grouped(train_idx=train_idx, 49 | append=True, 50 | ene_ref=f"e_ref.npy", 51 | force_ref=f"f_ref.npy", 52 | sys_dir=f"results") 53 | for i in range(nsys)], 54 | outlog='log.data', 55 | workdir='01.data', 56 | link_prev_files=['model.pth'] + [f"results" for i in range(nsys)], 57 | share_folder='share', 58 | link_share_files=[f'e_ref.npy' for i in range(nsys)] 59 | +[f'f_ref.npy' for i in range(nsys)]) 60 | 61 | # training 62 | 63 | train_cmd = " ".join([ 64 | "python -u /path/to/source/deepks/train/main.py", 65 | "train_input.yaml", 66 | 
"--restart old_model.pth"]) 67 | 68 | train_res = {"time_limit": "24:00:00", 69 | "mem_limit": 32, 70 | "numb_gpu": 1} 71 | 72 | task_train = BatchTask(cmds=train_cmd, 73 | outlog='log.train', 74 | workdir='02.train', 75 | resources=train_res, 76 | link_prev_files=[('model.pth', 'old_model.pth'), 77 | 'train_paths.raw', 'test_paths.raw'], 78 | share_folder = 'share', 79 | link_share_files=["train_input.yaml"]) 80 | 81 | # combine 82 | 83 | seq = Sequence([task_scf, task_data, task_train]) 84 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 85 | 86 | if os.path.exists('RECORD'): 87 | iterate.restart() 88 | else: 89 | iterate.run() 90 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/e_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_new/share/e_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/f_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_new/share/f_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/init/model.pth: -------------------------------------------------------------------------------- 1 | ../../init_train/model.pth -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/raw_scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | systems: [mol_files.raw] 3 | model_file: model.pth 4 | dump_dir: results 5 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx] 6 | group: true 7 | verbose: 1 8 | scf_args: 9 | conv_tol: 1e-7 10 | conv_tol_grad: 3e-3 11 | level_shift: 0.1 12 | diis_space: 20 13 | conv_check: false 14 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | systems: [mol_files.raw] 3 | model_file: model.pth 4 | dump_dir: results 5 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx] 6 | group: true 7 | verbose: 1 8 | scf_args: 9 | conv_tol: 1e-7 10 | conv_tol_grad: 3e-3 11 | level_shift: 0.1 12 | diis_space: 20 13 | conv_check: false 14 | penalty_terms: 15 | - type: coulomb 16 | required_labels: [dm] 17 | strength: 1 18 | random: true 19 | 20 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/train_input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | f_name: l_f_delta 11 | extra_label: true 12 | preprocess_args: 13 | preshift: false 14 | prescale: false 15 | prefit_ridge: 1e1 16 | prefit_trainable: false 17 | train_args: 18 | ckpt_file: model.pth 19 | decay_rate: 0.7 20 | decay_steps: 1000 21 | display_epoch: 100 22 | force_factor: 0.1 23 | n_epoch: 10000 24 | start_lr: 0.0001 25 
| train_paths: 26 | - train_paths.raw 27 | test_paths: 28 | - test_paths.raw 29 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import glob 7 | import numpy as np 8 | 9 | # sys.path.append('/path/to/source') 10 | import deepks 11 | from deepks.task.task import PythonTask 12 | from deepks.task.task import ShellTask 13 | from deepks.task.task import BatchTask 14 | from deepks.task.task import GroupBatchTask 15 | from deepks.task.workflow import Sequence 16 | from deepks.task.workflow import Iteration 17 | from deepks.scf.stats import collect_data 18 | 19 | 20 | niter = 20 21 | 22 | # Define Training 23 | nmodel = 4 24 | 25 | train_res = {"time_limit": "24:00:00", 26 | "mem_limit": 32, 27 | "numb_gpu": 1} 28 | 29 | train_cmd = "python -u /path/to/source/deepks/train/main.py input.yaml" 30 | 31 | batch_train = [BatchTask(cmds=train_cmd, 32 | workdir=f'task.{i:02}', 33 | share_folder="share", 34 | link_share_files=["input.yaml"], 35 | link_prev_files=['train_paths.raw', 'test_paths.raw']) 36 | for i in range(nmodel)] 37 | run_train = GroupBatchTask(batch_train, 38 | resources=train_res, 39 | outlog="log.train") 40 | 41 | post_train = ShellTask("ln -s task.00/model.pth .") 42 | 43 | clean_train = ShellTask("rm slurm-*.out") 44 | 45 | train_flow = Sequence([run_train, post_train, clean_train], workdir='00.train') 46 | 47 | 48 | # Define SCF 49 | ngroup = 24 50 | ntrain = 3000 51 | 52 | mol_files = np.loadtxt('share/mol_files.raw', dtype=str) 53 | group_files = [mol_files[i::ngroup] for i in range(ngroup)] 54 | 55 | envs = {"PYSCF_MAX_MEMORY": 32000} 56 | scf_res = {"cpus_per_task": 5, 57 | "time_limit": "24:00:00", 58 | "mem_limit": 32, 59 | "envs": envs} 60 | 61 | remote = {"work_path": '/home/yixiaoc/SCR/yixiaoc/tmp', 62 | "hostname": "della", 63 | "username": "yixiaoc", 64 | "port": 22} 65 | disp = {"context_type": 'ssh', 66 | "batch_type": 'slurm', 67 | "remote_profile": remote} 68 | 69 | cmd_templ = " ".join([ 70 | "python -u /path/to/source/deepks/scf/main.py", 71 | "{mol_files}", 72 | "-m ../model.pth", 73 | "-d ../results", 74 | "-B ccpvdz", 75 | "--verbose 1", 76 | "--conv-tol 1e-6", 77 | "--conv-tol-grad 3e-2" 78 | ]) 79 | 80 | batch_scf = [BatchTask(cmds=cmd_templ.format(mol_files=" ".join(gf)), 81 | workdir=f'task.{i:02}', 82 | backward_files=['log.scf', 'err']) 83 | for i, gf in enumerate(group_files)] 84 | run_scf = GroupBatchTask(batch_scf, 85 | dispatcher=disp, 86 | resources=scf_res, 87 | outlog="log.scf", 88 | link_prev_files=['model.pth'], 89 | forward_files=['model.pth'], 90 | backward_files=['results/*']) 91 | 92 | all_idx = np.loadtxt('share/index.raw', dtype=int) 93 | train_idx = all_idx[:ntrain] 94 | test_idx = all_idx[ntrain:] 95 | 96 | post_scf = PythonTask(collect_data, call_args=[train_idx, test_idx], 97 | call_kwargs={"sys_dir": "results", "ene_ref": "e_ref.npy"}, 98 | outlog='log.data', 99 | share_folder='share', 100 | link_share_files=['e_ref.npy']) 101 | 102 | clean_scf = ShellTask("rm slurm-*.out") 103 | 104 | scf_flow = Sequence([run_scf, post_scf, clean_scf], workdir='01.scf') 105 | 106 | 107 | # Group them together 108 | per_iter = Sequence([train_flow, scf_flow]) 109 | iterate = Iteration(per_iter, niter, init_folder='share/init', record_file='RECORD') 110 | 111 | if os.path.exists('RECORD'): 112 | iterate.restart() 113 | 
else: 114 | iterate.run() 115 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/run_res.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import glob 7 | import numpy as np 8 | 9 | # sys.path.append('/path/to/source') 10 | import deepks 11 | from deepks.task.task import PythonTask 12 | from deepks.task.task import ShellTask 13 | from deepks.task.task import BatchTask 14 | from deepks.task.task import GroupBatchTask 15 | from deepks.task.workflow import Sequence 16 | from deepks.task.workflow import Iteration 17 | from deepks.scf.stats import collect_data 18 | 19 | 20 | niter = 5 21 | ntrain = 7000 22 | 23 | # Define Training 24 | nmodel = 4 25 | 26 | train_res = {"time_limit": "6:00:00", 27 | "mem_limit": 32, 28 | "numb_gpu": 1} 29 | 30 | train_cmd = "python -u /path/to/source/deepks/train/main.py input.yaml --restart ../old_model.pth" 31 | 32 | batch_train = [BatchTask(cmds=train_cmd, 33 | workdir=f'task.{i:02}', 34 | share_folder="share", 35 | link_share_files=["input.yaml"], 36 | link_prev_files=['train_paths.raw', 'test_paths.raw']) 37 | for i in range(nmodel)] 38 | run_train = GroupBatchTask(batch_train, 39 | resources=train_res, 40 | outlog="log.train", 41 | link_prev_files=[('model.pth', 'old_model.pth')]) 42 | 43 | post_train = ShellTask("ln -s task.00/model.pth .") 44 | 45 | clean_train = ShellTask("rm slurm-*.out") 46 | 47 | train_flow = Sequence([run_train, post_train, clean_train], workdir='00.train') 48 | 49 | 50 | # Define SCF 51 | ngroup = 12 52 | 53 | mol_files = np.loadtxt('share/mol_files.raw', dtype=str) 54 | group_files = [mol_files[i::ngroup] for i in range(ngroup)] 55 | 56 | envs = {"PYSCF_MAX_MEMORY": 32000} 57 | scf_res = {"cpus_per_task": 5, 58 | "time_limit": "6:00:00", 59 | "mem_limit": 32, 60 | "envs": envs} 61 | 62 | remote = {"work_path": '/home/yixiaoc/SCR/yixiaoc/tmp', 63 | "hostname": "della", 64 | "username": "yixiaoc", 65 | "port": 22} 66 | disp = {"context_type": 'ssh', 67 | "batch_type": 'slurm', 68 | "remote_profile": remote} 69 | 70 | cmd_templ = " ".join([ 71 | "python -u /path/to/source/deepks/scf/main.py", 72 | "{mol_files}", 73 | "-m ../model.pth", 74 | "-d ../results", 75 | "-B ccpvdz", 76 | "--verbose 1", 77 | "--conv-tol 1e-6", 78 | "--conv-tol-grad 3e-2" 79 | ]) 80 | 81 | batch_scf = [BatchTask(cmds=cmd_templ.format(mol_files=" ".join(gf)), 82 | workdir=f'task.{i:02}', 83 | backward_files=['log.scf', 'err']) 84 | for i, gf in enumerate(group_files)] 85 | run_scf = GroupBatchTask(batch_scf, 86 | dispatcher=disp, 87 | resources=scf_res, 88 | outlog="log.scf", 89 | link_prev_files=['model.pth'], 90 | forward_files=['model.pth'], 91 | backward_files=['results/*']) 92 | 93 | all_idx = np.loadtxt('share/index.raw', dtype=int) 94 | train_idx = all_idx[:ntrain] 95 | test_idx = all_idx[ntrain:] 96 | 97 | post_scf = PythonTask(collect_data, call_args=[train_idx, test_idx], 98 | call_kwargs={"sys_dir": "results", "ene_ref": "e_ref.npy"}, 99 | outlog='log.data', 100 | share_folder='share', 101 | link_share_files=['e_ref.npy']) 102 | 103 | clean_scf = ShellTask("rm slurm-*.out") 104 | 105 | scf_flow = Sequence([run_scf, post_scf, clean_scf], workdir='01.scf') 106 | 107 | 108 | # Group them together 109 | per_iter = Sequence([train_flow, scf_flow]) 110 | iterate = Iteration(per_iter, niter, init_folder='share/init', record_file='RECORD') 111 | 112 | if os.path.exists('RECORD'): 
113 | iterate.restart() 114 | else: 115 | iterate.run() 116 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/share/e_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_slurm/share/e_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/share/input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.96 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 30000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/share/test.sh: -------------------------------------------------------------------------------- 1 | mkdir test 2 | python /path/to/source/deepks/train/test.py -m model.pth -d `cat train_paths.raw` -o test/train 3 | python /path/to/source/deepks/train/test.py -m model.pth -d `cat test_paths.raw` -o test/test 4 | -------------------------------------------------------------------------------- /examples/legacy/train_active_learning/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | from glob import glob 7 | import numpy as np 8 | 9 | # sys.path.append('/path/to/source') 10 | import deepks 11 | from deepks.task.task import PythonTask 12 | from deepks.task.task import ShellTask 13 | from deepks.task.task import BatchTask 14 | from deepks.task.task import GroupBatchTask 15 | from deepks.task.workflow import Sequence 16 | from deepks.task.workflow import Iteration 17 | 18 | 19 | # define key parameters 20 | nsel = 200 21 | nmodel = 4 22 | niter = 21 23 | 24 | # define training task 25 | train_res = {"time_limit": "24:00:00", 26 | "mem_limit": 32, 27 | "numb_gpu": 1} 28 | 29 | disp = {"context_type": 'local', 30 | "batch_type": 'slurm'} 31 | 32 | train_cmd = "python -u /path/to/source/deepks/train/main.py input.yaml" 33 | 34 | batch_train = [BatchTask(cmds=train_cmd, 35 | workdir=f'model.{i:02}', 36 | share_folder="share", 37 | link_share_files=["input.yaml"]) 38 | for i in range(nmodel)] 39 | task_train = GroupBatchTask(batch_train, 40 | resources=train_res, 41 | dispatcher=disp, 42 | outlog="log.train", 43 | errlog="err.train", 44 | link_prev_files=[('new_train_paths.raw', 'train_paths.raw'), 45 | ('new_test_paths.raw', 'test_paths.raw')]) 46 | 47 | 48 | # define testing task 49 | test_cmd = "srun -N 1 -t 1:00:00 --gres=gpu:1 bash test_model.sh 1> log.test 2> err.test" 50 | task_test = ShellTask(test_cmd, 51 | share_folder="share", 52 | link_share_files=["test_model.sh"]) 53 | 54 | 55 | # define selecting task 56 | def select_data(nsel): 57 | paths = glob("model.*") 58 | old_trn = np.loadtxt("train_paths.raw", dtype=str) 59 | old_tst = np.loadtxt("test_paths.raw", dtype=str) 60 | trn_res =
np.stack([np.loadtxt(f"{m}/test/train.all.out")[:,1] for m in paths], -1) 61 | tst_res = np.stack([np.loadtxt(f"{m}/test/test.all.out")[:,1] for m in paths], -1) 62 | 63 | tst_std = np.std(tst_res, axis=-1) 64 | order = np.argsort(tst_std)[::-1] 65 | sel = order[:nsel] 66 | rst = np.sort(order[nsel:]) 67 | 68 | new_trn = np.concatenate([old_trn, old_tst[sel]]) 69 | new_tst = old_tst[rst] 70 | np.savetxt("new_train_paths.raw", new_trn, fmt="%s") 71 | np.savetxt("new_test_paths.raw", new_tst, fmt="%s") 72 | 73 | task_select = PythonTask(select_data, call_args=[nsel]) 74 | 75 | 76 | # combine them together 77 | iterate = Iteration([task_train, task_test, task_select], niter, init_folder='share/init', record_file='RECORD') 78 | 79 | if os.path.exists('RECORD'): 80 | iterate.restart() 81 | else: 82 | iterate.run() 83 | -------------------------------------------------------------------------------- /examples/legacy/train_active_learning/share/input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig, se_eig, fe_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.96 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 50000 22 | start_lr: 0.0001 23 | train_paths: 24 | - ../train_paths.raw 25 | test_paths: 26 | - ../test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/legacy/train_active_learning/share/test_model.sh: -------------------------------------------------------------------------------- 1 | for fd in model.*; do mkdir $fd/test; done 2 | 3 | echo training set 4 | python /path/to/source/deepks/train/test.py -m model*/model.pth -d `cat train_paths.raw` -o test/train -D dm_eig se_eig fe_eig 5 | 6 | echo testing set 7 | python /path/to/source/deepks/train/test.py -m model*/model.pth -d `cat test_paths.raw` -o test/test -D dm_eig se_eig fe_eig 8 | -------------------------------------------------------------------------------- /examples/train_input/extended.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig, se_eig, fe_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.96 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 50000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/train_input/force.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | d_name: dm_eig 11 | f_name: l_f_delta 12 | # gvx_name: grad_vx # experimental dm training 13 | extra_label: true 14 | conv_name: conv 15 | preprocess_args: 16 | preshift: 
false 17 | prescale: false 18 | prefit_ridge: 1e1 19 | prefit_trainable: false 20 | train_args: 21 | ckpt_file: model.pth 22 | decay_rate: 0.5 23 | decay_steps: 1000 24 | display_epoch: 100 25 | force_factor: 0.1 26 | n_epoch: 5000 27 | start_lr: 0.0001 28 | train_paths: 29 | - data_train/* 30 | test_paths: 31 | - data_test/* 32 | -------------------------------------------------------------------------------- /examples/train_input/gelu.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e1 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.98 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 100000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/train_input/restart.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.5 19 | decay_steps: 1000 20 | display_epoch: 100 21 | n_epoch: 5000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/water_cluster/.gitignore: -------------------------------------------------------------------------------- 1 | iter.* 2 | share 3 | log.* 4 | err.* 5 | RECORD 6 | PID 7 | test_results 8 | -------------------------------------------------------------------------------- /examples/water_cluster/README.md: -------------------------------------------------------------------------------- 1 | # Example of water cluster 2 | 3 | We provide here a detailed example of generating a DeePHF or DeePKS functional for water clusters, and demonstrate its generalizability with a test on proton transfer of a water hexamer ring. 4 | 5 | Here we take `args.yaml` as the configuration file. The iteration can be run directly by executing the [`./run.sh`](./run.sh) file, which contains the following lines: 6 | ```bash 7 | nohup python -u -m deepks iterate args.yaml >> log.iter 2> err.iter & 8 | echo $! > PID 9 | ``` 10 | that run the iterative learning procedure in the background and record its PID in the designated file. 11 | Note that we use `python -u -m deepks` to turn off Python's output buffering. You can also use `deepks` or `dks` directly if you have installed it properly. 12 | 13 | Here we are using Slurm to schedule jobs. If Slurm is not available, please execute [`./run_shell.sh`](./run_shell.sh) to run on the local machine. In the following sections we provide a walkthrough on how to write the arguments for the deepks input in [`args.yaml`](./args.yaml). You can also take a look at that file for explanations of each specific parameter.
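While the iteration runs in the background, a couple of standard shell commands are enough to follow or stop it. This is a minimal sketch; it only assumes the `log.iter` and `PID` files written by `run.sh`:
```bash
tail -f log.iter     # follow the iteration log as it is written
kill "$(cat PID)"    # stop the background run recorded by run.sh
```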
14 | 15 | ## System preparation 16 | 17 | We use randomly generated water monomers, dimers and trimers as training datasets. Each dataset contains 100 near-equilibrium configurations. We also include 50 tetramers as a validation dataset. We use energy and force as labels. The reference values are given by CCSD calculations with the cc-pVDZ basis. The system configurations and corresponding labels are grouped into different folders by the number of atoms, following the convention described in [another example](../water_single/README.md). Note that the default length unit in deepks is Bohr. The systems we provide here are in Angstrom, so we add a `unit.raw` file containing "Angstrom" in each system folder to override the default unit. The paths to the folders can be specified in the config file as follows: 18 | ```yaml 19 | systems_train: # can also be files containing system paths 20 | - ./systems/train.n[1-3] 21 | systems_test: # if empty, use the last system of training set 22 | - ./systems/valid.n4 23 | ``` 24 | 25 | ## Initialization (DeePHF model) 26 | 27 | As a first step, we need to train an energy model as the starting point of the iterative learning procedure. This consists of two steps. First, we solve the systems using the baseline method such as HF or PBE and dump the descriptors needed for training the energy model. Second, we conduct the training from scratch using the previously dumped descriptors. If there is already an existing model, this step can be skipped by providing the path of the model to the `init_model` key. 28 | 29 | The energy model generated in this step is also a ready-to-use DeePHF model, saved at `iter.init/01.train/model.pth`. If self-consistency is not needed, the remaining iteration steps can be skipped. We do not use forces as labels when training the energy model in this example. 30 | 31 | The parameters of the init SCF calculation are specified under the `init_scf` key. The same set of parameters is also accepted as a standalone file by the `deepks scf` command when running SCF calculations directly. We use cc-pVDZ as the calculation basis. The required fields to be dumped are `dm_eig` for descriptors and `l_e_delta` for reference correction energies as labels. In addition, we also include `e_tot` for the total energy and `conv` as a record of convergence. 32 | ```yaml 33 | dump_fields: [dm_eig, l_e_delta, conv, e_tot] 34 | ``` 35 | Additional parameters for the molecule and SCF calculation can also be provided via the `mol_args` and `scf_args` keys, and will be passed directly to the corresponding interfaces in PySCF. 36 | 37 | The parameters of the init training are specified under the `init_train` key. Similarly, the parameters can also be passed to the `deepks train` command as a standalone file. In `model_args`, we configure the neural network with three hidden layers and 100 neurons per layer, using the GELU activation function and skip connections. We also scale the output correction energies by a factor of 100 so that they are of order one and easier to learn. In `preprocess_args`, the descriptors are set to be preprocessed to have zero mean on the training set. A prefitted ridge regression with penalty strength 10 is also added to the model to speed up training. We set the batch size in `data_args` to 16, and the total number of training epochs in `train_args` to 50000. The learning rate starts at 3e-4 and decays by a factor of 0.96 every 500 steps.
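Put together, the `init_train` section takes roughly the following shape. This is a sketch mirroring the values quoted above; the shipped [`args.yaml`](./args.yaml) in this folder uses a shorter training schedule, see the comments there:
```yaml
init_train:
  model_args:
    hidden_sizes: [100, 100, 100]
    output_scale: 100
    use_resnet: true
    actv_fn: gelu
  data_args:
    batch_size: 16
  preprocess_args:
    preshift: true
    prefit_ridge: 1e1
  train_args:
    n_epoch: 50000
    start_lr: 0.0003
    decay_rate: 0.96
    decay_steps: 500
```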
38 | 39 | ## Iterative learning (DeePKS model) 40 | 41 | For self-consistency, we take the model acquired in the last step and perform several additional iterations of SCF calculation and NN training. The number of iterations is set to 10 via the `n_iter` key. If it is set to 0, no iteration will be performed, which gives the DeePHF model. In the iterative learning procedure, we also include forces as labels to improve accuracy. 42 | 43 | The SCF parameters are provided in the `scf_input` key, following the same rules as the `init_scf` key. In order to use forces as labels, we additionally dump `grad_vx` for the gradients of the descriptors and `l_f_delta` for the reference correction forces. `f_tot` is also included for the total force results. 44 | ```yaml 45 | dump_fields: [conv, e_tot, dm_eig, l_e_delta, f_tot, grad_vx, l_f_delta] 46 | ``` 47 | Due to the complexity of the neural network functional, we use looser (but still accurate enough) convergence criteria in `scf_args`, setting `conv_tol` to 1e-6. 48 | 49 | The training parameters are provided in the `train_input` key, similar to `init_train`. But since we are restarting from the existing model, no `model_args` is needed, and the preprocessing procedure can be turned off. In addition, we add `extra_label: true` in `data_args` and `force_factor: 1` in `train_args` to enable using forces in training. The total number of training epochs is also reduced to 5000. The learning rate starts at 1e-4 and decays by a factor of 0.5 every 1000 steps. 50 | 51 | ## Machine settings 52 | 53 | How the SCF and training tasks are executed is specified in `scf_machine` and `train_machine`, respectively. Currently, both the initial and the following iterations share the same machine settings. In this example, we run our tasks on a local computing cluster with Slurm as the job scheduler. The platform to run the tasks is specified under the `dispatcher` key, and the computing resources assigned to each task are specified under `resources`. These settings differ across computing platforms. We provide here our `train_machine` settings as an example: 54 | ```yaml 55 | dispatcher: 56 | context: local 57 | batch: slurm # set to "shell" to run on local machine 58 | remote_profile: null # unnecessary in local context 59 | resources: 60 | time_limit: '24:00:00' 61 | cpus_per_task: 4 62 | numb_gpu: 1 63 | mem_limit: 8 # gigabyte 64 | python: "python" # use python in path 65 | ``` 66 | where we assign four CPU cores and one GPU to the training task, and set its time limit to 24 hours and memory limit to 8 GB. The detailed settings available for `dispatcher` and `resources` can be found in the documentation of the DP-GEN software, with a slightly different interface. 67 | 68 | If there is no Slurm scheduler available, DeePKS-kit can also be run on a local machine with vanilla shell scripts, simply by setting `batch: shell`. Please check [`shell.yaml`](./shell.yaml) for an example. In that case, `resources` will be ignored and all available resources on the machine will be used. Support for more scheduler systems will also be implemented in the future. 69 | 70 | ## Testing the model 71 | 72 | During each iteration of the learning procedure, a brief summary of the accuracy of the SCF calculation can be found in `iter.xx/00.scf/log.data`. Average energy and force (if applicable) errors are shown for both the training and validation datasets. The results of the SCF calculations are also stored in `iter.xx/00.scf/data_train` and `iter.xx/00.scf/data_test`, grouped by training and testing systems.
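These dumped results can also be inspected by hand. Below is a minimal sketch that recomputes the energy error of the final iteration; it assumes each dumped field is stored as a `<field>.npy` array per system folder and that the `data_test` subfolders are named after the source systems (both are assumptions about the on-disk layout, adjust to what your run actually produces):
```python
import os
from glob import glob
import numpy as np

for d in sorted(glob("iter.09/00.scf/data_test/*")):
    name = os.path.basename(d)
    conv = np.load(os.path.join(d, "conv.npy")).reshape(-1).astype(bool)
    e_tot = np.load(os.path.join(d, "e_tot.npy")).reshape(-1)
    e_ref = np.load(os.path.join("systems", name, "energy.npy")).reshape(-1)
    mae = np.abs(e_tot - e_ref)[conv].mean()  # energy MAE on converged frames, in Hartree
    print(f"{name}: {conv.mean():.1%} converged, energy MAE = {mae:.2e} Ha")
```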
73 | 74 | After finishing the 10 iterations, the resulting DeePKS model can be found at `iter.09/01.train/model.pth`. The model can be used either in a Python script that creates the extended PySCF class, or directly through the `deepks scf` command. As a testing example, we run the SCF calculation using the learned DeePKS model on the simultaneous six-proton-transfer path of a water hexamer ring. 75 | The command can be found in [`test.sh`](./test.sh). 76 | The results of each configuration during the proton transfer are grouped in the `test_results` folder. 77 | 78 | We can see that all the predicted energies fall within the chemical accuracy range of the reference values given by the CCSD calculation. Note that none of the training datasets include dissociated configurations like those encountered during the proton transfer. The DeePKS model trained on up to three water molecules nevertheless exhibits good transferability, even in the bond-breaking case. -------------------------------------------------------------------------------- /examples/water_cluster/args.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | n_iter: 10 4 | 5 | # training and testing systems 6 | systems_train: # can also be files containing system paths 7 | - ./systems/train.n[1-3] 8 | 9 | systems_test: # if empty, use the last system of training set 10 | - ./systems/valid.n4 11 | 12 | # directory setting 13 | workdir: "." 14 | share_folder: "share" # folder that stores all other settings 15 | 16 | # scf settings 17 | scf_input: # can also be specified by a separate file 18 | basis: ccpvdz 19 | # this is for force training 20 | # the following properties will be dumped in the data folder 21 | # please refer to https://arxiv.org/abs/2012.14615 for a detailed explanation of each field 22 | dump_fields: [atom, e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 23 | verbose: 1 24 | # parameters that will be passed directly to pyscf Mol class 25 | mol_args: 26 | incore_anyway: True 27 | # parameters that will be passed directly to pyscf SCF class 28 | scf_args: 29 | conv_tol: 1e-6 30 | conv_tol_grad: 3e-2 31 | level_shift: 0.1 32 | diis_space: 20 33 | conv_check: false # pyscf conv_check has a bug 34 | 35 | scf_machine: 36 | # number of systems that will be put in one task, default is 1 37 | # a task corresponds to a set of commands, and is the smallest unit to be tracked 38 | sub_size: 1 39 | # 2 tasks will be gathered into one group and submitted together 40 | # a group corresponds to a job submitted to the scheduler system 41 | group_size: 2 42 | # if larger than 1, run n tasks in parallel in one group (one job) 43 | ingroup_parallel: 1 44 | # the parameters determining the machine settings that the jobs are running on 45 | dispatcher: 46 | # "local" to run on the local machine, or "ssh" to run on a remote machine 47 | context: local 48 | # "slurm" to use the slurm scheduler system, or "shell" to just use shell 49 | batch: slurm 50 | # only needed when using "ssh" in context 51 | # pass a dict like {username: USERNAME, password: PASSWORD, work_path: /path/to/tmp/folder} 52 | remote_profile: null 53 | # the parameters determining the resources allocated for each job (group of tasks) 54 | # only needed when batch is set to "slurm" 55 | # for shell users, all
available resources will be used automatically 56 | resources: 57 | # only set this larger than 1 when parallelizing over multiple nodes with `ingroup_parallel` 58 | # otherwise please keep it at 1, since pyscf does not support MPI and can only run on a single node 59 | numb_node: 1 60 | time_limit: '24:00:00' 61 | cpus_per_task: 8 62 | mem_limit: 8 #GB 63 | # environment variables 64 | envs: 65 | PYSCF_MAX_MEMORY: 8000 #MB, increase from the default 4G to 8G to match the mem_limit above 66 | # resources for each sub task in jobs (groups of tasks) 67 | # only needed when ingroup_parallel is larger than 1 68 | # the resources are reallocated between parallel tasks 69 | sub_res: 70 | cpus_per_task: 8 71 | python: "python" # use python in path 72 | 73 | # training settings 74 | train_input: 75 | # model_args is ignored, since this is used as restart 76 | # see init_train for potential model_args 77 | data_args: 78 | # training batch size, 16 is recommended 79 | batch_size: 16 80 | # if larger than 1, n batches will be grouped together to form a larger one 81 | # the final batch size would be group_batch * batch_size 82 | # only needed when many systems have only one datapoint, hence their batch size can only be 1 83 | group_batch: 1 84 | # if set to true, will try to find force labels and use them in training 85 | extra_label: true 86 | # if set to true, will read the convergence data from conv_name 87 | # and only use converged datapoints to train 88 | conv_filter: true 89 | conv_name: conv 90 | # to speed up training, deepks supports normalizing the data first (preshift and prescale) 91 | # and doing a linear regression on the whole training set as prefitting 92 | preprocess_args: 93 | preshift: false # restarting model already shifted. Will not recompute shift value 94 | prescale: false # same as above 95 | # prefitting is by default enabled 96 | prefit_ridge: 1e1 # the ridge factor used in linear regression 97 | prefit_trainable: false # keep the linear regression fixed during the training 98 | train_args: 99 | # the starting learning rate, will decay later 100 | start_lr: 0.0001 101 | # lr will decay by a factor of `decay_rate` every `decay_steps` epochs 102 | decay_rate: 0.5 103 | decay_steps: 1000 104 | # show training results every n epochs 105 | display_epoch: 100 106 | # the prefactor multiplying the force part of the loss 107 | force_factor: 1 108 | # total number of epochs needed in training 109 | n_epoch: 5000 110 | 111 | train_machine: 112 | # for training, no tasks or groups are needed since there's only one task 113 | # the dispatcher settings are the same as above 114 | dispatcher: 115 | context: local 116 | batch: slurm 117 | remote_profile: null # use lazy local 118 | # resources settings are also the same as above 119 | resources: 120 | time_limit: '24:00:00' 121 | cpus_per_task: 4 122 | # use gpu in training; currently only 1 is supported 123 | numb_gpu: 1 124 | mem_limit: 8 #GB 125 | python: "python" # use python in path 126 | 127 | # init settings 128 | init_model: false # do not use existing model in share_folder/init/model.pth 129 | 130 | # the first scf iteration, needed if init_model is false 131 | # possible settings are the same as scf_input 132 | init_scf: 133 | basis: ccpvdz 134 | dump_fields: [atom, e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 135 | verbose: 1 136 | mol_args: 137 | incore_anyway: True 138 | scf_args: 139 | conv_tol: 1e-8 140 | conv_check: false # pyscf conv_check has a bug 141 |
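# note: as described in the README, the `init_scf` block above is also accepted
# as a standalone input file by the `deepks scf` command; a hypothetical direct
# invocation (illustrative paths, flags as used in test.sh) would be:
#   deepks scf init_scf.yaml -s ./systems/train.n1 -d ./init_results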
142 | # the first training step, needed if init_model is false 143 | # most settings are the same as train_input, but model_args is specified here 144 | init_train: 145 | # whether to fit element-wise energy constants from the training data 146 | # requires `dump_fields` to contain `atom` if set to true 147 | fit_elem: false # this is the default 148 | # necessary as this is init training 149 | model_args: 150 | # the number of *hidden* neurons 151 | # note the first (n_descriptor) and last (1) layers are not included here 152 | hidden_sizes: [100, 100, 100] 153 | # the output will be divided by 100 before comparing with labels, to improve training 154 | output_scale: 100 155 | # use skip connections between layers if the sizes are the same 156 | use_resnet: true 157 | # gelu generally performs better than others 158 | actv_fn: gelu 159 | # whether to use a predefined embedding function 160 | # to further symmetrize the eigenvalues as descriptors 161 | # adding an embedding can make the energy surface smoother, hence improving convergence 162 | # but may slightly reduce the accuracy (especially in generalization) 163 | # for water we do not use it; if you encounter convergence problems, set it to 164 | # embedding: thermal 165 | embedding: null 166 | # if `fit_elem` is true, setting this will use user-defined 167 | # element energy constants, instead of fitting them from data. 168 | # can be an absolute path to the file, or a length 2 list 169 | # containing element charges and constants, like 170 | # [[1, 8], [-0.08, -0.04]] 171 | elem_table: null 172 | # the rest are the same as above 173 | data_args: 174 | batch_size: 16 175 | group_batch: 1 176 | preprocess_args: 177 | preshift: true # init model will shift the input descriptors to mean zero 178 | prescale: false 179 | prefit_ridge: 1e1 180 | prefit_trainable: false 181 | # the following are suggested parameters for the initial training 182 | # note that the training curve shown in the deepks-kit paper uses a different set of parameters 183 | # the paper parameters take an unnecessary length of time and are no longer suggested 184 | train_args: 185 | decay_rate: 0.95 # 0.96 in paper example training curve 186 | decay_steps: 300 # 500 in paper example training curve 187 | display_epoch: 100 188 | n_epoch: 15000 # 50000 in paper example training curve 189 | start_lr: 0.0003 190 | 191 | # other settings 192 | cleanup: false 193 | strict: true 194 | -------------------------------------------------------------------------------- /examples/water_cluster/run.sh: -------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate args.yaml >> log.iter 2> err.iter & 2 | echo $! > PID 3 | -------------------------------------------------------------------------------- /examples/water_cluster/run_shell.sh: -------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate args.yaml shell.yaml >> log.iter 2> err.iter & 2 | echo $!
> PID 3 | -------------------------------------------------------------------------------- /examples/water_cluster/shell.yaml: -------------------------------------------------------------------------------- 1 | # to use this file, simply add its name as another argument 2 | # in the command line after the main args.yaml 3 | # for example, `deepks iterate args.yaml shell.yaml` 4 | # this overwrites the earlier settings with those specified in this file 5 | 6 | scf_machine: 7 | # every system will be run as a separate command (a task) 8 | sub_size: 1 9 | # 2 tasks will be gathered into one group and submitted together as a shell script 10 | # all shell scripts will be executed at the same time 11 | # hence in parallel, sharing the whole machine's resources 12 | # you may want to set this to a large number 13 | # because the number of tasks running at the same time would be nsystems / (sub_size * group_size) 14 | group_size: 2 15 | dispatcher: 16 | context: local 17 | batch: shell # set to shell to run on local machine 18 | remote_profile: null # not needed in local case 19 | # resources are no longer needed, though envs can still be set here 20 | resources: 21 | envs: 22 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 23 | python: "python" # use python in path 24 | 25 | train_machine: 26 | dispatcher: 27 | context: local 28 | batch: shell # same as above, use shell to run on local machine 29 | remote_profile: null # use lazy local 30 | python: "python" # use python in path 31 | # resources are no longer needed, and the task will use gpu automatically if there is one 32 | 33 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/test.n6/atom.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/test.n6/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/test.n6/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n1/atom.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/energy.npy: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n1/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n1/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/coord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n2/coord.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n2/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n2/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/type.raw: -------------------------------------------------------------------------------- 1 | O 2 | H 3 | H 4 | O 5 | H 6 | H 7 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/coord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n3/coord.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n3/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n3/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/type.raw: 
-------------------------------------------------------------------------------- 1 | O 2 | H 3 | H 4 | O 5 | H 6 | H 7 | O 8 | H 9 | H 10 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/coord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/valid.n4/coord.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/valid.n4/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/valid.n4/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/type.raw: -------------------------------------------------------------------------------- 1 | O 2 | H 3 | H 4 | O 5 | H 6 | H 7 | O 8 | H 9 | H 10 | O 11 | H 12 | H 13 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/test.sh: -------------------------------------------------------------------------------- 1 | deepks scf share/scf_input.yaml -m iter.09/01.train/model.pth -s systems/test.n6 -F e_tot f_tot conv rdm -d test_results -G 2 | -------------------------------------------------------------------------------- /examples/water_single/.gitignore: -------------------------------------------------------------------------------- 1 | */iter.* 2 | */share 3 | */log.* 4 | */err.* 5 | */RECORD 6 | */PID 7 | -------------------------------------------------------------------------------- /examples/water_single/README.md: -------------------------------------------------------------------------------- 1 | # Example for water 2 | 3 | This is an example of how to use the `deepks` library to train an energy functional for water molecules. The sub-folders are grouped as follows: 4 | 5 | - `systems` contains all data that has been prepared in `deepks` format. 6 | - `init` contains input files used to train a (perturbative) energy model (DeePHF). 7 | - `iter` contains input files used to train a self-consistent model iteratively (DeePKS). 8 | - `withdens` contains input files used to train an SCF model with density labels. 9 | 10 | 11 | ## Prepare data 12 | 13 | To prepare data, please first note that `deepks` uses atomic units by default but switches to Angstrom (Å) as the length unit when xyz files are used as systems. 14 | 15 | Property | Unit 16 | --- | :---: 17 | Length | Bohr (Å if from xyz) 18 | Energy | $E_h$ (Hartree) 19 | Force | $E_h$/Bohr ($E_h$/Å if from xyz) 20 |
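If your own data comes in Angstrom, you can either add a `unit.raw` file to the system folder as done in the [water_cluster example](../water_cluster/README.md), or convert the coordinates up front. A minimal sketch of the latter (the file names are illustrative; the factor is the standard Bohr/Angstrom conversion):
```python
import numpy as np

BOHR_PER_ANGSTROM = 1.8897261246  # 1 Å expressed in Bohr
coord_ang = np.load("coord_angstrom.npy")             # hypothetical input in Å
np.save("coord.npy", coord_ang * BOHR_PER_ANGSTROM)   # stored in Bohr
```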
21 | `deepks` accepts data in three formats. 22 | 23 | - **single `xyz` files** with properties saved as separate files sharing the same base name. 24 | e.g. for `0000.xyz`, its energy can be saved as `0000.energy.npy`, forces as `0000.force.npy`, and density matrix as `0000.dm.npy`, all in the same folder. 25 | - **grouped into folders** with the same number of atoms. 26 | Such a folder should contain an `atom.npy` that has shape `n_frames x n_atoms x 4`, where the four elements correspond to the nuclear charge of the atom and its three spatial coordinates. 27 | Other properties can be provided as separate files like `energy.npy` and `force.npy`. 28 | - **grouped with an explicit `type.raw` file** with all frames having the same types of elements. 29 | This is similar to the above, except that `atom.npy` is replaced by `coord.npy`, containing pure spatial coordinates, and a `type.raw` containing the element types for all frames of this system. This format is very similar to the one used in DeePMD-Kit, but the `type.raw` must contain real element types here. 30 | 31 | Note that the property files are optional. For a pure SCF calculation, they are not needed. But in order to train a model, they are needed as labels. 32 | 33 | The two grouped data formats can be converted from the xyz format by using [this script](../../scripts/convert_xyz.py). As an example, the data in the `systems` folder was created using the following command. 34 | ``` 35 | python ../../scripts/convert_xyz.py some/path/to/all/*.xyz -d systems -G 300 -P group 36 | ``` 37 | 38 |
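To make the second (grouped) format concrete, the following minimal sketch builds such a folder for 100 water monomers with random coordinates (the folder name and the data here are purely illustrative):
```python
import os
import numpy as np

n_frames, n_atoms = 100, 3
os.makedirs("systems/my_group", exist_ok=True)

charges = np.array([8.0, 1.0, 1.0])             # nuclear charges: O, H, H
coords = np.random.randn(n_frames, n_atoms, 3)  # spatial coordinates, in Bohr
atom = np.concatenate(
    [np.broadcast_to(charges[None, :, None], (n_frames, n_atoms, 1)), coords],
    axis=-1,
)  # shape: n_frames x n_atoms x 4
np.save("systems/my_group/atom.npy", atom)

# optional label files share the leading n_frames dimension, e.g.
# np.save("systems/my_group/energy.npy", energies)  # shape (n_frames,)
# np.save("systems/my_group/force.npy", forces)     # shape (n_frames, n_atoms, 3)
```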
39 | ## Train an energy model 40 | 41 | Training a perturbative energy model is a pure machine learning task. Please see the [DeePHF paper](https://arxiv.org/pdf/2005.00169.pdf) for a detailed explanation of the construction of the descriptors. Here we provide two sub-commands. `deepks scf` can do the Hartree-Fock calculation and save the descriptor (`dm_eig`) as well as the labels (`l_e_delta` for energy and `l_f_delta` for force) automatically. `deepks train` can use the dumped descriptors and labels to train a neural network model. 42 | 43 | To further simplify the procedure, we can combine the two steps together and use `deepks iterate` to run them sequentially. The required input files and execution scripts can be found in the `init` folder. There `machines.yaml` specifies the resources needed for the calculations. `params.yaml` specifies the parameters needed for the Hartree-Fock calculation and neural network training. `systems.yaml` specifies the data needed for training and testing. The name `init` reflects that this also serves as the initialization step of the self-consistent training described below. For the same reason, the `n_iter` attribute in `params.yaml` is set to 0, to avoid iterative training. 44 | 45 | As shown in `run.sh`, the input files can be loaded and run by 46 | ``` 47 | deepks iterate machines.yaml params.yaml systems.yaml 48 | ``` 49 | where `deepks` is a shortcut for `python -m deepks`. Or one can directly use `./run.sh` to run it in the background. Make sure you are in the `init` folder before you run the command. 50 | 51 | 52 | ## Train a self-consistent model 53 | 54 | To train a self-consistent model we follow the iterative approach described in the [DeePKS paper](https://arxiv.org/pdf/2008.00167.pdf). We provide `deepks iterate` as a tool to do the iteration automatically. Same as above, the example input file and execution scripts can be found in the `iter` folder. Note that here, instead of splitting the input into three files, we combine all input settings in one `args.yaml` file, to show that `deepks iterate` can take a variable number of input files. The file provided last has the highest priority. 55 | 56 | For each iteration, there are four steps using four corresponding tools provided by `deepks`. Each step corresponds to a row in the `RECORD` file, used to indicate which steps have finished. Each row has three numbers: the first corresponds to the iteration number, the second to the sub-folder in the iteration, and the third to the step in that folder. 57 | 58 | - `deepks scf` (`X 0 0`): do the SCF calculation with the given model and save the results 59 | - `deepks stats` (`X 0 1`): check the SCF results and print convergence and accuracy 60 | - `deepks train` (`X 1 0`): train a new model using the old one as the starting point 61 | - `deepks test` (`X 1 1`): test the model on all data to see the pure fitting error 62 | 63 | To run the iteration, again, use `./run.sh` or the following command 64 | ``` 65 | deepks iterate args.yaml 66 | ``` 67 | Make sure you are in the `iter` folder before you run the command. 68 | 69 | One can check `iter.*/00.scf/log.data` for the stats of the SCF results, `iter.*/01.train/log.train` for the training curve, and `iter.*/01.train/log.test` for the model prediction of $E_\delta$ (e_delta). 70 | 71 | 72 | ## Train a self-consistent model with density labels 73 | 74 | We provide in the `withdens` folder a set of inputs using density labels during the iterative training (as additional penalty terms in the Hamiltonian). We again follow the [DeePKS paper](https://arxiv.org/pdf/2008.00167.pdf), first adding a randomized penalty using the Coulomb loss for 5 iterations, then removing it and relaxing for another 5 iterations. 75 | 76 | Most of the inputs are the same as in the normal iterative training case described in the last section; these we put in `base.yaml`. We then overwrite `scf_input` in `penalty.yaml` to add the penalties, and also change the number of iterations `n_iter` in both `penalty.yaml` and `relax.yaml`. 77 | 78 | `pipe.sh` shows how we combine the different inputs together. A simplified version is as follows: 79 | ``` 80 | deepks iterate base.yaml penalty.yaml && deepks iterate base.yaml relax.yaml 81 | ``` 82 | The `iterate` command can take multiple input files, and the later ones overwrite the earlier ones. 83 | 84 | Again, running `./run.sh` in the `withdens` folder runs the commands in the background. You can check the results in the `iter.*` folders as above. -------------------------------------------------------------------------------- /examples/water_single/init/machines.yaml: -------------------------------------------------------------------------------- 1 | # this is only part of the input settings.
2 | # should be used together with systems.yaml and params.yaml 3 | 4 | scf_machine: 5 | # every system will be run as a separate command (a task) 6 | sub_size: 1 7 | # 4 tasks will be gathered into one group and submitted together as a shell script 8 | group_size: 4 9 | dispatcher: 10 | context: local 11 | batch: shell # set to shell to run on local machine, you can also use `slurm` 12 | remote_profile: null # not needed in local case 13 | # resources are no longer needed, though envs can still be set here 14 | resources: 15 | envs: 16 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 17 | python: "python" # use python in path 18 | 19 | train_machine: 20 | dispatcher: 21 | context: local 22 | batch: shell # same as above, use shell to run on local machine 23 | remote_profile: null # use lazy local 24 | python: "python" # use python in path 25 | # resources are no longer needed, and the task will use gpu automatically if there is one 26 | 27 | # other settings (these are default, can be omitted) 28 | cleanup: false # whether to delete slurm and err files 29 | strict: true # do not allow undefined machine parameters 30 | -------------------------------------------------------------------------------- /examples/water_single/init/params.yaml: -------------------------------------------------------------------------------- 1 | # this is only part of the input settings. 2 | # should be used together with systems.yaml and machines.yaml 3 | 4 | # number of iterations to do, can be set to zero for DeePHF training 5 | n_iter: 0 6 | 7 | # directory setting (these are default choices, can be omitted) 8 | workdir: "." 9 | share_folder: "share" # folder that stores all other settings 10 | 11 | # scf settings, set to false when n_iter = 0 to skip checking 12 | scf_input: false 13 | 14 | # train settings, set to false when n_iter = 0 to skip checking 15 | train_input: false 16 | 17 | # init settings, these are for the DeePHF task 18 | init_model: false # do not use an existing model to restart from 19 | 20 | init_scf: # parameters for the SCF calculation 21 | basis: ccpvdz 22 | # this is for pure energy training 23 | dump_fields: 24 | - e_base # Hartree-Fock energy 25 | - dm_eig # descriptors 26 | - conv # whether converged or not 27 | - l_e_delta # delta energy between e_base and reference, used as label 28 | verbose: 1 29 | mol_args: # args to be passed to pyscf.gto.Mole.build 30 | incore_anyway: True 31 | scf_args: # args to be passed to pyscf.scf.RHF.run 32 | conv_tol: 1e-8 33 | conv_check: false # pyscf conv_check has a bug 34 | 35 | init_train: # parameters for nn training 36 | model_args: 37 | hidden_sizes: [100, 100, 100] # neurons in hidden layers 38 | output_scale: 100 # the output will be divided by 100 before comparing with labels 39 | use_resnet: true # skip connection 40 | actv_fn: mygelu # same as gelu, supports force calculation 41 | data_args: 42 | batch_size: 16 43 | group_batch: 1 # can collect multiple systems in one batch 44 | preprocess_args: 45 | preshift: true # shift the descriptor by its mean 46 | prescale: false # scale the descriptor by its variance (can cause convergence problems) 47 | prefit_ridge: 1e1 # do a ridge regression as prefitting 48 | prefit_trainable: false 49 | train_args: 50 | decay_rate: 0.96 # learning rate decay factor 51 | decay_steps: 500 # decay the learning rate every this many steps 52 | display_epoch: 100 53 | n_epoch: 10000 54 | start_lr: 0.0003 55 | -------------------------------------------------------------------------------- /examples/water_single/init/run.sh:
-------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate machines.yaml params.yaml systems.yaml >> log.iter 2> err.iter & 2 | echo $! > PID -------------------------------------------------------------------------------- /examples/water_single/init/systems.yaml: -------------------------------------------------------------------------------- 1 | # this is only part of the input settings. 2 | # should be used together with params.yaml and machines.yaml 3 | 4 | # training and testing systems 5 | systems_train: # can also be files containing system paths 6 | - ../systems/group.0[0-2] # support glob 7 | 8 | systems_test: # if empty, use the last system of training set 9 | - ../systems/group.03 10 | -------------------------------------------------------------------------------- /examples/water_single/iter/args.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | n_iter: 5 4 | 5 | # training and testing systems 6 | systems_train: # can also be files containing system paths 7 | - ../systems/group.0[0-2] # support glob 8 | 9 | systems_test: # if empty, use the last system of training set 10 | - ../systems/group.03 11 | 12 | # directory setting 13 | workdir: "." 14 | share_folder: "share" # folder that stores all other settings 15 | 16 | # scf settings 17 | scf_input: # can also be specified by a separate file 18 | basis: ccpvdz 19 | # this is for force training 20 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 21 | verbose: 1 22 | mol_args: 23 | incore_anyway: True 24 | scf_args: 25 | conv_tol: 1e-6 26 | conv_tol_grad: 1e-2 27 | level_shift: 0.1 28 | diis_space: 20 29 | conv_check: false # pyscf conv_check has a bug 30 | 31 | scf_machine: 32 | # every system will be run as a separate command (a task) 33 | sub_size: 1 34 | # 4 tasks will be gathered into one group and submitted together as a shell script 35 | group_size: 4 36 | dispatcher: 37 | context: local 38 | batch: shell # set to shell to run on local machine 39 | remote_profile: null # not needed in local case 40 | # resources are no longer needed, though envs can still be set here 41 | resources: 42 | envs: 43 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 44 | python: "python" # use python in path 45 | 46 | # train settings 47 | train_input: 48 | # model_args is ignored, since this is used as restart 49 | data_args: 50 | batch_size: 16 51 | group_batch: 1 52 | extra_label: true 53 | conv_filter: true 54 | conv_name: conv 55 | preprocess_args: 56 | preshift: false # restarting model already shifted.
Will not recompute shift value 57 | prescale: false # same as above 58 | prefit_ridge: 1e1 59 | prefit_trainable: false 60 | train_args: 61 | decay_rate: 0.5 62 | decay_steps: 1000 63 | display_epoch: 100 64 | force_factor: 1 65 | n_epoch: 5000 66 | start_lr: 0.0001 67 | 68 | train_machine: 69 | dispatcher: 70 | context: local 71 | batch: shell # same as above, use shell to run on local machine 72 | remote_profile: null # use lazy local 73 | python: "python" # use python in path 74 | # resources are no longer needed, and the task will use gpu automatically if there is one 75 | 76 | # init settings 77 | init_model: false # do not use existing model in share_folder/init/model.pth 78 | 79 | init_scf: 80 | basis: ccpvdz 81 | # this is for pure energy training 82 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 83 | verbose: 1 84 | mol_args: 85 | incore_anyway: True 86 | scf_args: 87 | conv_tol: 1e-8 88 | conv_check: false # pyscf conv_check has a bug 89 | 90 | init_train: 91 | model_args: # necessary as this is init training 92 | hidden_sizes: [100, 100, 100] 93 | output_scale: 100 94 | use_resnet: true 95 | actv_fn: gelu 96 | data_args: 97 | batch_size: 16 98 | group_batch: 1 99 | preprocess_args: 100 | preshift: true 101 | prescale: false 102 | prefit_ridge: 1e1 103 | prefit_trainable: false 104 | train_args: 105 | decay_rate: 0.95 106 | decay_steps: 300 107 | display_epoch: 100 108 | n_epoch: 10000 109 | start_lr: 0.0003 110 | 111 | # other settings 112 | cleanup: false 113 | strict: true 114 | -------------------------------------------------------------------------------- /examples/water_single/iter/run.sh: -------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate args.yaml >> log.iter 2> err.iter & 2 | echo $! 
> PID 3 | -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/dm.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/force.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/dm.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/force.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/dm.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/force.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/dm.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/force.npy -------------------------------------------------------------------------------- /examples/water_single/withdens/base.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | n_iter: 0 # use 0 as a placeholder 4 | 5 | # training and testing systems 6 | systems_train: # can also be files containing system paths 7 | - ../systems/group.0[0-2] # support glob 8 | 9 | systems_test: # if empty, use the last system of training set 10 | - ../systems/group.03 11 | 12 | # directory setting 13 | workdir: "."
14 | share_folder: "share" # folder that stores all other settings 15 | 16 | # scf settings 17 | scf_input: # can also be specified by a separate file 18 | basis: ccpvdz 19 | # this is for force training 20 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 21 | verbose: 1 22 | mol_args: 23 | incore_anyway: True 24 | scf_args: 25 | conv_tol: 1e-6 26 | conv_tol_grad: 1e-2 27 | level_shift: 0.1 28 | diis_space: 20 29 | conv_check: false # pyscf conv_check has a bug 30 | 31 | scf_machine: 32 | # every system will be run as a separate command (a task) 33 | sub_size: 1 34 | # 4 tasks will be gathered into one group and submitted together as a shell script 35 | group_size: 4 36 | dispatcher: 37 | context: local 38 | batch: shell # set to shell to run on the local machine, you can also use `slurm` 39 | remote_profile: null # not needed in the local case 40 | # the resources section is no longer needed, though envs can still be set here 41 | resources: 42 | envs: 43 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 44 | python: "python" # use the python in PATH 45 | 46 | # train settings 47 | train_input: 48 | # model_args is ignored, since this is used as a restart 49 | data_args: 50 | batch_size: 16 51 | group_batch: 1 52 | extra_label: true 53 | conv_filter: true 54 | conv_name: conv 55 | preprocess_args: 56 | preshift: false # the restarted model is already shifted; the shift value will not be recomputed 57 | prescale: false # same as above 58 | prefit_ridge: 1e1 59 | prefit_trainable: false 60 | train_args: 61 | decay_rate: 0.5 62 | decay_steps: 1000 63 | display_epoch: 100 64 | force_factor: 1 65 | n_epoch: 5000 66 | start_lr: 0.0001 67 | 68 | train_machine: 69 | dispatcher: 70 | context: local 71 | batch: shell # same as above, use shell to run on the local machine 72 | remote_profile: null # use lazy local 73 | python: "python" # use the python in PATH 74 | # resources are no longer needed, and the task will use the gpu automatically if there is one 75 | 76 | # init settings 77 | init_model: false # do not use an existing model in share_folder/init/model.pth 78 | 79 | init_scf: 80 | basis: ccpvdz 81 | # this is for pure energy training 82 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 83 | verbose: 1 84 | mol_args: 85 | incore_anyway: True 86 | scf_args: 87 | conv_tol: 1e-8 88 | conv_check: false # pyscf conv_check has a bug 89 | 90 | init_train: 91 | model_args: # necessary as this is init training 92 | hidden_sizes: [100, 100, 100] 93 | output_scale: 100 94 | use_resnet: true 95 | actv_fn: mygelu 96 | data_args: 97 | batch_size: 16 98 | group_batch: 1 99 | preprocess_args: 100 | preshift: true 101 | prescale: false 102 | prefit_ridge: 1e1 103 | prefit_trainable: false 104 | train_args: 105 | decay_rate: 0.96 106 | decay_steps: 500 107 | display_epoch: 100 108 | n_epoch: 15000 109 | start_lr: 0.0003 110 | 111 | # other settings 112 | cleanup: false 113 | strict: true 114 | -------------------------------------------------------------------------------- /examples/water_single/withdens/penalty.yaml: -------------------------------------------------------------------------------- 1 | # overwriting the base config 2 | n_iter: 5 3 | 4 | # adding penalty 5 | scf_input: # can also be specified by a separate file 6 | basis: ccpvdz 7 | # this is for force training 8 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 9 | verbose: 1 10 | mol_args: 11 | incore_anyway: True 12 | scf_args: 13 | conv_tol: 1e-6 14 | conv_tol_grad: 1e-2 15 | level_shift: 0.1 16 |
diis_space: 20 17 | conv_check: false # pyscf conv_check has a bug 18 | penalty_terms: 19 | # Coulomb loss as penalty, random strength 20 | - type: coulomb 21 | required_labels: dm # where the label is stored (sysfolder/dm.npy) 22 | strength: 1 # can be larger, like 5 23 | random: true # the actual strength varies within [0, strength] -------------------------------------------------------------------------------- /examples/water_single/withdens/pipe.sh: -------------------------------------------------------------------------------- 1 | python -u -m deepks iterate base.yaml penalty.yaml >> log.iter 2> err.iter &&\ 2 | python -u -m deepks iterate base.yaml relax.yaml >> log.iter 2> err.iter -------------------------------------------------------------------------------- /examples/water_single/withdens/relax.yaml: -------------------------------------------------------------------------------- 1 | # overwriting the base config to run longer 2 | n_iter: 10 -------------------------------------------------------------------------------- /examples/water_single/withdens/run.sh: -------------------------------------------------------------------------------- 1 | nohup bash pipe.sh >/dev/null 2>&1 & 2 | echo $! > PID 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | paramiko 3 | ruamel.yaml 4 | torch 5 | pyscf -------------------------------------------------------------------------------- /scripts/convert_xyz.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from glob import glob 4 | 5 | 6 | BOHR = 0.52917721092 7 | ELEMENTS = ['X', # Ghost 8 | 'H' , 'He', 'Li', 'Be', 'B' , 'C' , 'N' , 'O' , 'F' , 'Ne', 9 | 'Na', 'Mg', 'Al', 'Si', 'P' , 'S' , 'Cl', 'Ar', 'K' , 'Ca', 10 | 'Sc', 'Ti', 'V' , 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 11 | 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y' , 'Zr', 12 | 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 13 | 'Sb', 'Te', 'I' , 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 14 | 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 15 | 'Lu', 'Hf', 'Ta', 'W' , 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 16 | 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 17 | 'Pa', 'U' , 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 18 | 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 19 | 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og', 20 | ] 21 | CHARGES = dict(((x,i) for i,x in enumerate(ELEMENTS))) 22 | 23 | 24 | def parse_xyz(filename): 25 | with open(filename) as fp: 26 | natom = int(fp.readline()) 27 | comments = fp.readline().strip() 28 | atom_str = fp.readlines() 29 | atom_list = [a.split() for a in atom_str if a.strip()] 30 | elements = [a[0] for a in atom_list] 31 | coords = np.array([a[1:] for a in atom_list], dtype=float) 32 | return natom, comments, elements, coords 33 | 34 | 35 | def parse_unit(rawunit): 36 | if isinstance(rawunit, str): 37 | try: 38 | unit = float(rawunit) 39 | except ValueError: 40 | if rawunit.upper().startswith(('B', 'AU')): 41 | unit = BOHR 42 | else: #unit[:3].upper() == 'ANG': 43 | unit = 1.
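# a quick sanity sketch of parse_unit's branches (return values assume the
# BOHR constant defined at the top of this file):
#   parse_unit("0.529") -> 0.529          (numeric string, parsed directly)
#   parse_unit("Bohr")  -> 0.52917721092  (starts with 'B' / 'AU')
#   parse_unit("Ang")   -> 1.0            (fallback: already in Angstrom)
#   parse_unit(2.0)     -> 2.0            (non-string passed through below)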
44 | else: 45 | unit = rawunit 46 | return unit 47 | 48 | 49 | def load_array(file): 50 | ext = os.path.splitext(file)[-1] 51 | if "npy" in ext: 52 | return np.load(file) 53 | elif "npz" in ext: 54 | raise NotImplementedError 55 | else: 56 | try: 57 | arr = np.loadtxt(file) 58 | except ValueError: 59 | arr = np.loadtxt(file, dtype=str) 60 | return arr 61 | 62 | 63 | def load_glob(pattern): 64 | [fn] = glob(pattern) 65 | return load_array(fn) 66 | 67 | 68 | def load_system(xyz_file): 69 | base, ext = os.path.splitext(xyz_file) 70 | assert ext == '.xyz' 71 | natom, _, ele, coord = parse_xyz(xyz_file) 72 | try: 73 | energy = load_glob(f"{base}.energy*").reshape(1) 74 | except Exception: 75 | energy = None 76 | try: 77 | force = load_glob(f"{base}.force*").reshape(natom, 3) 78 | except Exception: 79 | force = None 80 | try: 81 | dm = load_glob(f"{base}.dm*") 82 | nao = np.sqrt(dm.size).astype(int) 83 | dm = dm.reshape(nao, nao) 84 | except Exception: 85 | dm = None 86 | return ele, coord, energy, force, dm 87 | 88 | 89 | def dump_systems(xyz_files, dump_dir, unit="Bohr", ext_type=False): 90 | print(f"saving to {dump_dir} ... ", end="", flush=True) 91 | os.makedirs(dump_dir, exist_ok=True) 92 | if not xyz_files: 93 | print("empty list! did nothing") 94 | return 95 | unit = parse_unit(unit) 96 | a_ele, a_coord, a_energy, a_force, a_dm = map(np.array, 97 | zip(*[load_system(fl) for fl in xyz_files])) 98 | a_coord /= unit 99 | if ext_type: 100 | ele = a_ele[0] 101 | assert all(e == ele for e in a_ele), "element types of all xyz files have to be the same" 102 | np.savetxt(os.path.join(dump_dir, "type.raw"), ele, fmt="%s") 103 | np.save(os.path.join(dump_dir, "coord.npy"), a_coord) 104 | else: 105 | a_chg = [[[CHARGES[e]] for e in ele] for ele in a_ele] 106 | a_atom = np.concatenate([a_chg, a_coord], axis=-1) 107 | np.save(os.path.join(dump_dir, "atom.npy"), a_atom) 108 | if not all(ene is None for ene in a_energy): 109 | assert not any(ene is None for ene in a_energy) 110 | np.save(os.path.join(dump_dir, "energy.npy"), a_energy) 111 | if not all(ff is None for ff in a_force): 112 | assert not any(ff is None for ff in a_force) 113 | a_force *= unit 114 | np.save(os.path.join(dump_dir, "force.npy"), a_force) 115 | if not all(dm is None for dm in a_dm): 116 | assert not any(dm is None for dm in a_dm) 117 | np.save(os.path.join(dump_dir, "dm.npy"), a_dm) 118 | print("finished", flush=True) 119 | return 120 | 121 | 122 | def main(xyz_files, dump_dir=".", group_size=-1, group_prefix="sys", unit="Bohr", ext_type=False): 123 | if isinstance(xyz_files, str): 124 | xyz_files = [xyz_files] 125 | if group_size <= 0: 126 | dump_systems(xyz_files, dump_dir, unit=unit, ext_type=ext_type) 127 | return 128 | ns = len(xyz_files) 129 | ngroup = np.ceil(ns / group_size).astype(int) 130 | nd = max(len(str(ngroup)), 2) 131 | for i in range(ngroup): 132 | dump_systems(xyz_files[i*group_size:(i+1)*group_size], 133 | os.path.join(dump_dir, f"{group_prefix}.{i:0>{nd}d}"), 134 | unit=unit, ext_type=ext_type) 135 | return 136 | 137 | 138 | if __name__ == "__main__": 139 | import argparse 140 | parser = argparse.ArgumentParser( 141 | description="convert .xyz files and corresponding properties " 142 | "into systems with .npy files grouped in folders.", 143 | argument_default=argparse.SUPPRESS) 144 | parser.add_argument("xyz_files", metavar='FILE', nargs="+", 145 | help="input xyz files") 146 | parser.add_argument("-d", "--dump-dir", 147 | help="directory of dumped system, default is current dir") 148 | parser.add_argument("-U", "--unit", 149 | help="length
unit used to save npy files (assume xyz in Angstrom)") 150 | parser.add_argument("-G", "--group-size", type=int, 151 | help="if positive, split data into sub systems with the given size, default: -1") 152 | parser.add_argument("-P", "--group-prefix", 153 | help=r"save sub systems with given prefix as `$dump_dir/$prefix.ii`, default: sys") 154 | parser.add_argument("-T", "--ext-type", action="store_true", 155 | help="if set, save the element type into a separate `type.raw` file") 156 | args = parser.parse_args() 157 | 158 | main(**vars(args)) 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /scripts/legacy/calc_eig.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.distance import squareform, pdist 3 | 4 | 5 | def load_coords(filename): 6 | return np.loadtxt(filename, skiprows=2, usecols=[1,2,3]) 7 | 8 | 9 | def cosine_switching(x, lower=1.9, upper=2.0, threshold=1e-5): 10 | zx = x < threshold 11 | lx = x < lower 12 | ux = x > upper 13 | mx = (~lx) & (~ux) 14 | res = np.zeros_like(x) 15 | res[~zx & lx] = 1 16 | res[mx] = 0.5*np.cos(np.pi * (x[mx]-lower) / (upper-lower)) + 0.5 17 | return res 18 | 19 | 20 | def calc_weight(coords, lower=1.9, upper=2.0): 21 | natom = coords.shape[0] 22 | pair_dist = squareform(pdist(coords)) 23 | weight = cosine_switching(pair_dist, lower, upper).reshape(1, natom, natom) 24 | return weight 25 | 26 | 27 | def split(ci, shell): 28 | sec = [1]*shell[0] + [3]*shell[1] + [5]*shell[2] 29 | assert np.sum(sec) == ci.shape[-1] 30 | ci_list = np.split(ci, np.cumsum(sec)[:-1], axis=-1) 31 | return ci_list 32 | 33 | 34 | def calc_atom_eig(ci, shell=(12,12,12), frozen=0): 35 | ci_list = split(ci[:, frozen:], shell) 36 | dm_list = [np.einsum('niap,niaq->napq', _ci, _ci) for _ci in ci_list] 37 | eig_list = [np.linalg.eigvalsh(dm) for dm in dm_list] 38 | eig = np.concatenate(eig_list, -1) 39 | return eig 40 | 41 | 42 | def calc_atom_ener_eig(ci, ei, kernel=None, shell=(12,12,12), frozen=0): 43 | if kernel is not None: 44 | ei = kernel(ei) 45 | ci_list = split(ci[:, frozen:], shell) 46 | dm_list = [np.einsum('niap,niaq,ni->napq', _ci, _ci, ei[:, frozen:]) for _ci in ci_list] 47 | eig_list = [np.linalg.eigvalsh(dm) for dm in dm_list] 48 | eig = np.concatenate(eig_list, -1) 49 | return eig 50 | 51 | 52 | def calc_neighbor_eig(ci, weight=None, shell=(12,12,12), frozen=0): 53 | ci_list = split(ci[:, frozen:], shell) 54 | dm_list = [np.einsum('niap,nibq->nabpq', _ci, _ci) for _ci in ci_list] 55 | if weight is not None: 56 | dm_list = [np.einsum('nabpq,nab->nabpq', _dm, weight) for _dm in dm_list] 57 | eig_list = [np.linalg.eigvalsh(0.5*(_dm.sum(1) + _dm.sum(2))) for _dm in dm_list] 58 | eig = np.concatenate(eig_list, -1) 59 | return eig 60 | 61 | 62 | def calc_eig(name, ci, ei=None, xyz_file=None, shell=(12,12,12)): 63 | if name == 'dm_eig': 64 | return calc_atom_eig(ci, shell=shell) 65 | if name == 'od_eig': 66 | assert xyz_file is not None 67 | return calc_neighbor_eig(ci, calc_weight(load_coords(xyz_file)), shell=shell) 68 | if name == 'se_eig': 69 | assert ei is not None 70 | return calc_atom_ener_eig(ci, ei, kernel=None, shell=shell) 71 | if name == 'fe_eig': 72 | assert ei is not None 73 | return calc_atom_ener_eig(ci, ei, kernel=np.exp, shell=shell) 74 | 75 | raise ValueError(f'unsupported name: {name}') -------------------------------------------------------------------------------- /scripts/legacy/proj_dm.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyscf import gto 3 | import os 4 | import sys 5 | import argparse 6 | import mendeleev 7 | from calc_eig import calc_eig 8 | 9 | 10 | # aa = 2.0**np.arange(6,-3,-1) 11 | aa = 1.5**np.array([17,13,10,7,5,3,2,1,0,-1,-2,-3]) 12 | bb = np.diag(np.ones(aa.size)) - np.diag(np.ones(aa.size-1), k=1) 13 | SHELL = [aa.size] * 3 14 | coef = np.concatenate([aa.reshape(-1,1), bb], axis=1) 15 | BASIS = [[0, *coef.tolist()], [1, *coef.tolist()], [2, *coef.tolist()]] 16 | 17 | 18 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 19 | with open(filename) as fp: 20 | natoms = int(fp.readline()) 21 | comments = fp.readline() 22 | xyz_str = "".join(fp.readlines()) 23 | mol = gto.Mole() 24 | mol.verbose = 4 if verbose else 0 25 | mol.atom = xyz_str 26 | mol.basis = basis 27 | try: 28 | mol.build(0,0,unit="Ang") 29 | except RuntimeError as e: 30 | mol.spin = 1 31 | mol.build(0,0,unit="Ang") 32 | return mol 33 | 34 | 35 | def gen_proj(mol, intor = 'ovlp', verbose = False) : 36 | natm = mol.natm 37 | mole_coords = mol.atom_coords(unit="Ang") 38 | test_mol = gto.Mole() 39 | if verbose : 40 | test_mol.verbose = 4 41 | else : 42 | test_mol.verbose = 0 43 | test_mol.atom = [["Ne", coord] for coord in mole_coords] 44 | test_mol.basis = BASIS 45 | test_mol.spin = 0 46 | test_mol.build(0,0,unit="Ang") 47 | proj = gto.intor_cross(f'int1e_{intor}_sph', mol, test_mol) 48 | 49 | def proj_func(mo): 50 | proj_coeff = np.matmul(mo, proj).reshape(*mo.shape[:2], natm, -1) 51 | if verbose: 52 | print('shape of coeff data ', proj_coeff.shape) 53 | # res : nframe x nocc/nvir x natm x nproj 54 | return proj_coeff, proj_coeff.shape[-1] 55 | 56 | return proj_func 57 | 58 | 59 | def proj_frame(xyz_file, mo_dir, dump_dir=None, basis='ccpvtz', ename="e_hf", intor='ovlp', verbose=False): 60 | mol = parse_xyz(xyz_file, basis=basis) 61 | meta, ehf, e_occ, c_occ = load_data(mo_dir, ename) 62 | 63 | proj_func = gen_proj(mol, intor, verbose) 64 | c_proj_occ,nproj = proj_func(c_occ) 65 | c_occ = c_proj_occ 66 | meta = np.append(meta, nproj) 67 | # print(meta, c_proj_occ.shape) 68 | 69 | if dump_dir is not None: 70 | dump_data(dump_dir, meta, ehf, e_occ, c_occ) 71 | return meta, ehf, e_occ, c_occ 72 | 73 | 74 | def load_data(dir_name, ename="e_hf"): 75 | meta = np.loadtxt(os.path.join(dir_name, 'system.raw'), dtype=int).reshape(-1) 76 | natm = meta[0] 77 | nao = meta[1] 78 | nocc = meta[2] 79 | nvir = meta[3] 80 | ehf = np.loadtxt(os.path.join(dir_name, f'{ename}.raw')).reshape(-1, 1) 81 | e_occ = np.loadtxt(os.path.join(dir_name, 'ener_occ.raw')).reshape(-1, nocc) 82 | c_occ = np.loadtxt(os.path.join(dir_name, 'coeff_occ.raw')).reshape([-1, nocc, nao]) 83 | return meta, ehf, e_occ, c_occ 84 | 85 | 86 | def dump_data(dir_name, meta, ehf, e_occ, c_occ, dm_dict={}) : 87 | os.makedirs(dir_name, exist_ok = True) 88 | np.savetxt(os.path.join(dir_name, 'system.raw'), 89 | meta.reshape(1,-1), 90 | fmt = '%d', 91 | header = 'natm nao nocc nvir nproj') 92 | nframe = e_occ.shape[0] 93 | natm = meta[0] 94 | nao = meta[1] 95 | nocc = meta[2] 96 | nvir = meta[3] 97 | nproj = meta[4] 98 | # ntest == natm 99 | assert(all(c_occ.shape == np.array([nframe, nocc, natm, nproj], dtype=int))) 100 | assert(all(e_occ.shape == np.array([nframe, nocc], dtype=int))) 101 | assert(all(all(dm.shape == np.array([nframe, natm, nproj], dtype=int)) for dm in dm_dict.values())) 102 | np.save(os.path.join(dir_name, 'e_hf.npy'), ehf) 103 | 
np.save(os.path.join(dir_name, 'ener_occ.npy'), e_occ) 104 | np.save(os.path.join(dir_name, 'coeff_occ.npy'), c_occ) 105 | for name, dm in dm_dict.items(): 106 | np.save(os.path.join(dir_name, f'{name}.npy'), dm) 107 | 108 | 109 | def main(xyz_files, mo_dirs, dump_dir, basis='ccpvtz', ename="e_hf", eig_names=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], intor='ovlp', verbose=False): 110 | assert len(xyz_files) == len(mo_dirs) 111 | oldmeta = None 112 | all_e_hf = [] 113 | all_e_occ = [] 114 | all_c_occ = [] 115 | all_dm_dict = {name:[] for name in eig_names} 116 | 117 | for xf, md in zip(xyz_files, mo_dirs): 118 | meta, e_hf, e_occ, c_occ = proj_frame(xf, md, basis=basis, ename=ename, intor=intor, verbose=verbose) 119 | if oldmeta is not None: 120 | assert all(oldmeta == meta), "all frames have to belong to the same system, thus meta has to be equal!" 121 | oldmeta = meta 122 | all_e_hf.append(e_hf) 123 | all_e_occ.append(e_occ) 124 | all_c_occ.append(c_occ) 125 | for name, dm_list in all_dm_dict.items(): 126 | dm_list.append(2 * calc_eig(name, c_occ, e_occ, xf, shell=SHELL)) # multiply by 2 for restricted method, doubly occupied orbitals 127 | print(f"{xf} && {md} finished") 128 | 129 | all_e_hf = np.concatenate(all_e_hf) 130 | all_e_occ = np.concatenate(all_e_occ) 131 | all_c_occ = np.concatenate(all_c_occ) 132 | for name in all_dm_dict.keys(): 133 | all_dm_dict[name] = np.concatenate(all_dm_dict[name]) 134 | 135 | dump_data(dump_dir, meta, all_e_hf, all_e_occ, all_c_occ, all_dm_dict) 136 | print("done") 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = argparse.ArgumentParser(description="project mo_coeffs into atomic basis and calculate descriptors.") 141 | parser.add_argument("-x", "--xyz-file", nargs="+", help="input xyz file(s), if more than one, concat them") 142 | parser.add_argument("-f", "--mo-dir", nargs="+", help="input mo folder(s), must match the number of xyz files") 143 | parser.add_argument("-d", "--dump-dir", default=".", help="dir of dumped files, if not specified, use current folder") 144 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 145 | parser.add_argument("-I", "--intor", default="ovlp", help="intor string used to calculate int1e") 146 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 147 | parser.add_argument("-e", "--ename", default="e_hf", help="file name for total energy") 148 | parser.add_argument("-E", "--eig-name", nargs="*", default=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], 149 | help="name of eigen values to be calculated and dumped") 150 | args = parser.parse_args() 151 | 152 | main(args.xyz_file, args.mo_dir, args.dump_dir, args.basis, 153 | args.ename, args.eig_name, args.intor, args.verbose) -------------------------------------------------------------------------------- /scripts/legacy/rhf.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from pyscf import gto, scf, lib 5 | from pyscf.mp.mp2 import _mo_energy_without_core 6 | from time import time 7 | import os 8 | import sys 9 | import argparse 10 | 11 | 12 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 13 | with open(filename) as fp: 14 | natoms = int(fp.readline()) 15 | comments = fp.readline() 16 | xyz_str = "".join(fp.readlines()) 17 | mol = gto.Mole() 18 | mol.verbose = 4 if verbose else 0 19 | mol.atom = xyz_str 20 | mol.basis = basis 21 | mol.build(0,0,unit="Ang") 22 | return mol 23 |
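# minimal usage sketch for the helper above (the file name is illustrative):
#   mol = parse_xyz("water.xyz", basis="ccpvdz", verbose=True)
#   print(mol.natm, mol.nao)  # e.g. 3 atoms and 24 AOs for water in cc-pVDZ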
24 | 25 | def fix_gauge(mo_coeff) : 26 | nvec = mo_coeff.shape[1] 27 | ndim = mo_coeff.shape[0] 28 | ret = np.zeros(mo_coeff.shape) 29 | count = 0 30 | for ii in range(nvec) : 31 | for jj in range(ndim) : 32 | if np.sign(mo_coeff[jj,ii]) != 0 : 33 | break 34 | if np.sign(mo_coeff[jj,ii]) == 0 : # loop ended without break 35 | # mo_coeff[:,ii] == 0 36 | assert(np.max(np.abs(mo_coeff[:,ii])) == 0) 37 | raise RuntimeError( 'ERROR: zero eigen func, should not happen') 38 | continue 39 | else : 40 | if (jj != 0) : 41 | print('gauge ref is not 0') 42 | factor = np.sign(mo_coeff[jj,ii]) 43 | ret[:,ii] = factor * mo_coeff[:,ii] 44 | count += 1 45 | # break 46 | # print(count) 47 | return ret 48 | 49 | 50 | def mol_electron(mol, chkfile=None, verbose=False) : 51 | if verbose: 52 | start_t = time() 53 | nao = mol.nao 54 | natm = mol.natm 55 | rhf = scf.RHF(mol) 56 | if chkfile: 57 | rhf.set(chkfile=chkfile) 58 | erhf = rhf.kernel() 59 | if verbose: 60 | rhf_t = time() 61 | print(f"time of rhf: {rhf_t - start_t}") 62 | 63 | mo_energy = rhf.mo_energy 64 | mo_occ = rhf.mo_occ 65 | # mo_coeff = rhf.mo_coeff 66 | mo_coeff_ = rhf.mo_coeff 67 | mo_coeff= fix_gauge(mo_coeff_) 68 | occ_a = (mo_occ>0) 69 | # occ_b = (mo_occ[1]>0) 70 | vir_a = (mo_occ==0) 71 | # vir_b = (mo_occ[1]==0) 72 | nocc_a = sum(occ_a) 73 | # nocc_b = sum(occ_b) 74 | nocc = nocc_a 75 | nvir_a = sum(vir_a) 76 | # nvir_b = sum(vir_b) 77 | nvir = nvir_a 78 | assert(nocc + nvir == nao) 79 | if verbose : 80 | print('nao = %d, nocc = %d, nvir = %d' % \ 81 | (nao, nocc, nvir)) 82 | print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape) 83 | c_occ = mo_coeff[:,occ_a] 84 | c_vir = mo_coeff[:,vir_a] 85 | e_occ = mo_energy[occ_a] 86 | e_vir = mo_energy[vir_a] 87 | c_occ = c_occ.T 88 | c_vir = c_vir.T 89 | meta = [natm, nao, nocc, nvir] 90 | if verbose : 91 | print('shape of coeff data ', c_occ.shape) 92 | print('shape of ener data ', e_occ.shape) 93 | print('shape of coeff data ', c_vir.shape) 94 | print('shape of ener data ', e_vir.shape) 95 | print('E(RHF) = %.9g' % erhf) 96 | return meta, erhf, (e_occ, e_vir), (c_occ, c_vir) 97 | # return erhf, myemp2, ener_data, coeff_data 98 | 99 | 100 | def dump_data(dir_name, meta, ehf, e_data, c_data) : 101 | os.makedirs(dir_name, exist_ok = True) 102 | np.savetxt(os.path.join(dir_name, 'system.raw'), 103 | np.array(meta).reshape(1,-1), 104 | fmt = '%d', 105 | header = 'natm nao nocc nvir') 106 | nframe = 1 107 | natm = meta[0] 108 | nao = meta[1] 109 | nocc = meta[2] 110 | nvir = meta[3] 111 | # ntest == natm 112 | assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int))) 113 | assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int))) 114 | assert(all(e_data[0].shape == np.array([nocc], dtype = int))) 115 | assert(all(e_data[1].shape == np.array([nvir], dtype = int))) 116 | np.savetxt(os.path.join(dir_name, 'e_hf.raw'), np.reshape(ehf, [nframe,1])) 117 | np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1])) 118 | np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1])) 119 | np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1])) 120 | np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1])) 121 | 122 | 123 | def gen_frame(xyz_file, basis='ccpvtz', dump_dir=None, verbose=False): 124 | if dump_dir is None: 125 | dump_dir = os.path.splitext(xyz_file)[0] 126 | mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose) 127 | mol_meta, ehf, e_data, c_data = mol_electron(mol, verbose=verbose) 128 |
dump_data(dump_dir, mol_meta, ehf, e_data, c_data) 129 | 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser(description="Calculate and save RHF energy and mo_coeffs for given xyz files.") 133 | parser.add_argument("files", nargs="+", help="input xyz files") 134 | parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, use the same dir as input") 135 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 136 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 137 | args = parser.parse_args() 138 | 139 | for fn in args.files: 140 | if args.dump_dir is None: 141 | dump = None 142 | else: 143 | dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0]) 144 | try: 145 | gen_frame(fn, args.basis, dump, args.verbose) 146 | print(f"{fn} finished") 147 | except Exception as e: 148 | print(f"{fn} failed,", e, file=sys.stderr) 149 | raise 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /scripts/legacy/rks.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from pyscf import gto, dft, lib 5 | from pyscf.mp.mp2 import _mo_energy_without_core 6 | from time import time 7 | import os 8 | import sys 9 | import argparse 10 | 11 | 12 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 13 | with open(filename) as fp: 14 | natoms = int(fp.readline()) 15 | comments = fp.readline() 16 | xyz_str = "".join(fp.readlines()) 17 | mol = gto.Mole() 18 | mol.verbose = 4 if verbose else 0 19 | mol.atom = xyz_str 20 | mol.basis = basis 21 | mol.build(0,0,unit="Ang") 22 | return mol 23 | 24 | 25 | def fix_gauge(mo_coeff) : 26 | nvec = mo_coeff.shape[1] 27 | ndim = mo_coeff.shape[0] 28 | ret = np.zeros(mo_coeff.shape) 29 | count = 0 30 | for ii in range(nvec) : 31 | for jj in range(ndim) : 32 | if np.sign(mo_coeff[jj,ii]) != 0 : 33 | break 34 | if np.sign(mo_coeff[jj,ii]) == 0 : # loop ended without break 35 | # mo_coeff[:,ii] == 0 36 | assert(np.max(np.abs(mo_coeff[:,ii])) == 0) 37 | raise RuntimeError( 'ERROR: zero eigen func, should not happen') 38 | continue 39 | else : 40 | if (jj != 0) : 41 | print('gauge ref is not 0') 42 | factor = np.sign(mo_coeff[jj,ii]) 43 | ret[:,ii] = factor * mo_coeff[:,ii] 44 | count += 1 45 | # break 46 | # print(count) 47 | return ret 48 | 49 | 50 | def mol_electron(mol, xc='pbe', chkfile=None, verbose=False) : 51 | if verbose: 52 | start_t = time() 53 | nao = mol.nao 54 | natm = mol.natm 55 | rks = dft.RKS(mol) 56 | rks.xc = xc 57 | if chkfile: 58 | rks.set(chkfile=chkfile) 59 | erks = rks.kernel() 60 | if verbose: 61 | rks_t = time() 62 | print(f"time of rks: {rks_t - start_t}") 63 | 64 | mo_energy = rks.mo_energy 65 | mo_occ = rks.mo_occ 66 | # mo_coeff = rks.mo_coeff 67 | mo_coeff_ = rks.mo_coeff 68 | mo_coeff= fix_gauge(mo_coeff_) 69 | occ_a = (mo_occ>0) 70 | # occ_b = (mo_occ[1]>0) 71 | vir_a = (mo_occ==0) 72 | # vir_b = (mo_occ[1]==0) 73 | nocc_a = sum(occ_a) 74 | # nocc_b = sum(occ_b) 75 | nocc = nocc_a 76 | nvir_a = sum(vir_a) 77 | # nvir_b = sum(vir_b) 78 | nvir = nvir_a 79 | assert(nocc + nvir == nao) 80 | if verbose : 81 | print('nao = %d, nocc = %d, nvir = %d' % \ 82 | (nao, nocc, nvir)) 83 | print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape) 84 | c_occ = mo_coeff[:,occ_a] 85 | c_vir = mo_coeff[:,vir_a] 86 | e_occ = mo_energy[occ_a] 87 | e_vir =
mo_energy[vir_a] 88 | c_occ = c_occ.T 89 | c_vir = c_vir.T 90 | meta = [natm, nao, nocc, nvir] 91 | if verbose : 92 | print('shape of coeff data ', c_occ.shape) 93 | print('shape of ener data ', e_occ.shape) 94 | print('shape of coeff data ', c_vir.shape) 95 | print('shape of ener data ', e_vir.shape) 96 | print('E(RKS) = %.9g' % erks) 97 | return meta, erks, (e_occ, e_vir), (c_occ, c_vir) 98 | # return erks, myemp2, ener_data, coeff_data 99 | 100 | 101 | def dump_data(dir_name, meta, ehf, e_data, c_data) : 102 | os.makedirs(dir_name, exist_ok = True) 103 | np.savetxt(os.path.join(dir_name, 'system.raw'), 104 | np.array(meta).reshape(1,-1), 105 | fmt = '%d', 106 | header = 'natm nao nocc nvir') 107 | nframe = 1 108 | natm = meta[0] 109 | nao = meta[1] 110 | nocc = meta[2] 111 | nvir = meta[3] 112 | # ntest == natm 113 | assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int))) 114 | assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int))) 115 | assert(all(e_data[0].shape == np.array([nocc], dtype = int))) 116 | assert(all(e_data[1].shape == np.array([nvir], dtype = int))) 117 | np.savetxt(os.path.join(dir_name, 'e_dft.raw'), np.reshape(ehf, [nframe,1])) 118 | np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1])) 119 | np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1])) 120 | np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1])) 121 | np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1])) 122 | 123 | 124 | def gen_frame(xyz_file, basis='ccpvtz', xc='pbe', dump_dir=None, verbose=False): 125 | if dump_dir is None: 126 | dump_dir = os.path.splitext(xyz_file)[0] 127 | mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose) 128 | mol_meta, ehf, e_data, c_data = mol_electron(mol, xc=xc, verbose=verbose) 129 | dump_data(dump_dir, mol_meta, ehf, e_data, c_data) 130 | 131 | 132 | def main(): 133 | parser = argparse.ArgumentParser(description="Calculate and save RKS (DFT) energy and mo_coeffs for given xyz files.") 134 | parser.add_argument("files", nargs="+", help="input xyz files") 135 | parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, use the same dir as input") 136 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 137 | parser.add_argument("-X", "--xc", default='pbe', type=str, help="xc functional") 138 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 139 | args = parser.parse_args() 140 | 141 | for fn in args.files: 142 | if args.dump_dir is None: 143 | dump = None 144 | else: 145 | dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0]) 146 | try: 147 | gen_frame(fn, args.basis, args.xc, dump, args.verbose) 148 | print(f"{fn} finished") 149 | except Exception as e: 150 | print(f"{fn} failed,", e, file=sys.stderr) 151 | raise 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /scripts/legacy/rmp2.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from pyscf import gto, scf, mp, lib 5 | from pyscf.mp.mp2 import _mo_energy_without_core 6 | from time import time 7 | import os 8 | import sys 9 | import argparse 10 | 11 | 12 | def my_kernel(mp, mo_energy=None, mo_coeff=None, eris=None, with_eij=True): 13 | if
mo_energy is None or mo_coeff is None: 14 | if mp.mo_energy is None or mp.mo_coeff is None: 15 | raise RuntimeError('mo_coeff, mo_energy are not initialized.\n' 16 | 'You may need to call mf.kernel() to generate them.') 17 | mo_coeff = None 18 | mo_energy = _mo_energy_without_core(mp, mp.mo_energy) 19 | else: 20 | # For backward compatibility. In pyscf-1.4 or earlier, mp.frozen is 21 | # not supported when mo_energy or mo_coeff is given. 22 | assert(mp.frozen == 0 or mp.frozen is None) 23 | 24 | if eris is None: eris = mp.ao2mo(mo_coeff) 25 | 26 | nocc = mp.nocc 27 | nvir = mp.nmo - nocc 28 | eia = mo_energy[:nocc,None] - mo_energy[None,nocc:] 29 | 30 | if with_eij: 31 | eij = np.empty((nocc,nocc), dtype=eia.dtype) 32 | else: 33 | eij = None 34 | 35 | emp2 = 0 36 | for i in range(nocc): 37 | gi = np.asarray(eris.ovov[i*nvir:(i+1)*nvir]) 38 | gi = gi.reshape(nvir,nocc,nvir).transpose(1,0,2) 39 | t2i = gi.conj()/lib.direct_sum('jb+a->jba', eia, eia[i]) 40 | tmp_eij = 2 * np.einsum('jab,jab->j', t2i, gi) - np.einsum('jab,jba->j', t2i, gi) 41 | emp2 += tmp_eij.sum() 42 | if with_eij: 43 | eij[i] = tmp_eij 44 | 45 | return emp2.real, eij.real 46 | 47 | 48 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 49 | with open(filename) as fp: 50 | natoms = int(fp.readline()) 51 | comments = fp.readline() 52 | xyz_str = "".join(fp.readlines()) 53 | mol = gto.Mole() 54 | mol.verbose = 4 if verbose else 0 55 | mol.atom = xyz_str 56 | mol.basis = basis 57 | mol.build(0,0,unit="Ang") 58 | return mol 59 | 60 | 61 | def fix_gauge(mo_coeff) : 62 | nvec = mo_coeff.shape[1] 63 | ndim = mo_coeff.shape[0] 64 | ret = np.zeros(mo_coeff.shape) 65 | count = 0 66 | for ii in range(nvec) : 67 | for jj in range(ndim) : 68 | if np.sign(mo_coeff[jj,ii]) != 0 : 69 | break 70 | if np.sign(mo_coeff[jj,ii]) == 0 : # loop ended without break 71 | # mo_coeff[:,ii] == 0 72 | assert(np.max(np.abs(mo_coeff[:,ii])) == 0) 73 | raise RuntimeError( 'ERROR: zero eigen func, should not happen') 74 | continue 75 | else : 76 | if (jj != 0) : 77 | print('gauge ref is not 0') 78 | factor = np.sign(mo_coeff[jj,ii]) 79 | ret[:,ii] = factor * mo_coeff[:,ii] 80 | count += 1 81 | # break 82 | # print(count) 83 | return ret 84 | 85 | 86 | def mol_electron(mol, frozen=0, chkfile=None, verbose=False) : 87 | if verbose: 88 | start_t = time() 89 | nao = mol.nao 90 | natm = mol.natm 91 | rhf = scf.RHF(mol) 92 | if chkfile: 93 | rhf.set(chkfile=chkfile) 94 | erhf = rhf.kernel() 95 | if verbose: 96 | rhf_t = time() 97 | print(f"time of rhf: {rhf_t - start_t}") 98 | 99 | mo_energy = rhf.mo_energy 100 | mo_occ = rhf.mo_occ 101 | # mo_coeff = rhf.mo_coeff 102 | mo_coeff_ = rhf.mo_coeff 103 | mo_coeff= fix_gauge(mo_coeff_) 104 | occ_a = (mo_occ>0) 105 | occ_a[:frozen] = False 106 | # occ_b = (mo_occ[1]>0) 107 | vir_a = (mo_occ==0) 108 | # vir_b = (mo_occ[1]==0) 109 | nocc_a = sum(occ_a) 110 | # nocc_b = sum(occ_b) 111 | nocc = nocc_a 112 | nvir_a = sum(vir_a) 113 | # nvir_b = sum(vir_b) 114 | nvir = nvir_a 115 | assert(nocc + nvir + frozen == nao) 116 | if verbose : 117 | print('nao = %d, nocc = %d, nvir = %d' % \ 118 | (nao, nocc, nvir)) 119 | print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape) 120 | c_occ = mo_coeff[:,occ_a] 121 | c_vir = mo_coeff[:,vir_a] 122 | e_occ = mo_energy[occ_a] 123 | e_vir = mo_energy[vir_a] 124 | c_occ = c_occ.T 125 | c_vir = c_vir.T 126 | meta = [natm, nao, nocc, nvir] 127 | if verbose : 128 | print('shape of coeff data ', c_occ.shape) 129 | print('shape of ener data ', e_occ.shape) 130 | print('shape of coeff data ', c_vir.shape) 131 |
print('shape of ener data ', e_vir.shape) 132 | mid_t = time() 133 | # print(f"time of collecting results: {mid_t - rhf_t}") 134 | 135 | mp2 = mp.MP2(rhf, frozen=frozen) 136 | # emp2 = mp2.kernel() 137 | emp2, emp2_ij = my_kernel(mp2) 138 | if verbose : 139 | print('E(HF) = %.9g' % erhf) 140 | print('E(RMP2) = %.9g' % emp2) 141 | print(f"time of mp2: {time()-mid_t}") 142 | return meta, erhf, emp2, emp2_ij, (e_occ, e_vir), (c_occ, c_vir) 143 | # return erhf, myemp2, ener_data, coeff_data 144 | 145 | 146 | def dump_data(dir_name, meta, ehf, emp2, ec_ij, e_data, c_data) : 147 | os.makedirs(dir_name, exist_ok = True) 148 | np.savetxt(os.path.join(dir_name, 'system.raw'), 149 | np.array(meta).reshape(1,-1), 150 | fmt = '%d', 151 | header = 'natm nao nocc nvir') 152 | nframe = 1 153 | natm = meta[0] 154 | nao = meta[1] 155 | nocc = meta[2] 156 | nvir = meta[3] 157 | # ntest == natm 158 | assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int))) 159 | assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int))) 160 | assert(all(e_data[0].shape == np.array([nocc], dtype = int))) 161 | assert(all(e_data[1].shape == np.array([nvir], dtype = int))) 162 | assert(all(ec_ij.shape == np.array([nocc, nocc], dtype = int))) 163 | np.savetxt(os.path.join(dir_name, 'e_hf.raw'), np.reshape(ehf, [nframe,1])) 164 | np.savetxt(os.path.join(dir_name, 'e_mp2.raw'), np.reshape(emp2, [nframe,1])) 165 | np.savetxt(os.path.join(dir_name, 'ec_ij.raw'), ec_ij.reshape([nframe, -1])) 166 | np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1])) 167 | np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1])) 168 | np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1])) 169 | np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1])) 170 | 171 | 172 | def gen_frame(xyz_file, basis='ccpvtz', frozen=0, dump_dir=None, verbose=False): 173 | if dump_dir is None: 174 | dump_dir = os.path.splitext(xyz_file)[0] 175 | mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose) 176 | mol_meta, ehf, emp2, ec_ij, e_data, c_data = mol_electron(mol, frozen=frozen, verbose=verbose) 177 | dump_data(dump_dir, mol_meta, ehf, emp2, ec_ij, e_data, c_data) 178 | 179 | 180 | def main(): 181 | parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.") 182 | parser.add_argument("files", nargs="+", help="input xyz files") 183 | parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, use the same dir as input") 184 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 185 | parser.add_argument("-F", "--frozen", default=0, type=int, help="number of orbitals to freeze when calculating mp2") 186 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 187 | args = parser.parse_args() 188 | 189 | for fn in args.files: 190 | if args.dump_dir is None: 191 | dump = None 192 | else: 193 | dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0]) 194 | try: 195 | gen_frame(fn, args.basis, args.frozen, dump, args.verbose) 196 | print(f"{fn} finished") 197 | except Exception as e: 198 | print(f"{fn} failed,", e, file=sys.stderr) 199 | raise 200 | 201 | 202 | if __name__ == "__main__": 203 | main() 204 | -------------------------------------------------------------------------------- /scripts/solve_mol.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -N 1 3 | #SBATCH -c 10 4 | #SBATCH -t 24:00:00 5 | #SBATCH --mem=32G 6 | 7 | import time 8 | import numpy as np 9 | from pyscf import gto, scf 10 | 11 | BOHR = 0.52917721092 12 | 13 | _NO_FORCE = False 14 | _NO_DM = False 15 | _MUST_UNRES = False 16 | _USE_NEWTON = False 17 | 18 | def parse_xyz(filename, basis='ccpvdz', **kwargs): 19 | with open(filename) as fp: 20 | natoms = int(fp.readline()) 21 | comments = fp.readline() 22 | xyz_str = "".join(fp.readlines()) 23 | mol = gto.Mole() 24 | mol.atom = xyz_str 25 | mol.basis = basis 26 | mol.set(**kwargs) 27 | if "spin" not in kwargs: 28 | mol.spin = mol.nelectron % 2 29 | mol.build(0,0,unit="Ang") 30 | return mol 31 | 32 | 33 | def get_method(name: str): 34 | lname = name.lower() 35 | if lname == "hf": 36 | return calc_hf 37 | if lname[:3] == "dft": 38 | xc = lname.split("@")[1] if "@" in lname else "pbe" 39 | return lambda mol, **scfargs: calc_dft(mol, xc, **scfargs) 40 | if lname == "mp2": 41 | return calc_mp2 42 | if lname == "ccsd": 43 | return calc_ccsd 44 | if lname.startswith(("ccsd_t", "ccsd-t", "ccsd(t)")): 45 | return calc_ccsd_t 46 | if lname == "fci": 47 | return calc_fci 48 | raise ValueError(f"Unknown calculation method: {name}") 49 | 50 | def solve_scf(mol, **scfargs): 51 | HFmethod = scf.HF if not _MUST_UNRES else scf.UHF 52 | mf = HFmethod(mol).set(init_guess_breaksym=True, **scfargs) # apply user scf args, e.g. conv_tol 53 | init_dm = mf.get_init_guess() 54 | # if _MUST_UNRES: 55 | # init_dm[1][:2,:2] = 0 56 | mf.kernel(init_dm) 57 | if _USE_NEWTON: 58 | mf = scf.fast_newton(mf) 59 | return mf 60 | 61 | def calc_hf(mol, **scfargs): 62 | mf = solve_scf(mol, **scfargs) 63 | if not mf.converged: 64 | raise RuntimeError("SCF not converged!") 65 | etot = mf.e_tot 66 | grad = mf.nuc_grad_method().kernel() if not _NO_FORCE else None 67 | rdm = mf.make_rdm1() if not _NO_DM else None 68 | return etot, grad, rdm 69 | 70 | def calc_dft(mol, xc="pbe", **scfargs): 71 | from pyscf import dft 72 | KSmethod = dft.KS if not _MUST_UNRES else dft.UKS 73 | mf = KSmethod(mol, xc).run(**scfargs) 74 | if not mf.converged: 75 | raise RuntimeError("SCF not converged!") 76 | etot = mf.e_tot 77 | if _NO_FORCE or dft.libxc.xc_type(xc) in ('MGGA', 'NLC'): 78 | grad = None 79 | else: 80 | grad = mf.nuc_grad_method().kernel() 81 | rdm = mf.make_rdm1() if not _NO_DM else None 82 | return etot, grad, rdm 83 | 84 | def calc_mp2(mol, **scfargs): 85 | import pyscf.mp 86 | mf = solve_scf(mol, **scfargs) 87 | if not mf.converged: 88 | raise RuntimeError("SCF not converged!") 89 | postmf = pyscf.mp.MP2(mf).run() 90 | etot = postmf.e_tot 91 | grad = postmf.nuc_grad_method().kernel() if not _NO_FORCE else None 92 | return etot, grad, None 93 | 94 | def calc_ccsd(mol, **scfargs): 95 | import pyscf.cc 96 | mf = solve_scf(mol, **scfargs) 97 | if not mf.converged: 98 | raise RuntimeError("SCF not converged!") 99 | mycc = mf.CCSD().run() 100 | etot = mycc.e_tot 101 | grad = mycc.nuc_grad_method().kernel() if not _NO_FORCE else None 102 | ccdm = np.einsum('...pi,...ij,...qj->...pq', 103 | mf.mo_coeff, mycc.make_rdm1(), mf.mo_coeff.conj()) if not _NO_DM else None 104 | return etot, grad, ccdm 105 | 106 | def calc_ccsd_t(mol, **scfargs): 107 | import pyscf.cc 108 | mf = solve_scf(mol, **scfargs) 109 | if not mf.converged: 110 | raise RuntimeError("SCF not converged!") 111 | mycc = mf.CCSD().run() 112 | et_correction = mycc.ccsd_t() 113 | etot = mycc.e_tot + et_correction 114 | if _NO_FORCE: 115 |
return etot, None, None 116 | import pyscf.grad.ccsd_t as ccsd_t_grad 117 | grad = ccsd_t_grad.Gradients(mycc).kernel() 118 | return etot, grad, None 119 | 120 | def calc_fci(mol, **scfargs): 121 | import pyscf.fci 122 | mf = solve_scf(mol, **scfargs) 123 | if not mf.converged: 124 | raise RuntimeError("SCF not converged!") 125 | myci = pyscf.fci.FCI(mf) 126 | etot, fcivec = myci.kernel() 127 | rdm = np.einsum('...pi,...ij,...qj->...pq', 128 | mf.mo_coeff, 129 | myci.make_rdm1s(fcivec, mol.nao, mol.nelec), 130 | mf.mo_coeff.conj()).sum(0) if not _NO_DM else None 131 | return etot, None, rdm 132 | 133 | 134 | if __name__ == "__main__": 135 | import argparse 136 | import os 137 | parser = argparse.ArgumentParser(description="Calculate and save energy, force and density matrix for given xyz files.") 138 | parser.add_argument("files", nargs="+", help="input xyz files") 139 | parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file") 140 | parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information") 141 | parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation") 142 | parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule") 143 | parser.add_argument("-S", "--spin", default=0, type=int, help="net spin of the molecule") 144 | parser.add_argument("-M", "--method", default="ccsd", help="method used to do the calculation. supports HF, DFT@xc, MP2, CCSD, CCSD(T) and FCI") 145 | parser.add_argument("-U", "--unrestrict", action="store_true", help="force using unrestricted methods") 146 | parser.add_argument("-NF", "--no-force", action="store_true", help="do not calculate force") 147 | parser.add_argument("-ND", "--no-dm", action="store_true", help="do not calculate dm") 148 | parser.add_argument("-SO", "--newton", action="store_true", help="allow using the newton method when scf is not converged") 149 | parser.add_argument("--scf-input", help="yaml file to specify scf arguments") 150 | args = parser.parse_args() 151 | 152 | if args.unrestrict: _MUST_UNRES = True 153 | if args.no_force: _NO_FORCE = True 154 | if args.no_dm: _NO_DM = True 155 | if args.newton: _USE_NEWTON = True 156 | 157 | scfargs = {} 158 | if args.scf_input is not None: 159 | import ruamel.yaml as yaml 160 | with open(args.scf_input, 'r') as fp: 161 | scfargs = yaml.safe_load(fp) 162 | if args.dump_dir is not None: 163 | os.makedirs(args.dump_dir, exist_ok = True) 164 | calculator = get_method(args.method) 165 | 166 | for fn in args.files: 167 | tic = time.time() 168 | mol = parse_xyz(fn, args.basis, verbose=args.verbose, charge=args.charge, spin=args.spin) 169 | try: 170 | res = calculator(mol, **scfargs) 171 | except RuntimeError as err: 172 | print(fn, f"failed, {err}") 173 | continue 174 | etot, grad, rdm = res 175 | if args.dump_dir is None: 176 | dump_dir = os.path.dirname(fn) 177 | else: 178 | dump_dir = args.dump_dir 179 | dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) 180 | np.save(dump+".energy.npy", [etot]) 181 | if grad is not None: 182 | force = -grad / BOHR 183 | np.save(dump+".force.npy", force) 184 | if rdm is not None: 185 | np.save(dump+".dm.npy", rdm) 186 | if args.verbose: 187 | print(fn, f"done, time = {time.time()-tic}") -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import setuptools 3 | 4 | 5 | here =
pathlib.Path(__file__).parent.resolve() 6 | readme = (here / 'README.md').read_text(encoding='utf-8') 7 | 8 | # did not include torch and pyscf here 9 | install_requires=['numpy', 'paramiko', 'ruamel.yaml'] 10 | 11 | 12 | setuptools.setup( 13 | name="deepks", 14 | use_scm_version={'write_to': 'deepks/_version.py'}, 15 | setup_requires=['setuptools_scm'], 16 | author="Yixiao Chen", 17 | author_email="yixiaoc@princeton.edu", 18 | description="DeePKS-kit: generate accurate (self-consistent) energy functionals", 19 | long_description=readme, 20 | long_description_content_type="text/markdown", 21 | packages=setuptools.find_packages(include=['deepks', 'deepks.*']), 22 | classifiers=[ 23 | "Programming Language :: Python :: 3.7", 24 | ], 25 | keywords='deepks DeePKS-kit', 26 | install_requires=install_requires, 27 | python_requires=">=3.7", 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'deepks=deepks.main:main_cli', 31 | 'dks=deepks.main:main_cli', 32 | ], 33 | }, 34 | ) --------------------------------------------------------------------------------
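A minimal end-to-end usage sketch based on the entry points defined in setup.py and the example configs above (paths and file names are illustrative):

    pip install .                                            # provides the `deepks` / `dks` CLI
    python scripts/solve_mol.py -M ccsd -B ccpvdz mol.xyz    # label one geometry with energy/force/dm
    python scripts/convert_xyz.py mol.xyz -d systems/sys.00  # pack the labels into an .npy system folder
    deepks iterate base.yaml penalty.yaml                    # run the iterative training loop (cf. pipe.sh)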