├── .git_archival.txt ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── mirror_gitee.yml ├── .gitignore ├── LICENSE ├── README.md ├── deepks ├── __init__.py ├── __main__.py ├── iterate │ ├── __init__.py │ ├── __main__.py │ ├── iterate.py │ └── template.py ├── main.py ├── model │ ├── __init__.py │ ├── __main__.py │ ├── model.py │ ├── reader.py │ ├── test.py │ └── train.py ├── scf │ ├── __init__.py │ ├── __main__.py │ ├── _old_grad.py │ ├── addons.py │ ├── fields.py │ ├── grad.py │ ├── penalty.py │ ├── run.py │ ├── scf.py │ └── stats.py ├── task │ ├── __init__.py │ ├── job │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── dispatcher.py │ │ ├── job_status.py │ │ ├── lazy_local_context.py │ │ ├── local_context.py │ │ ├── shell.py │ │ ├── slurm.py │ │ └── ssh_context.py │ ├── task.py │ └── workflow.py ├── tools │ ├── __init__.py │ ├── geom_optim.py │ └── num_hessian.py └── utils.py ├── examples ├── iterate │ ├── combined.yaml │ └── splitted │ │ ├── args.yaml │ │ └── share │ │ ├── init_scf.yaml │ │ ├── init_train.yaml │ │ ├── scf_input.yaml │ │ ├── systems_test.raw │ │ ├── systems_train.raw │ │ └── train_input.yaml ├── legacy │ ├── iter_linear │ │ └── run.py │ ├── iter_nn_local │ │ ├── run.py │ │ ├── run_res.py │ │ └── share │ │ │ ├── e_ref.npy │ │ │ ├── init │ │ │ ├── model.pth │ │ │ ├── test_paths.raw │ │ │ └── train_paths.raw │ │ │ ├── mol_files.raw │ │ │ ├── scf_input.yaml │ │ │ └── train_input.yaml │ ├── iter_nn_new │ │ ├── extra.py │ │ ├── init_train │ │ │ ├── input.yaml │ │ │ ├── log.train │ │ │ └── model.pth │ │ ├── run.py │ │ └── share │ │ │ ├── e_ref.npy │ │ │ ├── f_ref.npy │ │ │ ├── init │ │ │ └── model.pth │ │ │ ├── mol_files.raw │ │ │ ├── raw_scf_input.yaml │ │ │ ├── scf_input.yaml │ │ │ └── train_input.yaml │ ├── iter_nn_slurm │ │ ├── run.py │ │ ├── run_res.py │ │ └── share │ │ │ ├── e_ref.npy │ │ │ ├── index.raw │ │ │ ├── init │ │ │ ├── test_paths.raw │ │ │ └── train_paths.raw │ │ │ ├── input.yaml │ │ │ ├── mol_files.raw │ │ │ └── test.sh │ └── train_active_learning │ │ ├── run.py │ │ └── share │ │ ├── init │ │ ├── new_test_paths.raw │ │ └── new_train_paths.raw │ │ ├── input.yaml │ │ └── test_model.sh ├── train_input │ ├── extended.yaml │ ├── force.yaml │ ├── gelu.yaml │ └── restart.yaml ├── water_cluster │ ├── .gitignore │ ├── README.md │ ├── args.yaml │ ├── run.sh │ ├── run_shell.sh │ ├── shell.yaml │ ├── systems │ │ ├── test.n6 │ │ │ ├── atom.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ └── unit.raw │ │ ├── train.n1 │ │ │ ├── atom.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ └── unit.raw │ │ ├── train.n2 │ │ │ ├── coord.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ ├── type.raw │ │ │ └── unit.raw │ │ ├── train.n3 │ │ │ ├── coord.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ ├── type.raw │ │ │ └── unit.raw │ │ └── valid.n4 │ │ │ ├── coord.npy │ │ │ ├── energy.npy │ │ │ ├── force.npy │ │ │ ├── type.raw │ │ │ └── unit.raw │ └── test.sh └── water_single │ ├── .gitignore │ ├── README.md │ ├── init │ ├── machines.yaml │ ├── params.yaml │ ├── run.sh │ └── systems.yaml │ ├── iter │ ├── args.yaml │ └── run.sh │ ├── systems │ ├── group.00 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ ├── group.01 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ ├── group.02 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ └── group.03 │ │ ├── atom.npy │ │ ├── dm.npy │ │ ├── energy.npy │ │ └── force.npy │ └── withdens │ ├── base.yaml │ ├── penalty.yaml │ ├── pipe.sh │ ├── relax.yaml │ └── run.sh ├── 
requirements.txt ├── scripts ├── convert_xyz.py ├── legacy │ ├── calc_eig.py │ ├── proj_dm.py │ ├── rhf.py │ ├── rks.py │ └── rmp2.py └── solve_mol.py └── setup.py /.git_archival.txt: -------------------------------------------------------------------------------- 1 | node: 4f133fb60e00bc5e413e80e32214defb7a145415 2 | node-date: 2025-04-29T05:22:58+08:00 3 | describe-name: v0.1-84-g4f133fb 4 | ref-names: HEAD -> master 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | .git_archival.txt export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/mirror_gitee.yml: -------------------------------------------------------------------------------- 1 | name: Mirror to Gitee Repo 2 | 3 | on: [ push, delete, create ] 4 | 5 | # Ensures that only one mirror task will run at a time. 6 | concurrency: 7 | group: git-mirror 8 | 9 | jobs: 10 | git-mirror: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: wearerequired/git-mirror-action@v1 14 | env: 15 | ORGANIZATION: deepmodeling 16 | SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }} 17 | with: 18 | source-repo: "https://github.com/deepmodeling/deepks-kit.git" 19 | destination-repo: "git@gitee.com:deepmodeling/deepks-kit.git" 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # User defined 2 | *~ 3 | checkpoint 4 | model.ckpt.* 5 | .vscode 6 | .ipynb_* 7 | *.swp 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | _version.py 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Environments 49 | .env 50 | .venv 51 | env/ 52 | venv/ 53 | ENV/ 54 | env.bak/ 55 | venv.bak/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 
14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeePKS-kit
2 | 
3 | DeePKS-kit is a program to generate accurate energy functionals for quantum chemistry systems,
4 | for both the perturbative scheme (DeePHF) and the self-consistent scheme (DeePKS).
5 | 
6 | The program provides a command line interface `deepks` that contains five sub-commands:
7 | - `train`: train a neural-network-based post-HF energy functional model
8 | - `test`: test the post-HF model with given data and show statistics
9 | - `scf`: run a self-consistent field calculation with a given energy model
10 | - `stats`: collect and print statistics of the SCF results
11 | - `iterate`: iteratively train a self-consistent model by combining the four commands above
12 | 
13 | ## Installation
14 | 
15 | DeePKS-kit is a pure Python library, so it can be installed following the standard `git clone` then `pip install` procedure. Note that the two main requirements, `pytorch` and `pyscf`, will not be installed automatically, so you will need to install them manually in advance. Below are more detailed instructions that include installing the required libraries in the environment.
16 | 
17 | We use `conda` here as an example, so first you may need to install [Anaconda](https://docs.anaconda.com/anaconda/install/) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html).
18 | 
19 | To reduce the possibility of library conflicts, we suggest creating a new environment (named `deepks`) with basic dependencies installed (optional):
20 | ```bash
21 | conda create -n deepks numpy scipy h5py ruamel.yaml paramiko
22 | conda activate deepks
23 | ```
24 | Now you are in the new environment called `deepks`.
25 | Next, install [PyTorch](https://pytorch.org/get-started/locally/)
26 | ```bash
27 | # assuming a GPU with cudatoolkit 10.2 support
28 | conda install pytorch cudatoolkit=10.2 -c pytorch
29 | ```
30 | and [PySCF](https://github.com/pyscf/pyscf).
31 | ```bash
32 | # the conda package does not support python >= 3.8 so we use pip
33 | pip install pyscf
34 | ```
35 | 
36 | Once the environment has been set up properly, use pip to install DeePKS-kit:
37 | ```bash
38 | pip install git+https://github.com/deepmodeling/deepks-kit/
39 | ```
40 | 
41 | ## Usage
42 | 
43 | A relatively detailed description of the `deepks-kit` library can be found [here](https://arxiv.org/pdf/2012.14615.pdf). Please also refer to the references below for a description of the methods.
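For a quick orientation, the sketch below strings the sub-commands together by hand. This is a minimal sketch that assumes each sub-command accepts a YAML input file, as in the examples folder; the file names are placeholders borrowed from those examples, not names required by the program.

```bash
# minimal sketch -- the YAML file names are placeholders from examples/
deepks train train_input.yaml    # fit a neural-network energy correction model
deepks scf scf_input.yaml        # run SCF calculations with a trained model
deepks iterate args.yaml         # run the full self-consistent training loop
```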
44 | 
45 | Please see the [`examples`](./examples) folder for the usage of the `deepks-kit` library. A detailed example with executable data for single water molecules can be found [here](./examples/water_single). A more complicated one for training water clusters can be found [here](./examples/water_cluster).
46 | 
47 | Check [this input file](./examples/water_cluster/args.yaml) for a detailed explanation of the possible input parameters, and also [this one](./examples/water_cluster/shell.yaml) if you would like to run on a local machine instead of using the Slurm scheduler.
48 | 
49 | ## References
50 | 
51 | [1] Chen, Y., Zhang, L., Wang, H. and E, W., 2020. Ground State Energy Functional with Hartree–Fock Efficiency and Chemical Accuracy. The Journal of Physical Chemistry A, 124(35), pp.7155–7165.
52 | 
53 | [2] Chen, Y., Zhang, L., Wang, H. and E, W., 2021. DeePKS: A Comprehensive Data-Driven Approach toward Chemically Accurate Density Functional Theory. Journal of Chemical Theory and Computation, 17(1), pp.170–181.
54 | 
55 | 
56 | 
62 | 
63 | 
--------------------------------------------------------------------------------
/deepks/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = "Yixiao Chen"
2 | 
3 | try:
4 |     from ._version import version as __version__
5 | except ImportError:
6 |     __version__ = 'unknown'
7 | 
8 | __all__ = [
9 |     "iterate",
10 |     "model",
11 |     "scf",
12 |     "task",
13 |     # "tools" # collection of command line scripts, should not be imported by user
14 | ]
15 | 
16 | def __getattr__(name):
17 |     from importlib import import_module
18 |     if name in __all__:
19 |         return import_module("." + name, __name__)
20 |     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
--------------------------------------------------------------------------------
/deepks/__main__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | try:
4 |     import deepks
5 | except ImportError as e:
6 |     sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../")
7 | 
8 | from deepks.main import main_cli
9 | 
10 | if __name__ == "__main__":
11 |     main_cli()
--------------------------------------------------------------------------------
/deepks/iterate/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "iterate",
3 |     "template",
4 | ]
5 | 
6 | from .iterate import make_scf, make_train, make_iterate
--------------------------------------------------------------------------------
/deepks/iterate/__main__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | try:
4 |     import deepks
5 | except ImportError as e:
6 |     sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../")
7 | 
8 | from deepks.main import iter_cli
9 | 
10 | if __name__ == "__main__":
11 |     iter_cli()
--------------------------------------------------------------------------------
/deepks/model/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 |     "model",
3 |     "reader",
4 |     "train",
5 |     "test",
6 | ]
7 | 
8 | def __getattr__(name):
9 |     from importlib import import_module
10 |     if name == "CorrNet":
11 |         from .model import CorrNet
12 |         return CorrNet
13 |     if name in __all__:
14 |         return import_module("."
+ name, __name__) 15 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 16 | -------------------------------------------------------------------------------- /deepks/model/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | try: 4 | import deepks 5 | except ImportError as e: 6 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../") 7 | 8 | from deepks.main import train_cli 9 | 10 | if __name__ == "__main__": 11 | train_cli() -------------------------------------------------------------------------------- /deepks/model/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | try: 6 | import deepks 7 | except ImportError as e: 8 | import sys 9 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../../") 10 | from deepks.model.model import CorrNet 11 | from deepks.model.reader import GroupReader 12 | from deepks.utils import load_yaml, load_dirs, check_list 13 | 14 | 15 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 16 | 17 | 18 | def test(model, g_reader, dump_prefix="test", group=False): 19 | model.eval() 20 | loss_fn=nn.MSELoss() 21 | label_list = [] 22 | pred_list = [] 23 | 24 | for i in range(g_reader.nsystems): 25 | sample = g_reader.sample_all(i) 26 | nframes = sample["lb_e"].shape[0] 27 | sample = {k: v.to(DEVICE, non_blocking=True) for k, v in sample.items()} 28 | label, data = sample["lb_e"], sample["eig"] 29 | pred = model(data) 30 | error = torch.sqrt(loss_fn(pred, label)) 31 | 32 | error_np = error.item() 33 | label_np = label.cpu().numpy().reshape(nframes, -1).sum(axis=1) 34 | pred_np = pred.detach().cpu().numpy().reshape(nframes, -1).sum(axis=1) 35 | error_l1 = np.mean(np.abs(label_np - pred_np)) 36 | label_list.append(label_np) 37 | pred_list.append(pred_np) 38 | 39 | if not group and dump_prefix is not None: 40 | nd = max(len(str(g_reader.nsystems)), 2) 41 | dump_res = np.stack([label_np, pred_np], axis=1) 42 | header = f"{g_reader.path_list[i]}\nmean l1 error: {error_l1}\nmean l2 error: {error_np}\nreal_ene pred_ene" 43 | filename = f"{dump_prefix}.{i:0{nd}}.out" 44 | np.savetxt(filename, dump_res, header=header) 45 | # print(f"system {i} finished") 46 | 47 | all_label = np.concatenate(label_list, axis=0) 48 | all_pred = np.concatenate(pred_list, axis=0) 49 | all_err_l1 = np.mean(np.abs(all_label - all_pred)) 50 | all_err_l2 = np.sqrt(np.mean((all_label - all_pred) ** 2)) 51 | info = f"all systems mean l1 error: {all_err_l1}\nall systems mean l2 error: {all_err_l2}" 52 | print(info) 53 | if dump_prefix is not None and group: 54 | np.savetxt(f"{dump_prefix}.out", np.stack([all_label, all_pred], axis=1), 55 | header=info + "\nreal_ene pred_ene") 56 | return all_err_l1, all_err_l2 57 | 58 | 59 | def main(data_paths, model_file="model.pth", 60 | output_prefix='test', group=False, 61 | e_name='l_e_delta', d_name=['dm_eig']): 62 | data_paths = load_dirs(data_paths) 63 | if len(d_name) == 1: 64 | d_name = d_name[0] 65 | g_reader = GroupReader(data_paths, e_name=e_name, d_name=d_name, 66 | conv_filter=False, extra_label=True) 67 | model_file = check_list(model_file) 68 | for f in model_file: 69 | print(f) 70 | p = os.path.dirname(f) 71 | model = CorrNet.load(f).double().to(DEVICE) 72 | dump = os.path.join(p, output_prefix) 73 | dir_name = os.path.dirname(dump) 74 | if dir_name: 75 | os.makedirs(dir_name, 
exist_ok=True) 76 | if model.elem_table is not None: 77 | elist, econst = model.elem_table 78 | g_reader.collect_elems(elist) 79 | g_reader.subtract_elem_const(econst) 80 | test(model, g_reader, dump_prefix=dump, group=group) 81 | g_reader.revert_elem_const() 82 | 83 | 84 | if __name__ == "__main__": 85 | from deepks.main import test_cli as cli 86 | cli() 87 | -------------------------------------------------------------------------------- /deepks/scf/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "scf", 3 | "grad", 4 | "run", 5 | "stats", 6 | "fields", 7 | "penalty", 8 | ] 9 | 10 | def __getattr__(name): 11 | from importlib import import_module 12 | if name in __all__: 13 | return import_module("." + name, __name__) 14 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 15 | 16 | 17 | def DSCF(mol, model, xc="HF", **kwargs): 18 | """A wrap function to create NN SCF object (RDSCF or UDSCF)""" 19 | from .scf import RDSCF, UDSCF 20 | if mol.spin == 0: 21 | return RDSCF(mol, model, xc, **kwargs) 22 | else: 23 | return UDSCF(mol, model, xc, **kwargs) 24 | 25 | DeepSCF = DSCF -------------------------------------------------------------------------------- /deepks/scf/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | try: 4 | import deepks 5 | except ImportError as e: 6 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../") 7 | 8 | from deepks.main import scf_cli 9 | 10 | if __name__ == "__main__": 11 | scf_cli() -------------------------------------------------------------------------------- /deepks/scf/addons.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import numpy as np 4 | from torch import nn 5 | from pyscf import lib 6 | from pyscf.lib import logger 7 | from pyscf import gto 8 | from pyscf import scf, dft 9 | from deepks.scf.scf import t_make_eig, t_make_grad_eig_dm 10 | 11 | 12 | def t_ele_grad(bfock, c_vir, c_occ, n_occ): 13 | g = torch.einsum("pa,qi,...pq->...ai", c_vir, c_occ*n_occ, bfock) 14 | return g.flatten(-2) 15 | 16 | 17 | def make_grad_eig_egrad(dscf, mo_coeff=None, mo_occ=None, gfock=None): 18 | if mo_occ is None: 19 | mo_occ = dscf.mo_occ 20 | if mo_coeff is None: 21 | mo_coeff = dscf.mo_coeff 22 | if gfock is None: 23 | dm = dscf.make_rdm1(mo_coeff, mo_occ) 24 | if dm.ndim >= 3 and isinstance(dscf, scf.uhf.UHF): 25 | dm = dm.sum(0) 26 | gfock = t_make_grad_eig_dm(torch.from_numpy(dm), dscf._t_ovlp_shells).numpy() 27 | if mo_coeff.ndim >= 3 and mo_occ.ndim >= 2: 28 | return np.concatenate([make_grad_eig_egrad(dscf, mc, mo, gfock) 29 | for mc, mo in zip(mo_coeff, mo_occ)], axis=-1) 30 | iocc = mo_occ>0 31 | t_no = torch.from_numpy(mo_occ[iocc]).to(dscf.device) 32 | t_co = torch.from_numpy(mo_coeff[:, iocc]).to(dscf.device) 33 | t_cv = torch.from_numpy(mo_coeff[:, ~iocc]).to(dscf.device) 34 | t_gfock = torch.from_numpy(gfock).to(dscf.device) 35 | return t_ele_grad(t_gfock, t_cv, t_co, t_no).cpu().numpy() 36 | 37 | 38 | def gen_coul_loss(dscf, fock=None, ovlp=None, mo_occ=None): 39 | nao = dscf.mol.nao 40 | fock = (fock if fock is not None else dscf.get_fock()).reshape(-1, nao, nao) 41 | s1e = ovlp if ovlp is not None else dscf.get_ovlp() 42 | mo_occ = (mo_occ if mo_occ is not None else dscf.mo_occ).reshape(-1, nao) 43 | def _coul_loss_grad(v, target_dm): 44 | # return coulomb loss and its grad with respect to fock matrix 
45 | # only support single dm, do not use directly for UHF 46 | a_loss = 0. 47 | a_grad = 0. 48 | target_dm = target_dm.reshape(fock.shape) 49 | for tdm, f1e, nocc in zip(target_dm, fock, mo_occ): 50 | iocc = nocc>0 51 | moe, moc = dscf._eigh(f1e+v, s1e) 52 | eo, ev = moe[iocc], moe[~iocc] 53 | co, cv = moc[:, iocc], moc[:, ~iocc] 54 | dm = (co * nocc[iocc]) @ co.T 55 | # calc loss 56 | ddm = dm - tdm 57 | dvj = dscf.get_j(dm=ddm) 58 | loss = 0.5 * np.einsum("ij,ji", ddm, dvj) 59 | a_loss += loss 60 | # calc grad with respect to fock matrix 61 | ie_mn = 1. / (-ev.reshape(-1, 1) + eo) 62 | temp_mn = cv.T @ dvj @ co * nocc[iocc] * ie_mn 63 | dldv = cv @ temp_mn @ co.T 64 | dldv = dldv + dldv.T 65 | a_grad += dldv 66 | return a_loss, a_grad 67 | return _coul_loss_grad 68 | 69 | 70 | def make_grad_coul_veig(dscf, target_dm): 71 | clfn = gen_coul_loss(dscf) 72 | dm = dscf.make_rdm1() 73 | if dm.ndim == 3 and isinstance(dscf, scf.uhf.UHF): 74 | dm = dm.sum(0) 75 | t_dm = torch.from_numpy(dm).requires_grad_() 76 | t_eig = t_make_eig(t_dm, dscf._t_ovlp_shells).requires_grad_() 77 | loss, dldv = clfn(np.zeros_like(dm), target_dm) 78 | t_veig = torch.zeros_like(t_eig).requires_grad_() 79 | [t_vc] = torch.autograd.grad(t_eig, t_dm, t_veig, create_graph=True) 80 | [t_ghead] = torch.autograd.grad(t_vc, t_veig, torch.from_numpy(dldv)) 81 | return t_ghead.detach().cpu().numpy() 82 | 83 | 84 | def calc_optim_veig(dscf, target_dm, 85 | target_dec=None, gvx=None, 86 | nstep=1, force_factor=1., **optim_args): 87 | clfn = gen_coul_loss(dscf, fock=dscf.get_fock(vhf=dscf.get_veff0())) 88 | dm = dscf.make_rdm1() 89 | if dm.ndim == 3 and isinstance(dscf, scf.uhf.UHF): 90 | dm = dm.sum(0) 91 | t_dm = torch.from_numpy(dm).requires_grad_() 92 | t_eig = t_make_eig(t_dm, dscf._t_ovlp_shells).requires_grad_() 93 | t_ec = dscf.net(t_eig.to(dscf.device)) 94 | t_veig = torch.autograd.grad(t_ec, t_eig)[0].requires_grad_() 95 | t_lde = torch.from_numpy(target_dec) if target_dec is not None else None 96 | t_gvx = torch.from_numpy(gvx) if gvx is not None else None 97 | # build closure 98 | def closure(): 99 | [t_vc] = torch.autograd.grad( 100 | t_eig, t_dm, t_veig, retain_graph=True, create_graph=True) 101 | loss, dldv = clfn(t_vc.detach().numpy(), target_dm) 102 | grad = torch.autograd.grad( 103 | t_vc, t_veig, torch.from_numpy(dldv), only_inputs=True)[0] 104 | # build closure for force loss 105 | if t_lde is not None and t_gvx is not None: 106 | t_pde = torch.tensordot(t_gvx, t_veig) 107 | lossde = force_factor * torch.sum((t_pde - t_lde)**2) 108 | grad = grad + torch.autograd.grad(lossde, t_veig, only_inputs=True)[0] 109 | loss = loss + lossde 110 | t_veig.grad = grad 111 | return loss 112 | # do the optimization 113 | optim = torch.optim.LBFGS([t_veig], **optim_args) 114 | tic = (time.process_time(), time.perf_counter()) 115 | for _ in range(nstep): 116 | optim.step(closure) 117 | tic = logger.timer(dscf, 'LBFGS step', *tic) 118 | logger.note(dscf, f"optimized loss for veig = {closure()}") 119 | return t_veig.detach().numpy() 120 | 121 | 122 | def gcalc_optim_veig(gdscf, target_dm, target_grad, 123 | nstep=1, force_factor=1., **optim_args): 124 | target_dec = target_grad - gdscf.de0 125 | gvx = gdscf.make_grad_eig_x() 126 | return calc_optim_veig( 127 | gdscf.base, 128 | target_dm=target_dm, 129 | target_dec=target_dec, gvx=gvx, 130 | nstep=nstep, force_factor=force_factor, **optim_args) 131 | -------------------------------------------------------------------------------- /deepks/scf/fields.py: 
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from typing import List, Callable
3 | from dataclasses import dataclass, field
4 | 
5 | # Field = namedtuple("Field", ["name", "alias", "calc", "shape"])
6 | # LabelField = namedtuple("LabelField", ["name", "alias", "calc", "shape", "required_labels"])
7 | @dataclass
8 | class Field:
9 |     name: str
10 |     alias: List[str]
11 |     calc: Callable
12 |     shape: str
13 |     required_labels: List[str] = field(default_factory=list)
14 | 
15 | 
16 | def select_fields(names):
17 |     names = [n.lower() for n in names]
18 |     scfs = [fd for fd in SCF_FIELDS
19 |             if fd.name in names
20 |             or any(al in names for al in fd.alias)]
21 |     grads = [fd for fd in GRAD_FIELDS
22 |              if fd.name in names
23 |              or any(al in names for al in fd.alias)]
24 |     return {"scf": scfs, "grad": grads}
25 | 
26 | 
27 | BOHR = 0.52917721092
28 | 
29 | def isinbohr(mol):
30 |     return mol.unit.upper().startswith(("B", "AU"))
31 | 
32 | def _Lunit(mol):
33 |     return (1. if isinbohr(mol) else BOHR)
34 | 
35 | def atom_data(mol):
36 |     raw_data = np.concatenate(
37 |         [mol.atom_charges().reshape(-1,1), mol.atom_coords(unit='Bohr')],
38 |         axis=1)
39 |     non_ghost = [ii for ii in range(mol.natm)
40 |                  if not mol.elements[ii].startswith("X")]
41 |     return raw_data[non_ghost]
42 | 
43 | 
44 | SCF_FIELDS = [
45 |     Field("atom",
46 |           ["atoms", "mol", "molecule"],
47 |           lambda mf: atom_data(mf.mol),
48 |           "(nframe, natom, 4)"),
49 |     Field("e_base",
50 |           ["ebase", "ene_base", "e0",
51 |            "e_hf", "ehf", "ene_hf",
52 |            "e_ks", "eks", "ene_ks"],
53 |           lambda mf: mf.energy_tot0(),
54 |           "(nframe, 1)"),
55 |     Field("e_tot",
56 |           ["e_cf", "ecf", "ene_cf", "etot", "ene", "energy", "e"],
57 |           lambda mf: mf.e_tot,
58 |           "(nframe, 1)"),
59 |     Field("rdm",
60 |           ["dm"],
61 |           lambda mf: mf.make_rdm1(),
62 |           "(nframe, nao, nao)"),
63 |     Field("proj_dm",
64 |           ["pdm"],
65 |           lambda mf: mf.make_pdm(flatten=True),
66 |           "(nframe, natom, -1)"),
67 |     Field("dm_eig",
68 |           ["eig"],
69 |           lambda mf: mf.make_eig(),
70 |           "(nframe, natom, nproj)"),
71 |     Field("hcore_eig",
72 |           ["heig"],
73 |           lambda mf: mf.make_eig(mf.get_hcore()),
74 |           "(nframe, natom, nproj)"),
75 |     Field("ovlp_eig",
76 |           ["oeig"],
77 |           lambda mf: mf.make_eig(mf.get_ovlp()),
78 |           "(nframe, natom, nproj)"),
79 |     Field("veff_eig",
80 |           ["veig"],
81 |           lambda mf: mf.make_eig(mf.get_veff()),
82 |           "(nframe, natom, nproj)"),
83 |     Field("fock_eig",
84 |           ["feig"],
85 |           lambda mf: mf.make_eig(mf.get_fock()),
86 |           "(nframe, natom, nproj)"),
87 |     Field("conv",
88 |           ["converged", "convergence"],
89 |           lambda mf: mf.converged,
90 |           "(nframe, 1)"),
91 |     Field("mo_coef_occ", # do not support UHF
92 |           ["mo_coeff_occ", "orbital_coeff_occ"],
93 |           lambda mf: mf.mo_coeff[:,mf.mo_occ>0].T,
94 |           "(nframe, -1, nao)"),
95 |     Field("mo_ene_occ", # do not support UHF
96 |           ["mo_energy_occ", "orbital_ene_occ"],
97 |           lambda mf: mf.mo_energy[mf.mo_occ>0],
98 |           "(nframe, -1)"),
99 |     # below are fields that require labels
100 |     Field("l_e_ref",
101 |           ["e_ref", "lbl_e_ref", "label_e_ref", "le_ref"],
102 |           lambda mf, **lbl: lbl["energy"],
103 |           "(nframe, 1)",
104 |           ["energy"]),
105 |     Field("l_e_delta",
106 |           ["le_delta", "lbl_e_delta", "label_e_delta", "lbl_ed"],
107 |           lambda mf, **lbl: lbl["energy"] - mf.energy_tot0(),
108 |           "(nframe, 1)",
109 |           ["energy"]),
110 |     Field("err_e",
111 |           ["e_err", "err_e_tot", "err_e_cf"],
112 |           lambda mf, **lbl: lbl["energy"] - mf.e_tot,
113 |           "(nframe, 1)",
114 |           ["energy"]),
115 | ]
116 | 
117 | GRAD_FIELDS = [
118 |     Field("f_base",
119 |           ["fbase", "force_base", "f0",
120 |            "f_hf", "fhf", "force_hf",
121 |            "f_ks", "fks", "force_ks"],
122 |           lambda grad: - grad.get_base() / _Lunit(grad.mol),
123 |           "(nframe, natom_raw, 3)"),
124 |     Field("f_tot",
125 |           ["f_cf", "fcf", "force_cf", "ftot", "force", "f"],
126 |           lambda grad: - grad.de / _Lunit(grad.mol),
127 |           "(nframe, natom_raw, 3)"),
128 |     Field("grad_dmx",
129 |           ["grad_dm_x", "grad_pdm_x"],
130 |           lambda grad: grad.make_grad_pdm_x(flatten=True) / _Lunit(grad.mol),
131 |           "(nframe, natom_raw, 3, natom, -1)"),
132 |     Field("grad_vx",
133 |           ["grad_eig_x", "geigx", "gvx"],
134 |           lambda grad: grad.make_grad_eig_x() / _Lunit(grad.mol),
135 |           "(nframe, natom_raw, 3, natom, nproj)"),
136 |     # below are fields that require labels
137 |     Field("l_f_ref",
138 |           ["f_ref", "lbl_f_ref", "label_f_ref", "lf_ref"],
139 |           lambda grad, **lbl: lbl["force"],
140 |           "(nframe, natom_raw, 3)",
141 |           ["force"]),
142 |     Field("l_f_delta",
143 |           ["lf_delta", "lbl_f_delta", "label_f_delta", "lbl_fd"],
144 |           lambda grad, **lbl: lbl["force"] - (-grad.get_base() / _Lunit(grad.mol)),
145 |           "(nframe, natom_raw, 3)",
146 |           ["force"]),
147 |     Field("err_f",
148 |           ["f_err", "err_f_tot", "err_f_cf"],
149 |           lambda grad, **lbl: lbl["force"] - (-grad.de / _Lunit(grad.mol)),
150 |           "(nframe, natom_raw, 3)",
151 |           ["force"]),
152 | ]
153 | 
154 | 
155 | # below are additional methods from addons
156 | from deepks.scf import addons
157 | 
158 | SCF_FIELDS.extend([
159 |     # the following two are used for regularizing the potential
160 |     Field("grad_veg",
161 |           ["grad_eig_egrad", "jac_eig_egrad"],
162 |           lambda mf: addons.make_grad_eig_egrad(mf),
163 |           "(nframe, natom, nproj, -1)"),
164 |     Field("eg_base",
165 |           ["ele_grad_base", "egrad0", "egrad_base"],
166 |           lambda mf: mf.get_grad0(),
167 |           "(nframe, -1)"),
168 |     # the following one is used for coulomb loss optimization
169 |     Field("grad_ldv",
170 |           ["grad_coul_dv", "grad_coul_deig", "coulomb_grad"],
171 |           lambda mf, **lbl: addons.make_grad_coul_veig(mf, target_dm=lbl["dm"]),
172 |           "(nframe, natom, nproj)",
173 |           ["dm"]),
174 |     Field("l_veig_raw",
175 |           ["optim_veig_raw", "l_opt_v_raw", "l_optim_veig_raw"],
176 |           lambda mf, **lbl: addons.calc_optim_veig(mf, lbl["dm"], nstep=1),
177 |           "(nframe, natom, nproj)",
178 |           ["dm"]),
179 | ])
180 | 
181 | GRAD_FIELDS.extend([
182 |     # the following one is used for coulomb loss optimization from grad class
183 |     Field("l_veig",
184 |           ["optim_veig", "l_opt_v", "l_optim_veig"],
185 |           lambda grad, **lbl: addons.gcalc_optim_veig(
186 |               grad, lbl["dm"], -_Lunit(grad.mol)*lbl["force"], nstep=1),
187 |           "(nframe, natom, nproj)",
188 |           ["dm", "force"]),
189 |     Field("l_veig_nof",
190 |           ["optim_veig_nof", "l_opt_v_nof", "l_optim_veig_nof"],
191 |           lambda grad, **lbl: addons.gcalc_optim_veig(
192 |               grad, lbl["dm"], grad.de, nstep=1),
193 |           "(nframe, natom, nproj)",
194 |           ["dm"]),
195 | ])
--------------------------------------------------------------------------------
/deepks/scf/penalty.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from pyscf.dft import numint, gen_grid
4 | from pyscf.lib import logger
5 | from deepks.utils import check_list
6 | 
7 | 
8 | def select_penalty(name):
9 |     name = name.lower()
10 |     if name == "density":
11 |         return DensityPenalty
12 |     if name == "coulomb":
13 |         return CoulombPenalty
14 |     raise ValueError(f"unknown penalty type: {name}")
15 | 
16 | 
17 | class PenaltyMixin(object):
18 |     """Mixin class to add penalty potential in Fock matrix"""
19 | 
20 |     def __init__(self, penalties=None):
21 |         self.penalties = check_list(penalties)
22 |         for pnt in self.penalties:
23 |             pnt.init_hook(self)
24 | 
25 |     def get_fock(self, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1,
26 |                  diis=None, diis_start_cycle=None,
27 |                  level_shift_factor=None, damp_factor=None, **kwargs):
28 |         """modified get_fock method to apply penalty terms onto vhf"""
29 |         if dm is None:
30 |             dm = self.make_rdm1()
31 |         if h1e is None:
32 |             h1e = self.get_hcore()
33 |         if vhf is None:
34 |             vhf = self.get_veff(dm=dm)
35 |         vp = sum(pnt.fock_hook(self, dm=dm, h1e=h1e, vhf=vhf, cycle=cycle)
36 |                  for pnt in self.penalties)
37 |         vhf = vhf + vp
38 |         return super().get_fock(
39 |             h1e=h1e, s1e=s1e, vhf=vhf, dm=dm, cycle=cycle,
40 |             diis=diis, diis_start_cycle=diis_start_cycle,
41 |             level_shift_factor=level_shift_factor, damp_factor=damp_factor, **kwargs)
42 | 
43 | 
44 | class AbstructPenalty(object):
45 |     """
46 |     Abstract base class for penalty terms in the SCF Hamiltonian.
47 |     To implement a penalty one needs to implement the
48 |     fock_hook and (optionally) init_hook methods.
49 |     """
50 |     required_labels = []  # these labels will be loaded and passed to __init__
51 | 
52 |     def init_hook(self, mf, **envs):
53 |         """
54 |         Method called when the SCF object is initialized.
55 |         Used to initialize the penalty with molecule info.
56 |         """
57 |         pass
58 | 
59 |     def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs):
60 |         """
61 |         Method called before get_fock.
62 |         The returned matrix will be added to the vhf matrix.
63 |         """
64 |         raise NotImplementedError("fock_hook method is not implemented")
65 | 
66 | 
67 | class DummyPenalty(AbstructPenalty):
68 |     def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs):
69 |         return 0
70 | 
71 | 
72 | class DensityPenalty(AbstructPenalty):
73 |     r"""
74 |     penalty on the difference w.r.t. the target density
75 |     E_p = \lambda / 2 * \int dx (\rho(x) - \rho_target(x))^2
76 |     V_p(x) = \lambda * (\rho(x) - \rho_target(x))
77 |     The target density should be given as a density matrix in the ao basis
78 |     """
79 |     required_labels = ["dm"]
80 | 
81 |     def __init__(self, target_dm, strength=1, random=False, start_cycle=0):
82 |         if isinstance(target_dm, str):
83 |             target_dm = np.load(target_dm)
84 |         self.dm_t = target_dm
85 |         self.init_strength = strength
86 |         self.strength = strength * np.random.rand() if random else strength
87 |         self.start_cycle = start_cycle
88 |         # below are values to be initialized later in init_hook
89 |         self.grids = None
90 |         self.ao_value = None
91 | 
92 |     def init_hook(self, mf, **envs):
93 |         if hasattr(mf, "grids"):
94 |             self.grids = mf.grids
95 |         else:
96 |             self.grids = gen_grid.Grids(mf.mol)
97 | 
98 |     def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs):
99 |         # cycle > 0 means it is doing scf iteration
100 |         if 0 <= cycle < self.start_cycle:
101 |             return 0
102 |         if self.grids.coords is None:
103 |             self.grids.build()
104 |         if self.ao_value is None:
105 |             self.ao_value = numint.eval_ao(mf.mol, self.grids.coords, deriv=0)
106 |         tic = (time.process_time(), time.perf_counter())
107 |         rho_diff = numint.eval_rho(mf.mol, self.ao_value, dm - self.dm_t)
108 |         v_p = numint.eval_mat(mf.mol, self.ao_value, self.grids.weights, rho_diff, rho_diff)
109 |         # cycle < 0 means it is just checking, we only print here
110 |         if cycle < 0 and mf.verbose >= 4:
111 |             diff_norm = np.sum(np.abs(rho_diff)*self.grids.weights)
112 |             logger.info(mf, f"  Density Penalty: |diff| = {diff_norm}")
113 |         logger.timer(mf, "dens_pnt", *tic)
114 |         return 
self.strength * v_p 115 | 116 | 117 | class CoulombPenalty(AbstructPenalty): 118 | r""" 119 | penalty given by the coulomb energy of density difference 120 | 121 | """ 122 | required_labels = ["dm"] 123 | 124 | def __init__(self, target_dm, strength=1, random=False, start_cycle=0): 125 | if isinstance(target_dm, str): 126 | target_dm = np.load(target_dm) 127 | self.dm_t = target_dm 128 | self.init_strength = strength 129 | self.strength = strength * np.random.rand() if random else strength 130 | self.start_cycle = start_cycle 131 | 132 | def fock_hook(self, mf, dm=None, h1e=None, vhf=None, cycle=-1, **envs): 133 | # cycle > 0 means it is doing scf iteration 134 | if 0 <= cycle < self.start_cycle: 135 | return 0 136 | tic = (time.process_time(), time.perf_counter()) 137 | ddm = dm - self.dm_t 138 | v_p = mf.get_j(dm=ddm) 139 | # cycle < 0 means it is just checking, we only print here 140 | if cycle < 0 and mf.verbose >=4: 141 | diff_norm = np.sum(ddm * v_p) 142 | logger.info(mf, f" Coulomb Penalty: |diff| = {diff_norm}") 143 | logger.timer(mf, "coul_pnt", *tic) 144 | return self.strength * v_p 145 | -------------------------------------------------------------------------------- /deepks/task/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "task", 3 | "workflow", 4 | "job" 5 | ] 6 | 7 | from .task import * 8 | from .workflow import * -------------------------------------------------------------------------------- /deepks/task/job/__init__.py: -------------------------------------------------------------------------------- 1 | # this sub package is borrowed and modified from dpgen project 2 | # https://github.com/deepmodeling/dpgen/tree/master/dpgen/dispatcher -------------------------------------------------------------------------------- /deepks/task/job/job_status.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class JobStatus (Enum) : 4 | unsubmitted = 1 5 | waiting = 2 6 | running = 3 7 | terminated = 4 8 | finished = 5 9 | completing = 6 10 | unknown = 100 11 | 12 | -------------------------------------------------------------------------------- /deepks/task/job/lazy_local_context.py: -------------------------------------------------------------------------------- 1 | import os,shutil,uuid 2 | import subprocess as sp 3 | from glob import glob 4 | 5 | class SPRetObj(object) : 6 | def __init__ (self, 7 | ret) : 8 | self.data = ret 9 | 10 | def read(self) : 11 | return self.data 12 | 13 | def readlines(self) : 14 | lines = self.data.decode('utf-8').splitlines() 15 | ret = [] 16 | for aa in lines: 17 | ret.append(aa+'\n') 18 | return ret 19 | 20 | class LazyLocalContext(object) : 21 | def __init__ (self, 22 | local_root, 23 | work_profile = None, 24 | job_uuid = None) : 25 | """ 26 | work_profile: 27 | local_root: 28 | """ 29 | assert(type(local_root) == str) 30 | self.local_root = os.path.abspath(local_root) 31 | self.remote_root = self.local_root 32 | if job_uuid: 33 | self.job_uuid=job_uuid 34 | else: 35 | self.job_uuid = str(uuid.uuid4()) 36 | 37 | def get_job_root(self) : 38 | return self.local_root 39 | 40 | def upload(self, 41 | job_dirs, 42 | local_up_files, 43 | dereference = True) : 44 | pass 45 | 46 | def download(self, 47 | job_dirs, 48 | remote_down_files, 49 | check_exists = False, 50 | mark_failure = True, 51 | back_error=False) : 52 | for ii in job_dirs : 53 | for jj in remote_down_files : 54 | fname = os.path.join(self.local_root, ii, jj) 
55 | exists = os.path.exists(fname) 56 | if not exists: 57 | if check_exists: 58 | if mark_failure: 59 | with open(os.path.join(self.local_root, ii, 'tag_failure_download_%s' % jj), 'w') as fp: pass 60 | else: 61 | pass 62 | else: 63 | raise RuntimeError('do not find download file ' + fname) 64 | 65 | def block_checkcall(self, 66 | cmd) : 67 | cwd = os.getcwd() 68 | os.chdir(self.local_root) 69 | proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE) 70 | o, e = proc.communicate() 71 | stdout = SPRetObj(o) 72 | stderr = SPRetObj(e) 73 | code = proc.returncode 74 | if code != 0: 75 | os.chdir(cwd) 76 | raise RuntimeError("Get error code %d in locally calling %s with job: %s " % (code, cmd, self.job_uuid)) 77 | os.chdir(cwd) 78 | return None, stdout, stderr 79 | 80 | def block_call(self, cmd) : 81 | cwd = os.getcwd() 82 | os.chdir(self.local_root) 83 | proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE) 84 | o, e = proc.communicate() 85 | stdout = SPRetObj(o) 86 | stderr = SPRetObj(e) 87 | code = proc.returncode 88 | os.chdir(cwd) 89 | return code, None, stdout, stderr 90 | 91 | def clean(self): 92 | tmp_files = [f'{self.job_uuid}.sub', 93 | f'{self.job_uuid}_job_id', 94 | f'{self.job_uuid}_tag_finished'] 95 | for fn in tmp_files: 96 | if self.check_file_exists(fn): 97 | os.remove(os.path.join(self.local_root, fn)) 98 | 99 | def write_file(self, fname, write_str): 100 | with open(os.path.join(self.local_root, fname), 'w') as fp : 101 | fp.write(write_str) 102 | 103 | def read_file(self, fname): 104 | with open(os.path.join(self.local_root, fname), 'r') as fp: 105 | ret = fp.read() 106 | return ret 107 | 108 | def check_file_exists(self, fname): 109 | return os.path.isfile(os.path.join(self.local_root, fname)) 110 | 111 | def call(self, cmd) : 112 | cwd = os.getcwd() 113 | os.chdir(self.local_root) 114 | proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE) 115 | os.chdir(cwd) 116 | return proc 117 | 118 | def kill(self, proc): 119 | proc.kill() 120 | 121 | def check_finish(self, proc): 122 | return (proc.poll() != None) 123 | 124 | def get_return(self, proc): 125 | ret = proc.poll() 126 | if ret is None: 127 | return None, None, None 128 | else : 129 | try: 130 | o, e = proc.communicate() 131 | stdout = SPRetObj(o) 132 | stderr = SPRetObj(e) 133 | except: 134 | stdout = None 135 | stderr = None 136 | return ret, stdout, stderr 137 | 138 | 139 | -------------------------------------------------------------------------------- /deepks/task/job/local_context.py: -------------------------------------------------------------------------------- 1 | import os,shutil,uuid,hashlib 2 | import subprocess as sp 3 | from glob import glob 4 | 5 | class LocalSession (object) : 6 | def __init__ (self, jdata) : 7 | self.work_path = os.path.abspath(jdata['work_path']) 8 | os.makedirs(self.work_path, exist_ok=True) 9 | # assert(os.path.exists(self.work_path)) 10 | 11 | def get_work_root(self) : 12 | return self.work_path 13 | 14 | class SPRetObj(object) : 15 | def __init__ (self, 16 | ret) : 17 | self.data = ret 18 | 19 | def read(self) : 20 | return self.data 21 | 22 | def readlines(self) : 23 | lines = self.data.decode('utf-8').splitlines() 24 | ret = [] 25 | for aa in lines: 26 | ret.append(aa+'\n') 27 | return ret 28 | 29 | def _check_file_path(fname) : 30 | dirname = os.path.dirname(fname) 31 | if dirname != "": 32 | os.makedirs(dirname, exist_ok=True) 33 | 34 | def _identical_files(fname0, fname1) : 35 | with open(fname0) as fp: 36 | code0 = 
hashlib.sha1(fp.read().encode('utf-8')).hexdigest() 37 | with open(fname1) as fp: 38 | code1 = hashlib.sha1(fp.read().encode('utf-8')).hexdigest() 39 | return code0 == code1 40 | 41 | 42 | class LocalContext(object) : 43 | def __init__ (self, 44 | local_root, 45 | work_profile, 46 | job_uuid = None) : 47 | """ 48 | work_profile: 49 | local_root: 50 | """ 51 | assert(type(local_root) == str) 52 | self.local_root = os.path.abspath(local_root) 53 | if job_uuid: 54 | self.job_uuid=job_uuid 55 | else: 56 | self.job_uuid = str(uuid.uuid4()) 57 | 58 | self.remote_root = os.path.join(work_profile.get_work_root(), self.job_uuid) 59 | # dlog.debug("local_root is %s"% local_root) 60 | # dlog.debug("remote_root is %s"% self.remote_root) 61 | 62 | os.makedirs(self.remote_root, exist_ok = True) 63 | 64 | def get_job_root(self) : 65 | return self.remote_root 66 | 67 | def upload(self, 68 | job_dirs, 69 | local_up_files, 70 | dereference = True) : 71 | cwd = os.getcwd() 72 | for ii in job_dirs : 73 | local_job = os.path.join(self.local_root, ii) 74 | remote_job = os.path.join(self.remote_root, ii) 75 | os.makedirs(remote_job, exist_ok = True) 76 | os.chdir(remote_job) 77 | for jj in local_up_files : 78 | if not os.path.exists(os.path.join(local_job, jj)): 79 | os.chdir(cwd) 80 | raise RuntimeError('cannot find upload file ' + os.path.join(local_job, jj)) 81 | if os.path.exists(os.path.join(remote_job, jj)) : 82 | os.remove(os.path.join(remote_job, jj)) 83 | _check_file_path(jj) 84 | os.symlink(os.path.join(local_job, jj), 85 | os.path.join(remote_job, jj)) 86 | os.chdir(cwd) 87 | 88 | def download(self, 89 | job_dirs, 90 | remote_down_files, 91 | check_exists = False, 92 | mark_failure = True, 93 | back_error=False) : 94 | cwd = os.getcwd() 95 | for ii in job_dirs : 96 | local_job = os.path.join(self.local_root, ii) 97 | remote_job = os.path.join(self.remote_root, ii) 98 | flist = remote_down_files 99 | if back_error : 100 | os.chdir(remote_job) 101 | flist += glob('err*') 102 | os.chdir(cwd) 103 | for jj in flist : 104 | rfile = os.path.join(remote_job, jj) 105 | lfile = os.path.join(local_job, jj) 106 | if not os.path.realpath(rfile) == os.path.realpath(lfile) : 107 | if (not os.path.exists(rfile)) and (not os.path.exists(lfile)): 108 | if check_exists : 109 | if mark_failure: 110 | with open(os.path.join(self.local_root, ii, 'tag_failure_download_%s' % jj), 'w') as fp: pass 111 | else : 112 | pass 113 | else : 114 | raise RuntimeError('do not find download file ' + rfile) 115 | elif (not os.path.exists(rfile)) and (os.path.exists(lfile)) : 116 | # already downloaded 117 | pass 118 | elif (os.path.exists(rfile)) and (not os.path.exists(lfile)) : 119 | # trivial case, download happily 120 | os.makedirs(os.path.dirname(lfile), exist_ok=True) 121 | shutil.move(rfile, lfile) 122 | elif (os.path.exists(rfile)) and (os.path.exists(lfile)) : 123 | # both exists, replace! 
124 |                         # dlog.info('find existing %s, replacing by %s' % (lfile, rfile))
125 |                         if os.path.isdir(lfile):
126 |                             shutil.rmtree(lfile, ignore_errors=True)
127 |                         elif os.path.isfile(lfile) or os.path.islink(lfile):
128 |                             os.remove(lfile)
129 |                         os.makedirs(os.path.dirname(lfile), exist_ok=True)
130 |                         shutil.move(rfile, lfile)
131 |                     else :
132 |                         raise RuntimeError('should not reach here!')
133 |                 else :
134 |                     # do nothing in the case of linked files
135 |                     pass
136 |         os.chdir(cwd)
137 | 
138 |     def block_checkcall(self,
139 |                         cmd) :
140 |         cwd = os.getcwd()
141 |         os.chdir(self.remote_root)
142 |         proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
143 |         o, e = proc.communicate()
144 |         stdout = SPRetObj(o)
145 |         stderr = SPRetObj(e)
146 |         code = proc.returncode
147 |         if code != 0:
148 |             os.chdir(cwd)
149 |             raise RuntimeError("Get error code %d in locally calling %s with job: %s " % (code, cmd, self.job_uuid))
150 |         os.chdir(cwd)
151 |         return None, stdout, stderr
152 | 
153 |     def block_call(self, cmd) :
154 |         cwd = os.getcwd()
155 |         os.chdir(self.remote_root)
156 |         proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
157 |         o, e = proc.communicate()
158 |         stdout = SPRetObj(o)
159 |         stderr = SPRetObj(e)
160 |         code = proc.returncode
161 |         os.chdir(cwd)
162 |         return code, None, stdout, stderr
163 | 
164 |     def clean(self) :
165 |         shutil.rmtree(self.remote_root, ignore_errors=True)
166 | 
167 |     def write_file(self, fname, write_str):
168 |         with open(os.path.join(self.remote_root, fname), 'w') as fp :
169 |             fp.write(write_str)
170 | 
171 |     def read_file(self, fname):
172 |         with open(os.path.join(self.remote_root, fname), 'r') as fp:
173 |             ret = fp.read()
174 |         return ret
175 | 
176 |     def check_file_exists(self, fname):
177 |         return os.path.isfile(os.path.join(self.remote_root, fname))
178 | 
179 |     def call(self, cmd) :
180 |         cwd = os.getcwd()
181 |         os.chdir(self.remote_root)
182 |         proc = sp.Popen(cmd, shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
183 |         os.chdir(cwd)
184 |         return proc
185 | 
186 |     def kill(self, proc):
187 |         proc.kill()
188 | 
189 |     def check_finish(self, proc):
190 |         return (proc.poll() != None)
191 | 
192 |     def get_return(self, proc):
193 |         ret = proc.poll()
194 |         if ret is None:
195 |             return None, None, None
196 |         else :
197 |             try:
198 |                 o, e = proc.communicate()
199 |                 stdout = SPRetObj(o)
200 |                 stderr = SPRetObj(e)
201 |             except:
202 |                 stdout = None
203 |                 stderr = None
204 |         return ret, stdout, stderr
205 | 
206 | 
207 | 
--------------------------------------------------------------------------------
/deepks/task/job/shell.py:
--------------------------------------------------------------------------------
1 | import os,getpass,time
2 | from .batch import Batch
3 | from .job_status import JobStatus
4 | 
5 | def _default_item(resources, key, value) :
6 |     if key not in resources :
7 |         resources[key] = value
8 | 
9 | 
10 | class Shell(Batch) :
11 | 
12 |     def check_status(self) :
13 |         if self.check_finish_tag():
14 |             return JobStatus.finished
15 |         elif self.check_running():
16 |             return JobStatus.running
17 |         else:
18 |             return JobStatus.terminated
19 |         ## warn: cannot distinguish terminated from unsubmitted.
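    # NOTE (added illustration, not part of the original source): check_status()
    # is designed to be polled until a terminal state is reached; a minimal
    # sketch, assuming `batch` is a Shell instance built with a local context:
    #
    #     import time
    #     while batch.check_status() not in (JobStatus.finished,
    #                                        JobStatus.terminated):
    #         time.sleep(10)
    #
    # check_running() below greps `ps aux` for a command line containing
    # "<job_uuid>.sub", which is why a terminated job cannot be told apart
    # from one that was never submitted (see the warning above).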
20 | 21 | def check_running(self): 22 | uuid_names = self.context.job_uuid 23 | ## Check if the uuid.sub is running on remote machine 24 | cnt = 0 25 | ret, stdin, stdout, stderr = self.context.block_call("ps aux | grep %s"%uuid_names) 26 | response_list = stdout.read().decode('utf-8').split("\n") 27 | for response in response_list: 28 | if uuid_names + ".sub" in response: 29 | return True 30 | return False 31 | 32 | def exec_sub_script(self, script_str): 33 | self.context.write_file(self.sub_script_name, script_str) 34 | self.proc = self.context.call('cd %s && exec bash %s' % (self.context.remote_root, self.sub_script_name)) 35 | 36 | def default_resources(self, res_) : 37 | if res_ is None : 38 | res = {} 39 | else: 40 | res = res_ 41 | _default_item(res, 'task_per_node', 1) 42 | _default_item(res, 'module_list', []) 43 | _default_item(res, 'module_unload_list', []) 44 | _default_item(res, 'source_list', []) 45 | _default_item(res, 'envs', {}) 46 | _default_item(res, 'with_mpi', False) 47 | _default_item(res, 'cuda_multi_tasks', False) 48 | _default_item(res, 'allow_failure', False) 49 | return res 50 | 51 | def sub_script_head(self, resources) : 52 | envs = resources['envs'] 53 | module_list = resources['module_list'] 54 | module_unload_list = resources['module_unload_list'] 55 | task_per_node = resources['task_per_node'] 56 | source_list = resources['source_list'] 57 | 58 | ret = '' 59 | ret += ('#!/bin/bash\n\n') 60 | # fp.write('set -euo pipefail\n') 61 | for key in envs.keys() : 62 | ret += ('export %s=%s\n' % (key, envs[key])) 63 | ret += ('\n') 64 | for ii in module_unload_list : 65 | ret += ('module unload %s\n' % ii) 66 | ret += ('\n') 67 | for ii in module_list : 68 | ret += ('module load %s\n' % ii) 69 | ret += ('\n') 70 | for ii in source_list : 71 | ret += ('source %s\n' % ii) 72 | ret += ('\n') 73 | return ret 74 | 75 | def sub_script_cmd(self, 76 | cmd, 77 | arg, 78 | res) : 79 | _cmd = cmd.split('1>')[0].strip() 80 | if res['with_mpi']: 81 | _cmd = 'mpirun -n %d %s %s' % (res['task_per_node'], _cmd, arg) 82 | else : 83 | _cmd = '%s %s' % (_cmd, arg) 84 | return _cmd 85 | 86 | def make_non_blocking(self, inner_script, step_res=None): 87 | return f"({inner_script})&\n" -------------------------------------------------------------------------------- /deepks/task/job/slurm.py: -------------------------------------------------------------------------------- 1 | import os,getpass,time 2 | from .batch import Batch 3 | from .job_status import JobStatus 4 | 5 | 6 | def _default_item(resources, key, value) : 7 | if key not in resources : 8 | resources[key] = value 9 | 10 | class Slurm(Batch) : 11 | 12 | def check_status(self): 13 | """ 14 | check the status of a job 15 | """ 16 | job_id = self._get_job_id() 17 | if job_id == '' : 18 | return JobStatus.unsubmitted 19 | while True: 20 | stat = self._check_status_inner(job_id) 21 | if stat != JobStatus.completing: 22 | return stat 23 | else: 24 | time.sleep(5) 25 | 26 | def check_before_sub(self, res): 27 | if 'task_max' in res and res['task_max'] > 0: 28 | while self._check_sub_limit(task_max=res['task_max']): 29 | time.sleep(60) 30 | 31 | def exec_sub_script(self, script_str): 32 | self.context.write_file(self.sub_script_name, script_str) 33 | stdin, stdout, stderr = self.context.block_checkcall('cd %s && %s %s' % (self.context.remote_root, 'sbatch', self.sub_script_name)) 34 | subret = (stdout.readlines()) 35 | job_id = subret[0].split()[-1] 36 | self.context.write_file(self.job_id_name, job_id) 37 | 38 | def 
default_resources(self, res_) : 39 | """ 40 | set default values if a key in res_ is not found 41 | """ 42 | if res_ is None : 43 | res = {} 44 | else: 45 | res = res_ 46 | _default_item(res, 'numb_node', 1) 47 | _default_item(res, 'task_per_node', 1) 48 | _default_item(res, 'cpus_per_task', 1) 49 | _default_item(res, 'numb_gpu', 0) 50 | _default_item(res, 'time_limit', '1:0:0') 51 | _default_item(res, 'mem_limit', -1) 52 | _default_item(res, 'partition', '') 53 | _default_item(res, 'account', '') 54 | _default_item(res, 'qos', '') 55 | _default_item(res, 'constraint_list', []) 56 | _default_item(res, 'license_list', []) 57 | _default_item(res, 'exclude_list', []) 58 | _default_item(res, 'module_unload_list', []) 59 | _default_item(res, 'module_list', []) 60 | _default_item(res, 'source_list', []) 61 | _default_item(res, 'envs', None) 62 | _default_item(res, 'with_mpi', False) 63 | _default_item(res, 'cuda_multi_tasks', False) 64 | _default_item(res, 'allow_failure', False) 65 | return res 66 | 67 | def sub_script_head(self, res): 68 | ret = '' 69 | ret += "#!/bin/bash -l\n" 70 | ret += "#SBATCH -N %d\n" % res['numb_node'] 71 | ret += "#SBATCH --ntasks-per-node=%d\n" % res['task_per_node'] 72 | if res['cpus_per_task'] > 0 : 73 | ret += "#SBATCH --cpus-per-task=%d\n" % res['cpus_per_task'] 74 | ret += "#SBATCH -t %s\n" % res['time_limit'] 75 | if res['mem_limit'] > 0 : 76 | ret += "#SBATCH --mem=%dG \n" % res['mem_limit'] 77 | if len(res['account']) > 0 : 78 | ret += "#SBATCH --account=%s \n" % res['account'] 79 | if len(res['partition']) > 0 : 80 | ret += "#SBATCH --partition=%s \n" % res['partition'] 81 | if len(res['qos']) > 0 : 82 | ret += "#SBATCH --qos=%s \n" % res['qos'] 83 | if res['numb_gpu'] > 0 : 84 | ret += "#SBATCH --gres=gpu:%d\n" % res['numb_gpu'] 85 | for ii in res['constraint_list'] : 86 | ret += '#SBATCH -C %s \n' % ii 87 | for ii in res['license_list'] : 88 | ret += '#SBATCH -L %s \n' % ii 89 | if len(res['exclude_list']) >0: 90 | temp_exclude = "" 91 | for ii in res['exclude_list'] : 92 | temp_exclude += ii 93 | temp_exclude += "," 94 | temp_exclude = temp_exclude[:-1] 95 | ret += '#SBATCH --exclude=%s \n' % temp_exclude 96 | ret += "\n" 97 | for ii in res['module_unload_list'] : 98 | ret += "module unload %s\n" % ii 99 | for ii in res['module_list'] : 100 | ret += "module load %s\n" % ii 101 | ret += "\n" 102 | for ii in res['source_list'] : 103 | ret += "source %s\n" %ii 104 | ret += "\n" 105 | envs = res['envs'] 106 | if envs is not None : 107 | for key in envs.keys() : 108 | ret += 'export %s=%s\n' % (key, envs[key]) 109 | ret += '\n' 110 | return ret 111 | 112 | def sub_step_head(self, step_res=None, **kwargs): 113 | if step_res is None: 114 | return "" 115 | # exclusive = step_res.get("exclusive", False) 116 | # numb_node = step_res.get("numb_node", 1) 117 | # task_per_node = step_res.get("task_per_node", 1) 118 | # cpus_per_task = step_res.get("cpus_per_task", 1) 119 | # numb_gpu = step_res.get('numb_gpu', 0) 120 | params = "" 121 | if "numb_node" in step_res: 122 | params += f" -N {step_res['numb_node']} " 123 | if "task_per_node" in step_res: 124 | params += f" -n {step_res['task_per_node'] * step_res.get('numb_node', 1)} " 125 | if "cpus_per_task" in step_res: 126 | params += f" -c {step_res['cpus_per_task']} " 127 | if step_res.get("exclusive", False): 128 | params += " --exclusive " 129 | if step_res.get('numb_gpu', 0) > 0 : 130 | params += " --gres=gpu:%d " % step_res['numb_gpu'] # no embedded newline, or the srun command line would be split 131 | return f"srun {params} " 132 | 133 | def sub_script_cmd(self, 134 | cmd,
135 | arg, 136 | res) : 137 | _cmd = cmd.split('1>')[0].strip() 138 | if res['with_mpi']: 139 | _cmd = 'srun %s %s' % (_cmd, arg) 140 | else : 141 | _cmd = '%s %s' % (_cmd, arg) 142 | return _cmd 143 | 144 | def _get_job_id(self) : 145 | if self.context.check_file_exists(self.job_id_name) : 146 | return self.context.read_file(self.job_id_name) 147 | else: 148 | return "" 149 | 150 | def _check_status_inner(self, job_id): 151 | ret, stdin, stdout, stderr\ 152 | = self.context.block_call ('squeue -o "%.18i %.2t" -j ' + job_id) 153 | if (ret != 0) : 154 | err_str = stderr.read().decode('utf-8') 155 | if str("Invalid job id specified") in err_str : 156 | if self.check_finish_tag() : 157 | return JobStatus.finished 158 | else : 159 | return JobStatus.terminated 160 | else : 161 | raise RuntimeError\ 162 | ("status command squeue fails to execute\nerror message:%s\nreturn code %d\n" % (err_str, ret)) 163 | status_line = stdout.read().decode('utf-8').split ('\n')[-2] 164 | status_word = status_line.split ()[-1] 165 | if not (len(status_line.split()) == 2 and status_word.isupper()): 166 | raise RuntimeError("Error in getting job status, " + 167 | f"status_line = {status_line}, " + 168 | f"parsed status_word = {status_word}") 169 | if status_word in ["PD","CF","S"] : 170 | return JobStatus.waiting 171 | elif status_word in ["R"] : 172 | return JobStatus.running 173 | elif status_word in ["CG"] : 174 | return JobStatus.completing 175 | elif status_word in ["C","E","K","BF","CA","CD","F","NF","PR","SE","ST","TO"] : 176 | if self.check_finish_tag() : 177 | return JobStatus.finished 178 | else : 179 | return JobStatus.terminated 180 | else : 181 | return JobStatus.unknown 182 | 183 | def _check_sub_limit(self, task_max, **kwarg) : 184 | if task_max <= 0: 185 | return True 186 | username = getpass.getuser() 187 | stdin, stdout, stderr = self.context.block_checkcall('squeue -u %s -h' % username) 188 | nj = len(stdout.readlines()) 189 | return nj >= task_max 190 | 191 | def _make_squeue(self,mdata1, res): 192 | ret = '' 193 | ret += 'squeue -u %s ' % mdata1['username'] 194 | ret += '-p %s ' % res['partition'] 195 | ret += '| grep PD' 196 | return ret 197 | -------------------------------------------------------------------------------- /deepks/task/workflow.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from deepks.utils import check_list 3 | from deepks.utils import get_abs_path 4 | from deepks.task.task import AbstructStep 5 | 6 | 7 | __all__ = ["Workflow", "Sequence", "Iteration"] 8 | 9 | 10 | class Workflow(AbstructStep): 11 | def __init__(self, child_tasks, workdir='.', record_file=None): 12 | super().__init__(workdir) 13 | self.record_file = get_abs_path(record_file) 14 | self.child_tasks = [self.make_child(task) for task in child_tasks] 15 | self.postmod_hook() 16 | # self.set_record_file(record_file) 17 | 18 | def make_child(self, task): 19 | if not isinstance(task, AbstructStep): 20 | raise TypeError("Workflow only accepts tasks and other workflows as children, " 21 | "but got " + type(task).__name__) 22 | assert not task.workdir.is_absolute() 23 | copied = deepcopy(task) 24 | copied.prepend_workdir(self.workdir) 25 | if isinstance(task, Workflow): 26 | copied.set_record_file(self.record_file) 27 | return copied 28 | 29 | def postmod_hook(self): 30 | pass 31 | 32 | def run(self, parent_tag=(), restart_tag=None): 33 | start_idx = 0 34 | if restart_tag is not None: 35 | last_idx = restart_tag[0] 36 | rest_tag = restart_tag[1:] 37 | if
last_idx >= len(self.child_tasks): 38 | print(f'# restart tag {last_idx} out of range, stop now') 39 | return 40 | if rest_tag: 41 | last_tag = parent_tag+(last_idx,) 42 | self.child_tasks[last_idx].run(last_tag, restart_tag=rest_tag) 43 | self.write_record(last_tag) 44 | start_idx = last_idx + 1 45 | for i in range(start_idx, len(self.child_tasks)): 46 | curr_tag = parent_tag + (i,) 47 | print('# starting step:', curr_tag) 48 | task = self.child_tasks[i] 49 | task.run(curr_tag) 50 | self.write_record(curr_tag) 51 | 52 | def prepend_workdir(self, path): 53 | super().prepend_workdir(path) 54 | for task in self.child_tasks: 55 | task.prepend_workdir(path) 56 | 57 | def set_record_file(self, record_file): 58 | self.record_file = get_abs_path(record_file) 59 | for task in self.child_tasks: 60 | if isinstance(task, Workflow): 61 | task.set_record_file(record_file) 62 | 63 | def write_record(self, tag): 64 | if self.record_file is None: 65 | return 66 | if isinstance(tag, (list, tuple)): 67 | tag = ' '.join(map(str,tag)) 68 | with self.record_file.open('a') as lf: 69 | lf.write(tag + '\n') 70 | 71 | def max_depth(self): 72 | if not any(isinstance(task, Workflow) for task in self.child_tasks): 73 | return 1 74 | else: 75 | return 1 + max(task.max_depth() for task in self.child_tasks if isinstance(task, Workflow)) 76 | 77 | def restart(self): 78 | if not self.record_file.exists(): 79 | print('# no record file, starting from scratch') 80 | self.run(()) 81 | return 82 | with self.record_file.open() as lf: 83 | all_tags = [tuple(map(int, l.split())) for l in lf.readlines()] 84 | # assert max(map(len, all_tags)) == self.max_depth() 85 | restart_tag = all_tags[-1] 86 | print('# restarting after step', restart_tag) 87 | self.run((), restart_tag=restart_tag) 88 | 89 | def __getitem__(self, idx): 90 | return self.child_tasks[idx] 91 | 92 | def __setitem__(self, idx, task): 93 | self.child_tasks[idx] = self.make_child(task) 94 | self.postmod_hook() 95 | 96 | def __delitem__(self, idx): 97 | self.child_tasks.__delitem__(idx) 98 | self.postmod_hook() 99 | 100 | def __len__(self): 101 | return len(self.child_tasks) 102 | 103 | def __iter__(self): 104 | return self.child_tasks.__iter__() 105 | 106 | def insert(self, index, task): 107 | self.child_tasks.insert(index, self.make_child(task)) 108 | self.postmod_hook() 109 | 110 | def append(self, task): 111 | self.child_tasks.append(self.make_child(task)) 112 | self.postmod_hook() 113 | 114 | def prepend(self, task): 115 | self.child_tasks.insert(0, self.make_child(task)) 116 | self.postmod_hook() 117 | 118 | 119 | class Sequence(Workflow): 120 | def __init__(self, child_tasks, workdir='.', record_file=None, init_folder=None): 121 | # would reset all tasks' prev folder into their prev task, except for the first one 122 | super().__init__(child_tasks, workdir, record_file) 123 | if init_folder is not None: 124 | self.set_init_folder(init_folder) 125 | 126 | def chain_tasks(self): 127 | for prev, curr in zip(self.child_tasks[:-1], self.child_tasks[1:]): 128 | while isinstance(prev, Workflow): 129 | prev = prev.child_tasks[-1] 130 | while isinstance(curr, Workflow): 131 | curr = curr.child_tasks[0] 132 | curr.set_prev_task(prev) 133 | 134 | def set_init_folder(self, init_folder): 135 | start = self.child_tasks[0] 136 | while isinstance(start, Workflow): 137 | start = start.child_tasks[0] 138 | start.set_prev_folder(get_abs_path(init_folder)) 139 | 140 | def postmod_hook(self): 141 | self.chain_tasks() 142 | 143 | 144 | class Iteration(Sequence): 145 | def 
__init__(self, task, iternum, workdir='.', record_file=None, init_folder=None): 146 | # iterated task should have workdir='.' to avoid redundant folders 147 | # handle multiple tasks by first making a sequence 148 | if not isinstance(task, AbstructStep): 149 | task = Sequence(task) 150 | iter_tasks = [deepcopy(task) for i in range(iternum)] 151 | nd = max(len(str(iternum)), 2) 152 | for ii, itask in enumerate(iter_tasks): 153 | itask.prepend_workdir(f'iter.{ii:0>{nd}d}') 154 | super().__init__(iter_tasks, workdir, record_file, init_folder) 155 | 156 | -------------------------------------------------------------------------------- /deepks/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/deepks/tools/__init__.py -------------------------------------------------------------------------------- /deepks/tools/geom_optim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -N 1 3 | #SBATCH -c 20 4 | #SBATCH -t 24:00:00 5 | #SBATCH --mem=8G 6 | 7 | import time 8 | import numpy as np 9 | from deepks.utils import load_yaml 10 | from deepks.scf.scf import DSCF 11 | from pyscf import gto, lib 12 | try: 13 | from pyscf.geomopt.berny_solver import optimize 14 | except ImportError: 15 | from pyscf.geomopt.geometric_solver import optimize 16 | 17 | 18 | def run_optim(mol, model=None, proj_basis=None, scf_args={}, conv_args={}): 19 | cf = DSCF(mol, model, proj_basis=proj_basis).set(**scf_args) 20 | mol_eq = optimize(cf, **conv_args) 21 | return mol_eq 22 | 23 | def dump_xyz(filename, mol): 24 | coords = mol.atom_coords(unit="Angstrom").reshape(-1,3) 25 | elems = mol.elements 26 | with open(filename, 'w') as fp: 27 | fp.write(f"{mol.natm}\n\n") 28 | for x, e in zip(coords, elems): 29 | fp.write("%s %.18g %.18g %.18g\n" % (e, x[0], x[1], x[2])) 30 | 31 | 32 | if __name__ == "__main__": 33 | import argparse 34 | import os 35 | parser = argparse.ArgumentParser(description="Run geometry optimization for given xyz files, optionally using a trained DeePKS model.") 36 | parser.add_argument("files", nargs="+", help="input xyz files") 37 | parser.add_argument("-m", "--model-file", help="file of the trained model") 38 | parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file") 39 | parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation") 40 | parser.add_argument("-P", "--proj_basis", help="basis set used to project dm, must match with model") 41 | parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule") 42 | parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information") 43 | parser.add_argument("-S", "--suffix", help="suffix added to the saved xyz") 44 | parser.add_argument("--scf-input", help="yaml file to specify scf arguments") 45 | parser.add_argument("--conv-input", help="yaml file to specify convergence arguments") 46 | args = parser.parse_args() 47 | 48 | if args.verbose: 49 | print(f"starting calculation with OMP threads: {lib.num_threads()}", 50 | f"and max memory: {lib.param.MAX_MEMORY}") 51 | 52 | if args.dump_dir is not None: 53 | os.makedirs(args.dump_dir, exist_ok = True) 54 | for fn in args.files: 55 | tic = time.time() 56 | mol = gto.M(atom=fn, basis=args.basis, verbose=args.verbose, charge=args.charge, parse_arg=False) 57 | model = args.model_file
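# NOTE (editorial sketch, not part of the original file): the --scf-input yaml
# handled below accepts two layouts -- a flat mapping that is taken as scf_args
# wholesale, or a nested mapping whose "scf_args" key is extracted and whose
# optional "model" key is used when -m is not given. Hypothetical examples:
#
#     conv_tol: 1e-8          # flat form: the whole file is scf_args
#
#     scf_args:               # nested form
#       conv_tol: 1e-8
#     model: model.pth        # used only if --model-file is absent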
58 | scf_args = {} 59 | if args.scf_input is not None: 60 | argdict = load_yaml(args.scf_input) 61 | if "scf_args" in argdict: 62 | scf_args = argdict["scf_args"] 63 | if model is None and "model" in argdict: 64 | model = argdict["model"] 65 | else: 66 | scf_args = argdict 67 | conv_args = load_yaml(args.conv_input) if args.conv_input is not None else {} 68 | mol_eq = run_optim(mol, model, args.proj_basis, scf_args, conv_args) 69 | suffix = args.suffix 70 | if args.dump_dir is None: 71 | dump_dir = os.path.dirname(fn) 72 | if not suffix: 73 | suffix = "eq" 74 | else: 75 | dump_dir = args.dump_dir 76 | dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) 77 | if suffix: 78 | dump += f".{suffix}" 79 | dump_xyz(dump+".xyz", mol_eq) 80 | if args.verbose: 81 | print(fn, f"done, time = {time.time()-tic}") -------------------------------------------------------------------------------- /deepks/tools/num_hessian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -N 1 3 | #SBATCH -c 20 4 | #SBATCH -t 24:00:00 5 | #SBATCH --mem=8G 6 | 7 | import time 8 | import numpy as np 9 | from deepks.utils import load_yaml 10 | from deepks.scf.scf import DSCF 11 | from pyscf import gto, lib 12 | 13 | BOHR = 0.52917721092 14 | 15 | def finite_difference(f, x, delta=1e-6): 16 | in_shape = x.shape 17 | y0 = f(x) 18 | out_shape = y0.shape 19 | res = np.empty(in_shape + out_shape) 20 | for idx in np.ndindex(*in_shape): 21 | diff = np.zeros(in_shape) 22 | diff[idx] += delta 23 | y1 = f(x+diff) 24 | res[idx] = (y1-y0) / delta 25 | return res 26 | 27 | def calc_deriv(mol, model=None, proj_basis=None, **scfargs): 28 | cf = DSCF(mol, model, proj_basis=proj_basis).run(**scfargs) 29 | if not cf.converged: 30 | raise RuntimeError("SCF not converged!") 31 | ff = cf.nuc_grad_method().run() 32 | return ff.de 33 | 34 | def make_closure(mol, model=None, proj_basis=None, **scfargs): 35 | refmol = mol 36 | def cc2de(coords): 37 | tic = time.time() 38 | mol = refmol.set_geom_(coords, inplace=False, unit="Bohr") 39 | de = calc_deriv(mol, model, proj_basis, **scfargs) 40 | if mol.verbose > 1: 41 | print(f"step time = {time.time()-tic}") 42 | return de 43 | return cc2de 44 | # scanner is not very stable. 
We construct new scf objects every time 45 | # scanner = DSCF(mol.set(unit="Bohr"), model).set(**scfargs).nuc_grad_method().as_scanner() 46 | # return lambda m: scanner(m)[-1] 47 | 48 | def calc_hessian(mol, model=None, delta=1e-6, proj_basis=None, **scfargs): 49 | cc2de = make_closure(mol, model, proj_basis, **scfargs) 50 | cc0 = mol.atom_coords(unit="Bohr") 51 | hess = finite_difference(cc2de, cc0, delta).transpose((0,2,1,3)) 52 | return hess 53 | 54 | 55 | if __name__ == "__main__": 56 | import argparse 57 | import os 58 | parser = argparse.ArgumentParser(description="Calculate and save numerical Hessians for given xyz files.") 59 | parser.add_argument("files", nargs="+", help="input xyz files") 60 | parser.add_argument("-m", "--model-file", help="file of the trained model") 61 | parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file") 62 | parser.add_argument("-D", "--delta", default=1e-6, type=float, help="numerical difference step size") 63 | parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation") 64 | parser.add_argument("-P", "--proj_basis", help="basis set used to project dm, must match with model") 65 | parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule") 66 | parser.add_argument("-U", "--unit", default="Angstrom", help="choose length unit (Bohr or Angstrom)") 67 | parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information") 68 | parser.add_argument("--scf-input", help="yaml file to specify scf arguments") 69 | args = parser.parse_args() 70 | 71 | if args.verbose: 72 | print(f"starting calculation with OMP threads: {lib.num_threads()}", 73 | f"and max memory: {lib.param.MAX_MEMORY}") 74 | 75 | if args.dump_dir is not None: 76 | os.makedirs(args.dump_dir, exist_ok = True) 77 | for fn in args.files: 78 | tic = time.time() 79 | mol = gto.M(atom=fn, basis=args.basis, verbose=args.verbose, charge=args.charge, parse_arg=False) 80 | model = args.model_file 81 | scfargs = {} 82 | if args.scf_input is not None: 83 | argdict = load_yaml(args.scf_input) 84 | if "scf_args" in argdict: 85 | scfargs = argdict["scf_args"] 86 | if model is None and "model" in argdict: 87 | model = argdict["model"] 88 | else: 89 | scfargs = argdict 90 | hess = calc_hessian(mol, model, args.delta, args.proj_basis, **scfargs) 91 | if not args.unit.upper().startswith(("B", "AU")): 92 | hess /= BOHR**2 93 | if args.dump_dir is None: 94 | dump_dir = os.path.dirname(fn) 95 | else: 96 | dump_dir = args.dump_dir 97 | dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) 98 | np.save(dump+".hessian.npy", hess) 99 | if args.verbose: 100 | print(fn, f"done, time = {time.time()-tic}") -------------------------------------------------------------------------------- /examples/iterate/combined.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | 4 | # number of iterations to do, can be set to zero for deephf training 5 | n_iter: 5 6 | 7 | # training and testing systems 8 | systems_train: # can also be files containing system paths 9 | - ../system/batch/set.0[0-5]* # supports glob 10 | - ../system/batch/set.060 11 | - ../system/batch/set.061 12 | - ../system/batch/set.062 13 | 14 | systems_test: # if empty, use the last system of the training set 15 | - ../system/batch/set.063 16 |
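# note (editor's illustrative estimate, not from the original file): with the
# scf_machine settings below and the 63 training systems listed above,
# sub_size 5 packs the systems into ceil(63/5) = 13 tasks, group_size 2
# gathers them into 7 submitted groups, and ingroup_parallel 2 lets two
# tasks of each group run at the same time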
17 | # directory setting 18 | workdir: "." 19 | share_folder: "share" # folder that stores all other settings 20 | 21 | # scf settings 22 | scf_input: # can also be specified by a separate file 23 | basis: ccpvdz 24 | # this is for force training 25 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 26 | verbose: 1 27 | mol_args: 28 | incore_anyway: True 29 | scf_args: 30 | conv_tol: 1e-6 31 | conv_tol_grad: 1e-2 32 | level_shift: 0.1 33 | diis_space: 20 34 | conv_check: false # pyscf conv_check has a bug 35 | 36 | scf_machine: 37 | sub_size: 5 # 5 systems will be in one task, default is 1 38 | group_size: 2 # 2 tasks will be gathered into one group and submitted together 39 | ingroup_parallel: 2 # this will set numb_node to 2 in resources 40 | dispatcher: 41 | context: local 42 | batch: slurm 43 | remote_profile: null # use lazy local 44 | resources: 45 | numb_node: 2 # parallel in two nodes 46 | time_limit: '24:00:00' 47 | cpus_per_task: 8 48 | mem_limit: 8 49 | envs: 50 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 51 | sub_res: # resources for each sub task 52 | cpus_per_task: 8 53 | python: "python" # use python in path 54 | 55 | # train settings 56 | train_input: 57 | # model_args is ignored, since this is used as a restart 58 | data_args: 59 | batch_size: 16 60 | group_batch: 1 61 | extra_label: true 62 | conv_filter: true 63 | conv_name: conv 64 | preprocess_args: 65 | preshift: false # restarting model is already shifted, will not recompute the shift value 66 | prescale: false # same as above 67 | prefit_ridge: 1e1 68 | prefit_trainable: false 69 | train_args: 70 | decay_rate: 0.5 71 | decay_steps: 1000 72 | display_epoch: 100 73 | force_factor: 0.1 74 | n_epoch: 5000 75 | start_lr: 0.0001 76 | 77 | train_machine: 78 | dispatcher: 79 | context: local 80 | batch: slurm 81 | remote_profile: null # use lazy local 82 | resources: 83 | time_limit: '24:00:00' 84 | cpus_per_task: 4 85 | numb_gpu: 1 86 | mem_limit: 8 87 | python: "python" # use python in path 88 | 89 | # init settings 90 | init_model: false # do not use existing model in share_folder/init/model.pth 91 | 92 | init_scf: 93 | basis: ccpvdz 94 | # this is for pure energy training 95 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 96 | verbose: 1 97 | mol_args: 98 | incore_anyway: True 99 | scf_args: 100 | conv_tol: 1e-8 101 | conv_check: false # pyscf conv_check has a bug 102 | 103 | init_train: 104 | model_args: # necessary as this is init training 105 | hidden_sizes: [200, 200, 200] 106 | output_scale: 100 107 | use_resnet: true 108 | actv_fn: mygelu 109 | data_args: 110 | batch_size: 16 111 | group_batch: 1 112 | preprocess_args: 113 | preshift: true 114 | prescale: false 115 | prefit_ridge: 1e1 116 | prefit_trainable: false 117 | train_args: 118 | decay_rate: 0.96 119 | decay_steps: 500 120 | display_epoch: 100 121 | n_epoch: 50000 122 | start_lr: 0.0003 123 | 124 | # other settings 125 | cleanup: false 126 | strict: true 127 | -------------------------------------------------------------------------------- /examples/iterate/splitted/args.yaml: -------------------------------------------------------------------------------- 1 | # all values here are default parameters 2 | # except for `scf_machine` which shows grouping and ingroup parallelization 3 | n_iter: 5 4 | systems_train: null # use share_folder/systems_train.raw (must exist) 5 | systems_test: null # use share_folder/systems_test.raw 6 | workdir: "."
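# note (editor's illustrative sketch, not from the original file): when
# systems_train / systems_test are null as above, the paths are read from
# plain-text files under share_folder, one system path per line, e.g.
# (hypothetical excerpt):
#   /path/to/system/batch/set.000
#   /path/to/system/batch/set.001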
7 | share_folder: "share" # folder that contains all other settings 8 | # scf parameters 9 | scf_input: true # use share_folder/scf_input.yaml (must exist) 10 | scf_machine: 11 | sub_size: 5 # 5 systems will be in one task, default is 1 12 | group_size: 2 # 2 tasks will be gathered into one group and submitted together 13 | ingroup_parallel: 2 # 2 tasks in one group submission can run together 14 | # train parameters 15 | train_input: true # use share_folder/train_input.yaml (must exist) 16 | train_machine: 17 | resources: # add 1 gpu 18 | numb_gpu: 1 19 | # init parameters 20 | init_model: false # do not use existing model in share_folder/init/model.pth 21 | init_scf: true # use share_folder/init_scf.yaml (must exist) 22 | init_train: true # use share_folder/init_train.yaml (must exist) 23 | # other settings 24 | cleanup: false 25 | strict: true 26 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/init_scf.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | model_file: null 3 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 4 | verbose: 1 5 | mol_args: 6 | incore_anyway: True 7 | scf_args: 8 | conv_tol: 1e-8 9 | conv_check: false 10 | #penalty_terms: 11 | # - type: coulomb 12 | # required_labels: dm 13 | # strength: 1 14 | # random: true 15 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/init_train.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | preprocess_args: 11 | preshift: true 12 | prescale: false 13 | prefit_ridge: 1e1 14 | prefit_trainable: false 15 | train_args: 16 | ckpt_file: model.pth 17 | decay_rate: 0.96 18 | decay_steps: 500 19 | display_epoch: 100 20 | n_epoch: 50000 21 | start_lr: 0.0001 22 | train_paths: 23 | - data_train/* 24 | test_paths: 25 | - data_test/* 26 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | model_file: model.pth 3 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 4 | verbose: 1 5 | mol_args: 6 | incore_anyway: True 7 | scf_args: 8 | conv_tol: 1e-6 9 | conv_tol_grad: 1e-2 10 | level_shift: 0.1 11 | diis_space: 20 12 | conv_check: false 13 | #penalty_terms: 14 | # - type: coulomb 15 | # required_labels: dm 16 | # strength: 1 17 | # random: true 18 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/systems_test.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.063 2 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/systems_train.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.000 2 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.001 3 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.002 4 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.003
5 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.004 6 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.005 7 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.006 8 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.007 9 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.008 10 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.009 11 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.010 12 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.011 13 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.012 14 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.013 15 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.014 16 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.015 17 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.016 18 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.017 19 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.018 20 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.019 21 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.020 22 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.021 23 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.022 24 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.023 25 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.024 26 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.025 27 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.026 28 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.027 29 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.028 30 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.029 31 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.030 32 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.031 33 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.032 34 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.033 35 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.034 36 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.035 37 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.036 38 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.037 39 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.038 40 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.039 41 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.040 42 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.041 43 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.042 44 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.043 45 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.044 46 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.045 47 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.046 48 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.047 49 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.048 50 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.049 51 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.050 52 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.051 53 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.052 54 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.053 55 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.054 56 | 
/scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.055 57 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.056 58 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.057 59 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.058 60 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.059 61 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.060 62 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.061 63 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/test/system/batch/set.062 64 | -------------------------------------------------------------------------------- /examples/iterate/splitted/share/train_input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | f_name: l_f_delta 11 | extra_label: true 12 | conv_filter: true 13 | preprocess_args: 14 | preshift: false 15 | prescale: false 16 | prefit_ridge: 1e1 17 | prefit_trainable: false 18 | train_args: 19 | ckpt_file: model.pth 20 | decay_rate: 0.5 21 | decay_steps: 1000 22 | display_epoch: 100 23 | force_factor: 0.1 24 | n_epoch: 5000 25 | start_lr: 0.0001 26 | train_paths: 27 | - data_train/* 28 | test_paths: 29 | - data_test/* 30 | -------------------------------------------------------------------------------- /examples/legacy/iter_linear/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import numpy as np 5 | import pyscf 6 | from pyscf import gto 7 | from sklearn import linear_model 8 | 9 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../../') 10 | from deepks.scf.scf import DSCF 11 | from deepks.scf.run import build_mol, solve_mol 12 | 13 | def get_linear_model(weig, wec): 14 | # too_small = weig.reshape(-1,108).std(0) < 1e-3 15 | wreg = linear_model.Ridge(1e-7, tol=1e-9) 16 | wreg.fit(weig.sum(1)[:], wec[:]) 17 | linear = torch.nn.Linear(108,1).double() 18 | linear.weight.data[:] = torch.from_numpy(wreg.coef_) 19 | linear.bias.data[:] = torch.tensor(wreg.intercept_ / 3) 20 | model = lambda x: linear(x).sum(1) 21 | return model 22 | 23 | def get_linear_model_normed(weig, wec, stdmin=1e-3): 24 | # too_small = weig.reshape(-1,108).std(0) < 1e-3 25 | input_scale = weig.reshape(-1,108).std(0).clip(stdmin) 26 | t_input_scale = torch.from_numpy(input_scale) 27 | weig /= input_scale 28 | wreg = linear_model.Ridge(1e-7, tol=1e-9) 29 | wreg.fit(weig.sum(1)[:], wec[:]) 30 | linear = torch.nn.Linear(108,1).double() 31 | linear.weight.data[:] = torch.from_numpy(wreg.coef_) 32 | linear.bias.data[:] = torch.tensor(wreg.intercept_ / 3) 33 | model = lambda x: linear(x / t_input_scale).sum(1) 34 | return model 35 | 36 | nmol = 1000 37 | ntrain = 900 38 | niter = 10 39 | 40 | mol_list = [build_mol(f'../path/to/data/water/geometry/{i:0>5}.xyz') for i in range(nmol)] 41 | ehfs = np.load('../path/to/data/water/rproj_mb2/e_hf.npy').reshape(-1)[:nmol] 42 | wene = np.loadtxt('../path/to/data/water/energy.dat', usecols=(1,2,3,4))[:nmol] 43 | erefs = wene[:,3] 44 | ecfs = ehfs 45 | ecs = erefs - ehfs 46 | ceigs = np.load('../../../data/tom_miller/water/rproj_mb2/dm_eig.npy')[:nmol] 47 | model = get_linear_model(ceigs[:ntrain], ecs[:ntrain]) 48 | 49 | os.makedirs('dump', exist_ok=True) 50 | np.save('dump/000.ehfs.npy', ehfs) 51 | np.save('dump/000.ecfs.npy', ecfs) 52 | 
np.save('dump/000.ceigs.npy', ceigs) 53 | np.save('dump/000.ecs.npy', ecs) 54 | np.save('dump/000.convs.npy', np.ones(ehfs.shape, dtype=bool)) 55 | 56 | for i in range(1, niter+1): 57 | oldecfs, oldceigs, oldehfs = ecfs, ceigs, ehfs 58 | oldecs = ecs 59 | oldmodel = model 60 | 61 | results = [solve_mol(mol, model) for mol in mol_list] 62 | meta, ehfs, ecfs, cdms, ceigs, convs = map(np.array, zip(*results)) 63 | ecs = erefs - ehfs 64 | model = get_linear_model(ceigs[:ntrain], ecs[:ntrain]) 65 | 66 | print((ecfs - erefs).mean(), np.abs(ecfs - erefs).mean()) 67 | 68 | np.save(f'dump/{i:0>3}.ehfs.npy', ehfs) 69 | np.save(f'dump/{i:0>3}.ecfs.npy', ecfs) 70 | np.save(f'dump/{i:0>3}.ceigs.npy', ceigs) 71 | np.save(f'dump/{i:0>3}.ecs.npy', ecs) 72 | np.save(f'dump/{i:0>3}.convs.npy', convs) 73 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.model.train import main as train_main 11 | from deepks.scf.run import main as scf_main 12 | from deepks.utils import load_yaml 13 | from deepks.task.task import PythonTask 14 | from deepks.task.workflow import Sequence, Iteration 15 | 16 | from pathlib import Path 17 | import shutil 18 | 19 | def collect_data(nmol, ntrain): 20 | ecf = np.load('results/e_tot.npy') 21 | assert ecf.size == nmol 22 | eref = np.load('e_ref.npy') 23 | 24 | err = eref.reshape(-1) - ecf.reshape(-1) 25 | convs = np.load("results/conv.npy").reshape(-1) 26 | print(f'converged calculation: {np.sum(convs)} / {nmol} = {np.sum(convs) / nmol:.3f}') 27 | print(f'mean error: {err.mean()}') 28 | print(f'mean absolute error: {np.abs(err).mean()}') 29 | print(f'mean absolute error after shift: {np.abs(err - err[:ntrain].mean()).mean()}') 30 | print(f' training: {np.abs(err[:ntrain] - err[:ntrain].mean()).mean()}') 31 | print(f' testing: {np.abs(err[ntrain:] - err[:ntrain].mean()).mean()}') 32 | 33 | ehf = np.load('results/e_base.npy') 34 | np.save('results/l_e_delta.npy', eref - ehf) 35 | 36 | dd = ['dm_eig.npy', 'l_e_delta.npy'] 37 | os.makedirs('train', exist_ok=True) 38 | os.makedirs('test', exist_ok=True) 39 | for d in dd: 40 | np.save(f"train/{d}", np.load(f'results/{d}')[:ntrain]) 41 | for d in dd: 42 | np.save(f"test/{d}", np.load(f'results/{d}')[ntrain:]) 43 | shutil.copy('results/system.raw', 'train') 44 | shutil.copy('results/system.raw', 'test') 45 | Path('train_paths.raw').write_text(str(Path('train').absolute())) 46 | Path('test_paths.raw').write_text(str(Path('test').absolute())) 47 | 48 | 49 | niter = 5 50 | nmol = 1500 51 | ntrain = 1000 52 | ntest = 500 53 | 54 | train_input = load_yaml('share/train_input.yaml') 55 | scf_input = load_yaml('share/scf_input.yaml') 56 | 57 | task_train = PythonTask(train_main, call_kwargs=train_input, 58 | outlog='log.train', 59 | workdir='00.train', 60 | link_prev_files=['train_paths.raw', 'test_paths.raw']) 61 | 62 | task_scf = PythonTask(scf_main, call_kwargs=scf_input, 63 | outlog='log.scf', 64 | workdir='01.scf', 65 | link_prev_files=['model.pth'], 66 | share_folder='share', link_share_files=['mol_files.raw']) 67 | 68 | task_data = PythonTask(collect_data, call_args=[nmol, ntrain], 69 | outlog='log.data', 70 | workdir='02.data', 71 | link_prev_files=['results'], 72 | share_folder='share', 
link_share_files=['e_ref.npy']) 73 | 74 | seq = Sequence([task_train, task_scf, task_data]) 75 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 76 | 77 | if Path('RECORD').exists(): 78 | iterate.restart() 79 | else: 80 | iterate.run() 81 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/run_res.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.model.train import main as train_main 11 | from deepks.model.test import main as train_test 12 | from deepks.scf.run import main as scf_main 13 | from deepks.scf.stats import collect_data_grouped 14 | from deepks.utils import load_yaml 15 | from deepks.task.task import PythonTask 16 | from deepks.task.workflow import Sequence, Iteration 17 | 18 | 19 | niter = 5 20 | nmol = 1500 21 | ntrain = 1000 22 | ntest = 500 23 | 24 | train_input = load_yaml('share/train_input.yaml') 25 | scf_input = load_yaml('share/scf_input.yaml') 26 | train_idx = np.arange(ntrain) 27 | 28 | task_scf = PythonTask(scf_main, call_kwargs=scf_input, 29 | outlog='log.scf', 30 | workdir='00.scf', 31 | link_prev_files=['model.pth'], 32 | share_folder='share', link_share_files=['mol_files.raw']) 33 | 34 | task_data = PythonTask(collect_data_grouped, call_args=[train_idx], 35 | outlog='log.data', 36 | workdir='01.data', 37 | link_prev_files=['model.pth', "results"], 38 | share_folder='share', link_share_files=['e_ref.npy']) 39 | 40 | task_train = PythonTask(train_main, call_args=["old_model.pth"], call_kwargs=train_input, 41 | outlog='log.train', 42 | workdir='02.train', 43 | link_prev_files=[('model.pth', 'old_model.pth'), 44 | 'train_paths.raw', 'test_paths.raw']) 45 | 46 | seq = Sequence([task_scf, task_data, task_train]) 47 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 48 | 49 | if os.path.exists('RECORD'): 50 | iterate.restart() 51 | else: 52 | iterate.run() 53 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/e_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_local/share/e_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/init/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_local/share/init/model.pth -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/init/test_paths.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_hf_dz/test 2 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/init/train_paths.raw: -------------------------------------------------------------------------------- 1 | /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_hf_dz/train 2 | 
-------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | conv_tol: 1e-7 3 | xyz_files: [mol_files.raw] 4 | model_file: model.pth 5 | dump_dir: results 6 | dump_fields: [e_base, e_tot, dm_eig, conv] 7 | group: true 8 | verbose: 1 9 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_local/share/train_input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: true 14 | prescale_clip: 0.05 15 | prefit_ridge: 1e1 16 | prefit_trainable: false 17 | train_args: 18 | ckpt_file: model.pth 19 | decay_rate: 0.96 20 | decay_steps: 300 21 | display_epoch: 100 22 | n_epoch: 30000 23 | start_lr: 0.0001 24 | train_paths: 25 | - train_paths.raw 26 | test_paths: 27 | - test_paths.raw 28 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/extra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.scf.stats import collect_data_grouped 11 | from deepks.task.task import PythonTask, BatchTask, GroupBatchTask 12 | from deepks.task.workflow import Sequence, Iteration 13 | 14 | nsys = 1 15 | niter = 25 16 | ntrain = 1000 17 | train_idx = np.arange(ntrain) 18 | 19 | # SCF 20 | 21 | scf_cmd_tmpl = " ".join([ 22 | "python -u /path/to/source/deepks/scf/main.py", 23 | "scf_input.yaml", 24 | "-m model.pth", 25 | "-s mol_files.raw", 26 | "-d results"]) 27 | 28 | envs = {"PYSCF_MAX_MEMORY": 16000} 29 | scf_res = {"cpus_per_task": 10, 30 | "time_limit": "6:00:00", 31 | "mem_limit": 16, 32 | "envs": envs} 33 | 34 | task_scf = GroupBatchTask( 35 | [BatchTask(scf_cmd_tmpl.format(i=i), 36 | workdir=".", #f'task.{i}', 37 | share_folder='share', 38 | link_share_files=['mol_files.raw', 39 | ('raw_scf_input.yaml', 'scf_input.yaml')]) 40 | for i in range(nsys)], 41 | workdir='00.scf', 42 | outlog='log.scf', 43 | resources=scf_res, 44 | link_prev_files=['model.pth']) 45 | 46 | # labeling 47 | 48 | task_data = PythonTask( 49 | lambda: [collect_data_grouped(train_idx=train_idx, 50 | append=True, 51 | ene_ref=f"e_ref.npy", 52 | force_ref=f"f_ref.npy", 53 | sys_dir=f"results") 54 | for i in range(nsys)], 55 | outlog='log.data', 56 | workdir='01.data', 57 | link_prev_files=['model.pth'] + [f"results" for i in range(nsys)], 58 | share_folder='share', 59 | link_share_files=[f'e_ref.npy' for i in range(nsys)] 60 | +[f'f_ref.npy' for i in range(nsys)]) 61 | 62 | # training 63 | 64 | train_cmd = " ".join([ 65 | "python -u /path/to/source/deepks/train/main.py", 66 | "train_input.yaml", 67 | "--restart old_model.pth"]) 68 | 69 | train_res = {"time_limit": "24:00:00", 70 | "mem_limit": 32, 71 | "numb_gpu": 1} 72 | 73 | task_train = BatchTask(cmds=train_cmd, 74 | outlog='log.train', 75 | workdir='02.train', 76 | resources=train_res, 77 | link_prev_files=[('model.pth', 'old_model.pth'), 78 | 'train_paths.raw', 'test_paths.raw'], 79 | share_folder = 
'share', 80 | link_share_files=["train_input.yaml"]) 81 | 82 | # combine 83 | 84 | seq = Sequence([task_scf, task_data, task_train]) 85 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 86 | 87 | if os.path.exists('RECORD'): 88 | iterate.restart() 89 | else: 90 | iterate.run() 91 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/init_train/input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: e_cc 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: true 13 | prescale: true 14 | prescale_clip: 0.05 15 | prefit_ridge: 1e1 16 | prefit_trainable: false 17 | train_args: 18 | ckpt_file: model.pth 19 | decay_rate: 0.96 20 | decay_steps: 300 21 | display_epoch: 100 22 | n_epoch: 30000 23 | start_lr: 0.0001 24 | train_paths: 25 | - /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_ccsd_dz/train.1000 26 | test_paths: 27 | - /scratch/gpfs/yixiaoc/yixiaoc/deep.qc/data/sGDML/malonaldehyde/proj_ccsd_dz/test.1000 28 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/init_train/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_new/init_train/model.pth -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import numpy as np 7 | 8 | # sys.path.append('/path/to/source') 9 | import deepks 10 | from deepks.scf.stats import collect_data_grouped 11 | from deepks.task.task import PythonTask, BatchTask, GroupBatchTask 12 | from deepks.task.workflow import Sequence, Iteration 13 | 14 | nsys = 1 15 | niter = 10 16 | ntrain = 1000 17 | train_idx = np.arange(ntrain) 18 | 19 | # SCF 20 | 21 | scf_cmd_tmpl = " ".join([ 22 | "python -u /path/to/source/deepks/scf/main.py", 23 | "scf_input.yaml", 24 | "-m model.pth", 25 | "-s mol_files.raw", 26 | "-d results"]) 27 | 28 | envs = {"PYSCF_MAX_MEMORY": 16000} 29 | scf_res = {"cpus_per_task": 10, 30 | "time_limit": "6:00:00", 31 | "mem_limit": 16, 32 | "envs": envs} 33 | 34 | task_scf = GroupBatchTask( 35 | [BatchTask(scf_cmd_tmpl.format(i=i), 36 | workdir=".", #f'task.{i}', 37 | share_folder='share', 38 | link_share_files=['mol_files.raw', 'scf_input.yaml']) 39 | for i in range(nsys)], 40 | workdir='00.scf', 41 | outlog='log.scf', 42 | resources=scf_res, 43 | link_prev_files=['model.pth']) 44 | 45 | # labeling 46 | 47 | task_data = PythonTask( 48 | lambda: [collect_data_grouped(train_idx=train_idx, 49 | append=True, 50 | ene_ref=f"e_ref.npy", 51 | force_ref=f"f_ref.npy", 52 | sys_dir=f"results") 53 | for i in range(nsys)], 54 | outlog='log.data', 55 | workdir='01.data', 56 | link_prev_files=['model.pth'] + [f"results" for i in range(nsys)], 57 | share_folder='share', 58 | link_share_files=[f'e_ref.npy' for i in range(nsys)] 59 | +[f'f_ref.npy' for i in range(nsys)]) 60 | 61 | # training 62 | 63 | train_cmd = " ".join([ 64 | "python -u /path/to/source/deepks/train/main.py", 65 | "train_input.yaml", 66 | 
"--restart old_model.pth"]) 67 | 68 | train_res = {"time_limit": "24:00:00", 69 | "mem_limit": 32, 70 | "numb_gpu": 1} 71 | 72 | task_train = BatchTask(cmds=train_cmd, 73 | outlog='log.train', 74 | workdir='02.train', 75 | resources=train_res, 76 | link_prev_files=[('model.pth', 'old_model.pth'), 77 | 'train_paths.raw', 'test_paths.raw'], 78 | share_folder = 'share', 79 | link_share_files=["train_input.yaml"]) 80 | 81 | # combine 82 | 83 | seq = Sequence([task_scf, task_data, task_train]) 84 | iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') 85 | 86 | if os.path.exists('RECORD'): 87 | iterate.restart() 88 | else: 89 | iterate.run() 90 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/e_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_new/share/e_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/f_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_new/share/f_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/init/model.pth: -------------------------------------------------------------------------------- 1 | ../../init_train/model.pth -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/raw_scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | systems: [mol_files.raw] 3 | model_file: model.pth 4 | dump_dir: results 5 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx] 6 | group: true 7 | verbose: 1 8 | scf_args: 9 | conv_tol: 1e-7 10 | conv_tol_grad: 3e-3 11 | level_shift: 0.1 12 | diis_space: 20 13 | conv_check: false 14 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/scf_input.yaml: -------------------------------------------------------------------------------- 1 | basis: ccpvdz 2 | systems: [mol_files.raw] 3 | model_file: model.pth 4 | dump_dir: results 5 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx] 6 | group: true 7 | verbose: 1 8 | scf_args: 9 | conv_tol: 1e-7 10 | conv_tol_grad: 3e-3 11 | level_shift: 0.1 12 | diis_space: 20 13 | conv_check: false 14 | penalty_terms: 15 | - type: coulomb 16 | required_labels: [dm] 17 | strength: 1 18 | random: true 19 | 20 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_new/share/train_input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | f_name: l_f_delta 11 | extra_label: true 12 | preprocess_args: 13 | preshift: false 14 | prescale: false 15 | prefit_ridge: 1e1 16 | prefit_trainable: false 17 | train_args: 18 | ckpt_file: model.pth 19 | decay_rate: 0.7 20 | decay_steps: 1000 21 | display_epoch: 100 22 | force_factor: 0.1 23 | n_epoch: 10000 24 | start_lr: 0.0001 25 
| train_paths: 26 | - train_paths.raw 27 | test_paths: 28 | - test_paths.raw 29 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import glob 7 | import numpy as np 8 | 9 | # sys.path.append('/path/to/source') 10 | import deepks 11 | from deepks.task.task import PythonTask 12 | from deepks.task.task import ShellTask 13 | from deepks.task.task import BatchTask 14 | from deepks.task.task import GroupBatchTask 15 | from deepks.task.workflow import Sequence 16 | from deepks.task.workflow import Iteration 17 | from deepks.scf.stats import collect_data 18 | 19 | 20 | niter = 20 21 | 22 | # Define Training 23 | nmodel = 4 24 | 25 | train_res = {"time_limit": "24:00:00", 26 | "mem_limit": 32, 27 | "numb_gpu": 1} 28 | 29 | train_cmd = "python -u /path/to/source/deepks/train/main.py input.yaml" 30 | 31 | batch_train = [BatchTask(cmds=train_cmd, 32 | workdir=f'task.{i:02}', 33 | share_folder="share", 34 | link_share_files=["input.yaml"], 35 | link_prev_files=['train_paths.raw', 'test_paths.raw']) 36 | for i in range(nmodel)] 37 | run_train = GroupBatchTask(batch_train, 38 | resources=train_res, 39 | outlog="log.train") 40 | 41 | post_train = ShellTask("ln -s task.00/model.pth .") 42 | 43 | clean_train = ShellTask("rm slurm-*.out") 44 | 45 | train_flow = Sequence([run_train, post_train, clean_train], workdir='00.train') 46 | 47 | 48 | # Define SCF 49 | ngroup = 24 50 | ntrain = 3000 51 | 52 | mol_files = np.loadtxt('share/mol_files.raw', dtype=str) 53 | group_files = [mol_files[i::ngroup] for i in range(ngroup)] 54 | 55 | envs = {"PYSCF_MAX_MEMORY": 32000} 56 | scf_res = {"cpus_per_task": 5, 57 | "time_limit": "24:00:00", 58 | "mem_limit": 32, 59 | "envs": envs} 60 | 61 | remote = {"work_path": '/home/yixiaoc/SCR/yixiaoc/tmp', 62 | "hostname": "della", 63 | "username": "yixiaoc", 64 | "port": 22} 65 | disp = {"context_type": 'ssh', 66 | "batch_type": 'slurm', 67 | "remote_profile": remote} 68 | 69 | cmd_templ = " ".join([ 70 | "python -u /path/to/source/deepks/scf/main.py", 71 | "{mol_files}", 72 | "-m ../model.pth", 73 | "-d ../results", 74 | "-B ccpvdz", 75 | "--verbose 1", 76 | "--conv-tol 1e-6", 77 | "--conv-tol-grad 3e-2" 78 | ]) 79 | 80 | batch_scf = [BatchTask(cmds=cmd_templ.format(mol_files=" ".join(gf)), 81 | workdir=f'task.{i:02}', 82 | backward_files=['log.scf', 'err']) 83 | for i, gf in enumerate(group_files)] 84 | run_scf = GroupBatchTask(batch_scf, 85 | dispatcher=disp, 86 | resources=scf_res, 87 | outlog="log.scf", 88 | link_prev_files=['model.pth'], 89 | forward_files=['model.pth'], 90 | backward_files=['results/*']) 91 | 92 | all_idx = np.loadtxt('share/index.raw', dtype=int) 93 | train_idx = all_idx[:ntrain] 94 | test_idx = all_idx[ntrain:] 95 | 96 | post_scf = PythonTask(collect_data, call_args=[train_idx, test_idx], 97 | call_kwargs={"sys_dir": "results", "ene_ref": "e_ref.npy"}, 98 | outlog='log.data', 99 | share_folder='share', 100 | link_share_files=['e_ref.npy']) 101 | 102 | clean_scf = ShellTask("rm slurm-*.out") 103 | 104 | scf_flow = Sequence([run_scf, post_scf, clean_scf], workdir='01.scf') 105 | 106 | 107 | # Group them together 108 | per_iter = Sequence([train_flow, scf_flow]) 109 | iterate = Iteration(per_iter, niter, init_folder='share/init', record_file='RECORD') 110 | 111 | if os.path.exists('RECORD'): 112 | iterate.restart() 113 | 
else: 114 | iterate.run() 115 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/run_res.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import glob 7 | import numpy as np 8 | 9 | # sys.path.append('/path/to/source') 10 | import deepks 11 | from deepks.task.task import PythonTask 12 | from deepks.task.task import ShellTask 13 | from deepks.task.task import BatchTask 14 | from deepks.task.task import GroupBatchTask 15 | from deepks.task.workflow import Sequence 16 | from deepks.task.workflow import Iteration 17 | from deepks.scf.stats import collect_data 18 | 19 | 20 | niter = 5 21 | ntrain = 7000 22 | 23 | # Define Training 24 | nmodel = 4 25 | 26 | train_res = {"time_limit": "6:00:00", 27 | "mem_limit": 32, 28 | "numb_gpu": 1} 29 | 30 | train_cmd = "python -u /path/to/source/deepks/train/main.py input.yaml --restart ../old_model.pth" 31 | 32 | batch_train = [BatchTask(cmds=train_cmd, 33 | workdir=f'task.{i:02}', 34 | share_folder="share", 35 | link_share_files=["input.yaml"], 36 | link_prev_files=['train_paths.raw', 'test_paths.raw']) 37 | for i in range(nmodel)] 38 | run_train = GroupBatchTask(batch_train, 39 | resources=train_res, 40 | outlog="log.train", 41 | link_prev_files=[('model.pth', 'old_model.pth')]) 42 | 43 | post_train = ShellTask("ln -s task.00/model.pth .") 44 | 45 | clean_train = ShellTask("rm slurm-*.out") 46 | 47 | train_flow = Sequence([run_train, post_train, clean_train], workdir='00.train') 48 | 49 | 50 | # Define SCF 51 | ngroup = 12 52 | 53 | mol_files = np.loadtxt('share/mol_files.raw', dtype=str) 54 | group_files = [mol_files[i::ngroup] for i in range(ngroup)] 55 | 56 | envs = {"PYSCF_MAX_MEMORY": 32000} 57 | scf_res = {"cpus_per_task": 5, 58 | "time_limit": "6:00:00", 59 | "mem_limit": 32, 60 | "envs": envs} 61 | 62 | remote = {"work_path": '/home/yixiaoc/SCR/yixiaoc/tmp', 63 | "hostname": "della", 64 | "username": "yixiaoc", 65 | "port": 22} 66 | disp = {"context_type": 'ssh', 67 | "batch_type": 'slurm', 68 | "remote_profile": remote} 69 | 70 | cmd_templ = " ".join([ 71 | "python -u /path/to/source/deepks/scf/main.py", 72 | "{mol_files}", 73 | "-m ../model.pth", 74 | "-d ../results", 75 | "-B ccpvdz", 76 | "--verbose 1", 77 | "--conv-tol 1e-6", 78 | "--conv-tol-grad 3e-2" 79 | ]) 80 | 81 | batch_scf = [BatchTask(cmds=cmd_templ.format(mol_files=" ".join(gf)), 82 | workdir=f'task.{i:02}', 83 | backward_files=['log.scf', 'err']) 84 | for i, gf in enumerate(group_files)] 85 | run_scf = GroupBatchTask(batch_scf, 86 | dispatcher=disp, 87 | resources=scf_res, 88 | outlog="log.scf", 89 | link_prev_files=['model.pth'], 90 | forward_files=['model.pth'], 91 | backward_files=['results/*']) 92 | 93 | all_idx = np.loadtxt('share/index.raw', dtype=int) 94 | train_idx = all_idx[:ntrain] 95 | test_idx = all_idx[ntrain:] 96 | 97 | post_scf = PythonTask(collect_data, call_args=[train_idx, test_idx], 98 | call_kwargs={"sys_dir": "results", "ene_ref": "e_ref.npy"}, 99 | outlog='log.data', 100 | share_folder='share', 101 | link_share_files=['e_ref.npy']) 102 | 103 | clean_scf = ShellTask("rm slurm-*.out") 104 | 105 | scf_flow = Sequence([run_scf, post_scf, clean_scf], workdir='01.scf') 106 | 107 | 108 | # Group them together 109 | per_iter = Sequence([train_flow, scf_flow]) 110 | iterate = Iteration(per_iter, niter, init_folder='share/init', record_file='RECORD') 111 | 112 | if os.path.exists('RECORD'): 
113 | iterate.restart() 114 | else: 115 | iterate.run() 116 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/share/e_ref.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/legacy/iter_nn_slurm/share/e_ref.npy -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/share/input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.96 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 30000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/legacy/iter_nn_slurm/share/test.sh: -------------------------------------------------------------------------------- 1 | mkdir test 2 | python /path/to/source/deepks/train/test.py -m model.pth -d `cat train_paths.raw` -o test/train 3 | python /path/to/source/deepks/train/test.py -m model.pth -d `cat test_paths.raw` -o test/test 4 | -------------------------------------------------------------------------------- /examples/legacy/train_active_learning/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | from glob import glob 7 | import numpy as np 8 | 9 | # sys.path.append('/path/to/source') 10 | import deepks 11 | from deepks.task.task import PythonTask 12 | from deepks.task.task import ShellTask 13 | from deepks.task.task import BatchTask 14 | from deepks.task.task import GroupBatchTask 15 | from deepks.task.workflow import Sequence 16 | from deepks.task.workflow import Iteration 17 | 18 | 19 | # define key parameters 20 | nsel = 200 21 | nmodel = 4 22 | niter = 21 23 | 24 | # define training task 25 | train_res = {"time_limit": "24:00:00", 26 | "mem_limit": 32, 27 | "numb_gpu": 1} 28 | 29 | disp = {"context_type": 'local', 30 | "batch_type": 'slurm'} 31 | 32 | train_cmd = "python -u /path/to/source/deepks/train/main.py input.yaml" 33 | 34 | batch_train = [BatchTask(cmds=train_cmd, 35 | workdir=f'model.{i:02}', 36 | share_folder="share", 37 | link_share_files=["input.yaml"]) 38 | for i in range(nmodel)] 39 | task_train = GroupBatchTask(batch_train, 40 | resources=train_res, 41 | dispatcher=disp, 42 | outlog="log.train", 43 | errlog="err.train", 44 | link_prev_files=[('new_train_paths.raw', 'train_paths.raw'), 45 | ('new_test_paths.raw', 'test_paths.raw')]) 46 | 47 | 48 | # define testing task 49 | test_cmd = "srun -N 1 -t 1:00:00 --gres=gpu:1 bash test_model.sh 1> log.test 2> err.test" 50 | task_test = ShellTask(test_cmd, 51 | share_folder="share", 52 | link_share_files=["test_model.sh"]) 53 | 54 | 55 | # define selecting task 56 | def select_data(nsel): 57 | paths = glob("model.*") 58 | old_trn = np.loadtxt("train_paths.raw", dtype=str) 59 | old_tst = np.loadtxt("test_paths.raw", dtype=str) 60 | trn_res =
np.stack([np.loadtxt(f"{m}/test/train.all.out")[:,1] for m in paths], -1) 61 | tst_res = np.stack([np.loadtxt(f"{m}/test/test.all.out")[:,1] for m in paths], -1) 62 | 63 | tst_std = np.std(tst_res, axis=-1) 64 | order = np.argsort(tst_std)[::-1] 65 | sel = order[:nsel] 66 | rst = np.sort(order[nsel:]) 67 | 68 | new_trn = np.concatenate([old_trn, old_tst[sel]]) 69 | new_tst = old_tst[rst] 70 | np.savetxt("new_train_paths.raw", new_trn, fmt="%s") 71 | np.savetxt("new_test_paths.raw", new_tst, fmt="%s") 72 | 73 | task_select = PythonTask(select_data, call_args=[nsel]) 74 | 75 | 76 | # combine them together 77 | iterate = Iteration([task_train, task_test, task_select], niter, init_folder='share/init', record_file='RECORD') 78 | 79 | if os.path.exists('RECORD'): 80 | iterate.restart() 81 | else: 82 | iterate.run() 83 | -------------------------------------------------------------------------------- /examples/legacy/train_active_learning/share/input.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig, se_eig, fe_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.96 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 50000 22 | start_lr: 0.0001 23 | train_paths: 24 | - ../train_paths.raw 25 | test_paths: 26 | - ../test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/legacy/train_active_learning/share/test_model.sh: -------------------------------------------------------------------------------- 1 | for fd in model.*; do mkdir $fd/test; done 2 | 3 | echo training set 4 | python /path/to/source/deepks/train/test.py -m model*/model.pth -d `cat train_paths.raw` -o test/train -D dm_eig se_eig fe_eig 5 | 6 | echo testing set 7 | python /path/to/source/deepks/train/test.py -m model*/model.pth -d `cat test_paths.raw` -o test/test -D dm_eig se_eig fe_eig 8 | -------------------------------------------------------------------------------- /examples/train_input/extended.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig, se_eig, fe_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.96 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 50000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/train_input/force.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: mygelu 6 | data_args: 7 | batch_size: 16 8 | group_batch: 1 9 | e_name: l_e_delta 10 | d_name: dm_eig 11 | f_name: l_f_delta 12 | # gvx_name: grad_vx # experimental dm training 13 | extra_label: true 14 | conv_name: conv 15 | preprocess_args: 16 | preshift: 
false 17 | prescale: false 18 | prefit_ridge: 1e1 19 | prefit_trainable: false 20 | train_args: 21 | ckpt_file: model.pth 22 | decay_rate: 0.5 23 | decay_steps: 1000 24 | display_epoch: 100 25 | force_factor: 0.1 26 | n_epoch: 5000 27 | start_lr: 0.0001 28 | train_paths: 29 | - data_train/* 30 | test_paths: 31 | - data_test/* 32 | -------------------------------------------------------------------------------- /examples/train_input/gelu.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e1 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.98 19 | decay_steps: 500 20 | display_epoch: 100 21 | n_epoch: 100000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/train_input/restart.yaml: -------------------------------------------------------------------------------- 1 | model_args: 2 | hidden_sizes: [100, 100, 100] 3 | output_scale: 100 4 | use_resnet: true 5 | actv_fn: gelu 6 | data_args: 7 | batch_size: 1 8 | group_batch: 16 9 | e_name: l_e_delta 10 | d_name: [dm_eig] 11 | preprocess_args: 12 | preshift: false 13 | prescale: false 14 | prefit_ridge: 1e0 15 | prefit_trainable: false 16 | train_args: 17 | ckpt_file: model.pth 18 | decay_rate: 0.5 19 | decay_steps: 1000 20 | display_epoch: 100 21 | n_epoch: 5000 22 | start_lr: 0.0001 23 | train_paths: 24 | - train_paths.raw 25 | test_paths: 26 | - test_paths.raw 27 | -------------------------------------------------------------------------------- /examples/water_cluster/.gitignore: -------------------------------------------------------------------------------- 1 | iter.* 2 | share 3 | log.* 4 | err.* 5 | RECORD 6 | PID 7 | test_results 8 | -------------------------------------------------------------------------------- /examples/water_cluster/README.md: -------------------------------------------------------------------------------- 1 | # Example of water cluster 2 | 3 | We provide here a detailed example of generating a DeePHF or DeePKS functional for water clusters, and demonstrate its generalizability with a test on proton transfer of a water hexamer ring. 4 | 5 | Here we take `args.yaml` as the configuration file. The iteration can be run directly by executing the [`./run.sh`](./run.sh) file, which contains the following lines: 6 | ```bash 7 | nohup python -u -m deepks iterate args.yaml >> log.iter 2> err.iter & 8 | echo $! > PID 9 | ``` 10 | that run the iterative learning procedure in the background and record its PID in the designated file. 11 | Note that we use `python -u -m deepks` to turn off Python's output buffering. You can also use `deepks` or `dks` directly if you have installed it properly. 12 | 13 | Here we are using Slurm to schedule jobs. If Slurm is not available, please execute [`./run_shell.sh`](./run_shell.sh) to run on the local machine. In the following sections we provide a walkthrough on how to write the arguments for the deepks input in [`args.yaml`](./args.yaml). You can also take a look at that file for explanations of each specific parameter.
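While the iteration runs in the background, a couple of standard shell commands are enough to follow or stop it. This is a minimal sketch; it only assumes the `log.iter` and `PID` files written by `run.sh`:
```bash
tail -f log.iter     # follow the iteration log as it is written
kill "$(cat PID)"    # stop the background run recorded by run.sh
```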
14 | 15 | ## System preparation 16 | 17 | We use randomly generated water monomers, dimers and trimers as training datasets. Each dataset contains 100 near-equilibrium configurations. We also include 50 tetramers as a validation dataset. We use energy and force as labels. The reference values are given by CCSD calculations with the cc-pVDZ basis. The system configurations and corresponding labels are grouped into different folders by the number of atoms, following the convention described in [another example](../water_single/README.md). Note that the default length unit in deepks is Bohr. The systems we provide here are in Angstrom, so we add a `unit.raw` file containing "Angstrom" in each system folder to override the default unit. The paths to the folders can be specified in the config file as follows: 18 | ```yaml 19 | systems_train: # can also be files containing system paths 20 | - ./systems/train.n[1-3] 21 | systems_test: # if empty, use the last system of training set 22 | - ./systems/valid.n4 23 | ``` 24 | 25 | ## Initialization (DeePHF model) 26 | 27 | As a first step, we need to train an energy model as the starting point of the iterative learning procedure. This consists of two steps. First, we solve the systems using the baseline method such as HF or PBE and dump the descriptors needed for training the energy model. Second, we conduct the training from scratch using the previously dumped descriptors. If there is already an existing model, this step can be skipped by providing the path of the model to the `init_model` key. 28 | 29 | The energy model generated in this step is also a ready-to-use DeePHF model, saved at `iter.init/01.train/model.pth`. If self-consistency is not needed, the remaining iteration steps can be skipped. We do not use forces as labels when training the energy model in this example. 30 | 31 | The parameters of the init SCF calculation are specified under the `init_scf` key. The same set of parameters is also accepted as a standalone file by the `deepks scf` command when running SCF calculations directly. We use cc-pVDZ as the calculation basis. The required fields to be dumped are `dm_eig` for descriptors and `l_e_delta` for reference correction energies as labels. In addition, we also include `e_tot` for the total energy and `conv` as a record of convergence. 32 | ```yaml 33 | dump_fields: [dm_eig, l_e_delta, conv, e_tot] 34 | ``` 35 | Additional parameters for the molecule and SCF calculation can also be provided via the `mol_args` and `scf_args` keys, and will be passed directly to the corresponding interfaces in PySCF. 36 | 37 | The parameters of the init training are specified under the `init_train` key. Similarly, the parameters can also be passed to the `deepks train` command as a standalone file. In `model_args`, we configure the neural network with three hidden layers and 100 neurons per layer, using the GELU activation function and skip connections. We also scale the output correction energies by a factor of 100 so that they are of order one and easier to learn. In `preprocess_args`, the descriptors are set to be preprocessed to have zero mean on the training set. A prefitted ridge regression with penalty strength 10 is also added to the model to speed up training. We set the batch size in `data_args` to 16, and the total number of training epochs in `train_args` to 50000. The learning rate starts at 3e-4 and decays by a factor of 0.96 every 500 steps.
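Put together, the `init_train` section takes roughly the following shape. This is a sketch mirroring the values quoted above; the shipped [`args.yaml`](./args.yaml) in this folder uses a shorter training schedule, see the comments there:
```yaml
init_train:
  model_args:
    hidden_sizes: [100, 100, 100]
    output_scale: 100
    use_resnet: true
    actv_fn: gelu
  data_args:
    batch_size: 16
  preprocess_args:
    preshift: true
    prefit_ridge: 1e1
  train_args:
    n_epoch: 50000
    start_lr: 0.0003
    decay_rate: 0.96
    decay_steps: 500
```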
38 | 39 | ## Iterative learning (DeePKS model) 40 | 41 | For self-consistency, we take the model acquired in the last step and perform several additional iterations of SCF calculation and NN training. The number of iterations is set to 10 via the `n_iter` key. If it is set to 0, no iteration will be performed, which gives the DeePHF model. In the iterative learning procedure, we also include forces as labels to improve accuracy. 42 | 43 | The SCF parameters are provided in the `scf_input` key, following the same rules as the `init_scf` key. In order to use forces as labels, we additionally dump `grad_vx` for the gradients of the descriptors and `l_f_delta` for the reference correction forces. `f_tot` is also included for the total force results. 44 | ```yaml 45 | dump_fields: [conv, e_tot, dm_eig, l_e_delta, f_tot, grad_vx, l_f_delta] 46 | ``` 47 | Due to the complexity of the neural network functional, we use looser (but still accurate enough) convergence criteria in `scf_args`, setting `conv_tol` to 1e-6. 48 | 49 | The training parameters are provided in the `train_input` key, similar to `init_train`. But since we are restarting from the existing model, no `model_args` is needed, and the preprocessing procedure can be turned off. In addition, we add `extra_label: true` in `data_args` and `force_factor: 1` in `train_args` to enable using forces in training. The total number of training epochs is also reduced to 5000. The learning rate starts at 1e-4 and decays by a factor of 0.5 every 1000 steps. 50 | 51 | ## Machine settings 52 | 53 | How the SCF and training tasks are executed is specified in `scf_machine` and `train_machine`, respectively. Currently, both the initial and the following iterations share the same machine settings. In this example, we run our tasks on a local computing cluster with Slurm as the job scheduler. The platform to run the tasks is specified under the `dispatcher` key, and the computing resources assigned to each task are specified under `resources`. These settings differ across computing platforms. We provide here our `train_machine` settings as an example: 54 | ```yaml 55 | dispatcher: 56 | context: local 57 | batch: slurm # set to "shell" to run on local machine 58 | remote_profile: null # unnecessary in local context 59 | resources: 60 | time_limit: '24:00:00' 61 | cpus_per_task: 4 62 | numb_gpu: 1 63 | mem_limit: 8 # gigabyte 64 | python: "python" # use python in path 65 | ``` 66 | where we assign four CPU cores and one GPU to the training task, and set its time limit to 24 hours and memory limit to 8 GB. The detailed settings available for `dispatcher` and `resources` can be found in the documentation of the DP-GEN software, with a slightly different interface. 67 | 68 | If there is no Slurm scheduler available, DeePKS-kit can also be run on a local machine with vanilla shell scripts, simply by setting `batch: shell`. Please check [`shell.yaml`](./shell.yaml) for an example. In that case, `resources` will be ignored and all available resources on the machine will be used. Support for more scheduler systems will also be implemented in the future. 69 | 70 | ## Testing the model 71 | 72 | During each iteration of the learning procedure, a brief summary of the accuracy of the SCF calculation can be found in `iter.xx/00.scf/log.data`. Average energy and force (if applicable) errors are shown for both the training and validation datasets. The results of the SCF calculations are also stored in `iter.xx/00.scf/data_train` and `iter.xx/00.scf/data_test`, grouped by training and testing systems.
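These dumped results can also be inspected by hand. Below is a minimal sketch that recomputes the energy error of the final iteration; it assumes each dumped field is stored as a `<field>.npy` array per system folder and that the `data_test` subfolders are named after the source systems (both are assumptions about the on-disk layout, adjust to what your run actually produces):
```python
import os
from glob import glob
import numpy as np

for d in sorted(glob("iter.09/00.scf/data_test/*")):
    name = os.path.basename(d)
    conv = np.load(os.path.join(d, "conv.npy")).reshape(-1).astype(bool)
    e_tot = np.load(os.path.join(d, "e_tot.npy")).reshape(-1)
    e_ref = np.load(os.path.join("systems", name, "energy.npy")).reshape(-1)
    mae = np.abs(e_tot - e_ref)[conv].mean()  # energy MAE on converged frames, in Hartree
    print(f"{name}: {conv.mean():.1%} converged, energy MAE = {mae:.2e} Ha")
```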
73 | 74 | After finishing the 10 iterations, the resulting DeePKS model can be found at `iter.09/01.train/model.pth`. The model can be used either in a Python script that creates the extended PySCF class, or directly through the `deepks scf` command. As a testing example, we run the SCF calculation using the learned DeePKS model on the simultaneous six-proton-transfer path of a water hexamer ring. 75 | The command can be found in [`test.sh`](./test.sh). 76 | The results of each configuration during the proton transfer are grouped in the `test_results` folder. 77 | 78 | We can see that all the predicted energies fall within the chemical accuracy range of the reference values given by the CCSD calculation. Note that none of the training datasets include dissociated configurations like those encountered during the proton transfer. The DeePKS model trained on up to three water molecules nevertheless exhibits good transferability, even in the bond-breaking case. -------------------------------------------------------------------------------- /examples/water_cluster/args.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | n_iter: 10 4 | 5 | # training and testing systems 6 | systems_train: # can also be files containing system paths 7 | - ./systems/train.n[1-3] 8 | 9 | systems_test: # if empty, use the last system of training set 10 | - ./systems/valid.n4 11 | 12 | # directory setting 13 | workdir: "." 14 | share_folder: "share" # folder that stores all other settings 15 | 16 | # scf settings 17 | scf_input: # can also be specified by a separate file 18 | basis: ccpvdz 19 | # this is for force training 20 | # the following properties will be dumped in the data folder 21 | # please refer to https://arxiv.org/abs/2012.14615 for a detailed explanation of each field 22 | dump_fields: [atom, e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 23 | verbose: 1 24 | # parameters that will be passed directly to pyscf Mol class 25 | mol_args: 26 | incore_anyway: True 27 | # parameters that will be passed directly to pyscf SCF class 28 | scf_args: 29 | conv_tol: 1e-6 30 | conv_tol_grad: 3e-2 31 | level_shift: 0.1 32 | diis_space: 20 33 | conv_check: false # pyscf conv_check has a bug 34 | 35 | scf_machine: 36 | # number of systems that will be put in one task, default is 1 37 | # a task corresponds to a set of commands, and is the smallest unit to be tracked 38 | sub_size: 1 39 | # 2 tasks will be gathered into one group and submitted together 40 | # a group corresponds to a job submitted to the scheduler system 41 | group_size: 2 42 | # if larger than 1, run n tasks in parallel in one group (one job) 43 | ingroup_parallel: 1 44 | # the parameters determining the machine settings that the jobs are running on 45 | dispatcher: 46 | # "local" to run on the local machine, or "ssh" to run on a remote machine 47 | context: local 48 | # "slurm" to use the slurm scheduler system, or "shell" to just use shell 49 | batch: slurm 50 | # only needed when using "ssh" in context 51 | # pass a dict like {username: USERNAME, password: PASSWORD, work_path: /path/to/tmp/folder} 52 | remote_profile: null 53 | # the parameters determining the resources allocated for each job (group of tasks) 54 | # only needed when batch is set to "slurm" 55 | # for shell users, all
available resources will be used automatically 56 | resources: 57 | # only set this larger than 1 when parallelizing over multiple nodes with `ingroup_parallel` 58 | # otherwise please keep it at 1, since pyscf does not support MPI and can only run on a single node 59 | numb_node: 1 60 | time_limit: '24:00:00' 61 | cpus_per_task: 8 62 | mem_limit: 8 #GB 63 | # environment variables 64 | envs: 65 | PYSCF_MAX_MEMORY: 8000 #MB, increase from the default 4G to 8G to match the mem_limit above 66 | # resources for each sub task in jobs (groups of tasks) 67 | # only needed when ingroup_parallel is larger than 1 68 | # the resources are reallocated between parallel tasks 69 | sub_res: 70 | cpus_per_task: 8 71 | python: "python" # use python in path 72 | 73 | # training settings 74 | train_input: 75 | # model_args is ignored, since this is used as restart 76 | # see init_train for potential model_args 77 | data_args: 78 | # training batch size, 16 is recommended 79 | batch_size: 16 80 | # if larger than 1, n batches will be grouped together to form a larger one 81 | # the final batch size would be group_batch * batch_size 82 | # only needed when many systems have only one datapoint, hence their batch size can only be 1 83 | group_batch: 1 84 | # if set to true, will try to find force labels and use them in training 85 | extra_label: true 86 | # if set to true, will read the convergence data from conv_name 87 | # and only use converged datapoints to train 88 | conv_filter: true 89 | conv_name: conv 90 | # to speed up training, deepks supports normalizing the data first (preshift and prescale) 91 | # and doing a linear regression on the whole training set as prefitting 92 | preprocess_args: 93 | preshift: false # restarting model already shifted. Will not recompute shift value 94 | prescale: false # same as above 95 | # prefitting is by default enabled 96 | prefit_ridge: 1e1 # the ridge factor used in linear regression 97 | prefit_trainable: false # keep the linear regression fixed during the training 98 | train_args: 99 | # the starting learning rate, will decay later 100 | start_lr: 0.0001 101 | # lr will decay by a factor of `decay_rate` every `decay_steps` epochs 102 | decay_rate: 0.5 103 | decay_steps: 1000 104 | # show training results every n epochs 105 | display_epoch: 100 106 | # the prefactor multiplying the force part of the loss 107 | force_factor: 1 108 | # total number of epochs needed in training 109 | n_epoch: 5000 110 | 111 | train_machine: 112 | # for training, no tasks or groups are needed since there's only one task 113 | # the dispatcher settings are the same as above 114 | dispatcher: 115 | context: local 116 | batch: slurm 117 | remote_profile: null # use lazy local 118 | # resources settings are also the same as above 119 | resources: 120 | time_limit: '24:00:00' 121 | cpus_per_task: 4 122 | # use gpu in training; currently only 1 is supported 123 | numb_gpu: 1 124 | mem_limit: 8 #GB 125 | python: "python" # use python in path 126 | 127 | # init settings 128 | init_model: false # do not use existing model in share_folder/init/model.pth 129 | 130 | # the first scf iteration, needed if init_model is false 131 | # possible settings are the same as scf_input 132 | init_scf: 133 | basis: ccpvdz 134 | dump_fields: [atom, e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 135 | verbose: 1 136 | mol_args: 137 | incore_anyway: True 138 | scf_args: 139 | conv_tol: 1e-8 140 | conv_check: false # pyscf conv_check has a bug 141 |
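# note: as described in the README, the `init_scf` block above is also accepted
# as a standalone input file by the `deepks scf` command; a hypothetical direct
# invocation (illustrative paths, flags as used in test.sh) would be:
#   deepks scf init_scf.yaml -s ./systems/train.n1 -d ./init_results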
142 | # the first training step, needed if init_model is false 143 | # most settings are the same as train_input, but model_args is specified here 144 | init_train: 145 | # whether to fit element-wise energy constants from the training data 146 | # requires `dump_fields` to contain `atom` if set to true 147 | fit_elem: false # this is the default 148 | # necessary as this is init training 149 | model_args: 150 | # the number of *hidden* neurons 151 | # note the first (n_descriptor) and last (1) layers are not included here 152 | hidden_sizes: [100, 100, 100] 153 | # the output will be divided by 100 before comparing with labels, to improve training 154 | output_scale: 100 155 | # use skip connections between layers if the sizes are the same 156 | use_resnet: true 157 | # gelu generally performs better than others 158 | actv_fn: gelu 159 | # whether to use a predefined embedding function 160 | # to further symmetrize the eigenvalues as descriptors 161 | # adding an embedding can make the energy surface smoother, hence improving convergence 162 | # but may slightly reduce the accuracy (especially in generalization) 163 | # for water we do not use it; if you encounter convergence problems, set it to 164 | # embedding: thermal 165 | embedding: null 166 | # if `fit_elem` is true, setting this will use user-defined 167 | # element energy constants, instead of fitting them from data. 168 | # can be an absolute path to the file, or a length 2 list 169 | # containing element charges and constants, like 170 | # [[1, 8], [-0.08, -0.04]] 171 | elem_table: null 172 | # the rest are the same as above 173 | data_args: 174 | batch_size: 16 175 | group_batch: 1 176 | preprocess_args: 177 | preshift: true # init model will shift the input descriptors to mean zero 178 | prescale: false 179 | prefit_ridge: 1e1 180 | prefit_trainable: false 181 | # the following are suggested parameters for the initial training 182 | # note that the training curve shown in the deepks-kit paper uses a different set of parameters 183 | # the paper parameters take an unnecessary length of time and are no longer suggested 184 | train_args: 185 | decay_rate: 0.95 # 0.96 in paper example training curve 186 | decay_steps: 300 # 500 in paper example training curve 187 | display_epoch: 100 188 | n_epoch: 15000 # 50000 in paper example training curve 189 | start_lr: 0.0003 190 | 191 | # other settings 192 | cleanup: false 193 | strict: true 194 | -------------------------------------------------------------------------------- /examples/water_cluster/run.sh: -------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate args.yaml >> log.iter 2> err.iter & 2 | echo $! > PID 3 | -------------------------------------------------------------------------------- /examples/water_cluster/run_shell.sh: -------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate args.yaml shell.yaml >> log.iter 2> err.iter & 2 | echo $!
> PID 3 | -------------------------------------------------------------------------------- /examples/water_cluster/shell.yaml: -------------------------------------------------------------------------------- 1 | # to use this file, simply add its name as another argument 2 | # in the command line after the main args.yaml 3 | # for example, `deepks iterate args.yaml shell.yaml` 4 | # this overwrites the earlier settings with those specified in this file 5 | 6 | scf_machine: 7 | # every system will be run as a separate command (a task) 8 | sub_size: 1 9 | # 2 tasks will be gathered into one group and submitted together as a shell script 10 | # all shell scripts will be executed at the same time 11 | # hence in parallel, sharing the whole machine's resources 12 | # you may want to set this to a large number 13 | # because the number of tasks running at the same time would be nsystems / (sub_size * group_size) 14 | group_size: 2 15 | dispatcher: 16 | context: local 17 | batch: shell # set to shell to run on local machine 18 | remote_profile: null # not needed in local case 19 | # resources are no longer needed, though envs can still be set here 20 | resources: 21 | envs: 22 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 23 | python: "python" # use python in path 24 | 25 | train_machine: 26 | dispatcher: 27 | context: local 28 | batch: shell # same as above, use shell to run on local machine 29 | remote_profile: null # use lazy local 30 | python: "python" # use python in path 31 | # resources are no longer needed, and the task will use gpu automatically if there is one 32 | 33 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/test.n6/atom.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/test.n6/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/test.n6/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/test.n6/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n1/atom.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/energy.npy: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n1/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n1/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n1/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/coord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n2/coord.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n2/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n2/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/type.raw: -------------------------------------------------------------------------------- 1 | O 2 | H 3 | H 4 | O 5 | H 6 | H 7 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n2/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/coord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n3/coord.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n3/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/train.n3/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/type.raw: 
-------------------------------------------------------------------------------- 1 | O 2 | H 3 | H 4 | O 5 | H 6 | H 7 | O 8 | H 9 | H 10 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/train.n3/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/coord.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/valid.n4/coord.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/valid.n4/energy.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_cluster/systems/valid.n4/force.npy -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/type.raw: -------------------------------------------------------------------------------- 1 | O 2 | H 3 | H 4 | O 5 | H 6 | H 7 | O 8 | H 9 | H 10 | O 11 | H 12 | H 13 | -------------------------------------------------------------------------------- /examples/water_cluster/systems/valid.n4/unit.raw: -------------------------------------------------------------------------------- 1 | Angstrom 2 | -------------------------------------------------------------------------------- /examples/water_cluster/test.sh: -------------------------------------------------------------------------------- 1 | deepks scf share/scf_input.yaml -m iter.09/01.train/model.pth -s systems/test.n6 -F e_tot f_tot conv rdm -d test_results -G 2 | -------------------------------------------------------------------------------- /examples/water_single/.gitignore: -------------------------------------------------------------------------------- 1 | */iter.* 2 | */share 3 | */log.* 4 | */err.* 5 | */RECORD 6 | */PID 7 | -------------------------------------------------------------------------------- /examples/water_single/README.md: -------------------------------------------------------------------------------- 1 | # Example for water 2 | 3 | This is an example of how to use the `deepks` library to train an energy functional for water molecules. The sub-folders are grouped as follows: 4 | 5 | - `systems` contains all data that has been prepared in `deepks` format. 6 | - `init` contains input files used to train a (perturbative) energy model (DeePHF). 7 | - `iter` contains input files used to train a self-consistent model iteratively (DeePKS). 8 | - `withdens` contains input files used to train an SCF model with density labels. 9 | 10 | 11 | ## Prepare data 12 | 13 | To prepare data, please first note that `deepks` uses atomic units by default but switches to Angstrom (Å) as the length unit when xyz files are used as systems. 14 | 15 | Property | Unit 16 | --- | :---: 17 | Length | Bohr (Å if from xyz) 18 | Energy | $E_h$ (Hartree) 19 | Force | $E_h$/Bohr ($E_h$/Å if from xyz) 20 |
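If your own data comes in Angstrom, you can either add a `unit.raw` file to the system folder as done in the [water_cluster example](../water_cluster/README.md), or convert the coordinates up front. A minimal sketch of the latter (the file names are illustrative; the factor is the standard Bohr/Angstrom conversion):
```python
import numpy as np

BOHR_PER_ANGSTROM = 1.8897261246  # 1 Å expressed in Bohr
coord_ang = np.load("coord_angstrom.npy")             # hypothetical input in Å
np.save("coord.npy", coord_ang * BOHR_PER_ANGSTROM)   # stored in Bohr
```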
21 | `deepks` accepts data in three formats. 22 | 23 | - **single `xyz` files** with properties saved as separate files sharing the same base name. 24 | e.g. for `0000.xyz`, its energy can be saved as `0000.energy.npy`, forces as `0000.force.npy`, and density matrix as `0000.dm.npy`, all in the same folder. 25 | - **grouped into folders** with the same number of atoms. 26 | Such a folder should contain an `atom.npy` that has shape `n_frames x n_atoms x 4`, where the four elements correspond to the nuclear charge of the atom and its three spatial coordinates. 27 | Other properties can be provided as separate files like `energy.npy` and `force.npy`. 28 | - **grouped with an explicit `type.raw` file** with all frames having the same types of elements. 29 | This is similar to the above, except that `atom.npy` is replaced by `coord.npy`, containing pure spatial coordinates, and a `type.raw` containing the element types for all frames of this system. This format is very similar to the one used in DeePMD-Kit, but the `type.raw` must contain real element types here. 30 | 31 | Note that the property files are optional. For a pure SCF calculation, they are not needed. But in order to train a model, they are needed as labels. 32 | 33 | The two grouped data formats can be converted from the xyz format by using [this script](../../scripts/convert_xyz.py). As an example, the data in the `systems` folder was created using the following command. 34 | ``` 35 | python ../../scripts/convert_xyz.py some/path/to/all/*.xyz -d systems -G 300 -P group 36 | ``` 37 | 38 |
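To make the second (grouped) format concrete, the following minimal sketch builds such a folder for 100 water monomers with random coordinates (the folder name and the data here are purely illustrative):
```python
import os
import numpy as np

n_frames, n_atoms = 100, 3
os.makedirs("systems/my_group", exist_ok=True)

charges = np.array([8.0, 1.0, 1.0])             # nuclear charges: O, H, H
coords = np.random.randn(n_frames, n_atoms, 3)  # spatial coordinates, in Bohr
atom = np.concatenate(
    [np.broadcast_to(charges[None, :, None], (n_frames, n_atoms, 1)), coords],
    axis=-1,
)  # shape: n_frames x n_atoms x 4
np.save("systems/my_group/atom.npy", atom)

# optional label files share the leading n_frames dimension, e.g.
# np.save("systems/my_group/energy.npy", energies)  # shape (n_frames,)
# np.save("systems/my_group/force.npy", forces)     # shape (n_frames, n_atoms, 3)
```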
39 | ## Train an energy model 40 | 41 | Training a perturbative energy model is a pure machine learning task. Please see the [DeePHF paper](https://arxiv.org/pdf/2005.00169.pdf) for a detailed explanation of the construction of the descriptors. Here we provide two sub-commands. `deepks scf` can do the Hartree-Fock calculation and save the descriptor (`dm_eig`) as well as the labels (`l_e_delta` for energy and `l_f_delta` for force) automatically. `deepks train` can use the dumped descriptors and labels to train a neural network model. 42 | 43 | To further simplify the procedure, we can combine the two steps together and use `deepks iterate` to run them sequentially. The required input files and execution scripts can be found in the `init` folder. There `machines.yaml` specifies the resources needed for the calculations. `params.yaml` specifies the parameters needed for the Hartree-Fock calculation and neural network training. `systems.yaml` specifies the data needed for training and testing. The name `init` reflects that this also serves as the initialization step of the self-consistent training described below. For the same reason, the `n_iter` attribute in `params.yaml` is set to 0, to avoid iterative training. 44 | 45 | As shown in `run.sh`, the input files can be loaded and run by 46 | ``` 47 | deepks iterate machines.yaml params.yaml systems.yaml 48 | ``` 49 | where `deepks` is a shortcut for `python -m deepks`. Or one can directly use `./run.sh` to run it in the background. Make sure you are in the `init` folder before you run the command. 50 | 51 | 52 | ## Train a self-consistent model 53 | 54 | To train a self-consistent model we follow the iterative approach described in the [DeePKS paper](https://arxiv.org/pdf/2008.00167.pdf). We provide `deepks iterate` as a tool to do the iteration automatically. Same as above, the example input file and execution scripts can be found in the `iter` folder. Note that here, instead of splitting the input into three files, we combine all input settings in one `args.yaml` file, to show that `deepks iterate` can take a variable number of input files. The file provided last has the highest priority. 55 | 56 | For each iteration, there are four steps using four corresponding tools provided by `deepks`. Each step corresponds to a row in the `RECORD` file, used to indicate which steps have finished. Each row has three numbers: the first corresponds to the iteration number, the second to the sub-folder in the iteration, and the third to the step in that folder. 57 | 58 | - `deepks scf` (`X 0 0`): do the SCF calculation with the given model and save the results 59 | - `deepks stats` (`X 0 1`): check the SCF results and print convergence and accuracy 60 | - `deepks train` (`X 1 0`): train a new model using the old one as the starting point 61 | - `deepks test` (`X 1 1`): test the model on all data to see the pure fitting error 62 | 63 | To run the iteration, again, use `./run.sh` or the following command 64 | ``` 65 | deepks iterate args.yaml 66 | ``` 67 | Make sure you are in the `iter` folder before you run the command. 68 | 69 | One can check `iter.*/00.scf/log.data` for the stats of the SCF results, `iter.*/01.train/log.train` for the training curve, and `iter.*/01.train/log.test` for the model prediction of $E_\delta$ (e_delta). 70 | 71 | 72 | ## Train a self-consistent model with density labels 73 | 74 | We provide in the `withdens` folder a set of inputs using density labels during the iterative training (as additional penalty terms in the Hamiltonian). We again follow the [DeePKS paper](https://arxiv.org/pdf/2008.00167.pdf), first adding a randomized penalty using the Coulomb loss for 5 iterations, then removing it and relaxing for another 5 iterations. 75 | 76 | Most of the inputs are the same as in the normal iterative training case described in the last section; these we put in `base.yaml`. We then overwrite `scf_input` in `penalty.yaml` to add the penalties, and also change the number of iterations `n_iter` in both `penalty.yaml` and `relax.yaml`. 77 | 78 | `pipe.sh` shows how we combine the different inputs together. A simplified version is as follows: 79 | ``` 80 | deepks iterate base.yaml penalty.yaml && deepks iterate base.yaml relax.yaml 81 | ``` 82 | The `iterate` command can take multiple input files, and the later ones overwrite the earlier ones. 83 | 84 | Again, running `./run.sh` in the `withdens` folder runs the commands in the background. You can check the results in the `iter.*` folders as above. -------------------------------------------------------------------------------- /examples/water_single/init/machines.yaml: -------------------------------------------------------------------------------- 1 | # this is only part of the input settings.
2 | # should be used together with systems.yaml and params.yaml 3 | 4 | scf_machine: 5 | # every system will be run as a separate command (a task) 6 | sub_size: 1 7 | # 4 tasks will be gathered into one group and submitted together as a shell script 8 | group_size: 4 9 | dispatcher: 10 | context: local 11 | batch: shell # set to shell to run on local machine, you can also use `slurm` 12 | remote_profile: null # not needed in local case 13 | # resources are no longer needed, though envs can still be set here 14 | resources: 15 | envs: 16 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 17 | python: "python" # use python in path 18 | 19 | train_machine: 20 | dispatcher: 21 | context: local 22 | batch: shell # same as above, use shell to run on local machine 23 | remote_profile: null # use lazy local 24 | python: "python" # use python in path 25 | # resources are no longer needed, and the task will use gpu automatically if there is one 26 | 27 | # other settings (these are default, can be omitted) 28 | cleanup: false # whether to delete slurm and err files 29 | strict: true # do not allow undefined machine parameters 30 | -------------------------------------------------------------------------------- /examples/water_single/init/params.yaml: -------------------------------------------------------------------------------- 1 | # this is only part of the input settings. 2 | # should be used together with systems.yaml and machines.yaml 3 | 4 | # number of iterations to do, can be set to zero for DeePHF training 5 | n_iter: 0 6 | 7 | # directory setting (these are default choices, can be omitted) 8 | workdir: "." 9 | share_folder: "share" # folder that stores all other settings 10 | 11 | # scf settings, set to false when n_iter = 0 to skip checking 12 | scf_input: false 13 | 14 | # train settings, set to false when n_iter = 0 to skip checking 15 | train_input: false 16 | 17 | # init settings, these are for the DeePHF task 18 | init_model: false # do not use an existing model to restart from 19 | 20 | init_scf: # parameters for the SCF calculation 21 | basis: ccpvdz 22 | # this is for pure energy training 23 | dump_fields: 24 | - e_base # Hartree-Fock energy 25 | - dm_eig # descriptors 26 | - conv # whether converged or not 27 | - l_e_delta # delta energy between e_base and reference, used as label 28 | verbose: 1 29 | mol_args: # args to be passed to pyscf.gto.Mole.build 30 | incore_anyway: True 31 | scf_args: # args to be passed to pyscf.scf.RHF.run 32 | conv_tol: 1e-8 33 | conv_check: false # pyscf conv_check has a bug 34 | 35 | init_train: # parameters for nn training 36 | model_args: 37 | hidden_sizes: [100, 100, 100] # neurons in hidden layers 38 | output_scale: 100 # the output will be divided by 100 before comparing with labels 39 | use_resnet: true # skip connection 40 | actv_fn: mygelu # same as gelu, supports force calculation 41 | data_args: 42 | batch_size: 16 43 | group_batch: 1 # can collect multiple systems in one batch 44 | preprocess_args: 45 | preshift: true # shift the descriptor by its mean 46 | prescale: false # scale the descriptor by its variance (can cause convergence problems) 47 | prefit_ridge: 1e1 # do a ridge regression as prefitting 48 | prefit_trainable: false 49 | train_args: 50 | decay_rate: 0.96 # learning rate decay factor 51 | decay_steps: 500 # decay the learning rate every this many steps 52 | display_epoch: 100 53 | n_epoch: 10000 54 | start_lr: 0.0003 55 | -------------------------------------------------------------------------------- /examples/water_single/init/run.sh:
-------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate machines.yaml params.yaml systems.yaml >> log.iter 2> err.iter & 2 | echo $! > PID -------------------------------------------------------------------------------- /examples/water_single/init/systems.yaml: -------------------------------------------------------------------------------- 1 | # this is only part of the input settings. 2 | # should be used together with params.yaml and machines.yaml 3 | 4 | # training and testing systems 5 | systems_train: # can also be files containing system paths 6 | - ../systems/group.0[0-2] # support glob 7 | 8 | systems_test: # if empty, use the last system of training set 9 | - ../systems/group.03 10 | -------------------------------------------------------------------------------- /examples/water_single/iter/args.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | n_iter: 5 4 | 5 | # training and testing systems 6 | systems_train: # can also be files containing system paths 7 | - ../systems/group.0[0-2] # support glob 8 | 9 | systems_test: # if empty, use the last system of training set 10 | - ../systems/group.03 11 | 12 | # directory setting 13 | workdir: "." 14 | share_folder: "share" # folder that stores all other settings 15 | 16 | # scf settings 17 | scf_input: # can also be specified by a separate file 18 | basis: ccpvdz 19 | # this is for force training 20 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 21 | verbose: 1 22 | mol_args: 23 | incore_anyway: True 24 | scf_args: 25 | conv_tol: 1e-6 26 | conv_tol_grad: 1e-2 27 | level_shift: 0.1 28 | diis_space: 20 29 | conv_check: false # pyscf conv_check has a bug 30 | 31 | scf_machine: 32 | # every system will be run as a separate command (a task) 33 | sub_size: 1 34 | # 4 tasks will be gathered into one group and submitted together as a shell script 35 | group_size: 4 36 | dispatcher: 37 | context: local 38 | batch: shell # set to shell to run on local machine 39 | remote_profile: null # not needed in local case 40 | # resources are no longer needed, though envs can still be set here 41 | resources: 42 | envs: 43 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 44 | python: "python" # use python in path 45 | 46 | # train settings 47 | train_input: 48 | # model_args is ignored, since this is used as restart 49 | data_args: 50 | batch_size: 16 51 | group_batch: 1 52 | extra_label: true 53 | conv_filter: true 54 | conv_name: conv 55 | preprocess_args: 56 | preshift: false # restarting model already shifted.
Will not recompute shift value 57 | prescale: false # same as above 58 | prefit_ridge: 1e1 59 | prefit_trainable: false 60 | train_args: 61 | decay_rate: 0.5 62 | decay_steps: 1000 63 | display_epoch: 100 64 | force_factor: 1 65 | n_epoch: 5000 66 | start_lr: 0.0001 67 | 68 | train_machine: 69 | dispatcher: 70 | context: local 71 | batch: shell # same as above, use shell to run on local machine 72 | remote_profile: null # use lazy local 73 | python: "python" # use python in path 74 | # resources are no longer needed, and the task will use gpu automatically if there is one 75 | 76 | # init settings 77 | init_model: false # do not use existing model in share_folder/init/model.pth 78 | 79 | init_scf: 80 | basis: ccpvdz 81 | # this is for pure energy training 82 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 83 | verbose: 1 84 | mol_args: 85 | incore_anyway: True 86 | scf_args: 87 | conv_tol: 1e-8 88 | conv_check: false # pyscf conv_check has a bug 89 | 90 | init_train: 91 | model_args: # necessary as this is init training 92 | hidden_sizes: [100, 100, 100] 93 | output_scale: 100 94 | use_resnet: true 95 | actv_fn: gelu 96 | data_args: 97 | batch_size: 16 98 | group_batch: 1 99 | preprocess_args: 100 | preshift: true 101 | prescale: false 102 | prefit_ridge: 1e1 103 | prefit_trainable: false 104 | train_args: 105 | decay_rate: 0.95 106 | decay_steps: 300 107 | display_epoch: 100 108 | n_epoch: 10000 109 | start_lr: 0.0003 110 | 111 | # other settings 112 | cleanup: false 113 | strict: true 114 | -------------------------------------------------------------------------------- /examples/water_single/iter/run.sh: -------------------------------------------------------------------------------- 1 | nohup python -u -m deepks iterate args.yaml >> log.iter 2> err.iter & 2 | echo $! 
> PID 3 | -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/dm.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.00/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.00/force.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/dm.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.01/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.01/force.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/dm.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.02/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.02/force.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/atom.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/atom.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/dm.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/dm.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/energy.npy -------------------------------------------------------------------------------- /examples/water_single/systems/group.03/force.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepmodeling/deepks-kit/4f133fb60e00bc5e413e80e32214defb7a145415/examples/water_single/systems/group.03/force.npy -------------------------------------------------------------------------------- /examples/water_single/withdens/base.yaml: -------------------------------------------------------------------------------- 1 | # all arguments are flattened into this file 2 | # they can also be split into separate files and referenced here 3 | n_iter: 0 # use 0 as a placeholder 4 | 5 | # training and testing systems 6 | systems_train: # can also be files containing system paths 7 | - ../systems/group.0[0-2] # support glob 8 | 9 | systems_test: # if empty, use the last system of training set 10 | - ../systems/group.03 11 | 12 | # directory setting 13 | workdir: "."
14 | share_folder: "share" # folder that stores all other settings 15 | 16 | # scf settings 17 | scf_input: # can also be specified by a separate file 18 | basis: ccpvdz 19 | # this is for force training 20 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 21 | verbose: 1 22 | mol_args: 23 | incore_anyway: True 24 | scf_args: 25 | conv_tol: 1e-6 26 | conv_tol_grad: 1e-2 27 | level_shift: 0.1 28 | diis_space: 20 29 | conv_check: false # pyscf conv_check has a bug 30 | 31 | scf_machine: 32 | # every system will be run as a separate command (a task) 33 | sub_size: 1 34 | # 4 tasks will be gathered into one group and submitted together as a shell script 35 | group_size: 4 36 | dispatcher: 37 | context: local 38 | batch: shell # set to shell to run on the local machine, you can also use `slurm` 39 | remote_profile: null # not needed in the local case 40 | # the resources section is no longer needed, though envs can still be set here 41 | resources: 42 | envs: 43 | PYSCF_MAX_MEMORY: 8000 # increase from 4G to 8G 44 | python: "python" # use the python in PATH 45 | 46 | # train settings 47 | train_input: 48 | # model_args is ignored, since this is used as a restart 49 | data_args: 50 | batch_size: 16 51 | group_batch: 1 52 | extra_label: true 53 | conv_filter: true 54 | conv_name: conv 55 | preprocess_args: 56 | preshift: false # the restarted model is already shifted; the shift value will not be recomputed 57 | prescale: false # same as above 58 | prefit_ridge: 1e1 59 | prefit_trainable: false 60 | train_args: 61 | decay_rate: 0.5 62 | decay_steps: 1000 63 | display_epoch: 100 64 | force_factor: 1 65 | n_epoch: 5000 66 | start_lr: 0.0001 67 | 68 | train_machine: 69 | dispatcher: 70 | context: local 71 | batch: shell # same as above, use shell to run on the local machine 72 | remote_profile: null # use lazy local 73 | python: "python" # use the python in PATH 74 | # resources are no longer needed, and the task will use the gpu automatically if there is one 75 | 76 | # init settings 77 | init_model: false # do not use an existing model in share_folder/init/model.pth 78 | 79 | init_scf: 80 | basis: ccpvdz 81 | # this is for pure energy training 82 | dump_fields: [e_base, e_tot, dm_eig, conv, l_e_delta] 83 | verbose: 1 84 | mol_args: 85 | incore_anyway: True 86 | scf_args: 87 | conv_tol: 1e-8 88 | conv_check: false # pyscf conv_check has a bug 89 | 90 | init_train: 91 | model_args: # necessary as this is init training 92 | hidden_sizes: [100, 100, 100] 93 | output_scale: 100 94 | use_resnet: true 95 | actv_fn: mygelu 96 | data_args: 97 | batch_size: 16 98 | group_batch: 1 99 | preprocess_args: 100 | preshift: true 101 | prescale: false 102 | prefit_ridge: 1e1 103 | prefit_trainable: false 104 | train_args: 105 | decay_rate: 0.96 106 | decay_steps: 500 107 | display_epoch: 100 108 | n_epoch: 15000 109 | start_lr: 0.0003 110 | 111 | # other settings 112 | cleanup: false 113 | strict: true 114 | -------------------------------------------------------------------------------- /examples/water_single/withdens/penalty.yaml: -------------------------------------------------------------------------------- 1 | # overwriting the base config 2 | n_iter: 5 3 | 4 | # adding penalty 5 | scf_input: # can also be specified by a separate file 6 | basis: ccpvdz 7 | # this is for force training 8 | dump_fields: [e_base, e_tot, dm_eig, conv, f_base, f_tot, grad_vx, l_f_delta, l_e_delta] 9 | verbose: 1 10 | mol_args: 11 | incore_anyway: True 12 | scf_args: 13 | conv_tol: 1e-6 14 | conv_tol_grad: 1e-2 15 | level_shift: 0.1 16 |
diis_space: 20 17 | conv_check: false # pyscf conv_check has a bug 18 | penalty_terms: 19 | # Coulomb loss as penalty, random strength 20 | - type: coulomb 21 | required_labels: dm # where the label is stored (sysfolder/dm.npy) 22 | strength: 1 # can be larger, like 5 23 | random: true # the actual strength varies within [0, strength] -------------------------------------------------------------------------------- /examples/water_single/withdens/pipe.sh: -------------------------------------------------------------------------------- 1 | python -u -m deepks iterate base.yaml penalty.yaml >> log.iter 2> err.iter &&\ 2 | python -u -m deepks iterate base.yaml relax.yaml >> log.iter 2> err.iter -------------------------------------------------------------------------------- /examples/water_single/withdens/relax.yaml: -------------------------------------------------------------------------------- 1 | # overwriting the base config to run longer 2 | n_iter: 10 -------------------------------------------------------------------------------- /examples/water_single/withdens/run.sh: -------------------------------------------------------------------------------- 1 | nohup bash pipe.sh >/dev/null 2>&1 & 2 | echo $! > PID 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | paramiko 3 | ruamel.yaml 4 | torch 5 | pyscf -------------------------------------------------------------------------------- /scripts/convert_xyz.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from glob import glob 4 | 5 | 6 | BOHR = 0.52917721092 7 | ELEMENTS = ['X', # Ghost 8 | 'H' , 'He', 'Li', 'Be', 'B' , 'C' , 'N' , 'O' , 'F' , 'Ne', 9 | 'Na', 'Mg', 'Al', 'Si', 'P' , 'S' , 'Cl', 'Ar', 'K' , 'Ca', 10 | 'Sc', 'Ti', 'V' , 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 11 | 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y' , 'Zr', 12 | 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 13 | 'Sb', 'Te', 'I' , 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 14 | 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 15 | 'Lu', 'Hf', 'Ta', 'W' , 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 16 | 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 17 | 'Pa', 'U' , 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 18 | 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 19 | 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og', 20 | ] 21 | CHARGES = dict(((x,i) for i,x in enumerate(ELEMENTS))) 22 | 23 | 24 | def parse_xyz(filename): 25 | with open(filename) as fp: 26 | natom = int(fp.readline()) 27 | comments = fp.readline().strip() 28 | atom_str = fp.readlines() 29 | atom_list = [a.split() for a in atom_str if a.strip()] 30 | elements = [a[0] for a in atom_list] 31 | coords = np.array([a[1:] for a in atom_list], dtype=float) 32 | return natom, comments, elements, coords 33 | 34 | 35 | def parse_unit(rawunit): 36 | if isinstance(rawunit, str): 37 | try: 38 | unit = float(rawunit) 39 | except ValueError: 40 | if rawunit.upper().startswith(('B', 'AU')): 41 | unit = BOHR 42 | else: #unit[:3].upper() == 'ANG': 43 | unit = 1.
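# a quick sanity sketch of parse_unit's branches (return values assume the
# BOHR constant defined at the top of this file):
#   parse_unit("0.529") -> 0.529          (numeric string, parsed directly)
#   parse_unit("Bohr")  -> 0.52917721092  (starts with 'B' / 'AU')
#   parse_unit("Ang")   -> 1.0            (fallback: already in Angstrom)
#   parse_unit(2.0)     -> 2.0            (non-string passed through below)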
44 | else: 45 | unit = rawunit 46 | return unit 47 | 48 | 49 | def load_array(file): 50 | ext = os.path.splitext(file)[-1] 51 | if "npy" in ext: 52 | return np.load(file) 53 | elif "npz" in ext: 54 | raise NotImplementedError 55 | else: 56 | try: 57 | arr = np.loadtxt(file) 58 | except ValueError: 59 | arr = np.loadtxt(file, dtype=str) 60 | return arr 61 | 62 | 63 | def load_glob(pattern): 64 | [fn] = glob(pattern) 65 | return load_array(fn) 66 | 67 | 68 | def load_system(xyz_file): 69 | base, ext = os.path.splitext(xyz_file) 70 | assert ext == '.xyz' 71 | natom, _, ele, coord = parse_xyz(xyz_file) 72 | try: 73 | energy = load_glob(f"{base}.energy*").reshape(1) 74 | except Exception: 75 | energy = None 76 | try: 77 | force = load_glob(f"{base}.force*").reshape(natom, 3) 78 | except Exception: 79 | force = None 80 | try: 81 | dm = load_glob(f"{base}.dm*") 82 | nao = np.sqrt(dm.size).astype(int) 83 | dm = dm.reshape(nao, nao) 84 | except Exception: 85 | dm = None 86 | return ele, coord, energy, force, dm 87 | 88 | 89 | def dump_systems(xyz_files, dump_dir, unit="Bohr", ext_type=False): 90 | print(f"saving to {dump_dir} ... ", end="", flush=True) 91 | os.makedirs(dump_dir, exist_ok=True) 92 | if not xyz_files: 93 | print("empty list! did nothing") 94 | return 95 | unit = parse_unit(unit) 96 | a_ele, a_coord, a_energy, a_force, a_dm = map(np.array, 97 | zip(*[load_system(fl) for fl in xyz_files])) 98 | a_coord /= unit 99 | if ext_type: 100 | ele = a_ele[0] 101 | assert all(e == ele for e in a_ele), "element types of all xyz files have to be the same" 102 | np.savetxt(os.path.join(dump_dir, "type.raw"), ele, fmt="%s") 103 | np.save(os.path.join(dump_dir, "coord.npy"), a_coord) 104 | else: 105 | a_chg = [[[CHARGES[e]] for e in ele] for ele in a_ele] 106 | a_atom = np.concatenate([a_chg, a_coord], axis=-1) 107 | np.save(os.path.join(dump_dir, "atom.npy"), a_atom) 108 | if not all(ene is None for ene in a_energy): 109 | assert not any(ene is None for ene in a_energy) 110 | np.save(os.path.join(dump_dir, "energy.npy"), a_energy) 111 | if not all(ff is None for ff in a_force): 112 | assert not any(ff is None for ff in a_force) 113 | a_force *= unit 114 | np.save(os.path.join(dump_dir, "force.npy"), a_force) 115 | if not all(dm is None for dm in a_dm): 116 | assert not any(dm is None for dm in a_dm) 117 | np.save(os.path.join(dump_dir, "dm.npy"), a_dm) 118 | print("finished", flush=True) 119 | return 120 | 121 | 122 | def main(xyz_files, dump_dir=".", group_size=-1, group_prefix="sys", unit="Bohr", ext_type=False): 123 | if isinstance(xyz_files, str): 124 | xyz_files = [xyz_files] 125 | if group_size <= 0: 126 | dump_systems(xyz_files, dump_dir, unit=unit, ext_type=ext_type) 127 | return 128 | ns = len(xyz_files) 129 | ngroup = np.ceil(ns / group_size).astype(int) 130 | nd = max(len(str(ngroup)), 2) 131 | for i in range(ngroup): 132 | dump_systems(xyz_files[i*group_size:(i+1)*group_size], 133 | os.path.join(dump_dir, f"{group_prefix}.{i:0>{nd}d}"), 134 | unit=unit, ext_type=ext_type) 135 | return 136 | 137 | 138 | if __name__ == "__main__": 139 | import argparse 140 | parser = argparse.ArgumentParser( 141 | description="convert .xyz files and corresponding properties " 142 | "into systems with .npy files grouped in folders.", 143 | argument_default=argparse.SUPPRESS) 144 | parser.add_argument("xyz_files", metavar='FILE', nargs="+", 145 | help="input xyz files") 146 | parser.add_argument("-d", "--dump-dir", 147 | help="directory of dumped system, default is current dir") 148 | parser.add_argument("-U", "--unit", 149 | help="length
unit used to save npy files (assume xyz in Angstrom)") 150 | parser.add_argument("-G", "--group-size", type=int, 151 | help="if positive, split data into sub systems with the given size, default: -1") 152 | parser.add_argument("-P", "--group-prefix", 153 | help=r"save sub systems with given prefix as `$dump_dir/$prefix.ii`, default: sys") 154 | parser.add_argument("-T", "--ext-type", action="store_true", 155 | help="if set, save the element type into a separate `type.raw` file") 156 | args = parser.parse_args() 157 | 158 | main(**vars(args)) 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /scripts/legacy/calc_eig.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial.distance import squareform, pdist 3 | 4 | 5 | def load_coords(filename): 6 | return np.loadtxt(filename, skiprows=2, usecols=[1,2,3]) 7 | 8 | 9 | def cosine_switching(x, lower=1.9, upper=2.0, threshold=1e-5): 10 | zx = x < threshold 11 | lx = x < lower 12 | ux = x > upper 13 | mx = (~lx) & (~ux) 14 | res = np.zeros_like(x) 15 | res[~zx & lx] = 1 16 | res[mx] = 0.5*np.cos(np.pi * (x[mx]-lower) / (upper-lower)) + 0.5 17 | return res 18 | 19 | 20 | def calc_weight(coords, lower=1.9, upper=2.0): 21 | natom = coords.shape[0] 22 | pair_dist = squareform(pdist(coords)) 23 | weight = cosine_switching(pair_dist, lower, upper).reshape(1, natom, natom) 24 | return weight 25 | 26 | 27 | def split(ci, shell): 28 | sec = [1]*shell[0] + [3]*shell[1] + [5]*shell[2] 29 | assert np.sum(sec) == ci.shape[-1] 30 | ci_list = np.split(ci, np.cumsum(sec)[:-1], axis=-1) 31 | return ci_list 32 | 33 | 34 | def calc_atom_eig(ci, shell=(12,12,12), frozen=0): 35 | ci_list = split(ci[:, frozen:], shell) 36 | dm_list = [np.einsum('niap,niaq->napq', _ci, _ci) for _ci in ci_list] 37 | eig_list = [np.linalg.eigvalsh(dm) for dm in dm_list] 38 | eig = np.concatenate(eig_list, -1) 39 | return eig 40 | 41 | 42 | def calc_atom_ener_eig(ci, ei, kernel=None, shell=(12,12,12), frozen=0): 43 | if kernel is not None: 44 | ei = kernel(ei) 45 | ci_list = split(ci[:, frozen:], shell) 46 | dm_list = [np.einsum('niap,niaq,ni->napq', _ci, _ci, ei[:, frozen:]) for _ci in ci_list] 47 | eig_list = [np.linalg.eigvalsh(dm) for dm in dm_list] 48 | eig = np.concatenate(eig_list, -1) 49 | return eig 50 | 51 | 52 | def calc_neighbor_eig(ci, weight=None, shell=(12,12,12), frozen=0): 53 | ci_list = split(ci[:, frozen:], shell) 54 | dm_list = [np.einsum('niap,nibq->nabpq', _ci, _ci) for _ci in ci_list] 55 | if weight is not None: 56 | dm_list = [np.einsum('nabpq,nab->nabpq', _dm, weight) for _dm in dm_list] 57 | eig_list = [np.linalg.eigvalsh(0.5*(_dm.sum(1) + _dm.sum(2))) for _dm in dm_list] 58 | eig = np.concatenate(eig_list, -1) 59 | return eig 60 | 61 | 62 | def calc_eig(name, ci, ei=None, xyz_file=None, shell=(12,12,12)): 63 | if name == 'dm_eig': 64 | return calc_atom_eig(ci, shell=shell) 65 | if name == 'od_eig': 66 | assert xyz_file is not None 67 | return calc_neighbor_eig(ci, calc_weight(load_coords(xyz_file)), shell=shell) 68 | if name == 'se_eig': 69 | assert ei is not None 70 | return calc_atom_ener_eig(ci, ei, kernel=None, shell=shell) 71 | if name == 'fe_eig': 72 | assert ei is not None 73 | return calc_atom_ener_eig(ci, ei, kernel=np.exp, shell=shell) 74 | 75 | raise ValueError(f'unsupported name: {name}') -------------------------------------------------------------------------------- /scripts/legacy/proj_dm.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyscf import gto 3 | import os 4 | import sys 5 | import argparse 6 | import mendeleev 7 | from calc_eig import calc_eig 8 | 9 | 10 | # aa = 2.0**np.arange(6,-3,-1) 11 | aa = 1.5**np.array([17,13,10,7,5,3,2,1,0,-1,-2,-3]) 12 | bb = np.diag(np.ones(aa.size)) - np.diag(np.ones(aa.size-1), k=1) 13 | SHELL = [aa.size] * 3 14 | coef = np.concatenate([aa.reshape(-1,1), bb], axis=1) 15 | BASIS = [[0, *coef.tolist()], [1, *coef.tolist()], [2, *coef.tolist()]] 16 | 17 | 18 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 19 | with open(filename) as fp: 20 | natoms = int(fp.readline()) 21 | comments = fp.readline() 22 | xyz_str = "".join(fp.readlines()) 23 | mol = gto.Mole() 24 | mol.verbose = 4 if verbose else 0 25 | mol.atom = xyz_str 26 | mol.basis = basis 27 | try: 28 | mol.build(0,0,unit="Ang") 29 | except RuntimeError as e: 30 | mol.spin = 1 31 | mol.build(0,0,unit="Ang") 32 | return mol 33 | 34 | 35 | def gen_proj(mol, intor = 'ovlp', verbose = False) : 36 | natm = mol.natm 37 | mole_coords = mol.atom_coords(unit="Ang") 38 | test_mol = gto.Mole() 39 | if verbose : 40 | test_mol.verbose = 4 41 | else : 42 | test_mol.verbose = 0 43 | test_mol.atom = [["Ne", coord] for coord in mole_coords] 44 | test_mol.basis = BASIS 45 | test_mol.spin = 0 46 | test_mol.build(0,0,unit="Ang") 47 | proj = gto.intor_cross(f'int1e_{intor}_sph', mol, test_mol) 48 | 49 | def proj_func(mo): 50 | proj_coeff = np.matmul(mo, proj).reshape(*mo.shape[:2], natm, -1) 51 | if verbose: 52 | print('shape of coeff data ', proj_coeff.shape) 53 | # res : nframe x nocc/nvir x natm x nproj 54 | return proj_coeff, proj_coeff.shape[-1] 55 | 56 | return proj_func 57 | 58 | 59 | def proj_frame(xyz_file, mo_dir, dump_dir=None, basis='ccpvtz', ename="e_hf", intor='ovlp', verbose=False): 60 | mol = parse_xyz(xyz_file, basis=basis) 61 | meta, ehf, e_occ, c_occ = load_data(mo_dir, ename) 62 | 63 | proj_func = gen_proj(mol, intor, verbose) 64 | c_proj_occ,nproj = proj_func(c_occ) 65 | c_occ = c_proj_occ 66 | meta = np.append(meta, nproj) 67 | # print(meta, c_proj_occ.shape) 68 | 69 | if dump_dir is not None: 70 | dump_data(dump_dir, meta, ehf, e_occ, c_occ) 71 | return meta, ehf, e_occ, c_occ 72 | 73 | 74 | def load_data(dir_name, ename="e_hf"): 75 | meta = np.loadtxt(os.path.join(dir_name, 'system.raw'), dtype=int).reshape(-1) 76 | natm = meta[0] 77 | nao = meta[1] 78 | nocc = meta[2] 79 | nvir = meta[3] 80 | ehf = np.loadtxt(os.path.join(dir_name, f'{ename}.raw')).reshape(-1, 1) 81 | e_occ = np.loadtxt(os.path.join(dir_name, 'ener_occ.raw')).reshape(-1, nocc) 82 | c_occ = np.loadtxt(os.path.join(dir_name, 'coeff_occ.raw')).reshape([-1, nocc, nao]) 83 | return meta, ehf, e_occ, c_occ 84 | 85 | 86 | def dump_data(dir_name, meta, ehf, e_occ, c_occ, dm_dict={}) : 87 | os.makedirs(dir_name, exist_ok = True) 88 | np.savetxt(os.path.join(dir_name, 'system.raw'), 89 | meta.reshape(1,-1), 90 | fmt = '%d', 91 | header = 'natm nao nocc nvir nproj') 92 | nframe = e_occ.shape[0] 93 | natm = meta[0] 94 | nao = meta[1] 95 | nocc = meta[2] 96 | nvir = meta[3] 97 | nproj = meta[4] 98 | # ntest == natm 99 | assert(all(c_occ.shape == np.array([nframe, nocc, natm, nproj], dtype=int))) 100 | assert(all(e_occ.shape == np.array([nframe, nocc], dtype=int))) 101 | assert(all(all(dm.shape == np.array([nframe, natm, nproj], dtype=int)) for dm in dm_dict.values())) 102 | np.save(os.path.join(dir_name, 'e_hf.npy'), ehf) 103 | 
np.save(os.path.join(dir_name, 'ener_occ.npy'), e_occ) 104 | np.save(os.path.join(dir_name, 'coeff_occ.npy'), c_occ) 105 | for name, dm in dm_dict.items(): 106 | np.save(os.path.join(dir_name, f'{name}.npy'), dm) 107 | 108 | 109 | def main(xyz_files, mo_dirs, dump_dir, basis='ccpvtz', ename="e_hf", eig_names=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], intor='ovlp', verbose=False): 110 | assert len(xyz_files) == len(mo_dirs) 111 | oldmeta = None 112 | all_e_hf = [] 113 | all_e_occ = [] 114 | all_c_occ = [] 115 | all_dm_dict = {name:[] for name in eig_names} 116 | 117 | for xf, md in zip(xyz_files, mo_dirs): 118 | meta, e_hf, e_occ, c_occ = proj_frame(xf, md, basis=basis, ename=ename, intor=intor, verbose=verbose) 119 | if oldmeta is not None: 120 | assert all(oldmeta == meta), "all frames have to belong to the same system, thus meta has to be equal!" 121 | oldmeta = meta 122 | all_e_hf.append(e_hf) 123 | all_e_occ.append(e_occ) 124 | all_c_occ.append(c_occ) 125 | for name, dm_list in all_dm_dict.items(): 126 | dm_list.append(2 * calc_eig(name, c_occ, e_occ, xf, shell=SHELL)) # multiply by 2 for restricted method, doubly occupied orbitals 127 | print(f"{xf} && {md} finished") 128 | 129 | all_e_hf = np.concatenate(all_e_hf) 130 | all_e_occ = np.concatenate(all_e_occ) 131 | all_c_occ = np.concatenate(all_c_occ) 132 | for name in all_dm_dict.keys(): 133 | all_dm_dict[name] = np.concatenate(all_dm_dict[name]) 134 | 135 | dump_data(dump_dir, meta, all_e_hf, all_e_occ, all_c_occ, all_dm_dict) 136 | print("done") 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = argparse.ArgumentParser(description="project mo_coeffs into atomic basis and calculate descriptors.") 141 | parser.add_argument("-x", "--xyz-file", nargs="+", help="input xyz file(s), if more than one, concat them") 142 | parser.add_argument("-f", "--mo-dir", nargs="+", help="input mo folder(s), must match the number of xyz files") 143 | parser.add_argument("-d", "--dump-dir", default=".", help="dir of dumped files, if not specified, use current folder") 144 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 145 | parser.add_argument("-I", "--intor", default="ovlp", help="intor string used to calculate int1e") 146 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 147 | parser.add_argument("-e", "--ename", default="e_hf", help="file name for total energy") 148 | parser.add_argument("-E", "--eig-name", nargs="*", default=['dm_eig', 'od_eig', 'se_eig', 'fe_eig'], 149 | help="name of eigen values to be calculated and dumped") 150 | args = parser.parse_args() 151 | 152 | main(args.xyz_file, args.mo_dir, args.dump_dir, args.basis, 153 | args.ename, args.eig_name, args.intor, args.verbose) -------------------------------------------------------------------------------- /scripts/legacy/rhf.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from pyscf import gto, scf, lib 5 | from pyscf.mp.mp2 import _mo_energy_without_core 6 | from time import time 7 | import os 8 | import sys 9 | import argparse 10 | 11 | 12 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 13 | with open(filename) as fp: 14 | natoms = int(fp.readline()) 15 | comments = fp.readline() 16 | xyz_str = "".join(fp.readlines()) 17 | mol = gto.Mole() 18 | mol.verbose = 4 if verbose else 0 19 | mol.atom = xyz_str 20 | mol.basis = basis 21 | mol.build(0,0,unit="Ang") 22 | return mol 23 |
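# minimal usage sketch for the helper above (the file name is illustrative):
#   mol = parse_xyz("water.xyz", basis="ccpvdz", verbose=True)
#   print(mol.natm, mol.nao)  # e.g. 3 atoms and 24 AOs for water in cc-pVDZ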
24 | 25 | def fix_gauge(mo_coeff) : 26 | nvec = mo_coeff.shape[1] 27 | ndim = mo_coeff.shape[0] 28 | ret = np.zeros(mo_coeff.shape) 29 | count = 0 30 | for ii in range(nvec) : 31 | for jj in range(ndim) : 32 | if np.sign(mo_coeff[jj,ii]) != 0 : 33 | break 34 | if np.sign(mo_coeff[jj,ii]) == 0 : # loop ended without break 35 | # mo_coeff[:,ii] == 0 36 | assert(np.max(np.abs(mo_coeff[:,ii])) == 0) 37 | raise RuntimeError( 'ERROR: zero eigen func, should not happen') 38 | continue 39 | else : 40 | if (jj != 0) : 41 | print('gauge ref is not 0') 42 | factor = np.sign(mo_coeff[jj,ii]) 43 | ret[:,ii] = factor * mo_coeff[:,ii] 44 | count += 1 45 | # break 46 | # print(count) 47 | return ret 48 | 49 | 50 | def mol_electron(mol, chkfile=None, verbose=False) : 51 | if verbose: 52 | start_t = time() 53 | nao = mol.nao 54 | natm = mol.natm 55 | rhf = scf.RHF(mol) 56 | if chkfile: 57 | rhf.set(chkfile=chkfile) 58 | erhf = rhf.kernel() 59 | if verbose: 60 | rhf_t = time() 61 | print(f"time of rhf: {rhf_t - start_t}") 62 | 63 | mo_energy = rhf.mo_energy 64 | mo_occ = rhf.mo_occ 65 | # mo_coeff = rhf.mo_coeff 66 | mo_coeff_ = rhf.mo_coeff 67 | mo_coeff= fix_gauge(mo_coeff_) 68 | occ_a = (mo_occ>0) 69 | # occ_b = (mo_occ[1]>0) 70 | vir_a = (mo_occ==0) 71 | # vir_b = (mo_occ[1]==0) 72 | nocc_a = sum(occ_a) 73 | # nocc_b = sum(occ_b) 74 | nocc = nocc_a 75 | nvir_a = sum(vir_a) 76 | # nvir_b = sum(vir_b) 77 | nvir = nvir_a 78 | assert(nocc + nvir == nao) 79 | if verbose : 80 | print('nao = %d, nocc = %d, nvir = %d' % \ 81 | (nao, nocc, nvir)) 82 | print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape) 83 | c_occ = mo_coeff[:,occ_a] 84 | c_vir = mo_coeff[:,vir_a] 85 | e_occ = mo_energy[occ_a] 86 | e_vir = mo_energy[vir_a] 87 | c_occ = c_occ.T 88 | c_vir = c_vir.T 89 | meta = [natm, nao, nocc, nvir] 90 | if verbose : 91 | print('shape of coeff data ', c_occ.shape) 92 | print('shape of ener data ', e_occ.shape) 93 | print('shape of coeff data ', c_vir.shape) 94 | print('shape of ener data ', e_vir.shape) 95 | print('E(RHF) = %.9g' % erhf) 96 | return meta, erhf, (e_occ, e_vir), (c_occ, c_vir) 97 | # return erhf, myemp2, ener_data, coeff_data 98 | 99 | 100 | def dump_data(dir_name, meta, ehf, e_data, c_data) : 101 | os.makedirs(dir_name, exist_ok = True) 102 | np.savetxt(os.path.join(dir_name, 'system.raw'), 103 | np.array(meta).reshape(1,-1), 104 | fmt = '%d', 105 | header = 'natm nao nocc nvir') 106 | nframe = 1 107 | natm = meta[0] 108 | nao = meta[1] 109 | nocc = meta[2] 110 | nvir = meta[3] 111 | # ntest == natm 112 | assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int))) 113 | assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int))) 114 | assert(all(e_data[0].shape == np.array([nocc], dtype = int))) 115 | assert(all(e_data[1].shape == np.array([nvir], dtype = int))) 116 | np.savetxt(os.path.join(dir_name, 'e_hf.raw'), np.reshape(ehf, [nframe,1])) 117 | np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1])) 118 | np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1])) 119 | np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1])) 120 | np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1])) 121 | 122 | 123 | def gen_frame(xyz_file, basis='ccpvtz', dump_dir=None, verbose=False): 124 | if dump_dir is None: 125 | dump_dir = os.path.splitext(xyz_file)[0] 126 | mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose) 127 | mol_meta, ehf, e_data, c_data = mol_electron(mol, verbose=verbose) 128 |
dump_data(dump_dir, mol_meta, ehf, e_data, c_data) 129 | 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser(description="Calculate and save RHF energy and mo_coeffs for given xyz files.") 133 | parser.add_argument("files", nargs="+", help="input xyz files") 134 | parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, use the same dir as input") 135 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 136 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 137 | args = parser.parse_args() 138 | 139 | for fn in args.files: 140 | if args.dump_dir is None: 141 | dump = None 142 | else: 143 | dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0]) 144 | try: 145 | gen_frame(fn, args.basis, dump, args.verbose) 146 | print(f"{fn} finished") 147 | except Exception as e: 148 | print(f"{fn} failed,", e, file=sys.stderr) 149 | raise 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /scripts/legacy/rks.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from pyscf import gto, dft, lib 5 | from pyscf.mp.mp2 import _mo_energy_without_core 6 | from time import time 7 | import os 8 | import sys 9 | import argparse 10 | 11 | 12 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 13 | with open(filename) as fp: 14 | natoms = int(fp.readline()) 15 | comments = fp.readline() 16 | xyz_str = "".join(fp.readlines()) 17 | mol = gto.Mole() 18 | mol.verbose = 4 if verbose else 0 19 | mol.atom = xyz_str 20 | mol.basis = basis 21 | mol.build(0,0,unit="Ang") 22 | return mol 23 | 24 | 25 | def fix_gauge(mo_coeff) : 26 | nvec = mo_coeff.shape[1] 27 | ndim = mo_coeff.shape[0] 28 | ret = np.zeros(mo_coeff.shape) 29 | count = 0 30 | for ii in range(nvec) : 31 | for jj in range(ndim) : 32 | if np.sign(mo_coeff[jj,ii]) != 0 : 33 | break 34 | if np.sign(mo_coeff[jj,ii]) == 0 : # loop ended without break 35 | # mo_coeff[:,ii] == 0 36 | assert(np.max(np.abs(mo_coeff[:,ii])) == 0) 37 | raise RuntimeError( 'ERROR: zero eigen func, should not happen') 38 | continue 39 | else : 40 | if (jj != 0) : 41 | print('gauge ref is not 0') 42 | factor = np.sign(mo_coeff[jj,ii]) 43 | ret[:,ii] = factor * mo_coeff[:,ii] 44 | count += 1 45 | # break 46 | # print(count) 47 | return ret 48 | 49 | 50 | def mol_electron(mol, xc='pbe', chkfile=None, verbose=False) : 51 | if verbose: 52 | start_t = time() 53 | nao = mol.nao 54 | natm = mol.natm 55 | rks = dft.RKS(mol) 56 | rks.xc = xc 57 | if chkfile: 58 | rks.set(chkfile=chkfile) 59 | erks = rks.kernel() 60 | if verbose: 61 | rks_t = time() 62 | print(f"time of rks: {rks_t - start_t}") 63 | 64 | mo_energy = rks.mo_energy 65 | mo_occ = rks.mo_occ 66 | # mo_coeff = rks.mo_coeff 67 | mo_coeff_ = rks.mo_coeff 68 | mo_coeff= fix_gauge(mo_coeff_) 69 | occ_a = (mo_occ>0) 70 | # occ_b = (mo_occ[1]>0) 71 | vir_a = (mo_occ==0) 72 | # vir_b = (mo_occ[1]==0) 73 | nocc_a = sum(occ_a) 74 | # nocc_b = sum(occ_b) 75 | nocc = nocc_a 76 | nvir_a = sum(vir_a) 77 | # nvir_b = sum(vir_b) 78 | nvir = nvir_a 79 | assert(nocc + nvir == nao) 80 | if verbose : 81 | print('nao = %d, nocc = %d, nvir = %d' % \ 82 | (nao, nocc, nvir)) 83 | print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape) 84 | c_occ = mo_coeff[:,occ_a] 85 | c_vir = mo_coeff[:,vir_a] 86 | e_occ = mo_energy[occ_a] 87 | e_vir =
mo_energy[vir_a] 88 | c_occ = c_occ.T 89 | c_vir = c_vir.T 90 | meta = [natm, nao, nocc, nvir] 91 | if verbose : 92 | print('shape of coeff data ', c_occ.shape) 93 | print('shape of ener data ', e_occ.shape) 94 | print('shape of coeff data ', c_vir.shape) 95 | print('shape of ener data ', e_vir.shape) 96 | print('E(RKS) = %.9g' % erks) 97 | return meta, erks, (e_occ, e_vir), (c_occ, c_vir) 98 | # return erks, myemp2, ener_data, coeff_data 99 | 100 | 101 | def dump_data(dir_name, meta, ehf, e_data, c_data) : 102 | os.makedirs(dir_name, exist_ok = True) 103 | np.savetxt(os.path.join(dir_name, 'system.raw'), 104 | np.array(meta).reshape(1,-1), 105 | fmt = '%d', 106 | header = 'natm nao nocc nvir') 107 | nframe = 1 108 | natm = meta[0] 109 | nao = meta[1] 110 | nocc = meta[2] 111 | nvir = meta[3] 112 | # ntest == natm 113 | assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int))) 114 | assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int))) 115 | assert(all(e_data[0].shape == np.array([nocc], dtype = int))) 116 | assert(all(e_data[1].shape == np.array([nvir], dtype = int))) 117 | np.savetxt(os.path.join(dir_name, 'e_dft.raw'), np.reshape(ehf, [nframe,1])) 118 | np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1])) 119 | np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1])) 120 | np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1])) 121 | np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1])) 122 | 123 | 124 | def gen_frame(xyz_file, basis='ccpvtz', xc='pbe', dump_dir=None, verbose=False): 125 | if dump_dir is None: 126 | dump_dir = os.path.splitext(xyz_file)[0] 127 | mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose) 128 | mol_meta, ehf, e_data, c_data = mol_electron(mol, xc=xc, verbose=verbose) 129 | dump_data(dump_dir, mol_meta, ehf, e_data, c_data) 130 | 131 | 132 | def main(): 133 | parser = argparse.ArgumentParser(description="Calculate and save RKS (DFT) energy and mo_coeffs for given xyz files.") 134 | parser.add_argument("files", nargs="+", help="input xyz files") 135 | parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, use the same dir as input") 136 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 137 | parser.add_argument("-X", "--xc", default='pbe', type=str, help="xc functional") 138 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 139 | args = parser.parse_args() 140 | 141 | for fn in args.files: 142 | if args.dump_dir is None: 143 | dump = None 144 | else: 145 | dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0]) 146 | try: 147 | gen_frame(fn, args.basis, args.xc, dump, args.verbose) 148 | print(f"{fn} finished") 149 | except Exception as e: 150 | print(f"{fn} failed,", e, file=sys.stderr) 151 | raise 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /scripts/legacy/rmp2.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | from pyscf import gto, scf, mp, lib 5 | from pyscf.mp.mp2 import _mo_energy_without_core 6 | from time import time 7 | import os 8 | import sys 9 | import argparse 10 | 11 | 12 | def my_kernel(mp, mo_energy=None, mo_coeff=None, eris=None, with_eij=True): 13 | if
mo_energy is None or mo_coeff is None: 14 | if mp.mo_energy is None or mp.mo_coeff is None: 15 | raise RuntimeError('mo_coeff, mo_energy are not initialized.\n' 16 | 'You may need to call mf.kernel() to generate them.') 17 | mo_coeff = None 18 | mo_energy = _mo_energy_without_core(mp, mp.mo_energy) 19 | else: 20 | # For backward compatibility. In pyscf-1.4 or earlier, mp.frozen is 21 | # not supported when mo_energy or mo_coeff is given. 22 | assert(mp.frozen == 0 or mp.frozen is None) 23 | 24 | if eris is None: eris = mp.ao2mo(mo_coeff) 25 | 26 | nocc = mp.nocc 27 | nvir = mp.nmo - nocc 28 | eia = mo_energy[:nocc,None] - mo_energy[None,nocc:] 29 | 30 | if with_eij: 31 | eij = np.empty((nocc,nocc), dtype=eia.dtype) 32 | else: 33 | eij = None 34 | 35 | emp2 = 0 36 | for i in range(nocc): 37 | gi = np.asarray(eris.ovov[i*nvir:(i+1)*nvir]) 38 | gi = gi.reshape(nvir,nocc,nvir).transpose(1,0,2) 39 | t2i = gi.conj()/lib.direct_sum('jb+a->jba', eia, eia[i]) 40 | tmp_eij = 2 * np.einsum('jab,jab->j', t2i, gi) - np.einsum('jab,jba->j', t2i, gi) 41 | emp2 += tmp_eij.sum() 42 | if with_eij: 43 | eij[i] = tmp_eij 44 | 45 | return emp2.real, eij.real 46 | 47 | 48 | def parse_xyz(filename, basis='ccpvtz', verbose=False): 49 | with open(filename) as fp: 50 | natoms = int(fp.readline()) 51 | comments = fp.readline() 52 | xyz_str = "".join(fp.readlines()) 53 | mol = gto.Mole() 54 | mol.verbose = 4 if verbose else 0 55 | mol.atom = xyz_str 56 | mol.basis = basis 57 | mol.build(0,0,unit="Ang") 58 | return mol 59 | 60 | 61 | def fix_gauge(mo_coeff) : 62 | nvec = mo_coeff.shape[1] 63 | ndim = mo_coeff.shape[0] 64 | ret = np.zeros(mo_coeff.shape) 65 | count = 0 66 | for ii in range(nvec) : 67 | for jj in range(ndim) : 68 | if np.sign(mo_coeff[jj,ii]) != 0 : 69 | break 70 | if np.sign(mo_coeff[jj,ii]) == 0 : # loop ended without break 71 | # mo_coeff[:,ii] == 0 72 | assert(np.max(np.abs(mo_coeff[:,ii])) == 0) 73 | raise RuntimeError( 'ERROR: zero eigen func, should not happen') 74 | continue 75 | else : 76 | if (jj != 0) : 77 | print('gauge ref is not 0') 78 | factor = np.sign(mo_coeff[jj,ii]) 79 | ret[:,ii] = factor * mo_coeff[:,ii] 80 | count += 1 81 | # break 82 | # print(count) 83 | return ret 84 | 85 | 86 | def mol_electron(mol, frozen=0, chkfile=None, verbose=False) : 87 | if verbose: 88 | start_t = time() 89 | nao = mol.nao 90 | natm = mol.natm 91 | rhf = scf.RHF(mol) 92 | if chkfile: 93 | rhf.set(chkfile=chkfile) 94 | erhf = rhf.kernel() 95 | if verbose: 96 | rhf_t = time() 97 | print(f"time of rhf: {rhf_t - start_t}") 98 | 99 | mo_energy = rhf.mo_energy 100 | mo_occ = rhf.mo_occ 101 | # mo_coeff = rhf.mo_coeff 102 | mo_coeff_ = rhf.mo_coeff 103 | mo_coeff= fix_gauge(mo_coeff_) 104 | occ_a = (mo_occ>0) 105 | occ_a[:frozen] = False 106 | # occ_b = (mo_occ[1]>0) 107 | vir_a = (mo_occ==0) 108 | # vir_b = (mo_occ[1]==0) 109 | nocc_a = sum(occ_a) 110 | # nocc_b = sum(occ_b) 111 | nocc = nocc_a 112 | nvir_a = sum(vir_a) 113 | # nvir_b = sum(vir_b) 114 | nvir = nvir_a 115 | assert(nocc + nvir + frozen == nao) 116 | if verbose : 117 | print('nao = %d, nocc = %d, nvir = %d' % \ 118 | (nao, nocc, nvir)) 119 | print('shape of a and b coeffs: ', mo_coeff[0].shape, mo_coeff[1].shape) 120 | c_occ = mo_coeff[:,occ_a] 121 | c_vir = mo_coeff[:,vir_a] 122 | e_occ = mo_energy[occ_a] 123 | e_vir = mo_energy[vir_a] 124 | c_occ = c_occ.T 125 | c_vir = c_vir.T 126 | meta = [natm, nao, nocc, nvir] 127 | if verbose : 128 | print('shape of coeff data ', c_occ.shape) 129 | print('shape of ener data ', e_occ.shape) 130 | print('shape of coeff data ', c_vir.shape) 131 |
print('shape of ener data ', e_vir.shape) 132 | mid_t = time() 133 | # print(f"time of collecting results: {mid_t - rhf_t}") 134 | 135 | mp2 = mp.MP2(rhf, frozen=frozen) 136 | # emp2 = mp2.kernel() 137 | emp2, emp2_ij = my_kernel(mp2) 138 | if verbose : 139 | print('E(HF) = %.9g' % erhf) 140 | print('E(RMP2) = %.9g' % emp2) 141 | print(f"time of mp2: {time()-mid_t}") 142 | return meta, erhf, emp2, emp2_ij, (e_occ, e_vir), (c_occ, c_vir) 143 | # return erhf, myemp2, ener_data, coeff_data 144 | 145 | 146 | def dump_data(dir_name, meta, ehf, emp2, ec_ij, e_data, c_data) : 147 | os.makedirs(dir_name, exist_ok = True) 148 | np.savetxt(os.path.join(dir_name, 'system.raw'), 149 | np.array(meta).reshape(1,-1), 150 | fmt = '%d', 151 | header = 'natm nao nocc nvir') 152 | nframe = 1 153 | natm = meta[0] 154 | nao = meta[1] 155 | nocc = meta[2] 156 | nvir = meta[3] 157 | # ntest == natm 158 | assert(all(c_data[0].shape == np.array([nocc, nao], dtype = int))) 159 | assert(all(c_data[1].shape == np.array([nvir, nao], dtype = int))) 160 | assert(all(e_data[0].shape == np.array([nocc], dtype = int))) 161 | assert(all(e_data[1].shape == np.array([nvir], dtype = int))) 162 | assert(all(ec_ij.shape == np.array([nocc, nocc], dtype = int))) 163 | np.savetxt(os.path.join(dir_name, 'e_hf.raw'), np.reshape(ehf, [nframe,1])) 164 | np.savetxt(os.path.join(dir_name, 'e_mp2.raw'), np.reshape(emp2, [nframe,1])) 165 | np.savetxt(os.path.join(dir_name, 'ec_ij.raw'), ec_ij.reshape([nframe, -1])) 166 | np.savetxt(os.path.join(dir_name, 'ener_occ.raw'), e_data[0].reshape([nframe, -1])) 167 | np.savetxt(os.path.join(dir_name, 'ener_vir.raw'), e_data[1].reshape([nframe, -1])) 168 | np.savetxt(os.path.join(dir_name, 'coeff_occ.raw'), c_data[0].reshape([nframe, -1])) 169 | np.savetxt(os.path.join(dir_name, 'coeff_vir.raw'), c_data[1].reshape([nframe, -1])) 170 | 171 | 172 | def gen_frame(xyz_file, basis='ccpvtz', frozen=0, dump_dir=None, verbose=False): 173 | if dump_dir is None: 174 | dump_dir = os.path.splitext(xyz_file)[0] 175 | mol = parse_xyz(xyz_file, basis=basis ,verbose=verbose) 176 | mol_meta, ehf, emp2, ec_ij, e_data, c_data = mol_electron(mol, frozen=frozen, verbose=verbose) 177 | dump_data(dump_dir, mol_meta, ehf, emp2, ec_ij, e_data, c_data) 178 | 179 | 180 | def main(): 181 | parser = argparse.ArgumentParser(description="Calculate and save mp2 energy and mo_coeffs for given xyz files.") 182 | parser.add_argument("files", nargs="+", help="input xyz files") 183 | parser.add_argument("-d", "--dump-dir", default=None, help="dir of dumped files, if not specified, use the same dir as input") 184 | parser.add_argument("-v", "--verbose", action='store_true', help="output calculation information") 185 | parser.add_argument("-F", "--frozen", default=0, type=int, help="number of orbitals to freeze when calculating mp2") 186 | parser.add_argument("-B", "--basis", default="ccpvtz", type=str, help="basis used to do the calculation") 187 | args = parser.parse_args() 188 | 189 | for fn in args.files: 190 | if args.dump_dir is None: 191 | dump = None 192 | else: 193 | dump = os.path.join(args.dump_dir, os.path.splitext(os.path.basename(fn))[0]) 194 | try: 195 | gen_frame(fn, args.basis, args.frozen, dump, args.verbose) 196 | print(f"{fn} finished") 197 | except Exception as e: 198 | print(f"{fn} failed,", e, file=sys.stderr) 199 | raise 200 | 201 | 202 | if __name__ == "__main__": 203 | main() 204 | -------------------------------------------------------------------------------- /scripts/solve_mol.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #SBATCH -N 1 3 | #SBATCH -c 10 4 | #SBATCH -t 24:00:00 5 | #SBATCH --mem=32G 6 | 7 | import time 8 | import numpy as np 9 | from pyscf import gto, scf 10 | 11 | BOHR = 0.52917721092 12 | 13 | _NO_FORCE = False 14 | _NO_DM = False 15 | _MUST_UNRES = False 16 | _USE_NEWTON = False 17 | 18 | def parse_xyz(filename, basis='ccpvdz', **kwargs): 19 | with open(filename) as fp: 20 | natoms = int(fp.readline()) 21 | comments = fp.readline() 22 | xyz_str = "".join(fp.readlines()) 23 | mol = gto.Mole() 24 | mol.atom = xyz_str 25 | mol.basis = basis 26 | mol.set(**kwargs) 27 | if "spin" not in kwargs: 28 | mol.spin = mol.nelectron % 2 29 | mol.build(0,0,unit="Ang") 30 | return mol 31 | 32 | 33 | def get_method(name: str): 34 | lname = name.lower() 35 | if lname == "hf": 36 | return calc_hf 37 | if lname[:3] == "dft": 38 | xc = lname.split("@")[1] if "@" in lname else "pbe" 39 | return lambda mol, **scfargs: calc_dft(mol, xc, **scfargs) 40 | if lname == "mp2": 41 | return calc_mp2 42 | if lname == "ccsd": 43 | return calc_ccsd 44 | if lname.startswith(("ccsd_t", "ccsd-t", "ccsd(t)")): 45 | return calc_ccsd_t 46 | if lname == "fci": 47 | return calc_fci 48 | raise ValueError(f"Unknown calculation method: {name}") 49 | 50 | def solve_scf(mol, **scfargs): 51 | HFmethod = scf.HF if not _MUST_UNRES else scf.UHF 52 | mf = HFmethod(mol).set(init_guess_breaksym=True, **scfargs) # apply user scf args, e.g. conv_tol 53 | init_dm = mf.get_init_guess() 54 | # if _MUST_UNRES: 55 | # init_dm[1][:2,:2] = 0 56 | mf.kernel(init_dm) 57 | if _USE_NEWTON: 58 | mf = scf.fast_newton(mf) 59 | return mf 60 | 61 | def calc_hf(mol, **scfargs): 62 | mf = solve_scf(mol, **scfargs) 63 | if not mf.converged: 64 | raise RuntimeError("SCF not converged!") 65 | etot = mf.e_tot 66 | grad = mf.nuc_grad_method().kernel() if not _NO_FORCE else None 67 | rdm = mf.make_rdm1() if not _NO_DM else None 68 | return etot, grad, rdm 69 | 70 | def calc_dft(mol, xc="pbe", **scfargs): 71 | from pyscf import dft 72 | KSmethod = dft.KS if not _MUST_UNRES else dft.UKS 73 | mf = KSmethod(mol, xc).run(**scfargs) 74 | if not mf.converged: 75 | raise RuntimeError("SCF not converged!") 76 | etot = mf.e_tot 77 | if _NO_FORCE or dft.libxc.xc_type(xc) in ('MGGA', 'NLC'): 78 | grad = None 79 | else: 80 | grad = mf.nuc_grad_method().kernel() 81 | rdm = mf.make_rdm1() if not _NO_DM else None 82 | return etot, grad, rdm 83 | 84 | def calc_mp2(mol, **scfargs): 85 | import pyscf.mp 86 | mf = solve_scf(mol, **scfargs) 87 | if not mf.converged: 88 | raise RuntimeError("SCF not converged!") 89 | postmf = pyscf.mp.MP2(mf).run() 90 | etot = postmf.e_tot 91 | grad = postmf.nuc_grad_method().kernel() if not _NO_FORCE else None 92 | return etot, grad, None 93 | 94 | def calc_ccsd(mol, **scfargs): 95 | import pyscf.cc 96 | mf = solve_scf(mol, **scfargs) 97 | if not mf.converged: 98 | raise RuntimeError("SCF not converged!") 99 | mycc = mf.CCSD().run() 100 | etot = mycc.e_tot 101 | grad = mycc.nuc_grad_method().kernel() if not _NO_FORCE else None 102 | ccdm = np.einsum('...pi,...ij,...qj->...pq', 103 | mf.mo_coeff, mycc.make_rdm1(), mf.mo_coeff.conj()) if not _NO_DM else None 104 | return etot, grad, ccdm 105 | 106 | def calc_ccsd_t(mol, **scfargs): 107 | import pyscf.cc 108 | mf = solve_scf(mol, **scfargs) 109 | if not mf.converged: 110 | raise RuntimeError("SCF not converged!") 111 | mycc = mf.CCSD().run() 112 | et_correction = mycc.ccsd_t() 113 | etot = mycc.e_tot + et_correction 114 | if _NO_FORCE: 115 |
return etot, None, None 116 | import pyscf.grad.ccsd_t as ccsd_t_grad 117 | grad = ccsd_t_grad.Gradients(mycc).kernel() 118 | return etot, grad, None 119 | 120 | def calc_fci(mol, **scfargs): 121 | import pyscf.fci 122 | mf = solve_scf(mol, **scfargs) 123 | if not mf.converged: 124 | raise RuntimeError("SCF not converged!") 125 | myci = pyscf.fci.FCI(mf) 126 | etot, fcivec = myci.kernel() 127 | rdm = np.einsum('...pi,...ij,...qj->...pq', 128 | mf.mo_coeff, 129 | myci.make_rdm1s(fcivec, mol.nao, mol.nelec), 130 | mf.mo_coeff.conj()).sum(0) if not _NO_DM else None 131 | return etot, None, rdm 132 | 133 | 134 | if __name__ == "__main__": 135 | import argparse 136 | import os 137 | parser = argparse.ArgumentParser(description="Calculate and save energy, force and density matrix for given xyz files.") 138 | parser.add_argument("files", nargs="+", help="input xyz files") 139 | parser.add_argument("-d", "--dump-dir", help="dir of dumped files, default is same dir as xyz file") 140 | parser.add_argument("-v", "--verbose", default=1, type=int, help="output calculation information") 141 | parser.add_argument("-B", "--basis", default="ccpvdz", type=str, help="basis used to do the calculation") 142 | parser.add_argument("-C", "--charge", default=0, type=int, help="net charge of the molecule") 143 | parser.add_argument("-S", "--spin", default=0, type=int, help="net spin of the molecule") 144 | parser.add_argument("-M", "--method", default="ccsd", help="method used to do the calculation. supports HF, DFT@xc, MP2, CCSD, CCSD(T) and FCI") 145 | parser.add_argument("-U", "--unrestrict", action="store_true", help="force using unrestricted methods") 146 | parser.add_argument("-NF", "--no-force", action="store_true", help="do not calculate force") 147 | parser.add_argument("-ND", "--no-dm", action="store_true", help="do not calculate dm") 148 | parser.add_argument("-SO", "--newton", action="store_true", help="allow using the newton method when scf is not converged") 149 | parser.add_argument("--scf-input", help="yaml file to specify scf arguments") 150 | args = parser.parse_args() 151 | 152 | if args.unrestrict: _MUST_UNRES = True 153 | if args.no_force: _NO_FORCE = True 154 | if args.no_dm: _NO_DM = True 155 | if args.newton: _USE_NEWTON = True 156 | 157 | scfargs = {} 158 | if args.scf_input is not None: 159 | import ruamel.yaml as yaml 160 | with open(args.scf_input, 'r') as fp: 161 | scfargs = yaml.safe_load(fp) 162 | if args.dump_dir is not None: 163 | os.makedirs(args.dump_dir, exist_ok = True) 164 | calculator = get_method(args.method) 165 | 166 | for fn in args.files: 167 | tic = time.time() 168 | mol = parse_xyz(fn, args.basis, verbose=args.verbose, charge=args.charge, spin=args.spin) 169 | try: 170 | res = calculator(mol, **scfargs) 171 | except RuntimeError as err: 172 | print(fn, f"failed, {err}") 173 | continue 174 | etot, grad, rdm = res 175 | if args.dump_dir is None: 176 | dump_dir = os.path.dirname(fn) 177 | else: 178 | dump_dir = args.dump_dir 179 | dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) 180 | np.save(dump+".energy.npy", [etot]) 181 | if grad is not None: 182 | force = -grad / BOHR 183 | np.save(dump+".force.npy", force) 184 | if rdm is not None: 185 | np.save(dump+".dm.npy", rdm) 186 | if args.verbose: 187 | print(fn, f"done, time = {time.time()-tic}") -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import setuptools 3 | 4 | 5 | here =
pathlib.Path(__file__).parent.resolve() 6 | readme = (here / 'README.md').read_text(encoding='utf-8') 7 | 8 | # did not include torch and pyscf here 9 | install_requires=['numpy', 'paramiko', 'ruamel.yaml'] 10 | 11 | 12 | setuptools.setup( 13 | name="deepks", 14 | use_scm_version={'write_to': 'deepks/_version.py'}, 15 | setup_requires=['setuptools_scm'], 16 | author="Yixiao Chen", 17 | author_email="yixiaoc@princeton.edu", 18 | description="DeePKS-kit: generate accurate (self-consistent) energy functionals", 19 | long_description=readme, 20 | long_description_content_type="text/markdown", 21 | packages=setuptools.find_packages(include=['deepks', 'deepks.*']), 22 | classifiers=[ 23 | "Programming Language :: Python :: 3.7", 24 | ], 25 | keywords='deepks DeePKS-kit', 26 | install_requires=install_requires, 27 | python_requires=">=3.7", 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'deepks=deepks.main:main_cli', 31 | 'dks=deepks.main:main_cli', 32 | ], 33 | }, 34 | ) --------------------------------------------------------------------------------
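A minimal end-to-end usage sketch based on the entry points defined in setup.py and the example configs above (paths and file names are illustrative):

    pip install .                                            # provides the `deepks` / `dks` CLI
    python scripts/solve_mol.py -M ccsd -B ccpvdz mol.xyz    # label one geometry with energy/force/dm
    python scripts/convert_xyz.py mol.xyz -d systems/sys.00  # pack the labels into an .npy system folder
    deepks iterate base.yaml penalty.yaml                    # run the iterative training loop (cf. pipe.sh)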