├── .gitignore ├── README.md ├── asi_io.py ├── example.ipynb ├── maxvol.py ├── maxvol_cpu.py ├── maxvol_gpu.py ├── select_active.py ├── select_extend.py ├── select_gamma.py └── tools.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | active_set.asi 3 | nep.txt 4 | *.xyz 5 | *.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Active Learning of NEP 2 | 3 | ## Features 4 | 5 | - Selecting active set with `MaxVol` algorithem by CPU or GPU. 6 | - Calcualting extrapolation grade with `PyNEP`. 7 | - Calcualting extrapolation grade during MD simulations with `GPUMD`. 8 | 9 | ## Installation 10 | 11 | ### 1. (Optional) Create a conda environment 12 | 13 | ```shell 14 | conda create -n nep_active python=3.10 15 | conda activate nep_active 16 | ``` 17 | 18 | If you choose to install it in your current env, jump to the next step. 19 | 20 | ### 2. Install PyNEP 21 | 22 | Install the **latest** `PyNEP`. You can check its own [repository](https://github.com/bigd4/PyNEP) for details. 23 | 24 | ```shell 25 | pip install git+https://github.com/bigd4/PyNEP.git 26 | ``` 27 | 28 | ### 3. (Optional) Install CuPy 29 | 30 | When selecting the active set, you may use `cupy` or `numpy`. `cupy` uses your GPU and is much faster when performing `MaxVol`. Since you are using `GPUMD`, I assume you have a GPU. You can check its [website](https://cupy.dev/) for installation details. 31 | 32 | ```shell 33 | pip install cupy-cuda12x 34 | ``` 35 | 36 | ## Usage 37 | 38 | ### 1. Training an NEP potential 39 | 40 | You need to have a NEP. 41 | 42 | ### 2. Selecting an _Active Set_ 43 | 44 | An active set invsersion (`.asi` file) is needed when calculating the extrapolation grade. The active set can also be considered as the environments with the maximum diversity. You can use `select_active.py` to get an active set inversion (`.asi` file) by `MaxVol` and corresponding structures (`.xyz` file). 45 | 46 | ### 3. Selecting structures with large gamma 47 | 48 | If you want to select some structures to add to the training set, you can calculate their extrapolation grade (`gamma`) and judge if their are outside the training set. 49 | This can be performed by the `compute_extrapolatione` command in `GPUMD` or by `select_gamma.py`. You may modify the gamma cutoff to control how far they are from the training set. The default value in `select_gamma.py` is `1`. 50 | 51 | However, the selected structures can be dupelicated, so you need to perform the next step. 52 | 53 | ### 4. Extending your training set 54 | 55 | If you want to select some structures to add to the training set, your can put them together and perform a `MaxVol` selection. This is in `select_extend.py`. 56 | -------------------------------------------------------------------------------- /asi_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def save_asi(asi, filename="active_set.asi"): 5 | with open(filename, "w") as f: 6 | for k, v in asi.items(): 7 | f.write(f"{k} {v.shape[0]} {v.shape[1]}\n") 8 | for i in v.flatten(): 9 | f.write(str(i) + "\n") 10 | 11 | 12 | def load_asi(asi): 13 | ret = {} 14 | with open(asi, "r") as f: 15 | while True: 16 | B = [] 17 | line1 = f.readline() 18 | if len(line1) == 0: 19 | break 20 | line1 = line1.split(" ") 21 | element, shape1, shape2 = line1[0], int(line1[1]), int(line1[2]) 22 | for _ in range(shape1 * shape2): 23 | B.append(float(f.readline())) 24 | ret[element] = np.array(B).reshape((shape1, shape2)) 25 | return ret 26 | -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Elements in the NEP potential: ['Na', 'Cl']\n", 13 | "Calculating B projections...\n" 14 | ] 15 | }, 16 | { 17 | "name": "stderr", 18 | "output_type": "stream", 19 | "text": [ 20 | "100%|██████████| 4725/4725 [00:29<00:00, 160.53it/s]\n" 21 | ] 22 | }, 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "Shape of the B matrix:\n", 28 | "Na: (119999, 960)\n", 29 | "Cl: (119999, 960)\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "from tools import get_B_projections, get_active_set\n", 35 | "from pynep.io import load_nep, dump_nep\n", 36 | "\n", 37 | "nep_file = \"nep.txt\"\n", 38 | "traj = load_nep(\"train.xyz\", ftype=\"exyz\")\n", 39 | "\n", 40 | "B_projections, B_projections_struct_index = get_B_projections(traj, nep_file)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "Performing MaxVol...\n", 53 | "Maxvol Speed: 330 iters/s\n", 54 | "Batch 0: adding 960 envs. \n", 55 | "Maxvol Speed: 369 iters/s\n", 56 | "Batch 1: adding 545 envs. \n", 57 | "Maxvol Speed: 364 iters/s\n", 58 | "Batch 2: adding 357 envs. \n", 59 | "Maxvol Speed: 348 iters/s\n", 60 | "Batch 3: adding 331 envs. \n", 61 | "Maxvol Speed: 354 iters/s\n", 62 | "Batch 4: adding 316 envs. \n", 63 | "Maxvol Speed: 353 iters/s\n", 64 | "Batch 5: adding 319 envs. \n", 65 | "Maxvol Speed: 322 iters/s\n", 66 | "Batch 6: adding 251 envs. \n", 67 | "Maxvol Speed: 313 iters/s\n", 68 | "Batch 7: adding 196 envs. \n", 69 | "Maxvol Speed: 310 iters/s\n", 70 | "Batch 8: adding 112 envs. \n", 71 | "Maxvol Speed: 143 iters/s\n", 72 | "Batch 9: adding 3 envs. \n", 73 | "Maxvol Speed: 203 iters/s\n", 74 | "Batch 10: adding 19 envs. \n", 75 | "Maxvol Speed: 267 iters/s\n", 76 | "Batch 11: adding 61 envs. \n", 77 | "Refinement round 0: 0 envs out of active set. Max gamma = 0.9997219572219311\n", 78 | "Refinement done.\n", 79 | "Shape of the active set:\n", 80 | "Na: (960, 960)\n", 81 | "Maxvol Speed: 440 iters/s\n", 82 | "Batch 0: adding 960 envs. \n", 83 | "Maxvol Speed: 397 iters/s\n", 84 | "Batch 1: adding 572 envs. \n", 85 | "Maxvol Speed: 415 iters/s\n", 86 | "Batch 2: adding 367 envs. \n", 87 | "Maxvol Speed: 399 iters/s\n", 88 | "Batch 3: adding 326 envs. \n", 89 | "Maxvol Speed: 393 iters/s\n", 90 | "Batch 4: adding 349 envs. \n", 91 | "Maxvol Speed: 372 iters/s\n", 92 | "Batch 5: adding 372 envs. \n", 93 | "Maxvol Speed: 389 iters/s\n", 94 | "Batch 6: adding 290 envs. \n", 95 | "Maxvol Speed: 382 iters/s\n", 96 | "Batch 7: adding 210 envs. \n", 97 | "Maxvol Speed: 369 iters/s\n", 98 | "Batch 8: adding 134 envs. \n", 99 | "Maxvol Speed: 362 iters/s\n", 100 | "Batch 9: adding 5 envs. \n", 101 | "Maxvol Speed: 355 iters/s\n", 102 | "Batch 10: adding 13 envs. \n", 103 | "Maxvol Speed: 387 iters/s\n", 104 | "Batch 11: adding 55 envs. \n", 105 | "Refinement round 0: 0 envs out of active set. Max gamma = 0.9999919673420719\n", 106 | "Refinement done.\n", 107 | "Shape of the active set:\n", 108 | "Cl: (960, 960)\n", 109 | "Finding inverse...\n", 110 | "Saving active set inverse...\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "active_set_inv, active_set_struct = get_active_set(\n", 116 | " B_projections, B_projections_struct_index\n", 117 | ")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "from tools import get_gamma\n", 127 | "from pynep.io import load_nep, dump_nep\n", 128 | "\n", 129 | "nep_file = \"nep.txt\"\n", 130 | "traj = load_nep(\"train.xyz\", ftype=\"exyz\")\n", 131 | "\n", 132 | "get_gamma(traj, nep_file, \"active_set.asi\")\n", 133 | "\n", 134 | "from ase.io import write\n", 135 | "\n", 136 | "write(\"with_gamma.xyz\", traj, write_results=False)\n", 137 | "\n", 138 | "out_traj = [atoms for atoms in traj if atoms.arrays[\"gamma\"].max() > 1]\n", 139 | "dump_nep(\"large_gamma.xyz\", out_traj, ftype=\"exyz\")" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "Calculating gamma...\n" 152 | ] 153 | }, 154 | { 155 | "name": "stderr", 156 | "output_type": "stream", 157 | "text": [ 158 | "100%|██████████| 4725/4725 [00:38<00:00, 123.68it/s]\n" 159 | ] 160 | }, 161 | { 162 | "data": { 163 | "image/png": "", 164 | "text/plain": [ 165 | "
" 166 | ] 167 | }, 168 | "metadata": {}, 169 | "output_type": "display_data" 170 | } 171 | ], 172 | "source": [ 173 | "import numpy as np\n", 174 | "from tqdm import tqdm\n", 175 | "from pynep.calculate import NEP\n", 176 | "from ase.io import read\n", 177 | "import matplotlib.pyplot as plt\n", 178 | "from asi_io import load_asi\n", 179 | "\n", 180 | "nep_file = \"nep.txt\"\n", 181 | "calc = NEP(nep_file)\n", 182 | "\n", 183 | "active_set_inverse = load_asi(\"active_set.asi\")\n", 184 | "\n", 185 | "\n", 186 | "def get_gamma_dF(file):\n", 187 | " traj = read(file, index=\":\")\n", 188 | " gamma = {k: [] for k in active_set_inverse.keys()}\n", 189 | " dF = {k: [] for k in active_set_inverse.keys()}\n", 190 | " phase = {k: [] for k in active_set_inverse.keys()}\n", 191 | "\n", 192 | " print(\"Calculating gamma...\")\n", 193 | "\n", 194 | " for atoms in tqdm(traj):\n", 195 | " dft_F = atoms.arrays[\"force\"]\n", 196 | " calc.calculate(atoms, [\"B_projection\"])\n", 197 | " B_projection = calc.results[\"B_projection\"]\n", 198 | " nep_F = calc.results[\"forces\"]\n", 199 | " F_error = np.linalg.norm(dft_F - nep_F, axis=1)\n", 200 | " for e in active_set_inverse.keys():\n", 201 | " index = [\n", 202 | " ii for ii in range(len(atoms)) if atoms.get_chemical_symbols()[ii] == e\n", 203 | " ]\n", 204 | " g = B_projection[index] @ active_set_inverse[e]\n", 205 | " g = np.max(np.abs(g), axis=1)\n", 206 | " gamma[e].extend(g)\n", 207 | " dF[e].extend(F_error[index])\n", 208 | " phase[e].extend([atoms.info[\"config_type\"]] * len(index))\n", 209 | "\n", 210 | " return gamma, dF, phase\n", 211 | "\n", 212 | "\n", 213 | "gamma, dF, phase = get_gamma_dF(\"train.xyz\")\n", 214 | "plt.scatter(gamma[\"Na\"], dF[\"Na\"], s=3, label=\"train\")\n", 215 | "\n", 216 | "\n", 217 | "plt.gca().set_xscale(\"log\")\n", 218 | "plt.gca().set_yscale(\"log\")\n", 219 | "plt.xlabel(r\"$\\gamma$\")\n", 220 | "plt.ylabel(\"|dF| (eV/A)\")\n", 221 | "plt.tight_layout()\n", 222 | "plt.legend()\n", 223 | "plt.savefig(\"gamma-dF.png\", transparent=False)" 224 | ] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "ase", 230 | "language": "python", 231 | "name": "python3" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.10.15" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 2 248 | } 249 | -------------------------------------------------------------------------------- /maxvol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # avoid large value since GPUMD use float 5 | def find_inverse(m): 6 | return np.linalg.pinv(m, rcond=1e-8) 7 | 8 | 9 | def calculate_maxvol( 10 | A, 11 | struct_index, 12 | gamma_tol=1.001, 13 | maxvol_iter=1000, 14 | mode="GPU", 15 | batch_size=None, 16 | n_refinement=10, 17 | ): 18 | if mode == "GPU": 19 | from maxvol_gpu import maxvol 20 | elif mode == "CPU": 21 | from maxvol_cpu import maxvol 22 | else: 23 | raise Exception("mode should be CPU or GPU.") 24 | 25 | # one batch 26 | if batch_size is None: 27 | selected = maxvol(A, gamma_tol, maxvol_iter) 28 | return A[selected], struct_index[selected] 29 | 30 | # multiple batches 31 | batch_num = np.ceil(len(A) / batch_size) 32 | batch_splits_indices = np.array_split( 33 | np.arange(len(A)), 34 | batch_num, 35 | ) 36 | 37 | # stage 1 - cumulative maxvol 38 | A_selected = None 39 | struct_index_selected = None 40 | for i, ind in enumerate(batch_splits_indices): 41 | # first batch 42 | if A_selected is None: 43 | A_joint = A[ind] 44 | struct_index_joint = struct_index[ind] 45 | # other batches 46 | else: 47 | A_joint = np.vstack([A_selected, A[ind]]) 48 | struct_index_joint = np.hstack([struct_index_selected, struct_index[ind]]) 49 | 50 | selected = maxvol(A_joint, gamma_tol, maxvol_iter) 51 | if A_selected is None: 52 | l = 0 53 | else: 54 | l = len(A_selected) 55 | A_selected = A_joint[selected] 56 | struct_index_selected = struct_index_joint[selected] 57 | n_add = (selected >= l).sum() 58 | print(f"Batch {i}: adding {n_add} envs. ") 59 | 60 | # stage 2 - refinement 61 | for ii in range(n_refinement): 62 | # check max gamma, if small enough, no need to refine 63 | inv = find_inverse(A_selected) 64 | gamma = np.abs(A_selected @ inv) 65 | large_gamma = gamma > gamma_tol 66 | print( 67 | f"Refinement round {ii}: {large_gamma.sum()} envs out of active set. Max gamma = {np.max(gamma)}" 68 | ) 69 | if np.max(gamma) < gamma_tol: 70 | print("Refinement done.") 71 | return A_selected, struct_index_selected 72 | 73 | A_joint = np.vstack([A_selected, A[large_gamma]]) 74 | struct_index_joint = np.hstack( 75 | [struct_index_selected, struct_index[large_gamma]] 76 | ) 77 | selected = maxvol(A_joint, gamma_tol, maxvol_iter) 78 | A_selected = A_joint[selected] 79 | struct_index_selected = struct_index_joint[selected] 80 | -------------------------------------------------------------------------------- /maxvol_cpu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.linalg import lu, solve_triangular 3 | from time import time 4 | 5 | """ 6 | The following code comes from https://github.com/AndreiChertkov/teneva 7 | """ 8 | 9 | 10 | def maxvol(A, e, k): 11 | """Compute the maximal-volume submatrix for given tall matrix. 12 | 13 | Args: 14 | A (np.ndarray): tall matrix of the shape [n, r] (n > r). 15 | e (float): accuracy parameter (should be >= 1). If the parameter is 16 | equal to 1, then the maximum number of iterations will be performed 17 | until true convergence is achieved. If the value is greater than 18 | one, the algorithm will complete its work faster, but the accuracy 19 | will be slightly lower (in most cases, the optimal value is within 20 | the range of 1.01 - 1.1). 21 | k (int): maximum number of iterations (should be >= 1). 22 | 23 | Returns: 24 | (np.ndarray, np.ndarray): the row numbers I containing the maximal 25 | volume submatrix in the form of 1D array of length r and coefficient 26 | matrix B in the form of 2D array of shape [n, r], such that 27 | A = B A[I, :] and A (A[I, :])^{-1} = B. 28 | 29 | Note: 30 | The description of the basic implementation of this algorithm is 31 | presented in the work: Goreinov S., Oseledets, I., Savostyanov, D., 32 | Tyrtyshnikov, E., Zamarashkin, N. "How to find a good submatrix". 33 | Matrix Methods: Theory, Algorithms And Applications: Dedicated to the Memory of Gene Golub (2010): 247-256. 34 | 35 | """ 36 | n, r = A.shape 37 | 38 | if n <= r: 39 | raise ValueError('Input matrix should be "tall"') 40 | 41 | P, L, U = lu(A, check_finite=False) 42 | I = P[:, :r].argmax(axis=0) 43 | Q = solve_triangular(U, A.T, trans=1, check_finite=False) 44 | B = solve_triangular( 45 | L[:r, :], Q, trans=1, check_finite=False, unit_diagonal=True, lower=True 46 | ).T 47 | 48 | t0 = time() 49 | for iter in range(k): 50 | i, j = np.divmod(np.abs(B).argmax(), r) 51 | E = np.abs(B[i, j]) 52 | if E <= e: 53 | v = iter / (time() - t0) 54 | print(f"Maxvol Speed: {int(v)} iters/s") 55 | break 56 | 57 | I[j] = i 58 | 59 | bj = B[:, j] 60 | bi = B[i, :].copy() 61 | bi[j] -= 1.0 62 | 63 | B -= np.outer(bj, bi / B[i, j]) 64 | 65 | return I 66 | -------------------------------------------------------------------------------- /maxvol_gpu.py: -------------------------------------------------------------------------------- 1 | import cupy as cp 2 | from cupyx.scipy.linalg import lu, solve_triangular 3 | from time import time 4 | 5 | """ 6 | The following code comes from https://github.com/AndreiChertkov/teneva 7 | """ 8 | 9 | 10 | def maxvol(A, e, k): 11 | A = cp.array(A) 12 | """Compute the maximal-volume submatrix for given tall matrix. 13 | 14 | Args: 15 | A (np.ndarray): tall matrix of the shape [n, r] (n > r). 16 | e (float): accuracy parameter (should be >= 1). If the parameter is 17 | equal to 1, then the maximum number of iterations will be performed 18 | until true convergence is achieved. If the value is greater than 19 | one, the algorithm will complete its work faster, but the accuracy 20 | will be slightly lower (in most cases, the optimal value is within 21 | the range of 1.01 - 1.1). 22 | k (int): maximum number of iterations (should be >= 1). 23 | 24 | Returns: 25 | (np.ndarray, np.ndarray): the row numbers I containing the maximal 26 | volume submatrix in the form of 1D array of length r and coefficient 27 | matrix B in the form of 2D array of shape [n, r], such that 28 | A = B A[I, :] and A (A[I, :])^{-1} = B. 29 | 30 | Note: 31 | The description of the basic implementation of this algorithm is 32 | presented in the work: Goreinov S., Oseledets, I., Savostyanov, D., 33 | Tyrtyshnikov, E., Zamarashkin, N. "How to find a good submatrix". 34 | Matrix Methods: Theory, Algorithms And Applications: Dedicated to the Memory of Gene Golub (2010): 247-256. 35 | 36 | """ 37 | n, r = A.shape 38 | 39 | if n <= r: 40 | raise ValueError('Input matrix should be "tall"') 41 | 42 | P, L, U = lu(A, check_finite=False) 43 | I = P[:, :r].argmax(axis=0) 44 | Q = solve_triangular(U, A.T, trans=1, check_finite=False) 45 | B = solve_triangular( 46 | L[:r, :], Q, trans=1, check_finite=False, unit_diagonal=True, lower=True 47 | ).T 48 | 49 | t0 = time() 50 | for iter in range(k): 51 | i, j = cp.divmod(cp.abs(B).argmax(), r) 52 | E = cp.abs(B[i, j]) 53 | if E <= e: 54 | v = iter / (time() - t0) 55 | print(f"Maxvol Speed: {int(v)} iters/s") 56 | break 57 | 58 | I[j] = i 59 | 60 | bj = B[:, j] 61 | bi = B[i, :].copy() 62 | bi[j] -= 1.0 63 | 64 | B -= cp.outer(bj, bi / B[i, j]) 65 | 66 | return I.get() 67 | -------------------------------------------------------------------------------- /select_active.py: -------------------------------------------------------------------------------- 1 | from ase.io import write 2 | from pynep.io import load_nep, dump_nep 3 | from tools import get_B_projections, get_active_set 4 | 5 | nep_file = "nep.txt" 6 | traj = load_nep("train.xyz") 7 | 8 | B_projections, B_projections_struct_index = get_B_projections(traj, nep_file) 9 | active_set_inv, active_set_struct = get_active_set( 10 | B_projections, B_projections_struct_index 11 | ) 12 | 13 | out_traj = [traj[i] for i in active_set_struct] 14 | try: 15 | dump_nep("active_set.xyz", out_traj) 16 | except: 17 | write("active_set.xyz", out_traj) 18 | -------------------------------------------------------------------------------- /select_extend.py: -------------------------------------------------------------------------------- 1 | from ase.io import write, read 2 | from pynep.io import load_nep, dump_nep 3 | from tools import get_B_projections, get_active_set 4 | 5 | nep_file = "nep.txt" 6 | data1 = load_nep("train.xyz") 7 | try: 8 | data2 = load_nep("large_gamma.xyz") 9 | except: 10 | data2 = read("large_gamma.xyz", index=":") 11 | 12 | data = data1 + data2 13 | 14 | B_projections, B_projections_struct_index = get_B_projections(data, nep_file) 15 | active_set_inv, active_set_struct = get_active_set( 16 | B_projections, B_projections_struct_index, write_asi=False 17 | ) 18 | 19 | out = [data[i] for i in active_set_struct if i >= len(data1)] 20 | 21 | try: 22 | dump_nep("to_add.xyz", out) 23 | except: 24 | write("to_add.xyz", out) 25 | -------------------------------------------------------------------------------- /select_gamma.py: -------------------------------------------------------------------------------- 1 | from ase.io import write 2 | from pynep.io import load_nep, dump_nep 3 | from tools import get_gamma 4 | 5 | nep_file = "nep.txt" 6 | traj = load_nep("to_select.xyz") 7 | 8 | get_gamma(traj, nep_file, "active_set.asi") 9 | 10 | out_traj = [atoms for atoms in traj if atoms.arrays["gamma"].max() > 1] 11 | try: 12 | dump_nep("large_gamma.xyz", out_traj) 13 | except: 14 | write("large_gamma.xyz", out_traj) 15 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | from pynep.calculate import NEP 4 | from asi_io import save_asi, load_asi 5 | from maxvol import calculate_maxvol, find_inverse 6 | 7 | 8 | def get_gamma(traj, nep_file, asi_file): 9 | calc = NEP(nep_file) 10 | active_set_inverse = load_asi(asi_file) 11 | for atoms in tqdm(traj): 12 | atoms.arrays["gamma"] = np.zeros(len(atoms)) 13 | calc.calculate(atoms, ["B_projection"]) 14 | B_projection = calc.results["B_projection"] 15 | for e in active_set_inverse.keys(): 16 | index = [ 17 | ii for ii in range(len(atoms)) if atoms.get_chemical_symbols()[ii] == e 18 | ] 19 | g = B_projection[index] @ active_set_inverse[e] 20 | g = np.max(np.abs(g), axis=1) 21 | atoms.arrays["gamma"][index] = g 22 | return traj 23 | 24 | 25 | def get_B_projections(traj, nep_file): 26 | calc = NEP(nep_file) 27 | with open(nep_file) as f: 28 | first_line = f.readline() 29 | elements = first_line.split(" ")[2:-1] 30 | print(f"Elements in the NEP potential: {elements}") 31 | 32 | B_projections = {e: [] for e in elements} 33 | B_projections_struct_index = {e: [] for e in elements} 34 | print("Calculating B projections...") 35 | for index, atoms in enumerate(tqdm(traj)): 36 | calc.calculate(atoms, ["B_projection"]) 37 | B_projection = calc.results["B_projection"] 38 | for b, e in zip(B_projection, atoms.get_chemical_symbols()): 39 | B_projections[e].append(b) 40 | B_projections_struct_index[e].append(index) 41 | 42 | B_projections_struct_index = { 43 | e: np.array(i) for e, i in B_projections_struct_index.items() 44 | } 45 | 46 | print("Shape of the B matrix:") 47 | for e, b in B_projections.items(): 48 | B_projections[e] = np.vstack(b) 49 | print(f"{e}: {B_projections[e].shape}") 50 | assert ( 51 | B_projections[e].shape[0] >= B_projections[e].shape[1] 52 | ), f"Not enough environments for {e}." 53 | 54 | return B_projections, B_projections_struct_index 55 | 56 | 57 | def get_active_set( 58 | B_projections, 59 | B_projections_struct_index, 60 | write_asi=True, 61 | batch_size=10000, 62 | mode="GPU", 63 | ): 64 | print("Performing MaxVol...") 65 | active_set = {} 66 | active_set_struct = [] # the index of structure 67 | for e, b in B_projections.items(): 68 | A, selected_index = calculate_maxvol( 69 | b, B_projections_struct_index[e], batch_size=batch_size, mode=mode 70 | ) 71 | active_set[e] = A 72 | active_set_struct.extend(selected_index) 73 | print("Shape of the active set:") 74 | print(f"{e}: {active_set[e].shape}") 75 | 76 | active_set_struct = list(set(active_set_struct)) 77 | active_set_struct.sort() 78 | 79 | print("Finding inverse...") 80 | active_set_inv = {e: find_inverse(b) for e, b in active_set.items()} 81 | 82 | if write_asi: 83 | print("Saving active set inverse...") 84 | save_asi(active_set_inv) 85 | 86 | return active_set_inv, active_set_struct 87 | --------------------------------------------------------------------------------