├── README.md ├── ccsdt ├── ccsd-aspirin-n950-l3.yaml ├── ccsdt-benzene-n950-l3.yaml ├── ccsdt-ethanol-n950-l3.yaml ├── ccsdt-malonaldehyde-n950-l3.yaml └── ccsdt-toluene-n950-l3.yaml ├── fcu └── fcu.yaml ├── lipo └── lipo.yaml ├── lips ├── lips-n10.yaml ├── lips-n100.yaml ├── lips-n1000.yaml └── lips-n2500.yaml ├── md17 ├── md17-aspirin-n150-l0-feature-controlled.yaml ├── md17-aspirin-n150-l0-weight-controlled.yaml ├── md17-aspirin-n150-l0.yaml ├── md17-aspirin-n150-l1.yaml ├── md17-aspirin-n150-l2.yaml ├── md17-aspirin-n150-l3.yaml ├── md17-aspirin-n350-l0-feature-controlled.yaml ├── md17-aspirin-n350-l0-rerun.yaml ├── md17-aspirin-n350-l0-weight-controlled.yaml ├── md17-aspirin-n350-l0.yaml ├── md17-aspirin-n350-l1.yaml ├── md17-aspirin-n350-l2.yaml ├── md17-aspirin-n350-l3.yaml ├── md17-aspirin-n50-l0-feature-controlled.yaml ├── md17-aspirin-n50-l0-weight-controlled.yaml ├── md17-aspirin-n50-l0.yaml ├── md17-aspirin-n50-l1-rerun.yaml ├── md17-aspirin-n50-l1.yaml ├── md17-aspirin-n50-l2-rerun.yaml ├── md17-aspirin-n50-l2.yaml ├── md17-aspirin-n50-l3.yaml ├── md17-aspirin-n550-l0-feature-controlled.yaml ├── md17-aspirin-n550-l0-weight-controlled-rerun.yaml ├── md17-aspirin-n550-l0-weight-controlled.yaml ├── md17-aspirin-n550-l0.yaml ├── md17-aspirin-n550-l1.yaml ├── md17-aspirin-n550-l2.yaml ├── md17-aspirin-n550-l3-rerun.yaml ├── md17-aspirin-n550-l3.yaml ├── md17-aspirin-n750-l0-feature-controlled.yaml ├── md17-aspirin-n750-l0-weight-controlled.yaml ├── md17-aspirin-n750-l0.yaml ├── md17-aspirin-n750-l1.yaml ├── md17-aspirin-n750-l2.yaml ├── md17-aspirin-n750-l3.yaml ├── md17-aspirin-n950-l0-feature-controlled.yaml ├── md17-aspirin-n950-l0-lr0.0001.yaml ├── md17-aspirin-n950-l0-lr0.00025.yaml ├── md17-aspirin-n950-l0-lr0.0005.yaml ├── md17-aspirin-n950-l0-lr0.00075.yaml ├── md17-aspirin-n950-l0-lr0.001.yaml ├── md17-aspirin-n950-l0-lr0.0025.yaml ├── md17-aspirin-n950-l0-lr0.005.yaml ├── md17-aspirin-n950-l0-lr0.0075.yaml ├── md17-aspirin-n950-l0-weight-controlled.yaml ├── md17-aspirin-n950-l0.yaml ├── md17-aspirin-n950-l1.yaml ├── md17-aspirin-n950-l2.yaml ├── md17-aspirin-n950-l3.yaml ├── md17-benzene-n950-l3.yaml ├── md17-benzene_old-n950-l3.yaml ├── md17-ethanol-n950-l3.yaml ├── md17-malonaldehyde-n950-l3.yaml ├── md17-naphthalene-n950-l3.yaml ├── md17-salicylic-n950-l3.yaml ├── md17-toluene-n950-l3.yaml └── md17-uracil-n950-l3.yaml ├── revmd17 ├── revmd17-aspirin-n950-l0.yaml ├── revmd17-aspirin-n950-l1.yaml ├── revmd17-aspirin-n950-l2.yaml ├── revmd17-aspirin-n950-l3.yaml ├── revmd17-azobenzene-n950-l0.yaml ├── revmd17-azobenzene-n950-l1.yaml ├── revmd17-azobenzene-n950-l2.yaml ├── revmd17-azobenzene-n950-l3.yaml ├── revmd17-benzene-n950-l0.yaml ├── revmd17-benzene-n950-l1.yaml ├── revmd17-benzene-n950-l2.yaml ├── revmd17-benzene-n950-l3.yaml ├── revmd17-ethanol-n950-l0.yaml ├── revmd17-ethanol-n950-l1.yaml ├── revmd17-ethanol-n950-l2.yaml ├── revmd17-ethanol-n950-l3.yaml ├── revmd17-malonaldehyde-n950-l0.yaml ├── revmd17-malonaldehyde-n950-l1.yaml ├── revmd17-malonaldehyde-n950-l2.yaml ├── revmd17-malonaldehyde-n950-l3.yaml ├── revmd17-naphthalene-n950-l0.yaml ├── revmd17-naphthalene-n950-l1.yaml ├── revmd17-naphthalene-n950-l2.yaml ├── revmd17-naphthalene-n950-l3.yaml ├── revmd17-paracetamol-n950-l0.yaml ├── revmd17-paracetamol-n950-l1.yaml ├── revmd17-paracetamol-n950-l2.yaml ├── revmd17-paracetamol-n950-l3.yaml ├── revmd17-salicylic-n950-l0.yaml ├── revmd17-salicylic-n950-l1.yaml ├── revmd17-salicylic-n950-l2.yaml ├── revmd17-salicylic-n950-l3.yaml ├── 
revmd17-toluene-n950-l0.yaml ├── revmd17-toluene-n950-l1.yaml ├── revmd17-toluene-n950-l2.yaml ├── revmd17-toluene-n950-l3.yaml ├── revmd17-uracil-n950-l0.yaml ├── revmd17-uracil-n950-l1.yaml ├── revmd17-uracil-n950-l2.yaml └── revmd17-uracil-n950-l3.yaml ├── water-cheng-fixedtest ├── water-n10-l0.yaml ├── water-n10-l1.yaml ├── water-n10-l2.yaml ├── water-n10-l3.yaml ├── water-n100-l0.yaml ├── water-n100-l1.yaml ├── water-n100-l2.yaml ├── water-n100-l3.yaml ├── water-n1000-l0.yaml ├── water-n1000-l1.yaml ├── water-n1000-l2.yaml ├── water-n1000-l3-rerun.yaml ├── water-n1000-l3.yaml ├── water-n1303-l0.yaml ├── water-n1303-l1.yaml ├── water-n1303-l2.yaml ├── water-n1303-l3-rerun.yaml ├── water-n1303-l3.yaml ├── water-n25-l0.yaml ├── water-n25-l1.yaml ├── water-n25-l2.yaml ├── water-n25-l3.yaml ├── water-n250-l0.yaml ├── water-n250-l1.yaml ├── water-n250-l2.yaml ├── water-n250-l3.yaml ├── water-n50-l0.yaml ├── water-n50-l1.yaml ├── water-n50-l2.yaml ├── water-n50-l3.yaml ├── water-n500-l0.yaml ├── water-n500-l1.yaml ├── water-n500-l2.yaml ├── water-n500-l3.yaml ├── water-n750-l0.yaml ├── water-n750-l1.yaml ├── water-n750-l2.yaml └── water-n750-l3.yaml └── water-deepmd ├── water-deepmd-e0-f1.yaml ├── water-deepmd-e1-f100.yaml └── water-deepmd-e1-f100000.yaml /README.md: -------------------------------------------------------------------------------- 1 | ### NequIP Input Files 2 | 3 | Input files for the NequIP code used in Batzner, S., Musaelian, A., Sun, L., Geiger, M., Mailoa, J. P., Kornbluth, M., ... & Kozinsky, B. (2021). E(3)-equivariant graph neural networks for data-efficient and accurate interatomic potentials. arXiv preprint arXiv:2101.03164. 4 | 5 | Please note that in order to reproduce the results from the paper, you will have to use the specific NequIP version and git commit as specified in the paper. For an input file to get started on the latest version of the software, see `configs/example.yaml` in the [NequIP repo](https://github.com/mir-group/nequip). 6 | -------------------------------------------------------------------------------- /fcu/fcu.yaml: -------------------------------------------------------------------------------- 1 | root: fcu-l2-root 2 | run_name: fcu-l2-run_name 3 | workdir: fcu-l2-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 5.0 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges.
If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 28 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/formate-on-cu/new_version/fcu-with-energies-subset0.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-fcu # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 2500 # number of training data 64 | n_val: 250 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. 
Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 2704.0 86 | - PerSpeciesMSELoss 87 | total_energy: 88 | - 1. 89 | 90 | 91 | # output metrics 92 | metrics_components: 93 | - - forces # key 94 | - rmse # "rmse" or "mse" 95 | - PerSpecies: True # if true, per species contribution is counted separately 96 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 97 | - - forces 98 | - mae 99 | - PerSpecies: True 100 | report_per_component: False 101 | - - total_energy 102 | - mae 103 | - - total_energy 104 | - rmse 105 | 106 | # optimizer, may be any optimizer defined in torch.optim 107 | # the name `optimizer_name`is case sensitive 108 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 109 | optimizer_amsgrad: true 110 | optimizer_betas: !!python/tuple 111 | - 0.9 112 | - 0.999 113 | optimizer_eps: 1.0e-08 114 | optimizer_weight_decay: 0 115 | 116 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 117 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 118 | lr_scheduler_name: ReduceLROnPlateau 119 | lr_scheduler_patience: 50 120 | lr_scheduler_factor: 0.8 121 | 122 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 123 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 124 | 125 | # whether to apply a shift and scale, defined per-species, to the atomic energies 126 | PerSpeciesScaleShift_enable: true 127 | 128 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 129 | PerSpeciesScaleShift_trainable: true 130 | 131 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 132 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0, 0.0] 133 | 134 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 135 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0, 1.0] 136 | 137 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 138 | global_rescale_shift: dataset_energy_mean 139 | 140 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. 
When None, disables the global scale. When a number, used directly. 141 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 142 | global_rescale_scale: dataset_force_rms 143 | 144 | # whether the shift of the final global energy rescaling should be trainable 145 | trainable_global_rescale_shift: false 146 | 147 | # whether the scale of the final global energy rescaling should be trainable 148 | trainable_global_rescale_scale: false 149 | -------------------------------------------------------------------------------- /lipo/lipo.yaml: -------------------------------------------------------------------------------- 1 | root: lipo-n1000-l2-root 2 | run_name: lipo-n1000-l2-run_name 3 | workdir: lipo-n1000-l2-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 5.0 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 46 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/lipos/melt/melt_20180508_1312_e02677/lipo-melt.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-lipo # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 
55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 1000 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 43264. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /lips/lips-n10.yaml: -------------------------------------------------------------------------------- 1 | root: lips-n10-l2-root 2 | run_name: lips-n10-l2-run_name 3 | workdir: lips-n10-l2-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 5.0 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 25 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-lips # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 
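The dataset block above reads an extended-XYZ file through ASE. The surrounding comments also mention an npz reader configured through key_mapping and npz_fixed_field_keys; none of the files in this dump use it, so the snippet below is only a hedged sketch of that alternative. The left-hand keys stand for whatever arrays a hypothetical .npz file happens to contain, and the right-hand names are assumed NequIP internal field names (check the data/_key.py referenced in the comments and the example config of the version you actually use).

```yaml
# Illustrative npz alternative -- not part of lips-n10.yaml.
dataset: npz
dataset_file_name: path-to-nequip-data/my-dataset.npz   # hypothetical path
key_mapping:
  z: atomic_numbers      # atomic species as integers (left-hand names are whatever the .npz uses)
  E: total_energy        # total potential energies to train on
  F: forces              # atomic forces to train on
  R: pos                 # atomic positions
npz_fixed_field_keys:    # fields shared by all frames rather than given per frame
  - atomic_numbers
```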
61 | 62 | # training 63 | n_train: 10 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 6889. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. 
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /lips/lips-n100.yaml: -------------------------------------------------------------------------------- 1 | root: lips-n100-l2-root 2 | run_name: lips-n100-l2-run_name 3 | workdir: lips-n100-l2-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 5.0 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 25 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) 
except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-lips # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 100 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 6889. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /lips/lips-n1000.yaml: -------------------------------------------------------------------------------- 1 | root: lips-n1000-l2-root 2 | run_name: lips-n1000-l2-run_name 3 | workdir: lips-n1000-l2-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 5.0 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 25 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-lips # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 
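A remark on the loss_coeffs block that appears a few lines below: the force coefficients used throughout this repository are perfect squares (6889 = 83^2 here, 2704 = 52^2 in fcu/fcu.yaml, 43264 = 208^2 in lipo/lipo.yaml, 36864 = 192^2 in the water configs), which looks consistent with weighting the force term by the squared number of atoms per frame (an inference from the values, not something stated in the files). As fcu/fcu.yaml shows, an entry may also name a loss functional after the scalar weight:

```yaml
# Two-element form borrowed from fcu/fcu.yaml; pairing it with the LiPS weight is illustrative only.
loss_coeffs:
  forces:
    - 6889.               # scalar weight on the force term (83^2)
    - PerSpeciesMSELoss   # optional second entry selecting the loss functional for this field
  total_energy:
    - 1.                  # weight on the total-energy term
```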
61 | 62 | # training 63 | n_train: 1000 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 6889. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. 
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /lips/lips-n2500.yaml: -------------------------------------------------------------------------------- 1 | root: lips-n2500-l2-root 2 | run_name: lips-n2500-l2-run_name 3 | workdir: lips-n2500-l2-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 5.0 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 25 # number of neighbors to divide by, None => no normalization. 
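Towards the bottom of this file, as in all of the configs here, come the per-species and global energy scale/shift options. The block below restates those settings and adds, as a comment, one possible reading of how they compose; that formula is an interpretation of the comments in these files, not something taken from the NequIP documentation, so verify it against the code version pinned by the paper.

```yaml
# Interpretation only (verify against the pinned NequIP version):
#   E_total ~ global_rescale_scale * sum_i( scales[Z_i] * eps_i + shifts[Z_i] ) + global_rescale_shift
# where eps_i is the raw atomic energy the network predicts for atom i of species Z_i.
PerSpeciesScaleShift_enable: true
PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]   # one entry per species, ordered like allowed_species
PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
global_rescale_shift: dataset_energy_mean      # mean energy of the dataset
global_rescale_scale: dataset_force_rms        # RMS of the force components in the dataset
```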
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-lips # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 2500 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 6889. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n10-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n10-l0-fixedtest-root 2 | run_name: water-n10-l0-fixedtest-run_name 3 | workdir: water-n10-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 
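The metrics_components block further down in this file is a nested list that is hard to read in this flattened dump. Re-indented, each entry is a list of the field name, the statistic, and an optional mapping of options; the snippet below only restates the entries already used in these configs.

```yaml
metrics_components:
  - - forces                       # field to evaluate
    - rmse                         # statistic, e.g. rmse or mae
    - PerSpecies: True             # report one value per chemical species
      report_per_component: False  # do not split into fx, fy, fz
  - - forces
    - mae
    - PerSpecies: True
      report_per_component: False
  - - total_energy
    - mae
  - - total_energy
    - rmse
```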
61 | 62 | # training 63 | n_train: 10 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mae" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name` is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species.
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n10-l1.yaml: -------------------------------------------------------------------------------- 1 | root: water-n10-l1-fixedtest-root 2 | run_name: water-n10-l1-fixedtest-run_name 3 | workdir: water-n10-l1-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
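# How the -l0 / -l1 / -l2 / -l3 variants in this directory differ (values taken from the l0 and l1
# files shown here): the suffix tracks the maximum rotation order l of the hidden features and the
# edge spherical harmonics, e.g.
#   l0:  feature_irreps_hidden: 32x0e
#        irreps_edge_sh: 0e
#   l1:  feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e
#        irreps_edge_sh: 0e + 1o
# The l2/l3 files presumably extend the same pattern to l=2 and l=3, with the other keys unchanged.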
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 10 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n100-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n100-l0-fixedtest-root 2 | run_name: water-n100-l0-fixedtest-run_name 3 | workdir: water-n100-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 
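# Rough arithmetic on how a run like this terminates, given the settings below: max_epochs is
# effectively unbounded, so training stops either when Validation_loss has not improved for 1000
# epochs or when the learning rate falls below 1.0e-6. Starting from 0.005 and multiplying by 0.8
# on each plateau (ReduceLROnPlateau, patience 50), reaching 1.0e-6 takes about
#   log(1e-6 / 5e-3) / log(0.8) ≈ 38 reductions.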
61 | 62 | # training 63 | n_train: 100 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. 
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n100-l1.yaml: -------------------------------------------------------------------------------- 1 | root: water-n100-l1-fixedtest-root 2 | run_name: water-n100-l1-fixedtest-run_name 3 | workdir: water-n100-l1-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
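# Note on the EMA settings further down in this config: validation and best-model selection use an
# exponential moving average of the weights (use_ema: true). The decay here is 0.99, shorter than
# the 0.999 the inline comment calls common, corresponding to an averaging window of roughly
#   1 / (1 - 0.99) = 100 optimizer updates.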
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 100 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n1000-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n1000-l0-fixedtest-root 2 | run_name: water-n1000-l0-fixedtest-run_name 3 | workdir: water-n1000-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 
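# Note on the PerSpeciesScaleShift_* keys near the end of this file: the shift and scale lists carry
# one entry per chemical species, in the same order as allowed_species (two entries here, presumably
# H and O; the fcu config earlier in this collection has three). With
# PerSpeciesScaleShift_trainable: true these values only set the initialization and are updated
# during training.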
61 | 62 | # training 63 | n_train: 1000 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. 
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n1303-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n1303-l0-fixedtest-root 2 | run_name: water-n1303-l0-fixedtest-run_name 3 | workdir: water-n1303-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 1303 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n25-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n25-l0-fixedtest-root 2 | run_name: water-n25-l0-fixedtest-run_name 3 | workdir: water-n25-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 
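# Note on the n10/n25/n100/... series in this directory: n_train varies with the file name while
# n_val stays at 100, and every split is drawn from the same 1403-frame file
# (water-cheng-1403-train.xyz). The largest run therefore uses 1303 + 100 = 1403 frames, i.e. the
# whole file; the "fixedtest" in the directory name presumably refers to a test set held fixed
# outside these configs.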
61 | 62 | # training 63 | n_train: 25 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. 
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n25-l1.yaml: -------------------------------------------------------------------------------- 1 | root: water-n25-l1-fixedtest-root 2 | run_name: water-n25-l1-fixedtest-run_name 3 | workdir: water-n25-l1-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
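# Possible reading of avg_num_neighbors above: 34 is plausibly the average neighbor count within the
# 4.5 Å cutoff for these water frames; per its comment it is the "number of neighbors to divide by",
# i.e. it normalizes the summed neighbor contributions so feature magnitudes stay roughly
# independent of the local atomic density.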
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 25 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n250-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n250-l0-fixedtest-root 2 | run_name: water-n250-l0-fixedtest-run_name 3 | workdir: water-n250-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we use only scalar features (lmax=0, even parity) 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors within the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
61 | 62 | # training 63 | n_train: 250 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name` is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species.
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n250-l1.yaml: -------------------------------------------------------------------------------- 1 | root: water-n250-l1-fixedtest-root 2 | run_name: water-n250-l1-fixedtest-run_name 3 | workdir: water-n250-l1-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 250 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n50-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n50-l0-fixedtest-root 2 | run_name: water-n50-l0-fixedtest-run_name 3 | workdir: water-n50-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we use only scalar features (lmax=0, even parity) 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors within the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
61 | 62 | # training 63 | n_train: 50 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name` is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species.
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n50-l1.yaml: -------------------------------------------------------------------------------- 1 | root: water-n50-l1-fixedtest-root 2 | run_name: water-n50-l1-fixedtest-run_name 3 | workdir: water-n50-l1-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 50 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n500-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n500-l0-fixedtest-root 2 | run_name: water-n500-l0-fixedtest-run_name 3 | workdir: water-n500-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we use only scalar features (lmax=0, even parity) 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors within the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
61 | 62 | # training 63 | n_train: 500 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name` is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species.
order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n500-l1.yaml: -------------------------------------------------------------------------------- 1 | root: water-n500-l1-fixedtest-root 2 | run_name: water-n500-l1-fixedtest-run_name 3 | workdir: water-n500-l1-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities 17 | irreps_edge_sh: 0e + 1o # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 
38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors withinin the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive. 61 | 62 | # training 63 | n_train: 500 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. 
fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name`is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones. 134 | PerSpeciesScaleShift_scales: [1.0, 1.0] 135 | 136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly. 137 | global_rescale_shift: dataset_energy_mean 138 | 139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly. 140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained. 141 | global_rescale_scale: dataset_force_rms 142 | 143 | # whether the shift of the final global energy rescaling should be trainable 144 | trainable_global_rescale_shift: false 145 | 146 | # whether the scale of the final global energy rescaling should be trainable 147 | trainable_global_rescale_scale: false 148 | -------------------------------------------------------------------------------- /water-cheng-fixedtest/water-n750-l0.yaml: -------------------------------------------------------------------------------- 1 | root: water-n750-l0-fixedtest-root 2 | run_name: water-n750-l0-fixedtest-run_name 3 | workdir: water-n750-l0-fixedtest-workdir 4 | 5 | requeue: true 6 | seed: 0 # random number seed for numpy and torch 7 | append: true # set True if a restarted run should append to the previous log file 8 | default_dtype: float32 # type of float to use, e.g. 
float32 and float64 9 | allow_tf32: false # whether to use TensorFloat32 if it is available 10 | 11 | # network 12 | r_max: 4.5 # cutoff radius in length units 13 | 14 | num_layers: 6 # number of interaction blocks, we found 5-6 to work best 15 | chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species 16 | feature_irreps_hidden: 32x0e # irreps used for hidden features, here we use only scalar features (lmax=0, even parity) 17 | irreps_edge_sh: 0e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer 18 | conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block 19 | 20 | nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended 21 | 22 | nonlinearity_scalars: 23 | e: silu 24 | o: tanh 25 | nonlinearity_gates: 26 | e: silu 27 | o: tanh 28 | 29 | resnet: false # set true to make interaction block a resnet-style update 30 | num_basis: 8 # number of basis functions used in the radial basis 31 | BesselBasis_trainable: true # set true to train the bessel weights 32 | PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function 33 | 34 | # radial network 35 | invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2 36 | invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster 37 | avg_num_neighbors: 34 # number of neighbors to divide by, None => no normalization. 38 | use_sc: true # use self-connection or not, usually gives big improvement 39 | compile_model: false # whether to compile the constructed model to TorchScript 40 | 41 | 42 | # data set 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py) 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields 46 | dataset: ase # type of data set, can be npz or ase 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz 48 | ase_args: 49 | format: extxyz 50 | 51 | # logging 52 | wandb: true # we recommend using wandb for logging, it's optional 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest # project name used in wandb 54 | wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. 55 | # if false, a new wandb run will be generated 56 | verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive 57 | log_batch_freq: 1000000 # batch frequency, how often to print training errors within the same epoch 58 | log_epoch_freq: 1 # epoch frequency, how often to print and save the model 59 | save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive. 60 | save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
61 | 62 | # training 63 | n_train: 750 # number of training data 64 | n_val: 100 # number of validation data 65 | learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best 66 | batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5) 67 | max_epochs: 1000000 # stop training after _ number of epochs 68 | train_val_split: random # can be random or sequential 69 | shuffle: true # If true, the data loader will shuffle the data, usually a good idea 70 | metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse 71 | use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors 72 | ema_decay: 0.99 # ema weight, commonly set to 0.999 73 | ema_use_num_updates: true # whether to use number of updates when computing averages 74 | 75 | # early stopping based on metrics values. 76 | # LR, wall and any keys printed in the log file can be used. 77 | # The key can start with Training or Validation. If not defined, the validation value will be used. 78 | early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs 79 | Validation_loss: 1000 # 80 | early_stopping_lower_bounds: # stop early if a metric value is lower than the bound 81 | LR: 1.0e-6 # 82 | 83 | loss_coeffs: 84 | forces: 85 | - 36864. 86 | total_energy: 87 | - 1. 88 | 89 | 90 | # output metrics 91 | metrics_components: 92 | - - forces # key 93 | - rmse # "rmse" or "mse" 94 | - PerSpecies: True # if true, per species contribution is counted separately 95 | report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately 96 | - - forces 97 | - mae 98 | - PerSpecies: True 99 | report_per_component: False 100 | - - total_energy 101 | - mae 102 | - - total_energy 103 | - rmse 104 | 105 | # optimizer, may be any optimizer defined in torch.optim 106 | # the name `optimizer_name` is case sensitive 107 | optimizer_name: Adam # default optimizer is Adam in the amsgrad mode 108 | optimizer_amsgrad: true 109 | optimizer_betas: !!python/tuple 110 | - 0.9 111 | - 0.999 112 | optimizer_eps: 1.0e-08 113 | optimizer_weight_decay: 0 114 | 115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue 116 | # first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs 117 | lr_scheduler_name: ReduceLROnPlateau 118 | lr_scheduler_patience: 50 119 | lr_scheduler_factor: 0.8 120 | 121 | # the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean 122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom 123 | 124 | # whether to apply a shift and scale, defined per-species, to the atomic energies 125 | PerSpeciesScaleShift_enable: true 126 | 127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable 128 | PerSpeciesScaleShift_trainable: true 129 | 130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros. 131 | PerSpeciesScaleShift_shifts: [0.0, 0.0] 132 | 133 | # optional initial atomic energy scale for each species.
PerSpeciesScaleShift_scales: [1.0, 1.0]

# global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
global_rescale_shift: dataset_energy_mean

# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
# If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
global_rescale_scale: dataset_force_rms

# whether the shift of the final global energy rescaling should be trainable
trainable_global_rescale_shift: false

# whether the scale of the final global energy rescaling should be trainable
trainable_global_rescale_scale: false

--------------------------------------------------------------------------------
/water-deepmd/water-deepmd-e0-f1.yaml:
--------------------------------------------------------------------------------

root: water-deepmd-e0-f1-root
run_name: water-deepmd-e0-f1-run_name
workdir: water-deepmd-e0-f1-workdir

requeue: true
seed: 0 # random number seed for numpy and torch
append: true # set True if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 and float64
allow_tf32: false # whether to use TensorFloat32 if it is available

# network
r_max: 6.0 # cutoff radius in length units

num_layers: 6 # number of interaction blocks, we found 5-6 to work best
chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species
feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block

nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended

nonlinearity_scalars:
  e: silu
  o: tanh
nonlinearity_gates:
  e: silu
  o: tanh

resnet: false # set true to make interaction block a resnet-style update
num_basis: 8 # number of basis functions used in the radial basis
BesselBasis_trainable: true # set true to train the bessel weights
PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function

# radial network
invariant_layers: 3 # number of radial layers, we found it important to keep this small, 1 or 2
invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster
avg_num_neighbors: 90 # number of neighbors to divide by, None => no normalization.
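(Not part of the original configs.) avg_num_neighbors is only a normalization constant ("number of neighbors to divide by"), but it should roughly match the actual neighbor count at the chosen cutoff: 34 at r_max 4.5 in the water-cheng config above, 90 at r_max 6.0 here. Below is a minimal sketch of how one could estimate it with ASE neighbor lists; `estimate_avg_num_neighbors` and the local file name are hypothetical helpers for illustration, not part of NequIP.

```python
# Minimal sketch (not from the original repo, not NequIP's internal routine):
# estimate the average neighbor count within r_max using ASE, to compare against
# the avg_num_neighbors values used in these configs.
from ase.io import read
from ase.neighborlist import neighbor_list


def estimate_avg_num_neighbors(xyz_path: str, r_max: float) -> float:
    frames = read(xyz_path, index=":", format="extxyz")
    pairs = 0
    atoms_total = 0
    for atoms in frames:
        # 'i' is the first-atom index of every pair within the cutoff, so its
        # length is the total number of directed neighbor pairs in the frame
        i = neighbor_list("i", atoms, r_max)
        pairs += len(i)
        atoms_total += len(atoms)
    return pairs / atoms_total


# Hypothetical local copy of the file referenced by dataset_file_name below:
print(estimate_avg_num_neighbors("deepmd-water-183-preselected.xyz", r_max=6.0))
```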
use_sc: true # use self-connection or not, usually gives big improvement
compile_model: false # whether to compile the constructed model to TorchScript


# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
# all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
dataset: ase # type of data set, can be npz or ase
dataset_file_name: path-to-nequip-data/water-deepmd/water-pbe0ts/deepmd-water-183-preselected.xyz
ase_args:
  format: extxyz

# logging
wandb: true # we recommend using wandb for logging; it is optional and can be turned off
wandb_project: nequip-paper-results-water-deepmd # project name used in wandb
wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated.
                   # if false, a new wandb run will be generated
verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive
log_batch_freq: 1000000 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print and save the model
save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive.
save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.

# training
n_train: 133 # number of training data
n_val: 50 # number of validation data
learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best
batch_size: 1 # batch size, we found it important to keep this small for most applications (1-5)
max_epochs: 1000000 # stop training after this many epochs
train_val_split: random # can be random or sequential
shuffle: true # if true, the data loader will shuffle the data, usually a good idea
metrics_key: loss # metric used for scheduling and saving the best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
use_ema: true # if true, use an exponential moving average of the weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.99 # ema weight, commonly set to 0.999
ema_use_num_updates: true # whether to use the number of updates when computing averages

# early stopping based on metric values.
# LR, wall and any keys printed in the log file can be used.
# The key can start with Training or Validation. If not defined, the validation value will be used.
early_stopping_patiences: # stop early if a metric value has stopped decreasing for n epochs
  Validation_loss: 1000
early_stopping_lower_bounds: # stop early if a metric value is lower than the bound
  LR: 1.0e-6

loss_coeffs:
  forces:
    - 1.
  total_energy:
    - 0.


# output metrics
metrics_components:
  - - forces # key
    - rmse # "rmse" or "mse"
    - PerSpecies: True # if true, the per-species contribution is counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - mae
    - PerSpecies: True
      report_per_component: False
  - - total_energy
    - mae
  - - total_energy
    - rmse

# optimizer, may be any optimizer defined in torch.optim
# the name `optimizer_name` is case sensitive
optimizer_name: Adam # default optimizer is Adam in amsgrad mode
optimizer_amsgrad: true
optimizer_betas: !!python/tuple
  - 0.9
  - 0.999
optimizer_eps: 1.0e-08
optimizer_weight_decay: 0

# lr scheduler, currently only supports the two options listed below, if you need more please file an issue
# first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 50
lr_scheduler_factor: 0.8

# the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean
# in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom

# whether to apply a shift and scale, defined per-species, to the atomic energies
PerSpeciesScaleShift_enable: true

# if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
PerSpeciesScaleShift_trainable: true

# optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
PerSpeciesScaleShift_shifts: [-155.91459812556295, -155.91459812556295]

# optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
PerSpeciesScaleShift_scales: [1.966191139611105, 1.966191139611105]

# global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
global_rescale_shift: null

# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
# If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
global_rescale_scale: null

# whether the shift of the final global energy rescaling should be trainable
trainable_global_rescale_shift: false

# whether the scale of the final global energy rescaling should be trainable
trainable_global_rescale_scale: false
--------------------------------------------------------------------------------
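(Closing note, not part of the original repo.) For readers unfamiliar with the optimizer_* and lr_scheduler_* keys used throughout these configs, the rough PyTorch equivalent is sketched below. This illustrates only the torch.optim objects the keys refer to, not NequIP's actual training loop; the dummy model and validation metric are placeholders.

```python
# Illustration only (not NequIP's training loop): the optimizer_* and
# lr_scheduler_* keys used in these configs map onto torch.optim like this.
import torch

model = torch.nn.Linear(8, 1)  # placeholder for the real network

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.005,            # learning_rate
    betas=(0.9, 0.999),  # optimizer_betas
    eps=1.0e-08,         # optimizer_eps
    weight_decay=0,      # optimizer_weight_decay
    amsgrad=True,        # optimizer_amsgrad
)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.8,   # lr_scheduler_factor
    patience=50,  # lr_scheduler_patience
)

# once per epoch, after validation:
val_metric = 1.0            # placeholder for the metrics_key value (here: loss)
scheduler.step(val_metric)  # reduces the LR after `patience` epochs without improvement
```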