├── README.md
├── ccsdt
    ├── ccsd-aspirin-n950-l3.yaml
    ├── ccsdt-benzene-n950-l3.yaml
    ├── ccsdt-ethanol-n950-l3.yaml
    ├── ccsdt-malonaldehyde-n950-l3.yaml
    └── ccsdt-toluene-n950-l3.yaml
├── fcu
    └── fcu.yaml
├── lipo
    └── lipo.yaml
├── lips
    ├── lips-n10.yaml
    ├── lips-n100.yaml
    ├── lips-n1000.yaml
    └── lips-n2500.yaml
├── md17
    ├── md17-aspirin-n150-l0-feature-controlled.yaml
    ├── md17-aspirin-n150-l0-weight-controlled.yaml
    ├── md17-aspirin-n150-l0.yaml
    ├── md17-aspirin-n150-l1.yaml
    ├── md17-aspirin-n150-l2.yaml
    ├── md17-aspirin-n150-l3.yaml
    ├── md17-aspirin-n350-l0-feature-controlled.yaml
    ├── md17-aspirin-n350-l0-rerun.yaml
    ├── md17-aspirin-n350-l0-weight-controlled.yaml
    ├── md17-aspirin-n350-l0.yaml
    ├── md17-aspirin-n350-l1.yaml
    ├── md17-aspirin-n350-l2.yaml
    ├── md17-aspirin-n350-l3.yaml
    ├── md17-aspirin-n50-l0-feature-controlled.yaml
    ├── md17-aspirin-n50-l0-weight-controlled.yaml
    ├── md17-aspirin-n50-l0.yaml
    ├── md17-aspirin-n50-l1-rerun.yaml
    ├── md17-aspirin-n50-l1.yaml
    ├── md17-aspirin-n50-l2-rerun.yaml
    ├── md17-aspirin-n50-l2.yaml
    ├── md17-aspirin-n50-l3.yaml
    ├── md17-aspirin-n550-l0-feature-controlled.yaml
    ├── md17-aspirin-n550-l0-weight-controlled-rerun.yaml
    ├── md17-aspirin-n550-l0-weight-controlled.yaml
    ├── md17-aspirin-n550-l0.yaml
    ├── md17-aspirin-n550-l1.yaml
    ├── md17-aspirin-n550-l2.yaml
    ├── md17-aspirin-n550-l3-rerun.yaml
    ├── md17-aspirin-n550-l3.yaml
    ├── md17-aspirin-n750-l0-feature-controlled.yaml
    ├── md17-aspirin-n750-l0-weight-controlled.yaml
    ├── md17-aspirin-n750-l0.yaml
    ├── md17-aspirin-n750-l1.yaml
    ├── md17-aspirin-n750-l2.yaml
    ├── md17-aspirin-n750-l3.yaml
    ├── md17-aspirin-n950-l0-feature-controlled.yaml
    ├── md17-aspirin-n950-l0-lr0.0001.yaml
    ├── md17-aspirin-n950-l0-lr0.00025.yaml
    ├── md17-aspirin-n950-l0-lr0.0005.yaml
    ├── md17-aspirin-n950-l0-lr0.00075.yaml
    ├── md17-aspirin-n950-l0-lr0.001.yaml
    ├── md17-aspirin-n950-l0-lr0.0025.yaml
    ├── md17-aspirin-n950-l0-lr0.005.yaml
    ├── md17-aspirin-n950-l0-lr0.0075.yaml
    ├── md17-aspirin-n950-l0-weight-controlled.yaml
    ├── md17-aspirin-n950-l0.yaml
    ├── md17-aspirin-n950-l1.yaml
    ├── md17-aspirin-n950-l2.yaml
    ├── md17-aspirin-n950-l3.yaml
    ├── md17-benzene-n950-l3.yaml
    ├── md17-benzene_old-n950-l3.yaml
    ├── md17-ethanol-n950-l3.yaml
    ├── md17-malonaldehyde-n950-l3.yaml
    ├── md17-naphthalene-n950-l3.yaml
    ├── md17-salicylic-n950-l3.yaml
    ├── md17-toluene-n950-l3.yaml
    └── md17-uracil-n950-l3.yaml
├── revmd17
    ├── revmd17-aspirin-n950-l0.yaml
    ├── revmd17-aspirin-n950-l1.yaml
    ├── revmd17-aspirin-n950-l2.yaml
    ├── revmd17-aspirin-n950-l3.yaml
    ├── revmd17-azobenzene-n950-l0.yaml
    ├── revmd17-azobenzene-n950-l1.yaml
    ├── revmd17-azobenzene-n950-l2.yaml
    ├── revmd17-azobenzene-n950-l3.yaml
    ├── revmd17-benzene-n950-l0.yaml
    ├── revmd17-benzene-n950-l1.yaml
    ├── revmd17-benzene-n950-l2.yaml
    ├── revmd17-benzene-n950-l3.yaml
    ├── revmd17-ethanol-n950-l0.yaml
    ├── revmd17-ethanol-n950-l1.yaml
    ├── revmd17-ethanol-n950-l2.yaml
    ├── revmd17-ethanol-n950-l3.yaml
    ├── revmd17-malonaldehyde-n950-l0.yaml
    ├── revmd17-malonaldehyde-n950-l1.yaml
    ├── revmd17-malonaldehyde-n950-l2.yaml
    ├── revmd17-malonaldehyde-n950-l3.yaml
    ├── revmd17-naphthalene-n950-l0.yaml
    ├── revmd17-naphthalene-n950-l1.yaml
    ├── revmd17-naphthalene-n950-l2.yaml
    ├── revmd17-naphthalene-n950-l3.yaml
    ├── revmd17-paracetamol-n950-l0.yaml
    ├── revmd17-paracetamol-n950-l1.yaml
    ├── revmd17-paracetamol-n950-l2.yaml
    ├── revmd17-paracetamol-n950-l3.yaml
    ├── revmd17-salicylic-n950-l0.yaml
    ├── revmd17-salicylic-n950-l1.yaml
    ├── revmd17-salicylic-n950-l2.yaml
    ├── revmd17-salicylic-n950-l3.yaml
    ├── revmd17-toluene-n950-l0.yaml
    ├── revmd17-toluene-n950-l1.yaml
    ├── revmd17-toluene-n950-l2.yaml
    ├── revmd17-toluene-n950-l3.yaml
    ├── revmd17-uracil-n950-l0.yaml
    ├── revmd17-uracil-n950-l1.yaml
    ├── revmd17-uracil-n950-l2.yaml
    └── revmd17-uracil-n950-l3.yaml
├── water-cheng-fixedtest
    ├── water-n10-l0.yaml
    ├── water-n10-l1.yaml
    ├── water-n10-l2.yaml
    ├── water-n10-l3.yaml
    ├── water-n100-l0.yaml
    ├── water-n100-l1.yaml
    ├── water-n100-l2.yaml
    ├── water-n100-l3.yaml
    ├── water-n1000-l0.yaml
    ├── water-n1000-l1.yaml
    ├── water-n1000-l2.yaml
    ├── water-n1000-l3-rerun.yaml
    ├── water-n1000-l3.yaml
    ├── water-n1303-l0.yaml
    ├── water-n1303-l1.yaml
    ├── water-n1303-l2.yaml
    ├── water-n1303-l3-rerun.yaml
    ├── water-n1303-l3.yaml
    ├── water-n25-l0.yaml
    ├── water-n25-l1.yaml
    ├── water-n25-l2.yaml
    ├── water-n25-l3.yaml
    ├── water-n250-l0.yaml
    ├── water-n250-l1.yaml
    ├── water-n250-l2.yaml
    ├── water-n250-l3.yaml
    ├── water-n50-l0.yaml
    ├── water-n50-l1.yaml
    ├── water-n50-l2.yaml
    ├── water-n50-l3.yaml
    ├── water-n500-l0.yaml
    ├── water-n500-l1.yaml
    ├── water-n500-l2.yaml
    ├── water-n500-l3.yaml
    ├── water-n750-l0.yaml
    ├── water-n750-l1.yaml
    ├── water-n750-l2.yaml
    └── water-n750-l3.yaml
└── water-deepmd
    ├── water-deepmd-e0-f1.yaml
    ├── water-deepmd-e1-f100.yaml
    └── water-deepmd-e1-f100000.yaml


/README.md:
--------------------------------------------------------------------------------
1 | ### NequIP Input Files
2 | 
3 | Input files for the NequIP code used in Batzner, S., Musaelian, A., Sun, L., Geiger, M., Mailoa, J. P., Kornbluth, M., ... &amp; Kozinsky, B. (2021). E(3)-equivariant graph neural networks for data-efficient and accurate interatomic potentials. arXiv preprint arXiv:2101.03164.
4 | 
5 | Please not that in order to reproduce the results from the paper you will have to use the specific NequIP version and git commit as specified in the paper. For an input file to get started on the latest version of the software, see `configs/example.yaml` in the [NequIP repo](https://github.com/mir-group/nequip)
6 | 


--------------------------------------------------------------------------------
/fcu/fcu.yaml:
--------------------------------------------------------------------------------
  1 | root: fcu-l2-root
  2 | run_name: fcu-l2-run_name
  3 | workdir: fcu-l2-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 5.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 28                                                             # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/formate-on-cu/new_version/fcu-with-energies-subset0.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-fcu                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 2500                                                                       # number of training data
 64 | n_val: 250                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 2704.0
 86 |     - PerSpeciesMSELoss
 87 |   total_energy:
 88 |     - 1.
 89 | 
 90 | 
 91 | # output metrics
 92 | metrics_components:
 93 |   - - forces                               # key
 94 |     - rmse                                 # "rmse" or "mse"
 95 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 96 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 97 |   - - forces
 98 |     - mae
 99 |     - PerSpecies: True
100 |       report_per_component: False
101 |   - - total_energy
102 |     - mae
103 |   - - total_energy 
104 |     - rmse
105 | 
106 | # optimizer, may be any optimizer defined in torch.optim
107 | # the name `optimizer_name`is case sensitive
108 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
109 | optimizer_amsgrad: true
110 | optimizer_betas: !!python/tuple
111 |   - 0.9
112 |   - 0.999
113 | optimizer_eps: 1.0e-08
114 | optimizer_weight_decay: 0
115 | 
116 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
117 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
118 | lr_scheduler_name: ReduceLROnPlateau
119 | lr_scheduler_patience: 50
120 | lr_scheduler_factor: 0.8
121 | 
122 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
123 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
124 | 
125 | # whether to apply a shift and scale, defined per-species, to the atomic energies
126 | PerSpeciesScaleShift_enable: true
127 | 
128 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
129 | PerSpeciesScaleShift_trainable: true
130 | 
131 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
132 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0, 0.0]
133 | 
134 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
135 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0, 1.0]
136 | 
137 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
138 | global_rescale_shift: dataset_energy_mean
139 | 
140 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
141 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
142 | global_rescale_scale: dataset_force_rms
143 | 
144 | # whether the shift of the final global energy rescaling should be trainable
145 | trainable_global_rescale_shift: false
146 | 
147 | # whether the scale of the final global energy rescaling should be trainable
148 | trainable_global_rescale_scale: false
149 | 


--------------------------------------------------------------------------------
/lipo/lipo.yaml:
--------------------------------------------------------------------------------
  1 | root: lipo-n1000-l2-root
  2 | run_name: lipo-n1000-l2-run_name
  3 | workdir: lipo-n1000-l2-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 5.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 46                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/lipos/melt/melt_20180508_1312_e02677/lipo-melt.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-lipo                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 1000                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 43264.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/lips/lips-n10.yaml:
--------------------------------------------------------------------------------
  1 | root: lips-n10-l2-root
  2 | run_name: lips-n10-l2-run_name
  3 | workdir: lips-n10-l2-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 5.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 25                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-lips                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 10                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 6889.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/lips/lips-n100.yaml:
--------------------------------------------------------------------------------
  1 | root: lips-n100-l2-root
  2 | run_name: lips-n100-l2-run_name
  3 | workdir: lips-n100-l2-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 5.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 25                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-lips                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 100                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 6889.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/lips/lips-n1000.yaml:
--------------------------------------------------------------------------------
  1 | root: lips-n1000-l2-root
  2 | run_name: lips-n1000-l2-run_name
  3 | workdir: lips-n1000-l2-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 5.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 25                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-lips                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 1000                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 6889.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/lips/lips-n2500.yaml:
--------------------------------------------------------------------------------
  1 | root: lips-n2500-l2-root
  2 | run_name: lips-n2500-l2-run_name
  3 | workdir: lips-n2500-l2-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 5.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e                # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 25                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/lips/AIMD_LPS_small/lips-with-calculator-and-energy.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-lips                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 2500                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 6889.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n10-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n10-l0-fixedtest-root
  2 | run_name: water-n10-l0-fixedtest-run_name
  3 | workdir: water-n10-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 10                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n10-l1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n10-l1-fixedtest-root
  2 | run_name: water-n10-l1-fixedtest-run_name
  3 | workdir: water-n10-l1-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o                                              # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 10                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n100-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n100-l0-fixedtest-root
  2 | run_name: water-n100-l0-fixedtest-run_name
  3 | workdir: water-n100-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 100                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n100-l1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n100-l1-fixedtest-root
  2 | run_name: water-n100-l1-fixedtest-run_name
  3 | workdir: water-n100-l1-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o                                              # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 100                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n1000-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n1000-l0-fixedtest-root
  2 | run_name: water-n1000-l0-fixedtest-run_name
  3 | workdir: water-n1000-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 1000                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n1303-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n1303-l0-fixedtest-root
  2 | run_name: water-n1303-l0-fixedtest-run_name
  3 | workdir: water-n1303-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 1303                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n25-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n25-l0-fixedtest-root
  2 | run_name: water-n25-l0-fixedtest-run_name
  3 | workdir: water-n25-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 25                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n25-l1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n25-l1-fixedtest-root
  2 | run_name: water-n25-l1-fixedtest-run_name
  3 | workdir: water-n25-l1-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o                                              # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 25                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n250-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n250-l0-fixedtest-root
  2 | run_name: water-n250-l0-fixedtest-run_name
  3 | workdir: water-n250-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 250                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n250-l1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n250-l1-fixedtest-root
  2 | run_name: water-n250-l1-fixedtest-run_name
  3 | workdir: water-n250-l1-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o                                              # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 250                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n50-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n50-l0-fixedtest-root
  2 | run_name: water-n50-l0-fixedtest-run_name
  3 | workdir: water-n50-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 50                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n50-l1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n50-l1-fixedtest-root
  2 | run_name: water-n50-l1-fixedtest-run_name
  3 | workdir: water-n50-l1-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o                                              # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 50                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n500-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n500-l0-fixedtest-root
  2 | run_name: water-n500-l0-fixedtest-run_name
  3 | workdir: water-n500-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 500                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n500-l1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n500-l1-fixedtest-root
  2 | run_name: water-n500-l1-fixedtest-run_name
  3 | workdir: water-n500-l1-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o                                              # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 500                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-cheng-fixedtest/water-n750-l0.yaml:
--------------------------------------------------------------------------------
  1 | root: water-n750-l0-fixedtest-root
  2 | run_name: water-n750-l0-fixedtest-run_name
  3 | workdir: water-n750-l0-fixedtest-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 4.5                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0e              # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e                                                # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 34                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-cheng/ab-initio-thermodynamics-of-water/training-set/water-cheng-1403-train.xyz
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-cheng-fixedtest                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 750                                                                       # number of training data
 64 | n_val: 100                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 36864.
 86 |   total_energy:
 87 |     - 1.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [0.0, 0.0]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.0, 1.0]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: dataset_energy_mean
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: dataset_force_rms
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------
/water-deepmd/water-deepmd-e0-f1.yaml:
--------------------------------------------------------------------------------
  1 | root: water-deepmd-e0-f1-root
  2 | run_name: water-deepmd-e0-f1-run_name
  3 | workdir: water-deepmd-e0-f1-workdir
  4 | 
  5 | requeue: true
  6 | seed: 0                                                                           # random number seed for numpy and torch
  7 | append: true                                                                      # set True if a restarted run should append to the previous log file
  8 | default_dtype: float32                                                            # type of float to use, e.g. float32 and float64
  9 | allow_tf32: false                                                                  # whether to use TensorFloat32 if it is available
 10 | 
 11 | # network
 12 | r_max: 6.0                                                                        # cutoff radius in length units
 13 | 
 14 | num_layers: 6                                                                     # number of interaction blocks, we found 5-6 to work best
 15 | chemical_embedding_irreps_out: 32x0e                                              # irreps for the chemical embedding of species
 16 | feature_irreps_hidden: 32x0o + 32x0e + 32x1o + 32x1e + 32x2o + 32x2e               # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
 17 | irreps_edge_sh: 0e + 1o + 2e                                                 # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
 18 | conv_to_output_hidden_irreps_out: 16x0e                                           # irreps used in hidden layer of output block
 19 | 
 20 | nonlinearity_type: gate                                                           # may be 'gate' or 'norm', 'gate' is recommended
 21 | 
 22 | nonlinearity_scalars:
 23 |   e: silu
 24 |   o: tanh
 25 | nonlinearity_gates:
 26 |   e: silu
 27 |   o: tanh
 28 | 
 29 | resnet: false                                                                     # set true to make interaction block a resnet-style update
 30 | num_basis: 8                                                                      # number of basis functions used in the radial basis
 31 | BesselBasis_trainable: true                                                       # set true to train the bessel weights
 32 | PolynomialCutoff_p: 6                                                             # p-exponent used in polynomial cutoff function
 33 | 
 34 | # radial network
 35 | invariant_layers: 3                                                               # number of radial layers, we found it important to keep this small, 1 or 2
 36 | invariant_neurons: 64                                                             # number of hidden neurons in radial function, smaller is faster
 37 | avg_num_neighbors: 90                                                            # number of neighbors to divide by, None => no normalization.
 38 | use_sc: true                                                                      # use self-connection or not, usually gives big improvement
 39 | compile_model: false                                                              # whether to compile the constructed model to TorchScript
 40 | 
 41 | 
 42 | # data set
 43 | # the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
 44 | # key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
 45 | # all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
 46 | dataset: ase                                                            # type of data set, can be npz or ase
 47 | dataset_file_name: path-to-nequip-data/water-deepmd/water-pbe0ts/deepmd-water-183-preselected.xyz 
 48 | ase_args:   
 49 |   format: extxyz
 50 | 
 51 | # logging
 52 | wandb: true                                                                        # we recommend using wandb for logging, we'll turn it off here as it's optional
 53 | wandb_project: nequip-paper-results-water-deepmd                                                             # project name used in wandb
 54 | wandb_resume: true                                                                 # if true and restart is true, wandb run data will be restarted and updated.
 55 |                                                                                    # if false, a new wandb run will be generated
 56 | verbose: info                                                                      # the same as python logging, e.g. warning, info, debug, error. case insensitive
 57 | log_batch_freq: 1000000                                                                  # batch frequency, how often to print training errors withinin the same epoch
 58 | log_epoch_freq: 1                                                                  # epoch frequency, how often to print and save the model
 59 | save_checkpoint_freq: -1                                                            # frequency to save the intermediate checkpoint. no saving when the value is not positive.
 60 | save_ema_checkpoint_freq: -1                                                        # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
 61 | 
 62 | # training
 63 | n_train: 133                                                                       # number of training data
 64 | n_val: 50                                                                          # number of validation data
 65 | learning_rate: 0.005                                                                # learning rate, we found values between 0.01 and 0.005 to work best 
 66 | batch_size: 1                                                                       # batch size, we found it important to keep this small for most applications (1-5)
 67 | max_epochs: 1000000                                                                # stop training after _ number of epochs
 68 | train_val_split: random                                                            # can be random or sequential
 69 | shuffle: true                                                                      # If true, the data loader will shuffle the data, usually a good idea
 70 | metrics_key: loss    # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
 71 | use_ema: true        # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
 72 | ema_decay: 0.99                                                                    # ema weight, commonly set to 0.999
 73 | ema_use_num_updates: true                                                          # whether to use number of updates when computing averages
 74 | 
 75 | # early stopping based on metrics values. 
 76 | # LR, wall and any keys printed in the log file can be used. 
 77 | # The key can start with Training or Validation. If not defined, the validation value will be used.
 78 | early_stopping_patiences:                                                          # stop early if a metric value stopped decreasing for n epochs
 79 |   Validation_loss: 1000                                                              # 
 80 | early_stopping_lower_bounds:                                                       # stop early if a metric value is lower than the bound
 81 |   LR: 1.0e-6                                                                        # 
 82 | 
 83 | loss_coeffs:
 84 |   forces:
 85 |     - 1.
 86 |   total_energy:
 87 |     - 0.
 88 | 
 89 | 
 90 | # output metrics
 91 | metrics_components:
 92 |   - - forces                               # key
 93 |     - rmse                                 # "rmse" or "mse"
 94 |     - PerSpecies: True                     # if true, per species contribution is counted separately
 95 |       report_per_component: False          # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
 96 |   - - forces
 97 |     - mae
 98 |     - PerSpecies: True
 99 |       report_per_component: False
100 |   - - total_energy
101 |     - mae
102 |   - - total_energy 
103 |     - rmse
104 | 
105 | # optimizer, may be any optimizer defined in torch.optim
106 | # the name `optimizer_name`is case sensitive
107 | optimizer_name: Adam                                                               # default optimizer is Adam in the amsgrad mode
108 | optimizer_amsgrad: true
109 | optimizer_betas: !!python/tuple
110 |   - 0.9
111 |   - 0.999
112 | optimizer_eps: 1.0e-08
113 | optimizer_weight_decay: 0
114 | 
115 | # lr scheduler, currently only supports the two options listed below, if you need more please file an issue
116 | # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch
117 | lr_scheduler_name: ReduceLROnPlateau
118 | lr_scheduler_patience: 50
119 | lr_scheduler_factor: 0.8
120 | 
121 | # the deafult is to scale the energies and forces by scaling them by the force standard deviation and to shift the energy by its mean
122 | # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
123 | 
124 | # whether to apply a shift and scale, defined per-species, to the atomic energies
125 | PerSpeciesScaleShift_enable: true
126 | 
127 | # if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
128 | PerSpeciesScaleShift_trainable: true
129 | 
130 | # optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
131 | PerSpeciesScaleShift_shifts: [-155.91459812556295, -155.91459812556295]
132 | 
133 | # optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
134 | PerSpeciesScaleShift_scales: [1.966191139611105, 1.966191139611105]
135 | 
136 | # global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
137 | global_rescale_shift: null
138 | 
139 | # global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
140 | # If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
141 | global_rescale_scale: null
142 | 
143 | # whether the shift of the final global energy rescaling should be trainable
144 | trainable_global_rescale_shift: false
145 | 
146 | # whether the scale of the final global energy rescaling should be trainable
147 | trainable_global_rescale_scale: false
148 | 


--------------------------------------------------------------------------------