├── neural_network ├── grimme_d3 │ ├── tables │ │ ├── c6ab.npy │ │ ├── r0ab.npy │ │ ├── r2r4.npy │ │ └── rcov.npy │ └── grimme_d3.py ├── layers │ ├── NeuronLayer.py │ ├── OutputBlock.py │ ├── InteractionBlock.py │ ├── ResidualLayer.py │ ├── RBFLayer.py │ ├── DenseLayer.py │ ├── util.py │ └── InteractionLayer.py ├── activation_fn.py └── NeuralNetwork.py ├── config.txt ├── LICENSE ├── README.md ├── training ├── Trainer.py ├── DataQueue.py ├── DataContainer.py ├── DataProvider.py └── AMSGrad.py ├── NNCalculator.py └── train.py /neural_network/grimme_d3/tables/c6ab.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MMunibas/PhysNet/HEAD/neural_network/grimme_d3/tables/c6ab.npy -------------------------------------------------------------------------------- /neural_network/grimme_d3/tables/r0ab.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MMunibas/PhysNet/HEAD/neural_network/grimme_d3/tables/r0ab.npy -------------------------------------------------------------------------------- /neural_network/grimme_d3/tables/r2r4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MMunibas/PhysNet/HEAD/neural_network/grimme_d3/tables/r2r4.npy -------------------------------------------------------------------------------- /neural_network/grimme_d3/tables/rcov.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MMunibas/PhysNet/HEAD/neural_network/grimme_d3/tables/rcov.npy -------------------------------------------------------------------------------- /neural_network/layers/NeuronLayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | ''' parent class for all neuron layers ''' 4 | class NeuronLayer: 5 | def __str__(self): 6 | return "[" + str(self.n_in) + "->" + str(self.n_out) + "]" 7 | 8 | def __init__(self, n_in, n_out, activation_fn=None): 9 | self._n_in = n_in #number of inputs 10 | self._n_out = n_out #number of outpus 11 | self._activation_fn = activation_fn #activation function 12 | 13 | @property 14 | def n_in(self): 15 | return self._n_in 16 | 17 | @property 18 | def n_out(self): 19 | return self._n_out 20 | 21 | @property 22 | def activation_fn(self): 23 | return self._activation_fn -------------------------------------------------------------------------------- /config.txt: -------------------------------------------------------------------------------- 1 | --num_features=128 2 | --num_basis=64 3 | --num_blocks=5 4 | --num_residual_atomic=2 5 | --num_residual_interaction=3 6 | --num_residual_output=1 7 | --cutoff=10.0 8 | --use_electrostatic=1 9 | --use_dispersion=1 10 | --grimme_s6=0.5 11 | --grimme_s8=0.2130 12 | --grimme_a1=0.0 13 | --grimme_a2=6.0519 14 | --dataset=sn2_reactions.npz 15 | --num_train=400000 16 | --num_valid=5000 17 | --seed=42 18 | --max_steps=10000000 19 | --learning_rate=0.001 20 | --max_norm=1000.0 21 | --ema_decay=0.999 22 | --keep_prob=1.0 23 | --l2lambda=0.0 24 | --nhlambda=0.01 25 | --decay_steps=10000000 26 | --decay_rate=0.1 27 | --batch_size=32 28 | --valid_batch_size=1000 29 | --force_weight=52.91772105638412 30 | --charge_weight=14.399645351950548 31 | --dipole_weight=27.211386024367243 32 | --summary_interval=10000 33 | --validation_interval=10000 34 | --save_interval=10000 35 | --record_run_metadata=0 
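The config file above is a plain list of `--name=value` flags, one per line; train.py defines the authoritative set of options (see the README below). As an illustration only — the option subset and the `@config.txt` invocation shown here are assumptions, not the repository's actual parsing code — a flag file in this format can be consumed with argparse like so:

```python
# Sketch only: parsing a "--name=value per line" flag file such as config.txt.
# The real option definitions live in train.py; the few shown here are assumed.
import argparse

parser = argparse.ArgumentParser(fromfile_prefix_chars='@')  # '@file' expands each line of the file into one argument
parser.add_argument("--num_features", type=int, default=128)  # dimensionality of the feature vectors
parser.add_argument("--num_basis", type=int, default=64)      # number of radial basis functions
parser.add_argument("--num_blocks", type=int, default=5)      # number of stacked building blocks
parser.add_argument("--cutoff", type=float, default=10.0)     # cutoff distance for interactions
parser.add_argument("--dataset", type=str, default="sn2_reactions.npz")
parser.add_argument("--batch_size", type=int, default=32)

# e.g. args = parser.parse_args(["@config.txt"])  # requires config.txt to be present
args = parser.parse_args([])                       # defaults only, so the sketch runs stand-alone
print(args.num_features, args.num_basis, args.dataset)
```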
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Meuwly Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PhysNet 2 | 3 | Tensorflow implementation of PhysNet (see https://arxiv.org/abs/1902.08408) for details 4 | 5 | ## Requirements 6 | 7 | To run this software, you need: 8 | 9 | - python3 (tested with version 3.6.3) 10 | - TensorFlow (tested with version 1.10.1) 11 | 12 | 13 | 14 | ## How to use 15 | 16 | Edit the config.txt file to specify hyperparameters, dataset location, training/validation set size etc. 17 | (see "train.py" for a list of all options) 18 | 19 | Then, simply run 20 | 21 | ``` 22 | python3 train.py 23 | ``` 24 | 25 | in a terminal to start training. 26 | 27 | The included "config.txt" assumes that the dataset "sn2_reactions.npz" is present. It can be downloaded from: https://zenodo.org/record/2605341. In order to use a different dataset, it needs to be formatted in the same way as this example ("sn2_reactions.npz"). Please refer to the README file of the dataset (available from https://zenodo.org/record/2605341) for details. 28 | 29 | 30 | ## How to cite 31 | 32 | If you find this software useful, please cite: 33 | 34 | ``` 35 | Unke, O. T. and Meuwly, M. "PhysNet: A Neural Network for Predicting Energies, Forces, Dipole Moments and Partial Charges" arxiv:1902.08408 (2019). 
36 | ``` 37 | 38 | 39 | -------------------------------------------------------------------------------- /neural_network/layers/OutputBlock.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .NeuronLayer import * 3 | from .DenseLayer import * 4 | from .ResidualLayer import * 5 | 6 | class OutputBlock(NeuronLayer): 7 | def __str__(self): 8 | return "output"+super().__str__() 9 | 10 | def __init__(self, F, num_residual, activation_fn=None, seed=None, scope=None, keep_prob=1.0, dtype=tf.float32): 11 | super().__init__(F, 2, activation_fn) 12 | with tf.variable_scope(scope): 13 | self._residual_layer = [] 14 | for i in range(num_residual): 15 | self._residual_layer.append(ResidualLayer(F, F, activation_fn, seed=seed, scope="residual_layer"+str(i), keep_prob=keep_prob, dtype=dtype)) 16 | self._dense = DenseLayer(F, 2, W_init=tf.zeros([F, 2], dtype=dtype), use_bias=False, scope="dense_layer", dtype=dtype) 17 | 18 | @property 19 | def residual_layer(self): 20 | return self._residual_layer 21 | 22 | @property 23 | def dense(self): 24 | return self._dense 25 | 26 | def __call__(self, x): 27 | for i in range(len(self.residual_layer)): 28 | x = self.residual_layer[i](x) 29 | if self.activation_fn is not None: 30 | x = self.activation_fn(x) 31 | return self.dense(x) 32 | -------------------------------------------------------------------------------- /neural_network/layers/InteractionBlock.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .NeuronLayer import * 3 | from .InteractionLayer import * 4 | from .ResidualLayer import * 5 | 6 | class InteractionBlock(NeuronLayer): 7 | def __str__(self): 8 | return "interaction_block"+super().__str__() 9 | 10 | def __init__(self, K, F, num_residual_atomic, num_residual_interaction, activation_fn=None, seed=None, scope=None, keep_prob=1.0, dtype=tf.float32): 11 | super().__init__(K, F, activation_fn) 12 | with tf.variable_scope(scope): 13 | #interaction layer 14 | self._interaction = InteractionLayer(K, F, num_residual_interaction, activation_fn=activation_fn, seed=seed, scope="interaction_layer", keep_prob=keep_prob, dtype=dtype) 15 | 16 | #residual layers 17 | self._residual_layer = [] 18 | for i in range(num_residual_atomic): 19 | self._residual_layer.append(ResidualLayer(F, F, activation_fn, seed=seed, scope="residual_layer"+str(i), keep_prob=keep_prob, dtype=dtype)) 20 | 21 | @property 22 | def interaction(self): 23 | return self._interaction 24 | 25 | @property 26 | def residual_layer(self): 27 | return self._residual_layer 28 | 29 | def __call__(self, x, rbf, idx_i, idx_j): 30 | x = self.interaction(x, rbf, idx_i, idx_j) 31 | for i in range(len(self.residual_layer)): 32 | x = self.residual_layer[i](x) 33 | return x 34 | -------------------------------------------------------------------------------- /neural_network/layers/ResidualLayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .NeuronLayer import * 4 | from .DenseLayer import * 5 | 6 | class ResidualLayer(NeuronLayer): 7 | def __str__(self): 8 | return "residual_layer"+super().__str__() 9 | 10 | def __init__(self, n_in, n_out, activation_fn=None, W_init=None, b_init=None, use_bias=True, seed=None, scope=None, keep_prob=1.0, dtype=tf.float32): 11 | super().__init__(n_in, n_out, activation_fn) 12 | self._keep_prob = keep_prob 13 | with tf.variable_scope(scope): 14 | 
self._dense = DenseLayer(n_in, n_out, activation_fn=activation_fn, 15 | W_init=W_init, b_init=b_init, use_bias=use_bias, seed=seed, scope="dense", dtype=dtype) 16 | self._residual = DenseLayer(n_out, n_out, activation_fn=None, 17 | W_init=W_init, b_init=b_init, use_bias=use_bias, seed=seed, scope="residual", dtype=dtype) 18 | 19 | @property 20 | def keep_prob(self): 21 | return self._keep_prob 22 | 23 | @property 24 | def dense(self): 25 | return self._dense 26 | 27 | @property 28 | def residual(self): 29 | return self._residual 30 | 31 | def __call__(self, x): 32 | #pre-activation 33 | if self.activation_fn is not None: 34 | y = tf.nn.dropout(self.activation_fn(x), self.keep_prob) 35 | else: 36 | y = tf.nn.dropout(x, self.keep_prob) 37 | x += self.residual(self.dense(y)) 38 | return x 39 | -------------------------------------------------------------------------------- /neural_network/activation_fn.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | #google's swish function 5 | def swish(x): 6 | return x*tf.sigmoid(x) 7 | 8 | #First time softplus was used as activation function: "Incorporating Second-Order Functional Knowledge for Better Option Pricing" 9 | #(https://papers.nips.cc/paper/1920-incorporating-second-order-functional-knowledge-for-better-option-pricing.pdf) 10 | def _softplus(x): 11 | return tf.log1p(tf.exp(x)) 12 | 13 | def softplus(x): 14 | #this definition is for numerical stability for x larger than 15 (single precision) 15 | #or x larger than 34 (double precision), there is no numerical difference anymore 16 | #between the softplus and a linear function 17 | return tf.where(x < 15.0, _softplus(tf.where(x < 15.0, x, tf.zeros_like(x))), x) 18 | 19 | def shifted_softplus(x): 20 | #return softplus(x) - np.log(2.0) 21 | return tf.nn.softplus(x) - tf.log(2.0) 22 | 23 | #this ensures that the function is close to linear near the origin! 24 | def scaled_shifted_softplus(x): 25 | return 2*shifted_softplus(x) 26 | 27 | #is not really self-normalizing sadly... 
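#note (added): the fixed prefactors in the self_normalizing_* functions below appear to be chosen so that,
#for standard-normal inputs, the output variance stays close to 1 (SELU-style self-normalization);
#as stated above, this property only holds approximately for the shifted softplus.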
28 | def self_normalizing_shifted_softplus(x): 29 | return 1.875596256135042*shifted_softplus(x) 30 | 31 | #general: log((exp(alpha)-1)*exp(x)+1)-alpha 32 | def smooth_ELU(x): 33 | return tf.log1p(1.718281828459045*tf.exp(x))-1.0 #(e-1) = 1.718281828459045 34 | 35 | def self_normalizing_smooth_ELU(x): 36 | return 1.574030675714671*smooth_ELU(x) 37 | 38 | def self_normalizing_asinh(x): 39 | return 1.256734802399369*tf.asinh(x) 40 | 41 | def self_normalizing_tanh(x): 42 | return 1.592537419722831*tf.tanh(x) 43 | -------------------------------------------------------------------------------- /neural_network/layers/RBFLayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .NeuronLayer import * 4 | from .DenseLayer import * 5 | 6 | #inverse softplus transformation 7 | def softplus_inverse(x): 8 | return x + np.log(-np.expm1(-x)) 9 | 10 | #radial basis function expansion 11 | class RBFLayer(NeuronLayer): 12 | def __str__(self): 13 | return "radial_basis_function_layer"+super().__str__() 14 | 15 | def __init__(self, K, cutoff, scope=None, dtype=tf.float32): 16 | super().__init__(1, K, None) 17 | self._K = K 18 | self._cutoff = cutoff 19 | with tf.variable_scope(scope): 20 | #initialize centers 21 | centers = softplus_inverse(np.linspace(1.0,np.exp(-cutoff),K)) 22 | self._centers = tf.nn.softplus(tf.Variable(np.asarray(centers), name="centers", dtype=dtype)) 23 | tf.summary.histogram("rbf_centers", self.centers) 24 | 25 | #initialize widths (inverse softplus transformation is applied, such that softplus can be used to guarantee positive values) 26 | widths = [softplus_inverse((0.5/((1.0-np.exp(-cutoff))/K))**2)]*K 27 | self._widths = tf.nn.softplus(tf.Variable(np.asarray(widths), name="widths", dtype=dtype)) 28 | tf.summary.histogram("rbf_widths", self.widths) 29 | 30 | @property 31 | def K(self): 32 | return self._K 33 | 34 | @property 35 | def cutoff(self): 36 | return self._cutoff 37 | 38 | @property 39 | def centers(self): 40 | return self._centers 41 | 42 | @property 43 | def widths(self): 44 | return self._widths 45 | 46 | #cutoff function that ensures a smooth cutoff 47 | def cutoff_fn(self, D): 48 | x = D/self.cutoff 49 | x3 = x**3 50 | x4 = x3*x 51 | x5 = x4*x 52 | return tf.where(x < 1, 1 - 6*x5 + 15*x4 - 10*x3, tf.zeros_like(x)) 53 | 54 | def __call__(self, D): 55 | D = tf.expand_dims(D, -1) #necessary for proper broadcasting behaviour 56 | rbf = self.cutoff_fn(D)*tf.exp(-self.widths*(tf.exp(-D)-self.centers)**2) 57 | return rbf 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /neural_network/layers/DenseLayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .NeuronLayer import * 4 | from .util import * 5 | 6 | class DenseLayer(NeuronLayer): 7 | def __str__(self): 8 | return "dense"+super().__str__() 9 | 10 | def __init__(self, n_in, n_out, activation_fn=None, W_init=None, b_init=None, use_bias=True, regularization=True, seed=None, scope=None, dtype=tf.float32): 11 | super().__init__(n_in, n_out, activation_fn) 12 | with tf.variable_scope(scope): 13 | #define weight 14 | if W_init is None: 15 | W_init = semi_orthogonal_glorot_weights(n_in, n_out, seed=seed) 16 | self._W = tf.Variable(W_init, name="W", dtype=dtype) 17 | tf.add_to_collection(tf.GraphKeys.WEIGHTS, self.W) 18 | tf.summary.histogram("weights", self.W) 19 | 20 | #define l2 loss term 
for regularization 21 | if regularization: 22 | self._l2loss = tf.nn.l2_loss(self.W, name="l2loss") 23 | tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2loss) 24 | else: 25 | self._l2loss = 0.0 26 | 27 | #define bias 28 | self._use_bias = use_bias 29 | if self.use_bias: 30 | if b_init is None: 31 | b_init = tf.zeros([self.n_out], name="b_init", dtype=dtype) 32 | self._b = tf.Variable(b_init, name="b", dtype=dtype) 33 | tf.add_to_collection(tf.GraphKeys.BIASES, self.b) 34 | tf.summary.histogram("biases", self.b) 35 | 36 | @property 37 | def W(self): 38 | return self._W 39 | 40 | @property 41 | def b(self): 42 | return self._b 43 | 44 | @property 45 | def l2loss(self): 46 | return self._l2loss 47 | 48 | @property 49 | def use_bias(self): 50 | return self._use_bias 51 | 52 | def __call__(self, x): 53 | y = tf.matmul(x, self.W) 54 | if self.use_bias: 55 | y += self.b 56 | if self.activation_fn is not None: 57 | y = self.activation_fn(y) 58 | tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, y) 59 | return y 60 | -------------------------------------------------------------------------------- /neural_network/layers/util.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | #generates a random square orthogonal matrix of dimension dim 5 | def square_orthogonal_matrix(dim=3, seed=None): 6 | random_state = np.random 7 | if seed is not None: #allows to get the same matrix every time 8 | random_state.seed(seed) 9 | H = np.eye(dim) 10 | D = np.ones((dim,)) 11 | for n in range(1, dim): 12 | x = random_state.normal(size=(dim-n+1,)) 13 | D[n-1] = np.sign(x[0]) 14 | x[0] -= D[n-1]*np.sqrt((x*x).sum()) 15 | # Householder transformation 16 | Hx = (np.eye(dim-n+1) - 2.*np.outer(x, x)/(x*x).sum()) 17 | mat = np.eye(dim) 18 | mat[n-1:, n-1:] = Hx 19 | H = np.dot(H, mat) 20 | # Fix the last sign such that the determinant is 1 21 | D[-1] = (-1)**(1-(dim % 2))*D.prod() 22 | # Equivalent to np.dot(np.diag(D), H) but faster, apparently 23 | H = (D*H.T).T 24 | return H 25 | 26 | #generates a random (semi-)orthogonal matrix of size NxM 27 | def semi_orthogonal_matrix(N, M, seed=None): 28 | if N > M: #number of rows is larger than number of columns 29 | square_matrix = square_orthogonal_matrix(dim=N, seed=seed) 30 | else: #number of columns is larger than number of rows 31 | square_matrix = square_orthogonal_matrix(dim=M, seed=seed) 32 | return square_matrix[:N,:M] 33 | 34 | #generates a weight matrix with variance according to Glorot initialization 35 | #based on a random (semi-)orthogonal matrix 36 | #neural networks are expected to learn better when features are decorrelated 37 | #(stated by eg. "Reducing overfitting in deep networks by decorrelating representations", 38 | #"Dropout: a simple way to prevent neural networks from overfitting", 39 | #"Exact solutions to the nonlinear dynamics of learning in deep linear neural networks") 40 | def semi_orthogonal_glorot_weights(n_in, n_out, scale=2.0, seed=None): 41 | W = semi_orthogonal_matrix(n_in, n_out) 42 | W *= np.sqrt(scale/((n_in+n_out)*W.var())) 43 | return W 44 | 45 | #given a tensor x, gives back the variance 46 | def reduce_var(x, axis=None, keepdims=False): 47 | """Variance of a tensor, alongside the specified axis. 48 | 49 | # Arguments 50 | x: A tensor or variable. 51 | axis: An integer, the axis to compute the variance. 52 | keepdims: A boolean, whether to keep the dimensions or not. 
53 | If `keepdims` is `False`, the rank of the tensor is reduced 54 | by 1. If `keepdims` is `True`, 55 | the reduced dimension is retained with length 1. 56 | 57 | # Returns 58 | A tensor with the variance of elements of `x`. 59 | """ 60 | 61 | m = tf.reduce_mean(x, axis=axis, keep_dims=True) 62 | devs_squared = tf.square(x - m) 63 | return tf.reduce_mean(devs_squared, axis=axis, keep_dims=keepdims) 64 | 65 | -------------------------------------------------------------------------------- /neural_network/layers/InteractionLayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .RBFLayer import * 4 | from .NeuronLayer import * 5 | from .DenseLayer import * 6 | from .ResidualLayer import * 7 | 8 | class InteractionLayer(NeuronLayer): 9 | def __str__(self): 10 | return "interaction_layer"+super().__str__() 11 | 12 | def __init__(self, K, F, num_residual, activation_fn=None, seed=None, scope=None, keep_prob=1.0, dtype=tf.float32): 13 | super().__init__(K, F, activation_fn) 14 | self._keep_prob = keep_prob 15 | with tf.variable_scope(scope): 16 | #transforms radial basis functions to feature space 17 | self._k2f = DenseLayer(K, F, W_init=tf.zeros([K, F], dtype=dtype), use_bias=False, seed=seed, scope='k2f', dtype=dtype) 18 | #rearrange feature vectors for computing the "message" 19 | self._dense_i = DenseLayer(F, F, activation_fn, seed=seed, scope="dense_i", dtype=dtype) # central atoms 20 | self._dense_j = DenseLayer(F, F, activation_fn, seed=seed, scope="dense_j", dtype=dtype) # neighbouring atoms 21 | #for performing residual transformation on the "message" 22 | self._residual_layer = [] 23 | for i in range(num_residual): 24 | self._residual_layer.append(ResidualLayer(F, F, activation_fn, seed=seed, scope="residual_layer"+str(i), keep_prob=keep_prob, dtype=dtype)) 25 | #for performing the final update to the feature vectors 26 | self._dense = DenseLayer(F, F, seed=seed, scope="dense", dtype=dtype) 27 | self._u = tf.Variable(tf.ones([F], dtype=dtype), name="u", dtype=dtype) 28 | tf.summary.histogram("gates", self.u) 29 | 30 | @property 31 | def keep_prob(self): 32 | return self._keep_prob 33 | 34 | @property 35 | def k2f(self): 36 | return self._k2f 37 | 38 | @property 39 | def dense_i(self): 40 | return self._dense_i 41 | 42 | @property 43 | def dense_j(self): 44 | return self._dense_j 45 | 46 | @property 47 | def residual_layer(self): 48 | return self._residual_layer 49 | 50 | @property 51 | def dense(self): 52 | return self._dense 53 | 54 | @property 55 | def u(self): 56 | return self._u 57 | 58 | def __call__(self, x, rbf, idx_i, idx_j): 59 | #pre-activation 60 | if self.activation_fn is not None: 61 | xa = tf.nn.dropout(self.activation_fn(x), self.keep_prob) 62 | else: 63 | xa = tf.nn.dropout(x, self.keep_prob) 64 | #calculate feature mask from radial basis functions 65 | g = self.k2f(rbf) 66 | #calculate contribution of neighbors and central atom 67 | xi = self.dense_i(xa) 68 | xj = tf.segment_sum(g*tf.gather(self.dense_j(xa), idx_j), idx_i) 69 | #add contributions to get the "message" 70 | m = xi + xj 71 | for i in range(len(self.residual_layer)): 72 | m = self.residual_layer[i](m) 73 | if self.activation_fn is not None: 74 | m = self.activation_fn(m) 75 | x = self.u*x + self.dense(m) 76 | return x 77 | -------------------------------------------------------------------------------- /training/Trainer.py: -------------------------------------------------------------------------------- 1 | 
import tensorflow as tf 2 | import numpy as np 3 | from .AMSGrad import AMSGrad 4 | 5 | class Trainer: 6 | def __init__(self, learning_rate=1e-3, decay_steps=100000, decay_rate=0.96, scope=None): 7 | self._scope = scope 8 | with tf.variable_scope(self.scope): 9 | self._global_step = tf.Variable(0, name='global_step', trainable=False) 10 | self._learning_rate = tf.train.exponential_decay(learning_rate, self._global_step, decay_steps, decay_rate) 11 | self._optimizer = AMSGrad(learning_rate=self._learning_rate) 12 | 13 | def build_train_op(self, loss, moving_avg_decay=0.999, max_norm=10.0, dependencies=[]): 14 | #clipped gradients 15 | gradients, variables = zip(*self._optimizer.compute_gradients(loss)) 16 | summary_op = tf.summary.scalar("global_gradient_norm", tf.global_norm(gradients)) 17 | gradients, _ = tf.clip_by_global_norm(gradients, max_norm) 18 | apply_gradient_op = self._optimizer.apply_gradients(zip(gradients, variables), global_step=self._global_step) 19 | 20 | #get model variable collection 21 | self._model_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) 22 | 23 | #create ExponentialMovingAverage object and build its apply operation 24 | self._ema = tf.train.ExponentialMovingAverage(moving_avg_decay, self._global_step) 25 | ema_op = self.ema.apply(self.model_vars) 26 | 27 | #make backup variables 28 | with tf.variable_scope('backup_variables'): 29 | self._backup_vars = [tf.get_variable(var.op.name, dtype=var.value().dtype, trainable=False, 30 | initializer=var.initialized_value()) for var in self.model_vars] 31 | 32 | #generate train op 33 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 34 | with tf.control_dependencies( 35 | [apply_gradient_op, 36 | summary_op, 37 | ema_op] + update_ops + dependencies): 38 | train_op = tf.no_op(name='train') 39 | 40 | return train_op 41 | 42 | def load_averaged_variables(self): 43 | return tf.group(*(tf.assign(var, self.ema.average(var).read_value()) 44 | for var in self.model_vars)) 45 | 46 | def save_variable_backups(self): 47 | return tf.group(*(tf.assign(bck, var.read_value()) 48 | for var, bck in zip(self.model_vars, self.backup_vars))) 49 | 50 | def restore_variable_backups(self): 51 | return tf.group(*(tf.assign(var, bck.read_value()) 52 | for var, bck in zip(self.model_vars, self.backup_vars))) 53 | 54 | @property 55 | def scope(self): 56 | return self._scope 57 | 58 | @property 59 | def global_step(self): 60 | return self._global_step 61 | 62 | @property 63 | def ema(self): 64 | return self._ema 65 | 66 | @property 67 | def model_vars(self): 68 | return self._model_vars 69 | 70 | @property 71 | def backup_vars(self): 72 | return self._backup_vars 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /training/DataQueue.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import threading 3 | 4 | class DataQueue: 5 | def __repr__(self): 6 | return "DataQueue" 7 | def __init__(self, get_data, capacity=5000, scope=None, dtype=tf.float32): 8 | self._get_data = get_data 9 | self._is_running = False 10 | self._scope = scope 11 | 12 | with tf.variable_scope(self.scope): 13 | dtypes = [ dtype, dtype, dtype, tf.int32, dtype, dtype, dtype, dtype, tf.int32, tf.int32, tf.int32 ] 14 | shapes = [ [None, ], [None, ], [None,3], [None, ], [None,3], [None, ], [None, ], [None,3], [None, ], [None, ], [None, ] ] 15 | 16 | #define placeholders 17 | self._E = tf.placeholder(dtypes[ 0], shape=shapes[ 0], name="E") 18 | self._Ea 
= tf.placeholder(dtypes[ 1], shape=shapes[ 1], name="Ea") 19 | self._F = tf.placeholder(dtypes[ 2], shape=shapes[ 2], name="F") 20 | self._Z = tf.placeholder(dtypes[ 3], shape=shapes[ 3], name="Z") 21 | self._D = tf.placeholder(dtypes[ 4], shape=shapes[ 4], name="D") 22 | self._Q = tf.placeholder(dtypes[ 5], shape=shapes[ 5], name="Q") 23 | self._Qa = tf.placeholder(dtypes[ 6], shape=shapes[ 6], name="Qa") 24 | self._R = tf.placeholder(dtypes[ 7], shape=shapes[ 7], name="R") 25 | self._idx_i = tf.placeholder(dtypes[ 8], shape=shapes[ 8], name="idx_i") 26 | self._idx_j = tf.placeholder(dtypes[ 9], shape=shapes[ 9], name="idx_j") 27 | self._batch_seg = tf.placeholder(dtypes[10], shape=shapes[10], name="batch_seg") 28 | placeholders = [ self.E, self.Ea, self.F, self.Z, self.D, self.Q, self.Qa, self.R, self.idx_i, self.idx_j, self.batch_seg] 29 | 30 | self._queue = tf.PaddingFIFOQueue(capacity=capacity, dtypes=dtypes, shapes=shapes, name="queue") 31 | self._enqueue_op = self.queue.enqueue(placeholders) 32 | self._dequeue_op = self.queue.dequeue() 33 | 34 | def create_thread(self, sess, coord=None, daemon=False): 35 | if coord is None: 36 | coord = tf.train.Coordinator() 37 | 38 | if self.is_running: 39 | return [] 40 | 41 | thread = threading.Thread(target=self._run, args=(sess, coord)) 42 | thread.daemon = daemon 43 | thread.start() 44 | self._is_running = True 45 | return [thread] 46 | 47 | def _run(self, sess, coord): 48 | while not coord.should_stop(): 49 | data = self.get_data() 50 | feed_dict = { 51 | self.E: data["E"], 52 | self.Ea: data["Ea"], 53 | self.F: data["F"], 54 | self.Z: data["Z"], 55 | self.D: data["D"], 56 | self.Q: data["Q"], 57 | self.Qa: data["Qa"], 58 | self.R: data["R"], 59 | self.idx_i: data["idx_i"], 60 | self.idx_j: data["idx_j"], 61 | self.batch_seg: data["batch_seg"] 62 | } 63 | try: 64 | sess.run(self.enqueue_op, feed_dict=feed_dict) 65 | except Exception as e: 66 | coord.request_stop(e) 67 | 68 | @property 69 | def E(self): 70 | return self._E 71 | 72 | @property 73 | def Ea(self): 74 | return self._Ea 75 | 76 | @property 77 | def F(self): 78 | return self._F 79 | 80 | @property 81 | def Z(self): 82 | return self._Z 83 | 84 | @property 85 | def D(self): 86 | return self._D 87 | 88 | @property 89 | def Q(self): 90 | return self._Q 91 | 92 | @property 93 | def Qa(self): 94 | return self._Qa 95 | 96 | @property 97 | def R(self): 98 | return self._R 99 | 100 | @property 101 | def idx_i(self): 102 | return self._idx_i 103 | 104 | @property 105 | def idx_j(self): 106 | return self._idx_j 107 | 108 | @property 109 | def batch_seg(self): 110 | return self._batch_seg 111 | 112 | @property 113 | def offsets(self): 114 | return self._offsets 115 | 116 | @property 117 | def scope(self): 118 | return self._scope 119 | 120 | @property 121 | def queue(self): 122 | return self._queue 123 | 124 | @property 125 | def enqueue_op(self): 126 | return self._enqueue_op 127 | 128 | @property 129 | def dequeue_op(self): 130 | return self._dequeue_op 131 | 132 | @property 133 | def get_data(self): 134 | return self._get_data 135 | 136 | @property 137 | def is_running(self): 138 | return self._is_running 139 | 140 | 141 | -------------------------------------------------------------------------------- /neural_network/grimme_d3/grimme_d3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | """ 5 | Tensorflow implementation of Grimme's D3 method (only Becke-Johnson damping is implemented) 6 | 
Grimme, Stefan, et al. "A consistent and accurate ab initio parametrization of density functional dispersion correction (DFT-D) for the 94 elements H-Pu." The Journal of chemical physics 132.15 (2010): 154104. 7 | """ 8 | 9 | 10 | #relative filepath to package folder 11 | package_directory = os.path.dirname(os.path.abspath(__file__)) 12 | 13 | #conversion factors used in grimme d3 code 14 | d3_autoang = 0.52917726 #for converting distance from bohr to angstrom 15 | d3_autoev = 27.21138505 #for converting a.u. to eV 16 | 17 | #global parameters (the values here are the standard for HF) 18 | d3_s6 = 1.0000 19 | d3_s8 = 0.9171 20 | d3_a1 = 0.3385 21 | d3_a2 = 2.8830 22 | d3_k1 = 16.000 23 | d3_k2 = 4/3 24 | d3_k3 = -4.000 25 | 26 | #tables with reference values 27 | d3_c6ab = np.load(os.path.join(package_directory,"tables","c6ab.npy")) 28 | d3_r0ab = np.load(os.path.join(package_directory,"tables","r0ab.npy")) 29 | d3_rcov = np.load(os.path.join(package_directory,"tables","rcov.npy")) 30 | d3_r2r4 = np.load(os.path.join(package_directory,"tables","r2r4.npy")) 31 | d3_maxc = 5 #maximum number of coordination complexes 32 | 33 | def _smootherstep(r, cutoff): 34 | ''' 35 | computes a smooth step from 1 to 0 starting at 1 bohr 36 | before the cutoff 37 | ''' 38 | cuton = cutoff-1 39 | x = (cutoff-r)/(cutoff-cuton) 40 | x2 = x**2 41 | x3 = x2*x 42 | x4 = x3*x 43 | x5 = x4*x 44 | return tf.where(r <= cuton, tf.ones_like(x), tf.where(r >= cutoff, tf.zeros_like(x), 6*x5-15*x4+10*x3)) 45 | 46 | def _ncoord(Zi, Zj, r, idx_i, cutoff=None, k1=d3_k1, rcov=d3_rcov): 47 | ''' 48 | compute coordination numbers by adding an inverse damping function 49 | ''' 50 | rco = tf.gather(rcov,Zi) + tf.gather(rcov,Zj) 51 | rr = tf.cast(rco,r.dtype)/r 52 | damp = 1.0/(1.0+tf.exp(-k1*(rr-1.0))) 53 | if cutoff is not None: 54 | damp *= _smootherstep(r, cutoff) 55 | return tf.segment_sum(damp,idx_i) 56 | 57 | def _getc6(ZiZj, nci, ncj, c6ab=d3_c6ab, k3=d3_k3): 58 | ''' 59 | interpolate c6 60 | ''' 61 | #gather the relevant entries from the table 62 | c6ab_ = tf.cast(tf.gather_nd(c6ab, ZiZj),nci.dtype) 63 | #calculate c6 coefficients 64 | c6mem = -1.0e99*tf.ones_like(nci) 65 | r_save = 1.0e99*tf.ones_like(nci) 66 | rsum = tf.zeros_like(nci) 67 | csum = tf.zeros_like(nci) 68 | for i in range(d3_maxc): 69 | for j in range(d3_maxc): 70 | cn0 = c6ab_[:,i,j,0] 71 | cn1 = c6ab_[:,i,j,1] 72 | cn2 = c6ab_[:,i,j,2] 73 | r = (cn1-nci)**2 + (cn2-ncj)**2 74 | r_save = tf.where(r < r_save, r, r_save) 75 | c6mem = tf.where(r < r_save, cn0, c6mem) 76 | tmp1 = tf.exp(k3*r) 77 | rsum += tf.where(cn0 > 0.0, tmp1, tf.zeros_like(tmp1)) 78 | csum += tf.where(cn0 > 0.0, tmp1*cn0, tf.zeros_like(tmp1)) 79 | c6 = tf.where(rsum > 0.0, csum/rsum, c6mem) 80 | return c6 81 | 82 | def edisp(Z, r, idx_i, idx_j, cutoff=None, r2=None, 83 | r6=None, r8=None, s6=d3_s6, s8=d3_s8, a1=d3_a1, a2=d3_a2, k1=d3_k1, k2=d3_k2, 84 | k3=d3_k3, c6ab=d3_c6ab, r0ab=d3_r0ab, rcov=d3_rcov, r2r4=d3_r2r4): 85 | ''' 86 | compute d3 dispersion energy in Hartree 87 | r: distance in bohr! 
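    the pairwise form implemented below is the Becke-Johnson damped D3 energy
        E_disp = -1/2 * sum_ij [ s6*C6_ij/(r_ij**6 + f(R0_ij)**6) + s8*C8_ij/(r_ij**8 + f(R0_ij)**8) ]
    with the damping function
        f(R0_ij) = a1*sqrt(C8_ij/C6_ij) + a2
    (all quantities in atomic units); the factor 1/2 compensates for counting each pair twice.
    If a cutoff is given, a shifted variant whose value and derivative vanish at the cutoff is used instead.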
88 | ''' 89 | #compute all necessary quantities 90 | Zi = tf.gather(Z, idx_i) 91 | Zj = tf.gather(Z, idx_j) 92 | ZiZj = tf.stack([Zi,Zj],axis=1) #necessary for gatherin 93 | nc = _ncoord(Zi, Zj, r, idx_i, cutoff=cutoff, rcov=rcov) #coordination numbers 94 | nci = tf.gather(nc, idx_i) 95 | ncj = tf.gather(nc, idx_j) 96 | c6 = _getc6(ZiZj, nci, ncj, c6ab=c6ab, k3=k3) #c6 coefficients 97 | c8 = 3*c6*tf.cast(tf.gather(r2r4, Zi),c6.dtype)*tf.cast(tf.gather(r2r4, Zj),c6.dtype) #c8 coefficient 98 | 99 | #compute all necessary powers of the distance 100 | if r2 is None: 101 | r2 = r**2 #square of distances 102 | if r6 is None: 103 | r6 = r2**3 104 | if r8 is None: 105 | r8 = r6*r2 106 | 107 | #Becke-Johnson damping, zero-damping introduces spurious repulsion 108 | #and is therefore not supported/implemented 109 | tmp = a1*tf.sqrt(c8/c6) + a2 110 | tmp2 = tmp**2 111 | tmp6 = tmp2**3 112 | tmp8 = tmp6*tmp2 113 | if cutoff is None: 114 | e6 = 1/(r6+tmp6) 115 | e8 = 1/(r8+tmp8) 116 | else: #apply cutoff 117 | cut2 = cutoff**2 118 | cut6 = cut2**3 119 | cut8 = cut6*cut2 120 | cut6tmp6 = cut6 + tmp6 121 | cut8tmp8 = cut8 + tmp8 122 | e6 = 1/(r6+tmp6) - 1/cut6tmp6 + 6*cut6/cut6tmp6**2 * (r/cutoff-1) 123 | e8 = 1/(r8+tmp8) - 1/cut8tmp8 + 8*cut8/cut8tmp8**2 * (r/cutoff-1) 124 | e6 = tf.where(r < cutoff, e6, tf.zeros_like(e6)) 125 | e8 = tf.where(r < cutoff, e8, tf.zeros_like(e8)) 126 | e6 = -0.5*s6*c6*e6 127 | e8 = -0.5*s8*c8*e8 128 | return tf.segment_sum(e6+e8,idx_i) 129 | 130 | 131 | -------------------------------------------------------------------------------- /training/DataContainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class DataContainer: 4 | def __repr__(self): 5 | return "DataContainer" 6 | def __init__(self, filename): 7 | #read in data 8 | dictionary = np.load(filename) 9 | #number of atoms 10 | if 'N' in dictionary: 11 | self._N = dictionary['N'] 12 | else: 13 | self._N = None 14 | #atomic numbers/nuclear charges 15 | if 'Z' in dictionary: 16 | self._Z = dictionary['Z'] 17 | else: 18 | self._Z = None 19 | #reference dipole moment vector 20 | if 'D' in dictionary: 21 | self._D = dictionary['D'] 22 | else: 23 | self._D = None 24 | #reference total charge 25 | if 'Q' in dictionary: 26 | self._Q = dictionary['Q'] 27 | else: 28 | self._Q = None 29 | #reference atomic charges 30 | if 'Qa' in dictionary: 31 | self._Qa = dictionary['Qa'] 32 | else: 33 | self._Qa = None 34 | #positions (cartesian coordinates) 35 | if 'R' in dictionary: 36 | self._R = dictionary['R'] 37 | else: 38 | self._R = None 39 | #reference energy 40 | if 'E' in dictionary: 41 | self._E = dictionary['E'] 42 | else: 43 | self._E = None 44 | #reference atomic energies 45 | if 'Ea' in dictionary: 46 | self._Ea = dictionary['Ea'] 47 | else: 48 | self._Ea = None 49 | #reference forces 50 | if 'F' in dictionary: 51 | self._F = dictionary['F'] 52 | else: 53 | self._F = None 54 | 55 | #maximum number of atoms per molecule 56 | self._N_max = self.Z.shape[1] 57 | 58 | #construct indices used to extract position vectors to calculate relative positions 59 | #(basically, constructs indices for calculating all possible interactions (excluding self interactions), 60 | #this is a naive (but simple) O(N^2) approach, could be replaced by something more sophisticated) 61 | self._idx_i = np.empty([self.N_max, self.N_max-1],dtype=int) 62 | for i in range(self.idx_i.shape[0]): 63 | for j in range(self.idx_i.shape[1]): 64 | self._idx_i[i,j] = i 65 | 66 | self._idx_j = 
np.empty([self.N_max, self.N_max-1],dtype=int) 67 | for i in range(self.idx_j.shape[0]): 68 | c = 0 69 | for j in range(self.idx_j.shape[0]): 70 | if j != i: 71 | self._idx_j[i,c] = j 72 | c += 1 73 | 74 | @property 75 | def N_max(self): 76 | return self._N_max 77 | 78 | @property 79 | def N(self): 80 | return self._N 81 | 82 | @property 83 | def Z(self): 84 | return self._Z 85 | 86 | @property 87 | def Q(self): 88 | return self._Q 89 | 90 | @property 91 | def Qa(self): 92 | return self._Qa 93 | 94 | @property 95 | def D(self): 96 | return self._D 97 | 98 | @property 99 | def R(self): 100 | return self._R 101 | 102 | @property 103 | def E(self): 104 | return self._E 105 | 106 | @property 107 | def Ea(self): 108 | return self._Ea 109 | 110 | @property 111 | def F(self): 112 | return self._F 113 | 114 | #indices for atoms i (when calculating interactions) 115 | @property 116 | def idx_i(self): 117 | return self._idx_i 118 | 119 | #indices for atoms j (when calculating interactions) 120 | @property 121 | def idx_j(self): 122 | return self._idx_j 123 | 124 | def __len__(self): 125 | return self.Z.shape[0] 126 | 127 | def __getitem__(self, idx): 128 | if type(idx) is int or type(idx) is np.int64: 129 | idx = [idx] 130 | 131 | data = {'E': [], 132 | 'Ea': [], 133 | 'F': [], 134 | 'Z': [], 135 | 'D': [], 136 | 'Q': [], 137 | 'Qa': [], 138 | 'R': [], 139 | 'idx_i': [], 140 | 'idx_j': [], 141 | 'batch_seg': [], 142 | 'offsets' : [] 143 | } 144 | 145 | Ntot = 0 #total number of atoms 146 | Itot = 0 #total number of interactions 147 | for k, i in enumerate(idx): 148 | N = self.N[i] #number of atoms 149 | I = N*(N-1) #number of interactions 150 | #append data 151 | if self.E is not None: 152 | data['E'].append(self.E[i]) 153 | else: 154 | data['E'].append(np.nan) 155 | if self.Ea is not None: 156 | data['Ea'].extend(self.Ea[i,:N].tolist()) 157 | else: 158 | data['Ea'].extend([np.nan]) 159 | if self.Q is not None: 160 | data['Q'].append(self.Q[i]) 161 | else: 162 | data['Q'].append(np.nan) 163 | if self.Qa is not None: 164 | data['Qa'].extend(self.Qa[i,:N].tolist()) 165 | else: 166 | data['Qa'].extend([np.nan]) 167 | if self.Z is not None: 168 | data['Z'].extend(self.Z[i,:N].tolist()) 169 | else: 170 | data['Z'].append(0) 171 | if self.D is not None: 172 | data['D'].extend(self.D[i:i+1,:].tolist()) 173 | else: 174 | data['D'].extend([[np.nan,np.nan,np.nan]]) 175 | if self.R is not None: 176 | data['R'].extend(self.R[i,:N,:].tolist()) 177 | else: 178 | data['R'].extend([[np.nan,np.nan,np.nan]]) 179 | if self.F is not None: 180 | data['F'].extend(self.F[i,:N,:].tolist()) 181 | else: 182 | data['F'].extend([[np.nan,np.nan,np.nan]]) 183 | data['idx_i'].extend(np.reshape(self.idx_i[:N,:N-1]+Ntot,[-1]).tolist()) 184 | data['idx_j'].extend(np.reshape(self.idx_j[:N,:N-1]+Ntot,[-1]).tolist()) 185 | #offsets could be added in case they are need 186 | data['batch_seg'].extend([k] * N) 187 | #increment totals 188 | Ntot += N 189 | Itot += I 190 | 191 | return data 192 | 193 | -------------------------------------------------------------------------------- /training/DataProvider.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class DataProvider: 4 | def __repr__(self): 5 | return "DataProvider" 6 | def __init__(self, data, ntrain, nvalid, batch_size=1, valid_batch_size=1, seed=None): 7 | self._data = data 8 | self._ndata = len(data) 9 | self._ntrain = ntrain 10 | self._nvalid = nvalid 11 | self._ntest = len(data)-self.ntrain-self.nvalid 12 | 
self._batch_size = batch_size 13 | self._valid_batch_size = valid_batch_size 14 | 15 | #random state parameter, such that random operations are reproducible if wanted 16 | self._random_state = np.random.RandomState(seed=seed) 17 | 18 | #create shuffled list of indices 19 | idx = self._random_state.permutation(np.arange(len(self.data))) 20 | 21 | #store indices of training, validation and test data 22 | self._idx_train = idx[0:self.ntrain] 23 | self._idx_valid = idx[self.ntrain:self.ntrain+self.nvalid] 24 | self._idx_test = idx[self.ntrain+self.nvalid:] 25 | 26 | #initialize mean/stdev of properties to None, only get calculated if requested 27 | self._EperA_mean = None 28 | self._EperA_stdev = None 29 | self._FperA_mean = None 30 | self._FperA_stdev = None 31 | self._DperA_mean = None 32 | self._DperA_stdev = None 33 | 34 | #for retrieving batches 35 | self._idx_in_epoch = 0 36 | self._valid_idx = 0 37 | 38 | 39 | @property 40 | def data(self): 41 | return self._data 42 | 43 | @property 44 | def ndata(self): 45 | return self._ndata 46 | 47 | @property 48 | def ntrain(self): 49 | return self._ntrain 50 | 51 | @property 52 | def nvalid(self): 53 | return self._nvalid 54 | 55 | @property 56 | def ntest(self): 57 | return self._ntest 58 | 59 | @property 60 | def random_state(self): 61 | return self._random_state 62 | 63 | @property 64 | def idx_train(self): 65 | return self._idx_train 66 | 67 | @property 68 | def idx_valid(self): 69 | return self._idx_valid 70 | 71 | @property 72 | def idx_test(self): 73 | return self._idx_test 74 | 75 | @property 76 | def idx_in_epoch(self): 77 | return self._idx_in_epoch 78 | 79 | @property 80 | def valid_idx(self): 81 | return self._valid_idx 82 | 83 | @property 84 | def batch_size(self): 85 | return self._batch_size 86 | 87 | @property 88 | def valid_batch_size(self): 89 | return self._valid_batch_size 90 | 91 | def _compute_E_statistics(self): 92 | self._EperA_mean = 0.0 93 | self._EperA_stdev = 0.0 94 | for i in range(self.ntrain): 95 | tmp = self.get_data(self.idx_train[i]) 96 | m_prev = self.EperA_mean 97 | x = tmp['E'][0]/(np.shape(tmp['Z'])[0]) 98 | self._EperA_mean += (x - self._EperA_mean)/(i+1) 99 | self._EperA_stdev += (x - self._EperA_mean) * (x - m_prev) 100 | self._EperA_stdev = np.sqrt(self._EperA_stdev/self.ntrain) 101 | return 102 | 103 | @property 104 | def EperA_mean(self): #mean energy per atom in the training set 105 | if self._EperA_mean is None: 106 | self._compute_E_statistics() 107 | return self._EperA_mean 108 | 109 | @property 110 | def EperA_stdev(self): #stdev of energy per atom in the training set 111 | if self._EperA_stdev is None: 112 | self._compute_E_statistics() 113 | return self._EperA_stdev 114 | 115 | def _compute_F_statistics(self): 116 | self._FperA_mean = 0.0 117 | self._FperA_stdev = 0.0 118 | for i in range(self.ntrain): 119 | tmp = self.get_data(self.idx_train[i]) 120 | F = tmp["F"] 121 | x = 0.0 122 | for i in range(len(F)): 123 | x += np.sqrt(F[i][0]**2 + F[i][1]**2 + F[i][2]**2) 124 | m_prev = self.FperA_mean 125 | x /= len(F) 126 | self._FperA_mean += (x - self._FperA_mean)/(i+1) 127 | self._FperA_stdev += (x - self._FperA_mean) * (x - m_prev) 128 | self._FperA_stdev = np.sqrt(self._FperA_stdev/self.ntrain) 129 | return 130 | 131 | @property 132 | def FperA_mean(self): #mean force magnitude per atom in the training set 133 | if self._FperA_mean is None: 134 | self._compute_F_statistics() 135 | return self._FperA_mean 136 | 137 | @property 138 | def FperA_stdev(self): #stdev of force magnitude per atom in the 
training set 139 | if self._FperA_stdev is None: 140 | self._compute_F_statistics() 141 | return self._FperA_stdev 142 | 143 | def _compute_D_statistics(self): 144 | self._DperA_mean = 0.0 145 | self._DperA_stdev = 0.0 146 | for i in range(self.ntrain): 147 | tmp = self.get_data(self.idx_train[i]) 148 | D = tmp["D"] 149 | x = np.sqrt(D[0]**2 + D[1]**2 + D[2]**2) 150 | m_prev = self.DperA_mean 151 | self._DperA_mean += (x - self._DperA_mean)/(i+1) 152 | self._DperA_stdev += (x - self._DperA_mean) * (x - m_prev) 153 | self._DperA_stdev = np.sqrt(self._DperA_stdev/self.ntrain) 154 | return 155 | 156 | @property 157 | def DperA_mean(self): #mean partial charge per atom in the training set 158 | if self._DperA_mean is None: 159 | self._compute_D_statistics() 160 | return self._DperA_mean 161 | 162 | @property 163 | def DperA_stdev(self): #stdev of partial charge per atom in the training set 164 | if self._DperA_stdev is None: 165 | self._compute_D_statistics() 166 | return self._DperA_stdev 167 | 168 | #shuffle the training data 169 | def shuffle(self): 170 | self._idx_train = self.random_state.permutation(self.idx_train) 171 | 172 | #returns a batch of samples from the training set 173 | def next_batch(self): 174 | start = self.idx_in_epoch 175 | self._idx_in_epoch += self.batch_size 176 | #epoch is finished, set needs to be shuffled 177 | if self.idx_in_epoch > self.ntrain: 178 | self.shuffle() 179 | start = 0 180 | self._idx_in_epoch = self.batch_size 181 | end = self.idx_in_epoch 182 | return self.data[self.idx_train[start:end]] 183 | 184 | #returns a batch of samples from the validation set 185 | def next_valid_batch(self): 186 | start = self.valid_idx 187 | self._valid_idx += self.valid_batch_size 188 | #finished one pass-through, reset index 189 | if self.valid_idx > self.nvalid: 190 | start = 0 191 | self._valid_idx = self.valid_batch_size 192 | end = self.valid_idx 193 | return self.data[self.idx_valid[start:end]] 194 | 195 | def get_data(self, idx): 196 | return self.data[idx] 197 | 198 | def get_train_data(self, i): 199 | idx = self.idx_train[i] 200 | return self.data[idx] 201 | 202 | def get_all_train_data(self): 203 | return self.data[self.idx_train] 204 | 205 | def get_valid_data(self, i): 206 | idx = self.idx_valid[i] 207 | return self.data[idx] 208 | 209 | def get_all_valid_data(self): 210 | return self.data[self.idx_valid] 211 | 212 | def get_test_data(self, i): 213 | idx = self.idx_test[i] 214 | return self.data[idx] 215 | 216 | def get_all_test_data(self): 217 | return self.data[self.idx_test] -------------------------------------------------------------------------------- /training/AMSGrad.py: -------------------------------------------------------------------------------- 1 | """ 2 | AMSGrad for TensorFlow. 
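The update implemented in _apply_dense / _apply_sparse_shared is Adam with a
non-decreasing second-moment estimate (the AMSGrad variant):
    m_t    = beta1*m_{t-1} + (1-beta1)*g_t
    v_t    = beta2*v_{t-1} + (1-beta2)*g_t**2
    vhat_t = max(vhat_{t-1}, v_t)
    var_t  = var_{t-1} - lr_t*m_t/(sqrt(vhat_t) + epsilon)
where lr_t = learning_rate*sqrt(1 - beta2**t)/(1 - beta1**t) (bias correction).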
3 | obtained from: https://github.com/taki0112/AMSGrad-Tensorflow 4 | author: Junho Kim 5 | """ 6 | 7 | from tensorflow.python.framework import ops 8 | from tensorflow.python.ops import control_flow_ops 9 | from tensorflow.python.ops import math_ops 10 | from tensorflow.python.ops import resource_variable_ops 11 | from tensorflow.python.ops import state_ops 12 | from tensorflow.python.ops import variable_scope 13 | from tensorflow.python.training import optimizer 14 | 15 | class AMSGrad(optimizer.Optimizer): 16 | def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.99, epsilon=1e-3, use_locking=False, name="AMSGrad"): 17 | super(AMSGrad, self).__init__(use_locking, name) 18 | self._lr = learning_rate 19 | self._beta1 = beta1 20 | self._beta2 = beta2 21 | self._epsilon = epsilon 22 | 23 | self._lr_t = None 24 | self._beta1_t = None 25 | self._beta2_t = None 26 | self._epsilon_t = None 27 | 28 | self._beta1_power = None 29 | self._beta2_power = None 30 | 31 | def _create_slots(self, var_list): 32 | first_var = min(var_list, key=lambda x: x.name) 33 | 34 | create_new = self._beta1_power is None 35 | #if not create_new and context.in_graph_mode(): 36 | # create_new = (self._beta1_power.graph is not first_var.graph) 37 | 38 | if create_new: 39 | with ops.colocate_with(first_var): 40 | self._beta1_power = variable_scope.variable(self._beta1, name="beta1_power", trainable=False) 41 | self._beta2_power = variable_scope.variable(self._beta2, name="beta2_power", trainable=False) 42 | # Create slots for the first and second moments. 43 | for v in var_list : 44 | self._zeros_slot(v, "m", self._name) 45 | self._zeros_slot(v, "v", self._name) 46 | self._zeros_slot(v, "vhat", self._name) 47 | 48 | def _prepare(self): 49 | self._lr_t = ops.convert_to_tensor(self._lr) 50 | self._beta1_t = ops.convert_to_tensor(self._beta1) 51 | self._beta2_t = ops.convert_to_tensor(self._beta2) 52 | self._epsilon_t = ops.convert_to_tensor(self._epsilon) 53 | 54 | def _apply_dense(self, grad, var): 55 | beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) 56 | beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) 57 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 58 | beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) 59 | beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) 60 | epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) 61 | 62 | lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) 63 | 64 | # m_t = beta1 * m + (1 - beta1) * g_t 65 | m = self.get_slot(var, "m") 66 | m_scaled_g_values = grad * (1 - beta1_t) 67 | m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) 68 | 69 | # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) 70 | v = self.get_slot(var, "v") 71 | v_scaled_g_values = (grad * grad) * (1 - beta2_t) 72 | v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) 73 | 74 | # amsgrad 75 | vhat = self.get_slot(var, "vhat") 76 | vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) 77 | v_sqrt = math_ops.sqrt(vhat_t) 78 | 79 | var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) 80 | return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 81 | 82 | def _resource_apply_dense(self, grad, var): 83 | var = var.handle 84 | beta1_power = math_ops.cast(self._beta1_power, grad.dtype.base_dtype) 85 | beta2_power = math_ops.cast(self._beta2_power, grad.dtype.base_dtype) 86 | lr_t = 
math_ops.cast(self._lr_t, grad.dtype.base_dtype) 87 | beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype) 88 | beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype) 89 | epsilon_t = math_ops.cast(self._epsilon_t, grad.dtype.base_dtype) 90 | 91 | lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) 92 | 93 | # m_t = beta1 * m + (1 - beta1) * g_t 94 | m = self.get_slot(var, "m").handle 95 | m_scaled_g_values = grad * (1 - beta1_t) 96 | m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values, use_locking=self._use_locking) 97 | 98 | # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) 99 | v = self.get_slot(var, "v").handle 100 | v_scaled_g_values = (grad * grad) * (1 - beta2_t) 101 | v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values, use_locking=self._use_locking) 102 | 103 | # amsgrad 104 | vhat = self.get_slot(var, "vhat").handle 105 | vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) 106 | v_sqrt = math_ops.sqrt(vhat_t) 107 | 108 | var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) 109 | return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 110 | 111 | def _apply_sparse_shared(self, grad, var, indices, scatter_add): 112 | beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype) 113 | beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype) 114 | lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) 115 | beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) 116 | beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) 117 | epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) 118 | 119 | lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) 120 | 121 | # m_t = beta1 * m + (1 - beta1) * g_t 122 | m = self.get_slot(var, "m") 123 | m_scaled_g_values = grad * (1 - beta1_t) 124 | m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking) 125 | with ops.control_dependencies([m_t]): 126 | m_t = scatter_add(m, indices, m_scaled_g_values) 127 | 128 | # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) 129 | v = self.get_slot(var, "v") 130 | v_scaled_g_values = (grad * grad) * (1 - beta2_t) 131 | v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking) 132 | with ops.control_dependencies([v_t]): 133 | v_t = scatter_add(v, indices, v_scaled_g_values) 134 | 135 | # amsgrad 136 | vhat = self.get_slot(var, "vhat") 137 | vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat)) 138 | v_sqrt = math_ops.sqrt(vhat_t) 139 | var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking) 140 | return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t]) 141 | 142 | def _apply_sparse(self, grad, var): 143 | return self._apply_sparse_shared( 144 | grad.values, var, grad.indices, 145 | lambda x, i, v: state_ops.scatter_add( # pylint: disable=g-long-lambda 146 | x, i, v, use_locking=self._use_locking)) 147 | 148 | def _resource_scatter_add(self, x, i, v): 149 | with ops.control_dependencies( 150 | [resource_variable_ops.resource_scatter_add(x.handle, i, v)]): 151 | return x.value() 152 | 153 | def _resource_apply_sparse(self, grad, var, indices): 154 | return self._apply_sparse_shared( 155 | grad, var, indices, self._resource_scatter_add) 156 | 157 | def _finish(self, update_ops, name_scope): 158 | # Update the power accumulators. 
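# beta1_power and beta2_power hold the running products beta1**t and beta2**t; they are
# advanced once per step (after all variable updates, via the control dependency below)
# and feed the bias-corrected learning rate computed in the _apply_* methods above.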
159 | with ops.control_dependencies(update_ops): 160 | with ops.colocate_with(self._beta1_power): 161 | update_beta1 = self._beta1_power.assign( 162 | self._beta1_power * self._beta1_t, 163 | use_locking=self._use_locking) 164 | update_beta2 = self._beta2_power.assign( 165 | self._beta2_power * self._beta2_t, 166 | use_locking=self._use_locking) 167 | return control_flow_ops.group(*update_ops + [update_beta1, update_beta2], 168 | name=name_scope) 169 | -------------------------------------------------------------------------------- /NNCalculator.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import ase 4 | from ase.neighborlist import neighbor_list 5 | from .neural_network.NeuralNetwork import * 6 | from .neural_network.activation_fn import * 7 | 8 | ''' 9 | Calculator for the atomic simulation environment (ASE) 10 | that evaluates energies and forces using a neural network 11 | ''' 12 | class NNCalculator: 13 | #most parameters are just passed to the neural network 14 | def __init__(self, 15 | checkpoint, #ckpt file from which to restore the model (can also be a list for ensembles) 16 | atoms, #ASE atoms object 17 | charge=0, #system charge 18 | F=128, #dimensionality of feature vector 19 | K=64, #number of radial basis functions 20 | sr_cut=6.0, #short range cutoff distance 21 | lr_cut = None, #long range cutoff distance 22 | num_blocks=5, #number of building blocks to be stacked 23 | num_residual_atomic=2, #number of residual layers for atomic refinements of feature vector 24 | num_residual_interaction=3, #number of residual layers for refinement of message vector 25 | num_residual_output=1, #number of residual layers for the output blocks 26 | use_electrostatic=True, #adds electrostatic contributions to atomic energy 27 | use_dispersion=True, #adds dispersion contributions to atomic energy 28 | s6=None, #s6 coefficient for d3 dispersion, by default is learned 29 | s8=None, #s8 coefficient for d3 dispersion, by default is learned 30 | a1=None, #a1 coefficient for d3 dispersion, by default is learned 31 | a2=None, #a2 coefficient for d3 dispersion, by default is learned 32 | activation_fn=shifted_softplus, #activation function 33 | dtype=tf.float32): #single or double precision 34 | 35 | #create neighborlist 36 | if lr_cut is None: 37 | self._sr_cutoff = sr_cut 38 | self._lr_cutoff = None 39 | self._use_neighborlist = False 40 | else: 41 | self._sr_cutoff = sr_cut 42 | self._lr_cutoff = lr_cut 43 | self._use_neighborlist = True 44 | 45 | 46 | #save checkpoint 47 | self._checkpoint = checkpoint 48 | 49 | #create neural network 50 | self._nn = NeuralNetwork(F=F, 51 | K=K, 52 | sr_cut=sr_cut, 53 | lr_cut=lr_cut, 54 | num_blocks=num_blocks, 55 | num_residual_atomic=num_residual_atomic, 56 | num_residual_interaction=num_residual_interaction, 57 | num_residual_output=num_residual_output, 58 | use_electrostatic=use_electrostatic, 59 | use_dispersion=use_dispersion, 60 | s6=s6, 61 | s8=s8, 62 | a1=a1, 63 | a2=a2, 64 | activation_fn=activation_fn, 65 | dtype=dtype, scope="neural_network") 66 | 67 | #create placeholders for feeding data 68 | self._Q_tot = np.array(1*[charge]) 69 | self._Z = tf.placeholder(tf.int32, shape=[None, ], name="Z") 70 | self._R = tf.placeholder(dtype, shape=[None,3], name="R") 71 | self._idx_i = tf.placeholder(tf.int32, shape=[None, ], name="idx_i") 72 | self._idx_j = tf.placeholder(tf.int32, shape=[None, ], name="idx_j") 73 | self._offsets = tf.placeholder(dtype, shape=[None,3], 
name="offsets") 74 | self._sr_idx_i = tf.placeholder(tf.int32, shape=[None, ], name="sr_idx_i") 75 | self._sr_idx_j = tf.placeholder(tf.int32, shape=[None, ], name="sr_idx_j") 76 | self._sr_offsets = tf.placeholder(dtype, shape=[None,3], name="sr_offsets") 77 | 78 | #calculate atomic charges, energy and force evaluation nodes 79 | if self.use_neighborlist: 80 | Ea, Qa, Dij, nhloss = self.nn.atomic_properties(self.Z, self.R, self.idx_i, self.idx_j, self.offsets, self.sr_idx_i, self.sr_idx_j, self.sr_offsets) 81 | else: 82 | Ea, Qa, Dij, nhloss = self.nn.atomic_properties(self.Z, self.R, self.idx_i, self.idx_j, self.offsets) 83 | self._charges = self.nn.scaled_charges(self.Z, Qa, self.Q_tot) 84 | self._energy, self._forces = self.nn.energy_and_forces_from_scaled_atomic_properties(Ea, self.charges, Dij, self.Z, self.R, self.idx_i, self.idx_j) 85 | 86 | #create TensorFlow session and load neural network(s) 87 | self._sess = tf.Session() 88 | if(type(self.checkpoint) is not list): 89 | self.nn.restore(self.sess, self.checkpoint) 90 | 91 | #calculate properties once to initialize everything 92 | self._calculate_all_properties(atoms) 93 | 94 | def calculation_required(self, atoms, quantities=None): 95 | return atoms != self.last_atoms 96 | 97 | def _calculate_all_properties(self, atoms): 98 | #find neighbors and offsets 99 | if self.use_neighborlist or any(atoms.get_pbc()): 100 | idx_i, idx_j, S = neighbor_list('ijS', atoms, self.lr_cutoff) 101 | offsets = np.dot(S, atoms.get_cell()) 102 | sr_idx_i, sr_idx_j, sr_S = neighbor_list('ijS', atoms, self.sr_cutoff) 103 | sr_offsets = np.dot(sr_S, atoms.get_cell()) 104 | feed_dict = {self.Z: atoms.get_atomic_numbers(), self.R: atoms.get_positions(), 105 | self.idx_i: idx_i, self.idx_j: idx_j, self.offsets: offsets, 106 | self.sr_idx_i: sr_idx_i, self.sr_idx_j: sr_idx_j, self.sr_offsets: sr_offsets} 107 | else: 108 | N = len(atoms) 109 | idx_i = np.zeros([N*(N-1)], dtype=int) 110 | 111 | idx_j = np.zeros([N*(N-1)], dtype=int) 112 | offsets = np.zeros([N*(N-1),3], dtype=float) 113 | count = 0 114 | for i in range(N): 115 | for j in range(N): 116 | if i != j: 117 | idx_i[count] = i 118 | idx_j[count] = j 119 | count += 1 120 | feed_dict = {self.Z: atoms.get_atomic_numbers(), self.R: atoms.get_positions(), 121 | self.idx_i: idx_i, self.idx_j: idx_j, self.offsets: offsets} 122 | 123 | #calculate energy and forces (in case multiple NNs are used as ensemble, this forms the average) 124 | if(type(self.checkpoint) is not list): #only one NN 125 | self._last_energy, self._last_forces, self._last_charges = self.sess.run([self.energy, self.forces, self.charges], feed_dict=feed_dict) 126 | self._energy_stdev = 0 127 | else: #ensemble is used 128 | for i in range(len(self.checkpoint)): 129 | self.nn.restore(self.sess, self.checkpoint[i]) 130 | energy, forces, charges = self.sess.run([self.energy, self.forces, self.charges], feed_dict=feed_dict) 131 | if i == 0: 132 | self._last_energy = energy 133 | self._last_forces = forces 134 | self._last_charges = charges 135 | self._energy_stdev = 0 136 | else: 137 | n = i+1 138 | delta = energy-self.last_energy 139 | self._last_energy += delta/n 140 | self._energy_stdev += delta*(energy-self.last_energy) 141 | for a in range(np.shape(charges)[0]): #loop over atoms 142 | self._last_charges[a] += (charges[a]-self.last_charges[a])/n 143 | for b in range(3): 144 | self._last_forces[a,b] += (forces[a,b]-self.last_forces[a,b])/n 145 | if(len(self.checkpoint) > 1): 146 | self._energy_stdev = 
np.sqrt(self.energy_stdev/len(self.checkpoint)) 147 | 148 | self._last_energy = np.array(1*[self.last_energy]) #prevents some problems... 149 | 150 | #store copy of atoms 151 | self._last_atoms = atoms.copy() 152 | 153 | def get_potential_energy(self, atoms, force_consistent=False): 154 | if self.calculation_required(atoms): 155 | self._calculate_all_properties(atoms) 156 | return self.last_energy 157 | 158 | def get_forces(self, atoms): 159 | if self.calculation_required(atoms): 160 | self._calculate_all_properties(atoms) 161 | return self.last_forces 162 | 163 | def get_charges(self, atoms): 164 | if self.calculation_required(atoms): 165 | self._calculate_all_properties(atoms) 166 | return self.last_charges 167 | 168 | @property 169 | def sess(self): 170 | return self._sess 171 | 172 | @property 173 | def last_atoms(self): 174 | return self._last_atoms 175 | 176 | @property 177 | def last_energy(self): 178 | return self._last_energy 179 | 180 | @property 181 | def last_forces(self): 182 | return self._last_forces 183 | 184 | @property 185 | def last_charges(self): 186 | return self._last_charges 187 | 188 | @property 189 | def energy_stdev(self): 190 | return self._energy_stdev 191 | 192 | @property 193 | def sr_cutoff(self): 194 | return self._sr_cutoff 195 | 196 | @property 197 | def lr_cutoff(self): 198 | return self._lr_cutoff 199 | 200 | @property 201 | def use_neighborlist(self): 202 | return self._use_neighborlist 203 | 204 | @property 205 | def nn(self): 206 | return self._nn 207 | 208 | @property 209 | def checkpoint(self): 210 | return self._checkpoint 211 | 212 | @property 213 | def Z(self): 214 | return self._Z 215 | 216 | @property 217 | def Q_tot(self): 218 | return self._Q_tot 219 | 220 | @property 221 | def R(self): 222 | return self._R 223 | 224 | @property 225 | def offsets(self): 226 | return self._offsets 227 | 228 | @property 229 | def idx_i(self): 230 | return self._idx_i 231 | 232 | @property 233 | def idx_j(self): 234 | return self._idx_j 235 | 236 | @property 237 | def sr_offsets(self): 238 | return self._sr_offsets 239 | 240 | @property 241 | def sr_idx_i(self): 242 | return self._sr_idx_i 243 | 244 | @property 245 | def sr_idx_j(self): 246 | return self._sr_idx_j 247 | 248 | @property 249 | def energy(self): 250 | return self._energy 251 | 252 | @property 253 | def forces(self): 254 | return self._forces 255 | 256 | @property 257 | def charges(self): 258 | return self._charges 259 | -------------------------------------------------------------------------------- /neural_network/NeuralNetwork.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .layers.RBFLayer import * 3 | from .layers.InteractionBlock import * 4 | from .layers.OutputBlock import * 5 | from .activation_fn import * 6 | from .grimme_d3.grimme_d3 import * 7 | 8 | def softplus_inverse(x): 9 | '''numerically stable inverse of softplus transform''' 10 | return x + np.log(-np.expm1(-x)) 11 | 12 | class NeuralNetwork: 13 | def __str__(self): 14 | return "Neural Network" 15 | 16 | def __init__(self, 17 | F, #dimensionality of feature vector 18 | K, #number of radial basis functions 19 | sr_cut, #cutoff distance for short range interactions 20 | lr_cut = None, #cutoff distance for long range interactions (default: no cutoff) 21 | num_blocks=3, #number of building blocks to be stacked 22 | num_residual_atomic=2, #number of residual layers for atomic refinements of feature vector 23 | num_residual_interaction=2, #number of residual layers 
for refinement of message vector 24 | num_residual_output=1, #number of residual layers for the output blocks 25 | use_electrostatic=True, #adds electrostatic contributions to atomic energy 26 | use_dispersion=True, #adds dispersion contributions to atomic energy 27 | s6=None, #s6 coefficient for d3 dispersion, by default is learned 28 | s8=None, #s8 coefficient for d3 dispersion, by default is learned 29 | a1=None, #a1 coefficient for d3 dispersion, by default is learned 30 | a2=None, #a2 coefficient for d3 dispersion, by default is learned 31 | Eshift=0.0, #initial value for output energy shift (makes convergence faster) 32 | Escale=1.0, #initial value for output energy scale (makes convergence faster) 33 | Qshift=0.0, #initial value for output charge shift 34 | Qscale=1.0, #initial value for output charge scale 35 | kehalf=7.199822675975274, #half (else double counting) of the Coulomb constant (default is in units e=1, eV=1, A=1) 36 | activation_fn=shifted_softplus, #activation function 37 | dtype=tf.float32, #single or double precision 38 | seed=None, 39 | scope=None): 40 | assert(num_blocks > 0) 41 | self._num_blocks = num_blocks 42 | self._dtype = dtype 43 | self._kehalf = kehalf 44 | self._F = F 45 | self._K = K 46 | self._sr_cut = sr_cut #cutoff for neural network interactions 47 | self._lr_cut = lr_cut #cutoff for long-range interactions 48 | self._use_electrostatic = use_electrostatic 49 | self._use_dispersion = use_dispersion 50 | self._activation_fn = activation_fn 51 | self._scope = scope 52 | 53 | with tf.variable_scope(self.scope): 54 | #keep probability for dropout regularization 55 | self._keep_prob = tf.placeholder_with_default(1.0, shape=[], name="keep_prob") 56 | 57 | #atom embeddings (we go up to Pu(94), 95 because indices start with 0) 58 | self._embeddings = tf.Variable(tf.random_uniform([95, self.F], minval=-np.sqrt(3), maxval=np.sqrt(3), seed=seed, dtype=dtype), name="embeddings", dtype=dtype) 59 | tf.summary.histogram("embeddings", self.embeddings) 60 | 61 | #radial basis function expansion layer 62 | self._rbf_layer = RBFLayer(K, sr_cut, scope="rbf_layer") 63 | 64 | #initialize variables for d3 dispersion (the way this is done, positive values are guaranteed) 65 | if s6 is None: 66 | self._s6 = tf.nn.softplus(tf.Variable(softplus_inverse(d3_s6), name="s6", dtype=dtype, trainable=True)) 67 | else: 68 | self._s6 = tf.Variable(s6, name="s6", dtype=dtype, trainable=False) 69 | tf.summary.scalar("d3-s6", self.s6) 70 | if s8 is None: 71 | self._s8 = tf.nn.softplus(tf.Variable(softplus_inverse(d3_s8), name="s8", dtype=dtype, trainable=True)) 72 | else: 73 | self._s8 = tf.Variable(s8, name="s8", dtype=dtype, trainable=False) 74 | tf.summary.scalar("d3-s8", self.s8) 75 | if a1 is None: 76 | self._a1 = tf.nn.softplus(tf.Variable(softplus_inverse(d3_a1), name="a1", dtype=dtype, trainable=True)) 77 | else: 78 | self._a1 = tf.Variable(a1, name="a1", dtype=dtype, trainable=False) 79 | tf.summary.scalar("d3-a1", self.a1) 80 | if a2 is None: 81 | self._a2 = tf.nn.softplus(tf.Variable(softplus_inverse(d3_a2), name="a2", dtype=dtype, trainable=True)) 82 | else: 83 | self._a2 = tf.Variable(a2, name="a2", dtype=dtype, trainable=False) 84 | tf.summary.scalar("d3-a2", self.a2) 85 | 86 | #initialize output scale/shift variables 87 | self._Eshift = tf.Variable(tf.constant(Eshift, shape=[95], dtype=dtype), name="Eshift", dtype=dtype) 88 | self._Escale = tf.Variable(tf.constant(Escale, shape=[95], dtype=dtype), name="Escale", dtype=dtype) 89 | self._Qshift = 
tf.Variable(tf.constant(Qshift, shape=[95], dtype=dtype), name="Qshift", dtype=dtype) 90 | self._Qscale = tf.Variable(tf.constant(Qscale, shape=[95], dtype=dtype), name="Qscale", dtype=dtype) 91 | 92 | #embedding blocks and output layers 93 | self._interaction_block = [] 94 | self._output_block = [] 95 | for i in range(num_blocks): 96 | self.interaction_block.append( 97 | InteractionBlock(K, F, num_residual_atomic, num_residual_interaction, activation_fn=activation_fn, seed=seed, scope="interaction_block"+str(i), keep_prob=self.keep_prob, dtype=dtype)) 98 | self.output_block.append( 99 | OutputBlock(F, num_residual_output, activation_fn=activation_fn, seed=seed, scope="output_block"+str(i), keep_prob=self.keep_prob, dtype=dtype)) 100 | 101 | #saver node to save/restore the model 102 | self._saver = tf.train.Saver(self.variables, save_relative_paths=True, max_to_keep=50) 103 | 104 | def calculate_interatomic_distances(self, R, idx_i, idx_j, offsets=None): 105 | #calculate interatomic distances 106 | Ri = tf.gather(R, idx_i) 107 | Rj = tf.gather(R, idx_j) 108 | if offsets is not None: 109 | Rj += offsets 110 | Dij = tf.sqrt(tf.nn.relu(tf.reduce_sum((Ri-Rj)**2, -1))) #relu prevents negative numbers in sqrt 111 | return Dij 112 | 113 | #calculates the atomic energies, charges and distances (needed if unscaled charges are wanted e.g. for loss function) 114 | def atomic_properties(self, Z, R, idx_i, idx_j, offsets=None, sr_idx_i=None, sr_idx_j=None, sr_offsets=None): 115 | with tf.name_scope("atomic_properties"): 116 | #calculate distances (for long range interaction) 117 | Dij_lr = self.calculate_interatomic_distances(R, idx_i, idx_j, offsets=offsets) 118 | #optionally, it is possible to calculate separate distances for short range interactions (computational efficiency) 119 | if sr_idx_i is not None and sr_idx_j is not None: 120 | Dij_sr = self.calculate_interatomic_distances(R, sr_idx_i, sr_idx_j, offsets=sr_offsets) 121 | else: 122 | sr_idx_i = idx_i 123 | sr_idx_j = idx_j 124 | Dij_sr = Dij_lr 125 | 126 | #calculate radial basis function expansion 127 | rbf = self.rbf_layer(Dij_sr) 128 | 129 | #initialize feature vectors according to embeddings for nuclear charges 130 | x = tf.gather(self.embeddings, Z) 131 | 132 | #apply blocks 133 | Ea = 0 #atomic energy 134 | Qa = 0 #atomic charge 135 | nhloss = 0 #non-hierarchicality loss 136 | for i in range(self.num_blocks): 137 | x = self.interaction_block[i](x, rbf, sr_idx_i, sr_idx_j) 138 | out = self.output_block[i](x) 139 | Ea += out[:,0] 140 | Qa += out[:,1] 141 | #compute non-hierarchicality loss 142 | out2 = out**2 143 | if i > 0: 144 | nhloss += tf.reduce_mean(out2/(out2 + lastout2 + 1e-7)) 145 | lastout2 = out2 146 | 147 | #apply scaling/shifting 148 | Ea = tf.gather(self.Escale, Z) * Ea + tf.gather(self.Eshift, Z) + 0*tf.reduce_sum(R, -1) #last term necessary to guarantee no "None" in force evaluation 149 | Qa = tf.gather(self.Qscale, Z) * Qa + tf.gather(self.Qshift, Z) 150 | return Ea, Qa, Dij_lr, nhloss 151 | 152 | #calculates the energy given the scaled atomic properties (in order to prevent recomputation if atomic properties are calculated) 153 | def energy_from_scaled_atomic_properties(self, Ea, Qa, Dij, Z, idx_i, idx_j, batch_seg=None): 154 | with tf.name_scope("energy_from_atomic_properties"): 155 | if batch_seg is None: 156 | batch_seg = tf.zeros_like(Z) 157 | #add electrostatic and dispersion contribution to atomic energy 158 | if self.use_electrostatic: 159 | Ea += self.electrostatic_energy_per_atom(Dij, Qa, idx_i, idx_j) 160 | if 
self.use_dispersion: 161 | if self.lr_cut is not None: 162 | Ea += d3_autoev*edisp(Z, Dij/d3_autoang, idx_i, idx_j, s6=self.s6, s8=self.s8, a1=self.a1, a2=self.a2, cutoff=self.lr_cut/d3_autoang) 163 | else: 164 | Ea += d3_autoev*edisp(Z, Dij/d3_autoang, idx_i, idx_j, s6=self.s6, s8=self.s8, a1=self.a1, a2=self.a2) 165 | return tf.squeeze(tf.segment_sum(Ea, batch_seg)) 166 | 167 | #calculates the energy and forces given the scaled atomic properties (in order to prevent recomputation if atomic properties are calculated) 168 | def energy_and_forces_from_scaled_atomic_properties(self, Ea, Qa, Dij, Z, R, idx_i, idx_j, batch_seg=None): 169 | with tf.name_scope("energy_and_forces_from_atomic_properties"): 170 | energy = self.energy_from_scaled_atomic_properties(Ea, Qa, Dij, Z, idx_i, idx_j, batch_seg) 171 | forces = -tf.convert_to_tensor(tf.gradients(tf.reduce_sum(energy), R)[0]) 172 | return energy, forces 173 | 174 | #calculates the energy given the atomic properties (in order to prevent recomputation if atomic properties are calculated) 175 | def energy_from_atomic_properties(self, Ea, Qa, Dij, Z, idx_i, idx_j, Q_tot=None, batch_seg=None): 176 | with tf.name_scope("energy_from_atomic_properties"): 177 | if batch_seg is None: 178 | batch_seg = tf.zeros_like(Z) 179 | #scale charges such that they have the desired total charge 180 | Qa = self.scaled_charges(Z, Qa, Q_tot, batch_seg) 181 | return self.energy_from_scaled_atomic_properties(Ea, Qa, Dij, Z, idx_i, idx_j, batch_seg) 182 | 183 | #calculates the energy and forces given the atomic properties (in order to prevent recomputation if atomic properties are calculated) 184 | def energy_and_forces_from_atomic_properties(self, Ea, Qa, Dij, Z, R, idx_i, idx_j, Q_tot=None, batch_seg=None): 185 | with tf.name_scope("energy_and_forces_from_atomic_properties"): 186 | energy = self.energy_from_atomic_properties(Ea, Qa, Dij, Z, idx_i, idx_j, Q_tot, batch_seg) 187 | forces = -tf.convert_to_tensor(tf.gradients(tf.reduce_sum(energy), R)[0]) 188 | return energy, forces 189 | 190 | #calculates the total energy (including electrostatic interactions) 191 | def energy(self, Z, R, idx_i, idx_j, Q_tot=None, batch_seg=None, offsets=None, sr_idx_i=None, sr_idx_j=None, sr_offsets=None): 192 | with tf.name_scope("energy"): 193 | Ea, Qa, Dij, _ = self.atomic_properties(Z, R, idx_i, idx_j, offsets, sr_idx_i, sr_idx_j, sr_offsets) 194 | energy = self.energy_from_atomic_properties(Ea, Qa, Dij, Z, idx_i, idx_j, Q_tot, batch_seg) 195 | return energy 196 | 197 | #calculates the total energy and forces (including electrostatic interactions) 198 | def energy_and_forces(self, Z, R, idx_i, idx_j, Q_tot=None, batch_seg=None, offsets=None, sr_idx_i=None, sr_idx_j=None, sr_offsets=None): 199 | with tf.name_scope("energy_and_forces"): 200 | Ea, Qa, Dij, _ = self.atomic_properties(Z, R, idx_i, idx_j, offsets, sr_idx_i, sr_idx_j, sr_offsets) 201 | energy, forces = self.energy_and_forces_from_atomic_properties(Ea, Qa, Dij, Z, R, idx_i, idx_j, Q_tot, batch_seg) 202 | return energy, forces 203 | 204 | #returns scaled charges such that the sum of the partial atomic charges equals Q_tot (defaults to 0) 205 | def scaled_charges(self, Z, Qa, Q_tot=None, batch_seg=None): 206 | with tf.name_scope("scaled_charges"): 207 | if batch_seg is None: 208 | batch_seg = tf.zeros_like(Z) 209 | #number of atoms per batch (needed for charge scaling) 210 | Na_per_batch = tf.segment_sum(tf.ones_like(batch_seg, dtype=self.dtype), batch_seg) 211 | if Q_tot is None: #assume desired total charge zero if not 
given 212 | Q_tot = tf.zeros_like(Na_per_batch, dtype=self.dtype) 213 | #return scaled charges (such that they have the desired total charge) 214 | return Qa + tf.gather(((Q_tot-tf.segment_sum(Qa, batch_seg))/Na_per_batch), batch_seg) 215 | 216 | #switch function for electrostatic interaction (switches between shielded and unshielded electrostatic interaction) 217 | def _switch(self, Dij): 218 | cut = self.sr_cut/2 219 | x = Dij/cut 220 | x3 = x*x*x 221 | x4 = x3*x 222 | x5 = x4*x 223 | return tf.where(Dij < cut, 6*x5-15*x4+10*x3, tf.ones_like(Dij)) 224 | 225 | #calculates the electrostatic energy per atom 226 | #for very small distances, the 1/r law is shielded to avoid singularities 227 | def electrostatic_energy_per_atom(self, Dij, Qa, idx_i, idx_j): 228 | #gather charges 229 | Qi = tf.gather(Qa, idx_i) 230 | Qj = tf.gather(Qa, idx_j) 231 | #calculate variants of Dij which we need to calculate 232 | #the various shielded/non-shielded potentials 233 | DijS = tf.sqrt(Dij*Dij + 1.0) #shielded distance 234 | #calculate value of switching function 235 | switch = self._switch(Dij) #normal switch 236 | cswitch = 1.0-switch #complementary switch 237 | #calculate shielded/non-shielded potentials 238 | if self.lr_cut is None: #no non-bonded cutoff 239 | Eele_ordinary = 1.0/Dij #ordinary electrostatic energy 240 | Eele_shielded = 1.0/DijS #shielded electrostatic energy 241 | #combine shielded and ordinary interactions and apply prefactors 242 | Eele = self.kehalf*Qi*Qj*(cswitch*Eele_shielded + switch*Eele_ordinary) 243 | else: #with non-bonded cutoff 244 | cut = self.lr_cut 245 | cut2 = self.lr_cut*self.lr_cut 246 | Eele_ordinary = 1.0/Dij + Dij/cut2 - 2.0/cut 247 | Eele_shielded = 1.0/DijS + DijS/cut2 - 2.0/cut 248 | #combine shielded and ordinary interactions and apply prefactors 249 | Eele = self.kehalf*Qi*Qj*(cswitch*Eele_shielded + switch*Eele_ordinary) 250 | Eele = tf.where(Dij <= cut, Eele, tf.zeros_like(Eele)) 251 | return tf.segment_sum(Eele, idx_i) 252 | 253 | #save the current model 254 | def save(self, sess, save_path, global_step=None): 255 | self.saver.save(sess, save_path, global_step) 256 | 257 | #load a model 258 | def restore(self, sess, save_path): 259 | self.saver.restore(sess, save_path) 260 | 261 | @property 262 | def keep_prob(self): 263 | return self._keep_prob 264 | 265 | @property 266 | def num_blocks(self): 267 | return self._num_blocks 268 | 269 | @property 270 | def dtype(self): 271 | return self._dtype 272 | 273 | @property 274 | def saver(self): 275 | return self._saver 276 | 277 | @property 278 | def scope(self): 279 | return self._scope 280 | 281 | @property 282 | def variables(self): 283 | scope_filter = self.scope + '/' 284 | varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope_filter) 285 | return { v.name[len(scope_filter):]: v for v in varlist } 286 | 287 | @property 288 | def embeddings(self): 289 | return self._embeddings 290 | 291 | @property 292 | def Eshift(self): 293 | return self._Eshift 294 | 295 | @property 296 | def Escale(self): 297 | return self._Escale 298 | 299 | @property 300 | def Qshift(self): 301 | return self._Qshift 302 | 303 | @property 304 | def Qscale(self): 305 | return self._Qscale 306 | 307 | @property 308 | def s6(self): 309 | return self._s6 310 | 311 | @property 312 | def s8(self): 313 | return self._s8 314 | 315 | @property 316 | def a1(self): 317 | return self._a1 318 | 319 | @property 320 | def a2(self): 321 | return self._a2 322 | 323 | @property 324 | def use_electrostatic(self): 325 | return 
self._use_electrostatic 326 | 327 | @property 328 | def use_dispersion(self): 329 | return self._use_dispersion 330 | 331 | @property 332 | def kehalf(self): 333 | return self._kehalf 334 | 335 | @property 336 | def F(self): 337 | return self._F 338 | 339 | @property 340 | def K(self): 341 | return self._K 342 | 343 | @property 344 | def sr_cut(self): 345 | return self._sr_cut 346 | 347 | @property 348 | def lr_cut(self): 349 | return self._lr_cut 350 | 351 | @property 352 | def activation_fn(self): 353 | return self._activation_fn 354 | 355 | @property 356 | def rbf_layer(self): 357 | return self._rbf_layer 358 | 359 | @property 360 | def interaction_block(self): 361 | return self._interaction_block 362 | 363 | @property 364 | def output_block(self): 365 | return self._output_block 366 | 367 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | import sys 6 | import argparse 7 | import logging 8 | import string 9 | import random 10 | from shutil import copyfile 11 | from datetime import datetime 12 | from neural_network.NeuralNetwork import * 13 | from neural_network.activation_fn import * 14 | from training.Trainer import * 15 | from training.DataContainer import * 16 | from training.DataProvider import * 17 | from training.DataQueue import * 18 | 19 | #used for creating a "unique" id for a run (almost impossible to generate the same twice) 20 | def id_generator(size=8, chars=string.ascii_uppercase + string.ascii_lowercase + string.digits): 21 | return ''.join(random.SystemRandom().choice(chars) for _ in range(size)) 22 | 23 | logging.basicConfig(filename='train.log',level=logging.DEBUG) 24 | 25 | #define command line arguments 26 | parser = argparse.ArgumentParser(fromfile_prefix_chars='@') 27 | parser.add_argument("--restart", type=str, default=None, help="restart training from a specific folder") 28 | parser.add_argument("--num_features", type=int, help="dimensionality of feature vectors") 29 | parser.add_argument("--num_basis", type=int, help="number of radial basis functions") 30 | parser.add_argument("--num_blocks", type=int, help="number of interaction blocks") 31 | parser.add_argument("--num_residual_atomic", type=int, help="number of residual layers for atomic refinements") 32 | parser.add_argument("--num_residual_interaction", type=int, help="number of residual layers for the message phase") 33 | parser.add_argument("--num_residual_output", type=int, help="number of residual layers for the output blocks") 34 | parser.add_argument("--cutoff", default=10.0, type=float, help="cutoff distance for short range interactions") 35 | parser.add_argument("--use_electrostatic", default=1, type=int, help="use electrostatics in energy prediction (0/1)") 36 | parser.add_argument("--use_dispersion", default=1, type=int, help="use dispersion in energy prediction (0/1)") 37 | parser.add_argument("--grimme_s6", default=None, type=float, help="grimme s6 dispersion coefficient") 38 | parser.add_argument("--grimme_s8", default=None, type=float, help="grimme s8 dispersion coefficient") 39 | parser.add_argument("--grimme_a1", default=None, type=float, help="grimme a1 dispersion coefficient") 40 | parser.add_argument("--grimme_a2", default=None, type=float, help="grimme a2 dispersion coefficient") 41 | parser.add_argument("--dataset", type=str, help="file path to dataset") 42 | 
parser.add_argument("--num_train", type=int, help="number of training samples") 43 | parser.add_argument("--num_valid", type=int, help="number of validation samples") 44 | parser.add_argument("--seed", default=42, type=int, help="seed for splitting dataset into training/validation/test") 45 | parser.add_argument("--max_steps", type=int, help="maximum number of training steps") 46 | parser.add_argument("--learning_rate", default=0.001, type=float, help="learning rate used by the optimizer") 47 | parser.add_argument("--max_norm", default=1000.0, type=float, help="max norm for gradient clipping") 48 | parser.add_argument("--ema_decay", default=0.999, type=float, help="exponential moving average decay used by the trainer") 49 | parser.add_argument("--keep_prob", default=1.0, type=float, help="keep probability for dropout regularization of rbf layer") 50 | parser.add_argument("--l2lambda", type=float, help="lambda multiplier for l2 loss (regularization)") 51 | parser.add_argument("--nhlambda", type=float, help="lambda multiplier for non-hierarchicality loss (regularization)") 52 | parser.add_argument("--decay_steps", type=int, help="decay the learning rate every N steps by decay_rate") 53 | parser.add_argument("--decay_rate", type=float, help="factor with which the learning rate gets multiplied by every decay_steps steps") 54 | parser.add_argument("--batch_size", type=int, help="batch size used per training step") 55 | parser.add_argument("--valid_batch_size", type=int, help="batch size used for going through validation_set") 56 | parser.add_argument('--force_weight', default=52.91772105638412, type=float, help="this defines the force contribution to the loss function relative to the energy contribution (to take into account the different numerical range)") 57 | parser.add_argument('--charge_weight', default=14.399645351950548, type=float, help="this defines the charge contribution to the loss function relative to the energy contribution (to take into account the different numerical range)") 58 | parser.add_argument('--dipole_weight', default=27.211386024367243, type=float, help="this defines the dipole contribution to the loss function relative to the energy contribution (to take into account the different numerical range)") 59 | parser.add_argument('--summary_interval', type=int, help="write a summary every N steps") 60 | parser.add_argument('--validation_interval', type=int, help="check performance on validation set every N steps") 61 | parser.add_argument('--save_interval', type=int, help="save progress every N steps") 62 | parser.add_argument('--record_run_metadata', type=int, help="records metadata like memory consumption etc.") 63 | 64 | #if no command line arguments are present, config file is parsed 65 | config_file='config.txt' 66 | if len(sys.argv) == 1: 67 | if os.path.isfile(config_file): 68 | args = parser.parse_args(["@"+config_file]) 69 | else: 70 | args = parser.parse_args(["--help"]) 71 | else: 72 | args = parser.parse_args() 73 | 74 | #create directories 75 | #a unique directory name is created for this run based on the input 76 | if args.restart is None: 77 | directory=datetime.utcnow().strftime("%Y%m%d%H%M%S") + "_" + id_generator() +"_F"+str(args.num_features)+"K"+str(args.num_basis)+"b"+str(args.num_blocks)+"a"+str(args.num_residual_atomic)+"i"+str(args.num_residual_interaction)+"o"+str(args.num_residual_output)+"cut"+str(args.cutoff)+"e"+str(args.use_electrostatic)+"d"+str(args.use_dispersion)+"l2"+str(args.l2lambda)+"nh"+str(args.nhlambda)+"keep"+str(args.keep_prob) 78 
| else: 79 | directory=args.restart 80 | 81 | logging.info("creating directories...") 82 | if not os.path.exists(directory): 83 | os.makedirs(directory) 84 | best_dir = os.path.join(directory, 'best') 85 | if not os.path.exists(best_dir): 86 | os.makedirs(best_dir) 87 | log_dir = os.path.join(directory, 'logs') 88 | if not os.path.exists(log_dir): 89 | os.makedirs(log_dir) 90 | best_loss_file = os.path.join(best_dir, 'best_loss.npz') 91 | best_checkpoint = os.path.join(best_dir, 'best_model.ckpt') 92 | step_checkpoint = os.path.join(log_dir, 'model.ckpt') 93 | 94 | #write config file (to restore command line arguments) 95 | logging.info("writing args to file...") 96 | with open(os.path.join(directory, config_file), 'w') as f: 97 | for arg in vars(args): 98 | f.write('--'+ arg + '='+ str(getattr(args, arg)) + "\n") 99 | 100 | #load dataset 101 | logging.info("loading dataset...") 102 | data = DataContainer(args.dataset) 103 | 104 | #generate DataProvider (splits dataset into training, validation and test set based on seed) 105 | data_provider = DataProvider(data, args.num_train, args.num_valid, args.batch_size, args.valid_batch_size, seed=args.seed) 106 | 107 | #create neural network 108 | logging.info("creating neural network...") 109 | nn = NeuralNetwork(F=args.num_features, 110 | K=args.num_basis, 111 | sr_cut=args.cutoff, 112 | num_blocks=args.num_blocks, 113 | num_residual_atomic=args.num_residual_atomic, 114 | num_residual_interaction=args.num_residual_interaction, 115 | num_residual_output=args.num_residual_output, 116 | use_electrostatic=(args.use_electrostatic==1), 117 | use_dispersion=(args.use_dispersion==1), 118 | s6=args.grimme_s6, 119 | s8=args.grimme_s8, 120 | a1=args.grimme_a1, 121 | a2=args.grimme_a2, 122 | Eshift=data_provider.EperA_mean, 123 | Escale=data_provider.EperA_stdev, 124 | activation_fn=shifted_softplus, 125 | seed=None, 126 | scope="neural_network") 127 | 128 | logging.info("prepare training...") 129 | #generate data queues for efficient training 130 | train_queue = DataQueue(data_provider.next_batch, capacity=1000, scope="train_data_queue") 131 | valid_queue = DataQueue(data_provider.next_valid_batch, capacity=args.num_valid//args.valid_batch_size, scope="valid_data_queue") 132 | 133 | #get values for training and validation set 134 | Eref_t, Earef_t, Fref_t, Z_t, Dref_t, Qref_t, Qaref_t, R_t, idx_i_t, idx_j_t, batch_seg_t = train_queue.dequeue_op 135 | Eref_v, Earef_v, Fref_v, Z_v, Dref_v, Qref_v, Qaref_v, R_v, idx_i_v, idx_j_v, batch_seg_v = valid_queue.dequeue_op 136 | 137 | #calculate all necessary quantities (unscaled partial charges, energies, forces) 138 | Ea_t, Qa_t, Dij_t, nhloss_t = nn.atomic_properties(Z_t, R_t, idx_i_t, idx_j_t) 139 | Ea_v, Qa_v, Dij_v, nhloss_v = nn.atomic_properties(Z_v, R_v, idx_i_v, idx_j_v) 140 | energy_t, forces_t = nn.energy_and_forces_from_atomic_properties(Ea_t, Qa_t, Dij_t, Z_t, R_t, idx_i_t, idx_j_t, Qref_t, batch_seg_t) 141 | energy_v, forces_v = nn.energy_and_forces_from_atomic_properties(Ea_v, Qa_v, Dij_v, Z_v, R_v, idx_i_v, idx_j_v, Qref_v, batch_seg_v) 142 | #total charge 143 | Qtot_t = tf.segment_sum(Qa_t, batch_seg_t) 144 | Qtot_v = tf.segment_sum(Qa_v, batch_seg_v) 145 | #dipole moment vector 146 | QR_t = tf.stack([Qa_t*R_t[:,0], Qa_t*R_t[:,1], Qa_t*R_t[:,2]],1) 147 | QR_v = tf.stack([Qa_v*R_v[:,0], Qa_v*R_v[:,1], Qa_v*R_v[:,2]],1) 148 | D_t = tf.segment_sum(QR_t, batch_seg_t) 149 | D_v = tf.segment_sum(QR_v, batch_seg_v) 150 | 151 | #function to calculate loss, mean squared error, mean absolute error between two 
values 152 | def calculate_errors(val1, val2, weights=1): 153 | with tf.name_scope("calculate_errors"): 154 | delta = tf.abs(val1-val2) 155 | delta2 = delta**2 156 | mse = tf.reduce_mean(delta2) 157 | mae = tf.reduce_mean(delta) 158 | loss = mae #mean absolute error loss 159 | return loss, mse, mae 160 | 161 | with tf.name_scope("loss"): 162 | #calculate energy, force, charge and dipole errors/loss 163 | #energy 164 | if data.E is not None: 165 | eloss_t, emse_t, emae_t = calculate_errors(Eref_t, energy_t) 166 | eloss_v, emse_v, emae_v = calculate_errors(Eref_v, energy_v) 167 | else: 168 | eloss_t, emse_t, emae_t = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 169 | eloss_v, emse_v, emae_v = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 170 | #atomic energies 171 | if data.Ea is not None: 172 | ealoss_t, eamse_t, eamae_t = calculate_errors(Earef_t, Ea_t) 173 | ealoss_v, eamse_v, eamae_v = calculate_errors(Earef_v, Ea_v) 174 | else: 175 | ealoss_t, eamse_t, eamae_t = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 176 | ealoss_v, eamse_v, eamae_v = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 177 | #forces 178 | if data.F is not None: 179 | floss_t, fmse_t, fmae_t = calculate_errors(Fref_t, forces_t) 180 | floss_v, fmse_v, fmae_v = calculate_errors(Fref_v, forces_v) 181 | else: 182 | floss_t, fmse_t, fmae_t = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 183 | floss_v, fmse_v, fmae_v = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 184 | #charge 185 | if data.Q is not None: 186 | qloss_t, qmse_t, qmae_t = calculate_errors(Qref_t, Qtot_t) 187 | qloss_v, qmse_v, qmae_v = calculate_errors(Qref_v, Qtot_v) 188 | else: 189 | qloss_t, qmse_t, qmae_t = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 190 | qloss_v, qmse_v, qmae_v = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 191 | #atomic charges 192 | if data.Qa is not None: 193 | qaloss_t, qamse_t, qamae_t = calculate_errors(Qaref_t, Qa_t) 194 | qaloss_v, qamse_v, qamae_v = calculate_errors(Qaref_v, Qa_v) 195 | else: 196 | qaloss_t, qamse_t, qamae_t = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 197 | qaloss_v, qamse_v, qamae_v = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 198 | #dipole 199 | if data.D is not None: 200 | dloss_t, dmse_t, dmae_t = calculate_errors(Dref_t, D_t) 201 | dloss_v, dmse_v, dmae_v = calculate_errors(Dref_v, D_v) 202 | else: 203 | dloss_t, dmse_t, dmae_t = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 204 | dloss_v, dmse_v, dmae_v = tf.constant(0.0), tf.constant(0.0), tf.constant(0.0) 205 | 206 | #define additional variables (such that certain losses can be overwritten) 207 | eloss_train = eloss_t 208 | floss_train = floss_t 209 | qloss_train = qloss_t 210 | dloss_train = dloss_t 211 | eloss_valid = eloss_v 212 | floss_valid = floss_v 213 | qloss_valid = qloss_v 214 | dloss_valid = dloss_v 215 | 216 | #atomic energies are present, so they replace the normal energy loss 217 | if data.Ea is not None: 218 | eloss_train = ealoss_t 219 | eloss_valid = ealoss_v 220 | 221 | #atomic charges are present, so they replace the normal charge loss and nullify dipole loss 222 | if data.Qa is not None: 223 | qloss_train = qaloss_t 224 | qloss_valid = qaloss_v 225 | dloss_train = tf.constant(0.0) 226 | dloss_valid = tf.constant(0.0) 227 | 228 | #define loss function (used to train the model) 229 | l2loss = tf.reduce_mean(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 230 | loss_t = eloss_train + args.force_weight*floss_train + 
args.charge_weight*qloss_train + args.dipole_weight*dloss_train + args.nhlambda*nhloss_t + args.l2lambda*l2loss 231 | loss_v = eloss_valid + args.force_weight*floss_valid + args.charge_weight*qloss_valid + args.dipole_weight*dloss_valid + args.nhlambda*nhloss_v + args.l2lambda*l2loss 232 | 233 | #create trainer 234 | trainer = Trainer(args.learning_rate, args.decay_steps, args.decay_rate, scope="trainer") 235 | with tf.name_scope("trainer_ops"): 236 | train_op = trainer.build_train_op(loss_t, args.ema_decay, args.max_norm) 237 | save_variable_backups_op = trainer.save_variable_backups() 238 | load_averaged_variables_op = trainer.load_averaged_variables() 239 | restore_variable_backups_op = trainer.restore_variable_backups() 240 | 241 | #creates a summary from key-value pairs given a dictionary 242 | def create_summary(dictionary): 243 | summary = tf.Summary() 244 | for key, value in dictionary.items(): 245 | summary.value.add(tag=key, simple_value=value) 246 | return summary 247 | 248 | #create summary writer 249 | nn_summary_op = tf.summary.merge_all() 250 | summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=tf.get_default_graph()) 251 | 252 | #create saver 253 | with tf.name_scope("saver"): 254 | saver = tf.train.Saver(max_to_keep=50) 255 | 256 | #save/load best recorded loss (only the best model is saved) 257 | if os.path.isfile(best_loss_file): 258 | loss_file = np.load(best_loss_file) 259 | best_loss = loss_file["loss"].item() 260 | best_emae = loss_file["emae"].item() 261 | best_ermse = loss_file["ermse"].item() 262 | best_fmae = loss_file["fmae"].item() 263 | best_frmse = loss_file["frmse"].item() 264 | best_qmae = loss_file["qmae"].item() 265 | best_qrmse = loss_file["qrmse"].item() 266 | best_dmae = loss_file["dmae"].item() 267 | best_drmse = loss_file["drmse"].item() 268 | best_step = loss_file["step"].item() 269 | else: 270 | best_loss = np.Inf #initialize best loss to infinity 271 | best_emae = np.Inf 272 | best_ermse = np.Inf 273 | best_fmae = np.Inf 274 | best_frmse = np.Inf 275 | best_qmae = np.Inf 276 | best_qrmse = np.Inf 277 | best_dmae = np.Inf 278 | best_drmse = np.Inf 279 | best_step = 0. 
280 | np.savez(best_loss_file, loss=best_loss, emae=best_emae, ermse=best_ermse, 281 | fmae=best_fmae, frmse=best_frmse, 282 | qmae=best_qmae, qrmse=best_qrmse, 283 | dmae=best_dmae, drmse=best_drmse, 284 | step=best_step) 285 | 286 | #for calculating average performance on the training set 287 | def reset_averages(): 288 | return 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 289 | 290 | def update_averages(num, tmploss_avg, tmploss, emse_avg, emse, emae_avg, emae, fmse_avg, fmse, fmae_avg, fmae, 291 | qmse_avg, qmse, qmae_avg, qmae, dmse_avg, dmse, dmae_avg, dmae): 292 | num += 1 293 | tmploss_avg += (tmploss-tmploss_avg)/num 294 | emse_avg += (emse-emse_avg)/num 295 | emae_avg += (emae-emae_avg)/num 296 | fmse_avg += (fmse-fmse_avg)/num 297 | fmae_avg += (fmae-fmae_avg)/num 298 | qmse_avg += (qmse-qmse_avg)/num 299 | qmae_avg += (qmae-qmae_avg)/num 300 | dmse_avg += (dmse-dmse_avg)/num 301 | dmae_avg += (dmae-dmae_avg)/num 302 | return num, tmploss_avg, emse_avg, emae_avg, fmse_avg, fmae_avg, qmse_avg, qmae_avg, dmse_avg, dmae_avg 303 | 304 | #initialize training set error averages 305 | num_t, tmploss_avg_t, emse_avg_t, emae_avg_t, fmse_avg_t, fmae_avg_t, qmse_avg_t, qmae_avg_t, dmse_avg_t, dmae_avg_t = reset_averages() 306 | 307 | #create tensorflow session 308 | with tf.Session() as sess: 309 | if (args.record_run_metadata > 0): 310 | run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 311 | run_metadata = tf.RunMetadata() 312 | else: 313 | run_options = None 314 | run_metadata = None 315 | 316 | #start data queues 317 | coord = tf.train.Coordinator() 318 | train_queue.create_thread(sess, coord) 319 | valid_queue.create_thread(sess, coord) 320 | 321 | #initialize variables 322 | tf.global_variables_initializer().run() 323 | 324 | #restore latest checkpoint 325 | checkpoint = tf.train.latest_checkpoint(log_dir) 326 | if checkpoint is not None: 327 | step = int(checkpoint.split('-')[-1]) #reads step from checkpoint filename 328 | saver.restore(sess, checkpoint) 329 | sess.run(trainer.global_step.assign(step)) 330 | else: 331 | step = 0 332 | 333 | #training loop 334 | logging.info("starting training...") 335 | while not coord.should_stop(): 336 | #finish training when maximum number of iterations is reached 337 | if step > args.max_steps: 338 | coord.request_stop() 339 | break 340 | 341 | #perform training step 342 | step += 1 343 | _, tmploss, emse, emae, fmse, fmae, qmse, qmae, dmse, dmae = sess.run([train_op, loss_t, emse_t, emae_t, fmse_t, fmae_t, qmse_t, qmae_t, dmse_t, dmae_t], options=run_options, feed_dict={nn.keep_prob: args.keep_prob}, run_metadata=run_metadata) 344 | 345 | #update averages 346 | num_t, tmploss_avg_t, emse_avg_t, emae_avg_t, fmse_avg_t, fmae_avg_t, qmse_avg_t, qmae_avg_t, dmse_avg_t, dmae_avg_t = update_averages(num_t, tmploss_avg_t, tmploss, emse_avg_t, emse, emae_avg_t, emae, fmse_avg_t, fmse, fmae_avg_t, fmae, qmse_avg_t, qmse, qmae_avg_t, qmae, dmse_avg_t, dmse, dmae_avg_t, dmae) 347 | 348 | #save progress 349 | if (step % args.save_interval == 0): 350 | saver.save(sess, step_checkpoint, global_step=step) 351 | 352 | #check performance on the validation set 353 | if (step % args.validation_interval == 0): 354 | #save backup variables and load averaged variables 355 | sess.run(save_variable_backups_op) 356 | sess.run(load_averaged_variables_op) 357 | 358 | #initialize averages to 0 359 | num_v, tmploss_avg_v, emse_avg_v, emae_avg_v, fmse_avg_v, fmae_avg_v, qmse_avg_v, qmae_avg_v, dmse_avg_v, dmae_avg_v = reset_averages() 360 | #compute 
averages 361 | for i in range(args.num_valid//args.valid_batch_size): 362 | tmploss, emse, emae, fmse, fmae, qmse, qmae, dmse, dmae = sess.run([loss_v, emse_v, emae_v, fmse_v, fmae_v, qmse_v, qmae_v, dmse_v, dmae_v]) 363 | num_v, tmploss_avg_v, emse_avg_v, emae_avg_v, fmse_avg_v, fmae_avg_v, qmse_avg_v, qmae_avg_v, dmse_avg_v, dmae_avg_v = update_averages(num_v, tmploss_avg_v, tmploss, emse_avg_v, emse, emae_avg_v, emae, fmse_avg_v, fmse, fmae_avg_v, fmae, qmse_avg_v, qmse, qmae_avg_v, qmae, dmse_avg_v, dmse, dmae_avg_v, dmae) 364 | 365 | #store results in dictionary 366 | results = {} 367 | results["loss_valid"] = tmploss_avg_v 368 | if data.E is not None: 369 | results["energy_mae_valid"] = emae_avg_v 370 | results["energy_rmse_valid"] = np.sqrt(emse_avg_v) 371 | if data.F is not None: 372 | results["forces_mae_valid"] = fmae_avg_v 373 | results["forces_rmse_valid"] = np.sqrt(fmse_avg_v) 374 | if data.Q is not None: 375 | results["charge_mae_valid"] = qmae_avg_v 376 | results["charge_rmse_valid"] = np.sqrt(qmse_avg_v) 377 | if data.D is not None: 378 | results["dipole_mae_valid"] = dmae_avg_v 379 | results["dipole_rmse_valid"] = np.sqrt(dmse_avg_v) 380 | 381 | if results["loss_valid"] < best_loss: 382 | best_loss = results["loss_valid"] 383 | if data.E is not None: 384 | best_emae = results["energy_mae_valid"] 385 | best_ermse = results["energy_rmse_valid"] 386 | else: 387 | best_emae = np.Inf 388 | best_ermse = np.Inf 389 | if data.F is not None: 390 | best_fmae = results["forces_mae_valid"] 391 | best_frmse = results["forces_rmse_valid"] 392 | else: 393 | best_fmae = np.Inf 394 | best_frmse = np.Inf 395 | if data.Q is not None: 396 | best_qmae = results["charge_mae_valid"] 397 | best_qrmse = results["charge_rmse_valid"] 398 | else: 399 | best_qmae = np.Inf 400 | best_qrmse = np.Inf 401 | if data.D is not None: 402 | best_dmae = results["dipole_mae_valid"] 403 | best_drmse = results["dipole_rmse_valid"] 404 | else: 405 | best_dmae = np.Inf 406 | best_drmse = np.Inf 407 | best_step = step 408 | np.savez(best_loss_file, loss=best_loss, emae=best_emae, ermse=best_ermse, 409 | fmae=best_fmae, frmse=best_frmse, 410 | qmae=best_qmae, qrmse=best_qrmse, 411 | dmae=best_dmae, drmse=best_drmse, 412 | step=best_step) 413 | nn.save(sess, best_checkpoint, global_step=step) 414 | results["loss_best"] = best_loss 415 | if data.E is not None: 416 | results["energy_mae_best"] = best_emae 417 | results["energy_rmse_best"] = best_ermse 418 | if data.F is not None: 419 | results["forces_mae_best"] = best_fmae 420 | results["forces_rmse_best"] = best_frmse 421 | if data.Q is not None: 422 | results["charge_mae_best"] = best_qmae 423 | results["charge_rmse_best"] = best_qrmse 424 | if data.D is not None: 425 | results["dipole_mae_best"] = best_dmae 426 | results["dipole_rmse_best"] = best_drmse 427 | summary = create_summary(results) 428 | summary_writer.add_summary(summary, global_step=step) 429 | 430 | #restore backup variables 431 | sess.run(restore_variable_backups_op) 432 | 433 | #generate summaries 434 | if (step % args.summary_interval == 0) and (step > 0): 435 | results = {} 436 | results["loss_train"] = tmploss_avg_t 437 | if data.E is not None: 438 | results["energy_mae_train"] = emae_avg_t 439 | results["energy_rmse_train"] = np.sqrt(emse_avg_t) 440 | if data.F is not None: 441 | results["forces_mae_train"] = fmae_avg_t 442 | results["forces_rmse_train"] = np.sqrt(fmse_avg_t) 443 | if data.Q is not None: 444 | results["charge_mae_train"] = qmae_avg_t 445 | results["charge_rmse_train"] = 
np.sqrt(qmse_avg_t) 446 | if data.D is not None: 447 | results["dipole_mae_train"] = dmae_avg_t 448 | results["dipole_rmse_train"] = np.sqrt(dmse_avg_t) 449 | num_t, tmploss_avg_t, emse_avg_t, emae_avg_t, fmse_avg_t, fmae_avg_t, qmse_avg_t, qmae_avg_t, dmse_avg_t, dmae_avg_t = reset_averages() 450 | 451 | summary = create_summary(results) 452 | summary_writer.add_summary(summary, global_step=step) 453 | nn_summary = nn_summary_op.eval() 454 | summary_writer.add_summary(nn_summary, global_step=step) 455 | if (args.record_run_metadata > 0): 456 | summary_writer.add_run_metadata(run_metadata, 'step %d' % step, global_step=step) 457 | if data.E is not None: 458 | print(str(step)+'/'+str(args.max_steps), "loss:", results["loss_train"], "best:", best_loss, "emae:", results["energy_mae_train"], "best:", best_emae) 459 | --------------------------------------------------------------------------------
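For reference, a minimal usage sketch of the `NNCalculator` class listed above (not part of the repository): it assumes the repository root is importable as a package (the relative imports in `NNCalculator.py` require this; `physnet` is a placeholder package name), that a trained checkpoint exists at a placeholder path, and that the constructor arguments match the hyperparameters used during training (see `config.txt`).

```
import ase.io
# placeholder package name; adjust to how the repository is made importable
from physnet.NNCalculator import NNCalculator

# read a structure ("geometry.xyz" is a placeholder file name)
atoms = ase.io.read("geometry.xyz")

# restore a trained model; F, K, sr_cut etc. must match the values used for training
calc = NNCalculator(checkpoint="runs/best/best_model.ckpt",  # placeholder checkpoint path
                    atoms=atoms,
                    charge=0,
                    sr_cut=10.0)

# properties are recomputed automatically whenever the atoms object changes
energy  = calc.get_potential_energy(atoms)  # total energy
forces  = calc.get_forces(atoms)            # (N, 3) array of forces
charges = calc.get_charges(atoms)           # partial atomic charges
print(energy, forces.shape, charges.sum())
```

Calling the `get_*` methods directly keeps the sketch independent of a particular ASE calculator-interface version; the class implements the classic `calculation_required`/`get_*` pattern rather than subclassing a modern ASE `Calculator`.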