├── tftf ├── preprocessing │ ├── __init__.py │ └── sequence │ │ ├── __init__.py │ │ ├── Pad.py │ │ ├── sort.py │ │ └── pad_sequences.py ├── models │ ├── __init__.py │ ├── callbacks │ │ ├── __init__.py │ │ └── EarlyStopping.py │ ├── optimizers │ │ ├── adagrad.py │ │ ├── rmsprop.py │ │ ├── sgd.py │ │ ├── adadelta.py │ │ ├── adam.py │ │ ├── momentum.py │ │ └── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── f1.py │ │ ├── recall.py │ │ └── precision.py │ ├── losses │ │ ├── mean_squared_error.py │ │ ├── __init__.py │ │ ├── categorical_crossentropy.py │ │ └── binary_crossentropy.py │ └── Model.py ├── layers │ ├── modules │ │ ├── Module.py │ │ ├── __init__.py │ │ ├── ResNet.py │ │ └── Transformer.py │ ├── activations │ │ ├── linear.py │ │ ├── elu.py │ │ ├── relu.py │ │ ├── selu.py │ │ ├── tanh.py │ │ ├── sigmoid.py │ │ ├── softmax.py │ │ ├── swish.py │ │ ├── leaky_relu.py │ │ ├── hard_sigmoid.py │ │ ├── prelu.py │ │ └── __init__.py │ ├── regularizers │ │ ├── __init__.py │ │ ├── l2.py │ │ ├── l1.py │ │ └── l1_l2.py │ ├── initializers │ │ ├── ones.py │ │ ├── zeros.py │ │ ├── __init__.py │ │ ├── normal.py │ │ ├── orthogonal.py │ │ ├── glorot_normal.py │ │ └── glorot_uniform.py │ ├── GlobalAveragePooling2D.py │ ├── Flatten.py │ ├── Activation.py │ ├── Dropout.py │ ├── __init__.py │ ├── TimeDistributedDense.py │ ├── Dense.py │ ├── Embedding.py │ ├── LayerNormalization.py │ ├── NALU.py │ ├── NAC.py │ ├── BatchNormalization.py │ ├── PositionalEncoding.py │ ├── MaxPooling2D.py │ ├── Conv2D.py │ ├── Attention.py │ ├── RNN.py │ ├── Layer.py │ └── LSTM.py ├── losses │ └── __init__.py ├── metrics │ └── __init__.py ├── optimizers │ └── __init__.py ├── activations │ └── __init__.py ├── initializers │ └── __init__.py ├── __init__.py └── datasets │ ├── __init__.py │ ├── Dataset.py │ ├── imdb.py │ ├── mnist.py │ └── small_parallel_enja.py ├── .gitignore ├── setup.py ├── examples ├── save_restore_model.py ├── lenet_mnist.py ├── lstm_imdb.py ├── mlp_mnist.py ├── lenet_advanced_mnist.py ├── rnn_sin.py ├── lstm_sin.py ├── transformer_translation_enja.py ├── low_level_example.py ├── lstm_translation_enja.py └── attention_translation_enja.py ├── README.md └── LICENSE /tftf/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tftf/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .Model import Model 2 | -------------------------------------------------------------------------------- /tftf/layers/modules/Module.py: -------------------------------------------------------------------------------- 1 | class Module(object): 2 | pass 3 | -------------------------------------------------------------------------------- /tftf/layers/activations/linear.py: -------------------------------------------------------------------------------- 1 | def linear(x): 2 | return x 3 | -------------------------------------------------------------------------------- /tftf/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..models.losses import * 3 | -------------------------------------------------------------------------------- /tftf/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..models.metrics import * 3 | 
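The `tftf.losses` and `tftf.metrics` packages above are thin aliases for `tftf.models.losses` and `tftf.models.metrics`, so either import path can be used interchangeably. A minimal sketch (assuming the package is installed):

```python
from tftf.losses import categorical_crossentropy
from tftf.models.losses import categorical_crossentropy as cce
assert categorical_crossentropy is cce   # same object, re-exported via the alias

from tftf.metrics import accuracy
from tftf.models.metrics import accuracy as acc
assert accuracy is acc
```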
-------------------------------------------------------------------------------- /tftf/models/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .EarlyStopping import EarlyStopping 2 | -------------------------------------------------------------------------------- /tftf/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..models.optimizers import * 3 | -------------------------------------------------------------------------------- /tftf/activations/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..layers.activations import * 3 | -------------------------------------------------------------------------------- /tftf/initializers/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..layers.initializers import * 3 | -------------------------------------------------------------------------------- /tftf/layers/activations/elu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def elu(x): 5 | return tf.nn.elu(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .l1 import l1 2 | from .l2 import l2 3 | from .l1_l2 import l1_l2 4 | -------------------------------------------------------------------------------- /tftf/layers/activations/relu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def relu(x): 5 | return tf.nn.relu(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/selu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def selu(x): 5 | return tf.nn.selu(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/tanh.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def tanh(x): 5 | return tf.nn.tanh(x) 6 | -------------------------------------------------------------------------------- /tftf/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .layers import * 3 | from .models import * 4 | 5 | __version__ = '0.0.29' 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/sigmoid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def sigmoid(x): 5 | return tf.nn.sigmoid(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/softmax.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def softmax(x): 5 | return tf.nn.softmax(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .Module import Module 2 | from .ResNet import ResNet 3 | from .Transformer import Transformer 4 | 
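The activation files above are thin wrappers around `tf.nn`, and the top-level `tftf.activations` / `tftf.initializers` packages re-export them, so they drop straight into a hand-built graph. A minimal sketch, assuming the TensorFlow 1.x API used throughout the repository:

```python
import tensorflow as tf
from tftf import activations as act
from tftf import initializers as ini

x = tf.placeholder(tf.float32, shape=[None, 784])

W = ini.glorot_normal([784, 200])   # returns a tf.Variable
b = ini.zeros([200])
h = act.tanh(tf.matmul(x, W) + b)   # identical to tf.nn.tanh(...)
```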
-------------------------------------------------------------------------------- /tftf/models/optimizers/adagrad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def adagrad(lr=0.01): 5 | return tf.train.AdagradOptimizer(lr) 6 | -------------------------------------------------------------------------------- /tftf/models/optimizers/rmsprop.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def rmsprop(lr=0.001): 5 | return tf.train.RMSPropOptimizer(lr) 6 | -------------------------------------------------------------------------------- /tftf/models/optimizers/sgd.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def sgd(lr=0.01): 5 | return tf.train.GradientDescentOptimizer(lr) 6 | -------------------------------------------------------------------------------- /tftf/preprocessing/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | from .pad_sequences import pad_sequences 2 | from .sort import sort 3 | 4 | from .Pad import Pad 5 | -------------------------------------------------------------------------------- /tftf/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import load_imdb 2 | from .mnist import load_mnist 3 | from .small_parallel_enja import load_small_parallel_enja 4 | -------------------------------------------------------------------------------- /tftf/layers/activations/swish.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sigmoid import sigmoid 3 | 4 | 5 | def swish(x): 6 | return x * sigmoid(x) 7 | -------------------------------------------------------------------------------- /tftf/layers/activations/leaky_relu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def leaky_relu(x, alpha=0.2): 5 | return tf.nn.leaky_relu(x, alpha=alpha) 6 | -------------------------------------------------------------------------------- /tftf/models/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy 2 | from .f1 import f1 3 | from .precision import precision 4 | from .recall import recall 5 | -------------------------------------------------------------------------------- /tftf/models/optimizers/adadelta.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def adadelta(lr=1.0, rho=0.95): 5 | return tf.train.AdadeltaOptimizer(lr, rho) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/hard_sigmoid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def hard_sigmoid(x): 5 | return tf.minimum(1.0, tf.maximum(0.0, 0.2 * x + 0.5)) 6 | -------------------------------------------------------------------------------- /tftf/models/losses/mean_squared_error.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def mean_squared_error(y, t): 5 | loss = tf.reduce_mean(tf.square(y - t)) 6 | return loss 7 | 
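Each optimizer factory above simply returns the corresponding `tf.train` optimizer with the given hyperparameters, so it can be paired with any of the loss helpers. A minimal linear-regression sketch, assuming the TensorFlow 1.x API used throughout the repository (shapes and the learning rate are illustrative):

```python
import tensorflow as tf
from tftf import losses as loss
from tftf import optimizers as opt

x = tf.placeholder(tf.float32, shape=[None, 1])
t = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.zeros([1, 1]))
b = tf.Variable(tf.zeros([1]))
y = tf.matmul(x, W) + b

cost = loss.mean_squared_error(y, t)        # tf.reduce_mean(tf.square(y - t))
train_step = opt.sgd(0.1).minimize(cost)    # tf.train.GradientDescentOptimizer(0.1)
```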
-------------------------------------------------------------------------------- /tftf/models/optimizers/adam.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def adam(lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8): 5 | return tf.train.AdamOptimizer(lr, beta1, beta2, eps) 6 | -------------------------------------------------------------------------------- /tftf/models/optimizers/momentum.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def momentum(lr=0.01, momentum=0.9, use_nesterov=True): 5 | return tf.train.MomentumOptimizer(lr, momentum, use_nesterov) 6 | -------------------------------------------------------------------------------- /tftf/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .binary_crossentropy import binary_crossentropy 2 | from .categorical_crossentropy import categorical_crossentropy 3 | from .mean_squared_error import mean_squared_error 4 | -------------------------------------------------------------------------------- /tftf/models/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .adadelta import adadelta 2 | from .adagrad import adagrad 3 | from .adam import adam 4 | from .momentum import momentum 5 | from .rmsprop import rmsprop 6 | from .sgd import sgd 7 | -------------------------------------------------------------------------------- /tftf/layers/initializers/ones.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def ones(shape, name=None, type='float32'): 6 | init = np.ones(shape).astype(type) 7 | return tf.Variable(init, name=name) 8 | -------------------------------------------------------------------------------- /tftf/layers/initializers/zeros.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def zeros(shape, name=None, type='float32'): 6 | init = np.zeros(shape).astype(type) 7 | return tf.Variable(init, name=name) 8 | -------------------------------------------------------------------------------- /tftf/layers/initializers/__init__.py: -------------------------------------------------------------------------------- 1 | from .glorot_normal import glorot_normal 2 | from .glorot_uniform import glorot_uniform 3 | from .normal import normal 4 | from .ones import ones 5 | from .orthogonal import orthogonal 6 | from .zeros import zeros 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | mldata/* 4 | tmp 5 | 6 | __pycache__ 7 | *.pyc 8 | .ipynb_checkpoints 9 | *.ipynb 10 | 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | var 18 | sdist 19 | develop-eggs 20 | .installed.cfg 21 | lib 22 | lib64 23 | 24 | pip-log.txt 25 | -------------------------------------------------------------------------------- /tftf/layers/activations/prelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def prelu(x, type='float32'): 6 | alpha = tf.Variable(np.zeros([x.get_shape()[-1]]).astype(type), 7 | name='alpha') 8 | return tf.maximum(0., x) + alpha 
* tf.minimum(0., x) 9 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/l2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class L2(object): 5 | def __init__(self, alpha): 6 | self.alpha = alpha 7 | 8 | def loss(self, weights): 9 | return self.alpha * tf.nn.l2_loss(weights) 10 | 11 | 12 | def l2(alpha=0.): 13 | reg = L2(alpha) 14 | return reg.loss 15 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/l1.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class L1(object): 5 | def __init__(self, alpha): 6 | self.alpha = alpha 7 | 8 | def loss(self, weights): 9 | return self.alpha * tf.reduce_sum(tf.abs(weights)) 10 | 11 | 12 | def l1(alpha=0.): 13 | reg = L1(alpha) 14 | return reg.loss 15 | -------------------------------------------------------------------------------- /tftf/layers/initializers/normal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def normal(shape, mean=0., std=1., name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | init = rng.normal(loc=mean, scale=std, size=shape).astype(type) 10 | return tf.Variable(init, name=name) 11 | -------------------------------------------------------------------------------- /tftf/models/losses/categorical_crossentropy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def categorical_crossentropy(y, t): 5 | loss = \ 6 | tf.reduce_mean(-tf.reduce_sum( 7 | t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), 8 | axis=list(range(1, len(y.get_shape()))))) 9 | 10 | return loss 11 | -------------------------------------------------------------------------------- /tftf/layers/activations/__init__.py: -------------------------------------------------------------------------------- 1 | from .elu import elu 2 | from .hard_sigmoid import hard_sigmoid 3 | from .leaky_relu import leaky_relu 4 | from .linear import linear 5 | # from .prelu import prelu 6 | from .relu import relu 7 | from .selu import selu 8 | from .sigmoid import sigmoid 9 | from .softmax import softmax 10 | from .swish import swish 11 | from .tanh import tanh 12 | -------------------------------------------------------------------------------- /tftf/models/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import accuracy_score 3 | 4 | 5 | def accuracy(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return accuracy_score(preds > thres, target) 8 | else: 9 | return accuracy_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32')) 11 | -------------------------------------------------------------------------------- /tftf/models/metrics/f1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import f1_score 3 | 4 | 5 | def f1(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return f1_score(preds > thres, target) 8 | else: 9 | return f1_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32'), 11 | average='macro') 12 | --------------------------------------------------------------------------------
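The metric helpers above work on NumPy arrays rather than tensors: a single-column prediction is thresholded at `thres`, while anything wider is reduced with `argmax` and compared against one-hot targets. A toy sketch with made-up values:

```python
import numpy as np
from tftf.metrics import accuracy, f1

# multi-class: rows of class probabilities vs. one-hot targets
preds = np.array([[0.8, 0.1, 0.1],
                  [0.2, 0.7, 0.1]])
target = np.array([[1, 0, 0],
                   [0, 1, 0]])
print(accuracy(preds, target))   # 1.0
print(f1(preds, target))         # macro-averaged F1, 1.0 here

# binary: a single output unit, thresholded at 0.5 by default
preds = np.array([[0.9], [0.2]])
target = np.array([[1], [0]])
print(accuracy(preds, target))   # 1.0
```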
/tftf/preprocessing/sequence/Pad.py: -------------------------------------------------------------------------------- 1 | from .pad_sequences import pad_sequences 2 | 3 | 4 | class Pad(object): 5 | def __init__(self, padding='pre', value=0): 6 | self.padding = padding 7 | self.value = value 8 | 9 | def __call__(self, data): 10 | return pad_sequences(data, 11 | padding=self.padding, 12 | value=self.value) 13 | -------------------------------------------------------------------------------- /tftf/models/metrics/recall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import recall_score 3 | 4 | 5 | def recall(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return recall_score(preds > thres, target) 8 | else: 9 | return recall_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32'), 11 | average='macro') 12 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/l1_l2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .l1 import L1 3 | from .l2 import L2 4 | 5 | 6 | class L1_L2(object): 7 | def __init__(self, l1, l2): 8 | self.L1 = L1(l1) 9 | self.L2 = L2(l2) 10 | 11 | def loss(self, weights): 12 | return self.L1.loss(weights) + self.L2.loss(weights) 13 | 14 | 15 | def l1_l2(l1=0., l2=0.): 16 | reg = L1_L2(l1, l2) 17 | return reg.loss 18 | -------------------------------------------------------------------------------- /tftf/models/losses/binary_crossentropy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def binary_crossentropy(y, t): 5 | loss = \ 6 | -tf.reduce_mean(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) 7 | + (1. - t) * tf.log(tf.clip_by_value(1. 
- y, 8 | 1e-10, 9 | 1.0))) 10 | return loss 11 | -------------------------------------------------------------------------------- /tftf/models/metrics/precision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import precision_score 3 | 4 | 5 | def precision(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return precision_score(preds > thres, target) 8 | else: 9 | return precision_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32'), 11 | average='macro') 12 | -------------------------------------------------------------------------------- /tftf/layers/initializers/orthogonal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def orthogonal(shape, scale=1., name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | rndn = rng.normal(0., 1., shape).astype(type) 10 | u, _, v = np.linalg.svd(rndn, full_matrices=False) 11 | if u.shape == shape: 12 | return scale * u 13 | else: 14 | return scale * v 15 | -------------------------------------------------------------------------------- /tftf/datasets/Dataset.py: -------------------------------------------------------------------------------- 1 | class Dataset(object): 2 | def __init__(self, data=None, target=None): 3 | self._data = data 4 | self._target = target 5 | 6 | @property 7 | def data(self): 8 | return self._data 9 | 10 | @data.setter 11 | def data(self, val): 12 | self._data = val 13 | 14 | @property 15 | def target(self): 16 | return self._target 17 | 18 | @target.setter 19 | def target(self, val): 20 | self._target = val 21 | -------------------------------------------------------------------------------- /tftf/layers/GlobalAveragePooling2D.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | 4 | 5 | class GlobalAveragePooling2D(Layer): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def compile(self): 10 | pass 11 | 12 | def forward(self, x, **kwargs): 13 | return tf.reduce_mean(x, axis=[1, 2]) 14 | 15 | def initialize_output_dim(self): 16 | super().initialize_output_dim() 17 | self.output_dim = self.input_dim[-1] 18 | return self.output_dim 19 | -------------------------------------------------------------------------------- /tftf/layers/Flatten.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class Flatten(Layer): 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def compile(self): 11 | pass 12 | 13 | def forward(self, x, **kwargs): 14 | return tf.reshape(x, (-1, self.output_dim)) 15 | 16 | def initialize_output_dim(self): 17 | super().initialize_output_dim() 18 | self.output_dim = np.prod(self.input_shape) 19 | return self.output_dim 20 | -------------------------------------------------------------------------------- /tftf/layers/initializers/glorot_normal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def glorot_normal(shape, name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | if len(shape) == 2: 10 | fan_in = shape[0] 11 | elif len(shape) == 4: 12 | fan_in = np.prod(shape[:3]) 13 | else: 14 | raise ValueError('Dimension of shape must be 
2 or 4.') 15 | 16 | init = np.sqrt(1 / fan_in) * rng.normal(size=shape).astype(type) 17 | return tf.Variable(init, name=name) 18 | -------------------------------------------------------------------------------- /tftf/preprocessing/sequence/sort.py: -------------------------------------------------------------------------------- 1 | def sort(data, target, 2 | order='ascend'): 3 | if order == 'ascend' or order == 'ascending': 4 | a = True 5 | elif order == 'descend' or order == 'descending': 6 | a = False 7 | else: 8 | raise ValueError('`order` must be of \'ascend\' or \'descend\'.') 9 | 10 | lens = [len(i) for i in data] 11 | indices = sorted(range(len(lens)), 12 | key=lambda x: (2 * a - 1) * lens[x]) 13 | data = [data[i] for i in indices] 14 | target = [target[i] for i in indices] 15 | 16 | return (data, target) 17 | -------------------------------------------------------------------------------- /tftf/layers/Activation.py: -------------------------------------------------------------------------------- 1 | from .Layer import Layer 2 | 3 | 4 | class Activation(Layer): 5 | def __init__(self, activation='linear'): 6 | super().__init__() 7 | self.activation = self.activation_initializer(activation) 8 | 9 | def __repr__(self): 10 | return '<{}: {}({}, {})>'.format(self.__class__.__name__, 11 | self.activation.__name__, 12 | self.input_dim, 13 | self.output_dim) 14 | 15 | def compile(self): 16 | pass 17 | 18 | def forward(self, x, **kwargs): 19 | return self.activation(x) 20 | -------------------------------------------------------------------------------- /tftf/layers/Dropout.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | 4 | 5 | class Dropout(Layer): 6 | def __init__(self, p_dropout, rng=None): 7 | super().__init__() 8 | if p_dropout < 0. or p_dropout >= 1: 9 | raise ValueError('p_dropout must be between 0 and 1.') 10 | self.p = 1. - p_dropout 11 | 12 | def compile(self): 13 | pass 14 | 15 | def forward(self, x, **kwargs): 16 | training = kwargs['training'] \ 17 | if 'training' in kwargs else tf.constant(False, dtype=tf.bool) 18 | p = tf.cond(training, lambda: self.p, lambda: 1.) 
19 | return tf.nn.dropout(x, p) 20 | -------------------------------------------------------------------------------- /tftf/models/callbacks/EarlyStopping.py: -------------------------------------------------------------------------------- 1 | class EarlyStopping(object): 2 | def __init__(self, patience=10, verbose=0): 3 | self._step = 0 4 | self._loss = float('inf') 5 | self.patience = patience 6 | self.verbose = verbose 7 | 8 | def on_epoch_end(self, epoch, loss): 9 | if self._loss < loss: 10 | self._step += 1 11 | if self._step > self.patience: 12 | if self.verbose: 13 | print('Early stopping on epoch {}.'.format(epoch)) 14 | return True 15 | else: 16 | self._step = 0 17 | self._loss = loss 18 | return False 19 | -------------------------------------------------------------------------------- /tftf/layers/initializers/glorot_uniform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def glorot_uniform(shape, name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | if len(shape) == 2: 10 | fan_in = shape[0] 11 | fan_out = shape[1] 12 | elif len(shape) == 4: 13 | fan_in = np.prod(shape[:3]) 14 | fan_out = np.prod(shape[:2]) * shape[3] 15 | else: 16 | raise ValueError('Dimension of shape must be 2 or 4.') 17 | 18 | high = np.sqrt(6 / (fan_in + fan_out)) 19 | init = rng.uniform(low=-high, 20 | high=high, 21 | size=shape).astype(type) 22 | return tf.Variable(init, name=name) 23 | -------------------------------------------------------------------------------- /tftf/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import activations 2 | from . import initializers 3 | from . 
import regularizers 4 | 5 | from .Activation import Activation 6 | from .Attention import Attention 7 | from .BatchNormalization import BatchNormalization 8 | from .Conv2D import Conv2D 9 | from .Dense import Dense 10 | from .Dropout import Dropout 11 | from .Embedding import Embedding 12 | from .Flatten import Flatten 13 | from .GlobalAveragePooling2D import GlobalAveragePooling2D 14 | from .Layer import Layer 15 | from .LayerNormalization import LayerNormalization 16 | from .LSTM import LSTM 17 | from .MaxPooling2D import MaxPooling2D 18 | from .NAC import NAC 19 | from .NALU import NALU 20 | from .PositionalEncoding import PositionalEncoding 21 | from .RNN import RNN 22 | from .TimeDistributedDense import TimeDistributedDense 23 | -------------------------------------------------------------------------------- /tftf/layers/TimeDistributedDense.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Dense import Dense 3 | from .initializers import zeros 4 | 5 | 6 | class TimeDistributedDense(Dense): 7 | def __init__(self, output_dim, 8 | input_dim=None, 9 | initializer='glorot_normal', 10 | regularizer=None, 11 | rng=None): 12 | super().__init__(output_dim, 13 | input_dim=input_dim, 14 | initializer=initializer, 15 | regularizer=regularizer, 16 | rng=rng) 17 | 18 | def forward(self, x, **kwargs): 19 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 20 | if not recurr: 21 | return tf.matmul(x, self.W) + self.b 22 | else: 23 | return tf.einsum('ijk,kl->ijl', x, self.W) + self.b 24 | -------------------------------------------------------------------------------- /tftf/preprocessing/sequence/pad_sequences.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pad_sequences(data, 5 | padding='pre', 6 | value=0): 7 | ''' 8 | # Arguments 9 | data: list of lists / np.array of lists 10 | 11 | # Returns 12 | numpy.ndarray 13 | ''' 14 | if type(data[0]) is not list: 15 | raise ValueError('`data` must be a list of lists') 16 | maxlen = len(max(data, key=len)) 17 | 18 | if padding == 'pre': 19 | data = \ 20 | [[value] * (maxlen - len(data[i])) + data[i] 21 | for i in range(len(data))] 22 | elif padding == 'post': 23 | data = \ 24 | [data[i] + [value] * (maxlen - len(data[i])) 25 | for i in range(len(data))] 26 | else: 27 | raise ValueError('`padding` must be one of \'pre\' or \'post\'') 28 | 29 | return np.array(data) 30 | -------------------------------------------------------------------------------- /tftf/layers/Dense.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | from .initializers import zeros 4 | 5 | 6 | class Dense(Layer): 7 | def __init__(self, output_dim, 8 | input_dim=None, 9 | initializer='glorot_normal', 10 | regularizer=None, 11 | rng=None): 12 | super().__init__() 13 | self.output_dim = output_dim 14 | self.input_dim = input_dim 15 | self.initializer = initializer 16 | self.regularizer = regularizer 17 | 18 | def compile(self): 19 | self.W = \ 20 | self.kernel_initializer(self.initializer, 21 | shape=(self.input_dim, self.output_dim), 22 | name='W') 23 | self.b = zeros((self.output_dim), name='b') 24 | 25 | self.params = [self.W, self.b] 26 | 27 | if self.regularizer is not None: 28 | self.reg_loss = [self.regularizer(self.W)] 29 | 30 | def forward(self, x, **kwargs): 31 | return tf.matmul(x, self.W) + self.b 32 | 
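`pad_sequences` above pads every sequence in a batch to the length of the longest one, either in front (default `'pre'`) or at the end (`'post'`). A toy sketch with made-up data; the `Pad` class shown earlier wraps the same function as a callable, which is how `examples/lstm_imdb.py` passes it to `Model.fit` via `preprocesses=[Pad(value=0)]`:

```python
from tftf.preprocessing.sequence import pad_sequences

seqs = [[1, 2, 3], [4, 5], [6]]

pad_sequences(seqs)                           # pre-padding with 0 (default)
# -> array([[1, 2, 3],
#           [0, 4, 5],
#           [0, 0, 6]])

pad_sequences(seqs, padding='post', value=9)  # post-padding with 9
# -> array([[1, 2, 3],
#           [4, 5, 9],
#           [6, 9, 9]])
```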
-------------------------------------------------------------------------------- /tftf/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Dataset import Dataset 4 | 5 | 6 | class IMDb(Dataset): 7 | pass 8 | 9 | 10 | def load_imdb(one_hot=True, 11 | num_words=None, 12 | start_char=1, 13 | oov_char=2, 14 | index_from=3, 15 | train_test_split=True): 16 | train, valid = \ 17 | tf.keras.datasets.imdb.load_data(num_words=num_words, 18 | start_char=start_char, 19 | oov_char=oov_char, 20 | index_from=index_from) 21 | train = list(train) 22 | valid = list(valid) 23 | 24 | if one_hot: 25 | train[1] = train[1][:, np.newaxis] 26 | valid[1] = valid[1][:, np.newaxis] 27 | 28 | if not train_test_split: 29 | data = np.append(train[0], valid[0], axis=0) 30 | target = np.append(train[1], valid[1], axis=0) 31 | 32 | return IMDb(data, target) 33 | 34 | else: 35 | return tuple(train), tuple(valid) 36 | -------------------------------------------------------------------------------- /tftf/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Dataset import Dataset 4 | 5 | 6 | class MNIST(Dataset): 7 | pass 8 | 9 | 10 | def load_mnist(one_hot=True, 11 | train_test_split=True, 12 | flatten=False, 13 | include_channel=True): 14 | 15 | train, valid = tf.keras.datasets.mnist.load_data() 16 | train = list(train) 17 | valid = list(valid) 18 | 19 | if flatten: 20 | train[0] = train[0].reshape(-1, 784) 21 | valid[0] = valid[0].reshape(-1, 784) 22 | elif include_channel: 23 | train[0] = train[0].reshape(len(train[0]), 28, 28, 1) 24 | valid[0] = valid[0].reshape(len(valid[0]), 28, 28, 1) 25 | 26 | if one_hot: 27 | train[1] = np.eye(10)[train[1].astype(int)] 28 | valid[1] = np.eye(10)[valid[1].astype(int)] 29 | 30 | if not train_test_split: 31 | data = np.append(train[0], valid[0], axis=0) 32 | target = np.append(train[1], valid[1], axis=0) 33 | return MNIST(data, target) 34 | else: 35 | return tuple(train), tuple(valid) 36 | -------------------------------------------------------------------------------- /tftf/layers/Embedding.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | from .initializers import normal 4 | 5 | 6 | class Embedding(Layer): 7 | def __init__(self, output_dim, 8 | input_dim=None, 9 | initializer='normal'): 10 | ''' 11 | # Arguments 12 | input_dim: num of words (maximum index) 13 | output_dim: embedding dimension 14 | ''' 15 | super().__init__() 16 | self._input_dtype = tf.int32 17 | 18 | self.output_dim = output_dim 19 | self.input_dim = input_dim 20 | self.initializer = initializer 21 | 22 | if self.input_dim is None: 23 | raise ValueError('input_dim must be specified on Embedding layer.') 24 | 25 | def compile(self): 26 | self.W = \ 27 | self.kernel_initializer(self.initializer, 28 | shape=(self.input_dim, self.output_dim), 29 | name='W') 30 | 31 | self.params = [self.W] 32 | 33 | def forward(self, x, **kwargs): 34 | return tf.nn.embedding_lookup(self.W, x) 35 | -------------------------------------------------------------------------------- /tftf/layers/LayerNormalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class LayerNormalization(Layer): 7 | def 
__init__(self, 8 | a_initializer='ones', 9 | b_initializer='zeros', 10 | eps=np.float32(1e-6)): 11 | super().__init__() 12 | self.a_initializer = a_initializer 13 | self.b_initializer = b_initializer 14 | self.eps = eps 15 | 16 | def compile(self): 17 | self.a = \ 18 | self.kernel_initializer(self.a_initializer, 19 | shape=(self.input_dim), 20 | name='a') 21 | self.b = \ 22 | self.kernel_initializer(self.b_initializer, 23 | shape=(self.input_dim), 24 | name='b') 25 | 26 | self.params = [self.a, self.b] 27 | 28 | def forward(self, x, **kwargs): 29 | axes = 0 30 | mean, var = tf.nn.moments(x, axes=-1, keep_dims=True) 31 | std = tf.sqrt(var) + self.eps 32 | return self.a * (x - mean) / std + self.b 33 | -------------------------------------------------------------------------------- /tftf/layers/NALU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .NAC import NAC 3 | from .activations import sigmoid 4 | 5 | 6 | class NALU(NAC): 7 | ''' 8 | Neural Arithmetic Logic Unit 9 | https://arxiv.org/pdf/1808.00508.pdf 10 | ''' 11 | def __init__(self, output_dim, 12 | input_dim=None, 13 | initializer='normal', 14 | rng=None): 15 | super().__init__(output_dim, 16 | input_dim=input_dim, 17 | initializer=initializer, 18 | rng=rng) 19 | 20 | def compile(self): 21 | super().compile() 22 | self.G = \ 23 | self.kernel_initializer(self.initializer, 24 | shape=(self.input_dim, self.output_dim), 25 | name='G') 26 | 27 | self.params.append(self.G) 28 | 29 | def forward(self, x, **kwargs): 30 | eps = 1e-8 31 | self.g = sigmoid(tf.matmul(x, self.G)) 32 | self.m = tf.exp(tf.matmul(tf.log(tf.abs(x) + eps), self.W)) 33 | self.a = tf.matmul(x, self.W) 34 | 35 | return self.g * self.a + (1 - self.g) * self.m 36 | -------------------------------------------------------------------------------- /tftf/layers/NAC.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | from .activations import sigmoid, tanh 4 | 5 | 6 | class NAC(Layer): 7 | ''' 8 | Neural Accumulator 9 | https://arxiv.org/pdf/1808.00508.pdf 10 | ''' 11 | def __init__(self, output_dim, 12 | input_dim=None, 13 | initializer='normal', 14 | rng=None): 15 | super().__init__() 16 | self.output_dim = output_dim 17 | self.input_dim = input_dim 18 | self.initializer = initializer 19 | 20 | def compile(self): 21 | self.W_hat = \ 22 | self.kernel_initializer(self.initializer, 23 | shape=(self.input_dim, self.output_dim), 24 | name='W_hat') 25 | self.M_hat = \ 26 | self.kernel_initializer(self.initializer, 27 | shape=(self.input_dim, self.output_dim), 28 | name='W_hat') 29 | self.W = tanh(self.W_hat) * sigmoid(self.M_hat) 30 | 31 | self.params = [self.W_hat, self.M_hat] 32 | 33 | def forward(self, x, **kwargs): 34 | return tf.matmul(x, self.W) 35 | -------------------------------------------------------------------------------- /tftf/layers/BatchNormalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class BatchNormalization(Layer): 7 | def __init__(self, 8 | gamma_initializer='ones', 9 | beta_initializer='zeros', 10 | eps=np.float32(1e-6)): 11 | super().__init__() 12 | self.gamma_initializer = gamma_initializer 13 | self.beta_initializer = beta_initializer 14 | self.eps = eps 15 | 16 | def compile(self): 17 | self.gamma = \ 18 | self.kernel_initializer(self.gamma_initializer, 
19 | shape=(self.input_dim), 20 | name='gamma') 21 | self.beta = \ 22 | self.kernel_initializer(self.beta_initializer, 23 | shape=(self.input_dim), 24 | name='beta') 25 | 26 | self.params = [self.gamma, self.beta] 27 | 28 | def forward(self, x, **kwargs): 29 | axes = list(range(len(x.get_shape()) - 1)) 30 | mean, var = tf.nn.moments(x, axes, keep_dims=True) 31 | std = tf.sqrt(var + self.eps) 32 | return self.gamma * (x - mean) / std + self.beta 33 | -------------------------------------------------------------------------------- /tftf/layers/PositionalEncoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class PositionalEncoding(Layer): 7 | def __init__(self, output_dim, 8 | with_identity=True, 9 | maxlen=6000): 10 | ''' 11 | Positional encoding layer with sinusoid 12 | 13 | # Arguments 14 | maxlen: max length of sequence 15 | ''' 16 | super().__init__() 17 | self.output_dim = output_dim 18 | self.maxlen = maxlen 19 | self._with_identity = with_identity 20 | self._pe = self._initialize_pe() 21 | 22 | def compile(self): 23 | pass 24 | 25 | def forward(self, x, **kwargs): 26 | pe = self._pe[:, :tf.shape(x)[1]] 27 | if self._with_identity: 28 | return x + pe 29 | else: 30 | return pe 31 | 32 | def _initialize_pe(self): 33 | pe = np.zeros(shape=(self.maxlen, self.output_dim), dtype=np.float32) 34 | pos = np.arange(0, self.maxlen)[:, np.newaxis] 35 | div = np.exp(np.arange(0, self.output_dim, 2) 36 | * -(np.log(10000.0) / self.output_dim)) 37 | 38 | pe[:, 0::2] = np.sin(pos * div) 39 | pe[:, 1::2] = np.cos(pos * div) 40 | return tf.constant(pe[np.newaxis, :]) 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | setup( 5 | name='tftf', 6 | version='0.0.29', 7 | description='TensorFlow TransFormer', 8 | author='Yusuke Sugomori', 9 | author_email='me@yusugomori.com', 10 | url='https://github.com/yusugomori/tftf', 11 | download_url='', 12 | license='Apache 2.0', 13 | install_requires=['numpy>=1.13.3', 14 | 'scikit-learn>=0.19.1'], 15 | classifiers=[ 16 | 'Development Status :: 3 - Alpha', 17 | 'Intended Audience :: Developers', 18 | 'Intended Audience :: Education', 19 | 'Intended Audience :: Science/Research', 20 | 'License :: OSI Approved :: Apache Software License', 21 | 'Programming Language :: Python :: 3', 22 | 'Programming Language :: Python :: 3.4', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Programming Language :: Python :: 3.6', 25 | 'Topic :: Scientific/Engineering', 26 | 'Topic :: Scientific/Engineering :: Mathematics', 27 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 28 | 'Topic :: Software Development', 29 | 'Topic :: Software Development :: Libraries', 30 | 'Topic :: Software Development :: Libraries :: Python Modules', 31 | ], 32 | keywords='tensorflow keras machine deep learning', 33 | packages=find_packages() 34 | ) 35 | -------------------------------------------------------------------------------- /examples/save_restore_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Layer, Dense, Activation, Dropout 6 
| from tftf.models import Model 7 | 8 | 9 | if __name__ == '__main__': 10 | np.random.seed(0) 11 | tf.set_random_seed(0) 12 | 13 | ''' 14 | Load data 15 | ''' 16 | mnist = load_mnist(train_test_split=False, flatten=True) 17 | 18 | n = len(mnist.data) 19 | N = 30000 20 | indices = np.random.permutation(range(n))[:N] 21 | 22 | X = mnist.data[indices] 23 | X = X / 255.0 24 | y = mnist.target[indices] 25 | 26 | train_X, test_X, train_y, test_y = train_test_split(X, y) 27 | 28 | ''' 29 | Build model 30 | ''' 31 | model = Model() 32 | model.add(Dense(10, input_dim=784)) 33 | model.add(Activation('softmax')) 34 | model.compile() 35 | 36 | model.describe() 37 | 38 | ''' 39 | Train model 40 | ''' 41 | model.fit(train_X, train_y, epochs=3, metrics=['accuracy']) 42 | 43 | ''' 44 | Test model 45 | ''' 46 | print(model.accuracy(test_X, test_y)) 47 | 48 | ''' 49 | Save model 50 | ''' 51 | model.save('tmp/model.ckpt') 52 | 53 | ''' 54 | Restore model 55 | ''' 56 | model = Model() 57 | model.add(Dense(10, input_dim=784)) 58 | model.add(Activation('softmax')) 59 | model.restore('tmp/model.ckpt') 60 | model.compile() 61 | 62 | ''' 63 | Test model 64 | ''' 65 | print(model.accuracy(test_X, test_y)) 66 | -------------------------------------------------------------------------------- /examples/lenet_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten 6 | from tftf.models import Model 7 | 8 | 9 | if __name__ == '__main__': 10 | np.random.seed(0) 11 | tf.set_random_seed(123) 12 | 13 | ''' 14 | Load data 15 | ''' 16 | mnist = load_mnist(train_test_split=False) 17 | 18 | n = len(mnist.data) 19 | N = 30000 20 | indices = np.random.permutation(range(n))[:N] 21 | 22 | X = mnist.data[indices] 23 | X = X / 255.0 24 | y = mnist.target[indices] 25 | 26 | train_X, test_X, train_y, test_y = train_test_split(X, y) 27 | 28 | ''' 29 | Build model 30 | ''' 31 | model = Model() 32 | model.add(Conv2D(input_dim=(28, 28, 1), 33 | kernel_size=(3, 3, 20), 34 | padding='valid')) 35 | model.add(Activation('tanh')) 36 | model.add(MaxPooling2D()) 37 | model.add(Conv2D(kernel_size=(3, 3, 50), 38 | padding='valid')) 39 | model.add(Activation('tanh')) 40 | model.add(MaxPooling2D()) 41 | model.add(Flatten()) 42 | model.add(Dense(1024)) 43 | model.add(Activation('tanh')) 44 | model.add(Dense(200)) 45 | model.add(Activation('tanh')) 46 | model.add(Dense(10)) 47 | model.add(Activation('softmax')) 48 | model.compile() 49 | 50 | model.describe() 51 | 52 | ''' 53 | Train model 54 | ''' 55 | model.fit(train_X, train_y, 56 | metrics=['accuracy']) 57 | 58 | ''' 59 | Test model 60 | ''' 61 | print(model.accuracy(test_X, test_y)) 62 | -------------------------------------------------------------------------------- /examples/lstm_imdb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_imdb 5 | from tftf.layers \ 6 | import Dense, Activation, RNN, LSTM, Embedding 7 | from tftf.preprocessing.sequence import Pad 8 | from tftf.preprocessing.sequence import pad_sequences, sort 9 | from tftf.models import Model 10 | 11 | if __name__ == '__main__': 12 | np.random.seed(0) 13 | tf.set_random_seed(123) 14 | 15 | ''' 16 | Load 
data 17 | ''' 18 | num_words = 10000 19 | imdb = load_imdb(num_words=num_words, 20 | train_test_split=False) 21 | X = imdb.data 22 | y = imdb.target 23 | train_X, test_X, train_y, test_y = train_test_split(X, y) 24 | train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y) 25 | 26 | train_X, train_y = sort(train_X, train_y) 27 | valid_X, valid_y = sort(valid_X, valid_y) 28 | test_X, test_y = sort(test_X, test_y) 29 | 30 | ''' 31 | Build model 32 | ''' 33 | model = Model() 34 | model.add(Embedding(100, input_dim=num_words)) 35 | model.add(LSTM(50)) 36 | model.add(Dense(1)) 37 | model.add(Activation('sigmoid')) 38 | model.compile(variable_input=True, 39 | use_mask=True, 40 | pad_value=0) 41 | model.describe() 42 | 43 | model.fit(train_X[:10000], train_y[:10000], 44 | epochs=30, 45 | shuffle=False, 46 | metrics=['accuracy', 'f1'], 47 | preprocesses=[Pad(value=0)], 48 | validation_data=(valid_X[:5000], valid_y[:5000])) 49 | 50 | ''' 51 | Test model 52 | ''' 53 | test_X, test_y = test_X[:2000], test_y[:2000] 54 | test_X = pad_sequences(test_X) 55 | print(model.accuracy(test_X, test_y)) 56 | -------------------------------------------------------------------------------- /examples/mlp_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Layer, Dense, Activation, Dropout 6 | from tftf.models import Model 7 | 8 | 9 | if __name__ == '__main__': 10 | np.random.seed(0) 11 | tf.set_random_seed(0) 12 | 13 | ''' 14 | Load data 15 | ''' 16 | mnist = load_mnist(train_test_split=False, flatten=True) 17 | 18 | n = len(mnist.data) 19 | N = 30000 20 | indices = np.random.permutation(range(n))[:N] 21 | 22 | X = mnist.data[indices] 23 | X = X / 255.0 24 | y = mnist.target[indices] 25 | 26 | train_X, test_X, train_y, test_y = train_test_split(X, y) 27 | train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y) 28 | 29 | ''' 30 | Build model 31 | ''' 32 | model = Model() 33 | model.add(Dense(200, 34 | input_dim=784, 35 | initializer='glorot_uniform')) 36 | model.add(Activation('relu')) 37 | model.add(Dropout(0.2)) 38 | model.add(Dense(200)) 39 | model.add(Activation('relu')) 40 | model.add(Dropout(0.5)) 41 | model.add(Dense(10, initializer='glorot_uniform')) 42 | model.add(Activation('softmax')) 43 | model.compile() 44 | 45 | # model.describe() 46 | model.describe_params() 47 | 48 | ''' 49 | Train model 50 | ''' 51 | model.fit(train_X, train_y, 52 | validation_data=(valid_X, valid_y), 53 | metrics=['accuracy', 'f1'], 54 | early_stopping=3, 55 | epochs=1000) 56 | 57 | ''' 58 | Test model 59 | ''' 60 | print('acc: {:.3}, f1: {:.3}'.format(model.accuracy(test_X, test_y), 61 | model.f1(test_X, test_y))) 62 | -------------------------------------------------------------------------------- /examples/lenet_advanced_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Dense, Activation, BatchNormalization, Dropout, \ 6 | Conv2D, MaxPooling2D, GlobalAveragePooling2D 7 | from tftf.models import Model 8 | 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(0) 12 | tf.set_random_seed(123) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | mnist = 
load_mnist(train_test_split=False) 18 | 19 | n = len(mnist.data) 20 | N = 30000 21 | indices = np.random.permutation(range(n))[:N] 22 | 23 | X = mnist.data[indices] 24 | X = X / 255.0 25 | y = mnist.target[indices] 26 | 27 | train_X, test_X, train_y, test_y = train_test_split(X, y) 28 | 29 | ''' 30 | Build model 31 | ''' 32 | model = Model() 33 | model.add(Conv2D(input_dim=(28, 28, 1), 34 | kernel_size=(3, 3, 20), 35 | padding='valid')) 36 | model.add(BatchNormalization()) 37 | model.add(Activation('relu')) 38 | model.add(Dropout(0.3)) 39 | model.add(MaxPooling2D()) 40 | model.add(Conv2D(kernel_size=(3, 3, 50), 41 | padding='valid')) 42 | model.add(BatchNormalization()) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.3)) 45 | model.add(MaxPooling2D()) 46 | model.add(GlobalAveragePooling2D()) 47 | model.add(Dense(1024)) 48 | model.add(BatchNormalization()) 49 | model.add(Activation('relu')) 50 | model.add(Dropout(0.5)) 51 | model.add(Dense(10)) 52 | model.add(Activation('softmax')) 53 | model.compile() 54 | 55 | model.describe() 56 | 57 | ''' 58 | Train model 59 | ''' 60 | model.fit(train_X, train_y, 61 | metrics=['accuracy']) 62 | 63 | ''' 64 | Test model 65 | ''' 66 | print(model.accuracy(test_X, test_y)) 67 | -------------------------------------------------------------------------------- /tftf/layers/MaxPooling2D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class MaxPooling2D(Layer): 7 | def __init__(self, 8 | pool_size=(2, 2), 9 | strides=None, 10 | padding='valid'): 11 | super().__init__() 12 | 13 | if len(pool_size) != 2: 14 | raise ValueError('Dimension of pool_size must be 2.') 15 | 16 | if strides is None: 17 | strides = pool_size 18 | elif len(strides) != 2: 19 | raise ValueError('Dimension of strides must be 2.') 20 | 21 | padding = padding.upper() 22 | if padding not in ('VALID', 'SAME'): 23 | raise ValueError('padding must be one of \'valid\' or \'same\'.') 24 | 25 | self.pool_size = pool_size 26 | self.strides = strides 27 | self.padding = padding 28 | 29 | @property 30 | def input_shape(self): 31 | return self.input_dim 32 | 33 | @property 34 | def output_shape(self): 35 | return self.output_dim 36 | 37 | @property 38 | def _pool_size(self): 39 | return tuple([1] + list(self.pool_size) + [1]) 40 | 41 | @property 42 | def _strides(self): 43 | return tuple([1] + list(self.strides) + [1]) 44 | 45 | def compile(self): 46 | pass 47 | 48 | def forward(self, x, **kwargs): 49 | return tf.nn.max_pool(x, 50 | ksize=self._pool_size, 51 | strides=self._strides, 52 | padding=self.padding) 53 | 54 | def initialize_output_dim(self): 55 | super().initialize_output_dim() 56 | self.output_dim = self._get_output_shape() 57 | return self.output_dim 58 | 59 | def _get_output_shape(self): 60 | input_shape = self.input_shape 61 | pool_size = self.pool_size 62 | strides = self.strides 63 | padding = self.padding 64 | 65 | if padding == 'SAME': 66 | return input_shape 67 | else: 68 | return tuple(list(np.ceil((np.array(input_shape[:2]) 69 | - np.array(pool_size) + 1) 70 | / np.array(strides)).astype('int32')) 71 | + [input_shape[2]]) 72 | -------------------------------------------------------------------------------- /examples/rnn_sin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | from 
sklearn.model_selection import train_test_split 6 | from tftf.layers import Layer, Dense, Activation, RNN 7 | from tftf.models import Model 8 | 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(12345) 12 | tf.set_random_seed(0) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | def sin(x, T=100): 18 | return np.sin(2.0 * np.pi * x / T) 19 | 20 | def toy_problem(T=100, ampl=0.05): 21 | x = np.arange(0, 2 * T + 1) 22 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, size=len(x)) 23 | return sin(x) + noise 24 | 25 | T = 200 26 | f = toy_problem(T) 27 | 28 | length_of_sequences = 2 * T 29 | maxlen = 50 30 | 31 | data = [] 32 | target = [] 33 | 34 | for i in range(0, length_of_sequences - maxlen + 1): 35 | data.append(f[i: i + maxlen]) 36 | target.append(f[i + maxlen]) 37 | 38 | X = np.array(data, dtype='float32').reshape(len(data), maxlen, 1) 39 | y = np.array(target, dtype='float32').reshape(len(data), 1) 40 | 41 | train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1) 42 | 43 | ''' 44 | Build model 45 | ''' 46 | model = Model() 47 | model.add(RNN(25, input_dim=1)) 48 | model.add(Dense(1)) 49 | model.add(Activation('linear')) 50 | model.compile() 51 | 52 | model.describe() 53 | 54 | ''' 55 | Train model 56 | ''' 57 | model.fit(train_X, train_y, epochs=150, batch_size=50) 58 | 59 | ''' 60 | Test model 61 | ''' 62 | truncate = maxlen 63 | Z = X[:1] 64 | 65 | original = [f[i] for i in range(maxlen)] 66 | predicted = [None for i in range(maxlen)] 67 | 68 | for i in range(length_of_sequences - maxlen + 1): 69 | _z = Z[-1:] 70 | _y = model.predict(_z) 71 | _sequence = np.concatenate((_z.reshape(maxlen, 1)[1:], _y), 72 | axis=0).reshape(1, maxlen, 1) 73 | Z = np.append(Z, _sequence, axis=0) 74 | predicted.append(_y.reshape(-1)) 75 | 76 | plt.rc('font', family='serif') 77 | plt.figure() 78 | plt.ylim([-1.5, 1.5]) 79 | plt.plot(toy_problem(T, ampl=0), linestyle='dotted', color='#aaaaaa') 80 | plt.plot(original, linestyle='dashed', color='black') 81 | plt.plot(predicted, color='black') 82 | plt.show() 83 | -------------------------------------------------------------------------------- /examples/lstm_sin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | from sklearn.model_selection import train_test_split 6 | from tftf.layers import Layer, Dense, Activation, LSTM 7 | from tftf.models import Model 8 | 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(1234) 12 | tf.set_random_seed(0) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | def sin(x, T=100): 18 | return np.sin(2.0 * np.pi * x / T) 19 | 20 | def toy_problem(T=100, ampl=0.05): 21 | x = np.arange(0, 2 * T + 1) 22 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, size=len(x)) 23 | return sin(x) + noise 24 | 25 | T = 200 26 | f = toy_problem(T) 27 | 28 | length_of_sequences = 2 * T 29 | maxlen = 25 30 | 31 | data = [] 32 | target = [] 33 | 34 | for i in range(0, length_of_sequences - maxlen + 1): 35 | data.append(f[i: i + maxlen]) 36 | target.append(f[i + maxlen]) 37 | 38 | X = np.array(data, dtype='float32').reshape(len(data), maxlen, 1) 39 | y = np.array(target, dtype='float32').reshape(len(data), 1) 40 | 41 | train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1) 42 | 43 | ''' 44 | Build model 45 | ''' 46 | model = Model() 47 | model.add(LSTM(10, input_dim=1, return_sequence=True)) 48 | model.add(LSTM(10)) 49 | model.add(Dense(1)) 50 | 
model.add(Activation('linear')) 51 | model.compile() 52 | 53 | model.describe() 54 | 55 | ''' 56 | Train model 57 | ''' 58 | model.fit(train_X, train_y, epochs=150, batch_size=50) 59 | 60 | ''' 61 | Test model 62 | ''' 63 | truncate = maxlen 64 | Z = X[:1] 65 | 66 | original = [f[i] for i in range(maxlen)] 67 | predicted = [None for i in range(maxlen)] 68 | 69 | for i in range(length_of_sequences - maxlen + 1): 70 | _z = Z[-1:] 71 | _y = model.predict(_z) 72 | _sequence = np.concatenate((_z.reshape(maxlen, 1)[1:], _y), 73 | axis=0).reshape(1, maxlen, 1) 74 | Z = np.append(Z, _sequence, axis=0) 75 | predicted.append(_y.reshape(-1)) 76 | 77 | plt.rc('font', family='serif') 78 | plt.figure() 79 | plt.ylim([-1.5, 1.5]) 80 | plt.plot(toy_problem(T, ampl=0), linestyle='dotted', color='#aaaaaa') 81 | plt.plot(original, linestyle='dashed', color='black') 82 | plt.plot(predicted, color='black') 83 | plt.show() 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TFTF: TensorFlow TransFormer🍔 2 | 3 | TensorFlow for everybody. 4 | 5 | ## Quick glance 6 | 7 | ```python 8 | from tftf.layers import Layer, Dense, Activation 9 | from tftf.models import Model 10 | 11 | ''' 12 | Build model 13 | ''' 14 | model = Model() 15 | model.add(Dense(500, input_dim=784)) 16 | model.add(Activation('sigmoid')) 17 | model.add(Dense(10)) 18 | model.add(Activation('softmax')) 19 | model.compile() 20 | 21 | model.describe() 22 | 23 | ''' 24 | Train model 25 | ''' 26 | model.fit(train_X, train_y) 27 | 28 | ''' 29 | Test model 30 | ''' 31 | print(model.accuracy(test_X, test_y)) 32 | ``` 33 | 34 | See [examples](https://github.com/yusugomori/tftf/tree/master/examples) for other implementations. 35 | 36 | ## Installation 37 | 38 | - **Install TFTF from PyPI (recommended):** 39 | 40 | ```sh 41 | pip install tensorflow 42 | pip install tftf 43 | ``` 44 | 45 | - **Alternatively: install TFTF from the GitHub source:** 46 | 47 | First, clone TFTF using `git`: 48 | 49 | ```sh 50 | git clone https://github.com/yusugomori/tftf.git 51 | ``` 52 | 53 | Then, `cd` to the TFTF folder and run the install command: 54 | ```sh 55 | cd tftf 56 | sudo python setup.py install 57 | ``` 58 | 59 | ## Importable Layers, APIs 60 | 61 | You can import low-level tftf APIs to your own TensorFlow implementations. 62 | 63 | ```python 64 | from tftf.layers import Dense, Activation, NALU 65 | from tftf import initializers as ini 66 | from tftf import activations as act 67 | from tftf import losses as loss 68 | from tftf import optimizers as opt 69 | from tftf.metrics import accuracy, f1 70 | 71 | x = tf.placeholder(tf.float32, shape=[None, 784]) 72 | t = tf.placeholder(tf.float32, shape=[None, 10]) 73 | 74 | # import APIs 75 | W = ini.glorot_normal([784, 200]) # or just write tf.Variable(...) 76 | b = ini.zeros([200]) 77 | h = act.tanh(tf.matmul(x, W) + b) # or just write tf.nn.tanh(...) 78 | 79 | # import Layers 80 | h = Dense(200)(h) 81 | h = Activation('tanh')(h) 82 | h = NALU(200)(h) 83 | 84 | W = ini.glorot_normal([200, 10]) 85 | b = ini.zeros([10]) 86 | y = act.softmax(tf.matmul(h, W) + b) 87 | 88 | cost = loss.categorical_crossentropy(y, t) 89 | train_step = opt.sgd(0.01).minimize(cost) 90 | 91 | # Train 92 | # ... 
93 | 94 | preds = y.eval(session=sess, feed_dict={x: test_X}) 95 | acc = accuracy(preds, test_y) 96 | f = f1(preds, test_y) 97 | print('accuracy: {:.3}'.format(acc)) 98 | print('f1: {:.3}'.format(f)) 99 | ``` 100 | -------------------------------------------------------------------------------- /examples/transformer_translation_enja.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.layers import Dense, Activation 5 | from tftf.layers import LSTM, Embedding, TimeDistributedDense, Attention 6 | from tftf.preprocessing.sequence import pad_sequences, sort 7 | from tftf.datasets import load_small_parallel_enja 8 | from tftf.layers.modules import Transformer 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(0) 12 | tf.set_random_seed(123) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | start_char = 1 18 | end_char = 2 19 | (train_X, train_y), (test_X, test_y), (num_X, num_y), \ 20 | (w2i_X, w2i_y), (i2w_X, i2w_y) = load_small_parallel_enja(to_ja=True) 21 | 22 | train_X, train_y = sort(train_X, train_y) 23 | test_X, test_y = sort(test_X, test_y) 24 | 25 | train_size = 50000 # up to 50000 26 | test_size = 100 # up to 500 27 | train_X, train_y = train_X[:train_size], train_y[:train_size] 28 | test_X, test_y = test_X[:test_size], test_y[:test_size] 29 | 30 | ''' 31 | Build model 32 | ''' 33 | pad_value = 0 34 | x = tf.placeholder(tf.int32, [None, None], name='x') 35 | t = tf.placeholder(tf.int32, [None, None], name='t') 36 | 37 | transformer = Transformer(num_X, num_y) 38 | preds = transformer.v1(x, t) 39 | 40 | cost = transformer.loss() 41 | optimizer, lr = transformer.optimizer(cost) 42 | train = transformer.is_training 43 | 44 | ''' 45 | Train model 46 | ''' 47 | epochs = 10 48 | batch_size = 100 49 | 50 | init = tf.global_variables_initializer() 51 | sess = tf.Session() 52 | sess.run(init) 53 | 54 | n_batches = len(train_X) // batch_size 55 | 56 | for epoch in range(epochs): 57 | loss = 0. 
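        # Each mini-batch below is padded to its longest sequence with `pad_value`;
        # the epoch-dependent learning rate from `transformer.lrate(epoch)` is fed
        # through the `lr` placeholder, and `train: True` puts the Transformer
        # module into training mode.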
58 | for i in range(n_batches): 59 | start = i * batch_size 60 | end = start + batch_size 61 | 62 | _train_X = pad_sequences(train_X[start:end], 63 | value=pad_value) 64 | _train_y = pad_sequences(train_y[start:end], 65 | value=pad_value) 66 | 67 | _, _cost = sess.run([optimizer, cost], feed_dict={ 68 | x: _train_X, 69 | t: _train_y, 70 | lr: transformer.lrate(epoch), 71 | train: True 72 | }) 73 | loss += _cost 74 | 75 | loss /= n_batches 76 | 77 | _test_X = pad_sequences(test_X, value=pad_value) 78 | _test_y = pad_sequences(test_y, value=pad_value) 79 | 80 | val_loss = cost.eval(session=sess, feed_dict={ 81 | x: _test_X, 82 | t: _test_y 83 | }) 84 | 85 | print('epoch: {}, ' 86 | 'loss: {:.3}, ' 87 | 'val_loss: {:.3}'.format(epoch+1, loss, val_loss)) 88 | -------------------------------------------------------------------------------- /tftf/layers/Conv2D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class Conv2D(Layer): 7 | def __init__(self, 8 | input_dim=None, 9 | kernel_size=(3, 3, 20), 10 | strides=(1, 1), 11 | padding='same', 12 | initializer='glorot_uniform', 13 | regularizer=None, 14 | rng=None): 15 | super().__init__() 16 | 17 | if input_dim is not None and len(input_dim) != 3: 18 | raise ValueError('Dimension of input_dim must be 3.') 19 | 20 | if len(kernel_size) != 3: 21 | raise ValueError('Dimension of kernel_size must be 3.') 22 | 23 | if len(strides) != 2: 24 | raise ValueError('Dimension of strides must be 2.') 25 | 26 | padding = padding.upper() 27 | if padding not in ('VALID', 'SAME'): 28 | raise ValueError('padding must be one of \'valid\' or \'same\'.') 29 | 30 | self.input_dim = input_dim 31 | self.kernel_size = kernel_size 32 | self.strides = strides 33 | self.padding = padding 34 | self.initializer = initializer 35 | self.regularizer = regularizer 36 | 37 | @property 38 | def input_shape(self): 39 | return self.input_dim 40 | 41 | @property 42 | def output_shape(self): 43 | return self.output_dim 44 | 45 | @property 46 | def _strides(self): 47 | return tuple([1] + list(self.strides) + [1]) 48 | 49 | def compile(self): 50 | kernel_shape = \ 51 | self.kernel_size[:2] + (self.input_dim[2], self.kernel_size[2]) 52 | 53 | self.W = self.kernel_initializer(self.initializer, 54 | shape=kernel_shape, 55 | name='W') 56 | 57 | self.params = [self.W] 58 | 59 | if self.regularizer is not None: 60 | self.reg_loss = [self.regularizer(self.W)] 61 | 62 | def forward(self, x, **kwargs): 63 | return tf.nn.conv2d(x, self.W, 64 | strides=self._strides, 65 | padding=self.padding) 66 | 67 | def initialize_output_dim(self): 68 | super().initialize_output_dim() 69 | self.output_dim = self._get_output_shape() 70 | return self.output_dim 71 | 72 | def _get_output_shape(self): 73 | input_shape = self.input_shape 74 | kernel_size = self.kernel_size 75 | strides = self.strides 76 | padding = self.padding 77 | 78 | image_size = input_shape[:2] 79 | channel = kernel_size[2] 80 | 81 | if padding == 'SAME': 82 | return tuple(list(image_size) + [channel]) 83 | else: 84 | return tuple(list(np.ceil((np.array(image_size) 85 | - np.array(kernel_size[:2]) + 1) 86 | / np.array(strides)).astype('int32')) 87 | + [channel]) 88 | -------------------------------------------------------------------------------- /examples/low_level_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from 
sklearn import datasets 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.utils import shuffle 6 | from tftf.layers import Dense, Activation, NALU 7 | from tftf import regularizers as reg 8 | from tftf import initializers as ini 9 | from tftf import activations as act 10 | from tftf import losses as loss 11 | from tftf import optimizers as opt 12 | from tftf.metrics import accuracy, f1 13 | 14 | 15 | if __name__ == '__main__': 16 | np.random.seed(0) 17 | tf.set_random_seed(123) 18 | 19 | ''' 20 | Load data 21 | ''' 22 | mnist = datasets.fetch_mldata('MNIST original', data_home='.') 23 | 24 | n = len(mnist.data) 25 | N = 30000 26 | indices = np.random.permutation(range(n))[:N] 27 | 28 | X = mnist.data[indices] 29 | X = X / 255.0 30 | X = X - X.mean(axis=1).reshape(len(X), 1) 31 | y = mnist.target[indices] 32 | Y = np.eye(10)[y.astype(int)] 33 | 34 | train_X, test_X, train_y, test_y = train_test_split(X, Y) 35 | 36 | ''' 37 | Build model 38 | ''' 39 | n_in = 784 40 | n_hidden = 200 41 | n_out = 10 42 | 43 | x = tf.placeholder(tf.float32, shape=[None, n_in]) 44 | t = tf.placeholder(tf.float32, shape=[None, n_out]) 45 | 46 | W = ini.glorot_normal([n_in, n_hidden], name='W0') 47 | b = ini.zeros([n_hidden], name='b0') 48 | h = act.tanh(tf.matmul(x, W) + b) 49 | 50 | W = ini.glorot_normal([n_hidden, n_hidden], name='W1') 51 | b = ini.zeros([n_hidden], name='b1') 52 | h = act.tanh(tf.matmul(h, W) + b) 53 | 54 | h = NALU(n_hidden)(h) # import from tftf.layers 55 | 56 | W = ini.glorot_normal([n_hidden, n_out], name='W_out') 57 | b = ini.zeros([n_out], name='b_out') 58 | y = act.softmax(tf.matmul(h, W) + b) 59 | 60 | cost = loss.categorical_crossentropy(y, t) 61 | train_step = opt.sgd(0.01).minimize(cost) 62 | 63 | ''' 64 | Train model 65 | ''' 66 | epochs = 10 67 | batch_size = 100 68 | 69 | init = tf.global_variables_initializer() 70 | sess = tf.Session() 71 | sess.run(init) 72 | 73 | n_batches = len(train_X) // batch_size 74 | 75 | for epoch in range(epochs): 76 | _X, _y = shuffle(train_X, train_y) 77 | 78 | for i in range(n_batches): 79 | start = i * batch_size 80 | end = start + batch_size 81 | 82 | sess.run(train_step, feed_dict={ 83 | x: _X[start:end], 84 | t: _y[start:end] 85 | }) 86 | 87 | loss = cost.eval(session=sess, feed_dict={ 88 | x: _X, 89 | t: _y 90 | }) 91 | 92 | preds = y.eval(session=sess, feed_dict={x: _X}) 93 | acc = accuracy(preds, _y) 94 | 95 | print('epoch: {}, loss: {:.3}, acc: {:.3}'.format(epoch, loss, acc)) 96 | 97 | ''' 98 | Test model 99 | ''' 100 | preds = y.eval(session=sess, feed_dict={x: test_X}) 101 | acc = accuracy(preds, test_y) 102 | f = f1(preds, test_y) 103 | print('accuracy: {:.3}'.format(acc)) 104 | print('f1: {:.3}'.format(f)) 105 | -------------------------------------------------------------------------------- /tftf/layers/modules/ResNet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from . import Module 3 | from .. import Activation 4 | from .. import BatchNormalization 5 | from .. import Conv2D 6 | from .. import Dense 7 | from .. import GlobalAveragePooling2D 8 | from .. 
import MaxPooling2D 9 | 10 | 11 | class ResNet(Module): 12 | ''' 13 | # Example 14 | 15 | ``` 16 | x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3]) 17 | t = tf.placeholder(tf.float32, shape=[None, 10]) 18 | 19 | resnet = ResNet() 20 | h = resnet.v1(x) 21 | h = Activation('relu')(h) 22 | h = Dense(10)(h) 23 | y = Activation('softmax')(h) 24 | 25 | cost = categorical_crossentropy(y, t) 26 | train_step = sgd(0.01).minimize(cost) 27 | ``` 28 | ''' 29 | def __init__(self): 30 | pass 31 | 32 | def v1(self, x, n_out=1000): 33 | ''' 34 | ResNet-34 35 | 36 | # Arguments 37 | x: placeholder 38 | ''' 39 | layers = [ 40 | Conv2D(kernel_size=(7, 7, 64)), 41 | BatchNormalization(), 42 | Activation('relu'), 43 | MaxPooling2D(pool_size=(3, 3), 44 | strides=(2, 2), 45 | padding='same') 46 | ] 47 | for layer in layers: 48 | x = layer(x) 49 | x = self._add_base_block(x, channel_out=64) 50 | x = self._add_base_block(x, channel_out=128) 51 | x = self._add_base_block(x, channel_out=256) 52 | x = self._add_base_block(x, channel_out=512) 53 | x = GlobalAveragePooling2D()(x) 54 | x = Dense(n_out)(x) 55 | 56 | return x 57 | 58 | def _add_base_block(self, x, channel_out=64): 59 | x = Conv2D(kernel_size=(1, 1, channel_out), 60 | strides=(2, 2))(x) 61 | x = self._base_block(x, channel_out=channel_out) 62 | return x 63 | 64 | def _base_block(self, x, channel_out=64): 65 | ''' 66 | # Arguments 67 | x: placeholder 68 | ''' 69 | layers = [ 70 | Conv2D(kernel_size=(3, 3, channel_out)), 71 | BatchNormalization(), 72 | Activation('relu'), 73 | Conv2D(kernel_size=(3, 3, channel_out)), 74 | BatchNormalization() 75 | ] 76 | for layer in layers: 77 | h = layer(x) 78 | shortcut = self._shortcut(x, output_shape=h.get_shape()) 79 | 80 | return Activation('relu')(h + shortcut) 81 | 82 | def _bottleneck(self, x, channel_out=256): 83 | ''' 84 | # Arguments 85 | x: placeholder 86 | ''' 87 | channel = channel_out // 4 88 | layers = [ 89 | Conv2D(kernel_size=(1, 1, channel)), 90 | BatchNormalization(), 91 | Activation('relu'), 92 | Conv2D(kernel_size=(3, 3, channel)), 93 | BatchNormalization(), 94 | Activation('relu'), 95 | Conv2D(kernel_size=(1, 1, channel)), 96 | BatchNormalization() 97 | ] 98 | for layer in layers: 99 | h = layer(x) 100 | shortcut = self._shortcut(x, output_shape=h.get_shape()) 101 | 102 | return Activation('relu')(h + shortcut) 103 | 104 | def _projection(self, x, channel_out): 105 | layer = Conv2D(kernel_size=(1, 1, channel_out)) 106 | return layer(x) 107 | 108 | def _shortcut(self, x, output_shape): 109 | input_shape = x.get_shape() 110 | channel_in = input_shape[-1] 111 | channel_out = output_shape[-1] 112 | 113 | if channel_in != channel_out: 114 | return self._projection(x, channel_out) 115 | else: 116 | return x 117 | -------------------------------------------------------------------------------- /tftf/layers/Attention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | from .initializers import zeros 5 | 6 | 7 | class Attention(Layer): 8 | ''' 9 | Attention Layer for Seq2Seq 10 | "Effective Approaches to Attention-based Neural Machine Translation", 11 | Minh-Thang Luong et al., EMNLP 2015 12 | https://arxiv.org/abs/1508.04025 13 | ''' 14 | def __init__(self, output_dim, 15 | input_dim=None, 16 | initializer='glorot_uniform', 17 | activation='tanh', 18 | state=None): 19 | ''' 20 | # Arguments 21 | input_dim: tuple or list. shape of (encoder_dim, decoder_dim). 22 | state: (default None). 
Encoder state (output). 23 | shape of (batch_size, len_sequence, encoder_dim) 24 | ''' 25 | super().__init__() 26 | 27 | if type(input_dim) != list and type(input_dim) != tuple: 28 | raise ValueError('`input_dim` must be given as a list or tuple.') 29 | 30 | if len(input_dim) != 2: 31 | raise ValueError('Length of `input_dim` must be 2. ' 32 | 'Not {}.'.format(len(input_dim))) 33 | 34 | if state is None: 35 | raise ValueError('`state` must be given.') 36 | 37 | self.output_dim = output_dim 38 | self.input_dim = input_dim 39 | 40 | self.initializer = initializer 41 | self.activation = \ 42 | self.activation_initializer(activation) 43 | self.state = state 44 | self._use_mask = False 45 | self.mask = None 46 | 47 | @property 48 | def input_shape(self): 49 | return tuple(self.input_dim) 50 | 51 | def compile(self): 52 | input_dim = self.input_dim 53 | output_dim = self.output_dim 54 | initializer = self.initializer 55 | 56 | self.W_a = \ 57 | self.kernel_initializer(initializer, 58 | shape=(input_dim[0], input_dim[1]), 59 | name='W_a') 60 | self.W_c = \ 61 | self.kernel_initializer(initializer, 62 | shape=(input_dim[0], output_dim), 63 | name='W_c') 64 | self.W_h = \ 65 | self.kernel_initializer(initializer, 66 | shape=(input_dim[1], output_dim), 67 | name='W_h') 68 | self.b = zeros((output_dim), name='b') 69 | 70 | self.params = [self.W_a, self.W_c, self.W_h, self.b] 71 | 72 | def forward(self, x, **kwargs): 73 | ''' 74 | # Arguments 75 | mask: Tensor. Mask for padded value. 76 | shape of (batch_size, encoder_dim) 77 | recurrent: boolean (default True). 78 | ''' 79 | if self.mask is None: 80 | self.mask = kwargs['mask'] if 'mask' in kwargs else None 81 | self._use_mask = True if self.mask is not None else False 82 | 83 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 84 | 85 | if recurr: 86 | score = tf.einsum('ijk,ilk->ijl', 87 | x, 88 | tf.einsum('ijk,kl->ijl', self.state, self.W_a)) 89 | if self._use_mask: 90 | score *= self.mask[:, np.newaxis] 91 | 92 | attn = self.attn = tf.nn.softmax(score) 93 | c = tf.einsum('ijk,ikl->ijl', attn, self.state) 94 | 95 | return self.activation(tf.einsum('ijk,kl->ijl', c, self.W_c) 96 | + tf.einsum('ijk,kl->ijl', x, self.W_h) 97 | + self.b) 98 | else: 99 | score = tf.einsum('ij,ikj->ik', 100 | x, 101 | tf.einsum('ijk,kl->ijl', self.state, self.W_a)) 102 | if self._use_mask: 103 | score *= self.mask 104 | 105 | attn = self.attn = tf.nn.softmax(score) 106 | c = tf.einsum('ij,ijk->ik', attn, self.state) 107 | 108 | return self.activation(tf.matmul(c, self.W_c) 109 | + tf.matmul(x, self.W_h) 110 | + self.b) 111 | -------------------------------------------------------------------------------- /tftf/layers/RNN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | from .initializers import zeros 5 | 6 | 7 | class RNN(Layer): 8 | def __init__(self, output_dim, 9 | input_dim=None, 10 | initializer='glorot_uniform', 11 | recurrent_initializer='orthogonal', 12 | recurrent_activation='tanh', 13 | length_of_sequences=None, 14 | return_sequence=False, 15 | initial_state=None, 16 | rng=None): 17 | super().__init__() 18 | self.output_dim = output_dim 19 | self.input_dim = input_dim 20 | self.initializer = initializer 21 | self.recurrent_initializer = recurrent_initializer 22 | self.recurrent_activation = \ 23 | self.activation_initializer(recurrent_activation) 24 | self._length_of_sequences = length_of_sequences 25 | self._return_sequence = 
return_sequence 26 | self._initial_state = initial_state 27 | self._use_mask = False 28 | 29 | @property 30 | def input_shape(self): 31 | return (self._length_of_sequences, self.input_dim) 32 | 33 | def compile(self): 34 | input_dim = self.input_dim 35 | output_dim = self.output_dim 36 | initializer = self.initializer 37 | recurrent_initializer = self.recurrent_initializer 38 | 39 | self.W = self.kernel_initializer(initializer, 40 | shape=(input_dim, output_dim), 41 | name='W') 42 | self.W_recurrent = \ 43 | self.kernel_initializer(recurrent_initializer, 44 | shape=(output_dim, output_dim), 45 | name='W_recurrent') 46 | self.b = zeros((output_dim), name='b') 47 | 48 | self.params = [self.W, self.W_recurrent, self.b] 49 | 50 | def forward(self, x, **kwargs): 51 | ''' 52 | # Arguments 53 | mask: Tensor. Mask for padded value. 54 | recurrent: boolean (default True). 55 | Whether to loop the input sequence. 56 | initial_state: (default None). Override self._initial_state. 57 | ''' 58 | def _recurrent(state, elems): 59 | if not self._use_mask: 60 | x = elems 61 | else: 62 | x = elems[0] 63 | mask = elems[1] 64 | h = self.recurrent_activation(tf.matmul(x, self.W) 65 | + tf.matmul(state, self.W_recurrent) 66 | + self.b) 67 | if not self._use_mask: 68 | return h 69 | else: 70 | mask = mask[:, np.newaxis] 71 | return mask * h + (1 - mask) * state 72 | 73 | mask = kwargs['mask'] if 'mask' in kwargs else None 74 | self._use_mask = True if mask is not None else False 75 | 76 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 77 | 78 | if 'initial_state' in kwargs: 79 | initial_state = kwargs['initial_state'] 80 | else: 81 | initial_state = self._initial_state 82 | 83 | if initial_state is None: 84 | initial_state = \ 85 | tf.matmul(x[:, 0, :], 86 | tf.zeros((self.input_dim, self.output_dim))) 87 | 88 | if not recurr: 89 | if mask is None: 90 | states = _recurrent(initial_state, x) 91 | else: 92 | states = _recurrent(initial_state, [x, mask]) 93 | 94 | return states 95 | else: 96 | if mask is None: 97 | states = tf.scan(fn=_recurrent, 98 | elems=tf.transpose(x, perm=[1, 0, 2]), 99 | initializer=initial_state) 100 | else: 101 | mask = tf.transpose(mask) 102 | states = tf.scan(fn=_recurrent, 103 | elems=[tf.transpose(x, perm=[1, 0, 2]), mask], 104 | initializer=initial_state) 105 | 106 | if self._return_sequence is True: 107 | return tf.transpose(states, perm=[1, 0, 2]) 108 | else: 109 | return states[-1] 110 | -------------------------------------------------------------------------------- /tftf/layers/Layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .activations import * 3 | from .initializers import * 4 | 5 | 6 | class Layer(object): 7 | def __init__(self): 8 | self._input_dim = None 9 | self._output_dim = None 10 | self._input_dtype = tf.float32 11 | self._output_dtype = tf.float32 12 | self._params = [] 13 | self._reg_loss = [] 14 | self._compiled = False 15 | 16 | def __repr__(self): 17 | return '<{}: shape({}, {})>'.format(self.name, 18 | self.input_dim, 19 | self.output_dim) 20 | 21 | def __call__(self, x, **kwargs): 22 | if self.input_dim is None: 23 | input_shape = x.get_shape().as_list() 24 | if len(input_shape) == 2: # genral data 25 | self.input_dim = input_shape[1] 26 | elif len(input_shape) == 3: # sequencial data 27 | self.input_dim = input_shape[-1] 28 | else: # image data 29 | self.input_dim = tuple(input_shape[1:]) 30 | 31 | # OPTIMIZE 32 | if len(self.params) == 0: 33 | self.compile() 34 | x 
= self.forward(x, **kwargs) 35 | 36 | if self.output_dim is None: 37 | output_shape = x.get_shape().as_list() 38 | if len(output_shape) == 2: 39 | self.output_dim = output_shape[1] 40 | else: 41 | self.output_dim = tuple(output_shape[1:]) 42 | 43 | return x 44 | 45 | @property 46 | def name(self): 47 | return self.__class__.__name__ 48 | 49 | @property 50 | def shape(self): 51 | return (self.input_dim, self.output_dim) 52 | 53 | @property 54 | def input_dim(self): 55 | return self._input_dim 56 | 57 | @input_dim.setter 58 | def input_dim(self, val): 59 | self._input_dim = val 60 | 61 | @property 62 | def input_shape(self): 63 | return (self.input_dim,) 64 | 65 | @property 66 | def input_dtype(self): 67 | return self._input_dtype 68 | 69 | @property 70 | def output_dim(self): 71 | return self._output_dim 72 | 73 | @output_dim.setter 74 | def output_dim(self, val): 75 | self._output_dim = val 76 | 77 | @property 78 | def output_shape(self): 79 | return (self.output_dim,) 80 | 81 | @property 82 | def output_dtype(self): 83 | return self._output_dtype 84 | 85 | @property 86 | def params(self): 87 | return self._params 88 | 89 | @params.setter 90 | def params(self, val): 91 | if type(val) != list: 92 | raise AttributeError('type of params must be \'list\', ' 93 | 'not \'{}\'.'.format(type(val).__name__)) 94 | self._params = val 95 | 96 | @property 97 | def reg_loss(self): 98 | return self._reg_loss 99 | 100 | @reg_loss.setter 101 | def reg_loss(self, val): 102 | if type(val) != list: 103 | raise AttributeError('type of reg_loss must be \'list\', ' 104 | 'not \'{}\'.'.format(type(val).__name__)) 105 | self._reg_loss = val 106 | 107 | def activation_initializer(self, activation): 108 | activations = { 109 | 'elu': elu, 110 | 'hard_sigmoid': hard_sigmoid, 111 | 'leaky_relu': leaky_relu, 112 | 'linear': linear, 113 | # 'prelu': prelu, 114 | 'relu': relu, 115 | 'selu': selu, 116 | 'sigmoid': sigmoid, 117 | 'softmax': softmax, 118 | 'swish': swish, 119 | 'tanh': tanh 120 | } 121 | if activation in activations: 122 | activation = activations[activation] 123 | 124 | return activation 125 | 126 | def compile(self): 127 | raise NotImplementedError() 128 | 129 | def forward(self, x, **kwargs): 130 | raise NotImplementedError() 131 | 132 | def initialize_output_dim(self): 133 | if self.input_dim is None: 134 | raise ValueError('input_dim not definfed.') 135 | 136 | self.output_dim = self.input_dim 137 | return self.output_dim 138 | 139 | def kernel_initializer(self, initializer, shape, name=None): 140 | initializers = { 141 | 'glorot_normal': glorot_normal, 142 | 'glorot_uniform': glorot_uniform, 143 | 'normal': normal, 144 | 'ones': ones, 145 | 'orthogonal': orthogonal, 146 | 'zeros': zeros 147 | } 148 | 149 | if initializer in initializers: 150 | initializer = initializers[initializer] 151 | 152 | return initializer(shape, name=name) 153 | -------------------------------------------------------------------------------- /tftf/datasets/small_parallel_enja.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import numpy as np 4 | from .Dataset import Dataset 5 | 6 | 7 | ''' 8 | Download 50k En/Ja Parallel Corpus 9 | from https://github.com/odashi/small_parallel_enja 10 | and transform words to IDs. 
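With the default arguments, IDs are assigned as follows: 0 is reserved for
padding, 1 for the beginning-of-sentence marker, 2 for the end-of-sentence
marker, 3 for out-of-vocabulary words, and ordinary vocabulary starts at 4
(see `pad_value`, `start_char`, `end_char`, `oov_char` and `index_from`).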
11 | ''' 12 | 13 | 14 | def load_small_parallel_enja(path=None, 15 | to_ja=True, 16 | pad_value=0, 17 | start_char=1, 18 | end_char=2, 19 | oov_char=3, 20 | index_from=4, 21 | pad='', 22 | bos='', 23 | eos='', 24 | oov=''): 25 | url_base = 'https://raw.githubusercontent.com/' \ 26 | 'odashi/small_parallel_enja/master/' 27 | 28 | path = path or 'small_parallel_enja' 29 | dir_path = os.path.join(os.path.expanduser('~'), 30 | '.tftf', 'datasets', path) 31 | if not os.path.exists(dir_path): 32 | os.makedirs(dir_path) 33 | 34 | f_ja = ['train.ja', 'test.ja'] 35 | f_en = ['train.en', 'test.en'] 36 | 37 | for f in (f_ja + f_en): 38 | f_path = os.path.join(dir_path, f) 39 | if not os.path.exists(f_path): 40 | url = url_base + f 41 | print('Downloading {}'.format(f)) 42 | cmd = ['curl', '-o', f_path, url] 43 | subprocess.call(cmd) 44 | 45 | f_train_ja = os.path.join(dir_path, f_ja[0]) 46 | f_test_ja = os.path.join(dir_path, f_ja[1]) 47 | f_train_en = os.path.join(dir_path, f_en[0]) 48 | f_test_en = os.path.join(dir_path, f_en[1]) 49 | 50 | (train_ja, test_ja), num_words_ja, (w2i_ja, i2w_ja) = \ 51 | _build(f_train_ja, f_test_ja, 52 | pad_value, start_char, end_char, oov_char, index_from, 53 | pad, bos, eos, oov) 54 | (train_en, test_en), num_words_en, (w2i_en, i2w_en) = \ 55 | _build(f_train_en, f_test_en, 56 | pad_value, start_char, end_char, oov_char, index_from, 57 | pad, bos, eos, oov) 58 | 59 | if to_ja: 60 | train_X, test_X, num_X, w2i_X, i2w_X = \ 61 | train_en, test_en, num_words_en, w2i_en, i2w_en 62 | train_y, test_y, num_y, w2i_y, i2w_y = \ 63 | train_ja, test_ja, num_words_ja, w2i_ja, i2w_ja 64 | else: 65 | train_X, test_X, num_X, w2i_X, i2w_X = \ 66 | train_ja, test_ja, num_words_ja, w2i_ja, i2w_ja 67 | train_y, test_y, num_y, w2i_y, i2w_y = \ 68 | train_en, test_en, num_words_en, w2i_en, i2w_en 69 | 70 | train_X, test_X = np.array(train_X), np.array(test_X) 71 | train_y, test_y = np.array(train_y), np.array(test_y) 72 | 73 | return (train_X, train_y), (test_X, test_y), \ 74 | (num_X, num_y), (w2i_X, w2i_y), (i2w_X, i2w_y) 75 | 76 | 77 | def _build(f_train, f_test, 78 | pad_value=0, 79 | start_char=1, 80 | end_char=2, 81 | oov_char=3, 82 | index_from=4, 83 | pad='', 84 | bos='', 85 | eos='', 86 | oov=''): 87 | 88 | builder = _Builder(pad_value=pad_value, 89 | start_char=start_char, 90 | end_char=end_char, 91 | oov_char=oov_char, 92 | index_from=index_from, 93 | pad=pad, 94 | bos=bos, 95 | eos=eos, 96 | oov=oov) 97 | builder.fit(f_train) 98 | train = builder.transform(f_train) 99 | test = builder.transform(f_test) 100 | 101 | return (train, test), builder.num_words, (builder.w2i, builder.i2w) 102 | 103 | 104 | class _Builder(object): 105 | def __init__(self, 106 | pad_value=0, 107 | start_char=1, 108 | end_char=2, 109 | oov_char=3, 110 | index_from=4, 111 | pad='', 112 | bos='', 113 | eos='', 114 | oov=''): 115 | self._vocab = None 116 | self._w2i = None 117 | self._i2w = None 118 | 119 | self.pad_value = pad_value 120 | self.start_char = start_char 121 | self.end_char = end_char 122 | self.oov_char = oov_char 123 | self.index_from = index_from 124 | self.pad = pad 125 | self.bos = bos 126 | self.eos = eos 127 | self.oov = oov 128 | 129 | @property 130 | def num_words(self): 131 | return max(self._w2i.values()) + 1 132 | 133 | @property 134 | def w2i(self): 135 | ''' 136 | Dict of word to index 137 | ''' 138 | return self._w2i 139 | 140 | @property 141 | def i2w(self): 142 | ''' 143 | Dict of index to word 144 | ''' 145 | return self._i2w 146 | 147 | def fit(self, f_path): 148 | 
self._vocab = set() 149 | self._w2i = {} 150 | for line in open(f_path, encoding='utf-8'): 151 | _sentence = line.strip().split() 152 | self._vocab.update(_sentence) 153 | 154 | self._w2i = {w: (i + self.index_from) 155 | for i, w in enumerate(self._vocab)} 156 | if self.pad_value >= 0: 157 | self._w2i[self.pad] = self.pad_value 158 | self._w2i[self.bos] = self.start_char 159 | self._w2i[self.eos] = self.end_char 160 | self._w2i[self.oov] = self.oov_char 161 | self._i2w = {i: w for w, i in self._w2i.items()} 162 | 163 | def transform(self, f_path): 164 | if self._vocab is None or self._w2i is None: 165 | raise AttributeError('`{}.fit` must be called before `transform`.' 166 | ''.format(self.__class__.__name__)) 167 | sentences = [] 168 | for line in open(f_path, encoding='utf-8'): 169 | _sentence = line.strip().split() 170 | _sentence = [self.bos] + _sentence + [self.eos] 171 | sentences.append(self._encode(_sentence)) 172 | return sentences 173 | 174 | def _encode(self, sentence): 175 | encoded = [] 176 | for w in sentence: 177 | if w not in self._w2i: 178 | id = self.oov_char 179 | else: 180 | id = self._w2i[w] 181 | encoded.append(id) 182 | 183 | return encoded 184 | -------------------------------------------------------------------------------- /examples/lstm_translation_enja.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.layers import Dense, Activation 5 | from tftf.layers import LSTM, Embedding, TimeDistributedDense 6 | from tftf.preprocessing.sequence import pad_sequences, sort 7 | from tftf.datasets import load_small_parallel_enja 8 | from tftf import losses as loss 9 | from tftf import optimizers as opt 10 | 11 | if __name__ == '__main__': 12 | np.random.seed(0) 13 | tf.set_random_seed(123) 14 | 15 | ''' 16 | Load data 17 | ''' 18 | end_char = 2 19 | (train_X, train_y), (test_X, test_y), (num_X, num_y), \ 20 | (w2i_X, w2i_y), (i2w_X, i2w_y) = load_small_parallel_enja(to_ja=True) 21 | 22 | train_X, train_y = sort(train_X, train_y) 23 | test_X, test_y = sort(test_X, test_y) 24 | 25 | train_size = 50000 # up to 50000 26 | test_size = 500 # up to 500 27 | train_X, train_y = train_X[:train_size], train_y[:train_size] 28 | test_X, test_y = test_X[:test_size], test_y[:test_size] 29 | 30 | ''' 31 | Build model 32 | ''' 33 | pad_value = 0 34 | x = tf.placeholder(tf.int32, [None, None], name='x') 35 | t = tf.placeholder(tf.int32, [None, None], name='t') 36 | target = tf.one_hot(t[:, 1:], depth=num_y, dtype=tf.float32) 37 | mask_enc = tf.cast(tf.not_equal(x, pad_value), tf.float32) 38 | mask_dec = tf.cast(tf.not_equal(t[:, 1:], pad_value), tf.float32) 39 | 40 | encoder = [ 41 | Embedding(128, input_dim=num_X), 42 | LSTM(128, return_sequence=True, return_cell=True) 43 | ] 44 | 45 | h = x 46 | for layer in encoder: 47 | h = layer(h, mask=mask_enc) 48 | encoder_output, encoder_cell = h 49 | 50 | decoder = [ 51 | [ 52 | Embedding(128, input_dim=num_y), 53 | LSTM(128, return_sequence=True, return_cell=True, 54 | initial_state=encoder_output[:, -1, :], 55 | cell_state=encoder_cell[:, -1, :]) 56 | ], 57 | [ 58 | TimeDistributedDense(num_y), 59 | Activation('softmax') 60 | ] 61 | ] 62 | 63 | h = t[:, :-1] 64 | for layer in decoder[0]: 65 | h = layer(h) 66 | decoder_output, _ = h 67 | 68 | output = decoder_output 69 | for layer in decoder[1]: 70 | output = layer(output) 71 | 72 | cost = \ 73 | loss.categorical_crossentropy(output, 74 | target 
75 | * tf.transpose(mask_dec[:, np.newaxis], 76 | perm=[0, 2, 1])) 77 | train_step = opt.adam().minimize(cost) 78 | 79 | ''' 80 | Train model 81 | ''' 82 | epochs = 25 83 | batch_size = 50 84 | 85 | init = tf.global_variables_initializer() 86 | sess = tf.Session() 87 | sess.run(init) 88 | 89 | n_batches = len(train_X) // batch_size 90 | 91 | for epoch in range(epochs): 92 | loss = 0. 93 | for i in range(n_batches): 94 | start = i * batch_size 95 | end = start + batch_size 96 | 97 | _train_X = pad_sequences(train_X[start:end], 98 | value=pad_value) 99 | _train_y = pad_sequences(train_y[start:end], 100 | value=pad_value) 101 | 102 | _, _cost = sess.run([train_step, cost], feed_dict={ 103 | x: _train_X, 104 | t: _train_y 105 | }) 106 | loss += _cost 107 | 108 | loss /= n_batches 109 | 110 | _test_X = pad_sequences(test_X, value=pad_value) 111 | _test_y = pad_sequences(test_y, value=pad_value) 112 | 113 | val_loss = cost.eval(session=sess, feed_dict={ 114 | x: _test_X, 115 | t: _test_y 116 | }) 117 | 118 | print('epoch: {}, ' 119 | 'loss: {:.3}, ' 120 | 'val_loss: {:.3}'.format(epoch+1, loss, val_loss)) 121 | 122 | ''' 123 | Generate sentences 124 | ''' 125 | initial = { 126 | 'y': tf.placeholder(tf.int32, [None, None]), 127 | 'state': tf.placeholder(tf.float32, [None, None]), 128 | 'cell_state': tf.placeholder(tf.float32, [None, None]), 129 | 'step': tf.constant(0) 130 | } 131 | initial['flg'] = tf.cast(tf.zeros_like(initial['y'][:, 0]), dtype=tf.bool) 132 | maxlen = 100 133 | 134 | def cond(y, state, cell_state, step, flg): 135 | n_flg = tf.reduce_sum(tf.cast(flg, tf.int32)) 136 | next = \ 137 | tf.not_equal(n_flg, 138 | tf.reduce_sum(tf.ones_like(initial['flg'], 139 | dtype=tf.int32))) 140 | return tf.logical_and(step+1 < maxlen, next) 141 | 142 | def body(y, state, cell_state, step, flg): 143 | h = y[:, -1] 144 | for layer in decoder[0]: 145 | h = layer(h, 146 | recurrent=False, 147 | initial_state=state, 148 | cell_state=cell_state) 149 | decoder_output, decoder_cell = h 150 | 151 | output = decoder_output 152 | for layer in decoder[1]: 153 | output = layer(output, recurrent=False) 154 | output = tf.cast(tf.argmax(output, axis=1), tf.int32) 155 | y = tf.concat([y, output[:, np.newaxis]], axis=1) 156 | flg = tf.logical_or(flg, tf.equal(output, end_char)) 157 | 158 | return [y, 159 | decoder_output, 160 | decoder_cell, 161 | step+1, 162 | flg] 163 | generator = \ 164 | tf.while_loop(cond, 165 | body, 166 | loop_vars=[initial['y'], 167 | initial['state'], 168 | initial['cell_state'], 169 | initial['step'], 170 | initial['flg']], 171 | shape_invariants=[tf.TensorShape([None, None]), 172 | tf.TensorShape([None, None]), 173 | tf.TensorShape([None, None]), 174 | initial['step'].get_shape(), 175 | tf.TensorShape([None])]) 176 | 177 | test_X_ = pad_sequences(test_X, value=pad_value) 178 | init_y = np.zeros_like(test_X, dtype='int32')[:, np.newaxis] 179 | state, cell_state = \ 180 | sess.run([encoder_output, encoder_cell], feed_dict={ 181 | x: test_X_ 182 | }) 183 | init_state = state[:, -1, :] 184 | init_cell_state = cell_state[:, -1, :] 185 | 186 | preds, _, _, _, _ = sess.run(generator, feed_dict={ 187 | initial['y']: init_y, 188 | initial['state']: init_state, 189 | initial['cell_state']: init_cell_state 190 | }) 191 | 192 | for n in range(len(test_X)): 193 | data = test_X[n][1:-1] 194 | target = test_y[n][1:-1] 195 | pred = list(preds[n])[1:] 196 | pred.append(end_char) 197 | 198 | print('-' * 20) 199 | print('Original sentence:', 200 | ' '.join([i2w_X[i] for i in data])) 201 | print('True 
sentence:', 202 | ' '.join([i2w_y[i] for i in target])) 203 | print('Generated sentence:', 204 | ' '.join([i2w_y[i] for i in pred[:pred.index(end_char)]])) 205 | -------------------------------------------------------------------------------- /examples/attention_translation_enja.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.layers import Dense, Activation 5 | from tftf.layers import LSTM, Embedding, TimeDistributedDense, Attention 6 | from tftf.preprocessing.sequence import pad_sequences, sort 7 | from tftf.datasets import load_small_parallel_enja 8 | from tftf import losses as loss 9 | from tftf import optimizers as opt 10 | 11 | if __name__ == '__main__': 12 | np.random.seed(0) 13 | tf.set_random_seed(123) 14 | 15 | ''' 16 | Load data 17 | ''' 18 | end_char = 2 19 | (train_X, train_y), (test_X, test_y), (num_X, num_y), \ 20 | (w2i_X, w2i_y), (i2w_X, i2w_y) = load_small_parallel_enja(to_ja=True) 21 | 22 | train_X, train_y = sort(train_X, train_y) 23 | test_X, test_y = sort(test_X, test_y) 24 | 25 | train_size = 50000 # up to 50000 26 | test_size = 500 # up to 500 27 | train_X, train_y = train_X[:train_size], train_y[:train_size] 28 | test_X, test_y = test_X[:test_size], test_y[:test_size] 29 | 30 | ''' 31 | Build model 32 | ''' 33 | pad_value = 0 34 | x = tf.placeholder(tf.int32, [None, None], name='x') 35 | t = tf.placeholder(tf.int32, [None, None], name='t') 36 | target = tf.one_hot(t[:, 1:], depth=num_y, dtype=tf.float32) 37 | mask_enc = tf.cast(tf.not_equal(x, pad_value), tf.float32) 38 | mask_dec = tf.cast(tf.not_equal(t[:, 1:], pad_value), tf.float32) 39 | mask_attn = \ 40 | tf.where(condition=tf.equal(x, pad_value), 41 | x=tf.ones_like(x, dtype=tf.float32) * np.float32(-1e+10), 42 | y=tf.ones_like(x, dtype=tf.float32)) 43 | 44 | encoder = [ 45 | Embedding(128, input_dim=num_X), 46 | LSTM(128, return_sequence=True, return_cell=True) 47 | ] 48 | 49 | h = x 50 | for layer in encoder: 51 | h = layer(h, mask=mask_enc) 52 | encoder_output, encoder_cell = h 53 | 54 | decoder = [ 55 | [ 56 | Embedding(128, input_dim=num_y), 57 | LSTM(128, return_sequence=True, return_cell=True, 58 | initial_state=encoder_output[:, -1, :], 59 | cell_state=encoder_cell[:, -1, :]) 60 | ], 61 | [ 62 | Attention(128, input_dim=(128, 128), state=encoder_output), 63 | TimeDistributedDense(num_y), 64 | Activation('softmax') 65 | ] 66 | ] 67 | 68 | h = t[:, :-1] 69 | for layer in decoder[0]: 70 | h = layer(h) 71 | decoder_output, _ = h 72 | 73 | output = decoder_output 74 | for layer in decoder[1]: 75 | output = layer(output, mask=mask_attn) 76 | 77 | cost = \ 78 | loss.categorical_crossentropy(output, 79 | target 80 | * tf.transpose(mask_dec[:, np.newaxis], 81 | perm=[0, 2, 1])) 82 | train_step = opt.adam().minimize(cost) 83 | 84 | ''' 85 | Train model 86 | ''' 87 | epochs = 25 88 | batch_size = 50 89 | 90 | init = tf.global_variables_initializer() 91 | sess = tf.Session() 92 | sess.run(init) 93 | 94 | n_batches = len(train_X) // batch_size 95 | 96 | for epoch in range(epochs): 97 | loss = 0. 
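        # `loss` here is just a running float for per-epoch reporting; it shadows
        # the `tftf.losses` alias imported as `loss` above, which is harmless only
        # because the `cost` op has already been built at this point.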
98 | for i in range(n_batches): 99 | start = i * batch_size 100 | end = start + batch_size 101 | 102 | _train_X = pad_sequences(train_X[start:end], 103 | value=pad_value) 104 | _train_y = pad_sequences(train_y[start:end], 105 | value=pad_value) 106 | 107 | _, _cost = sess.run([train_step, cost], feed_dict={ 108 | x: _train_X, 109 | t: _train_y 110 | }) 111 | loss += _cost 112 | 113 | loss /= n_batches 114 | 115 | _test_X = pad_sequences(test_X, value=pad_value) 116 | _test_y = pad_sequences(test_y, value=pad_value) 117 | 118 | val_loss = cost.eval(session=sess, feed_dict={ 119 | x: _test_X, 120 | t: _test_y 121 | }) 122 | 123 | print('epoch: {}, ' 124 | 'loss: {:.3}, ' 125 | 'val_loss: {:.3}'.format(epoch+1, loss, val_loss)) 126 | 127 | ''' 128 | Generate sentences 129 | ''' 130 | initial = { 131 | 'y': tf.placeholder(tf.int32, [None, None]), 132 | 'state': tf.placeholder(tf.float32, [None, None]), 133 | 'cell_state': tf.placeholder(tf.float32, [None, None]), 134 | 'attn': tf.zeros_like(encoder_output[:, :, :1], dtype=tf.float32), 135 | 'step': tf.constant(0) 136 | } 137 | initial['flg'] = tf.cast(tf.zeros_like(initial['y'][:, 0]), dtype=tf.bool) 138 | maxlen = 100 139 | 140 | def cond(y, state, cell_state, attn, step, flg): 141 | n_flg = tf.reduce_sum(tf.cast(flg, tf.int32)) 142 | next = \ 143 | tf.not_equal(n_flg, 144 | tf.reduce_sum(tf.ones_like(initial['flg'], 145 | dtype=tf.int32))) 146 | return tf.logical_and(step+1 < maxlen, next) 147 | 148 | def body(y, state, cell_state, attn, step, flg): 149 | h = y[:, -1] 150 | for layer in decoder[0]: 151 | h = layer(h, 152 | recurrent=False, 153 | initial_state=state, 154 | cell_state=cell_state) 155 | decoder_output, decoder_cell = h 156 | 157 | output = decoder_output 158 | for layer in decoder[1]: 159 | output = layer(output, recurrent=False) 160 | output = tf.cast(tf.argmax(output, axis=1), tf.int32) 161 | y = tf.concat([y, output[:, np.newaxis]], axis=1) 162 | a = decoder[1][0].attn 163 | attn = tf.concat([attn, a[:, :, np.newaxis]], axis=2) 164 | flg = tf.logical_or(flg, tf.equal(output, end_char)) 165 | 166 | return [y, 167 | decoder_output, 168 | decoder_cell, 169 | attn, 170 | step+1, 171 | flg] 172 | generator = \ 173 | tf.while_loop(cond, 174 | body, 175 | loop_vars=[initial['y'], 176 | initial['state'], 177 | initial['cell_state'], 178 | initial['attn'], 179 | initial['step'], 180 | initial['flg']], 181 | shape_invariants=[tf.TensorShape([None, None]), 182 | tf.TensorShape([None, None]), 183 | tf.TensorShape([None, None]), 184 | tf.TensorShape([None, None, None]), 185 | initial['step'].get_shape(), 186 | tf.TensorShape([None])]) 187 | 188 | test_X_ = pad_sequences(test_X, value=pad_value) 189 | init_y = np.zeros_like(test_X, dtype='int32')[:, np.newaxis] 190 | state, cell_state, mask = \ 191 | sess.run([encoder_output, encoder_cell, mask_attn], feed_dict={ 192 | x: test_X_ 193 | }) 194 | init_state = state[:, -1, :] 195 | init_cell_state = cell_state[:, -1, :] 196 | 197 | preds, _, _, _, _, _ = sess.run(generator, feed_dict={ 198 | decoder[1][0].mask: mask, 199 | decoder[1][0].state: state, 200 | initial['y']: init_y, 201 | initial['state']: init_state, 202 | initial['cell_state']: init_cell_state 203 | }) 204 | 205 | for n in range(len(test_X)): 206 | data = test_X[n][1:-1] 207 | target = test_y[n][1:-1] 208 | pred = list(preds[n])[1:] 209 | pred.append(end_char) 210 | 211 | print('-' * 20) 212 | print('Original sentence:', 213 | ' '.join([i2w_X[i] for i in data])) 214 | print('True sentence:', 215 | ' '.join([i2w_y[i] for i in 
target])) 216 | print('Generated sentence:', 217 | ' '.join([i2w_y[i] for i in pred[:pred.index(end_char)]])) 218 | -------------------------------------------------------------------------------- /tftf/layers/LSTM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | from .initializers import zeros 5 | 6 | 7 | class LSTM(Layer): 8 | def __init__(self, output_dim, 9 | input_dim=None, 10 | initializer='glorot_uniform', 11 | activation='tanh', 12 | recurrent_initializer='orthogonal', 13 | recurrent_activation='sigmoid', 14 | length_of_sequences=None, 15 | return_sequence=False, 16 | return_cell=False, 17 | initial_state=None, 18 | cell_state=None, 19 | rng=None): 20 | super().__init__() 21 | self.output_dim = output_dim 22 | self.input_dim = input_dim 23 | 24 | self.initializer = initializer 25 | self.recurrent_initializer = recurrent_initializer 26 | self.activation = \ 27 | self.activation_initializer(activation) 28 | self.recurrent_activation = \ 29 | self.activation_initializer(recurrent_activation) 30 | self._length_of_sequences = length_of_sequences 31 | self._return_sequence = return_sequence 32 | self._return_cell = return_cell 33 | self._initial_state = initial_state 34 | self._cell_state = cell_state 35 | self._use_mask = False 36 | 37 | @property 38 | def input_shape(self): 39 | return (self._length_of_sequences, self.input_dim) 40 | 41 | def compile(self): 42 | input_dim = self.input_dim 43 | output_dim = self.output_dim 44 | initializer = self.initializer 45 | recurrent_initializer = self.recurrent_initializer 46 | 47 | self.W_c = \ 48 | self.kernel_initializer(initializer, 49 | shape=(input_dim, output_dim), 50 | name='W_c') 51 | self.W_i = \ 52 | self.kernel_initializer(initializer, 53 | shape=(input_dim, output_dim), 54 | name='W_i') 55 | self.W_f = \ 56 | self.kernel_initializer(initializer, 57 | shape=(input_dim, output_dim), 58 | name='W_f') 59 | self.W_o = \ 60 | self.kernel_initializer(initializer, 61 | shape=(input_dim, output_dim), 62 | name='W_o') 63 | self.W_recurrent_c = \ 64 | self.kernel_initializer(recurrent_initializer, 65 | shape=(output_dim, output_dim), 66 | name='W_recurrent_c') 67 | self.W_recurrent_i = \ 68 | self.kernel_initializer(recurrent_initializer, 69 | shape=(output_dim, output_dim), 70 | name='W_recurrent_i') 71 | self.W_recurrent_f = \ 72 | self.kernel_initializer(recurrent_initializer, 73 | shape=(output_dim, output_dim), 74 | name='W_recurrent_f') 75 | self.W_recurrent_o = \ 76 | self.kernel_initializer(recurrent_initializer, 77 | shape=(output_dim, output_dim), 78 | name='W_recurrent_o') 79 | 80 | self.b_c = zeros((output_dim), name='b_c') 81 | self.b_i = zeros((output_dim), name='b_i') 82 | self.b_f = zeros((output_dim), name='b_f') 83 | self.b_o = zeros((output_dim), name='b_o') 84 | 85 | self.params = [self.W_c, self.W_i, self.W_f, self.W_o, 86 | self.W_recurrent_c, self.W_recurrent_i, 87 | self.W_recurrent_f, self.W_recurrent_o, 88 | self.b_c, self.b_i, self.b_f, self.b_o] 89 | 90 | def forward(self, x, **kwargs): 91 | ''' 92 | # Arguments 93 | mask: Tensor. Mask for padded value. 94 | recurrent: boolean (default True). 95 | Whether to loop the input sequence. 96 | initial_state: (default None). Override self._initial_state. 97 | cell_state: (default None). Override self._cell_state. 
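        # Returns
            The hidden state(s): all time steps (batch-major) when
            `return_sequence` is True, otherwise only the last step.
            When `return_cell` is True, a `(states, cell)` tuple is
            returned instead of the states alone.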
98 | ''' 99 | def _recurrent(state, elems): 100 | if not self._use_mask: 101 | x = elems 102 | else: 103 | x = elems[0] 104 | mask = elems[1] 105 | 106 | a = activation(tf.matmul(x, self.W_c) 107 | + tf.matmul(state[0], self.W_recurrent_c) 108 | + self.b_c) 109 | i = recurrent_activation(tf.matmul(x, self.W_i) 110 | + tf.matmul(state[0], self.W_recurrent_i) 111 | + self.b_i) 112 | f = recurrent_activation(tf.matmul(x, self.W_f) 113 | + tf.matmul(state[0], self.W_recurrent_f) 114 | + self.b_f) 115 | o = recurrent_activation(tf.matmul(x, self.W_o) 116 | + tf.matmul(state[0], self.W_recurrent_o) 117 | + self.b_o) 118 | 119 | cell = i * a + f * state[1] 120 | h = o * activation(cell) 121 | 122 | if not self._use_mask: 123 | return [h, cell] 124 | else: 125 | mask = mask[:, np.newaxis] 126 | cell = mask * cell + (1 - mask) * state[1] 127 | h = mask * h + (1 - mask) * state[0] 128 | return [h, cell] 129 | 130 | activation = self.activation 131 | recurrent_activation = self.recurrent_activation 132 | 133 | mask = kwargs['mask'] if 'mask' in kwargs else None 134 | self._use_mask = True if mask is not None else False 135 | 136 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 137 | 138 | if 'initial_state' in kwargs: 139 | initial_state = kwargs['initial_state'] 140 | else: 141 | initial_state = self._initial_state 142 | 143 | if 'cell_state' in kwargs: 144 | cell_state = kwargs['cell_state'] 145 | else: 146 | cell_state = self._cell_state 147 | 148 | if initial_state is None: 149 | initial_state = \ 150 | tf.matmul(x[:, 0, :], 151 | tf.zeros((self.input_dim, self.output_dim))) 152 | 153 | if cell_state is None: 154 | cell_state = \ 155 | tf.matmul(x[:, 0, :], 156 | tf.zeros((self.input_dim, self.output_dim))) 157 | 158 | if not recurr: 159 | if mask is None: 160 | states, cell = _recurrent([initial_state, cell_state], x) 161 | else: 162 | states, cell = _recurrent([initial_state, cell_state], 163 | [x, mask]) 164 | if self._return_cell: 165 | return (states, cell) 166 | else: 167 | return states 168 | else: 169 | if mask is None: 170 | states, cell = \ 171 | tf.scan(fn=_recurrent, 172 | elems=tf.transpose(x, perm=[1, 0, 2]), 173 | initializer=[initial_state, cell_state]) 174 | else: 175 | mask = tf.transpose(mask) 176 | states, cell = \ 177 | tf.scan(fn=_recurrent, 178 | elems=[tf.transpose(x, 179 | perm=[1, 0, 2]), mask], 180 | initializer=[initial_state, cell_state]) 181 | 182 | if self._return_sequence: 183 | states = tf.transpose(states, perm=[1, 0, 2]) 184 | cell = tf.transpose(cell, perm=[1, 0, 2]) 185 | else: 186 | states = states[-1] 187 | cell = cell[-1] 188 | 189 | if self._return_cell: 190 | return (states, cell) 191 | else: 192 | return states 193 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /tftf/models/Model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | from sklearn.utils import shuffle as shuffle_data 5 | from ..layers import Dense 6 | from .callbacks import EarlyStopping 7 | from .losses import * 8 | from .metrics import * 9 | from .optimizers import * 10 | 11 | 12 | class Model(object): 13 | def __init__(self, 14 | name='model', 15 | reset_graph=True): 16 | if reset_graph: 17 | tf.reset_default_graph() 18 | 19 | self._name = name if name is not None else '' 20 | self._layers = [] 21 | self._shapes = [] 22 | self._sess = None 23 | self._restored = False 24 | 25 | def __del__(self): 26 | if self._sess is not None: 27 | self._sess.close() 28 | 29 | @property 30 | def layers(self): 31 | return self._layers 32 | 33 | def add(self, layer): 34 | input_dim = layer.input_dim 35 | output_dim = layer.output_dim 36 | 37 | if input_dim is None: 38 | if len(self.layers) == 0: 39 | raise AttributeError('input_dim must be specified ' 40 | 'on first layer.') 41 | else: 42 | layer.input_dim = self._shapes[-1][1] 43 | 44 | if output_dim is None: 45 | layer.initialize_output_dim() 46 | 47 | self._shapes.append(layer.shape) 48 | self._layers.append(layer) 49 | 50 | def compile(self, loss='mse', optimizer='rmsprop', 51 | variable_input=False, 52 | use_mask=False, 53 | pad_value=0): 54 | if not self._restored: 55 | self._compile_layers() 56 | 57 | if not variable_input: 58 | input_shape = [None] + list(self.layers[0].input_shape) 59 | else: 60 | input_shape = [None] + [None] * len(self.layers[0].input_shape) 61 | output_shape = [None] + list(self.layers[-1].output_shape) 62 | 63 | x = self.data = \ 64 | tf.placeholder(self.layers[0].input_dtype, 65 | shape=input_shape, name='x') 66 | t = self.target = \ 67 | tf.placeholder(self.layers[-1].output_dtype, 68 | shape=output_shape, name='t') 69 | 70 | if use_mask: 71 | mask = tf.cast(tf.not_equal(x, pad_value), tf.float32) 72 | else: 73 | mask = None 74 | 75 | training = self.training = \ 76 | tf.placeholder_with_default(False, ()) 77 | 78 | y = self._y = self._predict(x, training=training, mask=mask) 79 | self._loss = self._compile_loss(loss, y, t) 80 | self._train_step = \ 81 | self._optimize(optimizer).minimize(self._loss) 82 | 83 | if not self._restored: 84 | self._sess = tf.Session() 85 | self._init = tf.global_variables_initializer() 86 | self._sess.run(self._init) 87 | else: 88 | uninitialized_variables = [ 89 | var for var in tf.global_variables() 90 | if var.name.split(':')[0].encode() 91 | in set(self._sess.run(tf.report_uninitialized_variables())) 92 | ] 93 | self._sess.run(tf.variables_initializer(uninitialized_variables)) 94 | 95 | def describe(self): 96 | layers = self.layers 97 | digits = int(np.log10(len(layers))) + 1 98 | for i, layer in enumerate(layers): 99 | print('#{}: {}'.format(str(i).zfill(digits), layer)) 100 | 101 | def describe_params(self): 102 | layers = self.layers 103 | digits = 
int(np.log10(len(layers))) + 1 104 | for i, layer in enumerate(layers): 105 | _params = layer.params 106 | print('-' * 48) 107 | print('#{}: {}'.format(str(i).zfill(digits), layer)) 108 | print('-' * 48) 109 | if len(_params) == 0: 110 | print('No params') 111 | else: 112 | for j, param in enumerate(_params): 113 | print('{}: {}'.format(param.name, 114 | param.get_shape())) 115 | if i == len(layers) - 1: 116 | print('-' * 48) 117 | 118 | def eval(self, elem, feed_dict): 119 | return self._sess.run(elem, feed_dict=feed_dict) 120 | 121 | def fit(self, data, target, 122 | epochs=10, batch_size=100, 123 | validation_data=None, 124 | metrics=[], 125 | preprocesses=[], 126 | shuffle=True, 127 | early_stopping=-1, 128 | verbose=1): 129 | 130 | if len(data) != len(target): 131 | raise AttributeError('Length of X and y does not match.') 132 | 133 | es = None 134 | if early_stopping > -1: 135 | if validation_data is None: 136 | raise AttributeError('early_stopping needs validation_data.') 137 | es = EarlyStopping(patience=early_stopping, verbose=verbose) 138 | 139 | n_data = len(data) 140 | if n_data < batch_size: 141 | batch_size = n_data 142 | n_batches = n_data // batch_size 143 | 144 | for epoch in range(epochs): 145 | results = [['loss', 0.]] 146 | 147 | if shuffle: 148 | indices = shuffle_data(np.arange(n_data)) 149 | _data = data[indices] 150 | _target = target[indices] 151 | else: 152 | _data = data 153 | _target = target 154 | 155 | for i in range(n_batches): 156 | _start = i * batch_size 157 | _end = _start + batch_size 158 | 159 | _batch_data = _data[_start:_end] 160 | _batch_target = _target[_start:_end] 161 | 162 | for _preprocess in preprocesses: 163 | _batch_data = _preprocess(_batch_data) 164 | 165 | self.eval(self._train_step, 166 | feed_dict={ 167 | self.data: _batch_data, 168 | self.target: _batch_target, 169 | self.training: True 170 | }) 171 | results[0][1] += self.loss(_batch_data, _batch_target) 172 | 173 | if verbose: 174 | for j, metric in enumerate(metrics): 175 | _res = self.metric(metric, _batch_data, _batch_target) 176 | if i == 0: 177 | results.append(_res) 178 | else: 179 | results[j+1][1] += _res[1] 180 | 181 | if validation_data is not None: 182 | val_data = validation_data[0] 183 | for _preprocess in preprocesses: 184 | val_data = _preprocess(val_data) 185 | val_target = validation_data[1] 186 | val_loss = self.loss(val_data, val_target) 187 | 188 | if verbose: 189 | def _format(results): 190 | return ', '.join(map(lambda tup: 191 | '{}: {:.3}'.format(tup[0], tup[1]), 192 | results)) 193 | 194 | out = 'epoch: {}, '.format(epoch + 1) 195 | for i, res in enumerate(results): 196 | results[i][1] /= n_batches 197 | 198 | out += _format(results) 199 | 200 | if validation_data is not None: 201 | out += ', ' 202 | results = [('val_loss', val_loss)] 203 | for metric in metrics: 204 | results.append(self.metric(metric, 205 | val_data, 206 | val_target, 207 | validation=True)) 208 | out += _format(results) 209 | print(out) 210 | if es is not None: 211 | if es.on_epoch_end(epoch, val_loss): 212 | break 213 | 214 | def predict(self, data): 215 | ret = self.eval(self._y, 216 | feed_dict={ 217 | self.data: data 218 | }) 219 | return ret 220 | 221 | def loss(self, data, target): 222 | loss = self.eval(self._loss, 223 | feed_dict={ 224 | self.data: data, 225 | self.target: target 226 | }) 227 | return loss 228 | 229 | def metric(self, metric, data, target, validation=False): 230 | metrics = { 231 | 'accuracy': ('acc', self.accuracy), 232 | 'f1': ('f1', self.f1), 233 | 'precision': 
('pre', self.precision),
234 |             'recall': ('rec', self.recall)
235 |         }
236 | 
237 |         if metric in metrics:
238 |             name = metrics[metric][0]
239 |             score = metrics[metric][1](data, target)
240 |         else:
241 |             name = 'custom'
242 |             score = metric(data, target)
243 | 
244 |         if validation:
245 |             name = 'val_' + name
246 | 
247 |         if not validation:
248 |             return [name, score]
249 |         else:
250 |             return (name, score)
251 | 
252 |     def accuracy(self, data, target):
253 |         return accuracy(self.predict(data), target)
254 | 
255 |     def f1(self, data, target):
256 |         return f1(self.predict(data), target)
257 | 
258 |     def precision(self, data, target):
259 |         return precision(self.predict(data), target)
260 | 
261 |     def recall(self, data, target):
262 |         return recall(self.predict(data), target)
263 | 
264 |     def restore(self, model_path):
265 |         if self._sess is not None:
266 |             raise AttributeError('Session already initialized. '
267 |                                  'Model variables must be restored '
268 |                                  'before compile.')
269 |         self._compile_layers()
270 |         self._sess = tf.Session()
271 |         saver = tf.train.Saver()
272 |         saver.restore(self._sess, model_path)
273 |         self._restored = True
274 | 
275 |     def save(self, out_path, verbose=1):
276 |         out_dir = out_path.split('/')[:-1]
277 |         if len(out_dir) > 0:
278 |             os.makedirs(os.path.join(*out_dir), exist_ok=True)
279 |         saver = tf.train.Saver()
280 |         saver.save(self._sess, out_path)
281 | 
282 |         if verbose:
283 |             print('Model saved to: \'{}\''.format(out_path))
284 | 
285 |     def _compile_layers(self):
286 |         with tf.variable_scope(self._name):
287 |             for layer in self._layers:
288 |                 layer.compile()
289 | 
290 |     def _compile_loss(self, loss, data, target):
291 |         losses = {
292 |             'binary_crossentropy': binary_crossentropy,
293 |             'categorical_crossentropy': categorical_crossentropy,
294 |             'mean_squared_error': mean_squared_error,
295 |             'mse': mean_squared_error
296 |         }
297 | 
298 |         if loss in losses:
299 |             cost = losses[loss](data, target)
300 |         else:
301 |             cost = loss(data, target)
302 | 
303 |         for layer in self._layers:
304 |             cost += tf.reduce_sum(layer.reg_loss)
305 | 
306 |         return cost
307 | 
308 |     def _predict(self, x, **kwargs):
309 |         output = x
310 |         for layer in self.layers:
311 |             output = layer.forward(output, **kwargs)
312 | 
313 |         return output
314 | 
315 |     def _optimize(self, optimizer):
316 |         optimizers = {
317 |             'adadelta': adadelta,
318 |             'adagrad': adagrad,
319 |             'adam': adam,
320 |             'momentum': momentum,
321 |             'rmsprop': rmsprop,
322 |             'sgd': sgd
323 |         }
324 | 
325 |         if optimizer in optimizers:
326 |             return optimizers[optimizer]()
327 |         else:
328 |             return optimizer()
329 | 
--------------------------------------------------------------------------------
/tftf/layers/modules/Transformer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from . import Module
4 | from .. import Embedding, PositionalEncoding
5 | from .. import LayerNormalization
6 | from .. import TimeDistributedDense as Dense
7 | from .. import Activation, Dropout
8 | from ...losses import categorical_crossentropy
9 | from ...optimizers import adam
10 | 
11 | 
12 | class Transformer(Module):
13 |     '''
14 |     Implementation of Transformer model from
15 |     "Attention Is All You Need",
16 |     Ashish Vaswani et al.
17 | https://arxiv.org/abs/1706.03762 18 | ''' 19 | def __init__(self, 20 | len_src_vocab, 21 | len_target_vocab, 22 | d_model=512, 23 | d_ff=2048, 24 | N=6, 25 | h=8, 26 | pad_value=0, 27 | p_dropout=0.1, 28 | label_smooth=0.1, 29 | maxlen=6000, 30 | warmup_steps=4000): 31 | 32 | assert label_smooth >= 0. 33 | 34 | self.len_src_vocab = len_src_vocab 35 | self.len_target_vocab = len_target_vocab 36 | self.d_model = d_model 37 | self.d_ff = d_ff 38 | self.N = N 39 | self.h = h 40 | self.pad_value = pad_value 41 | self.p_dropout = p_dropout 42 | self.label_smooth = label_smooth 43 | self.maxlen = maxlen 44 | self.warmup_steps = warmup_steps 45 | self.is_training = tf.placeholder_with_default(False, ()) 46 | 47 | ''' 48 | Model Architecture 49 | ''' 50 | def v1(self, x, t, **kwargs): 51 | mask_src = self._pad_mask(x) 52 | encoder = self.encoder = \ 53 | Encoder(len_src_vocab=self.len_src_vocab, 54 | N=self.N, 55 | d_model=self.d_model, 56 | d_ff=self.d_ff, 57 | h=self.h, 58 | p_dropout=self.p_dropout, 59 | maxlen=self.maxlen) 60 | 61 | x = self.encode(x, mask=mask_src) 62 | 63 | mask_tgt = self._pad_subsequent_mask(t) 64 | decoder = self.decoder = \ 65 | Decoder(len_target_vocab=self.len_target_vocab, 66 | N=self.N, 67 | d_model=self.d_model, 68 | d_ff=self.d_ff, 69 | h=self.h, 70 | p_dropout=self.p_dropout, 71 | maxlen=self.maxlen) 72 | 73 | x = self.decode(t, memory=x, 74 | mask_src=mask_src, mask_tgt=mask_tgt, **kwargs) 75 | 76 | generator = self.generator = \ 77 | Generator(len_target_vocab=self.len_target_vocab) 78 | x = self.generate(x, **kwargs) 79 | 80 | self.x = x 81 | self.t = tf.one_hot(t, depth=self.len_target_vocab, dtype=tf.float32) 82 | 83 | return x 84 | 85 | def encode(self, x, mask=None, **kwargs): 86 | return self.encoder(x, mask=mask, 87 | training=self.is_training, **kwargs) 88 | 89 | def decode(self, x, memory, mask_src=None, mask_tgt=None, **kwargs): 90 | return self.decoder(x, memory=memory, 91 | mask_src=mask_src, mask_tgt=mask_tgt, 92 | training=self.is_training, **kwargs) 93 | 94 | def generate(self, x, **kwargs): 95 | return self.generator(x, training=self.is_training, **kwargs) 96 | 97 | def _pad_mask(self, x): 98 | mask = tf.cast(tf.not_equal(x, self.pad_value), tf.float32) 99 | return mask[:, np.newaxis] 100 | 101 | def _subsequent_mask(self, x): 102 | size = tf.shape(x)[-1] 103 | shape = (1, size, size) 104 | mask = tf.matrix_band_part(tf.ones(shape), -1, 0) 105 | return tf.cast(mask, tf.float32) 106 | 107 | def _pad_subsequent_mask(self, x): 108 | mask = self._pad_mask(x) 109 | mask = \ 110 | tf.cast( 111 | tf.logical_and(tf.cast(mask, tf.bool), 112 | tf.cast(self._subsequent_mask(x), 113 | tf.bool)), 114 | tf.float32 115 | ) 116 | return mask 117 | 118 | ''' 119 | Training 120 | ''' 121 | def loss(self, preds=None, target=None): 122 | if preds is None: 123 | preds = self.x 124 | if target is None: 125 | target = self.t 126 | 127 | e = self.label_smooth 128 | if e > 0.: 129 | target = (1 - e) * target + e / self.len_target_vocab 130 | return categorical_crossentropy(preds, target) 131 | 132 | def optimizer(self, loss=None): 133 | if loss is None: 134 | loss = self.loss() 135 | lrate = tf.placeholder(tf.float32, shape=(), name='lrate') 136 | opt = adam(lr=lrate, beta1=0.9, beta2=0.98, eps=1e-9) 137 | return (opt.minimize(loss), lrate) 138 | 139 | def lrate(self, epoch=0): 140 | ''' 141 | Learning rate for Adam in the model 142 | ''' 143 | step = epoch + 1 144 | return self.d_model ** (-0.5) * \ 145 | min(step ** (-0.5), step * self.warmup_steps ** (-1.5)) 146 | 
147 | 148 | class Encoder(object): 149 | def __init__(self, 150 | len_src_vocab, 151 | N=6, 152 | d_model=512, 153 | d_ff=2048, 154 | h=8, 155 | p_dropout=0.1, 156 | maxlen=6000): 157 | self.layers = [ 158 | Embedding(d_model, len_src_vocab), 159 | PositionalEncoding(d_model, maxlen), 160 | Dropout(p_dropout) 161 | ] 162 | self.sub_layers = \ 163 | [EncoderSubLayer(d_model, d_ff, h, p_dropout) for _ in range(N)] 164 | 165 | def __call__(self, x, mask=None, **kwargs): 166 | return self.forward(x, mask, **kwargs) 167 | 168 | def forward(self, x, mask=None, **kwargs): 169 | for l in self.layers: 170 | x = l(x, **kwargs) 171 | for sub_layer in self.sub_layers: 172 | x = sub_layer(x, mask=mask, **kwargs) 173 | 174 | return x 175 | 176 | 177 | class EncoderSubLayer(object): 178 | def __init__(self, 179 | d_model, 180 | d_ff, 181 | h, 182 | p_dropout): 183 | self.layers = [ 184 | [MultiHeadAttention(d_model, h), 185 | Dropout(p_dropout), 186 | LayerNormalization()], 187 | [FFN(d_model, d_ff), 188 | Dropout(p_dropout), 189 | LayerNormalization()] 190 | ] 191 | 192 | def __call__(self, x, mask=None, **kwargs): 193 | return self.forward(x, mask, **kwargs) 194 | 195 | def forward(self, x, mask=None, **kwargs): 196 | # 1st sub-layer 197 | layers = self.layers[0] 198 | h = layers[0](query=x, key=x, value=x, mask=mask, **kwargs) 199 | h = layers[1](h, **kwargs) 200 | x = layers[2](h + x, **kwargs) 201 | 202 | # 2nd sub-layer 203 | layers = self.layers[1] 204 | h = layers[0](x, **kwargs) 205 | h = layers[1](h, **kwargs) 206 | x = layers[2](h + x, **kwargs) 207 | 208 | return x 209 | 210 | 211 | class Decoder(object): 212 | def __init__(self, 213 | len_target_vocab, 214 | N=6, 215 | d_model=512, 216 | d_ff=2048, 217 | h=8, 218 | p_dropout=0.1, 219 | maxlen=6000): 220 | self.layers = [ 221 | Embedding(d_model, len_target_vocab), 222 | PositionalEncoding(d_model, maxlen), 223 | Dropout(p_dropout) 224 | ] 225 | self.sub_layers = \ 226 | [DecoderSubLayer(d_model, d_ff, h, p_dropout) for _ in range(N)] 227 | 228 | def __call__(self, x, memory, 229 | mask_src=None, mask_tgt=None, **kwargs): 230 | return self.forward(x, memory, mask_src, mask_tgt, **kwargs) 231 | 232 | def forward(self, x, memory, 233 | mask_src=None, mask_tgt=None, **kwargs): 234 | for l in self.layers: 235 | x = l(x, **kwargs) 236 | for sub_layer in self.sub_layers: 237 | x = sub_layer(x, memory, 238 | mask_src=mask_src, mask_tgt=mask_tgt, **kwargs) 239 | 240 | return x 241 | 242 | 243 | class DecoderSubLayer(object): 244 | def __init__(self, 245 | d_model, 246 | d_ff, 247 | h, 248 | p_dropout): 249 | self.layers = [ 250 | [MultiHeadAttention(d_model, h), 251 | Dropout(p_dropout), 252 | LayerNormalization()], 253 | [MultiHeadAttention(d_model, h), 254 | Dropout(p_dropout), 255 | LayerNormalization()], 256 | [FFN(d_model, d_ff), 257 | Dropout(p_dropout), 258 | LayerNormalization()] 259 | ] 260 | 261 | def __call__(self, x, memory, 262 | mask_src=None, mask_tgt=None, **kwargs): 263 | return self.forward(x, memory, mask_src, mask_tgt, **kwargs) 264 | 265 | def forward(self, x, memory, 266 | mask_src=None, mask_tgt=None, **kwargs): 267 | # 1st sub_layer 268 | layers = self.layers[0] 269 | h = layers[0](query=x, key=x, value=x, mask=mask_tgt, **kwargs) 270 | h = layers[1](h, **kwargs) 271 | x = layers[2](h + x, **kwargs) 272 | 273 | # 2nd sub-layer 274 | layers = self.layers[1] 275 | h = layers[0](query=x, key=memory, value=memory, 276 | mask=mask_src, **kwargs) 277 | h = layers[1](h, **kwargs) 278 | x = layers[2](h + x, **kwargs) 279 | 280 | # 3rd 
sub-layer
281 |         layers = self.layers[2]
282 |         h = layers[0](x, **kwargs)
283 |         h = layers[1](h, **kwargs)
284 |         x = layers[2](h + x, **kwargs)
285 | 
286 |         return x
287 | 
288 | 
289 | class Generator(object):
290 |     def __init__(self,
291 |                  len_target_vocab):
292 |         self.layers = [
293 |             Dense(len_target_vocab),
294 |             Activation('softmax')
295 |         ]
296 | 
297 |     def __call__(self, x, **kwargs):
298 |         return self.forward(x, **kwargs)
299 | 
300 |     def forward(self, x, **kwargs):
301 |         for l in self.layers:
302 |             x = l(x, **kwargs)
303 |         return x
304 | 
305 | 
306 | class MultiHeadAttention(object):
307 |     '''
308 |     Multi-Head Attention / Masked Multi-Head Attention
309 |     '''
310 |     def __init__(self, d_model, h):
311 |         self.d_model = d_model
312 |         self.h = h
313 |         self.linears = [Dense(d_model, d_model) for _ in range(4)]
314 | 
315 |     def __call__(self, query, key, value, mask=None, **kwargs):
316 |         return self.forward(query, key, value, mask, **kwargs)
317 | 
318 |     def forward(self, query, key, value, mask=None, **kwargs):
319 |         d_k = d_v = self.d_k = self.d_v = self.d_model // self.h
320 |         n_batches = tf.shape(query)[0]
321 |         query, key, value = \
322 |             [tf.transpose(tf.reshape(l(x),
323 |                                      shape=[n_batches, -1, self.h, d_k]),
324 |                           perm=[0, 2, 1, 3])
325 |              for l, x in zip(self.linears, (query, key, value))]
326 | 
327 |         if mask is not None:
328 |             mask = mask[:, np.newaxis]  # apply to all heads
329 |         x, attn = self._attention(query, key, value, mask=mask, **kwargs)
330 |         x = tf.reshape(tf.transpose(x, perm=[0, 2, 1, 3]),
331 |                        shape=[n_batches, -1, self.h * d_k])
332 | 
333 |         return self.linears[-1](x)
334 | 
335 |     def _attention(self, query, key, value, mask=None, **kwargs):
336 |         '''
337 |         Scaled Dot-Product Attention
338 |         '''
339 |         d_k = self.d_k
340 |         score = tf.matmul(query,
341 |                           tf.transpose(key, perm=[0, 1, 3, 2])) / np.sqrt(d_k)
342 |         if mask is not None:
343 |             mask = self._to_attention_mask(mask)
344 |             score += mask  # additive mask: masked positions get -1e9 and vanish after softmax
345 | 
346 |         attn = tf.nn.softmax(score)
347 |         c = tf.matmul(attn, value)
348 | 
349 |         return c, attn
350 | 
351 |     def _to_attention_mask(self, mask):
352 |         return tf.where(condition=tf.equal(mask, 0),
353 |                         x=tf.ones_like(mask,
354 |                                        dtype=tf.float32) * np.float32(-1e+9),
355 |                         y=tf.zeros_like(mask,
356 |                                         dtype=tf.float32))
357 | 
358 | 
359 | class FFN(object):
360 |     '''
361 |     Position-wise Feed-Forward Networks
362 |     '''
363 |     def __init__(self, d_model, d_ff):
364 |         self.layers = [
365 |             Dense(d_ff, d_model),
366 |             Activation('relu'),
367 |             Dense(d_model, d_ff)
368 |         ]
369 | 
370 |     def __call__(self, x, **kwargs):
371 |         return self.forward(x, **kwargs)
372 | 
373 |     def forward(self, x, **kwargs):
374 |         for l in self.layers:
375 |             x = l(x, **kwargs)
376 |         return x
377 | 
--------------------------------------------------------------------------------
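
Usage sketch (illustrative only, not copied from the shipped examples/). The Model class in tftf/models/Model.py above follows a Keras-style add / compile / fit / predict / save workflow. The sketch below assumes that the plain Dense layer takes (output_dim, input_dim) like the TimeDistributedDense used in the Transformer module, that Activation accepts an activation name as in Generator, and that the first layer must carry an explicit input_dim as enforced by Model.add; the data is random stand-in data, not a real dataset.

# Hypothetical usage sketch for tftf.models.Model (assumptions noted above).
import numpy as np
from tftf.layers import Dense, Activation
from tftf.models import Model

# Random stand-in data: 1000 samples, 784 features, 10 one-hot classes.
X = np.random.rand(1000, 784).astype(np.float32)
t = np.eye(10)[np.random.randint(10, size=1000)].astype(np.float32)

model = Model()
model.add(Dense(256, 784))       # assumed signature: Dense(output_dim, input_dim)
model.add(Activation('relu'))
model.add(Dense(10))             # input_dim inferred from the previous layer by Model.add
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.describe()
model.fit(X, t, epochs=5, batch_size=100, metrics=['accuracy'])

preds = model.predict(X[:10])
model.save('ckpt/mlp')           # creates ckpt/ if missing and prints the save path

Note that restore() must be called before compile(): to reload, build the same layer stack with add(), call restore('ckpt/mlp'), and only then call compile(), which reuses the restored session and initializes any remaining variables.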
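
For the Transformer module, lrate implements the warm-up schedule from the paper, lrate = d_model ** (-0.5) * min(step ** (-0.5), step * warmup_steps ** (-1.5)) with step = epoch + 1; with the defaults (d_model=512, warmup_steps=4000) it starts around 1.7e-7 and peaks near 7e-4 at step 4000. The fragment below is a sketch of how that value might be fed to the lrate placeholder returned by Transformer.optimizer; it assumes the graph was built with model.v1(x, t) and that sess, x, t, batch_x, batch_t and n_epochs come from surrounding code.

# Hypothetical training-loop fragment for the Transformer module above.
loss = model.loss()                       # label-smoothed categorical crossentropy
train_step, lrate = model.optimizer(loss)  # (Adam minimize op, learning-rate placeholder)

for epoch in range(n_epochs):
    sess.run(train_step,
             feed_dict={
                 x: batch_x,
                 t: batch_t,
                 lrate: model.lrate(epoch),   # ~1.7e-7 at step 1, ~7e-4 at step 4000 with defaults
                 model.is_training: True
             })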