├── tftf ├── preprocessing │ ├── __init__.py │ └── sequence │ │ ├── __init__.py │ │ ├── Pad.py │ │ ├── sort.py │ │ └── pad_sequences.py ├── models │ ├── __init__.py │ ├── callbacks │ │ ├── __init__.py │ │ └── EarlyStopping.py │ ├── optimizers │ │ ├── adagrad.py │ │ ├── rmsprop.py │ │ ├── sgd.py │ │ ├── adadelta.py │ │ ├── adam.py │ │ ├── momentum.py │ │ └── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── f1.py │ │ ├── recall.py │ │ └── precision.py │ ├── losses │ │ ├── mean_squared_error.py │ │ ├── __init__.py │ │ ├── categorical_crossentropy.py │ │ └── binary_crossentropy.py │ └── Model.py ├── layers │ ├── modules │ │ ├── Module.py │ │ ├── __init__.py │ │ ├── ResNet.py │ │ └── Transformer.py │ ├── activations │ │ ├── linear.py │ │ ├── elu.py │ │ ├── relu.py │ │ ├── selu.py │ │ ├── tanh.py │ │ ├── sigmoid.py │ │ ├── softmax.py │ │ ├── swish.py │ │ ├── leaky_relu.py │ │ ├── hard_sigmoid.py │ │ ├── prelu.py │ │ └── __init__.py │ ├── regularizers │ │ ├── __init__.py │ │ ├── l2.py │ │ ├── l1.py │ │ └── l1_l2.py │ ├── initializers │ │ ├── ones.py │ │ ├── zeros.py │ │ ├── __init__.py │ │ ├── normal.py │ │ ├── orthogonal.py │ │ ├── glorot_normal.py │ │ └── glorot_uniform.py │ ├── GlobalAveragePooling2D.py │ ├── Flatten.py │ ├── Activation.py │ ├── Dropout.py │ ├── __init__.py │ ├── TimeDistributedDense.py │ ├── Dense.py │ ├── Embedding.py │ ├── LayerNormalization.py │ ├── NALU.py │ ├── NAC.py │ ├── BatchNormalization.py │ ├── PositionalEncoding.py │ ├── MaxPooling2D.py │ ├── Conv2D.py │ ├── Attention.py │ ├── RNN.py │ ├── Layer.py │ └── LSTM.py ├── losses │ └── __init__.py ├── metrics │ └── __init__.py ├── optimizers │ └── __init__.py ├── activations │ └── __init__.py ├── initializers │ └── __init__.py ├── __init__.py └── datasets │ ├── __init__.py │ ├── Dataset.py │ ├── imdb.py │ ├── mnist.py │ └── small_parallel_enja.py ├── .gitignore ├── setup.py ├── examples ├── save_restore_model.py ├── lenet_mnist.py ├── lstm_imdb.py ├── mlp_mnist.py ├── lenet_advanced_mnist.py ├── rnn_sin.py ├── lstm_sin.py ├── transformer_translation_enja.py ├── low_level_example.py ├── lstm_translation_enja.py └── attention_translation_enja.py ├── README.md └── LICENSE /tftf/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tftf/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .Model import Model 2 | -------------------------------------------------------------------------------- /tftf/layers/modules/Module.py: -------------------------------------------------------------------------------- 1 | class Module(object): 2 | pass 3 | -------------------------------------------------------------------------------- /tftf/layers/activations/linear.py: -------------------------------------------------------------------------------- 1 | def linear(x): 2 | return x 3 | -------------------------------------------------------------------------------- /tftf/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..models.losses import * 3 | -------------------------------------------------------------------------------- /tftf/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..models.metrics import * 3 | 
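The `tftf.losses` and `tftf.metrics` packages above are thin aliases for `tftf.models.losses` and `tftf.models.metrics`, so either import path can be used interchangeably. A minimal sketch (assuming the package is installed):

```python
from tftf.losses import categorical_crossentropy
from tftf.models.losses import categorical_crossentropy as cce
assert categorical_crossentropy is cce   # same object, re-exported via the alias

from tftf.metrics import accuracy
from tftf.models.metrics import accuracy as acc
assert accuracy is acc
```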
-------------------------------------------------------------------------------- /tftf/models/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .EarlyStopping import EarlyStopping 2 | -------------------------------------------------------------------------------- /tftf/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..models.optimizers import * 3 | -------------------------------------------------------------------------------- /tftf/activations/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..layers.activations import * 3 | -------------------------------------------------------------------------------- /tftf/initializers/__init__.py: -------------------------------------------------------------------------------- 1 | # alias 2 | from ..layers.initializers import * 3 | -------------------------------------------------------------------------------- /tftf/layers/activations/elu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def elu(x): 5 | return tf.nn.elu(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .l1 import l1 2 | from .l2 import l2 3 | from .l1_l2 import l1_l2 4 | -------------------------------------------------------------------------------- /tftf/layers/activations/relu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def relu(x): 5 | return tf.nn.relu(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/selu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def selu(x): 5 | return tf.nn.selu(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/tanh.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def tanh(x): 5 | return tf.nn.tanh(x) 6 | -------------------------------------------------------------------------------- /tftf/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .layers import * 3 | from .models import * 4 | 5 | __version__ = '0.0.29' 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/sigmoid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def sigmoid(x): 5 | return tf.nn.sigmoid(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/softmax.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def softmax(x): 5 | return tf.nn.softmax(x) 6 | -------------------------------------------------------------------------------- /tftf/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .Module import Module 2 | from .ResNet import ResNet 3 | from .Transformer import Transformer 4 | 
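The activation files above are thin wrappers around `tf.nn`, and the top-level `tftf.activations` / `tftf.initializers` packages re-export them, so they drop straight into a hand-built graph. A minimal sketch, assuming the TensorFlow 1.x API used throughout the repository:

```python
import tensorflow as tf
from tftf import activations as act
from tftf import initializers as ini

x = tf.placeholder(tf.float32, shape=[None, 784])

W = ini.glorot_normal([784, 200])   # returns a tf.Variable
b = ini.zeros([200])
h = act.tanh(tf.matmul(x, W) + b)   # identical to tf.nn.tanh(...)
```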
-------------------------------------------------------------------------------- /tftf/models/optimizers/adagrad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def adagrad(lr=0.01): 5 | return tf.train.AdagradOptimizer(lr) 6 | -------------------------------------------------------------------------------- /tftf/models/optimizers/rmsprop.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def rmsprop(lr=0.001): 5 | return tf.train.RMSPropOptimizer(lr) 6 | -------------------------------------------------------------------------------- /tftf/models/optimizers/sgd.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def sgd(lr=0.01): 5 | return tf.train.GradientDescentOptimizer(lr) 6 | -------------------------------------------------------------------------------- /tftf/preprocessing/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | from .pad_sequences import pad_sequences 2 | from .sort import sort 3 | 4 | from .Pad import Pad 5 | -------------------------------------------------------------------------------- /tftf/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import load_imdb 2 | from .mnist import load_mnist 3 | from .small_parallel_enja import load_small_parallel_enja 4 | -------------------------------------------------------------------------------- /tftf/layers/activations/swish.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sigmoid import sigmoid 3 | 4 | 5 | def swish(x): 6 | return x * sigmoid(x) 7 | -------------------------------------------------------------------------------- /tftf/layers/activations/leaky_relu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def leaky_relu(x, alpha=0.2): 5 | return tf.nn.leaky_relu(x, alpha=alpha) 6 | -------------------------------------------------------------------------------- /tftf/models/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy 2 | from .f1 import f1 3 | from .precision import precision 4 | from .recall import recall 5 | -------------------------------------------------------------------------------- /tftf/models/optimizers/adadelta.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def adadelta(lr=1.0, rho=0.95): 5 | return tf.train.AdadeltaOptimizer(lr, rho) 6 | -------------------------------------------------------------------------------- /tftf/layers/activations/hard_sigmoid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def hard_sigmoid(x): 5 | return tf.minimum(1.0, tf.maximum(0.0, 0.2 * x + 0.5)) 6 | -------------------------------------------------------------------------------- /tftf/models/losses/mean_squared_error.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def mean_squared_error(y, t): 5 | loss = tf.reduce_mean(tf.square(y - t)) 6 | return loss 7 | 
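Each optimizer factory above simply returns the corresponding `tf.train` optimizer with the given hyperparameters, so it can be paired with any of the loss helpers. A minimal linear-regression sketch, assuming the TensorFlow 1.x API used throughout the repository (shapes and the learning rate are illustrative):

```python
import tensorflow as tf
from tftf import losses as loss
from tftf import optimizers as opt

x = tf.placeholder(tf.float32, shape=[None, 1])
t = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.zeros([1, 1]))
b = tf.Variable(tf.zeros([1]))
y = tf.matmul(x, W) + b

cost = loss.mean_squared_error(y, t)        # tf.reduce_mean(tf.square(y - t))
train_step = opt.sgd(0.1).minimize(cost)    # tf.train.GradientDescentOptimizer(0.1)
```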
-------------------------------------------------------------------------------- /tftf/models/optimizers/adam.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def adam(lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8): 5 | return tf.train.AdamOptimizer(lr, beta1, beta2, eps) 6 | -------------------------------------------------------------------------------- /tftf/models/optimizers/momentum.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def momentum(lr=0.01, momentum=0.9, use_nesterov=True): 5 | return tf.train.MomentumOptimizer(lr, momentum, use_nesterov) 6 | -------------------------------------------------------------------------------- /tftf/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .binary_crossentropy import binary_crossentropy 2 | from .categorical_crossentropy import categorical_crossentropy 3 | from .mean_squared_error import mean_squared_error 4 | -------------------------------------------------------------------------------- /tftf/models/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .adadelta import adadelta 2 | from .adagrad import adagrad 3 | from .adam import adam 4 | from .momentum import momentum 5 | from .rmsprop import rmsprop 6 | from .sgd import sgd 7 | -------------------------------------------------------------------------------- /tftf/layers/initializers/ones.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def ones(shape, name=None, type='float32'): 6 | init = np.ones(shape).astype(type) 7 | return tf.Variable(init, name=name) 8 | -------------------------------------------------------------------------------- /tftf/layers/initializers/zeros.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def zeros(shape, name=None, type='float32'): 6 | init = np.zeros(shape).astype(type) 7 | return tf.Variable(init, name=name) 8 | -------------------------------------------------------------------------------- /tftf/layers/initializers/__init__.py: -------------------------------------------------------------------------------- 1 | from .glorot_normal import glorot_normal 2 | from .glorot_uniform import glorot_uniform 3 | from .normal import normal 4 | from .ones import ones 5 | from .orthogonal import orthogonal 6 | from .zeros import zeros 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | mldata/* 4 | tmp 5 | 6 | __pycache__ 7 | *.pyc 8 | .ipynb_checkpoints 9 | *.ipynb 10 | 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | var 18 | sdist 19 | develop-eggs 20 | .installed.cfg 21 | lib 22 | lib64 23 | 24 | pip-log.txt 25 | -------------------------------------------------------------------------------- /tftf/layers/activations/prelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def prelu(x, type='float32'): 6 | alpha = tf.Variable(np.zeros([x.get_shape()[-1]]).astype(type), 7 | name='alpha') 8 | return tf.maximum(0., x) + alpha 
* tf.minimum(0., x) 9 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/l2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class L2(object): 5 | def __init__(self, alpha): 6 | self.alpha = alpha 7 | 8 | def loss(self, weights): 9 | return self.alpha * tf.nn.l2_loss(weights) 10 | 11 | 12 | def l2(alpha=0.): 13 | reg = L2(alpha) 14 | return reg.loss 15 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/l1.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class L1(object): 5 | def __init__(self, alpha): 6 | self.alpha = alpha 7 | 8 | def loss(self, weights): 9 | return self.alpha * tf.reduce_sum(tf.abs(weights)) 10 | 11 | 12 | def l1(alpha=0.): 13 | reg = L1(alpha) 14 | return reg.loss 15 | -------------------------------------------------------------------------------- /tftf/layers/initializers/normal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def normal(shape, mean=0., std=1., name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | init = rng.normal(loc=mean, scale=std, size=shape).astype(type) 10 | return tf.Variable(init, name=name) 11 | -------------------------------------------------------------------------------- /tftf/models/losses/categorical_crossentropy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def categorical_crossentropy(y, t): 5 | loss = \ 6 | tf.reduce_mean(-tf.reduce_sum( 7 | t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), 8 | axis=list(range(1, len(y.get_shape()))))) 9 | 10 | return loss 11 | -------------------------------------------------------------------------------- /tftf/layers/activations/__init__.py: -------------------------------------------------------------------------------- 1 | from .elu import elu 2 | from .hard_sigmoid import hard_sigmoid 3 | from .leaky_relu import leaky_relu 4 | from .linear import linear 5 | # from .prelu import prelu 6 | from .relu import relu 7 | from .selu import selu 8 | from .sigmoid import sigmoid 9 | from .softmax import softmax 10 | from .swish import swish 11 | from .tanh import tanh 12 | -------------------------------------------------------------------------------- /tftf/models/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import accuracy_score 3 | 4 | 5 | def accuracy(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return accuracy_score(preds > thres, target) 8 | else: 9 | return accuracy_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32')) 11 | -------------------------------------------------------------------------------- /tftf/models/metrics/f1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import f1_score 3 | 4 | 5 | def f1(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return f1_score(preds > thres, target) 8 | else: 9 | return f1_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32'), 11 | average='macro') 12 | --------------------------------------------------------------------------------
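The metric helpers above work on NumPy arrays rather than tensors: a single-column prediction is thresholded at `thres`, while anything wider is reduced with `argmax` and compared against one-hot targets. A toy sketch with made-up values:

```python
import numpy as np
from tftf.metrics import accuracy, f1

# multi-class: rows of class probabilities vs. one-hot targets
preds = np.array([[0.8, 0.1, 0.1],
                  [0.2, 0.7, 0.1]])
target = np.array([[1, 0, 0],
                   [0, 1, 0]])
print(accuracy(preds, target))   # 1.0
print(f1(preds, target))         # macro-averaged F1, 1.0 here

# binary: a single output unit, thresholded at 0.5 by default
preds = np.array([[0.9], [0.2]])
target = np.array([[1], [0]])
print(accuracy(preds, target))   # 1.0
```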
/tftf/preprocessing/sequence/Pad.py: -------------------------------------------------------------------------------- 1 | from .pad_sequences import pad_sequences 2 | 3 | 4 | class Pad(object): 5 | def __init__(self, padding='pre', value=0): 6 | self.padding = padding 7 | self.value = value 8 | 9 | def __call__(self, data): 10 | return pad_sequences(data, 11 | padding=self.padding, 12 | value=self.value) 13 | -------------------------------------------------------------------------------- /tftf/models/metrics/recall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import recall_score 3 | 4 | 5 | def recall(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return recall_score(preds > thres, target) 8 | else: 9 | return recall_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32'), 11 | average='macro') 12 | -------------------------------------------------------------------------------- /tftf/layers/regularizers/l1_l2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .l1 import L1 3 | from .l2 import L2 4 | 5 | 6 | class L1_L2(object): 7 | def __init__(self, l1, l2): 8 | self.L1 = L1(l1) 9 | self.L2 = L2(l2) 10 | 11 | def loss(self, weights): 12 | return self.L1.loss(weights) + self.L2.loss(weights) 13 | 14 | 15 | def l1_l2(l1=0., l2=0.): 16 | reg = L1_L2(l1, l2) 17 | return reg.loss 18 | -------------------------------------------------------------------------------- /tftf/models/losses/binary_crossentropy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def binary_crossentropy(y, t): 5 | loss = \ 6 | -tf.reduce_mean(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) 7 | + (1. - t) * tf.log(tf.clip_by_value(1. 
- y, 8 | 1e-10, 9 | 1.0))) 10 | return loss 11 | -------------------------------------------------------------------------------- /tftf/models/metrics/precision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import precision_score 3 | 4 | 5 | def precision(preds, target, thres=0.5): 6 | if len(preds[0]) == 1: 7 | return precision_score(preds > thres, target) 8 | else: 9 | return precision_score(np.argmax(preds, 1), 10 | np.argmax(target, 1).astype('int32'), 11 | average='macro') 12 | -------------------------------------------------------------------------------- /tftf/layers/initializers/orthogonal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def orthogonal(shape, scale=1., name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | rndn = rng.normal(0., 1., shape).astype(type) 10 | u, _, v = np.linalg.svd(rndn, full_matrices=False) 11 | if u.shape == shape: 12 | return scale * u 13 | else: 14 | return scale * v 15 | -------------------------------------------------------------------------------- /tftf/datasets/Dataset.py: -------------------------------------------------------------------------------- 1 | class Dataset(object): 2 | def __init__(self, data=None, target=None): 3 | self._data = data 4 | self._target = target 5 | 6 | @property 7 | def data(self): 8 | return self._data 9 | 10 | @data.setter 11 | def data(self, val): 12 | self._data = val 13 | 14 | @property 15 | def target(self): 16 | return self._target 17 | 18 | @target.setter 19 | def target(self, val): 20 | self._target = val 21 | -------------------------------------------------------------------------------- /tftf/layers/GlobalAveragePooling2D.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | 4 | 5 | class GlobalAveragePooling2D(Layer): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def compile(self): 10 | pass 11 | 12 | def forward(self, x, **kwargs): 13 | return tf.reduce_mean(x, axis=[1, 2]) 14 | 15 | def initialize_output_dim(self): 16 | super().initialize_output_dim() 17 | self.output_dim = self.input_dim[-1] 18 | return self.output_dim 19 | -------------------------------------------------------------------------------- /tftf/layers/Flatten.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class Flatten(Layer): 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def compile(self): 11 | pass 12 | 13 | def forward(self, x, **kwargs): 14 | return tf.reshape(x, (-1, self.output_dim)) 15 | 16 | def initialize_output_dim(self): 17 | super().initialize_output_dim() 18 | self.output_dim = np.prod(self.input_shape) 19 | return self.output_dim 20 | -------------------------------------------------------------------------------- /tftf/layers/initializers/glorot_normal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def glorot_normal(shape, name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | if len(shape) == 2: 10 | fan_in = shape[0] 11 | elif len(shape) == 4: 12 | fan_in = np.prod(shape[:3]) 13 | else: 14 | raise ValueError('Dimension of shape must be 
2 or 4.') 15 | 16 | init = np.sqrt(1 / fan_in) * rng.normal(size=shape).astype(type) 17 | return tf.Variable(init, name=name) 18 | -------------------------------------------------------------------------------- /tftf/preprocessing/sequence/sort.py: -------------------------------------------------------------------------------- 1 | def sort(data, target, 2 | order='ascend'): 3 | if order == 'ascend' or order == 'ascending': 4 | a = True 5 | elif order == 'descend' or order == 'descending': 6 | a = False 7 | else: 8 | raise ValueError('`order` must be of \'ascend\' or \'descend\'.') 9 | 10 | lens = [len(i) for i in data] 11 | indices = sorted(range(len(lens)), 12 | key=lambda x: (2 * a - 1) * lens[x]) 13 | data = [data[i] for i in indices] 14 | target = [target[i] for i in indices] 15 | 16 | return (data, target) 17 | -------------------------------------------------------------------------------- /tftf/layers/Activation.py: -------------------------------------------------------------------------------- 1 | from .Layer import Layer 2 | 3 | 4 | class Activation(Layer): 5 | def __init__(self, activation='linear'): 6 | super().__init__() 7 | self.activation = self.activation_initializer(activation) 8 | 9 | def __repr__(self): 10 | return '<{}: {}({}, {})>'.format(self.__class__.__name__, 11 | self.activation.__name__, 12 | self.input_dim, 13 | self.output_dim) 14 | 15 | def compile(self): 16 | pass 17 | 18 | def forward(self, x, **kwargs): 19 | return self.activation(x) 20 | -------------------------------------------------------------------------------- /tftf/layers/Dropout.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | 4 | 5 | class Dropout(Layer): 6 | def __init__(self, p_dropout, rng=None): 7 | super().__init__() 8 | if p_dropout < 0. or p_dropout >= 1: 9 | raise ValueError('p_dropout must be between 0 and 1.') 10 | self.p = 1. - p_dropout 11 | 12 | def compile(self): 13 | pass 14 | 15 | def forward(self, x, **kwargs): 16 | training = kwargs['training'] \ 17 | if 'training' in kwargs else tf.constant(False, dtype=tf.bool) 18 | p = tf.cond(training, lambda: self.p, lambda: 1.) 
19 | return tf.nn.dropout(x, p) 20 | -------------------------------------------------------------------------------- /tftf/models/callbacks/EarlyStopping.py: -------------------------------------------------------------------------------- 1 | class EarlyStopping(object): 2 | def __init__(self, patience=10, verbose=0): 3 | self._step = 0 4 | self._loss = float('inf') 5 | self.patience = patience 6 | self.verbose = verbose 7 | 8 | def on_epoch_end(self, epoch, loss): 9 | if self._loss < loss: 10 | self._step += 1 11 | if self._step > self.patience: 12 | if self.verbose: 13 | print('Early stopping on epoch {}.'.format(epoch)) 14 | return True 15 | else: 16 | self._step = 0 17 | self._loss = loss 18 | return False 19 | -------------------------------------------------------------------------------- /tftf/layers/initializers/glorot_uniform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def glorot_uniform(shape, name=None, rng=None, type='float32'): 6 | if rng is None: 7 | rng = np.random 8 | 9 | if len(shape) == 2: 10 | fan_in = shape[0] 11 | fan_out = shape[1] 12 | elif len(shape) == 4: 13 | fan_in = np.prod(shape[:3]) 14 | fan_out = np.prod(shape[:2]) * shape[3] 15 | else: 16 | raise ValueError('Dimension of shape must be 2 or 4.') 17 | 18 | high = np.sqrt(6 / (fan_in + fan_out)) 19 | init = rng.uniform(low=-high, 20 | high=high, 21 | size=shape).astype(type) 22 | return tf.Variable(init, name=name) 23 | -------------------------------------------------------------------------------- /tftf/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import activations 2 | from . import initializers 3 | from . 
import regularizers 4 | 5 | from .Activation import Activation 6 | from .Attention import Attention 7 | from .BatchNormalization import BatchNormalization 8 | from .Conv2D import Conv2D 9 | from .Dense import Dense 10 | from .Dropout import Dropout 11 | from .Embedding import Embedding 12 | from .Flatten import Flatten 13 | from .GlobalAveragePooling2D import GlobalAveragePooling2D 14 | from .Layer import Layer 15 | from .LayerNormalization import LayerNormalization 16 | from .LSTM import LSTM 17 | from .MaxPooling2D import MaxPooling2D 18 | from .NAC import NAC 19 | from .NALU import NALU 20 | from .PositionalEncoding import PositionalEncoding 21 | from .RNN import RNN 22 | from .TimeDistributedDense import TimeDistributedDense 23 | -------------------------------------------------------------------------------- /tftf/layers/TimeDistributedDense.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Dense import Dense 3 | from .initializers import zeros 4 | 5 | 6 | class TimeDistributedDense(Dense): 7 | def __init__(self, output_dim, 8 | input_dim=None, 9 | initializer='glorot_normal', 10 | regularizer=None, 11 | rng=None): 12 | super().__init__(output_dim, 13 | input_dim=input_dim, 14 | initializer=initializer, 15 | regularizer=regularizer, 16 | rng=rng) 17 | 18 | def forward(self, x, **kwargs): 19 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 20 | if not recurr: 21 | return tf.matmul(x, self.W) + self.b 22 | else: 23 | return tf.einsum('ijk,kl->ijl', x, self.W) + self.b 24 | -------------------------------------------------------------------------------- /tftf/preprocessing/sequence/pad_sequences.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pad_sequences(data, 5 | padding='pre', 6 | value=0): 7 | ''' 8 | # Arguments 9 | data: list of lists / np.array of lists 10 | 11 | # Returns 12 | numpy.ndarray 13 | ''' 14 | if type(data[0]) is not list: 15 | raise ValueError('`data` must be a list of lists') 16 | maxlen = len(max(data, key=len)) 17 | 18 | if padding == 'pre': 19 | data = \ 20 | [[value] * (maxlen - len(data[i])) + data[i] 21 | for i in range(len(data))] 22 | elif padding == 'post': 23 | data = \ 24 | [data[i] + [value] * (maxlen - len(data[i])) 25 | for i in range(len(data))] 26 | else: 27 | raise ValueError('`padding` must be one of \'pre\' or \'post\'') 28 | 29 | return np.array(data) 30 | -------------------------------------------------------------------------------- /tftf/layers/Dense.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | from .initializers import zeros 4 | 5 | 6 | class Dense(Layer): 7 | def __init__(self, output_dim, 8 | input_dim=None, 9 | initializer='glorot_normal', 10 | regularizer=None, 11 | rng=None): 12 | super().__init__() 13 | self.output_dim = output_dim 14 | self.input_dim = input_dim 15 | self.initializer = initializer 16 | self.regularizer = regularizer 17 | 18 | def compile(self): 19 | self.W = \ 20 | self.kernel_initializer(self.initializer, 21 | shape=(self.input_dim, self.output_dim), 22 | name='W') 23 | self.b = zeros((self.output_dim), name='b') 24 | 25 | self.params = [self.W, self.b] 26 | 27 | if self.regularizer is not None: 28 | self.reg_loss = [self.regularizer(self.W)] 29 | 30 | def forward(self, x, **kwargs): 31 | return tf.matmul(x, self.W) + self.b 32 | 
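`pad_sequences` above pads every sequence in a batch to the length of the longest one, either in front (default `'pre'`) or at the end (`'post'`). A toy sketch with made-up data; the `Pad` class shown earlier wraps the same function as a callable, which is how `examples/lstm_imdb.py` passes it to `Model.fit` via `preprocesses=[Pad(value=0)]`:

```python
from tftf.preprocessing.sequence import pad_sequences

seqs = [[1, 2, 3], [4, 5], [6]]

pad_sequences(seqs)                           # pre-padding with 0 (default)
# -> array([[1, 2, 3],
#           [0, 4, 5],
#           [0, 0, 6]])

pad_sequences(seqs, padding='post', value=9)  # post-padding with 9
# -> array([[1, 2, 3],
#           [4, 5, 9],
#           [6, 9, 9]])
```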
-------------------------------------------------------------------------------- /tftf/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Dataset import Dataset 4 | 5 | 6 | class IMDb(Dataset): 7 | pass 8 | 9 | 10 | def load_imdb(one_hot=True, 11 | num_words=None, 12 | start_char=1, 13 | oov_char=2, 14 | index_from=3, 15 | train_test_split=True): 16 | train, valid = \ 17 | tf.keras.datasets.imdb.load_data(num_words=num_words, 18 | start_char=start_char, 19 | oov_char=oov_char, 20 | index_from=index_from) 21 | train = list(train) 22 | valid = list(valid) 23 | 24 | if one_hot: 25 | train[1] = train[1][:, np.newaxis] 26 | valid[1] = valid[1][:, np.newaxis] 27 | 28 | if not train_test_split: 29 | data = np.append(train[0], valid[0], axis=0) 30 | target = np.append(train[1], valid[1], axis=0) 31 | 32 | return IMDb(data, target) 33 | 34 | else: 35 | return tuple(train), tuple(valid) 36 | -------------------------------------------------------------------------------- /tftf/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Dataset import Dataset 4 | 5 | 6 | class MNIST(Dataset): 7 | pass 8 | 9 | 10 | def load_mnist(one_hot=True, 11 | train_test_split=True, 12 | flatten=False, 13 | include_channel=True): 14 | 15 | train, valid = tf.keras.datasets.mnist.load_data() 16 | train = list(train) 17 | valid = list(valid) 18 | 19 | if flatten: 20 | train[0] = train[0].reshape(-1, 784) 21 | valid[0] = valid[0].reshape(-1, 784) 22 | elif include_channel: 23 | train[0] = train[0].reshape(len(train[0]), 28, 28, 1) 24 | valid[0] = valid[0].reshape(len(valid[0]), 28, 28, 1) 25 | 26 | if one_hot: 27 | train[1] = np.eye(10)[train[1].astype(int)] 28 | valid[1] = np.eye(10)[valid[1].astype(int)] 29 | 30 | if not train_test_split: 31 | data = np.append(train[0], valid[0], axis=0) 32 | target = np.append(train[1], valid[1], axis=0) 33 | return MNIST(data, target) 34 | else: 35 | return tuple(train), tuple(valid) 36 | -------------------------------------------------------------------------------- /tftf/layers/Embedding.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | from .initializers import normal 4 | 5 | 6 | class Embedding(Layer): 7 | def __init__(self, output_dim, 8 | input_dim=None, 9 | initializer='normal'): 10 | ''' 11 | # Arguments 12 | input_dim: num of words (maximum index) 13 | output_dim: embedding dimension 14 | ''' 15 | super().__init__() 16 | self._input_dtype = tf.int32 17 | 18 | self.output_dim = output_dim 19 | self.input_dim = input_dim 20 | self.initializer = initializer 21 | 22 | if self.input_dim is None: 23 | raise ValueError('input_dim must be specified on Embedding layer.') 24 | 25 | def compile(self): 26 | self.W = \ 27 | self.kernel_initializer(self.initializer, 28 | shape=(self.input_dim, self.output_dim), 29 | name='W') 30 | 31 | self.params = [self.W] 32 | 33 | def forward(self, x, **kwargs): 34 | return tf.nn.embedding_lookup(self.W, x) 35 | -------------------------------------------------------------------------------- /tftf/layers/LayerNormalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class LayerNormalization(Layer): 7 | def 
__init__(self, 8 | a_initializer='ones', 9 | b_initializer='zeros', 10 | eps=np.float32(1e-6)): 11 | super().__init__() 12 | self.a_initializer = a_initializer 13 | self.b_initializer = b_initializer 14 | self.eps = eps 15 | 16 | def compile(self): 17 | self.a = \ 18 | self.kernel_initializer(self.a_initializer, 19 | shape=(self.input_dim), 20 | name='a') 21 | self.b = \ 22 | self.kernel_initializer(self.b_initializer, 23 | shape=(self.input_dim), 24 | name='b') 25 | 26 | self.params = [self.a, self.b] 27 | 28 | def forward(self, x, **kwargs): 29 | axes = 0 30 | mean, var = tf.nn.moments(x, axes=-1, keep_dims=True) 31 | std = tf.sqrt(var) + self.eps 32 | return self.a * (x - mean) / std + self.b 33 | -------------------------------------------------------------------------------- /tftf/layers/NALU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .NAC import NAC 3 | from .activations import sigmoid 4 | 5 | 6 | class NALU(NAC): 7 | ''' 8 | Neural Arithmetic Logic Unit 9 | https://arxiv.org/pdf/1808.00508.pdf 10 | ''' 11 | def __init__(self, output_dim, 12 | input_dim=None, 13 | initializer='normal', 14 | rng=None): 15 | super().__init__(output_dim, 16 | input_dim=input_dim, 17 | initializer=initializer, 18 | rng=rng) 19 | 20 | def compile(self): 21 | super().compile() 22 | self.G = \ 23 | self.kernel_initializer(self.initializer, 24 | shape=(self.input_dim, self.output_dim), 25 | name='G') 26 | 27 | self.params.append(self.G) 28 | 29 | def forward(self, x, **kwargs): 30 | eps = 1e-8 31 | self.g = sigmoid(tf.matmul(x, self.G)) 32 | self.m = tf.exp(tf.matmul(tf.log(tf.abs(x) + eps), self.W)) 33 | self.a = tf.matmul(x, self.W) 34 | 35 | return self.g * self.a + (1 - self.g) * self.m 36 | -------------------------------------------------------------------------------- /tftf/layers/NAC.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .Layer import Layer 3 | from .activations import sigmoid, tanh 4 | 5 | 6 | class NAC(Layer): 7 | ''' 8 | Neural Accumulator 9 | https://arxiv.org/pdf/1808.00508.pdf 10 | ''' 11 | def __init__(self, output_dim, 12 | input_dim=None, 13 | initializer='normal', 14 | rng=None): 15 | super().__init__() 16 | self.output_dim = output_dim 17 | self.input_dim = input_dim 18 | self.initializer = initializer 19 | 20 | def compile(self): 21 | self.W_hat = \ 22 | self.kernel_initializer(self.initializer, 23 | shape=(self.input_dim, self.output_dim), 24 | name='W_hat') 25 | self.M_hat = \ 26 | self.kernel_initializer(self.initializer, 27 | shape=(self.input_dim, self.output_dim), 28 | name='W_hat') 29 | self.W = tanh(self.W_hat) * sigmoid(self.M_hat) 30 | 31 | self.params = [self.W_hat, self.M_hat] 32 | 33 | def forward(self, x, **kwargs): 34 | return tf.matmul(x, self.W) 35 | -------------------------------------------------------------------------------- /tftf/layers/BatchNormalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class BatchNormalization(Layer): 7 | def __init__(self, 8 | gamma_initializer='ones', 9 | beta_initializer='zeros', 10 | eps=np.float32(1e-6)): 11 | super().__init__() 12 | self.gamma_initializer = gamma_initializer 13 | self.beta_initializer = beta_initializer 14 | self.eps = eps 15 | 16 | def compile(self): 17 | self.gamma = \ 18 | self.kernel_initializer(self.gamma_initializer, 
19 | shape=(self.input_dim), 20 | name='gamma') 21 | self.beta = \ 22 | self.kernel_initializer(self.beta_initializer, 23 | shape=(self.input_dim), 24 | name='beta') 25 | 26 | self.params = [self.gamma, self.beta] 27 | 28 | def forward(self, x, **kwargs): 29 | axes = list(range(len(x.get_shape()) - 1)) 30 | mean, var = tf.nn.moments(x, axes, keep_dims=True) 31 | std = tf.sqrt(var + self.eps) 32 | return self.gamma * (x - mean) / std + self.beta 33 | -------------------------------------------------------------------------------- /tftf/layers/PositionalEncoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class PositionalEncoding(Layer): 7 | def __init__(self, output_dim, 8 | with_identity=True, 9 | maxlen=6000): 10 | ''' 11 | Positional encoding layer with sinusoid 12 | 13 | # Arguments 14 | maxlen: max length of sequence 15 | ''' 16 | super().__init__() 17 | self.output_dim = output_dim 18 | self.maxlen = maxlen 19 | self._with_identity = with_identity 20 | self._pe = self._initialize_pe() 21 | 22 | def compile(self): 23 | pass 24 | 25 | def forward(self, x, **kwargs): 26 | pe = self._pe[:, :tf.shape(x)[1]] 27 | if self._with_identity: 28 | return x + pe 29 | else: 30 | return pe 31 | 32 | def _initialize_pe(self): 33 | pe = np.zeros(shape=(self.maxlen, self.output_dim), dtype=np.float32) 34 | pos = np.arange(0, self.maxlen)[:, np.newaxis] 35 | div = np.exp(np.arange(0, self.output_dim, 2) 36 | * -(np.log(10000.0) / self.output_dim)) 37 | 38 | pe[:, 0::2] = np.sin(pos * div) 39 | pe[:, 1::2] = np.cos(pos * div) 40 | return tf.constant(pe[np.newaxis, :]) 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | setup( 5 | name='tftf', 6 | version='0.0.29', 7 | description='TensorFlow TransFormer', 8 | author='Yusuke Sugomori', 9 | author_email='me@yusugomori.com', 10 | url='https://github.com/yusugomori/tftf', 11 | download_url='', 12 | license='Apache 2.0', 13 | install_requires=['numpy>=1.13.3', 14 | 'scikit-learn>=0.19.1'], 15 | classifiers=[ 16 | 'Development Status :: 3 - Alpha', 17 | 'Intended Audience :: Developers', 18 | 'Intended Audience :: Education', 19 | 'Intended Audience :: Science/Research', 20 | 'License :: OSI Approved :: Apache Software License', 21 | 'Programming Language :: Python :: 3', 22 | 'Programming Language :: Python :: 3.4', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Programming Language :: Python :: 3.6', 25 | 'Topic :: Scientific/Engineering', 26 | 'Topic :: Scientific/Engineering :: Mathematics', 27 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 28 | 'Topic :: Software Development', 29 | 'Topic :: Software Development :: Libraries', 30 | 'Topic :: Software Development :: Libraries :: Python Modules', 31 | ], 32 | keywords='tensorflow keras machine deep learning', 33 | packages=find_packages() 34 | ) 35 | -------------------------------------------------------------------------------- /examples/save_restore_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Layer, Dense, Activation, Dropout 6 
| from tftf.models import Model 7 | 8 | 9 | if __name__ == '__main__': 10 | np.random.seed(0) 11 | tf.set_random_seed(0) 12 | 13 | ''' 14 | Load data 15 | ''' 16 | mnist = load_mnist(train_test_split=False, flatten=True) 17 | 18 | n = len(mnist.data) 19 | N = 30000 20 | indices = np.random.permutation(range(n))[:N] 21 | 22 | X = mnist.data[indices] 23 | X = X / 255.0 24 | y = mnist.target[indices] 25 | 26 | train_X, test_X, train_y, test_y = train_test_split(X, y) 27 | 28 | ''' 29 | Build model 30 | ''' 31 | model = Model() 32 | model.add(Dense(10, input_dim=784)) 33 | model.add(Activation('softmax')) 34 | model.compile() 35 | 36 | model.describe() 37 | 38 | ''' 39 | Train model 40 | ''' 41 | model.fit(train_X, train_y, epochs=3, metrics=['accuracy']) 42 | 43 | ''' 44 | Test model 45 | ''' 46 | print(model.accuracy(test_X, test_y)) 47 | 48 | ''' 49 | Save model 50 | ''' 51 | model.save('tmp/model.ckpt') 52 | 53 | ''' 54 | Restore model 55 | ''' 56 | model = Model() 57 | model.add(Dense(10, input_dim=784)) 58 | model.add(Activation('softmax')) 59 | model.restore('tmp/model.ckpt') 60 | model.compile() 61 | 62 | ''' 63 | Test model 64 | ''' 65 | print(model.accuracy(test_X, test_y)) 66 | -------------------------------------------------------------------------------- /examples/lenet_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten 6 | from tftf.models import Model 7 | 8 | 9 | if __name__ == '__main__': 10 | np.random.seed(0) 11 | tf.set_random_seed(123) 12 | 13 | ''' 14 | Load data 15 | ''' 16 | mnist = load_mnist(train_test_split=False) 17 | 18 | n = len(mnist.data) 19 | N = 30000 20 | indices = np.random.permutation(range(n))[:N] 21 | 22 | X = mnist.data[indices] 23 | X = X / 255.0 24 | y = mnist.target[indices] 25 | 26 | train_X, test_X, train_y, test_y = train_test_split(X, y) 27 | 28 | ''' 29 | Build model 30 | ''' 31 | model = Model() 32 | model.add(Conv2D(input_dim=(28, 28, 1), 33 | kernel_size=(3, 3, 20), 34 | padding='valid')) 35 | model.add(Activation('tanh')) 36 | model.add(MaxPooling2D()) 37 | model.add(Conv2D(kernel_size=(3, 3, 50), 38 | padding='valid')) 39 | model.add(Activation('tanh')) 40 | model.add(MaxPooling2D()) 41 | model.add(Flatten()) 42 | model.add(Dense(1024)) 43 | model.add(Activation('tanh')) 44 | model.add(Dense(200)) 45 | model.add(Activation('tanh')) 46 | model.add(Dense(10)) 47 | model.add(Activation('softmax')) 48 | model.compile() 49 | 50 | model.describe() 51 | 52 | ''' 53 | Train model 54 | ''' 55 | model.fit(train_X, train_y, 56 | metrics=['accuracy']) 57 | 58 | ''' 59 | Test model 60 | ''' 61 | print(model.accuracy(test_X, test_y)) 62 | -------------------------------------------------------------------------------- /examples/lstm_imdb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_imdb 5 | from tftf.layers \ 6 | import Dense, Activation, RNN, LSTM, Embedding 7 | from tftf.preprocessing.sequence import Pad 8 | from tftf.preprocessing.sequence import pad_sequences, sort 9 | from tftf.models import Model 10 | 11 | if __name__ == '__main__': 12 | np.random.seed(0) 13 | tf.set_random_seed(123) 14 | 15 | ''' 16 | Load 
data 17 | ''' 18 | num_words = 10000 19 | imdb = load_imdb(num_words=num_words, 20 | train_test_split=False) 21 | X = imdb.data 22 | y = imdb.target 23 | train_X, test_X, train_y, test_y = train_test_split(X, y) 24 | train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y) 25 | 26 | train_X, train_y = sort(train_X, train_y) 27 | valid_X, valid_y = sort(valid_X, valid_y) 28 | test_X, test_y = sort(test_X, test_y) 29 | 30 | ''' 31 | Build model 32 | ''' 33 | model = Model() 34 | model.add(Embedding(100, input_dim=num_words)) 35 | model.add(LSTM(50)) 36 | model.add(Dense(1)) 37 | model.add(Activation('sigmoid')) 38 | model.compile(variable_input=True, 39 | use_mask=True, 40 | pad_value=0) 41 | model.describe() 42 | 43 | model.fit(train_X[:10000], train_y[:10000], 44 | epochs=30, 45 | shuffle=False, 46 | metrics=['accuracy', 'f1'], 47 | preprocesses=[Pad(value=0)], 48 | validation_data=(valid_X[:5000], valid_y[:5000])) 49 | 50 | ''' 51 | Test model 52 | ''' 53 | test_X, test_y = test_X[:2000], test_y[:2000] 54 | test_X = pad_sequences(test_X) 55 | print(model.accuracy(test_X, test_y)) 56 | -------------------------------------------------------------------------------- /examples/mlp_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Layer, Dense, Activation, Dropout 6 | from tftf.models import Model 7 | 8 | 9 | if __name__ == '__main__': 10 | np.random.seed(0) 11 | tf.set_random_seed(0) 12 | 13 | ''' 14 | Load data 15 | ''' 16 | mnist = load_mnist(train_test_split=False, flatten=True) 17 | 18 | n = len(mnist.data) 19 | N = 30000 20 | indices = np.random.permutation(range(n))[:N] 21 | 22 | X = mnist.data[indices] 23 | X = X / 255.0 24 | y = mnist.target[indices] 25 | 26 | train_X, test_X, train_y, test_y = train_test_split(X, y) 27 | train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y) 28 | 29 | ''' 30 | Build model 31 | ''' 32 | model = Model() 33 | model.add(Dense(200, 34 | input_dim=784, 35 | initializer='glorot_uniform')) 36 | model.add(Activation('relu')) 37 | model.add(Dropout(0.2)) 38 | model.add(Dense(200)) 39 | model.add(Activation('relu')) 40 | model.add(Dropout(0.5)) 41 | model.add(Dense(10, initializer='glorot_uniform')) 42 | model.add(Activation('softmax')) 43 | model.compile() 44 | 45 | # model.describe() 46 | model.describe_params() 47 | 48 | ''' 49 | Train model 50 | ''' 51 | model.fit(train_X, train_y, 52 | validation_data=(valid_X, valid_y), 53 | metrics=['accuracy', 'f1'], 54 | early_stopping=3, 55 | epochs=1000) 56 | 57 | ''' 58 | Test model 59 | ''' 60 | print('acc: {:.3}, f1: {:.3}'.format(model.accuracy(test_X, test_y), 61 | model.f1(test_X, test_y))) 62 | -------------------------------------------------------------------------------- /examples/lenet_advanced_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.datasets import load_mnist 5 | from tftf.layers import Dense, Activation, BatchNormalization, Dropout, \ 6 | Conv2D, MaxPooling2D, GlobalAveragePooling2D 7 | from tftf.models import Model 8 | 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(0) 12 | tf.set_random_seed(123) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | mnist = 
load_mnist(train_test_split=False) 18 | 19 | n = len(mnist.data) 20 | N = 30000 21 | indices = np.random.permutation(range(n))[:N] 22 | 23 | X = mnist.data[indices] 24 | X = X / 255.0 25 | y = mnist.target[indices] 26 | 27 | train_X, test_X, train_y, test_y = train_test_split(X, y) 28 | 29 | ''' 30 | Build model 31 | ''' 32 | model = Model() 33 | model.add(Conv2D(input_dim=(28, 28, 1), 34 | kernel_size=(3, 3, 20), 35 | padding='valid')) 36 | model.add(BatchNormalization()) 37 | model.add(Activation('relu')) 38 | model.add(Dropout(0.3)) 39 | model.add(MaxPooling2D()) 40 | model.add(Conv2D(kernel_size=(3, 3, 50), 41 | padding='valid')) 42 | model.add(BatchNormalization()) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.3)) 45 | model.add(MaxPooling2D()) 46 | model.add(GlobalAveragePooling2D()) 47 | model.add(Dense(1024)) 48 | model.add(BatchNormalization()) 49 | model.add(Activation('relu')) 50 | model.add(Dropout(0.5)) 51 | model.add(Dense(10)) 52 | model.add(Activation('softmax')) 53 | model.compile() 54 | 55 | model.describe() 56 | 57 | ''' 58 | Train model 59 | ''' 60 | model.fit(train_X, train_y, 61 | metrics=['accuracy']) 62 | 63 | ''' 64 | Test model 65 | ''' 66 | print(model.accuracy(test_X, test_y)) 67 | -------------------------------------------------------------------------------- /tftf/layers/MaxPooling2D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class MaxPooling2D(Layer): 7 | def __init__(self, 8 | pool_size=(2, 2), 9 | strides=None, 10 | padding='valid'): 11 | super().__init__() 12 | 13 | if len(pool_size) != 2: 14 | raise ValueError('Dimension of pool_size must be 2.') 15 | 16 | if strides is None: 17 | strides = pool_size 18 | elif len(strides) != 2: 19 | raise ValueError('Dimension of strides must be 2.') 20 | 21 | padding = padding.upper() 22 | if padding not in ('VALID', 'SAME'): 23 | raise ValueError('padding must be one of \'valid\' or \'same\'.') 24 | 25 | self.pool_size = pool_size 26 | self.strides = strides 27 | self.padding = padding 28 | 29 | @property 30 | def input_shape(self): 31 | return self.input_dim 32 | 33 | @property 34 | def output_shape(self): 35 | return self.output_dim 36 | 37 | @property 38 | def _pool_size(self): 39 | return tuple([1] + list(self.pool_size) + [1]) 40 | 41 | @property 42 | def _strides(self): 43 | return tuple([1] + list(self.strides) + [1]) 44 | 45 | def compile(self): 46 | pass 47 | 48 | def forward(self, x, **kwargs): 49 | return tf.nn.max_pool(x, 50 | ksize=self._pool_size, 51 | strides=self._strides, 52 | padding=self.padding) 53 | 54 | def initialize_output_dim(self): 55 | super().initialize_output_dim() 56 | self.output_dim = self._get_output_shape() 57 | return self.output_dim 58 | 59 | def _get_output_shape(self): 60 | input_shape = self.input_shape 61 | pool_size = self.pool_size 62 | strides = self.strides 63 | padding = self.padding 64 | 65 | if padding == 'SAME': 66 | return input_shape 67 | else: 68 | return tuple(list(np.ceil((np.array(input_shape[:2]) 69 | - np.array(pool_size) + 1) 70 | / np.array(strides)).astype('int32')) 71 | + [input_shape[2]]) 72 | -------------------------------------------------------------------------------- /examples/rnn_sin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | from 
sklearn.model_selection import train_test_split 6 | from tftf.layers import Layer, Dense, Activation, RNN 7 | from tftf.models import Model 8 | 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(12345) 12 | tf.set_random_seed(0) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | def sin(x, T=100): 18 | return np.sin(2.0 * np.pi * x / T) 19 | 20 | def toy_problem(T=100, ampl=0.05): 21 | x = np.arange(0, 2 * T + 1) 22 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, size=len(x)) 23 | return sin(x) + noise 24 | 25 | T = 200 26 | f = toy_problem(T) 27 | 28 | length_of_sequences = 2 * T 29 | maxlen = 50 30 | 31 | data = [] 32 | target = [] 33 | 34 | for i in range(0, length_of_sequences - maxlen + 1): 35 | data.append(f[i: i + maxlen]) 36 | target.append(f[i + maxlen]) 37 | 38 | X = np.array(data, dtype='float32').reshape(len(data), maxlen, 1) 39 | y = np.array(target, dtype='float32').reshape(len(data), 1) 40 | 41 | train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1) 42 | 43 | ''' 44 | Build model 45 | ''' 46 | model = Model() 47 | model.add(RNN(25, input_dim=1)) 48 | model.add(Dense(1)) 49 | model.add(Activation('linear')) 50 | model.compile() 51 | 52 | model.describe() 53 | 54 | ''' 55 | Train model 56 | ''' 57 | model.fit(train_X, train_y, epochs=150, batch_size=50) 58 | 59 | ''' 60 | Test model 61 | ''' 62 | truncate = maxlen 63 | Z = X[:1] 64 | 65 | original = [f[i] for i in range(maxlen)] 66 | predicted = [None for i in range(maxlen)] 67 | 68 | for i in range(length_of_sequences - maxlen + 1): 69 | _z = Z[-1:] 70 | _y = model.predict(_z) 71 | _sequence = np.concatenate((_z.reshape(maxlen, 1)[1:], _y), 72 | axis=0).reshape(1, maxlen, 1) 73 | Z = np.append(Z, _sequence, axis=0) 74 | predicted.append(_y.reshape(-1)) 75 | 76 | plt.rc('font', family='serif') 77 | plt.figure() 78 | plt.ylim([-1.5, 1.5]) 79 | plt.plot(toy_problem(T, ampl=0), linestyle='dotted', color='#aaaaaa') 80 | plt.plot(original, linestyle='dashed', color='black') 81 | plt.plot(predicted, color='black') 82 | plt.show() 83 | -------------------------------------------------------------------------------- /examples/lstm_sin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | from sklearn.model_selection import train_test_split 6 | from tftf.layers import Layer, Dense, Activation, LSTM 7 | from tftf.models import Model 8 | 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(1234) 12 | tf.set_random_seed(0) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | def sin(x, T=100): 18 | return np.sin(2.0 * np.pi * x / T) 19 | 20 | def toy_problem(T=100, ampl=0.05): 21 | x = np.arange(0, 2 * T + 1) 22 | noise = ampl * np.random.uniform(low=-1.0, high=1.0, size=len(x)) 23 | return sin(x) + noise 24 | 25 | T = 200 26 | f = toy_problem(T) 27 | 28 | length_of_sequences = 2 * T 29 | maxlen = 25 30 | 31 | data = [] 32 | target = [] 33 | 34 | for i in range(0, length_of_sequences - maxlen + 1): 35 | data.append(f[i: i + maxlen]) 36 | target.append(f[i + maxlen]) 37 | 38 | X = np.array(data, dtype='float32').reshape(len(data), maxlen, 1) 39 | y = np.array(target, dtype='float32').reshape(len(data), 1) 40 | 41 | train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1) 42 | 43 | ''' 44 | Build model 45 | ''' 46 | model = Model() 47 | model.add(LSTM(10, input_dim=1, return_sequence=True)) 48 | model.add(LSTM(10)) 49 | model.add(Dense(1)) 50 | 
model.add(Activation('linear')) 51 | model.compile() 52 | 53 | model.describe() 54 | 55 | ''' 56 | Train model 57 | ''' 58 | model.fit(train_X, train_y, epochs=150, batch_size=50) 59 | 60 | ''' 61 | Test model 62 | ''' 63 | truncate = maxlen 64 | Z = X[:1] 65 | 66 | original = [f[i] for i in range(maxlen)] 67 | predicted = [None for i in range(maxlen)] 68 | 69 | for i in range(length_of_sequences - maxlen + 1): 70 | _z = Z[-1:] 71 | _y = model.predict(_z) 72 | _sequence = np.concatenate((_z.reshape(maxlen, 1)[1:], _y), 73 | axis=0).reshape(1, maxlen, 1) 74 | Z = np.append(Z, _sequence, axis=0) 75 | predicted.append(_y.reshape(-1)) 76 | 77 | plt.rc('font', family='serif') 78 | plt.figure() 79 | plt.ylim([-1.5, 1.5]) 80 | plt.plot(toy_problem(T, ampl=0), linestyle='dotted', color='#aaaaaa') 81 | plt.plot(original, linestyle='dashed', color='black') 82 | plt.plot(predicted, color='black') 83 | plt.show() 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TFTF: TensorFlow TransFormer🍔 2 | 3 | TensorFlow for everybody. 4 | 5 | ## Quick glance 6 | 7 | ```python 8 | from tftf.layers import Layer, Dense, Activation 9 | from tftf.models import Model 10 | 11 | ''' 12 | Build model 13 | ''' 14 | model = Model() 15 | model.add(Dense(500, input_dim=784)) 16 | model.add(Activation('sigmoid')) 17 | model.add(Dense(10)) 18 | model.add(Activation('softmax')) 19 | model.compile() 20 | 21 | model.describe() 22 | 23 | ''' 24 | Train model 25 | ''' 26 | model.fit(train_X, train_y) 27 | 28 | ''' 29 | Test model 30 | ''' 31 | print(model.accuracy(test_X, test_y)) 32 | ``` 33 | 34 | See [examples](https://github.com/yusugomori/tftf/tree/master/examples) for other implementations. 35 | 36 | ## Installation 37 | 38 | - **Install TFTF from PyPI (recommended):** 39 | 40 | ```sh 41 | pip install tensorflow 42 | pip install tftf 43 | ``` 44 | 45 | - **Alternatively: install TFTF from the GitHub source:** 46 | 47 | First, clone TFTF using `git`: 48 | 49 | ```sh 50 | git clone https://github.com/yusugomori/tftf.git 51 | ``` 52 | 53 | Then, `cd` to the TFTF folder and run the install command: 54 | ```sh 55 | cd tftf 56 | sudo python setup.py install 57 | ``` 58 | 59 | ## Importable Layers, APIs 60 | 61 | You can import low-level tftf APIs to your own TensorFlow implementations. 62 | 63 | ```python 64 | from tftf.layers import Dense, Activation, NALU 65 | from tftf import initializers as ini 66 | from tftf import activations as act 67 | from tftf import losses as loss 68 | from tftf import optimizers as opt 69 | from tftf.metrics import accuracy, f1 70 | 71 | x = tf.placeholder(tf.float32, shape=[None, 784]) 72 | t = tf.placeholder(tf.float32, shape=[None, 10]) 73 | 74 | # import APIs 75 | W = ini.glorot_normal([784, 200]) # or just write tf.Variable(...) 76 | b = ini.zeros([200]) 77 | h = act.tanh(tf.matmul(x, W) + b) # or just write tf.nn.tanh(...) 78 | 79 | # import Layers 80 | h = Dense(200)(h) 81 | h = Activation('tanh')(h) 82 | h = NALU(200)(h) 83 | 84 | W = ini.glorot_normal([200, 10]) 85 | b = ini.zeros([10]) 86 | y = act.softmax(tf.matmul(h, W) + b) 87 | 88 | cost = loss.categorical_crossentropy(y, t) 89 | train_step = opt.sgd(0.01).minimize(cost) 90 | 91 | # Train 92 | # ... 
93 | 94 | preds = y.eval(session=sess, feed_dict={x: test_X}) 95 | acc = accuracy(preds, test_y) 96 | f = f1(preds, test_y) 97 | print('accuracy: {:.3}'.format(acc)) 98 | print('f1: {:.3}'.format(f)) 99 | ``` 100 | -------------------------------------------------------------------------------- /examples/transformer_translation_enja.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.layers import Dense, Activation 5 | from tftf.layers import LSTM, Embedding, TimeDistributedDense, Attention 6 | from tftf.preprocessing.sequence import pad_sequences, sort 7 | from tftf.datasets import load_small_parallel_enja 8 | from tftf.layers.modules import Transformer 9 | 10 | if __name__ == '__main__': 11 | np.random.seed(0) 12 | tf.set_random_seed(123) 13 | 14 | ''' 15 | Load data 16 | ''' 17 | start_char = 1 18 | end_char = 2 19 | (train_X, train_y), (test_X, test_y), (num_X, num_y), \ 20 | (w2i_X, w2i_y), (i2w_X, i2w_y) = load_small_parallel_enja(to_ja=True) 21 | 22 | train_X, train_y = sort(train_X, train_y) 23 | test_X, test_y = sort(test_X, test_y) 24 | 25 | train_size = 50000 # up to 50000 26 | test_size = 100 # up to 500 27 | train_X, train_y = train_X[:train_size], train_y[:train_size] 28 | test_X, test_y = test_X[:test_size], test_y[:test_size] 29 | 30 | ''' 31 | Build model 32 | ''' 33 | pad_value = 0 34 | x = tf.placeholder(tf.int32, [None, None], name='x') 35 | t = tf.placeholder(tf.int32, [None, None], name='t') 36 | 37 | transformer = Transformer(num_X, num_y) 38 | preds = transformer.v1(x, t) 39 | 40 | cost = transformer.loss() 41 | optimizer, lr = transformer.optimizer(cost) 42 | train = transformer.is_training 43 | 44 | ''' 45 | Train model 46 | ''' 47 | epochs = 10 48 | batch_size = 100 49 | 50 | init = tf.global_variables_initializer() 51 | sess = tf.Session() 52 | sess.run(init) 53 | 54 | n_batches = len(train_X) // batch_size 55 | 56 | for epoch in range(epochs): 57 | loss = 0. 
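        # Each mini-batch below is padded to its longest sequence with `pad_value`;
        # the epoch-dependent learning rate from `transformer.lrate(epoch)` is fed
        # through the `lr` placeholder, and `train: True` puts the Transformer
        # module into training mode.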
58 | for i in range(n_batches): 59 | start = i * batch_size 60 | end = start + batch_size 61 | 62 | _train_X = pad_sequences(train_X[start:end], 63 | value=pad_value) 64 | _train_y = pad_sequences(train_y[start:end], 65 | value=pad_value) 66 | 67 | _, _cost = sess.run([optimizer, cost], feed_dict={ 68 | x: _train_X, 69 | t: _train_y, 70 | lr: transformer.lrate(epoch), 71 | train: True 72 | }) 73 | loss += _cost 74 | 75 | loss /= n_batches 76 | 77 | _test_X = pad_sequences(test_X, value=pad_value) 78 | _test_y = pad_sequences(test_y, value=pad_value) 79 | 80 | val_loss = cost.eval(session=sess, feed_dict={ 81 | x: _test_X, 82 | t: _test_y 83 | }) 84 | 85 | print('epoch: {}, ' 86 | 'loss: {:.3}, ' 87 | 'val_loss: {:.3}'.format(epoch+1, loss, val_loss)) 88 | -------------------------------------------------------------------------------- /tftf/layers/Conv2D.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | 5 | 6 | class Conv2D(Layer): 7 | def __init__(self, 8 | input_dim=None, 9 | kernel_size=(3, 3, 20), 10 | strides=(1, 1), 11 | padding='same', 12 | initializer='glorot_uniform', 13 | regularizer=None, 14 | rng=None): 15 | super().__init__() 16 | 17 | if input_dim is not None and len(input_dim) != 3: 18 | raise ValueError('Dimension of input_dim must be 3.') 19 | 20 | if len(kernel_size) != 3: 21 | raise ValueError('Dimension of kernel_size must be 3.') 22 | 23 | if len(strides) != 2: 24 | raise ValueError('Dimension of strides must be 2.') 25 | 26 | padding = padding.upper() 27 | if padding not in ('VALID', 'SAME'): 28 | raise ValueError('padding must be one of \'valid\' or \'same\'.') 29 | 30 | self.input_dim = input_dim 31 | self.kernel_size = kernel_size 32 | self.strides = strides 33 | self.padding = padding 34 | self.initializer = initializer 35 | self.regularizer = regularizer 36 | 37 | @property 38 | def input_shape(self): 39 | return self.input_dim 40 | 41 | @property 42 | def output_shape(self): 43 | return self.output_dim 44 | 45 | @property 46 | def _strides(self): 47 | return tuple([1] + list(self.strides) + [1]) 48 | 49 | def compile(self): 50 | kernel_shape = \ 51 | self.kernel_size[:2] + (self.input_dim[2], self.kernel_size[2]) 52 | 53 | self.W = self.kernel_initializer(self.initializer, 54 | shape=kernel_shape, 55 | name='W') 56 | 57 | self.params = [self.W] 58 | 59 | if self.regularizer is not None: 60 | self.reg_loss = [self.regularizer(self.W)] 61 | 62 | def forward(self, x, **kwargs): 63 | return tf.nn.conv2d(x, self.W, 64 | strides=self._strides, 65 | padding=self.padding) 66 | 67 | def initialize_output_dim(self): 68 | super().initialize_output_dim() 69 | self.output_dim = self._get_output_shape() 70 | return self.output_dim 71 | 72 | def _get_output_shape(self): 73 | input_shape = self.input_shape 74 | kernel_size = self.kernel_size 75 | strides = self.strides 76 | padding = self.padding 77 | 78 | image_size = input_shape[:2] 79 | channel = kernel_size[2] 80 | 81 | if padding == 'SAME': 82 | return tuple(list(image_size) + [channel]) 83 | else: 84 | return tuple(list(np.ceil((np.array(image_size) 85 | - np.array(kernel_size[:2]) + 1) 86 | / np.array(strides)).astype('int32')) 87 | + [channel]) 88 | -------------------------------------------------------------------------------- /examples/low_level_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from 
sklearn import datasets 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.utils import shuffle 6 | from tftf.layers import Dense, Activation, NALU 7 | from tftf import regularizers as reg 8 | from tftf import initializers as ini 9 | from tftf import activations as act 10 | from tftf import losses as loss 11 | from tftf import optimizers as opt 12 | from tftf.metrics import accuracy, f1 13 | 14 | 15 | if __name__ == '__main__': 16 | np.random.seed(0) 17 | tf.set_random_seed(123) 18 | 19 | ''' 20 | Load data 21 | ''' 22 | mnist = datasets.fetch_mldata('MNIST original', data_home='.') 23 | 24 | n = len(mnist.data) 25 | N = 30000 26 | indices = np.random.permutation(range(n))[:N] 27 | 28 | X = mnist.data[indices] 29 | X = X / 255.0 30 | X = X - X.mean(axis=1).reshape(len(X), 1) 31 | y = mnist.target[indices] 32 | Y = np.eye(10)[y.astype(int)] 33 | 34 | train_X, test_X, train_y, test_y = train_test_split(X, Y) 35 | 36 | ''' 37 | Build model 38 | ''' 39 | n_in = 784 40 | n_hidden = 200 41 | n_out = 10 42 | 43 | x = tf.placeholder(tf.float32, shape=[None, n_in]) 44 | t = tf.placeholder(tf.float32, shape=[None, n_out]) 45 | 46 | W = ini.glorot_normal([n_in, n_hidden], name='W0') 47 | b = ini.zeros([n_hidden], name='b0') 48 | h = act.tanh(tf.matmul(x, W) + b) 49 | 50 | W = ini.glorot_normal([n_hidden, n_hidden], name='W1') 51 | b = ini.zeros([n_hidden], name='b1') 52 | h = act.tanh(tf.matmul(h, W) + b) 53 | 54 | h = NALU(n_hidden)(h) # import from tftf.layers 55 | 56 | W = ini.glorot_normal([n_hidden, n_out], name='W_out') 57 | b = ini.zeros([n_out], name='b_out') 58 | y = act.softmax(tf.matmul(h, W) + b) 59 | 60 | cost = loss.categorical_crossentropy(y, t) 61 | train_step = opt.sgd(0.01).minimize(cost) 62 | 63 | ''' 64 | Train model 65 | ''' 66 | epochs = 10 67 | batch_size = 100 68 | 69 | init = tf.global_variables_initializer() 70 | sess = tf.Session() 71 | sess.run(init) 72 | 73 | n_batches = len(train_X) // batch_size 74 | 75 | for epoch in range(epochs): 76 | _X, _y = shuffle(train_X, train_y) 77 | 78 | for i in range(n_batches): 79 | start = i * batch_size 80 | end = start + batch_size 81 | 82 | sess.run(train_step, feed_dict={ 83 | x: _X[start:end], 84 | t: _y[start:end] 85 | }) 86 | 87 | loss = cost.eval(session=sess, feed_dict={ 88 | x: _X, 89 | t: _y 90 | }) 91 | 92 | preds = y.eval(session=sess, feed_dict={x: _X}) 93 | acc = accuracy(preds, _y) 94 | 95 | print('epoch: {}, loss: {:.3}, acc: {:.3}'.format(epoch, loss, acc)) 96 | 97 | ''' 98 | Test model 99 | ''' 100 | preds = y.eval(session=sess, feed_dict={x: test_X}) 101 | acc = accuracy(preds, test_y) 102 | f = f1(preds, test_y) 103 | print('accuracy: {:.3}'.format(acc)) 104 | print('f1: {:.3}'.format(f)) 105 | -------------------------------------------------------------------------------- /tftf/layers/modules/ResNet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from . import Module 3 | from .. import Activation 4 | from .. import BatchNormalization 5 | from .. import Conv2D 6 | from .. import Dense 7 | from .. import GlobalAveragePooling2D 8 | from .. 
import MaxPooling2D 9 | 10 | 11 | class ResNet(Module): 12 | ''' 13 | # Example 14 | 15 | ``` 16 | x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3]) 17 | t = tf.placeholder(tf.float32, shape=[None, 10]) 18 | 19 | resnet = ResNet() 20 | h = resnet.v1(x) 21 | h = Activation('relu')(h) 22 | h = Dense(10)(h) 23 | y = Activation('softmax')(h) 24 | 25 | cost = categorical_crossentropy(y, t) 26 | train_step = sgd(0.01).minimize(cost) 27 | ``` 28 | ''' 29 | def __init__(self): 30 | pass 31 | 32 | def v1(self, x, n_out=1000): 33 | ''' 34 | ResNet-34 35 | 36 | # Arguments 37 | x: placeholder 38 | ''' 39 | layers = [ 40 | Conv2D(kernel_size=(7, 7, 64)), 41 | BatchNormalization(), 42 | Activation('relu'), 43 | MaxPooling2D(pool_size=(3, 3), 44 | strides=(2, 2), 45 | padding='same') 46 | ] 47 | for layer in layers: 48 | x = layer(x) 49 | x = self._add_base_block(x, channel_out=64) 50 | x = self._add_base_block(x, channel_out=128) 51 | x = self._add_base_block(x, channel_out=256) 52 | x = self._add_base_block(x, channel_out=512) 53 | x = GlobalAveragePooling2D()(x) 54 | x = Dense(n_out)(x) 55 | 56 | return x 57 | 58 | def _add_base_block(self, x, channel_out=64): 59 | x = Conv2D(kernel_size=(1, 1, channel_out), 60 | strides=(2, 2))(x) 61 | x = self._base_block(x, channel_out=channel_out) 62 | return x 63 | 64 | def _base_block(self, x, channel_out=64): 65 | ''' 66 | # Arguments 67 | x: placeholder 68 | ''' 69 | layers = [ 70 | Conv2D(kernel_size=(3, 3, channel_out)), 71 | BatchNormalization(), 72 | Activation('relu'), 73 | Conv2D(kernel_size=(3, 3, channel_out)), 74 | BatchNormalization() 75 | ] 76 | for layer in layers: 77 | h = layer(x) 78 | shortcut = self._shortcut(x, output_shape=h.get_shape()) 79 | 80 | return Activation('relu')(h + shortcut) 81 | 82 | def _bottleneck(self, x, channel_out=256): 83 | ''' 84 | # Arguments 85 | x: placeholder 86 | ''' 87 | channel = channel_out // 4 88 | layers = [ 89 | Conv2D(kernel_size=(1, 1, channel)), 90 | BatchNormalization(), 91 | Activation('relu'), 92 | Conv2D(kernel_size=(3, 3, channel)), 93 | BatchNormalization(), 94 | Activation('relu'), 95 | Conv2D(kernel_size=(1, 1, channel)), 96 | BatchNormalization() 97 | ] 98 | for layer in layers: 99 | h = layer(x) 100 | shortcut = self._shortcut(x, output_shape=h.get_shape()) 101 | 102 | return Activation('relu')(h + shortcut) 103 | 104 | def _projection(self, x, channel_out): 105 | layer = Conv2D(kernel_size=(1, 1, channel_out)) 106 | return layer(x) 107 | 108 | def _shortcut(self, x, output_shape): 109 | input_shape = x.get_shape() 110 | channel_in = input_shape[-1] 111 | channel_out = output_shape[-1] 112 | 113 | if channel_in != channel_out: 114 | return self._projection(x, channel_out) 115 | else: 116 | return x 117 | -------------------------------------------------------------------------------- /tftf/layers/Attention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | from .initializers import zeros 5 | 6 | 7 | class Attention(Layer): 8 | ''' 9 | Attention Layer for Seq2Seq 10 | "Effective Approaches to Attention-based Neural Machine Translation", 11 | Minh-Thang Luong et al., EMNLP 2015 12 | https://arxiv.org/abs/1508.04025 13 | ''' 14 | def __init__(self, output_dim, 15 | input_dim=None, 16 | initializer='glorot_uniform', 17 | activation='tanh', 18 | state=None): 19 | ''' 20 | # Arguments 21 | input_dim: tuple or list. shape of (encoder_dim, decoder_dim). 22 | state: (default None). 
Encoder state (output). 23 | shape of (batch_size, len_sequence, encoder_dim) 24 | ''' 25 | super().__init__() 26 | 27 | if type(input_dim) != list and type(input_dim) != tuple: 28 | raise ValueError('`input_dim` must be given as a list or tuple.') 29 | 30 | if len(input_dim) != 2: 31 | raise ValueError('Length of `input_dim` must be 2. ' 32 | 'Not {}.'.format(len(input_dim))) 33 | 34 | if state is None: 35 | raise ValueError('`state` must be given.') 36 | 37 | self.output_dim = output_dim 38 | self.input_dim = input_dim 39 | 40 | self.initializer = initializer 41 | self.activation = \ 42 | self.activation_initializer(activation) 43 | self.state = state 44 | self._use_mask = False 45 | self.mask = None 46 | 47 | @property 48 | def input_shape(self): 49 | return tuple(self.input_dim) 50 | 51 | def compile(self): 52 | input_dim = self.input_dim 53 | output_dim = self.output_dim 54 | initializer = self.initializer 55 | 56 | self.W_a = \ 57 | self.kernel_initializer(initializer, 58 | shape=(input_dim[0], input_dim[1]), 59 | name='W_a') 60 | self.W_c = \ 61 | self.kernel_initializer(initializer, 62 | shape=(input_dim[0], output_dim), 63 | name='W_c') 64 | self.W_h = \ 65 | self.kernel_initializer(initializer, 66 | shape=(input_dim[1], output_dim), 67 | name='W_h') 68 | self.b = zeros((output_dim), name='b') 69 | 70 | self.params = [self.W_a, self.W_c, self.W_h, self.b] 71 | 72 | def forward(self, x, **kwargs): 73 | ''' 74 | # Arguments 75 | mask: Tensor. Mask for padded value. 76 | shape of (batch_size, encoder_dim) 77 | recurrent: boolean (default True). 78 | ''' 79 | if self.mask is None: 80 | self.mask = kwargs['mask'] if 'mask' in kwargs else None 81 | self._use_mask = True if self.mask is not None else False 82 | 83 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 84 | 85 | if recurr: 86 | score = tf.einsum('ijk,ilk->ijl', 87 | x, 88 | tf.einsum('ijk,kl->ijl', self.state, self.W_a)) 89 | if self._use_mask: 90 | score *= self.mask[:, np.newaxis] 91 | 92 | attn = self.attn = tf.nn.softmax(score) 93 | c = tf.einsum('ijk,ikl->ijl', attn, self.state) 94 | 95 | return self.activation(tf.einsum('ijk,kl->ijl', c, self.W_c) 96 | + tf.einsum('ijk,kl->ijl', x, self.W_h) 97 | + self.b) 98 | else: 99 | score = tf.einsum('ij,ikj->ik', 100 | x, 101 | tf.einsum('ijk,kl->ijl', self.state, self.W_a)) 102 | if self._use_mask: 103 | score *= self.mask 104 | 105 | attn = self.attn = tf.nn.softmax(score) 106 | c = tf.einsum('ij,ijk->ik', attn, self.state) 107 | 108 | return self.activation(tf.matmul(c, self.W_c) 109 | + tf.matmul(x, self.W_h) 110 | + self.b) 111 | -------------------------------------------------------------------------------- /tftf/layers/RNN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | from .initializers import zeros 5 | 6 | 7 | class RNN(Layer): 8 | def __init__(self, output_dim, 9 | input_dim=None, 10 | initializer='glorot_uniform', 11 | recurrent_initializer='orthogonal', 12 | recurrent_activation='tanh', 13 | length_of_sequences=None, 14 | return_sequence=False, 15 | initial_state=None, 16 | rng=None): 17 | super().__init__() 18 | self.output_dim = output_dim 19 | self.input_dim = input_dim 20 | self.initializer = initializer 21 | self.recurrent_initializer = recurrent_initializer 22 | self.recurrent_activation = \ 23 | self.activation_initializer(recurrent_activation) 24 | self._length_of_sequences = length_of_sequences 25 | self._return_sequence = 
return_sequence 26 | self._initial_state = initial_state 27 | self._use_mask = False 28 | 29 | @property 30 | def input_shape(self): 31 | return (self._length_of_sequences, self.input_dim) 32 | 33 | def compile(self): 34 | input_dim = self.input_dim 35 | output_dim = self.output_dim 36 | initializer = self.initializer 37 | recurrent_initializer = self.recurrent_initializer 38 | 39 | self.W = self.kernel_initializer(initializer, 40 | shape=(input_dim, output_dim), 41 | name='W') 42 | self.W_recurrent = \ 43 | self.kernel_initializer(recurrent_initializer, 44 | shape=(output_dim, output_dim), 45 | name='W_recurrent') 46 | self.b = zeros((output_dim), name='b') 47 | 48 | self.params = [self.W, self.W_recurrent, self.b] 49 | 50 | def forward(self, x, **kwargs): 51 | ''' 52 | # Arguments 53 | mask: Tensor. Mask for padded value. 54 | recurrent: boolean (default True). 55 | Whether to loop the input sequence. 56 | initial_state: (default None). Override self._initial_state. 57 | ''' 58 | def _recurrent(state, elems): 59 | if not self._use_mask: 60 | x = elems 61 | else: 62 | x = elems[0] 63 | mask = elems[1] 64 | h = self.recurrent_activation(tf.matmul(x, self.W) 65 | + tf.matmul(state, self.W_recurrent) 66 | + self.b) 67 | if not self._use_mask: 68 | return h 69 | else: 70 | mask = mask[:, np.newaxis] 71 | return mask * h + (1 - mask) * state 72 | 73 | mask = kwargs['mask'] if 'mask' in kwargs else None 74 | self._use_mask = True if mask is not None else False 75 | 76 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 77 | 78 | if 'initial_state' in kwargs: 79 | initial_state = kwargs['initial_state'] 80 | else: 81 | initial_state = self._initial_state 82 | 83 | if initial_state is None: 84 | initial_state = \ 85 | tf.matmul(x[:, 0, :], 86 | tf.zeros((self.input_dim, self.output_dim))) 87 | 88 | if not recurr: 89 | if mask is None: 90 | states = _recurrent(initial_state, x) 91 | else: 92 | states = _recurrent(initial_state, [x, mask]) 93 | 94 | return states 95 | else: 96 | if mask is None: 97 | states = tf.scan(fn=_recurrent, 98 | elems=tf.transpose(x, perm=[1, 0, 2]), 99 | initializer=initial_state) 100 | else: 101 | mask = tf.transpose(mask) 102 | states = tf.scan(fn=_recurrent, 103 | elems=[tf.transpose(x, perm=[1, 0, 2]), mask], 104 | initializer=initial_state) 105 | 106 | if self._return_sequence is True: 107 | return tf.transpose(states, perm=[1, 0, 2]) 108 | else: 109 | return states[-1] 110 | -------------------------------------------------------------------------------- /tftf/layers/Layer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .activations import * 3 | from .initializers import * 4 | 5 | 6 | class Layer(object): 7 | def __init__(self): 8 | self._input_dim = None 9 | self._output_dim = None 10 | self._input_dtype = tf.float32 11 | self._output_dtype = tf.float32 12 | self._params = [] 13 | self._reg_loss = [] 14 | self._compiled = False 15 | 16 | def __repr__(self): 17 | return '<{}: shape({}, {})>'.format(self.name, 18 | self.input_dim, 19 | self.output_dim) 20 | 21 | def __call__(self, x, **kwargs): 22 | if self.input_dim is None: 23 | input_shape = x.get_shape().as_list() 24 | if len(input_shape) == 2: # genral data 25 | self.input_dim = input_shape[1] 26 | elif len(input_shape) == 3: # sequencial data 27 | self.input_dim = input_shape[-1] 28 | else: # image data 29 | self.input_dim = tuple(input_shape[1:]) 30 | 31 | # OPTIMIZE 32 | if len(self.params) == 0: 33 | self.compile() 34 | x 
= self.forward(x, **kwargs) 35 | 36 | if self.output_dim is None: 37 | output_shape = x.get_shape().as_list() 38 | if len(output_shape) == 2: 39 | self.output_dim = output_shape[1] 40 | else: 41 | self.output_dim = tuple(output_shape[1:]) 42 | 43 | return x 44 | 45 | @property 46 | def name(self): 47 | return self.__class__.__name__ 48 | 49 | @property 50 | def shape(self): 51 | return (self.input_dim, self.output_dim) 52 | 53 | @property 54 | def input_dim(self): 55 | return self._input_dim 56 | 57 | @input_dim.setter 58 | def input_dim(self, val): 59 | self._input_dim = val 60 | 61 | @property 62 | def input_shape(self): 63 | return (self.input_dim,) 64 | 65 | @property 66 | def input_dtype(self): 67 | return self._input_dtype 68 | 69 | @property 70 | def output_dim(self): 71 | return self._output_dim 72 | 73 | @output_dim.setter 74 | def output_dim(self, val): 75 | self._output_dim = val 76 | 77 | @property 78 | def output_shape(self): 79 | return (self.output_dim,) 80 | 81 | @property 82 | def output_dtype(self): 83 | return self._output_dtype 84 | 85 | @property 86 | def params(self): 87 | return self._params 88 | 89 | @params.setter 90 | def params(self, val): 91 | if type(val) != list: 92 | raise AttributeError('type of params must be \'list\', ' 93 | 'not \'{}\'.'.format(type(val).__name__)) 94 | self._params = val 95 | 96 | @property 97 | def reg_loss(self): 98 | return self._reg_loss 99 | 100 | @reg_loss.setter 101 | def reg_loss(self, val): 102 | if type(val) != list: 103 | raise AttributeError('type of reg_loss must be \'list\', ' 104 | 'not \'{}\'.'.format(type(val).__name__)) 105 | self._reg_loss = val 106 | 107 | def activation_initializer(self, activation): 108 | activations = { 109 | 'elu': elu, 110 | 'hard_sigmoid': hard_sigmoid, 111 | 'leaky_relu': leaky_relu, 112 | 'linear': linear, 113 | # 'prelu': prelu, 114 | 'relu': relu, 115 | 'selu': selu, 116 | 'sigmoid': sigmoid, 117 | 'softmax': softmax, 118 | 'swish': swish, 119 | 'tanh': tanh 120 | } 121 | if activation in activations: 122 | activation = activations[activation] 123 | 124 | return activation 125 | 126 | def compile(self): 127 | raise NotImplementedError() 128 | 129 | def forward(self, x, **kwargs): 130 | raise NotImplementedError() 131 | 132 | def initialize_output_dim(self): 133 | if self.input_dim is None: 134 | raise ValueError('input_dim not definfed.') 135 | 136 | self.output_dim = self.input_dim 137 | return self.output_dim 138 | 139 | def kernel_initializer(self, initializer, shape, name=None): 140 | initializers = { 141 | 'glorot_normal': glorot_normal, 142 | 'glorot_uniform': glorot_uniform, 143 | 'normal': normal, 144 | 'ones': ones, 145 | 'orthogonal': orthogonal, 146 | 'zeros': zeros 147 | } 148 | 149 | if initializer in initializers: 150 | initializer = initializers[initializer] 151 | 152 | return initializer(shape, name=name) 153 | -------------------------------------------------------------------------------- /tftf/datasets/small_parallel_enja.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import numpy as np 4 | from .Dataset import Dataset 5 | 6 | 7 | ''' 8 | Download 50k En/Ja Parallel Corpus 9 | from https://github.com/odashi/small_parallel_enja 10 | and transform words to IDs. 
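With the default arguments, IDs are assigned as follows: 0 is reserved for
padding, 1 for the beginning-of-sentence marker, 2 for the end-of-sentence
marker, 3 for out-of-vocabulary words, and ordinary vocabulary starts at 4
(see `pad_value`, `start_char`, `end_char`, `oov_char` and `index_from`).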
11 | ''' 12 | 13 | 14 | def load_small_parallel_enja(path=None, 15 | to_ja=True, 16 | pad_value=0, 17 | start_char=1, 18 | end_char=2, 19 | oov_char=3, 20 | index_from=4, 21 | pad='', 22 | bos='', 23 | eos='', 24 | oov=''): 25 | url_base = 'https://raw.githubusercontent.com/' \ 26 | 'odashi/small_parallel_enja/master/' 27 | 28 | path = path or 'small_parallel_enja' 29 | dir_path = os.path.join(os.path.expanduser('~'), 30 | '.tftf', 'datasets', path) 31 | if not os.path.exists(dir_path): 32 | os.makedirs(dir_path) 33 | 34 | f_ja = ['train.ja', 'test.ja'] 35 | f_en = ['train.en', 'test.en'] 36 | 37 | for f in (f_ja + f_en): 38 | f_path = os.path.join(dir_path, f) 39 | if not os.path.exists(f_path): 40 | url = url_base + f 41 | print('Downloading {}'.format(f)) 42 | cmd = ['curl', '-o', f_path, url] 43 | subprocess.call(cmd) 44 | 45 | f_train_ja = os.path.join(dir_path, f_ja[0]) 46 | f_test_ja = os.path.join(dir_path, f_ja[1]) 47 | f_train_en = os.path.join(dir_path, f_en[0]) 48 | f_test_en = os.path.join(dir_path, f_en[1]) 49 | 50 | (train_ja, test_ja), num_words_ja, (w2i_ja, i2w_ja) = \ 51 | _build(f_train_ja, f_test_ja, 52 | pad_value, start_char, end_char, oov_char, index_from, 53 | pad, bos, eos, oov) 54 | (train_en, test_en), num_words_en, (w2i_en, i2w_en) = \ 55 | _build(f_train_en, f_test_en, 56 | pad_value, start_char, end_char, oov_char, index_from, 57 | pad, bos, eos, oov) 58 | 59 | if to_ja: 60 | train_X, test_X, num_X, w2i_X, i2w_X = \ 61 | train_en, test_en, num_words_en, w2i_en, i2w_en 62 | train_y, test_y, num_y, w2i_y, i2w_y = \ 63 | train_ja, test_ja, num_words_ja, w2i_ja, i2w_ja 64 | else: 65 | train_X, test_X, num_X, w2i_X, i2w_X = \ 66 | train_ja, test_ja, num_words_ja, w2i_ja, i2w_ja 67 | train_y, test_y, num_y, w2i_y, i2w_y = \ 68 | train_en, test_en, num_words_en, w2i_en, i2w_en 69 | 70 | train_X, test_X = np.array(train_X), np.array(test_X) 71 | train_y, test_y = np.array(train_y), np.array(test_y) 72 | 73 | return (train_X, train_y), (test_X, test_y), \ 74 | (num_X, num_y), (w2i_X, w2i_y), (i2w_X, i2w_y) 75 | 76 | 77 | def _build(f_train, f_test, 78 | pad_value=0, 79 | start_char=1, 80 | end_char=2, 81 | oov_char=3, 82 | index_from=4, 83 | pad='', 84 | bos='', 85 | eos='', 86 | oov=''): 87 | 88 | builder = _Builder(pad_value=pad_value, 89 | start_char=start_char, 90 | end_char=end_char, 91 | oov_char=oov_char, 92 | index_from=index_from, 93 | pad=pad, 94 | bos=bos, 95 | eos=eos, 96 | oov=oov) 97 | builder.fit(f_train) 98 | train = builder.transform(f_train) 99 | test = builder.transform(f_test) 100 | 101 | return (train, test), builder.num_words, (builder.w2i, builder.i2w) 102 | 103 | 104 | class _Builder(object): 105 | def __init__(self, 106 | pad_value=0, 107 | start_char=1, 108 | end_char=2, 109 | oov_char=3, 110 | index_from=4, 111 | pad='', 112 | bos='', 113 | eos='', 114 | oov=''): 115 | self._vocab = None 116 | self._w2i = None 117 | self._i2w = None 118 | 119 | self.pad_value = pad_value 120 | self.start_char = start_char 121 | self.end_char = end_char 122 | self.oov_char = oov_char 123 | self.index_from = index_from 124 | self.pad = pad 125 | self.bos = bos 126 | self.eos = eos 127 | self.oov = oov 128 | 129 | @property 130 | def num_words(self): 131 | return max(self._w2i.values()) + 1 132 | 133 | @property 134 | def w2i(self): 135 | ''' 136 | Dict of word to index 137 | ''' 138 | return self._w2i 139 | 140 | @property 141 | def i2w(self): 142 | ''' 143 | Dict of index to word 144 | ''' 145 | return self._i2w 146 | 147 | def fit(self, f_path): 148 | 
self._vocab = set() 149 | self._w2i = {} 150 | for line in open(f_path, encoding='utf-8'): 151 | _sentence = line.strip().split() 152 | self._vocab.update(_sentence) 153 | 154 | self._w2i = {w: (i + self.index_from) 155 | for i, w in enumerate(self._vocab)} 156 | if self.pad_value >= 0: 157 | self._w2i[self.pad] = self.pad_value 158 | self._w2i[self.bos] = self.start_char 159 | self._w2i[self.eos] = self.end_char 160 | self._w2i[self.oov] = self.oov_char 161 | self._i2w = {i: w for w, i in self._w2i.items()} 162 | 163 | def transform(self, f_path): 164 | if self._vocab is None or self._w2i is None: 165 | raise AttributeError('`{}.fit` must be called before `transform`.' 166 | ''.format(self.__class__.__name__)) 167 | sentences = [] 168 | for line in open(f_path, encoding='utf-8'): 169 | _sentence = line.strip().split() 170 | _sentence = [self.bos] + _sentence + [self.eos] 171 | sentences.append(self._encode(_sentence)) 172 | return sentences 173 | 174 | def _encode(self, sentence): 175 | encoded = [] 176 | for w in sentence: 177 | if w not in self._w2i: 178 | id = self.oov_char 179 | else: 180 | id = self._w2i[w] 181 | encoded.append(id) 182 | 183 | return encoded 184 | -------------------------------------------------------------------------------- /examples/lstm_translation_enja.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.layers import Dense, Activation 5 | from tftf.layers import LSTM, Embedding, TimeDistributedDense 6 | from tftf.preprocessing.sequence import pad_sequences, sort 7 | from tftf.datasets import load_small_parallel_enja 8 | from tftf import losses as loss 9 | from tftf import optimizers as opt 10 | 11 | if __name__ == '__main__': 12 | np.random.seed(0) 13 | tf.set_random_seed(123) 14 | 15 | ''' 16 | Load data 17 | ''' 18 | end_char = 2 19 | (train_X, train_y), (test_X, test_y), (num_X, num_y), \ 20 | (w2i_X, w2i_y), (i2w_X, i2w_y) = load_small_parallel_enja(to_ja=True) 21 | 22 | train_X, train_y = sort(train_X, train_y) 23 | test_X, test_y = sort(test_X, test_y) 24 | 25 | train_size = 50000 # up to 50000 26 | test_size = 500 # up to 500 27 | train_X, train_y = train_X[:train_size], train_y[:train_size] 28 | test_X, test_y = test_X[:test_size], test_y[:test_size] 29 | 30 | ''' 31 | Build model 32 | ''' 33 | pad_value = 0 34 | x = tf.placeholder(tf.int32, [None, None], name='x') 35 | t = tf.placeholder(tf.int32, [None, None], name='t') 36 | target = tf.one_hot(t[:, 1:], depth=num_y, dtype=tf.float32) 37 | mask_enc = tf.cast(tf.not_equal(x, pad_value), tf.float32) 38 | mask_dec = tf.cast(tf.not_equal(t[:, 1:], pad_value), tf.float32) 39 | 40 | encoder = [ 41 | Embedding(128, input_dim=num_X), 42 | LSTM(128, return_sequence=True, return_cell=True) 43 | ] 44 | 45 | h = x 46 | for layer in encoder: 47 | h = layer(h, mask=mask_enc) 48 | encoder_output, encoder_cell = h 49 | 50 | decoder = [ 51 | [ 52 | Embedding(128, input_dim=num_y), 53 | LSTM(128, return_sequence=True, return_cell=True, 54 | initial_state=encoder_output[:, -1, :], 55 | cell_state=encoder_cell[:, -1, :]) 56 | ], 57 | [ 58 | TimeDistributedDense(num_y), 59 | Activation('softmax') 60 | ] 61 | ] 62 | 63 | h = t[:, :-1] 64 | for layer in decoder[0]: 65 | h = layer(h) 66 | decoder_output, _ = h 67 | 68 | output = decoder_output 69 | for layer in decoder[1]: 70 | output = layer(output) 71 | 72 | cost = \ 73 | loss.categorical_crossentropy(output, 74 | target 
75 | * tf.transpose(mask_dec[:, np.newaxis], 76 | perm=[0, 2, 1])) 77 | train_step = opt.adam().minimize(cost) 78 | 79 | ''' 80 | Train model 81 | ''' 82 | epochs = 25 83 | batch_size = 50 84 | 85 | init = tf.global_variables_initializer() 86 | sess = tf.Session() 87 | sess.run(init) 88 | 89 | n_batches = len(train_X) // batch_size 90 | 91 | for epoch in range(epochs): 92 | loss = 0. 93 | for i in range(n_batches): 94 | start = i * batch_size 95 | end = start + batch_size 96 | 97 | _train_X = pad_sequences(train_X[start:end], 98 | value=pad_value) 99 | _train_y = pad_sequences(train_y[start:end], 100 | value=pad_value) 101 | 102 | _, _cost = sess.run([train_step, cost], feed_dict={ 103 | x: _train_X, 104 | t: _train_y 105 | }) 106 | loss += _cost 107 | 108 | loss /= n_batches 109 | 110 | _test_X = pad_sequences(test_X, value=pad_value) 111 | _test_y = pad_sequences(test_y, value=pad_value) 112 | 113 | val_loss = cost.eval(session=sess, feed_dict={ 114 | x: _test_X, 115 | t: _test_y 116 | }) 117 | 118 | print('epoch: {}, ' 119 | 'loss: {:.3}, ' 120 | 'val_loss: {:.3}'.format(epoch+1, loss, val_loss)) 121 | 122 | ''' 123 | Generate sentences 124 | ''' 125 | initial = { 126 | 'y': tf.placeholder(tf.int32, [None, None]), 127 | 'state': tf.placeholder(tf.float32, [None, None]), 128 | 'cell_state': tf.placeholder(tf.float32, [None, None]), 129 | 'step': tf.constant(0) 130 | } 131 | initial['flg'] = tf.cast(tf.zeros_like(initial['y'][:, 0]), dtype=tf.bool) 132 | maxlen = 100 133 | 134 | def cond(y, state, cell_state, step, flg): 135 | n_flg = tf.reduce_sum(tf.cast(flg, tf.int32)) 136 | next = \ 137 | tf.not_equal(n_flg, 138 | tf.reduce_sum(tf.ones_like(initial['flg'], 139 | dtype=tf.int32))) 140 | return tf.logical_and(step+1 < maxlen, next) 141 | 142 | def body(y, state, cell_state, step, flg): 143 | h = y[:, -1] 144 | for layer in decoder[0]: 145 | h = layer(h, 146 | recurrent=False, 147 | initial_state=state, 148 | cell_state=cell_state) 149 | decoder_output, decoder_cell = h 150 | 151 | output = decoder_output 152 | for layer in decoder[1]: 153 | output = layer(output, recurrent=False) 154 | output = tf.cast(tf.argmax(output, axis=1), tf.int32) 155 | y = tf.concat([y, output[:, np.newaxis]], axis=1) 156 | flg = tf.logical_or(flg, tf.equal(output, end_char)) 157 | 158 | return [y, 159 | decoder_output, 160 | decoder_cell, 161 | step+1, 162 | flg] 163 | generator = \ 164 | tf.while_loop(cond, 165 | body, 166 | loop_vars=[initial['y'], 167 | initial['state'], 168 | initial['cell_state'], 169 | initial['step'], 170 | initial['flg']], 171 | shape_invariants=[tf.TensorShape([None, None]), 172 | tf.TensorShape([None, None]), 173 | tf.TensorShape([None, None]), 174 | initial['step'].get_shape(), 175 | tf.TensorShape([None])]) 176 | 177 | test_X_ = pad_sequences(test_X, value=pad_value) 178 | init_y = np.zeros_like(test_X, dtype='int32')[:, np.newaxis] 179 | state, cell_state = \ 180 | sess.run([encoder_output, encoder_cell], feed_dict={ 181 | x: test_X_ 182 | }) 183 | init_state = state[:, -1, :] 184 | init_cell_state = cell_state[:, -1, :] 185 | 186 | preds, _, _, _, _ = sess.run(generator, feed_dict={ 187 | initial['y']: init_y, 188 | initial['state']: init_state, 189 | initial['cell_state']: init_cell_state 190 | }) 191 | 192 | for n in range(len(test_X)): 193 | data = test_X[n][1:-1] 194 | target = test_y[n][1:-1] 195 | pred = list(preds[n])[1:] 196 | pred.append(end_char) 197 | 198 | print('-' * 20) 199 | print('Original sentence:', 200 | ' '.join([i2w_X[i] for i in data])) 201 | print('True 
sentence:', 202 | ' '.join([i2w_y[i] for i in target])) 203 | print('Generated sentence:', 204 | ' '.join([i2w_y[i] for i in pred[:pred.index(end_char)]])) 205 | -------------------------------------------------------------------------------- /examples/attention_translation_enja.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from sklearn.model_selection import train_test_split 4 | from tftf.layers import Dense, Activation 5 | from tftf.layers import LSTM, Embedding, TimeDistributedDense, Attention 6 | from tftf.preprocessing.sequence import pad_sequences, sort 7 | from tftf.datasets import load_small_parallel_enja 8 | from tftf import losses as loss 9 | from tftf import optimizers as opt 10 | 11 | if __name__ == '__main__': 12 | np.random.seed(0) 13 | tf.set_random_seed(123) 14 | 15 | ''' 16 | Load data 17 | ''' 18 | end_char = 2 19 | (train_X, train_y), (test_X, test_y), (num_X, num_y), \ 20 | (w2i_X, w2i_y), (i2w_X, i2w_y) = load_small_parallel_enja(to_ja=True) 21 | 22 | train_X, train_y = sort(train_X, train_y) 23 | test_X, test_y = sort(test_X, test_y) 24 | 25 | train_size = 50000 # up to 50000 26 | test_size = 500 # up to 500 27 | train_X, train_y = train_X[:train_size], train_y[:train_size] 28 | test_X, test_y = test_X[:test_size], test_y[:test_size] 29 | 30 | ''' 31 | Build model 32 | ''' 33 | pad_value = 0 34 | x = tf.placeholder(tf.int32, [None, None], name='x') 35 | t = tf.placeholder(tf.int32, [None, None], name='t') 36 | target = tf.one_hot(t[:, 1:], depth=num_y, dtype=tf.float32) 37 | mask_enc = tf.cast(tf.not_equal(x, pad_value), tf.float32) 38 | mask_dec = tf.cast(tf.not_equal(t[:, 1:], pad_value), tf.float32) 39 | mask_attn = \ 40 | tf.where(condition=tf.equal(x, pad_value), 41 | x=tf.ones_like(x, dtype=tf.float32) * np.float32(-1e+10), 42 | y=tf.ones_like(x, dtype=tf.float32)) 43 | 44 | encoder = [ 45 | Embedding(128, input_dim=num_X), 46 | LSTM(128, return_sequence=True, return_cell=True) 47 | ] 48 | 49 | h = x 50 | for layer in encoder: 51 | h = layer(h, mask=mask_enc) 52 | encoder_output, encoder_cell = h 53 | 54 | decoder = [ 55 | [ 56 | Embedding(128, input_dim=num_y), 57 | LSTM(128, return_sequence=True, return_cell=True, 58 | initial_state=encoder_output[:, -1, :], 59 | cell_state=encoder_cell[:, -1, :]) 60 | ], 61 | [ 62 | Attention(128, input_dim=(128, 128), state=encoder_output), 63 | TimeDistributedDense(num_y), 64 | Activation('softmax') 65 | ] 66 | ] 67 | 68 | h = t[:, :-1] 69 | for layer in decoder[0]: 70 | h = layer(h) 71 | decoder_output, _ = h 72 | 73 | output = decoder_output 74 | for layer in decoder[1]: 75 | output = layer(output, mask=mask_attn) 76 | 77 | cost = \ 78 | loss.categorical_crossentropy(output, 79 | target 80 | * tf.transpose(mask_dec[:, np.newaxis], 81 | perm=[0, 2, 1])) 82 | train_step = opt.adam().minimize(cost) 83 | 84 | ''' 85 | Train model 86 | ''' 87 | epochs = 25 88 | batch_size = 50 89 | 90 | init = tf.global_variables_initializer() 91 | sess = tf.Session() 92 | sess.run(init) 93 | 94 | n_batches = len(train_X) // batch_size 95 | 96 | for epoch in range(epochs): 97 | loss = 0. 
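        # `loss` here is just a running float for per-epoch reporting; it shadows
        # the `tftf.losses` alias imported as `loss` above, which is harmless only
        # because the `cost` op has already been built at this point.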
98 | for i in range(n_batches): 99 | start = i * batch_size 100 | end = start + batch_size 101 | 102 | _train_X = pad_sequences(train_X[start:end], 103 | value=pad_value) 104 | _train_y = pad_sequences(train_y[start:end], 105 | value=pad_value) 106 | 107 | _, _cost = sess.run([train_step, cost], feed_dict={ 108 | x: _train_X, 109 | t: _train_y 110 | }) 111 | loss += _cost 112 | 113 | loss /= n_batches 114 | 115 | _test_X = pad_sequences(test_X, value=pad_value) 116 | _test_y = pad_sequences(test_y, value=pad_value) 117 | 118 | val_loss = cost.eval(session=sess, feed_dict={ 119 | x: _test_X, 120 | t: _test_y 121 | }) 122 | 123 | print('epoch: {}, ' 124 | 'loss: {:.3}, ' 125 | 'val_loss: {:.3}'.format(epoch+1, loss, val_loss)) 126 | 127 | ''' 128 | Generate sentences 129 | ''' 130 | initial = { 131 | 'y': tf.placeholder(tf.int32, [None, None]), 132 | 'state': tf.placeholder(tf.float32, [None, None]), 133 | 'cell_state': tf.placeholder(tf.float32, [None, None]), 134 | 'attn': tf.zeros_like(encoder_output[:, :, :1], dtype=tf.float32), 135 | 'step': tf.constant(0) 136 | } 137 | initial['flg'] = tf.cast(tf.zeros_like(initial['y'][:, 0]), dtype=tf.bool) 138 | maxlen = 100 139 | 140 | def cond(y, state, cell_state, attn, step, flg): 141 | n_flg = tf.reduce_sum(tf.cast(flg, tf.int32)) 142 | next = \ 143 | tf.not_equal(n_flg, 144 | tf.reduce_sum(tf.ones_like(initial['flg'], 145 | dtype=tf.int32))) 146 | return tf.logical_and(step+1 < maxlen, next) 147 | 148 | def body(y, state, cell_state, attn, step, flg): 149 | h = y[:, -1] 150 | for layer in decoder[0]: 151 | h = layer(h, 152 | recurrent=False, 153 | initial_state=state, 154 | cell_state=cell_state) 155 | decoder_output, decoder_cell = h 156 | 157 | output = decoder_output 158 | for layer in decoder[1]: 159 | output = layer(output, recurrent=False) 160 | output = tf.cast(tf.argmax(output, axis=1), tf.int32) 161 | y = tf.concat([y, output[:, np.newaxis]], axis=1) 162 | a = decoder[1][0].attn 163 | attn = tf.concat([attn, a[:, :, np.newaxis]], axis=2) 164 | flg = tf.logical_or(flg, tf.equal(output, end_char)) 165 | 166 | return [y, 167 | decoder_output, 168 | decoder_cell, 169 | attn, 170 | step+1, 171 | flg] 172 | generator = \ 173 | tf.while_loop(cond, 174 | body, 175 | loop_vars=[initial['y'], 176 | initial['state'], 177 | initial['cell_state'], 178 | initial['attn'], 179 | initial['step'], 180 | initial['flg']], 181 | shape_invariants=[tf.TensorShape([None, None]), 182 | tf.TensorShape([None, None]), 183 | tf.TensorShape([None, None]), 184 | tf.TensorShape([None, None, None]), 185 | initial['step'].get_shape(), 186 | tf.TensorShape([None])]) 187 | 188 | test_X_ = pad_sequences(test_X, value=pad_value) 189 | init_y = np.zeros_like(test_X, dtype='int32')[:, np.newaxis] 190 | state, cell_state, mask = \ 191 | sess.run([encoder_output, encoder_cell, mask_attn], feed_dict={ 192 | x: test_X_ 193 | }) 194 | init_state = state[:, -1, :] 195 | init_cell_state = cell_state[:, -1, :] 196 | 197 | preds, _, _, _, _, _ = sess.run(generator, feed_dict={ 198 | decoder[1][0].mask: mask, 199 | decoder[1][0].state: state, 200 | initial['y']: init_y, 201 | initial['state']: init_state, 202 | initial['cell_state']: init_cell_state 203 | }) 204 | 205 | for n in range(len(test_X)): 206 | data = test_X[n][1:-1] 207 | target = test_y[n][1:-1] 208 | pred = list(preds[n])[1:] 209 | pred.append(end_char) 210 | 211 | print('-' * 20) 212 | print('Original sentence:', 213 | ' '.join([i2w_X[i] for i in data])) 214 | print('True sentence:', 215 | ' '.join([i2w_y[i] for i in 
target])) 216 | print('Generated sentence:', 217 | ' '.join([i2w_y[i] for i in pred[:pred.index(end_char)]])) 218 | -------------------------------------------------------------------------------- /tftf/layers/LSTM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from .Layer import Layer 4 | from .initializers import zeros 5 | 6 | 7 | class LSTM(Layer): 8 | def __init__(self, output_dim, 9 | input_dim=None, 10 | initializer='glorot_uniform', 11 | activation='tanh', 12 | recurrent_initializer='orthogonal', 13 | recurrent_activation='sigmoid', 14 | length_of_sequences=None, 15 | return_sequence=False, 16 | return_cell=False, 17 | initial_state=None, 18 | cell_state=None, 19 | rng=None): 20 | super().__init__() 21 | self.output_dim = output_dim 22 | self.input_dim = input_dim 23 | 24 | self.initializer = initializer 25 | self.recurrent_initializer = recurrent_initializer 26 | self.activation = \ 27 | self.activation_initializer(activation) 28 | self.recurrent_activation = \ 29 | self.activation_initializer(recurrent_activation) 30 | self._length_of_sequences = length_of_sequences 31 | self._return_sequence = return_sequence 32 | self._return_cell = return_cell 33 | self._initial_state = initial_state 34 | self._cell_state = cell_state 35 | self._use_mask = False 36 | 37 | @property 38 | def input_shape(self): 39 | return (self._length_of_sequences, self.input_dim) 40 | 41 | def compile(self): 42 | input_dim = self.input_dim 43 | output_dim = self.output_dim 44 | initializer = self.initializer 45 | recurrent_initializer = self.recurrent_initializer 46 | 47 | self.W_c = \ 48 | self.kernel_initializer(initializer, 49 | shape=(input_dim, output_dim), 50 | name='W_c') 51 | self.W_i = \ 52 | self.kernel_initializer(initializer, 53 | shape=(input_dim, output_dim), 54 | name='W_i') 55 | self.W_f = \ 56 | self.kernel_initializer(initializer, 57 | shape=(input_dim, output_dim), 58 | name='W_f') 59 | self.W_o = \ 60 | self.kernel_initializer(initializer, 61 | shape=(input_dim, output_dim), 62 | name='W_o') 63 | self.W_recurrent_c = \ 64 | self.kernel_initializer(recurrent_initializer, 65 | shape=(output_dim, output_dim), 66 | name='W_recurrent_c') 67 | self.W_recurrent_i = \ 68 | self.kernel_initializer(recurrent_initializer, 69 | shape=(output_dim, output_dim), 70 | name='W_recurrent_i') 71 | self.W_recurrent_f = \ 72 | self.kernel_initializer(recurrent_initializer, 73 | shape=(output_dim, output_dim), 74 | name='W_recurrent_f') 75 | self.W_recurrent_o = \ 76 | self.kernel_initializer(recurrent_initializer, 77 | shape=(output_dim, output_dim), 78 | name='W_recurrent_o') 79 | 80 | self.b_c = zeros((output_dim), name='b_c') 81 | self.b_i = zeros((output_dim), name='b_i') 82 | self.b_f = zeros((output_dim), name='b_f') 83 | self.b_o = zeros((output_dim), name='b_o') 84 | 85 | self.params = [self.W_c, self.W_i, self.W_f, self.W_o, 86 | self.W_recurrent_c, self.W_recurrent_i, 87 | self.W_recurrent_f, self.W_recurrent_o, 88 | self.b_c, self.b_i, self.b_f, self.b_o] 89 | 90 | def forward(self, x, **kwargs): 91 | ''' 92 | # Arguments 93 | mask: Tensor. Mask for padded value. 94 | recurrent: boolean (default True). 95 | Whether to loop the input sequence. 96 | initial_state: (default None). Override self._initial_state. 97 | cell_state: (default None). Override self._cell_state. 
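        # Returns
            The hidden state(s): all time steps (batch-major) when
            `return_sequence` is True, otherwise only the last step.
            When `return_cell` is True, a `(states, cell)` tuple is
            returned instead of the states alone.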
98 | ''' 99 | def _recurrent(state, elems): 100 | if not self._use_mask: 101 | x = elems 102 | else: 103 | x = elems[0] 104 | mask = elems[1] 105 | 106 | a = activation(tf.matmul(x, self.W_c) 107 | + tf.matmul(state[0], self.W_recurrent_c) 108 | + self.b_c) 109 | i = recurrent_activation(tf.matmul(x, self.W_i) 110 | + tf.matmul(state[0], self.W_recurrent_i) 111 | + self.b_i) 112 | f = recurrent_activation(tf.matmul(x, self.W_f) 113 | + tf.matmul(state[0], self.W_recurrent_f) 114 | + self.b_f) 115 | o = recurrent_activation(tf.matmul(x, self.W_o) 116 | + tf.matmul(state[0], self.W_recurrent_o) 117 | + self.b_o) 118 | 119 | cell = i * a + f * state[1] 120 | h = o * activation(cell) 121 | 122 | if not self._use_mask: 123 | return [h, cell] 124 | else: 125 | mask = mask[:, np.newaxis] 126 | cell = mask * cell + (1 - mask) * state[1] 127 | h = mask * h + (1 - mask) * state[0] 128 | return [h, cell] 129 | 130 | activation = self.activation 131 | recurrent_activation = self.recurrent_activation 132 | 133 | mask = kwargs['mask'] if 'mask' in kwargs else None 134 | self._use_mask = True if mask is not None else False 135 | 136 | recurr = kwargs['recurrent'] if 'recurrent' in kwargs else True 137 | 138 | if 'initial_state' in kwargs: 139 | initial_state = kwargs['initial_state'] 140 | else: 141 | initial_state = self._initial_state 142 | 143 | if 'cell_state' in kwargs: 144 | cell_state = kwargs['cell_state'] 145 | else: 146 | cell_state = self._cell_state 147 | 148 | if initial_state is None: 149 | initial_state = \ 150 | tf.matmul(x[:, 0, :], 151 | tf.zeros((self.input_dim, self.output_dim))) 152 | 153 | if cell_state is None: 154 | cell_state = \ 155 | tf.matmul(x[:, 0, :], 156 | tf.zeros((self.input_dim, self.output_dim))) 157 | 158 | if not recurr: 159 | if mask is None: 160 | states, cell = _recurrent([initial_state, cell_state], x) 161 | else: 162 | states, cell = _recurrent([initial_state, cell_state], 163 | [x, mask]) 164 | if self._return_cell: 165 | return (states, cell) 166 | else: 167 | return states 168 | else: 169 | if mask is None: 170 | states, cell = \ 171 | tf.scan(fn=_recurrent, 172 | elems=tf.transpose(x, perm=[1, 0, 2]), 173 | initializer=[initial_state, cell_state]) 174 | else: 175 | mask = tf.transpose(mask) 176 | states, cell = \ 177 | tf.scan(fn=_recurrent, 178 | elems=[tf.transpose(x, 179 | perm=[1, 0, 2]), mask], 180 | initializer=[initial_state, cell_state]) 181 | 182 | if self._return_sequence: 183 | states = tf.transpose(states, perm=[1, 0, 2]) 184 | cell = tf.transpose(cell, perm=[1, 0, 2]) 185 | else: 186 | states = states[-1] 187 | cell = cell[-1] 188 | 189 | if self._return_cell: 190 | return (states, cell) 191 | else: 192 | return states 193 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /tftf/models/Model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | from sklearn.utils import shuffle as shuffle_data 5 | from ..layers import Dense 6 | from .callbacks import EarlyStopping 7 | from .losses import * 8 | from .metrics import * 9 | from .optimizers import * 10 | 11 | 12 | class Model(object): 13 | def __init__(self, 14 | name='model', 15 | reset_graph=True): 16 | if reset_graph: 17 | tf.reset_default_graph() 18 | 19 | self._name = name if name is not None else '' 20 | self._layers = [] 21 | self._shapes = [] 22 | self._sess = None 23 | self._restored = False 24 | 25 | def __del__(self): 26 | if self._sess is not None: 27 | self._sess.close() 28 | 29 | @property 30 | def layers(self): 31 | return self._layers 32 | 33 | def add(self, layer): 34 | input_dim = layer.input_dim 35 | output_dim = layer.output_dim 36 | 37 | if input_dim is None: 38 | if len(self.layers) == 0: 39 | raise AttributeError('input_dim must be specified ' 40 | 'on first layer.') 41 | else: 42 | layer.input_dim = self._shapes[-1][1] 43 | 44 | if output_dim is None: 45 | layer.initialize_output_dim() 46 | 47 | self._shapes.append(layer.shape) 48 | self._layers.append(layer) 49 | 50 | def compile(self, loss='mse', optimizer='rmsprop', 51 | variable_input=False, 52 | use_mask=False, 53 | pad_value=0): 54 | if not self._restored: 55 | self._compile_layers() 56 | 57 | if not variable_input: 58 | input_shape = [None] + list(self.layers[0].input_shape) 59 | else: 60 | input_shape = [None] + [None] * len(self.layers[0].input_shape) 61 | output_shape = [None] + list(self.layers[-1].output_shape) 62 | 63 | x = self.data = \ 64 | tf.placeholder(self.layers[0].input_dtype, 65 | shape=input_shape, name='x') 66 | t = self.target = \ 67 | tf.placeholder(self.layers[-1].output_dtype, 68 | shape=output_shape, name='t') 69 | 70 | if use_mask: 71 | mask = tf.cast(tf.not_equal(x, pad_value), tf.float32) 72 | else: 73 | mask = None 74 | 75 | training = self.training = \ 76 | tf.placeholder_with_default(False, ()) 77 | 78 | y = self._y = self._predict(x, training=training, mask=mask) 79 | self._loss = self._compile_loss(loss, y, t) 80 | self._train_step = \ 81 | self._optimize(optimizer).minimize(self._loss) 82 | 83 | if not self._restored: 84 | self._sess = tf.Session() 85 | self._init = tf.global_variables_initializer() 86 | self._sess.run(self._init) 87 | else: 88 | uninitialized_variables = [ 89 | var for var in tf.global_variables() 90 | if var.name.split(':')[0].encode() 91 | in set(self._sess.run(tf.report_uninitialized_variables())) 92 | ] 93 | self._sess.run(tf.variables_initializer(uninitialized_variables)) 94 | 95 | def describe(self): 96 | layers = self.layers 97 | digits = int(np.log10(len(layers))) + 1 98 | for i, layer in enumerate(layers): 99 | print('#{}: {}'.format(str(i).zfill(digits), layer)) 100 | 101 | def describe_params(self): 102 | layers = self.layers 103 | digits = 
int(np.log10(len(layers))) + 1 104 | for i, layer in enumerate(layers): 105 | _params = layer.params 106 | print('-' * 48) 107 | print('#{}: {}'.format(str(i).zfill(digits), layer)) 108 | print('-' * 48) 109 | if len(_params) == 0: 110 | print('No params') 111 | else: 112 | for j, param in enumerate(_params): 113 | print('{}: {}'.format(param.name, 114 | param.get_shape())) 115 | if i == len(layers) - 1: 116 | print('-' * 48) 117 | 118 | def eval(self, elem, feed_dict): 119 | return self._sess.run(elem, feed_dict=feed_dict) 120 | 121 | def fit(self, data, target, 122 | epochs=10, batch_size=100, 123 | validation_data=None, 124 | metrics=[], 125 | preprocesses=[], 126 | shuffle=True, 127 | early_stopping=-1, 128 | verbose=1): 129 | 130 | if len(data) != len(target): 131 | raise AttributeError('Length of X and y does not match.') 132 | 133 | es = None 134 | if early_stopping > -1: 135 | if validation_data is None: 136 | raise AttributeError('early_stopping needs validation_data.') 137 | es = EarlyStopping(patience=early_stopping, verbose=verbose) 138 | 139 | n_data = len(data) 140 | if n_data < batch_size: 141 | batch_size = n_data 142 | n_batches = n_data // batch_size 143 | 144 | for epoch in range(epochs): 145 | results = [['loss', 0.]] 146 | 147 | if shuffle: 148 | indices = shuffle_data(np.arange(n_data)) 149 | _data = data[indices] 150 | _target = target[indices] 151 | else: 152 | _data = data 153 | _target = target 154 | 155 | for i in range(n_batches): 156 | _start = i * batch_size 157 | _end = _start + batch_size 158 | 159 | _batch_data = _data[_start:_end] 160 | _batch_target = _target[_start:_end] 161 | 162 | for _preprocess in preprocesses: 163 | _batch_data = _preprocess(_batch_data) 164 | 165 | self.eval(self._train_step, 166 | feed_dict={ 167 | self.data: _batch_data, 168 | self.target: _batch_target, 169 | self.training: True 170 | }) 171 | results[0][1] += self.loss(_batch_data, _batch_target) 172 | 173 | if verbose: 174 | for j, metric in enumerate(metrics): 175 | _res = self.metric(metric, _batch_data, _batch_target) 176 | if i == 0: 177 | results.append(_res) 178 | else: 179 | results[j+1][1] += _res[1] 180 | 181 | if validation_data is not None: 182 | val_data = validation_data[0] 183 | for _preprocess in preprocesses: 184 | val_data = _preprocess(val_data) 185 | val_target = validation_data[1] 186 | val_loss = self.loss(val_data, val_target) 187 | 188 | if verbose: 189 | def _format(results): 190 | return ', '.join(map(lambda tup: 191 | '{}: {:.3}'.format(tup[0], tup[1]), 192 | results)) 193 | 194 | out = 'epoch: {}, '.format(epoch + 1) 195 | for i, res in enumerate(results): 196 | results[i][1] /= n_batches 197 | 198 | out += _format(results) 199 | 200 | if validation_data is not None: 201 | out += ', ' 202 | results = [('val_loss', val_loss)] 203 | for metric in metrics: 204 | results.append(self.metric(metric, 205 | val_data, 206 | val_target, 207 | validation=True)) 208 | out += _format(results) 209 | print(out) 210 | if es is not None: 211 | if es.on_epoch_end(epoch, val_loss): 212 | break 213 | 214 | def predict(self, data): 215 | ret = self.eval(self._y, 216 | feed_dict={ 217 | self.data: data 218 | }) 219 | return ret 220 | 221 | def loss(self, data, target): 222 | loss = self.eval(self._loss, 223 | feed_dict={ 224 | self.data: data, 225 | self.target: target 226 | }) 227 | return loss 228 | 229 | def metric(self, metric, data, target, validation=False): 230 | metrics = { 231 | 'accuracy': ('acc', self.accuracy), 232 | 'f1': ('f1', self.f1), 233 | 'precision': 
('pre', self.precision),
234 |             'recall': ('rec', self.recall)
235 |         }
236 | 
237 |         if metric in metrics:
238 |             name = metrics[metric][0]
239 |             score = metrics[metric][1](data, target)
240 |         else:
241 |             name = 'custom'
242 |             score = metric(data, target)
243 | 
244 |         if validation:
245 |             name = 'val_' + name
246 | 
247 |         if not validation:
248 |             return [name, score]
249 |         else:
250 |             return (name, score)
251 | 
252 |     def accuracy(self, data, target):
253 |         return accuracy(self.predict(data), target)
254 | 
255 |     def f1(self, data, target):
256 |         return f1(self.predict(data), target)
257 | 
258 |     def precision(self, data, target):
259 |         return precision(self.predict(data), target)
260 | 
261 |     def recall(self, data, target):
262 |         return recall(self.predict(data), target)
263 | 
264 |     def restore(self, model_path):
265 |         if self._sess is not None:
266 |             raise AttributeError('Session already initialized. '
267 |                                  'Model variables must be restored '
268 |                                  'before compile.')
269 |         self._compile_layers()
270 |         self._sess = tf.Session()
271 |         saver = tf.train.Saver()
272 |         saver.restore(self._sess, model_path)
273 |         self._restored = True
274 | 
275 |     def save(self, out_path, verbose=1):
276 |         out_dir = out_path.split('/')[:-1]
277 |         if len(out_dir) > 0:
278 |             os.makedirs(os.path.join(*out_dir), exist_ok=True)
279 |         saver = tf.train.Saver()
280 |         saver.save(self._sess, out_path)
281 | 
282 |         if verbose:
283 |             print('Model saved to: \'{}\''.format(out_path))
284 | 
285 |     def _compile_layers(self):
286 |         with tf.variable_scope(self._name):
287 |             for layer in self._layers:
288 |                 layer.compile()
289 | 
290 |     def _compile_loss(self, loss, data, target):
291 |         losses = {
292 |             'binary_crossentropy': binary_crossentropy,
293 |             'categorical_crossentropy': categorical_crossentropy,
294 |             'mean_squared_error': mean_squared_error,
295 |             'mse': mean_squared_error
296 |         }
297 | 
298 |         if loss in losses:
299 |             cost = losses[loss](data, target)
300 |         else:
301 |             cost = loss(data, target)
302 | 
303 |         for layer in self._layers:
304 |             cost += tf.reduce_sum(layer.reg_loss)
305 | 
306 |         return cost
307 | 
308 |     def _predict(self, x, **kwargs):
309 |         output = x
310 |         for layer in self.layers:
311 |             output = layer.forward(output, **kwargs)
312 | 
313 |         return output
314 | 
315 |     def _optimize(self, optimizer):
316 |         optimizers = {
317 |             'adadelta': adadelta,
318 |             'adagrad': adagrad,
319 |             'adam': adam,
320 |             'momentum': momentum,
321 |             'rmsprop': rmsprop,
322 |             'sgd': sgd
323 |         }
324 | 
325 |         if optimizer in optimizers:
326 |             return optimizers[optimizer]()
327 |         else:
328 |             return optimizer()
329 | 
--------------------------------------------------------------------------------
/tftf/layers/modules/Transformer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from . import Module
4 | from .. import Embedding, PositionalEncoding
5 | from .. import LayerNormalization
6 | from .. import TimeDistributedDense as Dense
7 | from .. import Activation, Dropout
8 | from ...losses import categorical_crossentropy
9 | from ...optimizers import adam
10 | 
11 | 
12 | class Transformer(Module):
13 |     '''
14 |     Implementation of Transformer model from
15 |     "Attention Is All You Need",
16 |     Ashish Vaswani et al.
17 | https://arxiv.org/abs/1706.03762 18 | ''' 19 | def __init__(self, 20 | len_src_vocab, 21 | len_target_vocab, 22 | d_model=512, 23 | d_ff=2048, 24 | N=6, 25 | h=8, 26 | pad_value=0, 27 | p_dropout=0.1, 28 | label_smooth=0.1, 29 | maxlen=6000, 30 | warmup_steps=4000): 31 | 32 | assert label_smooth >= 0. 33 | 34 | self.len_src_vocab = len_src_vocab 35 | self.len_target_vocab = len_target_vocab 36 | self.d_model = d_model 37 | self.d_ff = d_ff 38 | self.N = N 39 | self.h = h 40 | self.pad_value = pad_value 41 | self.p_dropout = p_dropout 42 | self.label_smooth = label_smooth 43 | self.maxlen = maxlen 44 | self.warmup_steps = warmup_steps 45 | self.is_training = tf.placeholder_with_default(False, ()) 46 | 47 | ''' 48 | Model Architecture 49 | ''' 50 | def v1(self, x, t, **kwargs): 51 | mask_src = self._pad_mask(x) 52 | encoder = self.encoder = \ 53 | Encoder(len_src_vocab=self.len_src_vocab, 54 | N=self.N, 55 | d_model=self.d_model, 56 | d_ff=self.d_ff, 57 | h=self.h, 58 | p_dropout=self.p_dropout, 59 | maxlen=self.maxlen) 60 | 61 | x = self.encode(x, mask=mask_src) 62 | 63 | mask_tgt = self._pad_subsequent_mask(t) 64 | decoder = self.decoder = \ 65 | Decoder(len_target_vocab=self.len_target_vocab, 66 | N=self.N, 67 | d_model=self.d_model, 68 | d_ff=self.d_ff, 69 | h=self.h, 70 | p_dropout=self.p_dropout, 71 | maxlen=self.maxlen) 72 | 73 | x = self.decode(t, memory=x, 74 | mask_src=mask_src, mask_tgt=mask_tgt, **kwargs) 75 | 76 | generator = self.generator = \ 77 | Generator(len_target_vocab=self.len_target_vocab) 78 | x = self.generate(x, **kwargs) 79 | 80 | self.x = x 81 | self.t = tf.one_hot(t, depth=self.len_target_vocab, dtype=tf.float32) 82 | 83 | return x 84 | 85 | def encode(self, x, mask=None, **kwargs): 86 | return self.encoder(x, mask=mask, 87 | training=self.is_training, **kwargs) 88 | 89 | def decode(self, x, memory, mask_src=None, mask_tgt=None, **kwargs): 90 | return self.decoder(x, memory=memory, 91 | mask_src=mask_src, mask_tgt=mask_tgt, 92 | training=self.is_training, **kwargs) 93 | 94 | def generate(self, x, **kwargs): 95 | return self.generator(x, training=self.is_training, **kwargs) 96 | 97 | def _pad_mask(self, x): 98 | mask = tf.cast(tf.not_equal(x, self.pad_value), tf.float32) 99 | return mask[:, np.newaxis] 100 | 101 | def _subsequent_mask(self, x): 102 | size = tf.shape(x)[-1] 103 | shape = (1, size, size) 104 | mask = tf.matrix_band_part(tf.ones(shape), -1, 0) 105 | return tf.cast(mask, tf.float32) 106 | 107 | def _pad_subsequent_mask(self, x): 108 | mask = self._pad_mask(x) 109 | mask = \ 110 | tf.cast( 111 | tf.logical_and(tf.cast(mask, tf.bool), 112 | tf.cast(self._subsequent_mask(x), 113 | tf.bool)), 114 | tf.float32 115 | ) 116 | return mask 117 | 118 | ''' 119 | Training 120 | ''' 121 | def loss(self, preds=None, target=None): 122 | if preds is None: 123 | preds = self.x 124 | if target is None: 125 | target = self.t 126 | 127 | e = self.label_smooth 128 | if e > 0.: 129 | target = (1 - e) * target + e / self.len_target_vocab 130 | return categorical_crossentropy(preds, target) 131 | 132 | def optimizer(self, loss=None): 133 | if loss is None: 134 | loss = self.loss() 135 | lrate = tf.placeholder(tf.float32, shape=(), name='lrate') 136 | opt = adam(lr=lrate, beta1=0.9, beta2=0.98, eps=1e-9) 137 | return (opt.minimize(loss), lrate) 138 | 139 | def lrate(self, epoch=0): 140 | ''' 141 | Learning rate for Adam in the model 142 | ''' 143 | step = epoch + 1 144 | return self.d_model ** (-0.5) * \ 145 | min(step ** (-0.5), step * self.warmup_steps ** (-1.5)) 146 | 
147 | 148 | class Encoder(object): 149 | def __init__(self, 150 | len_src_vocab, 151 | N=6, 152 | d_model=512, 153 | d_ff=2048, 154 | h=8, 155 | p_dropout=0.1, 156 | maxlen=6000): 157 | self.layers = [ 158 | Embedding(d_model, len_src_vocab), 159 | PositionalEncoding(d_model, maxlen), 160 | Dropout(p_dropout) 161 | ] 162 | self.sub_layers = \ 163 | [EncoderSubLayer(d_model, d_ff, h, p_dropout) for _ in range(N)] 164 | 165 | def __call__(self, x, mask=None, **kwargs): 166 | return self.forward(x, mask, **kwargs) 167 | 168 | def forward(self, x, mask=None, **kwargs): 169 | for l in self.layers: 170 | x = l(x, **kwargs) 171 | for sub_layer in self.sub_layers: 172 | x = sub_layer(x, mask=mask, **kwargs) 173 | 174 | return x 175 | 176 | 177 | class EncoderSubLayer(object): 178 | def __init__(self, 179 | d_model, 180 | d_ff, 181 | h, 182 | p_dropout): 183 | self.layers = [ 184 | [MultiHeadAttention(d_model, h), 185 | Dropout(p_dropout), 186 | LayerNormalization()], 187 | [FFN(d_model, d_ff), 188 | Dropout(p_dropout), 189 | LayerNormalization()] 190 | ] 191 | 192 | def __call__(self, x, mask=None, **kwargs): 193 | return self.forward(x, mask, **kwargs) 194 | 195 | def forward(self, x, mask=None, **kwargs): 196 | # 1st sub-layer 197 | layers = self.layers[0] 198 | h = layers[0](query=x, key=x, value=x, mask=mask, **kwargs) 199 | h = layers[1](h, **kwargs) 200 | x = layers[2](h + x, **kwargs) 201 | 202 | # 2nd sub-layer 203 | layers = self.layers[1] 204 | h = layers[0](x, **kwargs) 205 | h = layers[1](h, **kwargs) 206 | x = layers[2](h + x, **kwargs) 207 | 208 | return x 209 | 210 | 211 | class Decoder(object): 212 | def __init__(self, 213 | len_target_vocab, 214 | N=6, 215 | d_model=512, 216 | d_ff=2048, 217 | h=8, 218 | p_dropout=0.1, 219 | maxlen=6000): 220 | self.layers = [ 221 | Embedding(d_model, len_target_vocab), 222 | PositionalEncoding(d_model, maxlen), 223 | Dropout(p_dropout) 224 | ] 225 | self.sub_layers = \ 226 | [DecoderSubLayer(d_model, d_ff, h, p_dropout) for _ in range(N)] 227 | 228 | def __call__(self, x, memory, 229 | mask_src=None, mask_tgt=None, **kwargs): 230 | return self.forward(x, memory, mask_src, mask_tgt, **kwargs) 231 | 232 | def forward(self, x, memory, 233 | mask_src=None, mask_tgt=None, **kwargs): 234 | for l in self.layers: 235 | x = l(x, **kwargs) 236 | for sub_layer in self.sub_layers: 237 | x = sub_layer(x, memory, 238 | mask_src=mask_src, mask_tgt=mask_tgt, **kwargs) 239 | 240 | return x 241 | 242 | 243 | class DecoderSubLayer(object): 244 | def __init__(self, 245 | d_model, 246 | d_ff, 247 | h, 248 | p_dropout): 249 | self.layers = [ 250 | [MultiHeadAttention(d_model, h), 251 | Dropout(p_dropout), 252 | LayerNormalization()], 253 | [MultiHeadAttention(d_model, h), 254 | Dropout(p_dropout), 255 | LayerNormalization()], 256 | [FFN(d_model, d_ff), 257 | Dropout(p_dropout), 258 | LayerNormalization()] 259 | ] 260 | 261 | def __call__(self, x, memory, 262 | mask_src=None, mask_tgt=None, **kwargs): 263 | return self.forward(x, memory, mask_src, mask_tgt, **kwargs) 264 | 265 | def forward(self, x, memory, 266 | mask_src=None, mask_tgt=None, **kwargs): 267 | # 1st sub_layer 268 | layers = self.layers[0] 269 | h = layers[0](query=x, key=x, value=x, mask=mask_tgt, **kwargs) 270 | h = layers[1](h, **kwargs) 271 | x = layers[2](h + x, **kwargs) 272 | 273 | # 2nd sub-layer 274 | layers = self.layers[1] 275 | h = layers[0](query=x, key=memory, value=memory, 276 | mask=mask_src, **kwargs) 277 | h = layers[1](h, **kwargs) 278 | x = layers[2](h + x, **kwargs) 279 | 280 | # 3rd 
sub-layer
281 |         layers = self.layers[2]
282 |         h = layers[0](x, **kwargs)
283 |         h = layers[1](h, **kwargs)
284 |         x = layers[2](h + x, **kwargs)
285 | 
286 |         return x
287 | 
288 | 
289 | class Generator(object):
290 |     def __init__(self,
291 |                  len_target_vocab):
292 |         self.layers = [
293 |             Dense(len_target_vocab),
294 |             Activation('softmax')
295 |         ]
296 | 
297 |     def __call__(self, x, **kwargs):
298 |         return self.forward(x, **kwargs)
299 | 
300 |     def forward(self, x, **kwargs):
301 |         for l in self.layers:
302 |             x = l(x, **kwargs)
303 |         return x
304 | 
305 | 
306 | class MultiHeadAttention(object):
307 |     '''
308 |     Multi-Head Attention / Masked Multi-Head Attention
309 |     '''
310 |     def __init__(self, d_model, h):
311 |         self.d_model = d_model
312 |         self.h = h
313 |         self.linears = [Dense(d_model, d_model) for _ in range(4)]
314 | 
315 |     def __call__(self, query, key, value, mask=None, **kwargs):
316 |         return self.forward(query, key, value, mask, **kwargs)
317 | 
318 |     def forward(self, query, key, value, mask=None, **kwargs):
319 |         d_k = d_v = self.d_k = self.d_v = self.d_model // self.h
320 |         n_batches = tf.shape(query)[0]
321 |         query, key, value = \
322 |             [tf.transpose(tf.reshape(l(x),
323 |                                      shape=[n_batches, -1, self.h, d_k]),
324 |                           perm=[0, 2, 1, 3])
325 |              for l, x in zip(self.linears, (query, key, value))]
326 | 
327 |         if mask is not None:
328 |             mask = mask[:, np.newaxis]  # apply to all heads
329 |         x, attn = self._attention(query, key, value, mask=mask, **kwargs)
330 |         x = tf.reshape(tf.transpose(x, perm=[0, 2, 1, 3]),
331 |                        shape=[n_batches, -1, self.h * d_k])
332 | 
333 |         return self.linears[-1](x)
334 | 
335 |     def _attention(self, query, key, value, mask=None, **kwargs):
336 |         '''
337 |         Scaled Dot-Product Attention
338 |         '''
339 |         d_k = self.d_k
340 |         score = tf.matmul(query,
341 |                           tf.transpose(key, perm=[0, 1, 3, 2])) / np.sqrt(d_k)
342 |         if mask is not None:
343 |             mask = self._to_attention_mask(mask)
344 |             score += mask  # additive mask: masked positions get -1e9 and vanish after softmax
345 | 
346 |         attn = tf.nn.softmax(score)
347 |         c = tf.matmul(attn, value)
348 | 
349 |         return c, attn
350 | 
351 |     def _to_attention_mask(self, mask):
352 |         return tf.where(condition=tf.equal(mask, 0),
353 |                         x=tf.ones_like(mask,
354 |                                        dtype=tf.float32) * np.float32(-1e+9),
355 |                         y=tf.zeros_like(mask,
356 |                                         dtype=tf.float32))
357 | 
358 | 
359 | class FFN(object):
360 |     '''
361 |     Position-wise Feed-Forward Networks
362 |     '''
363 |     def __init__(self, d_model, d_ff):
364 |         self.layers = [
365 |             Dense(d_ff, d_model),
366 |             Activation('relu'),
367 |             Dense(d_model, d_ff)
368 |         ]
369 | 
370 |     def __call__(self, x, **kwargs):
371 |         return self.forward(x, **kwargs)
372 | 
373 |     def forward(self, x, **kwargs):
374 |         for l in self.layers:
375 |             x = l(x, **kwargs)
376 |         return x
377 | 
--------------------------------------------------------------------------------
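
Usage sketch (illustrative only, not copied from the shipped examples/). The Model class in tftf/models/Model.py above follows a Keras-style add / compile / fit / predict / save workflow. The sketch below assumes that the plain Dense layer takes (output_dim, input_dim) like the TimeDistributedDense used in the Transformer module, that Activation accepts an activation name as in Generator, and that the first layer must carry an explicit input_dim as enforced by Model.add; the data is random stand-in data, not a real dataset.

# Hypothetical usage sketch for tftf.models.Model (assumptions noted above).
import numpy as np
from tftf.layers import Dense, Activation
from tftf.models import Model

# Random stand-in data: 1000 samples, 784 features, 10 one-hot classes.
X = np.random.rand(1000, 784).astype(np.float32)
t = np.eye(10)[np.random.randint(10, size=1000)].astype(np.float32)

model = Model()
model.add(Dense(256, 784))       # assumed signature: Dense(output_dim, input_dim)
model.add(Activation('relu'))
model.add(Dense(10))             # input_dim inferred from the previous layer by Model.add
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.describe()
model.fit(X, t, epochs=5, batch_size=100, metrics=['accuracy'])

preds = model.predict(X[:10])
model.save('ckpt/mlp')           # creates ckpt/ if missing and prints the save path

Note that restore() must be called before compile(): to reload, build the same layer stack with add(), call restore('ckpt/mlp'), and only then call compile(), which reuses the restored session and initializes any remaining variables.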
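
For the Transformer module, lrate implements the warm-up schedule from the paper, lrate = d_model ** (-0.5) * min(step ** (-0.5), step * warmup_steps ** (-1.5)) with step = epoch + 1; with the defaults (d_model=512, warmup_steps=4000) it starts around 1.7e-7 and peaks near 7e-4 at step 4000. The fragment below is a sketch of how that value might be fed to the lrate placeholder returned by Transformer.optimizer; it assumes the graph was built with model.v1(x, t) and that sess, x, t, batch_x, batch_t and n_epochs come from surrounding code.

# Hypothetical training-loop fragment for the Transformer module above.
loss = model.loss()                       # label-smoothed categorical crossentropy
train_step, lrate = model.optimizer(loss)  # (Adam minimize op, learning-rate placeholder)

for epoch in range(n_epochs):
    sess.run(train_step,
             feed_dict={
                 x: batch_x,
                 t: batch_t,
                 lrate: model.lrate(epoch),   # ~1.7e-7 at step 1, ~7e-4 at step 4000 with defaults
                 model.is_training: True
             })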