├── .gitignore ├── LICENSE ├── README.md ├── avb ├── README.md └── avb.py ├── binarynet ├── README.md ├── binary_layers.py ├── binary_ops.py ├── mnist_cnn.py └── mnist_mlp.py ├── focal_loss ├── README.md ├── losses.py └── mnist_mlp.py ├── gcnn ├── README.md ├── char_generator.py ├── char_lm_gcnn.py ├── data │ ├── imdb_preprocess_semi.py │ ├── nonbreaking_prefixes │ │ └── nonbreaking_prefix.en │ ├── tinyshakespeare │ │ └── input.txt │ └── tokenizer.perl ├── gcnn.py ├── imdb_generator.py └── imdb_lm_gcnn.py ├── glsgan ├── README.md └── glsgan_mlp.py ├── layernorm ├── README.md ├── babi_lnlstm.py ├── data ├── imdb_cnn_lstm.py ├── imdb_generator.py ├── imdb_lm.py ├── layer_norm_layers.py ├── mnist_cnn.py └── mnist_mlp.py ├── lsgan ├── README.md └── lsgan_mlp.py ├── qrnn ├── README.md ├── imbd_qrnn.py ├── imbd_qrnn_Bidirecional.py └── qrnn.py ├── senet ├── README.md ├── layers.py └── mnist_cnn.py ├── ternarynet ├── README.md ├── imdb_generator.py ├── imdb_lm.py ├── mnist_cnn.py ├── mnist_mlp.py ├── ternary_layers.py └── ternary_ops.py ├── vae ├── README.md ├── img │ ├── i_mse.png │ ├── i_xent.png │ ├── x_mse.png │ ├── x_xent.png │ ├── z_mse.png │ └── z_xent.png └── variational_autoencoder.py ├── weightnorm ├── README.md ├── cifar10_cnn.py ├── gru_text_generation.py ├── imdb_generator.py ├── imdb_lm.py ├── mnist_cnn.py ├── mnist_mlp.py └── weight_norm_layers.py ├── wgan ├── README.md ├── wgan_cnn.py └── wgan_mlp.py └── xnornet ├── README.md ├── binary_layers.py ├── binary_ops.py ├── mnist_cnn.py ├── mnist_mlp.py └── xnor_layers.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Ke Ding 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # binarynet 2 | Binary Networks 3 | 4 | # xnornet 5 | XNOR Networks 6 | 7 | # ternarynet 8 | Ternary Networks 9 | 10 | # qrnn 11 | Quasi-Recurrent Neural Networks 12 | 13 | # vae 14 | Variational Auto-Encoder 15 | 16 | # gcnn 17 | Gated Convolutional Neural Networks 18 | 19 | # weightnorm 20 | Weight Normalization 21 | 22 | # layernorm 23 | Layer Normalization 24 | 25 | # wgan 26 | Wasserstein GAN 27 | 28 | # lsgan 29 | Least Squares GAN 30 | 31 | # glsgan 32 | Generalized Loss Sensitive GAN 33 | 34 | # focal_loss 35 | Focal Loss 36 | 37 | # senet 38 | Squeeze-and-Excitation Networks 39 | 40 | Note: 41 | By default, all Keras scripts run on the TensorFlow backend with image_data_format 'channels_first'. 42 | -------------------------------------------------------------------------------- /avb/README.md: -------------------------------------------------------------------------------- 1 | An attempt to implement Adversarial Variational Bayes (AVB) on MNIST. 2 | 3 | Caveat: the paper (as of v1 22/01/2017) lacks implementation details (especially for how to manipulate epsilon), so I am not sure whether I am doing it the right way.
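For orientation, my reading of the paper's training scheme (a hedged summary, not guaranteed to match the authors' setup): the adversary T(x, z) is trained as a binary classifier that separates pairs (x, z) with z = Enc(x, eps), eps ~ N(0, I), from pairs with z ~ p(z); at its optimum T(x, z) estimates log q(z|x) - log p(z). Encoder and decoder are then updated, with the adversary held fixed, to maximize

    E_x E_{q(z|x)} [ -T(x, z) + log p(x|z) ]

which is what avb.py below tries to realize with the `rec`, `gan_g` and `gan_d` models.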
4 | The training just does not converge. 5 | 6 | --- 7 | ## References 8 | * Mescheder et al. [Adversarial Variational Bayes: Unifying Variational Autoencoders and Generative Adversarial Networks](https://arxiv.org/abs/1701.04722). 9 | -------------------------------------------------------------------------------- /avb/avb.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build an adversarial variational Bayes (AVB) model with Keras. 2 | ''' 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from scipy.stats import norm 6 | 7 | from keras.layers import Input, Dense, Lambda, Activation, Concatenate, Dot 8 | from keras.models import Model 9 | from keras.regularizers import l2 10 | from keras import backend as K 11 | from keras import objectives 12 | from keras.datasets import mnist 13 | from keras.optimizers import * 14 | 15 | np.random.seed(1111) # for reproducibility 16 | 17 | training = True 18 | 19 | batch_size = 50 20 | n = 784 # dimension of a data point 21 | m = 2 # dimension of the latent variables 22 | l = 5 # dimension of the random noise 23 | hidden_dim = 256 24 | epochs = 50 25 | epsilon_std = 1.0 26 | loss = 'categorical_crossentropy' # 'mse' or 'categorical_crossentropy' 27 | 28 | decay = 1e-4 # weight decay, a.k.a. l2 regularization 29 | use_bias = True 30 | 31 | ## Encoder 32 | x = Input(shape=(n,)) 33 | g = Input(shape=(l,)) 34 | x_g = Concatenate(-1)([x, g]) 35 | h_encoded = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='relu')(x_g) 36 | z = Dense(m, activation='relu', kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias)(h_encoded) 37 | 38 | encoder = Model([x, g], z) 39 | 40 | ## Decoder 41 | decoder_h = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='relu') 42 | decoder_mean = Dense(n, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='sigmoid') 43 | h_decoded = decoder_h(z) 44 | x_hat = decoder_mean(h_decoded) 45 | 46 | rec = Model([x, g], x_hat) 47 | rec.compile(optimizer=Adam(1e-3), loss='binary_crossentropy') 48 | 49 | ## Discriminator 50 | x_h = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='relu') 51 | z_h = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='relu') 52 | 53 | ### gan_g 54 | x_h.trainable = False 55 | z_h.trainable = False 56 | T = Dot(-1)([x_h(x), z_h(z)]) 57 | gan_g = Model([x, g], T) 58 | gan_g.compile(optimizer=Adam(1e-4), loss=lambda y_true, y_predict: -K.mean(y_predict, -1)) 59 | 60 | ### gan_d 61 | x_h.trainable = True 62 | z_h.trainable = True 63 | x = Input(shape=(n,)) 64 | z = Input(shape=(m,)) 65 | fake = Dot(-1)([x_h(x), z_h(z)]) 66 | fake = Activation('sigmoid')(fake) 67 | gan_d = Model([x, z], fake) 68 | gan_d.compile(optimizer=Adam(1e-4), loss='binary_crossentropy') 69 | 70 | if training: 71 | # train the VAE on MNIST digits 72 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 73 | 74 | x_train = x_train.astype('float32') / 255 75 | x_test = x_test.astype('float32') / 255 76 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 77 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 78 | 79 | ids = range(len(x_train)) 80 | for i in range(epochs): 81 | print('Epoch {}:'.format(i + 1)) 82 | rec_loss = [] 83 | gen_loss = [] 84 | dis_loss = [] 85 | for s in range(0, len(ids),
batch_size): 86 | x = x_train[ids[s:s+batch_size]] 87 | y = y_train[ids[s:s+batch_size]] 88 | bs = len(x) 89 | 90 | eps = np.random.randn(bs, l) 91 | z = np.random.randn(bs, m) 92 | 93 | # reconstruction 94 | loss = rec.train_on_batch([x, eps], x) 95 | rec_loss.append(loss) 96 | 97 | # encoder 98 | loss = gan_g.train_on_batch([x, eps], [1] * bs) 99 | gen_loss.append(loss) 100 | 101 | # discriminator 102 | z_fake = encoder.predict([x, eps]) 103 | x = np.concatenate([x, x], axis=0) 104 | z = np.concatenate([z, z_fake], axis=0) 105 | y = np.asarray([1] * bs + [0] * bs, dtype='float32') 106 | 107 | loss = gan_d.train_on_batch([x, z], y) 108 | dis_loss.append(loss) 109 | 110 | if s % 1000 == 0: 111 | print('rec loss: {}, gen loss: {}, dis loss: {}'.\ 112 | format(np.mean(rec_loss), np.mean(gen_loss), np.mean(dis_loss))) 113 | 114 | print('rec loss: {}, gen loss: {}, dis loss: {}'.\ 115 | format(np.mean(rec_loss), np.mean(gen_loss), np.mean(dis_loss))) 116 | rec.save_weights('weights_{}.h5'.format(i)) 117 | 118 | # display a 2D manifold of the digits 119 | n = 15 # figure with 15x15 digits 120 | digit_size = 28 121 | figure = np.zeros((digit_size * n, digit_size * n)) 122 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian 123 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian 124 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 125 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 126 | 127 | z = Input(shape=(m,)) 128 | h_decoded = decoder_h(z) 129 | x_hat = decoder_mean(h_decoded) 130 | decoder = Model(z, x_hat) 131 | decoder.load_weights('weights_8.h5', by_name=True) 132 | 133 | for i, yi in enumerate(grid_x): 134 | for j, xi in enumerate(grid_y): 135 | z_sample = np.array([[xi, yi]]) 136 | x_decoded = decoder.predict(z_sample) 137 | digit = x_decoded[0].reshape(digit_size, digit_size) 138 | figure[i * digit_size: (i + 1) * digit_size, 139 | j * digit_size: (j + 1) * digit_size] = digit 140 | 141 | fig = plt.figure(figsize=(10, 10)) 142 | plt.imshow(figure, cmap='Greys_r') 143 | plt.show() 144 | fig.savefig('x.png') 145 | -------------------------------------------------------------------------------- /binarynet/README.md: -------------------------------------------------------------------------------- 1 | An implemtation of binaryNet for Keras. 2 | 3 | The binarized Dense and Conv2D are two keras layers, thus can be integrated into keras framework out of box. 4 | 5 | ## To run the demo: 6 | ### train a binary MLP model on MNIST 7 | python mnist_mlp.py 8 | ### train a binary CNN model on MNIST 9 | python mnist_cnn.py 10 | 11 | The code is according to the [theano version](https://github.com/MatthieuCourbariaux/BinaryNet). 12 | The only missing ingredient is that the learning rate is not scaled w.r.t. weight' fan-in & fan-out. 13 | (An involved [patch](https://github.com/fchollet/keras/pull/3004) is needed.) 14 | 15 | ## Reference 16 | * Courbariaux et al. [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1](http://arxiv.org/abs/1602.02830). 
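## Usage sketch

A minimal, hedged sketch of wiring the layers into a model, mirroring mnist_mlp.py (hyperparameters are illustrative; assumes this directory is on the Python path):

```python
from keras.models import Sequential
from keras.layers import BatchNormalization, Activation

from binary_layers import BinaryDense   # binarized Dense layer from this directory
from binary_ops import binary_tanh      # +1/-1 activation with straight-through gradient

model = Sequential()
# Weights are kept in [-H, H] and binarized on the forward pass.
model.add(BinaryDense(512, H='Glorot', kernel_lr_multiplier='Glorot',
                      use_bias=False, input_shape=(784,)))
model.add(BatchNormalization())
model.add(Activation(binary_tanh))
model.add(BinaryDense(10, H='Glorot', kernel_lr_multiplier='Glorot', use_bias=False))
model.add(BatchNormalization())

# As in mnist_mlp.py, train against {-1, +1} one-hot targets with a hinge-style loss.
model.compile(loss='squared_hinge', optimizer='adam', metrics=['acc'])
```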
17 | -------------------------------------------------------------------------------- /binarynet/binary_layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | from keras import backend as K 5 | 6 | from keras.layers import InputSpec, Layer, Dense, Conv2D 7 | from keras import constraints 8 | from keras import initializers 9 | 10 | from binary_ops import binarize 11 | 12 | 13 | class Clip(constraints.Constraint): 14 | def __init__(self, min_value, max_value=None): 15 | self.min_value = min_value 16 | self.max_value = max_value 17 | if not self.max_value: 18 | self.max_value = -self.min_value 19 | if self.min_value > self.max_value: 20 | self.min_value, self.max_value = self.max_value, self.min_value 21 | 22 | def __call__(self, p): 23 | return K.clip(p, self.min_value, self.max_value) 24 | 25 | def get_config(self): 26 | return {"min_value": self.min_value, 27 | "max_value": self.max_value} 28 | 29 | 30 | class BinaryDense(Dense): 31 | ''' Binarized Dense layer 32 | References: 33 | "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] 34 | ''' 35 | def __init__(self, units, H=1., kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): 36 | super(BinaryDense, self).__init__(units, **kwargs) 37 | self.H = H 38 | self.kernel_lr_multiplier = kernel_lr_multiplier 39 | self.bias_lr_multiplier = bias_lr_multiplier 40 | 41 | super(BinaryDense, self).__init__(units, **kwargs) 42 | 43 | def build(self, input_shape): 44 | assert len(input_shape) >= 2 45 | input_dim = input_shape[1] 46 | 47 | if self.H == 'Glorot': 48 | self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) 49 | #print('Glorot H: {}'.format(self.H)) 50 | if self.kernel_lr_multiplier == 'Glorot': 51 | self.kernel_lr_multiplier = np.float32(1. 
/ np.sqrt(1.5 / (input_dim + self.units))) 52 | #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) 53 | 54 | self.kernel_constraint = Clip(-self.H, self.H) 55 | self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) 56 | self.kernel = self.add_weight(shape=(input_dim, self.units), 57 | initializer=self.kernel_initializer, 58 | name='kernel', 59 | regularizer=self.kernel_regularizer, 60 | constraint=self.kernel_constraint) 61 | 62 | if self.use_bias: 63 | self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] 64 | self.bias = self.add_weight(shape=(self.output_dim,), 65 | initializer=self.bias_initializer, 66 | name='bias', 67 | regularizer=self.bias_regularizer, 68 | constraint=self.bias_constraint) 69 | else: 70 | self.lr_multipliers = [self.kernel_lr_multiplier] 71 | self.bias = None 72 | 73 | self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) 74 | self.built = True 75 | 76 | 77 | def call(self, inputs): 78 | binary_kernel = binarize(self.kernel, H=self.H) 79 | output = K.dot(inputs, binary_kernel) 80 | if self.use_bias: 81 | output = K.bias_add(output, self.bias) 82 | if self.activation is not None: 83 | output = self.activation(output) 84 | return output 85 | 86 | def get_config(self): 87 | config = {'H': self.H, 88 | 'kernel_lr_multiplier': self.kernel_lr_multiplier, 89 | 'bias_lr_multiplier': self.bias_lr_multiplier} 90 | base_config = super(BinaryDense, self).get_config() 91 | return dict(list(base_config.items()) + list(config.items())) 92 | 93 | 94 | class BinaryConv2D(Conv2D): 95 | '''Binarized Convolution2D layer 96 | References: 97 | "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] 98 | ''' 99 | def __init__(self, filters, kernel_lr_multiplier='Glorot', 100 | bias_lr_multiplier=None, H=1., **kwargs): 101 | super(BinaryConv2D, self).__init__(filters, **kwargs) 102 | self.H = H 103 | self.kernel_lr_multiplier = kernel_lr_multiplier 104 | self.bias_lr_multiplier = bias_lr_multiplier 105 | 106 | 107 | def build(self, input_shape): 108 | if self.data_format == 'channels_first': 109 | channel_axis = 1 110 | else: 111 | channel_axis = -1 112 | if input_shape[channel_axis] is None: 113 | raise ValueError('The channel dimension of the inputs ' 114 | 'should be defined. Found `None`.') 115 | 116 | input_dim = input_shape[channel_axis] 117 | kernel_shape = self.kernel_size + (input_dim, self.filters) 118 | 119 | base = self.kernel_size[0] * self.kernel_size[1] 120 | if self.H == 'Glorot': 121 | nb_input = int(input_dim * base) 122 | nb_output = int(self.filters * base) 123 | self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) 124 | #print('Glorot H: {}'.format(self.H)) 125 | 126 | if self.kernel_lr_multiplier == 'Glorot': 127 | nb_input = int(input_dim * base) 128 | nb_output = int(self.filters * base) 129 | self.kernel_lr_multiplier = np.float32(1. 
/ np.sqrt(1.5 / (nb_input + nb_output))) 130 | #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) 131 | 132 | self.kernel_constraint = Clip(-self.H, self.H) 133 | self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) 134 | self.kernel = self.add_weight(shape=kernel_shape, 135 | initializer=self.kernel_initializer, 136 | name='kernel', 137 | regularizer=self.kernel_regularizer, 138 | constraint=self.kernel_constraint) 139 | 140 | if self.use_bias: 141 | self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] 142 | self.bias = self.add_weight(shape=(self.filters,), 143 | initializer=self.bias_initializer, 144 | name='bias', 145 | regularizer=self.bias_regularizer, 146 | constraint=self.bias_constraint) 147 | 148 | else: 149 | self.lr_multipliers = [self.kernel_lr_multiplier] 150 | self.bias = None 151 | 152 | # Set input spec. 153 | self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) 154 | self.built = True 155 | 156 | def call(self, inputs): 157 | binary_kernel = binarize(self.kernel, H=self.H) 158 | outputs = K.conv2d( 159 | inputs, 160 | binary_kernel, 161 | strides=self.strides, 162 | padding=self.padding, 163 | data_format=self.data_format, 164 | dilation_rate=self.dilation_rate) 165 | 166 | if self.use_bias: 167 | outputs = K.bias_add( 168 | outputs, 169 | self.bias, 170 | data_format=self.data_format) 171 | 172 | if self.activation is not None: 173 | return self.activation(outputs) 174 | return outputs 175 | 176 | def get_config(self): 177 | config = {'H': self.H, 178 | 'kernel_lr_multiplier': self.kernel_lr_multiplier, 179 | 'bias_lr_multiplier': self.bias_lr_multiplier} 180 | base_config = super(BinaryConv2D, self).get_config() 181 | return dict(list(base_config.items()) + list(config.items())) 182 | 183 | 184 | # Aliases 185 | 186 | BinaryConvolution2D = BinaryConv2D 187 | -------------------------------------------------------------------------------- /binarynet/binary_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import keras.backend as K 4 | 5 | 6 | def round_through(x): 7 | '''Element-wise rounding to the closest integer with full gradient propagation. 8 | A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) 9 | ''' 10 | rounded = K.round(x) 11 | return x + K.stop_gradient(rounded - x) 12 | 13 | 14 | def _hard_sigmoid(x): 15 | '''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid). 16 | 17 | # Reference: 18 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830) 19 | 20 | ''' 21 | x = (0.5 * x) + 0.5 22 | return K.clip(x, 0, 1) 23 | 24 | 25 | def binary_sigmoid(x): 26 | '''Binary hard sigmoid activation for training binarized neural networks. 27 | 28 | # Reference: 29 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830) 30 | 31 | ''' 32 | return round_through(_hard_sigmoid(x)) 33 | 34 | 35 | def binary_tanh(x): 36 | '''Binary hard tanh activation for training binarized neural networks.
37 | The neurons' activations binarization function 38 | It behaves like the sign function during forward propagation 39 | And like: 40 | hard_tanh(x) = 2 * _hard_sigmoid(x) - 1 41 | clear gradient when |x| > 1 during back propagation 42 | 43 | # Reference: 44 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} 45 | 46 | ''' 47 | return 2 * round_through(_hard_sigmoid(x)) - 1 48 | 49 | 50 | def binarize(W, H=1): 51 | '''The weights' binarization function, 52 | 53 | # Reference: 54 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} 55 | 56 | ''' 57 | # [-H, H] -> -H or H 58 | Wb = H * binary_tanh(W / H) 59 | return Wb 60 | 61 | 62 | def _mean_abs(x, axis=None, keepdims=False): 63 | return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) 64 | 65 | 66 | def xnorize(W, H=1., axis=None, keepdims=False): 67 | Wb = binarize(W, H) 68 | Wa = _mean_abs(W, axis, keepdims) 69 | 70 | return Wa, Wb 71 | -------------------------------------------------------------------------------- /binarynet/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple binarize CNN on the MNIST dataset. 2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 98.98% test accuracy after 20 epochs using tensorflow backend 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | import keras.backend as K 11 | from keras.datasets import mnist 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Dropout, Activation, BatchNormalization, MaxPooling2D 14 | from keras.layers import Flatten 15 | from keras.optimizers import SGD, Adam, RMSprop 16 | from keras.callbacks import LearningRateScheduler 17 | from keras.utils import np_utils 18 | 19 | from binary_ops import binary_tanh as binary_tanh_op 20 | from binary_layers import BinaryDense, BinaryConv2D 21 | 22 | 23 | def binary_tanh(x): 24 | return binary_tanh_op(x) 25 | 26 | 27 | H = 1. 28 | kernel_lr_multiplier = 'Glorot' 29 | 30 | # nn 31 | batch_size = 50 32 | epochs = 20 33 | channels = 1 34 | img_rows = 28 35 | img_cols = 28 36 | filters = 32 37 | kernel_size = (3, 3) 38 | pool_size = (2, 2) 39 | hidden_units = 128 40 | classes = 10 41 | use_bias = False 42 | 43 | # learning rate schedule 44 | lr_start = 1e-3 45 | lr_end = 1e-4 46 | lr_decay = (lr_end / lr_start)**(1. 
/ epochs) 47 | 48 | # BN 49 | epsilon = 1e-6 50 | momentum = 0.9 51 | 52 | # dropout 53 | p1 = 0.25 54 | p2 = 0.5 55 | 56 | # the data, shuffled and split between train and test sets 57 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 58 | 59 | X_train = X_train.reshape(60000, 1, 28, 28) 60 | X_test = X_test.reshape(10000, 1, 28, 28) 61 | X_train = X_train.astype('float32') 62 | X_test = X_test.astype('float32') 63 | X_train /= 255 64 | X_test /= 255 65 | print(X_train.shape[0], 'train samples') 66 | print(X_test.shape[0], 'test samples') 67 | 68 | # convert class vectors to binary class matrices 69 | Y_train = np_utils.to_categorical(y_train, classes) * 2 - 1 # -1 or 1 for hinge loss 70 | Y_test = np_utils.to_categorical(y_test, classes) * 2 - 1 71 | 72 | 73 | model = Sequential() 74 | # conv1 75 | model.add(BinaryConv2D(128, kernel_size=kernel_size, input_shape=(channels, img_rows, img_cols), 76 | data_format='channels_first', 77 | H=H, kernel_lr_multiplier=kernel_lr_multiplier, 78 | padding='same', use_bias=use_bias, name='conv1')) 79 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn1')) 80 | model.add(Activation(binary_tanh, name='act1')) 81 | # conv2 82 | model.add(BinaryConv2D(128, kernel_size=kernel_size, H=H, kernel_lr_multiplier=kernel_lr_multiplier, 83 | data_format='channels_first', 84 | padding='same', use_bias=use_bias, name='conv2')) 85 | model.add(MaxPooling2D(pool_size=pool_size, name='pool2', data_format='channels_first')) 86 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn2')) 87 | model.add(Activation(binary_tanh, name='act2')) 88 | # conv3 89 | model.add(BinaryConv2D(256, kernel_size=kernel_size, H=H, kernel_lr_multiplier=kernel_lr_multiplier, 90 | data_format='channels_first', 91 | padding='same', use_bias=use_bias, name='conv3')) 92 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn3')) 93 | model.add(Activation(binary_tanh, name='act3')) 94 | # conv4 95 | model.add(BinaryConv2D(256, kernel_size=kernel_size, H=H, kernel_lr_multiplier=kernel_lr_multiplier, 96 | data_format='channels_first', 97 | padding='same', use_bias=use_bias, name='conv4')) 98 | model.add(MaxPooling2D(pool_size=pool_size, name='pool4', data_format='channels_first')) 99 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn4')) 100 | model.add(Activation(binary_tanh, name='act4')) 101 | model.add(Flatten()) 102 | # dense1 103 | model.add(BinaryDense(1024, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense5')) 104 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn5')) 105 | model.add(Activation(binary_tanh, name='act5')) 106 | # dense2 107 | model.add(BinaryDense(classes, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense6')) 108 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn6')) 109 | 110 | opt = Adam(lr=lr_start) 111 | model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 112 | model.summary() 113 | 114 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 115 | history = model.fit(X_train, Y_train, 116 | batch_size=batch_size, epochs=epochs, 117 | verbose=1, validation_data=(X_test, Y_test), 118 | callbacks=[lr_scheduler]) 119 | score = model.evaluate(X_test, Y_test, verbose=0) 120 | print('Test score:', score[0]) 121 | print('Test accuracy:', score[1]) 122 | 
-------------------------------------------------------------------------------- /binarynet/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple binarize fully connected NN on the MNIST dataset. 2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 97.9% test accuracy after 20 epochs using theano backend 4 | ''' 5 | 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | np.random.seed(1337) # for reproducibility 10 | 11 | import keras.backend as K 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, BatchNormalization 15 | from keras.optimizers import SGD, Adam, RMSprop 16 | from keras.callbacks import LearningRateScheduler 17 | from keras.utils import np_utils 18 | 19 | from binary_ops import binary_tanh as binary_tanh_op 20 | from binary_layers import BinaryDense, Clip 21 | 22 | from keras.models import load_model 23 | 24 | 25 | class DropoutNoScale(Dropout): 26 | '''Keras Dropout does scale the input in training phase, which is undesirable here. 27 | ''' 28 | def call(self, inputs, training=None): 29 | if 0. < self.rate < 1.: 30 | noise_shape = self._get_noise_shape(inputs) 31 | 32 | def dropped_inputs(): 33 | return K.dropout(inputs, self.rate, noise_shape, 34 | seed=self.seed) * (1 - self.rate) 35 | return K.in_train_phase(dropped_inputs, inputs, 36 | training=training) 37 | return inputs 38 | 39 | def binary_tanh(x): 40 | return binary_tanh_op(x) 41 | 42 | 43 | batch_size = 100 44 | epochs = 20 45 | nb_classes = 10 46 | 47 | H = 'Glorot' 48 | kernel_lr_multiplier = 'Glorot' 49 | 50 | # network 51 | num_unit = 2048 52 | num_hidden = 3 53 | use_bias = False 54 | 55 | # learning rate schedule 56 | lr_start = 1e-3 57 | lr_end = 1e-4 58 | lr_decay = (lr_end / lr_start)**(1. 
/ epochs) 59 | 60 | # BN 61 | epsilon = 1e-6 62 | momentum = 0.9 63 | 64 | # dropout 65 | drop_in = 0.2 66 | drop_hidden = 0.5 67 | 68 | # the data, shuffled and split between train and test sets 69 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 70 | 71 | X_train = X_train.reshape(60000, 784) 72 | X_test = X_test.reshape(10000, 784) 73 | X_train = X_train.astype('float32') 74 | X_test = X_test.astype('float32') 75 | X_train /= 255 76 | X_test /= 255 77 | print(X_train.shape[0], 'train samples') 78 | print(X_test.shape[0], 'test samples') 79 | 80 | # convert class vectors to binary class matrices 81 | Y_train = np_utils.to_categorical(y_train, nb_classes) * 2 - 1 # -1 or 1 for hinge loss 82 | Y_test = np_utils.to_categorical(y_test, nb_classes) * 2 - 1 83 | 84 | model = Sequential() 85 | model.add(DropoutNoScale(drop_in, input_shape=(784,), name='drop0')) 86 | for i in range(num_hidden): 87 | model.add(BinaryDense(num_unit, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, 88 | name='dense{}'.format(i+1))) 89 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn{}'.format(i+1))) 90 | model.add(Activation(binary_tanh, name='act{}'.format(i+1))) 91 | model.add(DropoutNoScale(drop_hidden, name='drop{}'.format(i+1))) 92 | model.add(BinaryDense(10, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, 93 | name='dense')) 94 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn')) 95 | 96 | model.summary() 97 | 98 | opt = Adam(lr=lr_start) 99 | model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 100 | 101 | # deserialized custom layers 102 | #model.save('mlp.h5') 103 | #model = load_model('mlp.h5', custom_objects={'DropoutNoScale': DropoutNoScale, 104 | # 'BinaryDense': BinaryDense, 105 | # 'Clip': Clip, 106 | # 'binary_tanh': binary_tanh}) 107 | 108 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 109 | history = model.fit(X_train, Y_train, 110 | batch_size=batch_size, epochs=epochs, 111 | verbose=1, validation_data=(X_test, Y_test), 112 | callbacks=[lr_scheduler]) 113 | score = model.evaluate(X_test, Y_test, verbose=0) 114 | print('Test score:', score[0]) 115 | print('Test accuracy:', score[1]) 116 | -------------------------------------------------------------------------------- /focal_loss/README.md: -------------------------------------------------------------------------------- 1 | Proof-of-concept implementation of **Focal Loss**, similiar to cross entropy loss, but pus less weight on well 2 | classified samples. 3 | 4 | ## References 5 | * Lin et al. [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) 6 | -------------------------------------------------------------------------------- /focal_loss/losses.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | 3 | 4 | def focal_loss(target, output, gamma=2): 5 | output /= K.sum(output, axis=-1, keepdims=True) 6 | eps = K.epsilon() 7 | output = K.clip(output, eps, 1. - eps) 8 | return -K.sum(K.pow(1. - output, gamma) * target * K.log(output), 9 | axis=-1) 10 | -------------------------------------------------------------------------------- /focal_loss/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple deep NN on the MNIST dataset using **Focal Loss**. 
2 | ''' 3 | 4 | from __future__ import print_function 5 | 6 | import keras 7 | from keras.datasets import mnist 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout 10 | from keras.optimizers import RMSprop 11 | 12 | from losses import focal_loss 13 | 14 | batch_size = 128 15 | num_classes = 10 16 | epochs = 20 17 | 18 | gamma = 5. 19 | 20 | # the data, shuffled and split between train and test sets 21 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 22 | 23 | x_train = x_train.reshape(60000, 784) 24 | x_test = x_test.reshape(10000, 784) 25 | x_train = x_train.astype('float32') 26 | x_test = x_test.astype('float32') 27 | x_train /= 255 28 | x_test /= 255 29 | print(x_train.shape[0], 'train samples') 30 | print(x_test.shape[0], 'test samples') 31 | 32 | # convert class vectors to binary class matrices 33 | y_train = keras.utils.to_categorical(y_train, num_classes) 34 | y_test = keras.utils.to_categorical(y_test, num_classes) 35 | 36 | model = Sequential() 37 | model.add(Dense(512, activation='relu', input_shape=(784,))) 38 | model.add(Dropout(0.2)) 39 | model.add(Dense(512, activation='relu')) 40 | model.add(Dropout(0.2)) 41 | model.add(Dense(10, activation='softmax')) 42 | 43 | model.summary() 44 | 45 | model.compile(loss=lambda y, y_hat: focal_loss(y, y_hat, gamma), 46 | optimizer=RMSprop(), 47 | metrics=['accuracy']) 48 | 49 | history = model.fit(x_train, y_train, 50 | batch_size=batch_size, 51 | epochs=epochs, 52 | verbose=1, 53 | validation_data=(x_test, y_test)) 54 | score = model.evaluate(x_test, y_test, verbose=0) 55 | print('Test loss:', score[0]) 56 | print('Test accuracy:', score[1]) 57 | -------------------------------------------------------------------------------- /gcnn/README.md: -------------------------------------------------------------------------------- 1 | Toy Keras example using Gated Convolutional Networks for language modeling on the IMDB dataset. 2 | 3 | Note: 4 | Only works with the TensorFlow backend; see [issue 1](https://github.com/DingKe/nn_playground/issues/1). 5 | 6 | ## Prepare data 7 | Change the current directory to data, and follow the instructions in imdb_preprocess_semi.py. 8 | 9 | ## Run 10 | 11 | ### Word LM 12 | python imdb_lm_gcnn.py 13 | 14 | ### Character LM 15 | python char_lm_gcnn.py 16 | 17 | 18 | ## References 19 | * Dauphin et al. [Language Modeling with Gated Convolutional Networks](https://arxiv.org/abs/1612.08083).
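For orientation, the GCNN layer in gcnn.py implements the paper's gated linear unit, h(X) = (X*W + b) multiplied elementwise with sigmoid(X*V + c), on top of a causal (left-padded) convolution. A minimal, hedged usage sketch mirroring imdb_lm_gcnn.py (vocabulary size, dimensions and window size are illustrative):

```python
from keras.models import Model
from keras.layers import Input, Embedding, Dense, TimeDistributed

from gcnn import GCNN   # gated convolutional layer from this directory

batch_size, vocab_size, embed_dim, hidden_dim = 32, 2000, 20, 30

x = Input(batch_shape=(batch_size, None))            # (batch, time) token ids
h = Embedding(vocab_size + 2, embed_dim)(x)
h = GCNN(hidden_dim, window_size=3)(h)               # causal conv + gating
h = GCNN(hidden_dim, window_size=3)(h)
y = TimeDistributed(Dense(vocab_size + 2, activation='softmax'))(h)

lm = Model(inputs=x, outputs=y)
lm.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
```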
20 | -------------------------------------------------------------------------------- /gcnn/char_generator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | modified from https://github.com/sherjilozair/char-rnn-tensorflow 3 | ''' 4 | import codecs 5 | import os 6 | import collections 7 | from six.moves import cPickle 8 | import numpy as np 9 | 10 | class TextLoader(object): 11 | def __init__(self, data_dir, batch_size, seq_length, encoding='utf-8'): 12 | self.data_dir = data_dir 13 | self.batch_size = batch_size 14 | self.seq_length = seq_length 15 | self.encoding = encoding 16 | 17 | input_file = os.path.join(data_dir, "input.txt") 18 | vocab_file = os.path.join(data_dir, "vocab.pkl") 19 | tensor_file = os.path.join(data_dir, "data.npy") 20 | 21 | if not (os.path.exists(vocab_file) and os.path.exists(tensor_file)): 22 | print("reading text file") 23 | self.preprocess(input_file, vocab_file, tensor_file) 24 | else: 25 | print("loading preprocessed files") 26 | self.load_preprocessed(vocab_file, tensor_file) 27 | self.create_batches() 28 | 29 | def preprocess(self, input_file, vocab_file, tensor_file): 30 | with codecs.open(input_file, "r", encoding=self.encoding) as f: 31 | data = f.read() 32 | counter = collections.Counter(data) 33 | count_pairs = sorted(counter.items(), key=lambda x: -x[1]) 34 | self.chars, _ = zip(*count_pairs) 35 | self.vocab_size = len(self.chars) 36 | self.vocab = dict(zip(self.chars, range(len(self.chars)))) 37 | with open(vocab_file, 'wb') as f: 38 | cPickle.dump(self.chars, f) 39 | self.tensor = np.array(list(map(self.vocab.get, data))) 40 | np.save(tensor_file, self.tensor) 41 | 42 | def load_preprocessed(self, vocab_file, tensor_file): 43 | with open(vocab_file, 'rb') as f: 44 | self.chars = cPickle.load(f) 45 | self.vocab_size = len(self.chars) 46 | self.vocab = dict(zip(self.chars, range(len(self.chars)))) 47 | self.tensor = np.load(tensor_file) 48 | self.num_batches = int(self.tensor.size / (self.batch_size * 49 | self.seq_length)) 50 | 51 | def create_batches(self): 52 | self.num_batches = int(self.tensor.size / (self.batch_size * 53 | self.seq_length)) 54 | 55 | # When the data (tensor) is too small, let's give them a better error message 56 | if self.num_batches==0: 57 | assert False, "Not enough data. Make seq_length and batch_size small." 
58 | 59 | self.tensor = self.tensor[:self.num_batches * self.batch_size * self.seq_length] 60 | xdata = self.tensor 61 | ydata = np.copy(self.tensor) 62 | ydata[:-1] = xdata[1:] 63 | ydata[-1] = xdata[0] 64 | self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1) 65 | self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1) 66 | 67 | def __call__(self): 68 | while True: 69 | for x, y in zip(self.x_batches, self.y_batches): 70 | yield x, np.expand_dims(y, -1) 71 | -------------------------------------------------------------------------------- /gcnn/char_lm_gcnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''Simple RNN for Language Model 3 | ''' 4 | from __future__ import print_function 5 | import os 6 | 7 | from keras.models import Model 8 | from keras.layers import Input, Embedding, Dense, TimeDistributed 9 | from keras.optimizers import * 10 | 11 | from gcnn import GCNN 12 | from char_generator import TextLoader 13 | 14 | 15 | def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1): 16 | x = Input(batch_shape=(batch_size, None)) 17 | # mebedding 18 | y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x) 19 | for i in range(nb_layers-1): 20 | y = GCNN(hidden_dim, window_size=window_size, 21 | name='gcnn{}'.format(i + 1))(y) 22 | y = GCNN(hidden_dim, window_size=window_size, 23 | name='gcnn{}'.format(nb_layers))(y) 24 | y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y) 25 | 26 | model = Model(inputs=x, outputs=y) 27 | 28 | return model 29 | 30 | 31 | def run_demo(): 32 | batch_size = 50 33 | epochs = 100 34 | nb_layers = 3 35 | 36 | max_len = 50 37 | window_size = 5 38 | 39 | # Prepare data 40 | path = './data/tinyshakespeare' 41 | data_loader = TextLoader(path, batch_size, max_len) 42 | vocsize = data_loader.vocab_size 43 | print('vocsize: {}'.format(vocsize)) 44 | 45 | # Build model 46 | model = LM(batch_size, window_size=window_size, vocsize=vocsize, nb_layers=nb_layers) 47 | model.compile(optimizer='adam', 48 | loss='sparse_categorical_crossentropy') 49 | 50 | 51 | train_steps = data_loader.num_batches 52 | 53 | # Start training 54 | model.summary() 55 | model.fit_generator(data_loader(), steps_per_epoch=train_steps, 56 | epochs=epochs, verbose=1) 57 | 58 | 59 | if __name__ == '__main__': 60 | run_demo() 61 | -------------------------------------------------------------------------------- /gcnn/data/imdb_preprocess_semi.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is what created the dataset pickled. 3 | 4 | 1) You need to download this file and put it in the same directory as this file. 5 | https://github.com/moses-smt/mosesdecoder/raw/master/scripts/tokenizer/tokenizer.perl . Give it execution permission. 6 | 7 | 2) Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/ and extract it in the current directory. 8 | 9 | 3) Then run this script. 
10 | """ 11 | 12 | 13 | import numpy 14 | import cPickle as pkl 15 | 16 | from collections import OrderedDict 17 | 18 | import glob 19 | import os 20 | 21 | from subprocess import Popen, PIPE 22 | 23 | dataset_path = os.path.expandvars('$PWD/aclImdb/') 24 | 25 | # tokenizer.perl is from Moses: https://github.com/moses-smt/mosesdecoder/tree/master/scripts/tokenizer 26 | tokenizer_cmd = ['./tokenizer.perl', '-l', 'en', '-q', '-'] 27 | 28 | 29 | def tokenize(sentences): 30 | 31 | print 'Tokenizing..', 32 | text = "\n".join(sentences) 33 | tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) 34 | tok_text, _ = tokenizer.communicate(text) 35 | toks = tok_text.split('\n')[:-1] 36 | print 'Done' 37 | 38 | return toks 39 | 40 | 41 | def build_dict(path): 42 | sentences = [] 43 | currdir = os.getcwd() 44 | os.chdir('%s/pos/' % path) 45 | for ff in glob.glob("*.txt"): 46 | with open(ff, 'r') as f: 47 | sentences.append(f.readline().strip()) 48 | os.chdir('%s/neg/' % path) 49 | for ff in glob.glob("*.txt"): 50 | with open(ff, 'r') as f: 51 | sentences.append(f.readline().strip()) 52 | os.chdir(currdir) 53 | 54 | sentences = tokenize(sentences) 55 | 56 | print 'Building dictionary..', 57 | wordcount = dict() 58 | for ss in sentences: 59 | words = ss.strip().lower().split() 60 | for w in words: 61 | if w not in wordcount: 62 | wordcount[w] = 1 63 | else: 64 | wordcount[w] += 1 65 | 66 | counts = wordcount.values() 67 | keys = wordcount.keys() 68 | 69 | sorted_idx = numpy.argsort(counts)[::-1] 70 | 71 | worddict = dict() 72 | 73 | for idx, ss in enumerate(sorted_idx): 74 | worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK) 75 | 76 | print numpy.sum(counts), ' total words ', len(keys), ' unique words' 77 | 78 | return worddict 79 | 80 | 81 | def grab_data(path, dictionary): 82 | sentences = [] 83 | currdir = os.getcwd() 84 | os.chdir(path) 85 | for ff in glob.glob("*.txt"): 86 | with open(ff, 'r') as f: 87 | sentences.append(f.readline().strip()) 88 | os.chdir(currdir) 89 | sentences = tokenize(sentences) 90 | 91 | seqs = [None] * len(sentences) 92 | for idx, ss in enumerate(sentences): 93 | words = ss.strip().lower().split() 94 | seqs[idx] = [dictionary[w] if w in dictionary else 1 for w in words] 95 | 96 | return seqs 97 | 98 | 99 | def main(): 100 | # Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/ 101 | path = dataset_path 102 | dictionary = build_dict(os.path.join(path, 'train')) 103 | 104 | train_x_pos = grab_data(path+'train/pos', dictionary) 105 | train_x_neg = grab_data(path+'train/neg', dictionary) 106 | train_x_unsup = grab_data(path+'train/unsup', dictionary) 107 | train_x = train_x_pos + train_x_neg 108 | train_y = [1] * len(train_x_pos) + [0] * len(train_x_neg) 109 | 110 | test_x_pos = grab_data(path+'test/pos', dictionary) 111 | test_x_neg = grab_data(path+'test/neg', dictionary) 112 | test_x = test_x_pos + test_x_neg 113 | test_y = [1] * len(test_x_pos) + [0] * len(test_x_neg) 114 | 115 | f = open('imdb-full.pkl', 'wb') 116 | data = {'train_x': train_x, 'train_y': train_y, 117 | 'test_x': test_x, 'test_y': test_y, 118 | 'train_x_unsup': train_x_unsup 119 | } 120 | pkl.dump(data, f, True) 121 | f.close() 122 | 123 | f = open('imdb.dict.pkl', 'wb') 124 | pkl.dump(dictionary, f, True) 125 | f.close() 126 | 127 | if __name__ == '__main__': 128 | main() 129 | -------------------------------------------------------------------------------- /gcnn/data/nonbreaking_prefixes/nonbreaking_prefix.en: -------------------------------------------------------------------------------- 
1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | A 7 | B 8 | C 9 | D 10 | E 11 | F 12 | G 13 | H 14 | I 15 | J 16 | K 17 | L 18 | M 19 | N 20 | O 21 | P 22 | Q 23 | R 24 | S 25 | T 26 | U 27 | V 28 | W 29 | X 30 | Y 31 | Z 32 | 33 | #List of titles. These are often followed by upper-case names, but do not indicate sentence breaks 34 | Adj 35 | Adm 36 | Adv 37 | Asst 38 | Bart 39 | Bldg 40 | Brig 41 | Bros 42 | Capt 43 | Cmdr 44 | Col 45 | Comdr 46 | Con 47 | Corp 48 | Cpl 49 | DR 50 | Dr 51 | Drs 52 | Ens 53 | Gen 54 | Gov 55 | Hon 56 | Hr 57 | Hosp 58 | Insp 59 | Lt 60 | MM 61 | MR 62 | MRS 63 | MS 64 | Maj 65 | Messrs 66 | Mlle 67 | Mme 68 | Mr 69 | Mrs 70 | Ms 71 | Msgr 72 | Op 73 | Ord 74 | Pfc 75 | Ph 76 | Prof 77 | Pvt 78 | Rep 79 | Reps 80 | Res 81 | Rev 82 | Rt 83 | Sen 84 | Sens 85 | Sfc 86 | Sgt 87 | Sr 88 | St 89 | Supt 90 | Surg 91 | 92 | #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) 93 | v 94 | vs 95 | i.e 96 | rev 97 | e.g 98 | 99 | #Numbers only. These should only induce breaks when followed by a numeric sequence 100 | # add NUMERIC_ONLY after the word for this function 101 | #This case is mostly for the english "No." which can either be a sentence of its own, or 102 | #if followed by a number, a non-breaking prefix 103 | No #NUMERIC_ONLY# 104 | Nos 105 | Art #NUMERIC_ONLY# 106 | Nr 107 | pp #NUMERIC_ONLY# 108 | 109 | #month abbreviations 110 | Jan 111 | Feb 112 | Mar 113 | Apr 114 | #May is a full word 115 | Jun 116 | Jul 117 | Aug 118 | Sep 119 | Oct 120 | Nov 121 | Dec 122 | -------------------------------------------------------------------------------- /gcnn/gcnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import numpy as np 4 | 5 | from keras import backend as K 6 | from keras import activations, initializers, regularizers, constraints 7 | from keras.layers import Layer, InputSpec 8 | 9 | from keras.utils.conv_utils import conv_output_length 10 | 11 | class GCNN(Layer): 12 | '''Gated Convolutional Networks 13 | 14 | # Arguments 15 | 16 | # References 17 | - Dauphin et al. 
[Language Modeling with Gated Convolutional Networks](https://arxiv.org/abs/1612.08083) 18 | ''' 19 | def __init__(self, output_dim, window_size=3, stride=1, 20 | kernel_initializer='uniform', bias_initializer='zero', 21 | activation='linear', activity_regularizer=None, 22 | kernel_regularizer=None, bias_regularizer=None, 23 | kernel_constraint=None, bias_constraint=None, 24 | use_bias=True, input_dim=None, input_length=None, **kwargs): 25 | self.output_dim = output_dim 26 | self.window_size = window_size 27 | self.strides = (stride, 1) 28 | 29 | self.use_bias = use_bias 30 | self.kernel_initializer = initializers.get(kernel_initializer) 31 | self.bias_initializer = initializers.get(bias_initializer) 32 | self.activation = activations.get(activation) 33 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 34 | self.bias_regularizer = regularizers.get(bias_regularizer) 35 | self.activity_regularizer = regularizers.get(activity_regularizer) 36 | self.kernel_constraint = constraints.get(kernel_constraint) 37 | self.bias_constraint = constraints.get(bias_constraint) 38 | 39 | self.input_spec = [InputSpec(ndim=3)] 40 | self.input_dim = input_dim 41 | self.input_length = input_length 42 | if self.input_dim: 43 | kwargs['input_shape'] = (self.input_length, self.input_dim) 44 | super(GCNN, self).__init__(**kwargs) 45 | 46 | def build(self, input_shape): 47 | input_dim = input_shape[2] 48 | self.input_dim = input_dim 49 | self.input_spec = [InputSpec(shape=input_shape)] 50 | self.kernel_shape = (self.window_size, 1, input_dim, self.output_dim * 2) 51 | self.kernel = self.add_weight(self.kernel_shape, 52 | initializer=self.kernel_initializer, 53 | name='kernel', 54 | regularizer=self.kernel_regularizer, 55 | constraint=self.kernel_constraint) 56 | 57 | if self.use_bias: 58 | self.bias = self.add_weight((self.output_dim * 2,), 59 | initializer=self.bias_initializer, 60 | name='b', 61 | regularizer=self.bias_regularizer, 62 | constraint=self.bias_constraint) 63 | 64 | self.built = True 65 | 66 | def compute_output_shape(self, input_shape): 67 | length = input_shape[1] 68 | if length: 69 | length = conv_output_length(length + self.window_size - 1, 70 | self.window_size, 'valid', 71 | self.strides[0]) 72 | return (input_shape[0], length, self.output_dim) 73 | 74 | def call(self, x): 75 | # input shape: (nb_samples, time (padded with zeros), input_dim) 76 | # note that the .build() method of subclasses MUST define 77 | # self.input_spec with a complete input shape. 
78 | input_shape = self.input_spec[0].shape 79 | 80 | if self.window_size > 1: 81 | x = K.temporal_padding(x, (self.window_size-1, 0)) 82 | x = K.expand_dims(x, 2) # add a dummy dimension 83 | 84 | # z, g 85 | output = K.conv2d(x, self.kernel, strides=self.strides, 86 | padding='valid', 87 | data_format='channels_last') 88 | output = K.squeeze(output, 2) # remove the dummy dimension 89 | if self.use_bias: 90 | output = K.bias_add(output, self.bias, data_format='channels_last') 91 | z = output[:, :, :self.output_dim] 92 | g = output[:, :, self.output_dim:] 93 | 94 | return self.activation(z) * K.sigmoid(g) 95 | 96 | def get_config(self): 97 | config = {'output_dim': self.output_dim, 98 | 'window_size': self.window_size, 99 | 'init': self.init.get_config(), 100 | 'stride': self.strides[0], 101 | 'activation': activations.serialize(self.activation), 102 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 103 | 'bias_initializer': initializers.serialize(self.bias_initializer), 104 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 105 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 106 | 'activity_regularizer': regularizers.serialize(self.activy_regularizer), 107 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 108 | 'bias_constraint': constraints.serialize(self.bias_constraint), 109 | 'use_bias': self.use_bias, 110 | 'input_dim': self.input_dim, 111 | 'input_length': self.input_length} 112 | base_config = super(GCNN, self).get_config() 113 | return dict(list(base_config.items()) + list(config.items())) 114 | -------------------------------------------------------------------------------- /gcnn/imdb_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import os 4 | import gzip 5 | import numpy as np 6 | import random 7 | from six.moves import cPickle 8 | 9 | 10 | def set_unk(sent, vocab_size=None): 11 | return [w if not vocab_size or w <= vocab_size + 1 else 1 for w in sent] 12 | 13 | 14 | def pad(xs, ys, max_len=None): 15 | new_xs = [] 16 | for x in xs: 17 | if max_len: 18 | x = x[:max_len] 19 | new_xs.append(x) 20 | xs = new_xs 21 | max_len = max_len if max_len else max([len(x) for x in xs]) 22 | 23 | xa = np.zeros((len(xs), max_len), dtype='int32') 24 | for i in range(len(xs)): 25 | for j, c in enumerate(xs[i]): 26 | xa[i, j] = c 27 | ya = np.asarray(ys, dtype='int32') if ys else None 28 | 29 | return xa, ya 30 | 31 | 32 | def add_eos(xs, max_len=None, eos=None): 33 | new_xs, new_ys = [] 34 | for x in xs: 35 | y = x + [eos] 36 | x = [eos] + x 37 | new_xs.append(x) 38 | new_ys.apped(y) 39 | xs, ys = new_xs, new_ys 40 | max_len = max_len + 1 if max_len else max(max([len(x) for x in xs])) 41 | 42 | xa = np.zeros((len(xs), max_len), dtype='int32') 43 | ya = np.zeros((len(ys), max_len), dtype='int32') 44 | for i in range(len(xa)): 45 | for j, c in enumerate(xs[i]): 46 | xa[i, j] = c 47 | ya[i, j] = ys[i][j] 48 | 49 | return xa, ya 50 | 51 | 52 | class IMDBLM(object): 53 | '''IMDB for training language model 54 | ''' 55 | def __init__(self, path, which_set=None, train_ratio=0.9, 56 | max_len=5, vocab_size=101745, batch_size=10, 57 | shuffle=True, seed=1111): 58 | self.__dict__.update(locals()) 59 | self.__dict__.pop('self') 60 | 61 | self.random = random.Random() 62 | self.random.seed(self.seed) 63 | 64 | self.path = os.path.expandvars(self.path) 65 | 66 | def __call__(self): 67 | if 
self.path.endswith('.gz'): 68 | with gzip.open(self.path, 'r') as fp: 69 | data = cPickle.load(fp) 70 | else: 71 | with open(self.path, 'r') as fp: 72 | data = cPickle.load(fp) 73 | 74 | assert self.which_set in ['train', 'validation', 'test', None], \ 75 | "which_set should be 'train' or 'validation' or 'test', " + \ 76 | "but '{}' is given.".format(self.which_set) 77 | 78 | if self.which_set in ['train', 'validation', None]: 79 | train_x, train_x_unsup = data['train_x'], data['train_x_unsup'] 80 | # concatenate all sentences 81 | text = [] 82 | for x in train_x + train_x_unsup: 83 | text += x 84 | 85 | NUM_TRAIN = int(len(text) * self.train_ratio) 86 | NUM_VALID = len(text) - NUM_TRAIN 87 | if self.which_set == 'train': 88 | text = text[:NUM_TRAIN] 89 | elif self.which_set == 'validation': 90 | text = text[-NUM_VALID:] 91 | else: 92 | # concatenate all sentences 93 | text = [] 94 | for x in data['test_x']: 95 | text += x 96 | 97 | text = set_unk(text, self.vocab_size) 98 | 99 | del data 100 | 101 | nb_words = len(text) 102 | seg_size = nb_words // self.batch_size 103 | cursors = [i * seg_size for i in range(self.batch_size)] 104 | 105 | while True: 106 | x = np.zeros((self.batch_size, self.max_len), dtype='int32') 107 | y = np.zeros((self.batch_size, self.max_len, 1), dtype='int32') 108 | for i in range(self.batch_size): 109 | c = cursors[i] 110 | for j in range(self.max_len): 111 | x[i, j] = text[(c + j) % nb_words] 112 | y[i, j, 0] = text[(c + j + 1) % nb_words] 113 | cursors[i] = (c + self.max_len) % nb_words 114 | yield x, y 115 | -------------------------------------------------------------------------------- /gcnn/imdb_lm_gcnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''Simple RNN for Language Model 3 | ''' 4 | from __future__ import print_function 5 | import os 6 | 7 | from keras.models import Model 8 | from keras.layers import Input, Embedding, Dense, TimeDistributed 9 | from keras.optimizers import * 10 | 11 | from gcnn import GCNN 12 | from imdb_generator import IMDBLM 13 | 14 | 15 | def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1): 16 | x = Input(batch_shape=(batch_size, None)) 17 | # mebedding 18 | y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x) 19 | for i in range(nb_layers-1): 20 | y = GCNN(hidden_dim, window_size=window_size, 21 | name='gcnn{}'.format(i + 1))(y) 22 | y = GCNN(hidden_dim, window_size=window_size, 23 | name='gcnn{}'.format(nb_layers))(y) 24 | y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y) 25 | 26 | model = Model(inputs=x, outputs=y) 27 | 28 | return model 29 | 30 | 31 | def train_model(): 32 | batch_size = 32 33 | epochs = 100 34 | nb_layers = 3 35 | vocsize = 2000 # top 2k 36 | max_len = 30 37 | train_ratio = 0.99 38 | window_size = 3 39 | 40 | # Build model 41 | model = LM(batch_size, window_size=window_size, vocsize=vocsize, nb_layers=nb_layers) 42 | model.compile(optimizer='adam', 43 | loss='sparse_categorical_crossentropy') 44 | 45 | # Prepare data 46 | path = './data/imdb-full.pkl' 47 | # Train 48 | train_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, shuffle=True, 49 | which_set='train', train_ratio=train_ratio, batch_size=batch_size) 50 | # Validation 51 | val_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, 52 | which_set='validation', train_ratio=train_ratio, batch_size=batch_size) 53 | 54 | train_steps = 500 55 | val_steps = 200 56 | 57 | # Start 
training 58 | model.summary() 59 | model.fit_generator(train_gen(), steps_per_epoch=train_steps, 60 | validation_data=val_gen(), validation_steps=val_steps, 61 | epochs=epochs, verbose=1) 62 | 63 | 64 | def run_demo(): 65 | train_model() 66 | 67 | 68 | if __name__ == '__main__': 69 | run_demo() 70 | -------------------------------------------------------------------------------- /glsgan/README.md: -------------------------------------------------------------------------------- 1 | Generalized Loss Sensitive GAN -- a unified perspective for WGAN and LSGAN. 2 | 3 | This repo is the toy demo for the [blog on GAN](http://blog.csdn.net/jackytintin/article/details/61908718). Please refer https://github.com/guojunq/glsgan for the 'official' repo for GLSGAN in torch. 4 | 5 | ## Reference 6 | * Arjovsky et al. [Wasserstein GAN](https://www.arxiv.org/abs/1701.07875). 7 | * Qi. [Loss-Sensitive Generative Adversarial Networks on Lipschitz Densities](https://arxiv.org/abs/1701.06264). 8 | -------------------------------------------------------------------------------- /glsgan/glsgan_mlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Generalized Loss Sensentive Generative Adversarial Network (GLSGAN) 5 | """ 6 | from __future__ import print_function 7 | from PIL import Image 8 | from six.moves import range 9 | 10 | import keras.backend as K 11 | K.set_image_data_format('channels_first') 12 | 13 | from keras.datasets import mnist 14 | from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Activation, BatchNormalization, ELU, LeakyReLU 15 | from keras.models import Sequential, Model 16 | from keras.optimizers import RMSprop, Adam 17 | from keras.utils.generic_utils import Progbar 18 | 19 | import numpy as np 20 | np.random.seed(1337) 21 | 22 | 23 | def clip_weights(model, lower, upper): 24 | for l in model.layers: 25 | weights = l.get_weights() 26 | weights = [np.clip(w, lower, upper) for w in weights] 27 | l.set_weights(weights) 28 | 29 | 30 | def dummy_loss(loss_to_backprop, y_pred): 31 | return K.mean(loss_to_backprop * y_pred) 32 | 33 | 34 | def build_generator(latent_size): 35 | ''' 36 | Any model with input shape (?, latent_size) and output shape (?, 1, 28, 28) fits here. 37 | ''' 38 | model = Sequential() 39 | model.add(Dense(1024, input_dim=latent_size, activation='relu')) 40 | model.add(Dense(28 * 28, activation='tanh')) 41 | model.add(Reshape((1, 28, 28))) 42 | 43 | return model 44 | 45 | 46 | def build_discriminator(act): 47 | ''' 48 | Any model with input shape (?, 1, 28, 28) and output shape (?, 1) fits here. 49 | Use different activator for different type of GAN. 
50 | ''' 51 | model = Sequential() 52 | model.add(Flatten(input_shape=(1, 28, 28))) 53 | model.add(Dense(256)) 54 | model.add(Activation('relu')) 55 | model.add(Dense(128)) 56 | model.add(Activation('relu')) 57 | model.add(Dense(1, activation='linear')) 58 | model.add(act) 59 | 60 | return model 61 | 62 | 63 | if __name__ == '__main__': 64 | gan_type = 'wgan' # 'wgan', 'lsgan', 'elu', 'l1' 65 | epochs = 5000 66 | batch_size = 50 67 | latent_size = 20 68 | lr = 0.00005 69 | c = 0.08 70 | slope = 0 # [-inf, 1], 1 for WGAN, 0 for LSGAN 71 | 72 | act = LeakyReLU(slope) # try act = ELU() 73 | 74 | # build the discriminator 75 | disc = build_discriminator(act) 76 | disc.compile( 77 | optimizer=RMSprop(lr=lr), 78 | loss=dummy_loss 79 | ) 80 | 81 | # build the generator 82 | generator = build_generator(latent_size) 83 | 84 | latent = Input(shape=(latent_size, )) 85 | # get a fake image 86 | fake = generator(latent) 87 | # we only want to be able to train generation for the combined model 88 | disc.trainable = False 89 | fake = disc(fake) 90 | combined = Model(inputs=latent, outputs=fake) 91 | combined.compile( 92 | optimizer=RMSprop(lr=lr), 93 | loss=dummy_loss 94 | ) 95 | 96 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 97 | # range [-1, 1] 98 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 99 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 100 | X_train = np.expand_dims(X_train, axis=1) 101 | 102 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 103 | X_test = np.expand_dims(X_test, axis=1) 104 | 105 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 106 | 107 | for epoch in range(epochs): 108 | print('Epoch {} of {}'.format(epoch + 1, epochs)) 109 | 110 | nb_batches = int(X_train.shape[0] / batch_size) 111 | progress_bar = Progbar(target=nb_batches) 112 | 113 | epoch_disc_loss= [] 114 | epoch_gen_loss = [] 115 | 116 | index = 0 117 | while index < nb_batches: 118 | ## discriminator 119 | if epoch < 5 or epoch % 100 == 0: 120 | Diters = 1 121 | else: 122 | Diters = 1 123 | iter = 0 124 | disc_loss= [] 125 | while index < nb_batches and iter < Diters: 126 | progress_bar.update(index) 127 | index += 1 128 | iter += 1 129 | 130 | # generate a new batch of noise 131 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 132 | # generate a batch of fake images 133 | generated_images = generator.predict(noise, verbose=0) 134 | 135 | # get a batch of real images 136 | image_batch = X_train[index * batch_size:(index + 1) * batch_size] 137 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 138 | 139 | X = np.concatenate((image_batch, generated_images)) 140 | y = np.array([-1] * len(image_batch) + [1] * batch_size) 141 | 142 | disc_loss.append(-disc.train_on_batch(X, y)) 143 | 144 | clip_weights(disc, -c, c) 145 | 146 | epoch_disc_loss.append(sum(disc_loss)/len(disc_loss)) 147 | 148 | ## generator 149 | # make new noise. 
we generate 2 * batch size here such that we have 150 | # the generator optimize over an identical number of images as the disc 151 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 152 | target = -np.ones(batch_size) 153 | epoch_gen_loss.append(-combined.train_on_batch(noise, target)) 154 | 155 | print('\n[Loss_D: {:.3f}, Loss_G: {:.3f}]'.format(np.mean(epoch_disc_loss), np.mean(epoch_gen_loss))) 156 | 157 | # save weights every epoch 158 | if False: 159 | generator.save_weights( 160 | 'slope_{}_mlp_generator_epoch_{:03d}.hdf5'.format(slope, epoch), True) 161 | disc.save_weights( 162 | 'slope_{}_mlp_disc_epoch_{:03d}.hdf5'.format(slop, epoch), True) 163 | 164 | # generate some digits to display 165 | noise = np.random.uniform(-1, 1, (100, latent_size)) 166 | # get a batch to display 167 | generated_images = generator.predict(noise, verbose=0) 168 | 169 | # arrange them into a grid 170 | img = (np.concatenate([r.reshape(-1, 28) 171 | for r in np.split(generated_images, 10) 172 | ], axis=-1) * 127.5 + 127.5).astype(np.uint8) 173 | 174 | Image.fromarray(img).save( 175 | 'slope_{}_mlp_epoch_{:03d}_generated.png'.format(slope, epoch)) 176 | -------------------------------------------------------------------------------- /layernorm/README.md: -------------------------------------------------------------------------------- 1 | Layer Normalization 2 | 3 | ## Run 4 | 5 | ### MLP on MNIST 6 | python mnist_mlp.py 7 | 8 | ### CNN on MNIST 9 | python mnist_cnn.py 10 | 11 | ### bAbI toy QA tasks 12 | python babi_lnlstm.py 13 | 14 | 15 | ### IMDB sentiment classification 16 | imdb_cnn_lstm.py 17 | 18 | ## References: 19 | Ba et al. [Layer Normalization](https://arxiv.org/abs/1607.06450). 20 | [Git repo](https://github.com/ryankiros/layer-norm). 21 | -------------------------------------------------------------------------------- /layernorm/babi_lnlstm.py: -------------------------------------------------------------------------------- 1 | '''Trains two recurrent neural networks based upon a story and a question. 2 | The resulting merged vector is then queried to answer a range of bAbI tasks. 3 | 4 | The results are comparable to those for an LSTM model provided in Weston et al.: 5 | "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks" 6 | http://arxiv.org/abs/1502.05698 7 | 8 | Task Number | FB LSTM Baseline | LSTM | LSTM w/ LN 9 | --- | --- | --- | --- 10 | QA1 - Single Supporting Fact | 50 | 52.0 | 58.0 11 | 12 | For the resources related to the bAbI project, refer to: 13 | https://research.facebook.com/researchers/1543934539189348 14 | ''' 15 | 16 | from __future__ import print_function 17 | from functools import reduce 18 | import re 19 | import tarfile 20 | 21 | import numpy as np 22 | np.random.seed(1337) # for reproducibility 23 | 24 | from keras.utils.data_utils import get_file 25 | from keras import layers 26 | from keras.models import Model 27 | from keras.preprocessing.sequence import pad_sequences 28 | 29 | from layer_norm_layers import * 30 | 31 | 32 | def tokenize(sent): 33 | '''Return the tokens of a sentence including punctuation. 34 | 35 | >>> tokenize('Bob dropped the apple. 
Where is the apple?') 36 | ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?'] 37 | ''' 38 | return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()] 39 | 40 | 41 | def parse_stories(lines, only_supporting=False): 42 | '''Parse stories provided in the bAbi tasks format 43 | 44 | If only_supporting is true, only the sentences that support the answer are kept. 45 | ''' 46 | data = [] 47 | story = [] 48 | for line in lines: 49 | line = line.decode('utf-8').strip() 50 | nid, line = line.split(' ', 1) 51 | nid = int(nid) 52 | if nid == 1: 53 | story = [] 54 | if '\t' in line: 55 | q, a, supporting = line.split('\t') 56 | q = tokenize(q) 57 | substory = None 58 | if only_supporting: 59 | # Only select the related substory 60 | supporting = map(int, supporting.split()) 61 | substory = [story[i - 1] for i in supporting] 62 | else: 63 | # Provide all the substories 64 | substory = [x for x in story if x] 65 | data.append((substory, q, a)) 66 | story.append('') 67 | else: 68 | sent = tokenize(line) 69 | story.append(sent) 70 | return data 71 | 72 | 73 | def get_stories(f, only_supporting=False, max_length=None): 74 | '''Given a file name, read the file, retrieve the stories, and then convert the sentences into a single story. 75 | 76 | If max_length is supplied, any stories longer than max_length tokens will be discarded. 77 | ''' 78 | data = parse_stories(f.readlines(), only_supporting=only_supporting) 79 | flatten = lambda data: reduce(lambda x, y: x + y, data) 80 | data = [(flatten(story), q, answer) for story, q, answer in data if not max_length or len(flatten(story)) < max_length] 81 | return data 82 | 83 | 84 | def vectorize_stories(data, word_idx, story_maxlen, query_maxlen): 85 | X = [] 86 | Xq = [] 87 | Y = [] 88 | for story, query, answer in data: 89 | x = [word_idx[w] for w in story] 90 | xq = [word_idx[w] for w in query] 91 | y = np.zeros(len(word_idx) + 1) # let's not forget that index 0 is reserved 92 | y[word_idx[answer]] = 1 93 | X.append(x) 94 | Xq.append(xq) 95 | Y.append(y) 96 | return pad_sequences(X, maxlen=story_maxlen), pad_sequences(Xq, maxlen=query_maxlen), np.array(Y) 97 | 98 | RNN = LayerNormLSTM # layers.LSTM 99 | EMBED_HIDDEN_SIZE = 50 100 | SENT_HIDDEN_SIZE = 100 101 | QUERY_HIDDEN_SIZE = 100 102 | BATCH_SIZE = 32 103 | EPOCHS = 40 104 | print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE)) 105 | 106 | try: 107 | path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz') 108 | except: 109 | print('Error downloading dataset, please download it manually:\n' 110 | '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz\n' 111 | '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz') 112 | raise 113 | tar = tarfile.open(path) 114 | # Default QA1 with 1000 samples 115 | challenge = 'tasks_1-20_v1-2/en/qa1_single-supporting-fact_{}.txt' 116 | # QA1 with 10,000 samples 117 | # challenge = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt' 118 | # QA2 with 1000 samples 119 | # challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt' 120 | # QA2 with 10,000 samples 121 | # challenge = 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt' 122 | train = get_stories(tar.extractfile(challenge.format('train'))) 123 | test = get_stories(tar.extractfile(challenge.format('test'))) 124 | 125 | vocab = sorted(reduce(lambda x, y: x | y, (set(story + q + [answer]) for story, q, 
answer in train + test))) 126 | # Reserve 0 for masking via pad_sequences 127 | vocab_size = len(vocab) + 1 128 | word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) 129 | story_maxlen = max(map(len, (x for x, _, _ in train + test))) 130 | query_maxlen = max(map(len, (x for _, x, _ in train + test))) 131 | 132 | X, Xq, Y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen) 133 | tX, tXq, tY = vectorize_stories(test, word_idx, story_maxlen, query_maxlen) 134 | 135 | print('vocab = {}'.format(vocab)) 136 | print('X.shape = {}'.format(X.shape)) 137 | print('Xq.shape = {}'.format(Xq.shape)) 138 | print('Y.shape = {}'.format(Y.shape)) 139 | print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen)) 140 | 141 | print('Build model...') 142 | 143 | sentence = layers.Input(shape=(story_maxlen,), dtype='int32') 144 | encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence) 145 | encoded_sentence = layers.Dropout(0.3)(encoded_sentence) 146 | 147 | question = layers.Input(shape=(query_maxlen,), dtype='int32') 148 | encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question) 149 | encoded_question = layers.Dropout(0.3)(encoded_question) 150 | encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question) 151 | encoded_question = layers.RepeatVector(story_maxlen)(encoded_question) 152 | 153 | merged = layers.add([encoded_sentence, encoded_question]) 154 | merged = RNN(EMBED_HIDDEN_SIZE)(merged) 155 | merged = layers.Dropout(0.3)(merged) 156 | preds = layers.Dense(vocab_size, activation='softmax')(merged) 157 | 158 | model = Model([sentence, question], preds) 159 | model.compile(optimizer='rmsprop', 160 | loss='categorical_crossentropy', 161 | metrics=['accuracy']) 162 | 163 | print('Training') 164 | model.fit([X, Xq], Y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05) 165 | loss, acc = model.evaluate([tX, tXq], tY, batch_size=BATCH_SIZE) 166 | print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc)) 167 | -------------------------------------------------------------------------------- /layernorm/data: -------------------------------------------------------------------------------- 1 | ../gcnn/data -------------------------------------------------------------------------------- /layernorm/imdb_cnn_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a recurrent convolutional network on the IMDB sentiment 2 | classification task. 3 | 4 | w/o LN: 5 | Gets to 0.8544 test accuracy after 2 epochs. 6 | w/ LN: 7 | Gets to 0.8564 test accuracy after 2 epochs. 8 | ''' 9 | from __future__ import print_function 10 | import numpy as np 11 | np.random.seed(1337) # for reproducibility 12 | 13 | from keras.preprocessing import sequence 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Activation 16 | from keras.layers import Embedding 17 | from keras.layers import LSTM 18 | from keras.layers import Conv1D, MaxPooling1D 19 | from keras.datasets import imdb 20 | 21 | from layer_norm_layers import * 22 | 23 | 24 | # Embedding 25 | max_features = 20000 26 | maxlen = 100 27 | embedding_size = 128 28 | 29 | # Convolution 30 | filter_length = 5 31 | filters = 64 32 | pool_size = 4 33 | 34 | # LSTM 35 | lstm_output_size = 70 36 | 37 | # Training 38 | batch_size = 30 39 | epochs = 2 40 | 41 | ''' 42 | Note: 43 | batch_size is highly sensitive. 44 | Only 2 epochs are needed as the dataset is very small. 
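The only change relative to the stock Keras example is the recurrent layer: `LayerNormLSTM` from layer_norm_layers.py replaces the plain `LSTM` (the commented-out line below switches back).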
45 | ''' 46 | 47 | print('Build model...') 48 | model = Sequential() 49 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 50 | model.add(Dropout(0.25)) 51 | model.add(Conv1D(filters=filters, 52 | kernel_size=5, 53 | padding='valid', 54 | activation='relu', 55 | strides=1)) 56 | model.add(MaxPooling1D(pool_size=pool_size)) 57 | model.add(LayerNormLSTM(lstm_output_size)) 58 | #model.add(LSTM(lstm_output_size)) 59 | model.add(Dense(1)) 60 | model.add(Activation('sigmoid')) 61 | 62 | model.compile(loss='binary_crossentropy', 63 | optimizer='adam', 64 | metrics=['accuracy']) 65 | 66 | print('Loading data...') 67 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features) 68 | print(len(X_train), 'train sequences') 69 | print(len(X_test), 'test sequences') 70 | 71 | print('Pad sequences (samples x time)') 72 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 73 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 74 | print('X_train shape:', X_train.shape) 75 | print('X_test shape:', X_test.shape) 76 | 77 | 78 | print('Train...') 79 | model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, 80 | validation_data=(X_test, y_test)) 81 | score, acc = model.evaluate(X_test, y_test, batch_size=batch_size) 82 | print('Test score:', score) 83 | print('Test accuracy:', acc) 84 | -------------------------------------------------------------------------------- /layernorm/imdb_generator.py: -------------------------------------------------------------------------------- 1 | ../gcnn/imdb_generator.py -------------------------------------------------------------------------------- /layernorm/imdb_lm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''Simple RNN for Language Model 3 | ''' 4 | from __future__ import print_function 5 | import os 6 | 7 | from keras.models import Model 8 | from keras.layers import Input, Embedding, Dense, TimeDistributed 9 | from keras.optimizers import * 10 | 11 | from layer_norm_layers import * 12 | from imdb_generator import IMDBLM 13 | 14 | 15 | def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1): 16 | x = Input(batch_shape=(batch_size, None)) 17 | # mebedding 18 | y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x) 19 | for i in range(nb_layers-1): 20 | y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(i + 1))(y) 21 | y = LayerNormLSTM(hidden_dim, return_sequences=True, name='lnlstm{}'.format(nb_layers))(y) 22 | y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y) 23 | 24 | model = Model(inputs=x, outputs=y) 25 | 26 | return model 27 | 28 | 29 | def train_model(): 30 | batch_size = 32 31 | epochs = 100 32 | 33 | vocsize = 2000 # top 2k 34 | max_len = 30 35 | train_ratio = 0.99 36 | 37 | # Build model 38 | model = LM(batch_size, vocsize=vocsize, nb_layers=3) 39 | model.compile(optimizer='adam', 40 | loss='sparse_categorical_crossentropy') 41 | 42 | # Prepare data 43 | path = './data/imdb-full.pkl' 44 | # Train 45 | train_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, shuffle=True, 46 | which_set='train', train_ratio=train_ratio, batch_size=batch_size) 47 | # Validation 48 | val_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, 49 | which_set='validation', train_ratio=train_ratio, batch_size=batch_size) 50 | 51 | train_steps = 2000 52 | val_steps = 200 53 | 54 | # Start training 55 | model.summary() 56 | 
model.fit_generator(train_gen(), steps_per_epoch=train_steps, 57 | validation_data=val_gen(), validation_steps=val_steps, 58 | epochs=epochs, verbose=1) 59 | 60 | 61 | def run_demo(): 62 | train_model() 63 | 64 | 65 | if __name__ == '__main__': 66 | run_demo() 67 | -------------------------------------------------------------------------------- /layernorm/layer_norm_layers.py: -------------------------------------------------------------------------------- 1 | from keras.engine import Layer, InputSpec 2 | from keras.layers import LSTM, GRU 3 | from keras import initializers, regularizers 4 | from keras import backend as K 5 | 6 | import numpy as np 7 | 8 | 9 | def to_list(x): 10 | if type(x) not in [list, tuple]: 11 | return [x] 12 | else: 13 | return list(x) 14 | 15 | 16 | def LN(x, gamma, beta, epsilon=1e-6, axis=-1): 17 | m = K.mean(x, axis=axis, keepdims=True) 18 | std = K.sqrt(K.var(x, axis=axis, keepdims=True) + epsilon) 19 | x_normed = (x - m) / (std + epsilon) 20 | x_normed = gamma * x_normed + beta 21 | 22 | return x_normed 23 | 24 | 25 | class LayerNormalization(Layer): 26 | def __init__(self, axis=-1, 27 | gamma_init='one', beta_init='zero', 28 | gamma_regularizer=None, beta_regularizer=None, 29 | epsilon=1e-6, **kwargs): 30 | super(LayerNormalization, self).__init__(**kwargs) 31 | 32 | self.axis = to_list(axis) 33 | self.gamma_init = initializers.get(gamma_init) 34 | self.beta_init = initializers.get(beta_init) 35 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 36 | self.beta_regularizer = regularizers.get(beta_regularizer) 37 | self.epsilon = epsilon 38 | 39 | self.supports_masking = True 40 | 41 | def build(self, input_shape): 42 | self.input_spec = [InputSpec(shape=input_shape)] 43 | shape = [1 for _ in input_shape] 44 | for i in self.axis: 45 | shape[i] = input_shape[i] 46 | self.gamma = self.add_weight(shape=shape, 47 | initializer=self.gamma_init, 48 | regularizer=self.gamma_regularizer, 49 | name='gamma') 50 | self.beta = self.add_weight(shape=shape, 51 | initializer=self.beta_init, 52 | regularizer=self.beta_regularizer, 53 | name='beta') 54 | self.built = True 55 | 56 | def call(self, inputs, mask=None): 57 | return LN(inputs, gamma=self.gamma, beta=self.beta, 58 | axis=self.axis, epsilon=self.epsilon) 59 | 60 | def get_config(self): 61 | config = {'epsilon': self.epsilon, 62 | 'axis': self.axis, 63 | 'gamma_init': initializers.serialize(self.gamma_init), 64 | 'beta_init': initializers.serialize(self.beta_init), 65 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 66 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer)} 67 | base_config = super(LayerNormalization, self).get_config() 68 | return dict(list(base_config.items()) + list(config.items())) 69 | 70 | 71 | class LayerNormLSTM(LSTM): 72 | def build(self, input_shape): 73 | if isinstance(input_shape, list): 74 | input_shape = input_shape[0] 75 | batch_size = input_shape[0] if self.stateful else None 76 | self.input_dim = input_shape[2] 77 | self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim)) 78 | self.state_spec = [InputSpec(shape=(batch_size, self.units)), 79 | InputSpec(shape=(batch_size, self.units))] 80 | 81 | 82 | # initial states: 2 all-zero tensors of shape (units) 83 | self.states = [None, None] 84 | if self.stateful: 85 | self.reset_states() 86 | 87 | self.kernel = self.add_weight(shape=(self.input_dim, 4 * self.units), 88 | initializer=self.kernel_initializer, 89 | name='kernel', 90 | regularizer=self.kernel_regularizer, 91 |
constraint=self.kernel_constraint) 92 | self.recurrent_kernel = self.add_weight(shape=(self.units, self.units * 4), 93 | name='recurrent_kernel', 94 | initializer=self.recurrent_initializer, 95 | regularizer=self.recurrent_regularizer, 96 | constraint=self.recurrent_constraint) 97 | 98 | self.gamma_1 = self.add_weight(shape=(4 * self.units,), 99 | initializer='one', 100 | name='gamma_1') 101 | self.beta_1 = self.add_weight(shape=(4 * self.units,), 102 | initializer='zero', 103 | name='beta_1') 104 | self.gamma_2 = self.add_weight(shape=(4 * self.units,), 105 | initializer='one', 106 | name='gamma_2') 107 | self.beta_2 = self.add_weight(shape=(4 * self.units,), 108 | initializer='zero', 109 | name='beta_2') 110 | self.gamma_3 = self.add_weight(shape=(self.units,), 111 | initializer='one', 112 | name='gamma_3') 113 | self.beta_3 = self.add_weight((self.units,), 114 | initializer='zero', 115 | name='beta_3') 116 | 117 | if self.use_bias: 118 | if self.unit_forget_bias: 119 | def bias_initializer(shape, *args, **kwargs): 120 | return K.concatenate([ 121 | self.bias_initializer((self.units,), *args, **kwargs), 122 | initializers.Ones()((self.units,), *args, **kwargs), 123 | self.bias_initializer((self.units * 2,), *args, **kwargs), 124 | ]) 125 | else: 126 | bias_initializer = self.bias_initializer 127 | self.bias = self.add_weight(shape=(self.units * 4,), 128 | name='bias', 129 | initializer=bias_initializer, 130 | regularizer=self.bias_regularizer, 131 | constraint=self.bias_constraint) 132 | else: 133 | self.bias = None 134 | self.built = True 135 | 136 | def preprocess_input(self, inputs, training=None): 137 | return inputs 138 | 139 | def step(self, x, states): 140 | h_tm1 = states[0] 141 | c_tm1 = states[1] 142 | B_U = states[2] 143 | B_W = states[3] 144 | 145 | z = LN(K.dot(x * B_W[0], self.kernel), self.gamma_1, self.beta_1) + \ 146 | LN(K.dot(h_tm1 * B_U[0], self.recurrent_kernel), self.gamma_2, self.beta_2) 147 | if self.use_bias: 148 | z = K.bias_add(z, self.bias) 149 | 150 | z0 = z[:, :self.units] 151 | z1 = z[:, self.units: 2 * self.units] 152 | z2 = z[:, 2 * self.units: 3 * self.units] 153 | z3 = z[:, 3 * self.units:] 154 | 155 | i = self.recurrent_activation(z0) 156 | f = self.recurrent_activation(z1) 157 | c = f * c_tm1 + i * self.activation(z2) 158 | o = self.recurrent_activation(z3) 159 | 160 | h = o * self.activation(LN(c, self.gamma_3, self.beta_3)) 161 | return h, [h, c] 162 | -------------------------------------------------------------------------------- /layernorm/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | w/o LN: 4 | Gets to 99.25% test accuracy after 12 epochs 5 | w/ LN: 6 | Gets to 98.99% test accuracy after 12 epochs 7 | 8 | Caveat: the original paper doesn't discuss explicitly how to 9 | apply LN to Convolution layers or if LN is effective of conv. 
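Here LN is simply inserted after each Conv2D and normalizes over `ln_axis`, i.e. the channel and/or spatial axes of the feature maps; the `ln_axis` assignments below list the combinations worth trying.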
10 | ''' 11 | 12 | from __future__ import print_function 13 | import numpy as np 14 | np.random.seed(1337) # for reproducibility 15 | 16 | from keras.datasets import mnist 17 | from keras.models import Sequential 18 | from keras.layers import Dense, Dropout, Activation, Flatten 19 | from keras.layers import Conv2D, MaxPooling2D 20 | from keras.utils import np_utils 21 | from keras import backend as K 22 | 23 | from layer_norm_layers import * 24 | 25 | K.set_image_data_format('channels_first') 26 | 27 | batch_size = 128 28 | nb_classes = 10 29 | epochs = 12 30 | 31 | # input image dimensions 32 | img_rows, img_cols = 28, 28 33 | # number of convolutional filters to use 34 | filters = 32 35 | # size of pooling area for max pooling 36 | pool_size = (2, 2) 37 | # convolution kernel size 38 | kernel_size = (3, 3) 39 | 40 | # the data, shuffled and split between train and test sets 41 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 42 | 43 | if K.image_data_format() == 'channels_first': 44 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 45 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 46 | input_shape = (1, img_rows, img_cols) 47 | ln_axis = 1 # try 1, [2, 3] or [1, 2, 3] 48 | else: 49 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) 50 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) 51 | input_shape = (img_rows, img_cols, 1) 52 | ln_axis = 3 # try 3, [1, 2] or [1, 2, 3] 53 | 54 | X_train = X_train.astype('float32') 55 | X_test = X_test.astype('float32') 56 | X_train /= 255 57 | X_test /= 255 58 | print('X_train shape:', X_train.shape) 59 | print(X_train.shape[0], 'train samples') 60 | print(X_test.shape[0], 'test samples') 61 | 62 | # convert class vectors to binary class matrices 63 | Y_train = np_utils.to_categorical(y_train, nb_classes) 64 | Y_test = np_utils.to_categorical(y_test, nb_classes) 65 | 66 | model = Sequential() 67 | 68 | model.add(Conv2D(filters, kernel_size, 69 | padding='valid', 70 | input_shape=input_shape)) 71 | model.add(LayerNormalization(axis=ln_axis)) 72 | model.add(Activation('relu')) 73 | model.add(Conv2D(filters, kernel_size)) 74 | model.add(LayerNormalization(axis=ln_axis)) 75 | model.add(Activation('relu')) 76 | model.add(MaxPooling2D(pool_size=pool_size)) 77 | model.add(Dropout(0.25)) 78 | 79 | model.add(Flatten()) 80 | model.add(Dense(128)) 81 | model.add(LayerNormalization()) 82 | model.add(Activation('relu')) 83 | model.add(Dropout(0.5)) 84 | model.add(Dense(nb_classes)) 85 | model.add(LayerNormalization()) 86 | model.add(Activation('softmax')) 87 | 88 | model.compile(loss='categorical_crossentropy', 89 | optimizer='adadelta', 90 | metrics=['accuracy']) 91 | 92 | model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, 93 | verbose=1, validation_data=(X_test, Y_test)) 94 | score = model.evaluate(X_test, Y_test, verbose=0) 95 | print('Test score:', score[0]) 96 | print('Test accuracy:', score[1]) 97 | -------------------------------------------------------------------------------- /layernorm/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple fully connected NN with Layer Normalization on the MNIST dataset.
2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 98.28% test accuracy after 20 epochs using tensorflow backend 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from keras.layers.core import Dense, Dropout, Activation 13 | from keras.optimizers import SGD, Adam, RMSprop 14 | from keras.utils import np_utils 15 | 16 | from layer_norm_layers import * 17 | 18 | batch_size = 128 19 | nb_classes = 10 20 | epochs = 20 21 | 22 | # the data, shuffled and split between train and test sets 23 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 24 | 25 | X_train = X_train.reshape(60000, 784) 26 | X_test = X_test.reshape(10000, 784) 27 | X_train = X_train.astype('float32') 28 | X_test = X_test.astype('float32') 29 | X_train /= 255 30 | X_test /= 255 31 | print(X_train.shape[0], 'train samples') 32 | print(X_test.shape[0], 'test samples') 33 | 34 | # convert class vectors to binary class matrices 35 | Y_train = np_utils.to_categorical(y_train, nb_classes) 36 | Y_test = np_utils.to_categorical(y_test, nb_classes) 37 | 38 | model = Sequential() 39 | model.add(Dense(512, input_shape=(784,))) 40 | model.add(LayerNormalization()) 41 | model.add(Activation('relu')) 42 | model.add(Dropout(0.2)) 43 | model.add(Dense(512)) 44 | model.add(LayerNormalization()) 45 | model.add(Activation('relu')) 46 | model.add(Dropout(0.2)) 47 | model.add(Dense(10)) 48 | model.add(LayerNormalization()) 49 | model.add(Activation('softmax')) 50 | 51 | model.summary() 52 | 53 | model.compile(loss='categorical_crossentropy', 54 | optimizer=RMSprop(), 55 | metrics=['accuracy']) 56 | 57 | history = model.fit(X_train, Y_train, 58 | batch_size=batch_size, epochs=epochs, 59 | verbose=1, validation_data=(X_test, Y_test)) 60 | score = model.evaluate(X_test, Y_test, verbose=0) 61 | print('Test score:', score[0]) 62 | print('Test accuracy:', score[1]) 63 | -------------------------------------------------------------------------------- /lsgan/README.md: -------------------------------------------------------------------------------- 1 | Toy keras implementation of Least Squares GAN (LSGAN) on MNIST 2 | 3 | ## Reference 4 | * Mao et al. 
[Least Squares Generative Adversarial Networks](https://www.arxiv.org/abs/1611.04076) 5 | -------------------------------------------------------------------------------- /lsgan/lsgan_mlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Train a Least Squares Generative Adversarial Network (LSGAN) on the MNIST dataset 5 | """ 6 | from __future__ import print_function 7 | from PIL import Image 8 | from six.moves import range 9 | 10 | import keras.backend as K 11 | K.set_image_data_format('channels_first') 12 | 13 | from keras.datasets import mnist 14 | from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Activation, BatchNormalization 15 | from keras.models import Sequential, Model 16 | from keras.optimizers import RMSprop, Adam 17 | from keras.utils.generic_utils import Progbar 18 | 19 | import numpy as np 20 | np.random.seed(1337) 21 | 22 | 23 | def build_generator(latent_size): 24 | model = Sequential() 25 | model.add(Dense(1024, input_dim=latent_size, activation='relu')) 26 | model.add(Dense(28 * 28, activation='tanh')) 27 | model.add(Reshape((1, 28, 28))) 28 | 29 | return model 30 | 31 | 32 | def build_discriminator(): 33 | f = Sequential() 34 | f.add(Flatten(input_shape=(1, 28, 28))) 35 | f.add(Dense(256)) 36 | f.add(Activation('relu')) 37 | f.add(Dense(128)) 38 | f.add(Activation('relu')) 39 | f.add(Dense(1, activation='linear')) 40 | 41 | image = Input(shape=(1, 28, 28)) 42 | score = f(image) 43 | 44 | model = Model(image, score) 45 | 46 | return model 47 | 48 | 49 | if __name__ == '__main__': 50 | 51 | epochs = 5000 52 | batch_size = 50 53 | latent_size = 20 54 | 55 | lr = 0.001 56 | 57 | # build the discriminator 58 | disc = build_discriminator() 59 | disc.compile( 60 | optimizer=Adam(lr=lr), 61 | loss='mse' 62 | ) 63 | 64 | # build the generator 65 | generator = build_generator(latent_size) 66 | 67 | latent = Input(shape=(latent_size, )) 68 | # get a fake image 69 | fake = generator(latent) 70 | # we only want to be able to train generation for the combined model 71 | disc.trainable = False 72 | fake = disc(fake) 73 | combined = Model(inputs=latent, outputs=fake) 74 | combined.compile( 75 | optimizer=Adam(lr=lr), 76 | loss='mse' 77 | ) 78 | 79 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 80 | # range [-1, 1] 81 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 82 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 83 | X_train = np.expand_dims(X_train, axis=1) 84 | 85 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 86 | X_test = np.expand_dims(X_test, axis=1) 87 | 88 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 89 | 90 | for epoch in range(epochs): 91 | print('Epoch {} of {}'.format(epoch + 1, epochs)) 92 | 93 | nb_batches = int(X_train.shape[0] / batch_size) 94 | progress_bar = Progbar(target=nb_batches) 95 | 96 | epoch_disc_loss = [] 97 | epoch_gen_loss = [] 98 | 99 | index = 0 100 | while index < nb_batches: 101 | ## discriminator 102 | progress_bar.update(index) 103 | index += 1 104 | 105 | # generate a new batch of noise 106 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 107 | # generate a batch of fake images 108 | generated_images = generator.predict(noise, verbose=0) 109 | 110 | # get a batch of real images 111 | image_batch = X_train[index * batch_size:(index + 1) * batch_size] 112 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 113 | 114 | X = np.concatenate((image_batch,
generated_images)) 115 | # a == 0, b == 1 116 | y = np.array([1] * len(image_batch) + [0] * batch_size) 117 | 118 | epoch_disc_loss.append(disc.train_on_batch(X, y)) 119 | 120 | 121 | ## generator 122 | # make a new batch of noise and train the generator so that the 123 | # discriminator scores its samples as real (target of ones below) 124 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 125 | target = np.ones(batch_size) # c == b == 1, cf. Eq. (9) 126 | epoch_gen_loss.append(combined.train_on_batch(noise, target)) 127 | 128 | print('\n[Loss_D: {:.3f}, Loss_G: {:.3f}]'.format(np.mean(epoch_disc_loss), np.mean(epoch_gen_loss))) 129 | 130 | # save weights every epoch 131 | if False: 132 | generator.save_weights( 133 | 'mlp_generator_epoch_{0:03d}.hdf5'.format(epoch), True) 134 | disc.save_weights( 135 | 'mlp_disc_epoch_{0:03d}.hdf5'.format(epoch), True) 136 | 137 | # generate some digits to display 138 | noise = np.random.uniform(-1, 1, (100, latent_size)) 139 | # get a batch to display 140 | generated_images = generator.predict(noise, verbose=0) 141 | 142 | # arrange them into a grid 143 | img = (np.concatenate([r.reshape(-1, 28) 144 | for r in np.split(generated_images, 10) 145 | ], axis=-1) * 127.5 + 127.5).astype(np.uint8) 146 | 147 | Image.fromarray(img).save( 148 | 'mlp_epoch_{0:03d}_generated.png'.format(epoch)) 149 | -------------------------------------------------------------------------------- /qrnn/README.md: -------------------------------------------------------------------------------- 1 | A trial implementation of QRNN-fo with dropout for Keras 2 | 3 | ## Run the demo: 4 | python imbd_qrnn.py 5 | 6 | ## Reference 7 | * Bradbury et al. [Quasi-recurrent Neural Networks](https://arxiv.org/abs/1611.01576) 8 | -------------------------------------------------------------------------------- /qrnn/imbd_qrnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a QRNN on the IMDB sentiment classification task. 2 | Modified from keras' examples/imbd_lstm.py.
3 | ''' 4 | from __future__ import print_function 5 | import numpy as np 6 | np.random.seed(1337) # for reproducibility 7 | 8 | from keras.preprocessing import sequence 9 | from keras.utils import np_utils 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation, Embedding, SpatialDropout1D 12 | from keras.layers import LSTM, SimpleRNN, GRU 13 | from keras.regularizers import l2 14 | from keras.constraints import maxnorm 15 | from keras.datasets import imdb 16 | 17 | from qrnn import QRNN 18 | 19 | max_features = 20000 20 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 21 | batch_size = 32 22 | 23 | print('Build model...') 24 | model = Sequential() 25 | model.add(Embedding(max_features, 128)) 26 | model.add(SpatialDropout1D(0.2)) 27 | model.add(QRNN(128, window_size=3, dropout=0.2, 28 | kernel_regularizer=l2(1e-4), bias_regularizer=l2(1e-4), 29 | kernel_constraint=maxnorm(10), bias_constraint=maxnorm(10))) 30 | model.add(Dense(1)) 31 | model.add(Activation('sigmoid')) 32 | 33 | # try using different optimizers and different optimizer configs 34 | model.compile(loss='binary_crossentropy', 35 | optimizer='adam', 36 | metrics=['accuracy']) 37 | 38 | print('Loading data...') 39 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features) 40 | print(len(X_train), 'train sequences') 41 | print(len(X_test), 'test sequences') 42 | 43 | print('Pad sequences (samples x time)') 44 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 45 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 46 | print('X_train shape:', X_train.shape) 47 | print('X_test shape:', X_test.shape) 48 | 49 | print('Train...') 50 | model.fit(X_train, y_train, batch_size=batch_size, epochs=15, 51 | validation_data=(X_test, y_test)) 52 | score, acc = model.evaluate(X_test, y_test, 53 | batch_size=batch_size) 54 | print('Test score:', score) 55 | print('Test accuracy:', acc) 56 | -------------------------------------------------------------------------------- /qrnn/imbd_qrnn_Bidirecional.py: -------------------------------------------------------------------------------- 1 | '''Trains a QRNN on the IMDB sentiment classification task. 2 | Modified from keras' examples/imbd_lstm.py. 
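Identical to imbd_qrnn.py except that the QRNN layer is wrapped in keras.layers.Bidirectional.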
3 | ''' 4 | from __future__ import print_function 5 | import numpy as np 6 | np.random.seed(1337) # for reproducibility 7 | 8 | from keras.preprocessing import sequence 9 | from keras.utils import np_utils 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation, Embedding, SpatialDropout1D 12 | from keras.layers import LSTM, SimpleRNN, GRU, Bidirectional 13 | from keras.regularizers import l2 14 | from keras.constraints import maxnorm 15 | from keras.datasets import imdb 16 | 17 | from qrnn import QRNN 18 | 19 | max_features = 20000 20 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 21 | batch_size = 32 22 | 23 | print('Build model...') 24 | model = Sequential() 25 | model.add(Embedding(max_features, 128)) 26 | model.add(SpatialDropout1D(0.2)) 27 | model.add(Bidirectional(QRNN(128, window_size=3, dropout=0.2, 28 | kernel_regularizer=l2(1e-4), bias_regularizer=l2(1e-4), 29 | kernel_constraint=maxnorm(10), bias_constraint=maxnorm(10)))) 30 | model.add(Dense(1)) 31 | model.add(Activation('sigmoid')) 32 | 33 | # try using different optimizers and different optimizer configs 34 | model.compile(loss='binary_crossentropy', 35 | optimizer='adam', 36 | metrics=['accuracy']) 37 | 38 | print('Loading data...') 39 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features) 40 | print(len(X_train), 'train sequences') 41 | print(len(X_test), 'test sequences') 42 | 43 | print('Pad sequences (samples x time)') 44 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 45 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 46 | print('X_train shape:', X_train.shape) 47 | print('X_test shape:', X_test.shape) 48 | 49 | print('Train...') 50 | model.fit(X_train, y_train, batch_size=batch_size, epochs=15, 51 | validation_data=(X_test, y_test)) 52 | score, acc = model.evaluate(X_test, y_test, 53 | batch_size=batch_size) 54 | print('Test score:', score) 55 | print('Test accuracy:', acc) 56 | -------------------------------------------------------------------------------- /qrnn/qrnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import numpy as np 4 | 5 | from keras import backend as K 6 | from keras import activations, initializers, regularizers, constraints 7 | from keras.layers import Layer, InputSpec 8 | 9 | from keras.utils.conv_utils import conv_output_length 10 | 11 | import theano 12 | import theano.tensor as T 13 | 14 | 15 | def _dropout(x, level, noise_shape=None, seed=None): 16 | x = K.dropout(x, level, noise_shape, seed) 17 | x *= (1. - level) # compensate for the scaling by the dropout 18 | return x 19 | 20 | 21 | class QRNN(Layer): 22 | '''Quasi RNN 23 | 24 | # Arguments 25 | units: dimension of the internal projections and the final output.
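window_size: width of the causal convolution that computes the gates from the last `window_size` timesteps. The convolution yields a candidate z and gates f, o per timestep; the fo-pooling recurrence is h_t = f_t * h_{t-1} + (1 - f_t) * z_t, and the emitted output is o_t * h_t (see `step` below).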
26 | 27 | # References 28 | - [Quasi-recurrent Neural Networks](http://arxiv.org/abs/1611.01576) 29 | ''' 30 | def __init__(self, units, window_size=2, stride=1, 31 | return_sequences=False, go_backwards=False, 32 | stateful=False, unroll=False, activation='tanh', 33 | kernel_initializer='uniform', bias_initializer='zero', 34 | kernel_regularizer=None, bias_regularizer=None, 35 | activity_regularizer=None, 36 | kernel_constraint=None, bias_constraint=None, 37 | dropout=0, use_bias=True, input_dim=None, input_length=None, 38 | **kwargs): 39 | self.return_sequences = return_sequences 40 | self.go_backwards = go_backwards 41 | self.stateful = stateful 42 | self.unroll = unroll 43 | 44 | self.units = units 45 | self.window_size = window_size 46 | self.strides = (stride, 1) 47 | 48 | self.use_bias = use_bias 49 | self.activation = activations.get(activation) 50 | self.kernel_initializer = initializers.get(kernel_initializer) 51 | self.bias_initializer = initializers.get(bias_initializer) 52 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 53 | self.bias_regularizer = regularizers.get(bias_regularizer) 54 | self.activity_regularizer = regularizers.get(activity_regularizer) 55 | self.kernel_constraint = constraints.get(kernel_constraint) 56 | self.bias_constraint = constraints.get(bias_constraint) 57 | 58 | self.recurrent_dropout = 0 #not used, added to maintain compatibility with keras.Bidirectional 59 | self.dropout = dropout 60 | self.supports_masking = True 61 | self.input_spec = [InputSpec(ndim=3)] 62 | self.input_dim = input_dim 63 | self.input_length = input_length 64 | if self.input_dim: 65 | kwargs['input_shape'] = (self.input_length, self.input_dim) 66 | super(QRNN, self).__init__(**kwargs) 67 | 68 | def build(self, input_shape): 69 | if isinstance(input_shape, list): 70 | input_shape = input_shape[0] 71 | 72 | batch_size = input_shape[0] if self.stateful else None 73 | self.input_dim = input_shape[2] 74 | self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim)) 75 | self.state_spec = InputSpec(shape=(batch_size, self.units)) 76 | 77 | self.states = [None] 78 | if self.stateful: 79 | self.reset_states() 80 | 81 | kernel_shape = (self.window_size, 1, self.input_dim, self.units * 3) 82 | self.kernel = self.add_weight(name='kernel', 83 | shape=kernel_shape, 84 | initializer=self.kernel_initializer, 85 | regularizer=self.kernel_regularizer, 86 | constraint=self.kernel_constraint) 87 | if self.use_bias: 88 | self.bias = self.add_weight(name='bias', 89 | shape=(self.units * 3,), 90 | initializer=self.bias_initializer, 91 | regularizer=self.bias_regularizer, 92 | constraint=self.bias_constraint) 93 | 94 | self.built = True 95 | 96 | def compute_output_shape(self, input_shape): 97 | if isinstance(input_shape, list): 98 | input_shape = input_shape[0] 99 | 100 | length = input_shape[1] 101 | if length: 102 | length = conv_output_length(length + self.window_size - 1, 103 | self.window_size, 'valid', 104 | self.strides[0]) 105 | if self.return_sequences: 106 | return (input_shape[0], length, self.units) 107 | else: 108 | return (input_shape[0], self.units) 109 | 110 | def compute_mask(self, inputs, mask): 111 | if self.return_sequences: 112 | return mask 113 | else: 114 | return None 115 | 116 | def get_initial_states(self, inputs): 117 | # build an all-zero tensor of shape (samples, units) 118 | initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) 119 | initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) 120 | initial_state = 
K.expand_dims(initial_state) # (samples, 1) 121 | initial_state = K.tile(initial_state, [1, self.units]) # (samples, units) 122 | initial_states = [initial_state for _ in range(len(self.states))] 123 | return initial_states 124 | 125 | def reset_states(self, states=None): 126 | if not self.stateful: 127 | raise AttributeError('Layer must be stateful.') 128 | if not self.input_spec: 129 | raise RuntimeError('Layer has never been called ' 130 | 'and thus has no states.') 131 | 132 | batch_size = self.input_spec.shape[0] 133 | if not batch_size: 134 | raise ValueError('If a QRNN is stateful, it needs to know ' 135 | 'its batch size. Specify the batch size ' 136 | 'of your input tensors: \n' 137 | '- If using a Sequential model, ' 138 | 'specify the batch size by passing ' 139 | 'a `batch_input_shape` ' 140 | 'argument to your first layer.\n' 141 | '- If using the functional API, specify ' 142 | 'the time dimension by passing a ' 143 | '`batch_shape` argument to your Input layer.') 144 | 145 | if self.states[0] is None: 146 | self.states = [K.zeros((batch_size, self.units)) 147 | for _ in self.states] 148 | elif states is None: 149 | for state in self.states: 150 | K.set_value(state, np.zeros((batch_size, self.units))) 151 | else: 152 | if not isinstance(states, (list, tuple)): 153 | states = [states] 154 | if len(states) != len(self.states): 155 | raise ValueError('Layer ' + self.name + ' expects ' + 156 | str(len(self.states)) + ' states, ' 157 | 'but it received ' + str(len(states)) + 158 | 'state values. Input received: ' + 159 | str(states)) 160 | for index, (value, state) in enumerate(zip(states, self.states)): 161 | if value.shape != (batch_size, self.units): 162 | raise ValueError('State ' + str(index) + 163 | ' is incompatible with layer ' + 164 | self.name + ': expected shape=' + 165 | str((batch_size, self.units)) + 166 | ', found shape=' + str(value.shape)) 167 | K.set_value(state, value) 168 | 169 | def __call__(self, inputs, initial_state=None, **kwargs): 170 | # If `initial_state` is specified, 171 | # and if it a Keras tensor, 172 | # then add it to the inputs and temporarily 173 | # modify the input spec to include the state. 174 | if initial_state is not None: 175 | if hasattr(initial_state, '_keras_history'): 176 | # Compute the full input spec, including state 177 | input_spec = self.input_spec 178 | state_spec = self.state_spec 179 | if not isinstance(state_spec, list): 180 | state_spec = [state_spec] 181 | self.input_spec = [input_spec] + state_spec 182 | 183 | # Compute the full inputs, including state 184 | if not isinstance(initial_state, (list, tuple)): 185 | initial_state = [initial_state] 186 | inputs = [inputs] + list(initial_state) 187 | 188 | # Perform the call 189 | output = super(QRNN, self).__call__(inputs, **kwargs) 190 | 191 | # Restore original input spec 192 | self.input_spec = input_spec 193 | return output 194 | else: 195 | kwargs['initial_state'] = initial_state 196 | return super(QRNN, self).__call__(inputs, **kwargs) 197 | 198 | def call(self, inputs, mask=None, initial_state=None, training=None): 199 | # input shape: `(samples, time (padded with zeros), input_dim)` 200 | # note that the .build() method of subclasses MUST define 201 | # self.input_spec and self.state_spec with complete input shapes. 
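# `preprocess_input` (called below) turns the sequence into per-timestep (z, f, o) pre-activations via a causal convolution, so K.rnn only has to scan `step` over time with a single hidden state.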
202 | if isinstance(inputs, list): 203 | initial_states = inputs[1:] 204 | inputs = inputs[0] 205 | elif initial_state is not None: 206 | pass 207 | elif self.stateful: 208 | initial_states = self.states 209 | else: 210 | initial_states = self.get_initial_states(inputs) 211 | 212 | if len(initial_states) != len(self.states): 213 | raise ValueError('Layer has ' + str(len(self.states)) + 214 | ' states but was passed ' + 215 | str(len(initial_states)) + 216 | ' initial states.') 217 | input_shape = K.int_shape(inputs) 218 | if self.unroll and input_shape[1] is None: 219 | raise ValueError('Cannot unroll a RNN if the ' 220 | 'time dimension is undefined. \n' 221 | '- If using a Sequential model, ' 222 | 'specify the time dimension by passing ' 223 | 'an `input_shape` or `batch_input_shape` ' 224 | 'argument to your first layer. If your ' 225 | 'first layer is an Embedding, you can ' 226 | 'also use the `input_length` argument.\n' 227 | '- If using the functional API, specify ' 228 | 'the time dimension by passing a `shape` ' 229 | 'or `batch_shape` argument to your Input layer.') 230 | constants = self.get_constants(inputs, training=None) 231 | preprocessed_input = self.preprocess_input(inputs, training=None) 232 | 233 | last_output, outputs, states = K.rnn(self.step, preprocessed_input, 234 | initial_states, 235 | go_backwards=self.go_backwards, 236 | mask=mask, 237 | constants=constants, 238 | unroll=self.unroll, 239 | input_length=input_shape[1]) 240 | if self.stateful: 241 | updates = [] 242 | for i in range(len(states)): 243 | updates.append((self.states[i], states[i])) 244 | self.add_update(updates, inputs) 245 | 246 | # Properly set learning phase 247 | if 0 < self.dropout < 1: 248 | last_output._uses_learning_phase = True 249 | outputs._uses_learning_phase = True 250 | 251 | if self.return_sequences: 252 | return outputs 253 | else: 254 | return last_output 255 | 256 | def preprocess_input(self, inputs, training=None): 257 | if self.window_size > 1: 258 | inputs = K.temporal_padding(inputs, (self.window_size-1, 0)) 259 | inputs = K.expand_dims(inputs, 2) # add a dummy dimension 260 | 261 | output = K.conv2d(inputs, self.kernel, strides=self.strides, 262 | padding='valid', 263 | data_format='channels_last') 264 | output = K.squeeze(output, 2) # remove the dummy dimension 265 | if self.use_bias: 266 | output = K.bias_add(output, self.bias, data_format='channels_last') 267 | 268 | if self.dropout is not None and 0. < self.dropout < 1.: 269 | z = output[:, :, :self.units] 270 | f = output[:, :, self.units:2 * self.units] 271 | o = output[:, :, 2 * self.units:] 272 | f = K.in_train_phase(1 - _dropout(1 - f, self.dropout), f, training=training) 273 | return K.concatenate([z, f, o], -1) 274 | else: 275 | return output 276 | 277 | def step(self, inputs, states): 278 | prev_output = states[0] 279 | 280 | z = inputs[:, :self.units] 281 | f = inputs[:, self.units:2 * self.units] 282 | o = inputs[:, 2 * self.units:] 283 | 284 | z = self.activation(z) 285 | f = f if self.dropout is not None and 0. < self.dropout < 1. 
else K.sigmoid(f) 286 | o = K.sigmoid(o) 287 | 288 | output = f * prev_output + (1 - f) * z 289 | output = o * output 290 | 291 | return output, [output] 292 | 293 | def get_constants(self, inputs, training=None): 294 | return [] 295 | 296 | def get_config(self): 297 | config = {'units': self.units, 298 | 'window_size': self.window_size, 299 | 'stride': self.strides[0], 300 | 'return_sequences': self.return_sequences, 301 | 'go_backwards': self.go_backwards, 302 | 'stateful': self.stateful, 303 | 'unroll': self.unroll, 304 | 'use_bias': self.use_bias, 305 | 'dropout': self.dropout, 306 | 'activation': activations.serialize(self.activation), 307 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 308 | 'bias_initializer': initializers.serialize(self.bias_initializer), 309 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 310 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 311 | 'activity_regularizer': regularizers.serialize(self.activity_regularizer), 312 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 313 | 'bias_constraint': constraints.serialize(self.bias_constraint), 314 | 'input_dim': self.input_dim, 315 | 'input_length': self.input_length} 316 | base_config = super(QRNN, self).get_config() 317 | return dict(list(base_config.items()) + list(config.items())) 318 | -------------------------------------------------------------------------------- /senet/README.md: -------------------------------------------------------------------------------- 1 | Squeeze-and-Excitation wrapper (a simple gating mechanism) for 2D convolution (4D data). 2 | 3 | ## References: 4 | * Hu et al. [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) 5 | * Official Repo (Caffe): https://github.com/hujie-frank/SENet 6 | -------------------------------------------------------------------------------- /senet/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from keras import backend as K 3 | from keras import initializers, constraints, regularizers 4 | from keras.layers import InputSpec, Layer 5 | 6 | from keras.utils import conv_utils 7 | 8 | 9 | class SE(Layer): 10 | def __init__(self, 11 | ratio, 12 | data_format=None, 13 | use_bias=True, 14 | kernel_initializer='glorot_uniform', 15 | bias_initializer='zeros', 16 | kernel_regularizer=None, 17 | bias_regularizer=None, 18 | activity_regularizer=None, 19 | kernel_constraint=None, 20 | bias_constraint=None, 21 | **kwargs): 22 | super(SE, self).__init__(**kwargs) 23 | 24 | self.ratio = ratio 25 | self.data_format = conv_utils.normalize_data_format(data_format) 26 | 27 | self.use_bias = use_bias 28 | self.kernel_initializer = initializers.get(kernel_initializer) 29 | self.bias_initializer = initializers.get(bias_initializer) 30 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 31 | self.bias_regularizer = regularizers.get(bias_regularizer) 32 | self.activity_regularizer = regularizers.get(activity_regularizer) 33 | self.kernel_constraint = constraints.get(kernel_constraint) 34 | self.bias_constraint = constraints.get(bias_constraint) 35 | self.supports_masking = True 36 | 37 | def build(self, input_shape): 38 | assert len(input_shape) == 4 39 | self.input_spec = InputSpec(shape=input_shape) 40 | 41 | if self.data_format == 'channels_first': 42 | channel_axis = 1 43 | else: 44 | channel_axis = 3 45 | channels = input_shape[channel_axis] 46 | 47 | self.kernel1 =
self.add_weight(shape=(channels, channels // self.ratio), 48 | initializer=self.kernel_initializer, 49 | name='kernel1', 50 | regularizer=self.kernel_regularizer, 51 | constraint=self.kernel_constraint) 52 | if self.use_bias: 53 | self.bias1 = self.add_weight(shape=(channels // self.ratio,), 54 | initializer=self.bias_initializer, 55 | name='bias1', 56 | regularizer=self.bias_regularizer, 57 | constraint=self.bias_constraint) 58 | else: 59 | self.bias1 = None 60 | 61 | self.kernel2 = self.add_weight(shape=(channels // self.ratio, channels), 62 | initializer=self.kernel_initializer, 63 | name='kernel2', 64 | regularizer=self.kernel_regularizer, 65 | constraint=self.kernel_constraint) 66 | if self.use_bias: 67 | self.bias2 = self.add_weight(shape=(channels,), 68 | initializer=self.bias_initializer, 69 | name='bias2', 70 | regularizer=self.bias_regularizer, 71 | constraint=self.bias_constraint) 72 | else: 73 | self.bias2 = None 74 | 75 | self.built = True 76 | 77 | def compute_output_shape(self, input_shape): 78 | return input_shape 79 | 80 | def call(self, inputs): 81 | if self.data_format == 'channels_first': 82 | sq = K.mean(inputs, [2, 3]) 83 | else: 84 | sq = K.mean(inputs, [1, 2]) 85 | 86 | ex = K.dot(sq, self.kernel1) 87 | if self.use_bias: 88 | ex = K.bias_add(ex, self.bias1) 89 | ex= K.relu(ex) 90 | 91 | ex = K.dot(ex, self.kernel2) 92 | if self.use_bias: 93 | ex = K.bias_add(ex, self.bias2) 94 | ex= K.sigmoid(ex) 95 | 96 | if self.data_format == 'channels_first': 97 | ex = K.expand_dims(ex, -1) 98 | ex = K.expand_dims(ex, -1) 99 | else: 100 | ex = K.expand_dims(ex, 1) 101 | ex = K.expand_dims(ex, 1) 102 | 103 | return inputs * ex 104 | 105 | def get_config(self): 106 | config = { 107 | 'ratio': self.ratio, 108 | 'data_format': self.data_format, 109 | 'use_bias': self.use_bias, 110 | 'kernel_initializer': initializers.serialize(self.kernel_initializer), 111 | 'bias_initializer': initializers.serialize(self.bias_initializer), 112 | 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 113 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 114 | 'kernel_constraint': constraints.serialize(self.kernel_constraint), 115 | 'bias_constraint': constraints.serialize(self.bias_constraint) 116 | } 117 | base_config = super(SE, self).get_config() 118 | return dict(list(base_config.items()) + list(config.items())) 119 | -------------------------------------------------------------------------------- /senet/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 
2 | 3 | Gets to 98.96% test accuracy after 12 epochs 4 | ''' 5 | 6 | from __future__ import print_function 7 | import keras 8 | from keras.datasets import mnist 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout, Flatten 11 | from keras.layers import Conv2D, MaxPooling2D 12 | from keras import backend as K 13 | 14 | from layers import SE 15 | 16 | 17 | batch_size = 128 18 | num_classes = 10 19 | epochs = 12 20 | 21 | ratio = 4 22 | 23 | # input image dimensions 24 | img_rows, img_cols = 28, 28 25 | 26 | # the data, shuffled and split between train and test sets 27 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 28 | 29 | if K.image_data_format() == 'channels_first': 30 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 31 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 32 | input_shape = (1, img_rows, img_cols) 33 | else: 34 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 35 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 36 | input_shape = (img_rows, img_cols, 1) 37 | 38 | x_train = x_train.astype('float32') 39 | x_test = x_test.astype('float32') 40 | x_train /= 255 41 | x_test /= 255 42 | print('x_train shape:', x_train.shape) 43 | print(x_train.shape[0], 'train samples') 44 | print(x_test.shape[0], 'test samples') 45 | 46 | # convert class vectors to binary class matrices 47 | y_train = keras.utils.to_categorical(y_train, num_classes) 48 | y_test = keras.utils.to_categorical(y_test, num_classes) 49 | 50 | model = Sequential() 51 | model.add(Conv2D(32, kernel_size=(3, 3), 52 | activation='relu', 53 | input_shape=input_shape)) 54 | model.add(SE(ratio)) 55 | model.add(Conv2D(64, (3, 3), activation='relu')) 56 | model.add(SE(ratio)) 57 | model.add(MaxPooling2D(pool_size=(2, 2))) 58 | model.add(Dropout(0.25)) 59 | model.add(Flatten()) 60 | model.add(Dense(128, activation='relu')) 61 | model.add(Dropout(0.5)) 62 | model.add(Dense(num_classes, activation='softmax')) 63 | 64 | model.compile(loss=keras.losses.categorical_crossentropy, 65 | optimizer=keras.optimizers.Adadelta(), 66 | metrics=['accuracy']) 67 | 68 | model.fit(x_train, y_train, 69 | batch_size=batch_size, 70 | epochs=epochs, 71 | verbose=1, 72 | validation_data=(x_test, y_test)) 73 | score = model.evaluate(x_test, y_test, verbose=0) 74 | print('Test loss:', score[0]) 75 | print('Test accuracy:', score[1]) 76 | -------------------------------------------------------------------------------- /ternarynet/README.md: -------------------------------------------------------------------------------- 1 | Ternary Recurrent Networks for Keras. 2 | 3 | ## run 4 | ### MLP 5 | python mnist_mlp.py 6 | 7 | ### CNN 8 | python mnist_cnn.py 9 | 10 | ### RNN 11 | python imdb_lm.py 12 | 13 | ## References 14 | * Ott et al. [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902) 15 | * Li et al.
[Ternary Weight Networks](http://arxiv.org/abs/1605.04711) 16 | -------------------------------------------------------------------------------- /ternarynet/imdb_generator.py: -------------------------------------------------------------------------------- 1 | ../gcnn/imdb_generator.py -------------------------------------------------------------------------------- /ternarynet/imdb_lm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''Simple RNN for Language Model 3 | ''' 4 | from __future__ import print_function 5 | import os 6 | 7 | from keras.models import Model 8 | from keras.layers import Input, Embedding, Dense, TimeDistributed 9 | from keras.optimizers import * 10 | 11 | from ternary_layers import * 12 | from imdb_generator import IMDBLM 13 | 14 | 15 | def LM(batch_size, window_size=3, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1): 16 | x = Input(batch_shape=(batch_size, None)) 17 | # embedding 18 | y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x) 19 | for i in range(nb_layers-1): 20 | y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(i + 1))(y) 21 | y = TernaryRNN(hidden_dim, return_sequences=True, name='trnn{}'.format(nb_layers))(y) 22 | y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y) 23 | 24 | model = Model(inputs=x, outputs=y) 25 | 26 | return model 27 | 28 | 29 | def train_model(): 30 | batch_size = 32 31 | epochs = 100 32 | 33 | vocsize = 2000 # top 2k 34 | max_len = 30 35 | train_ratio = 0.99 36 | 37 | # Build model 38 | model = LM(batch_size, vocsize=vocsize, nb_layers=1) 39 | model.compile(optimizer='adam', 40 | loss='sparse_categorical_crossentropy') 41 | 42 | # Prepare data 43 | path = './data/imdb-full.pkl' 44 | # Train 45 | train_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, shuffle=True, 46 | which_set='train', train_ratio=train_ratio, batch_size=batch_size) 47 | # Validation 48 | val_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, 49 | which_set='validation', train_ratio=train_ratio, batch_size=batch_size) 50 | 51 | train_steps = 2000 52 | val_steps = 200 53 | 54 | # Start training 55 | model.summary() 56 | model.fit_generator(train_gen(), steps_per_epoch=train_steps, 57 | validation_data=val_gen(), validation_steps=val_steps, 58 | epochs=epochs, verbose=1) 59 | 60 | 61 | def run_demo(): 62 | train_model() 63 | 64 | 65 | if __name__ == '__main__': 66 | run_demo() 67 | -------------------------------------------------------------------------------- /ternarynet/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple ternarized CNN on the MNIST dataset.
2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to % test accuracy after 20 epochs using tensorflow backend 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Activation, BatchNormalization, MaxPooling2D 13 | from keras.layers import Flatten 14 | from keras.optimizers import SGD, Adam, RMSprop 15 | from keras.callbacks import LearningRateScheduler 16 | from keras.utils import np_utils 17 | import keras.backend as K 18 | 19 | K.set_image_data_format('channels_first') 20 | 21 | from ternary_ops import ternarize 22 | from ternary_layers import TernaryDense, TernaryConv2D 23 | 24 | 25 | def ternary_tanh(x): 26 | x = K.clip(x, -1, 1) 27 | return ternarize(x) 28 | 29 | H = 1. 30 | kernel_lr_multiplier = 'Glorot' 31 | 32 | # nn 33 | batch_size = 50 34 | epochs = 20 35 | nb_channel = 1 36 | img_rows = 28 37 | img_cols = 28 38 | nb_filters = 32 39 | nb_conv = 3 40 | nb_pool = 2 41 | nb_hid = 128 42 | nb_classes = 10 43 | use_bias = False 44 | 45 | # learning rate schedule 46 | lr_start = 1e-3 47 | lr_end = 1e-4 48 | lr_decay = (lr_end / lr_start)**(1. / epochs) 49 | 50 | # BN 51 | epsilon = 1e-6 52 | momentum = 0.9 53 | 54 | # dropout 55 | p1 = 0.25 56 | p2 = 0.5 57 | 58 | # the data, shuffled and split between train and test sets 59 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 60 | 61 | X_train = X_train.reshape(60000, 1, 28, 28) 62 | X_test = X_test.reshape(10000, 1, 28, 28) 63 | X_train = X_train.astype('float32') 64 | X_test = X_test.astype('float32') 65 | X_train /= 255 66 | X_test /= 255 67 | print(X_train.shape[0], 'train samples') 68 | print(X_test.shape[0], 'test samples') 69 | 70 | # convert class vectors to binary class matrices 71 | Y_train = np_utils.to_categorical(y_train, nb_classes) * 2 - 1 # -1 or 1 for hinge loss 72 | Y_test = np_utils.to_categorical(y_test, nb_classes) * 2 - 1 73 | 74 | 75 | model = Sequential() 76 | # conv1 77 | model.add(TernaryConv2D(128, kernel_size=(3, 3), input_shape=(nb_channel, img_rows, img_cols), 78 | H=H, kernel_lr_multiplier=kernel_lr_multiplier, 79 | padding='same', use_bias=use_bias, name='conv1')) 80 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn1')) 81 | model.add(Activation(ternary_tanh, name='act1')) 82 | # conv2 83 | model.add(TernaryConv2D(128, kernel_size=(3, 3), H=H, kernel_lr_multiplier=kernel_lr_multiplier, 84 | padding='same', use_bias=use_bias, name='conv2')) 85 | model.add(MaxPooling2D(pool_size=(2, 2), name='pool2')) 86 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn2')) 87 | model.add(Activation(ternary_tanh, name='act2')) 88 | # conv3 89 | model.add(TernaryConv2D(256, kernel_size=(3, 3,), H=H, kernel_lr_multiplier=kernel_lr_multiplier, 90 | padding='same', use_bias=use_bias, name='conv3')) 91 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn3')) 92 | model.add(Activation(ternary_tanh, name='act3')) 93 | # conv4 94 | model.add(TernaryConv2D(256, kernel_size=(3, 3,), H=H, kernel_lr_multiplier=kernel_lr_multiplier, 95 | padding='same', use_bias=use_bias, name='conv4')) 96 | model.add(MaxPooling2D(pool_size=(2, 2), name='pool4')) 97 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn4')) 98 | model.add(Activation(ternary_tanh, name='act4')) 99 | model.add(Flatten()) 100 | # dense1 
101 | model.add(TernaryDense(1024, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense5')) 102 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn5')) 103 | model.add(Activation(ternary_tanh, name='act5')) 104 | # dense2 105 | model.add(TernaryDense(nb_classes, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense6')) 106 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn6')) 107 | 108 | opt = Adam(lr=lr_start) 109 | model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 110 | model.summary() 111 | 112 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 113 | history = model.fit(X_train, Y_train, 114 | batch_size=batch_size, epochs=epochs, 115 | verbose=1, validation_data=(X_test, Y_test), 116 | callbacks=[lr_scheduler]) 117 | score = model.evaluate(X_test, Y_test, verbose=0) 118 | print('Test score:', score[0]) 119 | print('Test accuracy:', score[1]) 120 | -------------------------------------------------------------------------------- /ternarynet/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple ternarized fully connected NN on the MNIST dataset. 2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 98.10% test accuracy after 20 epochs using theano backend 4 | ''' 5 | 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | np.random.seed(1337) # for reproducibility 10 | 11 | import keras.backend as K 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, BatchNormalization 15 | from keras.optimizers import SGD, Adam, RMSprop 16 | from keras.callbacks import LearningRateScheduler 17 | from keras.utils import np_utils 18 | 19 | from ternary_ops import ternarize 20 | from ternary_layers import TernaryDense 21 | 22 | 23 | class DropoutNoScale(Dropout): 24 | '''Keras Dropout scales the input in the training phase, which is undesirable here. 25 | ''' 26 | def call(self, inputs, mask=None): 27 | if 0. < self.rate < 1.: 28 | noise_shape = self._get_noise_shape(inputs) 29 | inputs = K.in_train_phase( 30 | K.dropout(inputs, self.rate, noise_shape) * (1. - self.rate), 31 | inputs) # multiplied by (1. - self.rate) for compensation 32 | return inputs 33 | 34 | 35 | def ternary_tanh(x): 36 | x = K.clip(x, -1, 1) 37 | return ternarize(x) 38 | 39 | 40 | batch_size = 100 41 | epochs = 20 42 | nb_classes = 10 43 | 44 | H = 'Glorot' 45 | kernel_lr_multiplier = 'Glorot' 46 | 47 | # network 48 | num_unit = 2048 49 | num_hidden = 3 50 | use_bias = False 51 | 52 | # learning rate schedule 53 | lr_start = 1e-3 54 | lr_end = 1e-4 55 | lr_decay = (lr_end / lr_start)**(1.
/ epochs) 56 | 57 | # BN 58 | epsilon = 1e-6 59 | momentum = 0.9 60 | 61 | # dropout 62 | drop_in = 0.2 63 | drop_hidden = 0.5 64 | 65 | # the data, shuffled and split between train and test sets 66 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 67 | 68 | X_train = X_train.reshape(60000, 784) 69 | X_test = X_test.reshape(10000, 784) 70 | X_train = X_train.astype('float32') 71 | X_test = X_test.astype('float32') 72 | X_train /= 255 73 | X_test /= 255 74 | print(X_train.shape[0], 'train samples') 75 | print(X_test.shape[0], 'test samples') 76 | 77 | # convert class vectors to binary class matrices 78 | Y_train = np_utils.to_categorical(y_train, nb_classes) * 2 - 1 # -1 or 1 for hinge loss 79 | Y_test = np_utils.to_categorical(y_test, nb_classes) * 2 - 1 80 | 81 | model = Sequential() 82 | model.add(DropoutNoScale(drop_in, input_shape=(784,), name='drop0')) 83 | for i in range(num_hidden): 84 | model.add(TernaryDense(num_unit, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, 85 | name='dense{}'.format(i+1))) 86 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn{}'.format(i+1))) 87 | model.add(Activation(ternary_tanh, name='act{}'.format(i+1))) 88 | model.add(DropoutNoScale(drop_hidden, name='drop{}'.format(i+1))) 89 | model.add(TernaryDense(10, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, 90 | name='dense')) 91 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn')) 92 | 93 | model.summary() 94 | 95 | opt = Adam(lr=lr_start) 96 | model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 97 | 98 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 99 | history = model.fit(X_train, Y_train, 100 | batch_size=batch_size, epochs=epochs, 101 | verbose=1, validation_data=(X_test, Y_test), 102 | callbacks=[lr_scheduler]) 103 | score = model.evaluate(X_test, Y_test, verbose=0) 104 | print('Test score:', score[0]) 105 | print('Test accuracy:', score[1]) 106 | -------------------------------------------------------------------------------- /ternarynet/ternary_layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | from keras import backend as K 5 | 6 | from keras.layers import InputSpec, Dense, Conv2D, SimpleRNN 7 | from keras import constraints 8 | from keras import initializers 9 | 10 | from ternary_ops import ternarize as ternarize, ternarize_dot 11 | 12 | 13 | class Clip(constraints.Constraint): 14 | def __init__(self, min_value, max_value=None): 15 | self.min_value = min_value 16 | self.max_value = max_value 17 | if not self.max_value: 18 | self.max_value = -self.min_value 19 | if self.min_value > self.max_value: 20 | self.min_value, self.max_value = self.max_value, self.min_value 21 | 22 | def __call__(self, p): 23 | return K.clip(p, self.min_value, self.max_value) 24 | 25 | def get_config(self): 26 | return {"min_value": self.min_value, 27 | "max_value": self.max_value} 28 | 29 | 30 | class TernaryDense(Dense): 31 | ''' Ternarized Dense layer 32 | 33 | References: 34 | - [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902} 35 | - [Ternary Weight Networks](http://arxiv.org/abs/1605.04711) 36 | ''' 37 | def __init__(self, units, H=1., kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): 38 | super(TernaryDense, self).__init__(units, **kwargs) 39 | self.H = H 40 | self.kernel_lr_multiplier = kernel_lr_multiplier 41 | 
self.bias_lr_multiplier = bias_lr_multiplier 42 | 43 | 44 | def build(self, input_shape): 45 | assert len(input_shape) >= 2 46 | input_dim = input_shape[1] 47 | 48 | if self.H == 'Glorot': 49 | self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) 50 | #print('Glorot H: {}'.format(self.H)) 51 | if self.kernel_lr_multiplier == 'Glorot': 52 | self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units))) 53 | #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) 54 | 55 | self.kernel_constraint = Clip(-self.H, self.H) 56 | self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) 57 | self.kernel = self.add_weight(shape=(input_dim, self.units), 58 | initializer=self.kernel_initializer, 59 | name='kernel', 60 | regularizer=self.kernel_regularizer, 61 | constraint=self.kernel_constraint) 62 | 63 | if self.use_bias: 64 | self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] 65 | self.bias = self.add_weight(shape=(self.units,), 66 | initializer=self.bias_initializer, 67 | name='bias', 68 | regularizer=self.bias_regularizer, 69 | constraint=self.bias_constraint) 70 | else: 71 | self.lr_multipliers = [self.kernel_lr_multiplier] 72 | self.bias = None 73 | 74 | self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) 75 | self.built = True 76 | 77 | def call(self, inputs): 78 | ternary_kernel = ternarize(self.kernel, H=self.H) 79 | output = K.dot(inputs, ternary_kernel) 80 | if self.use_bias: 81 | output = K.bias_add(output, self.bias) 82 | if self.activation is not None: 83 | output = self.activation(output) 84 | return output 85 | 86 | def get_config(self): 87 | config = {'H': self.H, 88 | 'kernel_lr_multiplier': self.kernel_lr_multiplier, 89 | 'bias_lr_multiplier': self.bias_lr_multiplier} 90 | base_config = super(TernaryDense, self).get_config() 91 | return dict(list(base_config.items()) + list(config.items())) 92 | 93 | 94 | class TernaryConv2D(Conv2D): 95 | '''Ternarized Convolution2D layer 96 | References: 97 | - [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902) 98 | - [Ternary Weight Networks](http://arxiv.org/abs/1605.04711) 99 | ''' 100 | def __init__(self, filters, kernel_lr_multiplier='Glorot', 101 | bias_lr_multiplier=None, H=1., **kwargs): 102 | super(TernaryConv2D, self).__init__(filters, **kwargs) 103 | self.H = H 104 | self.kernel_lr_multiplier = kernel_lr_multiplier 105 | self.bias_lr_multiplier = bias_lr_multiplier 106 | 107 | def build(self, input_shape): 108 | if self.data_format == 'channels_first': 109 | channel_axis = 1 110 | else: 111 | channel_axis = -1 112 | if input_shape[channel_axis] is None: 113 | raise ValueError('The channel dimension of the inputs ' 114 | 'should be defined. Found `None`.') 115 | 116 | input_dim = input_shape[channel_axis] 117 | kernel_shape = self.kernel_size + (input_dim, self.filters) 118 | 119 | base = self.kernel_size[0] * self.kernel_size[1] 120 | if self.H == 'Glorot': 121 | nb_input = int(input_dim * base) 122 | nb_output = int(self.filters * base) 123 | self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) 124 | #print('Glorot H: {}'.format(self.H)) 125 | 126 | if self.kernel_lr_multiplier == 'Glorot': 127 | nb_input = int(input_dim * base) 128 | nb_output = int(self.filters * base) 129 | self.kernel_lr_multiplier = np.float32(1.
/ np.sqrt(1.5 / (nb_input + nb_output))) 130 | #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) 131 | 132 | self.kernel_constraint = Clip(-self.H, self.H) 133 | self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) 134 | self.kernel = self.add_weight(shape=kernel_shape, 135 | initializer=self.kernel_initializer, 136 | name='kernel', 137 | regularizer=self.kernel_regularizer, 138 | constraint=self.kernel_constraint) 139 | 140 | if self.use_bias: 141 | self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] 142 | self.bias = self.add_weight((self.filters,), 143 | initializer=self.bias_initializer, 144 | name='bias', 145 | regularizer=self.bias_regularizer, 146 | constraint=self.bias_constraint) 147 | 148 | else: 149 | self.lr_multipliers = [self.kernel_lr_multiplier] 150 | self.bias = None 151 | 152 | # Set input spec. 153 | self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) 154 | self.built = True 155 | 156 | def call(self, inputs): 157 | ternary_kernel = ternarize(self.kernel, H=self.H) 158 | outputs = K.conv2d( 159 | inputs, 160 | ternary_kernel, 161 | strides=self.strides, 162 | padding=self.padding, 163 | data_format=self.data_format, 164 | dilation_rate=self.dilation_rate) 165 | 166 | if self.use_bias: 167 | outputs = K.bias_add( 168 | outputs, 169 | self.bias, 170 | data_format=self.data_format) 171 | 172 | if self.activation is not None: 173 | return self.activation(outputs) 174 | return outputs 175 | 176 | def get_config(self): 177 | config = {'H': self.H, 178 | 'kernel_lr_multiplier': self.kernel_lr_multiplier, 179 | 'bias_lr_multiplier': self.bias_lr_multiplier} 180 | base_config = super(TernaryConv2D, self).get_config() 181 | return dict(list(base_config.items()) + list(config.items())) 182 | 183 | 184 | class TernaryRNN(SimpleRNN): 185 | ''' Ternarized RNN layer 186 | 187 | References: 188 | - [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902) 189 | ''' 190 | def preprocess_input(self, inputs, training=None): 191 | return inputs 192 | 193 | def step(self, inputs, states): 194 | if 0 < self.dropout < 1: 195 | h = ternarize_dot(inputs * states[1], self.kernel) 196 | else: 197 | h = ternarize_dot(inputs, self.kernel) 198 | if self.bias is not None: 199 | h = K.bias_add(h, self.bias) 200 | 201 | prev_output = states[0] 202 | if 0 < self.recurrent_dropout < 1: 203 | prev_output *= states[2] 204 | output = h + ternarize_dot(prev_output, self.recurrent_kernel) 205 | if self.activation is not None: 206 | output = self.activation(output) 207 | 208 | # Properly set learning phase on output tensor.
209 | if 0 < self.dropout + self.recurrent_dropout: 210 | output._uses_learning_phase = True 211 | return output, [output] 212 | 213 | def get_constants(self, inputs, training=None): 214 | constants = [] 215 | if 0 < self.dropout < 1: 216 | input_shape = K.int_shape(inputs) 217 | input_dim = input_shape[-1] 218 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 219 | ones = K.tile(ones, (1, int(input_dim))) 220 | 221 | def dropped_inputs(): 222 | return K.dropout(ones, self.dropout) 223 | 224 | dp_mask = K.in_train_phase(dropped_inputs, 225 | ones, 226 | training=training) 227 | constants.append(dp_mask) 228 | else: 229 | constants.append(K.cast_to_floatx(1.)) 230 | 231 | if 0 < self.recurrent_dropout < 1: 232 | ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1))) 233 | ones = K.tile(ones, (1, self.units)) 234 | 235 | def dropped_inputs(): 236 | return K.dropout(ones, self.recurrent_dropout) 237 | rec_dp_mask = K.in_train_phase(dropped_inputs, 238 | ones, 239 | training=training) 240 | constants.append(rec_dp_mask) 241 | else: 242 | constants.append(K.cast_to_floatx(1.)) 243 | return constants 244 | 245 | # Aliases 246 | 247 | TernaryConvolution2D = TernaryConv2D 248 | -------------------------------------------------------------------------------- /ternarynet/ternary_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import keras.backend as K 4 | 5 | 6 | def switch(condition, t, e): 7 | if K.backend() == 'tensorflow': 8 | import tensorflow as tf 9 | return tf.where(condition, t, e) 10 | elif K.backend() == 'theano': 11 | import theano.tensor as tt 12 | return tt.switch(condition, t, e) 13 | 14 | 15 | def _ternarize(W, H=1): 16 | '''The weights' ternarization function, 17 | 18 | # References: 19 | - [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902) 20 | - [Ternary Weight Networks](http://arxiv.org/abs/1605.04711) 21 | ''' 22 | W /= H 23 | 24 | ones = K.ones_like(W) 25 | zeros = K.zeros_like(W) 26 | Wt = switch(W > 0.5, ones, switch(W <= -0.5, -ones, zeros)) 27 | 28 | Wt *= H 29 | 30 | return Wt 31 | 32 | 33 | def ternarize(W, H=1): 34 | '''The weights' ternarization function, 35 | 36 | # References: 37 | - [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902) 38 | - [Ternary Weight Networks](http://arxiv.org/abs/1605.04711) 39 | ''' 40 | Wt = _ternarize(W, H) 41 | return W + K.stop_gradient(Wt - W) 42 | 43 | 44 | def ternarize_dot(x, W): 45 | '''For RNN (maybe Dense or Conv too). 46 | Refer to 'Recurrent Neural Networks with Limited Numerical Precision' Section 3.1 47 | ''' 48 | Wt = _ternarize(W) 49 | return K.dot(x, W) + K.stop_gradient(K.dot(x, Wt - W)) 50 | -------------------------------------------------------------------------------- /vae/README.md: -------------------------------------------------------------------------------- 1 | Simple keras implementation of Variational Auto-Encoder (VAE) on MNIST 2 | 3 | > This repo is with the [blog on VAE (in Chinese)](http://blog.csdn.net/jackytintin/article/details/53641885). 
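For quick reference, the loss optimized in `variational_autoencoder.py` is the usual negative ELBO for a diagonal-Gaussian posterior; the following is only a sketch matching the `kl_loss` plus `xent_loss`/`mse_loss` terms in that script, where $\mu$ and $\log\sigma^2$ stand for `z_mean` and `z_log_var` and $m$ is the latent dimension:

$$\mathcal{L}(x) = \underbrace{\mathrm{recon}(x, \hat{x})}_{\text{binary crossentropy or MSE, summed over the 784 pixels}} \; - \; \frac{1}{2}\sum_{j=1}^{m}\left(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\right), \qquad z = \mu + \sigma \odot \epsilon,\ \epsilon \sim \mathcal{N}(0, I).$$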
4 | 5 | 6 | ## Learned latent space 7 | ### MSE loss 8 | ![](./img/z_mse.png) 9 | ### crossentropy loss 10 | ![](./img/z_xent.png) 11 | 12 | ## Learned manifold 13 | ### MSE loss 14 | ![](./img/x_mse.png) 15 | ### crossentropy loss 16 | ![](./img/x_xent.png) 17 | 18 | 19 | ## Imputation 20 | ### MSE loss 21 | ![](./img/i_mse.png) 22 | ### crossentropy loss 23 | ![](./img/i_xent.png) 24 | 25 | ## References: 26 | * Kingma et al. [Auto-Encoding Variational Bayes](https://arxiv.org/abs/1312.6114). 27 | -------------------------------------------------------------------------------- /vae/img/i_mse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingKe/nn_playground/8254491a79bd0e02d392f250b7da07bccc7cea2a/vae/img/i_mse.png -------------------------------------------------------------------------------- /vae/img/i_xent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingKe/nn_playground/8254491a79bd0e02d392f250b7da07bccc7cea2a/vae/img/i_xent.png -------------------------------------------------------------------------------- /vae/img/x_mse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingKe/nn_playground/8254491a79bd0e02d392f250b7da07bccc7cea2a/vae/img/x_mse.png -------------------------------------------------------------------------------- /vae/img/x_xent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingKe/nn_playground/8254491a79bd0e02d392f250b7da07bccc7cea2a/vae/img/x_xent.png -------------------------------------------------------------------------------- /vae/img/z_mse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingKe/nn_playground/8254491a79bd0e02d392f250b7da07bccc7cea2a/vae/img/z_mse.png -------------------------------------------------------------------------------- /vae/img/z_xent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DingKe/nn_playground/8254491a79bd0e02d392f250b7da07bccc7cea2a/vae/img/z_xent.png -------------------------------------------------------------------------------- /vae/variational_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras. 2 | 3 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from scipy.stats import norm 8 | 9 | from keras.layers import Input, Dense, Lambda 10 | from keras.models import Model 11 | from keras.regularizers import l2 12 | from keras import backend as K 13 | from keras import objectives 14 | from keras.datasets import mnist 15 | 16 | np.random.seed(1111) # for reproducibility 17 | 18 | batch_size = 100 19 | n = 784 20 | m = 2 21 | hidden_dim = 256 22 | epochs = 50 23 | epsilon_std = 1.0 24 | use_loss = 'xent' # 'mse' or 'xent' 25 | 26 | decay = 1e-4 # weight decay, a.k.a.
l2 regularization 27 | use_bias = True 28 | 29 | ## Encoder 30 | x = Input(batch_shape=(batch_size, n)) 31 | h_encoded = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='tanh')(x) 32 | z_mean = Dense(m, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias)(h_encoded) 33 | z_log_var = Dense(m, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias)(h_encoded) 34 | 35 | 36 | ## Sampler 37 | def sampling(args): 38 | z_mean, z_log_var = args 39 | epsilon = K.random_normal_variable(shape=(batch_size, m), mean=0., 40 | scale=epsilon_std) 41 | return z_mean + K.exp(z_log_var / 2) * epsilon 42 | 43 | z = Lambda(sampling, output_shape=(m,))([z_mean, z_log_var]) 44 | 45 | # we instantiate these layers separately so as to reuse them later 46 | decoder_h = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='tanh') 47 | decoder_mean = Dense(n, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='sigmoid') 48 | 49 | ## Decoder 50 | h_decoded = decoder_h(z) 51 | x_hat = decoder_mean(h_decoded) 52 | 53 | 54 | ## loss 55 | def vae_loss(x, x_hat): 56 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 57 | xent_loss = n * objectives.binary_crossentropy(x, x_hat) 58 | mse_loss = n * objectives.mse(x, x_hat) 59 | if use_loss == 'xent': 60 | return xent_loss + kl_loss 61 | elif use_loss == 'mse': 62 | return mse_loss + kl_loss 63 | else: 64 | raise Exception('Unknown loss!') 65 | 66 | vae = Model(x, x_hat) 67 | vae.compile(optimizer='rmsprop', loss=vae_loss) 68 | 69 | # train the VAE on MNIST digits 70 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 71 | 72 | x_train = x_train.astype('float32') / 255. 73 | x_test = x_test.astype('float32') / 255.
74 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 75 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 76 | 77 | vae.fit(x_train, x_train, 78 | shuffle=True, 79 | epochs=epochs, 80 | batch_size=batch_size, 81 | validation_data=(x_test, x_test)) 82 | 83 | ##----------Visualization----------## 84 | # build a model to project inputs on the latent space 85 | encoder = Model(x, z_mean) 86 | 87 | # display a 2D plot of the digit classes in the latent space 88 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 89 | fig = plt.figure(figsize=(6, 6)) 90 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 91 | plt.colorbar() 92 | plt.show() 93 | fig.savefig('z_{}.png'.format(use_loss)) 94 | 95 | # build a digit generator that can sample from the learned distribution 96 | decoder_input = Input(shape=(m,)) 97 | _h_decoded = decoder_h(decoder_input) 98 | _x_hat = decoder_mean(_h_decoded) 99 | generator = Model(decoder_input, _x_hat) 100 | 101 | # display a 2D manifold of the digits 102 | n = 15 # figure with 15x15 digits 103 | digit_size = 28 104 | figure = np.zeros((digit_size * n, digit_size * n)) 105 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian 106 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian 107 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 108 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 109 | 110 | for i, yi in enumerate(grid_x): 111 | for j, xi in enumerate(grid_y): 112 | z_sample = np.array([[xi, yi]]) 113 | x_decoded = generator.predict(z_sample) 114 | digit = x_decoded[0].reshape(digit_size, digit_size) 115 | figure[i * digit_size: (i + 1) * digit_size, 116 | j * digit_size: (j + 1) * digit_size] = digit 117 | 118 | fig = plt.figure(figsize=(10, 10)) 119 | plt.imshow(figure, cmap='Greys_r') 120 | plt.show() 121 | fig.savefig('x_{}.png'.format(use_loss)) 122 | 123 | # data imputation 124 | figure = np.zeros((digit_size * 3, digit_size * n)) 125 | x = x_test[:batch_size,:] 126 | x_corrupted = np.copy(x) 127 | x_corrupted[:, 300:400] = 0 128 | x_encoded = vae.predict(x_corrupted, batch_size=batch_size).reshape((-1, digit_size, digit_size)) 129 | x = x.reshape((-1, digit_size, digit_size)) 130 | x_corrupted = x_corrupted.reshape((-1, digit_size, digit_size)) 131 | for i in range(n): 132 | xi = x[i] 133 | xi_c = x_corrupted[i] 134 | xi_e = x_encoded[i] 135 | figure[:digit_size, i * digit_size:(i+1)*digit_size] = xi 136 | figure[digit_size:2 * digit_size, i * digit_size:(i+1)*digit_size] = xi_c 137 | figure[2 * digit_size:, i * digit_size:(i+1)*digit_size] = xi_e 138 | 139 | fig = plt.figure(figsize=(10, 10)) 140 | plt.imshow(figure, cmap='Greys_r') 141 | plt.show() 142 | fig.savefig('i_{}.png'.format(use_loss)) 143 | -------------------------------------------------------------------------------- /weightnorm/README.md: -------------------------------------------------------------------------------- 1 | Weight Normalization 2 | 3 | ## Run 4 | 5 | ### MLP w/ WN on MNIST 6 | python mnist_mlp.py 7 | 8 | ### CNN w/ WN on MNIST 9 | python mnist_cnn.py 10 | 11 | ### CNN w/ WN on CIFAR10 12 | python cifar10_cnn.py 13 | 14 | ### GRU text generator w/ WN 15 | python gru_text_generation.py 16 | 17 | ### GRU Language Model 18 | python imdb_lm.py 19 | > See [this](../gcnn/README.md) for how to prepare data. 20 | 21 | ## References: 22 | Salimans and Kingma.
[Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks](https://papers.nips.cc/paper/6114-weight-normalization-a-simple-reparameterization-to-accelerate-training-of-deep-neural-networks.pdf) 23 | -------------------------------------------------------------------------------- /weightnorm/cifar10_cnn.py: -------------------------------------------------------------------------------- 1 | '''Train a simple deep CNN on the CIFAR10 small images dataset. 2 | 3 | GPU run command with Theano backend (with TensorFlow, the GPU is automatically used): 4 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py 5 | 6 | Conventional CNN: 7 | It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 8 | CNN with WN: 9 | It gets down to 0.61 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 10 | ''' 11 | 12 | from __future__ import print_function 13 | from keras.datasets import cifar10 14 | from keras.preprocessing.image import ImageDataGenerator 15 | from keras.models import Sequential 16 | from keras.layers import Dense, Dropout, Activation, Flatten 17 | from keras.layers import Conv2D, MaxPooling2D 18 | from keras.utils import np_utils 19 | 20 | from weight_norm_layers import * 21 | 22 | K.set_image_data_format('channels_first') 23 | 24 | batch_size = 32 25 | nb_classes = 10 26 | epochs = 50 27 | data_augmentation = True 28 | 29 | # input image dimensions 30 | img_rows, img_cols = 32, 32 31 | # The CIFAR10 images are RGB. 32 | img_channels = 3 33 | 34 | # build model 35 | model = Sequential() 36 | model.add(WeightNormConv2D(32, (3, 3), padding='same', 37 | input_shape=(img_channels, img_rows, img_cols))) 38 | model.add(Activation('relu')) 39 | model.add(WeightNormConv2D(32, (3, 3))) 40 | model.add(Activation('relu')) 41 | model.add(MaxPooling2D(pool_size=(2, 2))) 42 | model.add(Dropout(0.25)) 43 | 44 | model.add(WeightNormConv2D(64, (3, 3), padding='same')) 45 | model.add(Activation('relu')) 46 | model.add(WeightNormConv2D(64, (3, 3))) 47 | model.add(Activation('relu')) 48 | model.add(MaxPooling2D(pool_size=(2, 2))) 49 | model.add(Dropout(0.25)) 50 | 51 | model.add(Flatten()) 52 | model.add(WeightNormDense(512)) 53 | model.add(Activation('relu')) 54 | model.add(Dropout(0.5)) 55 | model.add(WeightNormDense(nb_classes)) 56 | model.add(Activation('softmax')) 57 | 58 | # Let's train the model using RMSprop 59 | model.compile(loss='categorical_crossentropy', 60 | optimizer='rmsprop', 61 | metrics=['accuracy']) 62 | 63 | model.summary() 64 | 65 | # The data, shuffled and split between train and test sets: 66 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 67 | print('X_train shape:', X_train.shape) 68 | print(X_train.shape[0], 'train samples') 69 | print(X_test.shape[0], 'test samples') 70 | 71 | # Convert class vectors to binary class matrices. 
72 | Y_train = np_utils.to_categorical(y_train, nb_classes) 73 | Y_test = np_utils.to_categorical(y_test, nb_classes) 74 | 75 | X_train = X_train.astype('float32') 76 | X_test = X_test.astype('float32') 77 | X_train /= 255 78 | X_test /= 255 79 | 80 | if not data_augmentation: 81 | print('Not using data augmentation.') 82 | model.fit(X_train, Y_train, 83 | batch_size=batch_size, 84 | epochs=epochs, 85 | validation_data=(X_test, Y_test), 86 | shuffle=True) 87 | else: 88 | print('Using real-time data augmentation.') 89 | # This will do preprocessing and realtime data augmentation: 90 | datagen = ImageDataGenerator( 91 | featurewise_center=False, # set input mean to 0 over the dataset 92 | samplewise_center=False, # set each sample mean to 0 93 | featurewise_std_normalization=False, # divide inputs by std of the dataset 94 | samplewise_std_normalization=False, # divide each input by its std 95 | zca_whitening=False, # apply ZCA whitening 96 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 97 | width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) 98 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height) 99 | horizontal_flip=True, # randomly flip images 100 | vertical_flip=False) # randomly flip images 101 | 102 | # Compute quantities required for featurewise normalization 103 | # (std, mean, and principal components if ZCA whitening is applied). 104 | datagen.fit(X_train) 105 | 106 | # Fit the model on the batches generated by datagen.flow(). 107 | model.fit_generator(datagen.flow(X_train, Y_train, 108 | batch_size=batch_size), 109 | steps_per_epoch=X_train.shape[0]/batch_size, 110 | epochs=epochs, 111 | validation_data=(X_test, Y_test)) 112 | -------------------------------------------------------------------------------- /weightnorm/gru_text_generation.py: -------------------------------------------------------------------------------- 1 | '''Example script to generate text from Nietzsche's writings. 
2 | Modified from keras' example/lstm_text_generation.py 3 | ''' 4 | 5 | from __future__ import print_function 6 | from keras.models import Sequential 7 | from keras.layers import Dense, Activation, Dropout 8 | from keras.optimizers import RMSprop 9 | from keras.utils.data_utils import get_file 10 | import numpy as np 11 | import random 12 | import sys 13 | 14 | from weight_norm_layers import WeightNormGRU 15 | 16 | path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt") 17 | text = open(path).read().lower() 18 | print('corpus length:', len(text)) 19 | 20 | chars = sorted(list(set(text))) 21 | print('total chars:', len(chars)) 22 | char_indices = dict((c, i) for i, c in enumerate(chars)) 23 | indices_char = dict((i, c) for i, c in enumerate(chars)) 24 | 25 | # cut the text in semi-redundant sequences of maxlen characters 26 | maxlen = 40 27 | step = 3 28 | sentences = [] 29 | next_chars = [] 30 | for i in range(0, len(text) - maxlen, step): 31 | sentences.append(text[i: i + maxlen]) 32 | next_chars.append(text[i + maxlen]) 33 | print('nb sequences:', len(sentences)) 34 | 35 | print('Vectorization...') 36 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 37 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 38 | for i, sentence in enumerate(sentences): 39 | for t, char in enumerate(sentence): 40 | X[i, t, char_indices[char]] = 1 41 | y[i, char_indices[next_chars[i]]] = 1 42 | 43 | 44 | # build the model: a single GRU 45 | print('Build model...') 46 | model = Sequential() 47 | model.add(WeightNormGRU(128, input_shape=(maxlen, len(chars)))) 48 | model.add(Dense(len(chars))) 49 | model.add(Activation('softmax')) 50 | 51 | optimizer = RMSprop(lr=0.01) 52 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 53 | 54 | 55 | def sample(preds, temperature=1.0): 56 | # helper function to sample an index from a probability array 57 | preds = np.asarray(preds).astype('float64') 58 | preds = np.log(preds) / temperature 59 | exp_preds = np.exp(preds) 60 | preds = exp_preds / np.sum(exp_preds) 61 | probas = np.random.multinomial(1, preds, 1) 62 | return np.argmax(probas) 63 | 64 | # train the model, output generated text after each iteration 65 | for iteration in range(1, 60): 66 | print() 67 | print('-' * 50) 68 | print('Iteration', iteration) 69 | model.fit(X, y, batch_size=128, epochs=1) 70 | 71 | start_index = random.randint(0, len(text) - maxlen - 1) 72 | 73 | for diversity in [0.2, 0.5, 1.0, 1.2]: 74 | print() 75 | print('----- diversity:', diversity) 76 | 77 | generated = '' 78 | sentence = text[start_index: start_index + maxlen] 79 | generated += sentence 80 | print('----- Generating with seed: "' + sentence + '"') 81 | sys.stdout.write(generated) 82 | 83 | for i in range(400): 84 | x = np.zeros((1, maxlen, len(chars))) 85 | for t, char in enumerate(sentence): 86 | x[0, t, char_indices[char]] = 1. 
87 | 88 | preds = model.predict(x, verbose=0)[0] 89 | next_index = sample(preds, diversity) 90 | next_char = indices_char[next_index] 91 | 92 | generated += next_char 93 | sentence = sentence[1:] + next_char 94 | 95 | sys.stdout.write(next_char) 96 | sys.stdout.flush() 97 | print() 98 | -------------------------------------------------------------------------------- /weightnorm/imdb_generator.py: -------------------------------------------------------------------------------- 1 | ../gcnn/imdb_generator.py -------------------------------------------------------------------------------- /weightnorm/imdb_lm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''Simple RNN for Language Model 3 | ''' 4 | from __future__ import print_function 5 | import os 6 | 7 | from keras.models import Model 8 | from keras.layers import Input, Embedding, Dense, TimeDistributed 9 | from keras.optimizers import * 10 | 11 | from weight_norm_layers import * 12 | from imdb_generator import IMDBLM 13 | 14 | 15 | def LM(batch_size, vocsize=20000, embed_dim=20, hidden_dim=30, nb_layers=1): 16 | x = Input(batch_shape=(batch_size, None)) 17 | # embedding 18 | y = Embedding(vocsize+2, embed_dim, mask_zero=False)(x) 19 | for i in range(nb_layers-1): 20 | y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(i + 1))(y) 21 | y = WeightNormGRU(hidden_dim, return_sequences=True, name='wngru{}'.format(nb_layers))(y) 22 | y = TimeDistributed(Dense(vocsize+2, activation='softmax', name='dense{}'.format(nb_layers)))(y) 23 | 24 | model = Model(inputs=x, outputs=y) 25 | 26 | return model 27 | 28 | 29 | def train_model(): 30 | batch_size = 32 31 | nb_epoch = 100 32 | 33 | vocsize = 2000 # top 2k 34 | max_len = 30 35 | train_ratio = 0.99 36 | 37 | # Build model 38 | model = LM(batch_size, vocsize=vocsize, nb_layers=3) 39 | model.compile(optimizer='adam', 40 | loss='sparse_categorical_crossentropy') 41 | 42 | # Prepare data 43 | path = './data/imdb-full.pkl' 44 | # Train 45 | train_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, shuffle=True, 46 | which_set='train', train_ratio=train_ratio, batch_size=batch_size) 47 | # Validation 48 | val_gen = IMDBLM(path=path, max_len=max_len, vocab_size=vocsize, 49 | which_set='validation', train_ratio=train_ratio, batch_size=batch_size) 50 | 51 | train_samples = 20000 52 | val_samples = 2000 53 | 54 | # Start training 55 | model.summary() 56 | model.fit_generator(train_gen(), samples_per_epoch=train_samples, 57 | validation_data=val_gen(), nb_val_samples=val_samples, 58 | nb_epoch=nb_epoch, verbose=1) 59 | 60 | 61 | def run_demo(): 62 | train_model() 63 | 64 | 65 | if __name__ == '__main__': 66 | run_demo() 67 | -------------------------------------------------------------------------------- /weightnorm/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple CNN with weight normalization on the MNIST dataset.
2 | Modified from keras' examples/mnist_mlp.py 3 | w/o WN: 4 | Gets to 99.25% test accuracy after 12 epochs 5 | w/ WN: 6 | Gets to 99.45% test accuracy after 10 epochs using tensorflow backend 7 | ''' 8 | 9 | from __future__ import print_function 10 | import numpy as np 11 | np.random.seed(1337) # for reproducibility 12 | 13 | from keras.datasets import mnist 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Activation, BatchNormalization, MaxPooling2D 16 | from keras.layers import Flatten 17 | from keras.optimizers import SGD, Adam, RMSprop 18 | from keras.callbacks import LearningRateScheduler 19 | from keras.utils import np_utils 20 | import keras.backend as K 21 | K.set_image_data_format('channels_first') 22 | 23 | from weight_norm_layers import * 24 | 25 | 26 | # nn 27 | batch_size = 50 28 | epochs = 20 29 | nb_channel = 1 30 | img_rows = 28 31 | img_cols = 28 32 | nb_classes = 10 33 | use_bias = False 34 | 35 | # learning rate schedule 36 | lr_start = 1e-3 37 | lr_end = 1e-4 38 | lr_decay = (lr_end / lr_start)**(1. / epochs) 39 | 40 | # BN 41 | epsilon = 1e-6 42 | momentum = 0.9 43 | 44 | # dropout 45 | p1 = 0.25 46 | p2 = 0.5 47 | 48 | # the data, shuffled and split between train and test sets 49 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 50 | 51 | X_train = X_train.reshape(60000, 1, 28, 28) 52 | X_test = X_test.reshape(10000, 1, 28, 28) 53 | X_train = X_train.astype('float32') 54 | X_test = X_test.astype('float32') 55 | X_train /= 255 56 | X_test /= 255 57 | print(X_train.shape[0], 'train samples') 58 | print(X_test.shape[0], 'test samples') 59 | 60 | # convert class vectors to binary class matrices 61 | Y_train = np_utils.to_categorical(y_train, nb_classes) 62 | Y_test = np_utils.to_categorical(y_test, nb_classes) 63 | 64 | 65 | model = Sequential() 66 | # conv1 67 | model.add(WeightNormConv2D(128, (3, 3), input_shape=(nb_channel, img_rows, img_cols), 68 | padding='same', use_bias=use_bias, name='conv1')) 69 | #model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn1')) 70 | model.add(Activation('relu', name='act1')) 71 | # conv2 72 | model.add(WeightNormConv2D(128, (3, 3), 73 | padding='same', use_bias=use_bias, name='conv2')) 74 | model.add(MaxPooling2D(pool_size=(2, 2), name='pool2')) 75 | #model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn2')) 76 | model.add(Activation('relu', name='act2')) 77 | # conv3 78 | model.add(WeightNormConv2D(256, (3, 3), 79 | padding='same', use_bias=use_bias, name='conv3')) 80 | #model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn3')) 81 | model.add(Activation('relu', name='act3')) 82 | # conv4 83 | model.add(WeightNormConv2D(256, (3, 3), 84 | padding='same', use_bias=use_bias, name='conv4')) 85 | model.add(MaxPooling2D(pool_size=(2, 2), name='pool4')) 86 | #model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn4')) 87 | model.add(Activation('relu', name='act4')) 88 | model.add(Flatten()) 89 | # dense1 90 | model.add(WeightNormDense(1024, use_bias=use_bias, name='dense5')) 91 | #model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn5')) 92 | model.add(Activation('relu', name='act5')) 93 | # dense2 94 | model.add(WeightNormDense(nb_classes, use_bias=use_bias, name='dense6')) 95 | #model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn6')) 96 | model.add(Activation('softmax', name='act6')) 97 | 98 | opt = Adam(lr=lr_start) 99 | 
model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 100 | model.summary() 101 | 102 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 103 | history = model.fit(X_train, Y_train, 104 | batch_size=batch_size, epochs=epochs, 105 | verbose=1, validation_data=(X_test, Y_test), 106 | callbacks=[lr_scheduler]) 107 | score = model.evaluate(X_test, Y_test, verbose=0) 108 | print('Test score:', score[0]) 109 | print('Test accuracy:', score[1]) 110 | -------------------------------------------------------------------------------- /weightnorm/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple fully connected NN with Weight Normalization on the MNIST dataset. 2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 98.4% test accuracy after 20 epochs using tensorflow backend 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from keras.layers.core import Dense, Dropout, Activation 13 | from keras.optimizers import SGD, Adam, RMSprop 14 | from keras.utils import np_utils 15 | 16 | from weight_norm_layers import * 17 | 18 | batch_size = 128 19 | nb_classes = 10 20 | epochs = 20 21 | 22 | # the data, shuffled and split between train and test sets 23 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 24 | 25 | X_train = X_train.reshape(60000, 784) 26 | X_test = X_test.reshape(10000, 784) 27 | X_train = X_train.astype('float32') 28 | X_test = X_test.astype('float32') 29 | X_train /= 255 30 | X_test /= 255 31 | print(X_train.shape[0], 'train samples') 32 | print(X_test.shape[0], 'test samples') 33 | 34 | # convert class vectors to binary class matrices 35 | Y_train = np_utils.to_categorical(y_train, nb_classes) 36 | Y_test = np_utils.to_categorical(y_test, nb_classes) 37 | 38 | model = Sequential() 39 | model.add(WeightNormDense(512, input_shape=(784,))) 40 | model.add(Activation('relu')) 41 | model.add(Dropout(0.2)) 42 | model.add(WeightNormDense(512)) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.2)) 45 | model.add(WeightNormDense(10)) 46 | model.add(Activation('softmax')) 47 | 48 | model.summary() 49 | 50 | model.compile(loss='categorical_crossentropy', 51 | optimizer=RMSprop(), 52 | metrics=['accuracy']) 53 | 54 | history = model.fit(X_train, Y_train, 55 | batch_size=batch_size, epochs=epochs, 56 | verbose=1, validation_data=(X_test, Y_test)) 57 | score = model.evaluate(X_test, Y_test, verbose=0) 58 | print('Test score:', score[0]) 59 | print('Test accuracy:', score[1]) 60 | -------------------------------------------------------------------------------- /weightnorm/weight_norm_layers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | 4 | from keras import backend as K 5 | from keras.engine import InputSpec 6 | from keras.layers import Dense, Conv2D, GRU 7 | 8 | 9 | class WeightNormDense(Dense): 10 | def build(self, input_shape): 11 | assert len(input_shape) >= 2 12 | input_dim = input_shape[-1] 13 | 14 | self.kernel = self.add_weight(shape=(input_dim, self.units), 15 | initializer=self.kernel_initializer, 16 | name='kernel', 17 | regularizer=self.kernel_regularizer, 18 | constraint=self.kernel_constraint) 19 | self.g = self.add_weight(shape=(self.units,), 20 | initializer='one', 21 | name='g') 22 | if
self.use_bias: 23 | self.bias = self.add_weight(shape=(self.units,), 24 | initializer=self.bias_initializer, 25 | name='bias', 26 | regularizer=self.bias_regularizer, 27 | constraint=self.bias_constraint) 28 | else: 29 | self.bias = None 30 | self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) 31 | self.built = True 32 | 33 | def call(self, inputs): 34 | kernel = self.kernel * self.g / K.sqrt(K.sum(K.square(self.kernel), axis=0)) 35 | output = K.dot(inputs, kernel) 36 | if self.use_bias: 37 | output = K.bias_add(output, self.bias) 38 | if self.activation is not None: 39 | output = self.activation(output) 40 | return output 41 | 42 | 43 | class WeightNormConv2D(Conv2D): 44 | def build(self, input_shape): 45 | if self.data_format == 'channels_first': 46 | channel_axis = 1 47 | else: 48 | channel_axis = -1 49 | if input_shape[channel_axis] is None: 50 | raise ValueError('The channel dimension of the inputs ' 51 | 'should be defined. Found `None`.') 52 | input_dim = input_shape[channel_axis] 53 | kernel_shape = self.kernel_size + (input_dim, self.filters) 54 | 55 | self.kernel = self.add_weight(shape=kernel_shape, 56 | initializer=self.kernel_initializer, 57 | name='kernel', 58 | regularizer=self.kernel_regularizer, 59 | constraint=self.kernel_constraint) 60 | self.g = self.add_weight(shape=(1, 1, 1, self.filters), 61 | initializer='one', 62 | name='g') 63 | if self.use_bias: 64 | self.bias = self.add_weight(shape=(self.filters,), 65 | initializer=self.bias_initializer, 66 | name='bias', 67 | regularizer=self.bias_regularizer, 68 | constraint=self.bias_constraint) 69 | else: 70 | self.bias = None 71 | # Set input spec. 72 | self.input_spec = InputSpec(ndim=self.rank + 2, 73 | axes={channel_axis: input_dim}) 74 | 75 | self.built = True 76 | 77 | def call(self, x): 78 | kernel = self.kernel * self.g / K.sqrt(K.sum(K.square(self.kernel), axis=[0, 1, 2], keepdims=True)) 79 | output = K.conv2d(x, kernel, strides=self.strides, 80 | padding=self.padding, 81 | data_format=self.data_format) 82 | if self.use_bias: 83 | output = K.bias_add(output, self.bias, data_format=self.data_format) 84 | if self.activation is not None: 85 | output = self.activation(output) 86 | return output 87 | 88 | 89 | class WeightNormGRU(GRU): 90 | def build(self, input_shape): 91 | if isinstance(input_shape, list): 92 | input_shape = input_shape[0] 93 | 94 | batch_size = input_shape[0] if self.stateful else None 95 | self.input_dim = input_shape[2] 96 | self.input_spec = InputSpec(shape=(batch_size, None, self.input_dim)) 97 | self.state_spec = InputSpec(shape=(batch_size, self.units)) 98 | 99 | self.states = [None] 100 | if self.stateful: 101 | self.reset_states() 102 | 103 | self.kernel = self.add_weight(shape=(self.input_dim, self.units * 3), 104 | name='kernel', 105 | initializer=self.kernel_initializer, 106 | regularizer=self.kernel_regularizer, 107 | constraint=self.kernel_constraint) 108 | self.recurrent_kernel = self.add_weight( 109 | shape=(self.units, self.units * 3), 110 | name='recurrent_kernel', 111 | initializer=self.recurrent_initializer, 112 | regularizer=self.recurrent_regularizer, 113 | constraint=self.recurrent_constraint) 114 | 115 | if self.use_bias: 116 | self.bias = self.add_weight(shape=(self.units * 3,), 117 | name='bias', 118 | initializer=self.bias_initializer, 119 | regularizer=self.bias_regularizer, 120 | constraint=self.bias_constraint) 121 | else: 122 | self.bias = None 123 | 124 | self.kernel_z = self.kernel[:, :self.units] 125 | self.recurrent_kernel_z = self.recurrent_kernel[:, 
:self.units] 126 | self.kernel_r = self.kernel[:, self.units: self.units * 2] 127 | self.recurrent_kernel_r = self.recurrent_kernel[:, 128 | self.units: 129 | self.units * 2] 130 | self.kernel_h = self.kernel[:, self.units * 2:] 131 | self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:] 132 | 133 | if self.use_bias: 134 | self.bias_z = self.bias[:self.units] 135 | self.bias_r = self.bias[self.units: self.units * 2] 136 | self.bias_h = self.bias[self.units * 2:] 137 | else: 138 | self.bias_z = None 139 | self.bias_r = None 140 | self.bias_h = None 141 | 142 | self.g_kernel_z = self.add_weight((self.units,), 143 | initializer='one', 144 | name='g_kernel_z') 145 | self.g_kernel_r = self.add_weight((self.units,), 146 | initializer='one', 147 | name='g_kernel_r') 148 | self.g_kernel_h = self.add_weight((self.units,), 149 | initializer='one', 150 | name='g_kernel_h') 151 | self.g_recurrent_kernel_z = self.add_weight((self.units,), 152 | initializer='one', 153 | name='g_recurrent_kernel_z') 154 | self.g_recurrent_kernel_r = self.add_weight((self.units,), 155 | initializer='one', 156 | name='g_recurrent_kernel_r') 157 | self.g_recurrent_kernel_h = self.add_weight((self.units,), 158 | initializer='one', 159 | name='g_recurrent_kernel_h') 160 | self.built = True 161 | 162 | def preprocess_input(self, x, training=None): 163 | return x 164 | 165 | def step(self, x, states): 166 | h_tm1 = states[0] # previous memory 167 | B_U = states[1] # dropout matrices for recurrent units 168 | B_W = states[2] 169 | # weight normalization: w = g * v / ||v||, with the norm taken over the input axis (as in WeightNormDense above) 170 | kernel_z = self.kernel_z * self.g_kernel_z / K.sqrt(K.sum(K.square(self.kernel_z), axis=0)) 171 | kernel_r = self.kernel_r * self.g_kernel_r / K.sqrt(K.sum(K.square(self.kernel_r), axis=0)) 172 | kernel_h = self.kernel_h * self.g_kernel_h / K.sqrt(K.sum(K.square(self.kernel_h), axis=0)) 173 | recurrent_kernel_z = self.recurrent_kernel_z * self.g_recurrent_kernel_z / K.sqrt(K.sum(K.square(self.recurrent_kernel_z), axis=0)) 174 | recurrent_kernel_r = self.recurrent_kernel_r * self.g_recurrent_kernel_r / K.sqrt(K.sum(K.square(self.recurrent_kernel_r), axis=0)) 175 | recurrent_kernel_h = self.recurrent_kernel_h * self.g_recurrent_kernel_h / K.sqrt(K.sum(K.square(self.recurrent_kernel_h), axis=0)) 176 | 177 | x_z = K.dot(x * B_W[0], kernel_z) 178 | x_r = K.dot(x * B_W[1], kernel_r) 179 | x_h = K.dot(x * B_W[2], kernel_h) 180 | if self.use_bias: 181 | x_z += self.bias_z 182 | x_r += self.bias_r 183 | x_h += self.bias_h 184 | 185 | z = self.recurrent_activation(x_z + K.dot(h_tm1 * B_U[0], recurrent_kernel_z)) 186 | r = self.recurrent_activation(x_r + K.dot(h_tm1 * B_U[1], recurrent_kernel_r)) 187 | hh = self.activation(x_h + K.dot(r * h_tm1 * B_U[2], recurrent_kernel_h)) 188 | h = z * h_tm1 + (1 - z) * hh 189 | 190 | return h, [h] 191 | 192 | 193 | # Aliases 194 | 195 | WeightNormConvolution2D = WeightNormConv2D 196 | -------------------------------------------------------------------------------- /wgan/README.md: -------------------------------------------------------------------------------- 1 | Toy keras implementation of Wasserstein GAN (WGAN) on MNIST 2 | 3 | ## Reference 4 | * Arjovsky et al.
[Wasserstein GAN](https://www.arxiv.org/abs/1701.07875) 5 | -------------------------------------------------------------------------------- /wgan/wgan_cnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Train a Wasserstein Generative Adversarial Network (WGAN) on the MNIST 5 | """ 6 | from __future__ import print_function 7 | from PIL import Image 8 | from six.moves import range 9 | 10 | import keras.backend as K 11 | K.set_image_data_format('channels_first') 12 | 13 | from keras.datasets import mnist 14 | from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Activation, BatchNormalization 15 | from keras.layers import Conv2D, UpSampling2D, LeakyReLU 16 | from keras.models import Sequential, Model 17 | from keras.optimizers import RMSprop, Adam 18 | from keras.utils.generic_utils import Progbar 19 | 20 | import numpy as np 21 | np.random.seed(1337) 22 | 23 | 24 | def clip_weights(model, lower, upper): 25 | for l in model.layers: 26 | weights = l.get_weights() 27 | weights = [np.clip(w, lower, upper) for w in weights] 28 | l.set_weights(weights) 29 | 30 | 31 | def wasserstein(y_true, y_pred): 32 | return K.mean(y_true * y_pred) 33 | 34 | 35 | def build_generator(latent_size): 36 | cnn = Sequential() 37 | cnn.add(Dense(1024, input_dim=latent_size, activation='relu')) 38 | cnn.add(Dense(128 * 7 * 7, activation='relu')) 39 | cnn.add(Reshape((128, 7, 7))) 40 | 41 | # upsample to (..., 14, 14) 42 | cnn.add(UpSampling2D(size=(2, 2))) 43 | cnn.add(Conv2D(256, 5, padding='same', 44 | activation='relu', kernel_initializer='glorot_normal')) 45 | 46 | # upsample to (..., 28, 28) 47 | cnn.add(UpSampling2D(size=(2, 2))) 48 | cnn.add(Conv2D(128, 5, padding='same', 49 | activation='relu', kernel_initializer='glorot_normal')) 50 | 51 | # take a channel axis reduction 52 | cnn.add(Conv2D(1, 2, padding='same', 53 | activation='tanh', kernel_initializer='glorot_normal')) 54 | 55 | # this is the z space commonly refered to in GAN papers 56 | latent = Input(shape=(latent_size,)) 57 | 58 | fake_image = cnn(latent) 59 | 60 | return Model(inputs=latent, outputs=fake_image) 61 | 62 | 63 | def build_critic(c=0.01): 64 | # build a relatively standard conv net with LeakyReLUs 65 | cnn = Sequential() 66 | 67 | cnn.add(Conv2D(32, 3, padding='same', strides=(2, 2), 68 | input_shape=(1, 28, 28))) 69 | cnn.add(LeakyReLU()) 70 | cnn.add(Dropout(0.3)) 71 | 72 | cnn.add(Conv2D(64, 3, padding='same', strides=(1, 1))) 73 | cnn.add(LeakyReLU()) 74 | cnn.add(Dropout(0.3)) 75 | 76 | cnn.add(Conv2D(128, 3, padding='same', strides=(2, 2))) 77 | cnn.add(LeakyReLU()) 78 | cnn.add(Dropout(0.3)) 79 | 80 | cnn.add(Conv2D(256, 3, padding='same', strides=(1, 1))) 81 | cnn.add(LeakyReLU()) 82 | cnn.add(Dropout(0.3)) 83 | 84 | cnn.add(Flatten()) 85 | 86 | image = Input(shape=(1, 28, 28)) 87 | 88 | features = cnn(image) 89 | fake = Dense(1, activation='linear', name='critic')(features) 90 | 91 | return Model(inputs=image, outputs=fake) 92 | 93 | 94 | if __name__ == '__main__': 95 | 96 | epochs = 5000 97 | batch_size = 50 98 | latent_size = 20 99 | 100 | lr = 0.0001 101 | c = 0.01 102 | 103 | # build the critic 104 | critic = build_critic() 105 | critic.compile( 106 | optimizer=RMSprop(lr=lr), 107 | loss=wasserstein 108 | ) 109 | 110 | # build the generator 111 | generator = build_generator(latent_size) 112 | 113 | latent = Input(shape=(latent_size, )) 114 | # get a fake image 115 | fake = generator(latent) 116 | # we only want 
to be able to train generation for the combined model 117 | critic.trainable = False 118 | fake = critic(fake) 119 | combined = Model(inputs=latent, outputs=fake) 120 | combined.compile( 121 | optimizer=Adam(lr=lr), 122 | loss=wasserstein 123 | ) 124 | 125 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 126 | # range [-1, 1] 127 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 128 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 129 | X_train = np.expand_dims(X_train, axis=1) 130 | 131 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 132 | X_test = np.expand_dims(X_test, axis=1) 133 | 134 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 135 | 136 | for epoch in range(epochs): 137 | print('Epoch {} of {}'.format(epoch + 1, epochs)) 138 | 139 | nb_batches = int(X_train.shape[0] / batch_size) 140 | progress_bar = Progbar(target=nb_batches) 141 | 142 | epoch_critic_loss = [] 143 | epoch_gen_loss = [] 144 | 145 | index = 0 146 | while index < nb_batches: 147 | ## critic 148 | if epoch < 5 or epoch % 100 == 0: 149 | Diters = 100 150 | else: 151 | Diters = 5 152 | iter = 0 153 | critic_loss = [] 154 | while index < nb_batches and iter < Diters: 155 | progress_bar.update(index) 156 | index += 1 157 | iter += 1 158 | 159 | # generate a new batch of noise 160 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 161 | # generate a batch of fake images 162 | generated_images = generator.predict(noise, verbose=0) 163 | 164 | # get a batch of real images 165 | image_batch = X_train[index * batch_size:(index + 1) * batch_size] 166 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 167 | 168 | X = np.concatenate((image_batch, generated_images)) 169 | y = np.array([-1] * len(image_batch) + [1] * batch_size) 170 | 171 | critic_loss.append(-critic.train_on_batch(X, y)) 172 | 173 | clip_weights(critic, -c, c) 174 | 175 | epoch_critic_loss.append(sum(critic_loss)/len(critic_loss)) 176 | 177 | ## generator 178 | # make new noise. 
we generate 2 * batch size here such that we have 179 | # the generator optimize over an identical number of images as the 180 | # critic 181 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 182 | target = -np.ones(batch_size) 183 | epoch_gen_loss.append(-combined.train_on_batch(noise, target)) 184 | 185 | print('\n[Loss_C: {:.3f}, Loss_G: {:.3f}]'.format(np.mean(epoch_critic_loss), np.mean(epoch_gen_loss))) 186 | 187 | # save weights every epoch 188 | generator.save_weights( 189 | 'cnn_generator_epoch_{0:03d}.hdf5'.format(epoch), True) 190 | critic.save_weights( 191 | 'cnn_critic_epoch_{0:03d}.hdf5'.format(epoch), True) 192 | 193 | # generate some digits to display 194 | noise = np.random.uniform(-1, 1, (100, latent_size)) 195 | # get a batch to display 196 | generated_images = generator.predict(noise, verbose=0) 197 | 198 | # arrange them into a grid 199 | img = (np.concatenate([r.reshape(-1, 28) 200 | for r in np.split(generated_images, 10) 201 | ], axis=-1) * 127.5 + 127.5).astype(np.uint8) 202 | 203 | Image.fromarray(img).save( 204 | 'cnn_epoch_{0:03d}_generated.png'.format(epoch)) 205 | -------------------------------------------------------------------------------- /wgan/wgan_mlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Train a Wasserstein Generative Adversarial Network (WGAN) on the MNIST 5 | """ 6 | from __future__ import print_function 7 | from PIL import Image 8 | from six.moves import range 9 | 10 | import keras.backend as K 11 | 12 | from keras.datasets import mnist 13 | from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Activation, BatchNormalization 14 | from keras.models import Sequential, Model 15 | from keras.optimizers import RMSprop, Adam 16 | from keras.utils.generic_utils import Progbar 17 | 18 | import numpy as np 19 | np.random.seed(1337) 20 | 21 | 22 | def clip_weights(model, lower, upper): 23 | for l in model.layers: 24 | weights = l.get_weights() 25 | weights = [np.clip(w, lower, upper) for w in weights] 26 | l.set_weights(weights) 27 | 28 | 29 | def wasserstein(y_true, y_pred): 30 | return K.mean(y_true * y_pred) 31 | 32 | 33 | def build_generator(latent_size): 34 | model = Sequential() 35 | model.add(Dense(1024, input_dim=latent_size, activation='relu')) 36 | model.add(Dense(28 * 28, activation='tanh')) 37 | model.add(Reshape((1, 28, 28))) 38 | 39 | return model 40 | 41 | 42 | def build_critic(c=0.01): 43 | f = Sequential() 44 | f.add(Flatten(input_shape=(1, 28, 28))) 45 | f.add(Dense(256)) 46 | f.add(Activation('relu')) 47 | f.add(Dense(128)) 48 | f.add(Activation('relu')) 49 | f.add(Dense(1, activation='linear')) 50 | 51 | image = Input(shape=(1, 28, 28)) 52 | score = f(image) 53 | 54 | model = Model(image, score) 55 | 56 | return model 57 | 58 | 59 | if __name__ == '__main__': 60 | 61 | epochs = 5000 62 | batch_size = 50 63 | latent_size = 20 64 | 65 | lr = 0.0001 66 | c = 0.01 67 | 68 | # build the critic 69 | critic = build_critic() 70 | critic.compile( 71 | optimizer=RMSprop(lr=lr), 72 | loss=wasserstein 73 | ) 74 | 75 | # build the generator 76 | generator = build_generator(latent_size) 77 | 78 | latent = Input(shape=(latent_size, )) 79 | # get a fake image 80 | fake = generator(latent) 81 | # we only want to be able to train generation for the combined model 82 | critic.trainable = False 83 | fake = critic(fake) 84 | combined = Model(inputs=latent, outputs=fake) 85 | combined.compile( 86 | optimizer=Adam(lr=lr), 87 | 
loss=wasserstein 88 | ) 89 | 90 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 91 | # range [-1, 1] 92 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 93 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 94 | X_train = np.expand_dims(X_train, axis=1) 95 | 96 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 97 | X_test = np.expand_dims(X_test, axis=1) 98 | 99 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 100 | 101 | for epoch in range(epochs): 102 | print('Epoch {} of {}'.format(epoch + 1, epochs)) 103 | 104 | nb_batches = int(X_train.shape[0] / batch_size) 105 | progress_bar = Progbar(target=nb_batches) 106 | 107 | epoch_critic_loss = [] 108 | epoch_gen_loss = [] 109 | 110 | index = 0 111 | while index < nb_batches: 112 | ## critic 113 | if epoch < 5 or epoch % 100 == 0: 114 | Diters = 100 115 | else: 116 | Diters = 5 117 | iter = 0 118 | critic_loss = [] 119 | while index < nb_batches and iter < Diters: 120 | progress_bar.update(index) 121 | index += 1 122 | iter += 1 123 | 124 | # generate a new batch of noise 125 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 126 | # generate a batch of fake images 127 | generated_images = generator.predict(noise, verbose=0) 128 | 129 | # get a batch of real images 130 | image_batch = X_train[index * batch_size:(index + 1) * batch_size] 131 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 132 | 133 | X = np.concatenate((image_batch, generated_images)) 134 | y = np.array([-1] * len(image_batch) + [1] * batch_size) 135 | 136 | critic_loss.append(-critic.train_on_batch(X, y)) 137 | 138 | clip_weights(critic, -c, c) 139 | 140 | epoch_critic_loss.append(sum(critic_loss)/len(critic_loss)) 141 | 142 | ## generator 143 | # make new noise. we generate 2 * batch size here such that we have 144 | # the generator optimize over an identical number of images as the 145 | # critic 146 | noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 147 | target = -np.ones(batch_size) 148 | epoch_gen_loss.append(-combined.train_on_batch(noise, target)) 149 | 150 | print('\n[Loss_C: {:.3f}, Loss_G: {:.3f}]'.format(np.mean(epoch_critic_loss), np.mean(epoch_gen_loss))) 151 | 152 | # save weights every epoch 153 | if False: 154 | generator.save_weights( 155 | 'mlp_generator_epoch_{0:03d}.hdf5'.format(epoch), True) 156 | critic.save_weights( 157 | 'mlp_critic_epoch_{0:03d}.hdf5'.format(epoch), True) 158 | 159 | # generate some digits to display 160 | noise = np.random.uniform(-1, 1, (100, latent_size)) 161 | # get a batch to display 162 | generated_images = generator.predict(noise, verbose=0) 163 | 164 | # arrange them into a grid 165 | img = (np.concatenate([r.reshape(-1, 28) 166 | for r in np.split(generated_images, 10) 167 | ], axis=-1) * 127.5 + 127.5).astype(np.uint8) 168 | 169 | Image.fromarray(img).save( 170 | 'mlp_epoch_{0:03d}_generated.png'.format(epoch)) 171 | -------------------------------------------------------------------------------- /xnornet/README.md: -------------------------------------------------------------------------------- 1 | This is a keras implementation of XNOR Networks. 2 | 3 | ## Run 4 | ### train a XNOR MLP model on MNIST 5 | python mnist_mlp.py 6 | ### train a XNOR CNN model on MNIST 7 | python mnist_cnn.py 8 | 9 | ## Reference 10 | * Rastegari et al. 
[XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks](https://arxiv.org/abs/1603.05279) 11 | -------------------------------------------------------------------------------- /xnornet/binary_layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | 4 | from keras import backend as K 5 | 6 | from keras.layers import InputSpec, Layer, Dense, Conv2D 7 | from keras import constraints 8 | from keras import initializers 9 | 10 | from binary_ops import binarize 11 | 12 | 13 | class Clip(constraints.Constraint): 14 | def __init__(self, min_value, max_value=None): 15 | self.min_value = min_value 16 | self.max_value = max_value 17 | if not self.max_value: 18 | self.max_value = -self.min_value 19 | if self.min_value > self.max_value: 20 | self.min_value, self.max_value = self.max_value, self.min_value 21 | 22 | def __call__(self, p): 23 | return K.clip(p, self.min_value, self.max_value) 24 | 25 | def get_config(self): 26 | return {"min_value": self.min_value, 27 | "max_value": self.max_value} 28 | 29 | 30 | class BinaryDense(Dense): 31 | ''' Binarized Dense layer 32 | References: 33 | "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] 34 | ''' 35 | def __init__(self, units, H=1., kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): 36 | super(BinaryDense, self).__init__(units, **kwargs) 37 | self.H = H 38 | self.kernel_lr_multiplier = kernel_lr_multiplier 39 | self.bias_lr_multiplier = bias_lr_multiplier 40 | 41 | super(BinaryDense, self).__init__(units, **kwargs) 42 | 43 | def build(self, input_shape): 44 | assert len(input_shape) >= 2 45 | input_dim = input_shape[1] 46 | 47 | if self.H == 'Glorot': 48 | self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) 49 | #print('Glorot H: {}'.format(self.H)) 50 | if self.kernel_lr_multiplier == 'Glorot': 51 | self.kernel_lr_multiplier = np.float32(1. 
/ np.sqrt(1.5 / (input_dim + self.units))) 52 | #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier)) 53 | 54 | self.kernel_constraint = Clip(-self.H, self.H) 55 | self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) 56 | self.kernel = self.add_weight(shape=(input_dim, self.units), 57 | initializer=self.kernel_initializer, 58 | name='kernel', 59 | regularizer=self.kernel_regularizer, 60 | constraint=self.kernel_constraint) 61 | 62 | if self.use_bias: 63 | self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] 64 | self.bias = self.add_weight(shape=(self.units,), 65 | initializer=self.bias_initializer, 66 | name='bias', 67 | regularizer=self.bias_regularizer, 68 | constraint=self.bias_constraint) 69 | else: 70 | self.lr_multipliers = [self.kernel_lr_multiplier] 71 | self.bias = None 72 | self.built = True 73 | 74 | 75 | def call(self, inputs): 76 | binary_kernel = binarize(self.kernel, H=self.H) 77 | output = K.dot(inputs, binary_kernel) 78 | if self.use_bias: 79 | output = K.bias_add(output, self.bias) 80 | if self.activation is not None: 81 | output = self.activation(output) 82 | return output 83 | 84 | def get_config(self): 85 | config = {'H': self.H, 86 | 'kernel_lr_multiplier': self.kernel_lr_multiplier, 87 | 'bias_lr_multiplier': self.bias_lr_multiplier} 88 | base_config = super(BinaryDense, self).get_config() 89 | return dict(list(base_config.items()) + list(config.items())) 90 | 91 | 92 | class BinaryConv2D(Conv2D): 93 | '''Binarized Convolution2D layer 94 | References: 95 | "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] 96 | ''' 97 | def __init__(self, filters, kernel_lr_multiplier='Glorot', 98 | bias_lr_multiplier=None, H=1., **kwargs): 99 | super(BinaryConv2D, self).__init__(filters, **kwargs) 100 | self.H = H 101 | self.kernel_lr_multiplier = kernel_lr_multiplier 102 | self.bias_lr_multiplier = bias_lr_multiplier 103 | 104 | 105 | def build(self, input_shape): 106 | if self.data_format == 'channels_first': 107 | channel_axis = 1 108 | else: 109 | channel_axis = -1 110 | if input_shape[channel_axis] is None: 111 | raise ValueError('The channel dimension of the inputs ' 112 | 'should be defined. Found `None`.') 113 | 114 | input_dim = input_shape[channel_axis] 115 | kernel_shape = self.kernel_size + (input_dim, self.filters) 116 | 117 | base = self.kernel_size[0] * self.kernel_size[1] 118 | if self.H == 'Glorot': 119 | nb_input = int(input_dim * base) 120 | nb_output = int(self.filters * base) 121 | self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) 122 | #print('Glorot H: {}'.format(self.H)) 123 | 124 | if self.kernel_lr_multiplier == 'Glorot': 125 | nb_input = int(input_dim * base) 126 | nb_output = int(self.filters * base) 127 | self.kernel_lr_multiplier = np.float32(1. 
/ np.sqrt(1.5/ (nb_input + nb_output))) 128 | #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier)) 129 | 130 | self.kernel_constraint = Clip(-self.H, self.H) 131 | self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) 132 | self.kernel = self.add_weight(shape=kernel_shape, 133 | initializer=self.kernel_initializer, 134 | name='kernel', 135 | regularizer=self.kernel_regularizer, 136 | constraint=self.kernel_constraint) 137 | 138 | if self.use_bias: 139 | self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] 140 | self.bias = self.add_weight((self.filters,), 141 | initializer=self.bias_initializer, 142 | name='bias', 143 | regularizer=self.bias_regularizer, 144 | constraint=self.bias_constraint) 145 | 146 | else: 147 | self.lr_multipliers = [self.kernel_lr_multiplier] 148 | self.bias = None 149 | 150 | self.built = True 151 | 152 | def call(self, inputs): 153 | binary_kernel = binarize(self.kernel, H=self.H) 154 | outputs = K.conv2d( 155 | inputs, 156 | binary_kernel, 157 | strides=self.strides, 158 | padding=self.padding, 159 | data_format=self.data_format, 160 | dilation_rate=self.dilation_rate) 161 | 162 | if self.use_bias: 163 | outputs = K.bias_add( 164 | outputs, 165 | self.bias, 166 | data_format=self.data_format) 167 | 168 | if self.activation is not None: 169 | return self.activation(outputs) 170 | return outputs 171 | 172 | def get_config(self): 173 | config = {'H': self.H, 174 | 'kernel_lr_multiplier': self.kernel_lr_multiplier, 175 | 'bias_lr_multiplier': self.bias_lr_multiplier} 176 | base_config = super(BinaryConv2D, self).get_config() 177 | return dict(list(base_config.items()) + list(config.items())) 178 | 179 | 180 | # Aliases 181 | 182 | BinaryConvolution2D = BinaryConv2D 183 | -------------------------------------------------------------------------------- /xnornet/binary_ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import keras.backend as K 4 | 5 | 6 | def round_through(x): 7 | '''Element-wise rounding to the closest integer with full gradient propagation. 8 | A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) 9 | ''' 10 | rounded = K.round(x) 11 | return x + K.stop_gradient(rounded - x) 12 | 13 | 14 | def _hard_sigmoid(x): 15 | '''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid). 16 | 17 | # Reference: 18 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830) 19 | 20 | ''' 21 | x = (0.5 * x) + 0.5 22 | return K.clip(x, 0, 1) 23 | 24 | 25 | def binary_sigmoid(x): 26 | '''Binary hard sigmoid for training binarized neural network. 27 | 28 | # Reference: 29 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830) 30 | 31 | ''' 32 | return round_through(_hard_sigmoid(x)) 33 | 34 | 35 | def binary_tanh(x): 36 | '''Binary hard tanh for training binarized neural network. 37 | The neurons' activations binarization function 38 | It behaves like the sign function during forward propagation 39 | And like: 40 | hard_tanh(x) = 2 * _hard_sigmoid(x) - 1 41 | clear gradient when |x| > 1 during back propagation 42 | 43 | # Reference: 44 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 
2016](http://arxiv.org/abs/1602.02830) 45 | 46 | ''' 47 | return 2 * round_through(_hard_sigmoid(x)) - 1 48 | 49 | 50 | def binarize(W, H=1): 51 | '''The weights' binarization function. 52 | 53 | # Reference: 54 | - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830) 55 | 56 | ''' 57 | # [-H, H] -> -H or H 58 | Wb = H * binary_tanh(W / H) 59 | return Wb 60 | 61 | 62 | def _mean_abs(x, axis=None, keepdims=False): 63 | return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) 64 | 65 | 66 | def xnorize(W, H=1., axis=None, keepdims=False): 67 | Wb = binarize(W, H) 68 | Wa = _mean_abs(W, axis, keepdims) 69 | 70 | return Wa, Wb 71 | -------------------------------------------------------------------------------- /xnornet/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple xnor CNN on the MNIST dataset. 2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 98.18% test accuracy after 20 epochs using tensorflow backend 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Activation, BatchNormalization, MaxPooling2D 13 | from keras.layers import Flatten 14 | from keras.optimizers import SGD, Adam, RMSprop 15 | from keras.callbacks import LearningRateScheduler 16 | from keras.utils import np_utils 17 | import keras.backend as K 18 | K.set_image_data_format('channels_first') 19 | 20 | 21 | from binary_ops import binary_tanh as binary_tanh_op 22 | from xnor_layers import XnorDense, XnorConv2D 23 | 24 | 25 | H = 1. 26 | kernel_lr_multiplier = 'Glorot' 27 | 28 | # nn 29 | batch_size = 50 30 | epochs = 20 31 | nb_channel = 1 32 | img_rows = 28 33 | img_cols = 28 34 | nb_filters = 32 35 | nb_conv = 3 36 | nb_pool = 2 37 | nb_hid = 128 38 | nb_classes = 10 39 | use_bias = False 40 | 41 | # learning rate schedule 42 | lr_start = 1e-3 43 | lr_end = 1e-4 44 | lr_decay = (lr_end / lr_start)**(1. 
/ epochs) 45 | 46 | # BN 47 | epsilon = 1e-6 48 | momentum = 0.9 49 | 50 | # dropout 51 | p1 = 0.25 52 | p2 = 0.5 53 | 54 | # the data, shuffled and split between train and test sets 55 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 56 | 57 | X_train = X_train.reshape(60000, 1, 28, 28) 58 | X_test = X_test.reshape(10000, 1, 28, 28) 59 | X_train = X_train.astype('float32') 60 | X_test = X_test.astype('float32') 61 | X_train /= 255 62 | X_test /= 255 63 | print(X_train.shape[0], 'train samples') 64 | print(X_test.shape[0], 'test samples') 65 | 66 | # convert class vectors to binary class matrices 67 | Y_train = np_utils.to_categorical(y_train, nb_classes) * 2 - 1 # -1 or 1 for hinge loss 68 | Y_test = np_utils.to_categorical(y_test, nb_classes) * 2 - 1 69 | 70 | 71 | model = Sequential() 72 | # conv1 73 | model.add(XnorConv2D(128, kernel_size=(3, 3), input_shape=(nb_channel, img_rows, img_cols), 74 | H=H, kernel_lr_multiplier=kernel_lr_multiplier, 75 | padding='same', use_bias=use_bias, name='conv1')) 76 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn1')) 77 | model.add(Activation('relu', name='act1')) 78 | # conv2 79 | model.add(XnorConv2D(128, kernel_size=(3, 3), H=H, kernel_lr_multiplier=kernel_lr_multiplier, 80 | padding='same', use_bias=use_bias, name='conv2')) 81 | model.add(MaxPooling2D(pool_size=(2, 2), name='pool2')) 82 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn2')) 83 | model.add(Activation('relu', name='act2')) 84 | # conv3 85 | model.add(XnorConv2D(256, kernel_size=(3, 3), H=H, kernel_lr_multiplier=kernel_lr_multiplier, 86 | padding='same', use_bias=use_bias, name='conv3')) 87 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn3')) 88 | model.add(Activation('relu', name='act3')) 89 | # conv4 90 | model.add(XnorConv2D(256, kernel_size=(3, 3), H=H, kernel_lr_multiplier=kernel_lr_multiplier, 91 | padding='same', use_bias=use_bias, name='conv4')) 92 | model.add(MaxPooling2D(pool_size=(2, 2), name='pool4')) 93 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn4')) 94 | model.add(Activation('relu', name='act4')) 95 | model.add(Flatten()) 96 | # dense1 97 | model.add(XnorDense(1024, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense5')) 98 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn5')) 99 | model.add(Activation('relu', name='act5')) 100 | # dense2 101 | model.add(XnorDense(nb_classes, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense6')) 102 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn6')) 103 | 104 | opt = Adam(lr=lr_start) 105 | model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 106 | model.summary() 107 | 108 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 109 | history = model.fit(X_train, Y_train, 110 | batch_size=batch_size, epochs=epochs, 111 | verbose=1, validation_data=(X_test, Y_test), 112 | callbacks=[lr_scheduler]) 113 | score = model.evaluate(X_test, Y_test, verbose=0) 114 | print('Test score:', score[0]) 115 | print('Test accuracy:', score[1]) 116 | -------------------------------------------------------------------------------- /xnornet/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple xnor fully connected NN on the MNIST dataset. 
2 | Modified from keras' examples/mnist_mlp.py 3 | Gets to 97.41% test accuracy after 20 epochs using tensorflow backend 4 | ''' 5 | 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | np.random.seed(1337) # for reproducibility 10 | 11 | import keras.backend as K 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, BatchNormalization 15 | from keras.optimizers import SGD, Adam, RMSprop 16 | from keras.callbacks import LearningRateScheduler 17 | from keras.utils import np_utils 18 | 19 | from xnor_layers import XnorDense 20 | 21 | 22 | batch_size = 100 23 | epochs = 20 24 | classes = 10 25 | 26 | H = 'Glorot' 27 | kernel_lr_multiplier = 'Glorot' 28 | 29 | # network 30 | num_unit = 2048 31 | num_hidden = 3 32 | use_bias = False 33 | 34 | # learning rate schedule 35 | lr_start = 1e-3 36 | lr_end = 1e-4 37 | lr_decay = (lr_end / lr_start)**(1. / epochs) 38 | 39 | # BN 40 | epsilon = 1e-6 41 | momentum = 0.9 42 | 43 | # dropout 44 | drop_in = 0 #0.2 45 | drop_hidden = 0# 0.5 46 | 47 | # the data, shuffled and split between train and test sets 48 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 49 | 50 | X_train = X_train.reshape(60000, 784) 51 | X_test = X_test.reshape(10000, 784) 52 | X_train = X_train.astype('float32') 53 | X_test = X_test.astype('float32') 54 | X_train /= 255 55 | X_test /= 255 56 | print(X_train.shape[0], 'train samples') 57 | print(X_test.shape[0], 'test samples') 58 | 59 | # convert class vectors to binary class matrices 60 | Y_train = np_utils.to_categorical(y_train, classes) * 2 - 1 # -1 or 1 for hinge loss 61 | Y_test = np_utils.to_categorical(y_test, classes) * 2 - 1 62 | 63 | model = Sequential() 64 | model.add(Dropout(drop_in, input_shape=(784,), name='drop0')) 65 | for i in range(num_hidden): 66 | model.add(XnorDense(num_unit, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, 67 | name='dense{}'.format(i+1))) 68 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn{}'.format(i+1))) 69 | model.add(Activation('relu', name='act{}'.format(i+1))) 70 | model.add(Dropout(drop_hidden, name='drop{}'.format(i+1))) 71 | model.add(XnorDense(10, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, 72 | name='dense')) 73 | model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn')) 74 | 75 | model.summary() 76 | 77 | opt = Adam(lr=lr_start) 78 | model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc']) 79 | 80 | lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e) 81 | history = model.fit(X_train, Y_train, 82 | batch_size=batch_size, epochs=epochs, 83 | verbose=1, validation_data=(X_test, Y_test), 84 | callbacks=[lr_scheduler]) 85 | score = model.evaluate(X_test, Y_test, verbose=0) 86 | print('Test score:', score[0]) 87 | print('Test accuracy:', score[1]) 88 | -------------------------------------------------------------------------------- /xnornet/xnor_layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | from keras import backend as K 4 | from binary_ops import xnorize 5 | 6 | from binary_layers import BinaryDense, BinaryConv2D 7 | 8 | 9 | class XnorDense(BinaryDense): 10 | '''XNOR Dense layer 11 | References: 12 | - [XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks](http://arxiv.org/abs/1603.05279) 13 | ''' 14 | def call(self, inputs, mask=None): 
15 | inputs_a, inputs_b = xnorize(inputs, 1., axis=1, keepdims=True) # (nb_sample, 1) 16 | kernel_a, kernel_b = xnorize(self.kernel, self.H, axis=0, keepdims=True) # (1, units) 17 | output = K.dot(inputs_b, kernel_b) * kernel_a * inputs_a 18 | if self.use_bias: 19 | output = K.bias_add(output, self.bias) 20 | if self.activation is not None: 21 | output = self.activation(output) 22 | return output 23 | 24 | 25 | class XnorConv2D(BinaryConv2D): 26 | '''XNOR Conv2D layer 27 | References: 28 | - [XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks](http://arxiv.org/abs/1603.05279) 29 | ''' 30 | def call(self, inputs): 31 | _, kernel_b = xnorize(self.kernel, self.H) 32 | _, inputs_b = xnorize(inputs) 33 | outputs = K.conv2d(inputs_b, kernel_b, strides=self.strides, 34 | padding=self.padding, 35 | data_format=self.data_format, 36 | dilation_rate=self.dilation_rate) 37 | 38 | # calculate Wa and xa 39 | 40 | # kernel_a 41 | mask = K.reshape(self.kernel, (-1, self.filters)) # self.nb_row * self.nb_col * channels, filters 42 | kernel_a = K.stop_gradient(K.mean(K.abs(mask), axis=0)) # filters 43 | 44 | # inputs_a 45 | if self.data_format == 'channels_first': 46 | channel_axis = 1 47 | else: 48 | channel_axis = -1 49 | mask = K.mean(K.abs(inputs), axis=channel_axis, keepdims=True) 50 | ones = K.ones(self.kernel_size + (1, 1)) 51 | inputs_a = K.conv2d(mask, ones, strides=self.strides, 52 | padding=self.padding, 53 | data_format=self.data_format, 54 | dilation_rate=self.dilation_rate) # nb_sample, 1, new_nb_row, new_nb_col 55 | if self.data_format == 'channels_first': 56 | outputs = outputs * K.stop_gradient(inputs_a) * K.expand_dims(K.expand_dims(K.expand_dims(kernel_a, 0), -1), -1) 57 | else: 58 | outputs = outputs * K.stop_gradient(inputs_a) * K.expand_dims(K.expand_dims(K.expand_dims(kernel_a, 0), 0), 0) 59 | 60 | if self.use_bias: 61 | outputs = K.bias_add( 62 | outputs, 63 | self.bias, 64 | data_format=self.data_format) 65 | 66 | if self.activation is not None: 67 | return self.activation(outputs) 68 | return outputs 69 | 70 | 71 | # Aliases 72 | 73 | XnorConvolution2D = XnorConv2D 74 | --------------------------------------------------------------------------------
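A note on the XNOR approximation implemented in xnornet/binary_ops.py and xnornet/xnor_layers.py: the forward pass of `XnorDense` is a binary matrix product rescaled by two mean-absolute-value factors, one per output unit and one per sample. The following is a minimal NumPy sketch of that arithmetic (illustrative only, not part of the repository; it assumes H=1 and omits the straight-through gradient trick, bias and activation):

```python
import numpy as np

def xnorize_np(w, axis=None, keepdims=False):
    # forward pass of binary_ops.xnorize with H = 1: a sign (+1/-1) tensor
    # plus a mean-absolute-value scaling factor
    wb = np.where(w >= 0, 1.0, -1.0)
    wa = np.abs(w).mean(axis=axis, keepdims=keepdims)
    return wa, wb

x = np.random.randn(4, 8)    # a batch of 4 inputs
W = np.random.randn(8, 3)    # a full-precision dense kernel

xa, xb = xnorize_np(x, axis=1, keepdims=True)   # per-sample scale, shape (4, 1)
Wa, Wb = xnorize_np(W, axis=0, keepdims=True)   # per-unit scale, shape (1, 3)

approx = xb.dot(Wb) * Wa * xa   # what XnorDense.call computes (bias/activation omitted)
exact = x.dot(W)                # full-precision reference
print(np.mean(np.abs(approx - exact)))
```

The two scale factors are what distinguish XNOR-Net from plain BinaryNet: the binary product `xb.dot(Wb)` is cheap, and `Wa`/`xa` recover most of the dynamic range lost to binarization.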
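The sign conventions in wgan/wgan_cnn.py and wgan/wgan_mlp.py are easy to misread: with `wasserstein(y_true, y_pred) = K.mean(y_true * y_pred)`, real images are labelled -1 and generated images +1 for the critic, and the generator is trained against a target of -1 through the frozen critic. A small NumPy sketch of that bookkeeping (illustrative only, with made-up critic scores):

```python
import numpy as np

def wasserstein(y_true, y_pred):
    # same form as the Keras loss used in the wgan scripts
    return np.mean(y_true * y_pred)

scores_real = np.array([0.3, 0.5])    # hypothetical critic outputs on real images
scores_fake = np.array([-0.2, 0.1])   # hypothetical critic outputs on generated images

# critic step: real -> -1, fake -> +1, so for equal-sized batches the loss equals
# (mean fake score - mean real score) / 2; minimizing it pushes real scores up and
# fake scores down, and the scripts record -loss as Loss_C
y = np.concatenate([-np.ones_like(scores_real), np.ones_like(scores_fake)])
s = np.concatenate([scores_real, scores_fake])
print(wasserstein(y, s), 0.5 * (scores_fake.mean() - scores_real.mean()))  # both -0.225

# generator step: a target of -1 on generated images rewards raising their critic scores
print(wasserstein(-np.ones_like(scores_fake), scores_fake))
```

Weight clipping (`clip_weights`) is what keeps the critic approximately Lipschitz-bounded so that this score gap approximates the Wasserstein distance, which is why the scripts run many more critic iterations than generator updates early in training.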