├── .gitignore
├── LICENSE
├── train.py
├── README.md
└── elm.py

/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
*.h5
.vscode/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Otenim

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
from keras.datasets import mnist
from keras.utils import to_categorical
from elm import ELM, load_model
import argparse
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--n_hidden_nodes', type=int, default=1024)
parser.add_argument('--loss',
    choices=['mean_squared_error', 'mean_absolute_error'],
    default='mean_squared_error',
)
parser.add_argument('--activation',
    choices=['sigmoid', 'identity'],
    default='sigmoid',
)

def softmax(x):
    c = np.max(x, axis=-1, keepdims=True)
    upper = np.exp(x - c)
    lower = np.sum(upper, axis=-1, keepdims=True)
    return upper / lower
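# Note on softmax above: subtracting the row-wise max before calling np.exp
# is the standard numerical-stability trick (it cannot change the result but
# prevents overflow), and keepdims=True keeps the reduced axis so both the
# subtraction and the division broadcast per sample; without it the reductions
# misalign across columns for 2-D inputs.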

def main(args):
    # ===============================
    # Load dataset
    # ===============================
    n_classes = 10
    (x_train, t_train), (x_test, t_test) = mnist.load_data()

    # ===============================
    # Preprocess
    # ===============================
    x_train = x_train.astype(np.float32) / 255.
    x_train = x_train.reshape(-1, 28**2)
    x_test = x_test.astype(np.float32) / 255.
    x_test = x_test.reshape(-1, 28**2)
    t_train = to_categorical(t_train, n_classes).astype(np.float32)
    t_test = to_categorical(t_test, n_classes).astype(np.float32)

    # ===============================
    # Instantiate ELM
    # ===============================
    model = ELM(
        n_input_nodes=28**2,
        n_hidden_nodes=args.n_hidden_nodes,
        n_output_nodes=n_classes,
        loss=args.loss,
        activation=args.activation,
        name='elm',
    )

    # ===============================
    # Training
    # ===============================
    model.fit(x_train, t_train)
    train_loss, train_acc, train_uar = model.evaluate(x_train, t_train, metrics=['loss', 'accuracy', 'uar'])
    print('train_loss: %f' % train_loss)  # loss value
    print('train_acc: %f' % train_acc)    # accuracy
    print('train_uar: %f' % train_uar)    # uar (unweighted average recall)

    # ===============================
    # Validation
    # ===============================
    val_loss, val_acc, val_uar = model.evaluate(x_test, t_test, metrics=['loss', 'accuracy', 'uar'])
    print('val_loss: %f' % val_loss)
    print('val_acc: %f' % val_acc)
    print('val_uar: %f' % val_uar)

    # ===============================
    # Prediction
    # ===============================
    x = x_test[:10]
    t = t_test[:10]
    y = softmax(model.predict(x))

    for i in range(len(y)):
        print('---------- prediction %d ----------' % (i+1))
        class_pred = np.argmax(y[i])
        prob_pred = y[i][class_pred]
        class_true = np.argmax(t[i])
        print('prediction:')
        print('\tclass: %d, probability: %f' % (class_pred, prob_pred))
        print('\tclass (true): %d' % class_true)

    # ===============================
    # Save model
    # ===============================
    print('saving model...')
    model.save('model.h5')
    del model

    # ===============================
    # Load model
    # ===============================
    print('loading model...')
    model = load_model('model.h5')


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Numpy-ELM

## Overview

This repository provides a Keras-like NumPy implementation of the Extreme
Learning Machine (ELM) introduced by Huang et al. in this
[paper](http://ieeexplore.ieee.org/document/1380068/?reload=true).

ELM is a neural-network-based learning scheme that trains very fast. Training
always reaches the global optimum of its underlying least-squares problem,
whereas ordinary backpropagation-based neural networks can get stuck in local
minima.
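Concretely, training an ELM reduces to a single linear least-squares problem.
In our notation (matching what `elm.py` computes with `np.linalg.pinv`, though
the symbols are ours): for hidden-layer activations $H$ and one-hot targets
$T$, the output weights are

$$
\beta^* \;=\; \arg\min_{\beta}\,\lVert H\beta - T\rVert_2^2 \;=\; H^{+}T,
$$

where $H^{+}$ is the Moore–Penrose pseudoinverse of $H$. Because this problem
is convex, $H^{+}T$ is a global minimizer, which is why training cannot get
trapped in local minima.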
## Dependencies

We tested the code with the following library versions:

* Python 3.6.0
* Numpy 1.14.1
* Keras (tensorflow backend) 2.1.5
* Tensorflow 1.7.0

We used Keras only for downloading the MNIST dataset.

You do not have to use exactly these versions, but we cannot guarantee that
the code works correctly with other ones.

All of the above libraries can be installed with the following command:

`$ pip install -U numpy Keras tensorflow`

## Usage

Here we show how to train an ELM model and make predictions with it. For the
sake of simplicity, we train the model on MNIST, a handwritten-digit dataset.

```python
from keras.datasets import mnist
from keras.utils import to_categorical
from elm import ELM, load_model
import argparse
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--n_hidden_nodes', type=int, default=1024)
parser.add_argument('--loss',
    choices=['mean_squared_error', 'mean_absolute_error'],
    default='mean_squared_error',
)
parser.add_argument('--activation',
    choices=['sigmoid', 'identity'],
    default='sigmoid',
)

def softmax(x):
    c = np.max(x, axis=-1, keepdims=True)
    upper = np.exp(x - c)
    lower = np.sum(upper, axis=-1, keepdims=True)
    return upper / lower

def main(args):
    # ===============================
    # Load dataset
    # ===============================
    n_classes = 10
    (x_train, t_train), (x_test, t_test) = mnist.load_data()

    # ===============================
    # Preprocess
    # ===============================
    x_train = x_train.astype(np.float32) / 255.
    x_train = x_train.reshape(-1, 28**2)
    x_test = x_test.astype(np.float32) / 255.
    x_test = x_test.reshape(-1, 28**2)
    t_train = to_categorical(t_train, n_classes).astype(np.float32)
    t_test = to_categorical(t_test, n_classes).astype(np.float32)

    # ===============================
    # Instantiate ELM
    # ===============================
    model = ELM(
        n_input_nodes=28**2,
        n_hidden_nodes=args.n_hidden_nodes,
        n_output_nodes=n_classes,
        loss=args.loss,
        activation=args.activation,
        name='elm',
    )

    # ===============================
    # Training
    # ===============================
    model.fit(x_train, t_train)
    train_loss, train_acc = model.evaluate(x_train, t_train, metrics=['loss', 'accuracy'])
    print('train_loss: %f' % train_loss)
    print('train_acc: %f' % train_acc)

    # ===============================
    # Validation
    # ===============================
    val_loss, val_acc = model.evaluate(x_test, t_test, metrics=['loss', 'accuracy'])
    print('val_loss: %f' % val_loss)
    print('val_acc: %f' % val_acc)

    # ===============================
    # Prediction
    # ===============================
    x = x_test[:10]
    t = t_test[:10]
    y = softmax(model.predict(x))

    for i in range(len(y)):
        print('---------- prediction %d ----------' % (i+1))
        class_pred = np.argmax(y[i])
        prob_pred = y[i][class_pred]
        class_true = np.argmax(t[i])
        print('prediction:')
        print('\tclass: %d, probability: %f' % (class_pred, prob_pred))
        print('\tclass (true): %d' % class_true)

    # ===============================
    # Save model
    # ===============================
    print('saving model...')
    model.save('model.h5')
    del model

    # ===============================
    # Load model
    # ===============================
    print('loading model...')
    model = load_model('model.h5')


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
```
## Notes

* In ELM, an activation function can be applied only to the hidden nodes.
* ELM finds the globally optimal solution for its trained weight matrix
  (`beta`) in every training run.
* ELM does not need to iterate over the same data samples, while
  backpropagation-based models usually require many passes.
* ELM never updates `alpha`, the weight matrix connecting the input nodes to
  the hidden nodes; it stays at its random initialization. This roughly halves
  the computational cost.
* ELM does not compute gradients. The output weight matrix is obtained by
  computing a pseudoinverse (see the sketch after this list).
* Computing the pseudoinverse dominates the training cost: for a
  `batch_size x n_hidden_nodes` hidden-layer matrix, the SVD-based
  pseudoinverse costs roughly O(batch_size * n_hidden_nodes *
  min(batch_size, n_hidden_nodes)), so take care when you increase
  `batch_size`.
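To make the training rule concrete, here is a minimal self-contained sketch of
it in plain NumPy. The variable names and toy data are ours (not part of the
library API), but the computation mirrors what `elm.py` does internally:

```python
import numpy as np

rng = np.random.default_rng(0)

# toy data: 100 samples, 20 features, 3 one-hot classes
x = rng.standard_normal((100, 20)).astype(np.float32)
t = np.eye(3, dtype=np.float32)[rng.integers(0, 3, size=100)]

n_hidden = 64
alpha = rng.uniform(-1., 1., size=(20, n_hidden))  # fixed random input weights
bias = np.zeros(n_hidden)                          # fixed bias

h = 1. / (1. + np.exp(-(x.dot(alpha) + bias)))     # hidden activations (sigmoid)
beta = np.linalg.pinv(h).dot(t)                    # the entire "training" step

y = h.dot(beta)                                    # predictions on the training data
print('train accuracy:', np.mean(np.argmax(y, -1) == np.argmax(t, -1)))
```

The only learned parameter is `beta`; everything before the `pinv` call is a
fixed random projection, which is why no gradients or iterations are needed.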
## Demo

You can execute the above sample code with the following command:

`$ python train.py [--n_hidden_nodes] [--activation] [--loss]`

* `--n_hidden_nodes`: (optional) the number of hidden nodes (default: 1024).
* `--activation`: (optional) the activation function applied to the hidden nodes.
  We currently support `sigmoid` and `identity` (default: `sigmoid`).
* `--loss`: (optional) the loss function applied to the output nodes.
  We currently support `mean_squared_error` and `mean_absolute_error`
  (default: `mean_squared_error`).

Example: `$ python train.py --n_hidden_nodes 2048 --activation sigmoid --loss mean_absolute_error`

## Todos

* Support more activation functions
* Support more loss functions
* Provide benchmark results
--------------------------------------------------------------------------------
/elm.py:
--------------------------------------------------------------------------------
import numpy as np
import h5py

def _mean_squared_error(y_true, y_pred):
    return 0.5 * np.mean((y_true - y_pred)**2)

def _mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def _identity(x):
    return x

class ELM(object):
    def __init__(
        self, n_input_nodes, n_hidden_nodes, n_output_nodes,
        activation='sigmoid', loss='mean_squared_error', name=None,
        beta_init=None, alpha_init=None, bias_init=None):

        self.name = name
        self.__n_input_nodes = n_input_nodes
        self.__n_hidden_nodes = n_hidden_nodes
        self.__n_output_nodes = n_output_nodes

        # initialize weights and a bias
        if isinstance(beta_init, np.ndarray):
            if beta_init.shape != (self.__n_hidden_nodes, self.__n_output_nodes):
                raise ValueError(
                    'the shape of beta_init is expected to be (%d,%d).' % (self.__n_hidden_nodes, self.__n_output_nodes)
                )
            self.__beta = beta_init
        else:
            self.__beta = np.random.uniform(-1.,1.,size=(self.__n_hidden_nodes, self.__n_output_nodes))
        if isinstance(alpha_init, np.ndarray):
            if alpha_init.shape != (self.__n_input_nodes, self.__n_hidden_nodes):
                raise ValueError(
                    'the shape of alpha_init is expected to be (%d,%d).' % (self.__n_input_nodes, self.__n_hidden_nodes)
                )
            self.__alpha = alpha_init
        else:
            self.__alpha = np.random.uniform(-1.,1.,size=(self.__n_input_nodes, self.__n_hidden_nodes))
        if isinstance(bias_init, np.ndarray):
            if bias_init.shape != (self.__n_hidden_nodes,):
                raise ValueError(
                    'the shape of bias_init is expected to be (%d,).' % (self.__n_hidden_nodes,)
                )
            self.__bias = bias_init
        else:
            self.__bias = np.zeros(shape=(self.__n_hidden_nodes,))

        # set an activation function
        self.__activation = self.__get_activation_function(activation)

        # set a loss function
        self.__loss = self.__get_loss_function(loss)

    def __call__(self, x):
        h = self.__activation(x.dot(self.__alpha) + self.__bias)
        return h.dot(self.__beta)

    def predict(self, x):
        return self(x)

    def evaluate(self, x, t, metrics=['loss']):
        y_pred = self.predict(x)
        y_true = t
        y_pred_argmax = np.argmax(y_pred, axis=-1)
        y_true_argmax = np.argmax(y_true, axis=-1)
        ret = []
        for m in metrics:
            if m == 'loss':
                loss = self.__loss(y_true, y_pred)
                ret.append(loss)
            elif m == 'accuracy':
                acc = np.sum(y_pred_argmax == y_true_argmax) / len(t)
                ret.append(acc)
            elif m == 'uar':
                # unweighted average recall: the mean of the per-class
                # recalls, which is robust to class imbalance
                num_classes = len(t[0])
                uar = []
                for i in range(num_classes):
                    tp = np.sum((y_pred_argmax == i) & (y_true_argmax == i))
                    tp_fn = np.sum(y_true_argmax == i)
                    uar.append(tp / tp_fn)
                uar = np.mean(uar)
                ret.append(uar)
            else:
                raise ValueError(
                    'an unknown evaluation indicator \'%s\'.' % m
                )
        if len(ret) == 1:
            ret = ret[0]
        elif len(ret) == 0:
            ret = None
        return ret

    def fit(self, x, t):
        H = self.__activation(x.dot(self.__alpha) + self.__bias)

        # compute a pseudoinverse of H
        H_pinv = np.linalg.pinv(H)

        # update beta
        self.__beta = H_pinv.dot(t)

    def save(self, filepath):
        with h5py.File(filepath, 'w') as f:
            arc = f.create_dataset('architecture', data=np.array([self.__n_input_nodes, self.__n_hidden_nodes, self.__n_output_nodes]))
            arc.attrs['activation'] = self.__get_activation_name(self.__activation).encode('utf-8')
            arc.attrs['loss'] = self.__get_loss_name(self.__loss).encode('utf-8')
            arc.attrs['name'] = self.name.encode('utf-8')
            f.create_group('weights')
            f.create_dataset('weights/alpha', data=self.__alpha)
            f.create_dataset('weights/beta', data=self.__beta)
            f.create_dataset('weights/bias', data=self.__bias)
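    # The HDF5 layout written by save() and read back by load_model() below:
    #
    #   /architecture          int array [n_input_nodes, n_hidden_nodes, n_output_nodes]
    #     .attrs['activation'] activation function name (utf-8 bytes)
    #     .attrs['loss']       loss function name (utf-8 bytes)
    #     .attrs['name']       model name (utf-8 bytes)
    #   /weights/alpha         input-to-hidden weights (fixed at initialization)
    #   /weights/beta          hidden-to-output weights (learned by fit())
    #   /weights/bias          hidden-layer bias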
    def __get_activation_function(self, name):
        if name == 'sigmoid':
            return _sigmoid
        elif name == 'identity':
            return _identity
        else:
            raise ValueError(
                'an unknown activation function \'%s\'.' % name
            )

    def __get_activation_name(self, activation):
        if activation == _sigmoid:
            return 'sigmoid'
        elif activation == _identity:
            return 'identity'

    def __get_loss_function(self, name):
        if name == 'mean_squared_error':
            return _mean_squared_error
        elif name == 'mean_absolute_error':
            return _mean_absolute_error
        else:
            raise ValueError(
                'an unknown loss function \'%s\'.' % name
            )

    def __get_loss_name(self, loss):
        if loss == _mean_squared_error:
            return 'mean_squared_error'
        elif loss == _mean_absolute_error:
            return 'mean_absolute_error'

    @property
    def weights(self):
        return {
            'alpha': self.__alpha,
            'beta': self.__beta,
            'bias': self.__bias,
        }

    @property
    def input_shape(self):
        return (self.__n_input_nodes,)

    @property
    def output_shape(self):
        return (self.__n_output_nodes,)

    @property
    def n_input_nodes(self):
        return self.__n_input_nodes

    @property
    def n_hidden_nodes(self):
        return self.__n_hidden_nodes

    @property
    def n_output_nodes(self):
        return self.__n_output_nodes

    @property
    def activation(self):
        return self.__get_activation_name(self.__activation)

    @property
    def loss(self):
        return self.__get_loss_name(self.__loss)

def load_model(filepath):
    with h5py.File(filepath, 'r') as f:
        alpha_init = f['weights/alpha'][...]
        beta_init = f['weights/beta'][...]
        bias_init = f['weights/bias'][...]
        arc = f['architecture']
        n_input_nodes = arc[0]
        n_hidden_nodes = arc[1]
        n_output_nodes = arc[2]
        activation = arc.attrs['activation'].decode('utf-8')
        loss = arc.attrs['loss'].decode('utf-8')
        name = arc.attrs['name'].decode('utf-8')
        model = ELM(
            n_input_nodes=n_input_nodes,
            n_hidden_nodes=n_hidden_nodes,
            n_output_nodes=n_output_nodes,
            activation=activation,
            loss=loss,
            alpha_init=alpha_init,
            beta_init=beta_init,
            bias_init=bias_init,
            name=name,
        )
    return model
--------------------------------------------------------------------------------