├── .gitignore
├── LICENSE
├── train.py
├── README.md
└── elm.py

/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
*.h5
.vscode/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Otenim

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
from keras.datasets import mnist
from keras.utils import to_categorical
from elm import ELM, load_model
import argparse
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--n_hidden_nodes', type=int, default=1024)
parser.add_argument('--loss',
    choices=['mean_squared_error', 'mean_absolute_error'],
    default='mean_squared_error',
)
parser.add_argument('--activation',
    choices=['sigmoid', 'identity'],
    default='sigmoid',
)

def softmax(x):
    c = np.max(x, axis=-1, keepdims=True)
    upper = np.exp(x - c)
    lower = np.sum(upper, axis=-1, keepdims=True)
    return upper / lower
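# Note on softmax above: subtracting the row-wise max before calling np.exp
# is the standard numerical-stability trick (it cannot change the result but
# prevents overflow), and keepdims=True keeps the reduced axis so both the
# subtraction and the division broadcast per sample; without it the reductions
# misalign across columns for 2-D inputs.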

def main(args):
    # ===============================
    # Load dataset
    # ===============================
    n_classes = 10
    (x_train, t_train), (x_test, t_test) = mnist.load_data()

    # ===============================
    # Preprocess
    # ===============================
    x_train = x_train.astype(np.float32) / 255.
    x_train = x_train.reshape(-1, 28**2)
    x_test = x_test.astype(np.float32) / 255.
    x_test = x_test.reshape(-1, 28**2)
    t_train = to_categorical(t_train, n_classes).astype(np.float32)
    t_test = to_categorical(t_test, n_classes).astype(np.float32)

    # ===============================
    # Instantiate ELM
    # ===============================
    model = ELM(
        n_input_nodes=28**2,
        n_hidden_nodes=args.n_hidden_nodes,
        n_output_nodes=n_classes,
        loss=args.loss,
        activation=args.activation,
        name='elm',
    )

    # ===============================
    # Training
    # ===============================
    model.fit(x_train, t_train)
    train_loss, train_acc, train_uar = model.evaluate(x_train, t_train, metrics=['loss', 'accuracy', 'uar'])
    print('train_loss: %f' % train_loss)  # loss value
    print('train_acc: %f' % train_acc)    # accuracy
    print('train_uar: %f' % train_uar)    # uar (unweighted average recall)

    # ===============================
    # Validation
    # ===============================
    val_loss, val_acc, val_uar = model.evaluate(x_test, t_test, metrics=['loss', 'accuracy', 'uar'])
    print('val_loss: %f' % val_loss)
    print('val_acc: %f' % val_acc)
    print('val_uar: %f' % val_uar)

    # ===============================
    # Prediction
    # ===============================
    x = x_test[:10]
    t = t_test[:10]
    y = softmax(model.predict(x))

    for i in range(len(y)):
        print('---------- prediction %d ----------' % (i+1))
        class_pred = np.argmax(y[i])
        prob_pred = y[i][class_pred]
        class_true = np.argmax(t[i])
        print('prediction:')
        print('\tclass: %d, probability: %f' % (class_pred, prob_pred))
        print('\tclass (true): %d' % class_true)

    # ===============================
    # Save model
    # ===============================
    print('saving model...')
    model.save('model.h5')
    del model

    # ===============================
    # Load model
    # ===============================
    print('loading model...')
    model = load_model('model.h5')


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Numpy-ELM

## Overview

This repository provides a Keras-like NumPy implementation of the Extreme
Learning Machine (ELM) introduced by Huang et al. in this
[paper](http://ieeexplore.ieee.org/document/1380068/?reload=true).

ELM is a neural-network-based learning scheme that trains very fast. Training
always reaches the global optimum of its underlying least-squares problem,
whereas ordinary backpropagation-based neural networks can get stuck in local
minima.
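Concretely, training an ELM reduces to a single linear least-squares problem.
In our notation (matching what `elm.py` computes with `np.linalg.pinv`, though
the symbols are ours): for hidden-layer activations $H$ and one-hot targets
$T$, the output weights are

$$
\beta^* \;=\; \arg\min_{\beta}\,\lVert H\beta - T\rVert_2^2 \;=\; H^{+}T,
$$

where $H^{+}$ is the Moore–Penrose pseudoinverse of $H$. Because this problem
is convex, $H^{+}T$ is a global minimizer, which is why training cannot get
trapped in local minima.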
## Dependencies

We tested the code with the following library versions:

* Python 3.6.0
* Numpy 1.14.1
* Keras (tensorflow backend) 2.1.5
* Tensorflow 1.7.0

We used Keras only for downloading the MNIST dataset.

You do not have to use exactly these versions, but we cannot guarantee that
the code works correctly with other ones.

All of the above libraries can be installed with the following command:

`$ pip install -U numpy Keras tensorflow`

## Usage

Here we show how to train an ELM model and make predictions with it. For the
sake of simplicity, we train the model on MNIST, a handwritten-digit dataset.

```python
from keras.datasets import mnist
from keras.utils import to_categorical
from elm import ELM, load_model
import argparse
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--n_hidden_nodes', type=int, default=1024)
parser.add_argument('--loss',
    choices=['mean_squared_error', 'mean_absolute_error'],
    default='mean_squared_error',
)
parser.add_argument('--activation',
    choices=['sigmoid', 'identity'],
    default='sigmoid',
)

def softmax(x):
    c = np.max(x, axis=-1, keepdims=True)
    upper = np.exp(x - c)
    lower = np.sum(upper, axis=-1, keepdims=True)
    return upper / lower

def main(args):
    # ===============================
    # Load dataset
    # ===============================
    n_classes = 10
    (x_train, t_train), (x_test, t_test) = mnist.load_data()

    # ===============================
    # Preprocess
    # ===============================
    x_train = x_train.astype(np.float32) / 255.
    x_train = x_train.reshape(-1, 28**2)
    x_test = x_test.astype(np.float32) / 255.
    x_test = x_test.reshape(-1, 28**2)
    t_train = to_categorical(t_train, n_classes).astype(np.float32)
    t_test = to_categorical(t_test, n_classes).astype(np.float32)

    # ===============================
    # Instantiate ELM
    # ===============================
    model = ELM(
        n_input_nodes=28**2,
        n_hidden_nodes=args.n_hidden_nodes,
        n_output_nodes=n_classes,
        loss=args.loss,
        activation=args.activation,
        name='elm',
    )

    # ===============================
    # Training
    # ===============================
    model.fit(x_train, t_train)
    train_loss, train_acc = model.evaluate(x_train, t_train, metrics=['loss', 'accuracy'])
    print('train_loss: %f' % train_loss)
    print('train_acc: %f' % train_acc)

    # ===============================
    # Validation
    # ===============================
    val_loss, val_acc = model.evaluate(x_test, t_test, metrics=['loss', 'accuracy'])
    print('val_loss: %f' % val_loss)
    print('val_acc: %f' % val_acc)

    # ===============================
    # Prediction
    # ===============================
    x = x_test[:10]
    t = t_test[:10]
    y = softmax(model.predict(x))

    for i in range(len(y)):
        print('---------- prediction %d ----------' % (i+1))
        class_pred = np.argmax(y[i])
        prob_pred = y[i][class_pred]
        class_true = np.argmax(t[i])
        print('prediction:')
        print('\tclass: %d, probability: %f' % (class_pred, prob_pred))
        print('\tclass (true): %d' % class_true)

    # ===============================
    # Save model
    # ===============================
    print('saving model...')
    model.save('model.h5')
    del model

    # ===============================
    # Load model
    # ===============================
    print('loading model...')
    model = load_model('model.h5')


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
```
## Notes

* In ELM, an activation function can be applied only to the hidden nodes.
* ELM finds the globally optimal solution for its trained weight matrix
  (`beta`) in every training run.
* ELM does not need to iterate over the same data samples, while
  backpropagation-based models usually require many passes.
* ELM never updates `alpha`, the weight matrix connecting the input nodes to
  the hidden nodes; it stays at its random initialization. This roughly halves
  the computational cost.
* ELM does not compute gradients. The output weight matrix is obtained by
  computing a pseudoinverse (see the sketch after this list).
* Computing the pseudoinverse dominates the training cost: for a
  `batch_size x n_hidden_nodes` hidden-layer matrix, the SVD-based
  pseudoinverse costs roughly O(batch_size * n_hidden_nodes *
  min(batch_size, n_hidden_nodes)), so take care when you increase
  `batch_size`.
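To make the training rule concrete, here is a minimal self-contained sketch of
it in plain NumPy. The variable names and toy data are ours (not part of the
library API), but the computation mirrors what `elm.py` does internally:

```python
import numpy as np

rng = np.random.default_rng(0)

# toy data: 100 samples, 20 features, 3 one-hot classes
x = rng.standard_normal((100, 20)).astype(np.float32)
t = np.eye(3, dtype=np.float32)[rng.integers(0, 3, size=100)]

n_hidden = 64
alpha = rng.uniform(-1., 1., size=(20, n_hidden))  # fixed random input weights
bias = np.zeros(n_hidden)                          # fixed bias

h = 1. / (1. + np.exp(-(x.dot(alpha) + bias)))     # hidden activations (sigmoid)
beta = np.linalg.pinv(h).dot(t)                    # the entire "training" step

y = h.dot(beta)                                    # predictions on the training data
print('train accuracy:', np.mean(np.argmax(y, -1) == np.argmax(t, -1)))
```

The only learned parameter is `beta`; everything before the `pinv` call is a
fixed random projection, which is why no gradients or iterations are needed.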
## Demo

You can execute the above sample code with the following command:

`$ python train.py [--n_hidden_nodes] [--activation] [--loss]`

* `--n_hidden_nodes`: (optional) the number of hidden nodes (default: 1024).
* `--activation`: (optional) the activation function applied to the hidden nodes.
  We currently support `sigmoid` and `identity` (default: `sigmoid`).
* `--loss`: (optional) the loss function applied to the output nodes.
  We currently support `mean_squared_error` and `mean_absolute_error`
  (default: `mean_squared_error`).

Example: `$ python train.py --n_hidden_nodes 2048 --activation sigmoid --loss mean_absolute_error`

## Todos

* Support more activation functions
* Support more loss functions
* Provide benchmark results
--------------------------------------------------------------------------------
/elm.py:
--------------------------------------------------------------------------------
import numpy as np
import h5py

def _mean_squared_error(y_true, y_pred):
    return 0.5 * np.mean((y_true - y_pred)**2)

def _mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def _identity(x):
    return x

class ELM(object):
    def __init__(
        self, n_input_nodes, n_hidden_nodes, n_output_nodes,
        activation='sigmoid', loss='mean_squared_error', name=None,
        beta_init=None, alpha_init=None, bias_init=None):

        self.name = name
        self.__n_input_nodes = n_input_nodes
        self.__n_hidden_nodes = n_hidden_nodes
        self.__n_output_nodes = n_output_nodes

        # initialize weights and a bias
        if isinstance(beta_init, np.ndarray):
            if beta_init.shape != (self.__n_hidden_nodes, self.__n_output_nodes):
                raise ValueError(
                    'the shape of beta_init is expected to be (%d,%d).' % (self.__n_hidden_nodes, self.__n_output_nodes)
                )
            self.__beta = beta_init
        else:
            self.__beta = np.random.uniform(-1.,1.,size=(self.__n_hidden_nodes, self.__n_output_nodes))
        if isinstance(alpha_init, np.ndarray):
            if alpha_init.shape != (self.__n_input_nodes, self.__n_hidden_nodes):
                raise ValueError(
                    'the shape of alpha_init is expected to be (%d,%d).' % (self.__n_input_nodes, self.__n_hidden_nodes)
                )
            self.__alpha = alpha_init
        else:
            self.__alpha = np.random.uniform(-1.,1.,size=(self.__n_input_nodes, self.__n_hidden_nodes))
        if isinstance(bias_init, np.ndarray):
            if bias_init.shape != (self.__n_hidden_nodes,):
                raise ValueError(
                    'the shape of bias_init is expected to be (%d,).' % (self.__n_hidden_nodes,)
                )
            self.__bias = bias_init
        else:
            self.__bias = np.zeros(shape=(self.__n_hidden_nodes,))

        # set an activation function
        self.__activation = self.__get_activation_function(activation)

        # set a loss function
        self.__loss = self.__get_loss_function(loss)

    def __call__(self, x):
        h = self.__activation(x.dot(self.__alpha) + self.__bias)
        return h.dot(self.__beta)

    def predict(self, x):
        return self(x)

    def evaluate(self, x, t, metrics=['loss']):
        y_pred = self.predict(x)
        y_true = t
        y_pred_argmax = np.argmax(y_pred, axis=-1)
        y_true_argmax = np.argmax(y_true, axis=-1)
        ret = []
        for m in metrics:
            if m == 'loss':
                loss = self.__loss(y_true, y_pred)
                ret.append(loss)
            elif m == 'accuracy':
                acc = np.sum(y_pred_argmax == y_true_argmax) / len(t)
                ret.append(acc)
            elif m == 'uar':
                # unweighted average recall: the mean of the per-class
                # recalls, which is robust to class imbalance
                num_classes = len(t[0])
                uar = []
                for i in range(num_classes):
                    tp = np.sum((y_pred_argmax == i) & (y_true_argmax == i))
                    tp_fn = np.sum(y_true_argmax == i)
                    uar.append(tp / tp_fn)
                uar = np.mean(uar)
                ret.append(uar)
            else:
                raise ValueError(
                    'an unknown evaluation indicator \'%s\'.' % m
                )
        if len(ret) == 1:
            ret = ret[0]
        elif len(ret) == 0:
            ret = None
        return ret

    def fit(self, x, t):
        H = self.__activation(x.dot(self.__alpha) + self.__bias)

        # compute a pseudoinverse of H
        H_pinv = np.linalg.pinv(H)

        # update beta
        self.__beta = H_pinv.dot(t)

    def save(self, filepath):
        with h5py.File(filepath, 'w') as f:
            arc = f.create_dataset('architecture', data=np.array([self.__n_input_nodes, self.__n_hidden_nodes, self.__n_output_nodes]))
            arc.attrs['activation'] = self.__get_activation_name(self.__activation).encode('utf-8')
            arc.attrs['loss'] = self.__get_loss_name(self.__loss).encode('utf-8')
            arc.attrs['name'] = self.name.encode('utf-8')
            f.create_group('weights')
            f.create_dataset('weights/alpha', data=self.__alpha)
            f.create_dataset('weights/beta', data=self.__beta)
            f.create_dataset('weights/bias', data=self.__bias)
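    # The HDF5 layout written by save() and read back by load_model() below:
    #
    #   /architecture          int array [n_input_nodes, n_hidden_nodes, n_output_nodes]
    #     .attrs['activation'] activation function name (utf-8 bytes)
    #     .attrs['loss']       loss function name (utf-8 bytes)
    #     .attrs['name']       model name (utf-8 bytes)
    #   /weights/alpha         input-to-hidden weights (fixed at initialization)
    #   /weights/beta          hidden-to-output weights (learned by fit())
    #   /weights/bias          hidden-layer bias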
    def __get_activation_function(self, name):
        if name == 'sigmoid':
            return _sigmoid
        elif name == 'identity':
            return _identity
        else:
            raise ValueError(
                'an unknown activation function \'%s\'.' % name
            )

    def __get_activation_name(self, activation):
        if activation == _sigmoid:
            return 'sigmoid'
        elif activation == _identity:
            return 'identity'

    def __get_loss_function(self, name):
        if name == 'mean_squared_error':
            return _mean_squared_error
        elif name == 'mean_absolute_error':
            return _mean_absolute_error
        else:
            raise ValueError(
                'an unknown loss function \'%s\'.' % name
            )

    def __get_loss_name(self, loss):
        if loss == _mean_squared_error:
            return 'mean_squared_error'
        elif loss == _mean_absolute_error:
            return 'mean_absolute_error'

    @property
    def weights(self):
        return {
            'alpha': self.__alpha,
            'beta': self.__beta,
            'bias': self.__bias,
        }

    @property
    def input_shape(self):
        return (self.__n_input_nodes,)

    @property
    def output_shape(self):
        return (self.__n_output_nodes,)

    @property
    def n_input_nodes(self):
        return self.__n_input_nodes

    @property
    def n_hidden_nodes(self):
        return self.__n_hidden_nodes

    @property
    def n_output_nodes(self):
        return self.__n_output_nodes

    @property
    def activation(self):
        return self.__get_activation_name(self.__activation)

    @property
    def loss(self):
        return self.__get_loss_name(self.__loss)

def load_model(filepath):
    with h5py.File(filepath, 'r') as f:
        alpha_init = f['weights/alpha'][...]
        beta_init = f['weights/beta'][...]
        bias_init = f['weights/bias'][...]
        arc = f['architecture']
        n_input_nodes = arc[0]
        n_hidden_nodes = arc[1]
        n_output_nodes = arc[2]
        activation = arc.attrs['activation'].decode('utf-8')
        loss = arc.attrs['loss'].decode('utf-8')
        name = arc.attrs['name'].decode('utf-8')
        model = ELM(
            n_input_nodes=n_input_nodes,
            n_hidden_nodes=n_hidden_nodes,
            n_output_nodes=n_output_nodes,
            activation=activation,
            loss=loss,
            alpha_init=alpha_init,
            beta_init=beta_init,
            bias_init=bias_init,
            name=name,
        )
    return model
--------------------------------------------------------------------------------