├── .gitignore
├── LICENSE
├── train.py
├── README.md
└── elm.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.h5
3 | .vscode/
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Otenim
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from keras.datasets import mnist
2 | from keras.utils import to_categorical
3 | from elm import ELM, load_model
4 | import argparse
5 | import os
6 | import numpy as np
7 |
8 | parser = argparse.ArgumentParser()
9 | parser.add_argument('--n_hidden_nodes', type=int, default=1024)
10 | parser.add_argument('--loss',
11 | choices=['mean_squared_error', 'mean_absolute_error'],
12 | default='mean_squared_error',
13 | )
14 | parser.add_argument('--activation',
15 | choices=['sigmoid', 'identity'],
16 | default='sigmoid',
17 | )
18 |
19 | def softmax(x):
20 |     c = np.max(x, axis=-1, keepdims=True)  # per-row max, for numerical stability
21 |     upper = np.exp(x - c)
22 |     lower = np.sum(upper, axis=-1, keepdims=True)  # keepdims so the division broadcasts row-wise
23 |     return upper / lower
24 |
25 | def main(args):
26 | # ===============================
27 | # Load dataset
28 | # ===============================
29 | n_classes = 10
30 | (x_train, t_train), (x_test, t_test) = mnist.load_data()
31 |
32 | # ===============================
33 | # Preprocess
34 | # ===============================
35 | x_train = x_train.astype(np.float32) / 255.
36 | x_train = x_train.reshape(-1, 28**2)
37 | x_test = x_test.astype(np.float32) / 255.
38 | x_test = x_test.reshape(-1, 28**2)
39 | t_train = to_categorical(t_train, n_classes).astype(np.float32)
40 | t_test = to_categorical(t_test, n_classes).astype(np.float32)
41 |
42 | # ===============================
43 | # Instantiate ELM
44 | # ===============================
45 | model = ELM(
46 | n_input_nodes=28**2,
47 | n_hidden_nodes=args.n_hidden_nodes,
48 | n_output_nodes=n_classes,
49 | loss=args.loss,
50 | activation=args.activation,
51 | name='elm',
52 | )
53 |
54 | # ===============================
55 | # Training
56 | # ===============================
57 | model.fit(x_train, t_train)
58 | train_loss, train_acc, train_uar = model.evaluate(x_train, t_train, metrics=['loss', 'accuracy', 'uar'])
59 | print('train_loss: %f' % train_loss) # loss value
60 | print('train_acc: %f' % train_acc) # accuracy
61 | print('train_uar: %f' % train_uar) # uar (unweighted average recall)
62 |
63 | # ===============================
64 | # Validation
65 | # ===============================
66 | val_loss, val_acc, val_uar = model.evaluate(x_test, t_test, metrics=['loss', 'accuracy', 'uar'])
67 | print('val_loss: %f' % val_loss)
68 | print('val_acc: %f' % val_acc)
69 | print('val_uar: %f' % val_uar)
70 |
71 | # ===============================
72 | # Prediction
73 | # ===============================
74 | x = x_test[:10]
75 | t = t_test[:10]
76 | y = softmax(model.predict(x))
77 |
78 | for i in range(len(y)):
79 | print('---------- prediction %d ----------' % (i+1))
80 | class_pred = np.argmax(y[i])
81 | prob_pred = y[i][class_pred]
82 | class_true = np.argmax(t[i])
83 | print('prediction:')
84 | print('\tclass: %d, probability: %f' % (class_pred, prob_pred))
85 | print('\tclass (true): %d' % class_true)
86 |
87 | # ===============================
88 | # Save model
89 | # ===============================
90 | print('saving model...')
91 | model.save('model.h5')
92 | del model
93 |
94 | # ===============================
95 | # Load model
96 | # ===============================
97 | print('loading model...')
98 | model = load_model('model.h5')
99 |
100 |
101 | if __name__ == '__main__':
102 | args = parser.parse_args()
103 | main(args)
104 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Numpy-ELM
2 |
3 | ## Overview
4 |
5 |
6 |
7 |
8 |
9 | In this repository, we provide a Keras-like NumPy implementation of the Extreme Learning Machine (ELM)
10 | introduced by Huang et al. in this [paper](http://ieeexplore.ieee.org/document/1380068/?reload=true).
11 |
12 | ELM is a neural-network-based learning scheme that trains fast:
13 | training always converges to the globally optimal least-squares solution,
14 | whereas ordinary backpropagation-based neural networks have to deal with
15 | the local minima problem.
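
In the paper's formulation (written here with this repository's variable names), the
random input weights $\alpha$ and the hidden bias $b$ are fixed, and only the output
weights $\beta$ are solved for in closed form:

$$H = g(X\alpha + b), \qquad \beta = H^{+}T$$

where $X$ are the training inputs, $T$ the targets, $g$ the hidden-layer activation,
and $H^{+}$ the Moore-Penrose pseudoinverse of $H$.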
16 |
17 | ## Dependencies
18 |
19 | We tested the code with the following library versions.
20 |
21 | * Python 3.6.0
22 | * NumPy 1.14.1
23 | * Keras 2.1.5 (TensorFlow backend)
24 | * TensorFlow 1.7.0
25 |
26 | We used Keras only for downloading the MNIST dataset.
27 |
28 | You don't have to use exactly the same version of each library,
29 | but we cannot guarantee that the code works with other versions.
30 |
31 | All of the above libraries can be installed with the following command.
32 |
33 | `$ pip install -U numpy Keras tensorflow`
34 |
35 | ## Usage
36 |
37 | Here, we show how to train an ELM model and make predictions with it.
38 | For the sake of simplicity, we train the model on MNIST, a
39 | handwritten-digit dataset.
40 |
41 | ```python
42 | from keras.datasets import mnist
43 | from keras.utils import to_categorical
44 | from elm import ELM, load_model
45 | import argparse
46 | import os
47 | import numpy as np
48 |
49 | parser = argparse.ArgumentParser()
50 | parser.add_argument('--n_hidden_nodes', type=int, default=1024)
51 | parser.add_argument('--loss',
52 | choices=['mean_squared_error', 'mean_absolute_error'],
53 | default='mean_squared_error',
54 | )
55 | parser.add_argument('--activation',
56 | choices=['sigmoid', 'identity'],
57 | default='sigmoid',
58 | )
59 |
60 | def softmax(x):
61 |     c = np.max(x, axis=-1, keepdims=True)  # per-row max, for numerical stability
62 |     upper = np.exp(x - c)
63 |     lower = np.sum(upper, axis=-1, keepdims=True)  # keepdims so the division broadcasts row-wise
64 |     return upper / lower
65 |
66 | def main(args):
67 | # ===============================
68 | # Load dataset
69 | # ===============================
70 | n_classes = 10
71 | (x_train, t_train), (x_test, t_test) = mnist.load_data()
72 |
73 | # ===============================
74 | # Preprocess
75 | # ===============================
76 | x_train = x_train.astype(np.float32) / 255.
77 | x_train = x_train.reshape(-1, 28**2)
78 | x_test = x_test.astype(np.float32) / 255.
79 | x_test = x_test.reshape(-1, 28**2)
80 | t_train = to_categorical(t_train, n_classes).astype(np.float32)
81 | t_test = to_categorical(t_test, n_classes).astype(np.float32)
82 |
83 | # ===============================
84 | # Instantiate ELM
85 | # ===============================
86 | model = ELM(
87 | n_input_nodes=28**2,
88 | n_hidden_nodes=args.n_hidden_nodes,
89 | n_output_nodes=n_classes,
90 | loss=args.loss,
91 | activation=args.activation,
92 | name='elm',
93 | )
94 |
95 | # ===============================
96 | # Training
97 | # ===============================
98 | model.fit(x_train, t_train)
99 | train_loss, train_acc = model.evaluate(x_train, t_train, metrics=['loss', 'accuracy'])
100 | print('train_loss: %f' % train_loss)
101 | print('train_acc: %f' % train_acc)
102 |
103 | # ===============================
104 | # Validation
105 | # ===============================
106 | val_loss, val_acc = model.evaluate(x_test, t_test, metrics=['loss', 'accuracy'])
107 | print('val_loss: %f' % val_loss)
108 | print('val_acc: %f' % val_acc)
109 |
110 | # ===============================
111 | # Prediction
112 | # ===============================
113 | x = x_test[:10]
114 | t = t_test[:10]
115 | y = softmax(model.predict(x))
116 |
117 | for i in range(len(y)):
118 | print('---------- prediction %d ----------' % (i+1))
119 | class_pred = np.argmax(y[i])
120 | prob_pred = y[i][class_pred]
121 | class_true = np.argmax(t[i])
122 | print('prediction:')
123 | print('\tclass: %d, probability: %f' % (class_pred, prob_pred))
124 | print('\tclass (true): %d' % class_true)
125 |
126 | # ===============================
127 | # Save model
128 | # ===============================
129 | print('saving model...')
130 | model.save('model.h5')
131 | del model
132 |
133 | # ===============================
134 | # Load model
135 | # ===============================
136 | print('loading model...')
137 | model = load_model('model.h5')
138 |
139 |
140 | if __name__ == '__main__':
141 | args = parser.parse_args()
142 | main(args)
143 | ```
144 |
145 | ## Notes
146 |
147 | * In ELM, an activation function can be applied only to the hidden nodes.
148 | * ELM always finds the globally optimal solution for the output weight matrix at every training run.
149 | * ELM does not need to iterate over the same data samples,
150 |   while backpropagation-based models usually require many passes over the data.
151 | * ELM does not update 'alpha', the weight matrix connecting the input nodes
152 |   and the hidden nodes. This roughly halves the computational cost.
153 | * ELM does not need to compute gradients. The output weights are obtained by
154 |   computing a pseudoinverse (see the sketch below).
155 | * The computational complexity of the matrix inversion is about O(batch\_size^3 \* n\_hidden\_nodes),
156 |   so take care about the cost when you increase batch\_size.
157 |
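For reference, the whole training step in `elm.py` boils down to a few lines of NumPy.
Below is a minimal, self-contained sketch; the shapes, the random data, and the sigmoid
activation are illustrative choices, not part of the library:

```python
import numpy as np

# Minimal sketch of the one-shot ELM training step on random data.
# Only beta is learned; alpha and the bias stay at their initial values.
rng = np.random.RandomState(0)
x = rng.rand(64, 784)                            # inputs (n_samples, n_input_nodes)
t = np.eye(10)[rng.randint(10, size=64)]         # one-hot targets

alpha = rng.uniform(-1., 1., size=(784, 1024))   # fixed random input weights
bias = np.zeros(1024)                            # fixed hidden bias

H = 1. / (1. + np.exp(-(x.dot(alpha) + bias)))   # sigmoid hidden-layer activations
beta = np.linalg.pinv(H).dot(t)                  # closed-form least-squares solution

y = H.dot(beta)                                  # predictions for the training inputs
```
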
158 | ## Demo
159 |
160 | You can execute the above sample code with the following command.
161 |
162 | `$ python train.py [--n_hidden_nodes] [--activation] [--loss]`
163 |
164 | * `--n_hidden_nodes`: (optional) the number of hidden nodes (default: 1024).
165 | * `--activation`: (optional) an activation function applied to the hidden nodes.
166 |   We currently support `sigmoid` and `identity` (default: `sigmoid`).
167 | * `--loss`: (optional) a loss function applied to the output nodes.
168 |   We currently support `mean_squared_error` and `mean_absolute_error` (default: `mean_squared_error`).
169 |
170 | Example: `$ python train.py --n_hidden_nodes 2048 --activation sigmoid --loss mean_absolute_error`
171 |
172 | ## Todos
173 |
174 | * support more activation functions
175 | * support more loss functions
176 | * provide benchmark results
177 |
--------------------------------------------------------------------------------
/elm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import h5py
3 |
4 | def _mean_squared_error(y_true, y_pred):
5 | return 0.5 * np.mean((y_true - y_pred)**2)
6 |
7 | def _mean_absolute_error(y_true, y_pred):
8 | return np.mean(np.abs(y_true - y_pred))
9 |
10 | def _sigmoid(x):
11 | return 1. / (1. + np.exp(-x))
12 |
13 | def _identity(x):
14 | return x
15 |
16 | class ELM(object):
17 | def __init__(
18 | self, n_input_nodes, n_hidden_nodes, n_output_nodes,
19 | activation='sigmoid', loss='mean_squared_error', name=None,
20 | beta_init=None, alpha_init=None, bias_init=None):
21 |
22 | self.name = name
23 | self.__n_input_nodes = n_input_nodes
24 | self.__n_hidden_nodes = n_hidden_nodes
25 | self.__n_output_nodes = n_output_nodes
26 |
27 | # initialize weights and a bias
28 | if isinstance(beta_init, np.ndarray):
29 | if beta_init.shape != (self.__n_hidden_nodes, self.__n_output_nodes):
30 | raise ValueError(
31 | 'the shape of beta_init is expected to be (%d,%d).' % (self.__n_hidden_nodes, self.__n_output_nodes)
32 | )
33 | self.__beta = beta_init
34 | else:
35 | self.__beta = np.random.uniform(-1.,1.,size=(self.__n_hidden_nodes, self.__n_output_nodes))
36 | if isinstance(alpha_init, np.ndarray):
37 | if alpha_init.shape != (self.__n_input_nodes, self.__n_hidden_nodes):
38 | raise ValueError(
39 |                     'the shape of alpha_init is expected to be (%d,%d).' % (self.__n_input_nodes, self.__n_hidden_nodes)
40 | )
41 | self.__alpha = alpha_init
42 | else:
43 | self.__alpha = np.random.uniform(-1.,1.,size=(self.__n_input_nodes, self.__n_hidden_nodes))
44 | if isinstance(bias_init, np.ndarray):
45 | if bias_init.shape != (self.__n_hidden_nodes,):
46 | raise ValueError(
47 | 'the shape of bias_init is expected to be (%d,).' % (self.__n_hidden_nodes,)
48 | )
49 | self.__bias = bias_init
50 | else:
51 | self.__bias = np.zeros(shape=(self.__n_hidden_nodes,))
52 |
53 | # set an activation function
54 | self.__activation = self.__get_activation_function(activation)
55 |
56 | # set a loss function
57 | self.__loss = self.__get_loss_function(loss)
58 |
59 | def __call__(self, x):
60 | h = self.__activation(x.dot(self.__alpha) + self.__bias)
61 | return h.dot(self.__beta)
62 |
63 | def predict(self, x):
64 |         return self(x)  # ndarray of shape (n_samples, n_output_nodes)
65 |
66 | def evaluate(self, x, t, metrics=['loss']):
67 | y_pred = self.predict(x)
68 | y_true = t
69 | y_pred_argmax = np.argmax(y_pred, axis=-1)
70 | y_true_argmax = np.argmax(y_true, axis=-1)
71 | ret = []
72 | for m in metrics:
73 | if m == 'loss':
74 | loss = self.__loss(y_true, y_pred)
75 | ret.append(loss)
76 | elif m == 'accuracy':
77 | acc = np.sum(y_pred_argmax == y_true_argmax) / len(t)
78 | ret.append(acc)
79 | elif m == 'uar':
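                # UAR: recall computed per class, then averaged with equal class weights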
80 | num_classes = len(t[0])
81 | uar = []
82 | for i in range(num_classes):
83 | tp = np.sum((y_pred_argmax == i) & (y_true_argmax == i))
84 | tp_fn = np.sum(y_true_argmax == i)
85 | uar.append(tp / tp_fn)
86 | uar = np.mean(uar)
87 | ret.append(uar)
88 | else:
89 | raise ValueError(
90 | 'an unknown evaluation indicator \'%s\'.' % m
91 | )
92 | if len(ret) == 1:
93 | ret = ret[0]
94 | elif len(ret) == 0:
95 | ret = None
96 | return ret
97 |
98 |
99 | def fit(self, x, t):
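        # single-shot training: with alpha and bias fixed, the optimal beta
        # is the least-squares solution of H.dot(beta) = t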
100 | H = self.__activation(x.dot(self.__alpha) + self.__bias)
101 |
102 | # compute a pseudoinverse of H
103 | H_pinv = np.linalg.pinv(H)
104 |
105 | # update beta
106 | self.__beta = H_pinv.dot(t)
107 |
108 | def save(self, filepath):
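        # store the architecture, hyperparameter names, and weights in one HDF5 file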
109 | with h5py.File(filepath, 'w') as f:
110 | arc = f.create_dataset('architecture', data=np.array([self.__n_input_nodes, self.__n_hidden_nodes, self.__n_output_nodes]))
111 | arc.attrs['activation'] = self.__get_activation_name(self.__activation).encode('utf-8')
112 | arc.attrs['loss'] = self.__get_loss_name(self.__loss).encode('utf-8')
113 | arc.attrs['name'] = self.name.encode('utf-8')
114 | f.create_group('weights')
115 | f.create_dataset('weights/alpha', data=self.__alpha)
116 | f.create_dataset('weights/beta', data=self.__beta)
117 | f.create_dataset('weights/bias', data=self.__bias)
118 |
119 | def __get_activation_function(self, name):
120 | if name == 'sigmoid':
121 | return _sigmoid
122 | elif name == 'identity':
123 | return _identity
124 | else:
125 | raise ValueError(
126 | 'an unknown activation function \'%s\'.' % name
127 | )
128 |
129 | def __get_activation_name(self, activation):
130 | if activation == _sigmoid:
131 | return 'sigmoid'
132 | elif activation == _identity:
133 | return 'identity'
134 |
135 | def __get_loss_function(self, name):
136 | if name == 'mean_squared_error':
137 | return _mean_squared_error
138 | elif name == 'mean_absolute_error':
139 | return _mean_absolute_error
140 | else:
141 | raise ValueError(
142 | 'an unknown loss function \'%s\'.' % name
143 | )
144 |
145 | def __get_loss_name(self, loss):
146 | if loss == _mean_squared_error:
147 | return 'mean_squared_error'
148 | elif loss == _mean_absolute_error:
149 | return 'mean_absolute_error'
150 |
151 | @property
152 | def weights(self):
153 | return {
154 | 'alpha': self.__alpha,
155 | 'beta': self.__beta,
156 | 'bias': self.__bias,
157 | }
158 |
159 | @property
160 | def input_shape(self):
161 | return (self.__n_input_nodes,)
162 |
163 | @property
164 | def output_shape(self):
165 | return (self.__n_output_nodes,)
166 |
167 | @property
168 | def n_input_nodes(self):
169 | return self.__n_input_nodes
170 |
171 | @property
172 | def n_hidden_nodes(self):
173 | return self.__n_hidden_nodes
174 |
175 | @property
176 | def n_output_nodes(self):
177 | return self.__n_output_nodes
178 |
179 | @property
180 | def activation(self):
181 | return self.__get_activation_name(self.__activation)
182 |
183 | @property
184 | def loss(self):
185 | return self.__get_loss_name(self.__loss)
186 |
187 | def load_model(filepath):
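    # rebuild an ELM from the architecture and weights written by ELM.save()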
188 | with h5py.File(filepath, 'r') as f:
189 | alpha_init = f['weights/alpha'][...]
190 | beta_init = f['weights/beta'][...]
191 | bias_init = f['weights/bias'][...]
192 | arc = f['architecture']
193 | n_input_nodes = arc[0]
194 | n_hidden_nodes = arc[1]
195 | n_output_nodes = arc[2]
196 | activation = arc.attrs['activation'].decode('utf-8')
197 | loss = arc.attrs['loss'].decode('utf-8')
198 | name = arc.attrs['name'].decode('utf-8')
199 | model = ELM(
200 | n_input_nodes=n_input_nodes,
201 | n_hidden_nodes=n_hidden_nodes,
202 | n_output_nodes=n_output_nodes,
203 | activation=activation,
204 | loss=loss,
205 | alpha_init=alpha_init,
206 | beta_init=beta_init,
207 | bias_init=bias_init,
208 | name=name,
209 | )
210 | return model
211 |
--------------------------------------------------------------------------------