├── model.png
├── LICENSE
├── README.md
├── cifar100.py
└── cifar10.py
/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/johnolafenwa/FastNet/HEAD/model.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 John Olafenwa
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # FastNet
2 | Official repository for FastNet, an efficient convolutional neural network architecture optimized for edge devices and mobile applications.
3 |
4 | Read the paper for more details.
5 |
6 | In light of the growing need for intelligence at the edge, in devices such as smartphones, IoT devices, smart cameras, and low-cost drones, we have developed a new architecture that achieves high accuracy on standard datasets while being remarkably fast on both GPUs and CPUs.
7 | Recent architectures have explored extreme depth, very large width, and layer parallelization. We explicitly avoid all of these, as they lead to models that can only run in the cloud and are too slow and too large to deploy on smart devices. Instead, we use moderate depth and moderate width throughout the network, and we carefully optimized its parameters to achieve highly competitive accuracy at very low computational cost. The architecture is also very simple and can be replicated by any ML engineer in any deep learning library, as sketched below.
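For reference, the whole network is built from a single pre-activation unit (BatchNorm -> ReLU -> Conv2D), exactly as defined in `cifar10.py` and `cifar100.py`:

```python
from keras.layers import BatchNormalization, Activation, Conv2D

# The repeated building block: BatchNorm -> ReLU -> convolution.
def UnitCell(x, channels, kernel_size=[3, 3], strides=(1, 1)):
    y = BatchNormalization(scale=True, momentum=0.95)(x)
    y = Activation("relu")(y)
    y = Conv2D(channels, kernel_initializer='he_normal',
               kernel_size=kernel_size, strides=strides, padding="same")(y)
    return y
```

FastNet stacks twelve 3x3 units of this form, interleaved with four 2x2 max-pools, followed by three 1x1 units, average pooling, and a softmax classifier.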
8 |
9 |
10 |
11 |
12 |
13 | ## Results
14 |
15 | - Number of parameters: 1.6 million
16 | - CIFAR-10 accuracy: 93.98 %
17 | - CIFAR-100 accuracy: 70.81 %
18 |
19 | Our architecture completes 200 training epochs in just three hours, even though we used real-time data augmentation and saved model checkpoints at every epoch during our experiments. All experiments were run with Keras on the TensorFlow backend, on an NVIDIA P100 GPU.
20 |
21 | The scripts provided in this repository complete 200 epochs in less than three hours when run in the same environment.
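For example, with Keras and the TensorFlow backend installed, each script downloads its dataset via `keras.datasets`, trains for 200 epochs with real-time augmentation, and saves a checkpoint whenever validation accuracy improves:

```
python cifar10.py    # train FastNet on CIFAR-10
python cifar100.py   # train FastNet on CIFAR-100
```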
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/cifar100.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.optimizers import Adam
3 | from keras.callbacks import ModelCheckpoint, LearningRateScheduler
4 | from keras.callbacks import ReduceLROnPlateau
5 | from keras.preprocessing.image import ImageDataGenerator
6 | import numpy as np
7 | from keras.datasets import cifar100
8 | import os
9 |
10 | from keras.layers import BatchNormalization,Conv2D,Activation,MaxPooling2D,Dense
11 | from keras.layers import AveragePooling2D, Input, Flatten
12 | from keras.models import Model
13 | from math import ceil
14 |
15 |
16 | # A single pre-activation unit: BatchNorm -> ReLU -> Conv2D.
17 | def UnitCell(x, channels, kernel_size=[3, 3], strides=(1, 1)):
18 |     y = BatchNormalization(scale=True, momentum=0.95)(x)
19 |     y = Activation("relu")(y)
20 |     y = Conv2D(channels, kernel_initializer='he_normal', kernel_size=kernel_size, strides=strides, padding="same")(y)
21 |
22 |     return y
23 |
24 | # The FastNet network.
25 | def FastNet(input_shape, num_classes=100):
26 | inputs = Input(input_shape)
27 |
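    # Twelve 3x3 units in four blocks, each block closed by a 2x2 max-pool
    # (feature maps: 32 -> 16 -> 8 -> 4 -> 2), then three 1x1 units and a
    # 2x2 average pool ahead of the softmax classifier.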
28 | y = UnitCell(inputs, 64)
29 | y = UnitCell(y, 128)
30 | y = UnitCell(y, 128)
31 | y = UnitCell(y, 128)
32 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
33 |
34 | y = UnitCell(y, 128)
35 | y = UnitCell(y, 128)
36 |
37 | y = UnitCell(y, 128)
38 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
39 |
40 | y = UnitCell(y, 128)
41 | y = UnitCell(y, 128)
42 |
43 | y = UnitCell(y, 128)
44 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
45 |
46 | y = UnitCell(y, 128)
47 | y = UnitCell(y, 128)
48 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
49 |
50 | y = UnitCell(y, 128, kernel_size=[1, 1])
51 | y = UnitCell(y, 128, kernel_size=[1, 1])
52 | y = UnitCell(y, 128, kernel_size=[1, 1])
53 | y = AveragePooling2D(pool_size=(2, 2))(y)
54 | y = Flatten()(y)
55 | outputs = Dense(num_classes,
56 | activation='softmax')(y)
57 |
58 | model = Model(inputs=inputs, outputs=outputs, name="FastNet")
59 | return model
60 |
61 |
62 |
63 | # Load the CIFAR-100 data.
64 | (x_train, y_train), (x_test, y_test) = cifar100.load_data()
65 |
66 | # Input image dimensions
67 | input_shape = x_train.shape[1:]
68 |
69 | num_classes = 100
70 | batch_size = 128
71 |
72 | # Normalize data.
73 | x_train = x_train.astype('float32') / 255
74 | x_test = x_test.astype('float32') / 255
75 |
76 | # Standardize both splits using the training-set mean and std.
77 | mean, std = x_train.mean(axis=0), x_train.std(axis=0)
78 | x_train = (x_train - mean) / std
79 | x_test = (x_test - mean) / std
80 |
81 | num_train_samples = x_train.shape[0]
82 | print('x_train shape:', x_train.shape)
83 | print(x_train.shape[0], 'train samples')
84 | print(x_test.shape[0], 'test samples')
85 | print('y_train shape:', y_train.shape)
86 |
87 | # Convert class vectors to binary class matrices.
88 | y_train = keras.utils.to_categorical(y_train, num_classes)
89 | y_test = keras.utils.to_categorical(y_test, num_classes)
90 | model = FastNet(input_shape, num_classes=num_classes)
91 |
92 | save_direc = os.path.join(os.getcwd(), 'cifar100_saved_modelsbest')
93 |
94 | model_name = 'cifar100_model.{epoch:03d}.h5'
95 | if not os.path.isdir(save_direc):
96 | os.makedirs(save_direc)
97 | filepath = os.path.join(save_direc, model_name)
98 |
99 | # Prepare callbacks for model saving and for learning rate adjustment.
100 | checkpoint = ModelCheckpoint(filepath=filepath,
101 | monitor='val_acc',
102 | verbose=1,
103 | save_best_only=True)
104 |
105 |
106 | def lr_schedule(epoch):
107 | """Learning Rate Schedule
108 | Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
109 | Called automatically every epoch as part of callbacks during training.
110 | # Arguments
111 |         epoch (int): The current epoch index
112 | # Returns
113 | lr (float32): learning rate
114 | """
115 |
116 | lr = 1e-3
117 | if epoch > 180:
118 | lr *= 0.5e-3
119 | elif epoch > 160:
120 | lr *= 1e-3
121 | elif epoch > 120:
122 | lr *= 1e-2
123 | elif epoch > 80:
124 | lr *= 1e-1
125 |
126 | print('Learning rate: ', lr)
127 |
128 | return lr
129 |
130 |
131 | model.compile(loss='categorical_crossentropy',
132 | optimizer=Adam(lr=lr_schedule(0)),
133 | metrics=['accuracy'])
134 | model.summary()
135 |
136 | lr_scheduler = LearningRateScheduler(lr_schedule)
137 |
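# On top of the fixed schedule, scale the learning rate by sqrt(0.1) whenever
# validation loss plateaus for 5 epochs, down to a floor of min_lr.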
138 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
139 | cooldown=0,
140 | patience=5,
141 | min_lr=0.5e-6)
142 |
143 | callbacks = [checkpoint, lr_reducer, lr_scheduler]
144 |
145 | # Real-time preprocessing and data augmentation: rotations, shifts, flips.
146 | datagen = ImageDataGenerator(rotation_range=10,
147 | width_shift_range=5. / 32,
148 | height_shift_range=5. / 32,
149 | horizontal_flip=True)
150 |
151 | # datagen.fit computes featurewise statistics (mean, std, ZCA components);
152 | # with only geometric augmentations enabled above, it has no effect here.
153 | datagen.fit(x_train)
154 |
155 | epochs = 200
156 | steps_per_epoch = ceil(num_train_samples/batch_size)
157 | # Fit the model on the batches generated by datagen.flow().
158 | model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
159 | validation_data=(x_test, y_test),
160 |                     epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, workers=4,
161 | callbacks=callbacks)
162 |
163 | # Score trained model.
164 | scores = model.evaluate(x_test, y_test, verbose=1)
165 | print('Test loss:', scores[0])
166 | print('Test accuracy:', scores[1])
167 |
--------------------------------------------------------------------------------
/cifar10.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.optimizers import Adam
3 | from keras.callbacks import ModelCheckpoint, LearningRateScheduler
4 | from keras.callbacks import ReduceLROnPlateau
5 | from keras.preprocessing.image import ImageDataGenerator
6 | import numpy as np
7 | from keras.datasets import cifar10
8 | import os
9 |
10 |
11 | from keras.layers import BatchNormalization,Conv2D,Activation,MaxPooling2D,Dense
12 | from keras.layers import AveragePooling2D, Input, Flatten
13 | from keras.models import Model
14 | from math import ceil
15 |
16 |
17 | # A single pre-activation unit: BatchNorm -> ReLU -> Conv2D.
18 | def UnitCell(x, channels, kernel_size=[3, 3], strides=(1, 1)):
19 |     y = BatchNormalization(scale=True, momentum=0.95)(x)
20 |     y = Activation("relu")(y)
21 |     y = Conv2D(channels, kernel_initializer='he_normal', kernel_size=kernel_size, strides=strides, padding="same")(y)
22 |
23 |     return y
24 |
25 | # The FastNet network.
26 | def FastNet(input_shape, num_classes=10):
27 | inputs = Input(input_shape)
28 |
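    # Twelve 3x3 units in four blocks, each block closed by a 2x2 max-pool
    # (feature maps: 32 -> 16 -> 8 -> 4 -> 2), then three 1x1 units and a
    # 2x2 average pool ahead of the softmax classifier.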
29 | y = UnitCell(inputs, 64)
30 | y = UnitCell(y, 128)
31 | y = UnitCell(y, 128)
32 | y = UnitCell(y, 128)
33 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
34 |
35 | y = UnitCell(y, 128)
36 | y = UnitCell(y, 128)
37 |
38 | y = UnitCell(y, 128)
39 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
40 |
41 | y = UnitCell(y, 128)
42 | y = UnitCell(y, 128)
43 |
44 | y = UnitCell(y, 128)
45 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
46 |
47 | y = UnitCell(y, 128)
48 | y = UnitCell(y, 128)
49 | y = MaxPooling2D(pool_size=(2, 2), strides=[2, 2])(y)
50 |
51 | y = UnitCell(y, 128, kernel_size=[1, 1])
52 | y = UnitCell(y, 128, kernel_size=[1, 1])
53 | y = UnitCell(y, 128, kernel_size=[1, 1])
54 | y = AveragePooling2D(pool_size=(2, 2))(y)
55 | y = Flatten()(y)
56 | outputs = Dense(num_classes,
57 | activation='softmax')(y)
58 |
59 | model = Model(inputs=inputs, outputs=outputs, name="FastNet")
60 | return model
61 |
62 |
63 |
64 | # Load the CIFAR-10 data.
65 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
66 |
67 | # Input image dimensions
68 | input_shape = x_train.shape[1:]
69 |
70 | num_classes = 10
71 | batch_size = 128
72 |
73 | # Normalize data.
74 | x_train = x_train.astype('float32') / 255
75 | x_test = x_test.astype('float32') / 255
76 |
77 | # Standardize both splits using the training-set mean and std.
78 | mean, std = x_train.mean(axis=0), x_train.std(axis=0)
79 | x_train = (x_train - mean) / std
80 | x_test = (x_test - mean) / std
81 |
82 | num_train_samples = x_train.shape[0]
83 | print('x_train shape:', x_train.shape)
84 | print(x_train.shape[0], 'train samples')
85 | print(x_test.shape[0], 'test samples')
86 | print('y_train shape:', y_train.shape)
87 |
88 | # Convert class vectors to binary class matrices.
89 | y_train = keras.utils.to_categorical(y_train, num_classes)
90 | y_test = keras.utils.to_categorical(y_test, num_classes)
91 |
92 | model = FastNet(input_shape, num_classes=num_classes)
93 |
94 | save_direc = os.path.join(os.getcwd(), 'cifar10_saved_modelsbest')
95 |
96 | model_name = 'cifar10_model.{epoch:03d}.h5'
97 | if not os.path.isdir(save_direc):
98 | os.makedirs(save_direc)
99 | filepath = os.path.join(save_direc, model_name)
100 |
101 | # Prepare callbacks for model saving and for learning rate adjustment.
102 | checkpoint = ModelCheckpoint(filepath=filepath,
103 | monitor='val_acc',
104 | verbose=1,
105 | save_best_only=True)
106 |
107 |
108 | def lr_schedule(epoch):
109 | """Learning Rate Schedule
110 | Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
111 | Called automatically every epoch as part of callbacks during training.
112 | # Arguments
113 |         epoch (int): The current epoch index
114 | # Returns
115 | lr (float32): learning rate
116 | """
117 |
118 | lr = 1e-3
119 | if epoch > 180:
120 | lr *= 0.5e-3
121 | elif epoch > 160:
122 | lr *= 1e-3
123 | elif epoch > 120:
124 | lr *= 1e-2
125 | elif epoch > 80:
126 | lr *= 1e-1
127 |
128 | print('Learning rate: ', lr)
129 |
130 | return lr
131 |
132 |
133 | model.compile(loss='categorical_crossentropy',
134 | optimizer=Adam(lr=lr_schedule(0)),
135 | metrics=['accuracy'])
136 | model.summary()
137 |
138 | lr_scheduler = LearningRateScheduler(lr_schedule)
139 |
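# On top of the fixed schedule, scale the learning rate by sqrt(0.1) whenever
# validation loss plateaus for 5 epochs, down to a floor of min_lr.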
140 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
141 | cooldown=0,
142 | patience=5,
143 | min_lr=0.5e-6)
144 |
145 | callbacks = [checkpoint, lr_reducer, lr_scheduler]
146 |
147 | # Real-time preprocessing and data augmentation: rotations, shifts, flips.
148 | datagen = ImageDataGenerator(rotation_range=10,
149 | width_shift_range=5. / 32,
150 | height_shift_range=5. / 32,
151 | horizontal_flip=True)
152 |
153 | # datagen.fit computes featurewise statistics (mean, std, ZCA components);
154 | # with only geometric augmentations enabled above, it has no effect here.
155 | datagen.fit(x_train)
156 |
157 | epochs = 200
158 | steps_per_epoch = ceil(num_train_samples/batch_size)
159 | # Fit the model on the batches generated by datagen.flow().
160 | model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
161 | validation_data=(x_test, y_test),
162 |                     epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, workers=4,
163 | callbacks=callbacks)
164 |
165 | # Score trained model.
166 | scores = model.evaluate(x_test, y_test, verbose=1)
167 | print('Test loss:', scores[0])
168 | print('Test accuracy:', scores[1])
169 |
--------------------------------------------------------------------------------