├── model.png
├── LICENSE
├── README.md
├── cifar100.py
└── cifar10.py

/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/johnolafenwa/FastNet/HEAD/model.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 John Olafenwa

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FastNet
Official repository for FastNet, an efficient convolutional neural network architecture, highly optimized for edge devices and mobile applications.

Read the paper for more details.

In light of the great need for intelligence at the edge, in smartphones, IoT devices, smart cameras and low-cost drones, we have developed a new architecture that achieves high accuracy on standard datasets while being incredibly fast on both GPUs and CPUs.

Recent architectures have explored extreme depth, very great width and layer parallelization. We explicitly avoid all of these, as they lead to models that can only be served from the cloud and are too slow and too large to be deployed on smart devices. Instead, we use medium depth and medium width throughout the network, and we carefully optimized its parameters to achieve highly competitive accuracies at very low computational cost. Our architecture is also very simple and can be replicated by any ML engineer using any deep learning library.
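FastNet is built from a single repeated building block: a pre-activation unit that applies batch normalization and a ReLU before a same-padded convolution. This is the block defined in the training scripts below (cifar10.py and cifar100.py):

```python
from keras.layers import BatchNormalization, Activation, Conv2D

# Pre-activation unit used throughout FastNet: BN -> ReLU -> Conv
def UnitCell(x, channels, kernel_size=(3, 3), strides=(1, 1)):
    y = BatchNormalization(scale=True, momentum=0.95)(x)
    y = Activation("relu")(y)
    y = Conv2D(channels, kernel_size=kernel_size, strides=strides,
               kernel_initializer='he_normal', padding="same")(y)
    return y
```

Twelve 3x3 units and three 1x1 units are stacked with four 2x2 max-pooling stages, followed by average pooling, flattening and a softmax classifier.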
ACCURACIES

Number of Parameters: 1.6 Million

CIFAR 10 Accuracy: 93.98 %
CIFAR 100 Accuracy: 70.81 %
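The training scripts below checkpoint the model whenever validation accuracy improves. A minimal sketch of reloading a saved checkpoint for inference, assuming a file produced by cifar10.py (the epoch number in the filename is hypothetical):

```python
import numpy as np
from keras.datasets import cifar10
from keras.models import load_model

# Filename follows the checkpoint naming scheme in cifar10.py; epoch 042 is hypothetical.
model = load_model('cifar10_saved_modelsbest/cifar10_model.042.h5')

# Inputs must be preprocessed exactly as in the training script.
(_, _), (x_test, _) = cifar10.load_data()
x_test = x_test.astype('float32') / 255
x_test = (x_test - x_test.mean(axis=0)) / x_test.std(axis=0)

probs = model.predict(x_test)        # (10000, 10) softmax probabilities
labels = np.argmax(probs, axis=1)    # predicted class indices
```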

Our architecture completes 200 training epochs in just three hours, even though we used real-time data augmentation and saved the model at every epoch while conducting our experiments. The experiments were run with Keras on the TensorFlow backend, using an NVIDIA P100 GPU.

The scripts provided in this repository will complete 200 epochs in less than three hours when run in the same environment.
--------------------------------------------------------------------------------
/cifar100.py:
--------------------------------------------------------------------------------
import keras
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
from keras.datasets import cifar100
import os

from keras.layers import BatchNormalization, Conv2D, Activation, MaxPooling2D, Dense
from keras.layers import AveragePooling2D, Input, Flatten
from keras.models import Model
from math import ceil


# A single unit: pre-activation (BN -> ReLU) followed by a same-padded convolution
def UnitCell(x, channels, kernel_size=(3, 3), strides=(1, 1)):
    y = BatchNormalization(scale=True, momentum=0.95)(x)
    y = Activation("relu")(y)
    y = Conv2D(channels, kernel_initializer='he_normal', kernel_size=kernel_size, strides=strides, padding="same")(y)

    return y

# The network
def FastNet(input_shape, num_classes=100):
    inputs = Input(input_shape)

    y = UnitCell(inputs, 64)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128, kernel_size=(1, 1))
    y = UnitCell(y, 128, kernel_size=(1, 1))
    y = UnitCell(y, 128, kernel_size=(1, 1))
    y = AveragePooling2D(pool_size=(2, 2))(y)
    y = Flatten()(y)
    outputs = Dense(num_classes, activation='softmax')(y)

    model = Model(inputs=inputs, outputs=outputs, name="FastNet")
    return model


# Load the CIFAR-100 data.
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

# Input image dimensions
input_shape = x_train.shape[1:]

num_classes = 100
batch_size = 128

# Scale pixels to [0, 1], then standardize each pixel position over the dataset.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

x_train = (x_train - x_train.mean(axis=0)) / x_train.std(axis=0)
x_test = (x_test - x_test.mean(axis=0)) / x_test.std(axis=0)

num_train_samples = x_train.shape[0]
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary class matrices.
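# keras.utils.to_categorical turns the (N, 1) integer label array into one-hot
# rows of shape (N, num_classes), e.g. to_categorical([1, 3], 4) gives
# [[0., 1., 0., 0.], [0., 0., 0., 1.]].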
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = FastNet(input_shape, num_classes=num_classes)

save_direc = os.path.join(os.getcwd(), 'cifar100_saved_modelsbest')

model_name = 'cifar100_model.{epoch:03d}.h5'
if not os.path.isdir(save_direc):
    os.makedirs(save_direc)
filepath = os.path.join(save_direc, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)


def lr_schedule(epoch):
    """Learning rate schedule.

    The learning rate is reduced after 80, 120, 160 and 180 epochs.
    Called automatically every epoch as part of the training callbacks.

    # Arguments
        epoch (int): current epoch index
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1

    print('Learning rate: ', lr)

    return lr


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

# Preprocessing and real-time data augmentation:
datagen = ImageDataGenerator(rotation_range=10,
                             width_shift_range=5. / 32,
                             height_shift_range=5. / 32,
                             horizontal_flip=True)

# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(x_train)

epochs = 200
steps_per_epoch = ceil(num_train_samples / batch_size)
# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    validation_data=(x_test, y_test),
                    epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, workers=4,
                    callbacks=callbacks)

# Score trained model.
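# model.evaluate returns the loss followed by the metrics given to compile(),
# so scores[0] is the test loss and scores[1] the test accuracy.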
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
--------------------------------------------------------------------------------
/cifar10.py:
--------------------------------------------------------------------------------
import keras
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
from keras.datasets import cifar10
import os

from keras.layers import BatchNormalization, Conv2D, Activation, MaxPooling2D, Dense
from keras.layers import AveragePooling2D, Input, Flatten
from keras.models import Model
from math import ceil


# A single unit: pre-activation (BN -> ReLU) followed by a same-padded convolution
def UnitCell(x, channels, kernel_size=(3, 3), strides=(1, 1)):
    y = BatchNormalization(scale=True, momentum=0.95)(x)
    y = Activation("relu")(y)
    y = Conv2D(channels, kernel_initializer='he_normal', kernel_size=kernel_size, strides=strides, padding="same")(y)

    return y

# The network
def FastNet(input_shape, num_classes=10):
    inputs = Input(input_shape)

    y = UnitCell(inputs, 64)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128)
    y = UnitCell(y, 128)
    y = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(y)

    y = UnitCell(y, 128, kernel_size=(1, 1))
    y = UnitCell(y, 128, kernel_size=(1, 1))
    y = UnitCell(y, 128, kernel_size=(1, 1))
    y = AveragePooling2D(pool_size=(2, 2))(y)
    y = Flatten()(y)
    outputs = Dense(num_classes, activation='softmax')(y)

    model = Model(inputs=inputs, outputs=outputs, name="FastNet")
    return model


# Load the CIFAR-10 data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions
input_shape = x_train.shape[1:]

num_classes = 10
batch_size = 128

# Scale pixels to [0, 1], then standardize each pixel position over the dataset.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

x_train = (x_train - x_train.mean(axis=0)) / x_train.std(axis=0)
x_test = (x_test - x_test.mean(axis=0)) / x_test.std(axis=0)

num_train_samples = x_train.shape[0]
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary class matrices.
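# As in cifar100.py, one-hot encode the integer labels so they match the
# 10-way softmax output and the categorical_crossentropy loss.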
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = FastNet(input_shape, num_classes=num_classes)

save_direc = os.path.join(os.getcwd(), 'cifar10_saved_modelsbest')

model_name = 'cifar10_model.{epoch:03d}.h5'
if not os.path.isdir(save_direc):
    os.makedirs(save_direc)
filepath = os.path.join(save_direc, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)


def lr_schedule(epoch):
    """Learning rate schedule.

    The learning rate is reduced after 80, 120, 160 and 180 epochs.
    Called automatically every epoch as part of the training callbacks.

    # Arguments
        epoch (int): current epoch index
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1

    print('Learning rate: ', lr)

    return lr


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

# Preprocessing and real-time data augmentation:
datagen = ImageDataGenerator(rotation_range=10,
                             width_shift_range=5. / 32,
                             height_shift_range=5. / 32,
                             horizontal_flip=True)

# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(x_train)

epochs = 200
steps_per_epoch = ceil(num_train_samples / batch_size)
# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    validation_data=(x_test, y_test),
                    epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, workers=4,
                    callbacks=callbacks)

# Score trained model.
# model.evaluate returns the loss followed by the metrics given to compile().
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
--------------------------------------------------------------------------------