├── runtime.txt
├── .gitignore
├── requirements.txt
├── helpers
    ├── draw_prob_dist.py
    ├── restructure_dataset.py
    └── get_prob_dist.py
├── README.md
└── cnn_rnn_classifier.py


/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.5.2
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | __pycache__
3 | venv
4 | tiny-imagenet-100-A
5 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | cycler==0.10.0
 2 | h5py==2.6.0
 3 | Keras==1.1.2
 4 | matplotlib==1.5.3
 5 | numpy==1.11.2
 6 | Pillow==3.4.2
 7 | pkg-resources==0.0.0
 8 | protobuf==3.1.0
 9 | pyparsing==2.1.10
10 | python-dateutil==2.6.0
11 | pytz==2016.10
12 | PyYAML==3.12
13 | scipy==0.18.1
14 | six==1.10.0
15 | tensorflow==0.12.0rc0
16 | Theano==0.8.2
17 | 


--------------------------------------------------------------------------------
/helpers/draw_prob_dist.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | if __name__ == '__main__':
 4 |     labels_list = []
 5 |     top_1_list = []
 6 |     top_5_list = []
 7 |     with open('./cnn_rnn_results.txt', 'r') as f:
 8 |         # Get rid of first line
 9 |         f.readline()
10 | 
11 |         for line in f:
12 |             line_list = line.strip().split()
13 |             label, top_1, top_5 = line_list
14 |             labels_list.append(label)
15 |             top_1_list.append(float(top_1))
16 |             top_5_list.append(float(top_5))
17 | 
18 |     index = [i for i in range(len(labels_list))]
19 | 
20 |     print("Top 1 Average: {}%".format(np.average(top_1_list)))
21 |     print("Top 5 Average: {}%".format(np.average(top_5_list)))
22 | 
23 |     plt.figure(1, figsize=(20, 10))
24 |     plt.bar(index, top_1_list)
25 |     plt.suptitle('Top-1 Accuracy per label')
26 | 
27 |     plt.ylabel('Probability (%)')
28 |     plt.xlabel('Label')
29 | 
30 |     plt.xticks(index, labels_list, rotation='vertical')
31 |     plt.subplots_adjust(bottom=0.15)
32 | 
33 |     plt.figure(2, figsize=(20, 10))
34 |     plt.bar(index, top_5_list)
35 |     plt.suptitle('Top-5 Accuracy per label')
36 | 
37 |     plt.ylabel('Probability (%)')
38 |     plt.xlabel('Label')
39 | 
40 |     plt.xticks(index, labels_list, rotation='vertical')
41 |     plt.subplots_adjust(bottom=0.15)
42 | 
43 |     plt.show()
44 | 
45 | 


--------------------------------------------------------------------------------
/helpers/restructure_dataset.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | import os
 3 | import shutil
 4 | 
 5 | dataset_train_path = './tiny-imagenet-100-A/train/'
 6 | folders = [folder for folder in os.listdir(dataset_train_path)]
 7 | 
 8 | for folder in folders:
 9 |     label_dir = os.path.join(dataset_train_path, folder)
10 |     images_dir = os.path.join(label_dir, 'images/')
11 |     images = [image for image in os.listdir(images_dir)]
12 | 
13 |     for img in images:
14 |         img_path = os.path.join(images_dir, img)
15 |         relocated_img = Image.open(img_path)
16 |         relocated_img.save(os.path.join(label_dir, img))
17 | 
18 |     shutil.rmtree(images_dir)
19 |     os.unlink(os.path.join(label_dir, folder + '_boxes.txt'))
20 | 
21 | dataset_val_path = './tiny-imagenet-100-A/val/'
22 | val_images_dir = os.path.join(dataset_val_path, 'images')
23 | 
24 | labels_dict = {}
25 | with open(os.path.join(dataset_val_path, 'val_annotations.txt'), 'r') as f:
26 |     for line in f:
27 |         line_list = line.strip().split()
28 |         label = line_list[1]
29 |         labels_dict[line_list[0]] = label
30 |         try:
31 |             os.mkdir(os.path.join(dataset_val_path, label))
32 |         except OSError:
33 |             pass
34 | 
35 | val_images = [img for img in os.listdir(val_images_dir)]
36 | 
37 | for val_image in val_images:
38 |     img_path = os.path.join(val_images_dir, val_image)
39 |     label = labels_dict[val_image]
40 | 
41 |     relocated_img = Image.open(img_path)
42 |     relocated_img.save(os.path.join(dataset_val_path, label, val_image))
43 | 
44 | shutil.rmtree(val_images_dir)
45 | os.unlink(os.path.join(dataset_val_path, 'val_annotations.txt'))
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This is a practical example of how to combine a CNN and an RNN to classify images.
 2 | 
 3 | _NOTE: This classifier was tested with the tiny-imagenet-100 dataset only._
 4 | 
 5 | ## Network Architecture
 6 | 
 7 | The network consists of two different branches: a CNN branch which uses the Xception model, pretrained
 8 | on ImageNet and provided by Keras (https://keras.io/applications/#xception), and an independent RNN branch.
 9 | 
10 | Each one of these branches runs parallel to each other.
11 | 
12 | Initially, the entire network takes an RGB image whose shape is 299x299x3.
13 | 
14 | On the CNN branch, this image is taken as is (299x299x3) and passed through the pretrained Xception
15 | model up to its final convolution block; global average pooling over that block's output yields the
16 | bottleneck features, which are of size (batch_size, 2048).
17 | 
18 | On the other branch, the 299x299x3 image is transformed into a grayscale image of size 299x299x1 to
19 | be able to properly split it into chunks to feed it into the RNN. Afterwards, this 299x299 image is
20 | reshaped into (23, 3887), where 23 is the number of timesteps and 3887 is the dimension of each timestep. These values
21 | were chosen because 23*3887 == 299*299. The reshaped image is then passed through two LSTM
22 | layers of 2048 units each, the last of which outputs (batch_size, 2048).
23 | 
24 | Next, now that we have (batch_size, 2048) from both the CNN and RNN branches, these two outputs
25 | are merged using element-wise multiplication. The output of this multiplication is then fed to the
26 | classification layer which consists of 100 nodes (100 classes) and a softmax activation.
27 | 
28 | ## Network Training
29 | 
30 | The network was trained in two phases. In the first phase, all the layers of the CNN were frozen and only
31 | the last classification layer and the RNN network were trained. This was done using the RMSProp
32 | optimizer.
33 | 
34 | In the second phase, all the layers of the entire network were unfrozen and finetuned using Adam
35 | optimizer with a learning rate of 0.0001.
36 | 
37 | Using this two phase training technique, the cnn/rnn model combination is able to achieve a Top 5 Accuracy of 96.14% on 
38 | a minified version of the ImageNet dataset that contains only 100 classes (tiny-imagenet-100)
39 | 
40 | ## Dataset Structure
41 | 
42 | Keras’ ImageDataGenerator flow_from_directory method
43 | expects the dataset to be in a certain structure. 
44 | 
45 | The restructure_dataset.py script in the helpers directory can be used
46 | to reorganize the original dataset (given it has the same structure as the tiny-imagenet-100 dataset) into the structure Keras
47 | expects.
48 | 
49 | ## Image Preprocessing
50 | 
51 | The Xception model expects images to be processed in a certain way. However, because
52 | Keras’ built-in ImageDataGenerator is used, we could not easily preprocess the input while using the
53 | fit_generator() training method.
54 | 
55 | Consequently, in cnn_rnn_classifier.py, a new class was created, CustomImageDataGenerator that inherits
56 | from ImageDataGenerator and has an overridden standardize() method, which is called by
57 | ImageDataGenerator before each batch is yielded to fit_generator().
58 | 
59 | The standardize() method of CustomImageDataGenerator applies the Xception model’s required
60 | preprocessing on the input.
61 | 


--------------------------------------------------------------------------------
/helpers/get_prob_dist.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from keras.applications.xception import Xception, decode_predictions, preprocess_input
  4 | from keras.callbacks import ModelCheckpoint
  5 | from keras.engine import Input, merge
  6 | from keras.layers import GlobalAveragePooling2D, Dense, Reshape, Lambda, K, LSTM
  7 | from keras.preprocessing import image
  8 | from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
  9 | from keras.models import Model
 10 | from keras.callbacks import TensorBoard
 11 | from keras.optimizers import Adam
 12 | 
 13 | import time
 14 | import numpy as np
 15 | from keras.utils import np_utils
 16 | 
 17 | np.random.seed(1337)
 18 | 
 19 | 
 20 | def load_val_dataset():
 21 |     validation_data_dir = './tiny-imagenet-100-A/val/'
 22 | 
 23 |     classes = []
 24 |     for subdir in sorted(os.listdir(validation_data_dir)):
 25 |         if os.path.isdir(os.path.join(validation_data_dir, subdir)):
 26 |             classes.append(subdir)
 27 | 
 28 |     class_indices = dict(zip(classes, range(len(classes))))
 29 | 
 30 |     X_val = []
 31 | 
 32 |     # Extracting validation dat
 33 |     i = 0
 34 |     y_val = []
 35 |     for subdir in classes:
 36 |         subpath = os.path.join(validation_data_dir, subdir)
 37 |         for fname in sorted(os.listdir(subpath)):
 38 |             y_val.append(class_indices[subdir])
 39 | 
 40 |             # Load image as numpy array and append it to X_val
 41 |             img = load_img(os.path.join(subpath, fname), target_size=(img_width, img_height))
 42 |             x = img_to_array(img)
 43 |             X_val.append(x)
 44 | 
 45 |             i += 1
 46 | 
 47 |     Y_val = np_utils.to_categorical(y_val)
 48 |     X_val = np.asarray(X_val, dtype='float32')
 49 |     return classes, X_val, Y_val
 50 | 
 51 | 
 52 | def rgb_to_grayscale(input):
 53 |     """Average each pixel over axis 3 (the RGB channels of the model input) to get a grayscale image"""
 54 |     return K.mean(input, axis=3)
 55 | 
 56 | 
 57 | def rgb_to_grayscale_output_shape(input_shape):  # output-shape argument of the grayscale Lambda layer
 58 |     return input_shape[:-1]  # the input shape without its last dimension
 59 | 
 60 | 
 61 | nb_val_samples = 5000
 62 | 
 63 | img_width = 299
 64 | img_height = 299
 65 | 
 66 | print("Building model...")
 67 | input_tensor = Input(shape=(img_width, img_height, 3))
 68 | 
 69 | # Creating CNN branch: Xception with ImageNet weights and without its top layers
 70 | cnn_model = Xception(weights='imagenet', include_top=False, input_tensor=input_tensor)
 71 | 
 72 | x = cnn_model.output
 73 | cnn_bottleneck = GlobalAveragePooling2D()(x)
 74 | 
 75 | # Creating RNN branch: the grayscale image is cut into timesteps and fed to two stacked LSTMs
 76 | x = Lambda(rgb_to_grayscale, rgb_to_grayscale_output_shape)(input_tensor)
 77 | x = Reshape((23, 3887))(x)  # 23 timesteps, input dim of each timestep 3887
 78 | x = LSTM(2048, return_sequences=True)(x)
 79 | rnn_output = LSTM(2048)(x)
 80 | 
 81 | # Merging the cnn bottleneck and the rnn's output with element-wise multiplication
 82 | x = merge([cnn_bottleneck, rnn_output], mode='mul')
 83 | predictions = Dense(100, activation='softmax')(x)
 84 | 
 85 | model = Model(input=input_tensor, output=predictions)
 86 | # Same file name as the finetuning checkpoint written by cnn_rnn_classifier.py
 87 | model.load_weights("./finetuned_cnn_rnn_weights_2.hdf5")
 88 | 
 89 | print("Model built")
 90 | 
 91 | model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy',
 92 |               metrics=['accuracy', 'top_k_categorical_accuracy'])
 93 | 
 94 | classes, X_val, Y_val = load_val_dataset()
 95 | 
 96 | n_labels = len(classes)
 97 | 
 98 | n_imgs_by_label = np.zeros(n_labels, dtype=np.dtype(int))
 99 | n_top1_accurate_by_label = np.zeros(n_labels, dtype=np.dtype(int))
100 | n_top5_accurate_by_label = np.zeros(n_labels, dtype=np.dtype(int))
101 | 
102 | # Loop over each validation image and calculate Top-1 and Top-5 Correct Classification Rate
103 | for i, img in enumerate(X_val):
104 |     print(i)
105 |     ground_truth = Y_val[i].argmax()
106 |     n_imgs_by_label[ground_truth] += 1
107 | 
108 |     img = np.expand_dims(img, axis=0)
109 |     img = preprocess_input(img)
110 |     preds = model.predict(img)
111 | 
112 |     top_5_indices = (-preds).argsort()[:, :5]
113 |     top_5_indices = top_5_indices[0]
114 |     if ground_truth == top_5_indices[0]:
115 |         n_top1_accurate_by_label[ground_truth] += 1
116 |     if ground_truth in top_5_indices:
117 |         n_top5_accurate_by_label[ground_truth] += 1
118 | 
119 | # Create a text file that contains the top 1 and top 5 ACCR of each label
120 | results_path = '/home/shady-fanous/cnn_rnn_results.txt'
121 | with open(results_path, 'w+') as f:
122 |     f.write('Label\tTop-1 Accuracy\tTop-5 Accuracy\n')
123 |     for i, label in enumerate(classes):
124 |         label_top1_accuracy = round(100.0 * n_top1_accurate_by_label[i] / n_imgs_by_label[i], 2)
125 |         label_top5_accuracy = round(100.0 * n_top5_accurate_by_label[i] / n_imgs_by_label[i], 2)
126 |         line = '{}\t{}\t{}\n'.format(label, label_top1_accuracy, label_top5_accuracy)
127 |         f.write(line)
128 | 


--------------------------------------------------------------------------------
/cnn_rnn_classifier.py:
--------------------------------------------------------------------------------
  1 | from keras.applications.xception import Xception
  2 | from keras.callbacks import ModelCheckpoint
  3 | from keras.engine import Input, merge
  4 | from keras.layers import GlobalAveragePooling2D, Dense, Reshape, Lambda, K, LSTM
  5 | from keras.preprocessing.image import ImageDataGenerator
  6 | from keras.models import Model
  7 | from keras.callbacks import TensorBoard
  8 | from keras.optimizers import Adam
  9 | 
 10 | import time
 11 | import numpy as np
 12 | 
 13 | np.random.seed(1337)
 14 | 
 15 | 
 16 | class CustomImageDataGenerator(ImageDataGenerator):
 17 |     """
 18 |     Because Xception utilizes a custom preprocessing method, the only way to utilize this
 19 |     preprocessing method using the ImageDataGenerator is to overload the standardize method.
 20 | 
 21 |     The standardize method gets applied to each batch before ImageDataGenerator yields that batch.
 22 |     """
 23 | 
 24 |     def standardize(self, x):
 25 |         """
 26 |         Taken from keras.applications.xception.preprocess_input
 27 |         """
 28 |         if self.featurewise_center:
 29 |             x /= 255.
 30 |             x -= 0.5
 31 |             x *= 2.
 32 |         return x
 33 | 
 34 | 
 35 | def get_training_generator(batch_size=128):
 36 |     train_data_dir = './tiny-imagenet-100-A/train/'
 37 |     validation_data_dir = './tiny-imagenet-100-A/val/'
 38 |     image_datagen = CustomImageDataGenerator(featurewise_center=True)
 39 | 
 40 |     train_generator = image_datagen.flow_from_directory(
 41 |         train_data_dir,
 42 |         target_size=(img_width, img_height),
 43 |         batch_size=batch_size
 44 |     )
 45 | 
 46 |     val_generator = image_datagen.flow_from_directory(
 47 |         validation_data_dir,
 48 |         target_size=(img_width, img_height),
 49 |         batch_size=batch_size,
 50 |         shuffle=False
 51 |     )
 52 | 
 53 |     return train_generator, val_generator
 54 | 
 55 | 
 56 | def rgb_to_grayscale(input):
 57 |     """Average each pixel over axis 3 (the RGB channels of the model input) to get a grayscale image"""
 58 |     return K.mean(input, axis=3)
 59 | 
 60 | 
 61 | def rgb_to_grayscale_output_shape(input_shape):  # output-shape argument of the grayscale Lambda layer
 62 |     return input_shape[:-1]  # the input shape without its last dimension
 63 | 
 64 | 
 65 | batch_size_phase_one = 32
 66 | batch_size_phase_two = 16
 67 | nb_val_samples = 5000
 68 | 
 69 | nb_epochs = 30
 70 | 
 71 | img_width = 299
 72 | img_height = 299
 73 | 
 74 | # Setting tensorboard callback; the log directory name contains the start time of the run
 75 | now = time.strftime("%c")
 76 | tensorboard_callback = TensorBoard(log_dir='./logs/' + 'cnn_rnn ' + now, histogram_freq=0, write_graph=True,
 77 |                                    write_images=False)
 78 | 
 79 | # Loading dataset
 80 | print("Loading the dataset with batch size of {}...".format(batch_size_phase_one))
 81 | train_generator, val_generator = get_training_generator(batch_size_phase_one)
 82 | print("Dataset loaded")
 83 | 
 84 | print("Building model...")
 85 | input_tensor = Input(shape=(img_width, img_height, 3))
 86 | 
 87 | # Creating CNN branch: Xception with ImageNet weights and without its top layers
 88 | cnn_model = Xception(weights='imagenet', include_top=False, input_tensor=input_tensor)
 89 | 
 90 | x = cnn_model.output
 91 | cnn_bottleneck = GlobalAveragePooling2D()(x)
 92 | 
 93 | # Make CNN layers not trainable, so the first training phase leaves the pretrained weights untouched
 94 | for layer in cnn_model.layers:
 95 |     layer.trainable = False
 96 | 
 97 | # Creating RNN branch: the grayscale image is cut into timesteps and fed to two stacked LSTMs
 98 | x = Lambda(rgb_to_grayscale, rgb_to_grayscale_output_shape)(input_tensor)
 99 | x = Reshape((23, 3887))(x)  # 23 timesteps, input dim of each timestep 3887
100 | x = LSTM(2048, return_sequences=True)(x)
101 | rnn_output = LSTM(2048)(x)
102 | 
103 | # Merging the cnn bottleneck and the rnn's output with element-wise multiplication
104 | x = merge([cnn_bottleneck, rnn_output], mode='mul')
105 | predictions = Dense(100, activation='softmax')(x)
106 | 
107 | model = Model(input=input_tensor, output=predictions)
108 | 
109 | print("Model built")
110 | 
111 | model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
112 | 
113 | print("Starting training")
114 | checkpointer = ModelCheckpoint(filepath="./initial_cnn_rnn_weights_2.hdf5", verbose=1, save_best_only=True)
115 | model.fit_generator(train_generator, samples_per_epoch=4480, nb_epoch=nb_epochs, verbose=1,
116 |                     validation_data=val_generator,
117 |                     nb_val_samples=nb_val_samples,
118 |                     callbacks=[tensorboard_callback, checkpointer])
119 | 
120 | print("Initial training done, starting phase two (finetuning)")
121 | 
122 | # Load two new generator with smaller batch size, needed because using the same batch size
123 | # for the fine tuning will result in GPU running out of memory and tensorflow raising an error
124 | print("Loading the dataset with batch size of {}...".format(batch_size_phase_two))
125 | train_generator, val_generator = get_training_generator(batch_size_phase_two)
126 | print("Dataset loaded")
127 | 
128 | # Load best weights from initial training
129 | model.load_weights("./initial_cnn_rnn_weights_2.hdf5")
130 | 
131 | # Make all layers trainable for finetuning
132 | for layer in model.layers:
133 |     layer.trainable = True
134 | 
135 | model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy',
136 |               metrics=['accuracy', 'top_k_categorical_accuracy'])
137 | 
138 | checkpointer = ModelCheckpoint(filepath="./finetuned_cnn_rnn_weights_2.hdf5", verbose=1, save_best_only=True,
139 |                                monitor='val_acc')
140 | model.fit_generator(train_generator, samples_per_epoch=2240, nb_epoch=nb_epochs, verbose=1,
141 |                     validation_data=val_generator,
142 |                     nb_val_samples=nb_val_samples,
143 |                     callbacks=[tensorboard_callback, checkpointer])
144 | 
145 | # Final evaluation of the model
146 | print("Training done, doing final evaluation...")
147 | 
148 | model.load_weights("./finetuned_cnn_rnn_weights_2.hdf5")
149 | 
150 | model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy',
151 |               metrics=['accuracy', 'top_k_categorical_accuracy'])
152 | 
153 | scores = model.evaluate_generator(val_generator, val_samples=nb_val_samples)
154 | print(model.metrics_names, scores)
155 | print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
156 | 


--------------------------------------------------------------------------------