├── requirements.txt
├── .gitignore
├── utils.py
├── README.md
├── baseline.py
├── hypermodels.py
└── tuner_comparison.py

/requirements.txt:
--------------------------------------------------------------------------------
black==19.10b0
keras-tuner==1.0.1
loguru==0.4.0
tensorflow==2.5.1
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# virtualenv
venv/

# PyCharm utils
.idea/

# Polyaxon files
.polyaxonignore
.polyaxon/
polyaxon_experiments/
polyaxon_requirements.txt

# outputs
output/
--------------------------------------------------------------------------------

/utils.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from loguru import logger
from tensorflow.keras.datasets import cifar10


def set_gpu_config():
    # Enable memory growth on every GPU found, so TensorFlow does not
    # grab all GPU memory up front.
    logger.info("Setting up GPU if found")
    physical_devices = tf.config.experimental.list_physical_devices("GPU")
    if physical_devices:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)


def load_data():
    # Load CIFAR-10 and scale pixel values to [0, 1].
    # Note the (test, train) return order; callers unpack accordingly.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0
    return x_test, x_train, y_test, y_train
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# keras-tuner-tutorial
Hands-on tutorial for keras-tuner

This repo introduces hyperparameter tuning with the Keras Tuner library.
It compares the library's different tuners on a computer vision task, the CIFAR-10 dataset.

This is a work in progress; all feedback is welcome.

### Install the project
- Clone the repo
- Create a virtualenv and activate it:
```
virtualenv -p python3 venv
source venv/bin/activate
```
- Install the requirements:
```
pip install -r requirements.txt
```


### Results

Task durations were measured on an RTX 2080 GPU.

| Tuner            | Search time | Best accuracy (%) |
|------------------|-------------|-------------------|
| Worst Baseline   | 20min       | 63.1              |
| Default Baseline | 20min       | 74.5              |
| Random Search    | 10h 59min   | 76.8              |
| Hyperband        | 10h 0min    | 75.1              |

Here, the worst baseline is the worst accuracy obtained by a set of hyperparameters
during random search.
The default baseline is obtained by setting every hyperparameter to its default value.
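Both baselines can be read back off a finished search without retraining anything. A minimal sketch, assuming the kerastuner 1.0.x trials API and a `tuner` on which `tuner.search(...)` has already completed:

```
# Sketch (assumes kerastuner 1.0.x): rank the completed trials of a
# finished tuner by validation score to find the best and worst runs.
scored_trials = [t for t in tuner.oracle.trials.values() if t.score is not None]
best_trial = max(scored_trials, key=lambda t: t.score)
worst_trial = min(scored_trials, key=lambda t: t.score)
print(f"best: {best_trial.score}, worst: {worst_trial.score}")
print(worst_trial.hyperparameters.values)  # the "worst baseline" configuration
```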
### Run the baseline

```
python baseline.py
```

### Run the comparison
Available tuners:

- Random Search
- Hyperband
- Bayesian Optimization

```
python tuner_comparison.py
```
--------------------------------------------------------------------------------

/baseline.py:
--------------------------------------------------------------------------------
import time

from loguru import logger
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

from tuner_comparison import (
    INPUT_SHAPE,
    NUM_CLASSES,
    N_EPOCH_SEARCH,
)
from utils import (
    set_gpu_config,
    load_data,
)


def base_experiment():
    set_gpu_config()
    x_test, x_train, y_test, y_train = load_data()

    # Same architecture as CNNHyperModel, with every hyperparameter
    # fixed to its default value.
    model = keras.Sequential()
    model.add(
        Conv2D(filters=16, kernel_size=3, activation="relu", input_shape=INPUT_SHAPE)
    )
    model.add(Conv2D(16, 3, activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(rate=0.25))
    model.add(Conv2D(32, 3, activation="relu"))
    model.add(Conv2D(64, 3, activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(rate=0.25))
    model.add(Flatten())
    model.add(Dense(units=128, activation="relu"))
    model.add(Dropout(rate=0.25))
    model.add(Dense(NUM_CLASSES, activation="softmax"))

    model.compile(
        optimizer=keras.optimizers.Adam(1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    logger.info("Start training")
    search_start = time.time()
    model.fit(x_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
    search_end = time.time()
    elapsed_time = search_end - search_start
    logger.info(f"Elapsed time (s): {elapsed_time}")

    loss, accuracy = model.evaluate(x_test, y_test)
    logger.info(f"loss: {loss}, accuracy: {accuracy}")


if __name__ == "__main__":
    base_experiment()
--------------------------------------------------------------------------------
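baseline.py hard-codes the architecture that hypermodels.py (below) exposes through tunable hyperparameters. Since every `hp.Float`/`hp.Choice`/`hp.Int` call in `CNNHyperModel.build` declares a `default`, the default baseline can also be built straight from the hypermodel. A minimal sketch, assuming kerastuner 1.0.x, where a fresh `HyperParameters` object resolves each hyperparameter to its declared default:

```
# Sketch (assumes kerastuner 1.0.x): a fresh HyperParameters instance
# hands every hp.* call in build() its declared default value, so this
# reproduces the default-baseline architecture without any search.
from kerastuner import HyperParameters

from hypermodels import CNNHyperModel

hp = HyperParameters()
model = CNNHyperModel(input_shape=(32, 32, 3), num_classes=10).build(hp)
model.summary()
```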
/hypermodels.py:
--------------------------------------------------------------------------------
from kerastuner import HyperModel
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D


class CNNHyperModel(HyperModel):
    def __init__(self, input_shape, num_classes):
        super().__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes

    def build(self, hp):
        model = keras.Sequential()
        model.add(
            Conv2D(
                filters=16,
                kernel_size=3,
                activation="relu",
                input_shape=self.input_shape,
            )
        )
        model.add(Conv2D(filters=16, kernel_size=3, activation="relu"))
        model.add(MaxPooling2D(pool_size=2))
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_1", min_value=0.0, max_value=0.5, default=0.25, step=0.05
                )
            )
        )
        model.add(Conv2D(filters=32, kernel_size=3, activation="relu"))
        model.add(
            Conv2D(
                filters=hp.Choice("num_filters", values=[32, 64], default=64),
                kernel_size=3,
                activation="relu",
            )
        )
        model.add(MaxPooling2D(pool_size=2))
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_2", min_value=0.0, max_value=0.5, default=0.25, step=0.05
                )
            )
        )
        model.add(Flatten())
        model.add(
            Dense(
                units=hp.Int(
                    "units", min_value=32, max_value=512, step=32, default=128
                ),
                activation=hp.Choice(
                    "dense_activation",
                    values=["relu", "tanh", "sigmoid"],
                    default="relu",
                ),
            )
        )
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_3", min_value=0.0, max_value=0.5, default=0.25, step=0.05
                )
            )
        )
        model.add(Dense(self.num_classes, activation="softmax"))

        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float(
                    "learning_rate",
                    min_value=1e-4,
                    max_value=1e-2,
                    sampling="log",
                    default=1e-3,
                )
            ),
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model
--------------------------------------------------------------------------------
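tuner_comparison.py (below) runs all three tuners back to back, and at the budgets in the Results table each search takes on the order of ten hours. To smoke-test the setup first, a single tuner can be run with a deliberately tiny budget. A minimal sketch, with hypothetical small values for `max_trials`, `executions_per_trial`, and `epochs`:

```
# Sketch: a quick smoke test of the search pipeline with a tiny,
# hypothetical budget (2 trials x 2 epochs) before the full comparison.
from kerastuner.tuners import RandomSearch

from hypermodels import CNNHyperModel
from utils import load_data, set_gpu_config

set_gpu_config()
x_test, x_train, y_test, y_train = load_data()
tuner = RandomSearch(
    CNNHyperModel(input_shape=(32, 32, 3), num_classes=10),
    objective="val_accuracy",
    max_trials=2,
    executions_per_trial=1,
    directory="output/smoke_test",
    project_name="quick_check",
)
tuner.search(x_train, y_train, epochs=2, validation_split=0.1)
```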
/tuner_comparison.py:
--------------------------------------------------------------------------------
import time
from pathlib import Path

from kerastuner.tuners import (
    BayesianOptimization,
    Hyperband,
    RandomSearch,
)
from loguru import logger

from hypermodels import CNNHyperModel
from utils import (
    set_gpu_config,
    load_data,
)

SEED = 1

NUM_CLASSES = 10
INPUT_SHAPE = (32, 32, 3)

N_EPOCH_SEARCH = 40
HYPERBAND_MAX_EPOCHS = 40
MAX_TRIALS = 20
EXECUTION_PER_TRIAL = 2
BAYESIAN_NUM_INITIAL_POINTS = 1


def run_hyperparameter_tuning():
    x_test, x_train, y_test, y_train = load_data()

    hypermodel = CNNHyperModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)

    output_dir = Path("./output/cifar10/")
    tuners = define_tuners(
        hypermodel, directory=output_dir, project_name="simple_cnn_tuning"
    )

    results = []
    for tuner in tuners:
        elapsed_time, loss, accuracy = tuner_evaluation(
            tuner, x_test, x_train, y_test, y_train
        )
        logger.info(
            f"Elapsed time = {elapsed_time:10.4f} s, accuracy = {accuracy}, loss = {loss}"
        )
        results.append([elapsed_time, loss, accuracy])
    logger.info(results)


def tuner_evaluation(tuner, x_test, x_train, y_test, y_train):
    set_gpu_config()

    # Overview of the search space
    tuner.search_space_summary()

    # Perform the hyperparameter search
    logger.info("Start hyperparameter tuning")
    search_start = time.time()
    tuner.search(x_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
    search_end = time.time()
    elapsed_time = search_end - search_start

    # Show a summary of the search
    tuner.results_summary()

    # Retrieve the best model found during the search
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model on the held-out test set
    loss, accuracy = best_model.evaluate(x_test, y_test)
    return elapsed_time, loss, accuracy


def define_tuners(hypermodel, directory, project_name):
    random_tuner = RandomSearch(
        hypermodel,
        objective="val_accuracy",
        seed=SEED,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{directory}_random_search",
        project_name=project_name,
    )
    hyperband_tuner = Hyperband(
        hypermodel,
        max_epochs=HYPERBAND_MAX_EPOCHS,
        objective="val_accuracy",
        seed=SEED,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{directory}_hyperband",
        project_name=project_name,
    )
    bayesian_tuner = BayesianOptimization(
        hypermodel,
        objective="val_accuracy",
        seed=SEED,
        num_initial_points=BAYESIAN_NUM_INITIAL_POINTS,
        max_trials=MAX_TRIALS,
        directory=f"{directory}_bayesian",
        project_name=project_name,
    )
    return [random_tuner, hyperband_tuner, bayesian_tuner]


if __name__ == "__main__":
    run_hyperparameter_tuning()
--------------------------------------------------------------------------------
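One caveat on `tuner_evaluation`: `get_best_models` returns the best model with the weights it had at the end of the search, which for Hyperband may come from a shortened bracket rather than a full training run. A common follow-up is to rebuild the winning configuration and retrain it from scratch. A minimal sketch, assuming kerastuner 1.0.x and the names already in scope in `tuner_comparison.py` (`tuner`, `hypermodel`, the data arrays, `N_EPOCH_SEARCH`):

```
# Sketch (assumes kerastuner 1.0.x): rebuild the best configuration
# from scratch and retrain it for the full budget before evaluating.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = hypermodel.build(best_hps)
model.fit(x_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
model.evaluate(x_test, y_test)
```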