├── requirements.txt
├── .gitignore
├── utils.py
├── README.md
├── baseline.py
├── hypermodels.py
└── tuner_comparison.py

/requirements.txt:
--------------------------------------------------------------------------------
black==19.10b0
keras-tuner==1.0.1
loguru==0.4.0
tensorflow==2.5.1
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# virtualenv
venv/

# PyCharm utils
.idea/

# Polyaxon files
.polyaxonignore
.polyaxon/
polyaxon_experiments/
polyaxon_requirements.txt

# outputs
output/
--------------------------------------------------------------------------------

/utils.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from loguru import logger
from tensorflow.keras.datasets import cifar10


def set_gpu_config():
    # Enable memory growth on every GPU found, so TensorFlow does not
    # grab all GPU memory up front.
    logger.info("Setting up GPU if found")
    physical_devices = tf.config.experimental.list_physical_devices("GPU")
    if physical_devices:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)


def load_data():
    # Load CIFAR-10 and scale pixel values to [0, 1].
    # Note the (test, train) return order; callers unpack accordingly.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype("float32") / 255.0
    x_test = x_test.astype("float32") / 255.0
    return x_test, x_train, y_test, y_train
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# keras-tuner-tutorial
Hands-on tutorial for keras-tuner

This repo introduces hyperparameter tuning with the Keras Tuner library.
It compares the library's different tuners on a computer vision task, the CIFAR-10 dataset.

This is a work in progress; all feedback is welcome.

### Install the project
- Clone the repo
- Create a virtualenv and activate it:
```
virtualenv -p python3 venv
source venv/bin/activate
```
- Install the requirements:
```
pip install -r requirements.txt
```


### Results

Task durations were measured on an RTX 2080 GPU.

| Tuner            | Search time | Best accuracy (%) |
|------------------|-------------|-------------------|
| Worst Baseline   | 20min       | 63.1              |
| Default Baseline | 20min       | 74.5              |
| Random Search    | 10h 59min   | 76.8              |
| Hyperband        | 10h 0min    | 75.1              |

Here, the worst baseline is the worst accuracy obtained by a set of hyperparameters
during random search.
The default baseline is obtained by setting every hyperparameter to its default value.
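Both baselines can be read back off a finished search without retraining anything. A minimal sketch, assuming the kerastuner 1.0.x trials API and a `tuner` on which `tuner.search(...)` has already completed:

```
# Sketch (assumes kerastuner 1.0.x): rank the completed trials of a
# finished tuner by validation score to find the best and worst runs.
scored_trials = [t for t in tuner.oracle.trials.values() if t.score is not None]
best_trial = max(scored_trials, key=lambda t: t.score)
worst_trial = min(scored_trials, key=lambda t: t.score)
print(f"best: {best_trial.score}, worst: {worst_trial.score}")
print(worst_trial.hyperparameters.values)  # the "worst baseline" configuration
```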
### Run the baseline

```
python baseline.py
```

### Run the comparison
Available tuners:

- Random Search
- Hyperband
- Bayesian Optimization

```
python tuner_comparison.py
```
--------------------------------------------------------------------------------

/baseline.py:
--------------------------------------------------------------------------------
import time

from loguru import logger
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

from tuner_comparison import (
    INPUT_SHAPE,
    NUM_CLASSES,
    N_EPOCH_SEARCH,
)
from utils import (
    set_gpu_config,
    load_data,
)


def base_experiment():
    set_gpu_config()
    x_test, x_train, y_test, y_train = load_data()

    # Same architecture as CNNHyperModel, with every hyperparameter
    # fixed to its default value.
    model = keras.Sequential()
    model.add(
        Conv2D(filters=16, kernel_size=3, activation="relu", input_shape=INPUT_SHAPE)
    )
    model.add(Conv2D(16, 3, activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(rate=0.25))
    model.add(Conv2D(32, 3, activation="relu"))
    model.add(Conv2D(64, 3, activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(rate=0.25))
    model.add(Flatten())
    model.add(Dense(units=128, activation="relu"))
    model.add(Dropout(rate=0.25))
    model.add(Dense(NUM_CLASSES, activation="softmax"))

    model.compile(
        optimizer=keras.optimizers.Adam(1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    logger.info("Start training")
    search_start = time.time()
    model.fit(x_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
    search_end = time.time()
    elapsed_time = search_end - search_start
    logger.info(f"Elapsed time (s): {elapsed_time}")

    loss, accuracy = model.evaluate(x_test, y_test)
    logger.info(f"loss: {loss}, accuracy: {accuracy}")


if __name__ == "__main__":
    base_experiment()
--------------------------------------------------------------------------------
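baseline.py hard-codes the architecture that hypermodels.py (below) exposes through tunable hyperparameters. Since every `hp.Float`/`hp.Choice`/`hp.Int` call in `CNNHyperModel.build` declares a `default`, the default baseline can also be built straight from the hypermodel. A minimal sketch, assuming kerastuner 1.0.x, where a fresh `HyperParameters` object resolves each hyperparameter to its declared default:

```
# Sketch (assumes kerastuner 1.0.x): a fresh HyperParameters instance
# hands every hp.* call in build() its declared default value, so this
# reproduces the default-baseline architecture without any search.
from kerastuner import HyperParameters

from hypermodels import CNNHyperModel

hp = HyperParameters()
model = CNNHyperModel(input_shape=(32, 32, 3), num_classes=10).build(hp)
model.summary()
```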
/hypermodels.py:
--------------------------------------------------------------------------------
from kerastuner import HyperModel
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D


class CNNHyperModel(HyperModel):
    def __init__(self, input_shape, num_classes):
        super().__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes

    def build(self, hp):
        model = keras.Sequential()
        model.add(
            Conv2D(
                filters=16,
                kernel_size=3,
                activation="relu",
                input_shape=self.input_shape,
            )
        )
        model.add(Conv2D(filters=16, kernel_size=3, activation="relu"))
        model.add(MaxPooling2D(pool_size=2))
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_1", min_value=0.0, max_value=0.5, default=0.25, step=0.05
                )
            )
        )
        model.add(Conv2D(filters=32, kernel_size=3, activation="relu"))
        model.add(
            Conv2D(
                filters=hp.Choice("num_filters", values=[32, 64], default=64),
                kernel_size=3,
                activation="relu",
            )
        )
        model.add(MaxPooling2D(pool_size=2))
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_2", min_value=0.0, max_value=0.5, default=0.25, step=0.05
                )
            )
        )
        model.add(Flatten())
        model.add(
            Dense(
                units=hp.Int(
                    "units", min_value=32, max_value=512, step=32, default=128
                ),
                activation=hp.Choice(
                    "dense_activation",
                    values=["relu", "tanh", "sigmoid"],
                    default="relu",
                ),
            )
        )
        model.add(
            Dropout(
                rate=hp.Float(
                    "dropout_3", min_value=0.0, max_value=0.5, default=0.25, step=0.05
                )
            )
        )
        model.add(Dense(self.num_classes, activation="softmax"))

        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float(
                    "learning_rate",
                    min_value=1e-4,
                    max_value=1e-2,
                    sampling="log",
                    default=1e-3,
                )
            ),
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model
--------------------------------------------------------------------------------
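tuner_comparison.py (below) runs all three tuners back to back, and at the budgets in the Results table each search takes on the order of ten hours. To smoke-test the setup first, a single tuner can be run with a deliberately tiny budget. A minimal sketch, with hypothetical small values for `max_trials`, `executions_per_trial`, and `epochs`:

```
# Sketch: a quick smoke test of the search pipeline with a tiny,
# hypothetical budget (2 trials x 2 epochs) before the full comparison.
from kerastuner.tuners import RandomSearch

from hypermodels import CNNHyperModel
from utils import load_data, set_gpu_config

set_gpu_config()
x_test, x_train, y_test, y_train = load_data()
tuner = RandomSearch(
    CNNHyperModel(input_shape=(32, 32, 3), num_classes=10),
    objective="val_accuracy",
    max_trials=2,
    executions_per_trial=1,
    directory="output/smoke_test",
    project_name="quick_check",
)
tuner.search(x_train, y_train, epochs=2, validation_split=0.1)
```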
/tuner_comparison.py:
--------------------------------------------------------------------------------
import time
from pathlib import Path

from kerastuner.tuners import (
    BayesianOptimization,
    Hyperband,
    RandomSearch,
)
from loguru import logger

from hypermodels import CNNHyperModel
from utils import (
    set_gpu_config,
    load_data,
)

SEED = 1

NUM_CLASSES = 10
INPUT_SHAPE = (32, 32, 3)

N_EPOCH_SEARCH = 40
HYPERBAND_MAX_EPOCHS = 40
MAX_TRIALS = 20
EXECUTION_PER_TRIAL = 2
BAYESIAN_NUM_INITIAL_POINTS = 1


def run_hyperparameter_tuning():
    x_test, x_train, y_test, y_train = load_data()

    hypermodel = CNNHyperModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)

    output_dir = Path("./output/cifar10/")
    tuners = define_tuners(
        hypermodel, directory=output_dir, project_name="simple_cnn_tuning"
    )

    results = []
    for tuner in tuners:
        elapsed_time, loss, accuracy = tuner_evaluation(
            tuner, x_test, x_train, y_test, y_train
        )
        logger.info(
            f"Elapsed time = {elapsed_time:10.4f} s, accuracy = {accuracy}, loss = {loss}"
        )
        results.append([elapsed_time, loss, accuracy])
    logger.info(results)


def tuner_evaluation(tuner, x_test, x_train, y_test, y_train):
    set_gpu_config()

    # Overview of the search space
    tuner.search_space_summary()

    # Perform the hyperparameter search
    logger.info("Start hyperparameter tuning")
    search_start = time.time()
    tuner.search(x_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
    search_end = time.time()
    elapsed_time = search_end - search_start

    # Show a summary of the search
    tuner.results_summary()

    # Retrieve the best model found during the search
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model on the held-out test set
    loss, accuracy = best_model.evaluate(x_test, y_test)
    return elapsed_time, loss, accuracy


def define_tuners(hypermodel, directory, project_name):
    random_tuner = RandomSearch(
        hypermodel,
        objective="val_accuracy",
        seed=SEED,
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{directory}_random_search",
        project_name=project_name,
    )
    hyperband_tuner = Hyperband(
        hypermodel,
        max_epochs=HYPERBAND_MAX_EPOCHS,
        objective="val_accuracy",
        seed=SEED,
        executions_per_trial=EXECUTION_PER_TRIAL,
        directory=f"{directory}_hyperband",
        project_name=project_name,
    )
    bayesian_tuner = BayesianOptimization(
        hypermodel,
        objective="val_accuracy",
        seed=SEED,
        num_initial_points=BAYESIAN_NUM_INITIAL_POINTS,
        max_trials=MAX_TRIALS,
        directory=f"{directory}_bayesian",
        project_name=project_name,
    )
    return [random_tuner, hyperband_tuner, bayesian_tuner]


if __name__ == "__main__":
    run_hyperparameter_tuning()
--------------------------------------------------------------------------------
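One caveat on `tuner_evaluation`: `get_best_models` returns the best model with the weights it had at the end of the search, which for Hyperband may come from a shortened bracket rather than a full training run. A common follow-up is to rebuild the winning configuration and retrain it from scratch. A minimal sketch, assuming kerastuner 1.0.x and the names already in scope in `tuner_comparison.py` (`tuner`, `hypermodel`, the data arrays, `N_EPOCH_SEARCH`):

```
# Sketch (assumes kerastuner 1.0.x): rebuild the best configuration
# from scratch and retrain it for the full budget before evaluating.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = hypermodel.build(best_hps)
model.fit(x_train, y_train, epochs=N_EPOCH_SEARCH, validation_split=0.1)
model.evaluate(x_test, y_test)
```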