├── README.md ├── environment ├── Dockerfile ├── Dockerfile.gpu ├── README.md ├── prepare_data.sh ├── requirements.txt └── requirements_gpu.txt └── fine_tune_inceptionv3.py /README.md: -------------------------------------------------------------------------------- 1 | # keras-fine-tune-inception 2 | Fine tuning inception v3 on Kaggle dogs-vs-cats dataset. 3 | 4 | This is a combination of Keras blog tutorial (https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html) on fine tuning VGG 16 nets and some guidance from keras tuning project (https://github.com/danielvarga/keras-finetuning). 5 | -------------------------------------------------------------------------------- /environment/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | # Pick up some TF dependencies 4 | RUN apt-get update \ 5 | && apt-get install -y --no-install-recommends \ 6 | python3-pip \ 7 | unzip \ 8 | git \ 9 | wget \ 10 | python3 \ 11 | && apt-get clean \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN mkdir /tmp/model 15 | 16 | COPY prepare_data.sh /tmp/prepare_data.sh 17 | 18 | RUN chmod 0755 /tmp/prepare_data.sh \ 19 | && /tmp/prepare_data.sh 20 | 21 | COPY requirements.txt /tmp/requirements.txt 22 | 23 | RUN pip3 install --upgrade pip \ 24 | && pip3 install -U setuptools \ 25 | && pip3 --no-cache-dir install -r /tmp/requirements.txt 26 | 27 | WORKDIR "/tmp/model" 28 | -------------------------------------------------------------------------------- /environment/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn5-devel 2 | 3 | # Pick up some TF dependencies 4 | RUN apt-get update \ 5 | && apt-get install -y --no-install-recommends \ 6 | python3-pip \ 7 | git \ 8 | unzip \ 9 | wget \ 10 | python3 \ 11 | && apt-get clean \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | RUN mkdir /tmp/model 15 | 16 | COPY 
prepare_data.sh /tmp/prepare_data.sh 17 | 18 | RUN chmod 0755 /tmp/prepare_data.sh \ 19 | && /tmp/prepare_data.sh 20 | 21 | COPY requirements_gpu.txt /tmp/requirements_gpu.txt 22 | 23 | RUN pip3 install --upgrade pip \ 24 | && pip3 install -U setuptools \ 25 | && pip3 --no-cache-dir install -r /tmp/requirements_gpu.txt 26 | 27 | WORKDIR "/tmp/model" 28 | -------------------------------------------------------------------------------- /environment/README.md: -------------------------------------------------------------------------------- 1 | # Prepare environment using pip 2 | 3 | ## CPU 4 | `pip install -r requirements.txt` 5 | 6 | ## GPU 7 | `pip install -r requirements_gpu.txt` 8 | 9 | # Prepare docker container (RECOMMENDED) 10 | 11 | ## Build container 12 | 13 | ### CPU 14 | `docker build -t "keras-fine-tune-inception:cpu" . -f Dockerfile` 15 | ### GPU 16 | `docker build -t "keras-fine-tune-inception:gpu" . -f Dockerfile.gpu` 17 | 18 | ## Run TRAIN container 19 | * Replace `LOCAL_REPO_DIRECTORY` with path to the directory this repo was cloned to. 20 | * From inside container `cpu` or `gpu` container run `jupyter notebook --ip='*'`. 21 | 22 | ### GPU 23 | `nvidia-docker run -i -t -v LOCAL_REPO_DIRECTORY:/tmp/model -p 8888:8888 -p 6006:6006 keras-fine-tune-inception:gpu` 24 | ### CPU 25 | `docker run -i -t -v LOCAL_REPO_DIRECTORY:/tmp/model -p 8888:8888 -p 6006:6006 keras-fine-tune-inception:cpu` 26 | 27 | -------------------------------------------------------------------------------- /environment/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p /tmp/data 4 | 5 | FILE=/tmp/download/kagglecatsanddogs_3367a.zip 6 | if [ ! 
-f $FILE ]; then 7 | wget --directory-prefix=/tmp/download/ https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip 8 | unzip $FILE -d /tmp/download/ 9 | fi 10 | 11 | mkdir -p /tmp/data/train/cats 12 | mkdir -p /tmp/data/train/dogs 13 | mkdir -p /tmp/data/validation/cats 14 | mkdir -p /tmp/data/validation/dogs 15 | 16 | for ((i=0;i<=999;i++)); 17 | do 18 | cp /tmp/download/PetImages/Cat/$i.jpg /tmp/data/train/cats/ 19 | cp /tmp/download/PetImages/Dog/$i.jpg /tmp/data/train/dogs/ 20 | done 21 | 22 | for ((i=1000;i<=1400;i++)); 23 | do 24 | cp /tmp/download/PetImages/Cat/$i.jpg /tmp/data/validation/cats/ 25 | cp /tmp/download/PetImages/Dog/$i.jpg /tmp/data/validation/dogs/ 26 | done 27 | 28 | rm -r -f 29 | -------------------------------------------------------------------------------- /environment/requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | appnope==0.1.0 3 | bleach==2.0.0 4 | cycler==0.10.0 5 | decorator==4.0.11 6 | entrypoints==0.2.2 7 | html5lib==0.999999999 8 | ipykernel==4.5.2 9 | ipython==5.3.0 10 | ipython-genutils==0.2.0 11 | ipywidgets==6.0.0 12 | Jinja2>=2.10.1 13 | jsonschema==2.6.0 14 | jupyter==1.0.0 15 | jupyter-client==5.0.0 16 | jupyter-console==5.1.0 17 | jupyter-core==4.3.0 18 | MarkupSafe==1.0 19 | matplotlib==2.0.0 20 | mistune>=0.8.1 21 | nbconvert==5.1.1 22 | nbformat==4.3.0 23 | networkx==1.11 24 | notebook>=5.7.2 25 | numpy==1.12.1 26 | olefile==0.44 27 | packaging==16.8 28 | pandocfilters==1.4.1 29 | pexpect==4.2.1 30 | pickleshare==0.7.4 31 | Pillow==4.0.0 32 | prompt-toolkit==1.0.14 33 | protobuf==3.2.0 34 | ptyprocess==0.5.1 35 | Pygments==2.2.0 36 | pyparsing==2.2.0 37 | python-dateutil==2.6.0 38 | pytz==2017.2 39 | PyWavelets==0.5.2 40 | pyzmq==16.0.2 41 | qtconsole==4.3.0 42 | scikit-image==0.13.0 43 | scikit-learn==0.18.1 44 | scipy==0.19.0 45 | simplegeneric==0.8.1 46 | six==1.10.0 47 | 
tensorflow>=1.6.0 48 | terminado==0.6 49 | testpath==0.3 50 | tornado==4.4.3 51 | tqdm==4.11.2 52 | traitlets==4.3.2 53 | wcwidth==0.1.7 54 | webencodings==0.5 55 | widgetsnbextension==2.0.0 56 | keras==1.2.2 57 | h5py==2.6.0 58 | -------------------------------------------------------------------------------- /environment/requirements_gpu.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | appnope==0.1.0 3 | bleach==2.0.0 4 | cycler==0.10.0 5 | decorator==4.0.11 6 | entrypoints==0.2.2 7 | html5lib==0.999999999 8 | ipykernel==4.5.2 9 | ipython==5.3.0 10 | ipython-genutils==0.2.0 11 | ipywidgets==6.0.0 12 | Jinja2>=2.10.1 13 | jsonschema==2.6.0 14 | jupyter==1.0.0 15 | jupyter-client==5.0.0 16 | jupyter-console==5.1.0 17 | jupyter-core==4.3.0 18 | MarkupSafe==1.0 19 | matplotlib==2.0.0 20 | mistune>=0.8.1 21 | nbconvert==5.1.1 22 | nbformat==4.3.0 23 | networkx==1.11 24 | notebook>=5.7.2 25 | numpy==1.12.1 26 | olefile==0.44 27 | packaging==16.8 28 | pandocfilters==1.4.1 29 | pexpect==4.2.1 30 | pickleshare==0.7.4 31 | Pillow==4.0.0 32 | prompt-toolkit==1.0.14 33 | protobuf==3.2.0 34 | ptyprocess==0.5.1 35 | Pygments==2.2.0 36 | pyparsing==2.2.0 37 | python-dateutil==2.6.0 38 | pytz==2017.2 39 | PyWavelets==0.5.2 40 | pyzmq==16.0.2 41 | qtconsole==4.3.0 42 | scikit-image==0.13.0 43 | scikit-learn==0.18.1 44 | scipy==0.19.0 45 | simplegeneric==0.8.1 46 | six==1.10.0 47 | tensorflow-gpu>=1.6.0 48 | terminado==0.6 49 | testpath==0.3 50 | tornado==4.4.3 51 | tqdm==4.11.2 52 | traitlets==4.3.2 53 | wcwidth==0.1.7 54 | webencodings==0.5 55 | widgetsnbextension==2.0.0 56 | keras==1.2.2 57 | h5py==2.6.0 58 | -------------------------------------------------------------------------------- /fine_tune_inceptionv3.py: -------------------------------------------------------------------------------- 1 | '''This script goes along the blog post 2 | "Building powerful image classification models using very little data" 3 | from 
'''Fine-tune InceptionV3 on the Kaggle dogs-vs-cats data set.

This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup (environment/prepare_data.sh builds it under /tmp/data), we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dog pictures index 0-999 in data/train/dogs
- put the dog pictures index 1000-1400 in data/validation/dogs
So that we have 1000 training examples for each class, and 401 validation
examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```

Training runs in two phases:
1. All InceptionV3 layers are frozen and only the newly added dense head is
   trained (RMSprop).
2. The first 172 layers stay frozen, the remaining ones are unfrozen and the
   model is fine-tuned with SGD at a low learning rate.
The final weights are written to `final_weights.hdf5`.
'''
import os.path

from keras import backend as K
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

# Create the base pre-trained model (ImageNet weights, no classifier head).
base_model = InceptionV3(weights='imagenet', include_top=False)

# Dimensions of our images: the Inception input size.
img_width, img_height = 299, 299

# Checkpoint / output files, relative to the current working directory.
top_layers_checkpoint_path = 'cp.top.best.hdf5'
fine_tuned_checkpoint_path = 'cp.fine_tuned.best.hdf5'
new_extended_inception_weights = 'final_weights.hdf5'

train_data_dir = '/tmp/data/train'
validation_data_dir = '/tmp/data/validation'

# Number of images consumed per training epoch / per validation pass.
nb_train_samples = 2000
nb_validation_samples = 800

top_epochs = 50
fit_epochs = 50

batch_size = 24

# Add a global spatial average pooling layer on top of the base model ...
x = base_model.output
x = GlobalAveragePooling2D()(x)
# ... then a fully-connected layer ...
x = Dense(1024, activation='relu')(x)
# ... and a logistic layer -- we have 2 classes.
predictions = Dense(2, activation='softmax')(x)

# This is the model we will train.
model = Model(input=base_model.input, output=predictions)

# Resume from an earlier top-layer run when its checkpoint is present.
if os.path.exists(top_layers_checkpoint_path):
    model.load_weights(top_layers_checkpoint_path)
    print("Checkpoint '" + top_layers_checkpoint_path + "' loaded.")

# First: train only the top layers (which were randomly initialized),
# i.e. freeze all convolutional InceptionV3 layers.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model (should be done *after* setting layers to non-trainable).
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# Prepare the data augmentation configuration for training images.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# Keep the model with the best validation accuracy seen so far
# (checked after every epoch).
mc_top = ModelCheckpoint(
    top_layers_checkpoint_path,
    monitor='val_acc',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1)

# Save the TensorBoard logs.
tb = TensorBoard(
    log_dir='./logs',
    histogram_freq=1,
    write_graph=True,
    write_images=True)

# Train the model on the new data for a few epochs.
# NOTE: with the Keras 1.x API used here (`nb_epoch`, `samples_per_epoch`,
# `nb_val_samples`) these arguments are counted in *samples*, not batches,
# so the sample counts are passed as-is. Dividing them by `batch_size`
# would shrink every epoch to a few dozen images.
model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples,
    nb_epoch=top_epochs,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
    callbacks=[mc_top, tb])

# At this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from Inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

# Let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Keep the fine-tuned model with the best validation accuracy seen so far.
mc_fit = ModelCheckpoint(
    fine_tuned_checkpoint_path,
    monitor='val_acc',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1)

# Resume from an earlier fine-tuning run when its checkpoint is present.
if os.path.exists(fine_tuned_checkpoint_path):
    model.load_weights(fine_tuned_checkpoint_path)
    print("Checkpoint '" + fine_tuned_checkpoint_path + "' loaded.")

# We chose to train the top 2 inception blocks, i.e. we will freeze
# the first 172 layers and unfreeze the rest:
for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True

# We need to recompile the model for these modifications to take effect.
# We use SGD with a low learning rate.
model.compile(
    optimizer=SGD(lr=0.0001, momentum=0.9),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# We train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers). Sample counts again, see the note above.
model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples,
    nb_epoch=fit_epochs,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples,
    callbacks=[mc_fit, tb])

model.save_weights(new_extended_inception_weights)