├── README.md
├── environment
    ├── Dockerfile
    ├── Dockerfile.gpu
    ├── README.md
    ├── prepare_data.sh
    ├── requirements.txt
    └── requirements_gpu.txt
└── fine_tune_inceptionv3.py


/README.md:
--------------------------------------------------------------------------------
1 | # keras-fine-tune-inception
2 | Fine tuning inception v3 on Kaggle dogs-vs-cats dataset.
3 | 
4 | This project combines the Keras blog tutorial on fine-tuning VGG16 networks (https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html) with guidance from the keras-finetuning project (https://github.com/danielvarga/keras-finetuning).
5 | 


--------------------------------------------------------------------------------
/environment/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:16.04
 2 | 
 3 | # OS-level prerequisites: Python 3 and pip for the training stack, plus
 4 | # wget/unzip (used by prepare_data.sh) and git.
 5 | # The apt lists are removed in the same layer so they never reach the image.
 6 | RUN apt-get update \
 7 |  && apt-get install -y --no-install-recommends \
 8 |         git \
 9 |         python3 \
10 |         python3-pip \
11 |         unzip \
12 |         wget \
13 |  && apt-get clean \
14 |  && rm -rf /var/lib/apt/lists/*
15 | 
16 | # Mount point for the cloned repository (`docker run -v ...:/tmp/model`).
17 | RUN mkdir /tmp/model
18 | 
19 | # Download the cats-vs-dogs archive and lay out /tmp/data.
20 | # This runs before the pip layers so that editing requirements.txt does not
21 | # invalidate the cached dataset layer.
22 | COPY prepare_data.sh /tmp/prepare_data.sh
23 | 
24 | RUN chmod 0755 /tmp/prepare_data.sh \
25 |  && /tmp/prepare_data.sh
26 | 
27 | COPY requirements.txt /tmp/requirements.txt
28 | 
29 | # Run pip as `python3 -m pip` so that the freshly upgraded pip is the one used
30 | # by the following commands, instead of relying on the distro's pip3 launcher.
31 | # --no-cache-dir is passed on every call so no download cache ends up in the layer.
32 | RUN python3 -m pip install --no-cache-dir --upgrade pip \
33 |  && python3 -m pip install --no-cache-dir -U setuptools \
34 |  && python3 -m pip install --no-cache-dir -r /tmp/requirements.txt
35 | 
36 | WORKDIR "/tmp/model"
37 | 


--------------------------------------------------------------------------------
/environment/Dockerfile.gpu:
--------------------------------------------------------------------------------
 1 | # NOTE(review): requirements_gpu.txt allows tensorflow-gpu>=1.6.0; confirm that
 2 | # the TensorFlow build pip resolves still supports this CUDA 8 / cuDNN 5 base.
 3 | FROM nvidia/cuda:8.0-cudnn5-devel
 4 | 
 5 | # OS-level prerequisites: Python 3 and pip for the training stack, plus
 6 | # wget/unzip (used by prepare_data.sh) and git.
 7 | # The apt lists are removed in the same layer so they never reach the image.
 8 | RUN apt-get update \
 9 |  && apt-get install -y --no-install-recommends \
10 |         git \
11 |         python3 \
12 |         python3-pip \
13 |         unzip \
14 |         wget \
15 |  && apt-get clean \
16 |  && rm -rf /var/lib/apt/lists/*
17 | 
18 | # Mount point for the cloned repository (`nvidia-docker run -v ...:/tmp/model`).
19 | RUN mkdir /tmp/model
20 | 
21 | # Download the cats-vs-dogs archive and lay out /tmp/data.
22 | # This runs before the pip layers so that editing requirements_gpu.txt does not
23 | # invalidate the cached dataset layer.
24 | COPY prepare_data.sh /tmp/prepare_data.sh
25 | 
26 | RUN chmod 0755 /tmp/prepare_data.sh \
27 |  && /tmp/prepare_data.sh
28 | 
29 | COPY requirements_gpu.txt /tmp/requirements_gpu.txt
30 | 
31 | # Run pip as `python3 -m pip` so that the freshly upgraded pip is the one used
32 | # by the following commands, instead of relying on the distro's pip3 launcher.
33 | # --no-cache-dir is passed on every call so no download cache ends up in the layer.
34 | RUN python3 -m pip install --no-cache-dir --upgrade pip \
35 |  && python3 -m pip install --no-cache-dir -U setuptools \
36 |  && python3 -m pip install --no-cache-dir -r /tmp/requirements_gpu.txt
37 | 
38 | WORKDIR "/tmp/model"
39 | 


--------------------------------------------------------------------------------
/environment/README.md:
--------------------------------------------------------------------------------
 1 | # Prepare environment using pip
 2 | 
 3 | ## CPU
 4 | `pip install -r  requirements.txt`
 5 | 
 6 | ## GPU
 7 | `pip install -r requirements_gpu.txt`
 8 | 
 9 | # Prepare docker container (RECOMMENDED)
10 | 
11 | ## Build container
12 | 
13 | ### CPU
14 | `docker build -t "keras-fine-tune-inception:cpu" . -f Dockerfile`
15 | ### GPU
16 | `docker build -t "keras-fine-tune-inception:gpu" . -f Dockerfile.gpu`
17 | 
18 | ## Run TRAIN container
19 | * Replace `LOCAL_REPO_DIRECTORY` with path to the directory this repo was cloned to.
20 | * From inside the `cpu` or `gpu` container, run `jupyter notebook --ip='*'` (the images run as root, so Jupyter may additionally require `--allow-root`).
21 | 
22 | ### GPU
23 | `nvidia-docker run -i -t -v LOCAL_REPO_DIRECTORY:/tmp/model -p 8888:8888 -p 6006:6006 keras-fine-tune-inception:gpu`
24 | ### CPU
25 | `docker run -i -t -v LOCAL_REPO_DIRECTORY:/tmp/model -p 8888:8888 -p 6006:6006 keras-fine-tune-inception:cpu`
26 | 
27 | 


--------------------------------------------------------------------------------
/environment/prepare_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Download the Microsoft "Kaggle Cats and Dogs" archive and copy a subset of
 4 | # its images into the /tmp/data layout read by fine_tune_inceptionv3.py:
 5 | #   /tmp/data/train/{cats,dogs}       <- images 0..999 of each class
 6 | #   /tmp/data/validation/{cats,dogs}  <- images 1000..1400 of each class
 7 | 
 8 | mkdir -p /tmp/data
 9 | 
10 | # Fetch and unpack the archive if it is not already present.
11 | FILE=/tmp/download/kagglecatsanddogs_3367a.zip
12 | if [ ! -f "$FILE" ]; then
13 |   wget --directory-prefix=/tmp/download/ https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip
14 |   unzip "$FILE" -d /tmp/download/
15 | fi
16 | 
17 | mkdir -p /tmp/data/train/cats
18 | mkdir -p /tmp/data/train/dogs
19 | mkdir -p /tmp/data/validation/cats
20 | mkdir -p /tmp/data/validation/dogs
21 | 
22 | # Training split: images 0..999 of each class.
23 | for ((i=0;i<=999;i++));
24 | do
25 |    cp /tmp/download/PetImages/Cat/$i.jpg /tmp/data/train/cats/
26 |    cp /tmp/download/PetImages/Dog/$i.jpg /tmp/data/train/dogs/
27 | done
28 | 
29 | # Validation split: images 1000..1400 of each class (inclusive, i.e. 401 files).
30 | for ((i=1000;i<=1400;i++));
31 | do
32 |    cp /tmp/download/PetImages/Cat/$i.jpg /tmp/data/validation/cats/
33 |    cp /tmp/download/PetImages/Dog/$i.jpg /tmp/data/validation/dogs/
34 | done
35 | 
36 | # Remove the archive and the extracted tree now that the subset is copied.
37 | # The previous `rm -r -f` had no operand and therefore deleted nothing; when
38 | # this script runs inside a single Docker RUN step, cleaning up here keeps
39 | # the full download out of the resulting layer.
40 | rm -r -f /tmp/download
41 | 


--------------------------------------------------------------------------------
/environment/requirements.txt:
--------------------------------------------------------------------------------
 1 | appdirs==1.4.3
 2 | appnope==0.1.0
 3 | bleach==2.0.0
 4 | cycler==0.10.0
 5 | decorator==4.0.11
 6 | entrypoints==0.2.2
 7 | html5lib==0.999999999
 8 | ipykernel==4.5.2
 9 | ipython==5.3.0
10 | ipython-genutils==0.2.0
11 | ipywidgets==6.0.0
12 | Jinja2>=2.10.1
13 | jsonschema==2.6.0
14 | jupyter==1.0.0
15 | jupyter-client==5.0.0
16 | jupyter-console==5.1.0
17 | jupyter-core==4.3.0
18 | MarkupSafe==1.0
19 | matplotlib==2.0.0
20 | mistune>=0.8.1
21 | nbconvert==5.1.1
22 | nbformat==4.3.0
23 | networkx==1.11
24 | notebook>=5.7.2
25 | numpy==1.12.1
26 | olefile==0.44
27 | packaging==16.8
28 | pandocfilters==1.4.1
29 | pexpect==4.2.1
30 | pickleshare==0.7.4
31 | Pillow==4.0.0
32 | prompt-toolkit==1.0.14
33 | protobuf==3.2.0
34 | ptyprocess==0.5.1
35 | Pygments==2.2.0
36 | pyparsing==2.2.0
37 | python-dateutil==2.6.0
38 | pytz==2017.2
39 | PyWavelets==0.5.2
40 | pyzmq==16.0.2
41 | qtconsole==4.3.0
42 | scikit-image==0.13.0
43 | scikit-learn==0.18.1
44 | scipy==0.19.0
45 | simplegeneric==0.8.1
46 | six==1.10.0
47 | tensorflow>=1.6.0
48 | terminado==0.6
49 | testpath==0.3
50 | tornado==4.4.3
51 | tqdm==4.11.2
52 | traitlets==4.3.2
53 | wcwidth==0.1.7
54 | webencodings==0.5
55 | widgetsnbextension==2.0.0
56 | keras==1.2.2
57 | h5py==2.6.0
58 | 


--------------------------------------------------------------------------------
/environment/requirements_gpu.txt:
--------------------------------------------------------------------------------
 1 | appdirs==1.4.3
 2 | appnope==0.1.0
 3 | bleach==2.0.0
 4 | cycler==0.10.0
 5 | decorator==4.0.11
 6 | entrypoints==0.2.2
 7 | html5lib==0.999999999
 8 | ipykernel==4.5.2
 9 | ipython==5.3.0
10 | ipython-genutils==0.2.0
11 | ipywidgets==6.0.0
12 | Jinja2>=2.10.1
13 | jsonschema==2.6.0
14 | jupyter==1.0.0
15 | jupyter-client==5.0.0
16 | jupyter-console==5.1.0
17 | jupyter-core==4.3.0
18 | MarkupSafe==1.0
19 | matplotlib==2.0.0
20 | mistune>=0.8.1
21 | nbconvert==5.1.1
22 | nbformat==4.3.0
23 | networkx==1.11
24 | notebook>=5.7.2
25 | numpy==1.12.1
26 | olefile==0.44
27 | packaging==16.8
28 | pandocfilters==1.4.1
29 | pexpect==4.2.1
30 | pickleshare==0.7.4
31 | Pillow==4.0.0
32 | prompt-toolkit==1.0.14
33 | protobuf==3.2.0
34 | ptyprocess==0.5.1
35 | Pygments==2.2.0
36 | pyparsing==2.2.0
37 | python-dateutil==2.6.0
38 | pytz==2017.2
39 | PyWavelets==0.5.2
40 | pyzmq==16.0.2
41 | qtconsole==4.3.0
42 | scikit-image==0.13.0
43 | scikit-learn==0.18.1
44 | scipy==0.19.0
45 | simplegeneric==0.8.1
46 | six==1.10.0
47 | tensorflow-gpu>=1.6.0
48 | terminado==0.6
49 | testpath==0.3
50 | tornado==4.4.3
51 | tqdm==4.11.2
52 | traitlets==4.3.2
53 | wcwidth==0.1.7
54 | webencodings==0.5
55 | widgetsnbextension==2.0.0
56 | keras==1.2.2
57 | h5py==2.6.0
58 | 


--------------------------------------------------------------------------------
/fine_tune_inceptionv3.py:
--------------------------------------------------------------------------------
  1 | '''This script goes along the blog post
  2 | "Building powerful image classification models using very little data"
  3 | from blog.keras.io.
  4 | It uses data that can be downloaded at:
  5 | https://www.kaggle.com/c/dogs-vs-cats/data
  6 | In our setup (see environment/prepare_data.sh), we:
  7 | - created a data/ folder
  8 | - created train/ and validation/ subfolders inside data/
  9 | - created cats/ and dogs/ subfolders inside train/ and validation/
 10 | - put the cat pictures index 0-999 in data/train/cats
 11 | - put the cat pictures index 1000-1400 in data/validation/cats
 12 | - put the dog pictures index 0-999 in data/train/dogs
 13 | - put the dog pictures index 1000-1400 in data/validation/dogs
 14 | So that we have 1000 training examples for each class, and about 400 validation examples for each class (the inclusive range 1000-1400 is 401 files).
 15 | In summary, this is our directory structure:
 16 | ```
 17 | data/
 18 |     train/
 19 |         dogs/
 20 |             dog001.jpg
 21 |             dog002.jpg
 22 |             ...
 23 |         cats/
 24 |             cat001.jpg
 25 |             cat002.jpg
 26 |             ...
 27 |     validation/
 28 |         dogs/
 29 |             dog001.jpg
 30 |             dog002.jpg
 31 |             ...
 32 |         cats/
 33 |             cat001.jpg
 34 |             cat002.jpg
 35 |             ...
 36 | ```
 37 | '''
 38 | from keras.applications.inception_v3 import InceptionV3
 39 | from keras.preprocessing import image
 40 | from keras.models import Model
 41 | from keras.layers import Dense, GlobalAveragePooling2D
 42 | from keras.preprocessing.image import ImageDataGenerator
 43 | from keras import backend as K
 44 | from keras.callbacks import ModelCheckpoint
 45 | from keras.callbacks import TensorBoard
 46 | import os.path
 47 | 
 48 | # create the base pre-trained model
 49 | base_model = InceptionV3(weights='imagenet', include_top=False)
 50 | 
 51 | # dimensions of our images.
 52 | #Inception input size
 53 | img_width, img_height = 299, 299
 54 | 
 55 | top_layers_checkpoint_path = 'cp.top.best.hdf5'
 56 | fine_tuned_checkpoint_path = 'cp.fine_tuned.best.hdf5'
 57 | new_extended_inception_weights = 'final_weights.hdf5'
 58 | 
 59 | train_data_dir = '/tmp/data/train'
 60 | validation_data_dir = '/tmp/data/validation'
 61 | 
 62 | nb_train_samples = 2000
 63 | nb_validation_samples = 800
 64 | 
 65 | top_epochs = 50
 66 | fit_epochs = 50
 67 | 
 68 | batch_size = 24
 69 | 
 70 | # add a global spatial average pooling layer
 71 | x = base_model.output
 72 | x = GlobalAveragePooling2D()(x)
 73 | # let's add a fully-connected layer
 74 | x = Dense(1024, activation='relu')(x)
 75 | # and a logistic layer -- we have 2 classes
 76 | predictions = Dense(2, activation='softmax')(x)
 77 | 
 78 | # this is the model we will train
 79 | model = Model(input=base_model.input, output=predictions)
 80 | 
 81 | if os.path.exists(top_layers_checkpoint_path):
 82 | 	model.load_weights(top_layers_checkpoint_path)
 83 | 	print ("Checkpoint '" + top_layers_checkpoint_path + "' loaded.")
 84 | 
 85 | # first: train only the top layers (which were randomly initialized)
 86 | # i.e. freeze all convolutional InceptionV3 layers
 87 | for layer in base_model.layers:
 88 |     layer.trainable = False
 89 | 
 90 | # compile the model (should be done *after* setting layers to non-trainable)
 91 | model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'], )
 92 | 
 93 | # prepare data augmentation configuration
 94 | train_datagen = ImageDataGenerator(
 95 |     rescale=1. / 255,
 96 |     shear_range=0.2,
 97 |     zoom_range=0.2,
 98 |     horizontal_flip=True)
 99 | 
100 | test_datagen = ImageDataGenerator(rescale=1. / 255)
101 | 
102 | train_generator = train_datagen.flow_from_directory(
103 |     train_data_dir,
104 |     target_size=(img_height, img_width),
105 |     batch_size=batch_size,
106 |     class_mode='categorical')
107 | 
108 | validation_generator = test_datagen.flow_from_directory(
109 |     validation_data_dir,
110 |     target_size=(img_height, img_width),
111 |     batch_size=batch_size,
112 |     class_mode='categorical')
113 | 
114 | 
115 | # Checked every epoch (period=1), but the full model is written only when val_acc is the best seen so far (save_best_only=True).
116 | mc_top = ModelCheckpoint(top_layers_checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
117 | 
118 | # Write TensorBoard logs to ./logs; this callback is shared by both training phases.
119 | tb = TensorBoard(log_dir='./logs', histogram_freq=1, write_graph=True, write_images=True)
120 | 
121 | # train the model on the new data for a few epochs
122 | #model.fit_generator(...)
123 | 
124 | model.fit_generator(
125 |     train_generator,
126 |     samples_per_epoch=nb_train_samples // batch_size,
127 |     nb_epoch=top_epochs,
128 |     validation_data=validation_generator,
129 |     nb_val_samples=nb_validation_samples // batch_size,
130 |     callbacks=[mc_top, tb])
131 | 
132 | # at this point, the top layers are well trained and we can start fine-tuning
133 | # convolutional layers from inception V3. We will freeze the bottom N layers
134 | # and train the remaining top layers.
135 | 
136 | # let's visualize layer names and layer indices to see how many layers
137 | # we should freeze:
138 | for i, layer in enumerate(base_model.layers):
139 |    print(i, layer.name)
140 | 
141 | 
142 | # Checked every epoch (period=1), but the full model is written only when val_acc is the best seen so far (save_best_only=True).
143 | mc_fit = ModelCheckpoint(fine_tuned_checkpoint_path, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
144 | 
145 | 
146 | if os.path.exists(fine_tuned_checkpoint_path):
147 | 	model.load_weights(fine_tuned_checkpoint_path)
148 | 	print ("Checkpoint '" + fine_tuned_checkpoint_path + "' loaded.")
149 | 
150 | # we chose to train the top 2 inception blocks, i.e. we will freeze
151 | # the first 172 layers and unfreeze the rest:
152 | for layer in model.layers[:172]:
153 |    layer.trainable = False
154 | for layer in model.layers[172:]:
155 |    layer.trainable = True
156 | 
157 | # we need to recompile the model for these modifications to take effect
158 | # we use SGD with a low learning rate
159 | from keras.optimizers import SGD
160 | model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
161 | 
162 | # we train our model again (this time fine-tuning the top 2 inception blocks
163 | # alongside the top Dense layers
164 | #model.fit_generator(...)
165 | 
166 | model.fit_generator(
167 |     train_generator,
168 |     samples_per_epoch=nb_train_samples // batch_size,
169 |     nb_epoch=fit_epochs,
170 |     validation_data=validation_generator,
171 |     nb_val_samples=nb_validation_samples // batch_size,
172 |     callbacks=[mc_fit, tb])
173 | 
174 | model.save_weights(new_extended_inception_weights)
175 | 


--------------------------------------------------------------------------------