├── .gitignore ├── COMMANDS.md ├── README.md ├── caffe_to_tensorflow.py ├── checkpoints └── ssd_300_vgg.ckpt.zip ├── datasets ├── __init__.py ├── cifar10.py ├── dataset_factory.py ├── dataset_utils.py ├── imagenet.py ├── pascalvoc_2007.py ├── pascalvoc_2012.py ├── pascalvoc_common.py └── pascalvoc_to_tfrecords.py ├── demo ├── 000001.jpg ├── 000002.jpg ├── 000003.jpg ├── 000004.jpg ├── 000006.jpg ├── 000008.jpg ├── 000010.jpg ├── 000022.jpg ├── dog.jpg ├── eagle.jpg ├── horses.jpg ├── person.jpg └── street.jpg ├── deployment ├── __init__.py └── model_deploy.py ├── eval_ssd_network.py ├── inspect_checkpoint.py ├── nets ├── __init__.py ├── caffe_scope.py ├── custom_layers.py ├── inception.py ├── inception_resnet_v2.py ├── inception_v3.py ├── nets_factory.py ├── np_methods.py ├── ssd_common.py ├── ssd_vgg_300.py ├── ssd_vgg_512.py ├── vgg.py └── xception.py ├── notebooks ├── ssd_notebook.ipynb ├── ssd_tests.ipynb └── visualization.py ├── pictures ├── ex1.png └── ex2.png ├── preprocessing ├── __init__.py ├── inception_preprocessing.py ├── preprocessing_factory.py ├── ssd_vgg_preprocessing.py ├── tf_image.py └── vgg_preprocessing.py ├── tf_convert_data.py ├── tf_extended ├── __init__.py ├── bboxes.py ├── image.py ├── math.py ├── metrics.py └── tensors.py ├── tf_utils.py └── train_ssd_network.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Directories. 2 | __pycache__/ 3 | datasets/__pycache__/ 4 | deployment/__pycache__/ 5 | nets/__pycache__/ 6 | preprocessing/__pycache__/ 7 | 8 | .ipynb_checkpoints/ 9 | notebooks/.ipynb_checkpoints/ 10 | 11 | checkpoints/ssd_300_vgg.ckpt.data-00000-of-00001 12 | checkpoints/ssd_300_vgg.ckpt.index 13 | checkpoints/models/* 14 | checkpoints/VGG_VOC0712_SSD_* 15 | checkpoints/vgg_16.ckpt 16 | checkpoints/model.ckpt-* 17 | 18 | logs/ 19 | *.log 20 | nohup.out 21 | 22 | ssd-tensorflow.sublime-workspace 23 | ssd-tensorflow.sublime-project 24 | 25 | -------------------------------------------------------------------------------- /COMMANDS.md: -------------------------------------------------------------------------------- 1 | # =========================================================================== # 2 | # Dataset convert... 
3 | # =========================================================================== # 4 | rm events* graph* model* checkpoint 5 | mv events* graph* model* checkpoint ./log 6 | 7 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/rawdata/VOC2012/trainval/ 8 | OUTPUT_DIR=/media/paul/DataExt4/PascalVOC/dataset 9 | python tf_convert_data.py \ 10 | --dataset_name=pascalvoc \ 11 | --dataset_dir=${DATASET_DIR} \ 12 | --output_name=voc_2012_train \ 13 | --output_dir=${OUTPUT_DIR} 14 | 15 | CAFFE_MODEL=/media/paul/DataExt4/PascalVOC/training/ckpts/SSD_300x300_VOC0712/VGG_VOC0712_SSD_300x300_iter_120000.caffemodel 16 | python caffe_to_tensorflow.py \ 17 | --model_name=ssd_300_vgg \ 18 | --num_classes=21 \ 19 | --caffemodel_path=${CAFFE_MODEL} 20 | 21 | # =========================================================================== # 22 | # VGG-based SSD network 23 | # =========================================================================== # 24 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 25 | TRAIN_DIR=./logs/ssd_300_vgg_3 26 | CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt 27 | python train_ssd_network.py \ 28 | --train_dir=${TRAIN_DIR} \ 29 | --dataset_dir=${DATASET_DIR} \ 30 | --dataset_name=pascalvoc_2012 \ 31 | --dataset_split_name=train \ 32 | --model_name=ssd_300_vgg \ 33 | --checkpoint_path=${CHECKPOINT_PATH} \ 34 | --save_summaries_secs=60 \ 35 | --save_interval_secs=600 \ 36 | --weight_decay=0.0005 \ 37 | --optimizer=adam \ 38 | --learning_rate=0.001 \ 39 | --learning_rate_decay_factor=0.95 \ 40 | --batch_size=32 41 | 42 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 43 | TRAIN_DIR=./logs/ssd_300_vgg_3 44 | EVAL_DIR=${TRAIN_DIR}/eval 45 | python eval_ssd_network.py \ 46 | --eval_dir=${EVAL_DIR} \ 47 | --dataset_dir=${DATASET_DIR} \ 48 | --dataset_name=pascalvoc_2007 \ 49 | --dataset_split_name=test \ 50 | --model_name=ssd_300_vgg \ 51 | --checkpoint_path=${TRAIN_DIR} \ 52 | --wait_for_checkpoints=True \ 53 | --batch_size=1 \ 54 | --max_num_batches=500 55 | 56 | 57 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 58 | EVAL_DIR=./logs/ssd_300_vgg_1_eval 59 | CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt 60 | CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_300x300_iter_120000.ckpt 61 | CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt 62 | python eval_ssd_network.py \ 63 | --eval_dir=${EVAL_DIR} \ 64 | --dataset_dir=${DATASET_DIR} \ 65 | --dataset_name=pascalvoc_2007 \ 66 | --dataset_split_name=test \ 67 | --model_name=ssd_300_vgg \ 68 | --checkpoint_path=${CHECKPOINT_PATH} \ 69 | --batch_size=1 \ 70 | --max_num_batches=10 71 | 72 | 73 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 74 | EVAL_DIR=./logs/ssd_300_vgg_1_eval 75 | CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt 76 | python eval_ssd_network.py \ 77 | --eval_dir=${EVAL_DIR} \ 78 | --dataset_dir=${DATASET_DIR} \ 79 | --dataset_name=pascalvoc_2007 \ 80 | --dataset_split_name=test \ 81 | --model_name=ssd_512_vgg \ 82 | --checkpoint_path=${CHECKPOINT_PATH} \ 83 | --batch_size=1 \ 84 | --max_num_batches=10 85 | 86 | # =========================================================================== # 87 | # Fine tune VGG-based SSD network 88 | # =========================================================================== # 89 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 90 | TRAIN_DIR=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_6 91 | CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt 92 | python train_ssd_network.py \ 93 | --train_dir=${TRAIN_DIR} \ 94 | 
--dataset_dir=${DATASET_DIR} \ 95 | --dataset_name=pascalvoc_2012 \ 96 | --dataset_split_name=train \ 97 | --model_name=ssd_300_vgg \ 98 | --checkpoint_path=${CHECKPOINT_PATH} \ 99 | --checkpoint_model_scope=vgg_16 \ 100 | --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \ 101 | --save_summaries_secs=60 \ 102 | --save_interval_secs=600 \ 103 | --weight_decay=0.0005 \ 104 | --optimizer=adam \ 105 | --learning_rate=0.001 \ 106 | --learning_rate_decay_factor=0.94 \ 107 | --batch_size=32 108 | 109 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 110 | TRAIN_DIR=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_13 111 | CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt 112 | python train_ssd_network.py \ 113 | --train_dir=${TRAIN_DIR} \ 114 | --dataset_dir=${DATASET_DIR} \ 115 | --dataset_name=pascalvoc_2012 \ 116 | --dataset_split_name=train \ 117 | --model_name=ssd_300_vgg \ 118 | --checkpoint_path=${CHECKPOINT_PATH} \ 119 | --checkpoint_model_scope=vgg_16 \ 120 | --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \ 121 | --trainable_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \ 122 | --save_summaries_secs=60 \ 123 | --save_interval_secs=600 \ 124 | --weight_decay=0.0005 \ 125 | --optimizer=adam \ 126 | --learning_rate=0.001 \ 127 | --learning_rate_decay_factor=0.94 \ 128 | --batch_size=32 129 | 130 | DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset 131 | TRAIN_DIR=/media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_2 132 | CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt 133 | CHECKPOINT_PATH=media/paul/DataExt4/PascalVOC/training/logs/ssd_300_vgg_1/ 134 | python train_ssd_network.py \ 135 | --train_dir=${TRAIN_DIR} \ 136 | --dataset_dir=${DATASET_DIR} \ 137 | --dataset_name=pascalvoc_2012 \ 138 | --dataset_split_name=train \ 139 | --model_name=ssd_300_vgg \ 140 | --checkpoint_path=${CHECKPOINT_PATH} \ 141 | --save_summaries_secs=60 \ 142 | --save_interval_secs=600 \ 143 | --weight_decay=0.0005 \ 144 | --optimizer=adam \ 145 | --learning_rate=0.0005 \ 146 | --learning_rate_decay_factor=0.96 \ 147 | --batch_size=32 148 | 149 | EVAL_DIR=${TRAIN_DIR}/eval 150 | python eval_ssd_network.py \ 151 | --eval_dir=${EVAL_DIR} \ 152 | --dataset_dir=${DATASET_DIR} \ 153 | --dataset_name=pascalvoc_2007 \ 154 | --dataset_split_name=test \ 155 | --model_name=ssd_300_vgg \ 156 | --checkpoint_path=${TRAIN_DIR} \ 157 | --wait_for_checkpoints=True \ 158 | --batch_size=1 159 | 160 | 161 | # =========================================================================== # 162 | # Inception v3 163 | # =========================================================================== # 164 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 165 | DATASET_DIR=../datasets/ImageNet 166 | TRAIN_DIR=./logs/inception_v3 167 | CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/inception_v3.ckpt 168 | CHECKPOINT_PATH=./checkpoints/inception_v3.ckpt 169 
| python train_image_classifier.py \ 170 | --train_dir=${TRAIN_DIR} \ 171 | --dataset_dir=${DATASET_DIR} \ 172 | --dataset_name=imagenet \ 173 | --dataset_split_name=train \ 174 | --model_name=inception_v3 \ 175 | --checkpoint_path=${CHECKPOINT_PATH} \ 176 | --save_summaries_secs=60 \ 177 | --save_interval_secs=60 \ 178 | --weight_decay=0.00001 \ 179 | --optimizer=rmsprop \ 180 | --learning_rate=0.00005 \ 181 | --batch_size=4 182 | 183 | 184 | CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/logs 185 | CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/inception_v3.ckpt 186 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 187 | python eval_image_classifier.py \ 188 | --alsologtostderr \ 189 | --checkpoint_path=${CHECKPOINT_PATH} \ 190 | --dataset_dir=${DATASET_DIR} \ 191 | --dataset_name=imagenet \ 192 | --dataset_split_name=validation \ 193 | --model_name=inception_v3 194 | 195 | 196 | # =========================================================================== # 197 | # VGG 16 and 19 198 | # =========================================================================== # 199 | CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/vgg_19.ckpt 200 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 201 | python eval_image_classifier.py \ 202 | --alsologtostderr \ 203 | --checkpoint_path=${CHECKPOINT_PATH} \ 204 | --dataset_dir=${DATASET_DIR} \ 205 | --dataset_name=imagenet \ 206 | --labels_offset=1 \ 207 | --dataset_split_name=validation \ 208 | --model_name=vgg_19 209 | 210 | 211 | CHECKPOINT_PATH=/media/paul/DataExt4/ImageNet/Training/ckpts/vgg_16.ckpt 212 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 213 | python eval_image_classifier.py \ 214 | --alsologtostderr \ 215 | --checkpoint_path=${CHECKPOINT_PATH} \ 216 | --dataset_dir=${DATASET_DIR} \ 217 | --dataset_name=imagenet \ 218 | --labels_offset=1 \ 219 | --dataset_split_name=validation \ 220 | --model_name=vgg_16 221 | 222 | 223 | # =========================================================================== # 224 | # Xception 225 | # =========================================================================== # 226 | DATASET_DIR=../datasets/ImageNet 227 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 228 | TRAIN_DIR=./logs/xception 229 | CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt 230 | 231 | python train_image_classifier.py \ 232 | --train_dir=${TRAIN_DIR} \ 233 | --dataset_dir=${DATASET_DIR} \ 234 | --dataset_name=imagenet \ 235 | --dataset_split_name=train \ 236 | --model_name=xception \ 237 | --labels_offset=1 \ 238 | --checkpoint_path=${CHECKPOINT_PATH} \ 239 | --save_summaries_secs=600 \ 240 | --save_interval_secs=600 \ 241 | --weight_decay=0.00001 \ 242 | --optimizer=rmsprop \ 243 | --learning_rate=0.0001 \ 244 | --batch_size=32 245 | 246 | python train_image_classifier.py \ 247 | --train_dir=${TRAIN_DIR} \ 248 | --dataset_dir=${DATASET_DIR} \ 249 | --dataset_name=imagenet \ 250 | --dataset_split_name=train \ 251 | --model_name=xception \ 252 | --labels_offset=1 \ 253 | --save_summaries_secs=60 \ 254 | --save_interval_secs=60 \ 255 | --weight_decay=0.00001 \ 256 | --optimizer=rmsprop \ 257 | --learning_rate=0.00005 \ 258 | --batch_size=1 259 | 260 | 261 | CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt 262 | CHECKPOINT_PATH=./logs/xception 263 | CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt 264 | DATASET_DIR=../datasets/ImageNet 265 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 266 | python 
eval_image_classifier.py \ 267 | --alsologtostderr \ 268 | --checkpoint_path=${CHECKPOINT_PATH} \ 269 | --dataset_dir=${DATASET_DIR} \ 270 | --labels_offset=1 \ 271 | --dataset_name=imagenet \ 272 | --dataset_split_name=validation \ 273 | --model_name=xception \ 274 | --max_num_batches=10 275 | 276 | 277 | CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.h5 278 | python ckpt_keras_to_tensorflow.py \ 279 | --model_name=xception_keras \ 280 | --num_classes=1000 \ 281 | --checkpoint_path=${CHECKPOINT_PATH} 282 | 283 | 284 | # =========================================================================== # 285 | # Dception 286 | # =========================================================================== # 287 | DATASET_DIR=../datasets/ImageNet 288 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 289 | TRAIN_DIR=./logs/dception 290 | CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt 291 | 292 | python train_image_classifier.py \ 293 | --train_dir=${TRAIN_DIR} \ 294 | --dataset_dir=${DATASET_DIR} \ 295 | --dataset_name=imagenet \ 296 | --dataset_split_name=train \ 297 | --model_name=dception \ 298 | --labels_offset=1 \ 299 | --checkpoint_path=${CHECKPOINT_PATH} \ 300 | --save_summaries_secs=60 \ 301 | --save_interval_secs=60 \ 302 | --weight_decay=0.00001 \ 303 | --optimizer=rmsprop \ 304 | --learning_rate=0.00005 \ 305 | --batch_size=32 306 | 307 | python train_image_classifier.py \ 308 | --train_dir=${TRAIN_DIR} \ 309 | --dataset_dir=${DATASET_DIR} \ 310 | --dataset_name=imagenet \ 311 | --dataset_split_name=train \ 312 | --model_name=dception \ 313 | --labels_offset=1 \ 314 | --save_summaries_secs=60 \ 315 | --save_interval_secs=60 \ 316 | --weight_decay=0.00001 \ 317 | --optimizer=rmsprop \ 318 | --learning_rate=0.00005 \ 319 | --batch_size=1 320 | 321 | 322 | CHECKPOINT_PATH=./checkpoints/xception_weights_tf_dim_ordering_tf_kernels.ckpt 323 | CHECKPOINT_PATH=./logs/dception 324 | DATASET_DIR=../datasets/ImageNet 325 | DATASET_DIR=/media/paul/DataExt4/ImageNet/Dataset 326 | python eval_image_classifier.py \ 327 | --alsologtostderr \ 328 | --checkpoint_path=${CHECKPOINT_PATH} \ 329 | --dataset_dir=${DATASET_DIR} \ 330 | --labels_offset=1 \ 331 | --dataset_name=imagenet \ 332 | --dataset_split_name=validation \ 333 | --model_name=dception 334 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SSD: Single Shot MultiBox Detector in TensorFlow 2 | 3 | SSD is a unified framework for object detection with a single network. It was originally introduced in this research [article](http://arxiv.org/abs/1512.02325). 4 | 5 | This repository contains a TensorFlow re-implementation of the original [Caffe code](https://github.com/weiliu89/caffe/tree/ssd). At present, it only implements VGG-based SSD networks (with 300 and 512 inputs), but the architecture of the project is modular and should make it easy to implement and train other SSD variants (ResNet- or Inception-based, for instance). The present TF checkpoints have been directly converted from SSD Caffe models. 6 | 7 | The organisation is inspired by the TF-Slim models repository, which contains implementations of popular architectures (ResNet, Inception and VGG). Hence, it is separated into three main parts: 8 | * datasets: interface to popular datasets (Pascal VOC, COCO, ...)
and scripts to convert the former to TF-Records; 9 | * networks: definition of SSD networks, and common encoding and decoding methods (we refer to the paper on this precise topic); 10 | * pre-processing: pre-processing and data augmentation routines, inspired by the original VGG and Inception implementations. 11 | 12 | ## SSD minimal example 13 | 14 | The [SSD Notebook](notebooks/ssd_notebook.ipynb) contains a minimal example of the SSD TensorFlow pipeline. In short, detection consists of two main steps: running the SSD network on the image and post-processing the output using common algorithms (top-k filtering and the Non-Maximum Suppression algorithm). 15 | 16 | Here are two examples of successful detection outputs: 17 | ![](pictures/ex1.png "SSD anchors") 18 | ![](pictures/ex2.png "SSD anchors") 19 | 20 | To run the notebook, you first have to unzip the checkpoint files in ./checkpoints: 21 | ```bash 22 | unzip ssd_300_vgg.ckpt.zip 23 | ``` 24 | and then start a Jupyter notebook with 25 | ```bash 26 | jupyter notebook notebooks/ssd_notebook.ipynb 27 | ``` 28 | 29 | 30 | ## Datasets 31 | 32 | The current version only supports the Pascal VOC datasets (2007 and 2012). In order to be used for training an SSD model, they first need to be converted to TF-Records using the `tf_convert_data.py` script: 33 | ```bash 34 | DATASET_DIR=./VOC2007/test/ 35 | OUTPUT_DIR=./tfrecords 36 | python tf_convert_data.py \ 37 | --dataset_name=pascalvoc \ 38 | --dataset_dir=${DATASET_DIR} \ 39 | --output_name=voc_2007_train \ 40 | --output_dir=${OUTPUT_DIR} 41 | ``` 42 | Note that the previous command generates a collection of TF-Records instead of a single file, in order to ease shuffling during training. 43 | 44 | ## Evaluation on Pascal VOC 2007 45 | 46 | The present TensorFlow implementation of SSD models has the following performance: 47 | 48 | | Model | Training data | Testing data | mAP | FPS | 49 | |--------|:---------:|:------:|:------:|:------:| 50 | | [SSD-300 VGG-based](https://drive.google.com/open?id=0B0qPCUZ-3YwWZlJaRTRRQWRFYXM) | VOC07+12 trainval | VOC07 test | 0.778 | - | 51 | | [SSD-300 VGG-based](https://drive.google.com/file/d/0B0qPCUZ-3YwWUXh4UHJrd1RDM3c/view?usp=sharing) | VOC07+12+COCO trainval | VOC07 test | 0.817 | - | 52 | | [SSD-512 VGG-based](https://drive.google.com/open?id=0B0qPCUZ-3YwWT1RCLVZNN3RTVEU) | VOC07+12+COCO trainval | VOC07 test | 0.837 | - | 53 | 54 | We are working hard at reproducing the same performance as the original [Caffe implementation](https://github.com/weiliu89/caffe/tree/ssd)! 55 | 56 | After downloading and extracting the previous checkpoints, the evaluation metrics should be reproducible by running the following command (with `DATASET_DIR` pointing to the TF-Records generated above): 57 | ```bash 58 | EVAL_DIR=./logs/ 59 | CHECKPOINT_PATH=./checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt 60 | python eval_ssd_network.py \ 61 | --eval_dir=${EVAL_DIR} \ 62 | --dataset_dir=${DATASET_DIR} \ 63 | --dataset_name=pascalvoc_2007 \ 64 | --dataset_split_name=test \ 65 | --model_name=ssd_300_vgg \ 66 | --checkpoint_path=${CHECKPOINT_PATH} \ 67 | --batch_size=1 68 | ``` 69 | The evaluation script provides estimates of the recall-precision curve and computes the mAP metric following the Pascal VOC 2007 and 2012 guidelines.
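Before launching training or evaluation on freshly converted data, it can be worth sanity-checking the generated TF-Records. The snippet below is a minimal sketch and not part of the repository scripts; it assumes the shards produced by `tf_convert_data.py` were written to `./tfrecords` with the `voc_2007_train` output name, and it simply counts the serialized examples per shard with the TensorFlow 1.x `tf.python_io.tf_record_iterator` API:
```python
import glob
import tensorflow as tf

# Hypothetical pattern matching the shards written by tf_convert_data.py.
TFRECORD_PATTERN = './tfrecords/voc_2007_train_*.tfrecord'

total = 0
for shard in sorted(glob.glob(TFRECORD_PATTERN)):
    # Each shard holds serialized tf.train.Example protos (about 200 per file).
    count = sum(1 for _ in tf.python_io.tf_record_iterator(shard))
    print('{}: {} records'.format(shard, count))
    total += count
print('Total: {} records'.format(total))
```
The training and evaluation scripts take the split size from `SPLITS_TO_SIZES` in `datasets/pascalvoc_2007.py` (or `pascalvoc_2012.py`), so the total count should match the corresponding entry.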
70 | 71 | In addition, if one wants to experiment with or test a different Caffe SSD checkpoint, it can be converted to a TensorFlow checkpoint as follows: 72 | ```sh 73 | CAFFE_MODEL=./ckpts/SSD_300x300_ft_VOC0712/VGG_VOC0712_SSD_300x300_ft_iter_120000.caffemodel 74 | python caffe_to_tensorflow.py \ 75 | --model_name=ssd_300_vgg \ 76 | --num_classes=21 \ 77 | --caffemodel_path=${CAFFE_MODEL} 78 | ``` 79 | 80 | ## Training 81 | 82 | The script `train_ssd_network.py` is in charge of training the network. Similarly to the TF-Slim models, one can pass numerous options to the training process (dataset, optimiser, hyper-parameters, model, ...). In particular, it is possible to provide a checkpoint file which can be used as a starting point in order to fine-tune a network. 83 | 84 | ### Fine-tuning existing SSD checkpoints 85 | 86 | The easiest way to fine-tune the SSD model is to start from a pre-trained SSD network (VGG-300 or VGG-512). For instance, one can fine-tune a model starting from the former as follows: 87 | ```bash 88 | DATASET_DIR=./tfrecords 89 | TRAIN_DIR=./logs/ 90 | CHECKPOINT_PATH=./checkpoints/ssd_300_vgg.ckpt 91 | python train_ssd_network.py \ 92 | --train_dir=${TRAIN_DIR} \ 93 | --dataset_dir=${DATASET_DIR} \ 94 | --dataset_name=pascalvoc_2012 \ 95 | --dataset_split_name=train \ 96 | --model_name=ssd_300_vgg \ 97 | --checkpoint_path=${CHECKPOINT_PATH} \ 98 | --save_summaries_secs=60 \ 99 | --save_interval_secs=600 \ 100 | --weight_decay=0.0005 \ 101 | --optimizer=adam \ 102 | --learning_rate=0.001 \ 103 | --batch_size=32 104 | ``` 105 | Note that in addition to the training script flags, one may also want to experiment with the data augmentation parameters (random cropping, resolution, ...) in `ssd_vgg_preprocessing.py` and/or the network parameters (feature layers, anchor boxes, ...) in `ssd_vgg_300/512.py`. 106 | 107 | Furthermore, the training script can be combined with the evaluation routine in order to monitor the performance of saved checkpoints on a validation dataset. For that purpose, one can pass a GPU memory upper limit to the training and validation scripts so that both can run in parallel on the same device. If some GPU memory is available for the evaluation script, it can be run in parallel as follows: 108 | ```bash 109 | EVAL_DIR=${TRAIN_DIR}/eval 110 | python eval_ssd_network.py \ 111 | --eval_dir=${EVAL_DIR} \ 112 | --dataset_dir=${DATASET_DIR} \ 113 | --dataset_name=pascalvoc_2007 \ 114 | --dataset_split_name=test \ 115 | --model_name=ssd_300_vgg \ 116 | --checkpoint_path=${TRAIN_DIR} \ 117 | --wait_for_checkpoints=True \ 118 | --batch_size=1 \ 119 | --max_num_batches=500 120 | ``` 121 | 122 | ### Fine-tuning a network trained on ImageNet 123 | 124 | One can also try to build a new SSD model based on a standard architecture (VGG, ResNet, Inception, ...) and set up the `multibox` layers (with specific anchors, ratios, ...) on top of it. For that purpose, you can fine-tune a network by loading only the weights of the original architecture and randomly initializing the rest of the network.
For instance, in the case of the [VGG-16 architecture](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz), one can train a new model as follows: 125 | ```bash 126 | DATASET_DIR=./tfrecords 127 | TRAIN_DIR=./log/ 128 | CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt 129 | python train_ssd_network.py \ 130 | --train_dir=${TRAIN_DIR} \ 131 | --dataset_dir=${DATASET_DIR} \ 132 | --dataset_name=pascalvoc_2007 \ 133 | --dataset_split_name=train \ 134 | --model_name=ssd_300_vgg \ 135 | --checkpoint_path=${CHECKPOINT_PATH} \ 136 | --checkpoint_model_scope=vgg_16 \ 137 | --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \ 138 | --trainable_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \ 139 | --save_summaries_secs=60 \ 140 | --save_interval_secs=600 \ 141 | --weight_decay=0.0005 \ 142 | --optimizer=adam \ 143 | --learning_rate=0.001 \ 144 | --learning_rate_decay_factor=0.94 \ 145 | --batch_size=32 146 | ``` 147 | In the former command, the training script randomly initializes the weights belonging to the `checkpoint_exclude_scopes` and loads the remaining part of the network from the checkpoint file `vgg_16.ckpt`. Note that we also use the `trainable_scopes` parameter to first train only the new SSD components and leave the rest of the VGG network unchanged. Once the network has converged to a good first result (~0.5 mAP for instance), you can fine-tune the complete network as follows: 148 | ```bash 149 | DATASET_DIR=./tfrecords 150 | TRAIN_DIR=./log_finetune/ 151 | CHECKPOINT_PATH=./log/model.ckpt-N 152 | python train_ssd_network.py \ 153 | --train_dir=${TRAIN_DIR} \ 154 | --dataset_dir=${DATASET_DIR} \ 155 | --dataset_name=pascalvoc_2007 \ 156 | --dataset_split_name=train \ 157 | --model_name=ssd_300_vgg \ 158 | --checkpoint_path=${CHECKPOINT_PATH} \ 159 | --checkpoint_model_scope=vgg_16 \ 160 | --save_summaries_secs=60 \ 161 | --save_interval_secs=600 \ 162 | --weight_decay=0.0005 \ 163 | --optimizer=adam \ 164 | --learning_rate=0.00001 \ 165 | --learning_rate_decay_factor=0.94 \ 166 | --batch_size=32 167 | ``` 168 | 169 | A number of pre-trained weights for popular deep architectures can be found on the [TF-Slim models page](https://github.com/tensorflow/models/tree/master/slim). 170 | -------------------------------------------------------------------------------- /caffe_to_tensorflow.py: -------------------------------------------------------------------------------- 1 | """Convert a Caffe model file to TensorFlow checkpoint format. 2 | 3 | Assume that the network built is equivalent (or a sub-network) to the Caffe 4 | definition. 5 | """ 6 | import tensorflow as tf 7 | 8 | from nets import caffe_scope 9 | from nets import nets_factory 10 | 11 | slim = tf.contrib.slim 12 | 13 | # =========================================================================== # 14 | # Main flags.
15 | # =========================================================================== # 16 | tf.app.flags.DEFINE_string( 17 | 'model_name', 'ssd_300_vgg', 'Name of the model to convert.') 18 | tf.app.flags.DEFINE_string( 19 | 'num_classes', 21, 'Number of classes in the dataset.') 20 | tf.app.flags.DEFINE_string( 21 | 'caffemodel_path', None, 22 | 'The path to the Caffe model file to convert.') 23 | 24 | FLAGS = tf.app.flags.FLAGS 25 | 26 | 27 | # =========================================================================== # 28 | # Main converting routine. 29 | # =========================================================================== # 30 | def main(_): 31 | # Caffe scope... 32 | caffemodel = caffe_scope.CaffeScope() 33 | caffemodel.load(FLAGS.caffemodel_path) 34 | 35 | tf.logging.set_verbosity(tf.logging.INFO) 36 | with tf.Graph().as_default(): 37 | global_step = slim.create_global_step() 38 | num_classes = int(FLAGS.num_classes) 39 | 40 | # Select the network. 41 | ssd_class = nets_factory.get_network(FLAGS.model_name) 42 | ssd_params = ssd_class.default_params._replace(num_classes=num_classes) 43 | ssd_net = ssd_class(ssd_params) 44 | ssd_shape = ssd_net.params.img_shape 45 | 46 | # Image placeholder and model. 47 | shape = (1, ssd_shape[0], ssd_shape[1], 3) 48 | img_input = tf.placeholder(shape=shape, dtype=tf.float32) 49 | # Create model. 50 | with slim.arg_scope(ssd_net.arg_scope_caffe(caffemodel)): 51 | ssd_net.net(img_input, is_training=False) 52 | 53 | init_op = tf.global_variables_initializer() 54 | with tf.Session() as session: 55 | # Run the init operation. 56 | session.run(init_op) 57 | 58 | # Save model in checkpoint. 59 | saver = tf.train.Saver() 60 | ckpt_path = FLAGS.caffemodel_path.replace('.caffemodel', '.ckpt') 61 | saver.save(session, ckpt_path, write_meta_graph=False) 62 | 63 | 64 | if __name__ == '__main__': 65 | tf.app.run() 66 | 67 | -------------------------------------------------------------------------------- /checkpoints/ssd_300_vgg.ckpt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/checkpoints/ssd_300_vgg.ckpt.zip -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the Cifar10 dataset. 
16 | 17 | The dataset scripts used to create the dataset can be found at: 18 | tensorflow/models/slim/data/create_cifar10_dataset.py 19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import os 26 | import tensorflow as tf 27 | 28 | from datasets import dataset_utils 29 | 30 | slim = tf.contrib.slim 31 | 32 | _FILE_PATTERN = 'cifar10_%s.tfrecord' 33 | 34 | SPLITS_TO_SIZES = {'train': 50000, 'test': 10000} 35 | 36 | _NUM_CLASSES = 10 37 | 38 | _ITEMS_TO_DESCRIPTIONS = { 39 | 'image': 'A [32 x 32 x 3] color image.', 40 | 'label': 'A single integer between 0 and 9', 41 | } 42 | 43 | 44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 45 | """Gets a dataset tuple with instructions for reading cifar10. 46 | 47 | Args: 48 | split_name: A train/test split name. 49 | dataset_dir: The base directory of the dataset sources. 50 | file_pattern: The file pattern to use when matching the dataset sources. 51 | It is assumed that the pattern contains a '%s' string so that the split 52 | name can be inserted. 53 | reader: The TensorFlow reader type. 54 | 55 | Returns: 56 | A `Dataset` namedtuple. 57 | 58 | Raises: 59 | ValueError: if `split_name` is not a valid train/test split. 60 | """ 61 | if split_name not in SPLITS_TO_SIZES: 62 | raise ValueError('split name %s was not recognized.' % split_name) 63 | 64 | if not file_pattern: 65 | file_pattern = _FILE_PATTERN 66 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 67 | 68 | # Allowing None in the signature so that dataset_factory can use the default. 69 | if not reader: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label'), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=SPLITS_TO_SIZES[split_name], 96 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 97 | num_classes=_NUM_CLASSES, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A factory-pattern class which returns classification image/label pairs.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from datasets import cifar10 22 | from datasets import imagenet 23 | 24 | from datasets import pascalvoc_2007 25 | from datasets import pascalvoc_2012 26 | 27 | datasets_map = { 28 | 'cifar10': cifar10, 29 | 'imagenet': imagenet, 30 | 'pascalvoc_2007': pascalvoc_2007, 31 | 'pascalvoc_2012': pascalvoc_2012, 32 | } 33 | 34 | 35 | def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None): 36 | """Given a dataset name and a split_name returns a Dataset. 37 | 38 | Args: 39 | name: String, the name of the dataset. 40 | split_name: A train/test split name. 41 | dataset_dir: The directory where the dataset files are stored. 42 | file_pattern: The file pattern to use for matching the dataset source files. 43 | reader: The subclass of tf.ReaderBase. If left as `None`, then the default 44 | reader defined by each dataset is used. 45 | Returns: 46 | A `Dataset` class. 47 | Raises: 48 | ValueError: If the dataset `name` is unknown. 49 | """ 50 | if name not in datasets_map: 51 | raise ValueError('Name of dataset unknown %s' % name) 52 | return datasets_map[name].get_split(split_name, 53 | dataset_dir, 54 | file_pattern, 55 | reader) 56 | -------------------------------------------------------------------------------- /datasets/dataset_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains utilities for downloading and converting datasets.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import sys 22 | import tarfile 23 | 24 | from six.moves import urllib 25 | import tensorflow as tf 26 | 27 | LABELS_FILENAME = 'labels.txt' 28 | 29 | 30 | def int64_feature(value): 31 | """Wrapper for inserting int64 features into Example proto. 32 | """ 33 | if not isinstance(value, list): 34 | value = [value] 35 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 36 | 37 | 38 | def float_feature(value): 39 | """Wrapper for inserting float features into Example proto. 40 | """ 41 | if not isinstance(value, list): 42 | value = [value] 43 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 44 | 45 | 46 | def bytes_feature(value): 47 | """Wrapper for inserting bytes features into Example proto. 
48 | """ 49 | if not isinstance(value, list): 50 | value = [value] 51 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 52 | 53 | 54 | def image_to_tfexample(image_data, image_format, height, width, class_id): 55 | return tf.train.Example(features=tf.train.Features(feature={ 56 | 'image/encoded': bytes_feature(image_data), 57 | 'image/format': bytes_feature(image_format), 58 | 'image/class/label': int64_feature(class_id), 59 | 'image/height': int64_feature(height), 60 | 'image/width': int64_feature(width), 61 | })) 62 | 63 | 64 | def download_and_uncompress_tarball(tarball_url, dataset_dir): 65 | """Downloads the `tarball_url` and uncompresses it locally. 66 | 67 | Args: 68 | tarball_url: The URL of a tarball file. 69 | dataset_dir: The directory where the temporary files are stored. 70 | """ 71 | filename = tarball_url.split('/')[-1] 72 | filepath = os.path.join(dataset_dir, filename) 73 | 74 | def _progress(count, block_size, total_size): 75 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 76 | filename, float(count * block_size) / float(total_size) * 100.0)) 77 | sys.stdout.flush() 78 | filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) 79 | print() 80 | statinfo = os.stat(filepath) 81 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 82 | tarfile.open(filepath, 'r:gz').extractall(dataset_dir) 83 | 84 | 85 | def write_label_file(labels_to_class_names, dataset_dir, 86 | filename=LABELS_FILENAME): 87 | """Writes a file with the list of class names. 88 | 89 | Args: 90 | labels_to_class_names: A map of (integer) labels to class names. 91 | dataset_dir: The directory in which the labels file should be written. 92 | filename: The filename where the class names are written. 93 | """ 94 | labels_filename = os.path.join(dataset_dir, filename) 95 | with tf.gfile.Open(labels_filename, 'w') as f: 96 | for label in labels_to_class_names: 97 | class_name = labels_to_class_names[label] 98 | f.write('%d:%s\n' % (label, class_name)) 99 | 100 | 101 | def has_labels(dataset_dir, filename=LABELS_FILENAME): 102 | """Specifies whether or not the dataset directory contains a label map file. 103 | 104 | Args: 105 | dataset_dir: The directory in which the labels file is found. 106 | filename: The filename where the class names are written. 107 | 108 | Returns: 109 | `True` if the labels file exists and `False` otherwise. 110 | """ 111 | return tf.gfile.Exists(os.path.join(dataset_dir, filename)) 112 | 113 | 114 | def read_label_file(dataset_dir, filename=LABELS_FILENAME): 115 | """Reads the labels file and returns a mapping from ID to class name. 116 | 117 | Args: 118 | dataset_dir: The directory in which the labels file is found. 119 | filename: The filename where the class names are written. 120 | 121 | Returns: 122 | A map from a label (integer) to class name. 123 | """ 124 | labels_filename = os.path.join(dataset_dir, filename) 125 | with tf.gfile.Open(labels_filename, 'rb') as f: 126 | lines = f.read() 127 | lines = lines.split(b'\n') 128 | lines = filter(None, lines) 129 | 130 | labels_to_class_names = {} 131 | for line in lines: 132 | index = line.index(b':') 133 | labels_to_class_names[int(line[:index])] = line[index+1:] 134 | return labels_to_class_names 135 | -------------------------------------------------------------------------------- /datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes. 16 | 17 | Some images have one or more bounding boxes associated with the label of the 18 | image. See details here: http://image-net.org/download-bboxes 19 | 20 | ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use 21 | "WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech ) 22 | and SYNSET OFFSET of WordNet. For more information, please refer to the 23 | WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/]. 24 | 25 | "There are bounding boxes for over 3000 popular synsets available. 26 | For each synset, there are on average 150 images with bounding boxes." 27 | 28 | WARNING: Don't use for object detection, in this case all the bounding boxes 29 | of the image belong to just one class. 30 | """ 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | import os 36 | from six.moves import urllib 37 | import tensorflow as tf 38 | 39 | from datasets import dataset_utils 40 | 41 | slim = tf.contrib.slim 42 | 43 | # TODO(nsilberman): Add tfrecord file type once the script is updated. 44 | _FILE_PATTERN = '%s-*' 45 | 46 | _SPLITS_TO_SIZES = { 47 | 'train': 1281167, 48 | 'validation': 50000, 49 | } 50 | 51 | _ITEMS_TO_DESCRIPTIONS = { 52 | 'image': 'A color image of varying height and width.', 53 | 'label': 'The label id of the image, integer between 0 and 999', 54 | 'label_text': 'The text of the label.', 55 | 'object/bbox': 'A list of bounding boxes.', 56 | 'object/label': 'A list of labels, one per each object.', 57 | } 58 | 59 | _NUM_CLASSES = 1001 60 | 61 | 62 | def create_readable_names_for_imagenet_labels(): 63 | """Create a dict mapping label id to human readable string. 64 | 65 | Returns: 66 | labels_to_names: dictionary where keys are integers from to 1000 67 | and values are human-readable names. 68 | 69 | We retrieve a synset file, which contains a list of valid synset labels used 70 | by ILSVRC competition. There is one synset one per line, eg. 71 | # n01440764 72 | # n01443537 73 | We also retrieve a synset_to_human_file, which contains a mapping from synsets 74 | to human-readable names for every synset in Imagenet. These are stored in a 75 | tsv format, as follows: 76 | # n02119247 black fox 77 | # n02119359 silver fox 78 | We assign each synset (in alphabetical order) an integer, starting from 1 79 | (since 0 is reserved for the background class). 
80 | 81 | Code is based on 82 | https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463 83 | """ 84 | 85 | # pylint: disable=g-line-too-long 86 | base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/' 87 | synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url) 88 | synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url) 89 | 90 | filename, _ = urllib.request.urlretrieve(synset_url) 91 | synset_list = [s.strip() for s in open(filename).readlines()] 92 | num_synsets_in_ilsvrc = len(synset_list) 93 | assert num_synsets_in_ilsvrc == 1000 94 | 95 | filename, _ = urllib.request.urlretrieve(synset_to_human_url) 96 | synset_to_human_list = open(filename).readlines() 97 | num_synsets_in_all_imagenet = len(synset_to_human_list) 98 | assert num_synsets_in_all_imagenet == 21842 99 | 100 | synset_to_human = {} 101 | for s in synset_to_human_list: 102 | parts = s.strip().split('\t') 103 | assert len(parts) == 2 104 | synset = parts[0] 105 | human = parts[1] 106 | synset_to_human[synset] = human 107 | 108 | label_index = 1 109 | labels_to_names = {0: 'background'} 110 | for synset in synset_list: 111 | name = synset_to_human[synset] 112 | labels_to_names[label_index] = name 113 | label_index += 1 114 | 115 | return labels_to_names 116 | 117 | 118 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 119 | """Gets a dataset tuple with instructions for reading ImageNet. 120 | 121 | Args: 122 | split_name: A train/test split name. 123 | dataset_dir: The base directory of the dataset sources. 124 | file_pattern: The file pattern to use when matching the dataset sources. 125 | It is assumed that the pattern contains a '%s' string so that the split 126 | name can be inserted. 127 | reader: The TensorFlow reader type. 128 | 129 | Returns: 130 | A `Dataset` namedtuple. 131 | 132 | Raises: 133 | ValueError: if `split_name` is not a valid train/test split. 134 | """ 135 | if split_name not in _SPLITS_TO_SIZES: 136 | raise ValueError('split name %s was not recognized.' % split_name) 137 | 138 | if not file_pattern: 139 | file_pattern = _FILE_PATTERN 140 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 141 | 142 | # Allowing None in the signature so that dataset_factory can use the default. 
143 | if reader is None: 144 | reader = tf.TFRecordReader 145 | 146 | keys_to_features = { 147 | 'image/encoded': tf.FixedLenFeature( 148 | (), tf.string, default_value=''), 149 | 'image/format': tf.FixedLenFeature( 150 | (), tf.string, default_value='jpeg'), 151 | 'image/class/label': tf.FixedLenFeature( 152 | [], dtype=tf.int64, default_value=-1), 153 | 'image/class/text': tf.FixedLenFeature( 154 | [], dtype=tf.string, default_value=''), 155 | 'image/object/bbox/xmin': tf.VarLenFeature( 156 | dtype=tf.float32), 157 | 'image/object/bbox/ymin': tf.VarLenFeature( 158 | dtype=tf.float32), 159 | 'image/object/bbox/xmax': tf.VarLenFeature( 160 | dtype=tf.float32), 161 | 'image/object/bbox/ymax': tf.VarLenFeature( 162 | dtype=tf.float32), 163 | 'image/object/class/label': tf.VarLenFeature( 164 | dtype=tf.int64), 165 | } 166 | 167 | items_to_handlers = { 168 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 169 | 'label': slim.tfexample_decoder.Tensor('image/class/label'), 170 | 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), 171 | 'object/bbox': slim.tfexample_decoder.BoundingBox( 172 | ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), 173 | 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'), 174 | } 175 | 176 | decoder = slim.tfexample_decoder.TFExampleDecoder( 177 | keys_to_features, items_to_handlers) 178 | 179 | labels_to_names = None 180 | if dataset_utils.has_labels(dataset_dir): 181 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 182 | else: 183 | labels_to_names = create_readable_names_for_imagenet_labels() 184 | dataset_utils.write_label_file(labels_to_names, dataset_dir) 185 | 186 | return slim.dataset.Dataset( 187 | data_sources=file_pattern, 188 | reader=reader, 189 | decoder=decoder, 190 | num_samples=_SPLITS_TO_SIZES[split_name], 191 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 192 | num_classes=_NUM_CLASSES, 193 | labels_to_names=labels_to_names) 194 | -------------------------------------------------------------------------------- /datasets/pascalvoc_2007.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the Pascal VOC Dataset (images + annotations). 16 | """ 17 | import tensorflow as tf 18 | from datasets import pascalvoc_common 19 | 20 | slim = tf.contrib.slim 21 | 22 | FILE_PATTERN = 'voc_2007_%s_*.tfrecord' 23 | ITEMS_TO_DESCRIPTIONS = { 24 | 'image': 'A color image of varying height and width.', 25 | 'shape': 'Shape of the image', 26 | 'object/bbox': 'A list of bounding boxes, one per each object.', 27 | 'object/label': 'A list of labels, one per each object.', 28 | } 29 | # (Images, Objects) statistics on every class. 
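# Each tuple is (number of images containing at least one instance of the class,
# total number of annotated object instances) for the corresponding split.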
30 | TRAIN_STATISTICS = { 31 | 'none': (0, 0), 32 | 'aeroplane': (238, 306), 33 | 'bicycle': (243, 353), 34 | 'bird': (330, 486), 35 | 'boat': (181, 290), 36 | 'bottle': (244, 505), 37 | 'bus': (186, 229), 38 | 'car': (713, 1250), 39 | 'cat': (337, 376), 40 | 'chair': (445, 798), 41 | 'cow': (141, 259), 42 | 'diningtable': (200, 215), 43 | 'dog': (421, 510), 44 | 'horse': (287, 362), 45 | 'motorbike': (245, 339), 46 | 'person': (2008, 4690), 47 | 'pottedplant': (245, 514), 48 | 'sheep': (96, 257), 49 | 'sofa': (229, 248), 50 | 'train': (261, 297), 51 | 'tvmonitor': (256, 324), 52 | 'total': (5011, 12608), 53 | } 54 | TEST_STATISTICS = { 55 | 'none': (0, 0), 56 | 'aeroplane': (1, 1), 57 | 'bicycle': (1, 1), 58 | 'bird': (1, 1), 59 | 'boat': (1, 1), 60 | 'bottle': (1, 1), 61 | 'bus': (1, 1), 62 | 'car': (1, 1), 63 | 'cat': (1, 1), 64 | 'chair': (1, 1), 65 | 'cow': (1, 1), 66 | 'diningtable': (1, 1), 67 | 'dog': (1, 1), 68 | 'horse': (1, 1), 69 | 'motorbike': (1, 1), 70 | 'person': (1, 1), 71 | 'pottedplant': (1, 1), 72 | 'sheep': (1, 1), 73 | 'sofa': (1, 1), 74 | 'train': (1, 1), 75 | 'tvmonitor': (1, 1), 76 | 'total': (20, 20), 77 | } 78 | SPLITS_TO_SIZES = { 79 | 'train': 5011, 80 | 'test': 4952, 81 | } 82 | SPLITS_TO_STATISTICS = { 83 | 'train': TRAIN_STATISTICS, 84 | 'test': TEST_STATISTICS, 85 | } 86 | NUM_CLASSES = 20 87 | 88 | 89 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 90 | """Gets a dataset tuple with instructions for reading ImageNet. 91 | 92 | Args: 93 | split_name: A train/test split name. 94 | dataset_dir: The base directory of the dataset sources. 95 | file_pattern: The file pattern to use when matching the dataset sources. 96 | It is assumed that the pattern contains a '%s' string so that the split 97 | name can be inserted. 98 | reader: The TensorFlow reader type. 99 | 100 | Returns: 101 | A `Dataset` namedtuple. 102 | 103 | Raises: 104 | ValueError: if `split_name` is not a valid train/test split. 105 | """ 106 | if not file_pattern: 107 | file_pattern = FILE_PATTERN 108 | return pascalvoc_common.get_split(split_name, dataset_dir, 109 | file_pattern, reader, 110 | SPLITS_TO_SIZES, 111 | ITEMS_TO_DESCRIPTIONS, 112 | NUM_CLASSES) 113 | -------------------------------------------------------------------------------- /datasets/pascalvoc_2012.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the Pascal VOC Dataset (images + annotations). 
16 | """ 17 | import tensorflow as tf 18 | from datasets import pascalvoc_common 19 | 20 | slim = tf.contrib.slim 21 | 22 | FILE_PATTERN = 'voc_2012_%s_*.tfrecord' 23 | ITEMS_TO_DESCRIPTIONS = { 24 | 'image': 'A color image of varying height and width.', 25 | 'shape': 'Shape of the image', 26 | 'object/bbox': 'A list of bounding boxes, one per each object.', 27 | 'object/label': 'A list of labels, one per each object.', 28 | } 29 | # (Images, Objects) statistics on every class. 30 | TRAIN_STATISTICS = { 31 | 'none': (0, 0), 32 | 'aeroplane': (670, 865), 33 | 'bicycle': (552, 711), 34 | 'bird': (765, 1119), 35 | 'boat': (508, 850), 36 | 'bottle': (706, 1259), 37 | 'bus': (421, 593), 38 | 'car': (1161, 2017), 39 | 'cat': (1080, 1217), 40 | 'chair': (1119, 2354), 41 | 'cow': (303, 588), 42 | 'diningtable': (538, 609), 43 | 'dog': (1286, 1515), 44 | 'horse': (482, 710), 45 | 'motorbike': (526, 713), 46 | 'person': (4087, 8566), 47 | 'pottedplant': (527, 973), 48 | 'sheep': (325, 813), 49 | 'sofa': (507, 566), 50 | 'train': (544, 628), 51 | 'tvmonitor': (575, 784), 52 | 'total': (11540, 27450), 53 | } 54 | SPLITS_TO_SIZES = { 55 | 'train': 17125, 56 | } 57 | SPLITS_TO_STATISTICS = { 58 | 'train': TRAIN_STATISTICS, 59 | } 60 | NUM_CLASSES = 20 61 | 62 | 63 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 64 | """Gets a dataset tuple with instructions for reading ImageNet. 65 | 66 | Args: 67 | split_name: A train/test split name. 68 | dataset_dir: The base directory of the dataset sources. 69 | file_pattern: The file pattern to use when matching the dataset sources. 70 | It is assumed that the pattern contains a '%s' string so that the split 71 | name can be inserted. 72 | reader: The TensorFlow reader type. 73 | 74 | Returns: 75 | A `Dataset` namedtuple. 76 | 77 | Raises: 78 | ValueError: if `split_name` is not a valid train/test split. 79 | """ 80 | if not file_pattern: 81 | file_pattern = FILE_PATTERN 82 | return pascalvoc_common.get_split(split_name, dataset_dir, 83 | file_pattern, reader, 84 | SPLITS_TO_SIZES, 85 | ITEMS_TO_DESCRIPTIONS, 86 | NUM_CLASSES) 87 | 88 | -------------------------------------------------------------------------------- /datasets/pascalvoc_common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the Pascal VOC Dataset (images + annotations). 
16 | """ 17 | import os 18 | 19 | import tensorflow as tf 20 | from datasets import dataset_utils 21 | 22 | slim = tf.contrib.slim 23 | 24 | VOC_LABELS = { 25 | 'none': (0, 'Background'), 26 | 'aeroplane': (1, 'Vehicle'), 27 | 'bicycle': (2, 'Vehicle'), 28 | 'bird': (3, 'Animal'), 29 | 'boat': (4, 'Vehicle'), 30 | 'bottle': (5, 'Indoor'), 31 | 'bus': (6, 'Vehicle'), 32 | 'car': (7, 'Vehicle'), 33 | 'cat': (8, 'Animal'), 34 | 'chair': (9, 'Indoor'), 35 | 'cow': (10, 'Animal'), 36 | 'diningtable': (11, 'Indoor'), 37 | 'dog': (12, 'Animal'), 38 | 'horse': (13, 'Animal'), 39 | 'motorbike': (14, 'Vehicle'), 40 | 'person': (15, 'Person'), 41 | 'pottedplant': (16, 'Indoor'), 42 | 'sheep': (17, 'Animal'), 43 | 'sofa': (18, 'Indoor'), 44 | 'train': (19, 'Vehicle'), 45 | 'tvmonitor': (20, 'Indoor'), 46 | } 47 | 48 | 49 | def get_split(split_name, dataset_dir, file_pattern, reader, 50 | split_to_sizes, items_to_descriptions, num_classes): 51 | """Gets a dataset tuple with instructions for reading Pascal VOC dataset. 52 | 53 | Args: 54 | split_name: A train/test split name. 55 | dataset_dir: The base directory of the dataset sources. 56 | file_pattern: The file pattern to use when matching the dataset sources. 57 | It is assumed that the pattern contains a '%s' string so that the split 58 | name can be inserted. 59 | reader: The TensorFlow reader type. 60 | 61 | Returns: 62 | A `Dataset` namedtuple. 63 | 64 | Raises: 65 | ValueError: if `split_name` is not a valid train/test split. 66 | """ 67 | if split_name not in split_to_sizes: 68 | raise ValueError('split name %s was not recognized.' % split_name) 69 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 70 | 71 | # Allowing None in the signature so that dataset_factory can use the default. 72 | if reader is None: 73 | reader = tf.TFRecordReader 74 | # Features in Pascal VOC TFRecords. 
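# The four bounding-box coordinate features below are variable-length float lists
# (one value per annotated object); the BoundingBox handler in items_to_handlers
# stacks them into a single [num_boxes, 4] tensor in (ymin, xmin, ymax, xmax) order.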
75 | keys_to_features = { 76 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 77 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 78 | 'image/height': tf.FixedLenFeature([1], tf.int64), 79 | 'image/width': tf.FixedLenFeature([1], tf.int64), 80 | 'image/channels': tf.FixedLenFeature([1], tf.int64), 81 | 'image/shape': tf.FixedLenFeature([3], tf.int64), 82 | 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 83 | 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 84 | 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 85 | 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 86 | 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), 87 | 'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64), 88 | 'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64), 89 | } 90 | items_to_handlers = { 91 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 92 | 'shape': slim.tfexample_decoder.Tensor('image/shape'), 93 | 'object/bbox': slim.tfexample_decoder.BoundingBox( 94 | ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), 95 | 'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'), 96 | 'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'), 97 | 'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'), 98 | } 99 | decoder = slim.tfexample_decoder.TFExampleDecoder( 100 | keys_to_features, items_to_handlers) 101 | 102 | labels_to_names = None 103 | if dataset_utils.has_labels(dataset_dir): 104 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 105 | # else: 106 | # labels_to_names = create_readable_names_for_imagenet_labels() 107 | # dataset_utils.write_label_file(labels_to_names, dataset_dir) 108 | 109 | return slim.dataset.Dataset( 110 | data_sources=file_pattern, 111 | reader=reader, 112 | decoder=decoder, 113 | num_samples=split_to_sizes[split_name], 114 | items_to_descriptions=items_to_descriptions, 115 | num_classes=num_classes, 116 | labels_to_names=labels_to_names) 117 | -------------------------------------------------------------------------------- /datasets/pascalvoc_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Converts Pascal VOC data to TFRecords file format with Example protos. 16 | 17 | The raw Pascal VOC data set is expected to reside in JPEG files located in the 18 | directory 'JPEGImages'. Similarly, bounding box annotations are supposed to be 19 | stored in the 'Annotation directory' 20 | 21 | This TensorFlow script converts the training and evaluation data into 22 | a sharded data set consisting of 1024 and 128 TFRecord files, respectively. 
23 | 24 | Each TFRecord file contains at most SAMPLES_PER_FILES records. Each record 25 | within the TFRecord file is a serialized Example proto. The Example proto 26 | contains the following fields: 27 | 28 | image/encoded: string containing JPEG encoded image in RGB colorspace 29 | image/height: integer, image height in pixels 30 | image/width: integer, image width in pixels 31 | image/channels: integer, specifying the number of channels, always 3 32 | image/format: string, specifying the format, always 'JPEG' 33 | 34 | 35 | image/object/bbox/xmin: list of float specifying the 0+ human annotated 36 | bounding boxes 37 | image/object/bbox/xmax: list of float specifying the 0+ human annotated 38 | bounding boxes 39 | image/object/bbox/ymin: list of float specifying the 0+ human annotated 40 | bounding boxes 41 | image/object/bbox/ymax: list of float specifying the 0+ human annotated 42 | bounding boxes 43 | image/object/bbox/label: list of integer specifying the classification index. 44 | image/object/bbox/label_text: list of string descriptions. 45 | 46 | Note that the length of xmin is identical to the length of xmax, ymin and ymax 47 | for each example. 48 | """ 49 | import os 50 | import sys 51 | import random 52 | 53 | import numpy as np 54 | import tensorflow as tf 55 | 56 | import xml.etree.ElementTree as ET 57 | 58 | from datasets.dataset_utils import int64_feature, float_feature, bytes_feature 59 | from datasets.pascalvoc_common import VOC_LABELS 60 | 61 | # Original dataset organisation. 62 | DIRECTORY_ANNOTATIONS = 'Annotations/' 63 | DIRECTORY_IMAGES = 'JPEGImages/' 64 | 65 | # TFRecords conversion parameters. 66 | RANDOM_SEED = 4242 67 | SAMPLES_PER_FILES = 200 68 | 69 | 70 | def _process_image(directory, name): 71 | """Process an image and its annotation file. 72 | 73 | Args: 74 | directory: string, Pascal VOC dataset directory, containing the 'JPEGImages/' and 'Annotations/' sub-directories. 75 | name: string, image name, without file extension. 76 | Returns: 77 | image_data: string, JPEG encoding of the RGB image. 78 | shape: list of 3 integers, image shape in pixels. 79 | bboxes, labels, labels_text, difficult, truncated: per-object annotations. 80 | """ 81 | # Read the image file. 82 | filename = directory + DIRECTORY_IMAGES + name + '.jpg' 83 | image_data = tf.gfile.FastGFile(filename, 'rb').read() 84 | 85 | # Read the XML annotation file. 86 | filename = os.path.join(directory, DIRECTORY_ANNOTATIONS, name + '.xml') 87 | tree = ET.parse(filename) 88 | root = tree.getroot() 89 | 90 | # Image shape. 91 | size = root.find('size') 92 | shape = [int(size.find('height').text), 93 | int(size.find('width').text), 94 | int(size.find('depth').text)] 95 | # Find annotations.
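# The loop below expects the standard Pascal VOC annotation layout, roughly
# (illustrative, abbreviated):
#
#   <annotation>
#     <size><width>500</width><height>375</height><depth>3</depth></size>
#     <object>
#       <name>dog</name>
#       <truncated>1</truncated>
#       <difficult>0</difficult>
#       <bndbox>
#         <xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax>
#       </bndbox>
#     </object>
#   </annotation>
#
# Box coordinates are divided by the image height/width so that the stored
# values are (ymin, xmin, ymax, xmax) in [0, 1].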
96 | bboxes = [] 97 | labels = [] 98 | labels_text = [] 99 | difficult = [] 100 | truncated = [] 101 | for obj in root.findall('object'): 102 | label = obj.find('name').text 103 | labels.append(int(VOC_LABELS[label][0])) 104 | labels_text.append(label.encode('ascii')) 105 | 106 | if obj.find('difficult') is not None: 107 | difficult.append(int(obj.find('difficult').text)) 108 | else: 109 | difficult.append(0) 110 | if obj.find('truncated') is not None: 111 | truncated.append(int(obj.find('truncated').text)) 112 | else: 113 | truncated.append(0) 114 | 115 | bbox = obj.find('bndbox') 116 | bboxes.append((float(bbox.find('ymin').text) / shape[0], 117 | float(bbox.find('xmin').text) / shape[1], 118 | float(bbox.find('ymax').text) / shape[0], 119 | float(bbox.find('xmax').text) / shape[1] 120 | )) 121 | return image_data, shape, bboxes, labels, labels_text, difficult, truncated 122 | 123 | 124 | def _convert_to_example(image_data, labels, labels_text, bboxes, shape, 125 | difficult, truncated): 126 | """Build an Example proto for an image example. 127 | 128 | Args: 129 | image_data: string, JPEG encoding of RGB image; 130 | labels: list of integers, identifier for the ground truth; 131 | labels_text: list of strings, human-readable labels; 132 | bboxes: list of bounding boxes; each box is a tuple of floats in [0, 1], 133 | specifying [ymin, xmin, ymax, xmax] relative to the image size. Each box 134 | is paired with the label at the same index in `labels`. 135 | shape: 3 integers, image shape in pixels. 136 | Returns: 137 | Example proto 138 | """ 139 | xmin = [] 140 | ymin = [] 141 | xmax = [] 142 | ymax = [] 143 | for b in bboxes: 144 | assert len(b) == 4 145 | # pylint: disable=expression-not-assigned 146 | [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)] 147 | # pylint: enable=expression-not-assigned 148 | 149 | image_format = b'JPEG' 150 | example = tf.train.Example(features=tf.train.Features(feature={ 151 | 'image/height': int64_feature(shape[0]), 152 | 'image/width': int64_feature(shape[1]), 153 | 'image/channels': int64_feature(shape[2]), 154 | 'image/shape': int64_feature(shape), 155 | 'image/object/bbox/xmin': float_feature(xmin), 156 | 'image/object/bbox/xmax': float_feature(xmax), 157 | 'image/object/bbox/ymin': float_feature(ymin), 158 | 'image/object/bbox/ymax': float_feature(ymax), 159 | 'image/object/bbox/label': int64_feature(labels), 160 | 'image/object/bbox/label_text': bytes_feature(labels_text), 161 | 'image/object/bbox/difficult': int64_feature(difficult), 162 | 'image/object/bbox/truncated': int64_feature(truncated), 163 | 'image/format': bytes_feature(image_format), 164 | 'image/encoded': bytes_feature(image_data)})) 165 | return example 166 | 167 | 168 | def _add_to_tfrecord(dataset_dir, name, tfrecord_writer): 169 | """Loads data from image and annotations files and adds them to a TFRecord. 170 | 171 | Args: 172 | dataset_dir: Dataset directory; 173 | name: Image name to add to the TFRecord; 174 | tfrecord_writer: The TFRecord writer to use for writing. 175 | """ 176 | image_data, shape, bboxes, labels, labels_text, difficult, truncated = \ 177 | _process_image(dataset_dir, name) 178 | example = _convert_to_example(image_data, labels, labels_text, 179 | bboxes, shape, difficult, truncated) 180 | tfrecord_writer.write(example.SerializeToString()) 181 | 182 | 183 | def _get_output_filename(output_dir, name, idx): 184 | return '%s/%s_%03d.tfrecord' % (output_dir, name, idx) 185 | 186 | 187 | def run(dataset_dir, output_dir, name='voc_train', shuffling=False): 188 | """Runs the conversion operation.
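    Example (illustrative; the paths are placeholders, not from this repository):

        run('/data/VOCdevkit/VOC2012/', '/data/voc_tfrecords/',
            name='voc_2012_train', shuffling=True)

    With SAMPLES_PER_FILES = 200, the images are spread over shards named
    'voc_2012_train_000.tfrecord', 'voc_2012_train_001.tfrecord', ..., which
    match the 'voc_2012_%s_*.tfrecord' pattern expected by
    datasets/pascalvoc_2012.py.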
189 | 190 | Args: 191 | dataset_dir: The dataset directory where the dataset is stored. 192 | output_dir: Output directory. 193 | """ 194 | if not tf.gfile.Exists(dataset_dir): 195 | tf.gfile.MakeDirs(dataset_dir) 196 | 197 | # Dataset filenames, and shuffling. 198 | path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS) 199 | filenames = sorted(os.listdir(path)) 200 | if shuffling: 201 | random.seed(RANDOM_SEED) 202 | random.shuffle(filenames) 203 | 204 | # Process dataset files. 205 | i = 0 206 | fidx = 0 207 | while i < len(filenames): 208 | # Open new TFRecord file. 209 | tf_filename = _get_output_filename(output_dir, name, fidx) 210 | with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer: 211 | j = 0 212 | while i < len(filenames) and j < SAMPLES_PER_FILES: 213 | sys.stdout.write('\r>> Converting image %d/%d' % (i+1, len(filenames))) 214 | sys.stdout.flush() 215 | 216 | filename = filenames[i] 217 | img_name = filename[:-4] 218 | _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer) 219 | i += 1 220 | j += 1 221 | fidx += 1 222 | 223 | # Finally, write the labels file: 224 | # labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES)) 225 | # dataset_utils.write_label_file(labels_to_class_names, dataset_dir) 226 | print('\nFinished converting the Pascal VOC dataset!') 227 | -------------------------------------------------------------------------------- /demo/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000001.jpg -------------------------------------------------------------------------------- /demo/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000002.jpg -------------------------------------------------------------------------------- /demo/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000003.jpg -------------------------------------------------------------------------------- /demo/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000004.jpg -------------------------------------------------------------------------------- /demo/000006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000006.jpg -------------------------------------------------------------------------------- /demo/000008.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000008.jpg -------------------------------------------------------------------------------- /demo/000010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000010.jpg -------------------------------------------------------------------------------- /demo/000022.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/000022.jpg -------------------------------------------------------------------------------- /demo/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/dog.jpg -------------------------------------------------------------------------------- /demo/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/eagle.jpg -------------------------------------------------------------------------------- /demo/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/horses.jpg -------------------------------------------------------------------------------- /demo/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/person.jpg -------------------------------------------------------------------------------- /demo/street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/demo/street.jpg -------------------------------------------------------------------------------- /deployment/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /inspect_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A simple script for inspect checkpoint files.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import argparse 21 | import sys 22 | 23 | import numpy as np 24 | 25 | from tensorflow.python import pywrap_tensorflow 26 | from tensorflow.python.platform import app 27 | from tensorflow.python.platform import flags 28 | 29 | FLAGS = None 30 | 31 | 32 | def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors): 33 | """Prints tensors in a checkpoint file. 34 | 35 | If no `tensor_name` is provided, prints the tensor names and shapes 36 | in the checkpoint file. 37 | 38 | If `tensor_name` is provided, prints the content of the tensor. 
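    Example (illustrative): listing the variable names and shapes stored in the
    SSD checkpoint shipped with this repository:

        print_tensors_in_checkpoint_file('./checkpoints/ssd_300_vgg.ckpt',
                                         tensor_name='', all_tensors=False)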
39 | 40 | Args: 41 | file_name: Name of the checkpoint file. 42 | tensor_name: Name of the tensor in the checkpoint file to print. 43 | all_tensors: Boolean indicating whether to print all tensors. 44 | """ 45 | try: 46 | reader = pywrap_tensorflow.NewCheckpointReader(file_name) 47 | if all_tensors: 48 | var_to_shape_map = reader.get_variable_to_shape_map() 49 | for key in var_to_shape_map: 50 | print("tensor_name: ", key) 51 | print(reader.get_tensor(key)) 52 | elif not tensor_name: 53 | print(reader.debug_string().decode("utf-8")) 54 | else: 55 | print("tensor_name: ", tensor_name) 56 | print(reader.get_tensor(tensor_name)) 57 | except Exception as e: # pylint: disable=broad-except 58 | print(str(e)) 59 | if "corrupted compressed block contents" in str(e): 60 | print("It's likely that your checkpoint file has been compressed " 61 | "with SNAPPY.") 62 | 63 | 64 | def parse_numpy_printoption(kv_str): 65 | """Sets a single numpy printoption from a string of the form 'x=y'. 66 | 67 | See documentation on numpy.set_printoptions() for details about what values 68 | x and y can take. x can be any option listed there other than 'formatter'. 69 | 70 | Args: 71 | kv_str: A string of the form 'x=y', such as 'threshold=100000' 72 | 73 | Raises: 74 | argparse.ArgumentTypeError: If the string couldn't be used to set any 75 | nump printoption. 76 | """ 77 | k_v_str = kv_str.split("=", 1) 78 | if len(k_v_str) != 2 or not k_v_str[0]: 79 | raise argparse.ArgumentTypeError("'%s' is not in the form k=v." % kv_str) 80 | k, v_str = k_v_str 81 | printoptions = np.get_printoptions() 82 | if k not in printoptions: 83 | raise argparse.ArgumentTypeError("'%s' is not a valid printoption." % k) 84 | v_type = type(printoptions[k]) 85 | if v_type is type(None): 86 | raise argparse.ArgumentTypeError( 87 | "Setting '%s' from the command line is not supported." % k) 88 | try: 89 | v = (v_type(v_str) if v_type is not bool 90 | else flags.BooleanParser().Parse(v_str)) 91 | except ValueError as e: 92 | raise argparse.ArgumentTypeError(e.message) 93 | np.set_printoptions(**{k: v}) 94 | 95 | 96 | def main(unused_argv): 97 | if not FLAGS.file_name: 98 | print("Usage: inspect_checkpoint --file_name=checkpoint_file_name " 99 | "[--tensor_name=tensor_to_print]") 100 | sys.exit(1) 101 | else: 102 | print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name, 103 | FLAGS.all_tensors) 104 | 105 | 106 | if __name__ == "__main__": 107 | parser = argparse.ArgumentParser() 108 | parser.register("type", "bool", lambda v: v.lower() == "true") 109 | parser.add_argument( 110 | "--file_name", type=str, default="", help="Checkpoint filename. 
" 111 | "Note, if using Checkpoint V2 format, file_name is the " 112 | "shared prefix between all files in the checkpoint.") 113 | parser.add_argument( 114 | "--tensor_name", 115 | type=str, 116 | default="", 117 | help="Name of the tensor to inspect") 118 | parser.add_argument( 119 | "--all_tensors", 120 | nargs="?", 121 | const=True, 122 | type="bool", 123 | default=False, 124 | help="If True, print the values of all the tensors.") 125 | parser.add_argument( 126 | "--printoptions", 127 | nargs="*", 128 | type=parse_numpy_printoption, 129 | help="Argument for numpy.set_printoptions(), in the form 'k=v'.") 130 | FLAGS, unparsed = parser.parse_known_args() 131 | app.run(main=main, argv=[sys.argv[0]] + unparsed) 132 | -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /nets/caffe_scope.py: -------------------------------------------------------------------------------- 1 | """Specific Caffe scope used to import weights from a .caffemodel file. 2 | 3 | The idea is to create special initializers loading weights from protobuf 4 | .caffemodel files. 5 | """ 6 | import caffe 7 | from caffe.proto import caffe_pb2 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | slim = tf.contrib.slim 13 | 14 | 15 | class CaffeScope(object): 16 | """Caffe scope. 17 | """ 18 | def __init__(self): 19 | """Initialize the caffee scope. 20 | """ 21 | self.counters = {} 22 | self.layers = {} 23 | self.caffe_layers = None 24 | self.bgr_to_rgb = 0 25 | 26 | def load(self, filename, bgr_to_rgb=True): 27 | """Load weights from a .caffemodel file and initialize counters. 28 | 29 | Params: 30 | filename: caffemodel file. 31 | """ 32 | print('Loading Caffe file:', filename) 33 | caffemodel_params = caffe_pb2.NetParameter() 34 | caffemodel_str = open(filename, 'rb').read() 35 | caffemodel_params.ParseFromString(caffemodel_str) 36 | self.caffe_layers = caffemodel_params.layer 37 | 38 | # Layers collection. 39 | self.layers['convolution'] = [i for i, l in enumerate(self.caffe_layers) 40 | if l.type == 'Convolution'] 41 | self.layers['l2_normalization'] = [i for i, l in enumerate(self.caffe_layers) 42 | if l.type == 'Normalize'] 43 | # BGR to RGB convertion. Tries to find the first convolution with 3 44 | # and exchange parameters. 45 | if bgr_to_rgb: 46 | self.bgr_to_rgb = 1 47 | 48 | def conv_weights_init(self): 49 | def _initializer(shape, dtype, partition_info=None): 50 | counter = self.counters.get(self.conv_weights_init, 0) 51 | idx = self.layers['convolution'][counter] 52 | layer = self.caffe_layers[idx] 53 | # Weights: reshape and transpose dimensions. 
54 | w = np.array(layer.blobs[0].data) 55 | w = np.reshape(w, layer.blobs[0].shape.dim) 56 | # w = np.transpose(w, (1, 0, 2, 3)) 57 | w = np.transpose(w, (2, 3, 1, 0)) 58 | if self.bgr_to_rgb == 1 and w.shape[2] == 3: 59 | print('Convert BGR to RGB in convolution layer:', layer.name) 60 | w[:, :, (0, 1, 2)] = w[:, :, (2, 1, 0)] 61 | self.bgr_to_rgb += 1 62 | self.counters[self.conv_weights_init] = counter + 1 63 | print('Load weights from convolution layer:', layer.name, w.shape) 64 | return tf.cast(w, dtype) 65 | return _initializer 66 | 67 | def conv_biases_init(self): 68 | def _initializer(shape, dtype, partition_info=None): 69 | counter = self.counters.get(self.conv_biases_init, 0) 70 | idx = self.layers['convolution'][counter] 71 | layer = self.caffe_layers[idx] 72 | # Biases data... 73 | b = np.array(layer.blobs[1].data) 74 | self.counters[self.conv_biases_init] = counter + 1 75 | print('Load biases from convolution layer:', layer.name, b.shape) 76 | return tf.cast(b, dtype) 77 | return _initializer 78 | 79 | def l2_norm_scale_init(self): 80 | def _initializer(shape, dtype, partition_info=None): 81 | counter = self.counters.get(self.l2_norm_scale_init, 0) 82 | idx = self.layers['l2_normalization'][counter] 83 | layer = self.caffe_layers[idx] 84 | # Scaling parameter. 85 | s = np.array(layer.blobs[0].data) 86 | s = np.reshape(s, layer.blobs[0].shape.dim) 87 | self.counters[self.l2_norm_scale_init] = counter + 1 88 | print('Load scaling from L2 normalization layer:', layer.name, s.shape) 89 | return tf.cast(s, dtype) 90 | return _initializer 91 | -------------------------------------------------------------------------------- /nets/custom_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Implement some custom layers, not provided by TensorFlow. 16 | 17 | Trying to follow as much as possible the style/standards used in 18 | tf.contrib.layers 19 | """ 20 | import tensorflow as tf 21 | 22 | from tensorflow.contrib.framework.python.ops import add_arg_scope 23 | from tensorflow.contrib.layers.python.layers import initializers 24 | from tensorflow.contrib.framework.python.ops import variables 25 | from tensorflow.contrib.layers.python.layers import utils 26 | from tensorflow.python.ops import nn 27 | from tensorflow.python.ops import init_ops 28 | from tensorflow.python.ops import variable_scope 29 | 30 | 31 | def abs_smooth(x): 32 | """Smoothed absolute function. Useful to compute an L1 smooth error. 33 | 34 | Define as: 35 | x^2 / 2 if abs(x) < 1 36 | abs(x) - 0.5 if abs(x) > 1 37 | We use here a differentiable definition using min(x) and abs(x). Clearly 38 | not optimal, but good enough for our purpose! 
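    With m = min(abs(x), 1), the expression below, 0.5 * ((abs(x) - 1) * m + abs(x)),
    equals 0.5 * x^2 when abs(x) <= 1 and abs(x) - 0.5 otherwise, i.e. it matches
    the piecewise definition above.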
39 | """ 40 | absx = tf.abs(x) 41 | minx = tf.minimum(absx, 1) 42 | r = 0.5 * ((absx - 1) * minx + absx) 43 | return r 44 | 45 | 46 | @add_arg_scope 47 | def l2_normalization( 48 | inputs, 49 | scaling=False, 50 | scale_initializer=init_ops.ones_initializer(), 51 | reuse=None, 52 | variables_collections=None, 53 | outputs_collections=None, 54 | data_format='NHWC', 55 | trainable=True, 56 | scope=None): 57 | """Implement L2 normalization on every feature (i.e. spatial normalization). 58 | 59 | Should be extended in some near future to other dimensions, providing a more 60 | flexible normalization framework. 61 | 62 | Args: 63 | inputs: a 4-D tensor with dimensions [batch_size, height, width, channels]. 64 | scaling: whether or not to add a post scaling operation along the dimensions 65 | which have been normalized. 66 | scale_initializer: An initializer for the weights. 67 | reuse: whether or not the layer and its variables should be reused. To be 68 | able to reuse the layer scope must be given. 69 | variables_collections: optional list of collections for all the variables or 70 | a dictionary containing a different list of collection per variable. 71 | outputs_collections: collection to add the outputs. 72 | data_format: NHWC or NCHW data format. 73 | trainable: If `True` also add variables to the graph collection 74 | `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). 75 | scope: Optional scope for `variable_scope`. 76 | Returns: 77 | A `Tensor` representing the output of the operation. 78 | """ 79 | 80 | with variable_scope.variable_scope( 81 | scope, 'L2Normalization', [inputs], reuse=reuse) as sc: 82 | inputs_shape = inputs.get_shape() 83 | inputs_rank = inputs_shape.ndims 84 | dtype = inputs.dtype.base_dtype 85 | if data_format == 'NHWC': 86 | # norm_dim = tf.range(1, inputs_rank-1) 87 | norm_dim = tf.range(inputs_rank-1, inputs_rank) 88 | params_shape = inputs_shape[-1:] 89 | elif data_format == 'NCHW': 90 | # norm_dim = tf.range(2, inputs_rank) 91 | norm_dim = tf.range(1, 2) 92 | params_shape = (inputs_shape[1]) 93 | 94 | # Normalize along spatial dimensions. 95 | outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12) 96 | # Additional scaling. 97 | if scaling: 98 | scale_collections = utils.get_variable_collections( 99 | variables_collections, 'scale') 100 | scale = variables.model_variable('gamma', 101 | shape=params_shape, 102 | dtype=dtype, 103 | initializer=scale_initializer, 104 | collections=scale_collections, 105 | trainable=trainable) 106 | if data_format == 'NHWC': 107 | outputs = tf.multiply(outputs, scale) 108 | elif data_format == 'NCHW': 109 | scale = tf.expand_dims(scale, axis=-1) 110 | scale = tf.expand_dims(scale, axis=-1) 111 | outputs = tf.multiply(outputs, scale) 112 | # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1)) 113 | 114 | return utils.collect_named_outputs(outputs_collections, 115 | sc.original_name_scope, outputs) 116 | 117 | 118 | @add_arg_scope 119 | def pad2d(inputs, 120 | pad=(0, 0), 121 | mode='CONSTANT', 122 | data_format='NHWC', 123 | trainable=True, 124 | scope=None): 125 | """2D Padding layer, adding a symmetric padding to H and W dimensions. 126 | 127 | Aims to mimic padding in Caffe and MXNet, helping the port of models to 128 | TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`. 129 | 130 | Args: 131 | inputs: 4D input Tensor; 132 | pad: 2-Tuple with padding values for H and W dimensions; 133 | mode: Padding mode. C.f. `tf.pad` 134 | data_format: NHWC or NCHW data format. 
135 | """ 136 | with tf.name_scope(scope, 'pad2d', [inputs]): 137 | # Padding shape. 138 | if data_format == 'NHWC': 139 | paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]] 140 | elif data_format == 'NCHW': 141 | paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]] 142 | net = tf.pad(inputs, paddings, mode=mode) 143 | return net 144 | 145 | 146 | @add_arg_scope 147 | def channel_to_last(inputs, 148 | data_format='NHWC', 149 | scope=None): 150 | """Move the channel axis to the last dimension. Allows to 151 | provide a single output format whatever the input data format. 152 | 153 | Args: 154 | inputs: Input Tensor; 155 | data_format: NHWC or NCHW. 156 | Return: 157 | Input in NHWC format. 158 | """ 159 | with tf.name_scope(scope, 'channel_to_last', [inputs]): 160 | if data_format == 'NHWC': 161 | net = inputs 162 | elif data_format == 'NCHW': 163 | net = tf.transpose(inputs, perm=(0, 2, 3, 1)) 164 | return net 165 | -------------------------------------------------------------------------------- /nets/inception.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Brings inception_v1, inception_v2 and inception_v3 under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | # from nets.inception_v1 import inception_v1 25 | # from nets.inception_v1 import inception_v1_arg_scope 26 | # from nets.inception_v1 import inception_v1_base 27 | # from nets.inception_v2 import inception_v2 28 | # from nets.inception_v2 import inception_v2_arg_scope 29 | # from nets.inception_v2 import inception_v2_base 30 | from nets.inception_v3 import inception_v3 31 | from nets.inception_v3 import inception_v3_arg_scope 32 | from nets.inception_v3 import inception_v3_base 33 | # pylint: enable=unused-import 34 | -------------------------------------------------------------------------------- /nets/inception_resnet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the definition of the Inception Resnet V2 architecture. 16 | 17 | As described in http://arxiv.org/abs/1602.07261. 18 | 19 | Inception-v4, Inception-ResNet and the Impact of Residual Connections 20 | on Learning 21 | Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | slim = tf.contrib.slim 31 | 32 | 33 | def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 34 | """Builds the 35x35 resnet block.""" 35 | with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): 36 | with tf.variable_scope('Branch_0'): 37 | tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') 38 | with tf.variable_scope('Branch_1'): 39 | tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 40 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') 41 | with tf.variable_scope('Branch_2'): 42 | tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 43 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') 44 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') 45 | mixed = tf.concat(3, [tower_conv, tower_conv1_1, tower_conv2_2]) 46 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 47 | activation_fn=None, scope='Conv2d_1x1') 48 | net += scale * up 49 | if activation_fn: 50 | net = activation_fn(net) 51 | return net 52 | 53 | 54 | def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 55 | """Builds the 17x17 resnet block.""" 56 | with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): 57 | with tf.variable_scope('Branch_0'): 58 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 59 | with tf.variable_scope('Branch_1'): 60 | tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') 61 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], 62 | scope='Conv2d_0b_1x7') 63 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], 64 | scope='Conv2d_0c_7x1') 65 | mixed = tf.concat(3, [tower_conv, tower_conv1_2]) 66 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 67 | activation_fn=None, scope='Conv2d_1x1') 68 | net += scale * up 69 | if activation_fn: 70 | net = activation_fn(net) 71 | return net 72 | 73 | 74 | def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 75 | """Builds the 8x8 resnet block.""" 76 | with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): 77 | with tf.variable_scope('Branch_0'): 78 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 79 | with tf.variable_scope('Branch_1'): 80 | tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') 81 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], 82 | scope='Conv2d_0b_1x3') 83 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], 84 | scope='Conv2d_0c_3x1') 85 | mixed = tf.concat(3, [tower_conv, tower_conv1_2]) 86 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 87 | activation_fn=None, scope='Conv2d_1x1') 88 | net += scale * up 89 | if activation_fn: 90 | net = activation_fn(net) 91 | return net 92 | 93 | 94 | def inception_resnet_v2(inputs, num_classes=1001, is_training=True, 95 | 
dropout_keep_prob=0.8, 96 | reuse=None, 97 | scope='InceptionResnetV2'): 98 | """Creates the Inception Resnet V2 model. 99 | 100 | Args: 101 | inputs: a 4-D tensor of size [batch_size, height, width, 3]. 102 | num_classes: number of predicted classes. 103 | is_training: whether is training or not. 104 | dropout_keep_prob: float, the fraction to keep before final layer. 105 | reuse: whether or not the network and its variables should be reused. To be 106 | able to reuse 'scope' must be given. 107 | scope: Optional variable_scope. 108 | 109 | Returns: 110 | logits: the logits outputs of the model. 111 | end_points: the set of end_points from the inception model. 112 | """ 113 | end_points = {} 114 | 115 | with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): 116 | with slim.arg_scope([slim.batch_norm, slim.dropout], 117 | is_training=is_training): 118 | with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], 119 | stride=1, padding='SAME'): 120 | 121 | # 149 x 149 x 32 122 | net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', 123 | scope='Conv2d_1a_3x3') 124 | end_points['Conv2d_1a_3x3'] = net 125 | # 147 x 147 x 32 126 | net = slim.conv2d(net, 32, 3, padding='VALID', 127 | scope='Conv2d_2a_3x3') 128 | end_points['Conv2d_2a_3x3'] = net 129 | # 147 x 147 x 64 130 | net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') 131 | end_points['Conv2d_2b_3x3'] = net 132 | # 73 x 73 x 64 133 | net = slim.max_pool2d(net, 3, stride=2, padding='VALID', 134 | scope='MaxPool_3a_3x3') 135 | end_points['MaxPool_3a_3x3'] = net 136 | # 73 x 73 x 80 137 | net = slim.conv2d(net, 80, 1, padding='VALID', 138 | scope='Conv2d_3b_1x1') 139 | end_points['Conv2d_3b_1x1'] = net 140 | # 71 x 71 x 192 141 | net = slim.conv2d(net, 192, 3, padding='VALID', 142 | scope='Conv2d_4a_3x3') 143 | end_points['Conv2d_4a_3x3'] = net 144 | # 35 x 35 x 192 145 | net = slim.max_pool2d(net, 3, stride=2, padding='VALID', 146 | scope='MaxPool_5a_3x3') 147 | end_points['MaxPool_5a_3x3'] = net 148 | 149 | # 35 x 35 x 320 150 | with tf.variable_scope('Mixed_5b'): 151 | with tf.variable_scope('Branch_0'): 152 | tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') 153 | with tf.variable_scope('Branch_1'): 154 | tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') 155 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, 156 | scope='Conv2d_0b_5x5') 157 | with tf.variable_scope('Branch_2'): 158 | tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') 159 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, 160 | scope='Conv2d_0b_3x3') 161 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, 162 | scope='Conv2d_0c_3x3') 163 | with tf.variable_scope('Branch_3'): 164 | tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', 165 | scope='AvgPool_0a_3x3') 166 | tower_pool_1 = slim.conv2d(tower_pool, 64, 1, 167 | scope='Conv2d_0b_1x1') 168 | net = tf.concat(3, [tower_conv, tower_conv1_1, 169 | tower_conv2_2, tower_pool_1]) 170 | 171 | end_points['Mixed_5b'] = net 172 | net = slim.repeat(net, 10, block35, scale=0.17) 173 | 174 | # 17 x 17 x 1024 175 | with tf.variable_scope('Mixed_6a'): 176 | with tf.variable_scope('Branch_0'): 177 | tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', 178 | scope='Conv2d_1a_3x3') 179 | with tf.variable_scope('Branch_1'): 180 | tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 181 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, 182 | scope='Conv2d_0b_3x3') 183 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, 184 | 
stride=2, padding='VALID', 185 | scope='Conv2d_1a_3x3') 186 | with tf.variable_scope('Branch_2'): 187 | tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', 188 | scope='MaxPool_1a_3x3') 189 | net = tf.concat(3, [tower_conv, tower_conv1_2, tower_pool]) 190 | 191 | end_points['Mixed_6a'] = net 192 | net = slim.repeat(net, 20, block17, scale=0.10) 193 | 194 | # Auxillary tower 195 | with tf.variable_scope('AuxLogits'): 196 | aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID', 197 | scope='Conv2d_1a_3x3') 198 | aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') 199 | aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], 200 | padding='VALID', scope='Conv2d_2a_5x5') 201 | aux = slim.flatten(aux) 202 | aux = slim.fully_connected(aux, num_classes, activation_fn=None, 203 | scope='Logits') 204 | end_points['AuxLogits'] = aux 205 | 206 | with tf.variable_scope('Mixed_7a'): 207 | with tf.variable_scope('Branch_0'): 208 | tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 209 | tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, 210 | padding='VALID', scope='Conv2d_1a_3x3') 211 | with tf.variable_scope('Branch_1'): 212 | tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 213 | tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, 214 | padding='VALID', scope='Conv2d_1a_3x3') 215 | with tf.variable_scope('Branch_2'): 216 | tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 217 | tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, 218 | scope='Conv2d_0b_3x3') 219 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, 220 | padding='VALID', scope='Conv2d_1a_3x3') 221 | with tf.variable_scope('Branch_3'): 222 | tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', 223 | scope='MaxPool_1a_3x3') 224 | net = tf.concat(3, [tower_conv_1, tower_conv1_1, 225 | tower_conv2_2, tower_pool]) 226 | 227 | end_points['Mixed_7a'] = net 228 | 229 | net = slim.repeat(net, 9, block8, scale=0.20) 230 | net = block8(net, activation_fn=None) 231 | 232 | net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') 233 | end_points['Conv2d_7b_1x1'] = net 234 | 235 | with tf.variable_scope('Logits'): 236 | end_points['PrePool'] = net 237 | net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', 238 | scope='AvgPool_1a_8x8') 239 | net = slim.flatten(net) 240 | 241 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 242 | scope='Dropout') 243 | 244 | end_points['PreLogitsFlatten'] = net 245 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 246 | scope='Logits') 247 | end_points['Logits'] = logits 248 | end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') 249 | 250 | return logits, end_points 251 | inception_resnet_v2.default_image_size = 299 252 | 253 | 254 | def inception_resnet_v2_arg_scope(weight_decay=0.00004, 255 | batch_norm_decay=0.9997, 256 | batch_norm_epsilon=0.001): 257 | """Yields the scope with the default parameters for inception_resnet_v2. 258 | 259 | Args: 260 | weight_decay: the weight decay for weights variables. 261 | batch_norm_decay: decay for the moving average of batch_norm momentums. 262 | batch_norm_epsilon: small float added to variance to avoid dividing by zero. 263 | 264 | Returns: 265 | a arg_scope with the parameters needed for inception_resnet_v2. 266 | """ 267 | # Set weight_decay for weights in conv2d and fully_connected layers. 
268 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 269 | weights_regularizer=slim.l2_regularizer(weight_decay), 270 | biases_regularizer=slim.l2_regularizer(weight_decay)): 271 | 272 | batch_norm_params = { 273 | 'decay': batch_norm_decay, 274 | 'epsilon': batch_norm_epsilon, 275 | } 276 | # Set activation_fn and parameters for batch_norm. 277 | with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, 278 | normalizer_fn=slim.batch_norm, 279 | normalizer_params=batch_norm_params) as scope: 280 | return scope 281 | -------------------------------------------------------------------------------- /nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models. 16 | """ 17 | 18 | import functools 19 | import tensorflow as tf 20 | 21 | # from nets import inception 22 | # from nets import overfeat 23 | # from nets import resnet_v1 24 | # from nets import resnet_v2 25 | from nets import vgg 26 | # from nets import xception 27 | 28 | from nets import ssd_vgg_300 29 | from nets import ssd_vgg_512 30 | 31 | slim = tf.contrib.slim 32 | 33 | networks_map = {'vgg_a': vgg.vgg_a, 34 | 'vgg_16': vgg.vgg_16, 35 | 'vgg_19': vgg.vgg_19, 36 | 'ssd_300_vgg': ssd_vgg_300.ssd_net, 37 | 'ssd_300_vgg_caffe': ssd_vgg_300.ssd_net, 38 | 'ssd_512_vgg': ssd_vgg_512.ssd_net, 39 | 'ssd_512_vgg_caffe': ssd_vgg_512.ssd_net, 40 | } 41 | 42 | arg_scopes_map = {'vgg_a': vgg.vgg_arg_scope, 43 | 'vgg_16': vgg.vgg_arg_scope, 44 | 'vgg_19': vgg.vgg_arg_scope, 45 | 'ssd_300_vgg': ssd_vgg_300.ssd_arg_scope, 46 | 'ssd_300_vgg_caffe': ssd_vgg_300.ssd_arg_scope_caffe, 47 | 'ssd_512_vgg': ssd_vgg_512.ssd_arg_scope, 48 | 'ssd_512_vgg_caffe': ssd_vgg_512.ssd_arg_scope_caffe, 49 | } 50 | 51 | networks_obj = {'ssd_300_vgg': ssd_vgg_300.SSDNet, 52 | 'ssd_512_vgg': ssd_vgg_512.SSDNet, 53 | } 54 | 55 | 56 | def get_network(name): 57 | """Get a network object from a name. 58 | """ 59 | # params = networks_obj[name].default_params if params is None else params 60 | return networks_obj[name] 61 | 62 | 63 | def get_network_fn(name, num_classes, is_training=False, **kwargs): 64 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 65 | 66 | Args: 67 | name: The name of the network. 68 | num_classes: The number of classes to use for classification. 69 | is_training: `True` if the model is being used for training and `False` 70 | otherwise. 71 | weight_decay: The l2 coefficient for the model weights. 72 | Returns: 73 | network_fn: A function that applies the model to a batch of images. It has 74 | the following signature: logits, end_points = network_fn(images) 75 | Raises: 76 | ValueError: If network `name` is not recognized. 
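    Example (illustrative sketch): extra keyword arguments are forwarded to the
    corresponding arg_scope constructor, e.g.

        network_fn = get_network_fn('vgg_16', num_classes=1000,
                                    is_training=False, weight_decay=0.0005)
        logits, end_points = network_fn(images)

    The SSD variants ('ssd_300_vgg', 'ssd_512_vgg') are obtained the same way.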
77 | """ 78 | if name not in networks_map: 79 | raise ValueError('Name of network unknown %s' % name) 80 | arg_scope = arg_scopes_map[name](**kwargs) 81 | func = networks_map[name] 82 | @functools.wraps(func) 83 | def network_fn(images, **kwargs): 84 | with slim.arg_scope(arg_scope): 85 | return func(images, num_classes, is_training=is_training, **kwargs) 86 | if hasattr(func, 'default_image_size'): 87 | network_fn.default_image_size = func.default_image_size 88 | 89 | return network_fn 90 | -------------------------------------------------------------------------------- /nets/np_methods.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Additional Numpy methods. Big mess of many things! 16 | """ 17 | import numpy as np 18 | 19 | 20 | # =========================================================================== # 21 | # Numpy implementations of SSD boxes functions. 22 | # =========================================================================== # 23 | def ssd_bboxes_decode(feat_localizations, 24 | anchor_bboxes, 25 | prior_scaling=[0.1, 0.1, 0.2, 0.2]): 26 | """Compute the relative bounding boxes from the layer features and 27 | reference anchor bounding boxes. 28 | 29 | Return: 30 | numpy array Nx4: ymin, xmin, ymax, xmax 31 | """ 32 | # Reshape for easier broadcasting. 33 | l_shape = feat_localizations.shape 34 | feat_localizations = np.reshape(feat_localizations, 35 | (-1, l_shape[-2], l_shape[-1])) 36 | yref, xref, href, wref = anchor_bboxes 37 | xref = np.reshape(xref, [-1, 1]) 38 | yref = np.reshape(yref, [-1, 1]) 39 | 40 | # Compute center, height and width 41 | cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref 42 | cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref 43 | w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2]) 44 | h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3]) 45 | # bboxes: ymin, xmin, xmax, ymax. 46 | bboxes = np.zeros_like(feat_localizations) 47 | bboxes[:, :, 0] = cy - h / 2. 48 | bboxes[:, :, 1] = cx - w / 2. 49 | bboxes[:, :, 2] = cy + h / 2. 50 | bboxes[:, :, 3] = cx + w / 2. 51 | # Back to original shape. 52 | bboxes = np.reshape(bboxes, l_shape) 53 | return bboxes 54 | 55 | 56 | def ssd_bboxes_select_layer(predictions_layer, 57 | localizations_layer, 58 | anchors_layer, 59 | select_threshold=0.5, 60 | img_shape=(300, 300), 61 | num_classes=21, 62 | decode=True): 63 | """Extract classes, scores and bounding boxes from features in one layer. 64 | 65 | Return: 66 | classes, scores, bboxes: Numpy arrays... 67 | """ 68 | # First decode localizations features if necessary. 69 | if decode: 70 | localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer) 71 | 72 | # Reshape features to: Batches x N x N_labels | 4. 
73 | p_shape = predictions_layer.shape 74 | batch_size = p_shape[0] if len(p_shape) == 5 else 1 75 | predictions_layer = np.reshape(predictions_layer, 76 | (batch_size, -1, p_shape[-1])) 77 | l_shape = localizations_layer.shape 78 | localizations_layer = np.reshape(localizations_layer, 79 | (batch_size, -1, l_shape[-1])) 80 | 81 | # Boxes selection: use threshold or score > no-label criteria. 82 | if select_threshold is None or select_threshold == 0: 83 | # Class prediction and scores: assign 0. to 0-class 84 | classes = np.argmax(predictions_layer, axis=2) 85 | scores = np.amax(predictions_layer, axis=2) 86 | mask = (classes > 0) 87 | classes = classes[mask] 88 | scores = scores[mask] 89 | bboxes = localizations_layer[mask] 90 | else: 91 | sub_predictions = predictions_layer[:, :, 1:] 92 | idxes = np.where(sub_predictions > select_threshold) 93 | classes = idxes[-1]+1 94 | scores = sub_predictions[idxes] 95 | bboxes = localizations_layer[idxes[:-1]] 96 | 97 | return classes, scores, bboxes 98 | 99 | 100 | def ssd_bboxes_select(predictions_net, 101 | localizations_net, 102 | anchors_net, 103 | select_threshold=0.5, 104 | img_shape=(300, 300), 105 | num_classes=21, 106 | decode=True): 107 | """Extract classes, scores and bounding boxes from network output layers. 108 | 109 | Return: 110 | classes, scores, bboxes: Numpy arrays... 111 | """ 112 | l_classes = [] 113 | l_scores = [] 114 | l_bboxes = [] 115 | # l_layers = [] 116 | # l_idxes = [] 117 | for i in range(len(predictions_net)): 118 | classes, scores, bboxes = ssd_bboxes_select_layer( 119 | predictions_net[i], localizations_net[i], anchors_net[i], 120 | select_threshold, img_shape, num_classes, decode) 121 | l_classes.append(classes) 122 | l_scores.append(scores) 123 | l_bboxes.append(bboxes) 124 | # Debug information. 125 | # l_layers.append(i) 126 | # l_idxes.append((i, idxes)) 127 | 128 | classes = np.concatenate(l_classes, 0) 129 | scores = np.concatenate(l_scores, 0) 130 | bboxes = np.concatenate(l_bboxes, 0) 131 | return classes, scores, bboxes 132 | 133 | 134 | # =========================================================================== # 135 | # Common functions for bboxes handling and selection. 136 | # =========================================================================== # 137 | def bboxes_sort(classes, scores, bboxes, top_k=400): 138 | """Sort bounding boxes by decreasing order and keep only the top_k 139 | """ 140 | # if priority_inside: 141 | # inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \ 142 | # (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin) 143 | # idxes = np.argsort(-scores) 144 | # inside = inside[idxes] 145 | # idxes = np.concatenate([idxes[inside], idxes[~inside]]) 146 | idxes = np.argsort(-scores) 147 | classes = classes[idxes][:top_k] 148 | scores = scores[idxes][:top_k] 149 | bboxes = bboxes[idxes][:top_k] 150 | return classes, scores, bboxes 151 | 152 | 153 | def bboxes_clip(bbox_ref, bboxes): 154 | """Clip bounding boxes with respect to reference bbox. 
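    These helpers are typically chained after a forward pass; illustrative
    sketch (rpredictions, rlocalisations and ssd_anchors are assumed to come
    from the SSD network and its anchor boxes):

        rclasses, rscores, rbboxes = ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors, select_threshold=0.5)
        rbboxes = bboxes_clip([0., 0., 1., 1.], rbboxes)
        rclasses, rscores, rbboxes = bboxes_sort(rclasses, rscores, rbboxes,
                                                 top_k=400)
        rclasses, rscores, rbboxes = bboxes_nms(rclasses, rscores, rbboxes,
                                                nms_threshold=0.45)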
155 | """ 156 | bboxes = np.copy(bboxes) 157 | bboxes = np.transpose(bboxes) 158 | bbox_ref = np.transpose(bbox_ref) 159 | bboxes[0] = np.maximum(bboxes[0], bbox_ref[0]) 160 | bboxes[1] = np.maximum(bboxes[1], bbox_ref[1]) 161 | bboxes[2] = np.minimum(bboxes[2], bbox_ref[2]) 162 | bboxes[3] = np.minimum(bboxes[3], bbox_ref[3]) 163 | bboxes = np.transpose(bboxes) 164 | return bboxes 165 | 166 | 167 | def bboxes_resize(bbox_ref, bboxes): 168 | """Resize bounding boxes based on a reference bounding box, 169 | assuming that the latter is [0, 0, 1, 1] after transform. 170 | """ 171 | bboxes = np.copy(bboxes) 172 | # Translate. 173 | bboxes[:, 0] -= bbox_ref[0] 174 | bboxes[:, 1] -= bbox_ref[1] 175 | bboxes[:, 2] -= bbox_ref[0] 176 | bboxes[:, 3] -= bbox_ref[1] 177 | # Resize. 178 | resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]] 179 | bboxes[:, 0] /= resize[0] 180 | bboxes[:, 1] /= resize[1] 181 | bboxes[:, 2] /= resize[0] 182 | bboxes[:, 3] /= resize[1] 183 | return bboxes 184 | 185 | 186 | def bboxes_jaccard(bboxes1, bboxes2): 187 | """Computing jaccard index between bboxes1 and bboxes2. 188 | Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable. 189 | """ 190 | bboxes1 = np.transpose(bboxes1) 191 | bboxes2 = np.transpose(bboxes2) 192 | # Intersection bbox and volume. 193 | int_ymin = np.maximum(bboxes1[0], bboxes2[0]) 194 | int_xmin = np.maximum(bboxes1[1], bboxes2[1]) 195 | int_ymax = np.minimum(bboxes1[2], bboxes2[2]) 196 | int_xmax = np.minimum(bboxes1[3], bboxes2[3]) 197 | 198 | int_h = np.maximum(int_ymax - int_ymin, 0.) 199 | int_w = np.maximum(int_xmax - int_xmin, 0.) 200 | int_vol = int_h * int_w 201 | # Union volume. 202 | vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1]) 203 | vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1]) 204 | jaccard = int_vol / (vol1 + vol2 - int_vol) 205 | return jaccard 206 | 207 | 208 | def bboxes_intersection(bboxes_ref, bboxes2): 209 | """Computing jaccard index between bboxes1 and bboxes2. 210 | Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable. 211 | """ 212 | bboxes_ref = np.transpose(bboxes_ref) 213 | bboxes2 = np.transpose(bboxes2) 214 | # Intersection bbox and volume. 215 | int_ymin = np.maximum(bboxes_ref[0], bboxes2[0]) 216 | int_xmin = np.maximum(bboxes_ref[1], bboxes2[1]) 217 | int_ymax = np.minimum(bboxes_ref[2], bboxes2[2]) 218 | int_xmax = np.minimum(bboxes_ref[3], bboxes2[3]) 219 | 220 | int_h = np.maximum(int_ymax - int_ymin, 0.) 221 | int_w = np.maximum(int_xmax - int_xmin, 0.) 222 | int_vol = int_h * int_w 223 | # Union volume. 224 | vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1]) 225 | score = int_vol / vol 226 | return score 227 | 228 | 229 | def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45): 230 | """Apply non-maximum selection to bounding boxes. 231 | """ 232 | keep_bboxes = np.ones(scores.shape, dtype=np.bool) 233 | for i in range(scores.size-1): 234 | if keep_bboxes[i]: 235 | # Computer overlap with bboxes which are following. 
236 | overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):]) 237 | # Overlap threshold for keeping + checking part of the same class 238 | keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i]) 239 | keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap) 240 | 241 | idxes = np.where(keep_bboxes) 242 | return classes[idxes], scores[idxes], bboxes[idxes] 243 | 244 | 245 | def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45): 246 | """Apply non-maximum selection to bounding boxes. 247 | """ 248 | pass 249 | 250 | 251 | 252 | 253 | -------------------------------------------------------------------------------- /nets/vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains model definitions for versions of the Oxford VGG network. 16 | 17 | These model definitions were introduced in the following technical report: 18 | 19 | Very Deep Convolutional Networks For Large-Scale Image Recognition 20 | Karen Simonyan and Andrew Zisserman 21 | arXiv technical report, 2015 22 | PDF: http://arxiv.org/pdf/1409.1556.pdf 23 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf 24 | CC-BY-4.0 25 | 26 | More information can be obtained from the VGG website: 27 | www.robots.ox.ac.uk/~vgg/research/very_deep/ 28 | 29 | Usage: 30 | with slim.arg_scope(vgg.vgg_arg_scope()): 31 | outputs, end_points = vgg.vgg_a(inputs) 32 | 33 | with slim.arg_scope(vgg.vgg_arg_scope()): 34 | outputs, end_points = vgg.vgg_16(inputs) 35 | 36 | @@vgg_a 37 | @@vgg_16 38 | @@vgg_19 39 | """ 40 | from __future__ import absolute_import 41 | from __future__ import division 42 | from __future__ import print_function 43 | 44 | import tensorflow as tf 45 | 46 | slim = tf.contrib.slim 47 | 48 | 49 | def vgg_arg_scope(weight_decay=0.0005): 50 | """Defines the VGG arg scope. 51 | 52 | Args: 53 | weight_decay: The l2 regularization coefficient. 54 | 55 | Returns: 56 | An arg_scope. 57 | """ 58 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 59 | activation_fn=tf.nn.relu, 60 | weights_regularizer=slim.l2_regularizer(weight_decay), 61 | biases_initializer=tf.zeros_initializer): 62 | with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: 63 | return arg_sc 64 | 65 | 66 | def vgg_a(inputs, 67 | num_classes=1000, 68 | is_training=True, 69 | dropout_keep_prob=0.5, 70 | spatial_squeeze=True, 71 | scope='vgg_a'): 72 | """Oxford Net VGG 11-Layers version A Example. 73 | 74 | Note: All the fully_connected layers have been transformed to conv2d layers. 75 | To use in classification mode, resize input to 224x224. 76 | 77 | Args: 78 | inputs: a tensor of size [batch_size, height, width, channels]. 79 | num_classes: number of predicted classes. 
80 | is_training: whether or not the model is being trained. 81 | dropout_keep_prob: the probability that activations are kept in the dropout 82 | layers during training. 83 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 84 | outputs. Useful to remove unnecessary dimensions for classification. 85 | scope: Optional scope for the variables. 86 | 87 | Returns: 88 | the last op containing the log predictions and end_points dict. 89 | """ 90 | with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc: 91 | end_points_collection = sc.name + '_end_points' 92 | # Collect outputs for conv2d, fully_connected and max_pool2d. 93 | with slim.arg_scope([slim.conv2d, slim.max_pool2d], 94 | outputs_collections=end_points_collection): 95 | net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 97 | net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2') 98 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 99 | net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') 100 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 101 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4') 102 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 103 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5') 104 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 105 | # Use conv2d instead of fully_connected layers. 106 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 107 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 108 | scope='dropout6') 109 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 110 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 111 | scope='dropout7') 112 | net = slim.conv2d(net, num_classes, [1, 1], 113 | activation_fn=None, 114 | normalizer_fn=None, 115 | scope='fc8') 116 | # Convert end_points_collection into a end_point dict. 117 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 118 | if spatial_squeeze: 119 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 120 | end_points[sc.name + '/fc8'] = net 121 | return net, end_points 122 | vgg_a.default_image_size = 224 123 | 124 | 125 | def vgg_16(inputs, 126 | num_classes=1000, 127 | is_training=True, 128 | dropout_keep_prob=0.5, 129 | spatial_squeeze=True, 130 | scope='vgg_16'): 131 | """Oxford Net VGG 16-Layers version D Example. 132 | 133 | Note: All the fully_connected layers have been transformed to conv2d layers. 134 | To use in classification mode, resize input to 224x224. 135 | 136 | Args: 137 | inputs: a tensor of size [batch_size, height, width, channels]. 138 | num_classes: number of predicted classes. 139 | is_training: whether or not the model is being trained. 140 | dropout_keep_prob: the probability that activations are kept in the dropout 141 | layers during training. 142 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 143 | outputs. Useful to remove unnecessary dimensions for classification. 144 | scope: Optional scope for the variables. 145 | 146 | Returns: 147 | the last op containing the log predictions and end_points dict. 148 | """ 149 | with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc: 150 | end_points_collection = sc.name + '_end_points' 151 | # Collect outputs for conv2d, fully_connected and max_pool2d. 
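# --------------------------------------------------------------------------- #
# Worked arithmetic aside, not part of the original vgg.py: why fc6 below is a
# [7, 7] VALID convolution. Five 2x2 / stride-2 poolings shrink the 224x224
# default input to 224 / 2**5 = 7, so a single 7x7 kernel covers the whole
# remaining feature map and reproduces the original fully connected fc6.
assert 224 // 2 ** 5 == 7
# --------------------------------------------------------------------------- #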
152 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 153 | outputs_collections=end_points_collection): 154 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 155 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 156 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 157 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 158 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') 159 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 160 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') 161 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 162 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') 163 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 164 | # Use conv2d instead of fully_connected layers. 165 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 166 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 167 | scope='dropout6') 168 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 169 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 170 | scope='dropout7') 171 | net = slim.conv2d(net, num_classes, [1, 1], 172 | activation_fn=None, 173 | normalizer_fn=None, 174 | scope='fc8') 175 | # Convert end_points_collection into a end_point dict. 176 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 177 | if spatial_squeeze: 178 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 179 | end_points[sc.name + '/fc8'] = net 180 | return net, end_points 181 | vgg_16.default_image_size = 224 182 | 183 | 184 | def vgg_19(inputs, 185 | num_classes=1000, 186 | is_training=True, 187 | dropout_keep_prob=0.5, 188 | spatial_squeeze=True, 189 | scope='vgg_19'): 190 | """Oxford Net VGG 19-Layers version E Example. 191 | 192 | Note: All the fully_connected layers have been transformed to conv2d layers. 193 | To use in classification mode, resize input to 224x224. 194 | 195 | Args: 196 | inputs: a tensor of size [batch_size, height, width, channels]. 197 | num_classes: number of predicted classes. 198 | is_training: whether or not the model is being trained. 199 | dropout_keep_prob: the probability that activations are kept in the dropout 200 | layers during training. 201 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 202 | outputs. Useful to remove unnecessary dimensions for classification. 203 | scope: Optional scope for the variables. 204 | 205 | Returns: 206 | the last op containing the log predictions and end_points dict. 207 | """ 208 | with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc: 209 | end_points_collection = sc.name + '_end_points' 210 | # Collect outputs for conv2d, fully_connected and max_pool2d. 
211 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 212 | outputs_collections=end_points_collection): 213 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 214 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 215 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 216 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 217 | net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3') 218 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 219 | net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4') 220 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 221 | net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5') 222 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 223 | # Use conv2d instead of fully_connected layers. 224 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 225 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 226 | scope='dropout6') 227 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 228 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 229 | scope='dropout7') 230 | net = slim.conv2d(net, num_classes, [1, 1], 231 | activation_fn=None, 232 | normalizer_fn=None, 233 | scope='fc8') 234 | # Convert end_points_collection into a end_point dict. 235 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 236 | if spatial_squeeze: 237 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 238 | end_points[sc.name + '/fc8'] = net 239 | return net, end_points 240 | vgg_19.default_image_size = 224 241 | 242 | # Alias 243 | vgg_d = vgg_16 244 | vgg_e = vgg_19 245 | -------------------------------------------------------------------------------- /nets/xception.py: -------------------------------------------------------------------------------- 1 | """Definition of Xception model introduced by F. Chollet. 2 | 3 | Usage: 4 | with slim.arg_scope(xception.xception_arg_scope()): 5 | outputs, end_points = xception.xception(inputs) 6 | @@xception 7 | """ 8 | 9 | import tensorflow as tf 10 | slim = tf.contrib.slim 11 | 12 | 13 | # =========================================================================== # 14 | # Xception implementation (clean) 15 | # =========================================================================== # 16 | def xception(inputs, 17 | num_classes=1000, 18 | is_training=True, 19 | dropout_keep_prob=0.5, 20 | prediction_fn=slim.softmax, 21 | reuse=None, 22 | scope='xception'): 23 | """Xception model from https://arxiv.org/pdf/1610.02357v2.pdf 24 | 25 | The default image size used to train this network is 299x299. 26 | """ 27 | 28 | # end_points collect relevant activations for external use, for example 29 | # summaries or losses. 30 | end_points = {} 31 | 32 | with tf.variable_scope(scope, 'xception', [inputs]): 33 | # Block 1. 34 | end_point = 'block1' 35 | with tf.variable_scope(end_point): 36 | net = slim.conv2d(inputs, 32, [3, 3], stride=2, padding='VALID', scope='conv1') 37 | net = slim.conv2d(net, 64, [3, 3], padding='VALID', scope='conv2') 38 | end_points[end_point] = net 39 | 40 | # Residual block 2. 
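# --------------------------------------------------------------------------- #
# Illustrative sketch, not part of the original xception.py: the module
# docstring's usage pattern spelled out; 299x299 is the default image size
# noted above and the placeholder is an assumption for illustration.
# --------------------------------------------------------------------------- #
example_inputs = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope(xception_arg_scope()):
    example_logits, example_end_points = xception(example_inputs, is_training=False)
# example_end_points is keyed by block name ('block1' ... 'block14') plus
# 'logits' and 'predictions'.
# --------------------------------------------------------------------------- #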
41 | end_point = 'block2' 42 | with tf.variable_scope(end_point): 43 | res = slim.conv2d(net, 128, [1, 1], stride=2, activation_fn=None, scope='res') 44 | net = slim.separable_convolution2d(net, 128, [3, 3], 1, scope='sepconv1') 45 | net = slim.separable_convolution2d(net, 128, [3, 3], 1, activation_fn=None, scope='sepconv2') 46 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool') 47 | net = res + net 48 | end_points[end_point] = net 49 | 50 | # Residual block 3. 51 | end_point = 'block3' 52 | with tf.variable_scope(end_point): 53 | res = slim.conv2d(net, 256, [1, 1], stride=2, activation_fn=None, scope='res') 54 | net = tf.nn.relu(net) 55 | net = slim.separable_convolution2d(net, 256, [3, 3], 1, scope='sepconv1') 56 | net = slim.separable_convolution2d(net, 256, [3, 3], 1, activation_fn=None, scope='sepconv2') 57 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool') 58 | net = res + net 59 | end_points[end_point] = net 60 | 61 | # Residual block 4. 62 | end_point = 'block4' 63 | with tf.variable_scope(end_point): 64 | res = slim.conv2d(net, 728, [1, 1], stride=2, activation_fn=None, scope='res') 65 | net = tf.nn.relu(net) 66 | net = slim.separable_convolution2d(net, 728, [3, 3], 1, scope='sepconv1') 67 | net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, scope='sepconv2') 68 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool') 69 | net = res + net 70 | end_points[end_point] = net 71 | 72 | # Middle flow blocks. 73 | for i in range(8): 74 | end_point = 'block' + str(i + 5) 75 | with tf.variable_scope(end_point): 76 | res = net 77 | net = tf.nn.relu(net) 78 | net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, 79 | scope='sepconv1') 80 | net = tf.nn.relu(net) 81 | net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, 82 | scope='sepconv2') 83 | net = tf.nn.relu(net) 84 | net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, 85 | scope='sepconv3') 86 | net = res + net 87 | end_points[end_point] = net 88 | 89 | # Exit flow: blocks 13 and 14. 90 | end_point = 'block13' 91 | with tf.variable_scope(end_point): 92 | res = slim.conv2d(net, 1024, [1, 1], stride=2, activation_fn=None, scope='res') 93 | net = tf.nn.relu(net) 94 | net = slim.separable_convolution2d(net, 728, [3, 3], 1, activation_fn=None, scope='sepconv1') 95 | net = tf.nn.relu(net) 96 | net = slim.separable_convolution2d(net, 1024, [3, 3], 1, activation_fn=None, scope='sepconv2') 97 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool') 98 | net = res + net 99 | end_points[end_point] = net 100 | 101 | end_point = 'block14' 102 | with tf.variable_scope(end_point): 103 | net = slim.separable_convolution2d(net, 1536, [3, 3], 1, scope='sepconv1') 104 | net = slim.separable_convolution2d(net, 2048, [3, 3], 1, scope='sepconv2') 105 | end_points[end_point] = net 106 | 107 | # Global averaging. 108 | end_point = 'dense' 109 | with tf.variable_scope(end_point): 110 | net = tf.reduce_mean(net, [1, 2], name='reduce_avg') 111 | logits = slim.fully_connected(net, 1000, activation_fn=None) 112 | 113 | end_points['logits'] = logits 114 | end_points['predictions'] = prediction_fn(logits, scope='Predictions') 115 | 116 | return logits, end_points 117 | xception.default_image_size = 299 118 | 119 | 120 | def xception_arg_scope(weight_decay=0.00001, stddev=0.1): 121 | """Defines the default Xception arg scope. 122 | 123 | Args: 124 | weight_decay: The weight decay to use for regularizing the model. 
125 | stddev: The standard deviation of the trunctated normal weight initializer. 126 | 127 | Returns: 128 | An `arg_scope` to use for the xception model. 129 | """ 130 | batch_norm_params = { 131 | # Decay for the moving averages. 132 | 'decay': 0.9997, 133 | # epsilon to prevent 0s in variance. 134 | 'epsilon': 0.001, 135 | # collection containing update_ops. 136 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 137 | } 138 | 139 | # Set weight_decay for weights in Conv and FC layers. 140 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_convolution2d], 141 | weights_regularizer=slim.l2_regularizer(weight_decay)): 142 | with slim.arg_scope( 143 | [slim.conv2d, slim.separable_convolution2d], 144 | padding='SAME', 145 | weights_initializer=tf.contrib.layers.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False), 146 | activation_fn=tf.nn.relu, 147 | normalizer_fn=slim.batch_norm, 148 | normalizer_params=batch_norm_params): 149 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as sc: 150 | return sc 151 | 152 | 153 | # =========================================================================== # 154 | # Xception arg scope (Keras hack!) 155 | # =========================================================================== # 156 | def xception_keras_arg_scope(hdf5_file, weight_decay=0.00001): 157 | """Defines an Xception arg scope which initialize layers weights 158 | using a Keras HDF5 file. 159 | 160 | Quite hacky implementaion, but seems to be working! 161 | 162 | Args: 163 | hdf5_file: HDF5 file handle. 164 | weight_decay: The weight decay to use for regularizing the model. 165 | 166 | Returns: 167 | An `arg_scope` to use for the xception model. 168 | """ 169 | # Default batch normalization parameters. 170 | batch_norm_params = { 171 | 'center': True, 172 | 'scale': False, 173 | 'decay': 0.9997, 174 | 'epsilon': 0.001, 175 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 176 | } 177 | 178 | # Read weights from HDF5 file. 179 | def keras_bn_params(): 180 | def _beta_initializer(shape, dtype, partition_info=None): 181 | keras_bn_params.bidx += 1 182 | k = 'batchnormalization_%i' % keras_bn_params.bidx 183 | kb = 'batchnormalization_%i_beta:0' % keras_bn_params.bidx 184 | return tf.cast(hdf5_file[k][kb][:], dtype) 185 | 186 | def _gamma_initializer(shape, dtype, partition_info=None): 187 | keras_bn_params.gidx += 1 188 | k = 'batchnormalization_%i' % keras_bn_params.gidx 189 | kg = 'batchnormalization_%i_gamma:0' % keras_bn_params.gidx 190 | return tf.cast(hdf5_file[k][kg][:], dtype) 191 | 192 | def _mean_initializer(shape, dtype, partition_info=None): 193 | keras_bn_params.midx += 1 194 | k = 'batchnormalization_%i' % keras_bn_params.midx 195 | km = 'batchnormalization_%i_running_mean:0' % keras_bn_params.midx 196 | return tf.cast(hdf5_file[k][km][:], dtype) 197 | 198 | def _variance_initializer(shape, dtype, partition_info=None): 199 | keras_bn_params.vidx += 1 200 | k = 'batchnormalization_%i' % keras_bn_params.vidx 201 | kv = 'batchnormalization_%i_running_std:0' % keras_bn_params.vidx 202 | return tf.cast(hdf5_file[k][kv][:], dtype) 203 | 204 | # Batch normalisation initializers. 
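# --------------------------------------------------------------------------- #
# Illustrative sketch, not part of the original xception.py: how this
# Keras-porting scope is meant to be driven. The weight file name is
# hypothetical and h5py is an extra dependency of the hack; each layer pulls
# its initial value from the matching HDF5 dataset, so layers must be created
# in the same order as in the Keras model.
# --------------------------------------------------------------------------- #
import h5py
example_file = h5py.File('xception_weights.h5', 'r')  # hypothetical path
example_inputs = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope(xception_keras_arg_scope(example_file)):
    example_logits, _ = xception(example_inputs, is_training=False)
# --------------------------------------------------------------------------- #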
205 | params = batch_norm_params.copy() 206 | params['initializers'] = { 207 | 'beta': _beta_initializer, 208 | 'gamma': _gamma_initializer, 209 | 'moving_mean': _mean_initializer, 210 | 'moving_variance': _variance_initializer, 211 | } 212 | return params 213 | keras_bn_params.bidx = 0 214 | keras_bn_params.gidx = 0 215 | keras_bn_params.midx = 0 216 | keras_bn_params.vidx = 0 217 | 218 | def keras_conv2d_weights(): 219 | def _initializer(shape, dtype, partition_info=None): 220 | keras_conv2d_weights.idx += 1 221 | k = 'convolution2d_%i' % keras_conv2d_weights.idx 222 | kw = 'convolution2d_%i_W:0' % keras_conv2d_weights.idx 223 | return tf.cast(hdf5_file[k][kw][:], dtype) 224 | return _initializer 225 | keras_conv2d_weights.idx = 0 226 | 227 | def keras_sep_conv2d_weights(): 228 | def _initializer(shape, dtype, partition_info=None): 229 | # Depthwise or Pointwise convolution? 230 | if shape[0] > 1 or shape[1] > 1: 231 | keras_sep_conv2d_weights.didx += 1 232 | k = 'separableconvolution2d_%i' % keras_sep_conv2d_weights.didx 233 | kd = 'separableconvolution2d_%i_depthwise_kernel:0' % keras_sep_conv2d_weights.didx 234 | weights = hdf5_file[k][kd][:] 235 | else: 236 | keras_sep_conv2d_weights.pidx += 1 237 | k = 'separableconvolution2d_%i' % keras_sep_conv2d_weights.pidx 238 | kp = 'separableconvolution2d_%i_pointwise_kernel:0' % keras_sep_conv2d_weights.pidx 239 | weights = hdf5_file[k][kp][:] 240 | return tf.cast(weights, dtype) 241 | return _initializer 242 | keras_sep_conv2d_weights.didx = 0 243 | keras_sep_conv2d_weights.pidx = 0 244 | 245 | def keras_dense_weights(): 246 | def _initializer(shape, dtype, partition_info=None): 247 | keras_dense_weights.idx += 1 248 | k = 'dense_%i' % keras_dense_weights.idx 249 | kw = 'dense_%i_W:0' % keras_dense_weights.idx 250 | return tf.cast(hdf5_file[k][kw][:], dtype) 251 | return _initializer 252 | keras_dense_weights.idx = 1 253 | 254 | def keras_dense_biases(): 255 | def _initializer(shape, dtype, partition_info=None): 256 | keras_dense_biases.idx += 1 257 | k = 'dense_%i' % keras_dense_biases.idx 258 | kb = 'dense_%i_b:0' % keras_dense_biases.idx 259 | return tf.cast(hdf5_file[k][kb][:], dtype) 260 | return _initializer 261 | keras_dense_biases.idx = 1 262 | 263 | # Default network arg scope. 264 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.separable_convolution2d], 265 | weights_regularizer=slim.l2_regularizer(weight_decay)): 266 | with slim.arg_scope( 267 | [slim.conv2d, slim.separable_convolution2d], 268 | padding='SAME', 269 | activation_fn=tf.nn.relu, 270 | normalizer_fn=slim.batch_norm, 271 | normalizer_params=keras_bn_params()): 272 | with slim.arg_scope([slim.max_pool2d], padding='SAME'): 273 | 274 | # Weights initializers from Keras weights. 275 | with slim.arg_scope([slim.conv2d], 276 | weights_initializer=keras_conv2d_weights()): 277 | with slim.arg_scope([slim.separable_convolution2d], 278 | weights_initializer=keras_sep_conv2d_weights()): 279 | with slim.arg_scope([slim.fully_connected], 280 | weights_initializer=keras_dense_weights(), 281 | biases_initializer=keras_dense_biases()) as sc: 282 | return sc 283 | 284 | -------------------------------------------------------------------------------- /notebooks/visualization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import cv2 16 | import random 17 | 18 | import matplotlib.pyplot as plt 19 | import matplotlib.image as mpimg 20 | import matplotlib.cm as mpcm 21 | 22 | 23 | # =========================================================================== # 24 | # Some colormaps. 25 | # =========================================================================== # 26 | def colors_subselect(colors, num_classes=21): 27 | dt = len(colors) // num_classes 28 | sub_colors = [] 29 | for i in range(num_classes): 30 | color = colors[i*dt] 31 | if isinstance(color[0], float): 32 | sub_colors.append([int(c * 255) for c in color]) 33 | else: 34 | sub_colors.append([c for c in color]) 35 | return sub_colors 36 | 37 | colors_plasma = colors_subselect(mpcm.plasma.colors, num_classes=21) 38 | colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), 39 | (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), 40 | (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), 41 | (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), 42 | (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)] 43 | 44 | 45 | # =========================================================================== # 46 | # OpenCV drawing. 47 | # =========================================================================== # 48 | def draw_lines(img, lines, color=[255, 0, 0], thickness=2): 49 | """Draw a collection of lines on an image. 50 | """ 51 | for line in lines: 52 | for x1, y1, x2, y2 in line: 53 | cv2.line(img, (x1, y1), (x2, y2), color, thickness) 54 | 55 | 56 | def draw_rectangle(img, p1, p2, color=[255, 0, 0], thickness=2): 57 | cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) 58 | 59 | 60 | def draw_bbox(img, bbox, shape, label, color=[255, 0, 0], thickness=2): 61 | p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1])) 62 | p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1])) 63 | cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) 64 | p1 = (p1[0]+15, p1[1]) 65 | cv2.putText(img, str(label), p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1) 66 | 67 | 68 | def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2): 69 | shape = img.shape 70 | for i in range(bboxes.shape[0]): 71 | bbox = bboxes[i] 72 | color = colors[classes[i]] 73 | # Draw bounding box... 74 | p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1])) 75 | p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1])) 76 | cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) 77 | # Draw text... 78 | s = '%s/%.3f' % (classes[i], scores[i]) 79 | p1 = (p1[0]-5, p1[1]) 80 | cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1) 81 | 82 | 83 | # =========================================================================== # 84 | # Matplotlib show... 
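# --------------------------------------------------------------------------- #
# Illustrative sketch, not part of the original visualization.py: drawing one
# made-up detection with the OpenCV helper above. Boxes are normalized
# [ymin, xmin, ymax, xmax]; class 12 simply indexes colors_tableau.
# --------------------------------------------------------------------------- #
import numpy as np  # visualization.py itself does not import numpy
example_img = np.zeros((300, 300, 3), dtype=np.uint8)
bboxes_draw_on_img(example_img,
                   classes=np.array([12]),
                   scores=np.array([0.85]),
                   bboxes=np.array([[0.2, 0.2, 0.8, 0.8]]),
                   colors=colors_tableau)
cv2.imwrite('example_detection.png', example_img)  # hypothetical output file
# --------------------------------------------------------------------------- #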
85 | # =========================================================================== # 86 | def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5): 87 | """Visualize bounding boxes. Largely inspired by SSD-MXNET! 88 | """ 89 | fig = plt.figure(figsize=figsize) 90 | plt.imshow(img) 91 | height = img.shape[0] 92 | width = img.shape[1] 93 | colors = dict() 94 | for i in range(classes.shape[0]): 95 | cls_id = int(classes[i]) 96 | if cls_id >= 0: 97 | score = scores[i] 98 | if cls_id not in colors: 99 | colors[cls_id] = (random.random(), random.random(), random.random()) 100 | ymin = int(bboxes[i, 0] * height) 101 | xmin = int(bboxes[i, 1] * width) 102 | ymax = int(bboxes[i, 2] * height) 103 | xmax = int(bboxes[i, 3] * width) 104 | rect = plt.Rectangle((xmin, ymin), xmax - xmin, 105 | ymax - ymin, fill=False, 106 | edgecolor=colors[cls_id], 107 | linewidth=linewidth) 108 | plt.gca().add_patch(rect) 109 | class_name = str(cls_id) 110 | plt.gca().text(xmin, ymin - 2, 111 | '{:s} | {:.3f}'.format(class_name, score), 112 | bbox=dict(facecolor=colors[cls_id], alpha=0.5), 113 | fontsize=12, color='white') 114 | plt.show() 115 | -------------------------------------------------------------------------------- /pictures/ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/pictures/ex1.png -------------------------------------------------------------------------------- /pictures/ex2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/pictures/ex2.png -------------------------------------------------------------------------------- /preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /preprocessing/inception_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides utilities to preprocess images for the Inception networks.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from tensorflow.python.ops import control_flow_ops 24 | 25 | 26 | def apply_with_random_selector(x, func, num_cases): 27 | """Computes func(x, sel), with sel sampled from [0...num_cases-1]. 28 | 29 | Args: 30 | x: input Tensor. 31 | func: Python function to apply. 32 | num_cases: Python int32, number of cases to sample sel from. 
33 | 34 | Returns: 35 | The result of func(x, sel), where func receives the value of the 36 | selector as a python integer, but sel is sampled dynamically. 37 | """ 38 | sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) 39 | # Pass the real x only to one of the func calls. 40 | return control_flow_ops.merge([ 41 | func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) 42 | for case in range(num_cases)])[0] 43 | 44 | 45 | def distort_color(image, color_ordering=0, fast_mode=True, scope=None): 46 | """Distort the color of a Tensor image. 47 | 48 | Each color distortion is non-commutative and thus ordering of the color ops 49 | matters. Ideally we would randomly permute the ordering of the color ops. 50 | Rather then adding that level of complication, we select a distinct ordering 51 | of color ops for each preprocessing thread. 52 | 53 | Args: 54 | image: 3-D Tensor containing single image in [0, 1]. 55 | color_ordering: Python int, a type of distortion (valid values: 0-3). 56 | fast_mode: Avoids slower ops (random_hue and random_contrast) 57 | scope: Optional scope for name_scope. 58 | Returns: 59 | 3-D Tensor color-distorted image on range [0, 1] 60 | Raises: 61 | ValueError: if color_ordering not in [0, 3] 62 | """ 63 | with tf.name_scope(scope, 'distort_color', [image]): 64 | if fast_mode: 65 | if color_ordering == 0: 66 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 67 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 68 | else: 69 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 70 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 71 | else: 72 | if color_ordering == 0: 73 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 74 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 75 | image = tf.image.random_hue(image, max_delta=0.2) 76 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 77 | elif color_ordering == 1: 78 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 79 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 80 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 81 | image = tf.image.random_hue(image, max_delta=0.2) 82 | elif color_ordering == 2: 83 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 84 | image = tf.image.random_hue(image, max_delta=0.2) 85 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 86 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 87 | elif color_ordering == 3: 88 | image = tf.image.random_hue(image, max_delta=0.2) 89 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 90 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 91 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 92 | else: 93 | raise ValueError('color_ordering must be in [0, 3]') 94 | 95 | # The random_* ops do not necessarily clamp. 96 | return tf.clip_by_value(image, 0.0, 1.0) 97 | 98 | 99 | def distorted_bounding_box_crop(image, 100 | bbox, 101 | min_object_covered=0.1, 102 | aspect_ratio_range=(0.75, 1.33), 103 | area_range=(0.05, 1.0), 104 | max_attempts=100, 105 | scope=None): 106 | """Generates cropped_image using a one of the bboxes randomly distorted. 107 | 108 | See `tf.image.sample_distorted_bounding_box` for more documentation. 109 | 110 | Args: 111 | image: 3-D Tensor of image (it will be converted to floats in [0, 1]). 
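# --------------------------------------------------------------------------- #
# Illustrative sketch, not part of the original file: composing the two helpers
# above the way preprocess_for_train does further down, picking one of the four
# colour orderings at graph-eval time. The placeholder is an assumption for
# illustration; pixel values are expected in [0, 1].
# --------------------------------------------------------------------------- #
example_image = tf.placeholder(tf.float32, [None, None, 3])
example_distorted = apply_with_random_selector(
    example_image,
    lambda x, ordering: distort_color(x, ordering, fast_mode=False),
    num_cases=4)
# --------------------------------------------------------------------------- #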
112 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 113 | where each coordinate is [0, 1) and the coordinates are arranged 114 | as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole 115 | image. 116 | min_object_covered: An optional `float`. Defaults to `0.1`. The cropped 117 | area of the image must contain at least this fraction of any bounding box 118 | supplied. 119 | aspect_ratio_range: An optional list of `floats`. The cropped area of the 120 | image must have an aspect ratio = width / height within this range. 121 | area_range: An optional list of `floats`. The cropped area of the image 122 | must contain a fraction of the supplied image within in this range. 123 | max_attempts: An optional `int`. Number of attempts at generating a cropped 124 | region of the image of the specified constraints. After `max_attempts` 125 | failures, return the entire image. 126 | scope: Optional scope for name_scope. 127 | Returns: 128 | A tuple, a 3-D Tensor cropped_image and the distorted bbox 129 | """ 130 | with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): 131 | # Each bounding box has shape [1, num_boxes, box coords] and 132 | # the coordinates are ordered [ymin, xmin, ymax, xmax]. 133 | 134 | # A large fraction of image datasets contain a human-annotated bounding 135 | # box delineating the region of the image containing the object of interest. 136 | # We choose to create a new bounding box for the object which is a randomly 137 | # distorted version of the human-annotated bounding box that obeys an 138 | # allowed range of aspect ratios, sizes and overlap with the human-annotated 139 | # bounding box. If no box is supplied, then we assume the bounding box is 140 | # the entire image. 141 | sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( 142 | tf.shape(image), 143 | bounding_boxes=bbox, 144 | min_object_covered=min_object_covered, 145 | aspect_ratio_range=aspect_ratio_range, 146 | area_range=area_range, 147 | max_attempts=max_attempts, 148 | use_image_if_no_bounding_boxes=True) 149 | bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box 150 | 151 | # Crop the image to the specified bounding box. 152 | cropped_image = tf.slice(image, bbox_begin, bbox_size) 153 | return cropped_image, distort_bbox 154 | 155 | 156 | def preprocess_for_train(image, height, width, bbox, 157 | fast_mode=True, scope=None): 158 | """Distort one image for training a network. 159 | 160 | Distorting images provides a useful technique for augmenting the data 161 | set during training in order to make the network invariant to aspects 162 | of the image that do not effect the label. 163 | 164 | Additionally it would create image_summaries to display the different 165 | transformations applied to the image. 166 | 167 | Args: 168 | image: 3-D Tensor of image. If dtype is tf.float32 then the range should be 169 | [0, 1], otherwise it would converted to tf.float32 assuming that the range 170 | is [0, MAX], where MAX is largest positive representable number for 171 | int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). 172 | height: integer 173 | width: integer 174 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 175 | where each coordinate is [0, 1) and the coordinates are arranged 176 | as [ymin, xmin, ymax, xmax]. 177 | fast_mode: Optional boolean, if True avoids slower transformations (i.e. 178 | bi-cubic resizing, random_hue or random_contrast). 
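# --------------------------------------------------------------------------- #
# Illustrative sketch, not part of the original file: a minimal call to
# distorted_bounding_box_crop defined above. The single box covering
# [0.1, 0.1] to [0.9, 0.9] is a made-up annotation.
# --------------------------------------------------------------------------- #
example_image = tf.placeholder(tf.float32, [None, None, 3])
example_bbox = tf.constant([[[0.1, 0.1, 0.9, 0.9]]], dtype=tf.float32)  # [1, num_boxes, 4]
example_crop, example_distort_bbox = distorted_bounding_box_crop(
    example_image, example_bbox, min_object_covered=0.5)
# Re-assert the static shape lost by the dynamic tf.slice, as
# preprocess_for_train does.
example_crop.set_shape([None, None, 3])
# --------------------------------------------------------------------------- #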
179 | scope: Optional scope for name_scope. 180 | Returns: 181 | 3-D float Tensor of distorted image used for training with range [-1, 1]. 182 | """ 183 | with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): 184 | if bbox is None: 185 | bbox = tf.constant([0.0, 0.0, 1.0, 1.0], 186 | dtype=tf.float32, 187 | shape=[1, 1, 4]) 188 | if image.dtype != tf.float32: 189 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 190 | # Each bounding box has shape [1, num_boxes, box coords] and 191 | # the coordinates are ordered [ymin, xmin, ymax, xmax]. 192 | image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 193 | bbox) 194 | tf.image_summary('image_with_bounding_boxes', image_with_box) 195 | 196 | distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) 197 | # Restore the shape since the dynamic slice based upon the bbox_size loses 198 | # the third dimension. 199 | distorted_image.set_shape([None, None, 3]) 200 | image_with_distorted_box = tf.image.draw_bounding_boxes( 201 | tf.expand_dims(image, 0), distorted_bbox) 202 | tf.image_summary('images_with_distorted_bounding_box', 203 | image_with_distorted_box) 204 | 205 | # This resizing operation may distort the images because the aspect 206 | # ratio is not respected. We select a resize method in a round robin 207 | # fashion based on the thread number. 208 | # Note that ResizeMethod contains 4 enumerated resizing methods. 209 | 210 | # We select only 1 case for fast_mode bilinear. 211 | num_resize_cases = 1 if fast_mode else 4 212 | distorted_image = apply_with_random_selector( 213 | distorted_image, 214 | lambda x, method: tf.image.resize_images(x, [height, width], method), 215 | num_cases=num_resize_cases) 216 | 217 | tf.image_summary('cropped_resized_image', 218 | tf.expand_dims(distorted_image, 0)) 219 | 220 | # Randomly flip the image horizontally. 221 | distorted_image = tf.image.random_flip_left_right(distorted_image) 222 | 223 | # Randomly distort the colors. There are 4 ways to do it. 224 | distorted_image = apply_with_random_selector( 225 | distorted_image, 226 | lambda x, ordering: distort_color(x, ordering, fast_mode), 227 | num_cases=4) 228 | 229 | tf.image_summary('final_distorted_image', 230 | tf.expand_dims(distorted_image, 0)) 231 | distorted_image = tf.sub(distorted_image, 0.5) 232 | distorted_image = tf.mul(distorted_image, 2.0) 233 | return distorted_image 234 | 235 | 236 | def preprocess_for_eval(image, height, width, 237 | central_fraction=0.875, scope=None): 238 | """Prepare one image for evaluation. 239 | 240 | If height and width are specified it would output an image with that size by 241 | applying resize_bilinear. 242 | 243 | If central_fraction is specified it would cropt the central fraction of the 244 | input image. 245 | 246 | Args: 247 | image: 3-D Tensor of image. If dtype is tf.float32 then the range should be 248 | [0, 1], otherwise it would converted to tf.float32 assuming that the range 249 | is [0, MAX], where MAX is largest positive representable number for 250 | int(8/16/32) data type (see `tf.image.convert_image_dtype` for details) 251 | height: integer 252 | width: integer 253 | central_fraction: Optional Float, fraction of the image to crop. 254 | scope: Optional scope for name_scope. 255 | Returns: 256 | 3-D float Tensor of prepared image. 
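# --------------------------------------------------------------------------- #
# Aside, not part of the original file: preprocess_for_train above and
# preprocess_for_eval (whose body follows) both finish by mapping [0, 1] pixels
# to the Inception input range [-1, 1] via x -> (x - 0.5) * 2.0; tf.sub and
# tf.mul are the pre-TF-1.0 names of tf.subtract and tf.multiply.
example_scaled = (tf.constant([0.0, 0.5, 1.0]) - 0.5) * 2.0  # -> [-1.0, 0.0, 1.0]
# --------------------------------------------------------------------------- #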
257 | """ 258 | with tf.name_scope(scope, 'eval_image', [image, height, width]): 259 | if image.dtype != tf.float32: 260 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 261 | # Crop the central region of the image with an area containing 87.5% of 262 | # the original image. 263 | if central_fraction: 264 | image = tf.image.central_crop(image, central_fraction=central_fraction) 265 | 266 | if height and width: 267 | # Resize the image to the specified height and width. 268 | image = tf.expand_dims(image, 0) 269 | image = tf.image.resize_bilinear(image, [height, width], 270 | align_corners=False) 271 | image = tf.squeeze(image, [0]) 272 | image = tf.sub(image, 0.5) 273 | image = tf.mul(image, 2.0) 274 | return image 275 | 276 | 277 | def preprocess_image(image, height, width, 278 | is_training=False, bbox=None, fast_mode=True): 279 | """Pre-process one image for training or evaluation. 280 | 281 | Args: 282 | image: 3-D Tensor [height, width, channels] with the image. 283 | height: integer, image expected height. 284 | width: integer, image expected width. 285 | is_training: Boolean. If true it would transform an image for train, 286 | otherwise it would transform it for evaluation. 287 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 288 | where each coordinate is [0, 1) and the coordinates are arranged as 289 | [ymin, xmin, ymax, xmax]. 290 | fast_mode: Optional boolean, if True avoids slower transformations. 291 | 292 | Returns: 293 | 3-D float Tensor containing an appropriately scaled image 294 | 295 | Raises: 296 | ValueError: if user does not provide bounding box 297 | """ 298 | if is_training: 299 | return preprocess_for_train(image, height, width, bbox, fast_mode) 300 | else: 301 | return preprocess_for_eval(image, height, width) 302 | -------------------------------------------------------------------------------- /preprocessing/preprocessing_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | # from preprocessing import cifarnet_preprocessing 24 | # from preprocessing import inception_preprocessing 25 | # from preprocessing import vgg_preprocessing 26 | 27 | from preprocessing import ssd_vgg_preprocessing 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def get_preprocessing(name, is_training=False): 33 | """Returns preprocessing_fn(image, height, width, **kwargs). 34 | 35 | Args: 36 | name: The name of the preprocessing function. 37 | is_training: `True` if the model is being used for training. 
38 | 39 | Returns: 40 | preprocessing_fn: A function that preprocessing a single image (pre-batch). 41 | It has the following signature: 42 | image = preprocessing_fn(image, output_height, output_width, ...). 43 | 44 | Raises: 45 | ValueError: If Preprocessing `name` is not recognized. 46 | """ 47 | preprocessing_fn_map = { 48 | 'ssd_300_vgg': ssd_vgg_preprocessing, 49 | 'ssd_512_vgg': ssd_vgg_preprocessing, 50 | } 51 | 52 | if name not in preprocessing_fn_map: 53 | raise ValueError('Preprocessing name [%s] was not recognized' % name) 54 | 55 | def preprocessing_fn(image, labels, bboxes, 56 | out_shape, data_format='NHWC', **kwargs): 57 | return preprocessing_fn_map[name].preprocess_image( 58 | image, labels, bboxes, out_shape, data_format=data_format, 59 | is_training=is_training, **kwargs) 60 | return preprocessing_fn 61 | -------------------------------------------------------------------------------- /preprocessing/tf_image.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors and Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Custom image operations. 16 | Most of the following methods extend TensorFlow image library, and part of 17 | the code is shameless copy-paste of the former! 18 | """ 19 | import tensorflow as tf 20 | 21 | from tensorflow.python.framework import constant_op 22 | from tensorflow.python.framework import dtypes 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import tensor_util 26 | from tensorflow.python.ops import array_ops 27 | from tensorflow.python.ops import check_ops 28 | from tensorflow.python.ops import clip_ops 29 | from tensorflow.python.ops import control_flow_ops 30 | from tensorflow.python.ops import gen_image_ops 31 | from tensorflow.python.ops import gen_nn_ops 32 | from tensorflow.python.ops import string_ops 33 | from tensorflow.python.ops import math_ops 34 | from tensorflow.python.ops import random_ops 35 | from tensorflow.python.ops import variables 36 | 37 | 38 | # =========================================================================== # 39 | # Modification of TensorFlow image routines. 40 | # =========================================================================== # 41 | def _assert(cond, ex_type, msg): 42 | """A polymorphic assert, works with tensors and boolean expressions. 43 | If `cond` is not a tensor, behave like an ordinary assert statement, except 44 | that a empty list is returned. If `cond` is a tensor, return a list 45 | containing a single TensorFlow assert op. 46 | Args: 47 | cond: Something evaluates to a boolean value. May be a tensor. 48 | ex_type: The exception class to use. 49 | msg: The error message. 50 | Returns: 51 | A list, containing at most one assert op. 
52 | """ 53 | if _is_tensor(cond): 54 | return [control_flow_ops.Assert(cond, [msg])] 55 | else: 56 | if not cond: 57 | raise ex_type(msg) 58 | else: 59 | return [] 60 | 61 | 62 | def _is_tensor(x): 63 | """Returns `True` if `x` is a symbolic tensor-like object. 64 | Args: 65 | x: A python object to check. 66 | Returns: 67 | `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`. 68 | """ 69 | return isinstance(x, (ops.Tensor, variables.Variable)) 70 | 71 | 72 | def _ImageDimensions(image): 73 | """Returns the dimensions of an image tensor. 74 | Args: 75 | image: A 3-D Tensor of shape `[height, width, channels]`. 76 | Returns: 77 | A list of `[height, width, channels]` corresponding to the dimensions of the 78 | input image. Dimensions that are statically known are python integers, 79 | otherwise they are integer scalar tensors. 80 | """ 81 | if image.get_shape().is_fully_defined(): 82 | return image.get_shape().as_list() 83 | else: 84 | static_shape = image.get_shape().with_rank(3).as_list() 85 | dynamic_shape = array_ops.unstack(array_ops.shape(image), 3) 86 | return [s if s is not None else d 87 | for s, d in zip(static_shape, dynamic_shape)] 88 | 89 | 90 | def _Check3DImage(image, require_static=True): 91 | """Assert that we are working with properly shaped image. 92 | Args: 93 | image: 3-D Tensor of shape [height, width, channels] 94 | require_static: If `True`, requires that all dimensions of `image` are 95 | known and non-zero. 96 | Raises: 97 | ValueError: if `image.shape` is not a 3-vector. 98 | Returns: 99 | An empty list, if `image` has fully defined dimensions. Otherwise, a list 100 | containing an assert op is returned. 101 | """ 102 | try: 103 | image_shape = image.get_shape().with_rank(3) 104 | except ValueError: 105 | raise ValueError("'image' must be three-dimensional.") 106 | if require_static and not image_shape.is_fully_defined(): 107 | raise ValueError("'image' must be fully defined.") 108 | if any(x == 0 for x in image_shape): 109 | raise ValueError("all dims of 'image.shape' must be > 0: %s" % 110 | image_shape) 111 | if not image_shape.is_fully_defined(): 112 | return [check_ops.assert_positive(array_ops.shape(image), 113 | ["all dims of 'image.shape' " 114 | "must be > 0."])] 115 | else: 116 | return [] 117 | 118 | 119 | def fix_image_flip_shape(image, result): 120 | """Set the shape to 3 dimensional if we don't know anything else. 121 | Args: 122 | image: original image size 123 | result: flipped or transformed image 124 | Returns: 125 | An image whose shape is at least None,None,None. 126 | """ 127 | image_shape = image.get_shape() 128 | if image_shape == tensor_shape.unknown_shape(): 129 | result.set_shape([None, None, None]) 130 | else: 131 | result.set_shape(image_shape) 132 | return result 133 | 134 | 135 | # =========================================================================== # 136 | # Image + BBoxes methods: cropping, resizing, flipping, ... 137 | # =========================================================================== # 138 | def bboxes_crop_or_pad(bboxes, 139 | height, width, 140 | offset_y, offset_x, 141 | target_height, target_width): 142 | """Adapt bounding boxes to crop or pad operations. 143 | Coordinates are always supposed to be relative to the image. 
144 | 145 | Arguments: 146 | bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max]; 147 | height, width: Original image dimension; 148 | offset_y, offset_x: Offset to apply, 149 | negative if cropping, positive if padding; 150 | target_height, target_width: Target dimension after cropping / padding. 151 | """ 152 | with tf.name_scope('bboxes_crop_or_pad'): 153 | # Rescale bounding boxes in pixels. 154 | scale = tf.cast(tf.stack([height, width, height, width]), bboxes.dtype) 155 | bboxes = bboxes * scale 156 | # Add offset. 157 | offset = tf.cast(tf.stack([offset_y, offset_x, offset_y, offset_x]), bboxes.dtype) 158 | bboxes = bboxes + offset 159 | # Rescale to target dimension. 160 | scale = tf.cast(tf.stack([target_height, target_width, 161 | target_height, target_width]), bboxes.dtype) 162 | bboxes = bboxes / scale 163 | return bboxes 164 | 165 | 166 | def resize_image_bboxes_with_crop_or_pad(image, bboxes, 167 | target_height, target_width): 168 | """Crops and/or pads an image to a target width and height. 169 | Resizes an image to a target width and height by either centrally 170 | cropping the image or padding it evenly with zeros. 171 | 172 | If `width` or `height` is greater than the specified `target_width` or 173 | `target_height` respectively, this op centrally crops along that dimension. 174 | If `width` or `height` is smaller than the specified `target_width` or 175 | `target_height` respectively, this op centrally pads with 0 along that 176 | dimension. 177 | Args: 178 | image: 3-D tensor of shape `[height, width, channels]` 179 | target_height: Target height. 180 | target_width: Target width. 181 | Raises: 182 | ValueError: if `target_height` or `target_width` are zero or negative. 183 | Returns: 184 | Cropped and/or padded image of shape 185 | `[target_height, target_width, channels]` 186 | """ 187 | with tf.name_scope('resize_with_crop_or_pad'): 188 | image = ops.convert_to_tensor(image, name='image') 189 | 190 | assert_ops = [] 191 | assert_ops += _Check3DImage(image, require_static=False) 192 | assert_ops += _assert(target_width > 0, ValueError, 193 | 'target_width must be > 0.') 194 | assert_ops += _assert(target_height > 0, ValueError, 195 | 'target_height must be > 0.') 196 | 197 | image = control_flow_ops.with_dependencies(assert_ops, image) 198 | # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks. 199 | # Make sure our checks come first, so that error messages are clearer. 200 | if _is_tensor(target_height): 201 | target_height = control_flow_ops.with_dependencies( 202 | assert_ops, target_height) 203 | if _is_tensor(target_width): 204 | target_width = control_flow_ops.with_dependencies(assert_ops, target_width) 205 | 206 | def max_(x, y): 207 | if _is_tensor(x) or _is_tensor(y): 208 | return math_ops.maximum(x, y) 209 | else: 210 | return max(x, y) 211 | 212 | def min_(x, y): 213 | if _is_tensor(x) or _is_tensor(y): 214 | return math_ops.minimum(x, y) 215 | else: 216 | return min(x, y) 217 | 218 | def equal_(x, y): 219 | if _is_tensor(x) or _is_tensor(y): 220 | return math_ops.equal(x, y) 221 | else: 222 | return x == y 223 | 224 | height, width, _ = _ImageDimensions(image) 225 | width_diff = target_width - width 226 | offset_crop_width = max_(-width_diff // 2, 0) 227 | offset_pad_width = max_(width_diff // 2, 0) 228 | 229 | height_diff = target_height - height 230 | offset_crop_height = max_(-height_diff // 2, 0) 231 | offset_pad_height = max_(height_diff // 2, 0) 232 | 233 | # Maybe crop if needed. 
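# --------------------------------------------------------------------------- #
# Worked example, not part of the original file: bboxes_crop_or_pad above in
# numbers. Padding a 200x200 image to 300x300 with a (50, 50) offset moves a
# box spanning the central half of the original image as follows:
#   [0.25, 0.25, 0.75, 0.75] -> pixels [50, 50, 150, 150] -> shifted
#   [100, 100, 200, 200] -> renormalized by 300 -> [1/3, 1/3, 2/3, 2/3].
example_bboxes = tf.constant([[0.25, 0.25, 0.75, 0.75]])
example_bboxes = bboxes_crop_or_pad(example_bboxes, 200, 200, 50, 50, 300, 300)
# --------------------------------------------------------------------------- #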
234 | height_crop = min_(target_height, height) 235 | width_crop = min_(target_width, width) 236 | cropped = tf.image.crop_to_bounding_box(image, offset_crop_height, offset_crop_width, 237 | height_crop, width_crop) 238 | bboxes = bboxes_crop_or_pad(bboxes, 239 | height, width, 240 | -offset_crop_height, -offset_crop_width, 241 | height_crop, width_crop) 242 | # Maybe pad if needed. 243 | resized = tf.image.pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, 244 | target_height, target_width) 245 | bboxes = bboxes_crop_or_pad(bboxes, 246 | height_crop, width_crop, 247 | offset_pad_height, offset_pad_width, 248 | target_height, target_width) 249 | 250 | # In theory all the checks below are redundant. 251 | if resized.get_shape().ndims is None: 252 | raise ValueError('resized contains no shape.') 253 | 254 | resized_height, resized_width, _ = _ImageDimensions(resized) 255 | 256 | assert_ops = [] 257 | assert_ops += _assert(equal_(resized_height, target_height), ValueError, 258 | 'resized height is not correct.') 259 | assert_ops += _assert(equal_(resized_width, target_width), ValueError, 260 | 'resized width is not correct.') 261 | 262 | resized = control_flow_ops.with_dependencies(assert_ops, resized) 263 | return resized, bboxes 264 | 265 | 266 | def resize_image(image, size, 267 | method=tf.image.ResizeMethod.BILINEAR, 268 | align_corners=False): 269 | """Resize an image and bounding boxes. 270 | """ 271 | # Resize image. 272 | with tf.name_scope('resize_image'): 273 | height, width, channels = _ImageDimensions(image) 274 | image = tf.expand_dims(image, 0) 275 | image = tf.image.resize_images(image, size, 276 | method, align_corners) 277 | image = tf.reshape(image, tf.stack([size[0], size[1], channels])) 278 | return image 279 | 280 | 281 | def random_flip_left_right(image, bboxes, seed=None): 282 | """Random flip left-right of an image and its bounding boxes. 283 | """ 284 | def flip_bboxes(bboxes): 285 | """Flip bounding boxes coordinates. 286 | """ 287 | bboxes = tf.stack([bboxes[:, 0], 1 - bboxes[:, 3], 288 | bboxes[:, 2], 1 - bboxes[:, 1]], axis=-1) 289 | return bboxes 290 | 291 | # Random flip. Tensorflow implementation. 292 | with tf.name_scope('random_flip_left_right'): 293 | image = ops.convert_to_tensor(image, name='image') 294 | _Check3DImage(image, require_static=False) 295 | uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) 296 | mirror_cond = math_ops.less(uniform_random, .5) 297 | # Flip image. 298 | result = control_flow_ops.cond(mirror_cond, 299 | lambda: array_ops.reverse_v2(image, [1]), 300 | lambda: image) 301 | # Flip bboxes. 302 | bboxes = control_flow_ops.cond(mirror_cond, 303 | lambda: flip_bboxes(bboxes), 304 | lambda: bboxes) 305 | return fix_image_flip_shape(image, result), bboxes 306 | 307 | -------------------------------------------------------------------------------- /preprocessing/vgg_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides utilities to preprocess images. 16 | 17 | The preprocessing steps for VGG were introduced in the following technical 18 | report: 19 | 20 | Very Deep Convolutional Networks For Large-Scale Image Recognition 21 | Karen Simonyan and Andrew Zisserman 22 | arXiv technical report, 2015 23 | PDF: http://arxiv.org/pdf/1409.1556.pdf 24 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf 25 | CC-BY-4.0 26 | 27 | More information can be obtained from the VGG website: 28 | www.robots.ox.ac.uk/~vgg/research/very_deep/ 29 | """ 30 | 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | import tensorflow as tf 36 | 37 | from tensorflow.python.ops import control_flow_ops 38 | 39 | slim = tf.contrib.slim 40 | 41 | _R_MEAN = 123.68 42 | _G_MEAN = 116.78 43 | _B_MEAN = 103.94 44 | 45 | _RESIZE_SIDE_MIN = 256 46 | _RESIZE_SIDE_MAX = 512 47 | 48 | 49 | def _crop(image, offset_height, offset_width, crop_height, crop_width): 50 | """Crops the given image using the provided offsets and sizes. 51 | 52 | Note that the method doesn't assume we know the input image size but it does 53 | assume we know the input image rank. 54 | 55 | Args: 56 | image: an image of shape [height, width, channels]. 57 | offset_height: a scalar tensor indicating the height offset. 58 | offset_width: a scalar tensor indicating the width offset. 59 | crop_height: the height of the cropped image. 60 | crop_width: the width of the cropped image. 61 | 62 | Returns: 63 | the cropped (and resized) image. 64 | 65 | Raises: 66 | InvalidArgumentError: if the rank is not 3 or if the image dimensions are 67 | less than the crop size. 68 | """ 69 | original_shape = tf.shape(image) 70 | 71 | rank_assertion = tf.Assert( 72 | tf.equal(tf.rank(image), 3), 73 | ['Rank of image must be equal to 3.']) 74 | cropped_shape = control_flow_ops.with_dependencies( 75 | [rank_assertion], 76 | tf.pack([crop_height, crop_width, original_shape[2]])) 77 | 78 | size_assertion = tf.Assert( 79 | tf.logical_and( 80 | tf.greater_equal(original_shape[0], crop_height), 81 | tf.greater_equal(original_shape[1], crop_width)), 82 | ['Crop size greater than the image size.']) 83 | 84 | offsets = tf.to_int32(tf.pack([offset_height, offset_width, 0])) 85 | 86 | # Use tf.slice instead of crop_to_bounding box as it accepts tensors to 87 | # define the crop size. 88 | image = control_flow_ops.with_dependencies( 89 | [size_assertion], 90 | tf.slice(image, offsets, cropped_shape)) 91 | return tf.reshape(image, cropped_shape) 92 | 93 | 94 | def _random_crop(image_list, crop_height, crop_width): 95 | """Crops the given list of images. 96 | 97 | The function applies the same crop to each image in the list. This can be 98 | effectively applied when there are multiple image inputs of the same 99 | dimension such as: 100 | 101 | image, depths, normals = _random_crop([image, depths, normals], 120, 150) 102 | 103 | Args: 104 | image_list: a list of image tensors of the same dimension but possibly 105 | varying channel. 106 | crop_height: the new height. 107 | crop_width: the new width. 108 | 109 | Returns: 110 | the image_list with cropped images. 
111 | 112 | Raises: 113 | ValueError: if there are multiple image inputs provided with different size 114 | or the images are smaller than the crop dimensions. 115 | """ 116 | if not image_list: 117 | raise ValueError('Empty image_list.') 118 | 119 | # Compute the rank assertions. 120 | rank_assertions = [] 121 | for i in range(len(image_list)): 122 | image_rank = tf.rank(image_list[i]) 123 | rank_assert = tf.Assert( 124 | tf.equal(image_rank, 3), 125 | ['Wrong rank for tensor %s [expected] [actual]', 126 | image_list[i].name, 3, image_rank]) 127 | rank_assertions.append(rank_assert) 128 | 129 | image_shape = control_flow_ops.with_dependencies( 130 | [rank_assertions[0]], 131 | tf.shape(image_list[0])) 132 | image_height = image_shape[0] 133 | image_width = image_shape[1] 134 | crop_size_assert = tf.Assert( 135 | tf.logical_and( 136 | tf.greater_equal(image_height, crop_height), 137 | tf.greater_equal(image_width, crop_width)), 138 | ['Crop size greater than the image size.']) 139 | 140 | asserts = [rank_assertions[0], crop_size_assert] 141 | 142 | for i in range(1, len(image_list)): 143 | image = image_list[i] 144 | asserts.append(rank_assertions[i]) 145 | shape = control_flow_ops.with_dependencies([rank_assertions[i]], 146 | tf.shape(image)) 147 | height = shape[0] 148 | width = shape[1] 149 | 150 | height_assert = tf.Assert( 151 | tf.equal(height, image_height), 152 | ['Wrong height for tensor %s [expected][actual]', 153 | image.name, height, image_height]) 154 | width_assert = tf.Assert( 155 | tf.equal(width, image_width), 156 | ['Wrong width for tensor %s [expected][actual]', 157 | image.name, width, image_width]) 158 | asserts.extend([height_assert, width_assert]) 159 | 160 | # Create a random bounding box. 161 | # 162 | # Use tf.random_uniform and not numpy.random.rand as doing the former would 163 | # generate random numbers at graph eval time, unlike the latter which 164 | # generates random numbers at graph definition time. 165 | max_offset_height = control_flow_ops.with_dependencies( 166 | asserts, tf.reshape(image_height - crop_height + 1, [])) 167 | max_offset_width = control_flow_ops.with_dependencies( 168 | asserts, tf.reshape(image_width - crop_width + 1, [])) 169 | offset_height = tf.random_uniform( 170 | [], maxval=max_offset_height, dtype=tf.int32) 171 | offset_width = tf.random_uniform( 172 | [], maxval=max_offset_width, dtype=tf.int32) 173 | 174 | return [_crop(image, offset_height, offset_width, 175 | crop_height, crop_width) for image in image_list] 176 | 177 | 178 | def _central_crop(image_list, crop_height, crop_width): 179 | """Performs central crops of the given image list. 180 | 181 | Args: 182 | image_list: a list of image tensors of the same dimension but possibly 183 | varying channel. 184 | crop_height: the height of the image following the crop. 185 | crop_width: the width of the image following the crop. 186 | 187 | Returns: 188 | the list of cropped images. 189 | """ 190 | outputs = [] 191 | for image in image_list: 192 | image_height = tf.shape(image)[0] 193 | image_width = tf.shape(image)[1] 194 | 195 | offset_height = (image_height - crop_height) / 2 196 | offset_width = (image_width - crop_width) / 2 197 | 198 | outputs.append(_crop(image, offset_height, offset_width, 199 | crop_height, crop_width)) 200 | return outputs 201 | 202 | 203 | def _mean_image_subtraction(image, means): 204 | """Subtracts the given means from each image channel. 
205 | 206 | For example: 207 | means = [123.68, 116.779, 103.939] 208 | image = _mean_image_subtraction(image, means) 209 | 210 | Note that the rank of `image` must be known. 211 | 212 | Args: 213 | image: a tensor of size [height, width, C]. 214 | means: a C-vector of values to subtract from each channel. 215 | 216 | Returns: 217 | the centered image. 218 | 219 | Raises: 220 | ValueError: If the rank of `image` is unknown, if `image` has a rank other 221 | than three or if the number of channels in `image` doesn't match the 222 | number of values in `means`. 223 | """ 224 | if image.get_shape().ndims != 3: 225 | raise ValueError('Input must be of size [height, width, C>0]') 226 | num_channels = image.get_shape().as_list()[-1] 227 | if len(means) != num_channels: 228 | raise ValueError('len(means) must match the number of channels') 229 | 230 | channels = tf.split(image, num_channels, axis=2) 231 | for i in range(num_channels): 232 | channels[i] -= means[i] 233 | return tf.concat(channels, axis=2) 234 | 235 | 236 | def _smallest_size_at_least(height, width, smallest_side): 237 | """Computes new shape with the smallest side equal to `smallest_side`. 238 | 239 | Computes new shape with the smallest side equal to `smallest_side` while 240 | preserving the original aspect ratio. 241 | 242 | Args: 243 | height: an int32 scalar tensor indicating the current height. 244 | width: an int32 scalar tensor indicating the current width. 245 | smallest_side: A python integer or scalar `Tensor` indicating the size of 246 | the smallest side after resize. 247 | 248 | Returns: 249 | new_height: an int32 scalar tensor indicating the new height. 250 | new_width: an int32 scalar tensor indicating the new width. 251 | """ 252 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 253 | 254 | height = tf.to_float(height) 255 | width = tf.to_float(width) 256 | smallest_side = tf.to_float(smallest_side) 257 | 258 | scale = tf.cond(tf.greater(height, width), 259 | lambda: smallest_side / width, 260 | lambda: smallest_side / height) 261 | new_height = tf.to_int32(height * scale) 262 | new_width = tf.to_int32(width * scale) 263 | return new_height, new_width 264 | 265 | 266 | def _aspect_preserving_resize(image, smallest_side): 267 | """Resize images preserving the original aspect ratio. 268 | 269 | Args: 270 | image: A 3-D image `Tensor`. 271 | smallest_side: A python integer or scalar `Tensor` indicating the size of 272 | the smallest side after resize. 273 | 274 | Returns: 275 | resized_image: A 3-D tensor containing the resized image. 276 | """ 277 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 278 | 279 | shape = tf.shape(image) 280 | height = shape[0] 281 | width = shape[1] 282 | new_height, new_width = _smallest_size_at_least(height, width, smallest_side) 283 | image = tf.expand_dims(image, 0) 284 | resized_image = tf.image.resize_bilinear(image, [new_height, new_width], 285 | align_corners=False) 286 | resized_image = tf.squeeze(resized_image) 287 | resized_image.set_shape([None, None, 3]) 288 | return resized_image 289 | 290 | 291 | def preprocess_for_train(image, 292 | output_height, 293 | output_width, 294 | resize_side_min=_RESIZE_SIDE_MIN, 295 | resize_side_max=_RESIZE_SIDE_MAX): 296 | """Preprocesses the given image for training. 297 | 298 | Note that the actual resizing scale is sampled from 299 | [`resize_side_min`, `resize_side_max`]. 300 | 301 | Args: 302 | image: A `Tensor` representing an image of arbitrary size.
303 | output_height: The height of the image after preprocessing. 304 | output_width: The width of the image after preprocessing. 305 | resize_side_min: The lower bound for the smallest side of the image for 306 | aspect-preserving resizing. 307 | resize_side_max: The upper bound for the smallest side of the image for 308 | aspect-preserving resizing. 309 | 310 | Returns: 311 | A preprocessed image. 312 | """ 313 | resize_side = tf.random_uniform( 314 | [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) 315 | 316 | image = _aspect_preserving_resize(image, resize_side) 317 | image = _random_crop([image], output_height, output_width)[0] 318 | image.set_shape([output_height, output_width, 3]) 319 | image = tf.to_float(image) 320 | image = tf.image.random_flip_left_right(image) 321 | return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) 322 | 323 | 324 | def preprocess_for_eval(image, output_height, output_width, resize_side): 325 | """Preprocesses the given image for evaluation. 326 | 327 | Args: 328 | image: A `Tensor` representing an image of arbitrary size. 329 | output_height: The height of the image after preprocessing. 330 | output_width: The width of the image after preprocessing. 331 | resize_side: The smallest side of the image for aspect-preserving resizing. 332 | 333 | Returns: 334 | A preprocessed image. 335 | """ 336 | image = _aspect_preserving_resize(image, resize_side) 337 | image = _central_crop([image], output_height, output_width)[0] 338 | image.set_shape([output_height, output_width, 3]) 339 | image = tf.to_float(image) 340 | return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) 341 | 342 | 343 | def preprocess_image(image, output_height, output_width, is_training=False, 344 | resize_side_min=_RESIZE_SIDE_MIN, 345 | resize_side_max=_RESIZE_SIDE_MAX): 346 | """Preprocesses the given image. 347 | 348 | Args: 349 | image: A `Tensor` representing an image of arbitrary size. 350 | output_height: The height of the image after preprocessing. 351 | output_width: The width of the image after preprocessing. 352 | is_training: `True` if we're preprocessing the image for training and 353 | `False` otherwise. 354 | resize_side_min: The lower bound for the smallest side of the image for 355 | aspect-preserving resizing. If `is_training` is `False`, then this value 356 | is used for rescaling. 357 | resize_side_max: The upper bound for the smallest side of the image for 358 | aspect-preserving resizing. If `is_training` is `False`, this value is 359 | ignored. Otherwise, the resize side is sampled from 360 | [resize_size_min, resize_size_max]. 361 | 362 | Returns: 363 | A preprocessed image. 364 | """ 365 | if is_training: 366 | return preprocess_for_train(image, output_height, output_width, 367 | resize_side_min, resize_side_max) 368 | else: 369 | return preprocess_for_eval(image, output_height, output_width, 370 | resize_side_min) 371 | -------------------------------------------------------------------------------- /tf_convert_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Convert a dataset to TFRecords format, which can be easily integrated into 16 | a TensorFlow pipeline. 17 | 18 | Usage: 19 | ```shell 20 | python tf_convert_data.py \ 21 | --dataset_name=pascalvoc \ 22 | --dataset_dir=/tmp/pascalvoc \ 23 | --output_name=pascalvoc \ 24 | --output_dir=/tmp/ 25 | ``` 26 | """ 27 | import tensorflow as tf 28 | 29 | from datasets import pascalvoc_to_tfrecords 30 | 31 | FLAGS = tf.app.flags.FLAGS 32 | 33 | tf.app.flags.DEFINE_string( 34 | 'dataset_name', 'pascalvoc', 35 | 'The name of the dataset to convert.') 36 | tf.app.flags.DEFINE_string( 37 | 'dataset_dir', None, 38 | 'Directory where the original dataset is stored.') 39 | tf.app.flags.DEFINE_string( 40 | 'output_name', 'pascalvoc', 41 | 'Basename used for TFRecords output files.') 42 | tf.app.flags.DEFINE_string( 43 | 'output_dir', './', 44 | 'Output directory where to store TFRecords files.') 45 | 46 | 47 | def main(_): 48 | if not FLAGS.dataset_dir: 49 | raise ValueError('You must supply the dataset directory with --dataset_dir') 50 | print('Dataset directory:', FLAGS.dataset_dir) 51 | print('Output directory:', FLAGS.output_dir) 52 | 53 | if FLAGS.dataset_name == 'pascalvoc': 54 | pascalvoc_to_tfrecords.run(FLAGS.dataset_dir, FLAGS.output_dir, FLAGS.output_name) 55 | else: 56 | raise ValueError('Dataset [%s] was not recognized.' % FLAGS.dataset_name) 57 | 58 | if __name__ == '__main__': 59 | tf.app.run() 60 | 61 | -------------------------------------------------------------------------------- /tf_extended/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional metrics. 
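The package re-exports the bboxes, image, math, metrics and tensors sub-modules through the wildcard imports below, not only the metrics.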
16 | """ 17 | 18 | # pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import 19 | from tf_extended.metrics import * 20 | from tf_extended.tensors import * 21 | from tf_extended.bboxes import * 22 | from tf_extended.image import * 23 | from tf_extended.math import * 24 | 25 | -------------------------------------------------------------------------------- /tf_extended/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balancap/SSD-Tensorflow/e0e3104d3a2cc5d830fad041d4e56ebcf84caac3/tf_extended/image.py -------------------------------------------------------------------------------- /tf_extended/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional math functions. 16 | """ 17 | import tensorflow as tf 18 | 19 | from tensorflow.python.ops import array_ops 20 | from tensorflow.python.ops import math_ops 21 | from tensorflow.python.framework import dtypes 22 | from tensorflow.python.framework import ops 23 | 24 | 25 | def safe_divide(numerator, denominator, name): 26 | """Divides two values, returning 0 if the denominator is <= 0. 27 | Args: 28 | numerator: A real `Tensor`. 29 | denominator: A real `Tensor`, with dtype matching `numerator`. 30 | name: Name for the returned op. 31 | Returns: 32 | 0 if `denominator` <= 0, else `numerator` / `denominator` 33 | """ 34 | return tf.where( 35 | math_ops.greater(denominator, 0), 36 | math_ops.divide(numerator, denominator), 37 | tf.zeros_like(numerator), 38 | name=name) 39 | 40 | 41 | def cummax(x, reverse=False, name=None): 42 | """Compute the cumulative maximum of the tensor `x` along `axis`. This 43 | operation is similar to the more classic `cumsum`. Only support 1D Tensor 44 | for now. 45 | 46 | Args: 47 | x: A `Tensor`. Must be one of the following types: `float32`, `float64`, 48 | `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, 49 | `complex128`, `qint8`, `quint8`, `qint32`, `half`. 50 | axis: A `Tensor` of type `int32` (default: 0). 51 | reverse: A `bool` (default: False). 52 | name: A name for the operation (optional). 53 | Returns: 54 | A `Tensor`. Has the same type as `x`. 55 | """ 56 | with ops.name_scope(name, "Cummax", [x]) as name: 57 | x = ops.convert_to_tensor(x, name="x") 58 | # Not very optimal: should directly integrate reverse into tf.scan. 59 | if reverse: 60 | x = tf.reverse(x, axis=[0]) 61 | # 'Accumlating' maximum: ensure it is always increasing. 
62 | cmax = tf.scan(lambda a, y: tf.maximum(a, y), x, 63 | initializer=None, parallel_iterations=1, 64 | back_prop=False, swap_memory=False) 65 | if reverse: 66 | cmax = tf.reverse(cmax, axis=[0]) 67 | return cmax 68 | -------------------------------------------------------------------------------- /tf_extended/tensors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional tensors operations. 16 | """ 17 | import tensorflow as tf 18 | 19 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables 20 | from tensorflow.contrib.metrics.python.ops import set_ops 21 | from tensorflow.python.framework import dtypes 22 | from tensorflow.python.framework import ops 23 | from tensorflow.python.framework import sparse_tensor 24 | from tensorflow.python.ops import array_ops 25 | from tensorflow.python.ops import check_ops 26 | from tensorflow.python.ops import control_flow_ops 27 | from tensorflow.python.ops import math_ops 28 | from tensorflow.python.ops import nn 29 | from tensorflow.python.ops import state_ops 30 | from tensorflow.python.ops import variable_scope 31 | from tensorflow.python.ops import variables 32 | 33 | 34 | def get_shape(x, rank=None): 35 | """Returns the dimensions of a Tensor as a list of integers or scalar tensors. 36 | 37 | Args: 38 | x: N-d Tensor; 39 | rank: Rank of the Tensor. If None, will try to guess it. 40 | Returns: 41 | A list of `[d1, d2, ..., dN]` corresponding to the dimensions of the 42 | input tensor. Dimensions that are statically known are python integers, 43 | otherwise they are integer scalar tensors. 44 | """ 45 | if x.get_shape().is_fully_defined(): 46 | return x.get_shape().as_list() 47 | else: 48 | static_shape = x.get_shape() 49 | if rank is None: 50 | static_shape = static_shape.as_list() 51 | rank = len(static_shape) 52 | else: 53 | static_shape = x.get_shape().with_rank(rank).as_list() 54 | dynamic_shape = tf.unstack(tf.shape(x), rank) 55 | return [s if s is not None else d 56 | for s, d in zip(static_shape, dynamic_shape)] 57 | 58 | 59 | def pad_axis(x, offset, size, axis=0, name=None): 60 | """Pad a tensor on an axis, with a given offset and output size. 61 | The tensor is padded with zero (i.e. CONSTANT mode). Note that if 62 | `size` is smaller than the existing size + `offset`, the output tensor 63 | keeps the latter as its dimension on `axis`. 64 | 65 | Args: 66 | x: Tensor to pad; 67 | offset: Offset to add on the dimension chosen; 68 | size: Final size of the dimension. 69 | Returns: 70 | Padded tensor whose dimension on `axis` is `size`, or greater if 71 | the input vector was larger. 72 | """ 73 | with tf.name_scope(name, 'pad_axis'): 74 | shape = get_shape(x) 75 | rank = len(shape) 76 | # Padding description.
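# Illustrative note (added comment, not in the original file): with
# x = [1, 2], offset=1, size=5 and axis=0, paddings below is [[1, 2]],
# so the padded (and reshaped) output is [0, 1, 2, 0, 0].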
77 | new_size = tf.maximum(size-offset-shape[axis], 0) 78 | pad1 = tf.stack([0]*axis + [offset] + [0]*(rank-axis-1)) 79 | pad2 = tf.stack([0]*axis + [new_size] + [0]*(rank-axis-1)) 80 | paddings = tf.stack([pad1, pad2], axis=1) 81 | x = tf.pad(x, paddings, mode='CONSTANT') 82 | # Reshape, to get fully defined shape if possible. 83 | # TODO: fix with tf.slice 84 | shape[axis] = size 85 | x = tf.reshape(x, tf.stack(shape)) 86 | return x 87 | 88 | 89 | # def select_at_index(idx, val, t): 90 | # """Return a tensor. 91 | # """ 92 | # idx = tf.expand_dims(tf.expand_dims(idx, 0), 0) 93 | # val = tf.expand_dims(val, 0) 94 | # t = t + tf.scatter_nd(idx, val, tf.shape(t)) 95 | # return t 96 | -------------------------------------------------------------------------------- /tf_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Diverse TensorFlow utils, for training, evaluation and so on! 16 | """ 17 | import os 18 | from pprint import pprint 19 | 20 | import tensorflow as tf 21 | from tensorflow.contrib.slim.python.slim.data import parallel_reader 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | # =========================================================================== # 27 | # General tools. 28 | # =========================================================================== # 29 | def reshape_list(l, shape=None): 30 | """Reshape list of (list): 1D to 2D or the other way around. 31 | 32 | Args: 33 | l: List or List of list. 34 | shape: 1D or 2D shape. 35 | Return 36 | Reshaped list. 37 | """ 38 | r = [] 39 | if shape is None: 40 | # Flatten everything. 41 | for a in l: 42 | if isinstance(a, (list, tuple)): 43 | r = r + list(a) 44 | else: 45 | r.append(a) 46 | else: 47 | # Reshape to list of list. 48 | i = 0 49 | for s in shape: 50 | if s == 1: 51 | r.append(l[i]) 52 | else: 53 | r.append(l[i:i+s]) 54 | i += s 55 | return r 56 | 57 | 58 | # =========================================================================== # 59 | # Training utils. 60 | # =========================================================================== # 61 | def print_configuration(flags, ssd_params, data_sources, save_dir=None): 62 | """Print the training configuration. 
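Args:
  flags: training / evaluation FLAGS, printed with pprint.
  ssd_params: SSD network parameters (namedtuple), printed as a dict.
  data_sources: dataset source pattern(s) used to list the TFRecords files.
  save_dir: if not None, the configuration is also written to
    `save_dir/training_config.txt`.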
63 | """ 64 | def print_config(stream=None): 65 | print('\n# =========================================================================== #', file=stream) 66 | print('# Training | Evaluation flags:', file=stream) 67 | print('# =========================================================================== #', file=stream) 68 | pprint(flags, stream=stream) 69 | 70 | print('\n# =========================================================================== #', file=stream) 71 | print('# SSD net parameters:', file=stream) 72 | print('# =========================================================================== #', file=stream) 73 | pprint(dict(ssd_params._asdict()), stream=stream) 74 | 75 | print('\n# =========================================================================== #', file=stream) 76 | print('# Training | Evaluation dataset files:', file=stream) 77 | print('# =========================================================================== #', file=stream) 78 | data_files = parallel_reader.get_data_files(data_sources) 79 | pprint(sorted(data_files), stream=stream) 80 | print('', file=stream) 81 | 82 | print_config(None) 83 | # Save to a text file as well. 84 | if save_dir is not None: 85 | if not os.path.exists(save_dir): 86 | os.makedirs(save_dir) 87 | path = os.path.join(save_dir, 'training_config.txt') 88 | with open(path, "w") as out: 89 | print_config(out) 90 | 91 | 92 | def configure_learning_rate(flags, num_samples_per_epoch, global_step): 93 | """Configures the learning rate. 94 | 95 | Args: 96 | num_samples_per_epoch: The number of samples in each epoch of training. 97 | global_step: The global_step tensor. 98 | Returns: 99 | A `Tensor` representing the learning rate. 100 | """ 101 | decay_steps = int(num_samples_per_epoch / flags.batch_size * 102 | flags.num_epochs_per_decay) 103 | 104 | if flags.learning_rate_decay_type == 'exponential': 105 | return tf.train.exponential_decay(flags.learning_rate, 106 | global_step, 107 | decay_steps, 108 | flags.learning_rate_decay_factor, 109 | staircase=True, 110 | name='exponential_decay_learning_rate') 111 | elif flags.learning_rate_decay_type == 'fixed': 112 | return tf.constant(flags.learning_rate, name='fixed_learning_rate') 113 | elif flags.learning_rate_decay_type == 'polynomial': 114 | return tf.train.polynomial_decay(flags.learning_rate, 115 | global_step, 116 | decay_steps, 117 | flags.end_learning_rate, 118 | power=1.0, 119 | cycle=False, 120 | name='polynomial_decay_learning_rate') 121 | else: 122 | raise ValueError('learning_rate_decay_type [%s] was not recognized', 123 | flags.learning_rate_decay_type) 124 | 125 | 126 | def configure_optimizer(flags, learning_rate): 127 | """Configures the optimizer used for training. 128 | 129 | Args: 130 | learning_rate: A scalar or `Tensor` learning rate. 131 | Returns: 132 | An instance of an optimizer. 
133 | """ 134 | if flags.optimizer == 'adadelta': 135 | optimizer = tf.train.AdadeltaOptimizer( 136 | learning_rate, 137 | rho=flags.adadelta_rho, 138 | epsilon=flags.opt_epsilon) 139 | elif flags.optimizer == 'adagrad': 140 | optimizer = tf.train.AdagradOptimizer( 141 | learning_rate, 142 | initial_accumulator_value=flags.adagrad_initial_accumulator_value) 143 | elif flags.optimizer == 'adam': 144 | optimizer = tf.train.AdamOptimizer( 145 | learning_rate, 146 | beta1=flags.adam_beta1, 147 | beta2=flags.adam_beta2, 148 | epsilon=flags.opt_epsilon) 149 | elif flags.optimizer == 'ftrl': 150 | optimizer = tf.train.FtrlOptimizer( 151 | learning_rate, 152 | learning_rate_power=flags.ftrl_learning_rate_power, 153 | initial_accumulator_value=flags.ftrl_initial_accumulator_value, 154 | l1_regularization_strength=flags.ftrl_l1, 155 | l2_regularization_strength=flags.ftrl_l2) 156 | elif flags.optimizer == 'momentum': 157 | optimizer = tf.train.MomentumOptimizer( 158 | learning_rate, 159 | momentum=flags.momentum, 160 | name='Momentum') 161 | elif flags.optimizer == 'rmsprop': 162 | optimizer = tf.train.RMSPropOptimizer( 163 | learning_rate, 164 | decay=flags.rmsprop_decay, 165 | momentum=flags.rmsprop_momentum, 166 | epsilon=flags.opt_epsilon) 167 | elif flags.optimizer == 'sgd': 168 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 169 | else: 170 | raise ValueError('Optimizer [%s] was not recognized', flags.optimizer) 171 | return optimizer 172 | 173 | 174 | def add_variables_summaries(learning_rate): 175 | summaries = [] 176 | for variable in slim.get_model_variables(): 177 | summaries.append(tf.summary.histogram(variable.op.name, variable)) 178 | summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate)) 179 | return summaries 180 | 181 | 182 | def update_model_scope(var, ckpt_scope, new_scope): 183 | return var.op.name.replace(new_scope,'vgg_16') 184 | 185 | 186 | def get_init_fn(flags): 187 | """Returns a function run by the chief worker to warm-start the training. 188 | Note that the init_fn is only run when initializing the model during the very 189 | first global step. 190 | 191 | Returns: 192 | An init function run by the supervisor. 193 | """ 194 | if flags.checkpoint_path is None: 195 | return None 196 | # Warn the user if a checkpoint exists in the train_dir. Then ignore. 197 | if tf.train.latest_checkpoint(flags.train_dir): 198 | tf.logging.info( 199 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 200 | % flags.train_dir) 201 | return None 202 | 203 | exclusions = [] 204 | if flags.checkpoint_exclude_scopes: 205 | exclusions = [scope.strip() 206 | for scope in flags.checkpoint_exclude_scopes.split(',')] 207 | 208 | # TODO(sguada) variables.filter_variables() 209 | variables_to_restore = [] 210 | for var in slim.get_model_variables(): 211 | excluded = False 212 | for exclusion in exclusions: 213 | if var.op.name.startswith(exclusion): 214 | excluded = True 215 | break 216 | if not excluded: 217 | variables_to_restore.append(var) 218 | # Change model scope if necessary. 219 | if flags.checkpoint_model_scope is not None: 220 | variables_to_restore = \ 221 | {var.op.name.replace(flags.model_name, 222 | flags.checkpoint_model_scope): var 223 | for var in variables_to_restore} 224 | 225 | 226 | if tf.gfile.IsDirectory(flags.checkpoint_path): 227 | checkpoint_path = tf.train.latest_checkpoint(flags.checkpoint_path) 228 | else: 229 | checkpoint_path = flags.checkpoint_path 230 | tf.logging.info('Fine-tuning from %s. 
Ignoring missing vars: %s' % (checkpoint_path, flags.ignore_missing_vars)) 231 | 232 | return slim.assign_from_checkpoint_fn( 233 | checkpoint_path, 234 | variables_to_restore, 235 | ignore_missing_vars=flags.ignore_missing_vars) 236 | 237 | 238 | def get_variables_to_train(flags): 239 | """Returns a list of variables to train. 240 | 241 | Returns: 242 | A list of variables to train by the optimizer. 243 | """ 244 | if flags.trainable_scopes is None: 245 | return tf.trainable_variables() 246 | else: 247 | scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')] 248 | 249 | variables_to_train = [] 250 | for scope in scopes: 251 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 252 | variables_to_train.extend(variables) 253 | return variables_to_train 254 | 255 | 256 | # =========================================================================== # 257 | # Evaluation utils. 258 | # =========================================================================== # 259 | --------------------------------------------------------------------------------