├── .gitignore ├── LICENSE ├── README.md ├── datasets ├── dataset_factory.py ├── dataset_utils.py ├── pascalvoc_2007.py ├── pascalvoc_2012.py ├── pascalvoc_common.py └── pascalvoc_to_tfrecords.py ├── deployment └── model_deploy.py ├── eval_ssd_network.py ├── nets ├── custom_layers.py ├── mobilenet_V2.py ├── nets_factory.py ├── ssd_300_mobilenet2.py ├── ssd_common.py └── ssd_vgg_300.py ├── preprocessing ├── preprocessing_factory.py ├── ssd_vgg_preprocessing.py └── tf_image.py ├── ssd_visualize.py ├── tf_convert_data.py ├── tf_extended ├── __init__.py ├── bboxes.py ├── image.py ├── math.py ├── metrics.py └── tensors.py ├── tf_utils.py ├── train.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Fanbinqi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | SSD: Single Shot MultiBox Detector in TensorFlow 3 | ======= 4 | A Tensorflow implementation of [SSD](https://arxiv.org/abs/1512.02325) from the 2016 paper by Wei Liu. As a classical network framework of one-stage detectors, SSD are widely used. Our code is based on [balancap/SSD-Tensorflow](https://github.com/balancap/SSD-Tensorflow). The official and original Caffe code can be found in [Caffe](https://github.com/weiliu89/caffe/tree/ssd). 5 | 6 | DATASET 7 | ------- 8 | 9 | You can edit the data and path information yourself in the `tf_convert_data.py` file, then run `python tf_convert_data.py`
10 | Note that the previous command generates a collection of TFRecord files instead of a single file, in order to ease shuffling during training.
11 | 12 | 13 | Pre-trained model 14 | ------------------------------- 15 | SSD300 trained on VOC0712: [balancap/SSD-Tensorflow](https://github.com/balancap/SSD-Tensorflow) 16 | 17 | Train 18 | --------- 19 | `python train.py` You can track your training in TensorBoard in real time.
20 | On the CITY data set, the single-class (car) model has reached 84% mAP. 21 | 22 | In addition 23 | ------- 24 | We implemented *Mobilenet2-SSD*; you can change the framework in `nets/ssd_300_mobilenet2.py`. Mobilenet-v2 is an improved version of Mobilenet, but we found that it does not bring a big improvement for detection. 25 | 26 | Modified Network 27 | --------------------- 28 | There are two improved network structures for SSD: [CEBNet](https://github.com/dlyldxwl/CEBNet) (ICME 2019) and [FFBNet](https://github.com/fanbinqi/FFBNet) (ICIP 2019). 29 | -------------------------------------------------------------------------------- /datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from datasets import pascalvoc_2007 6 | from datasets import pascalvoc_2012 7 | 8 | datasets_map = { 9 | 'pascalvoc_2007' : pascalvoc_2007, 10 | 'pascalvoc_2012' : pascalvoc_2012, 11 | } 12 | 13 | def get_dataset(name, split_name, dataset_dir, file_pattern = None, reader = None): 14 | """Given a dataset name and a split_name returns a Dataset. 15 | 16 | Args: 17 | name: String, the name of the dataset. 18 | split_name: A train/test split name. 19 | dataset_dir: The directory where the dataset files are stored. 20 | file_pattern: The file pattern to use for matching the dataset source files. 21 | reader: The subclass of tf.ReaderBase. If left as `None`, then the default 22 | reader defined by each dataset is used. 23 | Returns: 24 | A `Dataset` class. 25 | Raises: 26 | ValueError: If the dataset `name` is unknown.
LABELS_FILENAME = 'labels.txt'


def _as_list(value):
    """Return `value` as a list suitable for a `tf.train.*List` proto.

    Lists are returned unchanged, tuples are converted to lists, and any
    other object (a scalar, a `bytes` string, ...) is wrapped in a
    one-element list.
    """
    if isinstance(value, list):
        return value
    if isinstance(value, tuple):
        return list(value)
    return [value]


def int64_feature(value):
    """Wrapper for inserting int64 features into Example proto.

    Args:
      value: A single integer, or a list/tuple of integers.

    Returns:
      A `tf.train.Feature` whose `int64_list` holds the value(s).
    """
    return tf.train.Feature(
        int64_list=tf.train.Int64List(value=_as_list(value)))


def float_feature(value):
    """Wrapper for inserting float features into Example proto.

    Args:
      value: A single float, or a list/tuple of floats.

    Returns:
      A `tf.train.Feature` whose `float_list` holds the value(s).
    """
    return tf.train.Feature(
        float_list=tf.train.FloatList(value=_as_list(value)))


def bytes_feature(value):
    """Wrapper for inserting bytes features into Example proto.

    Args:
      value: A single bytes string, or a list/tuple of bytes strings.

    Returns:
      A `tf.train.Feature` whose `bytes_list` holds the value(s).
    """
    return tf.train.Feature(
        bytes_list=tf.train.BytesList(value=_as_list(value)))


def image_to_tfexample(image_data, image_format, height, width, class_id):
    """Build a classification-style `tf.train.Example` for one image.

    Args:
      image_data: Encoded image, stored under 'image/encoded'.
      image_format: Image format, stored under 'image/format'.
      height: Image height, stored under 'image/height'.
      width: Image width, stored under 'image/width'.
      class_id: Class label, stored under 'image/class/label'.

    Returns:
      A `tf.train.Example` holding the five features above.
    """
    features = {
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/class/label': int64_feature(class_id),
        'image/height': int64_feature(height),
        'image/width': int64_feature(width),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))


def download_and_uncompress_tarball(tarball_url, dataset_dir):
    """Downloads the `tarball_url` and uncompresses it locally.

    The archive is saved in `dataset_dir` under the last path component of
    the URL and then extracted into `dataset_dir`.

    Args:
      tarball_url: The URL of a gzip-compressed tarball file.
      dataset_dir: The directory where the temporary files are stored.
    """
    filename = tarball_url.split('/')[-1]
    filepath = os.path.join(dataset_dir, filename)

    def _progress(count, block_size, total_size):
        # urlretrieve may report a non-positive total size when the server
        # does not announce one; skip the percentage instead of dividing by
        # zero or printing a negative value.
        if total_size > 0:
            percent = float(count * block_size) / float(total_size) * 100.0
            sys.stdout.write(
                '\r>> Downloading %s %.1f%%' % (filename, percent))
        else:
            sys.stdout.write('\r>> Downloading %s' % filename)
        sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this helper with tarballs from a trusted source.
    # The context manager closes the archive handle once extraction is done.
    with tarfile.open(filepath, 'r:gz') as tarball:
        tarball.extractall(dataset_dir)


def write_label_file(labels_to_class_names, dataset_dir,
                     filename=LABELS_FILENAME):
    """Writes a file with the list of class names.

    One '<label>:<class name>' line is written per entry.

    Args:
      labels_to_class_names: A map of (integer) labels to class names.
      dataset_dir: The directory in which the labels file should be written.
      filename: The filename where the class names are written.
    """
    labels_filename = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(labels_filename, 'w') as f:
        for label, class_name in labels_to_class_names.items():
            f.write('%d:%s\n' % (label, class_name))


def has_labels(dataset_dir, filename=LABELS_FILENAME):
    """Specifies whether or not the dataset directory contains a label map file.

    Args:
      dataset_dir: The directory in which the labels file is found.
      filename: The filename where the class names are written.

    Returns:
      `True` if the labels file exists and `False` otherwise.
    """
    return tf.gfile.Exists(os.path.join(dataset_dir, filename))


def read_label_file(dataset_dir, filename=LABELS_FILENAME):
    """Reads the labels file and returns a mapping from ID to class name.

    The file is read in binary mode and parsed as '<label>:<class name>'
    lines; empty lines are ignored.

    Args:
      dataset_dir: The directory in which the labels file is found.
      filename: The filename where the class names are written.

    Returns:
      A map from a label (integer) to class name. The class names are the
      raw slices of the binary file content (they are not decoded here).

    Raises:
      ValueError: If a non-empty line has no ':' separator or its label is
        not an integer.
    """
    labels_filename = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(labels_filename, 'rb') as f:
        content = f.read()

    labels_to_class_names = {}
    for line in content.split(b'\n'):
        if not line:
            continue
        index = line.index(b':')
        labels_to_class_names[int(line[:index])] = line[index + 1:]
    return labels_to_class_names
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the Pascal VOC Dataset (images + annotations). 16 | """ 17 | import tensorflow as tf 18 | from datasets import pascalvoc_common 19 | 20 | slim = tf.contrib.slim 21 | 22 | FILE_PATTERN = 'voc_2007_%s_*.tfrecord' 23 | ITEMS_TO_DESCRIPTIONS = { 24 | 'image': 'A color image of varying height and width.', 25 | 'shape': 'Shape of the image', 26 | 'object/bbox': 'A list of bounding boxes, one per each object.', 27 | 'object/label': 'A list of labels, one per each object.', 28 | } 29 | # (Images, Objects) statistics on every class. 
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading Pascal VOC 2007.

    Thin wrapper that forwards to `pascalvoc_common.get_split` together with
    this module's split sizes, item descriptions and class count.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the
        split name can be inserted. Falls back to `FILE_PATTERN` when empty
        or `None`.
      reader: The TensorFlow reader type, passed through unchanged.

    Returns:
      Whatever `pascalvoc_common.get_split` returns for these arguments.
    """
    pattern = file_pattern or FILE_PATTERN
    return pascalvoc_common.get_split(
        split_name,
        dataset_dir,
        pattern,
        reader,
        SPLITS_TO_SIZES,
        ITEMS_TO_DESCRIPTIONS,
        NUM_CLASSES)
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading Pascal VOC 2012.

    Thin wrapper that forwards to `pascalvoc_common.get_split` together with
    this module's split sizes, item descriptions and class count.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the
        split name can be inserted. Falls back to `FILE_PATTERN` when empty
        or `None`.
      reader: The TensorFlow reader type, passed through unchanged.

    Returns:
      Whatever `pascalvoc_common.get_split` returns for these arguments.
    """
    pattern = file_pattern or FILE_PATTERN
    return pascalvoc_common.get_split(
        split_name,
        dataset_dir,
        pattern,
        reader,
        SPLITS_TO_SIZES,
        ITEMS_TO_DESCRIPTIONS,
        NUM_CLASSES)
def get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the
        split name can be inserted.
      reader: The TensorFlow reader type; `tf.TFRecordReader` is used when
        `None`.
      split_to_sizes: Mapping from split name to its number of samples.
      items_to_descriptions: Item descriptions forwarded to the `Dataset`.
      num_classes: Number of classes forwarded to the `Dataset`.

    Returns:
      A `slim.dataset.Dataset` describing the requested split.

    Raises:
      ValueError: if `split_name` is not a key of `split_to_sizes`.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    data_sources = os.path.join(dataset_dir, file_pattern % split_name)

    # `None` is accepted in the signature so that dataset_factory can rely on
    # the default reader.
    record_reader = tf.TFRecordReader if reader is None else reader

    # Features stored in the Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature(
            (), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
    }
    # Per-object features have a variable length (one entry per object).
    for coordinate in ('xmin', 'ymin', 'xmax', 'ymax'):
        keys_to_features['image/object/bbox/' + coordinate] = (
            tf.VarLenFeature(dtype=tf.float32))
    for attribute in ('label', 'difficult', 'truncated'):
        keys_to_features['image/object/bbox/' + attribute] = (
            tf.VarLenFeature(dtype=tf.int64))

    decoders = slim.tfexample_decoder
    items_to_handlers = {
        'image': decoders.Image('image/encoded', 'image/format'),
        'shape': decoders.Tensor('image/shape'),
        'object/bbox': decoders.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': decoders.Tensor('image/object/bbox/label'),
        'object/difficult': decoders.Tensor('image/object/bbox/difficult'),
        'object/truncated': decoders.Tensor('image/object/bbox/truncated'),
    }
    decoder = decoders.TFExampleDecoder(keys_to_features, items_to_handlers)

    # The label map is optional: it is only loaded when a labels file is
    # present in the dataset directory.
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    else:
        labels_to_names = None

    return slim.dataset.Dataset(
        data_sources=data_sources,
        reader=record_reader,
        decoder=decoder,
        num_samples=split_to_sizes[split_name],
        items_to_descriptions=items_to_descriptions,
        num_classes=num_classes,
        labels_to_names=labels_to_names)
Similarly, bounding box annotations are supposed to be 19 | stored in the 'Annotation directory' 20 | 21 | This TensorFlow script converts the training and evaluation data into 22 | a sharded data set consisting of 1024 and 128 TFRecord files, respectively. 23 | 24 | Each validation TFRecord file contains ~500 records. Each training TFREcord 25 | file contains ~1000 records. Each record within the TFRecord file is a 26 | serialized Example proto. The Example proto contains the following fields: 27 | 28 | image/encoded: string containing JPEG encoded image in RGB colorspace 29 | image/height: integer, image height in pixels 30 | image/width: integer, image width in pixels 31 | image/channels: integer, specifying the number of channels, always 3 32 | image/format: string, specifying the format, always'JPEG' 33 | 34 | 35 | image/object/bbox/xmin: list of float specifying the 0+ human annotated 36 | bounding boxes 37 | image/object/bbox/xmax: list of float specifying the 0+ human annotated 38 | bounding boxes 39 | image/object/bbox/ymin: list of float specifying the 0+ human annotated 40 | bounding boxes 41 | image/object/bbox/ymax: list of float specifying the 0+ human annotated 42 | bounding boxes 43 | image/object/bbox/label: list of integer specifying the classification index. 44 | image/object/bbox/label_text: list of string descriptions. 45 | 46 | Note that the length of xmin is identical to the length of xmax, ymin and ymax 47 | for each example. 48 | """ 49 | import os 50 | import sys 51 | import random 52 | 53 | import numpy as np 54 | import tensorflow as tf 55 | 56 | import xml.etree.ElementTree as ET 57 | 58 | from datasets.dataset_utils import int64_feature, float_feature, bytes_feature 59 | from datasets.pascalvoc_common import VOC_LABELS 60 | 61 | # Original dataset organisation. 62 | DIRECTORY_ANNOTATIONS = 'Annotations/' 63 | DIRECTORY_IMAGES = 'JPEGImages/' 64 | 65 | # TFRecords convertion parameters. 
RANDOM_SEED = 4242
SAMPLES_PER_FILES = 200


def _read_flag(obj, tag):
    """Return the integer value of the optional child `tag` of `obj`, or 0."""
    node = obj.find(tag)
    # Compare against None explicitly: an Element without sub-elements is
    # falsy, so `if obj.find(tag):` treats every leaf element as missing.
    if node is None:
        return 0
    text = (node.text or '').strip()
    return int(text) if text else 0


def _parse_annotation(root, label_map):
    """Extract the image shape and object annotations from a VOC XML tree.

    Args:
      root: Root `Element` of a Pascal VOC annotation file.
      label_map: Mapping from class name to a tuple whose first item is the
        integer class index.

    Returns:
      A tuple `(shape, bboxes, labels, labels_text, difficult, truncated)`:
      `shape` is `[height, width, depth]`; `bboxes` is a list of
      `(ymin, xmin, ymax, xmax)` tuples divided by the image height/width;
      the remaining items are lists with one entry per object.
    """
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]

    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label = obj.find('name').text
        labels.append(int(label_map[label][0]))
        labels_text.append(label.encode('ascii'))
        difficult.append(_read_flag(obj, 'difficult'))
        truncated.append(_read_flag(obj, 'truncated'))

        bbox = obj.find('bndbox')
        bboxes.append((float(bbox.find('ymin').text) / shape[0],
                       float(bbox.find('xmin').text) / shape[1],
                       float(bbox.find('ymax').text) / shape[0],
                       float(bbox.find('xmax').text) / shape[1]))
    return shape, bboxes, labels, labels_text, difficult, truncated


def _process_image(directory, name):
    """Process an image and its annotation file.

    Args:
      directory: Dataset directory containing the `DIRECTORY_IMAGES` and
        `DIRECTORY_ANNOTATIONS` sub-directories.
      name: Base name shared by the '.jpg' image and the '.xml' annotation.

    Returns:
      A tuple `(image_data, shape, bboxes, labels, labels_text, difficult,
      truncated)` where `image_data` is the raw content of the image file
      and the other items are described in `_parse_annotation`.
    """
    # Read the image file. It is opened in binary mode because the encoded
    # image is stored verbatim in the TFRecord.
    image_path = os.path.join(directory, DIRECTORY_IMAGES, name + '.jpg')
    with tf.gfile.FastGFile(image_path, 'rb') as image_file:
        image_data = image_file.read()

    # Read the XML annotation file.
    annotation_path = os.path.join(
        directory, DIRECTORY_ANNOTATIONS, name + '.xml')
    root = ET.parse(annotation_path).getroot()

    shape, bboxes, labels, labels_text, difficult, truncated = \
        _parse_annotation(root, VOC_LABELS)
    return image_data, shape, bboxes, labels, labels_text, difficult, truncated


def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
      image_data: string, encoded image content;
      labels: list of integers, class index of each object;
      labels_text: list of strings, human-readable label of each object;
      bboxes: list of bounding boxes; each box is a sequence of four values
        ordered (ymin, xmin, ymax, xmax), as produced by `_process_image`;
      shape: 3 integers, image shape (height, width, channels);
      difficult: list of integers, 'difficult' flag of each object;
      truncated: list of integers, 'truncated' flag of each object.

    Returns:
      Example proto

    Raises:
      ValueError: if a bounding box does not have exactly four values.
    """
    ymin = []
    xmin = []
    ymax = []
    xmax = []
    for box in bboxes:
        if len(box) != 4:
            raise ValueError(
                'Expected 4 values per bounding box, got %d.' % len(box))
        ymin.append(box[0])
        xmin.append(box[1])
        ymax.append(box[2])
        xmax.append(box[3])

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example


def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Loads data from image and annotations files and add them to a TFRecord.

    Args:
      dataset_dir: Dataset directory;
      name: Image name to add to the TFRecord;
      tfrecord_writer: The TFRecord writer to use for writing.
    """
    image_data, shape, bboxes, labels, labels_text, difficult, truncated = \
        _process_image(dataset_dir, name)
    example = _convert_to_example(image_data, labels, labels_text,
                                  bboxes, shape, difficult, truncated)
    tfrecord_writer.write(example.SerializeToString())


def _get_output_filename(output_dir, name, idx):
    """Return the path of shard number `idx` of the TFRecord set `name`."""
    return '%s/%s_%03d.tfrecord' % (output_dir, name, idx)


def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Runs the conversion operation.

    Every '.xml' file found in the annotations directory is converted,
    together with its image, into one record; records are written to
    consecutive TFRecord files of at most `SAMPLES_PER_FILES` records each.

    Args:
      dataset_dir: The dataset directory where the dataset is stored.
      output_dir: Output directory; created if it does not exist.
      name: Base name of the generated TFRecord files.
      shuffling: Whether to shuffle the files (with the fixed seed
        `RANDOM_SEED`) before conversion.
    """
    # The TFRecord writer needs its target directory to exist.
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    # Dataset filenames, and shuffling. Only annotation files are kept so
    # that stray directory entries are not mistaken for samples; sorting
    # keeps the order deterministic.
    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    filenames = sorted(
        entry for entry in os.listdir(path)
        if os.path.splitext(entry)[1].lower() == '.xml')
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    # Process dataset files, one TFRecord file per chunk of samples.
    total = len(filenames)
    for fidx, start in enumerate(range(0, total, SAMPLES_PER_FILES)):
        tf_filename = _get_output_filename(output_dir, name, fidx)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            stop = min(start + SAMPLES_PER_FILES, total)
            for i in range(start, stop):
                # '\r' plus an explicit flush keeps the progress report on a
                # single, continuously refreshed line.
                sys.stdout.write(
                    '\r>> Converting image %d/%d' % (i + 1, total))
                sys.stdout.flush()

                img_name = os.path.splitext(filenames[i])[0]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)

    print('\nFinished converting the Pascal VOC dataset!')
14 | # ============================================================================== 15 | """Generic evaluation script that evaluates a SSD model 16 | on a given dataset.""" 17 | import math 18 | import sys 19 | import six 20 | import time 21 | 22 | import numpy as np 23 | import tensorflow as tf 24 | import tf_extended as tfe 25 | import tf_utils 26 | from tensorflow.python.framework import ops 27 | 28 | from datasets import dataset_factory 29 | from nets import nets_factory 30 | from preprocessing import preprocessing_factory 31 | 32 | slim = tf.contrib.slim 33 | 34 | # =========================================================================== # 35 | # Some default EVAL parameters 36 | # =========================================================================== # 37 | # List of recalls values at which precision is evaluated. 38 | LIST_RECALLS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 39 | 0.90, 0.95, 0.96, 0.97, 0.98, 0.99] 40 | DATA_FORMAT = 'NHWC' 41 | 42 | # =========================================================================== # 43 | # SSD evaluation Flags. 
44 | # =========================================================================== # 45 | tf.app.flags.DEFINE_float( 46 | 'select_threshold', 0.01, 'Selection threshold.') 47 | tf.app.flags.DEFINE_integer( 48 | 'select_top_k', 400, 'Select top-k detected bounding boxes.') 49 | tf.app.flags.DEFINE_integer( 50 | 'keep_top_k', 200, 'Keep top-k detected objects.') 51 | tf.app.flags.DEFINE_float( 52 | 'nms_threshold', 0.45, 'Non-Maximum Selection threshold.') 53 | tf.app.flags.DEFINE_float( 54 | 'matching_threshold', 0.5, 'Matching threshold with groundtruth objects.') 55 | tf.app.flags.DEFINE_integer( 56 | 'eval_resize', 4, 'Image resizing: None / CENTRAL_CROP / PAD_AND_RESIZE / WARP_RESIZE.') 57 | tf.app.flags.DEFINE_integer( 58 | 'eval_image_size', None, 'Eval image size.') 59 | tf.app.flags.DEFINE_boolean( 60 | 'remove_difficult', True, 'Remove difficult objects from evaluation.') 61 | 62 | # =========================================================================== # 63 | # Main evaluation flags. 
64 | # =========================================================================== # 65 | tf.app.flags.DEFINE_integer( 66 | 'num_classes', 21, 'Number of classes to use in the dataset.') 67 | tf.app.flags.DEFINE_integer( 68 | 'batch_size', 1, 'The number of samples in each batch.') 69 | tf.app.flags.DEFINE_integer( 70 | 'max_num_batches', None, 71 | 'Max number of batches to evaluate by default use all.') 72 | tf.app.flags.DEFINE_string( 73 | 'master', '', 'The address of the TensorFlow master to use.') 74 | tf.app.flags.DEFINE_string( 75 | 'checkpoint_path', '/tmp/tfmodel/', 76 | 'The directory where the model was written to or an absolute path to a ' 77 | 'checkpoint file.') 78 | tf.app.flags.DEFINE_string( 79 | 'eval_dir', '/tmp/tfmodel/', 'Directory where the results are saved to.') 80 | tf.app.flags.DEFINE_integer( 81 | 'num_preprocessing_threads', 4, 82 | 'The number of threads used to create the batches.') 83 | tf.app.flags.DEFINE_string( 84 | 'dataset_name', 'imagenet', 'The name of the dataset to load.') 85 | tf.app.flags.DEFINE_string( 86 | 'dataset_split_name', 'test', 'The name of the train/test split.') 87 | tf.app.flags.DEFINE_string( 88 | 'dataset_dir', None, 'The directory where the dataset files are stored.') 89 | tf.app.flags.DEFINE_string( 90 | 'model_name', 'inception_v3', 'The name of the architecture to evaluate.') 91 | tf.app.flags.DEFINE_string( 92 | 'preprocessing_name', None, 'The name of the preprocessing to use. If left ' 93 | 'as `None`, then the model_name flag is used.') 94 | tf.app.flags.DEFINE_float( 95 | 'moving_average_decay', None, 96 | 'The decay to use for the moving average.' 
def _add_scalar_summary(name, value, print_value=False):
    """Register `value` as a scalar summary called `name`.

    The summary op is created outside of any collection and then added by hand
    to `tf.GraphKeys.SUMMARIES`. With `print_value`, the op is first wrapped in
    a `tf.Print` of `value`, labelled with `name`.
    """
    op = tf.summary.scalar(name, value, collections=[])
    if print_value:
        op = tf.Print(op, [value], name)
    tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)


def main(_):
    """Build the SSD evaluation graph and run it against a checkpoint.

    The graph reads the dataset selected by the flags, pre-processes and
    batches it, runs the SSD network, decodes and matches the detections
    against the groundtruth, and accumulates losses, TP/FP statistics and
    VOC07 / VOC12 average precisions as streaming metrics. Depending on
    `--wait_for_checkpoints` the metrics are evaluated once, or in a loop
    which waits for new checkpoints.

    Raises:
      ValueError: if `--dataset_dir` is not set, or (single evaluation only)
        if `--checkpoint_path` is a directory in which no checkpoint is found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        # =================================================================== #
        # Dataset + SSD model + Pre-processing
        # =================================================================== #
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)

        # Evaluation shape and associated anchors.
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.eval_dir)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device('/cpu:0'):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity=2 * FLAGS.batch_size,
                    common_queue_min=FLAGS.batch_size,
                    shuffle=False)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'object/label',
                                                             'object/bbox'])
            if FLAGS.remove_difficult:
                [gdifficults] = provider.get(['object/difficult'])
            else:
                # Nothing is flagged as difficult: all objects are evaluated.
                gdifficults = tf.zeros(tf.shape(glabels), dtype=tf.int64)

            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes, gbbox_img = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT,
                                       resize=FLAGS.eval_resize,
                                       difficults=None)

            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Five plain tensors, then three lists holding one tensor per
            # anchor layer: the layout used to un-flatten the batch below.
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation batch.
            r = tf.train.batch(
                tf_utils.reshape_list([image, glabels, gbboxes, gdifficults, gbbox_img,
                                       gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size,
                dynamic_pad=True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img, b_gclasses,
             b_glocalisations, b_gscores) = tf_utils.reshape_list(r, batch_shape)

        # =================================================================== #
        # SSD Network + Outputs decoding.
        # =================================================================== #
        arg_scope = ssd_net.arg_scope(data_format=DATA_FORMAT)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, _ = \
                ssd_net.net(b_image, is_training=False)
            # Add losses functions.
            ssd_net.losses(logits, localisations,
                           b_gclasses, b_glocalisations, b_gscores)

        # Performing post-processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detected objects from SSD output.
            localisations = ssd_net.bboxes_decode(localisations, ssd_anchors)
            rscores, rbboxes = \
                ssd_net.detected_bboxes(predictions, localisations,
                                        select_threshold=FLAGS.select_threshold,
                                        nms_threshold=FLAGS.nms_threshold,
                                        clipping_bbox=None,
                                        top_k=FLAGS.select_top_k,
                                        keep_top_k=FLAGS.keep_top_k)
            # Compute TP and FP statistics.
            num_gbboxes, tp, fp, rscores = \
                tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
                                          b_glabels, b_gbboxes, b_gdifficults,
                                          matching_threshold=FLAGS.matching_threshold)

        # Variables to restore: moving avg. or normal weights.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # =================================================================== #
        # Evaluation metrics.
        # =================================================================== #
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add the loss metrics to summaries. This loop runs before the
            # TP/FP entries are inserted, so only the losses get a summary.
            for name, metric in dict_metrics.items():
                _add_scalar_summary(name, metric[0])

            # FP and TP metrics.
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            for c in tp_fp_metric[0]:
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc07 = {}
            aps_voc12 = {}
            for c in tp_fp_metric[0]:
                # Precision and recall values.
                prec, rec = tfe.precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC07.
                v = tfe.average_precision_voc07(prec, rec)
                _add_scalar_summary('AP_VOC07/%s' % c, v)
                aps_voc07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(prec, rec)
                _add_scalar_summary('AP_VOC12/%s' % c, v)
                aps_voc12[c] = v

            # Mean average precision VOC07 (also printed when evaluated).
            mAP = tf.add_n(list(aps_voc07.values())) / len(aps_voc07)
            _add_scalar_summary('AP_VOC07/mAP', mAP, print_value=True)

            # Mean average precision VOC12 (also printed when evaluated).
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            _add_scalar_summary('AP_VOC12/mAP', mAP, print_value=True)

        # Split into values and updates ops; only the update ops are run.
        _, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics)

        # =================================================================== #
        # Evaluation loop.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

        # Number of batches. `math.ceil` returns a float on Python 2, hence
        # the explicit conversion to get an integer count on both versions.
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            num_batches = int(math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        if not FLAGS.wait_for_checkpoints:
            if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
                checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
                # Fail early with a readable message rather than handing a
                # `None` checkpoint path to the evaluator.
                if checkpoint_path is None:
                    raise ValueError('No checkpoint found in directory %s'
                                     % FLAGS.checkpoint_path)
            else:
                checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s', checkpoint_path)

            # Standard evaluation loop.
            start = time.time()
            slim.evaluation.evaluate_once(
                master=FLAGS.master,
                checkpoint_path=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=list(names_to_updates.values()),
                variables_to_restore=variables_to_restore,
                session_config=config)
            # Log time spent.
            elapsed = time.time() - start
            print('Time spent : %.3f seconds.' % elapsed)
            print('Time spent per BATCH: %.3f seconds.' % (elapsed / num_batches))

        else:
            checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s', checkpoint_path)

            # Waiting loop.
            slim.evaluation.evaluation_loop(
                master=FLAGS.master,
                checkpoint_dir=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=list(names_to_updates.values()),
                variables_to_restore=variables_to_restore,
                eval_interval_secs=60,
                max_number_of_evaluations=np.inf,
                session_config=config,
                timeout=None)
@add_arg_scope
def l2_normalization(
        inputs,
        scaling=False,
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        data_format='NHWC',
        trainable=True,
        scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    The input is normalized along its channel axis: the last axis for 'NHWC',
    axis 1 for 'NCHW'. With `scaling`, the result is multiplied by a
    per-channel variable named 'gamma'.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels]
        ('NHWC'), or with the channels on axis 1 ('NCHW').
      scaling: whether or not to add a post scaling operation along the
        dimensions which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables
        or a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format: NHWC or NCHW data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    Raises:
      ValueError: if `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    # Reject unknown formats up front: both branches below would otherwise be
    # skipped and the layer would fail later on an unbound local name.
    if data_format not in ('NHWC', 'NCHW'):
        raise ValueError('Unsupported data_format %r: expected "NHWC" or "NCHW".'
                         % (data_format,))

    with variable_scope.variable_scope(
            scope, 'L2Normalization', [inputs], reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        else:  # 'NCHW'
            norm_dim = tf.range(1, 2)
            # Explicit 1-D shape, mirroring the NHWC branch (was the bare
            # dimension `(inputs_shape[1])`).
            params_shape = inputs_shape[1:2]

        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NCHW':
                # [C] -> [C, 1, 1] so that the scale broadcasts over H and W.
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
            outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)


@add_arg_scope
def pad2d(inputs,
          pad=(0, 0),
          mode='CONSTANT',
          data_format='NHWC',
          trainable=True,
          scope=None):
    """2D Padding layer, adding a symmetric padding to H and W dimensions.

    Aims to mimic padding in Caffe and MXNet, helping the port of models to
    TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`.

    Args:
      inputs: 4D input Tensor;
      pad: 2-Tuple with padding values for H and W dimensions; each value is
        applied on both sides of its dimension;
      mode: Padding mode. C.f. `tf.pad`
      data_format: NHWC or NCHW data format.
      trainable: Not used by this layer.
      scope: Optional name scope.
    Returns:
      The padded Tensor.
    Raises:
      ValueError: if `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    if data_format not in ('NHWC', 'NCHW'):
        raise ValueError('Unsupported data_format %r: expected "NHWC" or "NCHW".'
                         % (data_format,))

    with tf.name_scope(scope, 'pad2d', [inputs]):
        # Symmetric padding of the two spatial axes; batch and channel axes
        # are left untouched.
        spatial = [[pad[0], pad[0]], [pad[1], pad[1]]]
        if data_format == 'NHWC':
            paddings = [[0, 0]] + spatial + [[0, 0]]
        else:  # 'NCHW'
            paddings = [[0, 0], [0, 0]] + spatial
        return tf.pad(inputs, paddings, mode=mode)


@add_arg_scope
def channel_to_last(inputs,
                    data_format='NHWC',
                    scope=None):
    """Move the channel axis to the last dimension. Allows to
    provide a single output format whatever the input data format.

    Args:
      inputs: Input Tensor;
      data_format: NHWC or NCHW.
      scope: Optional name scope.
    Return:
      Input in NHWC format: the input itself for 'NHWC', a transposed tensor
      for 'NCHW'.
    Raises:
      ValueError: if `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    if data_format not in ('NHWC', 'NCHW'):
        raise ValueError('Unsupported data_format %r: expected "NHWC" or "NCHW".'
                         % (data_format,))

    with tf.name_scope(scope, 'channel_to_last', [inputs]):
        if data_format == 'NCHW':
            return tf.transpose(inputs, perm=(0, 2, 3, 1))
        return inputs
19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import copy 26 | import functools 27 | 28 | import tensorflow as tf 29 | 30 | from nets import conv_blocks as ops 31 | from nets import mobilenet as lib 32 | 33 | slim = tf.contrib.slim 34 | op = lib.op 35 | 36 | expand_input = ops.expand_input_by_factor 37 | 38 | # pyformat: disable 39 | # Architecture: https://arxiv.org/abs/1801.04381 40 | V2_DEF = dict( 41 | defaults={ 42 | # Note: these parameters of batch norm affect the architecture 43 | # that's why they are here and not in training_scope. 44 | (slim.batch_norm,): {'center': True, 'scale': True}, 45 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { 46 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 47 | }, 48 | (ops.expanded_conv,): { 49 | 'expansion_size': expand_input(6), 50 | 'split_expansion': 1, 51 | 'normalizer_fn': slim.batch_norm, 52 | 'residual': True 53 | }, 54 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} 55 | }, 56 | spec=[ 57 | op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), 58 | op(ops.expanded_conv, 59 | expansion_size=expand_input(1, divisible_by=1), 60 | num_outputs=16), 61 | op(ops.expanded_conv, stride=2, num_outputs=24), 62 | op(ops.expanded_conv, stride=1, num_outputs=24), 63 | op(ops.expanded_conv, stride=2, num_outputs=32), 64 | op(ops.expanded_conv, stride=1, num_outputs=32), 65 | op(ops.expanded_conv, stride=1, num_outputs=32), 66 | op(ops.expanded_conv, stride=2, num_outputs=64), 67 | op(ops.expanded_conv, stride=1, num_outputs=64), 68 | op(ops.expanded_conv, stride=1, num_outputs=64), 69 | op(ops.expanded_conv, stride=1, num_outputs=64), 70 | op(ops.expanded_conv, stride=1, num_outputs=96), 71 | op(ops.expanded_conv, stride=1, num_outputs=96), 72 | op(ops.expanded_conv, stride=1, num_outputs=96), 73 | op(ops.expanded_conv, stride=2, num_outputs=160), 74 | op(ops.expanded_conv, 
@slim.add_arg_scope
def mobilenet(input_tensor,
              num_classes=1001,
              depth_multiplier=1.0,
              scope='MobilenetV2',
              conv_defs=None,
              finegrain_classification_mode=False,
              min_depth=None,
              divisible_by=None,
              activation_fn=None,
              **kwargs):
    """Creates mobilenet V2 network.

    Inference mode is created by default. To create training use training_scope
    below.

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

    Args:
      input_tensor: The input tensor
      num_classes: number of classes
      depth_multiplier: The multiplier applied to scale number of
        channels in each layer. It is forwarded to the generic builder as its
        `multiplier` argument.
      scope: Scope of the operator
      conv_defs: Allows to override default conv def (`V2_DEF` when None).
      finegrain_classification_mode: When set to True and `depth_multiplier`
        is below 1, `num_outputs` of the last entry of `conv_defs['spec']` is
        divided by `depth_multiplier` beforehand, so that the multiplier does
        not shrink that layer. Following https://arxiv.org/abs/1801.04381
        suggests that it improves performance for ImageNet-type of problems.
      min_depth: If provided, will ensure that all layers will have that
        many channels after application of depth multiplier.
      divisible_by: If provided will ensure that all layers # channels
        will be divisible by this number.
      activation_fn: Activation function to use; when not given, the one
        configured in `conv_defs` is kept.
      **kwargs: passed directly to mobilenet.mobilenet:
        prediction_fn- what prediction function to use.
        reuse-: whether to reuse variables (if reuse set to true, scope
        must be given).
    Returns:
      logits/endpoints pair
    Raises:
      ValueError: On invalid arguments
    """
    if conv_defs is None:
        conv_defs = V2_DEF
    if 'multiplier' in kwargs:
        raise ValueError('mobilenetv2 doesn\'t support generic '
                         'multiplier parameter use "depth_multiplier" instead.')

    # Work on a private copy as soon as the definition has to be altered, so
    # that the caller's (or the module-level) definition is never modified.
    if finegrain_classification_mode or activation_fn:
        conv_defs = copy.deepcopy(conv_defs)
    if finegrain_classification_mode and depth_multiplier < 1:
        conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier
    if activation_fn:
        conv_key = (slim.conv2d, slim.fully_connected, slim.separable_conv2d)
        conv_defs['defaults'][conv_key]['activation_fn'] = activation_fn

    # NB: only forward the depth arguments which were actually provided, to
    # avoid overriding whatever default depth_multiplier might have thanks to
    # arg_scope.
    depth_args = {
        key: value
        for key, value in (('min_depth', min_depth),
                           ('divisible_by', divisible_by))
        if value is not None
    }

    with slim.arg_scope((lib.depth_multiplier,), **depth_args):
        return lib.mobilenet(
            input_tensor,
            num_classes=num_classes,
            conv_defs=conv_defs,
            scope=scope,
            multiplier=depth_multiplier,
            **kwargs)

mobilenet.default_image_size = 224


def wrapped_partial(func, *args, **kwargs):
    """Return `functools.partial(func, *args, **kwargs)` carrying the metadata of `func`.

    The partial object is passed through `functools.update_wrapper`, so it
    exposes the name, docstring and attributes of the wrapped function.
    """
    return functools.update_wrapper(
        functools.partial(func, *args, **kwargs), func)
@slim.add_arg_scope
def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
    """Creates base of the mobilenet (no pooling and no logits).

    Thin wrapper around `mobilenet` which forces `base_only=True`.

    Args:
      input_tensor: The input tensor.
      depth_multiplier: Forwarded unchanged to `mobilenet`.
      **kwargs: Any other keyword argument accepted by `mobilenet`.
    Returns:
      Whatever `mobilenet` returns when called with `base_only=True`.
    """
    return mobilenet(
        input_tensor,
        depth_multiplier=depth_multiplier,
        base_only=True,
        **kwargs)
def get_network(name):
    """Return the network class registered under `name` in `networks_obj`.

    Raises:
      KeyError: if `name` is not a registered network.
    """
    return networks_obj[name]


def get_network_fn(name, num_classes, is_training=False, **kwargs):
    """Returns a network_fn such as `logits, end_points = network_fn(images)`.

    Args:
      name: The name of the network.
      num_classes: The number of classes to use for classification.
      is_training: `True` if the model is being used for training and `False`
        otherwise.
      **kwargs: Forwarded to the arg-scope factory registered for `name`.
    Returns:
      network_fn: A function that applies the model to a batch of images. It has
        the following signature: logits, end_points = network_fn(images)
    Raises:
      ValueError: If network `name` is not recognized.
    """
    if name not in networks_map:
        raise ValueError('Name of network unknown %s' % name)
    # The arg scope is built once, here; every call of the returned function
    # is then made under that same scope.
    scope_args = arg_scopes_map[name](**kwargs)
    net = networks_map[name]

    @functools.wraps(net)
    def network_fn(images, **net_kwargs):
        with slim.arg_scope(scope_args):
            return net(images, num_classes,
                       is_training=is_training, **net_kwargs)

    if hasattr(net, 'default_image_size'):
        network_fn.default_image_size = net.default_image_size
    return network_fn
35 | shape = (yref.shape[0], yref.shape[1], href.size) 36 | feat_labels = tf.zeros(shape, dtype=tf.int64) 37 | feat_scores = tf.zeros(shape, dtype=dtype) 38 | 39 | feat_ymin = tf.zeros(shape, dtype=dtype) 40 | feat_xmin = tf.zeros(shape, dtype=dtype) 41 | feat_ymax = tf.ones(shape, dtype=dtype) 42 | feat_xmax = tf.ones(shape, dtype=dtype) 43 | 44 | def jaccard_with_anchors(bbox): 45 | """Compute jaccard score between a box and the anchors. 46 | """ 47 | int_ymin = tf.maximum(ymin, bbox[0]) 48 | int_xmin = tf.maximum(xmin, bbox[1]) 49 | int_ymax = tf.minimum(ymax, bbox[2]) 50 | int_xmax = tf.minimum(xmax, bbox[3]) 51 | h = tf.maximum(int_ymax - int_ymin, 0.) 52 | w = tf.maximum(int_xmax - int_xmin, 0.) 53 | # Volumes. 54 | inter_vol = h * w 55 | union_vol = vol_anchors - inter_vol \ 56 | + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) 57 | jaccard = tf.div(inter_vol, union_vol) 58 | return jaccard 59 | 60 | def intersection_with_anchors(bbox): 61 | """Compute intersection between score a box and the anchors. 62 | """ 63 | int_ymin = tf.maximum(ymin, bbox[0]) 64 | int_xmin = tf.maximum(xmin, bbox[1]) 65 | int_ymax = tf.minimum(ymax, bbox[2]) 66 | int_xmax = tf.minimum(xmax, bbox[3]) 67 | h = tf.maximum(int_ymax - int_ymin, 0.) 68 | w = tf.maximum(int_xmax - int_xmin, 0.) 69 | inter_vol = h * w 70 | scores = tf.div(inter_vol, vol_anchors) 71 | return scores 72 | 73 | def condition(i, feat_labels, feat_scores, 74 | feat_ymin, feat_xmin, feat_ymax, feat_xmax): 75 | """Condition: check label index. 76 | """ 77 | r = tf.less(i, tf.shape(labels)) 78 | return r[0] 79 | 80 | def body(i, feat_labels, feat_scores, 81 | feat_ymin, feat_xmin, feat_ymax, feat_xmax): 82 | """Body: update feature labels, scores and bboxes. 83 | Follow the original SSD paper for that purpose: 84 | - assign values when jaccard > 0.5; 85 | - only update if beat the score of other bboxes. 86 | """ 87 | # Jaccard score. 
88 | label = labels[i] 89 | bbox = bboxes[i] 90 | jaccard = jaccard_with_anchors(bbox) 91 | # Mask: check threshold + scores + no annotations + num_classes. 92 | mask = tf.greater(jaccard, feat_scores) 93 | # mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold)) 94 | mask = tf.logical_and(mask, feat_scores > -0.5) 95 | mask = tf.logical_and(mask, label < num_classes) 96 | imask = tf.cast(mask, tf.int64) 97 | fmask = tf.cast(mask, dtype) 98 | # Update values using mask. 99 | feat_labels = imask * label + (1 - imask) * feat_labels 100 | feat_scores = tf.where(mask, jaccard, feat_scores) 101 | 102 | feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin 103 | feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin 104 | feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax 105 | feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax 106 | 107 | # Check no annotation label: ignore these anchors... 108 | # interscts = intersection_with_anchors(bbox) 109 | # mask = tf.logical_and(interscts > ignore_threshold, 110 | # label == no_annotation_label) 111 | # # Replace scores by -1. 112 | # feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores) 113 | 114 | return [i+1, feat_labels, feat_scores, 115 | feat_ymin, feat_xmin, feat_ymax, feat_xmax] 116 | # Main loop definition. 117 | i = 0 118 | [i, feat_labels, feat_scores, 119 | feat_ymin, feat_xmin, 120 | feat_ymax, feat_xmax] = tf.while_loop(condition, body, 121 | [i, feat_labels, feat_scores, 122 | feat_ymin, feat_xmin, 123 | feat_ymax, feat_xmax]) 124 | # Transform to center / size. 125 | feat_cy = (feat_ymax + feat_ymin) / 2. 126 | feat_cx = (feat_xmax + feat_xmin) / 2. 127 | feat_h = feat_ymax - feat_ymin 128 | feat_w = feat_xmax - feat_xmin 129 | # Encode features. 
def tf_ssd_bboxes_encode(labels,
                         bboxes,
                         anchors,
                         num_classes,
                         no_annotation_label,
                         ignore_threshold=0.5,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         dtype=tf.float32,
                         scope='ssd_bboxes_encode'):
    """Encode groundtruth labels and boxes against the anchors of every layer.

    Calls `tf_ssd_bboxes_encode_layer` once per entry of `anchors`, each call
    inside its own `bboxes_encode_block_<i>` name scope.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
      anchors: List of per-layer anchors;
      num_classes, no_annotation_label, ignore_threshold, prior_scaling,
        dtype: Forwarded unchanged to `tf_ssd_bboxes_encode_layer`;
      scope: Name of the enclosing name scope.

    Return:
      (target_labels, target_localizations, target_scores):
        Each element is a list of target Tensors, one per layer.
    """
    with tf.name_scope(scope):
        encoded_layers = []
        for layer_idx, layer_anchors in enumerate(anchors):
            with tf.name_scope('bboxes_encode_block_%i' % layer_idx):
                layer_labels, layer_loc, layer_scores = \
                    tf_ssd_bboxes_encode_layer(labels, bboxes, layer_anchors,
                                               num_classes,
                                               no_annotation_label,
                                               ignore_threshold,
                                               prior_scaling, dtype)
            encoded_layers.append((layer_labels, layer_loc, layer_scores))
        # Regroup the per-layer triples into three parallel lists.
        target_labels = [triple[0] for triple in encoded_layers]
        target_localizations = [triple[1] for triple in encoded_layers]
        target_scores = [triple[2] for triple in encoded_layers]
        return target_labels, target_localizations, target_scores
def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold = 0.5,
                      img_shape = (300, 300),
                      num_classes = 21,
                      decode = True):
    """Extract classes, scores and bounding boxes from network output layers.

    Runs `ssd_bboxes_select_layer` on every layer and concatenates the
    per-layer results along axis 0.

    Args:
      predictions_net: Sequence of per-layer class prediction arrays.
      localizations_net: Sequence of per-layer localization arrays, indexed
        in step with `predictions_net`.
      anchors_net: Sequence of per-layer anchors, indexed the same way.
      select_threshold, img_shape, num_classes, decode: Forwarded unchanged
        to `ssd_bboxes_select_layer`.

    Return:
      classes, scores, bboxes: Numpy arrays. With no layers at all, empty
      arrays of shape (0,), (0,) and (0, 4) are returned instead of letting
      `np.concatenate` fail on an empty list.
    """
    l_classes = []
    l_scores = []
    l_bboxes = []

    # Indexing the sibling sequences (rather than zip) keeps the IndexError
    # when they are shorter than `predictions_net`.
    for i, predictions_layer in enumerate(predictions_net):
        classes, scores, bboxes = ssd_bboxes_select_layer(
            predictions_layer, localizations_net[i], anchors_net[i],
            select_threshold, img_shape, num_classes, decode
        )
        l_classes.append(classes)
        l_scores.append(scores)
        l_bboxes.append(bboxes)

    if not l_classes:
        return (np.zeros((0,), dtype=np.int64),
                np.zeros((0,), dtype=np.float32),
                np.zeros((0, 4), dtype=np.float32))

    classes = np.concatenate(l_classes, 0)
    scores = np.concatenate(l_scores, 0)
    bboxes = np.concatenate(l_bboxes, 0)
    return classes, scores, bboxes


def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold = 0.5,
                            img_shape = (300, 300),
                            num_classes = 21,
                            decode = True):
    """Extract classes, scores and bounding boxes from features in one layer.

    Args:
      predictions_layer: Class score array; the last axis indexes classes and
        class 0 is treated as background. A 5-D array carries the batch size
        on axis 0, anything else is treated as a single batch element.
      localizations_layer: Localization array with the same leading layout.
      anchors_layer: Layer anchors, only used when `decode` is true.
      select_threshold: Keep every non-background (anchor, class) pair whose
        score is above this value. `None` or `0` instead keeps, per anchor,
        the best class when that class is not background.
      img_shape: Unused here; kept for interface compatibility.
      num_classes: Unused here; kept for interface compatibility.
      decode: If true, first convert localizations with `ssd_bboxes_decode`.

    Return:
      classes, scores, bboxes: Numpy arrays with one entry per selection.
    """
    # First decode localizations features if necessary.
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)

    # Flatten everything between the batch axis and the last axis.
    p_shape = predictions_layer.shape
    batch_size = p_shape[0] if len(p_shape) == 5 else 1
    predictions_layer = np.reshape(predictions_layer,
                                   (batch_size, -1, p_shape[-1]))
    l_shape = localizations_layer.shape
    localizations_layer = np.reshape(localizations_layer,
                                     (batch_size, -1, l_shape[-1]))

    if select_threshold is None or select_threshold == 0:
        # Best class per anchor, dropping anchors won by the background.
        classes = np.argmax(predictions_layer, axis=2)
        scores = np.amax(predictions_layer, axis=2)
        mask = (classes > 0)
        classes = classes[mask]
        scores = scores[mask]
        bboxes = localizations_layer[mask]
    else:
        # Skip the background column, then shift class ids back by one.
        sub_predictions = predictions_layer[:, :, 1:]
        idxes = np.where(sub_predictions > select_threshold)
        classes = idxes[-1] + 1
        scores = sub_predictions[idxes]
        # (batch, anchor) index pairs pick the matching localization rows.
        bboxes = localizations_layer[idxes[:-1]]

    return classes, scores, bboxes
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling = (0.1, 0.1, 0.2, 0.2)):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Args:
      feat_localizations: Array whose last axis holds the encoded
        (cx, cy, w, h) values and whose second-to-last axis indexes the
        anchors of one location.
      anchor_bboxes: Sequence unpacked as (yref, xref, href, wref).
      prior_scaling: Four scaling factors applied to (cx, cy, w, h) in that
        order. An immutable tuple is used as the default so the value can
        never be shared and mutated between calls.

    Return:
      Array with the same shape as `feat_localizations` whose last axis is
      [ymin, xmin, ymax, xmax]. The result is always floating point, so
      integer-typed input no longer truncates the decoded coordinates.
    """
    feat_localizations = np.asarray(feat_localizations)
    l_shape = feat_localizations.shape
    feats = np.reshape(feat_localizations, (-1, l_shape[-2], l_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    # One anchor centre per row, so it broadcasts against the anchors axis.
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    cx = feats[:, :, 0] * wref * prior_scaling[0] + xref
    cy = feats[:, :, 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(feats[:, :, 2] * prior_scaling[2])
    h = href * np.exp(feats[:, :, 3] * prior_scaling[3])

    # Keep the input float precision, but promote integer input to float.
    out_dtype = np.result_type(feats.dtype, np.float32)
    bboxes = np.zeros(feats.shape, dtype=out_dtype)
    bboxes[:, :, 0] = cy - h / 2.
    bboxes[:, :, 1] = cx - w / 2.
    bboxes[:, :, 2] = cy + h / 2.
    bboxes[:, :, 3] = cx + w / 2.

    return np.reshape(bboxes, l_shape)
def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes to a reference box.

    Args:
      bbox_ref: Reference box [ymin, xmin, ymax, xmax] (or one per box).
      bboxes: Array of boxes with the coordinates on the last axis.

    Return:
      A clipped copy of `bboxes`; the input is left untouched.
    """
    bboxes = np.copy(bboxes)
    # Work coordinate-major so each coordinate is one row.
    bboxes = np.transpose(bboxes)
    bbox_ref = np.transpose(bbox_ref)
    bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])
    bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])
    bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])
    bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])
    bboxes = np.transpose(bboxes)
    return bboxes


def bboxes_sort(classes, scores, bboxes, top_k = 400):
    """Sort bounding boxes by decreasing score and keep only the top_k.

    Return:
      classes, scores, bboxes: The three arrays reordered consistently and
      truncated to at most `top_k` entries.
    """
    idxes = np.argsort(-scores)
    classes = classes[idxes][:top_k]
    scores = scores[idxes][:top_k]
    bboxes = bboxes[idxes][:top_k]
    return classes, scores, bboxes


def bboxes_nms(classes, scores, bboxes, nms_threshold = 0.45):
    """Apply per-class non-maximum suppression.

    Boxes are visited in the given order, so they are expected to be sorted
    by decreasing score already (see `bboxes_sort`). A kept box suppresses
    every later box of the same class whose jaccard overlap with it is not
    below `nms_threshold`.

    Return:
      classes, scores, bboxes: The entries that survive suppression.
    """
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
    # equivalent dtype on every NumPy version.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size-1):
        if keep_bboxes[i]:
            # Compare box i with all the boxes that come after it.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i + 1):])
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):],
                                                   keep_overlap)
    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]


def bboxes_jaccard(bboxes1, bboxes2):
    """Compute the jaccard index (intersection over union) between bboxes1
    and bboxes2.

    Note: bboxes1 and bboxes2 can be multi-dimensional, but should be
    broadcastable once transposed to coordinate-major layout.
    """
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    # Disjoint boxes give negative extents, clamped to an empty overlap.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    jaccard = int_vol / (vol1 + vol2 - int_vol)
    return jaccard
def bboxes_resize(bbox_ref, bboxes):
    """Resize bounding boxes based on a reference bounding box,
    assuming that the latter is [0, 0, 1, 1] after transform.

    Args:
      bbox_ref: Reference box [ymin, xmin, ymax, xmax].
      bboxes: Array of boxes with [ymin, xmin, ymax, xmax] on the last axis.
        Any number of leading axes is accepted, including a single 1-D box.

    Return:
      A new floating-point array of the same shape holding the boxes in the
      coordinate frame of `bbox_ref`. Integer input is promoted to float
      instead of failing on the in-place division.
    """
    boxes = np.asarray(bboxes)
    # astype() copies, so the caller's array is never modified.
    boxes = boxes.astype(np.result_type(boxes.dtype, np.float32))

    offset_y, offset_x = bbox_ref[0], bbox_ref[1]
    scale_y = bbox_ref[2] - bbox_ref[0]
    scale_x = bbox_ref[3] - bbox_ref[1]

    # Translate to the reference origin, then rescale to its extent.
    boxes[..., 0] = (boxes[..., 0] - offset_y) / scale_y
    boxes[..., 1] = (boxes[..., 1] - offset_x) / scale_x
    boxes[..., 2] = (boxes[..., 2] - offset_y) / scale_y
    boxes[..., 3] = (boxes[..., 3] - offset_x) / scale_x
    return boxes
def get_preprocessing(name, is_training=False):
    """Returns preprocessing_fn(image, labels, bboxes, out_shape, ...).

    Args:
      name: The name of the preprocessing function.
      is_training: `True` if the model is being used for training.

    Returns:
      preprocessing_fn: A function that preprocesses a single image
        (pre-batch) by delegating to `preprocess_image` of the module
        registered for `name`. It is called as
          preprocessing_fn(image, labels, bboxes, out_shape,
                           data_format='NHWC', **kwargs).

    Raises:
      ValueError: If Preprocessing `name` is not recognized.
    """
    # Both SSD variants share the same preprocessing module.
    registry = {
        'ssd_300_vgg': ssd_vgg_preprocessing,
        'ssd_512_vgg': ssd_vgg_preprocessing,
    }

    if name not in registry:
        raise ValueError('Preprocessing name [%s] was not recognized' % name)
    selected = registry[name]

    def preprocessing_fn(image, labels, bboxes,
                         out_shape, data_format='NHWC', **kwargs):
        return selected.preprocess_image(
            image, labels, bboxes, out_shape, data_format=data_format,
            is_training=is_training, **kwargs)

    return preprocessing_fn
class Resize(IntEnum):
    """Resizing strategies."""
    NONE = 1            # Nothing!
    CENTRAL_CROP = 2    # Crop (and pad if necessary).
    PAD_AND_RESIZE = 3  # Pad, and resize to output shape.
    WARP_RESIZE = 4     # Warp resize.
# VGG mean parameters.
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.

# Some training pre-processing parameters.
BBOX_CROP_OVERLAP = 0.5         # Minimum overlap to keep a bbox after cropping.
MIN_OBJECT_COVERED = 0.25
CROP_RATIO_RANGE = (0.6, 1.67)  # Distortion ratio during cropping.
EVAL_SIZE = (300, 300)


def tf_image_whitened(image, means=(_R_MEAN, _G_MEAN, _B_MEAN)):
    """Subtracts the given means from each image channel.

    Args:
      image: 3-D Tensor [height, width, channels].
      means: One mean per channel (immutable default, never shared state).

    Returns:
      the centered image.

    Raises:
      ValueError: If `image` is not 3-D or `means` does not have one entry
        per channel.
    """
    if image.get_shape().ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    num_channels = image.get_shape().as_list()[-1]
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    mean = tf.constant(means, dtype=image.dtype)
    image = image - mean
    return image


def tf_image_unwhitened(image, means=(_R_MEAN, _G_MEAN, _B_MEAN), to_int=True):
    """Re-convert to original image distribution, and convert to int if
    necessary.

    Returns:
      The image with the channel means added back (cast to int32 when
      `to_int` is true).
    """
    mean = tf.constant(means, dtype=image.dtype)
    image = image + mean
    if to_int:
        image = tf.cast(image, tf.int32)
    return image


def np_image_unwhitened(image, means=(_R_MEAN, _G_MEAN, _B_MEAN), to_int=True):
    """Re-convert to original image distribution, and convert to int if
    necessary. Numpy version.

    Args:
      image: Array with the channels on the last axis.
      means: One mean per channel.
      to_int: If true, return a uint8 image.

    Returns:
      A copy of the image with the channel means added back. When `to_int`
      is true, values are clipped to [0, 255] before the uint8 cast, so
      out-of-range values saturate instead of wrapping around.
    """
    img = np.copy(image)
    img += np.array(means, dtype=img.dtype)
    if to_int:
        img = np.clip(img, 0, 255).astype(np.uint8)
    return img


def tf_summary_image(image, bboxes, name='image', unwhitened=False):
    """Add image with bounding boxes to summary.
    """
    if unwhitened:
        image = tf_image_unwhitened(image)
    image = tf.expand_dims(image, 0)
    bboxes = tf.expand_dims(bboxes, 0)
    # Box coordinates are floats in [0.0, 1.0] relative to the image height
    # and width, i.e. positions are given as fractions of the image size.
    image_with_box = tf.image.draw_bounding_boxes(image, bboxes)
    # Write the image to the summary so it can be viewed in TensorBoard.
    tf.summary.image(name, image_with_box)
def apply_with_random_selector(x, func, num_cases):
    """Computes func(x, sel), with sel sampled from [0...num_cases-1].

    Args:
      x: input Tensor.
      func: Python function to apply.
      num_cases: Python int32, number of cases to sample sel from.

    Returns:
      The result of func(x, sel), where func receives the value of the
      selector as a python integer, but sel is sampled dynamically.
    """
    sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
    branch_outputs = []
    for case in range(num_cases):
        # Pass the real x only to one of the func calls: output [1] of the
        # switch is the one taken when `sel == case` holds.
        routed = control_flow_ops.switch(x, tf.equal(sel, case))[1]
        branch_outputs.append(func(routed, case))
    return control_flow_ops.merge(branch_outputs)[0]


def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
    """Distort the color of a Tensor image.

    The color ops do not commute, so their order matters. Rather than
    permuting them randomly, a fixed ordering is selected by
    `color_ordering`.

    Args:
      image: 3-D Tensor containing single image in [0, 1].
      color_ordering: Python int, a type of distortion (valid values: 0-3).
      fast_mode: Avoids slower ops (random_hue and random_contrast). In this
        mode every ordering other than 0 uses saturation then brightness.
      scope: Optional scope for name_scope.
    Returns:
      3-D Tensor color-distorted image on range [0, 1]
    Raises:
      ValueError: if color_ordering not in [0, 3] (only checked when
        `fast_mode` is false)
    """
    def brightness(img):
        return tf.image.random_brightness(img, max_delta=32. / 255.)

    def saturation(img):
        return tf.image.random_saturation(img, lower=0.5, upper=1.5)

    def hue(img):
        return tf.image.random_hue(img, max_delta=0.2)

    def contrast(img):
        return tf.image.random_contrast(img, lower=0.5, upper=1.5)

    with tf.name_scope(scope, 'distort_color', [image]):
        if fast_mode:
            if color_ordering == 0:
                pipeline = (brightness, saturation)
            else:
                pipeline = (saturation, brightness)
        else:
            # Position in this tuple is the `color_ordering` value.
            orderings = (
                (brightness, saturation, hue, contrast),
                (saturation, brightness, contrast, hue),
                (contrast, hue, brightness, saturation),
                (hue, saturation, contrast, brightness),
            )
            pipeline = None
            for ordering_id, ops_sequence in enumerate(orderings):
                if color_ordering == ordering_id:
                    pipeline = ops_sequence
                    break
            if pipeline is None:
                raise ValueError('color_ordering must be in [0, 3]')

        for color_op in pipeline:
            image = color_op(image)
        # The random_* ops do not necessarily clamp.
        return tf.clip_by_value(image, 0.0, 1.0)
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Generates cropped_image using a one of the bboxes randomly distorted.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
      image: 3-D Tensor of image.
      labels: Labels of the boxes, filtered together with `bboxes`.
      bboxes: 2-D float Tensor of boxes arranged [num_boxes, coords] with
        coordinates ordered [ymin, xmin, ymax, xmax]; a leading axis is
        added before sampling.
      min_object_covered: An optional `float`. Defaults to `0.3`. Forwarded
        to the sampling op.
      aspect_ratio_range: An optional list of `floats`. Forwarded to the
        sampling op.
      area_range: An optional list of `floats`. Forwarded to the sampling op.
      max_attempts: An optional `int`. Forwarded to the sampling op.
      clip_bboxes: Not used by this function.
      scope: Optional scope for name_scope.
    Returns:
      A tuple (cropped_image, labels, bboxes, distort_bbox): the cropped
      image, the labels and boxes that survive the overlap filter (boxes
      expressed relative to the crop), and the sampled crop box.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # The sampling op expects boxes shaped [1, num_boxes, coords] with
        # the coordinates ordered [ymin, xmin, ymax, xmax].
        sampled = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=tf.expand_dims(bboxes, 0),
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        crop_begin, crop_size, crop_window = sampled
        distort_bbox = crop_window[0, 0]

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        # Restore the shape since the dynamic slice loses 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        resized_bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
        kept_labels, kept_bboxes = tfe.bboxes_filter_overlap(
            labels, resized_bboxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        return cropped_image, kept_labels, kept_bboxes, distort_bbox
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Pipeline: convert to float, random distorted crop, resize to
    `out_shape`, random horizontal flip, random color distortion, rescale to
    [0, 255] and subtract the VGG channel means. Image summaries are added
    after the main steps.

    Args:
      image: A 3-D `Tensor` representing an image of arbitrary size.
      labels: Labels of the groundtruth boxes.
      bboxes: Groundtruth boxes.
      out_shape: Spatial shape the image is resized to.
      data_format: 'NHWC' or 'NCHW'; the latter moves channels first.
      scope: Name of the enclosing name scope.

    Returns:
      (image, labels, bboxes): the preprocessed image and the labels and
      boxes that remain after cropping.

    Raises:
      ValueError: If `image` is not 3-D.
    """
    use_fast_color_mode = False

    def distort_with_ordering(img, ordering):
        return distort_color(img, ordering, use_fast_color_mode)

    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1]: tf.image.draw_bounding_boxes
        # requires a floating-point image.
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes.
        dst_image, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image, distort_with_ordering, num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(dst_image * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is converted to float, mean-centered and resized according to
    `resize`. A box covering the whole image is tracked through the resize
    together with the groundtruth boxes and returned separately.

    Args:
      image: A 3-D `Tensor` representing an image of arbitrary size.
      labels: Labels of the groundtruth boxes.
      bboxes: Groundtruth boxes, or `None` when there are none.
      out_shape: Output shape after pre-processing (if resize != None)
      data_format: 'NHWC' or 'NCHW'; the latter moves channels first.
      difficults: Optional flags, one per box; boxes whose flag is true are
        removed from `labels` and `bboxes`.
      resize: Resize strategy.
      scope: Name of the enclosing name scope.

    Returns:
      (image, labels, bboxes, bbox_img): the preprocessed image, the
      remaining labels and boxes, and the whole-image box after resizing.

    Raises:
      ValueError: If `image` is not 3-D.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes: it goes first so that it can be
        # split off again by index after the resize.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            # The factor is capped at 1.0, so the image is only ever shrunk.
            shape = tf.shape(image)
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
def preprocess_image(image,
                     labels,
                     bboxes,
                     out_shape,
                     data_format,
                     is_training=False,
                     **kwargs):
    """Pre-process a given image for training or evaluation.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      labels: Labels of the groundtruth boxes.
      bboxes: Groundtruth boxes.
      out_shape: Output shape forwarded to the selected routine.
      data_format: Data format forwarded to the selected routine.
      is_training: `True` if we're preprocessing the image for training and
        `False` otherwise.
      **kwargs: Extra options, forwarded to `preprocess_for_eval` only; they
        are ignored when `is_training` is true.

    Returns:
      The result of `preprocess_for_train` when `is_training` is true,
      otherwise the result of `preprocess_for_eval`.
    """
    if not is_training:
        return preprocess_for_eval(image, labels, bboxes,
                                   out_shape=out_shape,
                                   data_format=data_format,
                                   **kwargs)
    return preprocess_for_train(image, labels, bboxes,
                                out_shape=out_shape,
                                data_format=data_format)
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Custom image operations. 16 | Most of the following methods extend TensorFlow image library, and part of 17 | the code is shameless copy-paste of the former! 18 | """ 19 | import tensorflow as tf 20 | 21 | from tensorflow.python.framework import constant_op 22 | from tensorflow.python.framework import dtypes 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import tensor_util 26 | from tensorflow.python.ops import array_ops 27 | from tensorflow.python.ops import check_ops 28 | from tensorflow.python.ops import clip_ops 29 | from tensorflow.python.ops import control_flow_ops 30 | from tensorflow.python.ops import gen_image_ops 31 | from tensorflow.python.ops import gen_nn_ops 32 | from tensorflow.python.ops import string_ops 33 | from tensorflow.python.ops import math_ops 34 | from tensorflow.python.ops import random_ops 35 | from tensorflow.python.ops import variables 36 | 37 | 38 | # =========================================================================== # 39 | # Modification of TensorFlow image routines. 
def _assert(cond, ex_type, msg):
    """Assert `cond`, whether it is a plain Python value or a symbolic tensor.

    Args:
      cond: Condition to check; a tensor/variable or any object with a
        truth value.
      ex_type: Exception class raised when a non-tensor `cond` is falsy.
      msg: Error message used for the exception or for the assert op.
    Returns:
      An empty list when a non-tensor `cond` holds, otherwise a list with a
      single TensorFlow `Assert` op built on `cond`.
    Raises:
      ex_type: If `cond` is not a tensor and evaluates to false.
    """
    if not _is_tensor(cond):
        if cond:
            return []
        raise ex_type(msg)
    # Symbolic condition: the check can only happen when the graph runs.
    return [control_flow_ops.Assert(cond, [msg])]


def _is_tensor(x):
    """Tell whether `x` is a `tf.Tensor` or a `tf.Variable`.

    Args:
      x: Any python object.
    Returns:
      `True` for tensors and variables, `False` for everything else.
    """
    tensor_like = (ops.Tensor, variables.Variable)
    return isinstance(x, tensor_like)


def _ImageDimensions(image):
    """Return the three dimensions of an image tensor.

    Args:
      image: A rank-3 tensor (the rank is enforced with `with_rank(3)` when
        the static shape is not fully defined).
    Returns:
      A list of three entries. Statically known dimensions are python
      integers; unknown ones are scalar tensors taken from the dynamic shape.
    """
    shape = image.get_shape()
    if shape.is_fully_defined():
        return shape.as_list()
    static_dims = shape.with_rank(3).as_list()
    dynamic_dims = array_ops.unstack(array_ops.shape(image), 3)
    # Fall back to the run-time value only where the static one is missing.
    return [runtime if known is None else known
            for known, runtime in zip(static_dims, dynamic_dims)]
def _Check3DImage(image, require_static=True):
    """Validate that `image` is a properly shaped rank-3 tensor.

    Args:
      image: Tensor expected to have rank 3.
      require_static: If `True`, every dimension must be statically known.
    Raises:
      ValueError: If the rank is not 3, if `require_static` is set and the
        shape is not fully defined, or if a known dimension equals zero.
    Returns:
      An empty list when the shape is fully defined; otherwise a list holding
      one `assert_positive` op on the dynamic shape.
    """
    try:
        shape = image.get_shape().with_rank(3)
    except ValueError:
        raise ValueError("'image' must be three-dimensional.")
    fully_defined = shape.is_fully_defined()
    if require_static and not fully_defined:
        raise ValueError("'image' must be fully defined.")
    for dim in shape:
        if dim == 0:
            raise ValueError("all dims of 'image.shape' must be > 0: %s" %
                             shape)
    if fully_defined:
        return []
    # Unknown dimensions can only be verified once the graph is executed.
    return [check_ops.assert_positive(array_ops.shape(image),
                                      ["all dims of 'image.shape' "
                                       "must be > 0."])]


def fix_image_flip_shape(image, result):
    """Copy the static shape of `image` onto `result`.

    Args:
      image: Tensor whose static shape is the reference.
      result: Tensor derived from `image` (e.g. flipped).
    Returns:
      `result`, with its shape set to `[None, None, None]` when nothing is
      known about `image`, and to the shape of `image` otherwise.
    """
    source_shape = image.get_shape()
    if source_shape == tensor_shape.unknown_shape():
        new_shape = [None, None, None]
    else:
        new_shape = source_shape
    result.set_shape(new_shape)
    return result
def bboxes_crop_or_pad(bboxes,
                       height, width,
                       offset_y, offset_x,
                       target_height, target_width):
    """Update relative bounding boxes after a crop or a pad of the image.

    The boxes are scaled by the source size, shifted by the offset and then
    divided by the target size.

    Arguments:
      bboxes: Tensor of boxes laid out as [y_min, x_min, y_max, x_max];
      height, width: Dimensions of the image the boxes currently refer to;
      offset_y, offset_x: Offset added to the scaled coordinates
        (negative when cropping, positive when padding);
      target_height, target_width: Dimensions after cropping / padding.
    Returns:
      The transformed boxes, in the dtype of `bboxes`.
    """
    with tf.name_scope('bboxes_crop_or_pad'):
        dtype = bboxes.dtype

        def _per_coordinate(y_value, x_value):
            # One value per box coordinate: [y, x, y, x].
            return tf.cast(tf.stack([y_value, x_value, y_value, x_value]),
                           dtype)

        bboxes = bboxes * _per_coordinate(height, width)
        bboxes = bboxes + _per_coordinate(offset_y, offset_x)
        bboxes = bboxes / _per_coordinate(target_height, target_width)
        return bboxes
def resize_image_bboxes_with_crop_or_pad(image, bboxes,
                                         target_height, target_width):
    """Centrally crop and/or zero-pad an image, updating its boxes alike.

    Along each dimension the image is cropped around its centre when it is
    larger than the target and padded evenly when it is smaller. The same
    offsets are applied to `bboxes` through `bboxes_crop_or_pad`.

    Args:
      image: 3-D tensor (checked with `_Check3DImage`).
      bboxes: Boxes attached to `image`, passed to `bboxes_crop_or_pad`.
      target_height: Target height, python number or tensor.
      target_width: Target width, python number or tensor.
    Raises:
      ValueError: If a non-tensor target dimension is not positive, or if
        the resized image ends up without a known rank.
    Returns:
      A `(resized_image, bboxes)` pair.
    """
    with tf.name_scope('resize_with_crop_or_pad'):
        image = ops.convert_to_tensor(image, name='image')

        checks = (_Check3DImage(image, require_static=False)
                  + _assert(target_width > 0, ValueError,
                            'target_width must be > 0.')
                  + _assert(target_height > 0, ValueError,
                            'target_height must be > 0.'))

        # Attach our checks first so their messages win over the ones raised
        # by `crop_to_bounding_box` / `pad_to_bounding_box`.
        image = control_flow_ops.with_dependencies(checks, image)
        if _is_tensor(target_height):
            target_height = control_flow_ops.with_dependencies(
                checks, target_height)
        if _is_tensor(target_width):
            target_width = control_flow_ops.with_dependencies(
                checks, target_width)

        def _symbolic(x, y):
            # True as soon as one operand needs a graph op.
            return _is_tensor(x) or _is_tensor(y)

        def max_(x, y):
            return math_ops.maximum(x, y) if _symbolic(x, y) else max(x, y)

        def min_(x, y):
            return math_ops.minimum(x, y) if _symbolic(x, y) else min(x, y)

        def equal_(x, y):
            return math_ops.equal(x, y) if _symbolic(x, y) else x == y

        height, width, _ = _ImageDimensions(image)

        width_diff = target_width - width
        offset_crop_width = max_(-width_diff // 2, 0)
        offset_pad_width = max_(width_diff // 2, 0)

        height_diff = target_height - height
        offset_crop_height = max_(-height_diff // 2, 0)
        offset_pad_height = max_(height_diff // 2, 0)

        # Crop step (a no-op along dimensions that are already small enough).
        height_crop = min_(target_height, height)
        width_crop = min_(target_width, width)
        cropped = tf.image.crop_to_bounding_box(
            image, offset_crop_height, offset_crop_width,
            height_crop, width_crop)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    height, width,
                                    -offset_crop_height, -offset_crop_width,
                                    height_crop, width_crop)

        # Pad step (a no-op along dimensions that are already large enough).
        resized = tf.image.pad_to_bounding_box(
            cropped, offset_pad_height, offset_pad_width,
            target_height, target_width)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    height_crop, width_crop,
                                    offset_pad_height, offset_pad_width,
                                    target_height, target_width)

        # Sanity checks on the result; redundant in theory.
        if resized.get_shape().ndims is None:
            raise ValueError('resized contains no shape.')

        resized_height, resized_width, _ = _ImageDimensions(resized)

        final_checks = (_assert(equal_(resized_height, target_height),
                                ValueError,
                                'resized height is not correct.')
                        + _assert(equal_(resized_width, target_width),
                                  ValueError,
                                  'resized width is not correct.'))

        resized = control_flow_ops.with_dependencies(final_checks, resized)
        return resized, bboxes
def resize_image(image, size,
                 method=tf.image.ResizeMethod.BILINEAR,
                 align_corners=False):
    """Resize a single image to `size`.

    Args:
      image: Image tensor; a batch axis is added before resizing.
      size: Indexable pair `(new_height, new_width)`.
      method: Resize method forwarded to `tf.image.resize_images`.
      align_corners: Forwarded to `tf.image.resize_images`.
    Returns:
      The resized image reshaped to `[size[0], size[1], channels]`, where
      `channels` is the last dimension of the input.
    """
    with tf.name_scope('resize_image'):
        _, _, channels = _ImageDimensions(image)
        batched = tf.expand_dims(image, 0)
        resized = tf.image.resize_images(batched, size,
                                         method, align_corners)
        # Drop the batch axis again by reshaping to the final 3-D shape.
        out_shape = tf.stack([size[0], size[1], channels])
        return tf.reshape(resized, out_shape)
def random_flip_left_right(image, bboxes, seed=None):
    """Flip an image and its boxes left-right with probability one half.

    Args:
      image: 3-D image tensor.
      bboxes: Boxes laid out as [y_min, x_min, y_max, x_max] in relative
        coordinates (the flip maps x to `1 - x`).
      seed: Seed forwarded to the uniform random op.
    Returns:
      A `(image, bboxes)` pair; both are flipped or both are left untouched.
    """
    def _mirror_boxes(boxes):
        # x_min and x_max swap roles once mirrored.
        return tf.stack([boxes[:, 0], 1 - boxes[:, 3],
                         boxes[:, 2], 1 - boxes[:, 1]], axis=-1)

    with tf.name_scope('random_flip_left_right'):
        image = ops.convert_to_tensor(image, name='image')
        _Check3DImage(image, require_static=False)
        coin = random_ops.random_uniform([], 0, 1.0, seed=seed)
        do_flip = math_ops.less(coin, .5)
        # The same predicate drives both conds so image and boxes agree.
        flipped = control_flow_ops.cond(
            do_flip,
            lambda: array_ops.reverse_v2(image, [1]),
            lambda: image)
        bboxes = control_flow_ops.cond(
            do_flip,
            lambda: _mirror_boxes(bboxes),
            lambda: bboxes)
        return fix_image_flip_shape(image, flipped), bboxes
# Build the SSD network. Variables are reused when `ssd_net` already exists
# in this namespace (e.g. the code is re-run in an interactive session).
reuse = None
if 'ssd_net' in locals():
    reuse = True
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=reuse)

# Load the trained weights.
ckpt_filename = './logs/model.ckpt-62962'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# Default anchor boxes of the network for the chosen input shape.
ssd_anchors = ssd_net.anchors(net_shape)


def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process its detections.

    Args:
      img: Image fed to the `img_input` placeholder.
      select_threshold: Threshold passed to `ssd_common.ssd_bboxes_select`.
      nms_threshold: Threshold passed to `ssd_common.bboxes_nms`.
      net_shape: Network input shape, passed as `img_shape` when selecting
        boxes.
    Returns:
      `(rclasses, rscores, rbboxes)` after selection, clipping, sorting
      (top 400), NMS and resizing relative to the image box.
    """
    fetches = [image_4d, predictions, localisations, bbox_img]
    _, rpredictions, rlocalisations, rbbox_img = isess.run(
        fetches, feed_dict={img_input: img})

    # Decode the raw network outputs into classes, scores and boxes.
    rclasses, rscores, rbboxes = ssd_common.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)

    rbboxes = ssd_common.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = ssd_common.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = ssd_common.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Map boxes back to the original image. Note: useless for Resize.WARP!
    rbboxes = ssd_common.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

# Test on some demo image and visualize output.
path = './demo/'
image_names = sorted(os.listdir(path))
# Visualize at most the first 10 demo images. Slicing (instead of indexing
# with range(10)) keeps the script working when the folder holds fewer files.
for image_name in image_names[:10]:
    img = mpimg.imread(os.path.join(path, image_name))
    rclasses, rscores, rbboxes = process_image(img)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
FLAGS = tf.app.flags.FLAGS
# tf.app.flags exposes command-line arguments to the script, much like
# reading argv directly.
tf.app.flags.DEFINE_string(
    'dataset_name', 'pascalvoc',
    'The name of the dataset to convert.')
tf.app.flags.DEFINE_string(
    'dataset_dir', None,
    'Directory where the original dataset is stored.')
tf.app.flags.DEFINE_string(
    'output_name', 'pascalvoc',
    'Basename used for TFRecords output files.')
tf.app.flags.DEFINE_string(
    'output_dir', './',
    'Output directory where to store TFRecords files.')


def main(_):
    """Convert the dataset selected by the command-line flags to TFRecords.

    Raises:
      ValueError: If `--dataset_dir` is missing or `--dataset_name` is not
        `'pascalvoc'`.
    """
    dataset_dir = FLAGS.dataset_dir
    if not dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')
    print('Dataset directory:', dataset_dir)
    print('Output directory:', FLAGS.output_dir)

    # Only the Pascal VOC converter is available.
    if FLAGS.dataset_name != 'pascalvoc':
        raise ValueError('Dataset [%s] was not recognized.' % FLAGS.dataset_name)
    pascalvoc_to_tfrecords.run(dataset_dir, FLAGS.output_dir, FLAGS.output_name)


if __name__ == '__main__':
    tf.app.run()
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional metrics. 16 | """ 17 | 18 | # pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import 19 | from tf_extended.metrics import * 20 | from tf_extended.tensors import * 21 | from tf_extended.bboxes import * 22 | from tf_extended.image import * 23 | from tf_extended.math import * 24 | 25 | -------------------------------------------------------------------------------- /tf_extended/bboxes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional bounding boxes methods. 
def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None):
    """Keep the `top_k` best-scoring boxes of each batch entry, best first.

    Classes are mixed in the inputs: the selection is done on scores alone
    and `classes` / `bboxes` are re-ordered accordingly.

    Args:
      classes: Batch x N Tensor containing integer classes.
      scores: Batch x N Tensor containing float scores.
      bboxes: Batch x N x 4 Tensor containing boxes coordinates.
      top_k: Number of boxes to keep.
      scope: Optional name scope.
    Return:
      classes, scores, bboxes: Tensors restricted to the `top_k` entries
      selected by `tf.nn.top_k` on `scores`.
    """
    with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        def _gather_entry(entry):
            # `tf.gather` works on one batch element at a time.
            cls_row, box_row, idx_row = entry
            return [tf.gather(cls_row, idx_row), tf.gather(box_row, idx_row)]

        classes, bboxes = tf.map_fn(_gather_entry,
                                    [classes, bboxes, idxes],
                                    dtype=[classes.dtype, bboxes.dtype],
                                    parallel_iterations=10,
                                    back_prop=False,
                                    swap_memory=False,
                                    infer_shape=True)
        return classes, scores, bboxes
def bboxes_sort(scores, bboxes, top_k=400, scope=None):
    """Keep the `top_k` best-scoring boxes of each batch entry, best first.

    When either input is a dictionary, every key of `scores` is handled
    independently (one recursive call per key).

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      top_k: Number of boxes to keep.
      scope: Optional name scope.
    Return:
      scores, bboxes: Sorted Tensors, or dictionaries of sorted Tensors keyed
      like `scores`.
    """
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_sort_dict'):
            d_scores = {}
            d_bboxes = {}
            for c in scores:
                d_scores[c], d_bboxes[c] = bboxes_sort(scores[c], bboxes[c],
                                                       top_k=top_k)
            return d_scores, d_bboxes

    with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        def _gather_entry(entry):
            # `tf.gather` works on one batch element at a time.
            box_row, idx_row = entry
            return [tf.gather(box_row, idx_row)]

        (bboxes,) = tf.map_fn(_gather_entry,
                              [bboxes, idxes],
                              dtype=[bboxes.dtype],
                              parallel_iterations=10,
                              back_prop=False,
                              swap_memory=False,
                              infer_shape=True)
        return scores, bboxes
def bboxes_clip(bbox_ref, bboxes, scope=None):
    """Clip bounding boxes to a reference box.

    Both inputs are transposed before the element-wise min/max, so the call
    is batch-compatible whenever their leading dimensions broadcast.

    Args:
      bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor;
      bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor or dictionary
        of such tensors.
      scope: Optional name scope.
    Return:
      Clipped bboxes (a dictionary when `bboxes` is one).
    """
    if isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_clip_dict'):
            return {c: bboxes_clip(bbox_ref, boxes)
                    for c, boxes in bboxes.items()}

    with tf.name_scope(scope, 'bboxes_clip'):
        ref = tf.transpose(bbox_ref)
        boxes = tf.transpose(bboxes)
        # Intersection with the reference box.
        ymin = tf.maximum(boxes[0], ref[0])
        xmin = tf.maximum(boxes[1], ref[1])
        ymax = tf.minimum(boxes[2], ref[2])
        xmax = tf.minimum(boxes[3], ref[3])
        # Without intersection the box collapses to an empty one.
        ymin = tf.minimum(ymin, ymax)
        xmin = tf.minimum(xmin, xmax)
        clipped = tf.stack([ymin, xmin, ymax, xmax], axis=0)
        return tf.transpose(clipped)


def bboxes_resize(bbox_ref, bboxes, name=None):
    """Express boxes relative to a reference box.

    The reference box is assumed to become [0, 0, 1, 1] after the transform,
    which is the situation after cropping an image to that box.

    Args:
      bbox_ref: Reference box, indexable as [y_min, x_min, y_max, x_max].
      bboxes: Boxes to transform, Tensor or dictionary of Tensors.
      name: Optional name scope.
    Return:
      Transformed bboxes (a dictionary when `bboxes` is one).
    """
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            return {c: bboxes_resize(bbox_ref, boxes)
                    for c, boxes in bboxes.items()}

    with tf.name_scope(name, 'bboxes_resize'):
        # Move the origin to the top-left corner of the reference box.
        origin = tf.stack([bbox_ref[0], bbox_ref[1],
                           bbox_ref[0], bbox_ref[1]])
        # Then divide by the reference height / width.
        extent = tf.stack([bbox_ref[2] - bbox_ref[0],
                           bbox_ref[3] - bbox_ref[1],
                           bbox_ref[2] - bbox_ref[0],
                           bbox_ref[3] - bbox_ref[1]])
        return (bboxes - origin) / extent
def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None):
    """Apply non-maximum suppression to the boxes of a single entry.

    Should only be used on single-entries. Use the batch version otherwise.

    Args:
      scores: N Tensor containing float scores.
      bboxes: N x 4 Tensor containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm;
      keep_top_k: Maximum number of boxes kept after NMS; also the size the
        outputs are padded to along axis 0.
      scope: Optional name scope.
    Return:
      scores, bboxes Tensors holding the entries selected by
      `tf.image.non_max_suppression`, padded with zeros via
      `tfe_tensors.pad_axis`.
    """
    with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]):
        keep = tf.image.non_max_suppression(bboxes, scores,
                                            keep_top_k, nms_threshold)
        kept_scores = tf.gather(scores, keep)
        kept_bboxes = tf.gather(bboxes, keep)
        # Pad to a fixed length so results can be batched.
        kept_scores = tfe_tensors.pad_axis(kept_scores, 0, keep_top_k, axis=0)
        kept_bboxes = tfe_tensors.pad_axis(kept_bboxes, 0, keep_top_k, axis=0)
        return kept_scores, kept_bboxes
def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200,
                     scope=None):
    """Apply non-maximum suppression to every entry of a batch.

    Each batch element goes through `bboxes_nms`, whose zero-padding makes
    the per-element results stackable. When either input is a dictionary,
    every key of `scores` is processed independently.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm;
      keep_top_k: Number of total object to keep after NMS.
      scope: Optional name scope.
    Return:
      scores, bboxes Tensors/Dictionaries, padded with zero if necessary.
    """
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
            d_scores = {}
            d_bboxes = {}
            for c in scores:
                d_scores[c], d_bboxes[c] = bboxes_nms_batch(
                    scores[c], bboxes[c],
                    nms_threshold=nms_threshold,
                    keep_top_k=keep_top_k)
            return d_scores, d_bboxes

    with tf.name_scope(scope, 'bboxes_nms_batch'):
        def _nms_entry(entry):
            # `entry` is the (scores, bboxes) pair of one batch element.
            return bboxes_nms(entry[0], entry[1], nms_threshold, keep_top_k)

        scores, bboxes = tf.map_fn(_nms_entry,
                                   (scores, bboxes),
                                   dtype=(scores.dtype, bboxes.dtype),
                                   parallel_iterations=10,
                                   back_prop=False,
                                   swap_memory=False,
                                   infer_shape=True)
        return scores, bboxes
def bboxes_matching(label, scores, bboxes,
                    glabels, gbboxes, gdifficults,
                    matching_threshold=0.5, scope=None):
    """Match the detections of one class against groundtruth boxes.

    Does not accept batched inputs. Detections are visited in order; each
    one is compared to the groundtruth box of the same class with the best
    Jaccard overlap:
      * overlap above the threshold and box not matched yet: True Positive;
      * no match, or box already matched: False Positive;
      * best box flagged as difficult: neither TP nor FP.

    Args:
      label: Class the detections belong to.
      scores, bboxes: N(x4) Tensors with the detections (expected to be
        sorted by score by the caller).
      glabels, gbboxes: Groundtruth labels and boxes.
      gdifficults: Groundtruth "difficult" flags, cast to boolean.
      matching_threshold: Jaccard threshold for a positive match.
      scope: Optional name scope.
    Return: Tuple of:
      n_gbboxes: Scalar Tensor counting groundtruth boxes of class `label`
        that are not difficult.
      tp_match: Boolean Tensor shaped like `scores`, True Positives.
      fp_match: Boolean Tensor shaped like `scores`, False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        n_detections = tf.size(scores)
        out_shape = tf.shape(scores)
        rlabel = tf.cast(label, glabels.dtype)
        gdifficults = tf.cast(gdifficults, tf.bool)
        # Groundtruth boxes that count for this class.
        is_class = tf.equal(glabels, label)
        n_gbboxes = tf.count_nonzero(
            tf.logical_and(is_class, tf.logical_not(gdifficults)))
        # State carried through the loop: which groundtruth is taken.
        gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        grange = tf.range(tf.size(glabels), dtype=tf.int32)
        # One TP and one FP flag per detection.
        ta_tp_bool = tf.TensorArray(tf.bool, size=n_detections,
                                    dynamic_size=False, infer_shape=True)
        ta_fp_bool = tf.TensorArray(tf.bool, size=n_detections,
                                    dynamic_size=False, infer_shape=True)

        def m_condition(i, ta_tp, ta_fp, gmatch):
            return tf.less(i, n_detections)

        def m_body(i, ta_tp, ta_fp, gmatch):
            # Overlap with every groundtruth box, zeroed for other classes.
            overlaps = bboxes_jaccard(bboxes[i], gbboxes)
            same_class = tf.cast(tf.equal(glabels, rlabel),
                                 dtype=overlaps.dtype)
            overlaps = overlaps * same_class

            # Best candidate and its status.
            best = tf.cast(tf.argmax(overlaps, axis=0), tf.int32)
            match = overlaps[best] > matching_threshold
            already_matched = gmatch[best]
            not_difficult = tf.logical_not(gdifficults[best])

            # Difficult groundtruth: record neither TP nor FP.
            first_match = tf.logical_and(match,
                                         tf.logical_not(already_matched))
            tp = tf.logical_and(not_difficult, first_match)
            ta_tp = ta_tp.write(i, tp)
            missed = tf.logical_or(already_matched, tf.logical_not(match))
            fp = tf.logical_and(not_difficult, missed)
            ta_fp = ta_fp.write(i, fp)

            # Mark the groundtruth box as taken.
            claimed = tf.logical_and(tf.equal(grange, best),
                                     tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, claimed)

            return [i + 1, ta_tp, ta_fp, gmatch]

        # Sequential loop: each iteration depends on the updated `gmatch`.
        loop_vars = [0, ta_tp_bool, ta_fp_bool, gmatch]
        _, ta_tp_bool, ta_fp_bool, gmatch = tf.while_loop(
            m_condition, m_body, loop_vars,
            parallel_iterations=1,
            back_prop=False)
        tp_match = tf.reshape(ta_tp_bool.stack(), out_shape)
        fp_match = tf.reshape(ta_fp_bool.stack(), out_shape)
        return n_gbboxes, tp_match, fp_match
def bboxes_matching_batch(labels, scores, bboxes,
                          glabels, gbboxes, gdifficults,
                          matching_threshold=0.5, scope=None):
    """Match detections with groundtruth values, batched version.

    With dictionary inputs, `labels` is iterated and each class `c` is
    matched using `scores[c]` / `bboxes[c]`. With tensor inputs, `labels` is
    passed as the class to `bboxes_matching` for every batch element.

    Args:
      labels: Iterable of classes (dictionary inputs) or a single class
        (tensor inputs).
      scores, bboxes: BxN(x4) Tensors, or dictionaries of them keyed by class.
      glabels, gbboxes: Groundtruth labels and boxes.
      gdifficults: Groundtruth "difficult" flags.
      matching_threshold: Threshold for a positive match.
      scope: Optional name scope.
    Return: Tuple `(n_gbboxes, tp, fp, scores)`; the first three are Tensors
      or dictionaries keyed by class:
      n_gbboxes: Number of groundtruth boxes per batch element.
      tp: Boolean Tensor with True Positives.
      fp: Boolean Tensor with False Positives.
      scores: The `scores` input, returned unchanged.
    """
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
            d_n_gbboxes = {}
            d_tp = {}
            d_fp = {}
            for c in labels:
                d_n_gbboxes[c], d_tp[c], d_fp[c], _ = bboxes_matching_batch(
                    c, scores[c], bboxes[c],
                    glabels, gbboxes, gdifficults,
                    matching_threshold)
            return d_n_gbboxes, d_tp, d_fp, scores

    with tf.name_scope(scope, 'bboxes_matching_batch',
                       [scores, bboxes, glabels, gbboxes]):
        def _match_entry(entry):
            # `entry` = (scores, bboxes, glabels, gbboxes, gdifficults) of
            # one batch element.
            return bboxes_matching(labels, entry[0], entry[1],
                                   entry[2], entry[3], entry[4],
                                   matching_threshold)

        n_gbboxes, tp, fp = tf.map_fn(
            _match_entry,
            (scores, bboxes, glabels, gbboxes, gdifficults),
            dtype=(tf.int64, tf.bool, tf.bool),
            parallel_iterations=10,
            back_prop=False,
            swap_memory=True,
            infer_shape=True)
        return n_gbboxes, tp, fp, scores
def bboxes_filter_center(labels, bboxes, margins=(0., 0., 0., 0.),
                         scope=None):
    """Filter out bounding boxes whose center is not in
    the rectangle [0, 0, 1, 1] + margins. The margins
    can be used to enforce or loosen this condition.

    Args:
      labels: Labels, one per box.
      bboxes: N x 4 Tensor of boxes laid out as [y_min, x_min, y_max, x_max].
      margins: Four values `[y_min, x_min, y_max, x_max]`. A box is kept when
        its center satisfies `margins[0] < cy < 1 + margins[2]` and
        `margins[1] < cx < 1 + margins[3]`.
      scope: Optional name scope.
    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
        cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
        mask = tf.greater(cy, margins[0])
        mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
        # The vertical upper bound must be checked on `cy`; testing `cx`
        # here would leave boxes centred below the image unfiltered.
        mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
        mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
        # Boolean masking...
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
403 | labels = tf.boolean_mask(labels, mask) 404 | bboxes = tf.boolean_mask(bboxes, mask) 405 | return labels, bboxes 406 | 407 | 408 | def bboxes_filter_overlap(labels, bboxes, 409 | threshold=0.5, assign_negative=False, 410 | scope=None): 411 | """Filter out bounding boxes based on (relative )overlap with reference 412 | box [0, 0, 1, 1]. Remove completely bounding boxes, or assign negative 413 | labels to the one outside (useful for latter processing...). 414 | 415 | Return: 416 | labels, bboxes: Filtered (or newly assigned) elements. 417 | """ 418 | with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]): 419 | scores = bboxes_intersection(tf.constant([0, 0, 1, 1], bboxes.dtype), 420 | bboxes) 421 | mask = scores > threshold 422 | if assign_negative: 423 | labels = tf.where(mask, labels, -labels) 424 | # bboxes = tf.where(mask, bboxes, bboxes) 425 | else: 426 | labels = tf.boolean_mask(labels, mask) 427 | bboxes = tf.boolean_mask(bboxes, mask) 428 | return labels, bboxes 429 | 430 | 431 | def bboxes_filter_labels(labels, bboxes, 432 | out_labels=[], num_classes=np.inf, 433 | scope=None): 434 | """Filter out labels from a collection. Typically used to get 435 | of DontCare elements. Also remove elements based on the number of classes. 436 | 437 | Return: 438 | labels, bboxes: Filtered elements. 439 | """ 440 | with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]): 441 | mask = tf.greater_equal(labels, num_classes) 442 | for l in labels: 443 | mask = tf.logical_and(mask, tf.not_equal(labels, l)) 444 | labels = tf.boolean_mask(labels, mask) 445 | bboxes = tf.boolean_mask(bboxes, mask) 446 | return labels, bboxes 447 | 448 | 449 | # =========================================================================== # 450 | # Standard boxes computation. 
451 | # =========================================================================== # 452 | def bboxes_jaccard(bbox_ref, bboxes, name=None): 453 | """Compute jaccard score between a reference box and a collection 454 | of bounding boxes. 455 | 456 | Args: 457 | bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es). 458 | bboxes: (N, 4) Tensor, collection of bounding boxes. 459 | Return: 460 | (N,) Tensor with Jaccard scores. 461 | """ 462 | with tf.name_scope(name, 'bboxes_jaccard'): 463 | # Should be more efficient to first transpose. 464 | bboxes = tf.transpose(bboxes) 465 | bbox_ref = tf.transpose(bbox_ref) 466 | # Intersection bbox and volume. 467 | int_ymin = tf.maximum(bboxes[0], bbox_ref[0]) 468 | int_xmin = tf.maximum(bboxes[1], bbox_ref[1]) 469 | int_ymax = tf.minimum(bboxes[2], bbox_ref[2]) 470 | int_xmax = tf.minimum(bboxes[3], bbox_ref[3]) 471 | h = tf.maximum(int_ymax - int_ymin, 0.) 472 | w = tf.maximum(int_xmax - int_xmin, 0.) 473 | # Volumes. 474 | inter_vol = h * w 475 | union_vol = -inter_vol \ 476 | + (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) \ 477 | + (bbox_ref[2] - bbox_ref[0]) * (bbox_ref[3] - bbox_ref[1]) 478 | jaccard = tfe_math.safe_divide(inter_vol, union_vol, 'jaccard') 479 | return jaccard 480 | 481 | 482 | def bboxes_intersection(bbox_ref, bboxes, name=None): 483 | """Compute relative intersection between a reference box and a 484 | collection of bounding boxes. Namely, compute the quotient between 485 | intersection area and box area. 486 | 487 | Args: 488 | bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es). 489 | bboxes: (N, 4) Tensor, collection of bounding boxes. 490 | Return: 491 | (N,) Tensor with relative intersection. 492 | """ 493 | with tf.name_scope(name, 'bboxes_intersection'): 494 | # Should be more efficient to first transpose. 495 | bboxes = tf.transpose(bboxes) 496 | bbox_ref = tf.transpose(bbox_ref) 497 | # Intersection bbox and volume. 
498 | int_ymin = tf.maximum(bboxes[0], bbox_ref[0]) 499 | int_xmin = tf.maximum(bboxes[1], bbox_ref[1]) 500 | int_ymax = tf.minimum(bboxes[2], bbox_ref[2]) 501 | int_xmax = tf.minimum(bboxes[3], bbox_ref[3]) 502 | h = tf.maximum(int_ymax - int_ymin, 0.) 503 | w = tf.maximum(int_xmax - int_xmin, 0.) 504 | # Volumes. 505 | inter_vol = h * w 506 | bboxes_vol = (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) 507 | scores = tfe_math.safe_divide(inter_vol, bboxes_vol, 'intersection') 508 | return scores 509 | -------------------------------------------------------------------------------- /tf_extended/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanbinqi/SSD-Tensorflow/7d77fc3e4eda3109ea104f59644d6d93cb829215/tf_extended/image.py -------------------------------------------------------------------------------- /tf_extended/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional math functions. 
16 | """ 17 | import tensorflow as tf 18 | 19 | from tensorflow.python.ops import array_ops 20 | from tensorflow.python.ops import math_ops 21 | from tensorflow.python.framework import dtypes 22 | from tensorflow.python.framework import ops 23 | 24 | 25 | def safe_divide(numerator, denominator, name): 26 | """Divides two values, returning 0 if the denominator is <= 0. 27 | Args: 28 | numerator: A real `Tensor`. 29 | denominator: A real `Tensor`, with dtype matching `numerator`. 30 | name: Name for the returned op. 31 | Returns: 32 | 0 if `denominator` <= 0, else `numerator` / `denominator` 33 | """ 34 | return tf.where( 35 | math_ops.greater(denominator, 0), 36 | math_ops.divide(numerator, denominator), 37 | tf.zeros_like(numerator), 38 | name=name) 39 | 40 | 41 | def cummax(x, reverse=False, name=None): 42 | """Compute the cumulative maximum of the tensor `x` along `axis`. This 43 | operation is similar to the more classic `cumsum`. Only support 1D Tensor 44 | for now. 45 | 46 | Args: 47 | x: A `Tensor`. Must be one of the following types: `float32`, `float64`, 48 | `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, 49 | `complex128`, `qint8`, `quint8`, `qint32`, `half`. 50 | axis: A `Tensor` of type `int32` (default: 0). 51 | reverse: A `bool` (default: False). 52 | name: A name for the operation (optional). 53 | Returns: 54 | A `Tensor`. Has the same type as `x`. 55 | """ 56 | with ops.name_scope(name, "Cummax", [x]) as name: 57 | x = ops.convert_to_tensor(x, name="x") 58 | # Not very optimal: should directly integrate reverse into tf.scan. 59 | if reverse: 60 | x = tf.reverse(x, axis=[0]) 61 | # 'Accumlating' maximum: ensure it is always increasing. 
62 | cmax = tf.scan(lambda a, y: tf.maximum(a, y), x, 63 | initializer=None, parallel_iterations=1, 64 | back_prop=False, swap_memory=False) 65 | if reverse: 66 | cmax = tf.reverse(cmax, axis=[0]) 67 | return cmax 68 | -------------------------------------------------------------------------------- /tf_extended/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional metrics. 
16 | """ 17 | import tensorflow as tf 18 | import numpy as np 19 | 20 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables 21 | from tensorflow.python.framework import dtypes 22 | from tensorflow.python.framework import ops 23 | from tensorflow.python.ops import array_ops 24 | from tensorflow.python.ops import math_ops 25 | from tensorflow.python.ops import nn 26 | from tensorflow.python.ops import state_ops 27 | from tensorflow.python.ops import variable_scope 28 | from tensorflow.python.ops import variables 29 | 30 | from tf_extended import math as tfe_math 31 | 32 | 33 | # =========================================================================== # 34 | # TensorFlow utils 35 | # =========================================================================== # 36 | def _create_local(name, shape, collections=None, validate_shape=True, 37 | dtype=dtypes.float32): 38 | """Creates a new local variable. 39 | Args: 40 | name: The name of the new or existing variable. 41 | shape: Shape of the new or existing variable. 42 | collections: A list of collection names to which the Variable will be added. 43 | validate_shape: Whether to validate the shape of the variable. 44 | dtype: Data type of the variables. 45 | Returns: 46 | The created variable. 47 | """ 48 | # Make sure local variables are added to tf.GraphKeys.LOCAL_VARIABLES 49 | collections = list(collections or []) 50 | collections += [ops.GraphKeys.LOCAL_VARIABLES] 51 | return variables.Variable( 52 | initial_value=array_ops.zeros(shape, dtype=dtype), 53 | name=name, 54 | trainable=False, 55 | collections=collections, 56 | validate_shape=validate_shape) 57 | 58 | 59 | def _safe_div(numerator, denominator, name): 60 | """Divides two values, returning 0 if the denominator is <= 0. 61 | Args: 62 | numerator: A real `Tensor`. 63 | denominator: A real `Tensor`, with dtype matching `numerator`. 64 | name: Name for the returned op. 
65 | Returns: 66 | 0 if `denominator` <= 0, else `numerator` / `denominator` 67 | """ 68 | return tf.where( 69 | math_ops.greater(denominator, 0), 70 | math_ops.divide(numerator, denominator), 71 | tf.zeros_like(numerator), 72 | name=name) 73 | 74 | 75 | def _broadcast_weights(weights, values): 76 | """Broadcast `weights` to the same shape as `values`. 77 | This returns a version of `weights` following the same broadcast rules as 78 | `mul(weights, values)`. When computing a weighted average, use this function 79 | to broadcast `weights` before summing them; e.g., 80 | `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`. 81 | Args: 82 | weights: `Tensor` whose shape is broadcastable to `values`. 83 | values: `Tensor` of any shape. 84 | Returns: 85 | `weights` broadcast to `values` shape. 86 | """ 87 | weights_shape = weights.get_shape() 88 | values_shape = values.get_shape() 89 | if(weights_shape.is_fully_defined() and 90 | values_shape.is_fully_defined() and 91 | weights_shape.is_compatible_with(values_shape)): 92 | return weights 93 | return math_ops.mul( 94 | weights, array_ops.ones_like(values), name='broadcast_weights') 95 | 96 | 97 | # =========================================================================== # 98 | # TF Extended metrics: TP and FP arrays. 99 | # =========================================================================== # 100 | def precision_recall(num_gbboxes, num_detections, tp, fp, scores, 101 | dtype=tf.float64, scope=None): 102 | """Compute precision and recall from scores, true positives and false 103 | positives booleans arrays 104 | """ 105 | # Input dictionaries: dict outputs as streaming metrics. 
106 | if isinstance(scores, dict): 107 | d_precision = {} 108 | d_recall = {} 109 | for c in num_gbboxes.keys(): 110 | scope = 'precision_recall_%s' % c 111 | p, r = precision_recall(num_gbboxes[c], num_detections[c], 112 | tp[c], fp[c], scores[c], 113 | dtype, scope) 114 | d_precision[c] = p 115 | d_recall[c] = r 116 | return d_precision, d_recall 117 | 118 | # Sort by score. 119 | with tf.name_scope(scope, 'precision_recall', 120 | [num_gbboxes, num_detections, tp, fp, scores]): 121 | # Sort detections by score. 122 | scores, idxes = tf.nn.top_k(scores, k=num_detections, sorted=True) 123 | tp = tf.gather(tp, idxes) 124 | fp = tf.gather(fp, idxes) 125 | # Computer recall and precision. 126 | tp = tf.cumsum(tf.cast(tp, dtype), axis=0) 127 | fp = tf.cumsum(tf.cast(fp, dtype), axis=0) 128 | recall = _safe_div(tp, tf.cast(num_gbboxes, dtype), 'recall') 129 | precision = _safe_div(tp, tp + fp, 'precision') 130 | return tf.tuple([precision, recall]) 131 | 132 | 133 | def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores, 134 | remove_zero_scores=True, 135 | metrics_collections=None, 136 | updates_collections=None, 137 | name=None): 138 | """Streaming computation of True and False Positive arrays. This metrics 139 | also keeps track of scores and number of grountruth objects. 140 | """ 141 | # Input dictionaries: dict outputs as streaming metrics. 142 | if isinstance(scores, dict) or isinstance(fp, dict): 143 | d_values = {} 144 | d_update_ops = {} 145 | for c in num_gbboxes.keys(): 146 | scope = 'streaming_tp_fp_%s' % c 147 | v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c], 148 | remove_zero_scores, 149 | metrics_collections, 150 | updates_collections, 151 | name=scope) 152 | d_values[c] = v 153 | d_update_ops[c] = up 154 | return d_values, d_update_ops 155 | 156 | # Input Tensors... 
157 | with variable_scope.variable_scope(name, 'streaming_tp_fp', 158 | [num_gbboxes, tp, fp, scores]): 159 | num_gbboxes = math_ops.to_int64(num_gbboxes) 160 | scores = math_ops.to_float(scores) 161 | stype = tf.bool 162 | tp = tf.cast(tp, stype) 163 | fp = tf.cast(fp, stype) 164 | # Reshape TP and FP tensors and clean away 0 class values. 165 | scores = tf.reshape(scores, [-1]) 166 | tp = tf.reshape(tp, [-1]) 167 | fp = tf.reshape(fp, [-1]) 168 | # Remove TP and FP both false. 169 | mask = tf.logical_or(tp, fp) 170 | if remove_zero_scores: 171 | rm_threshold = 1e-4 172 | mask = tf.logical_and(mask, tf.greater(scores, rm_threshold)) 173 | scores = tf.boolean_mask(scores, mask) 174 | tp = tf.boolean_mask(tp, mask) 175 | fp = tf.boolean_mask(fp, mask) 176 | 177 | # Local variables accumlating information over batches. 178 | v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64) 179 | v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32) 180 | v_scores = _create_local('v_scores', shape=[0, ]) 181 | v_tp = _create_local('v_tp', shape=[0, ], dtype=stype) 182 | v_fp = _create_local('v_fp', shape=[0, ], dtype=stype) 183 | 184 | # Update operations. 185 | nobjects_op = state_ops.assign_add(v_nobjects, 186 | tf.reduce_sum(num_gbboxes)) 187 | ndetections_op = state_ops.assign_add(v_ndetections, 188 | tf.size(scores, out_type=tf.int32)) 189 | scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0), 190 | validate_shape=False) 191 | tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0), 192 | validate_shape=False) 193 | fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0), 194 | validate_shape=False) 195 | 196 | # Value and update ops. 
197 | val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores) 198 | with ops.control_dependencies([nobjects_op, ndetections_op, 199 | scores_op, tp_op, fp_op]): 200 | update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op) 201 | 202 | if metrics_collections: 203 | ops.add_to_collections(metrics_collections, val) 204 | if updates_collections: 205 | ops.add_to_collections(updates_collections, update_op) 206 | return val, update_op 207 | 208 | 209 | # =========================================================================== # 210 | # Average precision computations. 211 | # =========================================================================== # 212 | def average_precision_voc12(precision, recall, name=None): 213 | """Compute (interpolated) average precision from precision and recall Tensors. 214 | 215 | The implementation follows Pascal 2012 and ILSVRC guidelines. 216 | See also: https://sanchom.wordpress.com/tag/average-precision/ 217 | """ 218 | with tf.name_scope(name, 'average_precision_voc12', [precision, recall]): 219 | # Convert to float64 to decrease error on Riemann sums. 220 | precision = tf.cast(precision, dtype=tf.float64) 221 | recall = tf.cast(recall, dtype=tf.float64) 222 | 223 | # Add bounds values to precision and recall. 224 | precision = tf.concat([[0.], precision, [0.]], axis=0) 225 | recall = tf.concat([[0.], recall, [1.]], axis=0) 226 | # Ensures precision is increasing in reverse order. 227 | precision = tfe_math.cummax(precision, reverse=True) 228 | 229 | # Riemann sums for estimating the integral. 230 | # mean_pre = (precision[1:] + precision[:-1]) / 2. 231 | mean_pre = precision[1:] 232 | diff_rec = recall[1:] - recall[:-1] 233 | ap = tf.reduce_sum(mean_pre * diff_rec) 234 | return ap 235 | 236 | 237 | def average_precision_voc07(precision, recall, name=None): 238 | """Compute (interpolated) average precision from precision and recall Tensors. 239 | 240 | The implementation follows Pascal 2007 guidelines. 
241 | See also: https://sanchom.wordpress.com/tag/average-precision/ 242 | """ 243 | with tf.name_scope(name, 'average_precision_voc07', [precision, recall]): 244 | # Convert to float64 to decrease error on cumulated sums. 245 | precision = tf.cast(precision, dtype=tf.float64) 246 | recall = tf.cast(recall, dtype=tf.float64) 247 | # Add zero-limit value to avoid any boundary problem... 248 | precision = tf.concat([precision, [0.]], axis=0) 249 | recall = tf.concat([recall, [np.inf]], axis=0) 250 | 251 | # Split the integral into 10 bins. 252 | l_aps = [] 253 | for t in np.arange(0., 1.1, 0.1): 254 | mask = tf.greater_equal(recall, t) 255 | v = tf.reduce_max(tf.boolean_mask(precision, mask)) 256 | l_aps.append(v / 11.) 257 | ap = tf.add_n(l_aps) 258 | return ap 259 | 260 | 261 | def precision_recall_values(xvals, precision, recall, name=None): 262 | """Compute values on the precision/recall curve. 263 | 264 | Args: 265 | x: Python list of floats; 266 | precision: 1D Tensor decreasing. 267 | recall: 1D Tensor increasing. 268 | Return: 269 | list of precision values. 270 | """ 271 | with ops.name_scope(name, "precision_recall_values", 272 | [precision, recall]) as name: 273 | # Add bounds values to precision and recall. 274 | precision = tf.concat([[0.], precision, [0.]], axis=0) 275 | recall = tf.concat([[0.], recall, [1.]], axis=0) 276 | precision = tfe_math.cummax(precision, reverse=True) 277 | 278 | prec_values = [] 279 | for x in xvals: 280 | mask = tf.less_equal(recall, x) 281 | val = tf.reduce_min(tf.boolean_mask(precision, mask)) 282 | prec_values.append(val) 283 | return tf.tuple(prec_values) 284 | 285 | 286 | # =========================================================================== # 287 | # TF Extended metrics: old stuff! 
288 | # =========================================================================== # 289 | def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None): 290 | """Compute precision and recall from scores, true positives and false 291 | positives booleans arrays 292 | """ 293 | # Sort by score. 294 | with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]): 295 | # Sort detections by score. 296 | scores, idxes = tf.nn.top_k(scores, k=n_detections, sorted=True) 297 | tp = tf.gather(tp, idxes) 298 | fp = tf.gather(fp, idxes) 299 | # Computer recall and precision. 300 | dtype = tf.float64 301 | tp = tf.cumsum(tf.cast(tp, dtype), axis=0) 302 | fp = tf.cumsum(tf.cast(fp, dtype), axis=0) 303 | recall = _safe_div(tp, tf.cast(n_gbboxes, dtype), 'recall') 304 | precision = _safe_div(tp, tp + fp, 'precision') 305 | 306 | return tf.tuple([precision, recall]) 307 | 308 | 309 | def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores, 310 | tp_tensor, fp_tensor, 311 | remove_zero_labels=True, 312 | metrics_collections=None, 313 | updates_collections=None, 314 | name=None): 315 | """Streaming computation of precision / recall arrays. This metrics 316 | keeps tracks of boolean True positives and False positives arrays. 317 | """ 318 | with variable_scope.variable_scope(name, 'stream_precision_recall', 319 | [n_gbboxes, rclasses, tp_tensor, fp_tensor]): 320 | n_gbboxes = math_ops.to_int64(n_gbboxes) 321 | rclasses = math_ops.to_int64(rclasses) 322 | rscores = math_ops.to_float(rscores) 323 | 324 | stype = tf.int32 325 | tp_tensor = tf.cast(tp_tensor, stype) 326 | fp_tensor = tf.cast(fp_tensor, stype) 327 | 328 | # Reshape TP and FP tensors and clean away 0 class values. 
329 | rclasses = tf.reshape(rclasses, [-1]) 330 | rscores = tf.reshape(rscores, [-1]) 331 | tp_tensor = tf.reshape(tp_tensor, [-1]) 332 | fp_tensor = tf.reshape(fp_tensor, [-1]) 333 | if remove_zero_labels: 334 | mask = tf.greater(rclasses, 0) 335 | rclasses = tf.boolean_mask(rclasses, mask) 336 | rscores = tf.boolean_mask(rscores, mask) 337 | tp_tensor = tf.boolean_mask(tp_tensor, mask) 338 | fp_tensor = tf.boolean_mask(fp_tensor, mask) 339 | 340 | # Local variables accumlating information over batches. 341 | v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64) 342 | v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32) 343 | v_scores = _create_local('v_scores', shape=[0, ]) 344 | v_tp = _create_local('v_tp', shape=[0, ], dtype=stype) 345 | v_fp = _create_local('v_fp', shape=[0, ], dtype=stype) 346 | 347 | # Update operations. 348 | nobjects_op = state_ops.assign_add(v_nobjects, 349 | tf.reduce_sum(n_gbboxes)) 350 | ndetections_op = state_ops.assign_add(v_ndetections, 351 | tf.size(rscores, out_type=tf.int32)) 352 | scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0), 353 | validate_shape=False) 354 | tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0), 355 | validate_shape=False) 356 | fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0), 357 | validate_shape=False) 358 | 359 | # Precision and recall computations. 
360 | # r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value') 361 | r = _precision_recall(v_nobjects, v_ndetections, v_scores, 362 | v_tp, v_fp, 'value') 363 | 364 | with ops.control_dependencies([nobjects_op, ndetections_op, 365 | scores_op, tp_op, fp_op]): 366 | update_op = _precision_recall(nobjects_op, ndetections_op, 367 | scores_op, tp_op, fp_op, 'update_op') 368 | 369 | # update_op = tf.Print(update_op, 370 | # [tf.reduce_sum(tf.cast(mask, tf.int64)), 371 | # tf.reduce_sum(tf.cast(mask2, tf.int64)), 372 | # tf.reduce_min(rscores), 373 | # tf.reduce_sum(n_gbboxes)], 374 | # 'Metric: ') 375 | # Some debugging stuff! 376 | # update_op = tf.Print(update_op, 377 | # [tf.shape(tp_op), 378 | # tf.reduce_sum(tf.cast(tp_op, tf.int64), axis=0)], 379 | # 'TP and FP shape: ') 380 | # update_op[0] = tf.Print(update_op, 381 | # [nobjects_op], 382 | # '# Groundtruth bboxes: ') 383 | # update_op = tf.Print(update_op, 384 | # [update_op[0][0], 385 | # update_op[0][-1], 386 | # tf.reduce_min(update_op[0]), 387 | # tf.reduce_max(update_op[0]), 388 | # tf.reduce_min(update_op[1]), 389 | # tf.reduce_max(update_op[1])], 390 | # 'Precision and recall :') 391 | 392 | if metrics_collections: 393 | ops.add_to_collections(metrics_collections, r) 394 | if updates_collections: 395 | ops.add_to_collections(updates_collections, update_op) 396 | return r, update_op 397 | 398 | -------------------------------------------------------------------------------- /tf_extended/tensors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional tensors operations. 16 | """ 17 | import tensorflow as tf 18 | 19 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables 20 | from tensorflow.contrib.metrics.python.ops import set_ops 21 | from tensorflow.python.framework import dtypes 22 | from tensorflow.python.framework import ops 23 | from tensorflow.python.framework import sparse_tensor 24 | from tensorflow.python.ops import array_ops 25 | from tensorflow.python.ops import check_ops 26 | from tensorflow.python.ops import control_flow_ops 27 | from tensorflow.python.ops import math_ops 28 | from tensorflow.python.ops import nn 29 | from tensorflow.python.ops import state_ops 30 | from tensorflow.python.ops import variable_scope 31 | from tensorflow.python.ops import variables 32 | 33 | 34 | def get_shape(x, rank=None): 35 | """Returns the dimensions of a Tensor as list of integers or scale tensors. 36 | 37 | Args: 38 | x: N-d Tensor; 39 | rank: Rank of the Tensor. If None, will try to guess it. 40 | Returns: 41 | A list of `[d1, d2, ..., dN]` corresponding to the dimensions of the 42 | input tensor. Dimensions that are statically known are python integers, 43 | otherwise they are integer scalar tensors. 
44 | """ 45 | if x.get_shape().is_fully_defined(): 46 | return x.get_shape().as_list() 47 | else: 48 | static_shape = x.get_shape() 49 | if rank is None: 50 | static_shape = static_shape.as_list() 51 | rank = len(static_shape) 52 | else: 53 | static_shape = x.get_shape().with_rank(rank).as_list() 54 | dynamic_shape = tf.unstack(tf.shape(x), rank) 55 | return [s if s is not None else d 56 | for s, d in zip(static_shape, dynamic_shape)] 57 | 58 | 59 | def pad_axis(x, offset, size, axis=0, name=None): 60 | """Pad a tensor on an axis, with a given offset and output size. 61 | The tensor is padded with zero (i.e. CONSTANT mode). Note that the if the 62 | `size` is smaller than existing size + `offset`, the output tensor 63 | was the latter dimension. 64 | 65 | Args: 66 | x: Tensor to pad; 67 | offset: Offset to add on the dimension chosen; 68 | size: Final size of the dimension. 69 | Return: 70 | Padded tensor whose dimension on `axis` is `size`, or greater if 71 | the input vector was larger. 72 | """ 73 | with tf.name_scope(name, 'pad_axis'): 74 | shape = get_shape(x) 75 | rank = len(shape) 76 | # Padding description. 77 | new_size = tf.maximum(size-offset-shape[axis], 0) 78 | pad1 = tf.stack([0]*axis + [offset] + [0]*(rank-axis-1)) 79 | pad2 = tf.stack([0]*axis + [new_size] + [0]*(rank-axis-1)) 80 | paddings = tf.stack([pad1, pad2], axis=1) 81 | x = tf.pad(x, paddings, mode='CONSTANT') 82 | # Reshape, to get fully defined shape if possible. 83 | # TODO: fix with tf.slice 84 | shape[axis] = size 85 | x = tf.reshape(x, tf.stack(shape)) 86 | return x 87 | 88 | 89 | # def select_at_index(idx, val, t): 90 | # """Return a tensor. 
91 | # """ 92 | # idx = tf.expand_dims(tf.expand_dims(idx, 0), 0) 93 | # val = tf.expand_dims(val, 0) 94 | # t = t + tf.scatter_nd(idx, val, tf.shape(t)) 95 | # return t 96 | -------------------------------------------------------------------------------- /tf_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Diverse TensorFlow utils, for training, evaluation and so on! 16 | """ 17 | import os 18 | from pprint import pprint 19 | 20 | import tensorflow as tf 21 | from tensorflow.contrib.slim.python.slim.data import parallel_reader 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | # =========================================================================== # 27 | # General tools. 28 | # =========================================================================== # 29 | def reshape_list(l, shape=None): 30 | """Reshape list of (list): 1D to 2D or the other way around. 31 | 32 | Args: 33 | l: List or List of list. 34 | shape: 1D or 2D shape. 35 | Return 36 | Reshaped list. 37 | """ 38 | r = [] 39 | if shape is None: 40 | # Flatten everything. 41 | for a in l: 42 | if isinstance(a, (list, tuple)): 43 | r = r + list(a) 44 | else: 45 | r.append(a) 46 | else: 47 | # Reshape to list of list. 
48 | i = 0 49 | for s in shape: 50 | if s == 1: 51 | r.append(l[i]) 52 | else: 53 | r.append(l[i:i+s]) 54 | i += s 55 | return r 56 | 57 | 58 | # =========================================================================== # 59 | # Training utils. 60 | # =========================================================================== # 61 | def print_configuration(flags, ssd_params, data_sources, save_dir=None): 62 | """Print the training configuration. 63 | """ 64 | def print_config(stream=None): 65 | #print('\n# =========================================================================== #', file=stream) 66 | #print('# Training | Evaluation flags:', file=stream) 67 | #print('# =========================================================================== #', file=stream) 68 | pprint(flags, stream=stream) 69 | 70 | #print('\n# =========================================================================== #', file=stream) 71 | #print('# SSD net parameters:', file=stream) 72 | #print('# =========================================================================== #', file=stream) 73 | pprint(dict(ssd_params._asdict()), stream=stream) 74 | 75 | #print('\n# =========================================================================== #', file=stream) 76 | #print('# Training | Evaluation dataset files:', file=stream) 77 | #print('# =========================================================================== #', file=stream) 78 | data_files = parallel_reader.get_data_files(data_sources) 79 | pprint(sorted(data_files), stream=stream) 80 | #print('', file=stream) 81 | 82 | print_config(None) 83 | # Save to a text file as well. 84 | if save_dir is not None: 85 | if not os.path.exists(save_dir): 86 | os.makedirs(save_dir) 87 | path = os.path.join(save_dir, 'training_config.txt') 88 | with open(path, "w") as out: 89 | print_config(out) 90 | 91 | 92 | def configure_learning_rate(flags, num_samples_per_epoch, global_step): 93 | """Configures the learning rate. 
94 | 95 | Args: 96 | num_samples_per_epoch: The number of samples in each epoch of training. 97 | global_step: The global_step tensor. 98 | Returns: 99 | A `Tensor` representing the learning rate. 100 | """ 101 | decay_steps = int(num_samples_per_epoch / flags.batch_size * 102 | flags.num_epochs_per_decay) 103 | 104 | if flags.learning_rate_decay_type == 'exponential': 105 | return tf.train.exponential_decay(flags.learning_rate, 106 | global_step, 107 | decay_steps, 108 | flags.learning_rate_decay_factor, 109 | staircase=True, 110 | name='exponential_decay_learning_rate') 111 | elif flags.learning_rate_decay_type == 'fixed': 112 | return tf.constant(flags.learning_rate, name='fixed_learning_rate') 113 | elif flags.learning_rate_decay_type == 'polynomial': 114 | return tf.train.polynomial_decay(flags.learning_rate, 115 | global_step, 116 | decay_steps, 117 | flags.end_learning_rate, 118 | power=1.0, 119 | cycle=False, 120 | name='polynomial_decay_learning_rate') 121 | else: 122 | raise ValueError('learning_rate_decay_type [%s] was not recognized', 123 | flags.learning_rate_decay_type) 124 | 125 | 126 | def configure_optimizer(flags, learning_rate): 127 | """Configures the optimizer used for training. 128 | 129 | Args: 130 | learning_rate: A scalar or `Tensor` learning rate. 131 | Returns: 132 | An instance of an optimizer. 
def configure_optimizer(flags, learning_rate):
    """Configures the optimizer used for training.

    Args:
      flags: Object exposing `optimizer` (one of 'adadelta', 'adagrad',
        'adam', 'ftrl', 'momentum', 'rmsprop', 'sgd') and the
        hyper-parameters read by the selected branch.
      learning_rate: A scalar or `Tensor` learning rate.
    Returns:
      An instance of an optimizer.
    Raises:
      ValueError: if `flags.optimizer` is not recognized.
    """
    if flags.optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate,
            rho=flags.adadelta_rho,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(
            learning_rate,
            initial_accumulator_value=flags.adagrad_initial_accumulator_value)
    elif flags.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate,
            beta1=flags.adam_beta1,
            beta2=flags.adam_beta2,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(
            learning_rate,
            learning_rate_power=flags.ftrl_learning_rate_power,
            initial_accumulator_value=flags.ftrl_initial_accumulator_value,
            l1_regularization_strength=flags.ftrl_l1,
            l2_regularization_strength=flags.ftrl_l2)
    elif flags.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate,
            momentum=flags.momentum,
            name='Momentum')
    elif flags.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate,
            decay=flags.rmsprop_decay,
            momentum=flags.rmsprop_momentum,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    else:
        # Format the message here; a second positional argument to ValueError
        # would leave the '%s' placeholder un-interpolated.
        raise ValueError('Optimizer [%s] was not recognized'
                         % (flags.optimizer,))
    return optimizer


def add_variables_summaries(learning_rate):
    """Builds histogram summaries for model variables plus the learning rate.

    Args:
      learning_rate: Scalar or `Tensor` logged under 'training/Learning Rate'.
    Returns:
      A list with one histogram summary per variable returned by
      `slim.get_model_variables()`, followed by the learning-rate scalar
      summary.
    """
    summaries = [tf.summary.histogram(variable.op.name, variable)
                 for variable in slim.get_model_variables()]
    summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate))
    return summaries


def update_model_scope(var, ckpt_scope, new_scope):
    """Maps a variable name from the model scope to the checkpoint scope.

    Args:
      var: Object whose `op.name` is the variable name in the current graph.
      ckpt_scope: Scope name used inside the checkpoint.
      new_scope: Scope name used by the model being built.
    Returns:
      `var.op.name` with every occurrence of `new_scope` replaced by
      `ckpt_scope`.
    """
    # The checkpoint scope used to be hard-coded to 'vgg_16', silently
    # ignoring the `ckpt_scope` argument.
    return var.op.name.replace(new_scope, ckpt_scope)
def get_init_fn(flags):
    """Returns a function run by the chief worker to warm-start the training.

    Note that the init_fn is only run when initializing the model during the
    very first global step.

    Args:
      flags: Object exposing `checkpoint_path`, `train_dir`,
        `checkpoint_exclude_scopes`, `checkpoint_model_scope`, `model_name`
        and `ignore_missing_vars`.
    Returns:
      An init function run by the supervisor, or None when no checkpoint path
      is given or when `flags.train_dir` already holds a checkpoint.
    Raises:
      ValueError: if `flags.checkpoint_path` is a directory in which no
        checkpoint can be found.
    """
    if flags.checkpoint_path is None:
        return None
    # Warn the user if a checkpoint exists in the train_dir. Then ignore.
    if tf.train.latest_checkpoint(flags.train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % flags.train_dir)
        return None

    exclusions = ()
    if flags.checkpoint_exclude_scopes:
        # Drop empty entries (e.g. from a trailing comma): the empty string is
        # a prefix of every name and would exclude all variables.
        stripped = (scope.strip()
                    for scope in flags.checkpoint_exclude_scopes.split(','))
        exclusions = tuple(scope for scope in stripped if scope)

    # str.startswith accepts a tuple of prefixes; an empty tuple never matches.
    variables_to_restore = [var for var in slim.get_model_variables()
                            if not var.op.name.startswith(exclusions)]
    # Change model scope if necessary.
    if flags.checkpoint_model_scope is not None:
        variables_to_restore = \
            {var.op.name.replace(flags.model_name,
                                 flags.checkpoint_model_scope): var
             for var in variables_to_restore}

    if tf.gfile.IsDirectory(flags.checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(flags.checkpoint_path)
        if checkpoint_path is None:
            # Fail early with a clear message instead of handing None to slim.
            raise ValueError('No checkpoint found in directory %s'
                             % flags.checkpoint_path)
    else:
        checkpoint_path = flags.checkpoint_path
    tf.logging.info('Fine-tuning from %s. Ignoring missing vars: %s'
                    % (checkpoint_path, flags.ignore_missing_vars))

    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=flags.ignore_missing_vars)
def get_variables_to_train(flags):
    """Returns a list of variables to train.

    Args:
      flags: Object exposing `trainable_scopes`, either None or a
        comma-separated string of scope names.
    Returns:
      A list of variables to train by the optimizer. When
      `flags.trainable_scopes` is None this is `tf.trainable_variables()`;
      otherwise it is the trainable variables collected scope by scope, each
      variable appearing once.
    """
    if flags.trainable_scopes is None:
        return tf.trainable_variables()

    scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]

    variables_to_train = []
    # Scope filters behave as name prefixes, so overlapping scopes such as
    # 'net/block8' and 'net/block8_box' return the same variable twice. Keep
    # the first occurrence only, so that no variable ends up with duplicate
    # (gradient, variable) pairs in the optimizer.
    seen_names = set()
    for scope in scopes:
        for variable in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope):
            if variable.name in seen_names:
                continue
            seen_names.add(variable.name)
            variables_to_train.append(variable)
    return variables_to_train
# =========================================================================== #
# SSD Network flags.
# =========================================================================== #
tf.app.flags.DEFINE_float(
    'loss_alpha', 1., 'Alpha parameter in the loss function.')
tf.app.flags.DEFINE_float(
    'negative_ratio', 3., 'Negative ratio in the loss function.')
tf.app.flags.DEFINE_float(
    'match_threshold', 0.5, 'Matching threshold in the loss function.')

# =========================================================================== #
# General Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'train_dir', './logs/',
    'Directory where checkpoints and event logs are written to.')
tf.app.flags.DEFINE_integer('num_clones', 1,
                            'Number of model clones to deploy.')
tf.app.flags.DEFINE_boolean('clone_on_cpu', False,
                            'Use CPUs to deploy clones.')
tf.app.flags.DEFINE_integer(
    'num_readers', 4,
    'The number of parallel readers that read data from the dataset.')
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads', 4,
    'The number of threads used to create the batches.')

tf.app.flags.DEFINE_integer(
    'log_every_n_steps', 10,
    'The frequency with which logs are printed.')
tf.app.flags.DEFINE_integer(
    'save_summaries_secs', 60,
    'The frequency with which summaries are saved, in seconds.')
tf.app.flags.DEFINE_integer(
    'save_interval_secs', 600,
    'The frequency with which the model is saved, in seconds.')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 0.8, 'GPU memory fraction to use.')

# =========================================================================== #
# Optimization Flags.
# =========================================================================== #
tf.app.flags.DEFINE_float(
    'weight_decay', 0.0005, 'The weight decay on the model weights.')
# Adjacent string literals are concatenated verbatim, so each fragment of a
# multi-part help text must carry its own separating space.
tf.app.flags.DEFINE_string(
    'optimizer', 'sgd',
    'The name of the optimizer, one of "adadelta", "adagrad", "adam", '
    '"ftrl", "momentum", "sgd" or "rmsprop".')
tf.app.flags.DEFINE_float(
    'adadelta_rho', 0.95,
    'The decay rate for adadelta.')
tf.app.flags.DEFINE_float(
    'adagrad_initial_accumulator_value', 0.1,
    'Starting value for the AdaGrad accumulators.')
tf.app.flags.DEFINE_float(
    'adam_beta1', 0.9,
    'The exponential decay rate for the 1st moment estimates.')
tf.app.flags.DEFINE_float(
    'adam_beta2', 0.999,
    'The exponential decay rate for the 2nd moment estimates.')
tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')
tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
                          'The learning rate power.')
tf.app.flags.DEFINE_float(
    'ftrl_initial_accumulator_value', 0.1,
    'Starting value for the FTRL accumulators.')
tf.app.flags.DEFINE_float(
    'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')
tf.app.flags.DEFINE_float(
    'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')
# RMSProp reads its own `rmsprop_momentum` flag, so this one only concerns
# the MomentumOptimizer.
tf.app.flags.DEFINE_float(
    'momentum', 0.9,
    'The momentum for the MomentumOptimizer.')
tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.')
tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')

# =========================================================================== #
# Learning Rate Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'learning_rate_decay_type',
    'fixed',
    'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
    ' or "polynomial"')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
tf.app.flags.DEFINE_float(
    'end_learning_rate', 0.00001,
    'The minimal end learning rate used by a polynomial decay learning rate.')
tf.app.flags.DEFINE_float(
    'label_smoothing', 0.0, 'The amount of label smoothing.')
tf.app.flags.DEFINE_float(
    'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.')
tf.app.flags.DEFINE_float(
    'num_epochs_per_decay', 2.0,
    'Number of epochs after which learning rate decays.')
tf.app.flags.DEFINE_float(
    'moving_average_decay', None,
    'The decay to use for the moving average. '
    'If left as None, then moving averages are not used.')

# =========================================================================== #
# Dataset Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'dataset_name', 'pascalvoc_2007', 'The name of the dataset to load.')
tf.app.flags.DEFINE_integer(
    'num_classes', 21, 'Number of classes to use in the dataset.')
tf.app.flags.DEFINE_string(
    'dataset_split_name', 'train', 'The name of the train/test split.')
tf.app.flags.DEFINE_string(
    'dataset_dir', './tfrecords/', 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_integer(
    'labels_offset', 0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
    'model_name', 'ssd_300_vgg', 'The name of the architecture to train.')
tf.app.flags.DEFINE_string(
    'preprocessing_name', None, 'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')
tf.app.flags.DEFINE_integer(
    'batch_size', 32, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
    'train_image_size', None, 'Train image size')
tf.app.flags.DEFINE_integer('max_number_of_steps', None,
                            'The maximum number of training steps.')

# =========================================================================== #
# Fine-Tuning Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'checkpoint_path', './checkpoints/vgg_16.ckpt',
    'The path to a checkpoint from which to fine-tune.')
tf.app.flags.DEFINE_string(
    'checkpoint_model_scope', 'vgg_16',
    'Model scope in the checkpoint. None if the same as the trained model.')
tf.app.flags.DEFINE_string(
    'checkpoint_exclude_scopes', 'ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box',
    'Comma-separated list of scopes of variables to exclude when restoring '
    'from a checkpoint.')
tf.app.flags.DEFINE_string(
    'trainable_scopes', 'ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box',
    'Comma-separated list of scopes to filter the set of variables to train. '
    'By default, None would train all the variables.')
tf.app.flags.DEFINE_boolean(
    'ignore_missing_vars', False,
    'When restoring a checkpoint would ignore missing variables.')

# Parsed flag values, read by main().
FLAGS = tf.app.flags.FLAGS
# =========================================================================== #
# Main training routine.
# =========================================================================== #
def main(_):
    """Builds the SSD training graph from FLAGS and runs the slim training loop.

    The steps are, in order: deployment configuration, dataset and network
    selection, the queued input pipeline, one model clone per configured
    device, summaries, optimizer and gradient updates, and finally
    `slim.learning.train`.

    Args:
      _: Unused positional argument supplied by `tf.app.run()`.
    Raises:
      ValueError: if `FLAGS.dataset_dir` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Config model_deploy. Keep TF Slim Models structure.
        # Useful if multiple GPUs and/or servers are needed in the future.
        # Here: a single replica and no parameter-server tasks.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        # Create global_step.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors. The default parameters of the
        # network class are kept, except for the number of classes.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function; falls back to the model name
        # when no preprocessing name is given.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        # Print the configuration and save a copy in the training directory.
        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.train_dir)
        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Get for SSD network: image, labels, bboxes.
            # NOTE: `shape` is fetched but not used further down.
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'object/label',
                                                             'object/bbox'])
            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT)
            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Grouping handed to tf_utils.reshape_list: one entry for the
            # image, then len(ssd_anchors) entries for each of the three
            # encoded outputs.
            # NOTE(review): assumes bboxes_encode returns one tensor per
            # anchor layer -- confirm against ssd_net.
            batch_shape = [1] + [len(ssd_anchors)] * 3

            # Training batches and queue. The nested structure is flattened
            # for tf.train.batch and regrouped afterwards.
            r = tf.train.batch(
                tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple
            clones of network_fn."""
            # Dequeue batch.
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct SSD network.
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                          data_format=DATA_FORMAT)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    ssd_net.net(b_image, is_training=True)
            # Add loss function. The return value is discarded.
            # NOTE(review): presumably the losses are registered in graph
            # collections and picked up by optimize_clones -- confirm.
            ssd_net.losses(logits, localisations,
                           b_gclasses, b_glocalisations, b_gscores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))
        # Add summaries for losses and extra losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                             dataset.num_samples,
                                                             global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # optimize_clones returns the total loss and the gradients to apply
        # for the selected variables.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        # Evaluating train_tensor yields the total loss, but only after all
        # grouped update ops have run.
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries created under the first clone's scope to the
        # set gathered so far.
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)


# Script entry point: tf.app.run() parses the flags and then calls main().
if __name__ == '__main__':
    tf.app.run()
def colors_subselect(colors, num_classes=21):
    """Pick `num_classes` evenly spaced entries from a color table.

    Entries are taken at indices 0, dt, 2*dt, ... where
    dt = len(colors) // num_classes.

    Args:
      colors: Indexable sequence of color tuples/lists.
      num_classes: Number of colors to return.
    Returns:
      A list of `num_classes` lists of channel values. An entry whose first
      channel is a Python float has every channel scaled by 255 and truncated
      to int; any other entry is copied unchanged into a new list.
    """
    stride = len(colors) // num_classes

    def _as_channel_list(entry):
        # Float entries are rescaled to integer channel values.
        if isinstance(entry[0], float):
            return [int(channel * 255) for channel in entry]
        return list(entry)

    return [_as_channel_list(colors[index * stride])
            for index in range(num_classes)]
# =========================================================================== #
# OpenCV drawing.
# =========================================================================== #
# Default colors below are tuples rather than lists, so that no mutable
# object is shared between calls.
def draw_lines(img, lines, color=(255, 0, 0), thickness=2):
    """Draw a collection of lines on an image, in place.

    Args:
      img: Image passed to `cv2.line`.
      lines: Iterable whose items are themselves iterables of
        (x1, y1, x2, y2) segments.
      color: Color passed to OpenCV.
      thickness: Line thickness passed to OpenCV.
    """
    for line in lines:
        for x1, y1, x2, y2 in line:
            cv2.line(img, (x1, y1), (x2, y2), color, thickness)


def draw_rectangle(img, p1, p2, color=(255, 0, 0), thickness=2):
    """Draw a rectangle on an image, in place.

    Args:
      img: Image passed to `cv2.rectangle`.
      p1: First corner; its two coordinates are reversed before the OpenCV
        call.
      p2: Opposite corner, reversed the same way.
      color: Color passed to OpenCV.
      thickness: Line thickness passed to OpenCV.
    """
    cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)


def draw_bbox(img, bbox, shape, label, color=(255, 0, 0), thickness=2):
    """Draw one relative bounding box and its label on an image, in place.

    Args:
      img: Image passed to OpenCV.
      bbox: Four relative coordinates; entries 0 and 2 are scaled by
        `shape[0]`, entries 1 and 3 by `shape[1]`.
      shape: Sequence whose first two entries scale the box coordinates.
      label: Object rendered with `str(label)` next to the box.
      color: Color passed to OpenCV.
      thickness: Line thickness of the rectangle.
    """
    p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
    p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
    # Points are reversed before being handed to OpenCV.
    cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
    # Shift the text anchor by 15 along the first coordinate.
    p1 = (p1[0]+15, p1[1])
    cv2.putText(img, str(label), p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)


def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2):
    """Draw every box with its class and score on an image, in place.

    Args:
      img: Image drawn on; its `shape` scales the relative coordinates.
      classes: Per-box class ids, also used to index `colors`.
      scores: Per-box scores, printed with three decimals.
      bboxes: Array-like with a `shape` attribute; row `i` holds the four
        relative coordinates of box `i`, laid out as in `draw_bbox`.
      colors: Indexable collection mapping a class id to a color.
      thickness: Line thickness of the rectangles.
    """
    shape = img.shape
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        color = colors[classes[i]]
        # Draw bounding box...
        p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
        p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
        cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
        # Draw text...
        s = '%s/%.3f' % (classes[i], scores[i])
        p1 = (p1[0]-5, p1[1])
        cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
# =========================================================================== #
# Matplotlib show...
# =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
    """Show an image with its detections overlaid, using matplotlib.

    Each box with a non-negative class id is drawn as an unfilled rectangle
    with a '<class id> | <score>' caption. Every class id gets a random color
    the first time it is seen and keeps it for the rest of the call.

    Args:
      img: Image array; `img.shape[0]` and `img.shape[1]` are used as height
        and width.
      classes: Array of class ids, one per box.
      scores: Per-box scores.
      bboxes: 2-D array indexed as `bboxes[i, k]`, with columns
        (ymin, xmin, ymax, xmax) in relative coordinates.
      figsize: Figure size passed to `plt.figure`.
      linewidth: Width of the rectangle edges.
    """
    plt.figure(figsize=figsize)
    plt.imshow(img)
    img_height, img_width = img.shape[0], img.shape[1]
    class_colors = {}
    for idx in range(classes.shape[0]):
        cls_id = int(classes[idx])
        if cls_id < 0:
            # Negative ids are skipped.
            continue
        score = scores[idx]
        if cls_id not in class_colors:
            class_colors[cls_id] = (random.random(), random.random(), random.random())
        box_color = class_colors[cls_id]
        # Relative coordinates -> integer pixel positions.
        ymin = int(bboxes[idx, 0] * img_height)
        xmin = int(bboxes[idx, 1] * img_width)
        ymax = int(bboxes[idx, 2] * img_height)
        xmax = int(bboxes[idx, 3] * img_width)
        outline = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                fill=False,
                                edgecolor=box_color,
                                linewidth=linewidth)
        plt.gca().add_patch(outline)
        caption = '{:s} | {:.3f}'.format(str(cls_id), score)
        plt.gca().text(xmin, ymin - 2, caption,
                       bbox=dict(facecolor=box_color, alpha=0.5),
                       fontsize=12, color='white')
    plt.show()